Commit e3ba1ed8 authored by Damien Leroux's avatar Damien Leroux
Browse files

Big commit pre-merge back into master.

parent 82f7cf46
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
GCC_VERSION=-4.9
CXX=g++$(GCC_VERSION) --std=gnu++0x -Wno-unused-local-typedefs -fPIC
#CXX=clang++ -std=c++11 -stdlib=libc++
INC=-I${ROOT_DIR}/include -I${ROOT_DIR}/include/input -I/home/daleroux/include/eigen3 -I/home/daleroux/include -I${ROOT_DIR}/include/bayes
COV=gcov #llvm-cov-3.4 #gcov #$(GCC_VERSION)
LD=$(CXX)
#LD=g++-4.9
#CXX=clang++
COV=gcov$(GCC_VERSION)
LD=$(CXX)
LIBS=-rdynamic -lexpat -ldl
VERSION_PATCH=$(shell git rev-list --count `git rev-list --tags --max-count=1`..HEAD 2>/dev/null || echo 0)
VERSION_DEFINES=$(shell git tag | tail -1 | sed 's/\([0-9]*\)[.]\([0-9]*\)\([.][0-9]*\)\?/-DVERSION_MAJOR=\\\"\1\\\" -DVERSION_MINOR=\\\"\2\\\" -DVERSION_PATCH=\\\"$(VERSION_PATCH)\\\"/')
CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
#CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
#DEBUG_OPTS=-ggdb -O0
OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-ggdb -O2 -DEIGEN_NO_DEBUG -DNDEBUG
#DEBUG_OPTS=-ggdb -DNDEBUG
#OPT_OPTS=-O3 -DNDEBUG
#PROF_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG -g -pg
#DEBUG_OPTS=-ggdb
#OPT_OPTS=-O3 -DNDEBUG
#OPT_OPTS=-O -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O -DNDEBUG
#OPT_OPTS=-DNDEBUG
#OPT_OPTS=-O3 -DNDEBUG
#OPT_OPTS=-O3 -DNDEBUG
#DEBUG_OPTS=-Winvalid-pch -ggdb #-H
#DEBUG_OPTS=-ggdb -DNDEBUG
#EXTRA_OPTS=-g
#EXTRA_OPTS=-fomit-frame-pointer -g
#OPT_OPTS=-O0 -ggdb -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG $(EXTRA_OPTS)
#OPT_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG -g -fPIE -pie -fsanitize=thread
#OPT_OPTS=-O0 -ggdb -DEIGEN_NO_DEBUG -DNDEBUG -g -fPIE -pie -fsanitize=thread
#COV_OPTS=$(DEBUG_OPTS) --coverage
C=$(CXX) $(CXXARGS) $(INC) $(OPT_OPTS) $(DEBUG_OPTS) $(PROF_OPTS) $(EXTRA_OPTS)
L=$(LD) $(CXXARGS) $(INC) $(OPT_OPTS) $(DEBUG_OPTS) $(PROF_OPTS) $(EXTRA_OPTS)
......@@ -52,6 +52,7 @@ struct bn_settings_t {
/* cleanup */
std::set<std::string> job_filenames;
std::mutex job_filenames_mutex;
bn_settings_t()
: scheme(JDS_None)
......@@ -79,6 +80,7 @@ struct bn_settings_t {
, marker_names()
, marker_index()
, job_filenames()
, job_filenames_mutex()
{}
......@@ -94,10 +96,15 @@ struct bn_settings_t {
static bn_settings_t* from_args(int argc, const char** argv);
std::string
job_filename(const char* job_name, size_t job) const
job_filename(const char* job_name, size_t job)
{
/*bn_settings_t* unconst_this = const_cast<bn_settings_t*>(this);*/
/*if (!unconst_this) {*/
/*MSG_ERROR("Couldn't unconst the pointer to settings.", "Sacrifice a goat and hope for the best.");*/
/*}*/
std::string ret = MESSAGE(work_directory << '/' << job_name << '.' << job << ".data");
const_cast<bn_settings_t*>(this)->job_filenames.insert(ret);
std::lock_guard<std::mutex> lock(/*unconst_this->*/job_filenames_mutex);
/*unconst_this->*/job_filenames.insert(ret);
return ret;
}
......@@ -110,7 +117,7 @@ struct bn_settings_t {
}
void
load_full_pedigree_data() const
load_full_pedigree_data()
{
/* NOT const. But called in a const context (job "collect-LV").
* It is just not possible in EVERY case to handle this before the job is scheduled,
......@@ -118,7 +125,8 @@ struct bn_settings_t {
* refactoring later with some settings->prepare_job(job_name) just before the job
* is started.
*/
const_cast<pedigree_type&>(pedigree).load(pedigree_filename, false);
/*const_cast<pedigree_type&>(pedigree).load(pedigree_filename, false);*/
pedigree.load(pedigree_filename, false);
}
};
......
......@@ -3,8 +3,8 @@
#include "bayes/cli.h"
typedef std::function<bool(const bn_settings_t*, size_t)> job_type;
typedef std::function<size_t(const bn_settings_t*)> count_jobs_type;
typedef std::function<bool(bn_settings_t*, size_t)> job_type;
typedef std::function<size_t(bn_settings_t*)> count_jobs_type;
extern std::map<std::string, std::pair<count_jobs_type, job_type>> job_registry;
......
......@@ -61,6 +61,12 @@ struct bn_message_type {
return it->second;
}
const_iterator
find(const genotype_comb_type::key_list& keys) const
{
return m_map.find(keys);
}
double
default_val() const { return m_default_val; }
......@@ -965,11 +971,15 @@ struct compute_labels {
struct factor_graph {
factor_graph()
: m_variable_domains(), m_factors(), m_interfaces(), m_n_alleles(0)
, m_updaters(), m_extracters(), m_message_headers()
, m_noise(0)
{}
factor_graph(const pedigree_type& ped, size_t n_alleles)
factor_graph(const pedigree_type& ped, size_t n_alleles, double noise=0)
: m_variable_domains(), m_factors(), m_interfaces()
, m_n_alleles(n_alleles)
, m_updaters(), m_extracters(), m_message_headers()
, m_noise(noise)
{
/*compute_factors_and_domains(ped);*/
/*compute_interfaces();*/
......@@ -1324,6 +1334,23 @@ struct factor_graph {
compute_interfaces();
}
void
init_variable_message(size_t var, const bn_message_type& ref, bn_message_type& vm) const
{
MSG_DEBUG("Using noise value = " << m_noise);
double eps_2 = m_noise * .5;
double noisy_one = 1. - m_noise;
for (const bn_label_type& lab: m_variable_domains.find(var)->second) {
genotype_comb_type::key_list kl = {{var, lab}};
auto it = ref.find(kl);
if (it == ref.end()) {
vm.force_set(kl, eps_2);
} else {
vm.force_set(kl, eps_2 + noisy_one * ref[kl]);
}
}
}
friend
std::ostream&
operator << (std::ostream& os, const factor_graph& fg)
......@@ -1474,16 +1501,23 @@ struct factor_graph {
instance_type&
evidence(size_t var, bn_label_type label, double prob)
{
m_observations.force_set({{var, label}}, prob);
m_observations.force_set({{var, label}}, m_fg->m_noise * .5 + (1. - m_fg->m_noise) * prob);
return *this;
}
instance_type&
evidence(const bn_message_type& buffer)
{
std::set<size_t> vars;
for (const auto& kv: buffer) {
m_observations.set(kv.first, kv.second);
vars.insert(kv.first.begin()->parent);
}
for (size_t v: vars) {
m_fg->init_variable_message(v, buffer, m_observations);
}
/*for (const auto& kv: buffer) {*/
/*m_observations.set(kv.first, kv.second);*/
/*}*/
return *this;
}
......@@ -1798,6 +1832,9 @@ struct factor_graph {
auto mupd_rw = [&](STREAM_TYPE& fs, bn_message_updater_type& mupd) { mupd.file_io(fs, factor_dic, interface_dic); };
rw(fs, m_updaters, mupd_rw);
rw(fs, m_extracters, mupd_rw);
MSG_DEBUG("file i/o m_noise before=" << m_noise);
rw(fs, m_noise);
MSG_DEBUG("file i/o m_noise after=" << m_noise);
}
void save(const std::string& filename) { std::ofstream ofs(filename); file_io(ofs); }
......@@ -1812,6 +1849,7 @@ struct factor_graph {
std::vector<bn_message_updater_type> m_updaters;
std::vector<bn_message_updater_type> m_extracters;
std::vector<std::string> m_message_headers;
double m_noise;
};
......
This diff is collapsed.
#ifndef _SPEL_BRACKET_H_
#define _SPEL_BRACKET_H_
#include "eigen.h"
#include "error.h"
#include <vector>
#include <map>
#include <algorithm>
namespace impl {
struct generation_rs;
};
namespace BK {
/*typedef SparseMatrix<bool> BracketMatrix;*/
typedef SparseMatrix<bool, Eigen::ColMajor> BraMatrix;
typedef SparseMatrix<bool, Eigen::RowMajor> KetMatrix;
typedef BraMatrix ProjMatrix;
/*typedef std::vector<BracketMatrix> state_list_type;*/
typedef std::vector<BraMatrix> bra_list_type;
typedef std::vector<KetMatrix> ket_list_type;
struct bracket_type {
bra_list_type bra;
ket_list_type ket;
friend std::ostream& operator << (std::ostream& os, const bracket_type& b)
{
return os << "<|:" << b.bra.size() << " |>:" << b.ket.size();
}
};
typedef std::map<size_t, bracket_type> ancestor_list_type;
inline
std::ostream& operator << (std::ostream& os, const ancestor_list_type& al)
{
for (const auto& kv: al) {
os << " <" << kv.first << "|:" << kv.second.bra.size();
os << " |" << kv.first << ">:" << kv.second.ket.size();
}
return os;
}
struct individual_type {
size_t index;
bracket_type bracket;
ancestor_list_type mother;
ancestor_list_type father;
friend std::ostream& operator << (std::ostream& os, const individual_type& i)
{
os << "[Ind. #" << i.index << " constraint =" << i.bracket << std::endl
<< " inherited constraints" << std::endl
<< " * M#" << i.mother.size() << " " << i.mother << std::endl
<< " * P#" << i.father.size() << " " << i.father << std::endl
<< ']' << std::endl;
os << "inherited :" << std::endl;
for (const auto& kv: i.mother) {
os << "<" << kv.first << '|' << std::endl;
for (const auto& m: kv.second.bra) { os << m << std::endl; }
os << "|" << kv.first << '>' << std::endl;
for (const auto& m: kv.second.ket) { os << m << std::endl; }
}
os << "bra:" << std::endl;
for (const auto& m: i.bracket.bra) { os << m << std::endl; }
os << "ket:" << std::endl;
for (const auto& m: i.bracket.ket) { os << m << std::endl; }
return os;
}
};
template <typename BK_TYPE, typename MATRIX_TYPE>
std::vector<BK_TYPE>
combine_m(const std::vector<BK_TYPE>& b1, MATRIX_TYPE&& m)
{
std::vector<BK_TYPE> ret;
ret.reserve(b1.size());
for (size_t i = 0; i < b1.size(); ++i) {
ret.emplace_back(kroneckerProduct(b1[i], m));
}
return ret;
}
template <typename BK_TYPE, typename MATRIX_TYPE>
std::vector<BK_TYPE>
combine_m(MATRIX_TYPE&& m, const std::vector<BK_TYPE>& b1)
{
std::vector<BK_TYPE> ret;
ret.reserve(b1.size());
for (size_t i = 0; i < b1.size(); ++i) {
ret.emplace_back(kroneckerProduct(m, b1[i]));
}
return ret;
}
template <typename BK_OUT>
std::vector<BK_OUT>
combine_bk(const std::vector<BK_OUT>& b1, const std::vector<BK_OUT>& b2)
{
std::vector<BK_OUT> ret;
if (b1.size() != b2.size()) {
throw std::runtime_error("Incompatible bra|ket for product");
}
ret.reserve(b1.size());
for (size_t i = 0; i < b1.size(); ++i) {
ret.emplace_back(kroneckerProduct(b1[i], b2[i]));
}
return ret;
}
template <typename BK_MATRIX>
void
apply_redux(bracket_type& b1, const BK_MATRIX& m)
{
MSG_DEBUG("apply_redux");
MSG_DEBUG(m);
for (size_t i = 0; i < b1.bra.size(); ++i) {
MSG_DEBUG("apply_redux #" << i);
MSG_QUEUE_FLUSH();
MSG_DEBUG(b1.bra[i]);
MSG_QUEUE_FLUSH();
MSG_DEBUG(b1.ket[i]);
MSG_QUEUE_FLUSH();
b1.bra[i] = ((m * b1.bra[i]).pruned() * m.transpose()).pruned();
b1.ket[i] = ((m * b1.ket[i]).pruned() * m.transpose()).pruned();
}
}
bracket_type b_init(size_t n_states)
{
/*BracketMatrix I(n_states, n_states);*/
/*I.setIdentity();*/
/*auto ones = MatrixXb::Ones(1, n_states);*/
bracket_type ret;
ret.bra.resize(n_states);
ret.ket.resize(n_states);
std::vector<Eigen::Triplet<bool>> vt;
vt.reserve(n_states);
for (size_t i = 0; i < n_states; ++i) {
vt.clear();
vt.emplace_back(i, i, true);
ret.bra[i].resize(n_states, n_states);
ret.bra[i].setFromTriplets(vt.begin(), vt.end());
vt.clear();
for (size_t k = 0; k < n_states; ++k) {
vt.emplace_back(i, k, true);
}
ret.ket[i].resize(n_states, n_states);
ret.ket[i].setFromTriplets(vt.begin(), vt.end());
/*ret.bra[i] = I.col(i) * I.row(i);*/
/*ret.ket[i] = I.col(i) * ones;*/
/*MSG_DEBUG("bra[" << i << "]");*/
/*MSG_DEBUG(ret.bra[i]);*/
/*MSG_DEBUG("ket[" << i << "]");*/
/*MSG_DEBUG(ret.ket[i]);*/
}
return ret;
}
bracket_type b_gamete(const bracket_type& b)
{
static auto I = MatrixXb::Identity(2, 2);
return {combine_m(b.bra, I), combine_m(b.ket, I)};
}
ProjMatrix bracket(const bra_list_type& bra, const ket_list_type& ket)
{
const auto& b = bra.front();
const auto& k = ket.front();
ProjMatrix ret(b.rows() * k.rows(), b.cols() * k.cols());
for (size_t i = 0; i < bra.size(); ++i) {
/*ret += (bra[i] * ket[i]).pruned();*/
ret += kroneckerProduct(bra[i], ket[i]);
MSG_DEBUG("ACCUMULATING BRA|KET");
MSG_DEBUG(ret);
}
MSG_DEBUG("---");
return ret;
}
inline
individual_type
i_line(size_t index)
{
return {index, b_init(1), {}, {}};
}
inline
individual_type
i_gamete_impl(const individual_type& I, bool is_dh=false)
{
static auto GI = MatrixXb::Identity(2, 2);
individual_type ret;
ret.index = I.index;
for (const auto& kv: I.mother) {
/*if (!kv.second.bra.size()) {*/
/*continue;*/
/*}*/
ret.mother.insert({kv.first,
{combine_m(kv.second.bra, GI),
combine_m(kv.second.ket, GI)}
});
}
for (const auto& kv: I.father) {
/*if (!kv.second.bra.size()) {*/
/*continue;*/
/*}*/
ret.mother.insert({kv.first,
{combine_m(kv.second.bra, GI),
combine_m(kv.second.ket, GI)}
});
}
if (is_dh) {
ret.bracket.bra = combine_m(I.bracket.bra, GI);
ret.bracket.ket = combine_m(I.bracket.ket, GI);
} else {
ret.mother.insert({I.index,
{combine_m(I.bracket.bra, GI),
combine_m(I.bracket.ket, GI)}
});
}
MSG_DEBUG("gamete contains " << ret.mother.size() << " projectors");
return ret;
}
inline
individual_type
i_gamete(const individual_type& I) { return i_gamete_impl(I, false); }
inline
individual_type
i_dh(const individual_type& I) { return i_gamete_impl(I, true); }
/* algo gamete
*
* une seule liste de bra|ket
* - combiner les listes mother/father et kroneckerProduct(bra|ket, Id(2)) \ la liste mother est utilisée.
* - ajouter le bra|ket de l'individu, kroneckerProduct(~, Id(2)) aussi /
*
* algo crossing
*
* - identifier parents communs
* - combiner bra entre gamètes
* - pour tout parent commun X : bra(X, mother) (x) bra(X, father) (pareil pour ket)
* - sinon pour tout autre X : bra(X, mother) (x) Id(taille gamète father), Id(taille gamète mother) (x) bra(X, father)
* - associer bra(X, mother) et ket(X, father), et bra(X, father) et ket(X, mother) pour tout X commun
* - calculer bra(X).ket(X) => proj(X).
* - calculer le produit de tous les proj(X) et supprimer les colonnes vides => PROJ.
* - TEST : NORMALEMENT LES PROJECTEURS DEVRAIENT COMMUTER.
* - passer tous les bra|ket ainsi que generation_rs à la moulinette (t(PROJ) . x . PROJ).
*
* haploïde doublé
* - je prends une gamète
* mais on ajoute pas individu::bracket à la liste, on le laisse en bracket ((x) Id(2))
* - ...
* - PROFIT!!!
*
*/
inline
individual_type
i_cross(size_t idx, const individual_type& M, const individual_type& P)
{
individual_type ret;
ret.index = idx;
MSG_DEBUG("CROSSING");
MSG_DEBUG(M);
MSG_DEBUG(P);
/* GAMETES */
individual_type GM = i_gamete(M);
individual_type GP = i_gamete(P);
size_t sgm = GM.mother.size();
size_t sgp = GP.mother.size();
MSG_DEBUG("sgm=" << sgm);
MSG_DEBUG("sgp=" << sgp);
/*ret.mother = GM.mother;*/
/*ret.father = GP.mother;*/
/* COMMON ANCESTORS */
std::vector<size_t> gmk, gpk;
gmk.reserve(sgm);
gpk.reserve(sgp);
for (const auto& kv: GM.mother) { gmk.push_back(kv.first); }
for (const auto& kv: GP.mother) { gpk.push_back(kv.first); }
std::vector<size_t> common(sgm > sgp ? sgm : sgp);
auto it = std::set_intersection(gmk.begin(), gmk.end(),
gpk.begin(), gpk.end(),
common.begin());
common.resize(it - common.begin());
/* COMBINE BRAs and KETs */
ancestor_list_type al;
auto common_i = common.begin(), common_j = common.end();
auto mi = GM.mother.begin(), mj = GM.mother.end();
auto pi = GP.mother.begin(), pj = GP.mother.end();
size_t msz = GM.mother.begin()->second.bra.front().cols();
size_t psz = GP.mother.begin()->second.bra.front().cols();
auto IP = MatrixXb::Identity(psz, psz);
auto IM = MatrixXb::Identity(msz, msz);
while (mi != mj && pi != pj) {
/*if (mi->second.bra.size() == 1) {*/
/*++mi;*/
/*continue;*/
/*}*/
/*if (pi->second.bra.size() == 1) {*/
/*++pi;*/
/*continue;*/
/*}*/
if (common_i != common_j && mi->first == pi->first && mi->first == *common_i) {
al.insert({*common_i,
{combine_bk<BraMatrix>(pi->second.bra, mi->second.bra),
combine_bk<KetMatrix>(pi->second.ket, mi->second.ket)}
/*combine_m<KetMatrix>(pi->second.ket, IM)}*/
/*{combine_bk<BraMatrix>(mi->second.bra, pi->second.bra),*/
/*combine_bk<KetMatrix>(mi->second.ket, pi->second.ket)}*/
});
++common_i;
++pi;
++mi;
} else if (mi->first < pi->first) {
al.insert({mi->first,
{combine_m(IP, mi->second.bra),
combine_m(IP, mi->second.ket)}
/*{combine_m(mi->second.bra, IP),*/
/*combine_m(mi->second.ket, IP)}*/
});
++mi;
} else if (pi->first < mi->first) {
al.insert({pi->first,
{combine_m(pi->second.bra, IM),
combine_m(pi->second.ket, IM)}
/*{combine_m(IM, pi->second.bra),*/
/*combine_m(IM, pi->second.ket)}*/
});
++pi;
} else {
throw std::runtime_error("Hein??");
}
}
while (mi != mj) {
if (mi->second.bra.size() == 1) {
++mi;
continue;
}
al.insert({mi->first,
{combine_m(IP, mi->second.bra),
combine_m(IP, mi->second.ket)}
/*{combine_m(mi->second.bra, IP),*/
/*combine_m(mi->second.ket, IP)}*/
});
++mi;
}
while (pi != pj) {
if (pi->second.bra.size() == 1) {
++pi;
continue;
}
al.insert({pi->first,
{combine_m(pi->second.bra, IM),
combine_m(pi->second.ket, IM)}
/*{combine_m(IM, pi->second.bra),*/
/*combine_m(IM, pi->second.ket)}*/
});
++pi;
}
ret.mother = al;
if (common.size() != 0) {
/* COMPUTE PROJECTOR */
/* for debug only. should accumulate in P from the start. */
std::vector<ProjMatrix> proj;
/*BracketMatrix PROJ = BracketMatrix::Identity(msz * psz);*/
ProjMatrix PROJ;
proj.reserve(common.size());
for (size_t i: common) {
MSG_DEBUG("common parent #" << i << " " << al[i]);
MSG_QUEUE_FLUSH();
proj.emplace_back(bracket(GM.mother[i].bra, GP.mother[i].ket));
}
/*for (const auto& m: proj) { PROJ *= m; }*/
for (const auto& m: proj) {