Commit ebaefeeb authored by Damien Leroux's avatar Damien Leroux
Browse files

New implementation working fine. Factor graph properly built with joint...

New implementation working fine. Factor graph properly built with joint factors as appropriate and spanning tree computation.
parent dd7b2d97
......@@ -4,9 +4,9 @@ include Makefile.conf
all: spell-qtl spell-pedigree spell-marker
depend:
cd src && make .depend
cd src/bayes/ && make .depend
cd src/pedigree/ && make .depend
+cd src && make .depend
+cd src/bayes/ && make .depend
+cd src/pedigree/ && make .depend
spell-qtl: depend
+cd src && $(MAKE)
......@@ -18,9 +18,9 @@ spell-marker: depend
+cd src/bayes && $(MAKE)
clean:
cd src && make clean
cd src/bayes && make clean
cd src/pedigree && make clean
+cd src && make clean
+cd src/bayes && make clean
+cd src/pedigree && make clean
install: spell-qtl spell-pedigree spell-marker
......
......@@ -4,7 +4,11 @@ INSTALL_DIR=/usr/local
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
GCC_VERSION=-4.9
CXX=g++$(GCC_VERSION) --std=gnu++0x -Wno-unused-local-typedefs -fPIC -pipe
COMP=g++$(GCC_VERSION)
#COMP=clang++-3.6 -Wno-undefined-bool-conversion -Wno-tautological-undefined-compare
CXX=$(COMP) --std=c++11 -Wno-unused-local-typedefs -fPIC -pipe
#CXX=clang++ -std=c++11 -stdlib=libc++
INC=-I${ROOT_DIR}/include -I${ROOT_DIR}/include/input -I/home/daleroux/include/eigen3 -I/home/daleroux/include -I${ROOT_DIR}/include/bayes
......@@ -21,15 +25,15 @@ VERSION_DEFINES=$(shell git tag | tail -1 | sed 's/\([0-9]*\)[.]\([0-9]*\)\([.][
CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
#CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
DEBUG_OPTS=-ggdb -O0
#DEBUG_OPTS=-ggdb -O0
#OPT_OPTS=-O3 -DNDEBUG
OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#DEBUG_OPTS=-ggdb -O0 -DNDEBUG
#DEBUG_OPTS=-ggdb -O2 -DNDEBUG
#OPT_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-ggdb -O2 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-ggdb -O2 -DNDEBUG
#DEBUG_OPTS=-ggdb -DNDEBUG
#OPT_OPTS=-O3 -DNDEBUG
#PROF_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG -g -pg
#DEBUG_OPTS=-ggdb
......
......@@ -82,7 +82,6 @@ struct bn_settings_t {
, marker_index()
, job_filenames()
, job_filenames_mutex()
{}
size_t count_markers() const
......@@ -113,7 +112,7 @@ struct bn_settings_t {
cleanup_job_files() const
{
for (const auto& path: job_filenames) {
unlink(path.c_str());
/*unlink(path.c_str());*/
}
}
......
This diff is collapsed.
......@@ -683,13 +683,13 @@ struct LV_database {
for (int c = 0; c < mat.cols(); ++c, ++hi) {
double sum = mat.col(c).sum();
if (sum == 0) {
std::stringstream ss;
ss << "Detected inconsistent observations! Generation is " << gen << ", chromosome " << chr.name << ", individual #" << (ind + 1) << ", involving markers";
auto haplo = *hi;
for (size_t i = haplo.first; i < haplo.second; ++i) {
ss << ' ' << chr.raw.marker_name[i];
}
MSG_ERROR(ss.str(), "Fix the genotype data");
/*std::stringstream ss;*/
/*ss << "Detected inconsistent observations! Generation is " << gen << ", chromosome " << chr.name << ", individual #" << (ind + 1) << ", involving markers";*/
/*auto haplo = *hi;*/
/*for (size_t i = haplo.first; i < haplo.second; ++i) {*/
/*ss << ' ' << chr.raw.marker_name[i];*/
/*}*/
/*MSG_WARNING(ss.str());*/
} else {
mat.col(c) /= sum;
}
......@@ -904,7 +904,11 @@ struct qtl_pop_type {
ret->qtl_generation_name = qtl_generation_name;
ret->gen = gen;
ret->observed_traits_filename = observed_traits_filename;
ret->observed_traits.emplace_back(this_trait);
for (const auto& t: observed_traits) {
if (t.name == this_trait.name) {
ret->observed_traits.emplace_back(t);
}
}
ret->ancestor_names = ancestor_names;
auto i = indices.begin();
auto j = indices.end();
......@@ -1159,6 +1163,10 @@ struct pop_data_type {
/*ret[ti].values.push_back(traits[ti].values[i]);*/
/*ret[ti].good_indices.push_back(traits[ti].good_indices[i]);*/
/*}*/
/*MSG_DEBUG("extracted " << ret.size() << " traits");*/
/*for (const auto& t: ret) {*/
/*MSG_DEBUG(" " << t.values.size() << " values");*/
/*}*/
}
return ret;
};
......@@ -1168,7 +1176,10 @@ struct pop_data_type {
}
auto aqg = all_qtl_geno_matrix();
for (const auto& kv: aqg) {
std::shared_ptr<qtl_pop_type> this_pop = std::make_shared<qtl_pop_type>(name, qtl_generation_name, kv.second, kv.first, LV.extract(qtl_generation_name, kv.second), traits_filename, extract_traits(kv.second), ancestor_names);
/*MSG_DEBUG("subpop kv.first " << kv.first << " kv.second " << kv.second);*/
std::shared_ptr<qtl_pop_type> this_pop = std::make_shared<qtl_pop_type>(name, qtl_generation_name, kv.second, kv.first,
LV.extract(qtl_generation_name, kv.second), traits_filename,
extract_traits(kv.second), ancestor_names);
for (const auto& this_trait: traits) {
auto& lp = linked_pops[this_trait.name];
auto pop_ptr = this_pop->filter_clone(this_trait);
......
......@@ -5,6 +5,7 @@
#include "basic_data.h"
#include "model.h"
/*#include "model/tests.h"*/
#include <regex>
#include <boost/math/distributions/normal.hpp> // for normal_distribution
using boost::math::normal; // typedef provides default type is double.
......@@ -532,6 +533,12 @@ struct test_result {
};
locus_probabilities_type
locus_probabilities(const context_key& ck, const locus_key& lk,
/*const MatrixXd& mgo,*/
int ind,
const std::vector<double>& loci);
struct search_interval_type {
probability_mode mode;
const chromosome* chrom;
......@@ -607,7 +614,7 @@ struct search_interval_type {
if (mode == Joint) {
lk = selection;
}
MSG_DEBUG("Running test along interval with lk=" << lk);
/*MSG_DEBUG("Running test along interval with lk=" << lk);*/
test(all_pops, i0, cac, vct, vcr, Mcurrent, Mbase, lk, positions, threshold);
}
......@@ -677,7 +684,18 @@ struct search_interval_type {
} else {
effective_pos.assign(i, j);
}
MSG_DEBUG("search_interval recompute mode=" << (mode == Joint ? "Joint" : "Single") << " lk=" << lk);
/*MSG_DEBUG("search_interval recompute mode=" << (mode == Joint ? "Joint" : "Single") << " lk=" << lk);*/
/* precompute locus probabilities first */
MSG_DEBUG("Precomputing locus probabilities per individual");
for (const auto& vpop: all_pops) {
context_key ck(new context_key_struc(*vpop, chrom, loci));
for (auto& x: make_collection<Disk>(locus_probabilities,
as_value(ck), as_value(lk), range<int>(0, (*vpop)->size(), 1),
as_value(ck->loci))) {
(void)*x;
}
}
MSG_DEBUG("Computing parental origin probabilities");
locus_blocks
= compute_parental_origins_multipop(
all_pops,
......@@ -685,6 +703,7 @@ struct search_interval_type {
as_value(lk),
loci,
effective_pos);
MSG_DEBUG("Computing dominance probabilities");
dominance_blocks
= compute_dominance_multipop(
all_pops,
......@@ -762,7 +781,9 @@ struct search_interval_type {
MSG_DEBUG("Adding block " << tr.pop_block_key);
M->add_block(tr.pop_block_key, tr.pop_block);
if (tr.dom_block_key) {
M->add_block(tr.dom_block_key, tr.dom_block);
model ref = *M;
ref.compute();
M->add_block_if_test_is_good(tr.dom_block_key, tr.dom_block, ref);
}
}
......@@ -916,6 +937,10 @@ full_search_intervals()
}
void
read_locus_list(std::string& s, settings_t* target);
struct model_manager {
/* name of the studied trait */
......@@ -959,7 +984,7 @@ struct model_manager {
SolverType st=SolverType::QR)
: trait_name(trait)
, all_pops(colpops)
, vMcurrent(model{y, colpops, (*colpops.front())->ancestor_names, maximum_order, st})
, vMcurrent(model{y, active_settings->get_threshold(trait), colpops, (*colpops.front())->ancestor_names, maximum_order, st})
, max_order(maximum_order)
, search_intervals()
, cac()
......@@ -1024,9 +1049,11 @@ struct model_manager {
if (ins_rm_key.second) {
vMcurrent->remove_block(ins_rm_key.second);
}
add_block_with_interactions(ins_rm_key.first, blocks.first);
vMcurrent->add_block_with_interactions(ins_rm_key.first, blocks.first);
if (blocks.second) {
add_block_with_interactions(model_block_key_struc::dominance(ins_rm_key.first), blocks.second);
model ref(*vMcurrent);
ref.compute();
vMcurrent->add_block_with_interactions(model_block_key_struc::dominance(ins_rm_key.first), blocks.second, &ref);
}
}
......@@ -1064,6 +1091,36 @@ struct model_manager {
}
}
#if 0
std::vector<std::string>
split(const std::string& input, const std::string& regex)
{
// passing -1 as the submatch index parameter performs splitting
std::regex re(regex);
std::regex_token_iterator
first{input.begin(), input.end(), re, -1},
last;
return {first, last};
}
#endif
std::vector<std::string>
split(const std::string& str, const std::string& regex)
{
return {std::sregex_token_iterator(str.begin(), str.end(), std::regex(regex), -1), std::sregex_token_iterator()};
}
void
set_selection(const std::string& selstr)
{
for (const auto& qtl: split(selstr, "\\s+")) {
auto chr_loc = split(qtl, ":");
double d;
std::stringstream(chr_loc[1]) >> d;
select(active_settings->find_chromosome(chr_loc[0]), d);
}
}
value<model_block_type>
create_interaction_block(const value<model_block_type>& b1, const value<model_block_type>& b2)
{
......@@ -1416,9 +1473,11 @@ test_result::select(model_manager& mm) const
{
this_interval->select(*this, mm.vMcurrent);
/* TODO apply on model_manager */
mm.add_block_with_interactions(pop_block_key, pop_block);
mm.vMcurrent->add_block_with_interactions(pop_block_key, pop_block);
if (dom_block_key) {
mm.add_block_with_interactions(dom_block_key, dom_block);
model ref(*mm.vMcurrent);
ref.compute();
mm.vMcurrent->add_block_with_interactions(dom_block_key, dom_block, &ref);
}
}
......@@ -1826,7 +1885,7 @@ struct test_manager {
, epistasis_max(epis_max)
, dominance_depth(dom_depth)
, with_constraints(constr)
, M0(y, colpops, 1, st)
, M0(y, active_settings->get_threshold(trait), colpops, 1, st)
, test_type(ct)
{}
......@@ -1838,7 +1897,7 @@ struct test_manager {
, epistasis_max(epis_max)
, dominance_depth(dom_depth)
, with_constraints(constr)
, M0(trait_matrix(trait, all_pops), all_pops, 1, SolverType::QR)
, M0(trait_matrix(trait, all_pops), active_settings->get_threshold(trait), all_pops, 1, SolverType::QR)
, test_type(ct)
{}
......
......@@ -150,7 +150,7 @@ int max_col(const MatrixXd& mat)
}
model
init_model(const value<MatrixXd>& Y, const value<model_block_type>& indicator);
init_model(const value<MatrixXd>& Y, const value<model_block_type>& indicator, double threshold);
VectorXd
row_max(const MatrixXd& mat);
......
......@@ -516,7 +516,7 @@ template <typename PARENT_TYPE, typename STATE_TYPE>
combination_type<PARENT_TYPE, STATE_TYPE>
kronecker(const combination_type<PARENT_TYPE, STATE_TYPE>& c1, const combination_type<PARENT_TYPE, STATE_TYPE>& c2)
{
typedef typename combination_type<PARENT_TYPE, STATE_TYPE>::key_type key_type;
/*typedef typename combination_type<PARENT_TYPE, STATE_TYPE>::key_type key_type;*/
typedef typename combination_type<PARENT_TYPE, STATE_TYPE>::key_list key_list;
typedef typename combination_type<PARENT_TYPE, STATE_TYPE>::element_type element_type;
combination_type<PARENT_TYPE, STATE_TYPE> ret;
......
......@@ -352,7 +352,7 @@ struct lumper {
}
#if 0
scalar_type compute_sum(size_t s, const subset& B, const VectorXd& abundances)
{
cs.start();
......@@ -406,6 +406,7 @@ struct lumper {
/*MSG_DEBUG("Created lumped matrix in " << tm.accum << " seconds");*/
return ret;
}
#endif
};
......
......@@ -27,6 +27,9 @@ typedef std::shared_ptr<model_block_key_struc> model_block_key;
struct model;
static inline
void f_test(const model& model_current, const model& model_new, int col_num, MatrixXd* pvalue, MatrixXd* lod);
struct key_already_exists : public std::exception {
std::string m_what;
key_already_exists(const model&, const model_block_key&);
......@@ -782,11 +785,11 @@ struct gauss_elimination {
struct model {
model()
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_ghost_constraint(), m_all_pops(), m_ancestor_names(), m_max_order(1)
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_ghost_constraint(), m_all_pops(), m_ancestor_names(), m_max_order(1), m_threshold(0)
{}
model(const value<MatrixXd>& y, const collection<const qtl_pop_type*>& pops, const std::map<char, std::string>& anam, size_t mo=1, SolverType st=SolverType::QR)
model(const value<MatrixXd>& y, double threshold, const collection<const qtl_pop_type*>& pops, const std::map<char, std::string>& anam, size_t mo=1, SolverType st=SolverType::QR)
: m_Y(y)
, m_blocks(), m_X()
, m_rank(), m_rss(), m_coefficients(), m_residuals()
......@@ -797,10 +800,11 @@ struct model {
, m_all_pops(pops)
, m_ancestor_names(anam)
, m_max_order(mo)
, m_threshold(threshold)
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/
{}
model(const value<MatrixXd>& y, const collection<const qtl_pop_type*>& pops, size_t mo=1, SolverType st=SolverType::QR)
model(const value<MatrixXd>& y, double threshold, const collection<const qtl_pop_type*>& pops, size_t mo=1, SolverType st=SolverType::QR)
: m_Y(y)
, m_blocks(), m_X()
, m_rank(), m_rss(), m_coefficients(), m_residuals()
......@@ -811,6 +815,7 @@ struct model {
, m_all_pops(pops)
, m_ancestor_names((*pops.front())->ancestor_names)
, m_max_order(mo)
, m_threshold(threshold)
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/
{}
......@@ -826,6 +831,7 @@ struct model {
, m_all_pops(mo.m_all_pops)
, m_ancestor_names(mo.m_ancestor_names)
, m_max_order(mo.m_max_order)
, m_threshold(mo.m_threshold)
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/
{}
......@@ -841,6 +847,7 @@ struct model {
, m_all_pops(mo.m_all_pops)
, m_ancestor_names(mo.m_ancestor_names)
, m_max_order(mo.m_max_order)
, m_threshold(mo.m_threshold)
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/
{}
......@@ -865,6 +872,7 @@ struct model {
m_all_pops = mo.m_all_pops;
m_ancestor_names = mo.m_ancestor_names;
m_max_order = mo.m_max_order;
m_threshold = mo.m_threshold;
return *this;
}
......@@ -884,6 +892,7 @@ struct model {
m_rank = mo.m_rank;
m_ancestor_names.swap(mo.m_ancestor_names);
m_max_order = mo.m_max_order;
m_threshold = mo.m_threshold;
return *this;
}
#endif
......@@ -1018,12 +1027,28 @@ struct model {
/*throw key_already_exists(*this, key);*/
}
m_computed = false;
m_blocks[key] = x;
m_blocks.insert({key, x});
/*MSG_DEBUG("selection now: " << keys());*/
/*m_blocks.push_back(x);*/
/*m_keys.push_back(key);*/
}
bool
add_block_if_test_is_good(const model_block_key& key, const value<model_block_type>& x, const model& ref)
{
MatrixXd pvalue(Y().cols(), 1);
model tmp(*this);
tmp.add_block(key, x);
tmp.compute();
f_test(ref, tmp, 0, &pvalue, NULL);
if (pvalue(0, 0) < m_threshold) {
return false;
}
add_block(key, x);
MSG_DEBUG("New block " << key << " has an f-test pvalue of " << pvalue(0, 0) << " >= " << m_threshold);
return true;
}
void
add_blocks(const model_block_collection& mbc)
{
......@@ -1637,13 +1662,19 @@ struct model {
}
void
add_block_with_interactions(model_block_key mbk, const value<model_block_type>& block)
add_block_with_interactions(model_block_key mbk, const value<model_block_type>& block, const model* refptr=NULL)
{
if (mbk->order() > m_max_order) {
return;
}
model_block_collection new_blocks;
add_block(mbk, block);
if (refptr) {
if (!add_block_if_test_is_good(mbk, block, *refptr)) {
return;
}
} else {
add_block(mbk, block);
}
for (const auto& kv: m_blocks) {
if (mbk->can_interact_with(kv.first)) {
model_block_key new_key = model_block_key_struc::interaction(mbk, kv.first);
......@@ -1658,11 +1689,15 @@ struct model {
}
}
}
for (const auto& kv: new_blocks) {
if (kv.first->order() < m_max_order) {
add_block_with_interactions(kv.first, kv.second);
} else {
add_block(kv.first, kv.second);
if (new_blocks.size()) {
model ref(*this);
ref.compute();
for (const auto& kv: new_blocks) {
if (kv.first->order() < m_max_order) {
add_block_with_interactions(kv.first, kv.second, &ref);
} else {
add_block_if_test_is_good(kv.first, kv.second, ref);
}
}
}
}
......@@ -1686,6 +1721,7 @@ struct model {
collection<const qtl_pop_type*> m_all_pops;
std::map<char, std::string> m_ancestor_names;
size_t m_max_order;
double m_threshold;
public:
friend
......
......@@ -530,7 +530,7 @@ struct pedigree_type {
individual_index_type fill_db(const std::string& name, individual_index_type ind)
{
geno_matrix_by_generation_name[name].insert(get_gen_index(ind)).first;
geno_matrix_by_generation_name[name].insert(get_gen_index(ind));
individuals_by_generation_name[name].push_back(ind);
auto it = individuals_by_generation_name.find(name);
generation_name_by_individual[ind] = &it->first;
......@@ -1026,6 +1026,7 @@ struct pedigree_type {
/*throw skip_eval_exception();*/
/*}*/
return reent[node];
(void)skip_predicate;
}
static label_type reentrant_label(size_t, const label_type& l) { return l; }
......
......@@ -293,23 +293,24 @@ struct pedigree_tree_type {
ancestor_node_list_type
cleanup_reentrants(int node) const
{
#if 0
auto A = count_ancestors(node);
auto Ap1 = count_ancestors(m_nodes[node].p1);
auto Ap2 = count_ancestors(m_nodes[node].p2);
/*auto Ap1 = count_ancestors(m_nodes[node].p1);*/
/*auto Ap2 = count_ancestors(m_nodes[node].p2);*/
auto R = reentrants(A);
auto Rp1 = reentrants(Ap1);
auto Rp2 = reentrants(Ap2);
/*auto Rp1 = reentrants(Ap1);*/
/*auto Rp2 = reentrants(Ap2);*/
/*MSG_DEBUG_INDENT_EXPR("[cleanup_reentrants] ");*/
/*MSG_DEBUG("A: " << A);*/
MSG_DEBUG_INDENT_EXPR("[cleanup_reentrants] ");
MSG_DEBUG("A: " << A);
/*MSG_DEBUG("Ap1: " << Ap1);*/
/*MSG_DEBUG("Ap2: " << Ap2);*/
/*MSG_DEBUG("R: " << R);*/
MSG_DEBUG("R: " << R);
/*MSG_DEBUG("Rp1: " << Rp1);*/
/*MSG_DEBUG("Rp2: " << Rp2);*/
R = R - Rp1 - Rp2;
/*R = R - Rp1 - Rp2;*/
ancestor_node_list_type ret = R;
auto i = R.rbegin();
......@@ -321,8 +322,64 @@ struct pedigree_tree_type {
ret = ret - sub_re;
/*MSG_DEBUG(" current list = " << ret);*/
}
/*MSG_DEBUG_DEDENT;*/
MSG_DEBUG("final R: " << ret);
MSG_DEBUG_DEDENT;
#else
/* Merci à Alexandre Heurteau pour l'idée de la recherche de cycle */
scoped_indent _(MESSAGE("[reentrants(" << node << ")] "));
std::vector<bool> visited(m_nodes.size(), false);
std::vector<int> left_ancestors, right_ancestors;
left_ancestors.reserve(m_nodes.size());
right_ancestors.reserve(m_nodes.size());
std::vector<int> stack;
stack.reserve(m_nodes.size());
auto ancestor_list
= [&] (int n0, std::vector<int>& anc)
{
stack.clear();
stack.push_back(n0);
while (stack.size()) {
int n = stack.back();
stack.pop_back();
/*MSG_DEBUG("ancestor_list on " << n);*/
if (!visited[n]) {
if (m_nodes[n].has_p1()) {
stack.push_back(m_nodes[n].p1);
}
if (m_nodes[n].has_p2()) {
stack.push_back(m_nodes[n].p2);
}
}
anc.push_back(n);
visited[n] = true;
}
std::sort(anc.begin(), anc.end());
auto last = std::unique(anc.begin(), anc.end());
anc.erase(last, anc.end());
};
ancestor_list(m_nodes[node].p1, left_ancestors);
/*MSG_DEBUG("left ancestors " << left_ancestors);*/
/*MSG_DEBUG("visited " << visited);*/
ancestor_list(m_nodes[node].p2, right_ancestors);
/*MSG_DEBUG("right ancestors " << right_ancestors);*/
/*MSG_DEBUG("visited " << visited);*/
std::vector<int> intersect(std::min(left_ancestors.size(), right_ancestors.size()), -1);
auto it = std::set_intersection(left_ancestors.begin(), left_ancestors.end(), right_ancestors.begin(), right_ancestors.end(), intersect.begin());
intersect.resize(it - intersect.begin());
ancestor_node_list_type ret;
for (int n: intersect) {
ret[n] = 1;
}
MSG_DEBUG("reentrants = " << ret);
#endif
return ret;
}
......@@ -345,8 +402,10 @@ struct pedigree_tree_type {
while (stack.size()) {
int n = stack.back();
stack.pop_back();
if (node_is_clone(n)) { continue; }