Commit cfdec1f8 authored by Damien Leroux's avatar Damien Leroux
Browse files

Huge refactoring of frontends (and lower layers as required). WIP.

parent 85ba6695
This diff is collapsed.
......@@ -357,7 +357,7 @@ void
compute_along_chromosome(computation_along_chromosome& ret,
ComputationType ct, ComputationResults cr,
const value<model>& Mcurrent, const value<model>& M0,
const model_block_key& base_key,
const locus_key& base_key,
chromosome_value chr,
const std::vector<double>& loci,
const collection<parental_origin_per_locus_type>& popl)
......@@ -401,8 +401,10 @@ compute_along_chromosome(computation_along_chromosome& ret,
/* FIXME: create correct keys? FIXED */
for (auto i: range<int>(0, popl.size(), 1)) {
/*DUMP_FILE_LINE();*/
value<model_block_key> vmbk = model_block_key(base_key);
*vmbk += std::make_pair(chr, loci[*i]);
/*value<model_block_key> vmbk = model_block_key(base_key);*/
/**vmbk += std::make_pair(chr, loci[*i]);*/
/*(*vmbk)->loci = (*vmbk)->loci + loci[*i];*/
value<model_block_key> vmbk = model_block_key_struc::pop(chr, base_key + loci[*i]);
/*MSG_DEBUG("new block key " << vmbk);*/
joiner.push_back(make_value<_Policy>(compute_one,
i, pcac, /* flower */
......@@ -459,7 +461,7 @@ void
compute_along_interval(int i0, computation_along_chromosome& ret,
value<ComputationType> vct, value<ComputationResults> vcr,
const value<model>& Mcurrent, const value<model>& M0,
const model_block_key& base_key,
const locus_key& base_key,
chromosome_value chr,
const std::vector<double>& loci,
const collection<parental_origin_per_locus_type>& popl)
......@@ -473,8 +475,9 @@ compute_along_interval(int i0, computation_along_chromosome& ret,
/* FIXME: create correct keys? FIXED */
for (auto i: range<int>(i0, i0 + popl.size(), 1)) {
/*DUMP_FILE_LINE();*/
value<model_block_key> vmbk = model_block_key(base_key);
*vmbk += std::make_pair(chr, loci[*i]);
/*value<model_block_key> vmbk = model_block_key(base_key);*/
/**vmbk += std::make_pair(chr, loci[*i]);*/
value<model_block_key> vmbk = model_block_key_struc::pop(chr, base_key + loci[*i]);
/*MSG_DEBUG("new block key " << vmbk);*/
joiner.push_back(make_value<_Policy>(compute_one,
i, pcac, /* flower */
......
......@@ -264,6 +264,43 @@ struct locus_key_struc {
}
}
/* helper to create the reduction matrix when removing a locus from a complex model block. works on one chromosome of the selection. */
Eigen::MatrixXd reduce(const std::map<double, int>& parent_count_per_position, double loc_remove) const
{
if (locus == no_locus) {
return MatrixXd::Ones(1, 1);
}
MatrixXd ret;
int n_geno = parent_count_per_position.find(locus)->second;
/*MSG_DEBUG("reduce " << (*this) << " / " << loc_remove);*/
if (loc_remove == locus) {
/*MSG_DEBUG("this locus");*/
ret = MatrixXd::Ones(n_geno, 1);
} else {
/*MSG_DEBUG("not this locus");*/
ret = VectorXd::Ones(n_geno).asDiagonal();
}
if (parent) {
return kroneckerProduct(
#ifdef LK_REDUCE_R2L
ret
,
#endif
parent->reduce(parent_count_per_position, loc_remove)
#ifndef LK_REDUCE_R2L
,
ret
#endif
);
} else {
return ret;
}
}
struct iterator {
const locus_key_struc* k;
iterator() : k(NULL) {}
......
......@@ -25,7 +25,7 @@ settings_t* read_settings(ifile& is);
design_type* read_design(std::istream& is);
pedigree_type read_pedigree(const design_type* design, const std::string& filename, std::istream& is);
void read_ld(settings_t* settings, const std::string& filename, std::istream& is);
void read_ld(settings_t* settings, const std::string& qtl_gen, const std::string& filename, std::istream& is);
#endif
This diff is collapsed.
......@@ -155,8 +155,10 @@ struct settings_t {
double tolerance;
std::map<const chromosome*, std::vector<double>> estimation_loci;
std::vector<std::string> ld_parents;
std::map<const chromosome*, LD_matrices> ld_data;
/* FIXME LD data is global to the pedigree (to the ancestors), not local to a population */
std::map<std::string, std::vector<std::string>> ld_parents;
std::map<std::string, std::map<const chromosome*, LD_matrices>> ld_data;
std::map<std::string, std::map<const chromosome*, std::map<double, int>>> parent_count_per_pop_per_chr;
LV_database LV;
......@@ -198,6 +200,7 @@ struct settings_t {
, estimation_loci()
, ld_parents()
, ld_data()
, parent_count_per_pop_per_chr()
, LV()
, linked_pops()
, thread_stacks()
......@@ -252,22 +255,8 @@ struct settings_t {
bool sanity_check() const;
void finalize()
{
/*for (auto& c: map) {*/
/*c.compute_haplo_sizes();*/
/*}*/
if (parallel > 1) {
pool = new ThreadPool(parallel, &thread_stacks);
}
void finalize();
/* Build lists of populations per observed trait */
/*for (const auto& kv_pop: populations) {*/
/*for (const auto& trait: kv_pop.second.observed_traits) {*/
/*pops_by_trait[trait.name].emplace_back(&kv_pop.second);*/
/*}*/
/*}*/
}
template <class F, class... Args>
auto enqueue(int _Sync, F&& f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type>
......@@ -447,5 +436,62 @@ namespace std {
};
}
int
get_n_parents(const context_key& ck, const locus_key& lk);
inline
void
settings_t::finalize()
{
/*for (auto& c: map) {*/
/*c.compute_haplo_sizes();*/
/*}*/
if (parallel > 1) {
pool = new ThreadPool(parallel, &thread_stacks);
}
for (const auto& chr: map) {
estimation_loci[&chr] = compute_steps(chr.condensed.marker_locus, step);
}
for (const auto& pptr: populations) {
auto& parent_count_per_chr = parent_count_per_pop_per_chr[pptr->qtl_generation_name];
auto ldi = ld_data.find(pptr->qtl_generation_name);
if (ldi != ld_data.end()) {
for (const auto& chr: map) {
auto& parent_count = parent_count_per_chr[&chr];
auto& ld = (*ldi).second[&chr];
const auto& loci = estimation_loci[&chr];
for (size_t i = 0; i < loci.size(); ++i) {
if (loci[i] != ld.loci[i]) {
MSG_ERROR("Discrepancy between LD locus (" << ld.loci[i] << ") and estimation locus (" << loci[i] << ") on chromosome " << chr.name, "Ensure the LD data is consistent with the map and step size you provided.");
}
parent_count[ld.loci[i]] = ld.ld[i].cols();
}
}
} else {
context_key ck(new context_key_struc(pptr->gen.get()));
locus_key lk;
size_t n_par = get_n_parents(ck, lk);
for (const auto& chr: map) {
auto& parent_count = parent_count_per_chr[&chr];
const auto& loci = estimation_loci[&chr];
for (size_t i = 0; i < loci.size(); ++i) {
parent_count[loci[i]] = (int) n_par;
}
}
}
}
/* Build lists of populations per observed trait */
/*for (const auto& kv_pop: populations) {*/
/*for (const auto& trait: kv_pop.second.observed_traits) {*/
/*pops_by_trait[trait.name].emplace_back(&kv_pop.second);*/
/*}*/
/*}*/
}
#endif
......@@ -243,6 +243,14 @@ get_stpom_data(const context_key& ck, const locus_key& lk)
}
int
get_n_parents(const context_key& ck, const locus_key& lk)
{
return (int) get_stpom_data(ck, lk)->row_labels.size();
}
MatrixXb
get_contrast_groups(generation_value gen, const locus_key& lk)
{
......
#include "computations/frontends2.h"
#include "computations/frontends4.h"
#include <unordered_set>
#if 0
......@@ -49,7 +49,6 @@ forward(model_manager& mm,
}
return cofactors;
}
#endif
model_manager&
manual_skeleton(model_manager& mm,
......@@ -98,29 +97,25 @@ init_skeleton(model_manager& mm)
return mm;
}
#endif
model_manager&
forward(model_manager& mm)
{
/*mm.output_test = true;*/
/*mm.output_rank = true;*/
/*mm.output_rss = true;*/
/*mm.output_model = true;*/
/*MSG_DEBUG("FORWARD");*/
model_manager::test_result result;
do {
result = mm.find_max_over_all_chromosomes();
while (true) {
const auto& result = mm.search_new_best();
/*MSG_DEBUG(result);*/
if (result.over_threshold) {
mm.add(result);
/*MSG_DEBUG(mm.Mcurrent);*/
mm.remove_test_locus(result.chrom, result.locus);
result.select(mm);
MSG_INFO("Select " << result.chrom->name << ':' << result.locus << " as cofactor");
} else {
break;
}
} while(result.over_threshold);
MSG_INFO("Cofactors: " << mm.keys());
}
MSG_INFO("Cofactors: " << mm.get_selection());
return mm;
}
......@@ -128,39 +123,19 @@ forward(model_manager& mm)
model_manager&
backward(model_manager& mm)
{
model_manager::test_result result;
const chromosome* chr;
locus_key lk;
mm.add_all_loci();
mm.init_loci_by_hand({});
model_block_collection mbcoll = mm.Mcurrent.m_blocks;
do {
result.reset();
result.test_value = 1.e10;
result.over_threshold = true;
for (const auto& mb: mbcoll) {
if (mb.first.empty()) {
continue;
}
std::tie(chr, lk) = *mb.first.selection.begin();
mm.select_chromosome(chr);
mm.loci = {lk->locus};
mm.Mcurrent.remove_block(mb.first);
mm.Mcurrent.compute();
mm.test_along_chromosome(0, mm.max_testpos());
auto tmp_result = mm.find_max();
/*MSG_DEBUG("backward testing " << chr->name << " @" << lk << ' ' << tmp_result);*/
if (tmp_result.test_value < result.test_value) {
result = tmp_result;
}
mm.Mcurrent.add_block(mb.first, mb.second);
mm.select_all_loci();
while (true) {
auto worst_kb = mm.weakest_link();
if (worst_kb.second >= mm.threshold) {
break;
}
if (!result.over_threshold) {
mm.Mcurrent.remove_block(result.block_key);
mbcoll.erase(result.block_key);
MSG_DEBUG("Removed " << result.block_key << " which scored only " << result.test_value << " and now model has " << mm.Mcurrent.m_blocks.size() << " blocks");
const model_block_key& mbk = worst_kb.first.first;
assert(mbk->type == mbk_POP);
for (double d: mbk->loci) {
mm.deselect(mbk->chr, d);
}
} while (!result.over_threshold);
/*MSG_DEBUG("Removed " << result.block_key << " which scored only " << result.test_value << " and now model has " << mm.Mcurrent.m_blocks.size() << " blocks");*/
}
return mm;
}
......@@ -169,6 +144,7 @@ backward(model_manager& mm)
model_manager&
qtl_detect_cim(model_manager& mm)
{
return mm;
}
......@@ -176,218 +152,151 @@ qtl_detect_cim(model_manager& mm)
model_manager&
qtl_detect_cim_minus(model_manager& mm)
{
/*mm.output_test = true;*/
/*mm.output_rank = true;*/
/*mm.output_rss = true;*/
/*mm.output_model = true;*/
value<model> Mbase = mm.Mcurrent;
model_manager::test_result result;
mm.init_loci_by_step(active_settings->step);
model_block_collection detected_qtls;
auto ic = mm.Mcurrent.m_blocks.begin();
detected_qtls[ic->first] = ic->second;
/*double qtl = -1;*/
for (const chromosome& chr: active_settings->map) {
mm.select_chromosome(&chr);
model_block_collection cofactors = mm.Mcurrent.extract_blocks_with_chromosome(&chr);
mm.Mcurrent.compute();
mm.test_along_chromosome(0, mm.max_testpos());
auto result = mm.find_max();
if (result.over_threshold) {
detected_qtls[result.block_key] = result.block;
/*qtl = result.locus;*/
/* Confidence interval */
#if 0
auto LOD = mm.custom_test_along_chromosome(ComputationType::FTestLOD, ComputationResults::Test, Mbase, 0, mm.max_testpos()).ftest_lod;
double maxLOD = LOD.maxCoeff();
double lod_cap = maxLOD - 1.5;
int i;
for (i = 0; i < mm.testpos.size() && mm.testpos[i] < qtl && LOD(i) < lod_cap; ++i);
MSG_DEBUG("LEFT i=" << i);
double left = mm.testpos[i];
for (i = mm.testpos.size() - 1; i >= 0 && mm.testpos[i] > qtl && LOD(i) < lod_cap; --i);
MSG_DEBUG("RIGHT i=" << i);
double right = mm.testpos[i];
mm.qtl_confidence_interval[chr.name][qtl] = {left, right};
MSG_INFO("Confidence interval for " << chr.name << ':' << qtl << " {" << left << ':' << right << '}');
#endif
auto chrom_best = mm.search_new_best_per_chromosome(true);
mm.clear_selection();
for (const auto& cb: chrom_best) {
if (cb.second.over_threshold) {
cb.second.select(mm);
}
mm.Mcurrent.add_blocks(cofactors);
}
mm.Mcurrent.m_blocks = detected_qtls;
mm.Mcurrent.m_computed = false;
mm.Mcurrent.compute();
#if 0
{
/*auto p = mm.pmode;*/
/*mm.pmode = Joint;*/
/*auto cac = mm.custom_test_along_chromosome(ComputationType::Mahalanobis, ComputationResults::Test);*/
/*mm.testpos.clear();*/
/*mm._recompute(mm.testpos);*/
value<model> Mbase;
Mbase = mm.Mcurrent.reduce(mm.chrom_under_study);
Mbase->compute();
model_block_key locus_base_key;
computation_along_chromosome cac;
compute_along_chromosome(cac, ComputationType::Mahalanobis, ComputationResults::Test,
mm.Mcurrent, Mbase,
/*Mcurrent, Mcurrent,*/
locus_base_key,
mm.chrom_under_study,
mm.testpos,
mm.locus_blocks);
MSG_DEBUG("maha <- scan(text='" << cac.mahalanobis.transpose() << "')");
/*mm.pmode = p;*/
}
#endif
#if 0
if (0 && qtl != -1) {
mm.remove(qtl);
mm.Mcurrent.compute();
active_settings->set_title(MESSAGE("Computing confidence interval for QTL " << mm.chrom_under_study->name << ':' << qtl));
auto LOD = mm.custom_test_along_chromosome(ComputationType::FTestLOD, ComputationResults::Test, Mbase, 0, mm.max_testpos()).ftest_lod;
double maxLOD = LOD.maxCoeff();
double lod_cap = maxLOD - 1.5;
int i;
for (i = 0; i < mm.testpos.size() && mm.testpos[i] < qtl && LOD(i) < lod_cap; ++i);
MSG_DEBUG("LEFT i=" << i);
double left = mm.testpos[i];
for (i = mm.testpos.size() - 1; i >= 0 && mm.testpos[i] > qtl && LOD(i) < lod_cap; --i);
MSG_DEBUG("RIGHT i=" << i);
double right = mm.testpos[i];
mm.qtl_confidence_interval[mm.chrom_under_study->name][qtl] = {left, right};
MSG_INFO("Confidence interval for " << mm.chrom_under_study->name << ':' << qtl << " {" << left << ':' << right << '}');
mm.add(mm.chrom_under_study, qtl);
mm.Mcurrent.compute();
}
if (qtl != -1) {
mm.remove(qtl);
mm.Mcurrent.compute();
MSG_DEBUG("LOD <- scan(text='" << mm.custom_test_along_chromosome(ComputationType::FTestLOD, ComputationResults::Test, Mbase, 0, mm.max_testpos()).ftest_lod << "')");
mm.add(mm.chrom_under_study, qtl);
mm.Mcurrent.compute();
mm.Mcurrent.enable_constraints();
mm.Mcurrent.compute();
MSG_DEBUG("cos.maha.ctr <- scan(text='" << mm.study_maha(mm.chrom_under_study, qtl, 0, mm.max_testpos()).transpose() << "')");
mm.Mcurrent.disable_constraints();
mm.Mcurrent.compute();
MSG_DEBUG("cos.maha <- scan(text='" << mm.study_maha(mm.chrom_under_study, qtl, 0, mm.max_testpos()).transpose() << "')");
}
#endif
return mm;
}
struct loop_exception {};
#if 0
std::vector<genome_search_domain>
search_domain(const test_manager& tm, const std::vector<genome_search_domain>& full_dom)
bool
iqtlm_backward_one(model_manager& mm)
{
std::map<const chromosome*, std::set<double>> blacklist;
for (const auto& sel: tm.M0_descr) {
for (const auto& c_l: sel) {
blacklist[c_l.first].insert(c_l.second);
std::vector<std::pair<bool, test_result>> results;
size_t sz = 0;
for (const auto& chr_sel: mm.get_selection()) { sz += chr_sel.second->depth(); }
results.reserve(sz);
bool not_modified = true;
for (const auto& chr_sel: mm.get_selection()) {
for (double d: chr_sel.second) {
results.emplace_back(mm.challenge_qtl(chr_sel.first, d));
not_modified &= results.back().first;
}
}
std::vector<genome_search_domain> dom(1);
for (const auto& cv: full_dom) {
std::vector<double> loc;
loc.reserve(cv.second.size());
const auto& bl = blacklist[cv.first];
for (double l: cv) {
if (bl.find(l) == bl.end()) {
loc.push_back(l);
}
}
if (loc.size()) {
dom.back().emplace_back(cv.first, {});
dom.back().back().loci.swap(loc);
if (not_modified) {
return false;
}
mm.clear_selection();
for (const auto& good_result: results) {
if (good_result.second.over_threshold) {
good_result.second.select(mm);
}
}
return dom;
};
return true;
}
bool
iqtlm_search(const char* prefix, double threshold, test_manager& tm, const std::vector<genome_search_domain>& full_domain)
in_history(std::set<std::map<chromosome_value, locus_key>>& history, const std::map<chromosome_value, locus_key>& selection)
{
test_all_domain tad;
tm.compute(search_domain(tm, full_domain), tad);
auto best = tm.best_result();
if (best && best->second > threshold) {
MSG_DEBUG(prefix << " Found good result at " << best.first << " (test value " << best.second << " > " << threshold << ')');
tm.M0_descr.back().insert(tm.M0_descr.back().end(), best);
std::sort(tm.M0_descr.back().begin(), tm.M0_descr.back().end());
return true;
}
return false;
return !history.insert(selection).second;
}
void
iqtlm_forward(double threshold, test_manager& tm, const std::vector<genome_search_domain>& full_domain)
bool
iqtlm_backward(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
{
return iqtlm_search("[iQTLm forward]", threshold, tm, full_domain);
while (iqtlm_backward_one(mm)) {
if (in_history(history, mm.get_selection())) {
return false;
}
}
return true;
}
void
iqtlm_backward(double threshold, test_manager& tm, const std::vector<genome_search_domain>& full_domain)
bool
iqtlm_forward(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
{
model_descriptor full_sel = tm.M0_descr.back();
model_descriptor M0_sub(1);
M0_sub.back().reserve(full_sel.size());
for (const auto& current: full_sel) {
MSG_DEBUG("[iQTLm backward] Challenging " << current);
M0_sub.back().clear();
for (const auto& selected: full_sel) {
if (selected != current) {
M0_sub.back().push_back(selected);
while (true) {
auto results = mm.search_new_best_per_chromosome();
bool modified = false;
for (const auto& result: results) {
modified |= result.second.over_threshold;
if (result.second.over_threshold) {
result.second.select(mm);
}
}
tm.M0_descr.swap(M0_sub);
iqtlm_search("[iQTLm backward]", threshold, tm, full_domain);
if (!modified) {
return true;
}
if (in_history(history, mm.get_selection())) {
return false;
}
}
return full_sel == tm.M0_descr;
}
model_manager&
qtl_detect_iqtlm1(model_manager& mm)
bool
iqtlm_forward_gw(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
{
bool go_on = true;
test_manager tm(mm.trait_name, 1, 0, false);
std::vector<genome_search_domain> full_dom(1);
for (const auto& chr_vlvec: mm.all_loci) {
full_dom.back().emplace_back(chr_vlvec.first, *chr_vlvec.second);
mm.select_chromosome(chr_vlvec.first);
mm.set_selection(locus_key{});
while (true) {
auto result = mm.search_new_best();
bool modified = result.over_threshold;
if (result.over_threshold) {
result.select(mm);
}
if (!modified) {
return true;
}
if (in_history(history, mm.get_selection())) {
return false;
}
}
}
tm.M0_descr = mm.get_model_descriptor(true);
std::set<model_descriptor> history;
history.insert(tm.M0_descr);
while (true) {
if (!iqtlm_backward(mm.threshold, tm, full_dom)) {
MSG_DEBUG("[iQTLm] Selection is stable. Exiting.");
model_manager&
qtl_detect_iqtlm(model_manager& mm)
{