Commit 85ba6695 authored by Damien Leroux's avatar Damien Leroux
Browse files

Commit before new experimental branch (new model/frontend implementation).

parent e93089bb
......@@ -4,7 +4,7 @@ INSTALL_DIR=/usr/local
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
GCC_VERSION=-4.9
CXX=g++$(GCC_VERSION) --std=gnu++0x -Wno-unused-local-typedefs -fPIC
CXX=g++$(GCC_VERSION) --std=gnu++0x -Wno-unused-local-typedefs -fPIC -pipe
#CXX=clang++ -std=c++11 -stdlib=libc++
INC=-I${ROOT_DIR}/include -I${ROOT_DIR}/include/input -I/home/daleroux/include/eigen3 -I/home/daleroux/include -I${ROOT_DIR}/include/bayes
......
......@@ -8,11 +8,12 @@
#include "commandline.h"
/*#include "bn.h"*/
#include "factor_var3.h"
#include "file.h"
enum JobDispatchScheme { JDS_None, JDS_MT, JDS_SGE, JDS_SSH };
void read_format(std::map<std::string, marker_observation_spec>& settings, const std::string& filename, std::istream& is);
void read_format(std::map<std::string, marker_observation_spec>& settings, const std::string& filename, file& is);
#define SPELL_CASTER ((size_t) -1)
......
......@@ -15,6 +15,81 @@
using boost::math::complement;
typedef std::pair<const chromosome*, double> selected_locus;
inline bool operator < (const selected_locus& sl1, const selected_locus& sl2) { return sl1.first < sl2.first || (sl1.first == sl2.first && sl1.second < sl2.second); }
inline std::ostream& operator << (std::ostream& os, const selected_locus& sl) { return os << sl.first->name << ':' << sl.second; }
struct chromosome_search_domain {
const chromosome* chrom;
std::vector<double> loci;
chromosome_search_domain(const chromosome* c, const std::vector<double>& l) : chrom(c), loci(l) {}
struct const_iterator {
const chromosome_search_domain* this_csd;
std::vector<double>::const_iterator i;
const_iterator() : this_csd(NULL), i(__()) {}
const_iterator(const chromosome_search_domain* t, const std::vector<double>::const_iterator& i_) : this_csd(t), i(i_) {}
bool operator == (const const_iterator& other) const { return i == other.i; }
bool operator != (const const_iterator& other) const { return i != other.i; }
/*bool operator < (const const_iterator& other) const { return i < other.i; }*/
/*size_t operator - (const const_iterator& other) const { return i - other.i; }*/
std::pair<const chromosome*, double> operator * () const { return {this_csd->chrom, *i}; }
const_iterator& operator ++ () { ++i; return *this; }
const_iterator& operator -- () { --i; return *this; }
static std::vector<double>::const_iterator __() { static std::vector<double> _; return _.end(); }
};
const_iterator begin() const { return {this, loci.begin()}; }
const_iterator end() const { return {this, loci.end()}; }
const_iterator cbegin() const { return {this, loci.begin()}; }
const_iterator cend() const { return {this, loci.end()}; }
};
typedef std::vector<chromosome_search_domain> genome_search_domain;
struct gsd_iterator {
std::vector<chromosome_search_domain>::const_iterator csd_i, csd_j;
chromosome_search_domain::const_iterator i, j;
bool operator == (const gsd_iterator& other) const { return csd_i == other.csd_i && i == other.i; }
bool operator != (const gsd_iterator& other) const { return csd_i != other.csd_i || i != other.i; }
gsd_iterator&
operator ++ ()
{
++i;
if (i == j) {
MSG_DEBUG("at end of chromosome!");
++csd_i;
if (csd_i != csd_j) {
i = csd_i->begin();
j = csd_i->end();
} else {
i = {};
j = {};
}
}
return *this;
}
std::pair<const chromosome*, double> operator * () const { return *i; }
};
namespace std {
inline gsd_iterator begin(const genome_search_domain& gsd) { return {gsd.begin(), gsd.end(), gsd.begin()->begin(), gsd.begin()->end()}; }
inline gsd_iterator end(const genome_search_domain& gsd) { return {gsd.end(), gsd.end(), {}, {}}; }
}
typedef std::vector<selected_locus> locus_set;
typedef std::vector<locus_set> model_descriptor;
std::pair<bool, double>
detect_strongest_qtl(chromosome_value chr, const locus_key& lk,
const model& M0, const std::vector<double> pos);
......@@ -431,8 +506,8 @@ struct model_manager {
{
Mcurrent.add_block({}, cross_indicator(trait));
Mcurrent.compute();
MSG_DEBUG("INITIAL MODEL SIZE: " << MATRIX_SIZE(Mcurrent.X()));
MSG_QUEUE_FLUSH();
/*MSG_DEBUG("INITIAL MODEL SIZE: " << MATRIX_SIZE(Mcurrent.X()));*/
/*MSG_QUEUE_FLUSH();*/
/*for (const auto& kpop: active_settings->populations) {*/
for (const auto& kpop: all_pops) {
context_key ck(new context_key_struc(*kpop,
......@@ -448,7 +523,8 @@ struct model_manager {
model_manager(const model_manager& mm)
: all_pops(mm.all_pops)
: trait_name(mm.trait_name)
, all_pops(mm.all_pops)
, pop_blocks(mm.pop_blocks)
, n_par(mm.n_par)
, Mcurrent(mm.Mcurrent)
......@@ -563,14 +639,14 @@ struct model_manager {
void
set_joint_mode()
{
MSG_DEBUG("[Model Manager] JOINT MODE");
/*MSG_DEBUG("[Model Manager] JOINT MODE");*/
pmode = Joint;
}
void
set_product_mode()
{
MSG_DEBUG("[Model Manager] PRODUCT MODE");
/*MSG_DEBUG("[Model Manager] PRODUCT MODE");*/
pmode = Single;
}
......@@ -924,6 +1000,8 @@ struct model_manager {
double
get_threshold()
{
return threshold;
#if 0
if (pmode == Joint) {
/*model Mperm(trait_permutations(trait_name, active_settings->n_permutations), Mcurrent);*/
/*Mperm.compute();*/
......@@ -939,6 +1017,7 @@ struct model_manager {
} else {
return threshold;
}
#endif
}
void set_selection(const model_block_collection& mbk) {
......@@ -954,6 +1033,27 @@ struct model_manager {
Mcurrent.compute();
}
model_descriptor
get_model_descriptor(bool joined) const
{
model_descriptor ret;
if (joined) {
ret.emplace_back();
for (const auto& chr_lk: locus_base_key.selection) {
for (double l: chr_lk.second) {
ret.back().emplace_back(chr_lk.first, l);
}
}
} else {
for (const auto& chr_lk: locus_base_key.selection) {
for (double l: chr_lk.second) {
ret.emplace_back(std::vector<selected_locus>{{chr_lk.first, l}});
}
}
}
return ret;
}
void set_selection(const locus_key& lk)
{
/*MSG_DEBUG("POUET " << pmode);*/
......@@ -1047,7 +1147,7 @@ struct model_manager {
ret.add_sub_blocks(chr_sel.first, chr_sel.second);
}
}
MSG_DEBUG("New keys: " << ret.Mcurrent.keys());
/*MSG_DEBUG("New keys: " << ret.Mcurrent.keys());*/
ret.Mcurrent.compute();
return ret;
}
......@@ -1065,6 +1165,24 @@ struct model_manager {
}
#endif
/* create the reduction matrix to remove a locus from a model block. work across all chromosomes in selection. */
MatrixXd reduce(int n_ancestors, const model_block_key& mbk, const chromosome* chr, double locus)
{
MatrixXd ret = MatrixXd::Ones(1, 1);
/* TODO: use LD data if available */
for (const auto& chr_lk: mbk.selection) {
MatrixXd tmp = kroneckerProduct(
ret,
chr_lk.second->reduce([=] (double) -> int { return n_ancestors; },
chr_lk.first == chr ? locus : -1));
ret.swap(tmp);
}
return ret;
}
model_block_collection remove(double loc)
{
model_block_collection
......@@ -1087,7 +1205,8 @@ struct model_manager {
/* reduce each population block */
const qtl_pop_type* pop = **pop_it;
context_key ck(new context_key_struc(pop, chrom_under_study, std::vector<double>()));
MatrixXd red = lk->reduce(*npar_it, loc);
MatrixXd red = lk->reduce([&](double) -> int { return *npar_it; }, loc);
++npar_it;
/*MSG_DEBUG(MATRIX_SIZE(vmat->data));*/
/*MSG_DEBUG(MATRIX_SIZE(red));*/
/*MSG_DEBUG("vmat BEFORE red" << std::endl << vmat);*/
......@@ -1173,7 +1292,17 @@ private:
while (i != j && (*i) <= maxpos) { ++i; }
maxi = i;
/*testpos = *loci;*/
testpos.assign(mini, maxi);
testpos.clear();
testpos.reserve(maxi - mini);
if (lk) {
for (; mini != maxi; ++mini) {
if (!lk->has(*mini)) {
testpos.push_back(*mini);
}
}
} else {
testpos.assign(mini, maxi);
}
/*MSG_DEBUG(__LINE__ << ":chromosome " << chrom_under_study->name << std::endl*/
/*<< " loci " << (*loci) << std::endl*/
/*<< " testpos " << testpos); MSG_QUEUE_FLUSH();*/
......@@ -1197,7 +1326,7 @@ void analysis_report::attach_model_manager(model_manager& mm)
ensure_directories_exist(full_path);
report_trait(trait_name, mm.Mcurrent.Y());
report_file.close();
report_file.open(MESSAGE(full_path << "/report.txt"), std::fstream::out);
report_file.open(MESSAGE(full_path << '/' << trait_name << "_report.txt"), std::fstream::out);
}
inline
......@@ -1286,6 +1415,17 @@ filter_columns(const MatrixXd& conrow, const model_block_collection& blocks)
}
inline
file& section_header(file& f, const std::string& str)
{
if (f.tellp() != 0) { f << std::endl; }
f << "=================================================================================================================" << std::endl
<< ' ' << str << std::endl
<< "-----------------------------------------------------------------------------------------------------------------" << std::endl << std::endl;
return f;
}
inline
file& header(file& f, const std::string& str)
{
......@@ -1302,6 +1442,8 @@ file& header(file& f, const std::string& str)
inline
void analysis_report::report_final_model(model_manager& mm)
{
static std::string emptystr;
mm.Mcurrent.output_X_to_file(full_path);
mm.Mcurrent.output_XtX_inv_to_file(full_path);
model::xtx_printable xtx(mm.Mcurrent, true);
......@@ -1312,31 +1454,50 @@ void analysis_report::report_final_model(model_manager& mm)
/*size_t dof = mm.Mcurrent.rank();*/
/*size_t n = mm.Mcurrent.Y().rows();*/
auto qtls = mm.QTLs();
section_header(report_file, MESSAGE("Report for trait " << mm.trait_name));
report_qtls(qtls);
//*
report_file
<< "Final model" << std::endl << mm.Mcurrent << std::endl
<< "XtX^-1" << std::endl << xtx << std::endl
<< "RSS " << mm.Mcurrent.rss() << std::endl
<< std::endl;
/*header(report_file, "RSS ") << mm.Mcurrent.rss() << std::endl;*/
//*/
/* TODO ajouter R2 global */
if (mm.Mcurrent.n_obs() <= mm.Mcurrent.dof() - 1) {
report_file << "* There are not enough observations to compute a variance/covariance matrix." << std::endl;
report_file << "* There are not enough observations to compute a variance/covariance matrix. Results not displayed." << std::endl;
} else {
static std::string coefstr;
for (int i = 0; i < rss.size(); ++i) {
report_file
<< "##########################" << std::endl
<< "## Variable #" << (i + 1) << std::endl;
section_header(report_file, MESSAGE(mm.trait_name << '#' << (i + 1)));
{
MatrixXd R2(1, mm.Mcurrent.m_blocks.size() - 1);
auto bi = mm.Mcurrent.m_blocks.begin(), bj = mm.Mcurrent.m_blocks.end();
int c = 0;
++bi; // skip Cross Indicators block
for (; bi != bj; ++bi, ++c) {
auto M0 = mm.Mcurrent;
M0.remove_block(bi->first);
M0.compute();
double r1 = mm.Mcurrent.rss()(i);
double r0 = M0.rss()(i);
R2(0, c) = (r0 - r1) / r0;
}
model_print::matrix_with_sections<std::string, void, model_block_key, std::vector<char>> mws(R2);
static std::vector<std::vector<char>> empty_label = {{' '}};
bi = mm.Mcurrent.m_blocks.begin();
for (++bi; bi != bj; ++bi) { mws.add_column_section(bi->first, empty_label); }
/*mm.Mcurrent.set_column_sections(mws, true);*/
mws.add_row_section(emptystr, 1);
header(report_file, "R2") << mws << std::endl;
}
MatrixXd coef = mm.Mcurrent.coefficients().col(i).transpose();
MSG_DEBUG(MATRIX_SIZE(coef));
/*MSG_DEBUG(MATRIX_SIZE(coef));*/
{
model_print::matrix_with_sections<std::string, void, model_block_key, std::vector<char>> mws(coef);
for (const auto& kv: mm.Mcurrent.m_blocks) {
mws.add_column_section(kv.first, mm.Mcurrent.labels_to_ancestor_names(*kv.second));
}
mws.add_row_section(coefstr, 1);
mm.Mcurrent.set_column_sections(mws);
/*for (const auto& kv: mm.Mcurrent.m_blocks) {*/
/*mws.add_column_section(kv.first, mm.Mcurrent.labels_to_ancestor_names(*kv.second));*/
/*}*/
mws.add_row_section(emptystr, 1);
header(report_file, "Coefficients") << mws << std::endl;
}
/*report_file*/
......@@ -1371,11 +1532,19 @@ void analysis_report::report_final_model(model_manager& mm)
model_print::matrix_with_sections<void, std::vector<char>, model_block_key, std::vector<char>, Eigen::Matrix<std::string, Eigen::Dynamic, Eigen::Dynamic>> ctrsig(pvmat);
model_print::matrix_with_sections<void, std::vector<char>, model_block_key, std::vector<char>, Eigen::Matrix<std::string, Eigen::Dynamic, Eigen::Dynamic>> ctr(ctrmat);
for (const auto& kl: blocks) {
auto names = mm.Mcurrent.labels_to_ancestor_names(kl.second);
for (auto& n: names) { n.push_back(' '); n.push_back(' '); n.push_back(' '); n.push_back(' '); }
std::vector<std::vector<char>> names;
if (kl.first.empty()) {
for (const auto& kp: mm.Mcurrent.m_all_pops) {
const auto& qgn = (*kp)->qtl_generation_name;
names.emplace_back(qgn.begin(), qgn.end());
}
} else {
names = mm.Mcurrent.labels_to_ancestor_names(kl.second);
}
ctr.add_row_section(names);
ctr.add_column_section(kl.first, names);
ctrsig.add_row_section(names);
for (auto& n: names) { n.push_back(' '); n.push_back(' '); n.push_back(' '); n.push_back(' '); }
ctr.add_column_section(kl.first, names);
ctrsig.add_column_section(kl.first, names);
}
header(report_file, "Contrasts") << ctr << std::endl << significance_legend() << std::endl;
......@@ -1432,6 +1601,8 @@ void analysis_report::report_final_model(model_manager& mm)
#endif
}
}
section_header(report_file, "Final model") << std::endl << mm.Mcurrent << std::endl;
section_header(report_file, "XtX^-1") << std::endl << xtx << std::endl;
}
inline
......@@ -1474,75 +1645,6 @@ qtl_detect_iqtlm_gw(model_manager& mm);
typedef std::pair<const chromosome*, double> selected_locus;
struct chromosome_search_domain {
const chromosome* chrom;
std::vector<double> loci;
chromosome_search_domain(const chromosome* c, const std::vector<double>& l) : chrom(c), loci(l) {}
struct const_iterator {
const chromosome_search_domain* this_csd;
std::vector<double>::const_iterator i;
const_iterator() : this_csd(NULL), i(__()) {}
const_iterator(const chromosome_search_domain* t, const std::vector<double>::const_iterator& i_) : this_csd(t), i(i_) {}
bool operator == (const const_iterator& other) const { return i == other.i; }
bool operator != (const const_iterator& other) const { return i != other.i; }
/*bool operator < (const const_iterator& other) const { return i < other.i; }*/
/*size_t operator - (const const_iterator& other) const { return i - other.i; }*/
std::pair<const chromosome*, double> operator * () const { return {this_csd->chrom, *i}; }
const_iterator& operator ++ () { ++i; return *this; }
const_iterator& operator -- () { --i; return *this; }
static std::vector<double>::const_iterator __() { static std::vector<double> _; return _.end(); }
};
const_iterator begin() const { return {this, loci.begin()}; }
const_iterator end() const { return {this, loci.end()}; }
const_iterator cbegin() const { return {this, loci.begin()}; }
const_iterator cend() const { return {this, loci.end()}; }
};
typedef std::vector<chromosome_search_domain> genome_search_domain;
struct gsd_iterator {
std::vector<chromosome_search_domain>::const_iterator csd_i, csd_j;
chromosome_search_domain::const_iterator i, j;
bool operator == (const gsd_iterator& other) const { return csd_i == other.csd_i && i == other.i; }
bool operator != (const gsd_iterator& other) const { return csd_i != other.csd_i || i != other.i; }
gsd_iterator&
operator ++ ()
{
++i;
if (i == j) {
MSG_DEBUG("at end of chromosome!");
++csd_i;
if (csd_i != csd_j) {
i = csd_i->begin();
j = csd_i->end();
} else {
i = {};
j = {};
}
}
return *this;
}
std::pair<const chromosome*, double> operator * () const { return *i; }
};
namespace std {
inline gsd_iterator begin(const genome_search_domain& gsd) { return {gsd.begin(), gsd.end(), gsd.begin()->begin(), gsd.begin()->end()}; }
inline gsd_iterator end(const genome_search_domain& gsd) { return {gsd.end(), gsd.end(), {}, {}}; }
}
typedef std::vector<selected_locus> locus_set;
typedef std::vector<locus_set> model_descriptor;
struct test_manager;
......@@ -1763,7 +1865,7 @@ struct test_manager {
/* test M1 = M0 + (expansion from search domain) vs M0 */
void
compute(const std::vector<genome_search_domain>& domain, test_domain_search_method& tds)
compute(const std::vector<genome_search_domain>& domain, test_domain_search_method& tds, bool link_to_search=false)
{
static MatrixXd tmp(1, 1);
locus_set point;
......@@ -1797,6 +1899,20 @@ struct test_manager {
};
return tmp(0, 0);
}
const std::map<locus_set, double>::value_type*
best_result() const
{
const std::map<locus_set, double>::value_type* ret = NULL;
double best = -std::numeric_limits<double>::infinity();
for (const auto& kv: results) {
if (kv.second > best) {
ret = &kv;
best = kv.second;
}
}
return ret;
}
};
......
This diff is collapsed.
......@@ -47,9 +47,9 @@ struct block_builder {
iterator&
operator += (const MATRIX& block)
{
if (ei->n_rows != block.rows() || ei->columns.size() != block.cols()) {
if (ei->n_rows != block.rows() || ei->columns.size() != ((size_t) block.cols())) {
MSG_ERROR("Invalid part size in model block. Have " << MATRIX_SIZE(block) << " but expected (" << ei->n_rows << ", " << ei->columns.size() << ')', "Fix the code");
exit(-1);
abort();
}
/*MSG_DEBUG("output " << output);*/
/*MSG_DEBUG(output->innerSize() << ',' << output->outerSize());*/
......@@ -179,7 +179,7 @@ init_connected_block_builder(
int sz = index.size();
index.insert({l, sz});
}
#if 1
#if 0
MSG_DEBUG("index size " << index.size());
std::stringstream s;
s << '{';
......@@ -394,13 +394,13 @@ compute_along_chromosome(computation_along_chromosome& ret,
value<ComputationResults> vcr = cr;
value<computation_along_chromosome*> pcac = &ret;
DUMP_FILE_LINE();
/*DUMP_FILE_LINE();*/
collection<int> joiner;
joiner.reserve(popl.size());
/* FIXME: create correct keys? FIXED */
for (auto i: range<int>(0, popl.size(), 1)) {
DUMP_FILE_LINE();
/*DUMP_FILE_LINE();*/
value<model_block_key> vmbk = model_block_key(base_key);
*vmbk += std::make_pair(chr, loci[*i]);
/*MSG_DEBUG("new block key " << vmbk);*/
......@@ -414,7 +414,79 @@ compute_along_chromosome(computation_along_chromosome& ret,
}
for (auto& k: joiner) {
DUMP_FILE_LINE();
/*DUMP_FILE_LINE();*/
(void) *k; /* join them all */
}
}
inline
void
init_computation(computation_along_chromosome& ret, ComputationType ct, ComputationResults cr, size_t n_steps, size_t Y_rows, size_t Y_cols)
{
if (cr & RSS) {
ret.rss.resize(Y_cols, n_steps);
}
if (cr & Residuals) {
ret.residuals.resize(Y_rows, Y_cols * n_steps);
}
if (cr & Coefficients) {
ret.residuals.resize(Y_rows, Y_cols * n_steps);
}
if (cr & Rank) {
ret.rank.resize(n_steps);
}
if (ct & FTest) {
ret.ftest_pvalue.resize(Y_cols, n_steps);
}
if (ct & FTestLOD) {
ret.ftest_lod.resize(Y_cols, n_steps);
}
if (ct & Chi2) {
ret.chi2.resize(n_steps, 1);
}
if (ct & Mahalanobis) {
ret.mahalanobis.resize(n_steps, 1);
}
if (ct & R2) {
ret.r2.resize(Y_cols, n_steps);
}
}
template <CachingPolicy _Policy = CachingPolicy::Oneshot>
void
compute_along_interval(int i0, computation_along_chromosome& ret,
value<ComputationType> vct, value<ComputationResults> vcr,
const value<model>& Mcurrent, const value<model>& M0,
const model_block_key& base_key,
chromosome_value chr,
const std::vector<double>& loci,
const collection<parental_origin_per_locus_type>& popl)
{
value<computation_along_chromosome*> pcac = &ret;
/*DUMP_FILE_LINE();*/
collection<int> joiner;
joiner.reserve(popl.size());
/* FIXME: create correct keys? FIXED */
for (auto i: range<int>(i0, i0 + popl.size(), 1)) {
/*DUMP_FILE_LINE();*/
value<model_block_key> vmbk = model_block_key(base_key);
*vmbk += std::make_pair(chr, loci[*i]);
/*MSG_DEBUG("new block key " << vmbk);*/
joiner.push_back(make_value<_Policy>(compute_one,
i, pcac, /* flower */