Commit 6d4e2399 authored by Damien Leroux's avatar Damien Leroux
Browse files

Finished implementing haplotype "compression" (multiple markers on map).

parent 7d3a22fb
......@@ -26,7 +26,7 @@ md5_digest& operator << (md5_digest& md5, const allele_pair& ap)
static inline
md5_digest& operator << (md5_digest& md5, const chromosome& chr)
{
return md5 << chr.name << chr.marker_name.size();
return md5 << chr.name << chr.raw.marker_name.size();
}
static inline
......
......@@ -3,32 +3,40 @@
#include <vector>
struct chromosome {
std::string name;
struct marker_name_and_position {
std::vector<std::string> marker_name;
std::vector<double> marker_locus;
};
struct chromosome {
std::string name;
marker_name_and_position raw;
std::vector<int> haplo_sizes;
marker_name_and_position condensed;
chromosome() : name(), marker_name(), marker_locus(), haplo_sizes() {}
chromosome() : name(), raw(), haplo_sizes(), condensed() {}
chromosome(const chromosome& c)
: name(c.name), marker_name(c.marker_name)
, marker_locus(c.marker_locus)
: name(c.name), raw(c.raw)
, haplo_sizes(c.haplo_sizes)
, condensed(c.condensed)
{}
chromosome(const std::string& n,
const std::vector<std::string>& mn,
const std::vector<double>& ml)
: name(n), marker_name(mn), marker_locus(ml), haplo_sizes()
{ compute_haplo_sizes(); }
: name(n), raw{mn, ml}, haplo_sizes(), condensed()
{
compute_haplo_sizes();
}
void compute_haplo_sizes()
{
double prev = -1.;
int hs = 0;
haplo_sizes.reserve(marker_locus.size());
for (double l: marker_locus) {
haplo_sizes.reserve(raw.marker_locus.size());
for (double l: raw.marker_locus) {
++hs;
if (l != prev) {
haplo_sizes.push_back(hs);
......@@ -39,14 +47,24 @@ struct chromosome {
if (hs) {
haplo_sizes.push_back(hs);
}
/* now compute condensed */
condensed.marker_name.clear();
condensed.marker_locus.clear();
condensed.marker_name.reserve(haplo_sizes.size());
condensed.marker_locus.reserve(haplo_sizes.size());
for (auto h: *this) {
condensed.marker_name.push_back(raw.marker_name[h.first]);
condensed.marker_locus.push_back(raw.marker_locus[h.first]);
}
}
chromosome& operator = (const chromosome& c)
{
name = c.name;
marker_name = c.marker_name;
marker_locus = c.marker_locus;
raw = c.raw;
haplo_sizes = c.haplo_sizes;
condensed = c.condensed;
return *this;
}
......@@ -58,7 +76,7 @@ struct chromosome {
haplotype_iterator(const chromosome& c, bool at_end)
: hs_i(at_end ? c.haplo_sizes.end() : c.haplo_sizes.begin())
, hs_j(c.haplo_sizes.end())
, m_locus_index(at_end ? c.marker_locus.size() : 0)
, m_locus_index(at_end ? c.raw.marker_locus.size() : 0)
{}
value_type operator * () const
......@@ -93,9 +111,9 @@ std::ostream&
operator << (std::ostream& os, const chromosome& c)
{
os << "<chromosome:" << c.name;
auto nbeg = c.marker_name.begin();
auto nend = c.marker_name.end();
auto lbeg = c.marker_locus.begin();
auto nbeg = c.condensed.marker_name.begin();
auto nend = c.condensed.marker_name.end();
auto lbeg = c.condensed.marker_locus.begin();
for (; nbeg != nend; ++nbeg, ++lbeg) {
os << ' ' << (*nbeg) << ':' << (*lbeg);
}
......
......@@ -258,8 +258,9 @@ struct qtl_chromosome {
void copy_loci(std::vector<double>& loci) const
{
loci.clear();
loci.resize(chr->marker_locus.size() + qtl.size());
auto end = std::set_union(chr->marker_locus.begin(), chr->marker_locus.end(), qtl.begin(), qtl.end(), loci.begin());
loci.resize(chr->condensed.marker_locus.size() + qtl.size());
auto end = std::set_union(chr->condensed.marker_locus.begin(), chr->condensed.marker_locus.end(),
qtl.begin(), qtl.end(), loci.begin());
loci.resize(end - loci.begin());
}
......@@ -316,8 +317,8 @@ struct qtl_chromosome {
/*MSG_DEBUG(MATRIX_SIZE(output) << std::endl << output.transpose());*/
int obs_col = 0;
int output_col = 0;
auto li = chr->chr->marker_locus.begin();
auto lj = chr->chr->marker_locus.end();
auto li = chr->chr->condensed.marker_locus.begin();
auto lj = chr->chr->condensed.marker_locus.end();
auto qi = chr->qtl.begin();
auto qj = chr->qtl.end();
auto qsi = qtl_state.begin();
......
......@@ -513,15 +513,15 @@ struct generation_rs {
ArrayXb obs = mgo.obs(chr, g, i).array();
ArrayXb mask = compute_mask(chr, mgo, i).array();
MatrixXb ret = (obs * mask).matrix();
MSG_DEBUG("=============================================================");
MSG_DEBUG(g->name << '#' << i);
MSG_DEBUG(MATRIX_SIZE(obs));
dump_binmat(std::cout, g->main_process().column_labels, obs);
MSG_DEBUG(MATRIX_SIZE(mask));
dump_binmat(std::cout, g->main_process().column_labels, mask);
MSG_DEBUG(MATRIX_SIZE(ret));
dump_binmat(std::cout, g->main_process().column_labels, ret);
MSG_DEBUG("=============================================================");
/*MSG_DEBUG("=============================================================");*/
/*MSG_DEBUG(g->name << '#' << i);*/
/*MSG_DEBUG(MATRIX_SIZE(obs));*/
/*dump_binmat(std::cout, g->main_process().column_labels, obs);*/
/*MSG_DEBUG(MATRIX_SIZE(mask));*/
/*dump_binmat(std::cout, g->main_process().column_labels, mask);*/
/*MSG_DEBUG(MATRIX_SIZE(ret));*/
/*dump_binmat(std::cout, g->main_process().column_labels, ret);*/
/*MSG_DEBUG("=============================================================");*/
return ret;
}
generation_design_base* clone(const generation_rs* my_gen) const
......@@ -696,10 +696,10 @@ struct generation_rs {
auto possibles = parent->design->compute_possibles(chr, mgo, mgo.get_mother(g, i));
auto kp = kroneckerProduct(possibles, MatrixXb::Ones(2, 1));
/*MSG_DEBUG("g->lumper : " << g->lumper.innerSize() << 'x' << g->lumper.outerSize());*/
MSG_DEBUG(MATRIX_SIZE(possibles));
MSG_DEBUG(possibles);
MSG_DEBUG(MATRIX_SIZE(kp));
MSG_DEBUG(kp);
/*MSG_DEBUG(MATRIX_SIZE(possibles));*/
/*MSG_DEBUG(possibles);*/
/*MSG_DEBUG(MATRIX_SIZE(kp));*/
/*MSG_DEBUG(kp);*/
/*MSG_DEBUG("parent->design->compute_possibles(mgo)" << std::endl << parent->design->compute_possibles(chr, mgo, mgo.get_mother(g, i)));*/
MatrixXb ret = g->lumper * kp;
/*MSG_DEBUG(MATRIX_SIZE(g->lumper));*/
......@@ -738,7 +738,7 @@ struct generation_rs {
MatrixXb ret = g->lumper * kp;
/*std::cout << "RIL MASK " << g->name << "#" << i << std::endl;*/
/*dump_binmat(std::cout, g->main_process().column_labels, ret);*/
check_LV(g, i, ret);
/*check_LV(g, i, ret);*/
return ret;
}
generation_design_base* _clone(const generation_rs* my_gen) const { return new generation_design_ril(my_gen, parent); }
......@@ -1617,13 +1617,26 @@ MatrixXb& multi_generation_observations::operator [] (const impl::generation_rs*
inline
multi_generation_observations::multi_generation_observations(const population* p, const chromosome* chr, const impl::generation_rs* g, int i)
/*: std::map<const impl::generation_rs*, MatrixXb>(), n_mark(nm)*/
: n_mark(chr->marker_locus.size()), LV(), pop(p)
: n_mark(chr->condensed.marker_locus.size()), LV(), pop(p)
{
/*g->design->init_mgo(*this);*/
LV = g->design->compute_possibles(chr, *this, i);
MSG_DEBUG("=====================================================================================");
MSG_DEBUG("LV for " << g->name << "#" << i << std::endl << LV);
MSG_DEBUG("=====================================================================================");
MatrixXb tmp = g->design->compute_possibles(chr, *this, i);
/* Here thy redundant LV shalt be shrunkened */
LV = MatrixXb::Ones(tmp.innerSize(), n_mark);
int l = 0;
for (auto h: *chr) {
for (size_t i = h.first; i < h.second; ++i) {
LV.array().col(l) *= tmp.array().col(i);
}
++l;
}
/*MSG_DEBUG("=====================================================================================");*/
/*MSG_DEBUG("LV for " << g->name << "#" << i << std::endl << LV);*/
/*MSG_DEBUG("=====================================================================================");*/
/*MSG_DEBUG("::MGO-LV::" << std::endl << LV);*/
}
......@@ -1634,9 +1647,9 @@ multi_generation_observations::obs(const chromosome* chr, const impl::generation
{
MatrixXb ret;
pop->init_observations(ret, chr, g, i);
MSG_DEBUG("OBS for " << g->name << '#' << i);
MSG_DEBUG(ret);
check_LV(g, i, ret);
/*MSG_DEBUG("OBS for " << g->name << '#' << i);*/
/*MSG_DEBUG(ret);*/
/*check_LV(g, i, ret);*/
return ret;
}
......@@ -1662,7 +1675,7 @@ bool population::is_observed(const impl::generation_rs* g, int i) const
inline
std::vector<char> population::get_observations(const chromosome* chr, const impl::generation_rs* g, int i) const
{
return get_observed_mark(g->name).observations.get_obs(chr->marker_name.begin(), chr->marker_name.end(), i);
return get_observed_mark(g->name).observations.get_obs(chr->raw.marker_name.begin(), chr->raw.marker_name.end(), i);
}
inline
......@@ -1672,13 +1685,13 @@ void population::init_observations(
{
const population_marker_observation& om = get_observed_mark(g->name);
if (i == -1 || !is_observed(g, i)) {
ret = MatrixXb::Ones(g->main_process().innerSize(), chr->marker_name.size());
ret = MatrixXb::Ones(g->main_process().innerSize(), chr->raw.marker_name.size());
} else {
std::vector<char> obs = get_observations(chr, g, i);
if (obs.size()) {
ret = om.raw_observations(obs);
} else {
ret = MatrixXb::Ones(g->main_process().innerSize(), chr->marker_name.size());
ret = MatrixXb::Ones(g->main_process().innerSize(), chr->raw.marker_name.size());
}
}
#if 0
......
......@@ -283,11 +283,20 @@ struct context_key_struc {
_init_locus_indices();
}
context_key_struc(const population* p, const generation_rs* g, const chromosome* c)
: pop(p)
, gen(g)
, chr(c)
, loci(compute_steps(chr->condensed.marker_locus, active_settings->step))
{
_init_locus_indices();
}
context_key_struc(const population* p, const chromosome* c)
: pop(p)
, gen(active_settings->design->generation[pop->qtl_generation_name])
, chr(c)
, loci(compute_steps(chr->marker_locus, active_settings->step))
, loci(compute_steps(chr->condensed.marker_locus, active_settings->step))
{
_init_locus_indices();
}
......
......@@ -109,12 +109,13 @@ all_observed_generation_names(population_value pop)
}
/* DEPRECAITIDE */
std::vector<char>
marker_observations(population_value pop, chromosome_value chr, generation_value gen, int ind)
{
return pop->get_observed_mark(gen->name)
.observations
.get_obs(chr->marker_name.begin(), chr->marker_name.end(), ind);
.get_obs(chr->raw.marker_name.begin(), chr->raw.marker_name.end(), ind);
}
......@@ -130,7 +131,7 @@ test_loci(chromosome_value chr)
value<std::vector<double>>
test_loci(chromosome_value chr)
{
return compute_steps(chr->marker_locus, active_settings->step);
return compute_steps(chr->condensed.marker_locus, active_settings->step);
}
......
......@@ -34,9 +34,9 @@ test_probapop(const chromosome& chr, const marker_data& md, double step)
std::ofstream ofgen("/tmp/probapop.gen");
std::ofstream ofphen("/tmp/probapop.phen");
ofmap << "*chr1 " << chr.marker_locus.size() << ' ' << chr.marker_name[0];
for (size_t i = 1; i < chr.marker_locus.size(); ++i) {
ofmap << ' ' << (chr.marker_locus[i] - chr.marker_locus[i - 1]) << ' ' << chr.marker_name[i];
ofmap << "*chr1 " << chr.raw.marker_locus.size() << ' ' << chr.raw.marker_name[0];
for (size_t i = 1; i < chr.raw.marker_locus.size(); ++i) {
ofmap << ' ' << (chr.raw.marker_locus[i] - chr.raw.marker_locus[i - 1]) << ' ' << chr.raw.marker_name[i];
}
ofmap << std::endl << std::flush;
......
......@@ -22,8 +22,8 @@ std::vector<chromosome> read_map(std::istream& is) {
if (n <= 0) {
throw input_exception(is, "expected the number of markers after the marker name.");
}
ret.back().marker_name.reserve(n);
ret.back().marker_locus.reserve(n);
ret.back().raw.marker_name.reserve(n);
ret.back().raw.marker_locus.reserve(n);
ws_nonl(is);
d = 0;
s.clear();
......@@ -31,8 +31,8 @@ std::vector<chromosome> read_map(std::istream& is) {
if (s.size() == 0) {
throw input_exception(is, "expected a marker name");
}
ret.back().marker_name.push_back(s);
ret.back().marker_locus.push_back(0);
ret.back().raw.marker_name.push_back(s);
ret.back().raw.marker_locus.push_back(0);
while (--n && !is.eof()) {
ws_nonl(is);
d = -1;
......@@ -46,8 +46,8 @@ std::vector<chromosome> read_map(std::istream& is) {
if (s.size() == 0) {
throw input_exception(is, "expected a marker name");
}
ret.back().marker_name.push_back(s);
ret.back().marker_locus.push_back(ret.back().marker_locus.back() + d);
ret.back().raw.marker_name.push_back(s);
ret.back().raw.marker_locus.push_back(ret.back().raw.marker_locus.back() + d);
}
ws_nl(is);
}
......
......@@ -102,12 +102,14 @@ int main(int argc, const char** argv)
msg_handler_t::check(true);
MSG_DEBUG("finalize");
active_settings->finalize();
MSG_DEBUG("sanity_check");
if (!active_settings->sanity_check()) {
exit(-1);
}
active_settings->finalize();
msg_handler_t::check(true);
if (0) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment