Commit 2f3fbfde authored by Damien Leroux's avatar Damien Leroux
Browse files

WIP. Computations look nice.

parent bf3d08d3
......@@ -3,6 +3,9 @@
#include <map>
#include <vector>
#include <map>
#include <set>
#include <unordered_set>
#include <string>
#include <iostream>
#include <fstream>
......@@ -655,12 +658,15 @@ struct LV_database {
size_t n_ind = mark_ind_lv.begin()->second.size();
auto& ind_LVmat = data[chr.name][gen];
ind_LVmat.resize(n_ind, {n_states, n_mark});
size_t m = 0;
for (const auto& mark: chr.raw.marker_name) {
for (size_t ind = 0; ind < n_ind; ++ind) {
ind_LVmat[ind].col(m) = mark_ind_lv.find(mark)->second[ind];
for (size_t ind = 0; ind < n_ind; ++ind) {
auto& mat = ind_LVmat[ind];
mat.resize(n_states, n_mark);
size_t m = 0;
for (const auto& mark: chr.raw.marker_name) {
const auto& svec = mark_ind_lv.find(mark)->second;
mat.col(m) = svec[ind];
++m;
}
++m;
}
}
}
......@@ -828,6 +834,7 @@ struct LV_database {
struct qtl_pop_type {
std::string name;
std::string qtl_generation_name;
std::vector<size_t> indices;
std::shared_ptr<geno_matrix> gen;
std::map<std::string, std::vector<MatrixXd>> LV;
......@@ -1024,6 +1031,7 @@ struct pop_data_type {
auto it =
pops.insert({name, {
name,
qtl_generation_name,
kv.second,
kv.first,
LV.extract(qtl_generation_name, kv.second),
......@@ -1082,13 +1090,13 @@ struct pop_data_type {
}
} else {
for (const auto& kv: pop_data.LV.data_by_marker) {
os << "| Marker " << kv.first << std::endl;
for (const auto& gen_lv: kv.second) {
os << "| Generation " << gen_lv.first << std::endl;
const auto& family = pop_data.families.find(gen_lv.first)->second;
for (size_t i = 0; i < gen_lv.second.size(); ++i) {
os << "| #" << i << " " << pop_data.get_geno_matrix(gen_lv.first, i)->name << std::endl;
prepend(os, "| ", gen_lv.second[i].transpose());
os << "| Generation " << kv.first << std::endl;
const auto& family = pop_data.families.find(kv.first)->second;
for (const auto& mark_lv: kv.second) {
os << "| Marker " << kv.first << std::endl;
for (size_t i = 0; i < mark_lv.second.size(); ++i) {
os << "| #" << i << std::endl;
prepend(os, "| ", mark_lv.second[i].transpose());
}
}
}
......
......@@ -582,19 +582,43 @@ template <typename Ret, typename... Args>
cached_computation(const std::string& name, Ret (*f) (Args...), const value<typename clean_type<Args>::type>&... args)
: m_name(name)
, m_func(f)
#ifndef NDEBUG
, dump_call()
#endif
/*, m_md5_hash{.accum = compute_md5(args...), .append = append_md5(args...)}*/
{
m_md5_hash.md5 = new_redux::feed_md5(args...);
m_md5_hash.accum = m_md5_hash.md5;
m_md5_hash.append = new_redux::md5_append(args...);
/*std::cout << "NEW CACHED_COMPUTATION WITH ARGS" << std::endl;*/
/*do_with_arg_pack(std::cout << args << std::endl);*/
/*std::cout << "================================" << std::endl;*/
/*std::cout << m_md5_hash.accum << std::endl;*/
/*std::cout << m_md5_hash.append << std::endl;*/
#ifndef NDEBUG
dump_call = [=]() {
std::stringstream ss;
ss << m_name << '(';
std::vector<std::string> avec;
do_with_arg_pack(avec.push_back(MESSAGE(args)));
for (size_t i = 0; i < avec.size() - 1; ++i) {
ss << avec[i] << ", ";
}
ss << avec.back() << ')';
return ss.str();
};
#endif
/*MSG_DEBUG("NEW CACHED_COMPUTATION WITH ARGS " << dump());*/
/*MSG_DEBUG("MD5 (accum) = " << m_md5_hash.accum);*/
/*MSG_DEBUG("MD5 (append) = " << m_md5_hash.append);*/
/*std::cout << "NEW CACHED_COMPUTATION " << m_md5_hash.accum << ' ' << m_md5_hash.append << std::endl;*/
}
std::string
dump()
{
#ifndef NDEBUG
return dump_call();
#else
return m_name;
#endif
}
std::string get_path() const
{
std::stringstream ss;
......@@ -621,7 +645,7 @@ template <typename Ret, typename... Args>
/* return value read from file */
return true;
} else {
MSG_WARNING("Collision in cache! " << path);
MSG_WARNING("Collision in cache! " << dump() << " " << path);
}
return false;
}
......@@ -641,17 +665,17 @@ template <typename Ret, typename... Args>
if (check == m_md5_hash.append) {
/* if same */
/* CACHE FOUND */
/*MSG_INFO('(' << m_name << ") Found data in cache");*/
/*MSG_INFO(dump() << " Found data in cache");*/
ci & data;
/* return value read from file */
return data;
}
/* CACHE INVALID */
/*MSG_INFO('(' << m_name << ") Cache is invalid. Computing data.");*/
/*MSG_INFO(dump() << " Cache is invalid. Computing data.");*/
} else {
/* else */
/* COMPUTE AND SAVE */
/*MSG_INFO('(' << m_name << ") Computing data.");*/
/*MSG_INFO(dump() << " Computing data.");*/
}
/* compute value = m_func(x...) */
data = m_func(x...);
......@@ -679,6 +703,10 @@ template <typename Ret, typename... Args>
std::string m_name;
Ret (*m_func) (Args...);
md5_hash_type m_md5_hash;
#ifndef NDEBUG
std::function<std::string()> dump_call;
#endif
};
template <typename Ret, typename... Args>
......
......@@ -24,7 +24,8 @@ static inline
md5_digest& operator << (md5_digest& md5, const qtl_pop_type& pop)
{
/*return md5 << pop.qtl_generation_name << pop.observed_traits_filename << pop.observed_mark.size() << pop.noise;*/
return md5 << pop.name;
MSG_DEBUG("QTL_POP MD5 using pop_name='" << pop.name << "' and qtl_gen_name='" << pop.qtl_generation_name << "'");
return md5 << pop.name << pop.qtl_generation_name;
}
static inline
......@@ -33,6 +34,13 @@ md5_digest& operator << (md5_digest& md5, const geno_matrix& gen)
return md5 << gen.name;
}
static inline
md5_digest& operator << (md5_digest& md5, const context_key& ck)
{
return md5 << ck->pop->name << ck->pop->gen->name << ck->loci;
}
#if 0
static inline
md5_digest& operator << (md5_digest& md5, const impl::generation_rs::segment_computer_t& sc)
......
......@@ -3,6 +3,7 @@
#include "geno_matrix.h"
#include "labelled_matrix.h"
#include "data/qtl_chromosome.h"
struct out_of_segment_exception : public std::exception {};
......@@ -234,6 +235,11 @@ struct geno_prob_computer {
/* FIXME: optimize the CORRECT initialization of l & r */
std::tie(l, r) = find_lr(steps[i]);
ret.data.col(i) = fast_compute_at(steps[i], l, r);
for (int k = 0; k < ret.data.rows(); ++k) {
if (ret.data(k, i) < FLOAT_TOL && ret.data(k, i) > -FLOAT_TOL) {
ret.data(k, i) = 0;
}
}
if ((ret.data.col(i).array() < 0).any()) {
MSG_DEBUG("Bad value at column #" << i << std::endl << ret.data.col(i).transpose());
MSG_DEBUG((*this_gen));
......@@ -270,5 +276,124 @@ struct geno_prob_computer {
}
};
struct segment_computer_t {
const geno_matrix* g_this;
const qtl_chromosome* chr;
geno_prob_computer gpc_vec;
std::vector<double> steps;
std::vector<double> loci;
double noise;
segment_computer_t() : g_this(0), gpc_vec(), steps(), noise(0) {}
segment_computer_t(segment_computer_t&& sc)
: g_this(sc.g_this), chr(sc.chr), gpc_vec(std::move(sc.gpc_vec)), steps(std::move(sc.steps)), loci(std::move(sc.loci)), noise(sc.noise)
{}
segment_computer_t(const geno_matrix& g, const qtl_chromosome* c, double step, double nz)
: g_this(&g), chr(c), gpc_vec(), noise(nz)
{
chr->copy_loci(loci);
steps = compute_steps(loci, step);
init();
}
segment_computer_t(const geno_matrix& g, const qtl_chromosome* c, double nz, const std::vector<double>& loc_vec)
: g_this(&g), chr(c), gpc_vec(), noise(nz)
{
chr->copy_loci(loci);
steps = loc_vec;
init();
}
bool
operator == (const segment_computer_t& sc) const { return g_this == sc.g_this && steps == sc.steps && noise == sc.noise; }
void
init()
{
/*MSG_DEBUG("init !");*/
gpc_vec.locus = &loci;
gpc_vec.init(g_this);
}
segment_computer_t&
operator = (segment_computer_t&& sc)
{
/*MSG_DEBUG("OPERATOR = :" << __LINE__);*/
g_this = sc.g_this;
chr = sc.chr;
chr->copy_loci(loci);
steps.swap(sc.steps);
/*gpc_vec.swap(sc.gpc_vec);*/
gpc_vec = std::move(sc.gpc_vec);
noise = sc.noise;
init();
return *this;
}
labelled_matrix<MatrixXd, label_type, double>
compute(const MatrixXd& LV)
{
/*DUMP_FILE_LINE();*/
auto unphased_LV = g_this->unphased_LV();
auto joint_qtl_iterator = chr->qtl_state_iterator(unphased_LV);
/*MatrixXd lv = g_this->noisy_LV(LV, noise);*/
MatrixXd qtl_lv(LV.innerSize(), loci.size());
/*MSG_DEBUG(MATRIX_SIZE(lv) << std::endl << lv.transpose());*/
gpc_vec.LV = &qtl_lv;
joint_qtl_iterator.init_expansion(LV, qtl_lv);
/*MSG_DEBUG(MATRIX_SIZE(qtl_lv));*/
/*MSG_DEBUG(qtl_lv.transpose());*/
auto tmp = compute();
int n_states = tmp.innerSize();
std::vector<label_type> row_labels;
row_labels.reserve(n_states * joint_qtl_iterator.size());
/*MSG_DEBUG("qtl_iterator.size = " << joint_qtl_iterator.size());*/
for (int i = 0; i < joint_qtl_iterator.size(); ++i) {
row_labels.insert(row_labels.end(), tmp.row_labels.begin(), tmp.row_labels.end());
}
labelled_matrix<MatrixXd, label_type, double> ret(row_labels, steps);
/*MSG_DEBUG("ret.data$dim(" << ret.data.innerSize() << ',' << ret.data.outerSize() << ") tmp.data$dim(" << tmp.innerSize() << ',' << tmp.outerSize() << ") n_states=" << n_states);*/
/*ret.data.topRows(n_states) = compute().data;*/
ret.data.topRows(n_states) = tmp.data;
/*ret.emplace_back(compute());*/
int row_ofs = n_states;
while (!joint_qtl_iterator.next()) {
if (joint_qtl_iterator.update(LV, qtl_lv)) {
/*ret.emplace_back(compute());*/
/*MSG_DEBUG(MATRIX_SIZE(qtl_lv));*/
/*MSG_DEBUG(qtl_lv.transpose());*/
ret.data.middleRows(row_ofs, n_states) = compute().data;
} else {
ret.data.middleRows(row_ofs, n_states) = MatrixXd::Zero(n_states, ret.data.outerSize());
}
row_ofs += n_states;
}
return ret;
}
labelled_matrix<MatrixXd, label_type, double>
compute()
{
return gpc_vec.fast_compute_over_segment(steps);
}
friend
inline
std::ostream&
operator << (std::ostream& os, const segment_computer_t& sc)
{
return os << '<' << sc.g_this->name
<< " n_steps=" << sc.steps.size()
<< " noise=" << sc.noise << '>';
}
};
#endif
......@@ -327,7 +327,7 @@ struct geno_matrix {
MatrixXd exp(double d) const
{
MatrixXd ret = p * ((d * diag).array().exp().matrix().asDiagonal()) * p_inv;
MatrixXd ret = p * ((d * .01 * diag).array().exp().matrix().asDiagonal()) * p_inv;
/*MatrixXd check = (d * inf_mat).exp();*/
/*if (!ret.isApprox(check)) {*/
/*MSG_ERROR("BAD EXP" << std::endl << "with diag:" << std::endl << check << std::endl << "with exp:" << std::endl << ret, "");*/
......@@ -494,6 +494,35 @@ struct geno_matrix {
std::map<label_type, VectorXd>
LV_map() const;
std::vector<label_type>
get_unique_labels() const
{
std::vector<label_type> unique_labels = labels;
std::sort(unique_labels.begin(), unique_labels.end());
auto it = std::unique(unique_labels.begin(), unique_labels.end());
unique_labels.resize(it - unique_labels.begin());
return unique_labels;
}
std::vector<VectorXd>
unphased_LV() const
{
std::vector<VectorXd> unique_unphased_LV;
std::vector<label_type> unique_labels = get_unique_labels();
int sz = unique_labels.size();
unique_unphased_LV.reserve(sz);
for (int i = 0; i < sz; ++i) {
VectorXd tmp = VectorXd::Zero(size());
for (size_t s = 0; s < size(); ++s) {
if (labels[s] == unique_labels[i]) {
tmp(s) = 1;
}
}
unique_unphased_LV.push_back(tmp);
}
return unique_unphased_LV;
}
};
......
......@@ -267,6 +267,7 @@ operator * (const labelled_matrix<MATRIX, RL, CL1>& m1, const labelled_matrix<MA
{
labelled_matrix<MATRIX, RL, CL2> ret;
#ifndef EIGEN_NO_DEBUG
#if 0
if (m1.column_labels != m2.row_labels) {
MSG_ERROR("Matrix labels mismatch! Can't multiply.", "");
MSG_ERROR("m1::cols " << m1.column_labels, "");
......@@ -278,6 +279,7 @@ operator * (const labelled_matrix<MATRIX, RL, CL1>& m1, const labelled_matrix<MA
/*std::cerr << "m2:" << std::endl << m2 << std::endl;*/
throw -1;
}
#endif
#endif
ret.row_labels = m1.row_labels;
ret.column_labels = m2.column_labels;
......
......@@ -498,9 +498,12 @@ struct filename_stream {
};
/*#define ULTRA_MEGA_PARANOID*/
static inline
MatrixXd matrix_inverse(const MatrixXd& m)
{
/*return m.fullPivHouseholderQr().inverse();*/
/*return m.inverse();*/
/* FIXME Should NOT use SVD (see note in compute() */
JacobiSVD<MatrixXd> inverter(m, ComputeFullV);
auto& V = inverter.matrixV();
......@@ -1078,7 +1081,7 @@ public:
int n_rows = 0;
for (const auto& kp: m.m_all_pops) {
std::stringstream s;
s << "POP " << (*kp)->name;
s << "POP " << (*kp)->name << '/' << (*kp)->qtl_generation_name;
pop_sections.push_back(s.str());
mws.add_row_section(pop_sections.back(), (*kp)->size());
n_rows += (*kp)->size();
......@@ -1087,7 +1090,7 @@ public:
static std::string constraints("CONSTRAINTS");
mws.add_row_section(constraints, m.X().innerSize() - n_rows);
}
os << "Model Y(" << m.m_Y->innerSize() << ',' << m.m_Y->outerSize() << "), " << m.m_blocks.size() << " blocks" << std::endl;
os << "Model X(" << m.m_Y->innerSize() << ',' << m.m_Y->outerSize() << "), " << m.m_blocks.size() << " blocks" << std::endl;
return os << mws;
}
......
......@@ -248,8 +248,12 @@ struct matrix_with_sections {
}
++ri;
for (const auto& s: m_column_sections) {
MSG_DEBUG("for s: column_sections");
MSG_DEBUG(" initial d(begin, r) = " << (ri - ret.begin()));
for (const auto&f: s.field_labels()) {
*ri++ = to_string(f).size();
MSG_DEBUG(" d(begin, r) = " << (ri - ret.begin()));
MSG_QUEUE_FLUSH();
}
}
for (int i = 0; i < m_matrix.outerSize(); ++i) {
......
......@@ -33,8 +33,8 @@ void f_test(const model& model_current, const model& model_new, int col_num, Mat
/*MSG_DEBUG("XtX^-1" << std::endl << model_new.XtX_pseudo_inverse());*/
MSG_DEBUG("dof_cur " << dof_cur << ' ' << MATRIX_SIZE(model_current.X()) << " dof_new " << dof_new << ' ' << MATRIX_SIZE(model_new.X()) << " rss_cur " << rss_cur.transpose() << " rss_new " << rss_new.transpose() << " keys_cur " << model_current.keys() << " keys_new " << model_new.keys());
MSG_DEBUG("Y_cur" << std::endl << model_current.Y());
MSG_DEBUG("Y_new" << std::endl << model_new.Y());
/*MSG_DEBUG("Y_cur" << std::endl << model_current.Y());*/
/*MSG_DEBUG("Y_new" << std::endl << model_new.Y());*/
MSG_DEBUG("X_cur" << std::endl << model_current.X());
MSG_DEBUG("X_new" << std::endl << model_new.X());
/*MSG_DEBUG("XtX_cur" << std::endl << model_current.XtX());*/
......
......@@ -59,14 +59,27 @@ class Chromosome(object):
def __str__(self):
size = (len(self)
- len(set.intersection(self.map.qtls, self.M[1:-1])))
self.map_file_hack[0] = (size, self.map_file_hack[0][1])
map_items = [(size, self.map_file_hack[0][1])]
offset = 0
print self.map.qtls
for i in xrange(1, len(self.map_file_hack) - 1):
offset += self.map_file_hack[i][0]
print self.M[i], self.map.qtls,
print self.M[i] not in self.map.qtls
if self.M[i] not in self.map.qtls:
map_items.append((offset, self.map_file_hack[i][1]))
offset = 0
map_items.append((offset + self.map_file_hack[-1][0],
self.map_file_hack[-1][1]))
#self.map_file_hack[0] = (size, self.map_file_hack[0][1])
#print "chromosome has", size, "non-qtl markers"
return ' '.join(chain(['*' + self.name],
[' '.join(imap(str, x))
for x in self.map_file_hack
if x == self.map_file_hack[0]
or x == self.map_file_hack[-1]
or x[1] not in self.map.qtl_names]))
for x in map_items]))
# if x == self.map_file_hack[0]
# or x == self.map_file_hack[-1]
# or x[1] not in self.map.qtl_names]))
__repr__ = __str__
......
<format-specification>
<format name="ABHCD2">
<domain type="ancestor" alphabet-from="ac"/>
<scores>
<score symbol="A">
<from mother="a" father="a"/>
</score>
<score symbol="H">
<from mother="a" father="c"/>
<from mother="c" father="a"/>
</score>
<score symbol="B">
<from mother="c" father="c"/>
</score>
<score symbol="C">
<from mother="a" father="c"/>
<from mother="c" father="a"/>
<from mother="c" father="c"/>
</score>
<score symbol="D">
<from mother="a" father="c"/>
<from mother="c" father="a"/>
<from mother="a" father="a"/>
</score>
<score symbol="-">
<from mother="a" father="a"/>
<from mother="a" father="c"/>
<from mother="c" father="a"/>
<from mother="c" father="c"/>
</score>
</scores>
</format>
</format-specification>
<format-specification>
<format name="ABHCD3">
<domain type="bncestor" alphabet-from="bc"/>
<scores>
<score symbol="A">
<from mother="b" father="b"/>
</score>
<score symbol="H">
<from mother="b" father="c"/>
<from mother="c" father="b"/>
</score>
<score symbol="B">
<from mother="c" father="c"/>
</score>
<score symbol="C">
<from mother="b" father="c"/>
<from mother="c" father="b"/>
<from mother="c" father="c"/>
</score>
<score symbol="D">
<from mother="b" father="c"/>
<from mother="c" father="b"/>
<from mother="b" father="b"/>
</score>
<score symbol="-">
<from mother="b" father="b"/>
<from mother="b" father="c"/>
<from mother="c" father="b"/>
<from mother="c" father="c"/>
</score>
</scores>
</format>
</format-specification>
<format-specification>
<format name="ABHCD">
<domain type="ancestor" alphabet-from="abc"/>
<scores>
<score symbol="A">
<from mother="a" father="a"/>
</score>
<score symbol="H">
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="a" father="c"/>
<from mother="c" father="a"/>
</score>
<score symbol="B">
<from mother="b" father="b"/>
<from mother="c" father="c"/>
</score>
<score symbol="C">
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="b" father="b"/>
<from mother="a" father="c"/>
<from mother="c" father="a"/>
<from mother="c" father="c"/>
</score>
<score symbol="D">
<from mother="a" father="c"/>
<from mother="c" father="a"/>
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="a" father="a"/>
</score>
<score symbol="-">
<from mother="a" father="a"/>
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="b" father="b"/>
<from mother="a" father="c"/>
<from mother="c" father="a"/>
<from mother="c" father="c"/>
</score>
</scores>
</format>
</format-specification>
#!/bin/bash
set -e
(cd ../../src/pedigree && make -j)