Commit 2410745a authored by Damien Leroux's avatar Damien Leroux
Browse files

Implemented model pretty-print.

parent 620770f4
......@@ -4,6 +4,7 @@
#include <iostream>
#include "cache2.h"
#include "model/fwd.h"
#include "model/print.h"
#include "model/model.h"
#include "model/model_element.h"
#include "model/tests.h"
......
......@@ -3,8 +3,9 @@
#include <stdexcept>
#include <Eigen/SVD>
#include <Eigen/QR>
/*#include <Eigen/SVD>*/
/*#include <Eigen/QR>*/
#include "eigen.h"
#include <cmath>
#include <boost/math/special_functions/beta.hpp>
......@@ -13,6 +14,7 @@
#include "labelled_matrix.h"
#include "settings.h"
#include "print.h"
/*#define COMPONENT_EPSILON (1.e-10)*/
#define COMPONENT_EPSILON (active_settings->tolerance)
......@@ -20,6 +22,262 @@
typedef labelled_matrix<Eigen::Matrix<double, -1, -1>, int, std::vector<char>> model_block_type;
struct model_block_key {
typedef std::map<const chromosome*, locus_key> selection_type;
typedef selection_type::value_type selection_value_type;
std::map<const chromosome*, locus_key> selection;
model_block_key() : selection() {}
model_block_key(std::initializer_list<selection_value_type> il)
: selection(std::forward<std::initializer_list<selection_value_type>>(il))
{}
locus_key
operator [] (const chromosome* chr) const
{
auto it = selection.find(chr);
return it == selection.end() ? locus_key() : it->second;
}
bool empty() const
{
if (selection.size()) {
for (const auto& kv: selection) {
if (!kv.second->is_empty()) {
return false;
}
}
}
return true;
}
model_block_key
only_chromosome(const chromosome* c) const
{
return {{{c, selection.find(c)->second}}};
}
model_block_key
without_locus(const chromosome* c, double l) const
{
model_block_key mbk = *this;
locus_key& lk = mbk.selection.find(c)->second;
lk = lk - l;
return mbk;
}
model_block_key
with_locus(const chromosome* c, double l) const
{
model_block_key mbk = *this;
locus_key& lk = mbk.selection.find(c)->second;
lk = lk + l;
return mbk;
}
model_block_key&
operator += (std::pair<const chromosome*, double> loc)
{
/*locus_key& lk = selection.find(loc.first)->second;*/
/*lk = lk + loc.second;*/
selection[loc.first] = selection[loc.first] + loc.second;
return *this;
}
model_block_key&
operator += (const model_block_key& mbk)
{
for (const auto& c_lk: mbk.selection) {
auto it = selection.find(c_lk.first);
if (it == selection.end()) {
selection.insert(c_lk);
} else {
it->second = it->second + c_lk.second;
}
}
return *this;
}
model_block_key&
operator -= (std::pair<const chromosome*, double> loc)
{
locus_key& lk = selection.find(loc.first)->second;
lk = lk - loc.second;
return *this;
}
model_block_key
operator / (const chromosome* c) const
{
model_block_key mbk;
for (const auto& kv: selection) {
if (kv.first != c) {
mbk.selection.insert(kv);
}
}
return mbk;
}
model_block_key
operator % (const chromosome* c) const
{
return only_chromosome(c);
}
std::vector<model_block_key>
split() const
{
std::vector<model_block_key> ret;
for (const auto& x: selection) {
ret.push_back({x});
}
return ret;
}
std::vector<model_block_key>
split_loci() const
{
std::vector<model_block_key> ret;
for (const auto& x: selection) {
for (double l: *x.second) {
/*locus_key lk(new locus_key_struc(NULL, l));*/
model_block_key k;
k.selection[x.first] = k.selection[x.first] + l;
/*ret.emplace_back({{x.first, lk}});*/
ret.push_back(std::move(k));
}
}
return ret;
}
static
model_block_key
join(const std::vector<model_block_key>& list)
{
model_block_key ret;
for (const auto& k: list) {
ret += k;
}
return ret;
}
static
model_block_key
join(const std::map<model_block_key, value<model_block_type>>& coll)
{
model_block_key ret;
for (const auto& kv: coll) {
ret += kv.first;
}
return ret;
}
bool
operator == (const model_block_key& mbk) const
{
return selection == mbk.selection;
}
bool
operator != (const model_block_key& mbk) const
{
return selection != mbk.selection;
}
bool
operator < (const model_block_key& mbk) const
{
/*MSG_DEBUG((*this) << " < " << mbk << "?");*/
if (selection.size() == mbk.selection.size()) {
auto i = selection.begin();
auto j = selection.end();
auto mi = mbk.selection.begin();
while (i != j) {
if (i->first < mi->first) {
/*MSG_DEBUG("YES");*/
return true;
} else if (i->first == mi->first) {
if (i->second < mi->second) {
/*MSG_DEBUG("YES");*/
return true;
} else if (i->second > mi->second) {
/*MSG_DEBUG("NO");*/
return false;
}
} else {
/*MSG_DEBUG("NO");*/
return false;
}
++i; ++mi;
}
/*MSG_DEBUG("NO");*/
return false;
}
/*MSG_DEBUG((selection.size() < mbk.selection.size() ? "YES" : "NO"));*/
return selection.size() < mbk.selection.size();
}
friend
model_block_key
operator + (const model_block_key& k, std::pair<const chromosome*, double> loc)
{
model_block_key ret = k;
locus_key& lk = ret.selection.find(loc.first)->second;
lk = lk + loc.second;
return ret;
}
friend
model_block_key
operator - (const model_block_key& k, std::pair<const chromosome*, double> loc)
{
model_block_key ret = k;
locus_key& lk = ret.selection.find(loc.first)->second;
lk = lk - loc.second;
return ret;
}
friend
std::ostream&
operator << (std::ostream& os, const model_block_key& mbk)
{
auto i = mbk.selection.begin();
auto j = mbk.selection.end();
if (i == j) {
return os << "I";
}
os << i->first->name << '{' << i->second << '}';
for (++i; i != j; ++i) {
os << ',' << i->first << '{' << i->second << '}';
}
return os;
}
};
typedef std::map<model_block_key, value<model_block_type>> model_block_collection;
namespace std {
template <>
struct hash<model_block_key> {
size_t operator () (const model_block_key& mbk) const
{
/* WARNING FIXME this MUST NOT be used to hash a function parameter
* in a disk-cached task, because a POINTER is HASHED and the order
* is not guaranteed to be the same in every run.
*/
std::hash<const chromosome*> hc;
std::hash<locus_key> hlk;
size_t accum = 0xdeadbe3f;
for (const auto& kv: mbk.selection) {
accum = impl::ROTATE<7>(impl::ROTATE<7>(accum * hc(kv.first)) ^ hlk(kv.second));
}
return accum;
}
};
} // namespace std
static inline
bool
around_zero(double o)
......@@ -87,6 +345,27 @@ MatrixXd concat_right(const collection<model_block_type>& mat_vec)
return ret;
}
static inline
MatrixXd concat_right(const model_block_collection& mat_map)
{
size_t full_size = 0;
MatrixXd ret;
for (auto m: mat_map) {
full_size += m.second->outerSize();
/*MSG_DEBUG("preparing concat_right with matrix(" << m->innerSize() << ',' << m->outerSize() << ')');*/
}
ret.resize(mat_map.begin()->second->innerSize(), full_size);
full_size = 0;
for (auto m: mat_map) {
/*MSG_DEBUG("concat_right in M(" << ret.innerSize() << ',' << ret.outerSize() << ") at col " << full_size << "matrix(" << m->innerSize() << ',' << m->outerSize() << ')');*/
/*ret.block(0, full_size, ret.innerSize(), m->outerSize()) = *m;*/
ret.middleCols(full_size, m.second->outerSize()) = m.second->data;
full_size += m.second->outerSize();
}
return ret;
}
static inline
MatrixXd concat_down(const std::vector<MatrixXd>& mat_vec)
{
......@@ -153,12 +432,12 @@ enum class SolverType { QR, SVD };
struct model {
model()
: m_Y(), m_blocs(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false)
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false)
{}
model(const value<MatrixXd>& y, SolverType st = SolverType::QR)
: m_Y(y)
, m_blocs(), m_X()
, m_blocks(), m_X()
, m_rank(), m_rss(), m_coefficients(), m_residuals()
, m_solver_type(st)
, m_computed(false)
......@@ -167,7 +446,7 @@ struct model {
model(const model& mo)
: m_Y(mo.m_Y)
, m_blocs(mo.m_blocs), m_X(mo.m_X)
, m_blocks(mo.m_blocks), m_X(mo.m_X)
, m_rank(mo.m_rank), m_rss(mo.m_rss), m_coefficients(mo.m_coefficients)
, m_residuals(mo.m_residuals)
, m_solver_type(mo.m_solver_type)
......@@ -181,7 +460,7 @@ struct model {
#else
{
m_Y = mo.m_Y;
m_blocs = mo.m_blocs;
m_blocks = mo.m_blocks;
m_computed = mo.m_computed;
m_X = mo.m_X;
m_rss = mo.m_rss;
......@@ -194,8 +473,8 @@ struct model {
model& operator = (model&& mo)
{
m_Y = mo.m_Y;
m_blocs.clear();
m_blocs.swap(mo.m_blocs);
m_blocks.clear();
m_blocks.swap(mo.m_blocks);
m_computed = mo.m_computed;
m_X = mo.m_X;
m_rss = mo.m_rss;
......@@ -208,19 +487,37 @@ struct model {
bool operator == (const model& m) const
{
return m_Y == m.m_Y && m_blocs == m.m_blocs && m_rss == m.m_rss;
return m_Y == m.m_Y && m_blocks == m.m_blocks && m_rss == m.m_rss;
}
void add_bloc(const value<model_block_type>& x)
void add_block(const model_block_key& key, const value<model_block_type>& x)
{
m_computed = false;
m_blocs.push_back(x);
/*MSG_DEBUG("add_block(" << key << ", <some block>)");*/
m_blocks[key] = x;
/*m_blocks.push_back(x);*/
/*m_keys.push_back(key);*/
}
void remove_bloc(const value<model_block_type>& x)
void remove_block(const value<model_block_type>& x)
{
m_computed = false;
m_blocs.erase(std::find(m_blocs.begin(), m_blocs.end(), x));
auto i = m_blocks.begin();
auto j = m_blocks.end();
while (i != j && i->second != x) ++i;
if (i != j) {
m_blocks.erase(i);
}
/*auto it = m_blocks.erase(std::find(m_blocks.begin(), m_blocks.end(), x));*/
/*m_keys.erase(m_keys.begin() + (it - m_blocks.begin()));*/
}
void remove_block(const model_block_key& x)
{
m_computed = false;
m_blocks.erase(x);
/*auto it = m_keys.erase(std::find(m_keys.begin(), m_keys.end(), x));*/
/*m_blocks.erase(m_blocks.begin() + (it - m_keys.begin()));*/
}
void use_SVD()
......@@ -251,8 +548,8 @@ struct model {
m_coefficients = new immediate_value<MatrixXd>(MatrixXd());
m_residuals = new immediate_value<MatrixXd>(MatrixXd());
m_rss = new immediate_value<VectorXd>(VectorXd());
if (m_blocs.size() == 2) {
MatrixXd tmp = concat_right(m_blocs);
if (m_blocks.size() == 2) {
MatrixXd tmp = concat_right(m_blocks);
m_X->resize(tmp.innerSize() + 1, tmp.outerSize());
m_X->topRows(tmp.innerSize()) = tmp;
(*m_X)(tmp.innerSize(), 0) = 0;
......@@ -274,7 +571,7 @@ struct model {
DUMP_FILE_LINE();
}
} else {
*m_X = concat_right(m_blocs);
*m_X = concat_right(m_blocks);
}
if (m_solver_type == SolverType::QR) {
int m_columns = m_X->outerSize();
......@@ -341,13 +638,13 @@ struct model {
return *m_Y;
}
model extend(const value<model_block_type>& m) const
model extend(const model_block_key& k, const value<model_block_type>& m) const
{
model ret(*this);
/*MSG_DEBUG("extend model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')');*/
/*model ret(m_Y, m_solver_type);*/
/*ret.add_bloc(X());*/
ret.add_bloc(m);
/*ret.add_block(X());*/
ret.add_block(k, m);
return ret;
}
......@@ -371,9 +668,99 @@ struct model {
return V * inv_sv.asDiagonal() * V.transpose();
}
model_block_collection
blocks_with_chromosome(const chromosome* chr)
{
model_block_collection ret;
for (const auto& bkv: m_blocks) {
for (const auto c_lk: bkv.first.selection) {
if (c_lk.first == chr) {
ret.insert(bkv);
break;
}
}
}
return ret;
}
model_block_collection
extract_blocks_with_chromosome_and_locus(const chromosome* chr, double locus)
{
auto bi = m_blocks.begin();
auto bj = m_blocks.end();
std::vector<decltype(bi)> sub;
for (;bi != bj; ++bi) {
for (const auto c_lk: bi->first.selection) {
if (c_lk.first == chr && c_lk.second->has(locus)) {
sub.push_back(bi);
break;
}
}
}
model_block_collection ret;
for (auto x: sub) {
ret.insert(*x);
m_blocks.erase(x);
}
return ret;
}
model_block_collection
extract_blocks_with_chromosome(const chromosome* chr)
{
auto bi = m_blocks.begin();
auto bj = m_blocks.end();
std::vector<decltype(bi)> sub;
for (;bi != bj; ++bi) {
for (const auto c_lk: bi->first.selection) {
if (c_lk.first == chr) {
sub.push_back(bi);
break;
}
}
}
model_block_collection ret;
for (auto x: sub) {
ret.insert(*x);
m_blocks.erase(x);
}
return ret;
}
void remove_blocks_with_chromosome(const chromosome* chr)
{
auto bi = m_blocks.begin();
auto bj = m_blocks.end();
std::vector<decltype(bi)> sub;
for (;bi != bj; ++bi) {
for (const auto c_lk: bi->first.selection) {
if (c_lk.first == chr) {
sub.push_back(bi);
break;
}
}
}
for (auto x: sub) {
m_blocks.erase(x);
}
}
std::vector<model_block_key>
find_epistasis_dependencies(const model_block_key& mbk)
{
if (mbk.selection.size() == 1) {
/* FIXME: all on the same chromosome, nothing to return? */
return {mbk};
}
/* FIXME: how to handle epistasis with N QTLs on M chromosomes and N > M ? */
return {};
}
/*private:*/
value<MatrixXd> m_Y;
collection<model_block_type> m_blocs;
/*collection<model_block_type> m_blocks;*/
/*std::vector<model_block_key> m_keys;*/
model_block_collection m_blocks;
value<MatrixXd> m_X;
int m_rank;
value<VectorXd> m_rss;
......@@ -388,7 +775,7 @@ public:
inline
md5_digest& operator << (md5_digest& md5, const model& m)
{
md5 << m.Y() << m.m_blocs;
md5 << m.Y() << m.m_blocks;
return md5;
}
......@@ -396,16 +783,39 @@ public:
inline
std::ostream& operator << (std::ostream& os, const model& m)
{
os << "<model Y(" << m.m_Y->innerSize() << ',' << m.m_Y->outerSize() << "), " << m.m_blocs.size() << " blocs>";
return os;
/*os << "<model Y(" << m.m_Y->innerSize() << ',' << m.m_Y->outerSize() << "), " << m.m_blocks.size() << " blocks>";*/
model_print::matrix_with_sections<std::string, void, model_block_key, std::vector<char>> mws(m.X());
/*model_print::matrix_with_sections<std::string, void, model_block_key, void> mws(m.X());*/
for (const auto& kv: m.m_blocks) {
mws.add_column_section(kv.first, kv.second->column_labels);
/*mws.add_column_section(kv.first, kv.second->outerSize());*/
}
std::vector<std::string> pop_sections;
pop_sections.reserve(active_settings->populations.size());
int n_rows = 0;
for (const auto& kp: active_settings->populations) {
std::stringstream s;
s << "POP " << kp.second.name;
pop_sections.push_back(s.str());
mws.add_row_section(pop_sections.back(), kp.second.size());
n_rows += kp.second.size();
}
if (n_rows < m.X().innerSize()) {
static std::string constraints("CONSTRAINTS");
mws.add_row_section(constraints, m.X().innerSize() - n_rows);
}
os << "Model Y(" << m.m_Y->innerSize() << ',' << m.m_Y->outerSize() << "), " << m.m_blocks.size() << " blocks" << std::endl;
return os << mws;
}
size_t hash() const
{
std::hash<model_block_type> hm;
std::hash<model_block_key> hk;
size_t accum = 0;
for (const auto& b: m_blocs) {
accum ^= hm(*b);
for (const auto& b: m_blocks) {
accum ^= hk(b.first);
accum ^= hm(*b.second);
}
return accum;
}
......
......@@ -7,6 +7,15 @@
#include "error.h"
#include "labelled_matrix.h"
inline
std::ostream& operator << (std::ostream& os, const std::vector<char>& v)
{
for (char c: v) { os << c; }
return os;
}
namespace model_print {
extern int precision;
extern std::string field_sep;
......@@ -27,7 +36,9 @@ inline
template <typename T>
std::string to_string(T x) { std::stringstream s; s << x; return s.str(); }
std::string to_string(const T& x) { std::stringstream s; s << x; return s.str(); }
inline
std::string to_string(const std::vector<char>& x) { return std::string(x.begin(), x.end()); }
inline
std::string to_string(const std::string& s) { return s; }
inline
......@@ -38,14 +49,15 @@ template <typename _Printable>
std::ostream&