Commit c55ce20e authored by Damien Leroux's avatar Damien Leroux
Browse files

Now fixing rank before reporting. Added missing files.

parent a56e0c93
...@@ -862,7 +862,7 @@ struct gauss_elimination { ...@@ -862,7 +862,7 @@ struct gauss_elimination {
struct model { struct model {
model() model()
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_ghost_constraint(), m_all_pops(), m_ancestor_names(), m_max_order(1), m_threshold(0) : m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_ghost_constraint(), m_all_pops(), m_ancestor_names(), m_max_order(1), m_threshold(0), m_rank_fixer()
{} {}
...@@ -878,6 +878,7 @@ struct model { ...@@ -878,6 +878,7 @@ struct model {
, m_ancestor_names(anam) , m_ancestor_names(anam)
, m_max_order(mo) , m_max_order(mo)
, m_threshold(threshold) , m_threshold(threshold)
, m_rank_fixer()
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/ /*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/
{} {}
...@@ -893,6 +894,7 @@ struct model { ...@@ -893,6 +894,7 @@ struct model {
, m_ancestor_names((*pops.front())->ancestor_names) , m_ancestor_names((*pops.front())->ancestor_names)
, m_max_order(mo) , m_max_order(mo)
, m_threshold(threshold) , m_threshold(threshold)
, m_rank_fixer()
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/ /*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/
{} {}
...@@ -909,6 +911,7 @@ struct model { ...@@ -909,6 +911,7 @@ struct model {
, m_ancestor_names(mo.m_ancestor_names) , m_ancestor_names(mo.m_ancestor_names)
, m_max_order(mo.m_max_order) , m_max_order(mo.m_max_order)
, m_threshold(mo.m_threshold) , m_threshold(mo.m_threshold)
, m_rank_fixer()
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/ /*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/
{ {
for (const auto& kv: mo.m_blocks) { for (const auto& kv: mo.m_blocks) {
...@@ -930,6 +933,7 @@ struct model { ...@@ -930,6 +933,7 @@ struct model {
, m_ancestor_names(mo.m_ancestor_names) , m_ancestor_names(mo.m_ancestor_names)
, m_max_order(mo.m_max_order) , m_max_order(mo.m_max_order)
, m_threshold(mo.m_threshold) , m_threshold(mo.m_threshold)
, m_rank_fixer()
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/ /*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/
{ {
for (const auto& kv: mo.m_blocks) { for (const auto& kv: mo.m_blocks) {
...@@ -984,8 +988,96 @@ struct model { ...@@ -984,8 +988,96 @@ struct model {
} }
#endif #endif
struct rank_fixer_type {
bool m_enabled = false;
model* m_model = NULL;
typedef std::map<model_block_key, std::vector<int>> col_order_map_type;
col_order_map_type m_col_order, m_ctf;
col_order_map_type::reverse_iterator m_current_block;
std::vector<int>::iterator m_current_col;
rank_fixer_type() : m_enabled(false), m_model(NULL), m_col_order(), m_ctf() {}
std::vector<int> cols_to_fix(const model_block_key& mbk) { return m_ctf[mbk]; }
bool enabled() const { return m_enabled; }
void disable() { m_enabled = false; }
void enable(model& this_model)
{
m_enabled = true;
m_model = &this_model;
m_ctf.clear();
for (const auto& kv: m_model->m_blocks) {
auto& mb = kv.second;
auto mass = mb->data.colwise().sum();
std::vector<int> col_order(mb->cols());
std::iota(col_order.begin(), col_order.end(), 0);
std::sort(col_order.begin(), col_order.end(), [&] (int a, int b) { return mass(a) < mass(b); });
std::swap(m_col_order[kv.first], col_order);
}
MSG_DEBUG("Have col_order: " << m_col_order);
m_current_block = m_col_order.rbegin();
m_current_col = m_current_block->second.begin();
if (m_current_block != m_col_order.rend()) {
m_ctf[m_current_block->first].push_back(*m_current_col);
}
}
bool next(bool validate_previous_column)
{
MSG_DEBUG("[rank_fixer] next (did" << (validate_previous_column ? "" : "n't") << " keep previously constrained column)");
if (!validate_previous_column) {
m_ctf[m_current_block->first].pop_back();
}
if (m_current_block == m_col_order.rend()) {
MSG_DEBUG("[rank_fixer] at end.");
return false;
}
if (m_current_col == m_current_block->second.end()) {
MSG_DEBUG("[rank_fixer] next block.");
++m_current_block;
if (m_current_block != m_col_order.rend()) {
m_current_col = m_current_block->second.begin();
}
} else {
MSG_DEBUG("[rank_fixer] next column.");
++m_current_col;
if (m_current_col == m_current_block->second.end()) {
MSG_DEBUG("[rank_fixer] next block.");
++m_current_block;
if (m_current_block != m_col_order.rend()) {
m_current_col = m_current_block->second.begin();
}
}
}
if (m_current_block != m_col_order.rend()) {
MSG_DEBUG("[rank_fixer] adding " << m_current_block->first << '#' << (*m_current_col));
m_ctf[m_current_block->first].push_back(*m_current_col);
return true;
}
return false;
}
};
constraint_list constraint_list
compute_constraint(const model_block_key& mbk, const model_block_type& mb) compute_constraint(const model_block_key& mbk, const model_block_type& mb)
{
auto ret = compute_constraint_for_block_type(mbk, mb);
// MSG_DEBUG("compute_constraint");
if (m_rank_fixer.enabled()) {
// MSG_DEBUG("constraint count before column zeroing: " << ret.size());
std::vector<int> cols_to_fix = m_rank_fixer.cols_to_fix(mbk);
for (int c: cols_to_fix) {
MatrixXd colzero = MatrixXd::Zero(1, mb.cols());
colzero(0, c) = 1;
ret.emplace_back(std::map<model_block_key, MatrixXd>{{mbk, colzero}});
}
// MSG_DEBUG("constraint count after column zeroing: " << ret.size());
}
return ret;
}
constraint_list
compute_constraint_for_block_type(const model_block_key& mbk, const model_block_type& mb)
{ {
constraint_list ret; constraint_list ret;
if (mbk->type == mbk_CI) { if (mbk->type == mbk_CI) {
...@@ -1282,7 +1374,28 @@ struct model { ...@@ -1282,7 +1374,28 @@ struct model {
return m_solver_type; return m_solver_type;
} }
void compute() void compute()
{
compute_impl();
if (m_with_constraints && rank() < m_X->cols()) {
MSG_WARNING("Insufficient rank in model matrix. Attempting to fix this automatically.");
m_rank_fixer.enable(*this);
bool keep;
// MSG_DEBUG(X());
// MSG_DEBUG("rank: " << rank());
do {
auto old_rank = rank();
m_computed = false;
compute_impl();
// MSG_DEBUG(X());
// MSG_DEBUG("rank: " << rank());
keep = old_rank < rank();
} while (rank() < m_X->cols() && m_rank_fixer.next(keep));
m_rank_fixer.disable();
}
}
void compute_impl()
{ {
// MSG_DEBUG('[' << std::this_thread::get_id() << "] Recomputing model with selection " << keys()); // MSG_DEBUG('[' << std::this_thread::get_id() << "] Recomputing model with selection " << keys());
if (m_computed) { if (m_computed) {
...@@ -1885,6 +1998,7 @@ struct model { ...@@ -1885,6 +1998,7 @@ struct model {
std::map<char, std::string> m_ancestor_names; std::map<char, std::string> m_ancestor_names;
size_t m_max_order; size_t m_max_order;
double m_threshold; double m_threshold;
rank_fixer_type m_rank_fixer;
public: public:
friend friend
......
#ifndef SPELL_QTL_OUTPUT_IMPL_H
#define SPELL_QTL_OUTPUT_IMPL_H
#ifdef SPELL_QTL_STL_OUTPUT_H
namespace output {
template <typename _Stream, typename _Container>
_Stream& output_value_container(_Stream& os, _Container&& container) {
typedef traits<_Container> _Traits;
auto beg = container.begin(), end = container.end();
os << _Traits::prefix;
if (beg != end) {
os << (*beg);
for (++beg; beg != end; ++beg) {
os << _Traits::separator << (*beg);
}
}
return os << _Traits::suffix;
};
}
#endif //SPEL_QTL_STL_OUTPUT_H
#ifdef _SPEL_ERROR_H_
template <typename S> builder<S>& builder<S>::operator << (const char* x) { std::move(_s) << x; return *this; }
template <typename S> template <typename X> builder<S>& builder<S>::operator << (X&& x) { _s << x; return *this; }
template <typename S> builder<S>& builder<S>::operator << (_Stream&(*x)(_Stream&)) { _s << x; return *this; }
template <typename S> builder<S> endpoint<S>::operator << (const char* x) { builder<S> b(out); b._s << x; return b; }
template <typename S> template <typename X> builder<S> endpoint<S>::operator << (X&& x) { builder<S> b(out); b._s << x; return b; }
template <typename S> builder<S> endpoint<S>::operator << (_Stream&(*x)(_Stream&)) { builder<S> b(out); b._s << x; return b; }
#endif
#ifdef _SPELL_FILE_H_
template <typename ARG>
file& operator << (file& f, ARG&& arg)
{
if (f.m_impl) {
f.m_impl << arg;
f.check_state();
}
return f;
}
template <typename ARG>
file& operator >> (file& f, ARG&& arg)
{
if (f.m_impl) {
f.check_state();
f.m_impl >> arg;
}
return f;
}
inline
file& operator << (file& f, std::ostream& (&func) (std::ostream&))
{
if (f.m_impl) {
f.m_impl << func;
f.check_state();
}
return f;
}
#endif
#endif
#include <iostream>
#include <fstream>
#include <sstream>
/*#include <random>*/
/*#include <bitset>*/
#include <array>
#include <cstring>
#include <cmath>
#include <sys/time.h>
#if 0
unsigned int seed()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec * 1000000 + tv.tv_usec;
}
/*typedef std::minstd_rand random_t;*/
/*bool toss_coin_slow(double t)*/
/*{*/
/*static std::uniform_real_distribution<> dis(0, 1);*/
/*static random_t r(seed());*/
/*return dis(r) < t;*/
/*}*/
/* The state must be seeded so that it is not everywhere zero. */
uint64_t s[2] = {seed64(), seed64()};
inline
uint64_t xorshift128plus(void) {
uint64_t x = s[0];
uint64_t const y = s[1];
s[0] = y;
x ^= x << 23; // a
x ^= x >> 17; // b
x ^= y ^ (y >> 26); // c
s[1] = x;
return x + y;
}
bool toss_coin_less_slow(double t)
{
uint64_t ut = (uint64_t) -1;
ut *= t;
return xorshift128plus() < ut;
}
inline
bool toss_coin(double t)
{
static uint64_t rnd = seed64();
uint64_t ut = (uint64_t) -1;
ut *= t;
rnd = 2862933555777941757ULL * rnd + 3037000493ULL; /* http://nuclear.llnl.gov/CNP/rng/rngman/node4.html */
return rnd <= ut;
}
#endif
inline
uint64_t seed64()
{
struct timeval tv;
gettimeofday(&tv, NULL);
uint64_t ret = tv.tv_sec;
ret <<= 32;
ret += tv.tv_usec;
return ret;
}
inline
bool toss_coin_fast(uint64_t ut)
{
static uint64_t rnd = seed64();
rnd = 2862933555777941757ULL * rnd + 3037000493ULL; /* http://nuclear.llnl.gov/CNP/rng/rngman/node4.html */
return rnd < ut;
}
#ifndef N_MARK
#error "You MUST define N_MARK at compile-time using -DN_MARK=<number of markers between M1 and M2>"
#endif
#ifndef N_GEN
#error "You MUST define N_GEN at compile-time using -DN_GEN=<number of INTERMEDIARY generations (n-1 for a Fn result)>"
#endif
/*typedef std::array<bool, (N_MARK + 3) * 2> chromosome_t;*/
/*typedef std::array<bool, N_MARK + 3> gamete_t;*/
/*typedef bool chromosome_t[(N_MARK + 3) * 2];*/
/*typedef bool gamete_t[N_MARK + 3];*/
typedef bool chromosome_t[(N_MARK + 3) * 2];
typedef bool gamete_t[N_MARK + 3];
inline
size_t get_mark(const chromosome_t& chr, size_t mark)
{
size_t base = mark << 1;
size_t ret = (size_t) chr[base] + (chr[base + 1] << 1);
/*std::cout << "mark_value=" << ret << std::endl;*/
return ret;
}
static char mrk_obs[3] = {'A', 'H', 'B'};
inline
size_t get_obs_i(const chromosome_t& chr, size_t mark)
{
/*static size_t mrk_obs_i[4] = {0, 1, 1, 2};*/
/*return mrk_obs_i[get_mark(chr, mark)];*/
size_t base = mark << 1;
return (size_t) chr[base] + chr[base + 1];
}
inline
char get_obs(const chromosome_t& chr, size_t mark)
{
return mrk_obs[get_obs_i(chr, mark)];
}
/*double gen_map[N_MARK + 2];*/
uint64_t gen_map[N_MARK + 2];
#define UINT64_HALF ((((uint64_t) -1) >> 1) + 1ULL)
void init_gen_map(double dist_M2, double dist_M3)
{
double delta = dist_M2 / (N_MARK + 1);
for (size_t i = 0; i <= N_MARK; ++i) {
gen_map[i] = UINT64_MAX * delta;
}
gen_map[N_MARK + 1] = UINT64_MAX * dist_M3;
}
template <int I>
struct unroll_gen_gamete {
void operator () (gamete_t& ret)
{
ret[I] = ret[I - 1] ^ toss_coin_fast(gen_map[I - 1]);
unroll_gen_gamete<I + 1>()(ret);
}
};
template <>
struct unroll_gen_gamete<0> {
void operator () (gamete_t& ret)
{
ret[0] = toss_coin_fast(UINT64_HALF);
unroll_gen_gamete<1>()(ret);
}
};
template <>
struct unroll_gen_gamete<N_MARK + 3> {
void operator () (gamete_t& ret) {}
};
inline
void gen_gamete(gamete_t& ret)
{
#if 0
bool strand = toss_coin_fast(UINT64_HALF); /* .5 */
ret[0] = strand;
for (size_t i = 0; i < (N_MARK + 2); ++i) {
strand ^= toss_coin_fast(gen_map[i]);
ret[i + 1] = strand;
}
#else
unroll_gen_gamete<0>()(ret);
#endif
/*std::cout << "GAMETE " << ret << std::endl;*/
}
#if 0
gamete_t gen_haplo(const chromosome_t& chr, const gamete_t& gam)
{
gamete_t ret;
size_t base = 0;
for (size_t i = 0; i < N_MARK + 3; ++i, base += 2) {
ret[i] = chr[base + gam[i]];
}
/*std::cout << "HAPLO " << gam << " X " << chr << " => " << ret << std::endl;*/
return ret;
}
#elif 0
void gen_haplo(const chromosome_t& chr, const gamete_t& gam, gamete_t& ret)
{
for (size_t i = 0; i < N_MARK + 3; ++i) {
ret[i] = chr[(i << 1) + gam[i]];
}
/*std::cout << "HAPLO " << gam << " X " << chr << " => " << ret << std::endl;*/
}
#else
template <int I>
struct unroll_gen_haplo {
void operator () (const chromosome_t& chr, const gamete_t& gam, gamete_t& ret)
{
ret[I] = chr[(I << 1) + gam[I]];
unroll_gen_haplo<I + 1>()(chr, gam, ret);
}
};
template <>
struct unroll_gen_haplo<0> {
void operator () (const chromosome_t& chr, const gamete_t& gam, gamete_t& ret)
{
ret[0] = chr[gam[0]];
unroll_gen_haplo<1>()(chr, gam, ret);
}
};
template <>
struct unroll_gen_haplo<N_MARK + 3> {
void operator () (const chromosome_t& chr, const gamete_t& gam, gamete_t& ret) {}
};
void gen_haplo(const chromosome_t& chr, const gamete_t& gam, gamete_t& ret)
{
unroll_gen_haplo<0>()(chr, gam, ret);
}
#endif
#if 0
void do_cross(const gamete_t& h1, const gamete_t& h2, chromosome_t& ret)
{
size_t base = 0;
for (size_t i = 0; i < N_MARK + 3; ++i, base += 2) {
ret[base] = h1[i];
ret[base + 1] = h2[i];
}
}
void do_cross_2(const chromosome_t& c1, const gamete_t& g1, const chromosome_t& c2, const gamete_t& g2, chromosome_t& ret)
{
size_t base = 0;
for (size_t i = 0; i < N_MARK + 3; ++i, base += 2) {
ret[base] = c1[base + g1[i]];
ret[base + 1] = c2[base + g2[i]];
}
}
#elif 0
template <int I>
struct unroll_do_cross {
void operator () (const chromosome_t& c1, const gamete_t& g1, const chromosome_t& c2, const gamete_t& g2, chromosome_t& ret)
{
ret[I << 1] = c1[(I << 1) + g1[I]];
ret[(I << 1) + 1] = c2[(I << 1) + g2[I]];
unroll_do_cross<I + 1>()(c1, g1, c2, g2, ret);
}
};
template <>
struct unroll_do_cross<0> {
void operator () (const chromosome_t& c1, const gamete_t& g1, const chromosome_t& c2, const gamete_t& g2, chromosome_t& ret)
{
ret[0] = c1[g1[0]];
ret[1] = c2[g2[0]];
unroll_do_cross<1>()(c1, g1, c2, g2, ret);
}
};
template <>
struct unroll_do_cross<N_MARK + 3> {
void operator () (const chromosome_t& c1, const gamete_t& g1, const chromosome_t& c2, const gamete_t& g2, chromosome_t& ret) {}
};
inline
void do_cross_2(const chromosome_t& c1, const gamete_t& g1, const chromosome_t& c2, const gamete_t& g2, chromosome_t& ret)
{
unroll_do_cross<0>()(c1, g1, c2, g2, ret);
}
#else
template <int I>
struct unroll_do_cross {
void operator () (const chromosome_t& c1, register bool& strand1, const chromosome_t& c2, register bool& strand2, chromosome_t& ret)
{
strand1 ^= toss_coin_fast(gen_map[I - 1]);
strand2 ^= toss_coin_fast(gen_map[I - 1]);
ret[I << 1] = c1[(I << 1) + strand1];
ret[(I << 1) + 1] = c2[(I << 1) + strand2];
unroll_do_cross<I + 1>()(c1, strand1, c2, strand2, ret);
}
};
template <>
struct unroll_do_cross<0> {
void operator () (const chromosome_t& c1, const chromosome_t& c2, chromosome_t& ret)
{
register bool strand1 = toss_coin_fast(UINT64_HALF);
register bool strand2 = toss_coin_fast(UINT64_HALF);
ret[0] = c1[strand1];
ret[1] = c2[strand2];
unroll_do_cross<1>()(c1, strand1, c2, strand2, ret);
}
};
template <>
struct unroll_do_cross<N_MARK + 3> {
void operator () (const chromosome_t& c1, register bool& strand1, const chromosome_t& c2, register bool& strand2, chromosome_t& ret) {}
};
inline
void do_cross_2(const chromosome_t& c1, const chromosome_t& c2, chromosome_t& ret)
{
unroll_do_cross<0>()(c1, c2, ret);
}
#endif
inline
void crossing(const chromosome_t& c1, const chromosome_t& c2, chromosome_t& ret)
{
static gamete_t g1, g2, h1, h2;
/*gen_gamete(g1);*/
/*gen_gamete(g2);*/