Commit 862fbcf4 authored by Damien Leroux's avatar Damien Leroux
Browse files

Big rework. Joint probabilities and reduction to a subset of loci implemented.

parent 64a00d69
<breeding-design>
<generation name="A">
<ancestor haplotype="aa"/>
<ancestor haplotype="a"/>
</generation>
<generation name="B">
<ancestor haplotype="bb"/>
<ancestor haplotype="b"/>
</generation>
<generation name="F1">
<cross p1="A" p2="B"/>
</generation>
<generation name="BC">
<cross p2="F1" p1="A"/>
<cross p1="F1" p2="A"/>
</generation>
</breeding-design>
<format-specification>
<marker-observations generation="F2" format="mapmaker">
<marker-observations name="ABHCD">
<observation symbol="A">
<genotype from-mother="a" from-father="a"/>
</observation>
......@@ -27,17 +27,4 @@
<genotype from-mother="b" from-father="b"/>
</observation>
</marker-observations>
<copy generation="F3" from="F2"/>
<copy generation="F4" from="F2"/>
<copy generation="F5" from="F2"/>
<copy generation="F6" from="F2"/>
<copy generation="F7" from="F2"/>
<marker-observations generation="BC" format="mapmaker">
<observation symbol="A">
<genotype from-mother="a" from-father="a"/>
</observation>
<observation symbol="H">
<genotype from-mother="a" from-father="b"/>
</observation>
</marker-observations>
</format-specification>
......@@ -14,6 +14,8 @@
/*#define FUZZY_COMPARE*/
#define COMPARE_NORM
#define DEBUG_ALGEBRAIC_GENOTYPE
namespace impl {
template <typename... X> struct multi_compare;
......@@ -371,6 +373,9 @@ struct algebraic_genotype { /* must fit in a QWORD */
enum class Type: char {Null, Genotype, Gamete, DoublingGamete, Haplotype, Cross, Polynom, RilPolynom} type;
/*rs_polynom poly;*/
fast_polynom poly;
#ifdef DEBUG_ALGEBRAIC_GENOTYPE
/*std::pair<const algebraic_genotype*, const algebraic_genotype*> history;*/
#endif
/*
* Genotype * Genotype : boom
* Genotype * Gamete : Haplotype
......@@ -388,6 +393,9 @@ struct algebraic_genotype { /* must fit in a QWORD */
*/
algebraic_genotype()
#ifdef DEBUG_ALGEBRAIC_GENOTYPE
/*: history(NULL, NULL)*/
#endif
{
genotype = {{0, 0}, {0, 0}};
type = Type::Null;
......
......@@ -161,7 +161,7 @@ operator & (cache_output& c, Eigen::Matrix<_Scalar, _Rows, _Cols, _Options, _Max
/*MSG_DEBUG("outer size " << sz);*/
c & sz;
_Scalar* data = mat.data();
for (size_t i = 0; i < mat.size(); ++i) {
for (int i = 0; i < mat.size(); ++i) {
c & data[i];
}
#if 0
......@@ -185,7 +185,7 @@ operator & (cache_input& c, Eigen::Matrix<_Scalar, _Rows, _Cols, _Options, _MaxR
/*MSG_DEBUG(__FILE__ << ':' << __LINE__ << '\t' << __func__);*/
mat.resize(sz1, sz2);
_Scalar* data = mat.data();
for (size_t i = 0; i < mat.size(); ++i) {
for (int i = 0; i < mat.size(); ++i) {
c & data[i];
}
#if 0
......
......@@ -362,7 +362,7 @@ template <typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, in
md5_digest& operator << (md5_digest& md5, const Eigen::Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& mat)
{
const _Scalar* data = mat.data();
for (size_t i = 0; i < mat.size(); ++i) {
for (int i = 0; i < mat.size(); ++i) {
md5 << data[i];
}
return md5;
......
......@@ -659,6 +659,45 @@ template <typename T>
};
template <typename _Coll>
struct as_collection {
typedef typename _Coll::value_type T;
typedef typename _Coll::const_iterator ci_type;
ci_type m_begin, m_end;
as_collection(const _Coll& c)
: m_begin(c.begin()), m_end(c.end())
{}
struct iterator {
ci_type ci;
iterator(const ci_type& it)
: ci(it)
{}
iterator& operator ++ ()
{
++ci;
return *this;
}
bool operator == (const iterator& i) const
{
return ci == i.ci;
}
bool operator != (const iterator& i) const
{
return ci != i.ci;
}
value<T> operator * () const { return value<T>{*ci}; }
};
iterator begin() const { return {m_begin}; }
iterator end() const { return {m_end}; }
};
template <typename _Coll>
as_collection<_Coll> values_of(const _Coll& c) { return {c}; }
#if 0
template <typename ValueType, typename... AllArgs> struct registry_impl_type;
template <typename ValueType>
......@@ -680,7 +719,7 @@ struct computation_registry {
std::unordered_map<Arg0, value_type> m_registry;
value_type& get_(const Arg0& arg)
{
MSG_DEBUG("size=" << size() << " finding " << arg << "... " << m_registry[arg]);
/*MSG_DEBUG("size=" << size() << " finding " << arg << "... " << m_registry[arg]);*/
return m_registry[arg];
}
......@@ -692,7 +731,7 @@ struct computation_registry {
std::unordered_map<Arg0, registry_impl<Args...>> m_registry;
value_type& get_(const Arg0& car, const Args&... cdr)
{
MSG_DEBUG("size=" << size() << " finding " << car << "...");
/*MSG_DEBUG("size=" << size() << " finding " << car << "...");*/
return m_registry[car].get_(cdr...);
}
......@@ -929,6 +968,25 @@ struct make_coll_impl<_Policy, Ret(FuncArgs...), tuple<collection<T>, ArgsRemain
}
};
template <CachingPolicy _Policy, typename Ret, typename... FuncArgs, typename T, typename... ArgsRemaining, typename... PreviousArgs>
struct make_coll_impl<_Policy, Ret(FuncArgs...), tuple<as_collection<T>, ArgsRemaining...>, tuple<PreviousArgs...>> {
void operator () (collection<Ret>& coll,
Ret (&f) (FuncArgs...),
const as_collection<T>& car,
const ArgsRemaining&... cdr,
const PreviousArgs&... pargs)
{
typedef value<typename T::value_type> vtype;
for (const vtype& v: car) {
make_coll_impl<
_Policy,
Ret(FuncArgs...),
tuple<ArgsRemaining...>,
tuple<PreviousArgs..., vtype>>() (coll, f, cdr..., pargs..., v);
}
}
};
template <CachingPolicy _Policy = Oneshot, typename Ret, typename... Args, typename... CollArgs>
......
......@@ -13,6 +13,7 @@ typedef const qtl_chromosome* qtl_chromosome_value;
typedef std::vector<double> selected_qtls_on_chromosome_type;
typedef std::map<char, VectorXb> observation_vectors_type;
typedef labelled_matrix<MatrixXd, allele_pair, double> locus_probabilities_type;
typedef labelled_matrix<MatrixXd, allele_pair, int> geno_prob_type;
typedef labelled_matrix<MatrixXd, std::vector<char>, allele_pair> state_to_parental_origin_matrix_type;
typedef labelled_matrix<MatrixXd, std::vector<char>, double> parental_origin_type;
typedef labelled_matrix<MatrixXd, int, std::vector<char>> parental_origin_per_locus_type;
......@@ -58,6 +59,9 @@ marker_observation_spec
marker_observation_specifications(generation_value);
labelled_matrix<MatrixXd, std::vector<char>, allele_pair>
compute_state_to_parental_origin(const context_key& ck, const locus_key& lk);
/*observation_vectors_type*/
/*observation_vectors(const generation_rs*);*/
......@@ -85,13 +89,46 @@ struct pop_mgo_data {
}
};
struct stpom_data {
std::vector<std::vector<char>> row_labels;
std::vector<allele_pair> col_labels;
MatrixXb haplo1;
MatrixXb haplo2;
friend
std::ostream&
operator << (std::ostream& os, const stpom_data& sd)
{
os << "Row labels: " << sd.row_labels << std::endl;
os << "Col labels: " << sd.col_labels << std::endl;
os << "Haplo1: " << std::endl << sd.haplo1 << std::endl;
os << "Haplo2: " << std::endl << sd.haplo2 << std::endl;
return os;
}
};
template <typename CACHE_DIRECTION>
CACHE_DIRECTION& operator & (CACHE_DIRECTION& cd, stpom_data& sd)
{
return cd & sd.row_labels & sd.col_labels & sd.haplo1 & sd.haplo2;
}
static inline md5_digest& operator << (md5_digest& md5, const stpom_data& sd)
{
return md5 << sd.haplo1 << sd.haplo2;
}
static inline md5_digest& operator << (md5_digest& md5, const pop_mgo_data&)
{
return md5;
}
/*multi_generation_observations*/
/*population_marker_obs(population_value, chromosome_value, int, const pop_mgo_data*);*/
multi_generation_observations
population_marker_obs(population_value, chromosome_value, int, const pop_mgo_data*);
population_marker_obs(const context_key&, int);
collection<std::string>
all_traits();
......
......@@ -44,10 +44,13 @@ struct bloc_builder {
iterator&
operator += (const MatrixXd& block)
{
/*MSG_DEBUG("output " << output);*/
/*MSG_DEBUG(output->innerSize() << ',' << output->outerSize());*/
/*MSG_DEBUG("first_row " << ei->first_row);*/
/*MSG_DEBUG("block(" << block.innerSize() << ',' << block.outerSize() << ')');*/
auto slice = output->middleRows(ei->first_row, block.innerSize());
/*MSG_DEBUG("slice(" << slice.innerSize() << ',' << slice.outerSize() << ')');*/
/*MSG_DEBUG("block(" << block.innerSize() << ',' << block.outerSize() << ')');*/
for (size_t i = 0; i < block.outerSize(); ++i) {
for (int i = 0; i < block.outerSize(); ++i) {
slice.col(ei->columns[i]) = block.col(i);
}
++ei;
......@@ -193,18 +196,19 @@ init_disconnected_bloc_builder(
collection<model_block_type>
compute_parental_origins_multipop(const collection<population_value>& all_pops,
const value<qtl_chromosome_value>& qtl_chr,
const value<chromosome_value>& chr,
const value<locus_key>& lk,
const value<std::vector<double>>& loci);
collection<std::vector<char>>
contrast_groups_connected();
VectorXd
f_test_with_permutations(const std::string& trait, int n_permut, qtl_chromosome_value qtl_chr);
f_test_with_permutations(const std::string& trait, int n_permut, chromosome_value chr, const locus_key& lk);
double
qtl_threshold(const std::string& trait_name, qtl_chromosome_value qtl_chr, double quantile, int n_permut);
qtl_threshold(const std::string& trait_name, chromosome_value chr, const locus_key& lk, double quantile, int n_permut);
double
......
......@@ -9,10 +9,14 @@
#include "basic_data.h"
#endif
/*locus_probabilities_type*/
/*locus_probabilities(qtl_chromosome_value, population_value,*/
/*const multi_generation_observations&,*/
/*const selected_qtls_on_chromosome_type&);*/
locus_probabilities_type
locus_probabilities(qtl_chromosome_value, population_value,
const multi_generation_observations&,
const selected_qtls_on_chromosome_type&);
locus_probabilities(const context_key&, const locus_key&,
const multi_generation_observations&);
state_to_parental_origin_matrix_type
state_to_parental_origin_matrix(generation_value, int);
......@@ -20,14 +24,18 @@ state_to_parental_origin_matrix(generation_value, int);
parental_origin_type
parental_origin(const locus_probabilities_type&, generation_value, const selected_qtls_on_chromosome_type&);
parental_origin_per_locus_type
joint_parental_origin_at_locus(const context_key& ck, const locus_key& lk, double locus);
collection<parental_origin_per_locus_type>
parental_origin_per_locus(const collection<parental_origin_type>&);
/*
collection<parental_origin_per_locus_type>
compute_parental_origins(const value<population_value>& pop,
const value<chromosome_value>& chr,
const value<std::vector<double>>& loci);
*/
#endif
......@@ -7,23 +7,85 @@ struct chromosome {
std::string name;
std::vector<std::string> marker_name;
std::vector<double> marker_locus;
std::vector<int> haplo_sizes;
chromosome() : name(), marker_name(), marker_locus(), haplo_sizes() {}
chromosome() : name(), marker_name(), marker_locus() {}
chromosome(const chromosome& c)
: name(c.name), marker_name(c.marker_name)
, marker_locus(c.marker_locus)
{}
chromosome(const std::string& n, const std::vector<std::string>& mn, const std::vector<double>& ml)
: name(n), marker_name(mn), marker_locus(ml)
, haplo_sizes(c.haplo_sizes)
{}
chromosome(const std::string& n,
const std::vector<std::string>& mn,
const std::vector<double>& ml)
: name(n), marker_name(mn), marker_locus(ml), haplo_sizes()
{ compute_haplo_sizes(); }
void compute_haplo_sizes()
{
double prev = -1.;
int hs = 0;
haplo_sizes.reserve(marker_locus.size());
for (double l: marker_locus) {
++hs;
if (l != prev) {
haplo_sizes.push_back(hs);
hs = 0;
}
prev = l;
}
if (hs) {
haplo_sizes.push_back(hs);
}
}
chromosome& operator = (const chromosome& c)
{
name = c.name;
marker_name = c.marker_name;
marker_locus = c.marker_locus;
haplo_sizes = c.haplo_sizes;
return *this;
}
struct haplotype_iterator {
typedef std::pair<size_t, size_t> value_type;
std::vector<int>::const_iterator hs_i, hs_j;
size_t m_locus_index;
haplotype_iterator(const chromosome& c, bool at_end)
: hs_i(at_end ? c.haplo_sizes.end() : c.haplo_sizes.begin())
, hs_j(c.haplo_sizes.end())
, m_locus_index(at_end ? c.marker_locus.size() : 0)
{}
value_type operator * () const
{
return {m_locus_index, m_locus_index + *hs_i};
}
haplotype_iterator& operator ++ ()
{
m_locus_index += *hs_i;
++hs_i;
return *this;
}
bool operator == (const haplotype_iterator& hi) const
{
return hs_i == hi.hs_i;
}
bool operator != (const haplotype_iterator& hi) const
{
return !(*this == hi);
}
};
haplotype_iterator begin() const { return {*this, false}; }
haplotype_iterator end() const { return {*this, true}; }
};
static inline
......
......@@ -307,6 +307,8 @@ struct geno_prob_computer {
left[0] = kernel = LV->col(0);
lstring[0] = "LV0.";
/*MSG_DEBUG("0 " << lstring[0]);*/
/*MSG_DEBUG(left[0]);*/
for(++l; l <= r; ++l) {
kernel = LV->col(l).asDiagonal() * TR[l - 1] * kernel;
double s = kernel.sum();
......@@ -318,6 +320,8 @@ struct geno_prob_computer {
std::stringstream ss;
ss << "LV" << l << ".TR" << (l-1) << '.' << lstring[l-1];
lstring[l] = ss.str();
/*MSG_DEBUG(l << " " << lstring[l]);*/
/*MSG_DEBUG(left[l]);*/
}
right[r+1] = VectorXd::Ones(this_gen->main_process().innerSize()) / this_gen->main_process().innerSize();
......@@ -357,11 +361,36 @@ struct geno_prob_computer {
VectorXd fast_compute_at(double loc, size_t l, size_t r, int order)
{
#if 0
MSG_DEBUG("l=" << l);
MSG_DEBUG("r=" << r);
MSG_DEBUG("locus_l=" << (*locus)[l]);
MSG_DEBUG("locus_r=" << (*locus)[r]);
MSG_DEBUG("locus=" << loc);
#endif
MatrixXd& TRL = compute_tr(loc - (*locus)[l], order);
MatrixXd& TRR = compute_tr((*locus)[r] - loc, order);
#if 0
MSG_DEBUG(MATRIX_SIZE(TRL));
MSG_DEBUG(TRL);
MSG_DEBUG(MATRIX_SIZE(TRR));
MSG_DEBUG(TRR);
MSG_DEBUG(MATRIX_SIZE(left[l]));
MSG_DEBUG(left[l]);
MSG_DEBUG(MATRIX_SIZE(right[l]));
MSG_DEBUG(right[l]);
#endif
auto L = (TRL * left[l]).array();
auto R = (TRR * right[l]).array();
#if 0
/*std::cout << loc << std::endl << (L * R).matrix().transpose() << std::endl;*/
MSG_DEBUG(MATRIX_SIZE(L));
MSG_DEBUG(L);
MSG_DEBUG(MATRIX_SIZE(R));
MSG_DEBUG(R);
MSG_DEBUG(MATRIX_SIZE(this_gen->get_redux()));
MSG_DEBUG(this_gen->get_redux());
#endif
VectorXd vv = this_gen->get_redux() * (L * R).matrix();
double s = vv.sum();
if (s == 0) {
......@@ -400,9 +429,32 @@ struct geno_prob_computer {
size_t l = 0, last = steps.size() - 1, r = !!(LV->outerSize() - 1);
for (size_t i = 0; i < steps.size(); ++i) {
ret.data.col(i) = fast_compute_at(steps[i], l, r, order);
if (steps[i] >= (*locus)[r]) {
if ((ret.data.col(i).array() < 0).any()) {
MSG_DEBUG("Bad value at column #" << i << std::endl << ret.data.col(i).transpose());
#if 0
MSG_DEBUG(*this_gen);
for (const auto& m: TR) {
MSG_DEBUG(m);
}
for (const auto& kv: left) {
MSG_DEBUG("left " << kv.first);
MSG_DEBUG(kv.second);
}
for (const auto& kv: right) {
MSG_DEBUG("right " << kv.first);
MSG_DEBUG(kv.second);
}
#endif
throw 0;
}
while (steps[i] >= (*locus)[r]) {
++l;
r += i < last;
if (i < last) {
++r;
} else {
break;
}
/*r += i < last;*/
}
}
/*MSG_DEBUG("STATE PROBABILITIES" << std::endl << ret);*/
......
#ifndef _SPEL_QTL_CHROMOSOME_H_
#define _SPEL_QTL_CHROMOSOME_H_
#include <Eigen/Core>
#include <Eigen/SparseCore>
#include <unsupported/Eigen/MatrixFunctions>
#include <unsupported/Eigen/KroneckerProduct>
/*using namespace Eigen;*/
struct locus_key_struc;
typedef std::shared_ptr<const locus_key_struc> locus_key;
struct locus_key_struc {
struct no_locus_exception {
const char* what() const { return "Bad locus"; }
};
static constexpr const double no_locus = -1;
locus_key parent;
double locus;
locus_key_struc() : parent(NULL), locus(no_locus) {}
locus_key_struc(const locus_key_struc* p, double l) : parent(p), locus(l) {}
locus_key_struc(const locus_key& p, double l) : parent(p), locus(l) {}
locus_key_struc(const std::vector<double>& loci)
{
if (!loci.size()) {
parent = NULL;
locus = no_locus;
} else {
std::vector<double> sorted = loci;
std::sort(sorted.begin(), sorted.end());
locus_key p = NULL;
double last = sorted.back();
sorted.pop_back();
for (double l: sorted) {
p = locus_key(new locus_key_struc(p, l));
}
parent = p;
locus = last;
}
}
int depth() const
{
if (!this || locus == no_locus) {
return 0;
}
if (parent) {
return 1 + parent->depth();
}
return 1;
}
bool has(double loc) const
{
return loc == locus
|| (parent && parent->has(loc));
}
/*locus_key_struc* operator + (const locus_key& k) = delete;*/
/*locus_key_struc* operator - (const locus_key& k) = delete;*/
friend
locus_key operator + (const locus_key& k, double l)
{
if (!k || (k->parent == NULL && k->locus == no_locus)) {
return locus_key(new locus_key_struc(NULL, l));
} else if (l == k->locus) {
return k;
} else if (l > k->locus) {
return locus_key(new locus_key_struc(k, l));
} else {
locus_key k2 = k->parent + l;
return locus_key(new locus_key_struc(k2, k->locus));
}
}
friend
locus_key operator - (const locus_key& k, double l)
{
if (!k || (k->parent == NULL && k->locus == no_locus)) {
return k;
} else if (l == k->locus) {
return k->parent;
} else {
locus_key k2 = k->parent - l;
return locus_key(new locus_key_struc(k2, k->locus));
}
}
friend
std::ostream& operator << (std::ostream& os, const locus_key_struc k)