Commit 82a8cd1e authored by Damien Leroux's avatar Damien Leroux
Browse files

Working implementation of using joint meioses wherever applicable.

parent 198b00c0
......@@ -712,31 +712,132 @@ struct rw_base : public rw_any<rw_base> {
/** **/
struct gamete_LV_type {
bool is_single;
typedef union _gamete_lv_value_type {
Eigen::Vector2d unary;
Eigen::Vector4d binary;
template <typename STREAM_TYPE>
void
file_io(STREAM_TYPE& fs)
{
rw_base rw;
rw(fs, binary);
}
_gamete_lv_value_type() : binary() {}
_gamete_lv_value_type(const _gamete_lv_value_type& other) : binary(other.binary) {}
_gamete_lv_value_type(const Eigen::Vector2d& v) { unary = v; }
_gamete_lv_value_type(const Eigen::Vector4d& v) { binary = v; }
friend std::ostream& operator << (std::ostream& os, const _gamete_lv_value_type& glv) { return os << glv.binary.transpose(); }
} gamete_lv_value_type;
std::map<std::string, gamete_lv_value_type> lv;
friend std::ostream& operator << (std::ostream& os, const gamete_LV_type& glv) { return os << (glv.is_single ? "Unary{" : "Binary{") << glv.lv << '}'; }
void get(const std::string& key, Eigen::Vector2d& value) const
{
auto it = lv.find(key);
if (it == lv.end()) {
value = Eigen::Vector2d::Zero();
} else {
value = it->second.unary;
}
}
void get(const std::string& key, Eigen::Vector4d& value) const
{
auto it = lv.find(key);
if (it == lv.end()) {
value = Eigen::Vector4d::Zero();
} else {
value = it->second.binary;
}
}
void add_normalized(const std::string& key, const Eigen::VectorXd& value) { if (value.size() == 2) { lv[key].unary = value; } else { lv[key].binary = value; } }
template <typename T>
void add(const std::string& key, const T& value) { add_normalized(key, value); }
template <typename STREAM_TYPE>
void
file_io(STREAM_TYPE& fs)
{
rw_base rw;
if (rw.fourcc(fs, "GLVT")) { return; }
rw(fs, is_single);
rw(fs, lv);
}
};
struct gamete_LV_database {
/* IND.ped / MARK => 1 or 2 gametes (.second will be empty when cross type is DH) */
std::map<int, std::map<std::string, Eigen::Vector2d>> data;
std::map<double, Eigen::Matrix2d> cache;
std::map<int, gamete_LV_type> data;
std::map<double, Eigen::Matrix2d> cache2;
std::map<double, Eigen::Matrix4d> cache4;
std::vector<Eigen::Matrix2d> tr2;
std::vector<Eigen::Matrix4d> tr4;
gamete_LV_database() : data() {}
gamete_LV_database() : data(), cache2(), cache4() {}
Eigen::Matrix2d
get_TR(double dist)
get_TR_unary(double dist)
{
auto it = cache.find(dist);
if (it == cache.end()) {
double s = .5 + exp(-dist * .02) * .5;
auto it = cache2.find(dist);
if (it == cache2.end()) {
double s = .5 + exp(-2. * dist) * .5;
double r = 1. - s;
Eigen::Matrix2d ret;
ret << s, r, r, s;
cache[dist] = ret;
cache2[dist] = ret;
return ret;
}
return it->second;
}
Eigen::Matrix4d
get_TR_binary(double dist)
{
auto it = cache4.find(dist);
if (it == cache4.end()) {
double s = .5 + exp(-2. * dist) * .5;
double r = 1. - s;
double rs = r * s;
double r2 = r * r;
double s2 = s * s;
Eigen::Matrix4d ret;
ret <<
s2, rs, rs, r2,
rs, s2, r2, rs,
rs, r2, s2, rs,
r2, rs, rs, s2;
cache4[dist] = ret;
return ret;
}
return it->second;
}
void get_TR(double dist, Eigen::Matrix2d& result) { result = get_TR_unary(dist); }
void get_TR(double dist, Eigen::Matrix4d& result) { result = get_TR_binary(dist); }
void
add_gametes(const std::string& mark, int ind, const VectorXd& lv, bool dh)
{
auto it = data.find(ind);
if (it == data.end()) {
data[ind].is_single = dh;
} else {
if (dh != it->second.is_single) {
MSG_ERROR("Individual #" << ind << " was first declared as " << (it->second.is_single ? "intercross" : "doubled haploid") << " and is now requested to be the other kind.", "");
}
}
data[ind].add(mark, (lv.array() > 0).select(VectorXd::Ones(lv.size()), VectorXd::Zero(lv.size())).matrix());
// data[ind].add(mark, lv * (lv.array() > 0).cast<double>().sum());
// data[ind].add(mark, lv);
#if 0
Eigen::Vector2d g1, g2;
MSG_DEBUG("add_gametes mark=" << mark << " ind=" << ind << " lv=" << lv.transpose() << " dh=" << std::boolalpha << dh);
if (!dh) {
......@@ -750,6 +851,60 @@ struct gamete_LV_database {
}
data[ind][mark] = g1;
data[-ind][mark] = g2;
#endif
}
void init_tr(bool is_single, const std::vector<double>& distances)
{
if (is_single) {
if (tr2.size()) {
return;
}
tr2.reserve(distances.size());
for (double d: distances) {
tr2.emplace_back();
get_TR(d, tr2.back());
}
} else {
if (tr4.size()) {
return;
}
tr4.reserve(distances.size());
for (double d: distances) {
tr4.emplace_back();
get_TR(d, tr4.back());
}
}
}
template <typename V, typename M>
double gamete_likelihood_impl(std::vector<M>& TR, V& accum, const std::vector<std::string>& marker_names, const gamete_LV_type& gam)
{
auto name_i = marker_names.begin();
V tmp;
gam.get(*name_i++, tmp);
// accum = tmp / tmp.sum();
// // accum = tmp / (tmp.array() > 0).template cast<double>().sum();
accum = tmp * .25;
for (const auto& t: TR) {
gam.get(*name_i++, tmp);
MSG_DEBUG("accum = " << accum.transpose() << " LV = " << tmp.transpose());
accum = accum.transpose() * t * tmp.asDiagonal();
}
MSG_DEBUG("final accum = " << accum.transpose());
return accum.sum();
}
double gamete_likelihood(int ind, const gamete_LV_type& gam, const std::vector<std::string>& marker_names, const std::vector<double>& distances)
{
init_tr(gam.is_single, distances);
if (gam.is_single) {
Eigen::Vector2d accum;
return gamete_likelihood_impl(tr2, accum, marker_names, gam);
} else {
Eigen::Vector4d accum;
return gamete_likelihood_impl(tr4, accum, marker_names, gam);
}
}
double
......@@ -759,6 +914,12 @@ struct gamete_LV_database {
MSG_ERROR("Need to have exactly one more marker name than the number of inter-marker distances", "");
return NAN;
}
double ret = 0;
for (const auto& kv: data) {
ret += log(gamete_likelihood(kv.first, kv.second, marker_names, distances));
}
return ret;
#if 0
// implicitly, we just garanteed there is at least one marker name in the list
Eigen::Vector2d all_accum;
all_accum(0) = all_accum(1) = 0;
......@@ -771,33 +932,36 @@ struct gamete_LV_database {
MSG_DEBUG("DATA" << std::endl << data);
for (auto& kv: data) {
// int ind = kv.first;
MSG_DEBUG("ON GAMETES #" << kv.first);
MSG_DEBUG("ON GAMETE #" << kv.first);
auto& LV = kv.second;
auto name_i = marker_names.begin();
auto& gam0 = LV[*name_i++];
Eigen::Vector2d accum = gam0;
//accum << 1, 1;
std::vector<double> norms;
double norm_accum = 0;
norms.reserve(distances.size());
norms.push_back(1.);
norm_accum -= log(norms.back());
// accum << .5, .5;
// std::vector<double> norms;
// double norm_accum = 0;
// norms.reserve(distances.size());
// norms.push_back(1.);
// norm_accum -= log(norms.back());
for (size_t i = 0; i < TR.size(); ++i, ++name_i) {
MSG_DEBUG("accum" << std::endl << accum);
MSG_DEBUG("TR[" << i << ']' << std::endl << TR[i]);
MSG_DEBUG("LV[" << (*name_i) << "] = " << LV[*name_i].transpose());
// MSG_DEBUG("accum = " << accum.transpose());
// MSG_DEBUG("TR[" << i << ']' << std::endl << TR[i]);
MSG_DEBUG("accum = " << accum.transpose() << " LV[" << (*name_i) << "] = " << LV[*name_i].transpose());
accum = accum.transpose() * TR[i] * LV[*name_i].asDiagonal();
norms.push_back(accum.lpNorm<2>());
if (norms.back() == 0) {
MSG_ERROR("Have a zero vector, aborting.", "");
return NAN;
}
accum.array() /= norms.back();
norm_accum += log(norms.back());
// norms.push_back(accum.lpNorm<2>());
// if (norms.back() == 0) {
// MSG_ERROR("Have a zero vector, aborting.", "");
// return NAN;
// }
// accum.array() /= norms.back();
// norm_accum += log(norms.back());
}
ret += norm_accum + log(accum.lpNorm<1>());
MSG_DEBUG("final accum = " << accum.transpose());
// ret += norm_accum + log(accum.lpNorm<1>());
ret += log(accum.lpNorm<1>());
}
return ret;
#endif
}
template <typename STREAM_TYPE>
......
......@@ -384,7 +384,6 @@ job_registry = {
if (std::find(settings->output_generations.begin(), settings->output_generations.end(),
gen->name) != settings->output_generations.end()) {
output_prob[node] = state_prob[node];
}
if ((settings->output_mode & bn_settings_t::OutputGametes) && !settings->pedigree.items[ind - 1].is_ancestor()) {
auto& mat = marginals[-variable];
MSG_DEBUG("marginals for gamete " << mat);
......@@ -402,6 +401,7 @@ job_registry = {
MSG_DEBUG("GAMETE #" << ind << " prob " << gamete_prob[ind - 1].transpose());
}
}
}
MSG_DEBUG("LOCUS VECTORS");
......
......@@ -18,10 +18,14 @@ int main(int argc, const char** argv)
}
MSG_DEBUG("Have gamete_LV");
MSG_DEBUG(gamete_LV.data);
MSG_DEBUG("Marker order: " << settings->group.raw.marker_name);
MSG_DEBUG("Distances: " << settings->group.raw.marker_locus);
const auto& locvec = settings->group.raw.marker_locus;
const auto& nvec = settings->group.raw.marker_name;
MSG_DEBUG("Computed likelihood (log10): " << gamete_LV.map_likelihood(nvec, locvec));
double lh = gamete_LV.map_likelihood(nvec, locvec);
MSG_DEBUG("Computed likelihood (log): " << lh);
MSG_DEBUG("Computed likelihood (log10): " << (lh / log(10.)));
return 0;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment