Commit 98be4b2f authored by Damien Leroux's avatar Damien Leroux
Browse files

New features, bugfixes, itś a big mess, but it works.

parent 19488f00
......@@ -137,7 +137,7 @@ endif()
if (${BUILD_FOR_DEPLOYMENT})
add_executable(spell-pedigree ${SPELL_PEDIGREE_SRC} ${libstdcpp})
add_executable(spell-map ${SPELL_MAP_SRC} ${libstdcpp})
# add_executable(spell-map ${SPELL_MAP_SRC} ${libstdcpp})
add_executable(spell-marker ${SPELL_MARKER_SRC} ${libstdcpp})
add_executable(spell-qtl ${SPELL_QTL_SRC} ${libstdcpp})
add_custom_command(
......@@ -174,7 +174,7 @@ else()
add_executable(spell-marker ${SPELL_MARKER_SRC})
add_executable(spell-qtl ${SPELL_QTL_SRC})
# TODO add spell-map to BUILD_FOR_DEPLOYMENT
add_executable(spell-map ${SPELL_MAP_SRC})
# add_executable(spell-map ${SPELL_MAP_SRC})
SET_SOURCE_FILES_PROPERTIES(${SPELL_PEDIGREE_SRC} ${SPELL_MARKER_SRC} ${SPELL_QTL_SRC}
PROPERTIES
COMPILE_FLAGS "-Wno-int-in-bool-context -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 ${SANITIZER_OPT}")
......
......@@ -3,12 +3,12 @@ INSTALL_DIR=/usr/local
ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
GCC_VERSION=-4.9
GCC_VERSION=
COMP=g++$(GCC_VERSION)
#COMP=clang++-3.6 -Wno-undefined-bool-conversion -Wno-tautological-undefined-compare
CXX=$(COMP) --std=c++11 -Wno-unused-local-typedefs -fPIC -pipe
CXX=$(COMP) --std=c++14 -Wno-unused-local-typedefs -fPIC -pipe -Wno-int-in-bool-context
#CXX=clang++ -std=c++11 -stdlib=libc++
INC=-I${ROOT_DIR}/include -I${ROOT_DIR}/include/input -I/home/daleroux/include/eigen3 -I/home/daleroux/include -I${ROOT_DIR}/include/bayes
......
......@@ -946,7 +946,7 @@ struct bayesian_network {
if (pending_messages.size()) {
if (m_exact) {
if (verbose >= 2) {
MSG_DEBUG("GOT " << pending_messages.size() << " PENDING MESSAGE(S)!");
MSG_DEBUG("GOT " << pending_messages.size() << " PENDING SPELL_STRING(S)!");
}
m_exact = false;
m_inexact_messages = pending_messages;
......
......@@ -938,7 +938,7 @@ struct factor_graph {
size_t
make_joint_factor_given(const pedigree_tree_type& T, const std::vector<size_t>& ind_vec, const std::vector<size_t>& given)
{
/*scoped_indent _(MESSAGE("[make_joint_factor_given {" << ind_vec << "} {" << given << "}] "));*/
/*scoped_indent _(SPELL_STRING("[make_joint_factor_given {" << ind_vec << "} {" << given << "}] "));*/
std::vector<std::vector<size_t>> given_per_ind;
std::vector<size_t> joint_parents;
......@@ -1026,7 +1026,7 @@ struct factor_graph {
void
add_ind(const pedigree_type& ped, size_t ind_node, size_t n_alleles)
{
/*scoped_indent _(MESSAGE("[add_ind(" << ind_node << ")] "));*/
/*scoped_indent _(SPELL_STRING("[add_ind(" << ind_node << ")] "));*/
if (ped.tree[ind_node].is_ancestor()) {
MSG_DEBUG("add_ind(" << ind_node << ") is ancestor");
/*MSG_DEBUG("... is ancestor");*/
......
......@@ -1336,7 +1336,7 @@ struct factor_graph {
size_t
make_joint_factor_given(const pedigree_tree_type& T, const std::vector<size_t>& ind_vec, const std::vector<size_t>& given)
{
/*scoped_indent _(MESSAGE("[make_joint_factor_given {" << ind_vec << "} {" << given << "}] "));*/
/*scoped_indent _(SPELL_STRING("[make_joint_factor_given {" << ind_vec << "} {" << given << "}] "));*/
std::vector<std::vector<size_t>> given_per_ind;
std::vector<size_t> joint_parents;
......@@ -1446,7 +1446,7 @@ struct factor_graph {
std::vector<size_t>
ensure_factor(const pedigree_tree_type& T, size_t p_node, const std::vector<size_t>& reent)
{
scoped_indent _(MESSAGE("[ensure_factor(" << p_node << ", " << reent << ")] "));
scoped_indent _(SPELL_STRING("[ensure_factor(" << p_node << ", " << reent << ")] "));
size_t p1 = (size_t) T.get_p1(T.get_p1(p_node));
size_t p2 = (size_t) T.get_p1(T.get_p2(p_node));
if (T[p_node].is_ancestor()) {
......@@ -1510,7 +1510,7 @@ struct factor_graph {
void
add_ind(const pedigree_type& ped, size_t ind_node, size_t n_alleles)
{
/*scoped_indent _(MESSAGE("[add_ind(" << ind_node << ")] "));*/
/*scoped_indent _(SPELL_STRING("[add_ind(" << ind_node << ")] "));*/
if (ped.tree[ind_node].is_ancestor()) {
/*MSG_DEBUG("add_ind(" << ind_node << ") is ancestor");*/
/*MSG_DEBUG("... is ancestor");*/
......
......@@ -540,6 +540,17 @@ struct joint_variable_product_type {
return true;
}
inline
void
add_table_impl(const genotype_comb_type& t, std::vector<int>& vars)
{
tables.emplace_back();
tables.back().data = &t;
tables.back().variable_names.swap(vars);
tables.back().exponent = 1.;
MSG_DEBUG("[joint_product] Added table " << (&t) << " " << t);
}
inline
void
add_table(const genotype_comb_type& t)
......@@ -561,11 +572,7 @@ struct joint_variable_product_type {
}
}
all_variable_names = all_variable_names + vars;
tables.emplace_back();
tables.back().data = &t;
tables.back().variable_names.swap(vars);
tables.back().exponent = 1.;
/*MSG_DEBUG("[joint_product] Added table " << (&t) << " " << t);*/
add_table_impl(t, vars);
}
inline
......@@ -574,15 +581,21 @@ struct joint_variable_product_type {
{
std::vector<table_descr> tmp_tables;
tmp_tables.swap(tables);
owned_tables.reserve(tmp_tables.size());
for (auto& t: tmp_tables) {
if (t.exponent != 1.) {
owned_tables.emplace_back(*t.data);
MSG_DEBUG("Have table with multiplicity " << t.exponent);
MSG_DEBUG((*t.data));
owned_tables.emplace_back();
owned_tables.back() = *t.data;
MSG_DEBUG(owned_tables.back());
for (auto& e: owned_tables.back()) {
e.coef = pow(e.coef, t.exponent);
}
add_table(owned_tables.back());
MSG_DEBUG(owned_tables.back());
add_table_impl(owned_tables.back(), t.variable_names);
} else {
add_table(*t.data);
add_table_impl(*t.data, t.variable_names);
}
}
}
......@@ -813,7 +826,7 @@ struct joint_variable_product_type {
merge_cursors(const table_descr& t, state_index_type global, state_index_type local)
{
/* merge cursors, and increment if smaller than global cursor. otherwise, zero to the right of the leftmost modified digit. */
/*scoped_indent _(MESSAGE("[merge_cursors " << dump(global) << " / " << dump(local, t) << "] "));*/
/*scoped_indent _(SPELL_STRING("[merge_cursors " << dump(global) << " / " << dump(local, t) << "] "));*/
state_index_type merged = t.merge(local, global);
/*MSG_DEBUG("merged " << dump(merged, t));*/
/*MSG_DEBUG("versus " << dump(global));*/
......@@ -1248,7 +1261,7 @@ template <typename Iterator>
genotype_comb_type
compute_product(Iterator tables_begin, Iterator tables_end, const std::vector<int>& output_variables, const std::map<std::vector<int>, genotype_comb_type>& domains)
{
/*scoped_indent _(MESSAGE("[compute product > " << output_variables << "] "));*/
/*scoped_indent _(SPELL_STRING("[compute product > " << output_variables << "] "));*/
joint_variable_product_type jvp;
__table_getter<typename std::iterator_traits<Iterator>::reference> getter;
for (; tables_begin != tables_end; ++tables_begin) {
......
......@@ -1224,7 +1224,7 @@ struct graph_type {
#define starting_point (uninitialized >> 1)
#define node_is_parent(_n) (from[_n] & starting_point)
#define node_is_initialized(_n) (!(from[_n] & uninitialized))
/*scoped_indent _(MESSAGE("[breadth-first " << between << "] "));*/
/*scoped_indent _(SPELL_STRING("[breadth-first " << between << "] "));*/
/*MSG_DEBUG("uninitialized " << uninitialized << " starting_point " << starting_point);*/
node_vec from(rank.size(), uninitialized);
std::deque<node_index_type> stack;
......@@ -1324,7 +1324,7 @@ struct graph_type {
std::list<node_index_type>
find_path(node_index_type p1, node_index_type p2) const
{
/*scoped_indent _(MESSAGE("[find_path " << p1 << ' ' << p2 << "] "));*/
/*scoped_indent _(SPELL_STRING("[find_path " << p1 << ' ' << p2 << "] "));*/
std::vector<bool> visited(rank.size(), false);
std::list<node_index_type> path;
if (recursive_path_finder(p1, p2, path, visited)) {
......@@ -1381,7 +1381,7 @@ struct graph_type {
std::list<node_index_type>
find_path1(node_index_type p1, node_index_type p2) const
{
/*scoped_indent _(MESSAGE("[find_path " << p1 << ' ' << p2 << "] "));*/
/*scoped_indent _(SPELL_STRING("[find_path " << p1 << ' ' << p2 << "] "));*/
std::vector<bool> anc_visited1(rank.size(), false), anc_visited2(rank.size(), false);
auto a1 = find_all_ancestors(p1, anc_visited2, anc_visited1);
auto a2 = find_all_ancestors(p2, anc_visited1, anc_visited2);
......@@ -1410,7 +1410,7 @@ struct graph_type {
std::list<node_index_type>
find_path2(node_index_type p1, node_index_type p2) const
{
/*scoped_indent _(MESSAGE("[find_path " << p1 << ' ' << p2 << "] "));*/
/*scoped_indent _(SPELL_STRING("[find_path " << p1 << ' ' << p2 << "] "));*/
std::vector<bool> visited(rank.size(), false);
std::list<node_index_type> path;
if (recursive_path_finder_a(p1, p2, path, visited) || recursive_path_finder_d(p1, p2, path, visited)) {
......@@ -2003,7 +2003,7 @@ struct graph_type {
get_joint_domain(const var_vec& varset)
{
MSG_QUEUE_FLUSH();
/*scoped_indent _(MESSAGE("[get_joint_domain " << varset << "] "));*/
/*scoped_indent _(SPELL_STRING("[get_joint_domain " << varset << "] "));*/
auto path = find_vpath(varset);
message_type ret;
for (const auto& cliq: path) {
......@@ -2067,7 +2067,7 @@ struct graph_type {
/*}*/
visited[this] = true;
domains[V] = dom;
/*MSG_DEBUG("Domains " << (parent ? MESSAGE(parent << "->" << index_in_parent) : std::string("top-level")) << ' ' << domains);*/
/*MSG_DEBUG("Domains " << (parent ? SPELL_STRING(parent << "->" << index_in_parent) : std::string("top-level")) << ' ' << domains);*/
if (parent && !visited[parent]) {
const_cast<graph_type*>(parent)->propagate_spawnling_domain(V, dom);
}
......
......@@ -28,7 +28,7 @@ struct graph_type : public graph_base_type {
void
add_variable(const var_vec& rule, variable_index_type new_variable)
{
/*scoped_indent _(MESSAGE("[add_variable " << rule << ' ' << new_variable << "] "));*/
/*scoped_indent _(SPELL_STRING("[add_variable " << rule << ' ' << new_variable << "] "));*/
if (rule.size() > 0) {
node_vec parents;
if (rule.size() == 2) {
......@@ -280,7 +280,7 @@ private:
std::shared_ptr<Derived>
make_subgraph(node_index_type n)
{
/*scoped_indent _(MESSAGE("[subgraph " << n << "] "));*/
/*scoped_indent _(SPELL_STRING("[subgraph " << n << "] "));*/
/*MSG_DEBUG("Creating subgraph...");*/
std::shared_ptr<Derived> ret = dynamic_cast<Derived*>(this)->create_subgraph(n);
foreach_in_if(inner_nodes(n),
......@@ -466,7 +466,7 @@ struct factor_graph_type : public recursive_graph_type<factor_graph_type> {
void
add_node_to_subgraph(std::shared_ptr<factor_graph_type> sub, node_index_type n)
{
/*scoped_indent _(MESSAGE("[add_node_to_subgraph " << n << "] "));*/
/*scoped_indent _(SPELL_STRING("[add_node_to_subgraph " << n << "] "));*/
variable_index_type v = own_variable_of(n);
sub->add_variable(rule(n), v);
sub->is_dh[v] = is_dh[v];
......@@ -903,7 +903,7 @@ struct factor_graph_type : public recursive_graph_type<factor_graph_type> {
get_joint_domain(const var_vec& varset)
{
/*MSG_QUEUE_FLUSH();*/
/*scoped_indent _(MESSAGE("[get_joint_domain " << varset << "] "));*/
/*scoped_indent _(SPELL_STRING("[get_joint_domain " << varset << "] "));*/
/*MSG_DEBUG("[get_joint_domain " << varset << "] ");*/
auto path = find_vpath(varset);
message_type ret;
......@@ -972,7 +972,7 @@ struct factor_graph_type : public recursive_graph_type<factor_graph_type> {
/*}*/
visited[this] = true;
domains[V] = dom;
/*MSG_DEBUG("Domains " << (parent() ? MESSAGE(parent() << "->" << index_in_parent()) : std::string("top-level")) << ' ' << domains);*/
/*MSG_DEBUG("Domains " << (parent() ? SPELL_STRING(parent() << "->" << index_in_parent()) : std::string("top-level")) << ' ' << domains);*/
if (parent() && !visited[parent()]) {
const_cast<factor_graph_type*>(parent())->propagate_spawnling_domain(V, dom);
}
......@@ -1004,7 +1004,7 @@ struct factor_graph_type : public recursive_graph_type<factor_graph_type> {
void
compute_node_domain(node_index_type n)
{
/*scoped_indent _(MESSAGE("[compute_node_domain #" << n << "] "));*/
/*scoped_indent _(SPELL_STRING("[compute_node_domain #" << n << "] "));*/
auto inputs = nei_in(n);
auto vv = variables_of(n);
if (node_is_interface(n)) {
......@@ -1104,7 +1104,7 @@ struct factor_graph_type : public recursive_graph_type<factor_graph_type> {
void
compute_domains_and_factors()
{
/*scoped_indent _(MESSAGE("[compute_domains_and_factors " << (parent() ? MESSAGE(parent() << "->" << index_in_parent()) : std::string("top-level")) << "] "));*/
/*scoped_indent _(SPELL_STRING("[compute_domains_and_factors " << (parent() ? SPELL_STRING(parent() << "->" << index_in_parent()) : std::string("top-level")) << "] "));*/
/*MSG_DEBUG("n_alleles = " << n_alleles);*/
if (parent()) {
domains = parent()->domains;
......@@ -1423,7 +1423,7 @@ struct instance_type {
: tables(g->node_domains.size()), evidence(g->node_domains.size()), sub_instances(g->node_domains.size()), parent(par)
{
var_vec varz; for (node_index_type n: g->nodes()) { varz = varz + g->variables_of(n); }
/*scoped_indent _(MESSAGE("[instance " << varz << "] "));*/
/*scoped_indent _(SPELL_STRING("[instance " << varz << "] "));*/
size_t mi = 0;
for (node_index_type n: g->nodes()) {
if (g->node_is_interface(n) || g->node_is_factor(n)) {
......@@ -1551,7 +1551,7 @@ struct instance_type {
compute(node_index_type n, const std::map<var_vec, genotype_comb_type>& domains) /* compute an external message (this -> external node #n) */
{
const auto& variant = variants[n];
/*scoped_indent _(MESSAGE("[compute " << n << "] "));*/
/*scoped_indent _(SPELL_STRING("[compute " << n << "] "));*/
/*MSG_DEBUG("" << variant);*/
clear_internal_evidence();
for (size_t i = 0; i < variant.outer_inputs.size(); ++i) {
......
......@@ -287,7 +287,7 @@ inline
message_type
product(const message_type& accum, const message_type& msg, const std::map<var_vec, genotype_comb_type>& domains)
{
/*scoped_indent _(MESSAGE("[product] "));*/
/*scoped_indent _(SPELL_STRING("[product] "));*/
/*MSG_DEBUG("" << accum);*/
/*MSG_DEBUG("" << msg);*/
multiple_product_type mp;
......@@ -611,7 +611,7 @@ struct graph_base_type {
node_index_type
add_interface(variable_index_type v)
{
/*scoped_indent _(MESSAGE("[+I " << v << "] "));*/
/*scoped_indent _(SPELL_STRING("[+I " << v << "] "));*/
node_index_type ret = create_node(Interface, v);
m_nodes[ret].all_variables.push_back(v);
/*MSG_DEBUG("new node " << m_nodes[ret]);*/
......@@ -631,7 +631,7 @@ struct graph_base_type {
node_index_type
add_factor(const var_vec& rule, variable_index_type v)
{
/*scoped_indent _(MESSAGE("[+F " << rule << ' ' << v << "] "));*/
/*scoped_indent _(SPELL_STRING("[+F " << rule << ' ' << v << "] "));*/
node_index_type ret = create_node(Factor, v);
auto sorted_rule = rule;
std::sort(sorted_rule.begin(), sorted_rule.end());
......@@ -680,7 +680,7 @@ struct graph_base_type {
node_index_type
aggregate(const node_vec& nodes)
{
/*scoped_indent _(MESSAGE("[+A " << nodes << "] "));*/
/*scoped_indent _(SPELL_STRING("[+A " << nodes << "] "));*/
node_index_type ret = create_node(Aggregate);
m_nodes[ret].inner_nodes.clear();
/*MSG_DEBUG("Creating aggregate " << ret);*/
......@@ -747,7 +747,7 @@ struct graph_base_type {
bool
add_edge(node_index_type from, node_index_type to, path_type& path)
{
/*scoped_indent _(MESSAGE("[add edge " << from << "->" << to << "] "));*/
/*scoped_indent _(SPELL_STRING("[add edge " << from << "->" << to << "] "));*/
bool have_cycle = colour_equal(m_nodes[from].colour, m_nodes[to].colour);
if (have_cycle) {
path = find_path(from, to);
......@@ -956,7 +956,7 @@ struct graph_base_type {
#define starting_point (uninitialized >> 1)
#define node_is_parent(_n) (from[_n] & starting_point)
#define node_is_initialized(_n) (!(from[_n] & uninitialized))
/*scoped_indent _(MESSAGE("[breadth-first " << between << "] "));*/
/*scoped_indent _(SPELL_STRING("[breadth-first " << between << "] "));*/
/*MSG_DEBUG("uninitialized " << uninitialized << " starting_point " << starting_point);*/
node_vec from(m_nodes.size(), uninitialized);
std::vector<bool> visited(m_nodes.size(), false);
......
......@@ -38,6 +38,8 @@
#include "tensor.h"
#include "linear_combination.h"
#include <boost/math/tools/minima.hpp>
/** FOURCC **/
inline
......@@ -859,6 +861,10 @@ struct EM_helpers {
// to current internal points x1, x2 and then calculating
// a new internal point until the length of the interval
// is less than or equal to the tolerance.
MSG_INFO(std::setprecision(10));
for (double x = .42; x < .5; x += .001) {
MSG_INFO(x << "\t" << f(x));
}
while (!interval_too_small(ret.a, ret.b)) {
if (fx1 < fx2) {
......@@ -938,13 +944,13 @@ struct EM_helpers {
S_to_dist(double s)
{
scoped_indent _("[S_to_dist] ");
#define EM_S_MIN .499999
#define EM_R_MAX .499999
#define EM_S_MAX .999999
MSG_DEBUG("s = " << s);
if (s > EM_S_MAX) {
s = EM_S_MAX;
} else if (s < EM_S_MIN) {
s = EM_S_MIN;
} else if (s < EM_R_MAX) {
s = EM_R_MAX;
}
MSG_DEBUG("s = " << s);
......@@ -965,15 +971,15 @@ struct EM_map {
};
struct gamete_LV_database : public EM_helpers {
struct gamete_LV_database {
/* IND.ped / MARK => 1 or 2 gametes (.second will be empty when cross type is DH) */
std::unordered_map<int, gamete_LV_type> data;
std::unordered_map<double, Eigen::Matrix2d> cache2;
std::unordered_map<double, Eigen::Matrix4d> cache4;
std::vector<Eigen::Matrix2d> tr2;
std::vector<Eigen::Matrix4d> tr4;
gamete_LV_database() : data(), cache2(), cache4() {}
// FIXME need mutexes around get_TR_* and cache* stuff
gamete_LV_database() : data(), cache2(), cache4(), tr2(), tr4() {}
Eigen::Matrix2d
get_TR_unary(double dist_or_r, bool use_r=false)
......@@ -986,14 +992,14 @@ struct gamete_LV_database : public EM_helpers {
s = .5 + exp(-2. * dist_or_r) * .5;
r = 1. - s;
}
auto it = cache2.find(r);
if (it == cache2.end()) {
// auto it = cache2.find(r);
// if (it == cache2.end()) {
Eigen::Matrix2d ret;
ret << s, r, r, s;
cache2[r] = ret;
// cache2[r] = ret;
return ret;
}
return it->second;
// }
// return it->second;
}
Eigen::Matrix4d
......@@ -1007,8 +1013,8 @@ struct gamete_LV_database : public EM_helpers {
s = .5 + exp(-2. * dist_or_r) * .5;
r = 1. - s;
}
auto it = cache4.find(r);
if (it == cache4.end()) {
// auto it = cache4.find(r);
// if (it == cache4.end()) {
double rs = r * s;
double r2 = r * r;
double s2 = s * s;
......@@ -1018,10 +1024,10 @@ struct gamete_LV_database : public EM_helpers {
rs, s2, r2, rs,
rs, r2, s2, rs,
r2, rs, rs, s2;
cache4[r] = ret;
// cache4[r] = ret;
return ret;
}
return it->second;
// }
// return it->second;
}
void get_TR(double dist, Eigen::Matrix2d& result) { result = get_TR_unary(dist); }
......@@ -1117,393 +1123,412 @@ struct gamete_LV_database : public EM_helpers {
}
return ret;
}
std::vector<double> em_distances;
std::vector<double> em_old_distances;
std::vector<Eigen::Matrix2d> em_TR2;
std::vector<Eigen::Matrix4d> em_TR4;
bool em_have_single = false;
bool em_have_double = false;
std::vector<std::vector<gamete_lv_value_type>> em_state_per_mark_per_ind;
std::vector<std::vector<gamete_lv_value_type>> em_forward, em_backward, em_obs;
double em_likelihood;
std::vector<bool> em_chain_is_single;
void
EM_init(const std::vector<std::string>& marker_names)
{
em_distances.clear();
em_distances.resize(marker_names.size() - 1, .5);
em_old_distances.clear();
em_old_distances.resize(marker_names.size() - 1, 0);
em_chain_is_single.clear();
em_chain_is_single.reserve(data.size());
em_state_per_mark_per_ind.resize(marker_names.size());
auto spmpi = em_state_per_mark_per_ind.begin();
em_forward.clear();
em_forward.resize(marker_names.size());
em_backward.clear();
em_backward.resize(marker_names.size());
auto forwardi = em_forward.begin();
auto backwardi = em_backward.begin();
// MSG_DEBUG("forward " << em_forward);
// MSG_DEBUG("backward " << em_backward);
// size_t i = 0;
for (const auto& m: marker_names) {
(spmpi++)->resize(data.size());
(forwardi++)->resize(data.size(), gamete_lv_value_type::unknown_binary());
(backwardi++)->resize(data.size(), gamete_lv_value_type::unknown_binary());
}
// MSG_DEBUG("data");
// MSG_DEBUG(data);
for (const auto& kv: data) { // all markers for one individual
em_chain_is_single.push_back(kv.second.is_single);
em_have_single |= kv.second.is_single;
em_have_double |= !kv.second.is_single;
// auto mi = marker_names.begin();
// MSG_DEBUG("marker_names " << marker_names);
// if (em_chain_is_single.back()) {
// for (auto& vec: *spmpi) {
// vec.unary = kv.second.lv.find(*mi++)->second.unary;
// }
// } else {
// for (auto& vec: *spmpi) {
// vec.binary = kv.second.lv.find(*mi++)->second.binary;
// }
// }
// ++i;
}
auto di = data.begin();
for (size_t i = 0; i < data.size(); ++i, ++di) {
size_t mi = 0;
struct EM_computer_type : public EM_helpers {
gamete_LV_database* db;
std::vector<double> em_distances;
std::vector<double> em_old_distances;
std::vector<Eigen::Matrix2d> em_TR2;
std::vector<Eigen::Matrix4d> em_TR4;
bool em_have_single = false;
bool em_have_double = false;
std::vector<std::vector<gamete_lv_value_type>> em_state_per_mark_per_ind;
std::vector<std::vector<gamete_lv_value_type>> em_forward, em_backward, em_obs;
double em_likelihood;
std::vector<bool> em_chain_is_single;
EM_computer_type(gamete_LV_database* ptr)
: db(ptr)
, em_distances(), em_old_distances(), em_TR2(), em_TR4()
, em_have_single(false), em_have_double(false)
, em_state_per_mark_per_ind(), em_forward(), em_backward(), em_obs()
, em_likelihood(0), em_chain_is_single(false)
{}
void
EM_init(const std::vector<std::string>& marker_names)
{
em_distances.clear();
em_distances.resize(marker_names.size() - 1, .5);
em_old_distances.clear();
em_old_distances.resize(marker_names.size() - 1, 0);
em_chain_is_single.clear();
em_chain_is_single.reserve(db->data.size());
em_state_per_mark_per_ind.resize(marker_names.size());
auto spmpi = em_state_per_mark_per_ind.begin();
em_forward.clear();
em_forward.resize(marker_names.size());
em_backward.clear();
em_backward.resize(marker_names.size());
auto forwardi = em_forward.begin();
auto backwardi = em_backward.begin();
// MSG_DEBUG("forward " << em_forward);
// MSG_DEBUG("backward " << em_backward);
// size_t i = 0;
for (const auto& m: marker_names) {
em_state_per_mark_per_ind[mi][i].binary = di->second.lv.find(m)->second.binary;
++mi;
(spmpi++)->resize(db->data.size());
(forwardi++)->resize(db->data.size(), gamete_lv_value_type::unknown_binary());
(backwardi++)->resize(db->data.size(), gamete_lv_value_type::unknown_binary());
}
// MSG_DEBUG("data");
// MSG_DEBUG(data);
for (const auto& kv: db->data) { // all markers for one individual
em_chain_is_single.push_back(kv.second.is_single);
em_have_single |= kv.second.is_single;
em_have_double |= !kv.second.is_single;
// auto mi = marker_names.begin();
// MSG_DEBUG("marker_names " << marker_names);
// if (em_chain_is_single.back()) {
// for (auto& vec: *spmpi) {
// vec.unary = kv.second.lv.find(*mi++)->second.unary;
// }
// } else {
// for (auto& vec: *spmpi) {
// vec.binary = kv.second.lv.find(*mi++)->second.binary;
// }
// }
// ++i;
}
auto di = db->data.begin();
for (size_t i = 0; i < db->data.size(); ++i, ++di) {
size_t mi = 0;
for (const auto& m: marker_names) {
em_state_per_mark_per_ind[mi][i].binary = di->second.lv.find(m)->second.binary;
++mi;
}
}
if (em_have_double) {
em_TR4.resize(em_distances.size());
}
if (em_have_single) {
em_TR2.resize(em_distances.size());
}
}
if (em_have_double) {
em_TR4.resize(em_distances.size());
}
if (em_have_single) {
em_TR2.resize(em_distances.size());
}
em_obs = em_state_per_mark_per_ind;
em_obs = em_state_per_mark_per_ind;
// MSG_DEBUG("data");