Commit 5fc743f3 authored by Damien Leroux's avatar Damien Leroux
Browse files

Creating clean graphs now. Or so it seems.

parent eab275fa
......@@ -25,9 +25,9 @@ VERSION_DEFINES=$(shell git tag | tail -1 | sed 's/\([0-9]*\)[.]\([0-9]*\)\([.][
CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
#CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
#DEBUG_OPTS=-ggdb -O0
DEBUG_OPTS=-ggdb -O0
#OPT_OPTS=-O3 -DNDEBUG
OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#DEBUG_OPTS=-ggdb -O0 -DNDEBUG
#DEBUG_OPTS=-ggdb -O2 -DNDEBUG
#OPT_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG
......
......@@ -41,7 +41,8 @@ size_t size(const std::vector<size_t>& dim) {
#include "pedigree.h"
/*#include "bayes/bn.h"*/
#include "bayes/factor_var4.h"
/*#include "bayes/factor_var4.h"*/
#include "graphnode.h"
#include "bayes/cli.h"
#include "bayes/dispatch.h"
......
......@@ -7,7 +7,7 @@
/*#include "generation_rs.h"*/
#include "commandline.h"
/*#include "bn.h"*/
#include "factor_var4.h"
/*#include "factor_var4.h"*/
#include "file.h"
......@@ -55,6 +55,9 @@ struct bn_settings_t {
std::set<std::string> job_filenames;
std::mutex job_filenames_mutex;
std::vector<std::string> input_generations;
std::vector<std::string> output_generations;
bn_settings_t()
: scheme(JDS_None)
, command_line()
......@@ -82,6 +85,8 @@ struct bn_settings_t {
, marker_index()
, job_filenames()
, job_filenames_mutex()
, input_generations()
, output_generations()
{}
size_t count_markers() const
......
......@@ -8,7 +8,13 @@
#include <boost/dynamic_bitset.hpp>
/* La spec est dans le code. */
/* Gère un curseur incrémentable multi-dimensionnel et les différentes opérations nécessaires desuss. */
struct state_index_type {
/* TODO make data an array in case we need more than 64 bits of indexing space. */
typedef uint64_t block_type;
block_type data;
......@@ -38,6 +44,10 @@ struct state_index_type {
inline
void reset() { data = 0; }
/* Aide à la compilation des données nécessaires à calculer un produit.
* Donne le bitshift suivant en s'assurant qu'on ne franchit pas une barrière de 64 bits.
* Si un indice devait être encodé à cheval sur deux mots, son extraction et son insertion seraient très coûteuses.
*/
inline
static
size_t
......@@ -53,6 +63,7 @@ struct state_index_type {
}
}
/* Calcule le masque local (au mot concerné) pour un indice (bitshift, bitwidth) donné. */
inline
static
block_type
......@@ -62,6 +73,7 @@ struct state_index_type {
return ((1 << bit_width) - 1) << (shift & 0x3f);
}
/* Seuls domain_descr et table_descr ont le droit de manipuler les indices au-delà des opérations de base. */
friend struct domain_descr;
friend struct table_descr;
......@@ -100,6 +112,7 @@ struct state_index_type {
}
protected:
/* Donne la valeur max shiftée d'un indice, pour comparaison avec extract_unshifted() */
static
inline
block_type
......@@ -108,6 +121,7 @@ protected:
return (domain_size - 1) << (shift & 0x3f);
}
/* extrait un indice */
inline
uint64_t
extract(size_t shift, block_type mask) const
......@@ -124,6 +138,7 @@ protected:
data |= value << (shift & 0x3f);
}
/* Incrémente un indice et retourne carry */
inline
bool
incr(size_t shift, block_type mask, block_type max)
......@@ -138,6 +153,7 @@ protected:
}
}
/* met un indice à zéro */
inline
void
zero(size_t shift, block_type mask)
......@@ -146,6 +162,7 @@ protected:
data &= ~mask;
}
/* fusionne deux curseurs étant donné un masque (utilise les dimensions de *this pour mask et d'other pour !mask) */
inline
state_index_type
merge(const state_index_type& other, const state_index_type& mask) const
......@@ -156,6 +173,7 @@ protected:
return ret;
}
/* extrait un indice mais économise le shift. à utiliser pour comparer avec max (voir incr()). */
inline
block_type
extract_unshifted(size_t shift, block_type mask) const
......@@ -164,6 +182,7 @@ protected:
return data & mask;
}
/* calcule le résultat de *this & m. Permet de passer d'un curseur global à un curseur local à une table. */
inline
state_index_type
mask(const state_index_type& m) const
......@@ -205,8 +224,8 @@ namespace std {
}
/*#define BAD_STATE_INDEX ((state_index_type) -1)*/
/* Permet d'itérer joliment sur la séquence inverse (rbegin->rend) avec la syntaxe for (v: container) {} */
template <typename CONTAINER> struct reversed_impl {
const CONTAINER& ref;
typedef typename std::decay<CONTAINER>::type::reverse_iterator iterator;
......@@ -217,7 +236,7 @@ template <typename CONTAINER> struct reversed_impl {
template <typename CONTAINER> reversed_impl<CONTAINER> reversed(CONTAINER&& x) { return {x}; }
/* Descripteur du domaine d'une variable participant au produit */
struct domain_descr {
int variable_name;
size_t size;
......@@ -282,6 +301,7 @@ struct domain_descr {
};
/* Descripteur d'une table participant au produit */
struct table_descr {
const genotype_comb_type* data;
std::vector<int> variable_names;
......@@ -315,6 +335,8 @@ struct table_descr {
}
};
/* L'algorithme à proprement parler. */
struct joint_variable_product_type {
std::vector<domain_descr> domains;
std::vector<table_descr> tables;
......@@ -392,7 +414,10 @@ struct joint_variable_product_type {
auto vi = t.variable_indices.begin();
for (const auto& k: t.data->m_combination[offset].keys) {
/**i++ = k.state;*/
*i++ = domains[*vi++].label_to_index.find(k.state)->second;
/**i++ = domains[*vi++].label_to_index.find(k.state)->second;*/
*i = domains[*vi].label_to_index.find(k.state)->second;
++i;
++vi;
}
}
......@@ -490,7 +515,7 @@ struct joint_variable_product_type {
auto& d = domains.back();
d.variable_name = v;
d.size = domain.size();
d.bit_width = 0;
d.bit_width = 1;
while ((1ULL << d.bit_width) < d.size) { ++d.bit_width; }
total_bits += d.bit_width;
/*MSG_DEBUG("Have domain for variable #" << d.variable_name << " size " << d.size << " bit_width " << d.bit_width);*/
......@@ -542,7 +567,8 @@ struct joint_variable_product_type {
for (size_t v = 0; v < tab.variable_indices[0]; ++v) {
tab.variables_left[v] = true;
}
/*MSG_DEBUG("Table variables " << tab.variable_names << " mask " << tab.mask);*/
MSG_DEBUG("Table " << (*tab.data));
MSG_DEBUG("Table variables " << tab.variable_names << " mask " << tab.mask);
tab.offset = 0;
coordinates.resize(tab.variable_names.size());
tab.offset_to_cursor.reserve(tab.data->size());
......@@ -572,7 +598,9 @@ struct joint_variable_product_type {
void
compile_output()
{
MSG_DEBUG("all output variables " << output_variable_names);
output_variable_names = output_variable_names % all_variable_names;
MSG_DEBUG("all_variable_names " << all_variable_names << " output_variable_names " << output_variable_names);
output_variables_in_use.resize(all_variable_names.size(), false);
for (int v: output_variable_names) {
size_t index = std::find(all_variable_names.begin(), all_variable_names.end(), v) - all_variable_names.begin();
......@@ -580,20 +608,22 @@ struct joint_variable_product_type {
for (size_t t = 0; t < tables.size(); ++t) {
auto iter = std::find(tables[t].variable_names.begin(), tables[t].variable_names.end(), v);
if (iter != tables[t].variable_names.end()) {
MSG_DEBUG("variable " << v << " found in table " << t << " at index " << (iter - tables[t].variable_names.begin()));
key_extractors.emplace_back(t, iter - tables[t].variable_names.begin());
break;
/*} else {*/
/*MSG_DEBUG("variable " << v << " not found in table " << t);*/
} else {
MSG_DEBUG("variable " << v << " not found in table " << t);
}
}
}
if (0) {
if (1) {
std::stringstream ss;
ss << "key extractors";
for (const auto& t_ki: key_extractors) {
ss << ' ' << t_ki.first << ':' << t_ki.second;
}
MSG_DEBUG(ss.str());
MSG_QUEUE_FLUSH();
}
}
......@@ -728,6 +758,50 @@ struct joint_variable_product_type {
}
}
struct update_key {
size_t
key_size(const joint_variable_product_type& jvp) const { return jvp.output_variable_names.size(); }
void
operator () (const joint_variable_product_type& jvp, genotype_comb_type::key_list& key) const
{
auto ki = key.begin();
for (const auto& t_ki: jvp.key_extractors) {
*ki++ = jvp.tables[t_ki.first].current_element().keys.keys[t_ki.second];
}
}
};
struct update_key_build_factor {
size_t
key_size(const joint_variable_product_type& jvp) const { return jvp.output_variable_names.size(); } /* parents + gameteS, where gameteS will be replaced by the output variable */
void
operator () (joint_variable_product_type& jvp, genotype_comb_type::key_list& key) const
{
auto ki = key.begin();
auto ti = jvp.tables.begin();
bn_label_type G = ti->current_element().keys.keys.front().state;
/*MSG_DEBUG("Have gamete label " << G); MSG_QUEUE_FLUSH();*/
for (const auto& t_ki: jvp.key_extractors) {
*ki++ = jvp.tables[t_ki.first].current_element().keys.keys[t_ki.second];
/**ki++ = ti->current_element().keys.keys.front();*/
}
/*MSG_DEBUG("Have temp key " << key); MSG_QUEUE_FLUSH();*/
const auto& p1 = key.keys[G.first_allele];
const auto& p2 = key.keys[G.second_allele];
bool f1 = G.first == GAMETE_L;
bool f2 = G.second == GAMETE_L;
/*MSG_DEBUG("Have p1 " << p1 << " p2 " << p2 << " f1 " << f1 << " f2 " << f2); MSG_QUEUE_FLUSH();*/
*ki = {
jvp.output_variable_names.back(), {f1 ? p1.state.first : p1.state.second,
f2 ? p2.state.first : p2.state.second,
f1 ? p1.state.first_allele : p1.state.second_allele,
f2 ? p2.state.first_allele : p2.state.second_allele}
};
/*MSG_DEBUG("Have complete key " << key); MSG_QUEUE_FLUSH();*/
}
};
#if 0
inline
void
update_key(genotype_comb_type::key_list& key)
......@@ -737,6 +811,7 @@ struct joint_variable_product_type {
*ki++ = tables[t_ki.first].current_element().keys.keys[t_ki.second];
}
}
#endif
inline
double
......@@ -794,16 +869,42 @@ struct joint_variable_product_type {
return from;
}
inline
genotype_comb_type
compute()
extract_domain()
{
genotype_comb_type ret;
genotype_comb_type::key_list key;
update_key uk;
key.keys.resize(output_variable_names.size());
std::unordered_map<genotype_comb_type::key_list, double> values;
size_t& offset = tables.front().offset;
size_t max = tables.front().data->size();
for (offset = 0; offset < max; ++offset) {
uk(*this, key);
values[key] = 1;
}
for (const auto& kv: values) {
/*ret.m_combination.emplace_back(kv.first, kv.second * norm);*/
ret.m_combination.emplace_back(kv.first, kv.second);
}
std::sort(ret.m_combination.begin(), ret.m_combination.end());
/*MSG_DEBUG("have domain " << ret);*/
return ret;
}
template <typename key_computing_variant>
genotype_comb_type
compute_generic()
{
key_computing_variant key_update;
if (invalid_product) { return {}; }
genotype_comb_type ret;
size_t counter = 0;
genotype_comb_type::key_list key;
key.keys.resize(output_variable_names.size());
key.keys.resize(key_update.key_size(*this));
/*MSG_DEBUG(key.size()); MSG_QUEUE_FLUSH();*/
state_index_type z;
z.reset();
state_index_type cursor = next_state(z), last = cursor;
......@@ -822,10 +923,12 @@ struct joint_variable_product_type {
/*MSG_DEBUG("COMPUTE @" << dump(cursor));*/
update_key(key);
key_update(*this, key);
double coef = compute_coef();
norm += coef;
values[key] += coef;
if (coef != 0) {
norm += coef;
values[key] += coef;
}
++counter;
......@@ -835,7 +938,6 @@ struct joint_variable_product_type {
break;
}
/*MSG_DEBUG("cursor after increment = " << dump(cursor));*/
/*cursor = find_valid_state(cursor, last);*/
cursor = next_state(cursor);
/*MSG_DEBUG("next valid cursor = " << dump(cursor));*/
if (cursor.is_bad()) {
......@@ -848,17 +950,124 @@ struct joint_variable_product_type {
}
}
ret.m_combination.reserve(values.size());
/*if (norm > 0) {*/
/*norm = 1. / norm;*/
/*}*/
if (norm > 0) {
norm = 1. / norm;
}
for (const auto& kv: values) {
/*ret.m_combination.emplace_back(kv.first, kv.second * norm);*/
ret.m_combination.emplace_back(kv.first, kv.second);
ret.m_combination.emplace_back(kv.first, kv.second * norm);
/*ret.m_combination.emplace_back(kv.first, kv.second);*/
}
std::sort(ret.m_combination.begin(), ret.m_combination.end());
/*MSG_DEBUG("Computed " << counter << " coefficients, projected onto " << ret.size() << " elements");*/
return ret;
}
inline genotype_comb_type compute() { return compute_generic<update_key>(); }
inline genotype_comb_type build_factor() { return compute_generic<update_key_build_factor>(); }
};
template <typename Ref> struct __table_getter;
template <typename K> struct __table_getter<std::pair<const K, genotype_comb_type>&> {
const genotype_comb_type& operator () (std::pair<const K, genotype_comb_type>& ref) const { return ref.second; }
};
template <> struct __table_getter<const genotype_comb_type* const&> {
const genotype_comb_type& operator () (const genotype_comb_type* const& ref) const { return *ref; }
};
template <> struct __table_getter<genotype_comb_type&> {
const genotype_comb_type& operator () (genotype_comb_type& ref) const { return ref; }
};
template <> struct __table_getter<genotype_comb_type*&> {
const genotype_comb_type& operator () (genotype_comb_type*& ref) const { return *ref; }
};
template <typename Iterator>
genotype_comb_type
compute_product(Iterator tables_begin, Iterator tables_end, const std::vector<int>& output_variables, const std::map<std::vector<int>, genotype_comb_type>& domains)
{
/*scoped_indent _(MESSAGE("[compute product > " << output_variables << "] "));*/
joint_variable_product_type jvp;
__table_getter<typename std::iterator_traits<Iterator>::reference> getter;
for (; tables_begin != tables_end; ++tables_begin) {
jvp.add_table(getter(*tables_begin));
}
jvp.set_output(output_variables);
jvp.compile(domains);
return jvp.compute();
}
inline
genotype_comb_type
generate_factor(const std::vector<int>& parents, bool dh, int spawnling, const genotype_comb_type& parent_domain, std::map<std::vector<int>, genotype_comb_type>& domains)
{
/*scoped_indent _(MESSAGE("[generate_factor " << (dh ? "dh " : "") << spawnling << " from parents " << parents << " domain " << parent_domain << "] "));*/
std::vector<bn_label_type> label_g;
std::vector<int> sorted_parents(parents);
if (parents.size() == 2) {
if (parents[0] > parents[1]) {
label_g = {
{GAMETE_L, GAMETE_L, 1, 0},
{GAMETE_L, GAMETE_R, 1, 0},
{GAMETE_R, GAMETE_L, 1, 0},
{GAMETE_R, GAMETE_R, 1, 0}
};
std::swap(sorted_parents[0], sorted_parents[1]);
} else {
label_g = {
{GAMETE_L, GAMETE_L, 0, 1},
{GAMETE_L, GAMETE_R, 0, 1},
{GAMETE_R, GAMETE_L, 0, 1},
{GAMETE_R, GAMETE_R, 0, 1}
};
}
} else if (dh) {
label_g = {
{GAMETE_L, GAMETE_L, 0, 0},
{GAMETE_R, GAMETE_R, 0, 0},
};
} else {
label_g = {
{GAMETE_L, GAMETE_L, 0, 0},
{GAMETE_L, GAMETE_R, 0, 0},
{GAMETE_R, GAMETE_L, 0, 0},
{GAMETE_R, GAMETE_R, 0, 0}
};
}
genotype_comb_type
G = state_to_combination(-1, label_g) * (1. / label_g.size());
domains[{-1}] = G;
joint_variable_product_type jvp;
/*MSG_DEBUG("joint parent domain " << parent_domain);*/
jvp.add_table(G);
jvp.add_table(parent_domain);
jvp.set_output(sorted_parents);
jvp.compile(domains);
jvp.output_variable_names.push_back(spawnling);
return jvp.build_factor();
}
inline
void
extract_domain(const genotype_comb_type& factor, const std::vector<int>& variables, std::map<std::vector<int>, genotype_comb_type>& domains)
{
joint_variable_product_type jvp;
/*MSG_DEBUG("extract " << variables << " from " << factor);*/
/*MSG_QUEUE_FLUSH();*/
jvp.add_table(factor);
jvp.set_output(variables);
jvp.compile_output();
auto& ret = domains[variables] = jvp.extract_domain();
/*MSG_DEBUG("variables " << variables << " domain " << domains[variables]); MSG_QUEUE_FLUSH();*/
for (auto& e: ret) {
e.coef = 1;
}
}
#endif
This diff is collapsed.
......@@ -3,6 +3,7 @@
#include <list>
#include "eigen.h"
#include <unordered_map>
#define _EPSILON 1.e-10
......@@ -626,7 +627,7 @@ normalize_over(const combination_type<PARENT_TYPE, STATE_TYPE>& comb, const std:
{
typedef typename combination_type<PARENT_TYPE, STATE_TYPE>::key_list key_list;
combination_type<PARENT_TYPE, STATE_TYPE> ret;
std::map<key_list, double> norm_coef;
std::unordered_map<key_list, double> norm_coef;
for (const auto& e: comb) {
auto k = e.keys % variables;
norm_coef[k] += e.coef;
......
......@@ -99,6 +99,8 @@ struct pedigree_item {
int p1;
int p2;
pedigree_item() : gen_name(), id(0), p1(0), p2(0) {}
pedigree_item(const char* gn, int i, int a, int b)
: gen_name(gn), id(i), p1(a), p2(b)
{}
......@@ -351,6 +353,22 @@ struct rw_tree : public rw_any<rw_tree> {
{
tree_io_impl(fs, tree);
}
void operator () (ifile& fs, pedigree_item& pi)
{
(*this)(fs, pi.gen_name);
(*this)(fs, pi.id);
(*this)(fs, pi.p1);
(*this)(fs, pi.p2);
}
void operator () (ofile& fs, const pedigree_item& pi)
{
(*this)(fs, pi.gen_name);
(*this)(fs, pi.id);
(*this)(fs, pi.p1);
(*this)(fs, pi.p2);
}
};
......@@ -366,6 +384,11 @@ struct rw_tree : public rw_any<rw_tree> {
* pedigree_type: implements all facilities to compute proper geno_matrices for any pedigree, including reentrant individuals.
*/
struct pedigree_type {
/*
* original data
*/
std::vector<pedigree_item> items;
/*
* pedigree tree implementation
*/
......@@ -381,6 +404,7 @@ struct pedigree_type {
std::map<individual_index_type, char> ancestor_letters;
std::map<char, std::string> ancestor_names;
std::map<geno_matrix_index_type, std::string> generation_names;
std::map<int, int> m_id;
/*std::vector<VectorLC> LC;*/
/*
......@@ -425,7 +449,7 @@ struct pedigree_type {
* default ctor
*/
pedigree_type()
: tree(), node_generations(), ancestor_letters(), ancestor_names(), generation_names(),
: tree(), node_generations(), ancestor_letters(), ancestor_names(), generation_names(), m_id(),
cache_gamete(), cache_geno(),
max_states(NONE),
n_alleles(1),
......@@ -889,6 +913,12 @@ struct pedigree_type {
return generations[get_gen_index(ind)];
}
int
ind2id(int i) const
{
return items[i - 1].id;
}
const geno_matrix&
get_geno_matrix_by_individual(size_t ind) const { return *generations[node_generations[tree.ind2node(ind)]]; }
const std::set<geno_matrix_index_type>&
......@@ -1290,6 +1320,8 @@ struct pedigree_type {
rw_tree tree_rw;
rw_base rw;
if (rw.fourcc(fs, ".CSV")) { return; }
tree_rw(fs, items);
if (rw.fourcc(fs, "PDAT")) { return; }
tree_rw(fs, tree);
if (rw.fourcc(fs, "ALET")) { return; }
......@@ -1310,6 +1342,8 @@ struct pedigree_type {
comb_rw(fs, LC);
comb_rw(fs, state_labels);
comb_rw(fs, stat_dists);
if (rw.fourcc(fs, "ID__")) { return; }
comb_rw(fs, m_id);
if (!skip_gen) {
if (rw.fourcc(fs, "PGEN")) {
return;
......@@ -1413,22 +1447,21 @@ inline
pedigree_type
read_pedigree(const std::string& filename, char sep=';', size_t max_states=std::numeric_limits<size_t>::max())
{
auto items = read_csv(filename, sep);
pedigree_type ret;
ret.items = read_csv(filename, sep);
ret.max_states = max_states;
std::map<int, int> id;
size_t cur = 0;