Commit 19488f00 authored by Damien Leroux's avatar Damien Leroux
Browse files

WIP.

parent 60f0b923
......@@ -371,6 +371,7 @@ struct table_descr {
uint64_t offset;
state_index_type mask;
bool unif;
double exponent;
inline
const genotype_comb_type::element_type&
......@@ -399,6 +400,7 @@ struct table_descr {
struct joint_variable_product_type {
std::vector<domain_descr> domains;
std::vector<table_descr> tables;
std::vector<genotype_comb_type> owned_tables;
std::vector<int> all_variable_names;
std::vector<int> output_variable_names;
std::vector<bool> output_variables_in_use;
......@@ -411,6 +413,7 @@ struct joint_variable_product_type {
size_t last_variable_index;
bool invalid_product = false;
bool have_multiple_tables = false;
const std::map<std::vector<int>, genotype_comb_type>* all_domains = nullptr;
......@@ -550,13 +553,40 @@ struct joint_variable_product_type {
for (auto k: t.m_combination[0].keys) {
vars.push_back(k.parent);
}
for (auto& td: tables) {
if (vars == td.variable_names && t == *td.data) {
td.exponent += 1.;
have_multiple_tables = true;
return;
}
}
all_variable_names = all_variable_names + vars;
tables.emplace_back();
tables.back().data = &t;
tables.back().variable_names.swap(vars);
tables.back().exponent = 1.;
/*MSG_DEBUG("[joint_product] Added table " << (&t) << " " << t);*/
}
inline
void
precompute_multiple_tables()
{
std::vector<table_descr> tmp_tables;
tmp_tables.swap(tables);
for (auto& t: tmp_tables) {
if (t.exponent != 1.) {
owned_tables.emplace_back(*t.data);
for (auto& e: owned_tables.back()) {
e.coef = pow(e.coef, t.exponent);
}
add_table(owned_tables.back());
} else {
add_table(*t.data);
}
}
}
inline
void
compile(const std::map<std::vector<int>, genotype_comb_type>& all_variable_domains)
......@@ -621,6 +651,7 @@ struct joint_variable_product_type {
compile_tables()
{
scoped_indent _("[tables] ");
if (have_multiple_tables) { precompute_multiple_tables(); }
std::vector<size_t> coordinates;
coordinates.reserve(all_variable_names.size());
for (auto& tab: tables) {
......@@ -942,6 +973,7 @@ struct joint_variable_product_type {
mpf_mul(accum, accum, coef);
}
}
mpf_clear(coef);
}
inline
......@@ -1081,11 +1113,11 @@ struct joint_variable_product_type {
cursor = next_state(z),
last = cursor;
std::unordered_map<genotype_comb_type::key_list, mpf_t> values;
mpf_t norm;
mpf_init(norm);
if (cursor.is_bad()) {
return ret;
}
mpf_t norm;
mpf_init(norm);
mpf_t accum;
mpf_init(accum);
for (;;) {
......@@ -1135,11 +1167,14 @@ struct joint_variable_product_type {
mpf_ui_div(norm, 1, norm);
// norm = 1. / norm;
}
for (const auto& kv: values) {
for (auto& kv: values) {
mpf_mul(accum, kv.second, norm);
ret.m_combination.emplace_back(kv.first, mpf_get_d(accum));
/*ret.m_combination.emplace_back(kv.first, kv.second);*/
mpf_clear(kv.second);
}
mpf_clear(accum);
mpf_clear(norm);
std::sort(ret.m_combination.begin(), ret.m_combination.end());
MSG_DEBUG("Computed " << counter << " coefficients, projected onto " << ret.size() << " elements");
MSG_DEBUG(ret);
......
......@@ -381,7 +381,7 @@ label_type read_label(ifile& ifs)
{
char f = 0, s = 0;
ifs >> f >> s;
MSG_DEBUG("[read_label] " << int(f) << ':' << int(s));
// MSG_DEBUG("[read_label] " << int(f) << ':' << int(s));
return {f, s};
}
......@@ -746,7 +746,7 @@ typedef union _gamete_lv_value_type {
struct gamete_LV_type {
bool is_single;
std::map<std::string, gamete_lv_value_type> lv;
std::unordered_map<std::string, gamete_lv_value_type> lv;
friend std::ostream& operator << (std::ostream& os, const gamete_LV_type& glv) { return os << (glv.is_single ? "Unary{" : "Binary{") << glv.lv << '}'; }
......@@ -909,10 +909,10 @@ struct EM_helpers {
const auto& kernel = S_kernel(a);
double ret = a.transpose() * kernel * b;
scoped_indent _("[compute_S] ");
MSG_DEBUG("a = " << a.transpose());
MSG_DEBUG("b = " << b.transpose());
MSG_DEBUG("kernel" << std::endl << kernel);
MSG_DEBUG("ret = " << ret);
// MSG_DEBUG("a = " << a.transpose());
// MSG_DEBUG("b = " << b.transpose());
// MSG_DEBUG("kernel" << std::endl << kernel);
// MSG_DEBUG("ret = " << ret);
return ret;
}
......@@ -967,9 +967,9 @@ struct EM_map {
struct gamete_LV_database : public EM_helpers {
/* IND.ped / MARK => 1 or 2 gametes (.second will be empty when cross type is DH) */
std::map<int, gamete_LV_type> data;
std::map<double, Eigen::Matrix2d> cache2;
std::map<double, Eigen::Matrix4d> cache4;
std::unordered_map<int, gamete_LV_type> data;
std::unordered_map<double, Eigen::Matrix2d> cache2;
std::unordered_map<double, Eigen::Matrix4d> cache4;
std::vector<Eigen::Matrix2d> tr2;
std::vector<Eigen::Matrix4d> tr4;
......@@ -1085,10 +1085,10 @@ struct gamete_LV_database : public EM_helpers {
accum = tmp/* * .25*/;
for (const auto& t: TR) {
gam.get(*name_i++, tmp);
MSG_DEBUG("accum = " << accum.transpose() << " LV = " << tmp.transpose());
// MSG_DEBUG("accum = " << accum.transpose() << " LV = " << tmp.transpose());
accum = accum.transpose() * t * tmp.asDiagonal();
}
MSG_DEBUG("final accum = " << accum.transpose());
// MSG_DEBUG("final accum = " << accum.transpose());
return accum.sum();
}
......@@ -1147,16 +1147,16 @@ struct gamete_LV_database : public EM_helpers {
em_backward.resize(marker_names.size());
auto forwardi = em_forward.begin();
auto backwardi = em_backward.begin();
MSG_DEBUG("forward " << em_forward);
MSG_DEBUG("backward " << em_backward);
// MSG_DEBUG("forward " << em_forward);
// MSG_DEBUG("backward " << em_backward);
// size_t i = 0;
for (const auto& m: marker_names) {
(spmpi++)->resize(data.size());
(forwardi++)->resize(data.size(), gamete_lv_value_type::unknown_binary());
(backwardi++)->resize(data.size(), gamete_lv_value_type::unknown_binary());
}
MSG_DEBUG("data");
MSG_DEBUG(data);
// MSG_DEBUG("data");
// MSG_DEBUG(data);
for (const auto& kv: data) { // all markers for one individual
em_chain_is_single.push_back(kv.second.is_single);
em_have_single |= kv.second.is_single;
......@@ -1193,13 +1193,13 @@ struct gamete_LV_database : public EM_helpers {
em_obs = em_state_per_mark_per_ind;
MSG_DEBUG("data");
MSG_DEBUG(data);
MSG_DEBUG("em_state_per_mark_per_ind");
MSG_DEBUG(em_state_per_mark_per_ind);
MSG_DEBUG("Have single? " << std::boolalpha << em_have_single);
MSG_DEBUG("Have double? " << std::boolalpha << em_have_double);
// MSG_DEBUG("data");
// MSG_DEBUG(data);
//
// MSG_DEBUG("em_state_per_mark_per_ind");
// MSG_DEBUG(em_state_per_mark_per_ind);
// MSG_DEBUG("Have single? " << std::boolalpha << em_have_single);
// MSG_DEBUG("Have double? " << std::boolalpha << em_have_double);
/*EM_update_distances();*/
}
......@@ -1308,14 +1308,14 @@ struct gamete_LV_database : public EM_helpers {
}
}
MSG_DEBUG("em_forward");
MSG_DEBUG(em_forward);
MSG_DEBUG("em_backward");
MSG_DEBUG(em_backward);
MSG_DEBUG("em_obs");
MSG_DEBUG(em_obs);
MSG_DEBUG("em_state_per_mark_per_ind");
MSG_DEBUG(em_state_per_mark_per_ind);
// MSG_DEBUG("em_forward");
// MSG_DEBUG(em_forward);
// MSG_DEBUG("em_backward");
// MSG_DEBUG(em_backward);
// MSG_DEBUG("em_obs");
// MSG_DEBUG(em_obs);
// MSG_DEBUG("em_state_per_mark_per_ind");
// MSG_DEBUG(em_state_per_mark_per_ind);
}
double
......@@ -1414,9 +1414,9 @@ struct gamete_LV_database : public EM_helpers {
double
compute_2pt_dist(size_t i)
{
for (double r = 0; r < .5; r += .05) {
MSG_DEBUG("i=" << i << " r=" << r << " log(L)=" << twoMarkerLikelihood(i, r ? r : 0.0001));
}
// for (double r = 0; r < .5; r += .05) {
// MSG_DEBUG("i=" << i << " r=" << r << " log(L)=" << twoMarkerLikelihood(i, r ? r : 0.0001));
// }
auto opt = find_max([&] (double r) { return twoMarkerLikelihood(i, r); }, .5 - EM_S_MIN, EM_S_MIN);
return (opt.a + opt.b) * .5;
/*double s = 1. - (opt.a + opt.b) * .5;*/
......@@ -1448,11 +1448,11 @@ struct gamete_LV_database : public EM_helpers {
EM_update_distances()
{
em_old_distances = em_distances;
MSG_DEBUG("[update dist] old distances " << em_old_distances);
// MSG_DEBUG("[update dist] old distances " << em_old_distances);
for (size_t i = 0; i < em_distances.size(); ++i) {
em_distances[i] = compute_2pt_dist(i);
}
MSG_DEBUG("[update dist] new distances " << em_distances);
// MSG_DEBUG("[update dist] new distances " << em_distances);
}
double
......@@ -1463,7 +1463,7 @@ struct gamete_LV_database : public EM_helpers {
for (; i != j; ++i, ++k) {
accum += fabs(*i - *k);
}
MSG_DEBUG("distances old " << em_old_distances << " new " << em_distances << " => " << accum);
// MSG_DEBUG("distances old " << em_old_distances << " new " << em_distances << " => " << accum);
return accum;
}
......@@ -1478,7 +1478,7 @@ struct gamete_LV_database : public EM_helpers {
EM_init(marker_names);
EM_update_gamete_prob(); // E
do {
MSG_DEBUG("*** ITERATION ***");
// MSG_DEBUG("*** ITERATION ***");
EM_update_gamete_prob(); // E
EM_update_distances(); // M
delta = EM_delta();
......
......@@ -575,8 +575,11 @@ void message_queue::run()
#define MSG_WARNING(_msg_expr_) do { std::cout << _msg_expr_ << std::endl; } while (0)
#define MSG_INFO(_msg_expr_) do { std::cout << _msg_expr_ << std::endl; } while (0)
#define MSG_QUEUE_FLUSH()
// #define MSG_DEBUG(_msg_expr_) do { std::clog << _msg_expr_ << std::endl; } while (0)
/*
#define MSG_DEBUG(_msg_expr_) do { std::clog << _msg_expr_ << std::endl; } while (0)
/*/
#define MSG_DEBUG(_)
//*/
#define MSG_DEBUG_INDENT
#define MSG_DEBUG_INDENT_EXPR(_str_)
#define MSG_DEBUG_DEDENT
......
......@@ -41,7 +41,7 @@ settings_t* read_settings(ifile& is);
/*format_specification_t* read_format(std::istream& is);*/
design_type* read_design(std::istream& is);
pedigree_type read_pedigree(const design_type* design, const std::string& filename, std::istream& is);
pedigree_type read_pedigree(const design_type* design, const std::string& filename, std::istream& is, bool with_LC);
void read_ld(settings_t* settings, const std::string& qtl_gen, const std::string& filename, std::istream& is);
#endif
......
......@@ -168,11 +168,14 @@ struct marker_obs_formats {
ret.scores.emplace_back();
auto& score = ret.scores.back();
score.first = it.key()[0];
// MSG_INFO("score " << score.first);
for (const auto& sc: it.value()) {
std::string s = sc;
score.second.emplace_back(s[0], s[1]);
}
MSG_INFO("score " << score.first << " // " << score.second);
}
// MSG_INFO(ret);
return ret;
}
......
......@@ -422,6 +422,7 @@ struct pedigree_type {
std::map<char, std::string> ancestor_names;
std::map<geno_matrix_index_type, std::string> generation_names;
std::map<int, int> m_id;
bool with_LC;
/*std::vector<VectorLC> LC;*/
/*
......@@ -471,6 +472,7 @@ struct pedigree_type {
max_states(NONE),
n_alleles(1),
filename("<none>"),
with_LC(true),
LC(),
factor_messages()
{
......@@ -787,6 +789,9 @@ struct pedigree_type {
void
compute_LC(int n)
{
if (!with_LC) {
return;
}
int p1 = tree.get_p1(n);
std::vector<gencomb_type> lc;
const geno_matrix& m = *generations[node_generations[n]];
......@@ -1358,6 +1363,7 @@ struct pedigree_type {
if (rw.fourcc(fs, "Fnam")) { return; }
rw(fs, filename);
if (rw.fourcc(fs, "Comb")) { return; }
rw(fs, with_LC);
comb_rw(fs, LC);
comb_rw(fs, state_labels);
comb_rw(fs, stat_dists);
......@@ -1464,9 +1470,10 @@ read_csv(const std::string& pedigree_file, char field_sep)
inline
pedigree_type
read_pedigree(const std::string& filename, char sep=';', size_t max_states=std::numeric_limits<size_t>::max())
read_pedigree(const std::string& filename, bool with_LC, char sep=';', size_t max_states=std::numeric_limits<size_t>::max())
{
pedigree_type ret;
ret.with_LC = with_LC;
ret.items = read_csv(filename, sep);
ret.max_states = max_states;
size_t cur = 0;
......
......@@ -29,6 +29,7 @@ struct ped_settings_t {
char csv_sep;
std::string work_directory;
size_t max_states;
bool with_LC;
std::string pop_name;
......@@ -39,6 +40,7 @@ struct ped_settings_t {
, csv_sep(';')
, work_directory()
, max_states(std::numeric_limits<decltype(max_states)>::max())
, with_LC(true)
, pop_name()
{}
......
......@@ -432,25 +432,62 @@ job_registry = {
size_t n_mark = settings->count_markers();
bool have_gametes = settings->output_mode & bn_settings_t::OutputGametes;
gamete_LV_database gamete_LV;
// for (size_t m = 0; m < n_mark; ++m) {
// ifile ifs(settings->job_filename("compute-LV", m));
// rw_base() (ifs, mark_state_prob[settings->marker_names[m]]);
// if (have_gametes) {
// ifile ig(settings->job_filename("gametes-LV", m));
// std::map<int, MatrixXd> gp;
// rw_base() (ig, gp);
// for (const auto& kv: gp) {
// if (kv.second.size() == 0) {
// MSG_DEBUG("HAVE EMPTY GAMETE LV " << kv.first);
// } else {
// if (kv.second.sum() == 0) {
// MSG_DEBUG("HAVE ZERO GAMETE LV " << kv.first);
// } else {
// gamete_LV.add_gametes(settings->marker_names[m], kv.first, kv.second, ped.items[kv.first - 1].is_dh());
// }
// }
// }
// }
// }
pop_data_type pop_data;
pop_data.ancestor_names = settings->pedigree.ancestor_names;
MSG_DEBUG("with_LC? " << std::boolalpha << settings->pedigree.with_LC);
for (size_t m = 0; m < n_mark; ++m) {
std::map<size_t, std::vector<double>> mark_states;
ifile ifs(settings->job_filename("compute-LV", m));
rw_base() (ifs, mark_state_prob[settings->marker_names[m]]);
msg_handler_t::cout() << "\rReading marker data #" << m << '/' << n_mark << "...";
rw_base() (ifs, mark_states);
const std::string& mark = settings->marker_names[m];
if (settings->pedigree.with_LC) {
msg_handler_t::cout() << " LC...";
for (auto& ind_sp: mark_states) {
const std::string& gen_name = ped.generations[ped.node_generations[ind_sp.first]]->name;
Eigen::Map<Eigen::VectorXd> sp(ind_sp.second.data(), ind_sp.second.size());
pop_data.LV.data_by_marker[gen_name][mark].emplace_back(sp);
}
}
if (have_gametes) {
msg_handler_t::cout() << " gametes...";
ifile ig(settings->job_filename("gametes-LV", m));
std::map<int, MatrixXd> gp;
rw_base() (ig, gp);
for (const auto& kv: gp) {
if (kv.second.size() == 0) {
MSG_DEBUG("HAVE EMPTY GAMETE LV " << kv.first);
} else {
if (kv.second.sum() == 0) {
} else if (kv.second.sum() == 0) {
MSG_DEBUG("HAVE ZERO GAMETE LV " << kv.first);
} else {
gamete_LV.add_gametes(settings->marker_names[m], kv.first, kv.second, ped.items[kv.first - 1].is_dh());
}
}
}
}
msg_handler_t::cout() << "\x1b[K";
}
if (settings->output_mode & bn_settings_t::OutputGametes) {
......@@ -461,21 +498,6 @@ job_registry = {
// abort();
}
pop_data_type pop_data;
pop_data.ancestor_names = settings->pedigree.ancestor_names;
for (size_t m = 0; m < n_mark; ++m) {
std::map<size_t, std::vector<double>> mark_states;
ifile ifs(settings->job_filename("compute-LV", m));
rw_base() (ifs, mark_states);
const std::string& mark = settings->marker_names[m];
for (auto& ind_sp: mark_states) {
const std::string& gen_name = ped.generations[ped.node_generations[ind_sp.first]]->name;
Eigen::Map<Eigen::VectorXd> sp(ind_sp.second.data(), ind_sp.second.size());
pop_data.LV.data_by_marker[gen_name][mark].emplace_back(sp);
}
}
pop_data.name = settings->pop_name;
for (const auto& om: settings->observed_mark) {
......
......@@ -57,6 +57,17 @@ arguments = {
SAFE_IGNORE_CALLBACK_ARGS;
}},
{{"--no-LC"},
{},
"Don't compute LC for each individual",
false,
{false},
[](CALLBACK_ARGS)
{
ensure(target)->with_LC = false;
SAFE_IGNORE_CALLBACK_ARGS;
}},
}},
{"Miscellaneous", "", false, {
{{"-h", "--help"},
......
......@@ -19,7 +19,7 @@
#include "pedigree_settings.h"
#include "output_impl.h"
void print_usage();
void print_usage_pedigree();
#ifndef SPELL_PEDIGREE_MAIN
......@@ -32,9 +32,10 @@ int SPELL_PEDIGREE_MAIN(int argc, const char** argv)
ped_settings_t* settings = ped_settings_t::from_args(argc, argv);
if (!settings) {
prg_name(basename(argv[0]));
print_usage();
print_usage_pedigree();
} else {
pedigree_type ped = read_pedigree(settings->pedigree_filename, settings->csv_sep, settings->max_states);
MSG_DEBUG("with_LC? " << std::boolalpha << settings->with_LC);
pedigree_type ped = read_pedigree(settings->pedigree_filename, settings->with_LC, settings->csv_sep, settings->max_states);
if (msg_handler_t::check(false)) {
return -1;
}
......
......@@ -4,7 +4,7 @@ CXX_STD=CXX14
CXX14STD="-std=c++14"
GCC_ARGS=-I . -I ./include -I ./include/RWrap -I ./include/eigen3 -I ./include/bayes -I ./include/input -pthread $(DEFINES)
GCC_ARGS=-I . -I ./include -I ./include/RWrap -I ./include/eigen3 -I ./include/bayes -I ./include/input -pthread $(DEFINES) -DEIGEN_NO_DEBUG -O3
PKG_CXX14FLAGS=$(GCC_ARGS)
PKG_CXXFLAGS=$(GCC_ARGS)
......
......@@ -18,6 +18,10 @@
#include "RWrap/RWrap.h"
#include <numeric>
extern "C" {
#include <ftw.h>
}
// Load and initialize data
// SEM (order)
......@@ -38,15 +42,41 @@ extern int SPELL_PEDIGREE_MAIN(int, const char**);
class Spell2PtMatrix;
extern "C" {
int clear_cache(const char *fpath, const struct stat *, int typeflag, struct FTW*)
{
/* FIXME check for some magic bytes or something before deleting! */
static std::regex is_cache_file("^.*/[a-f0-9]{1,16}$");
/*std::string f(fpath);*/
if (typeflag == FTW_F) { /* ignore all but regular files */
if (remove(fpath) != 0) {
MSG_WARNING("Couldn't remove file \"" << fpath << "\": " << strerror(errno));
/*} else {*/
/*MSG_DEBUG("Deleted cache file <" << fpath << '>');*/
}
} else if (typeflag == FTW_D) {
if (remove(fpath) != 0) {
MSG_WARNING("Couldn't remove directory \"" << fpath << "\": " << strerror(errno));
}
}
return 0;
}
}
class SpellMapTools {
private:
gamete_LV_database gamete_LV;
double m_convergence_threshold = 1.e-6;
int m_max_iterations = 100;
bool m_active = false;
std::string wd, name;
std::string _ped;
std::string _base;
Rwrap::DataFrame _obs;
public:
/* observations_specs must have three columns: generation, format, filename */
SpellMapTools(std::string ped_filename, Rwrap::DataFrame observation_specs, int mt);
SpellMapTools(std::string ped_filename, Rwrap::DataFrame observation_specs, int mt, std::string basedir);
Rwrap::List
SEM(marker_vec order);
Rwrap::List
......@@ -89,6 +119,37 @@ public:
}
bool is_active() { return m_active; }
std::vector<std::string>
get_marker_names()
{
std::vector<std::string> ret;
if (gamete_LV.data.size() == 0) {
return ret;
}
const auto& lv = gamete_LV.data.begin()->second.lv;
ret.reserve(lv.size());
for (const auto& kv: lv) {
ret.emplace_back(kv.first);
}
return ret;
}
void
cleanup_disk_cache()
{
nftw(wd.c_str(), clear_cache, 10, FTW_DEPTH);
}
Rwrap::List
get_session_data() {
Rwrap::List ret;
ret.add("session.id", name);
ret.add("base.dir", _base);
ret.add("pedigree.filename", _ped);
ret.add("observation.specs", _obs);
return ret;
}
};
......@@ -133,6 +194,7 @@ public:
{
if (i < 1 || j < 1 || i > rownames.size() || j > colnames.size()) {
lod = r = NA_REAL;
return;
}
--i; --j;
if (rownames[i] == colnames[j]) {
......@@ -215,7 +277,7 @@ extern "C" {