Commit bf3d08d3 authored by Damien Leroux's avatar Damien Leroux
Browse files

Spell-marker now produces output. On to spell-qtl.

parent 15f72f0b
......@@ -50,6 +50,9 @@ struct bn_settings_t {
std::vector<std::string> marker_names;
std::map<std::string, size_t> marker_index;
/* cleanup */
std::set<std::string> job_filenames;
bn_settings_t()
: scheme(JDS_None)
, command_line()
......@@ -75,6 +78,7 @@ struct bn_settings_t {
, alleles_per_marker()
, marker_names()
, marker_index()
, job_filenames()
{}
......@@ -88,6 +92,34 @@ struct bn_settings_t {
bool is_master() const { return job_start == SPELL_CASTER; }
static bn_settings_t* from_args(int argc, const char** argv);
std::string
job_filename(const char* job_name, size_t job) const
{
std::string ret = MESSAGE(work_directory << '/' << job_name << '.' << job << ".data");
const_cast<bn_settings_t*>(this)->job_filenames.insert(ret);
return ret;
}
void
cleanup_job_files() const
{
for (const auto& path: job_filenames) {
unlink(path.c_str());
}
}
void
load_full_pedigree_data() const
{
/* NOT const. But called in a const context (job "collect-LV").
* It is just not possible in EVERY case to handle this before the job is scheduled,
* depending on which job dispatch method is used. Maybe there should be some
* refactoring later with some settings->prepare_job(job_name) just before the job
* is started.
*/
const_cast<pedigree_type&>(pedigree).load(pedigree_filename, false);
}
};
#endif
......
......@@ -8,6 +8,6 @@ typedef std::function<size_t(const bn_settings_t*)> count_jobs_type;
extern std::map<std::string, std::pair<count_jobs_type, job_type>> job_registry;
void do_the_job(bn_settings_t* settings, std::string job);
bool do_the_job(bn_settings_t* settings, std::string job);
#endif
......@@ -246,7 +246,7 @@ typedef std::map<ptrdiff_t, std::shared_ptr<bn_factor_interface_type>> interface
struct bn_message_buffer_type {
bn_message_buffer_type(size_t n)
: m_front(n, 1.), m_back(n, 1.), m_double_buffering(true)
: m_double_buffering(true), m_front(n, 1.), m_back(n, 1.)
{}
std::vector<bn_message_type>& get(bool front_or_back) { return m_double_buffering && front_or_back ? m_back : m_front; }
......@@ -370,7 +370,7 @@ struct bn_message_updater_type {
}
void
file_io_computer(std::ofstream& ofs, factor_dic_type& fdic, interface_dic_type& idic)
file_io_computer(std::ofstream& ofs, factor_dic_type& /*fdic*/, interface_dic_type& /*idic*/)
{
rw_base()(ofs, (ptrdiff_t) m_computer.get());
}
......
......@@ -10,8 +10,10 @@
#include "eigen.h"
#include "error.h"
#include "data/chromosome.h"
/*#include "generation_rs_fwd.h"*/
#include "input/read_trait.h"
#include "tensor.h"
/** FOURCC **/
......@@ -634,10 +636,50 @@ struct rw_base : public rw_any<rw_base> {
struct LV_database {
/* GEN / MARK / IND => LV_vec */
std::map<std::string, std::map<std::string, std::vector<VectorXd>>> data_by_marker;
/* CHROM / GEN / IND => LV_mat */
std::map<std::string, std::map<std::string, std::vector<MatrixXd>>> data;
LV_database() : data() {}
LV_database() : data_by_marker(), data() {}
void assemble_using_map(const std::vector<chromosome>& map)
{
data.clear();
for (const auto& chr: map) {
for (const auto& gen_dat: data_by_marker) {
const auto& gen = gen_dat.first;
const auto& mark_ind_lv = gen_dat.second;
size_t n_states = mark_ind_lv.begin()->second.front().size();
size_t n_mark = chr.raw.marker_name.size();
size_t n_ind = mark_ind_lv.begin()->second.size();
auto& ind_LVmat = data[chr.name][gen];
ind_LVmat.resize(n_ind, {n_states, n_mark});
size_t m = 0;
for (const auto& mark: chr.raw.marker_name) {
for (size_t ind = 0; ind < n_ind; ++ind) {
ind_LVmat[ind].col(m) = mark_ind_lv.find(mark)->second[ind];
}
++m;
}
}
}
}
MatrixXd operator () (
std::vector<std::string>::const_iterator begin,
std::vector<std::string>::const_iterator end,
const std::string& gen, size_t ind) const
{
const auto& milv = data_by_marker.find(gen)->second;
size_t n_states = milv.find(*begin)->second.front().size();
MatrixXd LV(n_states, end - begin);
for (size_t i = 0; i < n_states; ++i) {
LV.col(i) = milv.find(*begin)->second[ind];
}
return LV;
}
MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind)
{
return data[chr][gen][ind];
......@@ -664,6 +706,34 @@ struct LV_database {
return ret;
}
template <typename STREAM_TYPE>
void
file_io(STREAM_TYPE& fs)
{
rw_base rw;
if (rw.fourcc(fs, "SMLV")) { return; }
rw(fs, data_by_marker);
rw(fs, data);
}
static
LV_database
load_from(const std::string& filename)
{
std::ifstream ifs(filename);
LV_database ret;
ret.file_io(ifs);
return ret;
}
void
save_to(const std::string& filename)
{
std::ofstream ofs(filename);
file_io(ofs);
}
/*
static
bool lv_check_fourcc(std::ifstream& ifs, const char* fourcc)
{
......@@ -731,6 +801,7 @@ struct LV_database {
}
}
}
*/
friend
std::ostream& operator << (std::ostream& os, const LV_database& LV)
......@@ -758,7 +829,7 @@ struct LV_database {
struct qtl_pop_type {
std::string name;
std::vector<size_t> indices;
const geno_matrix* gen;
std::shared_ptr<geno_matrix> gen;
std::map<std::string, std::vector<MatrixXd>> LV;
std::string observed_traits_filename;
std::vector<trait> observed_traits;
......@@ -785,10 +856,22 @@ struct pop_data_type {
std::string genetic_map_filename;
std::string pedigree_filename;
std::string qtl_generation_name;
std::map<std::string, geno_matrix*> generations;
std::vector<std::shared_ptr<geno_matrix>> generations;
LV_database LV;
std::map<std::string, std::vector<int>> families;
std::map<size_t, const geno_matrix*> gen_by_id;
std::map<std::string, std::vector<size_t>> families;
std::map<size_t, std::shared_ptr<geno_matrix>> gen_by_id;
void
assemble_chromosomes(const std::vector<chromosome>& map)
{
LV.assemble_using_map(map);
}
void
set_qtl_generation(const std::string& name)
{
qtl_generation_name = name;
}
std::string save()
{
......@@ -820,26 +903,30 @@ struct pop_data_type {
write_str(ofs, pedigree_filename);
write_str(ofs, qtl_generation_name);
write_size(ofs, generations.size());
for (const auto& kv: generations) {
for (const auto& g: generations) {
write_fourcc(ofs, "SGTE");
write_str(ofs, kv.first);
write_geno_matrix(ofs, kv.second);
if (g) {
write_geno_matrix(ofs, *g);
} else {
geno_matrix _;
write_geno_matrix(ofs, _);
}
}
LV.save_to(ofs);
LV.file_io(ofs);
write_fourcc(ofs, "SFAM");
write_size(ofs, families.size());
for (const auto& kv: families) {
write_str(ofs, kv.first);
write_size(ofs, kv.second.size());
for (int i: kv.second) {
write_int(ofs, i);
for (size_t i: kv.second) {
write_size(ofs, i);
}
}
write_fourcc(ofs, "SGBI");
write_size(ofs, gen_by_id.size());
for (const auto& kv: gen_by_id) {
write_size(ofs, kv.first);
write_str(ofs, kv.second->name);
write_size(ofs, std::find(generations.begin(), generations.end(), kv.second) - generations.begin());
}
}
......@@ -873,10 +960,11 @@ struct pop_data_type {
size_t n_gen = read_size(ifs);
for (size_t i = 0; i < n_gen; ++i) {
CHECK_4CC("SGTE");
std::string k = read_str(ifs);
ret.generations[k] = read_geno_matrix(ifs);
ret.generations.emplace_back(std::make_shared<geno_matrix>());
read_geno_matrix(ifs, *ret.generations.back());
}
ret.LV = LV_database::load_from(ifs);
/*ret.LV = LV_database::load_from(ifs);*/
ret.LV.file_io(ifs);
CHECK_4CC("SFAM");
size_t n_fam = read_size(ifs);
for (size_t f = 0; f < n_fam; ++f) {
......@@ -884,30 +972,29 @@ struct pop_data_type {
size_t n_ind = read_size(ifs);
auto& fam = ret.families[k];
for (size_t i = 0; i < n_ind; ++i) {
ret.families[k].push_back(read_int(ifs));
ret.families[k].push_back(read_size(ifs));
}
}
CHECK_4CC("SGBI");
size_t n_gbi = read_size(ifs);
for (size_t g = 0; g < n_gbi; ++g) {
size_t k = read_size(ifs);
std::string gen = read_str(ifs);
ret.gen_by_id[k] = ret.generations[gen];
ret.gen_by_id[k] = ret.generations[read_size(ifs)];
}
return ret;
}
size_t size() const { return families.find(qtl_generation_name)->second.size(); }
const geno_matrix* get_geno_matrix(const std::string& family, size_t ind) const
std::shared_ptr<geno_matrix> get_geno_matrix(const std::string& family, size_t ind) const
{
return gen_by_id.find(families.find(family)->second[ind])->second;
}
std::map<const geno_matrix*, std::vector<size_t>>
std::map<std::shared_ptr<geno_matrix>, std::vector<size_t>>
all_qtl_geno_matrix() const
{
std::map<const geno_matrix*, std::vector<size_t>> ret;
std::map<std::shared_ptr<geno_matrix>, std::vector<size_t>> ret;
size_t i = 0;
for (size_t ind: families.find(qtl_generation_name)->second) {
ret[gen_by_id.find(ind)->second].push_back(i);
......@@ -971,8 +1058,9 @@ struct pop_data_type {
os << "| - " << kv.first << ": " << kv.second << std::endl;
}
os << "| Generation matrices:" << std::endl;
for (const auto& kv: pop_data.generations) {
prepend(os, "| ", (*kv.second));
for (const auto& gen: pop_data.generations) {
if (!gen) { continue; }
prepend(os, "| ", (*gen));
}
os << "| Families" << std::endl;
for (const auto& kv: pop_data.families) {
......@@ -980,14 +1068,28 @@ struct pop_data_type {
}
os << "| Computed locus vectors:" << std::endl;
/*prepend(os, "| ", pop_data.LV);*/
for (const auto& kv: pop_data.LV.data) {
os << "| Chromosome " << kv.first << std::endl;
for (const auto& gen_lv: kv.second) {
os << "| Generation " << gen_lv.first << std::endl;
const auto& family = pop_data.families.find(gen_lv.first)->second;
for (size_t i = 0; i < gen_lv.second.size(); ++i) {
os << "| #" << i << " " << pop_data.get_geno_matrix(gen_lv.first, i)->name << std::endl;
prepend(os, "| ", gen_lv.second[i]);
if (pop_data.LV.data.size()) {
for (const auto& kv: pop_data.LV.data) {
os << "| Chromosome " << kv.first << std::endl;
for (const auto& gen_lv: kv.second) {
os << "| Generation " << gen_lv.first << std::endl;
const auto& family = pop_data.families.find(gen_lv.first)->second;
for (size_t i = 0; i < gen_lv.second.size(); ++i) {
os << "| #" << i << " " << pop_data.get_geno_matrix(gen_lv.first, i)->name << std::endl;
prepend(os, "| ", gen_lv.second[i]);
}
}
}
} else {
for (const auto& kv: pop_data.LV.data_by_marker) {
os << "| Marker " << kv.first << std::endl;
for (const auto& gen_lv: kv.second) {
os << "| Generation " << gen_lv.first << std::endl;
const auto& family = pop_data.families.find(gen_lv.first)->second;
for (size_t i = 0; i < gen_lv.second.size(); ++i) {
os << "| #" << i << " " << pop_data.get_geno_matrix(gen_lv.first, i)->name << std::endl;
prepend(os, "| ", gen_lv.second[i].transpose());
}
}
}
}
......
......@@ -518,6 +518,7 @@ std::ostream& operator << (std::ostream& os, const geno_matrix& gm)
tmp(i + 1, j + 1) = ss.str();
}
}
os << gm.name << std::endl;
os << tmp;
os << std::endl;
/*os << "P" << std::endl << gm.p << std::endl;*/
......@@ -525,15 +526,15 @@ std::ostream& operator << (std::ostream& os, const geno_matrix& gm)
/*MatrixXd diag = gm.diag.asDiagonal();*/
os << "D" << std::endl << gm.diag.transpose() << std::endl;
os << "STATIONARY DISTRIBUTION " << gm.stat_dist.transpose() << std::endl;
braille_histogram<double> hist(200, 20);
for (int i = 0; i < gm.stat_dist.size(); ++i) {
hist.add_point(gm.stat_dist(i));
}
hist.render().set_background(0, 64, 0);
os << "" << hist << std::endl;
/*braille_histogram<double> hist(200, 20);*/
/*for (int i = 0; i < gm.stat_dist.size(); ++i) {*/
/*hist.add_point(gm.stat_dist(i));*/
/*}*/
/*hist.render().set_background(0, 64, 0);*/
/*os << "" << hist << std::endl;*/
#ifdef WITH_OVERLUMPING
os << "SYMMETRIES" << std::endl << gm.symmetries << std::endl;
os << "LATENT SYMMETRIES" << std::endl << gm.latent_symmetries << std::endl;
/*os << "SYMMETRIES" << std::endl << gm.symmetries << std::endl;*/
/*os << "LATENT SYMMETRIES" << std::endl << gm.latent_symmetries << std::endl;*/
#endif
os << "COLLECT " << MATRIX_SIZE(gm.collect) << std::endl;
os << braille_grid(gm.collect) << std::endl;
......
......@@ -1374,9 +1374,9 @@ struct pedigree_type {
int p1 = ped.tree.get_p1(g1);
int p2 = ped.tree.get_p1(g2);
if (g1 == g2) {
os << p1 << ";0" << std::endl;
os << ped.tree.node2ind(p1) << ";0" << std::endl;
}
os << p1 << ';' << p2 << std::endl;
os << ped.tree.node2ind(p1) << ';' << ped.tree.node2ind(p2) << std::endl;
}
}
return os;
......
......@@ -141,6 +141,13 @@ struct pedigree_tree_type {
: m_leaves(), m_roots(), m_nodes(), m_must_recompute(), m_node_number_to_ind_number(), m_ind_number_to_node_number(1, NONE)
{}
bool
node_is_clone(int n) const
{
const auto& N = m_nodes[n];
return N.is_ancestor()
|| (N.is_genotype() && m_nodes[get_p1(N.p1)].is_ancestor() && m_nodes[get_p1(N.p2)].is_ancestor());
}
int get_p1(int n) const { return m_nodes[n].p1; }
int get_p2(int n) const { return m_nodes[n].p2; }
......@@ -338,9 +345,8 @@ struct pedigree_tree_type {
while (stack.size()) {
int n = stack.back();
stack.pop_back();
if (m_nodes[n].has_p1()) {
stack.push_back(m_nodes[n].p1);
}
if (node_is_clone(n)) { continue; }
stack.push_back(m_nodes[n].p1);
if (m_nodes[n].has_p2()) {
stack.push_back(m_nodes[n].p2);
}
......
......@@ -351,7 +351,7 @@ struct context_key_struc {
context_key_struc(const qtl_pop_type* p, const chromosome* c, const std::vector<double>& l)
: pop(p)
, gen(p->gen)
, gen(p->gen.get())
, chr(c)
, loci(l)
{
......@@ -378,7 +378,7 @@ struct context_key_struc {
#if 1
context_key_struc(const qtl_pop_type* p, const chromosome* c)
: pop(p)
, gen(p->gen)
, gen(p->gen.get())
, chr(c)
, loci(compute_steps(chr->condensed.marker_locus, active_settings->step))
{
......
......@@ -60,6 +60,7 @@ struct tensor_view {
++i_nd;
}
offset += indices[i_skip] * m_strides[dimensions[i_skip]];
std::cout << "new offset = " << offset << std::endl;
++i_dim;
}
for (; i_dim < N_Dim; ++i_dim) {
......
......@@ -2,6 +2,6 @@ GCC_VERSION=-4.9
CXX=g++$(GCC_VERSION) --std=gnu++0x -Wno-unused-local-typedefs -fPIC
#CXX=clang++ -std=c++11 -stdlib=libc++
INC=-I../../include -I../../include/input -I/home/daleroux/include/eigen3.2 -I/home/daleroux/include -I../../include/bayes
INC=-I../../include -I../../include/input -I/home/daleroux/include/eigen3 -I/home/daleroux/include -I../../include/bayes
COV=gcov #llvm-cov-3.4 #gcov #$(GCC_VERSION)
LD=$(CXX)
......@@ -228,6 +228,7 @@ arguments = {
/*convert_to_mapmaker(pop, ensure(target)->map, gen_name, spec, unknown, data_type, output);*/
exit(0);
SAFE_IGNORE_CALLBACK_ARGS;
(void) unknown;
}},
}},
{"Miscellaneous", "", false, {
......
......@@ -111,6 +111,8 @@ struct master_base {
}
}
bool all_good() const { return m_job_failed_counter == 0; }
virtual std::shared_ptr<worker_base> spawn_worker(size_t i, size_t slice_start ,size_t slice_end, const std::string& job) = 0;
virtual void wait_for_jobs() = 0;
......@@ -122,7 +124,7 @@ struct master_base {
++m_job_failed_counter;
}
++m_job_done_counter;
CREATE_MESSAGE(msg_channel::Out, MESSAGE("\033[100D\033[KProgress: " << m_job_done_counter << '/' << m_job_total << " (" << m_job_ok_counter << " good, " << m_job_failed_counter << " failed)" << std::flush));
CREATE_MESSAGE(msg_channel::Out, MESSAGE("\033[100D\033[KProgress: " << YELLOW << m_job_done_counter << NORMAL << '/' << YELLOW << m_job_total << NORMAL << " (" << m_job_ok_counter << " good, " << m_job_failed_counter << " failed)" << std::flush));
}
};
......@@ -141,7 +143,7 @@ struct master_none : public master_base {
void wait_for_jobs();
void notify(size_t i, bool ok)
void notify(size_t /*i*/, bool ok)
{
update_job(ok);
}
......@@ -193,7 +195,7 @@ struct master_thread : public master_base {
void wait_for_jobs();
void notify(size_t i, bool ok)
void notify(size_t /*i*/, bool ok)
{
update_job(ok);
}
......@@ -248,7 +250,7 @@ struct master_ssh : public master_base {
void wait_for_jobs();
void notify(size_t i, bool ok)
void notify(size_t /*i*/, bool ok)
{
update_job(ok);
}
......@@ -334,7 +336,7 @@ struct master_sge : public master_base {
void wait_for_jobs() { wait.lock(); wait.unlock(); }
void notify(size_t i, bool ok)
void notify(size_t /*i*/, bool ok)
{
++jobs_done;
/*MSG_DEBUG("Job #" << i << (ok ? " successful" : " failed") << " (" << jobs_done << '/' << jobs_total << ')');*/
......@@ -415,17 +417,23 @@ void reader(int)
} // extern "C"
void do_the_job(bn_settings_t* settings, std::string job)
bool do_the_job(bn_settings_t* settings, std::string job)
{
bool all_good = false;
if (settings->is_master()) {
CREATE_MESSAGE(msg_channel::Out, MESSAGE(std::endl << GREEN << "Running jobs " << job << NORMAL << std::endl));
if (settings->scheme == JDS_MT) {
master_thread mt(settings, job);
mt.run();
mt.wait_for_jobs();
all_good = mt.all_good();
} else if (settings->scheme == JDS_SSH) {
master_ssh mssh(settings, job);
mssh.run();
mssh.wait_for_jobs();
all_good = mssh.all_good();
} else if (settings->scheme == JDS_SGE) {
char wdbuf[PATH_MAX];
const char* wd = getcwd(wdbuf, PATH_MAX);
......@@ -447,11 +455,20 @@ void do_the_job(bn_settings_t* settings, std::string job)
unlink(settings->fifo_path.c_str());
signal(SIGIO, old_sigio);
fclose(master_sge::fifo);
all_good = msge.all_good();
} else {
master_none mnone(settings, job);
mnone.run();
mnone.wait_for_jobs();
all_good = mnone.all_good();
}
if (!all_good) {
CREATE_MESSAGE(msg_channel::Out, MESSAGE(std::endl));
MSG_ERROR("At least one job failed. Aborting.", "Check error messages in job logs.");
}
return all_good;
} else {
if (settings->scheme == JDS_SSH) {
worker_local_ssh wl(settings, settings->job_start, settings->job_end, job);
......@@ -460,6 +477,7 @@ void do_the_job(bn_settings_t* settings, std::string job)
worker_local_sge wl(settings, settings->job_start, settings->job_end, job);
wl.run_slice();
}
return true;
}
}
......@@ -2,12 +2,7 @@
#include "output.h"
#include "pedigree.h"
#include "factor_var3.h"
std::string job_filename(const bn_settings_t* settings, const char* job_name, size_t job)
{
return MESSAGE(settings->work_directory << '/' << job_name << '.' << job << ".data");
}
#include "tensor.h"
bool job_check_fourcc(std::ifstream& ifs, const char* fourcc)
{
......@@ -19,14 +14,6 @@ bool job_check_fourcc(std::ifstream& ifs, const char* fourcc)
}
inline
bn_message_type
make_obs(const std::vector<bn_label_type>& domain, const marker_observation_spec& spec, char obs)
{
return {};
}