Commit e565d8ed authored by Damien Leroux's avatar Damien Leroux
Browse files

Done with the refactorings, ready for v0.1 NOW!

parent b1319a60
......@@ -6,13 +6,11 @@
/*#define VERSION_PATCH "0"*/
#define BANNER ( \
"┌─────────────────────────────────────────────────────────────────────────────┐\n" \
"│Spell-QTL v" VERSION_MAJOR "." VERSION_MINOR "." VERSION_PATCH \
" │\n" \
"│© 2013-ad aeternam S. Jasson, D. Leroux, Inra │\n" \
"│ │\n" \
"│Species Perscrutandis Enixe Locis Locabuntur │\n" \
"└─────────────────────────────────────────────────────────────────────────────┘")
"┌──────────────────────────────────────────────\n" \
"│Spell-QTL v" VERSION_MAJOR "." VERSION_MINOR "." VERSION_PATCH "\n" \
"│© 2013-ad aeternam S. Jasson, D. Leroux, Inra\n" \
"│\n" \
"│Species Perscrutandis Enixe Locis Locabuntur\n")
#if 0
"│▞▀▖ ▗ ▌ ▗ ▛▀▖ ▐ ▌▗ ▞▀▖ ▜▜ ▌ ▐ │\n" \
......
......@@ -11,6 +11,7 @@
#include "eigen.h"
#include "error.h"
#include "generation_rs_fwd.h"
#include "input/read_trait.h"
/** FOURCC **/
......@@ -294,6 +295,22 @@ struct LV_database {
return data.find(chr)->second.find(gen)->second[ind];
}
std::map<std::string, std::vector<MatrixXd>>
extract(const std::string& gen, const std::vector<size_t> ind_vec) const
{
std::map<std::string, std::vector<MatrixXd>> ret;
for (const auto& chr_gen_lv_vec: data) {
const std::string& chr = chr_gen_lv_vec.first;
const auto& lv_vec = chr_gen_lv_vec.second.find(gen)->second;
auto& ret_lv_vec = ret[chr];
ret_lv_vec.reserve(ind_vec.size());
for (size_t i: ind_vec) {
ret_lv_vec.push_back(lv_vec[i]);
}
}
return ret;
}
static
bool lv_check_fourcc(std::ifstream& ifs, const char* fourcc)
{
......@@ -385,6 +402,30 @@ struct LV_database {
/** **/
struct qtl_pop_type {
std::string name;
std::vector<size_t> indices;
const generation_rs* gen;
std::map<std::string, std::vector<MatrixXd>> LV;
std::string observed_traits_filename;
std::vector<trait> observed_traits;
/*qtl_pop_type(const std::string& n, const std::vector<size_t>& ind, const generation_rs* g,*/
/*std::vector<MatrixXd>&& lv, const std::string& otf, std::vector<trait>&& ot)*/
/*: name(n), indices(ind), gen(g), LV(std::move(lv)), observed_traits_filename(ofs), observed_traits(std::move(ot))*/
/*{}*/
size_t size() const
{
return observed_traits.front().values.size();
}
const MatrixXd& get_LV(const std::string& chr, size_t i) const { return LV.find(chr)->second[i]; }
};
/** **/
struct pop_data_type {
std::string name;
std::map<std::string, std::string> marker_observation_filenames;
......@@ -393,6 +434,8 @@ struct pop_data_type {
std::string qtl_generation_name;
std::map<std::string, generation_rs*> generations;
LV_database LV;
std::map<std::string, std::vector<int>> families;
std::map<size_t, const generation_rs*> gen_by_id;
std::string save()
{
......@@ -430,17 +473,40 @@ struct pop_data_type {
write_generation_rs(ofs, kv.second);
}
LV.save_to(ofs);
write_fourcc(ofs, "SFAM");
write_size(ofs, families.size());
for (const auto& kv: families) {
write_str(ofs, kv.first);
write_size(ofs, kv.second.size());
for (int i: kv.second) {
write_int(ofs, i);
}
}
write_fourcc(ofs, "SGBI");
write_size(ofs, gen_by_id.size());
for (const auto& kv: gen_by_id) {
write_size(ofs, kv.first);
write_str(ofs, kv.second->name);
}
}
static
bool pop_check_fourcc(std::ifstream& ifs, const char* fourcc)
{
if (check_fourcc(ifs, fourcc)) {
MSG_ERROR("Could not read FOURCC \"" << fourcc << "\". This is not a valid population data file.", "");
return true;
}
return false;
}
static
pop_data_type load_from(const std::string& filename)
{
#define CHECK_4CC(_fourcc_) if (pop_check_fourcc(ifs, _fourcc_)) { return ret; }
std::ifstream ifs(filename);
pop_data_type ret;
if (check_fourcc(ifs, "SPOP")) {
MSG_ERROR("This is not a valid population data file.", "");
return ret;
}
CHECK_4CC("SPOP");
ret.name = read_str(ifs);
size_t n_mof = read_size(ifs);
for (size_t i = 0; i < n_mof; ++i) {
......@@ -453,20 +519,78 @@ struct pop_data_type {
ret.qtl_generation_name = read_str(ifs);
size_t n_gen = read_size(ifs);
for (size_t i = 0; i < n_gen; ++i) {
if (check_fourcc(ifs, "SGTE")) {
MSG_ERROR("This is not a valid population data file.", "");
return ret;
}
CHECK_4CC("SGTE");
std::string k = read_str(ifs);
MSG_DEBUG("*** reading generation " << k);
MSG_QUEUE_FLUSH();
ret.generations[k] = read_generation_rs(ifs);
MSG_DEBUG("*** done"); MSG_QUEUE_FLUSH();
}
ret.LV = LV_database::load_from(ifs);
CHECK_4CC("SFAM");
size_t n_fam = read_size(ifs);
for (size_t f = 0; f < n_fam; ++f) {
std::string k = read_str(ifs);
size_t n_ind = read_size(ifs);
auto& fam = ret.families[k];
for (size_t i = 0; i < n_ind; ++i) {
ret.families[k].push_back(read_int(ifs));
}
}
CHECK_4CC("SGBI");
size_t n_gbi = read_size(ifs);
for (size_t g = 0; g < n_gbi; ++g) {
size_t k = read_size(ifs);
std::string gen = read_str(ifs);
ret.gen_by_id[k] = ret.generations[gen];
}
return ret;
}
size_t size() const { return families.find(qtl_generation_name)->second.size(); }
const generation_rs* get_generation_rs(const std::string& family, size_t ind) const
{
return gen_by_id.find(families.find(family)->second[ind])->second;
}
std::map<const generation_rs*, std::vector<size_t>>
all_qtl_generation_rs() const
{
std::map<const generation_rs*, std::vector<size_t>> ret;
size_t i = 0;
for (size_t ind: families.find(qtl_generation_name)->second) {
ret[gen_by_id.find(ind)->second].push_back(i);
++i;
}
return ret;
}
std::string extract_subpops(std::multimap<std::string, qtl_pop_type>& pops, const std::string& traits_filename, const std::vector<trait>& traits)
{
auto extract_traits
= [&] (const std::vector<size_t>& ind_vec)
{
std::vector<trait> ret;
ret.resize(traits.size());
for (size_t ti = 0; ti < traits.size(); ++ti) {
ret[ti].name = traits[ti].name;
ret[ti].values.reserve(ind_vec.size());
for (size_t i: ind_vec) { ret[ti].values.push_back(traits[ti].values[i]); }
}
return ret;
};
auto aqg = all_qtl_generation_rs();
for (const auto& kv: aqg) {
pops.insert({name, {
name,
kv.second,
kv.first,
LV.extract(qtl_generation_name, kv.second),
traits_filename,
extract_traits(kv.second)
}});
}
return name;
}
template <typename PRINTABLE>
static
void prepend(std::ostream& os, const std::string& pfx, PRINTABLE&& p)
......@@ -493,14 +617,19 @@ struct pop_data_type {
for (const auto& kv: pop_data.generations) {
prepend(os, "| ", (*kv.second));
}
os << "| Families" << std::endl;
for (const auto& kv: pop_data.families) {
os << "| * " << kv.first << ": " << kv.second << std::endl;
}
os << "| Computed locus vectors:" << std::endl;
/*prepend(os, "| ", pop_data.LV);*/
for (const auto& kv: pop_data.LV.data) {
os << "| Chromosome " << kv.first << std::endl;
for (const auto& gen_lv: kv.second) {
os << "| Generation " << gen_lv.first << std::endl;
const auto& family = pop_data.families.find(gen_lv.first)->second;
for (size_t i = 0; i < gen_lv.second.size(); ++i) {
os << "| #" << i << std::endl;
os << "| #" << i << " " << pop_data.get_generation_rs(gen_lv.first, i)->name << std::endl;
prepend(os, "| ", gen_lv.second[i]);
}
}
......
......@@ -484,7 +484,7 @@ template <typename Ret, typename... Args>
/*if (m_future.valid()) {*/
/*return m_storage = m_future.get();*/
/*}*/
while (!mutex.try_lock());
mutex.lock();
if (m_future.valid()) {
m_storage = m_future.get();
}
......@@ -879,8 +879,8 @@ struct with_mem_cache_traits {
return_type
create(CachingPolicy _Sync, Ret (&f) (Args...), const clean_value_type<Args>&... x)
{
static std::mutex _;
std::lock_guard<std::mutex> lock_guard(_);
static std::recursive_mutex _;
std::lock_guard<decltype(_)> lock_guard(_);
/*value<Ret>* ret_in_progress = __get_in_progress_registry<Ret, Args...>().find(&f, x...);*/
......@@ -906,8 +906,8 @@ struct without_mem_cache_traits {
return_type
create(CachingPolicy _Sync, Ret (&f) (Args...), const clean_value_type<Args>&... x)
{
static std::mutex _;
std::lock_guard<std::mutex> lock_guard(_);
static std::recursive_mutex _;
std::lock_guard<decltype(_)> lock_guard(_);
/*value<Ret>* ret_in_progress = __get_in_progress_registry<Ret, Args...>().find(&f, x...);*/
/*if (ret_in_progress) {*/
......
......@@ -6,7 +6,7 @@
#endif
typedef const generation_rs* generation_value;
typedef const population* population_value;
typedef const qtl_pop_type* population_value;
typedef const chromosome* chromosome_value;
typedef const qtl_chromosome* qtl_chromosome_value;
......@@ -42,10 +42,16 @@ std::vector<double>
selected_qtls_on_chromosome(qtl_chromosome_value);
generation_value
generation_by_name(const std::string&);
generation_by_name(const population_value&, const std::string&, int);
generation_value
qtl_generation(const population_value&);
qtl_generation(const population_value&, int);
/*generation_value*/
/*generation_by_name(const std::string&);*/
/*generation_value*/
/*qtl_generation(const population_value&);*/
population_value
population_by_name(const std::string&);
......@@ -62,8 +68,8 @@ qtl_chromosome_by_name(const std::string&);
collection<std::string>
all_observed_generation_names(population_value);
std::vector<char>
marker_observations(population_value, chromosome_value, generation_value, int);
/*std::vector<char>*/
/*marker_observations(population_value, chromosome_value, generation_value, int);*/
/*collection<double>*/
value<std::vector<double>>
......@@ -72,8 +78,8 @@ test_loci(chromosome_value);
generation_rs::segment_computer_t
genoprob_computer(population_value, generation_value, qtl_chromosome_value);
marker_observation_spec
marker_observation_specifications(generation_value);
/*marker_observation_spec*/
/*marker_observation_specifications(generation_value);*/
labelled_matrix<MatrixXd, std::vector<char>, allele_pair>
compute_state_to_parental_origin(const context_key& ck, const locus_key& lk);
......@@ -94,6 +100,7 @@ individual_range(population_value);
/*observation_vectors_type*/
/*get_observation_vectors(generation_value gen);*/
#if 0
struct pop_mgo_data {
generation_value qtl_gen;
collection<std::string> aogn;
......@@ -108,7 +115,7 @@ struct pop_mgo_data {
/*MSG_DEBUG("new pop_mgo_data");*/
}
};
#endif
struct stpom_data {
std::vector<std::vector<char>> row_labels;
......@@ -142,10 +149,10 @@ static inline md5_digest& operator << (md5_digest& md5, const stpom_data& sd)
return md5 << sd.haplo1 << sd.haplo2;
}
static inline md5_digest& operator << (md5_digest& md5, const pop_mgo_data&)
{
return md5;
}
/*static inline md5_digest& operator << (md5_digest& md5, const pop_mgo_data&)*/
/*{*/
/*return md5;*/
/*}*/
MatrixXd
population_marker_obs(const context_key&, int);
......@@ -155,7 +162,7 @@ all_traits();
static inline
cache_input& operator & (cache_input& ci, const population*& pop)
cache_input& operator & (cache_input& ci, const qtl_pop_type*& pop)
{
std::string name;
ci & name;
......@@ -165,7 +172,7 @@ cache_input& operator & (cache_input& ci, const population*& pop)
static inline
cache_output& operator & (cache_output& co, const population*& pop)
cache_output& operator & (cache_output& co, const qtl_pop_type*& pop)
{
return co & pop->name;
}
......
......@@ -69,17 +69,30 @@ struct signal_display {
signal_display(const VectorXd& v, int imax, bool above)
: values(v.innerSize()), imax_(imax), above_(above)
{
int sig_cols = msg_handler_t::termcols() - 3;
values = v;
/*MSG_DEBUG("values.innerSize = " << values.innerSize());*/
#if 0
int sig_cols = msg_handler_t::termcols() - 3;
MSG_DEBUG("values.innerSize = " << values.innerSize());
MSG_QUEUE_FLUSH();
while (values.innerSize() >= sig_cols) {
if (values.innerSize() & 1) {
int sz = values.innerSize();
values.conservativeResize(sz + 1);
values(sz) = values(sz - 1);
}
int i = values.innerSize() >> 1;
values = values.transpose() * kroneckerProduct(MatrixXd::Identity(i, i), MatrixXd::Constant(1, 2, .5));
/*MSG_DEBUG("values.innerSize = " << values.innerSize());*/
MSG_DEBUG("values.innerSize = " << values.innerSize());
MSG_QUEUE_FLUSH();
}
#endif
double vmin = values.minCoeff();
double vmax = values.maxCoeff();
values = ((values.array() - vmin) / (vmax - vmin)).matrix();
if (vmin == vmax) {
values = (values.array() - vmin).matrix();
} else {
values = ((values.array() - vmin) / (vmax - vmin)).matrix();
}
}
friend std::ostream& operator << (std::ostream& os, const signal_display& sd)
......@@ -213,7 +226,7 @@ struct model_manager {
= compute_steps(
c.condensed.marker_locus,
step);
MSG_DEBUG("test loci for chromosome " << c << std::endl << all_loci[&c]);
/*MSG_DEBUG("test loci for chromosome " << c << std::endl << all_loci[&c]);*/
}
pop_blocks.clear();
}
......@@ -609,8 +622,12 @@ struct model_manager {
model_block_key mbk = locus_base_key;
mbk += std::make_pair(chrom_under_study, (*loci)[i_max]);
/*MSG_DEBUG("locus_base_key " << locus_base_key << " mbk " << mbk);*/
/*MSG_DEBUG("last_computation@" << last_computation);*/
/*MSG_QUEUE_FLUSH();*/
/*MSG_DEBUG((*last_computation));*/
/*MSG_QUEUE_FLUSH();*/
signal_display sd(*last_computation, i_max, max > threshold);
signal_display sd(last_computation->transpose(), i_max, max > threshold);
MSG_DEBUG("[COMPUTATION] " << loci->front() << sd << loci->back() << " max=" << max << " at " << (*loci)[i_max]);
return {chrom_under_study,
......@@ -785,7 +802,7 @@ struct model_manager {
all_popl.reserve(pb.size());
for (auto& vmat: pb) {
/* reduce each population block */
const population* pop = &pop_it->second;
const qtl_pop_type* pop = &pop_it->second;
context_key ck(new context_key_struc(pop, chrom_under_study, std::vector<double>()));
MatrixXd red = lk->reduce(*npar_it, loc);
/*MSG_DEBUG(MATRIX_SIZE(vmat->data));*/
......
......@@ -159,8 +159,8 @@ template <typename L>
void
init_connected_block_builder(
block_builder<L>& ret,
std::function<int (const population&)> get_pop_size,
std::function<std::vector<L> (const population&)> get_labels,
std::function<int (const qtl_pop_type&)> get_pop_size,
std::function<std::vector<L> (const qtl_pop_type&)> get_labels,
const collection<population_value>& all_pops)
{
std::map<L, int> index;
......@@ -206,8 +206,8 @@ template <typename L>
void
init_disconnected_block_builder(
block_builder<L>& ret,
std::function<int (const population&)> get_pop_size,
std::function<std::vector<L>(const population&)> get_labels,
std::function<int (const qtl_pop_type&)> get_pop_size,
std::function<std::vector<L>(const qtl_pop_type&)> get_labels,
const collection<population_value>& all_pops)
{
ret.n_rows = 0;
......
......@@ -27,9 +27,10 @@ md5_digest& operator << (md5_digest& md5, const qtl_chromosome& chr)
}
static inline
md5_digest& operator << (md5_digest& md5, const population& pop)
md5_digest& operator << (md5_digest& md5, const qtl_pop_type& pop)
{
return md5 << pop.qtl_generation_name << pop.observed_traits_filename << pop.observed_mark.size() << pop.noise;
/*return md5 << pop.qtl_generation_name << pop.observed_traits_filename << pop.observed_mark.size() << pop.noise;*/
return md5 << pop.name;
}
static inline
......
......@@ -425,6 +425,7 @@ inline void msg_handler_t::state_t::reset()
#define WHITE (msg_handler_t::instance().color ? _WHITE : "")
#define YELLOW (msg_handler_t::instance().color ? _YELLOW : "")
#define GREEN (msg_handler_t::instance().color ? _GREEN : "")
#define RED (msg_handler_t::instance().color ? _RED : "")
#define CYAN (msg_handler_t::instance().color ? _CYAN : "")
#define NORMAL (msg_handler_t::instance().color ? _NORMAL : "")
......
......@@ -314,14 +314,14 @@ population_marker_observation::raw_observations(const std::vector<char>& observa
#if 0
static inline
std::ostream&
operator << (std::ostream& os, const population&)
{
return os << "<population>";
}
#endif
namespace impl {
......@@ -1288,7 +1288,7 @@ generation_rs::genotype_probabilities() const
#if 0
inline
bool population::is_observed(const impl::generation_rs* g, int i) const
{
......@@ -1332,7 +1332,7 @@ pedigree_node population::pedigree_get_parents(const impl::generation_rs* g, int
return {-1, -1};
}
}
#endif
inline
......
......@@ -72,6 +72,7 @@ struct population_marker_observation {
};
#if 0
struct population {
std::string name;
std::string qtl_generation_name;
......@@ -118,7 +119,7 @@ struct population {
pedigree_node pedigree_get_parents(const impl::generation_rs* g, int i) const;
};
#endif
namespace impl {
struct generation_rs {
......
......@@ -93,7 +93,7 @@ bool ensure_directory_exists(const std::string& path)
#include "settings.h"
settings_t* read_settings(std::ifstream& is);
format_specification_t* read_format(std::istream& is);
/*format_specification_t* read_format(std::istream& is);*/
design_type* read_design(std::istream& is);
pedigree_type read_pedigree(const design_type* design, const std::string& filename, std::istream& is);
......
......@@ -291,10 +291,12 @@ operator * (const labelled_matrix<MATRIX, RL, CL1>& m1, const labelled_matrix<MA
labelled_matrix<MATRIX, RL, CL2> ret;
#ifndef EIGEN_NO_DEBUG
if (m1.column_labels != m2.row_labels) {
/*MSG_ERROR("Matrix labels mismatch! Can't multiply.");*/
std::cerr << "Matrix labels mismatch! Can't multiply." << std::endl;
std::cerr << "m1::cols " << m1.column_labels << std::endl;
std::cerr << "m2::rows " << m2.row_labels << std::endl;
MSG_ERROR("Matrix labels mismatch! Can't multiply.", "");
MSG_ERROR("m1::cols " << m1.column_labels, "");
MSG_ERROR("m2::rows " << m2.row_labels, "");
/*std::cerr << "Matrix labels mismatch! Can't multiply." << std::endl;*/
/*std::cerr << "m1::cols " << m1.column_labels << std::endl;*/
/*std::cerr << "m2::rows " << m2.row_labels << std::endl;*/
/*std::cerr << "m1:" << std::endl << m1 << std::endl;*/
/*std::cerr << "m2:" << std::endl << m2 << std::endl;*/
throw -1;
......
......@@ -448,7 +448,7 @@ typedef std::vector<std::map<model_block_key, MatrixXd>> constraint_list;
MatrixXd
contrast_groups(const collection<const population*>& all_pops, const locus_key& lk);
contrast_groups(const collection<const qtl_pop_type*>& all_pops, const locus_key& lk);
......@@ -535,7 +535,7 @@ struct model {
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_ghost_constraint(), m_all_pops()
{}
model(const value<MatrixXd>& y, const collection<const population*>& pops, SolverType st = SolverType::QR)
model(const value<MatrixXd>& y, const collection<const qtl_pop_type*>& pops, SolverType st = SolverType::QR)
: m_Y(y)
, m_blocks(), m_X()
, m_rank(), m_rss(), m_coefficients(), m_residuals()
......@@ -1051,7 +1051,7 @@ struct model {
bool m_computed;
bool m_with_constraints;
std::pair<model_block_key, model_block_key> m_ghost_constraint;
collection<const population*> m_all_pops;
collection<const qtl_pop_type*> m_all_pops;
public:
friend
......
......@@ -37,15 +37,10 @@ struct settings_t {
std::string name;
std::string notes;
design_type* design;
std::string map_filename;
std::vector<chromosome> map;
/*format_specification_t* marker_observation_specs;*/
enum observation_type_t { Geno, Line };
observation_type_t observation_type;
std::map<std::string, marker_observation_spec> marker_observation_specs;
std::vector<std::string> marker_observation_specs_filenames;
std::map<std::string, population> populations;
std::multimap<std::string, qtl_pop_type> populations;
std::vector<qtl_chromosome> working_set;
......@@ -104,10 +99,7 @@ struct settings_t {
settings_t()
: notes()
, design(0)
, map_filename(), map()
, observation_type(observation_type_t::Line)
, marker_observation_specs()