Commit 8d8683cb authored by Damien Leroux's avatar Damien Leroux
Browse files

XML I/O implemented and working.

parent 79a813db
<breeding-design>
<generation name="A">
<ancestor haplotype="aa"/>
<ancestor haplotype="a"/>
</generation>
<generation name="B">
<ancestor haplotype="bb"/>
<ancestor haplotype="b"/>
</generation>
<generation name="F1">
<cross p1="A" p2="B"/>
......
#ifndef _MCQTL_BANNER_H_
#define _MCQTL_BANNER_H_
/*#define VERSION_MAJOR "0"*/
/*#define VERSION_MINOR "1"*/
/*#define VERSION_PATCH "0"*/
#define BANNER ( \
"┌─────────────────────────────────────────────────────────────────────────────┐\n" \
"│SpeL v" VERSION_MAJOR "." VERSION_MINOR "." VERSION_PATCH \
" │\n" \
"│© 2013 S. Jasson, D. Leroux, Inra │\n" \
"│ │\n" \
"│Species Loci Perscrutandi Collocabuntur │\n" \
"└─────────────────────────────────────────────────────────────────────────────┘")
#if 0
"│▞▀▖ ▗ ▌ ▗ ▛▀▖ ▐ ▌▗ ▞▀▖ ▜▜ ▌ ▐ │\n" \
"│▚▄ ▛▀▖▞▀▖▞▀▖▄ ▞▀▖▞▀▘ ▌ ▞▀▖▞▀▖▄ ▙▄▘▞▀▖▙▀▖▞▀▘▞▀▖▙▀▖▌ ▌▜▀ ▝▀▖▛▀▖▞▀▌▄ ▌ ▞▀▖▐▐ ▞▀▖▞▀▖▝▀▖▛▀▖▌ ▌▛▀▖▜▀ ▌ ▌▙▀▖ │\n" \
"│▖ ▌▙▄▘▛▀ ▌ ▖▐ ▛▀ ▝▀▖ ▌ ▌ ▌▌ ▖▐ ▌ ▛▀ ▌ ▝▀▖▌ ▖▌ ▌ ▌▐ ▖▞▀▌▌ ▌▌ ▌▐ ▌ ▖▌ ▌▐▐ ▌ ▌▌ ▖▞▀▌▌ ▌▌ ▌▌ ▌▐ ▖▌ ▌▌ ▗▖│\n" \
"│▝▀ ▌ ▝▀▘▝▀ ▀▘▝▀▘▀▀ ▀▀▘▝▀ ▝▀ ▀▘ ▘ ▝▀▘▘ ▀▀ ▝▀ ▘ ▝▀▘ ▀ ▝▀▘▘ ▘▝▀▘▀▘ ▝▀ ▝▀ ▘▘▝▀ ▝▀ ▝▀▘▀▀ ▝▀▘▘ ▘ ▀ ▝▀▘▘ ▝▘│\n" \
"└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘")
#endif
#endif
#ifndef _MCQTL_CHRONO_H_
#define _MCQTL_CHRONO_H_
#include <map>
#include <sys/time.h>
struct chrono {
struct chrono_map : public std::map<std::string, chrono> {
~chrono_map()
{
for (auto& kv: *this) {
std::cout << kv.first << ": " << kv.second.accum << " seconds." << std::endl;
}
}
};
static chrono_map& map()
{
static chrono_map ret;
return ret;
}
double t0;
double accum;
static double time_()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec * .000001;
}
chrono() : t0(0), accum(0) {}
void start() { t0 = time_(); }
void stop() { accum += time_() - t0; }
static void start(const std::string& s) { map()[s].start(); }
static void stop(const std::string& s) { map()[s].stop(); }
};
#endif
#ifndef _MCQTL_ERROR_H_
#define _MCQTL_ERROR_H_
#include <string>
#include <iostream>
#include <set>
#include <utility>
#include <sstream>
#define _WHITE "\x1b[37;1m"
#define _RED "\x1b[31;1m"
#define _YELLOW "\x1b[33;1m"
#define _CYAN "\x1b[36;1m"
#define _NORMAL "\x1b[0;m"
struct msg_handler_t {
struct state_t {
bool color;
std::set<std::string> workarounds;
int count;
const char* error() { ++count; return color ? _RED : ""; }
const char* warning() { return color ? _YELLOW : ""; }
const char* info() { return color ? _CYAN : ""; }
const char* normal() { return color ? _NORMAL : ""; }
state_t() : color(true), workarounds(), count(0) {}
void check(bool fatal);
void reset();
};
static state_t& instance() { static state_t _; return _; }
static void set_color(bool _) { instance().color = _; }
static bool color() { return instance().color; }
static const char* e() { return instance().error(); }
static const char* w() { return instance().warning(); }
static const char* i() { return instance().info(); }
static const char* n() { return instance().normal(); }
static void check(bool fatal) { instance().check(fatal); }
static void reset() { instance().reset(); }
};
#define ERROR(_msg_expr_, _workaround_expr_) do {\
std::cerr << msg_handler_t::e() << "[ERR] " << _msg_expr_ << msg_handler_t::n() << std::endl;\
std::stringstream s; s << _workaround_expr_;\
if (s.str().size()) { msg_handler_t::instance().workarounds.insert(s.str()); } } while (0)
#define WARNING(_msg_expr_) std::cerr << msg_handler_t::w() << "[WRN] " << _msg_expr_ << msg_handler_t::n() << std::endl;
#define INFO(_msg_expr_) std::cout << msg_handler_t::i() << "[MSG] " << _msg_expr_ << msg_handler_t::n() << std::endl;
inline void msg_handler_t::state_t::check(bool fatal)
{
if (count > 0) {
std::cerr << info() << "[MSG] " << count;
if (count > 1) {
std::cout << " errors were";
} else {
std::cout << " error was";
}
std::cout << " reported. Suggestions to fix this:" << std::endl;
for (auto& w: workarounds) {
std::cout << " - " << w << std::endl;
}
if (fatal) {
exit(-count);
} else {
reset();
}
std::cout << normal();
}
}
inline void msg_handler_t::state_t::reset()
{
if (workarounds.size()) {
WARNING(workarounds.size() << " workarounds silently discarded");
}
count = 0;
workarounds.clear();
}
#define WHITE (msg_handler_t::instance().color ? _WHITE : "")
#define YELLOW (msg_handler_t::instance().color ? _YELLOW : "")
#define RED (msg_handler_t::instance().color ? _RED : "")
#define CYAN (msg_handler_t::instance().color ? _CYAN : "")
#define NORMAL (msg_handler_t::instance().color ? _NORMAL : "")
#endif
#ifndef _MCQTL_FAST_POLY_MATRIX_H_
#define _MCQTL_FAST_POLY_MATRIX_H_
#include "labelled_matrix.h"
#include "fast_polynom.h"
using Eigen::Matrix;
using Eigen::Dynamic;
using Eigen::MatrixXd;
#endif
#ifndef _MCQTL_GENOPROB_COMPUTER_H_
#define _MCQTL_GENOPROB_COMPUTER_H_
#include "markov_population.h"
#include "algebraic_genotype.h"
template <typename GENERATION>
struct geno_prob_computer {
std::vector<double> locus;
MatrixXd LV;
GENERATION breakmat;
std::unordered_map<double, MatrixXd> tr_cache;
eval_poly_optim epo;
MatrixXd& compute_tr(double dist)
{
return epo.eval(dist);
}
std::vector<MatrixXd> TR;
MatrixXd init_kernel;
MatrixXd redux;
std::map<int, MatrixXd> left, right;
MatrixXd normalize_by_col(const MatrixXd& m)
{
MatrixXd ret = MatrixXd::Zero(m.innerSize(), m.outerSize());
VectorXd sums = m.colwise().sum();
for (int i = 0; i < m.outerSize(); ++i) {
if (sums(i) != 0) {
ret.col(i) = m.col(i) / sums(i);
}
}
return ret;
}
MatrixXd lv(int i)
{
return LV.col(i).asDiagonal();
}
void init(const GENERATION& break_matrix)
{
epo.init(break_matrix);
tr_cache.clear();
breakmat = break_matrix;
TR.clear();
for (size_t i = 1; i < locus.size(); ++i) {
TR.emplace_back(compute_tr(locus[i] - locus[i - 1]));
}
redux = make_redux(matrix_labels_to_cliques(break_matrix.column_labels));
/*init_kernel = eval_poly(breakmat.data, .5, r_pow, s_pow);*/
init_kernel = epo.eval(std::numeric_limits<double>::infinity());
}
void compute_partial_kernels()
{
std::map<int, std::string> lstring, rstring;
left.clear();
right.clear();
int l = 0, r = LV.outerSize() - 1;
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
MatrixXd kernel;
/*left[0] = kernel = VectorXd::Ones(breakmat.innerSize()).asDiagonal();*/
left[0] = kernel = LV.col(0);
/* c'est foireux
* TODO: partir de LV0 pour left[0] et Id pour right[r], avec right[r-1]=LVr, puis boucler normalement.
* et pas de cas particulier dans partial_kernels[].
*/
lstring[0] = "LV0.";
for(++l; l <= r; ++l) {
kernel = LV.col(l).asDiagonal() * TR[l - 1] * kernel;
kernel /= kernel.sum();
left[l] = kernel;
std::stringstream ss;
ss << "LV" << l << ".TR" << (l-1) << '.' << lstring[l-1];
lstring[l] = ss.str();
}
right[r+1] = VectorXd::Ones(breakmat.innerSize()) / breakmat.innerSize();
rstring[r+1] = "";
right[r-1] = right[r] = kernel = LV.col(r) / LV.col(r).sum();
{ std::stringstream s; s << ".LV" << r; rstring[r-1] = rstring[r] = s.str(); }
for (--r; r > 0; --r) {
/*kernel = kernel * TR[r] * LV.col(r).transpose();*/
kernel = LV.col(r).asDiagonal() * TR[r] * kernel;
kernel /= kernel.sum();
right[r-1] = kernel;
std::stringstream ss;
ss << rstring[r] << ".TR" << r << ".LV" << r;
rstring[r-1] = ss.str();
}
r = LV.outerSize() - 1;
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
/*kernel = LV.col(0).asDiagonal() * init_kernel * LV.col(r).asDiagonal();*/
kernel = init_kernel;
/*kernel = init_kernel;*/
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
/*partial_kernels.clear();*/
MatrixXd tmp;
for (size_t i = 0; i <= r; ++i) {
/*MatrixXd tmp = right[i] * left[i];*/
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
/*tmp = left[i] * kernel * right[i];*/
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
/*partial_kernels.push_back(tmp);*/
/*std::cout << "PK" << i << " = " << lstring[i] << "K" << rstring[i] << std::endl;*/
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
}
/*tmp = left[r] * LV.col(0).asDiagonal() * init_kernel * right[r];*/
/*partial_kernels.push_back(tmp);*/
/*std::cout << "PK" << r << " = " << lstring[r] << "LV0.K" << rstring[r] << std::endl;*/
/*std::cout << __FILE__ << ':' << __LINE__ << std::endl;*/
#if 0
for (auto& kv: left) {
std::cout << "L" << kv.first << '\t' << kv.second.transpose() << std::endl;
}
for (auto& kv: right) {
std::cout << "R" << kv.first << '\t' << kv.second.transpose() << std::endl;
}
#endif
/*int i = 0;*/
/*for (auto& pk: partial_kernels) {*/
/*std::cout << "PK" << (i++) << std::endl << pk << std::endl;*/
/*}*/
}
std::pair<size_t, size_t> find_lr(double loc)
{
size_t l = 0, r = LV.outerSize() - 1;
if (loc < locus.front() || loc > locus.back()) {
throw out_of_segment_exception();
}
for (; (l + 1) < r && locus[l + 1] <= loc; ++l);
for (; locus[r - 1] > loc; --r);
return {l, r};
}
VectorXd fast_compute_at(double loc, size_t l, size_t r)
{
#if 1
MatrixXd& TRL = compute_tr(loc - locus[l]);
MatrixXd& TRR = compute_tr(locus[r] - loc);
auto L = (TRL * left[l]).array();
auto R = (TRR * right[l]).array();
VectorXd vv = redux * (L * R).matrix();
#else
VectorXd vv = redux * ((compute_tr(loc - locus[l]) * left[l]).array() * (compute_tr(locus[r] - loc) * right[l]).array()).matrix();
#endif
return vv / vv.sum();
}
labelled_matrix<MatrixXd, typename GENERATION::col_label_type, double>
fast_compute_over_segment(const std::vector<double>& steps)
{
compute_partial_kernels();
/*std::cout << "==column_labels== " << breakmat.column_labels << std::endl;*/
std::vector<typename GENERATION::col_label_type> labels;
size_t i = 0;
size_t j = 0;
while (i < redux.innerSize()) {
while (j < breakmat.column_labels.size() && redux(i, j) != 1) ++j;
labels.push_back(breakmat.column_labels[j]);
/*std::cout << "i=" << i << " j=" << j << " | " << labels << std::endl;*/
while (j < breakmat.column_labels.size() && redux(i, j) == 1) ++j;
++i;
}
/*for (size_t j = 0; i < redux.innerSize() && j < redux.outerSize(); ++j) {*/
/*if (redux(i, j) != 0) {*/
/*labels.push_back(breakmat.column_labels[j]);*/
/*++i;*/
/*}*/
/*}*/
labelled_matrix<MatrixXd, typename GENERATION::col_label_type, double>
ret(labels, steps);
/*ret(redux.innerSize(), steps.size());*/
/*std::cout << "==labels== " << ret.row_labels << std::endl;*/
/*std::cout << steps << std::endl << locus << std::endl;*/
size_t l = 0, last = steps.size() - 1, r = !!(LV.outerSize() - 1);
for (size_t i = 0; i < steps.size(); ++i) {
/*std::cout << "## " << steps[i] << std::endl;*/
ret.data.col(i) = fast_compute_at(steps[i], l, r);
if (steps[i] >= locus[r]) {
++l;
r += i < last;
}
}
std::set<typename GENERATION::col_label_type> ordered_labels(labels.begin(), labels.end());
MatrixXd reorder = MatrixXd::Zero(labels.size(), labels.size());
i = 0;
for (auto l: ordered_labels) {
reorder(i++, std::find(labels.begin(), labels.end(), l) - labels.begin()) = 1;
}
/*std::cout << "dim() = " << ret.data.innerSize() << ", " << ret.data.outerSize() << std::endl;*/
ret.data = reorder * ret.data;
ret.row_labels.assign(ordered_labels.begin(), ordered_labels.end());
/*std::cout << "==row.labels== " << ret.row_labels << std::endl;*/
/*std::cout << "==col.labels== " << ret.column_labels << std::endl;*/
/*std::cout << "dim() = " << ret.data.innerSize() << ", " << ret.data.outerSize() << std::endl;*/
return ret;
}
};
#endif
#ifndef _MCQTL_INPUT_H_
#define _MCQTL_INPUT_H_
extern "C" {
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
}
#include "error.h"
static inline
bool check_file(const std::string& path, bool req_directory, bool req_writable)
{
struct stat st;
if (stat(path.c_str(), &st)) {
ERROR("Error occurred trying to stat " << path << ": " << strerror(errno), "Check whether " << path << " exists and is accessible");
return false;
}
bool req_dir_ok = req_directory ? S_ISDIR(st.st_mode) : S_ISREG(st.st_mode);
bool req_rd_ok = st.st_mode & (S_IRUSR | S_IRGRP | S_IROTH);
bool req_wr_ok = st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH);
if (!req_dir_ok) {
if (req_directory) {
ERROR(path << " is not a directory", "Specify the path to directory, not a file [" << path << ']');
} else {
ERROR(path << " is not a regular file or a symbolic link to a regular file", "Specify the path to a file, not a directory [" << path << ']');
}
}
if (!req_rd_ok) {
ERROR(path << " is not readable", "Verify the permissions of " << path);
}
if (!req_wr_ok) {
ERROR(path << " is not writable", "Verify the permissions of " << path);
}
return req_dir_ok && req_rd_ok && req_wr_ok;
}
#include "input/input.h"
#include "input/design.h"
#include "input/read_map.h"
#include "input/read_mark.h"
#include "input/read_trait.h"
#include "input/design.h"
#include "settings.h"
#endif
......@@ -7,9 +7,10 @@
#include "generation_rs.h"
struct design_type {
std::string filename;
std::map<std::string, generation_rs*> generation;
design_type()
: generation()
: filename(), generation()
{}
};
......@@ -17,7 +18,7 @@ design_type* read_design(std::istream& is);
inline std::ostream& operator << (std::ostream& os, const design_type& d)
{
os << "Design:" << std::endl;
os << "Design (" << d.filename << ')' << std::endl;
auto g = d.generation.begin();
auto gend = d.generation.end();
for (; g != gend; ++g) {
......
#ifndef _MCQTL_INPUT_H_
#define _MCQTL_INPUT_H_
#ifndef _MCQTL_INPUT_BASE_H_
#define _MCQTL_INPUT_BASE_H_
#include <exception>
#include <sstream>
......
......@@ -25,23 +25,47 @@ struct bad_settings_exception : public std::exception {
};
struct population_marker_observation {
std::string filename;
marker_data observations;
operator marker_data& () { return observations; }
operator std::string () { return filename; }
};
struct population {
std::string name;
std::string qtl_generation_name;
std::string observed_traits_filename;
std::vector<trait> observed_traits;
std::map<std::string, marker_data> observed_mark;
std::map<std::string, population_marker_observation> observed_mark;
/* TODO: pedigree... */
std::string pedigree_filename;
double noise;
population()
: qtl_generation_name(), observed_traits_filename()
, observed_traits(), observed_mark()
, pedigree_filename()
, noise(0)
{}
};
struct format_specification_t {
std::string filename;
std::map<std::string, marker_observation_spec> map;
};
struct settings_t {
std::string name;
std::string notes;
design_type* design;
std::string map_filename;
std::vector<chromosome> map;
std::map<std::string, marker_observation_spec>* marker_observation_specs;
format_specification_t* marker_observation_specs;
std::map<std::string, population> populations;
double step;
......@@ -55,9 +79,29 @@ struct settings_t {
std::string cofactor_marker_selection_filename;
double cofactor_marker_selection_distance;
std::string epistasis_qtl_selection_filename;
enum class detection_method_t : char {
Undef,
iQTLm,
CIM
};
double qtl_threshold;
double cofactor_threshold;
bool P_required;
bool T_required;
bool E_required;
bool D_required;
detection_method_t detection_method;
double detection_window;
settings_t()
: notes(), design(0), map()
, marker_observation_specs(0)
: notes()
, design(0)
, map_filename(), map()
, marker_observation_specs()
, populations()
, step(1)
, parallel(0)
......@@ -69,18 +113,27 @@ struct settings_t {
, chromosome_selection()
, cofactor_marker_selection_filename()
, cofactor_marker_selection_distance(10)
, epistasis_qtl_selection_filename()
, qtl_threshold(0)
, cofactor_threshold(0)
, detection_method(detection_method_t::Undef)
, detection_window(10.)
{}
~settings_t()
{
if (design) { delete design; }
if (marker_observation_specs) { delete marker_observation_specs; }
/*if (marker_observation_specs) { delete marker_observation_specs; }*/
}
static settings_t* from_args(int argc, const char** argv);
bool sanity_check() const;
};
std::ostream