Commit ba677a50 authored by Damien Leroux's avatar Damien Leroux
Browse files

Added a small scripting language.

parent 61558c39
......@@ -53,51 +53,6 @@ ftest_along_chromosome(chromosome_value chr, const locus_key& lk,
*/
struct filename_stream {
struct filtering_streambuf : public std::streambuf {
std::streambuf* original;
filtering_streambuf(std::streambuf* _) : original(_) {}
protected:
int overflow(int ch) override
{
switch (ch) {
case ',':
case '}':
return ch;
case '{':
ch = '-';
break;
case ' ':
ch = '_';
default:;
};
return original->sputc(ch);
}
};
std::ostringstream sstream;
filtering_streambuf rdbuf;
std::ostream ostream;
filename_stream()
: sstream(), rdbuf(sstream.rdbuf()), ostream(&rdbuf)
{}
template <typename T>
friend
filename_stream& operator << (filename_stream& fs, T x)
{
fs.ostream << x;
return fs;
}
operator
std::string () const
{
return sstream.str();
}
};
enum probability_mode { Joint, Single };
......@@ -180,6 +135,28 @@ struct model_manager {
}
model_manager(const model_manager& mm)
: all_pops(mm.all_pops)
, pop_blocks(mm.pop_blocks)
, n_par(mm.n_par)
, Mcurrent(mm.Mcurrent)
, threshold(mm.threshold)
, test_type(mm.test_type)
, all_loci(mm.all_loci)
, chrom_under_study(mm.chrom_under_study)
, loci(mm.loci)
, locus_blocks(mm.locus_blocks)
, cac()
, last_computation(NULL)
, pmode(Single)
, output_rss(mm.output_rss)
, output_rank(mm.output_rank)
, output_test(mm.output_test)
, output_model(mm.output_model)
, testpos(mm.testpos)
{}
/* Initializing test positions (TODO: optional exclusion window)
*/
......@@ -350,6 +327,12 @@ struct model_manager {
: chrom(c), locus(l), test_value(tv), index(i), over_threshold(ot), block_key(mbk), block(mb)
{}
test_result(const test_result& tr)
: chrom(tr.chrom), locus(tr.locus), test_value(tr.test_value),
index(tr.index), over_threshold(tr.over_threshold),
block_key(tr.block_key), block(tr.block)
{}
test_result&
operator = (const test_result& tr)
{
......@@ -388,46 +371,68 @@ struct model_manager {
}
};
/* todo: study_maha(locus)
* creates base model by removing locus from proper block and computes maha distance cleanly
*/
MatrixXd study_maha(const chromosome* chr, double locus, double minpos, double maxpos)
{
model_manager tmp(*this);
tmp.select_chromosome(chr);
tmp.remove(locus);
value<model> Mbase = tmp.Mcurrent;
return custom_test_along_chromosome(ComputationType::Mahalanobis, ComputationResults::Test, Mbase, minpos, maxpos).mahalanobis;
}
/*enum ComputationType { NoTest=0, FTest=1, FTestLOD=2, R2=4, Chi2=8, Mahalanobis=16 };*/
std::string
computation_type_to_string(ComputationType ct)
{
std::stringstream ret;
if (ct & FTest) { ret << "_FTest"; }
if (ct & FTestLOD) { ret << "_FTestLOD"; }
if (ct & R2) { ret << "_R2"; }
if (ct & Chi2) { ret << "_Chi2"; }
if (ct & Mahalanobis) { ret << "_Mahalanobis"; }
return ret.str();
}
double max_testpos() const { return max_testpos(chrom_under_study); }
double max_testpos(const chromosome* c) const { return all_loci.find(c)->second->back(); }
const computation_along_chromosome&
custom_test_along_chromosome(ComputationType ct, ComputationResults cr)
custom_test_along_chromosome(ComputationType ct, ComputationResults cr, value<model> Mbase, double minpos, double maxpos)
{
Mcurrent.compute();
if (output_model) {
static Eigen::IOFormat model_format(Eigen::FullPrecision, Eigen::DontAlignCols, "\t", "\n", "", "", "", "");
filename_stream filename;
filename << "Model_" << Mcurrent.keys() << ".txt";
std::ofstream(filename) << Mcurrent.X().format(model_format);
Mcurrent.output_X_to_file();
Mcurrent.output_XtX_inv_to_file();
}
if (!loci->size()) {
static computation_along_chromosome dummy_cac; // = {{}, {}, {}, {}, {}, {}, {}, {}, {}};
return dummy_cac;
}
testpos.clear();
_recompute(testpos);
value<model> Mbase;
if (pmode == Joint) {
Mbase = Mcurrent.reduce(chrom_under_study);
Mbase->compute();
} else {
Mbase = Mcurrent.clone();
}
MSG_DEBUG("Rank(Mcurrent " << Mcurrent.keys() << ") = " << Mcurrent.rank());
MSG_DEBUG("Rank(Mbase " << Mbase->keys() << ") = " << Mbase->rank());
_recompute(testpos, minpos, maxpos);
/*MSG_DEBUG("Rank(Mcurrent " << Mcurrent.keys() << ") = " << Mcurrent.rank());*/
/*MSG_DEBUG("Rank(Mbase " << Mbase->keys() << ") = " << Mbase->rank());*/
compute_along_chromosome(cac, ct, cr,
Mcurrent, Mbase,
/*Mcurrent, Mcurrent,*/
locus_base_key,
chrom_under_study,
testpos,
locus_blocks);
if (output_test | output_rss | output_rank) {
MSG_DEBUG(MATRIX_SIZE(cac.ftest_pvalue));
MSG_DEBUG(MATRIX_SIZE(cac.rss));
/*MSG_DEBUG(MATRIX_SIZE(cac.ftest_pvalue));*/
/*MSG_DEBUG(MATRIX_SIZE(cac.rss));*/
filename_stream filename;
filename << Mcurrent.keys();
if (output_test) { filename << '_' << "Test"; }
if (output_test) { filename << computation_type_to_string(ct); }
if (output_rss) { filename << '_' << "RSS"; }
if (output_rank) { filename << '_' << "Rank"; }
filename << (pmode == Joint ? "_Joint" : "");
filename << ".txt";
std::ofstream f(filename);
if (output_test) { f << '\t' << "Test"; }
......@@ -467,7 +472,7 @@ struct model_manager {
}
test_result
test_along_chromosome()
test_along_chromosome(double min_pos, double max_pos)
{
if (!loci->size()) {
last_computation = NULL;
......@@ -476,7 +481,17 @@ struct model_manager {
ComputationResults cr = Test;
if (output_rss) { cr = cr | RSS; }
if (output_rank) { cr = cr | Rank; }
custom_test_along_chromosome(test_type, cr);
value<model> Mbase;
if (pmode == Joint) {
Mbase = Mcurrent.reduce(chrom_under_study);
Mbase->compute();
} else {
Mbase = Mcurrent.clone();
}
custom_test_along_chromosome(test_type, cr, Mbase, min_pos, max_pos);
switch(test_type) {
case ComputationType::FTest:
last_computation = &cac.ftest_pvalue;
......@@ -493,6 +508,36 @@ struct model_manager {
return find_max();
}
#if 0
double is_ghost_qtl(double locus)
{
locus_key selection = get_selection(chrom_under_study);
double highest_test = 0;
double highest_locus = -1;
auto pmode_backup = pmode;
pmode = Single;
model m0 = Mcurrent;
m0.add(chrom_under_study, locus);
for (double l: selection) {
model mA = m0;
mA.add(chrom_under_study, l);
model mB = mA;
mB.ghost_constraint(chrom_under_study, locus, l);
MSG_DEBUG("GHOST CHECK");
MSG_DEBUG(mA);
MSG_DEBUG(mB);
/* FIXME: which test? */
double test = ftest(mA, mB);
if (test > highest_test) {
highest_test = test;
}
}
pmode = pmode_backup;
return highest_test;
}
#endif
test_result
find_max()
{
......@@ -532,7 +577,7 @@ struct model_manager {
if (!loci->size()) {
continue;
}
auto tmp = test_along_chromosome();
auto tmp = test_along_chromosome(0, max_testpos());
if (tmp.test_value > ret.test_value) {
ret = tmp;
}
......@@ -726,15 +771,27 @@ struct model_manager {
model::printable_keys keys() const { return Mcurrent.keys(); }
private:
void _recompute(std::vector<double>& testpos)
void _recompute(std::vector<double>& testpos, double minpos, double maxpos)
{
locus_key lk;
if (pmode == Joint) {
/*lk = locus_base_key[chrom_under_study];*/
lk = get_selection(chrom_under_study);
}
_recompute(lk, testpos, minpos, maxpos);
}
void _recompute(const locus_key& lk, std::vector<double>& testpos, double minpos, double maxpos)
{
/*testpos = compute_effective_loci(lk, *loci);*/
testpos = *loci;
std::vector<double>::const_iterator mini, maxi;
std::vector<double>::const_iterator i = loci->begin(), j = loci->end();
while (i != j && (*i) < minpos) { ++i; }
mini = i;
while (i != j && (*i) < maxpos) { ++i; }
maxi = i;
/*testpos = *loci;*/
testpos.assign(mini, maxi);
/*MSG_DEBUG(__LINE__ <<":chromosome " << chrom_under_study->name << std::endl*/
/*<< " loci " << (*loci) << std::endl*/
/*<< " testpos " << testpos); MSG_QUEUE_FLUSH();*/
......
......@@ -71,6 +71,12 @@ std::ostream& operator << (std::ostream& os, const std::vector<double>& v)
#endif
struct DirectOutputIsForbidden : public std::runtime_error {
DirectOutputIsForbidden()
: std::runtime_error("Direct output to std::cerr, cour, or clog is forbidden. Use CREATE_MESSAGE(channel, MESSAGE(expression)) instead.")
{}
};
struct ostream_manager {
/* FIXME indent/dedent must be managed by this very class... Maybe use \x1 for indent and \x2 for dedent... */
......
......@@ -451,9 +451,88 @@ MatrixXd
contrast_groups(const collection<const population*>& all_pops, const locus_key& lk);
struct filename_stream {
struct filtering_streambuf : public std::streambuf {
std::streambuf* original;
filtering_streambuf(std::streambuf* _) : original(_) {}
protected:
int overflow(int ch) override
{
switch (ch) {
case ',':
case '}':
return ch;
case '{':
ch = '-';
break;
case ' ':
ch = '_';
default:;
};
return original->sputc(ch);
}
};
std::ostringstream sstream;
filtering_streambuf rdbuf;
std::ostream ostream;
filename_stream()
: sstream(), rdbuf(sstream.rdbuf()), ostream(&rdbuf)
{}
template <typename T>
friend
filename_stream& operator << (filename_stream& fs, T x)
{
fs.ostream << x;
return fs;
}
operator
std::string () const
{
return sstream.str();
}
};
static inline
MatrixXd matrix_inverse(const MatrixXd& m)
{
/* FIXME Should NOT use SVD (see note in compute() */
JacobiSVD<MatrixXd> inverter(m, ComputeFullV);
auto& V = inverter.matrixV();
VectorXd inv_sv(inverter.singularValues());
for (int i = 0; i < inv_sv.innerSize(); ++i) {
if (!around_zero(inv_sv(i))) {
inv_sv(i) = 1. / inv_sv(i);
} else {
inv_sv(i) = 0.;
}
}
#ifdef ULTRA_MEGA_PARANOID
/* CHECK THAT THIS IS STABLE BY THE BEN ISRAEL SEQUENCE */
MatrixXd svd_ret = V * inv_sv.asDiagonal() * V.transpose();
MatrixXd ret = svd_ret;
MatrixXd test = 2 * ret - ret * m * ret;
if (!test.isApprox(ret, .00001)) {
MSG_DEBUG("ret" << std::endl << ret << std::endl << "test" << std::endl << test);
} else {
MSG_DEBUG("m^-1 IS GOOD! Yeah man.");
}
return ret;
#else
return V * inv_sv.asDiagonal() * V.transpose();
#endif
}
struct model {
model()
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_all_pops()
: m_Y(), m_blocks(), m_X(), m_rank(), m_rss(), m_coefficients(), m_solver_type(), m_computed(false), m_with_constraints(false), m_ghost_constraint(), m_all_pops()
{}
model(const value<MatrixXd>& y, const collection<const population*>& pops, SolverType st = SolverType::QR)
......@@ -463,6 +542,7 @@ struct model {
, m_solver_type(st)
, m_computed(false)
, m_with_constraints(false)
, m_ghost_constraint()
, m_all_pops(pops)
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << y.innerSize() << ',' << y.outerSize() << ')'); }*/
{}
......@@ -474,7 +554,8 @@ struct model {
, m_residuals(mo.m_residuals)
, m_solver_type(mo.m_solver_type)
, m_computed(mo.m_computed)
, m_with_constraints(false)
, m_with_constraints(mo.m_with_constraints)
, m_ghost_constraint()
, m_all_pops(mo.m_all_pops)
/*{ MSG_DEBUG("new model " << __LINE__ << " with Y(" << m_Y->innerSize() << ',' << m_Y->outerSize() << ')'); }*/
{}
......@@ -488,6 +569,7 @@ struct model {
m_blocks = mo.m_blocks;
m_computed = mo.m_computed;
m_with_constraints = mo.m_with_constraints;
m_ghost_constraint = mo.m_ghost_constraint;
m_X = mo.m_X;
m_rss = mo.m_rss;
m_coefficients = mo.m_coefficients;
......@@ -504,6 +586,7 @@ struct model {
m_blocks.swap(mo.m_blocks);
m_computed = mo.m_computed;
m_with_constraints = mo.m_with_constraints;
m_ghost_constraint = mo.m_ghost_constraint;
m_X = mo.m_X;
m_rss = mo.m_rss;
m_coefficients = mo.m_coefficients;
......@@ -516,16 +599,22 @@ struct model {
constraint_list
compute_constraint(const model_block_key& mbk)
{
constraint_list ret;
if (mbk.empty()) {
return {};
return ret;
} else if (mbk.selection.size() == 1) {
/* only one chromosome: do the contrast groups trick */
return {{{mbk, contrast_groups(m_all_pops, mbk.selection.begin()->second)}}};
ret = {{{mbk, contrast_groups(m_all_pops, mbk.selection.begin()->second)}}};
} else {
/* need the epistasis magic here */
return {};
}
return {};
if (mbk == m_ghost_constraint.first) {
int n_par = m_blocks[mbk]->outerSize();
ret.emplace_back();
ret.back().insert({{m_ghost_constraint.first, MatrixXd::Identity(n_par, n_par)},
{m_ghost_constraint.second, -1. * MatrixXd::Identity(n_par, n_par)}});
}
return ret;
}
......@@ -591,6 +680,15 @@ struct model {
}
}
void ghost_constraint(const chromosome* chr, double l1, double l2)
{
model_block_key k1, k2;
k1 += std::make_pair(chr, l1);
k2 += std::make_pair(chr, l2);
m_ghost_constraint.first = k1;
m_ghost_constraint.second = k2;
}
SolverType solver_type() const
{
return m_solver_type;
......@@ -790,6 +888,8 @@ struct model {
MatrixXd XtX_pseudo_inverse() const
{
return matrix_inverse(XtX());
#if 0
/* FIXME Should NOT use SVD (see note in compute() */
JacobiSVD<MatrixXd> inverter(XtX(), ComputeFullV);
auto& V = inverter.matrixV();
......@@ -801,7 +901,37 @@ struct model {
inv_sv(i) = 0.;
}
}
#ifdef ULTRA_MEGA_PARANOID
/* CHECK THAT THIS IS STABLE BY THE BEN ISRAEL SEQUENCE */
MatrixXd svd_ret = V * inv_sv.asDiagonal() * V.transpose();
MatrixXd ret = svd_ret;
MatrixXd test = 2 * ret - ret * XtX() * ret;
if (!test.isApprox(ret, .00001)) {
MSG_DEBUG("ret" << std::endl << ret << std::endl << "test" << std::endl << test);
} else {
MSG_DEBUG("XtX^-1 IS GOOD! Yeah man.");
}
return ret;
#else
return V * inv_sv.asDiagonal() * V.transpose();
#endif
#endif
}
void output_X_to_file() const
{
static Eigen::IOFormat model_format(Eigen::FullPrecision, Eigen::DontAlignCols, "\t", "\n", "", "", "", "");
filename_stream filename;
filename << "Model_" << keys() << "_X.txt";
std::ofstream(filename) << X().format(model_format);
}
void output_XtX_inv_to_file() const
{
static Eigen::IOFormat model_format(Eigen::FullPrecision, Eigen::DontAlignCols, "\t", "\n", "", "", "", "");
filename_stream filename;
filename << "Model_" << keys() << "_XtX_inv.txt";
std::ofstream(filename) << XtX_pseudo_inverse().format(model_format);
}
model_block_collection
......@@ -920,6 +1050,7 @@ struct model {
/*decomposition_base* m_solver;*/
bool m_computed;
bool m_with_constraints;
std::pair<model_block_key, model_block_key> m_ghost_constraint;
collection<const population*> m_all_pops;
public:
......
......@@ -210,9 +210,10 @@ VectorXd r2(const model& mini_model, const model& final_model)
static inline
void
mahalanobis(const model& MQ, const model& Mx, int col_num, MatrixXd* ret_mat)
___mahalanobis(const model& MQ, const model& Mx, int col_num, MatrixXd* ret_mat)
{
#define DUMP(_x_) MSG_DEBUG(#_x_ << std::endl << _x_)
/*#define DUMP(_x_) MSG_DEBUG(#_x_ << std::endl << _x_)*/
#define DUMP(_x_) #_x_ << std::endl << _x_
const auto& CQ = MQ.coefficients();
const auto& Cx = Mx.coefficients();
/*DUMP(CQ);*/
......@@ -225,20 +226,70 @@ mahalanobis(const model& MQ, const model& Mx, int col_num, MatrixXd* ret_mat)
double dofQ = .5 / (MQ.Y().innerSize() - MQ.rank());
double dofx = .5 / (Mx.Y().innerSize() - Mx.rank());
MatrixXd VarCovar = (dofQ * RSSQ(0, 0) * XtXQ_inv)
+ (dofx * RSSx(0, 0) * XtXx_inv);
+ (dofx * RSSx(0, 0) * XtXx_inv)
;
#if 0
MSG_DEBUG("========== MAHALANOBIS ==========================================================" << std::endl
<< DUMP(RSSQ) << std::endl
<< DUMP(RSSx) << std::endl
/*<< "XQ" << std::endl << MQ.X().transpose() << std::endl*/
<< MATRIX_SIZE(MQ.X()) << std::endl
<< MATRIX_SIZE(XtXQ_inv) << std::endl
/*<< "Xx" << std::endl << Mx.X().transpose() << std::endl*/
<< MATRIX_SIZE(Mx.X()) << std::endl
<< MATRIX_SIZE(XtXx_inv) << std::endl
<< DUMP(dofQ) << std::endl
<< DUMP(dofx) << std::endl
<< DUMP(VarCovar) << std::endl
<< "=================================================================================");
#endif
(*ret_mat)(col_num, 0) = (Cdiff.transpose() * matrix_inverse(VarCovar) * Cdiff)(0, 0);
#undef DUMP
}
static inline
void
mahalanobis(const model& MQ, const model& Mx, int col_num, MatrixXd* ret_mat)
{
/*#define DUMP(_x_) MSG_DEBUG(#_x_ << std::endl << _x_)*/
#define DUMP(_x_) #_x_ << std::endl << _x_
auto CQ = MQ.coefficients();
auto Cx = Mx.coefficients();
CQ(0, 0) = 0;
Cx(0, 0) = 0;
/*DUMP(CQ);*/
/*DUMP(Cx);*/
/*const auto& Cdiff = CQ - Cx;*/
const auto& RSSQ = MQ.rss();
const auto& RSSx = Mx.rss();
const auto XtXQ_inv = MQ.XtX_pseudo_inverse();
const auto XtXx_inv = Mx.XtX_pseudo_inverse();
double dofQ = .5 / (MQ.Y().innerSize() - MQ.rank());
double dofx = .5 / (Mx.Y().innerSize() - Mx.rank());
MatrixXd VarCovar = (dofQ * RSSQ(0, 0) * XtXQ_inv)
+ (dofx * RSSx(0, 0) * XtXx_inv)
;
#if 0
DUMP(RSSQ);
DUMP(RSSx);
MSG_DEBUG("XQ" << std::endl << MQ.X().transpose());
DUMP(XtXQ_inv);
MSG_DEBUG("Xx" << std::endl << Mx.X().transpose());
DUMP(XtXx_inv);
DUMP(dofQ);
DUMP(dofx);
DUMP(VarCovar);
MSG_DEBUG("=================================================================================");
MSG_DEBUG("========== MAHALANOBIS ==========================================================" << std::endl
<< DUMP(RSSQ) << std::endl
<< DUMP(RSSx) << std::endl
/*<< "XQ" << std::endl << MQ.X().transpose() << std::endl*/
<< MATRIX_SIZE(MQ.X()) << std::endl
<< MATRIX_SIZE(XtXQ_inv) << std::endl
/*<< "Xx" << std::endl << Mx.X().transpose() << std::endl*/
<< MATRIX_SIZE(Mx.X()) << std::endl
<< MATRIX_SIZE(XtXx_inv) << std::endl
<< DUMP(dofQ) << std::endl
<< DUMP(dofx) << std::endl
<< DUMP(VarCovar) << std::endl
<< "=================================================================================");
#endif
(*ret_mat)(col_num, 0) = (Cdiff.transpose() * VarCovar.inverse() * Cdiff)(0, 0);
double dot = (CQ.transpose() * matrix_inverse(VarCovar) * Cx)(0, 0);
(*ret_mat)(col_num, 0) = dot
/ sqrt((CQ.transpose() * matrix_inverse(VarCovar) * CQ)(0, 0))
/ sqrt((Cx.transpose() * matrix_inverse(VarCovar) * Cx)(0, 0))
;
#undef DUMP
}
......
This diff is collapsed.
#ifndef _SPELL_SCRIPT_PARSER_H_
#define _SPELL_SCRIPT_PARSER_H_