Commit ec86fce1 authored by Damien Leroux's avatar Damien Leroux
Browse files

Reporting WIP.

parent 63f9dadd
......@@ -100,6 +100,7 @@ set(SPELL_QTL_SRC
# src/input/xml/xml_format.cc src/input/xml/xml_settings.cc
src/static_data.cc src/main.cc src/beta_gamma.cc
src/computations/basic_data.cc src/computations/probabilities.cc src/computations/model.cc src/computations/frontends.cc
src/computations/report.cc
)
MESSAGE(STATUS "spell-pedigree src = ${SPELL_PEDIGREE_SRC}")
......
This diff is collapsed.
......@@ -362,12 +362,6 @@ inline void arg_write(std::ostream&, const computation_along_chromosome*) {}
inline bool arg_match(std::istream&, const computation_along_chromosome*) { return true; }
namespace std { template<> struct hash<computation_along_chromosome*> { bool operator () (const computation_along_chromosome*) const { return 1; } }; }
enum ComputationType { NoTest=0, FTest=1, FTestLOD=2, R2=4, Chi2=8, Chi2LOD=16, Mahalanobis=32 };
enum ComputationResults { Test=0, RSS=1, Residuals=2, Coefficients=4, Rank=8 };
constexpr ComputationType operator | (ComputationType a, ComputationType b) { return (ComputationType) (((int)a) | ((int)b)); }
constexpr ComputationResults operator | (ComputationResults a, ComputationResults b) { return (ComputationResults) (((int)a) | ((int)b)); }
namespace std {
template <>
......
This diff is collapsed.
/* Spell-QTL Software suite for the QTL analysis of modern datasets.
* Copyright (C) 2016,2017 Damien Leroux <damien.leroux@inra.fr>, Sylvain Jasson <sylvain.jasson@inra.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SPEL_REPORT_FRONTEND_H_
#define _SPEL_REPORT_FRONTEND_H_
#include "computations/frontends4.h"
#endif
This diff is collapsed.
......@@ -89,6 +89,7 @@ struct builder {
builder<S>& operator << (const char* x);
template <typename X> builder<S>& operator << (X&& x);
builder<S>& operator << (_Stream&(*x)(_Stream&));
builder<S>& operator << (std::ios_base& (&)(std::ios_base&));
};
......@@ -103,6 +104,7 @@ struct endpoint {
builder<S> operator << (const char* x);
template <typename X> builder<S> operator << (X&& x);
builder<S> operator << (_Stream&(*x)(_Stream&));
builder<S> operator << (std::ios_base& (&)(std::ios_base&));
};
......
......@@ -210,6 +210,18 @@ struct section<SectionLabel, void>
};
template <>
struct section<void, void>
: public with_section_label<void>,
public with_field_labels<void> {
section(size_t nf)
: with_section_label<void>(), with_field_labels<void>(nf)
{}
const std::string& label() const { static std::string _; return _; }
};
struct section_size {
size_t label_length;
size_t size;
......
......@@ -23,11 +23,13 @@ namespace output {
template <typename S> builder<S>& builder<S>::operator << (const char* x) { std::move(_s) << x; return *this; }
template <typename S> template <typename X> builder<S>& builder<S>::operator << (X&& x) { _s << x; return *this; }
template <typename S> builder<S>& builder<S>::operator << (_Stream&(*x)(_Stream&)) { _s << x; return *this; }
template <typename S> builder<S>& builder<S>::operator << (std::ios_base& (&x)(std::ios_base&)) { _s << x; return *this; }
template <typename S> builder<S> endpoint<S>::operator << (const char* x) { builder<S> b(out); b._s << x; return b; }
template <typename S> template <typename X> builder<S> endpoint<S>::operator << (X&& x) { builder<S> b(out); b._s << x; return b; }
template <typename S> builder<S> endpoint<S>::operator << (_Stream&(*x)(_Stream&)) { builder<S> b(out); b._s << x; return b; }
template <typename S> builder<S> endpoint<S>::operator << (std::ios_base& (&x)(std::ios_base&)) { builder<S> b(out); b._s << x; return b; }
#endif
#ifdef _SPELL_FILE_H_
......
......@@ -45,6 +45,13 @@
#include "task_pool.h"
enum ComputationType { NoTest=0, FTest=1, FTestLOD=2, R2=4, Chi2=8, Chi2LOD=16, Mahalanobis=32 };
enum ComputationResults { Test=0, RSS=1, Residuals=2, Coefficients=4, Rank=8 };
constexpr ComputationType operator | (ComputationType a, ComputationType b) { return (ComputationType) (((int)a) | ((int)b)); }
constexpr ComputationResults operator | (ComputationResults a, ComputationResults b) { return (ComputationResults) (((int)a) | ((int)b)); }
struct pleiotropy_descr {
std::string name;
......@@ -217,12 +224,15 @@ struct settings_t {
std::vector<trait> covar_per_pop;
double lod_support;
double lod_support_inner, lod_support_outer;
std::map<std::string, trait_metadata_type> trait_metadata;
double Rjoin;
double Rskip;
std::string report_format_string;
ComputationResults additional_table_output;
settings_t()
: notes()
......@@ -274,9 +284,13 @@ struct settings_t {
, with_traits()
, with_lg()
, covar_per_pop()
, lod_support(1.)
, lod_support_inner(1.)
, lod_support_outer(lod_support_inner * 2)
, Rjoin(10.)
, Rskip(1.)
, report_format_string("text,r,excel")
// , additional_table_output((ComputationResults) 0)
, additional_table_output(ComputationResults::RSS | ComputationResults::Rank)
{}
std::vector<std::pair<const chromosome*, double>>
......
......@@ -122,6 +122,7 @@ init_skeleton(model_manager& mm)
model_manager&
forward(model_manager& mm)
{
report_algo_phase("Cofactor detection: forward");
/*MSG_DEBUG("FORWARD");*/
while (true) {
const auto& result = mm.search_new_best();
......@@ -141,6 +142,7 @@ forward(model_manager& mm)
model_manager&
backward(model_manager& mm)
{
report_algo_phase("Cofactor detection: backward");
mm.select_all_loci();
while (true) {
auto worst_kb = mm.weakest_link();
......@@ -162,6 +164,7 @@ backward(model_manager& mm)
model_manager&
qtl_detect_cim(model_manager& mm)
{
report_algo_phase("QTL Detection: CIM");
return mm;
}
......@@ -170,6 +173,7 @@ qtl_detect_cim(model_manager& mm)
model_manager&
qtl_detect_cim_minus(model_manager& mm)
{
report_algo_phase("QTL Detection: CIM-");
auto chrom_best = mm.search_new_best_per_chromosome(true);
mm.clear_selection();
for (const auto& cb: chrom_best) {
......@@ -237,17 +241,18 @@ in_history(std::set<std::map<chromosome_value, locus_key>>& history, const std::
bool
iqtlm_backward(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
iqtlm_backward(std::string algo_suffix, model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
{
DUMP_FILE_LINE();
report_algo_phase(MESSAGE("iQTLm" << algo_suffix << " backward"));
// DUMP_FILE_LINE();
while (iqtlm_backward_one(mm)) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
if (in_history(history, mm.get_selection())) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
return false;
}
}
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
return true;
}
......@@ -255,27 +260,28 @@ iqtlm_backward(model_manager& mm, std::set<std::map<chromosome_value, locus_key>
bool
iqtlm_forward(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
{
DUMP_FILE_LINE();
report_algo_phase("iQTLm forward");
// DUMP_FILE_LINE();
while (true) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
auto results = mm.search_new_best_per_chromosome();
bool modified = false;
for (const auto& result: results) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
modified |= result.second.over_threshold;
if (result.second.over_threshold) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
result.second.select(mm);
MSG_DEBUG("added " << result);
}
}
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
if (!modified) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
return true;
}
if (in_history(history, mm.get_selection())) {
DUMP_FILE_LINE();
// DUMP_FILE_LINE();
return false;
}
}
......@@ -285,6 +291,7 @@ iqtlm_forward(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>
bool
iqtlm_forward_gw(model_manager& mm, std::set<std::map<chromosome_value, locus_key>>& history)
{
report_algo_phase("iQTLm-GW forward");
while (true) {
auto result = mm.search_new_best();
bool modified = result.over_threshold;
......@@ -304,12 +311,13 @@ iqtlm_forward_gw(model_manager& mm, std::set<std::map<chromosome_value, locus_ke
model_manager&
qtl_detect_iqtlm(model_manager& mm)
{
report_algo_phase("QTL detection: iQTLm");
MSG_INFO("iQTLm starts with: " << mm.get_selection());
std::set<std::map<chromosome_value, locus_key>> history;
std::map<chromosome_value, locus_key> last_sel, new_sel;
new_sel = mm.get_selection();
do {
if (iqtlm_backward(mm, history) && iqtlm_forward(mm, history)) {
if (iqtlm_backward("", mm, history) && iqtlm_forward(mm, history)) {
last_sel = new_sel;
new_sel = mm.get_selection();
} else {
......@@ -323,11 +331,12 @@ qtl_detect_iqtlm(model_manager& mm)
model_manager&
qtl_detect_iqtlm_gw(model_manager& mm)
{
report_algo_phase("QTL detection: iQTLm-GW");
std::set<std::map<chromosome_value, locus_key>> history;
std::map<chromosome_value, locus_key> last_sel, new_sel;
new_sel = mm.get_selection();
while (new_sel != last_sel) {
if (iqtlm_backward(mm, history) && iqtlm_forward_gw(mm, history)) {
if (iqtlm_backward("-GW", mm, history) && iqtlm_forward_gw(mm, history)) {
last_sel = new_sel;
new_sel = mm.get_selection();
} else {
......
#include "computations/frontends4.h"
std::unique_ptr<analysis_report> ar_ptr;
void
init_analysis_report(model_manager& mm)
{
ar_ptr.reset(new analysis_report(mm));
ar_ptr->trait(mm.trait_name);
}
analysis_report&
ar_instance()
{
return *ar_ptr;
}
analysis_report_computations::analysis_report_computations(model_manager& _)
: mm(_)
, poi()
, roi()
, X(mm.vMcurrent->X())
{}
void
analysis_report_computations::check_constraints()
{
auto X = mm.vMcurrent->X();
if (mm.vMcurrent->rank() < X.cols()) {
MSG_ERROR("Insufficient rank for model matrix.", "Report to the developers");
}
int n_constraints = X.rows();
for (const auto& p: mm.all_pops) {
n_constraints -= (*p)->size();
}
auto res = mm.vMcurrent->residuals().bottomRows(n_constraints);
if (res.lpNorm<1>() > CONSTRAINT_RESIDUAL_EPSILON) {
// MSG_DEBUG(mm.vMcurrent->residuals().transpose());
// MSG_DEBUG(res.transpose());
double prout = res.lpNorm<1>();
double eps = CONSTRAINT_RESIDUAL_EPSILON;
MSG_ERROR("Inconsistent contraints for model matrix. " << prout << " > " << eps, "Report to the developers");
}
}
void
analysis_report_computations::prepare_computations()
{
TM = active_settings->trait_metadata[mm.trait_name];
mm.vMcurrent->compute();
Pt = TM.P.transpose();
all_coefficients = mm.vMcurrent->coefficients() * Pt;
/* FIXME use actual full Y instead of un-projecting the traits? */
all_residuals = mm.vMcurrent->residuals() * Pt;
all_rss = all_residuals.array().square().colwise().sum();
pseudo_variances = (mm.vMcurrent->rss().transpose() * Pt.array().square().matrix()).transpose()
/ (mm.vMcurrent->n_obs() - mm.vMcurrent->dof() - 1);
}
mws_result_by_block
analysis_report_computations::R2(int i)
{
MatrixXd R2(1, mm.vMcurrent->m_blocks.size());
auto bi = mm.vMcurrent->m_blocks.begin(), bj = mm.vMcurrent->m_blocks.end();
int c = 0;
for (; bi != bj; ++bi, ++c) {
auto M0 = *mm.vMcurrent;
M0.remove_block(bi->first);
M0.compute();
MatrixXd M0residuals = M0.residuals() * Pt;
VectorXd M0rss = M0residuals.array().square().colwise().sum();
double r1 = all_rss(i);
double r0 = M0rss(i);
R2(0, c) = fabs(r0) < 1.e-6 ? 0 : (r0 - r1) / r0; /* FIXME use some real epsilon */
}
mws_result_by_block mws(R2);
static std::vector<std::vector<char>> empty_label = {{' '}};
bi = mm.vMcurrent->m_blocks.begin();
for (/*++bi*/; bi != bj; ++bi) { mws.add_column_section(bi->first, empty_label); }
mws.add_row_section("", 1);
return mws;
}
int
analysis_report_computations::significance_level(double q)
{
static const auto& L = significance_legend();
return std::find_if(L.thresholds.begin(), L.thresholds.end(), [=] (double d) { return q < d; }) - L.thresholds.begin();
}
std::vector<qtl_description>
analysis_report_computations::prepare_qtls(std::vector<QTL> &qtls, ComputationType lod_test_type)
{
std::vector<qtl_description> ret;
for (auto& qtl: qtls) {
ret.emplace_back(qtl_description{qtl.chromosome, qtl.locus, 0., 0., 0., 0.});
auto& descr = ret.back();
auto ci = qtl.confidence_interval();
descr.outer_interval_start = ci[0];
descr.outer_interval_end = ci[1];
descr.inner_interval_start = ci[2];
descr.inner_interval_end = ci[3];
// std::tie(descr.inner_interval_start, descr.inner_interval_end,
// descr.outer_interval_start, descr.outer_interval_end) = qtl.confidence_interval();
roi[qtl.chromosome][qtl.locus] = {descr.outer_interval_start, descr.outer_interval_end};
// report_lod(qtl);
std::string name = MESSAGE(mm.trait_name << " @ " << qtl.locus << " [" << descr.outer_interval_start << ':' << descr.outer_interval_end << ']');
if (poi[qtl.chromosome][qtl.locus].size()) {
poi[qtl.chromosome][qtl.locus] = MESSAGE(poi[qtl.chromosome][qtl.locus] << ',' << name);
} else {
poi[qtl.chromosome][qtl.locus] = name;
}
}
return ret;
}
inline
std::pair<std::vector<size_t>, std::vector<std::pair<model_block_key, std::vector<std::vector<char>>>>>
analysis_report_computations::filter_columns(const MatrixXd& conrow, const model_block_collection& blocks)
{
std::pair<std::vector<size_t>, std::vector<std::pair<model_block_key, std::vector<std::vector<char>>>>> ret;
auto& columns = ret.first;
auto& block_parts = ret.second;
size_t j = 0;
for (const auto& kv: blocks) {
const auto& labels = kv.second->column_labels;
for (size_t i = 0; i < labels.size(); ++i, ++j) {
if (conrow(j)) {
if (!block_parts.size() || block_parts.back().first != kv.first) {
MSG_DEBUG("first used column in block " << kv.first);
block_parts.emplace_back();
block_parts.back().first = kv.first;
block_parts.back().second.push_back(labels[i]);
} else {
block_parts.back().second.push_back(labels[i]);
}
MSG_DEBUG("added column #" << j);
MSG_QUEUE_FLUSH();
columns.push_back(j);
}
}
}
return ret;
}
std::tuple<MatrixXd, MatrixXd, MatrixXi>
analysis_report_computations::contrasts(const std::vector<size_t>& columns, const MatrixXd& conmat, const MatrixXd& vcov, const MatrixXd& coef, int r)
{
// auto subset = filter_columns(conmat.row(r), mm.vMcurrent->m_blocks);
// const auto& columns = subset.first;
// const auto& blocks = subset.second;
std::tuple<MatrixXd, MatrixXd, MatrixXi> ret {
MatrixXd::Zero(columns.size(), columns.size()),
MatrixXd::Zero(columns.size(), columns.size()),
MatrixXi::Zero(columns.size(), columns.size())
};
auto& pvmat = std::get<0>(ret);
auto& ctrmat = std::get<1>(ret);
auto& stymat = std::get<2>(ret);
MatrixXd pvalue_mat = MatrixXd::Zero(pvmat.rows(), pvmat.cols());
for (int i1 = 0; i1 < pvmat.cols(); ++i1) {
stymat(i1, i1) = significance_level(1.);
for (int i2 = i1 + 1; i2 < pvmat.cols(); ++i2) {
int a1 = columns[i1];
int a2 = columns[i2];
double sigma = vcov(a1, a1) + vcov(a2, a2) - 2 * vcov(a1, a2);
double delta = coef(a1) - coef(a2);
normal s(0, sigma > 0 ? sqrt(sigma) : 1.e-9);
double pvalue = 2 * cdf(complement(s, fabs(delta)));
pvmat(i1, i2) = pvmat(i2, i1) = pvalue;
ctrmat(i1, i2) = -delta;
ctrmat(i2, i1) = delta;
stymat(i1, i2) = stymat(i2, i1) = significance_level(pvalue);
}
}
return ret;
}
analysis_report&
analysis_report::report_computation(std::string chrom, std::string trait_name, std::vector<double>& loci, const computation_along_chromosome& cac, const test_result& tr)
{
report_algo_selection(chrom, tr.test_value, tr.locus, mm.threshold);
std::vector<std::string> colnames = {"locus"};
MatrixXd values = MatrixXd::Zero(loci.size(),
1
+ cac.chi2.cols() + cac.chi2_lod.cols() + cac.coefficients.cols()
+ cac.ftest_lod.rows() + cac.ftest_pvalue.rows() + cac.mahalanobis.cols()
+ cac.r2.cols() + cac.rank.cols() + cac.residuals.cols() + cac.rss.rows());
values.col(0) = Eigen::Map<VectorXd>(loci.data(), loci.size());
int c = 1;
include_data<MatrixXd, &computation_along_chromosome::chi2, false>(c, colnames, values, "chi2.pvalue", cac);
include_data<MatrixXd, &computation_along_chromosome::chi2_lod, false>(c, colnames, values, "chi2.lod", cac);
include_data<MatrixXd, &computation_along_chromosome::coefficients, false>(c, colnames, values, "coef", cac);
include_data<MatrixXd, &computation_along_chromosome::ftest_lod, true>(c, colnames, values, "ftest.lod", cac);
include_data<MatrixXd, &computation_along_chromosome::ftest_pvalue, true>(c, colnames, values, "ftest.pvalue", cac);
include_data<MatrixXd, &computation_along_chromosome::mahalanobis, false>(c, colnames, values, "mahalanobis", cac);
include_data<MatrixXd, &computation_along_chromosome::r2, false>(c, colnames, values, "R2", cac);
include_data<VectorXd, &computation_along_chromosome::rank, false>(c, colnames, values, "rank", cac);
include_data<MatrixXd, &computation_along_chromosome::residuals, false>(c, colnames, values, "residuals", cac);
include_data<MatrixXd, &computation_along_chromosome::rss, true>(c, colnames, values, "RSS", cac);
m_stream.mode(ReportMode::LOD);
select_book("computations").select_sheet(MESSAGE("computation " << m_selection_index));
m_stream
.line(colnames)
.matrix(values);
select_previous_book();
return *this;
}
void
analysis_report::init_selection_report()
{
static bool must_init = true;
if (must_init) {
must_init = false;
m_stream.mode(ReportMode::AlgoSel);
select_book("algo_trace").select_sheet("Computations");
m_stream.init_selection_report(std::vector<std::string> {"Index", "Group", "Selection", "Result.score", "Result.locus", "Result.threshold"});
}
}
analysis_report&
analysis_report::report_algo_selection(std::string chrom, double score, double locus, double threshold)
{
init_selection_report();
++m_selection_index;
m_stream.mode(ReportMode::AlgoSel);
select_book("algo_trace").select_sheet("Computations");
m_stream
.selection(m_selection_index, chrom, MESSAGE(mm.vMcurrent->keys()), score, locus, threshold);
// if (m_what & AR::Model) {
select_book("algo_trace").select_sheet(MESSAGE(m_selection_index << " M0"));
m_stream
.matrix(MESSAGE("M0 (" << mm.vMcurrent->keys() << ")"), mm.vMcurrent->printable_X());
// }
select_previous_book();
return *this;
}
inline
analysis_report&
analysis_report::report_model(const model& Mcurrent)
{
std::string modelx = MESSAGE("Final model X");
std::string modelxtx = MESSAGE("Final model XtX");
m_stream.mode(ReportMode::Model);
select_book("models");
select_sheet(modelx);
m_stream
.matrix(modelx, Mcurrent.printable_X());
select_sheet(modelxtx);
m_stream
.matrix(modelxtx, model::xtx_printable(Mcurrent, true).mws);
return *this;
}
analysis_report&
analysis_report::report_qtls()
{
ComputationType ret = mm.vMcurrent->Y().cols() == 1 ? ComputationType::FTestLOD : ComputationType::Chi2LOD;
auto vq = mm.QTLs();
auto descr = prepare_qtls(vq, ret);
MSG_DEBUG("Have " << descr.size() << " QTL(s) to report.");
select_book("report").select_sheet("metadata");
m_stream.qtls(descr);
return *this;
}
void make_report(model_manager& mm, const std::vector<std::string>& cmdline)
{
static std::string emptystr;
analysis_report& report = ar_instance();
std::string book_name("report");
const trait_metadata_type& TM = report.trait_md();
mm.vMcurrent->compute();
MatrixXd X = mm.vMcurrent->X();
std::chrono::time_point<std::chrono::system_clock> tnow = std::chrono::system_clock::now();
std::time_t now = std::chrono::system_clock::to_time_t(tnow);
report
.report_model(*mm.vMcurrent)
.select_book(book_name)
.select_sheet("metadata")
.line("Spell-QTL")
.line()
.header(MESSAGE("Report for " << (TM.dim_names.size() > 1 ? "pleiotropic" : "single") << " trait " << mm.trait_name))
.line()
.line(MESSAGE("Date: " << std::ctime(&now)))
.line(MESSAGE("Command line: " << cmdline))
.report_qtls()
.line();
mm.vMcurrent->compute();
report.check_constraints();
if (mm.vMcurrent->n_obs() <= mm.vMcurrent->dof() + 1) {
report.line("* There are not enough observations to compute a variance/covariance matrix. Results not displayed.");
} else {
report.prepare_computations();
for (int i = 0; i < (int) TM.dim_names.size(); ++i) {
std::string title;