Commit 6d23722a authored by damien's avatar damien
Browse files

Organized the output files and added a log file for spell-qtl.

parent 7d15b909
This diff is collapsed.
......@@ -134,7 +134,7 @@ inline void ThreadPool::display_progress()
msg << RESTORE_CURSOR /*<< SHOW_CURSOR*/;
/*CREATE_MESSAGE(msg_channel::Out, msg.str());*/
if (!msg_handler_t::instance().queue.m_stop) {
msg_handler_t::instance().queue.cout << msg.str() << std::flush;
msg_handler_t::instance().raw_cout << msg.str() << std::flush;
}
{
/*std::lock_guard<std::mutex> lock(queue_mutex);*/
......
......@@ -20,6 +20,9 @@ project(spell_qtl)
#LIST(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/CMake")
#INCLUDE(pandocology)
find_package(Boost 1.57.0 REQUIRED)
include_directories(${Boost_INCLUDE_DIRS})
set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE TYPE INTERNAL FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
......
......@@ -166,7 +166,7 @@ encompasses.
"c": ["bc", "bd"],
"d": ["ac", "bc", "bd"],
"e": ["ad", "bc", "bd"],
"f": ["ac", "ad", "bc", "bd"]
"f": ["ac", "ad", "bc", "bd"],
"-": ["ac", "ad", "bc", "bd"]
}
}
......
......@@ -18,6 +18,9 @@
#ifndef _SPELL_BASIC_FILE_CHECKS_H_
#define _SPELL_BASIC_FILE_CHECKS_H_
extern "C" {
#include <sys/stat.h>
};
struct file_stat {
bool exists;
......
......@@ -602,7 +602,7 @@ struct factor_graph_type : public recursive_graph_type<factor_graph_type> {
finalize()
{
scoped_indent _(MESSAGE("[finalize] "));
to_image("pre-optim", "png");
// to_image("pre-optim", "png");
MSG_DEBUG("DOMAINS");
compute_domains_and_factors();
MSG_DEBUG("OPTIMIZE");
......
......@@ -1058,18 +1058,18 @@ struct pop_data_type {
qtl_generation_name = name;
}
std::string save()
std::string save(const std::string& dir)
{
static const char* forbidden = ":?*/\\";
// static const char* forbidden = ":?*/\\";
std::stringstream filename;
for (char c: name) {
if (strchr(forbidden, c)) {
filename << '_';
} else {
filename << c;
}
}
filename << ".popdata";
// for (char c: name) {
// if (strchr(forbidden, c)) {
// filename << '_';
// } else {
// filename << c;
// }
// }
filename << dir << '/' << name << ".spell-marker.data";
save_to(filename.str());
return filename.str();
}
......@@ -1132,30 +1132,30 @@ struct pop_data_type {
}
static
pop_data_type load_from(const std::string& filename)
pop_data_type* load_from(const std::string& filename)
{
#define CHECK_4CC(_fourcc_) if (pop_check_fourcc(ifs, _fourcc_)) { return ret; }
ifile ifs(filename);
pop_data_type ret;
pop_data_type* ret = new pop_data_type();
CHECK_4CC("SPOP");
ret.name = read_str(ifs);
ret->name = read_str(ifs);
size_t n_mof = read_size(ifs);
for (size_t i = 0; i < n_mof; ++i) {
std::string k = read_str(ifs);
std::string v = read_str(ifs);
ret.marker_observation_filenames[k] = v;
ret->marker_observation_filenames[k] = v;
}
ret.genetic_map_filename = read_str(ifs);
ret.pedigree_filename = read_str(ifs);
ret.qtl_generation_name = read_str(ifs);
ret->genetic_map_filename = read_str(ifs);
ret->pedigree_filename = read_str(ifs);
ret->qtl_generation_name = read_str(ifs);
size_t n_gen = read_size(ifs);
for (size_t i = 0; i < n_gen; ++i) {
CHECK_4CC("SGTE");
ret.generations.emplace_back(std::make_shared<geno_matrix>());
read_geno_matrix(ifs, *ret.generations.back());
ret->generations.emplace_back(std::make_shared<geno_matrix>());
read_geno_matrix(ifs, *ret->generations.back());
}
/*ret.LV = LV_database::load_from(ifs);*/
ret.LV.file_io(ifs);
ret->LV.file_io(ifs);
CHECK_4CC("SFAM");
size_t n_fam = read_size(ifs);
for (size_t f = 0; f < n_fam; ++f) {
......@@ -1163,20 +1163,20 @@ struct pop_data_type {
size_t n_ind = read_size(ifs);
/*auto& fam = ret.families[k];*/
for (size_t i = 0; i < n_ind; ++i) {
ret.families[k].push_back(read_size(ifs));
ret->families[k].push_back(read_size(ifs));
}
}
CHECK_4CC("SGBI");
size_t n_gbi = read_size(ifs);
for (size_t g = 0; g < n_gbi; ++g) {
size_t k = read_size(ifs);
ret.gen_by_id[k] = ret.generations[read_size(ifs)];
ret->gen_by_id[k] = ret->generations[read_size(ifs)];
}
CHECK_4CC("ANAM");
size_t n_an = read_size(ifs);
for (size_t a = 0; a < n_an; ++a) {
char c = read_char(ifs);
ret.ancestor_names[c] = read_str(ifs);
ret->ancestor_names[c] = read_str(ifs);
}
return ret;
}
......
......@@ -255,7 +255,7 @@ template <typename Ret, typename... Args>
Ret operator () (Args... x)
{
static disk_hashtable<Ret(Args...)> dht(MESSAGE(active_settings->work_directory << "/cache/" << m_name));
static disk_hashtable<Ret(Args...)> dht(MESSAGE(active_settings->work_directory << '/' << active_settings->name << ".cache/" << m_name));
return dht.get_or_compute(m_func, std::forward<Args>(x)...);
}
......
......@@ -18,7 +18,7 @@
#ifndef _SPEL_FRONTENDS_H_
#define _SPEL_FRONTENDS_H_
#include "cache2.h"
#include "../cache2.h"
#include "basic_data.h"
#include "model.h"
/*#include "model/tests.h"*/
......
......@@ -1722,7 +1722,7 @@ void analysis_report::report_final_model(model_manager& mm)
int a2 = columns[i2];
double sigma = vcov(a1, a1) + vcov(a2, a2) - 2 * vcov(a1, a2);
double delta = coef(a1) - coef(a2);
normal s(0, sqrt(sigma));
normal s(0, sigma > 0 ? sqrt(sigma) : 1.e-9);
double pvalue = 2 * cdf(complement(s, fabs(delta)));
ctrmat(i1, i2) = MESSAGE(std::setprecision(3) << -delta << ' ' << std::setw(3) << std::left << significance_string(pvalue));
ctrmat(i2, i1) = MESSAGE(std::setprecision(3) << delta << ' ' << std::setw(3) << std::left << significance_string(pvalue));
......
......@@ -20,6 +20,7 @@
#include <string>
#include <iostream>
#include <fstream>
#include <exception>
#include <set>
#include <utility>
......@@ -139,10 +140,38 @@ struct ostream_manager {
{
/* Direct output is forbidden */
/*throw std::ios_base::failure("Direct output is forbidden");*/
throw DirectOutputIsForbidden();
abort();
}
};
/* from http://wordaligned.org/articles/cpp-streambufs */
class teebuf: public std::streambuf
{
public:
teebuf(std::streambuf * sb1, std::streambuf * sb2) : sb1(sb1), sb2(sb2) {}
private:
virtual int overflow(int c)
{
if (c == EOF) {
return !EOF;
} else {
int const r1 = sb1->sputc(c);
int const r2 = sb2->sputc(c);
return r1 == EOF || r2 == EOF ? EOF : c;
}
}
virtual int sync() {
int const r1 = sb1->pubsync();
int const r2 = sb2->pubsync();
return r1 == 0 && r2 == 0 ? 0 : -1;
}
private:
std::streambuf * sb1;
std::streambuf * sb2;
};
std::streambuf* old_cout_rdbuf;
std::streambuf* old_clog_rdbuf;
std::streambuf* old_cerr_rdbuf;
......@@ -150,6 +179,8 @@ struct ostream_manager {
ForbidOutput forbid;
std::ostream cerr, cout, clog;
std::vector<teebuf> tees;
ostream_manager()
: old_cout_rdbuf(std::cout.rdbuf())
, old_clog_rdbuf(std::clog.rdbuf())
......@@ -157,7 +188,9 @@ struct ostream_manager {
, hi(old_cout_rdbuf)
, forbid()
, cerr(old_cerr_rdbuf), cout(old_cout_rdbuf), clog(&hi)
, tees()
{
tees.reserve(3);
#ifndef SPELL_UNSAFE_OUTPUT
std::clog.rdbuf(&forbid);
std::cout.rdbuf(&forbid);
......@@ -191,8 +224,23 @@ struct ostream_manager {
cerr.rdbuf(old_cerr_rdbuf);
cout.rdbuf(old_cout_rdbuf);
}
void
tee(std::ostream& os)
{
restore();
tees.clear();
tees.emplace_back(old_cout_rdbuf, os.rdbuf());
cout.rdbuf(&tees.back());
tees.emplace_back(old_cerr_rdbuf, os.rdbuf());
cerr.rdbuf(&tees.back());
tees.emplace_back(old_clog_rdbuf, os.rdbuf());
hi.rdbuf(&tees.back());
}
};
struct message_queue : public ostream_manager {
typedef std::mutex mutex_type;
typedef std::unique_lock<mutex_type> scoped_lock_type;
......@@ -253,6 +301,8 @@ struct message_queue : public ostream_manager {
};
struct msg_handler_t {
#ifdef MSG_HANDLER_IS_SYNCED
typedef std::recursive_mutex lock_type;
......@@ -275,7 +325,9 @@ struct msg_handler_t {
bool debug_enabled;
bool quiet;
std::vector<std::function<void()>> hooks;
std::ostream raw_cout;
message_queue queue;
std::unique_ptr<std::ofstream> log_file;
const char* error() { ++count; return color ? _RED : ""; }
const char* warning() { return color ? _YELLOW : ""; }
......@@ -284,15 +336,29 @@ struct msg_handler_t {
state_t()
: color(!!isatty(fileno(stdout))), workarounds(), count(0), debug_indent(0), debug_enabled(true), quiet(false), hooks()
, queue()
, raw_cout(std::cout.rdbuf()), queue(), log_file(nullptr)
{
/*std::cout << "Message handler instance created." << std::endl;*/
}
~state_t()
{}
~state_t() { close_log_file(); }
void check(bool fatal);
void reset();
void run_hooks() { for (auto& f: hooks) { f(); } }
void close_log_file()
{
if (log_file) {
queue.restore();
log_file->close();
log_file.reset(nullptr);
}
}
void set_log_file(const std::string& path)
{
close_log_file();
log_file.reset(new std::ofstream(path, std::ios_base::out|std::ios_base::app));
queue.tee(*log_file);
}
};
static state_t& instance() { static state_t _; return _; }
......@@ -312,8 +378,8 @@ struct msg_handler_t {
static void run_hooks() { instance().run_hooks(); }
static void indent() { instance().debug_indent += 3; }
static void dedent() { instance().debug_indent -= 3; }
static int get_indent() { return instance().debug_indent; }
// static void dedent() { instance().debug_indent -= 3; }
// static int get_indent() { return instance().debug_indent; }
static void enqueue(const message_handle& mh) { instance().queue.enqueue(mh); }
static void wait_for_flush() { instance().queue.wait_for_flush(); }
......@@ -332,8 +398,11 @@ struct msg_handler_t {
static void redirect(std::ostream& os) { instance().queue.redirect(os); }
static void restore() { instance().queue.restore(); }
static void set_log_file(const std::string& path) { instance().set_log_file(path); }
};
inline
int ostream_manager::HeaderInserter::overflow(int ch)
{
......@@ -361,14 +430,16 @@ int ostream_manager::HeaderInserter::overflow(int ch)
if (start_of_line) {
for (const std::string& s: prefix) {
for (const char c: s) {
dest->sputc(c);
dest->sputc(c);
// dest->overflow(c);
}
}
/*for (int i = 0; i < indent; ++i) {*/
/*dest->sputc(' ');*/
/*}*/
}
retval = dest->sputc( ch );
retval = dest->sputc(ch);
// retval = dest->overflow(ch);
start_of_line = ch == '\n';
}
return retval;
......@@ -501,6 +572,7 @@ void message_queue::catch_SIGSEG(int)
signal(SIGSEGV, old_sighandler());
raise(SIGSEGV);
}
/*#endif*/
struct scoped_indent {
......
......@@ -87,7 +87,7 @@ struct marker_obs_formats {
"c": ["bc", "bd"],
"d": ["ac", "bc", "bd"],
"e": ["ad", "bc", "bd"],
"f": ["ac", "ad", "bc", "bd"]
"f": ["ac", "ad", "bc", "bd"],
"-": ["ac", "ad", "bc", "bd"]
}
}
......
......@@ -30,6 +30,8 @@ struct ped_settings_t {
std::string work_directory;
size_t max_states;
std::string pop_name;
ped_settings_t()
: prg_name()
, command_line()
......@@ -37,6 +39,7 @@ struct ped_settings_t {
, csv_sep(';')
, work_directory()
, max_states(std::numeric_limits<decltype(max_states)>::max())
, pop_name()
{}
static ped_settings_t* from_args(int argc, const char** argv);
......
......@@ -38,6 +38,8 @@
#include "../3rd-party/ThreadPool/ThreadPool.h"
#include "basic_file_checks.h"
struct marker_observation_spec {
ObservationDomain domain;
......@@ -192,6 +194,9 @@ struct settings_t {
bool output_npoint;
std::unique_ptr<pop_data_type> pop_data;
std::vector<std::pair<std::string, std::string>> datasets;
settings_t()
: notes()
, map_filename(), map()
......@@ -200,7 +205,7 @@ struct settings_t {
, working_set()
, step(1)
, parallel(0)
, work_directory("/tmp")
, work_directory(".")
, connected(false)
, epistasis(false)
, pleiotropy(false)
......@@ -234,6 +239,7 @@ struct settings_t {
, cross_indicator_can_interact(false)
, npoint_gen()
, output_npoint(false)
, pop_data()
{}
std::vector<std::pair<const chromosome*, double>>
......@@ -474,6 +480,18 @@ inline
void
settings_t::finalize()
{
{
std::string log_file = MESSAGE(active_settings->work_directory << '/' << active_settings->name << ".spell-qtl.log");
ensure_directories_exist(active_settings->work_directory);
msg_handler_t::set_log_file(log_file);
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
std::time_t now_c = std::chrono::system_clock::to_time_t(now);
(*msg_handler_t::instance().log_file)
<< "========================================================" << std::endl
<< "Log started on " << std::put_time(std::localtime(&now_c), "%F %T") << std::endl
<< "========================================================" << std::endl;
}
/*for (auto& c: map) {*/
/*c.compute_haplo_sizes();*/
/*}*/
......@@ -481,6 +499,20 @@ settings_t::finalize()
pool = new ThreadPool(parallel, &thread_stacks);
}
std::string pop_data_filename = work_directory + "/" + name + ".cache/" + name + ".spell-marker.data";
if (!check_file(pop_data_filename, false, false, false)) {
MSG_ERROR("The spell-marker data file was not found at " << pop_data_filename << ".", "Please run spell-pedigree and spell-marker prior to running spell-qtl.");
}
pop_data.reset(pop_data_type::load_from(pop_data_filename));
pop_data->assemble_chromosomes(map);
for (const auto& gt: datasets) {
ifile ifs(gt.second);
auto observed_traits = read_data::read_trait(ifs);
pop_data->set_qtl_generation(gt.first);
pop_data->extract_subpops(populations, gt.second, observed_traits, linked_pops);
}
for (const auto& chr: map) {
estimation_loci[&chr] = compute_steps(chr.condensed.marker_locus, step);
}
......
......@@ -210,8 +210,8 @@ arguments = {
{false},
[](CALLBACK_ARGS)
{
pop_data_type test = pop_data_type::load_from(*++ai);
MSG_DEBUG(test);
std::unique_ptr<pop_data_type> test(pop_data_type::load_from(*++ai));
MSG_DEBUG((*test));
exit(0);
SAFE_IGNORE_CALLBACK_ARGS;
}},
......@@ -247,14 +247,14 @@ arguments = {
{},
[](CALLBACK_ARGS)
{
pop_data_type pop = pop_data_type::load_from(*++ai);
std::unique_ptr<pop_data_type> pop(pop_data_type::load_from(*++ai));
std::string gen_name = *++ai;
const auto& spec = ensure(target)->marker_observation_specs[*++ai]; (void) spec;
char unknown = (*++ai)[0];
std::string data_type = *++ai;
std::string output = *++ai;
/* TODO FIXME no genetic map now, need to pass it as argument here */
/*convert_to_mapmaker(pop, ensure(target)->map, gen_name, spec, unknown, data_type, output);*/
/*convert_to_mapmaker(*pop, ensure(target)->map, gen_name, spec, unknown, data_type, output);*/
exit(0);
SAFE_IGNORE_CALLBACK_ARGS;
(void) unknown;
......@@ -315,7 +315,10 @@ arguments = {
{&bn_settings_t::work_directory},
[](CALLBACK_ARGS) {
ensure(target)->work_directory = *++ai;
check_file(ensure(target)->work_directory, true, true, true);
if (!check_file(ensure(target)->work_directory, true, true, false)) {
ensure_directories_exist(ensure(target)->work_directory);
check_file(ensure(target)->work_directory, true, true, true);
}
SAFE_IGNORE_CALLBACK_ARGS;
}},
......@@ -395,6 +398,7 @@ arguments = {
ensure(target)->marker_observation_specs_filenames.push_back(filename);
SAFE_IGNORE_CALLBACK_ARGS;
}},
/*
{{"-p", "--pedigree"},
{"path"},
"Path to the output from spell-pedigree.",
......@@ -415,6 +419,8 @@ arguments = {
MSG_DEBUG_DEDENT;
SAFE_IGNORE_CALLBACK_ARGS;
}},
*/
// {{"-d", "--breeding-design"},
// {"path"},
// "Path to the breeding design specification file. The pedigree data will be created assuming one individual per unobserved generation.\nIf multiple generations are observed, please provide pedigree data instead.",
......@@ -537,9 +543,6 @@ bn_settings_t* bn_settings_t::from_args(int argc, const char** argv)
if (ret) {
ret->prg_name = basename(argv[0]);
ret->command_line.assign(argv, argv + argc);
for (const auto& kv: ret->observed_mark) {
ret->marker_observation_specs[kv.second.format_name] = marker_obs_formats::get_format(ret->pedigree, kv.second.format_name);
}
if (ret->output_mode == 0) {
ret->output_mode = bn_settings_t::OutputPopData;
}
......
......@@ -189,7 +189,7 @@ job_registry = {
/*fg.finalize();*/
/*fg.save(settings->job_filename("factor-graph", unique_n_alleles[n]));*/
/*fg->dump();*/
fg->to_image(MESSAGE("factor-graph-" << unique_n_alleles[n]), "png");
//fg->to_image(MESSAGE("factor-graph-" << unique_n_alleles[n]), "png");
auto I = instance(fg);
ofile of(settings->job_filename("factor-graph", unique_n_alleles[n]));
I->file_io(of);
......@@ -326,20 +326,22 @@ job_registry = {
}
}
if (settings->output_mode & bn_settings_t::OutputOnePoint){
std::string op_filename = MESSAGE(settings->work_directory << '/' << settings->pop_name << "_pedigree-and-probabilities." << mark << ".csv");
if (settings->output_mode & bn_settings_t::OutputOnePoint) {
std::string op_dirname = MESSAGE(settings->work_directory << '/' << settings->pop_name << ".1-point");
ensure_directories_exist(op_dirname);
std::string op_filename = MESSAGE(op_dirname << '/' << settings->pop_name << ".pedigree-and-probabilities." << marker_name << ".csv");
std::ofstream ofs(op_filename);
ofs << "Gen;Id;P1;P2;Prob" << std::endl;
for (const auto& pi: settings->pedigree.items) {
ofs << pi.gen_name << ';' << pi.id << ';' << pi.p1 << ';' << pi.p2 << ";{";
bool first = true;
for (const auto& kv: marginals[pi.id]) {
ofs << (first ? "'" : ", '") << kv.first << "': " << kv.second;
ofs << (first ? "\"" : ", \"") << kv.first << "\": " << kv.second;
first = false;
}
ofs << '}' << std::endl;
}
settings->closing_messages.push_back(MESSAGE(std::endl << std::endl << GREEN << "1-point Parental Origin Probabilities for marker `" << mark << "' written in file " << WHITE << op_filename << NORMAL << std::endl));
settings->closing_messages.push_back(MESSAGE(GREEN << "1-point Parental Origin Probabilities for marker `" << marker_name << "' written in file " << WHITE << op_filename << NORMAL << std::endl));
}
if (settings->output_mode & bn_settings_t::OutputPopData) {
......@@ -596,7 +598,7 @@ job_registry = {
}
MSG_DEBUG("POP-DATA:");
MSG_DEBUG("" << pop_data);
std::string filename = pop_data.save();
std::string filename = pop_data.save(settings->work_directory + "/" + settings->pop_name + ".cache");
MSG_DEBUG("Saved output in " << filename);
{
ofile output(settings->job_filename("output", 0));
......
......@@ -21,6 +21,7 @@
#include "cli.h"
#include "dispatch.h"
#include <fenv.h>
#include <input/marker_obs_formats.h>
void print_usage();
......@@ -41,6 +42,22 @@ int main(int argc, const char** argv)
if (!settings) {
print_usage();
} else {
settings->pedigree_filename = settings->work_directory + "/" + settings->pop_name + ".cache/" + settings->pop_name + ".spell-pedigree.data";
if (!check_file(settings->pedigree_filename, false, false)) {
MSG_ERROR("Couldn'f find the spell-pedigree output file.", "Please run spell-pedigree first with the same name and work_directory parameters.");
return -1;
}
settings->pedigree.load(settings->pedigree_filename, false);
MSG_DEBUG_INDENT_EXPR("[Pedigree] ");
MSG_DEBUG("loaded from " << settings->pedigree_filename);
MSG_DEBUG("" << settings->pedigree);
MSG_DEBUG_DEDENT;
for (const auto& kv: settings->observed_mark) {
settings->marker_observation_specs[kv.second.format_name]
= marker_obs_formats::get_format(settings->pedigree, kv.second.format_name);
}
std::map<std::string, ObservationDomain> obs_gen;
for (const auto& kv: settings->observed_mark) {
obs_gen[kv.first] = settings->marker_observation_specs[kv.second.format_name].domain;
......@@ -104,21 +121,21 @@ int main(int argc, const char** argv)
rw_base()(unafs, settings->unique_n_alleles);
}
MSG_DEBUG(