Commit 5303d460 authored by Damien Leroux's avatar Damien Leroux
Browse files

Replaced cumbersome XML marker obs spec implementation with a sexier...

Replaced cumbersome XML marker obs spec implementation with a sexier json-based one. More user-friendly too.
parent c6835611
......@@ -11,12 +11,19 @@
<excludeRoots>
<file path="$PROJECT_DIR$/cmake-build-debug/CMakeFiles" />
<file path="$PROJECT_DIR$/cmake-build-release/CMakeFiles" />
<file path="$PROJECT_DIR$/include/bayes" />
<file path="$PROJECT_DIR$/include/cache" />
<file path="$PROJECT_DIR$/include/computations" />
<file path="$PROJECT_DIR$/include/data" />
<file path="$PROJECT_DIR$/include/input" />
<file path="$PROJECT_DIR$/include/model" />
<file path="$PROJECT_DIR$/include/attic" />
<file path="$PROJECT_DIR$/simulation/gen_BC2" />
<file path="$PROJECT_DIR$/simulation/gen_F9" />
<file path="$PROJECT_DIR$/simulation/gen_MAGIC4" />
<file path="$PROJECT_DIR$/simulation/gen_MAGIC8" />
<file path="$PROJECT_DIR$/simulation/gen_RIL" />
<file path="$PROJECT_DIR$/simulation/gen_RIL.old" />
<file path="$PROJECT_DIR$/simulation/gen_SIB" />
<file path="$PROJECT_DIR$/simulation/test/gen_RIL" />
<file path="$PROJECT_DIR$/src/bayes/gros_tests" />
<file path="$PROJECT_DIR$/src/bayes/test-data" />
<file path="$PROJECT_DIR$/src/bayes/tmp" />
<file path="$PROJECT_DIR$/src/experiments" />
<file path="$PROJECT_DIR$/tests/TestCodeCoverage" />
</excludeRoots>
</component>
......
This diff is collapsed.
......@@ -56,7 +56,8 @@ set(SPELL_PEDIGREE_SRC
)
set(SPELL_MARKER_SRC
src/input/read_mark.cc src/input/design.cc src/input/read_map.cc src/input/xml/xml_design.cc src/input/xml/xml_format.cc
src/input/read_mark.cc src/input/design.cc src/input/read_map.cc
#src/input/xml/xml_design.cc src/input/xml/xml_format.cc
src/static_data.cc
src/bayes/main.cc src/bayes/dispatch.cc src/bayes/jobs.cc src/bayes/cli.cc
)
......@@ -76,3 +77,8 @@ target_link_libraries(spell-qtl expat dl)
set(CMAKE_EXE_LINKER_FLAGS "-rdynamic")
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
# experimental binaries
add_executable(test-json sandbox/test-json.cc)
\ No newline at end of file
spell-marker
IN sample-data/data_magic
-p
pedigree_clean-norepeat.csv.ped-data
-n
tomates
-wd
/media/RAM/
-mos
format-02.xml
-m
Cervil:02
Cervil.gen
-m
Levovil:02
Levovil.gen
-m
Criollo:02
Criollo.gen
-m
Stupicke:02
Stupicke.gen
-m
Plovdiv:02
Plovdiv.gen
-m
LA1420:02
LA1420.gen
-m
Ferum:02
Ferum.gen
-m
LA0147:02
LA0147.gen
-m
ICS3:02
pop_geno_transposed.txt
-D
---
IN simulator/data-multipop2
-mos format-ABHCD.xml -p 100.ped.ped-data -m F2:A/B 100_F2.gen -m F2C:A/C 100_F2C.gen -o F2,F2C
\ No newline at end of file
......@@ -226,7 +226,9 @@ struct signal_display {
std::ostream&
operator << (std::ostream& os, const signal_display& sd)
{
return os << sd.grid;
std::stringstream tmp;
tmp << sd.grid;
return os << tmp.str();
}
#endif
};
......@@ -295,9 +297,11 @@ struct QTL {
};
enum class AR { RSS=1, Rank=2, Test=4, Model=8, All=0xFF };
enum class AR: int { RSS=1, Rank=2, Test=4, Model=8, All=0xFF };
constexpr bool operator & (AR a, AR b) { return !!(((int) a) & ((int) b)); }
inline
bool
operator & (AR a, AR b) { return !!(((int) a) & ((int) b)); }
enum probability_mode { Joint, Single };
......@@ -714,8 +718,9 @@ struct search_interval_type {
}
test_result
find_max(int i0, value<ComputationType> vct, computation_along_chromosome& cac, double threshold)
find_max(int i0, value<ComputationType> vct, computation_along_chromosome &cac, double threshold)
{
MSG_DEBUG("call to find_max");
auto ct = *vct;
auto last_computation =
(ct == ComputationType::FTest ? cac.ftest_pvalue
......@@ -732,7 +737,16 @@ struct search_interval_type {
i_max = i;
}
}
if (i_max == -1) {
#ifdef SIGNAL_DISPLAY_ONELINER
signal_display sd(last_computation.transpose(), i_max, max > threshold);
MSG_DEBUG("[COMPUTATION] " << effective_positions.front() << sd << effective_positions.back() << " max=" << max << " at " << effective_positions[i_max]);
#else
signal_display sd(*chrom, effective_positions, last_computation.transpose(), i_max, threshold);
MSG_DEBUG("[COMPUTATION] " << effective_positions.front() << " ... " << effective_positions.back() << " max=" << max << " at " << effective_positions[i_max] << std::endl << sd);
#endif
if (i_max == -1) {
return {};
}
/*model_block_key k = locus_base_key;*/
......@@ -745,18 +759,12 @@ struct search_interval_type {
/*model_block_key mbk = locus_base_key;*/
/*mbk += std::make_pair(chrom, (*loci)[i_max]);*/
/*MSG_DEBUG("locus_base_key " << locus_base_key << " mbk " << mbk);*/
/*MSG_DEBUG("last_computation@" << last_computation);*/
/*MSG_QUEUE_FLUSH();*/
/*MSG_DEBUG((*last_computation));*/
/*MSG_QUEUE_FLUSH();*/
#ifdef SIGNAL_DISPLAY_ONELINER
/*signal_display sd(last_computation.transpose(), i_max, max > threshold);*/
/*MSG_DEBUG("[COMPUTATION] " << loci->front() << sd << loci->back() << " max=" << max << " at " << (*loci)[i_max]);*/
#else
/*signal_display sd(*chrom, effective_positions, last_computation.transpose(), i_max, threshold);*/
/*MSG_DEBUG("[COMPUTATION] " << loci->front() << " ... " << loci->back() << " max=" << max << " at " << (*loci)[i_max] << std::endl << sd);*/
#endif
/*
MSG_DEBUG("last_computation@" << last_computation);
MSG_QUEUE_FLUSH();
MSG_DEBUG((*last_computation));
MSG_QUEUE_FLUSH();
*/
return {
this, chrom,
......
//
// Created by daleroux on 22/11/16.
//
#ifndef SPELL_QTL_MARKER_OBS_FORMATS_H_H
#define SPELL_QTL_MARKER_OBS_FORMATS_H_H
#include <string>
#include <fstream>
#include <pedigree.h>
#include <regex>
#include <commandline.h>
#include "settings.h"
#include "../../3rd-party/json.hpp"
#include "error.h"
#include "observations.h"
struct marker_obs_formats {
static
nlohmann::json&
get_internal_formats_json() {
static std::string formats_str = R"(
{
"02": {
"domain": "allele",
"alphabet_from": "01",
"scores": {
"0": ["00"],
"1": ["01", "10"],
"2": ["11"],
"-": ["00", "01", "10", "11"]
}
},
"ABHCD": {
"domain": "ancestor",
"alphabet_from": "ab",
"scores": {
"A": ["aa"],
"H": ["ab", "ba"],
"B": ["bb"],
"-": ["aa", "ab", "ba", "bb"],
"C": ["ab", "ba", "bb"],
"D": ["aa", "ab", "ba"]
}
},
"CP": {
"domain": "ancestor",
"alphabet_from": "abcd",
"scores": {
"0": ["ac", "ad", "bc", "bd"],
"1": [],
"2": [],
"3": [],
"4": [],
"5": [],
"6": [],
"7": [],
"8": [],
"9": [],
"A": [],
"B": [],
"C": [],
"D": [],
"E": [],
"F": ["ac", "ad", "bc", "bd"]
}
}
})";
static nlohmann::json formats = nlohmann::json::parse(formats_str);
return formats;
}
static
std::string
build_all_internal_format_names()
{
std::vector<std::string> tmp;
const auto& json = get_internal_formats_json();
for (auto it = json.begin(); it != json.end(); ++it) {
tmp.push_back(it.key());
}
std::stringstream ss;
auto i = tmp.begin(), j = tmp.end();
ss << (*i++);
for (; i != j; ++i) {
ss << ' ' << (*i);
}
return ss.str();
}
static
void
add_format(std::ifstream& ifs)
{
nlohmann::json fmt = nlohmann::json::parse(ifs);
nlohmann::json& internal = get_internal_formats_json();
MSG_DEBUG("INTERNAL " << internal.dump());
MSG_QUEUE_FLUSH();
for (auto it = fmt.begin(); it != fmt.end(); ++it) {
std::string key = it.key();
if (internal.count(key) == 0) {
internal[key] = it.value();
// internal += {it.key(), it.value()};
} else {
MSG_ERROR("Format " << it.key() << " is already defined.", "");
}
}
}
static
const std::string&
all_internal_format_names()
{
static std::string ret(build_all_internal_format_names());
return ret;
}
static
nlohmann::json
get_format_json(const std::string& key)
{
const nlohmann::json& formats = get_internal_formats_json();
auto it = formats.find(key);
if (it == formats.end()) {
MSG_ERROR("Marker observation format is not defined: " << key, MESSAGE("Use one of the predefined formats " << build_all_internal_format_names() << " or add a -mos <format-filename> argument to the commandline."));
return {};
}
return it.value();
}
static
marker_observation_spec
make_observation_spec(nlohmann::json format) {
marker_observation_spec ret;
ret.domain = format["domain"] == "ancestor" ? ODAncestor : ODAllele;
ret.domain_size = format["alphabet_from"].size();
const auto& fmt_scores = format["scores"];
for (auto it = fmt_scores.begin(); it != fmt_scores.end(); ++it) {
ret.scores.emplace_back();
auto& score = ret.scores.back();
score.first = it.key()[0];
for (const auto& sc: it.value()) {
std::string s = sc;
score.second.emplace_back(s[0], s[1]);
}
}
return ret;
}
static
marker_observation_spec
get_format(const pedigree_type& ped, const std::string& format_name)
{
// MSG_DEBUG("Requested format " << format_name);
// MSG_QUEUE_FLUSH();
auto sep = format_name.find('/');
if (sep != std::string::npos) {
std::smatch m;
int i1, i2;
char l1, l2;
if (std::regex_match(format_name, m, std::regex("^([0-9]+)/([0-9]+)$"))) {
/* fetch letters by ancestor id number */
int v1 = to<int>(m[1].str());
int v2 = to<int>(m[2].str());
// MSG_DEBUG("Matched ancestor IDs " << v1 << " and " << v2);
i1 = (int) (std::find_if(ped.items.begin(), ped.items.end(), [&] (const pedigree_item& i) { return i.id == v1; }) - ped.items.begin());
i2 = (int) (std::find_if(ped.items.begin(), ped.items.end(), [&] (const pedigree_item& i) { return i.id == v2; }) - ped.items.begin());
} else {
std::string name1(format_name.begin(), format_name.begin() + sep);
std::string name2(format_name.begin() + sep + 1, format_name.end());
// MSG_DEBUG("Looking for ancestors named " << name1 << " and " << name2);
i1 = (int) (std::find_if(ped.items.begin(), ped.items.end(), [&] (const pedigree_item& i) { return i.gen_name == name1; }) - ped.items.begin());
i2 = (int) (std::find_if(ped.items.begin(), ped.items.end(), [&] (const pedigree_item& i) { return i.gen_name == name2; }) - ped.items.begin());
}
// MSG_DEBUG("Have ancestor numbers " << i1 << " and " << i2);
// for (const auto& kv: ped.ancestor_letters) {
// MSG_DEBUG(kv.first << ':' << kv.second);
// }
l1 = ped.ancestor_letters.find(i1)->second;
l2 = ped.ancestor_letters.find(i2)->second;
nlohmann::json json, src = get_format_json("ABHCD");
json["domain"] = src["domain"];
json["alphabet_from"] = (l1 < l2 ? MESSAGE(l1 << l2) : MESSAGE(l2 << l1));
json["scores"] = nullptr;
const auto& fmt_scores = src["scores"];
for (auto it = fmt_scores.begin(); it != fmt_scores.end(); ++it) {
json["scores"][it.key()] = nlohmann::json::array();
for (auto& geno: it.value()) {
std::string s = geno;
s[0] = (s[0] == 'a' ? l1 : l2);
s[1] = (s[1] == 'a' ? l1 : l2);
json["scores"][it.key()] += s;
}
}
MSG_DEBUG("Created marker observation spec " << json.dump());
return make_observation_spec(json);
}
return make_observation_spec(get_format_json(format_name));
}
};
#endif //SPELL_QTL_MARKER_OBS_FORMATS_H_H
#include <input/marker_obs_formats.h>
#include "bayes.h"
#include "pedigree.h"
#include "bayes/cli.h"
......@@ -369,9 +370,11 @@ arguments = {
std::string filename = *++ai;
check_file(filename, false, false);
/*ensure(target)->marker_observation_specs_filename = *++ai;*/
ifile ifs(filename);
/*ifile ifs(filename);*/
/*ensure(target)->marker_observation_specs = read_format(filename, ifs);*/
read_format(ensure(target)->marker_observation_specs, filename, ifs);
/*read_format(ensure(target)->marker_observation_specs, filename, ifs);*/
std::ifstream ifs(filename);
marker_obs_formats::add_format(ifs);
ensure(target)->marker_observation_specs_filenames.push_back(filename);
SAFE_IGNORE_CALLBACK_ARGS;
}},
......@@ -495,6 +498,9 @@ bn_settings_t* bn_settings_t::from_args(int argc, const char** argv)
if (ret) {
ret->prg_name = basename(argv[0]);
ret->command_line.assign(argv, argv + argc);
for (const auto& kv: ret->observed_mark) {
ret->marker_observation_specs[kv.second.format_name] = marker_obs_formats::get_format(ret->pedigree, kv.second.format_name);
}
}
return ret;
......
......@@ -44,12 +44,14 @@ dispatch_geno_probs(
size_t ind, /* spawnling number */
std::map<size_t, std::vector<double>>& state_prob) /* all computed locus vectors to fetch the parents' states */
{
/*
scoped_indent _(MESSAGE("[dispatchGP] "));
MSG_DEBUG("ind " << ind);
MSG_DEBUG("lc " << lc);
MSG_DEBUG("labels " << labels);
MSG_DEBUG("geno_probs " << geno_probs);
MSG_DEBUG("state_prob " << state_prob);
*/
// MSG_QUEUE_FLUSH();
std::vector<double> ret(lc.size());
std::map<label_type, double> norms;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment