Commit 4f0af463 authored by Damien Leroux's avatar Damien Leroux
Browse files

Need to re-implement the creation of the LV.

Bayesian computation seems to work but we still need the output in the
form of locus vectors.
parent 8e784d99
......@@ -42,7 +42,7 @@ size_t size(const std::vector<size_t>& dim) {
#include "pedigree.h"
/*#include "bayes/bn.h"*/
/*#include "bayes/factor_var4.h"*/
#include "graphnode.h"
#include "graphnode2.h"
#include "bayes/cli.h"
#include "bayes/dispatch.h"
......
This diff is collapsed.
......@@ -557,11 +557,14 @@ struct graph_base_type {
virtual ~graph_base_type() {}
graph_base_type()
: m_nodes(), variable_to_node() { create_node(Deleted); }
node_index_type
create_node(node_type t, variable_index_type v=-1)
{
node_index_type ret = m_nodes.size();
MSG_DEBUG("creating node " << ret);
/*MSG_DEBUG("creating node " << ret);*/
m_nodes.emplace_back();
m_nodes.back().colour = create_colour();
m_nodes.back().type = t;
......@@ -591,7 +594,7 @@ struct graph_base_type {
node_index_type
add_interface(variable_index_type v)
{
scoped_indent _(MESSAGE("[+I " << v << "] "));
/*scoped_indent _(MESSAGE("[+I " << v << "] "));*/
node_index_type ret = create_node(Interface, v);
m_nodes[ret].all_variables.push_back(v);
/*MSG_DEBUG("new node " << m_nodes[ret]);*/
......@@ -611,7 +614,7 @@ struct graph_base_type {
node_index_type
add_factor(const var_vec& rule, variable_index_type v)
{
scoped_indent _(MESSAGE("[+F " << rule << ' ' << v << "] "));
/*scoped_indent _(MESSAGE("[+F " << rule << ' ' << v << "] "));*/
node_index_type ret = create_node(Factor, v);
auto sorted_rule = rule;
std::sort(sorted_rule.begin(), sorted_rule.end());
......@@ -632,7 +635,7 @@ struct graph_base_type {
void
move_node_to_aggregate(node_index_type inner, node_index_type aggr)
{
MSG_DEBUG("move node to aggregate " << aggr << ": " << inner);
/*MSG_DEBUG("move node to aggregate " << aggr << ": " << inner);*/
if (!(m_nodes[inner].type & Aggregate)) {
m_nodes[aggr].type |= m_nodes[inner].type & ~InAggregate;
m_nodes[aggr].inner_nodes.push_back(inner);
......@@ -660,17 +663,17 @@ struct graph_base_type {
node_index_type
aggregate(const node_vec& nodes)
{
scoped_indent _(MESSAGE("[+A " << nodes << "] "));
/*scoped_indent _(MESSAGE("[+A " << nodes << "] "));*/
node_index_type ret = create_node(Aggregate);
m_nodes[ret].inner_nodes.clear();
MSG_DEBUG("Creating aggregate " << ret);
/*MSG_DEBUG("Creating aggregate " << ret);*/
foreach_in(nodes, [&,this] (node_index_type n) { move_node_to_aggregate(n, ret); });
MSG_DEBUG("updating ranks...");
/*MSG_DEBUG("updating ranks...");*/
MSG_QUEUE_FLUSH();
update_ranks(ret);
std::sort(m_nodes[ret].inner_nodes.begin(), m_nodes[ret].inner_nodes.end());
remove_edge(ret, ret);
MSG_DEBUG("new node " << m_nodes[ret]);
/*MSG_DEBUG("new node " << m_nodes[ret]);*/
for (variable_index_type v: variables_of(ret)) {
variable_to_node[v] = ret;
}
......@@ -680,7 +683,7 @@ struct graph_base_type {
void
rec_update_rank(node_index_type n, const std::vector<bool>& modified, std::vector<bool>& updated)
{
MSG_DEBUG("rec_update_rank on " << n);
/*MSG_DEBUG("rec_update_rank on " << n);*/
MSG_QUEUE_FLUSH();
size_t r = 0;
for (node_index_type i: nei_in(n)) {
......@@ -727,7 +730,7 @@ struct graph_base_type {
bool
add_edge(node_index_type from, node_index_type to, path_type& path)
{
scoped_indent _(MESSAGE("[add edge " << from << "->" << to << "] "));
/*scoped_indent _(MESSAGE("[add edge " << from << "->" << to << "] "));*/
bool have_cycle = colour_equal(m_nodes[from].colour, m_nodes[to].colour);
if (have_cycle) {
path = find_path(from, to);
......@@ -740,7 +743,7 @@ struct graph_base_type {
if (rank(to) <= rank(from)) {
m_nodes[to].rank = rank(from) + 1;
}
MSG_DEBUG("cycle? " << have_cycle);
/*MSG_DEBUG("cycle? " << have_cycle);*/
return have_cycle;
}
......@@ -825,22 +828,28 @@ struct graph_base_type {
var_vec variables_of(node_vec nn) const { var_vec ret; for (node_index_type n: nn) { ret += variables_of(n); } return ret; }
size_t rank(node_index_type n) const { return m_nodes[n].rank; }
bool
node_is_deleted(node_index_type n) const
{
return m_nodes[n].type & Deleted;
}
bool
node_is_subgraph(node_index_type n) const
{
return (m_nodes[n].type & Aggregate) && (m_nodes[n].type & Factor);
return !node_is_deleted(n) && (m_nodes[n].type & Aggregate) && (m_nodes[n].type & Factor);
}
bool
node_is_interface(node_index_type n) const
{
return (m_nodes[n].type & Interface) && !(m_nodes[n].type & Factor);
return !node_is_deleted(n) && (m_nodes[n].type & Interface) && !(m_nodes[n].type & Factor);
}
bool
node_is_factor(node_index_type n) const
{
return (m_nodes[n].type & Factor) && !(m_nodes[n].type & Aggregate);
return !node_is_deleted(n) && (m_nodes[n].type & Factor) && !(m_nodes[n].type & Aggregate);
}
node_index_type
......
......@@ -556,7 +556,7 @@ main(int argc, char** argv)
std::unique_ptr<factor_graph_type> g;
if (argc == 1) {
g = factor_graph_type::from_pedigree(read_csv("random.ped", ';'), 2, {}, {}, "boiboite");
g = factor_graph_type::from_pedigree(read_csv("random.ped", ';'), 2, {}, {}/*, "boiboite"*/);
g->to_image("boiboite", "png");
/*g.dump_active();*/
......@@ -604,7 +604,7 @@ main(int argc, char** argv)
/*g.to_image(MESSAGE(prefix << "-pre-optim"), "png");*/
/*g.optimize();*/
/*g.to_image(prefix, "png");*/
g = factor_graph_type::from_pedigree(filter_pedigree(read_csv(pedfile, ';'), in, out), n_al, in, out, MESSAGE("debug-" << prefix));
g = factor_graph_type::from_pedigree(filter_pedigree(read_csv(pedfile, ';'), in, out), n_al, in, out/*, MESSAGE("debug-" << prefix)*/);
/*g->build_subgraphs();*/
g->dump();
/*g->to_image(MESSAGE(prefix << "-filtered-pre-optim"), "png");*/
......@@ -612,14 +612,14 @@ main(int argc, char** argv)
/*g->optimize();*/
/*MSG_DEBUG("Creating png...");*/
g->to_image(MESSAGE(prefix << "-filtered"), "png");
/*MSG_DEBUG("Creating instance...");*/
/*auto I = g->instance();*/
/*ofile of(MESSAGE(prefix << "-instance.data"));*/
/*I->file_io(of);*/
/*rw_comb<int, bn_label_type>()(of, g->domains);*/
#if 0
MSG_DEBUG("Creating instance...");
auto I = instance(g);
ofile of(MESSAGE(prefix << "-instance.data"));
I->file_io(of);
rw_comb<int, bn_label_type>()(of, g->domains);
#if 1
} else {
std::unique_ptr<graph_type::instance_type> I(new graph_type::instance_type());
std::unique_ptr<instance_type> I(new instance_type());
std::map<var_vec, genotype_comb_type> domains;
ifile ifs(pedfile);
I->file_io(ifs);
......
......@@ -2,7 +2,7 @@
#include "output.h"
#include "pedigree.h"
/*#include "factor_var4.h"*/
#include "graphnode.h"
#include "graphnode2.h"
#include "tensor.h"
bool job_check_fourcc(ifile& ifs, const char* fourcc)
......@@ -164,13 +164,13 @@ job_registry = {
/*MSG_DEBUG("Pedigree" << std::endl << settings->pedigree);*/
/*MSG_DEBUG("COMPUTING FACTOR GRAPH FOR " << unique_n_alleles[n] << " ALLELES.");*/
/*factor_graph fg(settings->pedigree, unique_n_alleles[n], settings->noise);*/
auto fg = graph_type::from_pedigree(settings->pedigree.items, unique_n_alleles[n], settings->input_generations, settings->output_generations);
auto fg = factor_graph_type::from_pedigree(settings->pedigree.items, unique_n_alleles[n], settings->input_generations, settings->output_generations);
/*MSG_DEBUG("COMPUTED FACTOR GRAPH" << std::endl << fg);*/
/*fg.finalize();*/
/*fg.save(settings->job_filename("factor-graph", unique_n_alleles[n]));*/
fg->optimize();
fg->dump_active();
auto I = fg->instance();
/*fg->dump();*/
fg->to_image(MESSAGE("factor-graph-" << unique_n_alleles[n]), "png");
auto I = instance(fg);
ofile of(settings->job_filename("factor-graph", unique_n_alleles[n]));
I->file_io(of);
rw_comb<int, bn_label_type>()(of, fg->domains);
......@@ -196,7 +196,7 @@ job_registry = {
/*graph_type fg;*/
/*fg.load(settings->job_filename("factor-graph", used_alleles.size()));*/
ifile ifs(settings->job_filename("factor-graph", used_alleles.size()));
std::unique_ptr<graph_type::instance_type> I(new graph_type::instance_type());
std::unique_ptr<instance_type> I(new instance_type());
std::map<var_vec, genotype_comb_type> domains;
I->file_io(ifs);
rw_comb<int, bn_label_type>()(ifs, domains);
......@@ -220,6 +220,7 @@ job_registry = {
MSG_DEBUG("i " << i);
MSG_DEBUG("obs " << obsdat);
MSG_DEBUG("obs_vec " << obs_vec);
MSG_DEBUG("ind_dom = " << get_domain(domains, n));
MSG_DEBUG("ind_obs = " << ind_obs);
/*MSG_DEBUG("obs spec");*/
/*for (const auto& s: obs_spec.scores) {*/
......@@ -293,6 +294,12 @@ job_registry = {
/*fg.compute_messages(evidence.begin(), evidence.end(), messages);*/
/*fg.compute_full_factor_state(messages, marginals);*/
message_type output = I->compute(0, domains);
MSG_DEBUG("OUTPUT:");
for (const auto& t: output) {
MSG_DEBUG(" * " << t << std::endl);
}
MSG_QUEUE_FLUSH();
#if 0
MSG_DEBUG("MARGINALS:");
for (const auto& t: output) {
var_vec varz = get_parents(t);
......@@ -359,17 +366,17 @@ job_registry = {
for (const auto& v_prob: marginals) {
int v = v_prob.first;
auto& pbi = prob_by_ind[v];
MSG_DEBUG("### v=" << v << " pbi " << pbi);
MSG_QUEUE_FLUSH();
/*MSG_DEBUG("### v=" << v << " pbi " << pbi);*/
/*MSG_QUEUE_FLUSH();*/
for (const auto& e: v_prob.second) {
MSG_DEBUG("[element] " << e);
MSG_QUEUE_FLUSH();
/*MSG_DEBUG("[element] " << e);*/
/*MSG_QUEUE_FLUSH();*/
const genotype_comb_type::key_type& key = e.keys.keys[0];
MSG_DEBUG("[geno prob for #" << v << "] " << key << " += " << e.coef);
MSG_QUEUE_FLUSH();
/*MSG_DEBUG("[geno prob for #" << v << "] " << key << " += " << e.coef);*/
/*MSG_QUEUE_FLUSH();*/
pbi[{key.state.first, key.state.second}] += e.coef;
MSG_DEBUG("--");
MSG_QUEUE_FLUSH();
/*MSG_DEBUG("--");*/
/*MSG_QUEUE_FLUSH();*/
}
}
#if 0
......@@ -435,6 +442,9 @@ job_registry = {
rw_base() (ofs, state_prob);
return true;
#else
return false;
#endif
}
}},
{"collect-LV", {
......
data type BC2
4 7
*M1 2122
*M2 1101
*M3 1001
*M4 2112
*M5 1112
*M6 1010
*M7 0000
data type BC2
4 1
*M1 ----
data type BC2
4 7
*M1 AHAA
*M2 HHAH
*M3 HAAH
*M4 AHHA
*M5 HHHA
*M6 H---
*M7 AAAA
g;i;1;2;L
L;1;0;0;A
L;2;0;0;B
F1;11;1;2;F1
F2;21;11;11;F2
F3;31;21;21;F3_1
F3;32;21;21;F3_2
F4;41;31;31;F4_1
F4;42;31;31;F4_2
F4;43;31;31;F4_3
F4;44;31;31;F4_4
F4;45;32;32;F4_5
F4;46;32;32;F4_6
F4;47;32;32;F4_7
F4;48;32;32;F4_8
data type F4
8 5 0 0
*M1 120120--
*M2 --1-2-2-
*M3 2-1-2-12
*M4 12121212
*M5 --------
data type L
2 5 0 0
*M1 02
*M2 20
*M3 02
*M4 2-
*M5 20
<format-specification>
<format name="02">
<domain type="allele" alphabet-from="02"/>
<scores>
<score symbol="0">
<from mother="0" father="0"/>
</score>
<score symbol="2">
<from mother="2" father="2"/>
</score>
<score symbol="1">
<from mother="2" father="0"/>
<from mother="0" father="2"/>
</score>
<score symbol="-">
<from mother="0" father="0"/>
<from mother="2" father="2"/>
<from mother="2" father="0"/>
<from mother="0" father="2"/>
</score>
</scores>
</format>
</format-specification>
<format-specification>
<format name="ABHCD">
<domain type="ancestor" alphabet-from="ab"/>
<scores>
<score symbol="A">
<from mother="a" father="a"/>
</score>
<score symbol="H">
<from mother="a" father="b"/>
<from mother="b" father="a"/>
</score>
<score symbol="B">
<from mother="b" father="b"/>
</score>
<score symbol="C">
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="b" father="b"/>
</score>
<score symbol="D">
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="a" father="a"/>
</score>
<score symbol="-">
<from mother="a" father="a"/>
<from mother="a" father="b"/>
<from mother="b" father="a"/>
<from mother="b" father="b"/>
</score>
</scores>
</format>
</format-specification>
data type f2
2 7
*M1 20
*M2 0-
*M3 02
*M4 -0
*M5 20
*M6 02
*M7 00
data type F4
2 1 0 0
*M1 02
gen;id;p1;p2
L;1;0;0
L;2;0;0
F1;3;1;2
F1;4;1;2
F2;5;3;4
F2;6;3;4
F3;7;5;6
F3;8;5;6
F4;9;7;8
(cd ~/devel/spel/src/bayes/ && make -j) && gdb --args ../spell-marker -D -n toto -p pedigree_BC2.txt.ped-data -mos format-02.xml -m BC2:02 bc2.gen -o BC2
data type f2
11 7
*M1 02002202020
*M2 22020200220
*M3 20020200020
*M4 02202022000
*M5 22202000220
*M6 20202020202
*M7 00000000000
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment