Commit 7b81f94f authored by Damien Leroux's avatar Damien Leroux
Browse files

Working prototype to use matrix exponentials.

parent 6ce50d33
#ifndef _SPEL_LUMPERJACK_H_
#define _SPEL_LUMPERJACK_H_
/*#include <boost/dynamic_bitset.hpp>*/
/*#include <Eigen/Jacobi>*/
/*#include <Eigen/IterativeLinearSolvers>*/
#include <deque>
#include <set>
#include <unordered_map>
#include <map>
#include "algebraic_genotype.h"
#include <iostream>
#include "chrono.h"
/*typedef boost::dynamic_bitset<> bitset;*/
/*typedef std::set<bitset> Partition;*/
/*typedef std::set<int> subset;*/
typedef std::vector<int> subset;
#define _EPSILON (1.e-10)
namespace std {
template <>
struct less<double> {
bool operator () (double a, double b) const
{
return a < b && abs(a - b) > _EPSILON;
}
};
}
#if 0
template <typename K, typename V, typename H=std::hash<K>, typename A=std::allocator<std::pair<const K, V>>>
using lumper_map = std::unordered_map<K, V, H, std::equal_to<K>, A>;
#else
template <typename K, typename V, typename C=std::less<K>, typename A=std::allocator<std::pair<const K, V>>>
using lumper_map = std::map<K, V, C, A>;
#endif
inline std::ostream& operator << (std::ostream& os, const subset& s)
{
os << '{';
auto cur = s.begin(), end = s.end();
if (cur != end) {
os << (*cur);
for (++cur; cur != end; ++cur) {
os << ' ' << (*cur);
}
}
return os << '}';
}
inline std::ostream& operator << (std::ostream& os, const std::set<subset>& S)
{
for (auto s: S) {
os << s << std::endl;
}
return os;
}
template <typename K>
std::ostream& operator << (std::ostream& os, const lumper_map<K, subset>& km)
{
std::string sep = "";
os << '{';
for (const auto& kv: km) {
os << sep << kv.first << " => " << kv.second;
if (sep.size() == 0) { sep = ", "; }
}
return os << '}';
}
template <typename scalar_type>
struct exact_compare {
typedef scalar_type key_type;
bool operator () (const scalar_type& s1, const scalar_type& s2) const
{
return s1 < s2;
}
template <typename V, typename COMP, typename A>
static V& get(lumper_map<key_type, V, COMP, A>& m, const scalar_type& k)
{
return m[k];
}
template <typename V, typename COMP, typename A>
static lumper_map<key_type, V, COMP, A>
postprocess(const lumper_map<key_type, V, COMP, A>& m) { return m; }
};
#define LUMP_BY_COLS
/*#define OLD_IMPL_WITH_ALGEBRAIC_GENOTYPES*/
/*struct fp_comp {*/
/*bool operator () (fast_polynom p1, fast_polynom p2) const { return p1.value < p2.value; }*/
/*};*/
template <typename MATRIX_TYPE, typename LABEL_TYPE>
struct lumper {
typedef LABEL_TYPE label_type;
typedef typename MATRIX_TYPE::Scalar scalar_type;
typedef lumper_map<scalar_type, subset> key_map;
const MATRIX_TYPE& G;
const std::vector<label_type>& labels;
bool exact;
chrono pol, cs, ck, tr, tm;
lumper(const MATRIX_TYPE& x, const std::vector<label_type>& labs, bool e=false)
: G(x), labels(labs), exact(e)
, pol(), cs(), ck(), tr(), tm()
{}
scalar_type compute_sum(size_t s, const subset& B)
{
cs.start();
scalar_type accum = scalar_type();
/*fast_polynom accum = fast_polynom::zero;*/
/*std::cout << "accum = " << accum << std::endl;*/
/*for (size_t j = B.find_first(); j != subset::npos; j = B.find_next(j)) {*/
for (int j: B) {
/*std::cout << "accum + " << G.data(i, j);*/
#ifdef LUMP_BY_COLS
accum += G(j, s);
#else
accum += G(s, j);
#endif
/*std::cout << " = " << accum << std::endl;*/
}
cs.stop();
return accum;
}
key_map compute_keys(const subset& C, const subset& B)
{
ck.start();
/*lumper_map<scalar_type, subset> ret;*/
key_map ret;
/*for (size_t i = C.find_first(); i != subset::npos; i = C.find_next(i)) {*/
for (int i: C) {
scalar_type accum = compute_sum(i, B);
/*if (ret[accum].size() == 0) {*/
/*ret[accum].resize(G.column_labels.size());*/
/*}*/
/*ret[accum].set(i);*/
ret[accum].push_back(i);
/*std::cout << "added " << accum << ", " << i << " to ";*/
/*auto kv = ret.find(accum);*/
/*if (kv == ret.end()) {*/
/*std::cout << "BUG!" << std::endl;*/
/*} else {*/
/*std::cout << kv->first << ", " << kv->second << std::endl;*/
/*}*/
}
#if 0
std::cout << std::endl << "Subsets " << C << " vs " << B << std::endl;
for (auto kv: ret) {
std::cout << kv.first << std::endl << " " << kv.second << std::endl;
}
#endif
ck.stop();
return ret;
}
std::vector<subset> partition_on_labels()
{
pol.start();
std::vector<subset> ret;
lumper_map<label_type, subset> tmp;
for (size_t i = 0; i < labels.size(); ++i) {
auto& l = labels[i];
/*if (tmp[l].size() == 0) {*/
/*tmp[l].resize(G.column_labels.size());*/
/*}*/
/*tmp[l].set(i);*/
tmp[l]./*insert*/push_back(i);
}
ret.reserve(tmp.size());
for (auto& kv: tmp) {
/*std::cout << kv.first << "\t" << kv.second << std::endl;*/
ret.push_back(kv.second);
}
pol.stop();
return ret;
}
std::set<subset> try_refine(std::set<subset>& P)
{
tr.start();
/*std::vector<const subset*> stack;*/
std::deque<const subset*> stack;
for (auto& x: P) {
stack.push_back(&x);
}
while (stack.size()) {
const subset& C = *stack.back();
/*const subset& C = *stack.front();*/
/*std::cout << "** " << C << std::endl;*/
/*std::cout << '.' << std::flush;*/
/*MSG_DEBUG("TACKLING " << C);*/
stack.pop_back();
/*stack.pop_front();*/
if (C.size() <= 1) {
continue;
}
for (auto& B: P) {
/*if (B.size() <= 1) {*/
/*continue;*/
/*}*/
key_map subsets = compute_keys(C, B);
/*MSG_DEBUG("KEYS: " << subsets);*/
/*size_t size = 0;*/
/*while (size != subsets.size()) {*/
/*size = subsets.size();*/
/*}*/
/* MARK */
if (subsets.size() > 1) {
MSG_DEBUG("KEYS " << labels[subsets.begin()->second.front()] << ": " << subsets);
P.erase(C);
for (auto& s: subsets) {
auto io = P.insert(s.second);
if (!io.second) {
/*MSG_DEBUG("PROUT SUBSET ALREADY IN P " << (*io.first));*/
}
if (io.first->size() > 1) {
stack.push_back(&*io.first);
}
}
break;
}
}
}
tr.stop();
/*std::cout << std::endl;*/
/*std::cout << P.size() << std::endl;*/
/*std::cout << P << std::endl;*/
/*MSG_DEBUG("P.size() = " << P.size());*/
/*MSG_DEBUG("P = " << P);*/
/*MSG_DEBUG("Lumping time: ck=" << ck.accum << " cs=" << cs.accum << " pol=" << pol.accum << " tr=" << tr.accum);*/
return P;
}
std::set<subset> try_harder(std::set<subset>& P)
{
tr.start();
/*std::vector<const subset*> stack;*/
std::deque<const subset*> stack;
for (auto& x: P) {
stack.push_back(&x);
}
while (stack.size()) {
const subset& C = *stack.back();
/*const subset& C = *stack.front();*/
/*std::cout << "** " << C << std::endl;*/
/*std::cout << '.' << std::flush;*/
/*MSG_DEBUG("TACKLING " << C);*/
stack.pop_back();
/*stack.pop_front();*/
if (C.size() <= 1) {
continue;
}
for (auto& B: P) {
/*if (labels[B.front()] != labels[C.front()]) {*/
/*MSG_DEBUG("SKIP TEST BETWEEN " << C << " AND " << B);*/
/*continue;*/
/*}*/
/*if (B.size() <= 1) {*/
/*continue;*/
/*}*/
key_map subsets = compute_keys(C, B);
/*MSG_DEBUG("KEYS: " << subsets);*/
/*size_t size = 0;*/
/*while (size != subsets.size()) {*/
/*size = subsets.size();*/
/*}*/
/* MARK */
if (subsets.size() > 1) {
MSG_DEBUG("KEYS " << labels[subsets.begin()->second.front()] << ": " << subsets);
P.erase(C);
for (auto& s: subsets) {
auto io = P.insert(s.second);
if (!io.second) {
/*MSG_DEBUG("PROUT SUBSET ALREADY IN P " << (*io.first));*/
}
if (io.first->size() > 1) {
stack.push_back(&*io.first);
}
}
break;
}
}
}
tr.stop();
/*std::cout << std::endl;*/
/*std::cout << P.size() << std::endl;*/
/*std::cout << P << std::endl;*/
/*MSG_DEBUG("P.size() = " << P.size());*/
/*MSG_DEBUG("P = " << P);*/
/*MSG_DEBUG("Lumping time: ck=" << ck.accum << " cs=" << cs.accum << " pol=" << pol.accum << " tr=" << tr.accum);*/
return P;
}
std::set<subset> refine_all()
{
std::vector<subset> tmp = partition_on_labels();
std::set<subset> P(tmp.begin(), tmp.end());
return try_refine(P);
}
MATRIX_TYPE to_matrix(const std::set<subset>& P, std::vector<label_type>& reduced_labels)
{
tm.start();
reduced_labels.clear();
reduced_labels.reserve(P.size());
for (auto& S: P) {
reduced_labels.push_back(labels[*S.begin()]);
}
MATRIX_TYPE ret(reduced_labels.size(), reduced_labels.size());
size_t i = 0;
size_t j;
for (auto& C: P) {
j = 0;
for (auto& B: P) {
/*algebraic_genotype accum = compute_sum(*C.begin(), B);*/
/*accum.type = algebraic_genotype::Type::Genotype;*/
/*accum.genotype = {reduced_labels[B.front()], reduced_labels[C.front()]};*/
scalar_type accum = compute_sum(*C.begin(), B);
#ifdef LUMP_BY_COLS
ret(j, i) = accum;
#else
ret(i, j) = accum;
#endif
++j;
}
++i;
}
tm.stop();
/*MSG_DEBUG("Created lumped matrix in " << tm.accum << " seconds");*/
return ret;
}
};
#endif
......@@ -25,7 +25,7 @@ COMPUTATIONS_SRC=basic_data.cc probabilities.cc model.cc frontends.cc
SRC=$(addprefix input/,$(INPUT_SRC)) $(addprefix computations/,$(COMPUTATIONS_SRC)) $(MAIN_SRC)
OBJ=$(subst .cc,.o,$(SRC))
DEP=$(subst .cc,.d,$(SRC)) test_script.d bayes.d
DEP=$(subst .cc,.d,$(SRC)) test_script.d matrixexp.d
COV_OBJ=$(subst .cc,.cov.o,$(SRC))
ALL_BUT_MAIN_OBJ=$(subst main.o ,,$(OBJ))
......@@ -163,3 +163,6 @@ test_bayes_dai: ../sandbox/bayes.cc bayes.d $(ALL_BUT_MAIN_OBJ)
test_sib: pedigree_analysis.cc static_data.o
$C $< static_data.o -o $@
matrixexp: matrixexp.cc ../include/lumping2.h
$C $< -o $@
......@@ -25,9 +25,9 @@ OBJ=$(subst .cc,.o,$(SRC))
DEP=$(subst .cc,.d,$(SRC))
COV_OBJ=$(subst .cc,.cov.o,$(SRC))
#DEBUG_OPTS=-ggdb
DEBUG_OPTS=-ggdb
#DEBUG_OPTS=-ggdb -DNDEBUG
OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#PROF_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG -g -pg
#DEBUG_OPTS=-ggdb
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment