Commit 1507a980 authored by Damien Leroux's avatar Damien Leroux
Browse files

Optimized segment_computer_t for reuse.

parent 9b024df3
......@@ -21,7 +21,8 @@ VERSION_DEFINES=$(shell git tag | tail -1 | sed 's/\([0-9]*\)[.]\([0-9]*\)\([.][
CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
#CXXARGS=-Wextra -Wall -pthread -fPIC -DEIGEN_NO_DEPRECATED_WARNING $(VERSION_DEFINES)
DEBUG_OPTS=-ggdb -O0 -DNDEBUG
DEBUG_OPTS=-ggdb -O0
#DEBUG_OPTS=-ggdb -O0 -DNDEBUG
#DEBUG_OPTS=-ggdb -O2 -DNDEBUG
#OPT_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
......
......@@ -79,7 +79,7 @@ bool ensure_directory_exists(const std::string& path)
}
#if 1
#if 0
static inline
bool ensure_directories_exist(const std::string& path)
{
......@@ -98,7 +98,7 @@ bool ensure_directories_exist(const std::string& path)
return ok;
}
#else
#elif 0
static inline
bool ensure_directories_exist(const std::string& path)
......@@ -117,6 +117,144 @@ bool ensure_directories_exist(const std::string& path)
/*MSG_DEBUG("ensure_directories_exist(" << path << ") bad.");*/
return false;
}
#else
//extern "C" {
/*
@(#)File: $RCSfile: mkpath.c,v $
@(#)Version: $Revision: 1.13 $
@(#)Last changed: $Date: 2012/07/15 00:40:37 $
@(#)Purpose: Create all directories in path
@(#)Author: J Leffler
@(#)Copyright: (C) JLSS 1990-91,1997-98,2001,2005,2008,2012
*/
/*TABSTOP=4*/
#include <errno.h>
#include <unistd.h>
#include <string.h>
typedef struct stat Stat;
#if 0 /* FIXME: move ID string to static_data */
#ifndef lint
/* Prevent over-aggressive optimizers from eliminating ID string */
const char jlss_id_mkpath_c[] = "@(#)$Id: mkpath.c,v 1.13 2012/07/15 00:40:37 jleffler Exp $";
#endif /* lint */
#endif
static inline int do_mkdir(const char *path, mode_t mode)
{
Stat st;
int status = 0;
if (stat(path, &st) != 0)
{
/* Directory does not exist. EEXIST for race condition */
if (mkdir(path, mode) != 0 && errno != EEXIST)
status = -1;
}
else if (!S_ISDIR(st.st_mode))
{
errno = ENOTDIR;
status = -1;
}
return(status);
}
/**
** mkpath - ensure all directories in path exist
** Algorithm takes the pessimistic view and works top-down to ensure
** each directory in path exists, rather than optimistically creating
** the last element and working backwards.
*/
static inline int mkpath(const char *path, mode_t mode)
{
char *pp;
char *sp;
int status;
char *copypath = strdup(path);
status = 0;
pp = copypath;
while (status == 0 && (sp = strchr(pp, '/')) != 0)
{
if (sp != pp)
{
/* Neither root nor double slash in path */
*sp = '\0';
status = do_mkdir(copypath, mode);
*sp = '/';
}
pp = sp + 1;
}
if (status == 0)
status = do_mkdir(path, mode);
free(copypath);
return (status);
}
#ifdef TEST
#include <stdio.h>
/*
** Stress test with parallel running of mkpath() function.
** Before the EEXIST test, code would fail.
** With the EEXIST test, code does not fail.
**
** Test shell script
** PREFIX=mkpath.$$
** NAME=./$PREFIX/sa/32/ad/13/23/13/12/13/sd/ds/ww/qq/ss/dd/zz/xx/dd/rr/ff/ff/ss/ss/ss/ss/ss/ss/ss/ss
** : ${MKPATH:=mkpath}
** ./$MKPATH $NAME &
** [...repeat a dozen times or so...]
** ./$MKPATH $NAME &
** wait
** rm -fr ./$PREFIX/
*/
int main(int argc, char **argv)
{
int i;
for (i = 1; i < argc; i++)
{
for (int j = 0; j < 20; j++)
{
if (fork() == 0)
{
int rc = mkpath(argv[i], 0777);
if (rc != 0)
fprintf(stderr, "%d: failed to create (%d: %s): %s\n",
(int)getpid(), errno, strerror(errno), argv[i]);
exit(rc == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
}
int status;
int fail = 0;
while (wait(&status) != -1)
{
if (WEXITSTATUS(status) != 0)
fail = 1;
}
if (fail == 0)
printf("created: %s\n", argv[i]);
}
return(0);
}
//} /* extern "C" */
#endif /* TEST */
static inline
bool ensure_directories_exist(const std::string& path)
{
return mkpath(path.c_str(), 0700) == 0;
}
#endif
#endif
......
......@@ -27,6 +27,17 @@ struct geno_prob_computer {
: locus(0), LV(0), this_gen(0), TR(), init_kernel(), left(), right(), redux(), inv_stat_dist()
{}
geno_prob_computer(const geno_prob_computer& tmp)
: locus(tmp.locus)
, LV(tmp.LV)
, this_gen(tmp.this_gen)
, left(std::map<int, MatrixXd>(tmp.left))
, right(std::map<int, MatrixXd>(tmp.right))
, redux(tmp.redux)
, inv_stat_dist(tmp.inv_stat_dist)
, unique_labels(tmp.unique_labels)
{}
geno_prob_computer(geno_prob_computer&& tmp)
: locus(tmp.locus)
, LV(tmp.LV)
......@@ -35,6 +46,7 @@ struct geno_prob_computer {
, right(std::forward<std::map<int, MatrixXd>>(tmp.right))
, redux(tmp.redux)
, inv_stat_dist(tmp.inv_stat_dist)
, unique_labels(std::forward<std::vector<label_type>>(tmp.unique_labels))
{}
geno_prob_computer&
......@@ -48,6 +60,22 @@ struct geno_prob_computer {
right.swap(tmp.right);
redux = tmp.redux;
inv_stat_dist = tmp.inv_stat_dist;
unique_labels.swap(tmp.unique_labels);
return *this;
}
geno_prob_computer&
operator = (const geno_prob_computer& tmp)
{
locus = tmp.locus;
LV = tmp.LV;
this_gen = tmp.this_gen;
TR = tmp.TR;
left = tmp.left;
right = tmp.right;
redux = tmp.redux;
inv_stat_dist = tmp.inv_stat_dist;
unique_labels = tmp.unique_labels;
return *this;
}
......@@ -203,45 +231,23 @@ struct geno_prob_computer {
return vv / s;
}
labelled_matrix<MatrixXd, label_type, double>
fast_compute_over_segment(const std::vector<double>& steps)
MatrixXd
fast_compute_over_segment_raw(const std::vector<double>& steps)
{
if (!compute_partial_kernels()) {
labelled_matrix<MatrixXd, label_type, double>
ret(unique_labels, steps);
ret.data = MatrixXd::Zero(ret.row_labels.size(), ret.column_labels.size());
return ret;
}
#if 0
std::vector<typename GENERATION::col_label_type> labels;
size_t i = 0;
size_t j = 0;
while (i < redux.innerSize()) {
while (j < breakmat.column_labels.size() && redux(i, j) != 1) ++j;
labels.push_back(breakmat.column_labels[j]);
while (j < breakmat.column_labels.size() && redux(i, j) == 1) ++j;
++i;
}
labelled_matrix<MatrixXd, typename GENERATION::col_label_type, double>
ret(labels, steps);
#endif
/*MSG_DEBUG("LOCUS VECTORS" << std::endl << *LV);*/
labelled_matrix<MatrixXd, label_type, double>
ret(unique_labels, steps);
MatrixXd ret(unique_labels.size(), steps.size());
/*MSG_DEBUG(MATRIX_SIZE(ret));*/
size_t l = 0, last = steps.size() - 1, r = !!(LV->outerSize() - 1);
for (size_t i = 0; i < steps.size(); ++i) {
/* FIXME: optimize the CORRECT initialization of l & r */
std::tie(l, r) = find_lr(steps[i]);
ret.data.col(i) = fast_compute_at(steps[i], l, r);
for (int k = 0; k < ret.data.rows(); ++k) {
if (ret.data(k, i) < FLOAT_TOL && ret.data(k, i) > -FLOAT_TOL) {
ret.data(k, i) = 0;
ret.col(i) = fast_compute_at(steps[i], l, r);
for (int k = 0; k < ret.rows(); ++k) {
if (ret(k, i) < FLOAT_TOL && ret(k, i) > -FLOAT_TOL) {
ret(k, i) = 0;
}
}
if ((ret.data.col(i).array() < 0).any()) {
MSG_DEBUG("Bad value at column #" << i << std::endl << ret.data.col(i).transpose());
if ((ret.col(i).array() < 0).any()) {
MSG_DEBUG("Bad value at column #" << i << std::endl << ret.col(i).transpose());
MSG_DEBUG((*this_gen));
#if 0
MSG_DEBUG("steps: " << steps);
......@@ -274,43 +280,59 @@ struct geno_prob_computer {
/*MSG_DEBUG("STATE PROBABILITIES" << std::endl << ret);*/
return ret;
}
labelled_matrix<MatrixXd, label_type, double>
fast_compute_over_segment(const std::vector<double>& steps)
{
if (!compute_partial_kernels()) {
labelled_matrix<MatrixXd, label_type, double>
ret(unique_labels, steps);
ret.data = MatrixXd::Zero(ret.row_labels.size(), ret.column_labels.size());
return ret;
}
labelled_matrix<MatrixXd, label_type, double>
ret(unique_labels, steps);
ret.data = fast_compute_over_segment_raw(steps);
return ret;
}
};
struct segment_computer_t {
const geno_matrix* g_this;
const qtl_chromosome* chr;
qtl_chromosome chr;
geno_prob_computer gpc_vec;
std::vector<double> steps;
/*std::vector<double> steps;*/
std::vector<double> loci;
double noise;
segment_computer_t() : g_this(0), gpc_vec(), steps(), noise(0) {}
segment_computer_t() : g_this(0), chr(), gpc_vec(), /*steps(),*/ noise(0) {}
segment_computer_t(segment_computer_t&& sc)
: g_this(sc.g_this), chr(sc.chr), gpc_vec(std::move(sc.gpc_vec)), steps(std::move(sc.steps)), loci(std::move(sc.loci)), noise(sc.noise)
: g_this(sc.g_this), chr(sc.chr), gpc_vec(std::move(sc.gpc_vec)), /*steps(std::move(sc.steps)),*/ loci(std::move(sc.loci)), noise(sc.noise)
{}
segment_computer_t(const geno_matrix& g, const qtl_chromosome* c, double step, double nz)
: g_this(&g), chr(c), gpc_vec(), noise(nz)
segment_computer_t(const geno_matrix& g, const chromosome* pchr, const locus_key& lk, double /*step*/, double nz)
: g_this(&g), chr(pchr, lk), gpc_vec(), noise(nz)
{
chr->copy_loci(loci);
steps = compute_steps(loci, step);
chr.copy_loci(loci);
/*steps = compute_steps(loci, step);*/
init();
}
segment_computer_t(const geno_matrix& g, const qtl_chromosome* c, double nz, const std::vector<double>& loc_vec)
: g_this(&g), chr(c), gpc_vec(), noise(nz)
segment_computer_t(const geno_matrix& g, const chromosome* pchr, const locus_key& lk, double nz/*, const std::vector<double>& loc_vec*/)
: g_this(&g), chr(pchr, lk), gpc_vec(), noise(nz)
{
chr->copy_loci(loci);
steps = loc_vec;
chr.copy_loci(loci);
/*steps = loc_vec;*/
init();
}
bool
operator == (const segment_computer_t& sc) const { return g_this == sc.g_this && steps == sc.steps && noise == sc.noise; }
operator == (const segment_computer_t& sc) const { return g_this == sc.g_this /*&& steps == sc.steps*/ && noise == sc.noise; }
void
init()
......@@ -326,8 +348,8 @@ struct segment_computer_t {
/*MSG_DEBUG("OPERATOR = :" << __LINE__);*/
g_this = sc.g_this;
chr = sc.chr;
chr->copy_loci(loci);
steps.swap(sc.steps);
chr.copy_loci(loci);
/*steps.swap(sc.steps);*/
/*gpc_vec.swap(sc.gpc_vec);*/
gpc_vec = std::move(sc.gpc_vec);
noise = sc.noise;
......@@ -336,11 +358,11 @@ struct segment_computer_t {
}
labelled_matrix<MatrixXd, label_type, double>
compute(const MatrixXd& LV)
compute(const MatrixXd& LV, const std::vector<double>& steps)
{
/*DUMP_FILE_LINE();*/
auto unphased_LV = g_this->unphased_LV();
auto joint_qtl_iterator = chr->qtl_state_iterator(unphased_LV);
auto joint_qtl_iterator = chr.qtl_state_iterator(unphased_LV);
/*MatrixXd lv = g_this->noisy_LV(LV, noise);*/
MatrixXd qtl_lv(LV.innerSize(), loci.size());
/*MSG_DEBUG(MATRIX_SIZE(lv) << std::endl << lv.transpose());*/
......@@ -348,7 +370,7 @@ struct segment_computer_t {
joint_qtl_iterator.init_expansion(LV, qtl_lv);
/*MSG_DEBUG(MATRIX_SIZE(qtl_lv));*/
/*MSG_DEBUG(qtl_lv.transpose());*/
auto tmp = compute();
auto tmp = compute(steps);
int n_states = tmp.innerSize();
std::vector<label_type> row_labels;
row_labels.reserve(n_states * joint_qtl_iterator.size());
......@@ -367,7 +389,7 @@ struct segment_computer_t {
/*ret.emplace_back(compute());*/
/*MSG_DEBUG(MATRIX_SIZE(qtl_lv));*/
/*MSG_DEBUG(qtl_lv.transpose());*/
ret.data.middleRows(row_ofs, n_states) = compute().data;
ret.data.middleRows(row_ofs, n_states) = compute(steps).data;
} else {
ret.data.middleRows(row_ofs, n_states) = MatrixXd::Zero(n_states, ret.data.outerSize());
}
......@@ -377,7 +399,7 @@ struct segment_computer_t {
}
labelled_matrix<MatrixXd, label_type, double>
compute()
compute(const std::vector<double>& steps)
{
return gpc_vec.fast_compute_over_segment(steps);
}
......@@ -388,12 +410,175 @@ struct segment_computer_t {
operator << (std::ostream& os, const segment_computer_t& sc)
{
return os << '<' << sc.g_this->name
<< " n_steps=" << sc.steps.size()
/*<< " n_steps=" << sc.steps.size()*/
<< " chr=" << sc.chr.chr->name
<< " noise=" << sc.noise << '>';
}
};
struct optim_segment_computer_t {
const geno_matrix* g_this;
qtl_chromosome chr;
std::vector<geno_prob_computer> gpc_vec;
std::vector<bool> gpc_not_zero;
std::vector<double> loci;
double noise;
std::vector<label_type> row_labels;
std::vector<MatrixXd> lv_vec;
size_t n_states;
optim_segment_computer_t()
: g_this(0), chr(), gpc_vec(), gpc_not_zero(), loci(), noise(), row_labels(), lv_vec(), n_states(0) {}
optim_segment_computer_t(optim_segment_computer_t&& other)
: g_this(other.g_this)
, chr(other.chr)
, gpc_vec(std::move(other.gpc_vec))
, gpc_not_zero(std::move(other.gpc_not_zero))
, loci(std::move(other.loci))
, noise(other.noise)
, row_labels(std::move(other.row_labels))
, lv_vec(std::move(other.lv_vec))
, n_states(other.n_states)
{
for (size_t i = 0; i < gpc_vec.size(); ++i) {
gpc_vec[i].locus = &loci;
gpc_vec[i].LV = &lv_vec[i];
}
}
optim_segment_computer_t(const optim_segment_computer_t& other)
: g_this(other.g_this)
, chr(other.chr)
, gpc_vec(other.gpc_vec)
, gpc_not_zero(other.gpc_not_zero)
, loci(other.loci)
, noise(other.noise)
, row_labels(other.row_labels)
, lv_vec(other.lv_vec)
, n_states(other.n_states)
{
for (size_t i = 0; i < gpc_vec.size(); ++i) {
gpc_vec[i].locus = &loci;
gpc_vec[i].LV = &lv_vec[i];
}
}
optim_segment_computer_t&
operator = (const optim_segment_computer_t& other)
{
g_this = other.g_this;
chr = other.chr;
gpc_vec = other.gpc_vec;
gpc_not_zero = other.gpc_not_zero;
loci = other.loci;
noise = other.noise;
row_labels = other.row_labels;
lv_vec = other.lv_vec;
n_states = other.n_states;
for (size_t i = 0; i < gpc_vec.size(); ++i) {
gpc_vec[i].locus = &loci;
gpc_vec[i].LV = &lv_vec[i];
}
return *this;
}
optim_segment_computer_t(const geno_matrix& g, const chromosome* pchr, const locus_key& lk, double nz, const MatrixXd& LV)
: g_this(&g), chr(pchr, lk), gpc_vec(), gpc_not_zero(), loci(), noise(nz), row_labels(), lv_vec(), n_states(0)
{
auto unphased_LV = g_this->unphased_LV();
chr.copy_loci(loci);
MatrixXd qtl_lv(LV.innerSize(), loci.size());
auto joint_qtl_iterator = chr.qtl_state_iterator(unphased_LV);
bool lv_ok = joint_qtl_iterator.init_expansion(LV, qtl_lv);
gpc_vec.reserve(joint_qtl_iterator.size());
lv_vec.reserve(joint_qtl_iterator.size());
{
geno_prob_computer tmp;
tmp.locus = &loci;
tmp.init(g_this);
n_states = tmp.unique_labels.size();
row_labels.reserve(n_states * joint_qtl_iterator.size());
for (int i = 0; i < joint_qtl_iterator.size(); ++i) {
row_labels.insert(row_labels.end(), tmp.unique_labels.begin(), tmp.unique_labels.end());
}
}
auto
add_gpc = [&] (bool lv_ok)
{
if (lv_ok) {
lv_vec.push_back(qtl_lv);
gpc_vec.emplace_back();
gpc_vec.back().locus = &loci;
gpc_vec.back().LV = &lv_vec.back();
gpc_vec.back().init(g_this);
if (gpc_vec.back().compute_partial_kernels()) {
gpc_not_zero.push_back(true);
MSG_DEBUG("GPC::LV" << std::endl << lv_vec.back());
} else {
gpc_not_zero.push_back(false);
gpc_vec.pop_back();
gpc_vec.emplace_back();
lv_vec.pop_back();
MSG_DEBUG("GPC::LV none. Update was OK but partial kernel computation failed.");
}
} else {
gpc_not_zero.push_back(false);
gpc_vec.emplace_back();
lv_vec.emplace_back();
MSG_DEBUG("GPC::LV none. Update failed.");
}
};
add_gpc(lv_ok);
for (; !joint_qtl_iterator.next(); ) {
add_gpc(joint_qtl_iterator.update(LV, qtl_lv));
}
MSG_DEBUG("NEW GPC row_labels " << row_labels);
#if 0
for (; !joint_qtl_iterator.next();) {
lv_vec.push_back(qtl_lv);
gpc_vec.emplace_back();
gpc_vec.back().locus = &loci;
gpc_vec.back().LV = &lv_vec.back();
gpc_vec.back().init(g_this);
if (joint_qtl_iterator.update(LV, lv_vec.back()) && (lv_vec.push_back(qtl_lv), gpc_vec.back().compute_partial_kernels())) {
gpc_not_zero.push_back(true);
} else {
gpc_not_zero.push_back(false);
gpc_vec.pop_back();
gpc_vec.emplace_back();
lv_vec.pop_back();
lv_vec.emplace_back();
}
MSG_DEBUG("GPC::LV" << std::endl << lv_vec.back());
}
#endif
}
labelled_matrix<MatrixXd, label_type, double>
compute(const std::vector<double>& steps)
{
labelled_matrix<MatrixXd, label_type, double> ret(row_labels, steps);
MSG_DEBUG(MATRIX_SIZE(ret.data));
auto nzi = gpc_not_zero.begin(), nzj = gpc_not_zero.end();
auto gpci = gpc_vec.begin();
int row_ofs = 0;
for (; nzi != nzj; ++nzi, ++gpci, row_ofs += n_states) {
if (*nzi) {
ret.data.middleRows(row_ofs, n_states) = gpci->fast_compute_over_segment_raw(steps);
} else {
ret.data.middleRows(row_ofs, n_states) = MatrixXd::Zero(n_states, steps.size());
}
}
return ret;
}
};
#endif
......@@ -322,6 +322,8 @@ struct qtl_chromosome {
const chromosome* chr;
std::vector<double> qtl;
qtl_chromosome() : chr(NULL), qtl() {}
qtl_chromosome(const chromosome* c)
: chr(c), qtl()
{}
......
......@@ -8,6 +8,8 @@
#include <fstream>
struct file {
static size_t open_count;
std::fstream m_impl;
std::string m_path;
......@@ -21,10 +23,13 @@ struct file {
: m_impl(path, mode), m_path(path)
{
if (!m_impl) {
throw file::error(MESSAGE("Couldn't open file " << path));
throw file::error(MESSAGE("Couldn't open file " << path << " (" << open_count << " already open files)"));
}
++open_count;
}
~file() { if (m_impl) { m_impl.close(); --open_count; } }