Commit aa3dd51d authored by Damien Leroux's avatar Damien Leroux
Browse files

New implementation for geno_matrix and pedigrees.

Aims at a perfect handling of reentrant individuals and symmetries in
order to overlump the transition matrices.
parent ba8b2017
......@@ -31,7 +31,8 @@
#define LOCUS_EPSILON (1.e-5)
typedef labelled_matrix<GenoMatrix, allele_pair> GenerationRS;
/*typedef labelled_matrix<GenoMatrix, allele_pair> GenerationRS;*/
typedef geno_matrix GenerationRS;
typedef Eigen::Matrix<bool, Eigen::Dynamic, 1> VectorXb;
typedef Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> MatrixXb;
......
This diff is collapsed.
......@@ -25,15 +25,15 @@ COMPUTATIONS_SRC=basic_data.cc probabilities.cc model.cc frontends.cc
SRC=$(addprefix input/,$(INPUT_SRC)) $(addprefix computations/,$(COMPUTATIONS_SRC)) $(MAIN_SRC)
OBJ=$(subst .cc,.o,$(SRC))
DEP=$(subst .cc,.d,$(SRC)) test_script.d matrixexp4.d
DEP=$(subst .cc,.d,$(SRC)) test_script.d geno_matrix.d
COV_OBJ=$(subst .cc,.cov.o,$(SRC))
ALL_BUT_MAIN_OBJ=$(subst main.o ,,$(OBJ))
#DEBUG_OPTS=-ggdb
#DEBUG_OPTS=-ggdb -DNDEBUG
OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
#OPT_OPTS=-O3 -DNDEBUG
#OPT_OPTS=-O3 -DEIGEN_NO_DEBUG -DNDEBUG
OPT_OPTS=-O3 -DNDEBUG
#PROF_OPTS=-O2 -DEIGEN_NO_DEBUG -DNDEBUG -g -pg
#DEBUG_OPTS=-ggdb
......@@ -164,5 +164,5 @@ test_bayes_dai: ../sandbox/bayes.cc bayes.d $(ALL_BUT_MAIN_OBJ)
test_sib: pedigree_analysis.cc static_data.o
$C $< static_data.o -o $@
matrixexp: matrixexp4.cc ../include/lumping2.h
$C -DORDER=$(ORDER) $< -o $@
matrixexp: geno_matrix.cc ../include/lumping2.h ../include/geno_matrix.h Makefile
$C -DORDER=$(ORDER) $< -o $@ -DTEST_GENO_MATRIX
for (i in (2:30)*2) {
x <- read.table(paste('min_angles_', i, '.txt', sep=''), header=T)
#for (i in (2:32)*2) {
for (i in c(12, 18, 24, 30, 36, 42, 48, 54, 60)) {
x <- read.table(paste('min_angles_MAGIC4_', i, '.txt', sep=''), header=T)
cat("Best gen for", i, "states:", x$gen[which.max(x$max - x$min)], "\n")
}
This diff is collapsed.
This diff is collapsed.
if (0) {
all.data <- array(dim=c(27, 3, 3, 3, 3, 11, 11, 19))
all.counts <- array(dim=c(27, 3, 3, 3, 3, 11, 11, 19))
L <- c('A', 'H', 'B')
......@@ -41,3 +42,102 @@ make.plot <- function(g, d1, d2, m123) {
plot(tab[, 1], ylab="Frequency", ylim=c(0, 1), col="blue", type="p", main=titolo)
lines(tab[, 2], col="green", type="p"); lines(tab[, 3], col="red", type="p")
}
}
library(multicore)
inv.h <- function(r) if (r < .5) round(50 * log(1 / (1 - 2 * r)), 3) else "+∞"
get.base.dir <- function(gen.path) paste(gen.path, dir(gen.path), sep='/')
get.letters <- function(path) rawToChar(unique(unlist(lapply(dir(path=paste(path, '0/0', sep='/'), pattern='^...$'), charToRaw))))
.get.files <- function(path, pattern='^.*$', name.func=NULL) {
f <- dir(path=path, pattern=pattern)
ret <- lapply(f, function(x) paste(path, x, sep='/'))
if (!is.null(name.func)) {
names(ret) <- sapply(f, name.func)
}
ret
}
get.all.files <- function(gen.path) {
lapply(.get.files(gen.path, name.func=function(n) n),
function(path.d1) {
lapply(.get.files(path.d1, name.func=function(n) n),
function(path.d2) {
.get.files(path.d2, '.*txt$', function(n) substr(n, 1, 3))
})
})
}
get.all.data <- function(filelist) {
mclapply(filelist,
function(fl.d1) {
mclapply(fl.d1,
function(fl.d2) {
mclapply(fl.d2,
read.table, sep="\t", header=T)
})
})
}
load.gen <- function(gen.path) {
base <- get.base.dir(gen.path)
dat <- get.all.data(get.all.files(base))
geno <- colnames(dat[[1]][[1]][[1]])[-1]
list(gen=gen.path,
base=base,
L=get.letters(base),
geno=geno,
data=dat)
}
make.plot <- function(g, d1, d2, m123) {
m1 <- substr(m123, 1, 1)
m2 <- substr(m123, 2, 2)
m3 <- substr(m123, 3, 3)
tab <- g$data[[paste(d1)]][[paste(d2)]][[m123]][, -1]
tab <- tab[-nrow(tab), ]
tab <- tab / rowSums(tab)
#print(tab)
d.coef <- 2 * (length(g$data) - 1)
titolo <- paste("Genotype frequencies between M1 and M2 in generation ", g$gen,
"\nM1[", m1, "]--", inv.h(d1 / d.coef), "cM--M2[", m2, "]--", inv.h(d2 / d.coef), "cM--M3[", m3, "]", sep="")
palette(rainbow(length(g$geno)))
color <- 1
#print(g$geno[1])
plot(tab[, g$geno[1]], ylab="Frequency", ylim=c(0, 1), col=color, main=titolo)
for (geno in g$geno[-1]) {
color <- color + 1
lines(tab[, geno], col=color, type="p")
}
}
col.to.geno.Fx <- list(A='aa', B='bb', H=c('ab', 'ba'))
col.to.geno.MAGIC <- list(A='aa', B='bb', C='cc', D='dd', E='ee', F='ff', G='gg', H='hh')
all_cols <- function(df, prefix) colnames(df)[which(substr(colnames(df), 1, nchar(prefix)) == prefix)]
sum_cols <- function(df, prefix) {
ac <- unlist(lapply(c(prefix), function(p) all_cols(df, p)))
if (length(ac) > 1) rowSums(df[, ac]) else df[, ac]
}
compare.plot <- function(g, d1, d2, m123, values.suffix, col.to.geno) {
make.plot(g, d1, d2, m123)
tab <- read.table(paste('../values-', m123, '.', values.suffix, '.txt', sep=''), header=T)
print(g$L)
for (i in 1:nchar(g$L)) {
lines(sum_cols(tab, col.to.geno[[substr(g$L, i, i)]]), col=i)
}
}
......@@ -49,9 +49,9 @@ void add_stats(const chromosome_t& chr)
size_t m3 = get_obs_i(chr, N_MARK + 2);
auto& table = tables[m1][m2][m3];
/*dump_chromosome(chr);*/
std::cout << "m1=" << m1 << " m2=" << m2 << " m3=" << m3 << std::endl;
/*std::cout << "m1=" << m1 << " m2=" << m2 << " m3=" << m3 << std::endl;*/
for (size_t i = 0; i < N_MARK + 3; ++i) {
std::cout << "#" << i << ": " << get_obs_i(chr, i) << std::endl;
/*std::cout << "#" << i << ": " << get_obs_i(chr, i) << std::endl;*/
table[i][get_obs_i(chr, i)] += 1;
}
}
......@@ -233,10 +233,10 @@ void init_job(int argc, char** argv)
M3_DIST_I = atoi(argv[4]);
N_REP = atoi(argv[5]);
std::cout << "DIST_N=" << DIST_N << std::endl;
std::cout << "M2_DIST_I=" << M2_DIST_I << std::endl;
std::cout << "M3_DIST_I=" << M3_DIST_I << std::endl;
std::cout << "DIST_N=" << DIST_N << std::endl;
/*std::cout << "DIST_N=" << DIST_N << std::endl;*/
/*std::cout << "M2_DIST_I=" << M2_DIST_I << std::endl;*/
/*std::cout << "M3_DIST_I=" << M3_DIST_I << std::endl;*/
/*std::cout << "DIST_N=" << DIST_N << std::endl;*/
/* prepare map */
double r2 = .5 * M2_DIST_I / DIST_N;
......@@ -245,11 +245,11 @@ void init_job(int argc, char** argv)
init_gen_map(r2, r3);
std::cout << "MAP:";
for (uint64_t d: gen_map) {
std::cout << ' ' << (d / (double) UINT64_MAX);
}
std::cout << std::endl;
/*std::cout << "MAP:";*/
/*for (uint64_t d: gen_map) {*/
/*std::cout << ' ' << (d / (double) UINT64_MAX);*/
/*}*/
/*std::cout << std::endl;*/
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment