Commit 2099e079 authored by Sylvain Jasson's avatar Sylvain Jasson
Browse files
parents 1b76b172 294a9c43
This diff is collapsed.
......@@ -21,10 +21,12 @@ project(spell_qtl)
#INCLUDE(pandocology)
find_package(Boost 1.55.0 REQUIRED)
set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE TYPE INTERNAL FORCE)
#set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE INTERNAL FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
set(BUILD_FOR_DEPLOYMENT FALSE CACHE BOOL "Link against static libc++ and use minimal symbol version where possible")
MESSAGE(STATUS "CMAKE VERSION ${CMAKE_VERSION}")
MESSAGE(STATUS "${CMAKE_CURRENT_SOURCE_DIR}")
......@@ -70,6 +72,14 @@ include_directories(AFTER ${CMAKE_SOURCE_DIR}/include/ ${CMAKE_SOURCE_DIR}/inclu
include_directories(SYSTEM /usr/include ${Boost_INCLUDE_DIRS} ${EXPAT_INCLUDE_DIR} ${X2C_INCLUDE_DIR})
include_directories(AFTER /usr/include ${Boost_INCLUDE_DIRS} ${EXPAT_INCLUDE_DIR} ${X2C_INCLUDE_DIR})
if(${BUILD_FOR_DEPLOYMENT})
set(libstdcpp ${CMAKE_BINARY_DIR}/libstdc++.a)
else(${BUILD_FOR_DEPLOYMENT})
set(libstdcpp "")
endif(${BUILD_FOR_DEPLOYMENT})
MESSAGE(STATUS "libstdcpp = ${libstdcpp}")
set(SPELL_PEDIGREE_SRC
src/static_data.cc
src/pedigree/main.cc
......@@ -89,13 +99,66 @@ set(SPELL_QTL_SRC
src/computations/basic_data.cc src/computations/probabilities.cc src/computations/model.cc src/computations/frontends.cc
)
add_executable(spell-pedigree ${SPELL_PEDIGREE_SRC})
add_executable(spell-marker ${SPELL_MARKER_SRC})
add_executable(spell-qtl ${SPELL_QTL_SRC})
target_link_libraries(spell-marker dl)
target_link_libraries(spell-qtl expat dl)
set(CMAKE_EXE_LINKER_FLAGS "-rdynamic")
MESSAGE(STATUS "spell-pedigree src = ${SPELL_PEDIGREE_SRC}")
#get_cmake_property(_variableNames VARIABLES)
#foreach (_variableName ${_variableNames})
# message(STATUS "${_variableName}=${${_variableName}}")
#endforeach()
if(${BUILD_FOR_DEPLOYMENT})
add_executable(spell-pedigree ${SPELL_PEDIGREE_SRC} ${libstdcpp})
add_executable(spell-marker ${SPELL_MARKER_SRC})
add_executable(spell-qtl ${SPELL_QTL_SRC} ${libstdcpp})
add_custom_command(OUTPUT glibc.h libstdc++.a COMMAND ${CMAKE_SOURCE_DIR}/deploy/make_old_libc_header.sh WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
add_custom_command(OUTPUT libgmp.a COMMAND ${CMAKE_SOURCE_DIR}/deploy/compile_gmp.sh ${CMAKE_BINARY_DIR}/glibc.h ${CMAKE_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/deploy)
add_custom_target(glibc.h)
#add_custom_target(stdc++ ${CMAKE_BINARY_DIR}/libstdc++.a)
#add_library(stdc++ INTERFACE IMPORTED)
#set_property(TARGET stdc++ PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_BINARY_DIR}/libstdc++.a)
add_library(gmp INTERFACE IMPORTED)
add_custom_target(pouet ALL DEPENDS ${CMAKE_BINARY_DIR}/libgmp.a)
#set_property(TARGET gmp PROPERTY INTERFACE_LINK_LIBRARIES ${CMAKE_BINARY_DIR}/libgmp.a)
#set_target_properties(stdc++ PROPERTIES LINKER_LANGUAGE CXX)
#set_target_properties(gmp PROPERTIES LINKER_LANGUAGE C)
#add_dependencies(gmp stdc++)
#set_source_files_properties(${CMAKE_BINARY_DIR}/libgmp.a PROPERTIES GENERATED TRUE)
#set_source_files_properties(${libstdcpp} PROPERTIES GENERATED TRUE)
#set_source_files_properties(${CMAKE_BINARY_DIR}/libstdc++.a PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE)
SET_SOURCE_FILES_PROPERTIES(${SPELL_PEDIGREE_SRC} ${SPELL_MARKER_SRC} ${SPELL_QTL_SRC}
PROPERTIES
OBJECT_DEPENDS glibc.h
COMPILE_FLAGS "-include ${CMAKE_BINARY_DIR}/glibc.h -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0")
#add_dependencies(stdc++ libstdc++.a)
#add_dependencies(gmp libgmp.a)
add_dependencies(spell-pedigree glibc.h)
add_dependencies(spell-marker glibc.h pouet)
add_dependencies(spell-qtl glibc.h)
MESSAGE(STATUS " PROUT ${libstdcpp}")
set(CMAKE_EXE_LINKER_FLAGS "-include ${CMAKE_BINARY_DIR}/glibc.h -rdynamic -static-libgcc")
target_link_libraries(spell-marker ${libstdcpp} ${CMAKE_BINARY_DIR}/libgmp.a)
target_link_libraries(spell-qtl ${libstdcpp})
target_link_libraries(spell-pedigree ${libstdcpp})
#list(APPEND spell-pedigree_SRC "${CMAKE_BINARY_DIR}/libstdc++.a")
#list(APPEND spell-qtl_SRC "${CMAKE_BINARY_DIR}/libstdc++.a")
#list(APPEND spell-marker_SRC "${CMAKE_BINARY_DIR}/libstdc++.a")
else()
add_executable(spell-pedigree ${SPELL_PEDIGREE_SRC})
add_executable(spell-marker ${SPELL_MARKER_SRC})
add_executable(spell-qtl ${SPELL_QTL_SRC})
set(CMAKE_EXE_LINKER_FLAGS "-rdynamic")
endif()
target_link_libraries(spell-marker dl gmp)
target_link_libraries(spell-qtl expat dl rt)
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
......
#!/bin/bash
PATH_TO_GLIBC_H=$1
OUTPUT_PATH=$2
cd gmp
# fetch GMP
cd gmp-6.1.2 || (curl https://gmplib.org/download/gmp/gmp-6.1.2.tar.xz -o - | xz -d | tar xf - && cd gmp-6.1.2)
# cleanup and configure
[ -f Makefile ] && make clean
CFLAGS="-include $PATH_TO_GLIBC_H" ./configure --disable-shared --enable-fake-cpuid --enable-fat
# build
make
# deploy lib
cp .libs/libgmp.a $OUTPUT_PATH
#!/bin/bash
LIBCPP=/usr/lib/gcc/x86_64-linux-gnu/5/libstdc++.a
maxver=2.9
headerf=${1:-glibc.h}
set -e
for lib in libc.so.6 libm.so.6 libpthread.so.0 libdl.so.2 libresolv.so.2 librt.so.1; do
objdump -T /lib/x86_64-linux-gnu/$lib
done | awk -v maxver=${maxver} -vheaderf=${headerf} -vredeff=${headerf}.redef -f <(cat <<'EOF'
BEGIN {
split(maxver, ver, /\./)
limit_ver = ver[1] * 10000 + ver[2]*100 + ver[3]
}
/GLIBC_/ {
gsub(/\(|\)/, "",$(NF-1))
split($(NF-1), ver, /GLIBC_|\./)
vers = ver[2] * 10000 + ver[3]*100 + ver[4]
if (vers > 0) {
if (symvertext[$(NF)] != $(NF-1))
count[$(NF)]++
if (vers <= limit_ver && vers > symvers[$(NF)]) {
symvers[$(NF)] = vers
symvertext[$(NF)] = $(NF-1)
}
}
}
END {
for (s in symvers) {
if (count[s] > 1) {
printf("__asm__(\".symver %s,%s@%s\");\n", s, s, symvertext[s]) > headerf
printf("%s %s@%s\n", s, s, symvertext[s]) > redeff
}
}
}
EOF
)
sort ${headerf} -o ${headerf}
objcopy --redefine-syms=${headerf}.redef $LIBCPP libstdc++.a
rm ${headerf}.redef
......@@ -58,24 +58,47 @@ Enabled by default only if output is on a terminal.
**-na**,**--no-ansi**
: Don't use ANSI escape sequences, don't display colors or realtime progress information.
## Model options
**-rj**,**--join-radius** *DISTANCE*
: Specify the maximum distance from a selected locus to compute joint probabilities. Default is **10**.
**-rs**,**--skip-radius** *DISTANCE*
: Specify the maximum distance from a selected locus to skip tests. Default is **1**.
## Input datasets
The following configures the construction of the linear model.
The following specifies the datasets you want processed. A dataset specification starts with argument -p, followed by one or more arguments -m.
**output-nppop**
: Compute the n-point Parental Origin Probabilities and exit. The results will be written under the **WORK_DIRECTORY/NAME.n-point** directory.
**-gm**,**-genetic-map** *PATH*
: Path to the genetic map file.
**-p**,**-population** *QTL_generation_name* *PHEN_PATH*
: Specify a new population (dataset) to work on.
## Model options
The following configures the construction of the linear model.
**connected**
: Select connected mode. Disabled by default.
In connected mode, the same ancestors in two datasets share the same column in the linear model.
**epistasis**
: Detect epistasis. Disabled by default.
## Working set options
The following configures the analysis domain.
**lg** *LINKAGE GROUP NAMES*
: Specify the list of linkage groupe to study.
**covar** *COVARIABLE NAMES*
: Specify the list of covariables to put in the model.
**traits** *TRAIT NAMES*
: Specify the list of traits to analyse.
**pleiotropy** *TOLERANCE*
: Detect pleiotropic QTLs. Disabled by default.
**pleiotropy** *PLEIOTROPIC_TRAIT_NAME* *TOLERANCE* *TRAIT_NAMES*
: Specify a pleiotropic trait. This trait will be added to the list of traits to analyze. 1.e-3 is a good default value for *TOLERANCE*.
## Processing options
......@@ -86,6 +109,9 @@ The standard pipeline is:
3. QTLs detection
4. effects estimation
**output-nppop**
: Compute the n-point Parental Origin Probabilities and exit. The results will be written under the **WORK_DIRECTORY/NAME.n-point** directory.
**qtl-threshold-permutations** *VALUE*
: Set the number of permutations to compute the QTL threshold value in automatic mode. Default is **10000**.
......@@ -104,6 +130,9 @@ The standard pipeline is:
**step** *VALUE*
: Step size in cM. Defaults to **1**.
**lod-support** *VALUE*
: LOD support value. Defaults to **1**.
**skeleton** *MODE* *marker,...\ OR \ distance*
: Setup the cofactor detection skeleton. Mode can be either *manual*, *auto* or *none*.
......
......@@ -24,7 +24,7 @@
#include <cstring>
#include <unordered_map>
#include <boost/dynamic_bitset.hpp>
#include <gmp.h>
/* La spec est dans le code. */
......@@ -369,6 +369,7 @@ struct table_descr {
std::vector<state_index_type> offset_to_cursor;
uint64_t offset;
state_index_type mask;
bool unif;
inline
const genotype_comb_type::element_type&
......@@ -639,11 +640,14 @@ struct joint_variable_product_type {
tab.offset = 0;
coordinates.resize(tab.variable_names.size());
tab.offset_to_cursor.reserve(tab.data->size());
tab.unif = true;
double coef = tab.data->begin()->coef;
for (uint64_t i = 0; i < tab.data->size(); ++i) {
get_coords_at(tab, i, coordinates);
state_index_type cursor = compute_local_index(coordinates, tab);
tab.offset_to_cursor.push_back(cursor);
tab.domain_to_offset[cursor] = i;
tab.unif = tab.unif && std::abs(coef - tab.data->m_combination[i].coef) > _EPSILON;
}
if (0) {
std::stringstream ss;
......@@ -881,33 +885,40 @@ struct joint_variable_product_type {
#endif
inline
double
compute_coef()
void
compute_coef(mpf_t& accum)
{
auto ti = tables.begin();
auto tj = tables.end();
double accum = ti->current_element().coef;
mpf_t coef;
mpf_init_set_d(accum, ti->current_element().coef);
mpf_init(coef);
for (++ti; ti != tj; ++ti) {
accum *= ti->current_element().coef;
if (!ti->unif) {
mpf_set_d(coef, ti->current_element().coef);
mpf_mul(accum, accum, coef);
}
}
return accum;
}
inline
state_index_type
next_state(state_index_type from)
{
uint64_t valid = 0, all = (1 << tables.size()) - 1;
// uint64_t valid = 0, all = (1 << tables.size()) - 1;
boost::dynamic_bitset<> valid;
valid.resize(tables.size(), 0);
size_t ti = 0;
while (valid != all && !from.is_bad()) {
/*MSG_DEBUG("valid " << valid << " all " << all << " table " << (*tables[ti].data));*/
while (!valid.all() && !from.is_bad()) {
// MSG_DEBUG("valid " << valid << " all " << all << " table " << (*tables[ti].data));
if (!move_cursor_if_state_is_valid(tables[ti], from)) {
auto range = find_offsets_around_cursor(tables[ti], tables[ti].local(from));
/*MSG_DEBUG("offsets are " << range.first << " and " << range.second);*/
// MSG_DEBUG("offsets are " << range.first << " and " << range.second);
state_index_type
c1 = merge_cursors(tables[ti], from, tables[ti].offset_to_cursor[range.first]),
c2 = merge_cursors(tables[ti], from, tables[ti].offset_to_cursor[range.second]);
/*MSG_DEBUG("from = " << dump(from) << " c1 = " << dump(c1, tables[ti]) << " c2 = " << dump(c2, tables[ti]));*/
// MSG_DEBUG("from = " << dump(from) << " c1 = " << dump(c1, tables[ti]) << " c2 = " << dump(c2, tables[ti]));
state_index_type tmp; // = std::min(c1, c2);
if (c1 > from) {
if (c2 > from) {
......@@ -920,15 +931,16 @@ struct joint_variable_product_type {
} else {
return state_index_type::bad();
}
/*MSG_DEBUG("new lower bound is now " << dump(tmp));*/
// MSG_DEBUG("new lower bound is now " << dump(tmp));
if (tmp < from) {
return state_index_type::bad();
}
from = tmp;
valid = 0;
valid.reset();
// valid = 0;
/*ti = !ti;*/
} else {
valid |= 1 << ti;
valid.set(ti);
}
++ti;
ti %= tables.size();
......@@ -964,6 +976,8 @@ struct joint_variable_product_type {
genotype_comb_type
compute_generic()
{
if (invalid_product) { return {}; }
if (total_bits >= 32) {
std::vector<size_t> output_variable_indices;
for (size_t i = 0; i < output_variables_in_use.size(); ++i) {
......@@ -1010,7 +1024,6 @@ struct joint_variable_product_type {
}
key_computing_variant key_update;
if (invalid_product) { return {}; }
genotype_comb_type ret;
size_t counter = 0;
......@@ -1019,27 +1032,39 @@ struct joint_variable_product_type {
/*MSG_DEBUG(key.size()); MSG_QUEUE_FLUSH();*/
state_index_type z;
z.reset();
state_index_type cursor = next_state(z), last = cursor;
std::unordered_map<genotype_comb_type::key_list, double> values;
double norm = 0;
MSG_DEBUG("Initializing cursor");
state_index_type
cursor = next_state(z),
last = cursor;
std::unordered_map<genotype_comb_type::key_list, mpf_t> values;
mpf_t norm;
mpf_init(norm);
if (cursor.is_bad()) {
return ret;
}
for (;;)
{
mpf_t accum;
mpf_init(accum);
for (;;) {
/*if (++counter > 10) {*/
/*break;*/
/*}*/
/*jvp.compute_coordinates(cursor, coords);*/
/*MSG_DEBUG(std::bitset<7>(cursor) << ' ' << coords[0] << ' ' << coords[1] << ' ' << coords[2]);*/
/*MSG_DEBUG("COMPUTE @" << dump(cursor));*/
// MSG_DEBUG("COMPUTE @" << dump(cursor));
key_update(*this, key);
double coef = compute_coef();
if (coef != 0) {
norm += coef;
values[key] += coef;
mpf_clear(accum);
compute_coef(accum);
if (mpf_cmp_d(accum, 0)) {
mpf_add(norm, norm, accum);
auto it = values.find(key);
mpf_t& value = values[key];
if (it == values.end()) {
mpf_init_set(value, accum);
} else {
mpf_add(value, value, accum);
}
}
++counter;
......@@ -1062,15 +1087,17 @@ struct joint_variable_product_type {
}
}
ret.m_combination.reserve(values.size());
if (norm > 0) {
norm = 1. / norm;
if (mpf_cmp_d(norm, 0) > 0) {
mpf_ui_div(norm, 1, norm);
// norm = 1. / norm;
}
for (const auto& kv: values) {
ret.m_combination.emplace_back(kv.first, kv.second * norm);
mpf_mul(accum, kv.second, norm);
ret.m_combination.emplace_back(kv.first, mpf_get_d(accum));
/*ret.m_combination.emplace_back(kv.first, kv.second);*/
}
std::sort(ret.m_combination.begin(), ret.m_combination.end());
/*MSG_DEBUG("Computed " << counter << " coefficients, projected onto " << ret.size() << " elements");*/
MSG_DEBUG("Computed " << counter << " coefficients, projected onto " << ret.size() << " elements");
return ret;
}
......
......@@ -1494,18 +1494,20 @@ struct instance_type {
return sub_instances[op.emitter]->compute(op.receiver, domains);
}
multiple_product_type mp;
/*MSG_DEBUG("evidence: " << evidence[op.emitter]);*/
MSG_DEBUG("evidence: " << evidence[op.emitter]);
mp.add(evidence[op.emitter]);
/*MSG_DEBUG("internal evidence: " << internal_evidence[op.emitter]);*/
MSG_DEBUG("internal evidence: " << internal_evidence[op.emitter]);
mp.add(internal_evidence[op.emitter]);
/*MSG_DEBUG("table: " << tables[op.emitter]);*/
// MSG_DEBUG("table: " << tables[op.emitter]);
/*MSG_DEBUG("incoming domains: " << op.incoming_domains);*/
MSG_DEBUG("Add emitter");
mp.add(tables[op.emitter]);
auto inci = op.incoming_messages.begin(), incj = op.incoming_messages.end();
auto domi = op.incoming_domains.begin();
std::vector<message_type> msgs;
msgs.reserve(op.incoming_messages.size());
for (; inci != incj; ++inci, ++domi) {
MSG_DEBUG("Add incoming");
msgs.emplace_back(messages[*inci] % *domi);
/*MSG_DEBUG("input #" << (*inci) << ": raw " << messages[*inci]);*/
/*MSG_DEBUG("input #" << (*inci) << ": dom " << (*domi));*/
......@@ -1513,6 +1515,7 @@ struct instance_type {
/*mp.add(msgs.back());*/
mp.add(messages[*inci]);
}
MSG_DEBUG("Compute.");
auto ret = mp.compute(op.output, domains);
MSG_DEBUG("" << ret);
MSG_DEBUG("");
......
......@@ -34,7 +34,7 @@ extern "C" {
#include <ftw.h>
}
#define OUT(__x__) do { msg_handler_t::cout() << __x__ << std::endl; } while (0)
#define OUT(__x__) do { msg_handler_t::cout() << __x__; } while (0)
/*#include "computations.h"*/
......
......@@ -525,11 +525,16 @@ inline
std::vector<trait>
assemble_traits(const std::vector<trait>& src, const std::vector<std::string>& with_traits, const std::vector<pleiotropy_descr>& pleio) {
std::vector<trait> ret(src.size() + pleio.size());
auto it = std::copy_if(src.begin(), src.end(), ret.begin(), [&](const trait& t) { return std::find(with_traits.begin(), with_traits.end(), t.name) != with_traits.end(); });
MSG_DEBUG("with_traits.empty() ? " << with_traits.empty());
auto it = with_traits.empty()
? std::copy(src.begin(), src.end(), ret.begin())
: std::copy_if(src.begin(), src.end(), ret.begin(),
[&] (const trait& t) { return std::find(with_traits.begin(), with_traits.end(), t.name) != with_traits.end(); });
ret.resize(it - ret.begin());
for (const auto& d: pleio) {
ret.emplace_back(src, d.traits, d.name, d.tolerance);
}
MSG_DEBUG("assembled traits " << ret);
return ret;
}
......
......@@ -35,7 +35,7 @@ extern "C" {
#include <ftw.h>
}
#define OUT(__x__) do { msg_handler_t::cout() << __x__ << std::endl; } while (0)
#define OUT(__x__) do { msg_handler_t::cout() << __x__; } while (0)
/*#include "computations.h"*/
......
......@@ -61,7 +61,7 @@ std::vector<trait> read_trait(file& is, const std::vector<std::string>& filter_n
}
++counter;
}
if (ret.back().good_indices.size() == 0 || std::find(filter_names.begin(), filter_names.end(), name) == filter_names.end()) {
if (ret.back().good_indices.size() == 0 || (!!filter_names.size() && std::find(filter_names.begin(), filter_names.end(), name) == filter_names.end())) {
ret.pop_back();
} else {
ret.back().dim_names = {ret.back().name};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment