frontends4.h 59 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Spell-QTL  Software suite for the QTL analysis of modern datasets.
 * Copyright (C) 2016,2017  Damien Leroux <damien.leroux@inra.fr>, Sylvain Jasson <sylvain.jasson@inra.fr>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

18
19
20
21
22
23
#ifndef _SPEL_FRONTENDS_H_
#define _SPEL_FRONTENDS_H_

#include "cache2.h"
#include "basic_data.h"
#include "model.h"
24
// #include "../../attic/frontends2.h"
25
/*#include "model/tests.h"*/
26
#include <regex>
27
28

#include <boost/math/distributions/normal.hpp> // for normal_distribution
29

Damien Leroux's avatar
Damien Leroux committed
30
31
32
33
// #include "report_frontend.h"


// #include "excel_report.h"
34

35
36
#define SIGNAL_DISPLAY_ONELINER

37
38
#define CONSTRAINT_RESIDUAL_EPSILON 1.e-5

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
  using boost::math::normal; // typedef provides default type is double.
  using boost::math::cdf;
  using boost::math::mean;
  using boost::math::variance;
  using boost::math::quantile;
  using boost::math::complement;


typedef std::pair<const chromosome*, double> selected_locus;

inline bool operator < (const selected_locus& sl1, const selected_locus& sl2) { return sl1.first < sl2.first || (sl1.first == sl2.first && sl1.second < sl2.second); }

inline std::ostream& operator << (std::ostream& os, const selected_locus& sl) { return os << sl.first->name << ':' << sl.second; }


struct chromosome_search_domain {
    const chromosome* chrom;
    std::vector<double> loci;

    chromosome_search_domain(const chromosome* c, const std::vector<double>& l) : chrom(c), loci(l) {}

    struct const_iterator {
        const chromosome_search_domain* this_csd;
        std::vector<double>::const_iterator i;
        const_iterator() : this_csd(NULL), i(__()) {}
        const_iterator(const chromosome_search_domain* t, const std::vector<double>::const_iterator& i_) : this_csd(t), i(i_) {}
        bool operator == (const const_iterator& other) const { return i == other.i; }
        bool operator != (const const_iterator& other) const { return i != other.i; }
        /*bool operator < (const const_iterator& other) const { return i < other.i; }*/
        /*size_t operator - (const const_iterator& other) const { return i - other.i; }*/
        std::pair<const chromosome*, double> operator * () const { return {this_csd->chrom, *i}; }
        const_iterator& operator ++ () { ++i; return *this; }
        const_iterator& operator -- () { --i; return *this; }
        static std::vector<double>::const_iterator __() { static std::vector<double> _; return _.end(); }
    };

    const_iterator begin() const { return {this, loci.begin()}; }
    const_iterator end() const { return {this, loci.end()}; }
    const_iterator cbegin() const { return {this, loci.begin()}; }
    const_iterator cend() const { return {this, loci.end()}; }
};


typedef std::vector<chromosome_search_domain> genome_search_domain;

struct gsd_iterator {
    std::vector<chromosome_search_domain>::const_iterator csd_i, csd_j;
    chromosome_search_domain::const_iterator i, j;

    bool operator == (const gsd_iterator& other) const { return csd_i == other.csd_i && i == other.i; }
    bool operator != (const gsd_iterator& other) const { return csd_i != other.csd_i || i != other.i; }

    gsd_iterator&
        operator ++ ()
        {
            ++i;
            if (i == j) {
                MSG_DEBUG("at end of chromosome!");
                ++csd_i;
                if (csd_i != csd_j) {
                    i = csd_i->begin();
                    j = csd_i->end();
                } else {
                    i = {};
                    j = {};
                }
            }
            return *this;
        }

    std::pair<const chromosome*, double> operator * () const { return *i; }
};

namespace std {
    inline gsd_iterator begin(const genome_search_domain& gsd) { return {gsd.begin(), gsd.end(), gsd.begin()->begin(), gsd.begin()->end()}; }
    inline gsd_iterator end(const genome_search_domain& gsd) { return {gsd.end(), gsd.end(), {}, {}}; }
}


typedef std::vector<selected_locus> locus_set;

typedef std::vector<locus_set> model_descriptor;

std::pair<bool, double>
detect_strongest_qtl(chromosome_value chr, const locus_key& lk,
                     const model& M0, const std::vector<double> pos);

MatrixXd
ftest_along_chromosome(chromosome_value chr, const locus_key& lk,
                       const model& M0, const std::vector<double> pos);



struct signal_display {
#ifdef SIGNAL_DISPLAY_ONELINER
    static const char* tick(double x)
    {
        static const char* ticks[9] = { " ", "\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588" };
        return ticks[x < 0. ? 0
                            : x >= 1. ? 8
                                      : ((int) floor(x * 9))];
    }

    VectorXd values;
    int imax_;
    bool above_;

    signal_display(const VectorXd& v, int imax, bool above)
        : values(v.innerSize()), imax_(imax), above_(above)
    {
        values = v;
        double vmin = values.minCoeff();
        double vmax = values.maxCoeff();
        if (vmin == vmax) {
            values = (values.array() - vmin).matrix();
        } else {
            values = ((values.array() - vmin) / (vmax - vmin)).matrix();
        }
    }

    friend std::ostream& operator << (std::ostream& os, const signal_display& sd)
    {
        os << _WHITE << '[';
        for (int i = 0; i < sd.values.innerSize(); ++i) {
            if (i == sd.imax_) {
                os << (sd.above_ ? _GREEN : _RED);
            }
            os << tick(sd.values(i));
            if (i == sd.imax_) {
                os << _WHITE;
            }
        }
        return os << ']' << _NORMAL;
    }
#else
    braille_grid grid;

    signal_display(const chromosome& chr, const std::vector<double>& X, const VectorXd& y, int imax, double threshold)
        : grid(build(chr, X, y, imax, threshold))
    {}

    braille_grid
        build(const chromosome& chr, const std::vector<double>& X, const VectorXd& y, int imax, double threshold)
        {
            std::vector<double> Y(y.data(), y.data() + y.size());
            int padding_left = 0;
            int W = (int) (msg_handler_t::termcols() * .8);
            if (W > 1000) {
                W = 80;
            }
            braille_grid chr_map = chr.pretty_print(W, {}, {}, padding_left, false);

Damien Leroux's avatar
Damien Leroux committed
191
            braille_plot plot(W - padding_left, 5, 0, X.back(), 0, std::max(threshold, Y[imax]));
192
193
            plot.plot(X, Y);
            plot.hline(threshold, 1, 1, 0, 255, 0);
Damien Leroux's avatar
Damien Leroux committed
194
            bool above = Y[imax] > threshold;
195
196
197
198
199
200
201
202
            plot.vline(X[imax], 1, 0, above ? 0 : 255, above ? 255 : 0, 0);
            return plot.compose_vert(true, chr_map, false);
        }

    friend
        std::ostream&
        operator << (std::ostream& os, const signal_display& sd)
        {
203
204
205
            std::stringstream tmp;
            tmp << sd.grid;
            return os << tmp.str();
206
207
208
209
210
        }
#endif
};


211
struct model_manager;
Damien Leroux's avatar
Damien Leroux committed
212
213
214
215
216
217
struct test_result;

void make_report(model_manager& mm, const std::vector<std::string>& cmdline);
void report_computation(std::string trait_name, chromosome_value chr, std::vector<double>& loci, const computation_along_chromosome& cac, const test_result& tr);
void report_algo_phase(std::string descr);

218

219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
struct QTL {
    std::string chromosome;
    double locus;
    std::vector<double> LOD_loci;
    std::vector<double> LOD;

    QTL(const std::string& n, double l, const std::vector<double>& x, const MatrixXd& y)
        : chromosome(n), locus(l), LOD_loci(x), LOD(y.data(), y.data() + y.size())
    {
        /*MSG_DEBUG("QTL at " << chromosome << ':' << locus);*/
        /*MSG_DEBUG(y);*/
        /*MSG_DEBUG(MATRIX_SIZE(y));*/
        /*MSG_DEBUG("" << LOD);*/
    }

    static
        double
        interpolate(double x0, double y0, double x1, double y1, double yT)
        {
            double delta_x = x1 - x0;
            double delta_y = y1 - y0;
            return delta_x * (yT - y0) / delta_y + x0;
        }

Damien Leroux's avatar
Damien Leroux committed
243
    std::array<double, 4>
244
        confidence_interval(const std::string &trait, const std::vector<QTL> &selection, ComputationType lod_test_type);
245

Damien Leroux's avatar
Damien Leroux committed
246
    std::array<double, 4>
247
        confidence_interval();
248
249
250
};


Damien Leroux's avatar
Damien Leroux committed
251
#include "report.h"
252
253
254



Damien Leroux's avatar
Damien Leroux committed
255
enum probability_mode { Joint, Single, Skip };
256
257
258
259
260
261
262
263
264
265



typedef std::pair<double, double> forbidden_interval_type;
typedef std::vector<forbidden_interval_type> forbidden_interval_vector_type;
typedef std::map<chromosome_value, forbidden_interval_vector_type> forbidden_interval_map_type;

inline bool operator < (const forbidden_interval_type& fi1, const forbidden_interval_type& fi2) { return fi1.first < fi2.first; }

struct search_interval_type;
Damien Leroux's avatar
Damien Leroux committed
266
struct search_lg_type;
267
268

struct test_result {
Damien Leroux's avatar
Damien Leroux committed
269
    search_lg_type* this_interval;
270
271
272
273
274
    const chromosome* chrom;
    double locus;
    double test_value;
    int index;
    bool over_threshold;
275
276
277
278
    model_block_key pop_block_key;
    value<model_block_type> pop_block;
    model_block_key dom_block_key;
    value<model_block_type> dom_block;
279
280

    test_result()
281
        : chrom(NULL), locus(0), test_value(0), index(0), over_threshold(false), pop_block_key(), pop_block(), dom_block_key(), dom_block()
282
283
    {}

Damien Leroux's avatar
Damien Leroux committed
284
    test_result(search_lg_type* ti, const chromosome* c, double l, double tv, int i, bool ot, const model_block_key& mbk, const value<model_block_type>& mb, const model_block_key& mbkd, const value<model_block_type>& mbd)
285
        : this_interval(ti), chrom(c), locus(l), test_value(tv), index(i), over_threshold(ot), pop_block_key(mbk), pop_block(mb), dom_block_key(mbkd), dom_block(mbd)
286
287
288
289
290
291
    {}

    test_result(const test_result& tr)
        : this_interval(tr.this_interval),
        chrom(tr.chrom), locus(tr.locus), test_value(tr.test_value),
        index(tr.index), over_threshold(tr.over_threshold),
292
293
        pop_block_key(tr.pop_block_key), pop_block(tr.pop_block),
        dom_block_key(tr.dom_block_key), dom_block(tr.dom_block)
294
295
296
297
298
299
300
301
302
303
304
    {}

    test_result&
        operator = (const test_result& tr)
        {
            this_interval = tr.this_interval;
            chrom = tr.chrom;
            locus = tr.locus;
            test_value = tr.test_value;
            index = tr.index;
            over_threshold = tr.over_threshold;
305
306
307
308
            pop_block_key = tr.pop_block_key;
            pop_block = tr.pop_block;
            dom_block_key = tr.dom_block_key;
            dom_block = tr.dom_block;
309
310
311
312
313
314
315
316
317
318
            return *this;
        }

    void reset()
    {
        chrom = NULL;
        locus = 0;
        test_value = 0;
        index = 0;
        over_threshold = false;
319
        /*block_key.selection.clear();*/
320
321
322
323
        pop_block_key.reset();
        pop_block = value<model_block_type>();
        dom_block_key.reset();
        dom_block = value<model_block_type>();
324
325
326
327
328
    }

    friend
        std::ostream& operator << (std::ostream& os, const test_result& tr)
        {
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
            os << "<result chrom=" << (tr.chrom ? tr.chrom->name : "nil");
            os << " locus=" << tr.locus;
            os << " test=" << tr.test_value;
            os << " at=" << tr.index;
            os << " over?=" << tr.over_threshold;
            if (tr.pop_block_key) {
                os << " block_key=" << tr.pop_block_key;
            } else {
                os << " block_key=none";
            }
            if (tr.dom_block_key) {
                os << " dominance_block_key=" << tr.dom_block_key;
            } else {
                os << " dominance_block_key=none";
            }
            return os << '>';
345
346
        }

347
348
    bool
        operator < (const test_result& other) const
349
        {
350
            return test_value < other.test_value;
351
        }
352
353
354

    void
        select(model_manager& mm) const;
355
356
357
};


358
359
360
361
362
363
locus_probabilities_type
locus_probabilities(const context_key& ck, const locus_key& lk,
                    /*const MatrixXd& mgo,*/
                    int ind,
                    const std::vector<double>& loci);

Damien Leroux's avatar
Damien Leroux committed
364
365

struct search_lg_type {
366
    const chromosome* chrom;
367
    /* all positions in this interval */
Damien Leroux's avatar
Damien Leroux committed
368
369
370
371
    std::vector<probability_mode> local_modes;
    std::vector<locus_key> local_selections;
    std::map<locus_key, std::vector<double>> loci_by_selection;
    std::vector<double> all_positions, active_loci;
372
373
374
375
376
377
378
    /* all USED positions in this interval */
    std::vector<double> positions;
    /* positions actually used in the segment test (all_positions \ selection \ forbidden_intervals) */
    std::vector<double> effective_positions;
    /* current selection (subset of base model's selection for this search interval) */
    locus_key selection;
    /* the model blocks along the chromosome currently under study */
Damien Leroux's avatar
Damien Leroux committed
379
    collection<model_block_type> locus_blocks, single_blocks;
380
    /* dominance matrices per locus per population */
381
    collection<parental_origin_per_locus_type> dominance_blocks;
382
383
    test_result local_max;

Damien Leroux's avatar
Damien Leroux committed
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
    search_lg_type(const chromosome* chr, const std::vector<double>& steps, locus_key sel)
            : chrom(chr)
            , local_modes()
            , local_selections()
            , loci_by_selection()
            , all_positions(steps)
            , active_loci()
            , positions()
            , effective_positions()
            , selection(sel)
            , locus_blocks()
            , dominance_blocks()
            , local_max()
    {
        recompute_modes();
    }
400

Damien Leroux's avatar
Damien Leroux committed
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
    search_lg_type(const search_lg_type& slg)
            : chrom(slg.chrom)
            , local_modes(slg.local_modes)
            , local_selections(slg.local_selections)
            , loci_by_selection()
            , all_positions(slg.all_positions)
            , active_loci()
            , positions(slg.positions)
            , effective_positions(slg.effective_positions)
            , selection(slg.selection)
            , locus_blocks(slg.locus_blocks)
            , dominance_blocks(slg.dominance_blocks)
            , local_max(slg.local_max)
    {
        recompute_modes();
    }
417

418
    void
Damien Leroux's avatar
Damien Leroux committed
419
420
421
422
    compute_blocks(const collection<population_value>& all_pops)
    {
        if (locus_blocks.size()) {
            return;
423
        }
Damien Leroux's avatar
Damien Leroux committed
424
425
426
427
428
429
        for (const auto& vpop: all_pops) {
            context_key ck(new context_key_struc(*vpop, chrom, all_positions));
            for (auto& x: make_collection<Disk>(locus_probabilities,
                                                as_value(ck), as_value(locus_key{}), range<int>(0, (*vpop)->size(), 1),
                                                as_value(ck->loci))) {
                (void)*x;
430
431
            }
        }
Damien Leroux's avatar
Damien Leroux committed
432
433
434
435
436
437
438
        locus_blocks
                = compute_parental_origins_multipop(
                all_pops,
                as_value(chrom),
                local_selections,
                all_positions,
                active_loci);
439
//        MSG_DEBUG("Computing dominance probabilities");
Damien Leroux's avatar
Damien Leroux committed
440
441
442
443
444
445
446
447
        dominance_blocks
                = compute_dominance_multipop(
                all_pops,
                as_value(chrom),
                local_selections,
                all_positions,
                active_loci);
    }
448
449
450


    void
Damien Leroux's avatar
Damien Leroux committed
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
    recompute_modes(double force_pos=-1)
    {
        local_modes.clear();
        local_modes.resize(all_positions.size(), Single);
        local_selections.clear();
        local_selections.resize(all_positions.size());
        active_loci.clear();
        active_loci.reserve(all_positions.size());
        for (size_t i = 0; i < all_positions.size(); ++i) {
            double locus = all_positions[i];
            if (!!selection) {
                for (double l: selection) {
                    if (locus != force_pos && locus >= l - active_settings->Rskip && locus <= l + active_settings->Rskip) {
                        local_modes[i] = Skip;
                        local_selections[i].reset();
                        break;
                    } else if (locus >= l - active_settings->Rjoin && locus <= l + active_settings->Rjoin) {
                        local_modes[i] = Joint;
                        local_selections[i] = local_selections[i] + l;
                    }
471
472
                }
            }
Damien Leroux's avatar
Damien Leroux committed
473
474
            if (local_modes[i] != Skip) {
                active_loci.push_back(locus);
475
            }
Damien Leroux's avatar
Damien Leroux committed
476
            loci_by_selection[local_selections[i]].push_back(locus);
477
        }
Damien Leroux's avatar
Damien Leroux committed
478
479
480
        locus_blocks.clear();
        dominance_blocks.clear();
    }
481

482

483
    void
Damien Leroux's avatar
Damien Leroux committed
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
    test(const collection<population_value>& all_pops, int i0, computation_along_chromosome& cac,
         value<ComputationType> vct, value<ComputationResults> vcr, size_t y_block_cols,
         const value<model>& Mcurrent, const value<model>& Mbase, double threshold)
    {
        if (active_loci.size()) {
            compute_blocks(all_pops);
            compute_along_interval<>(i0, cac, vct, vcr, y_block_cols, Mcurrent, Mbase, local_selections, chrom, active_loci, locus_blocks);
            local_max = find_max(i0, vct, cac, threshold);
        } else {
            local_max.reset();
        }
    }

    std::pair<value<model_block_type>, value<model_block_type>>
    compute_at(const collection<population_value>& all_pops, double position)
    {
        recompute_modes(position);
        compute_blocks(all_pops);
        auto it = std::find(active_loci.begin(), active_loci.end(), position);
        if (it != active_loci.end()) {
            auto ofs = it - active_loci.begin();
            return {locus_blocks[ofs], dominance_blocks[ofs]};
506
        }
Damien Leroux's avatar
Damien Leroux committed
507
508
        return {};
    }
Damien Leroux's avatar
Damien Leroux committed
509
    
510
    test_result
Damien Leroux's avatar
Damien Leroux committed
511
512
513
514
515
    find_max(int i0, value<ComputationType> vct, computation_along_chromosome &cac, double threshold)
    {
        MSG_DEBUG("call to find_max");
        auto ct = *vct;
        auto last_computation =
516
                (ct == ComputationType::FTest    ? cac.ftest_pvalue
Damien Leroux's avatar
Damien Leroux committed
517
518
519
520
521
522
523
524
525
526
527
528
529
                                                 :ct == ComputationType::FTestLOD ? cac.ftest_lod
                                                                                  :ct == ComputationType::Chi2     ? cac.chi2
                                                                                                                   :/* has to be Chi2LOD */           cac.chi2_lod).middleCols(i0, active_loci.size());
        if (active_loci.size() != (size_t)(last_computation.outerSize())) {
            MSG_ERROR("LOCI INCONSISTENT WITH COMPUTATION RESULT (" << active_loci.size() << " vs " << last_computation.outerSize() << ")", "");
            abort();
        }
        int i_max = -1;
        double max = -1;
        for (int i = 0; i < last_computation.outerSize(); ++i) {
            if (last_computation(0, i) >= max) {
                max = last_computation(0, i);
                i_max = i;
530
            }
Damien Leroux's avatar
Damien Leroux committed
531
        }
532
533
534

#ifdef SIGNAL_DISPLAY_ONELINER
        signal_display sd(last_computation.transpose(), i_max, max > threshold);
535
        MSG_DEBUG("[COMPUTATION] " << active_loci.front() << sd << active_loci.back() << " max=" << max << " at " << active_loci[i_max]);
536
#else
Damien Leroux's avatar
Damien Leroux committed
537
538
        signal_display sd(*chrom, active_loci, last_computation.transpose(), i_max, threshold);
        MSG_DEBUG("[COMPUTATION] " << active_loci.front() << " ... " << active_loci.back() << " max=" << max << " at " << active_loci[i_max] << std::endl << sd);
539
540
541
#endif

        if (i_max == -1) {
Damien Leroux's avatar
Damien Leroux committed
542
543
544
545
            return {};
        }
        /*model_block_key k = locus_base_key;*/
        model_block_key mbk = model_block_key_struc::pop(chrom, local_selections[i_max] + active_loci[i_max]);
546

Damien Leroux's avatar
Damien Leroux committed
547
        return {
548
                this, chrom,
Damien Leroux's avatar
Damien Leroux committed
549
                active_loci[i_max], max, i_max, max > threshold,
550
                mbk,
551
552
553
                locus_blocks[i_max],
                dominance_blocks[i_max]->cols() ? model_block_key_struc::dominance(mbk) : model_block_key{},
                dominance_blocks[i_max]
Damien Leroux's avatar
Damien Leroux committed
554
555
        };
    }
556

557
    void
Damien Leroux's avatar
Damien Leroux committed
558
559
560
561
562
563
564
565
566
567
568
    select(const test_result& tr, value<model> M)
    {
        MSG_DEBUG("select test result " << tr << " model " << M->keys());
        assert(chrom == tr.chrom);
        selection = selection + tr.pop_block_key->loci;
        recompute_modes();
        M->add_block(tr.pop_block_key, tr.pop_block);
        if (tr.dom_block_key) {
            model ref = *M;
            ref.compute();
            M->add_block_if_test_is_good(tr.dom_block_key, tr.dom_block, ref);
569
        }
Damien Leroux's avatar
Damien Leroux committed
570
    }
571
572

    void
Damien Leroux's avatar
Damien Leroux committed
573
574
    deselect(double position, const collection<population_value>& all_pops, value<model> M)
    {
575
        auto i = M->m_blocks.begin(), j = M->m_blocks.end();
576
577
        std::vector<decltype(M->m_blocks.begin())> to_remove;
        for (; i != j; ++i) {
578
            auto& key = i->first;
Damien Leroux's avatar
Damien Leroux committed
579
//             auto& value = i->second;
580
            if (key->has_locus(chrom, position)) {
581
582
583
584
585
586
587
588
                to_remove.emplace_back(i);
            }
        }

        for (auto it: to_remove) {
            auto& key = it->first;
            if (!key->remove_position(chrom, position)) {
                M->m_blocks.erase(it);
589
            } else {
590
                reduce(all_pops, position, it->second);
Damien Leroux's avatar
Damien Leroux committed
591
592
593
594
595
596
            }
        }
        selection = selection - position;
        recompute_modes();
        /*deselect(position);*/
    }
597
598

    std::pair<model_block_key, model_block_key>
Damien Leroux's avatar
Damien Leroux committed
599
600
601
602
603
604
605
606
607
608
609
610
611
    select(double pos)
    {
        MSG_DEBUG("select position " << pos);
        size_t i_select = std::find(all_positions.begin(), all_positions.end(), pos) - all_positions.begin();
        std::pair<model_block_key, model_block_key> ret;
        if (local_modes[i_select] == Joint) {
            ret.second = model_block_key_struc::pop(chrom, selection);
        }
        selection = selection + pos;
        if (local_modes[i_select] == Single) {
            ret.first = model_block_key_struc::pop(chrom, locus_key{} + pos);
        } else {
            ret.first = model_block_key_struc::pop(chrom, selection);
612
        }
Damien Leroux's avatar
Damien Leroux committed
613
614
615
616
        locus_blocks.clear();
        recompute_modes();
        return ret;
    }
617
618

    std::pair<model_block_key, model_block_key>
Damien Leroux's avatar
Damien Leroux committed
619
620
621
622
623
624
625
626
627
628
629
630
631
    deselect(double pos)
    {
        MSG_DEBUG("deselect position " << pos);
        size_t i_select = std::find(all_positions.begin(), all_positions.end(), pos) - all_positions.begin();
        std::pair<model_block_key, model_block_key> ret;
        if (local_modes[i_select] == Joint) {
            ret.second = model_block_key_struc::pop(chrom, selection);
        } else {
            ret.second = model_block_key_struc::pop(chrom, locus_key{} + pos);
        }
        selection = selection - pos;
        if (local_modes[i_select] == Joint && selection) {
            ret.first = model_block_key_struc::pop(chrom, selection);
632
        }
Damien Leroux's avatar
Damien Leroux committed
633
634
635
636
        locus_blocks.clear();
        recompute_modes();
        return ret;
    }
637
638

    void
Damien Leroux's avatar
Damien Leroux committed
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
    reduce(const collection<population_value>& all_pops, double position, value<model_block_type>& vblock)
    {
        /* FIXME what about dominance?? */
        locus_key lk2 = selection - position;
        auto pop_it = all_pops.begin();
        auto pb = disassemble_parental_origins_multipop(chrom, selection->parent, *vblock, all_pops);
        std::vector<collection<parental_origin_per_locus_type>> all_popl;
        all_popl.reserve(pb.size());
        for (auto& vmat: pb) {
            const qtl_pop_type* pop = **pop_it++;
            context_key ck(new context_key_struc(pop, chrom, std::vector<double>()));
            MatrixXd red = selection->reduce(active_settings->parent_count_per_pop_per_chr
                                                     .find(pop->qtl_generation_name)->second.find(chrom)->second,
                                             position);
            MatrixXd data = vmat->data * red;
            vmat->data = data;
            vmat->column_labels = get_stpom_data(ck, lk2->parent)->row_labels;
            all_popl.emplace_back();
            all_popl.back().push_back(vmat);
        }
        vblock = assemble_parental_origins_multipop(as_value(chrom),
                                                    as_value(lk2->parent),
                                                    all_popl,
                                                    all_pops,
                                                    true)[0];
    }

    void
    clear()
    {
        selection = locus_key{};
        recompute_modes();
    }

    friend
    std::ostream&
    operator << (std::ostream& os, const search_lg_type& sl)
    {
        os << '{' << sl.selection << ' ';
        for (size_t i = 0; i < sl.all_positions.size(); ++i) {
            double locus = sl.all_positions[i];
            probability_mode mode = sl.local_modes[i];
            bool selected = !!sl.selection && sl.selection->has(locus);
            if (i) {
                os << ' ';
            }
            if (selected) {
                os << '[';
            }
            os << locus << ':' << (mode == Skip ? "Skip" : mode == Joint ? "Joint" : "Single");
            if (selected) {
                os << ']';
691
692
            }
        }
Damien Leroux's avatar
Damien Leroux committed
693
694
        return os << '}';
    }
695
696
697
698
};


typedef std::map<chromosome_value, std::vector<search_interval_type>> search_interval_map_type;
Damien Leroux's avatar
Damien Leroux committed
699
typedef std::map<chromosome_value, search_lg_type> search_lg_map_type;
700
701
702


inline
Damien Leroux's avatar
Damien Leroux committed
703
search_lg_map_type
704
705
skeleton_search_intervals()
{
Damien Leroux's avatar
Damien Leroux committed
706
    search_lg_map_type ret;
707

708
709
    if (active_settings->skeleton_mode == "auto") {
        for (const chromosome& chr: active_settings->map) {
Damien Leroux's avatar
Damien Leroux committed
710
            std::vector<double> pos;
711
712
713
            double accept = -1.;
            for (double l: chr.condensed.marker_locus) {
                if (l > accept) {
Damien Leroux's avatar
Damien Leroux committed
714
                    pos.push_back(l);
715
716
717
                    accept = l + active_settings->skeleton_interval;
                }
            }
Damien Leroux's avatar
Damien Leroux committed
718
719
720
            if (pos.size() > 0) {
                ret.emplace(&chr, search_lg_type{&chr, pos, locus_key{}});
            }
721
722
        }
    } else if (active_settings->skeleton_mode == "manual") {
Damien Leroux's avatar
Damien Leroux committed
723
        std::map<chromosome_value, std::vector<double>> pos;
724
725
726
727
728
729
730
731
        for (const auto& name: active_settings->skeleton_markers) {
            bool found = false;
            for (const chromosome& chr: active_settings->map) {
                auto li = chr.condensed.marker_locus.begin();
                auto lj = chr.condensed.marker_locus.end();
                auto ni = chr.condensed.marker_name.begin();
                for (; li != lj; ++li, ++ni) {
                    if (*ni == name) {
Damien Leroux's avatar
Damien Leroux committed
732
                        pos[&chr].push_back(*li);
733
734
735
736
737
738
739
740
                        found = true;
                        break;
                    }
                }
                if (found) {
                    break;
                }
            }
741
        }
Damien Leroux's avatar
Damien Leroux committed
742
743
744
        for (const auto& kv: pos) {
            ret.emplace(kv.first, search_lg_type{kv.first, kv.second, {}});
        }
745
746
    }

747
748
749
750
    return ret;
}


Damien Leroux's avatar
Damien Leroux committed
751
752


753
inline
Damien Leroux's avatar
Damien Leroux committed
754
search_lg_map_type
755
full_search_intervals(const std::map<chromosome_value, locus_key>& selection)
756
{
Damien Leroux's avatar
Damien Leroux committed
757
    search_lg_map_type ret;
758
759
760
    /* TODO: add definition of intervals to command line: chromosome, mode, beginning, end */

    for (const chromosome& chr: active_settings->map) {
Damien Leroux's avatar
Damien Leroux committed
761
762
763
764
765
        MSG_DEBUG("full_search_intervals on " << chr.name);
        auto si = selection.find(&chr);
        locus_key sel;
        if (si != selection.end()) {
            sel = si->second;
766
        }
Damien Leroux's avatar
Damien Leroux committed
767
768
        ret.emplace(&chr, search_lg_type{&chr, active_settings->estimation_loci[&chr], sel});
        MSG_DEBUG("Current interval for chromosome " << chr.name << ": " << ret.find(&chr)->second);
769
770
771
772
    }

    return ret;
}
773
774


775
776
777
778
void
read_locus_list(std::string& s, settings_t* target);


779
struct model_manager {
780

781
    /* name of the studied single_trait */
782
783
784
785
786
    std::string trait_name;
    double threshold;
    /* populations used in this model */
    collection<population_value> all_pops;
    /* thy reference model */
787
    value<model> vMcurrent;
788
    size_t y_block_cols;
789
790
    /* thy maximum order for interaction blocks */
    size_t max_order;
791
792

    /* all steps, split by haplotypic intervals, inside which joint probabilities must be computed */
Damien Leroux's avatar
Damien Leroux committed
793
    search_lg_map_type search_intervals;
794
795
    /* the current selection split by haplotypic interval */
    std::map<chromosome_value, std::vector<locus_key>> selection;
796
    /* a collection of intervals to not search in, if the user wishes so */
797
798
    forbidden_interval_map_type forbidden_intervals;

799
800


801
802
803
    /* structure to cache the results of the last computation along the chromosome */
    std::map<chromosome_value, computation_along_chromosome> cac;

804
    std::map<chromosome_value, std::vector<double>> testpos;
805
806
807
808

    /* additiona, colpops.front()->ancestor_namesl data for output and postprocessing */
    std::map<std::string, std::map<double, std::pair<double, double>>> qtl_confidence_interval;

809
    std::map<chromosome_value, MatrixXd*> last_computation;
Damien Leroux's avatar
Damien Leroux committed
810
    std::map<chromosome_value, std::vector<double>> last_test_positions;
811
812
    std::map<chromosome_value, test_result> last_best;

813
814
815
816
    analysis_report* reporter;

/* Construction
 */
817
    model_manager(const std::string& trait, const collection<population_value>& colpops,
818
                   const value<MatrixXd>& y,
819
                   size_t maximum_order = 1,
820
                   /*ComputationType ct = ComputationType::FTest,*/
821
                   SolverType st=SolverType::QR)
822
823
        : trait_name(trait)
        , all_pops(colpops)
824
825
        , vMcurrent(model{y, active_settings->get_threshold(trait, y->cols()), colpops, (*colpops.front())->ancestor_names, maximum_order, st})
        , y_block_cols(vMcurrent->Y().cols())
826
        , max_order(maximum_order)
827
        , search_intervals()
828
829
830
831
832
        , cac()
        , testpos()
        , qtl_confidence_interval()
        , reporter(NULL)
    {
833
        vMcurrent->add_block(model_block_key_struc::cross_indicator(), cross_indicator(trait));
834
835
836
837
838
        auto covars = make_covariables_block(trait, active_settings->with_covariables);
        MSG_DEBUG("(model_manager) covariables " << active_settings->with_covariables << " covars.cols=" << covars->data.cols());
        if (covars->data.cols() > 0) {
            vMcurrent->add_block(model_block_key_struc::covariables(), covars);
        }
839
840
        vMcurrent->compute();
        /*MSG_DEBUG("INITIAL MODEL SIZE: " << MATRIX_SIZE(vMcurrent->X()));*/
841
        /*MSG_QUEUE_FLUSH();*/
842
        active_settings->cross_indicator_can_interact = colpops.size() > 1;
843
844
    }

845
846
847
848
849
850
    void
        compute_model()
        {
            vMcurrent->compute();
        }

851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
    void
        cleanup_intervals(forbidden_interval_vector_type& intervals)
        {
            std::sort(intervals.begin(), intervals.end());
            forbidden_interval_vector_type output;
            output.reserve(intervals.size());
            output.push_back(intervals.front());
            for (size_t i = 1; i < intervals.size(); ++i) {
                if (output.back().second > intervals[i].first) {
                    output.back().second = std::max(output.back().second, intervals[i].second);
                } else {
                    output.push_back(intervals[i]);
                }
            }
            intervals.swap(output);
        }

    void
        add_forbidden_interval(chromosome_value chr, double start, double end)
        {
            auto& i = forbidden_intervals[chr];
            i.emplace_back(start, end);
            cleanup_intervals(i);
        }

    /* FIXME this can't remove a sub-interval or split intervals. need previous/next position info for each locus to do so. */
    void remove_forbidden_interval(chromosome_value chr, double start, double end)
        {
            auto& i = forbidden_intervals[chr];
            std::pair<double, double> p {start, end};
            auto it = std::find(i.begin(), i.end(), p);
            if (it != i.end()) {
                i.erase(it);
            }
        }

887
    void
Damien Leroux's avatar
Damien Leroux committed
888
        select(search_lg_type& si, double pos)
889
        {
890
            auto blocks = si.compute_at(all_pops, pos);
891
            auto ins_rm_key = si.select(pos);
Damien Leroux's avatar
Damien Leroux committed
892
            si.compute_blocks(all_pops);
893
894
895
            if (ins_rm_key.second) {
                vMcurrent->remove_block(ins_rm_key.second);
            }
896
            vMcurrent->add_block_with_interactions(ins_rm_key.first, blocks.first);
897
            if (blocks.second) {
898
899
900
                model ref(*vMcurrent);
                ref.compute();
                vMcurrent->add_block_with_interactions(model_block_key_struc::dominance(ins_rm_key.first), blocks.second, &ref);
901
            }
Damien Leroux's avatar
Damien Leroux committed
902
            debug_selection(MESSAGE("select(" << si.chrom->name << '[' << si.active_loci.front() << '-' << si.active_loci.back() << "]:" << pos << ')'));
903
904
        }

905
906
907
    void
        select(chromosome_value chr, double pos)
        {
Damien Leroux's avatar
Damien Leroux committed
908
909
910
            auto it = search_intervals.find(chr);
            if (it != search_intervals.end()) {
                select(it->second, pos);
911
            }
912
            debug_selection(MESSAGE("select(" << chr->name << ':' << pos << ')'));
913
914
915
916
917
        }

    void
        deselect(chromosome_value chr, double pos)
        {
Damien Leroux's avatar
Damien Leroux committed
918
919
920
            auto it = search_intervals.find(chr);
            if (it != search_intervals.end()) {
                it->second.deselect(pos, all_pops, vMcurrent);
921
            }
Damien Leroux's avatar
Damien Leroux committed
922
            it->second.compute_blocks(all_pops);
923
            debug_selection(MESSAGE("deselect(" << chr->name << ':' << pos << ')'));
924
925
926
927
928
929
930
        }

    void
        clear_selection()
        {
            for (auto& chr_vsi: search_intervals) {
                vMcurrent->remove_blocks_with_chromosome(chr_vsi.first);
Damien Leroux's avatar
Damien Leroux committed
931
                chr_vsi.second.clear();
932
            }
933
            debug_selection("clear_selection");
934
935
        }

936
937
938
939
940
    void
    debug_selection(const std::string& pfx)
    {
        MSG_DEBUG("OPERATION " << pfx << " Current selection " << get_selection() << " current keys " << vMcurrent->keys());
    }
941
942
943
944

    std::vector<std::string>
        split(const std::string& str, const std::string& regex)
        {
damien's avatar
damien committed
945
946
            std::regex re(regex);
            return {std::sregex_token_iterator(str.begin(), str.end(), re, -1), std::sregex_token_iterator()};
947
948
949
950
951
952
953
954
955
956
957
958
959
        }

    void
        set_selection(const std::string& selstr)
        {
            for (const auto& qtl: split(selstr, "\\s+")) {
                auto chr_loc = split(qtl, ":");
                double d;
                std::stringstream(chr_loc[1]) >> d;
                select(active_settings->find_chromosome(chr_loc[0]), d);
            }
        }

960
961
962
963
964
965
966
967
968
    value<model_block_type>
        create_interaction_block(const value<model_block_type>& b1, const value<model_block_type>& b2)
        {
            value<model_block_type> new_block = model_block_type{};
            auto& labels = new_block->column_labels;
            auto& mat = new_block->data;
            labels.reserve(b1->column_labels.size() * b2->column_labels.size());
            for (const auto& v1: b1->column_labels) {
                for (const auto& v2: b2->column_labels) {
969
970
971
                    /*MSG_DEBUG("creating interaction label from [" << v1 << "] and [" << v2 << ']');*/
                    labels.emplace_back();
                    labels.back().reserve(3 + v1.size() + v2.size());
972
973
974
975
976
977
978
979
980
981
982
983
984
                    labels.back().push_back('(');
                    labels.back().insert(labels.back().end(), v1.begin(), v1.end());
                    labels.back().push_back('/');
                    labels.back().insert(labels.back().end(), v2.begin(), v2.end());
                    labels.back().push_back(')');
                }
            }
            mat.resize(b1->data.rows(), b1->data.cols() * b2->data.cols());
            for (int i = 0; i < mat.rows(); ++i) {
                mat.row(i) = kroneckerProduct(b1->data.row(i), b2->data.row(i));
            }
            return new_block;
        }
985

986
987
    void
        add_block_with_interactions(model_block_key mbk, const value<model_block_type>& block)
988
        {
989
            vMcurrent->add_block_with_interactions(mbk, block);
990
991
        }

992
993
994

    const std::map<chromosome_value, computation_along_chromosome>&
        custom_test(ComputationType ct, ComputationResults cr, value<model>& Mbase,
995
                    const forbidden_interval_map_type& forbidden, bool clear_chromosome=false, chromosome_value chr=NULL)
996
        {
997
            std::pair<model_block_collection, std::vector<locus_key>> chrom_sel_backup;
998
            value<ComputationType> vct = ct;
999

Damien Leroux's avatar
Damien Leroux committed
1000
//             if (reporter && reporter->output_rss) { cr = cr | RSS; }