output.h 42.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Spell-QTL  Software suite for the QTL analysis of modern datasets.
 * Copyright (C) 2016,2017  Damien Leroux <damien.leroux@inra.fr>, Sylvain Jasson <sylvain.jasson@inra.fr>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

18
19
20
21
22
#ifndef _SPELL_BAYES_OUTPUT_H_
#define _SPELL_BAYES_OUTPUT_H_

#include <map>
#include <vector>
Damien Leroux's avatar
Damien Leroux committed
23
24
25
#include <map>
#include <set>
#include <unordered_set>
26
#include <unordered_map>
27
28
#include <string>
#include <iostream>
29
#include "file.h"
30
31
32
33
#include <cstring>

#include "eigen.h"
#include "error.h"
34
#include "data/chromosome.h"
35
/*#include "generation_rs_fwd.h"*/
36
#include "input/read_trait.h"
37
#include "tensor.h"
38
#include "linear_combination.h"
39
40
41
42

/** FOURCC **/

inline
43
bool check_fourcc(ifile& ifs, const char* fourcc)
44
45
46
47
48
49
50
{
    char buf[4] = {0, 0, 0, 0};
    ifs.read(buf, 4);
    return strncmp(fourcc, buf, 4);
}

inline
51
void write_fourcc(ofile& ofs, const char* fourcc)
52
53
54
55
56
57
58
{
    ofs.write(fourcc, 4);
}

/** SIZE_T **/

inline
59
void write_size(ofile& ofs, size_t sz)
60
61
62
63
64
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
65
size_t read_size(ifile& ifs)
66
67
68
69
70
71
{
    size_t ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

72
73
74
/** PTRDIFF_T **/

inline
75
void write_ptrdiff(ofile& ofs, ptrdiff_t sz)
76
77
78
79
80
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
81
ptrdiff_t read_ptrdiff(ifile& ifs)
82
83
84
85
86
87
{
    ptrdiff_t ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

88
89
90
/** STRING **/

inline
91
std::string read_str(ifile& ifs)
92
93
94
95
96
97
98
99
{
    size_t sz = read_size(ifs);
    std::vector<char> tmp(sz);
    ifs.read(&tmp[0], sz);
    return {tmp.begin(), tmp.end()};
}

inline
100
void write_str(ofile& ofs, const std::string& s)
101
102
103
104
105
106
107
108
{
    write_size(ofs, s.size());
    ofs.write(s.c_str(), s.size());
}

/** DOUBLE **/

inline
109
void write_double(ofile& ofs, double sz)
110
111
112
113
114
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
115
double read_double(ifile& ifs)
116
117
118
119
120
121
122
123
124
{
    double ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

/** INT **/

inline
125
void write_int(ofile& ofs, int sz)
126
127
128
129
130
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
131
int read_int(ifile& ifs)
132
133
134
135
136
137
{
    int ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

138
139
140
/** CHAR **/

inline
141
void write_char(ofile& ofs, char sz)
142
143
144
145
146
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
147
char read_char(ifile& ifs)
148
149
150
151
152
153
154
{
    char ret;
    ifs.read(&ret, sizeof ret);
    return ret;
}

#if 0
155
156
157
/** FAST_POLYNOM **/

inline
158
void write_fast_polynom(ofile& ofs, const fast_polynom& fp)
159
160
161
162
163
164
165
166
167
168
169
170
{
    impl::f_polynom f = fp;
    write_int(ofs, fp.value);
    write_int(ofs, f.r_exp);
    write_int(ofs, f.s_exp);
    write_size(ofs, f.P.size());
    for (double c: f.P) {
        write_double(ofs, c);
    }
}

inline
171
fast_polynom read_fast_polynom(ifile& ifs, int& original_key)
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
{
    original_key = read_int(ifs);
    impl::f_polynom ret = fast_polynom::zero;
    ret.r_exp = read_int(ifs);
    ret.s_exp = read_int(ifs);
    size_t sz = read_size(ifs);
    ret.P.resize(sz);
    for (size_t i = 0; i < sz; ++i) {
        ret.P[i] = read_double(ifs);
    }
    return ret;
}

/** ALGREBRAIC GENOTYPE **/

187
inline void write_algebraic_genotype(ofile& ofs, const algebraic_genotype& ag)
188
189
190
191
192
193
{
    ofs.write((const char*) &ag.genotype, sizeof ag.genotype);
    ofs.write((const char*) &ag.type, sizeof ag.type);
    write_int(ofs, ag.poly.value);
}

194
inline algebraic_genotype read_algebraic_genotype(ifile& ifs, const std::map<int, fast_polynom>& pt)
195
196
197
198
199
200
201
202
203
204
{
    algebraic_genotype ag;
    ifs.read((char*) &ag.genotype, sizeof ag.genotype);
    ifs.read((char*) &ag.type, sizeof ag.type);
    ag.poly = pt.find(read_int(ifs))->second;
    return ag;
}

/** GENOMATRIX **/

205
inline void write_genomatrix(ofile& ofs, const GenoMatrix& mat)
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
{
    write_fourcc(ofs, "SGEM");
    /*std::map<decltype(fast_polynom::value), impl::f_polynom>*/
    std::set<fast_polynom> poly_table;
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            poly_table.insert(mat(i, j).poly);
        }
    }
    write_size(ofs, poly_table.size());
    for (const auto& fp: poly_table) {
        write_fast_polynom(ofs, fp);
    }
    write_int(ofs, mat.cols());
    write_int(ofs, mat.rows());
    /*MSG_DEBUG("[write_genomatrix] cols=" << mat.cols() << " rows=" << mat.rows());*/
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            /*write_int(ofs, mat(i, j).value);*/
            write_algebraic_genotype(ofs, mat(i, j));
        }
    }
}

inline
231
void read_genomatrix(ifile& ifs, GenoMatrix& mat)
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
{
    if (check_fourcc(ifs, "SGEM")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
        return;
    }
    std::map<int, fast_polynom> poly_map;

    size_t table_size = read_size(ifs);
    int key;

    for (size_t i = 0; i < table_size; ++i) {
        auto f = read_fast_polynom(ifs, key);
        poly_map[key] = f;
    }

    int cols = read_int(ifs);
    int rows = read_int(ifs);
    /*MSG_DEBUG("[read_genomatrix] cols=" << cols << " rows=" << rows);*/
    mat.resize(rows, cols);
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            mat(i, j) = read_algebraic_genotype(ifs, poly_map);
        }
    }
}

/** GENERATION_RS **/

260
inline void write_geno_matrix(ofile& ofs, const geno_matrix* gen)
261
262
263
264
265
266
267
268
269
270
271
{
    write_fourcc(ofs, "SGRS");
    write_str(ofs, gen->name);
    write_size(ofs, gen->P.size());
    for (const auto& p: gen->P) {
        write_double(ofs, p.weight);
        write_genomatrix(ofs, p.G.data);
    }
}

inline
272
geno_matrix* read_geno_matrix(ifile& ifs)
273
274
275
276
277
278
279
{
    if (check_fourcc(ifs, "SGRS")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
    }
    /*MSG_DEBUG("pouet 1"); MSG_QUEUE_FLUSH();*/
    std::string name = read_str(ifs);
    /*MSG_DEBUG("pouet 2"); MSG_QUEUE_FLUSH();*/
280
    geno_matrix* ret = geno_matrix::blank(name);
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    /*MSG_DEBUG("pouet 3"); MSG_QUEUE_FLUSH();*/
    size_t n_p = read_size(ifs);
    /*MSG_DEBUG("Have " << n_p << " processes"); MSG_QUEUE_FLUSH();*/
    ret->P.resize(n_p);
    for (size_t i = 0; i < n_p; ++i) {
        ret->P[i].weight = read_double(ifs);
        GenoMatrix tmp;
        read_genomatrix(ifs, tmp);
        ret->P[i].G = convert(tmp);
        /*MSG_DEBUG("Read process");*/
        /*MSG_DEBUG(ret->P[i]);*/
    }
    ret->precompute();
    return ret;
}
296
297
#endif

298
299
300
301
302
303
304
305
306

/** MATRIX<SCALAR, R, C> **/

template <typename MATRIX_TYPE>
struct resize_matrix_impl;

template <> struct resize_matrix_impl<VectorXd> { void operator () (VectorXd& v, size_t rows, size_t) { v.resize(rows); } };
template <> struct resize_matrix_impl<MatrixXd> { void operator () (MatrixXd& m, size_t rows, size_t cols) { m.resize(rows, cols); } };

307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
template <typename SCALAR, int ROW, int COL, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, ROW, COL, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& m, size_t r, size_t c) { m.resize(r, c); }
};

template <typename SCALAR, int ROW, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, ROW, 1, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, ROW, 1, C, D, E>& v, size_t r, size_t) { v.resize(r); }
};

template <typename SCALAR, int COL, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, 1, COL, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, 1, COL, C, D, E>& v, size_t, size_t c) { v.resize(c); }
};

322
323
324
325
template <typename MATRIX_TYPE>
void resize_matrix(MATRIX_TYPE& m, size_t r, size_t c) { resize_matrix_impl<MATRIX_TYPE>()(m, r, c); }

template <typename SCALAR, int ROW, int COL, int C, int D, int E>
326
void read_matrix(ifile& ifs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat)
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
{
    size_t scalar_sz = read_size(ifs);
    if (scalar_sz != sizeof(SCALAR)) {
        MSG_ERROR("WRONG SIZE OF SCALAR, CAN'T READ FILE", "Make sure spell-marker and spell-qtl are always executed on machines with same word size.");
    }
    size_t n_row = read_size(ifs);
    size_t n_col = read_size(ifs);
    if (ROW != Eigen::Dynamic && ((int) n_row) != ROW) {
        MSG_ERROR("WRONG ROW COUNT. FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again");
    }
    if (COL != Eigen::Dynamic && ((int) n_col) != COL) {
        MSG_ERROR("WRONG COLUMN COUNT. FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again");
    }
    resize_matrix(mat, n_row, n_col);
    ifs.read((char*) mat.data(), n_row * n_col * sizeof(SCALAR));
}

template <typename SCALAR, int ROW, int COL, int C, int D, int E>
345
void write_matrix(ofile& ofs, const Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat)
346
347
348
349
350
351
352
353
{
    write_size(ofs, sizeof(SCALAR));
    write_size(ofs, mat.rows());
    write_size(ofs, mat.cols());
    ofs.write((const char*) mat.data(), mat.size() * sizeof(SCALAR));
}


354
template <typename V, typename READ_ELEM_FUNC>
355
void read_vector(ifile& ifs, std::vector<V>& vec, READ_ELEM_FUNC read_elem)
356
357
358
359
360
361
362
363
364
365
366
{
    size_t sz = read_size(ifs);
    vec.clear();
    vec.reserve(sz);
    for (size_t i = 0; i < sz; ++i) {
        vec.emplace_back(read_elem(ifs));
    }
}


template <typename V, typename WRITE_ELEM_FUNC>
367
void write_vector(ofile& ofs, const std::vector<V>& vec, WRITE_ELEM_FUNC write_elem)
368
369
370
371
372
373
374
375
376
377
{
    write_size(ofs, vec.size());
    for (const auto& e: vec) {
        write_elem(ofs, e);
    }
}


/** LABEL_TYPE **/

378
inline
379
label_type read_label(ifile& ifs)
380
381
382
383
384
385
{
    char f, s;
    ifs >> f >> s;
    return {f, s};
}

386
inline
387
void write_label(ofile& ofs, const label_type& l)
388
389
390
391
392
393
394
{
    ofs << l.first() << l.second();
}


/** GENO_MATRIX **/

395
inline
396
void write_geno_matrix(ofile& ofs, const geno_matrix& mat)
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
{
    write_fourcc(ofs, "SGEM");
    write_str(ofs, mat.name);
    /* skip variant, it's deprecated. Or should be. */
    write_vector(ofs, mat.labels, write_label);
    write_matrix(ofs, mat.inf_mat);
    write_matrix(ofs, mat.p);
    write_matrix(ofs, mat.p_inv);
    write_matrix(ofs, mat.diag);
    write_matrix(ofs, mat.stat_dist);
    write_matrix(ofs, mat.collect);
    write_matrix(ofs, mat.dispatch);
    /* also skip symmetries for now. */
}

inline
413
void read_geno_matrix(ifile& ifs, geno_matrix& mat)
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
{
    if (check_fourcc(ifs, "SGEM")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
        return;
    }
    mat.name = read_str(ifs);
    /* skip variant. */
    read_vector(ifs, mat.labels, read_label);
    read_matrix(ifs, mat.inf_mat);
    read_matrix(ifs, mat.p);
    read_matrix(ifs, mat.p_inv);
    read_matrix(ifs, mat.diag);
    read_matrix(ifs, mat.stat_dist);
    read_matrix(ifs, mat.collect);
    read_matrix(ifs, mat.dispatch);
    /* also skip symmetries for now. */
}


433
inline
434
void write_geno_matrix(ofile& ofs, const geno_matrix* ptr)
435
436
437
438
{
    write_geno_matrix(ofs, *ptr);
}

439
inline
440
geno_matrix* read_geno_matrix(ifile& ifs)
441
442
443
444
445
446
447
{
    geno_matrix* ret = new geno_matrix();
    read_geno_matrix(ifs, *ret);
    return ret;
}


448
449
450

template <class DERIVED>
struct rw_any {
451
    bool fourcc(ifile& ifs, const char* cc)
452
453
454
455
456
457
458
459
    {
        if (check_fourcc(ifs, cc)) {
            MSG_ERROR("File is not valid or has been corrupted (expected " << cc << ')', "");
            return true;
        }
        return false;
    }

460
    bool fourcc(ofile& ofs, const char* cc)
461
462
463
464
465
466
467
468
469
    {
        write_fourcc(ofs, cc);
        return false;
    }

    virtual ~rw_any() {}

    DERIVED& ref() { return *dynamic_cast<DERIVED*>(this); }

470
471
    void operator () (ifile& ifs, std::string& s) { s = read_str(ifs); }
    void operator () (ofile& ofs, const std::string& s) { write_str(ofs, s); }
472

473
474
    void operator () (ifile& ifs, char& i) { i = read_char(ifs); }
    void operator () (ofile& ofs, char i) { write_char(ofs, i); }
475

476
477
    void operator () (ifile& ifs, int& i) { i = read_int(ifs); }
    void operator () (ofile& ofs, int i) { write_int(ofs, i); }
478

479
480
    void operator () (ifile& ifs, bool& i) { i = !!read_char(ifs); }
    void operator () (ofile& ofs, bool i) { write_char(ofs, i); }
481

482
483
    void operator () (ifile& ifs, double& i) { i = read_double(ifs); }
    void operator () (ofile& ofs, double i) { write_double(ofs, i); }
484

485
486
    void operator () (ifile& ifs, size_t& i) { i = read_size(ifs); }
    void operator () (ofile& ofs, size_t i) { write_size(ofs, i); }
487

488
489
    void operator () (ifile& ifs, ptrdiff_t& i) { i = read_ptrdiff(ifs); }
    void operator () (ofile& ofs, ptrdiff_t i) { write_ptrdiff(ofs, i); }
490

491
    void operator () (ifile& ifs, label_type& l) { l = {read_char(ifs), read_char(ifs)}; }
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
    void operator () (ofile& ofs, label_type& l) { write_char(ofs, l.first()); write_char(ofs, l.second()); }

    template <typename K>
        auto operator () (ifile& fs, K& has_file_io)
        -> decltype(has_file_io.file_io(fs), void())
        {
            has_file_io.file_io(fs);
        }

    template <typename K>
        auto operator () (ofile& fs, K& has_file_io)
        -> decltype(has_file_io.file_io(fs), void())
        {
            has_file_io.file_io(fs);
        }
507
508

    template <typename V>
509
        void operator () (ifile& ifs, std::set<V>& vec)
510
511
512
513
514
515
516
517
518
519
520
521
        {
            if (fourcc(ifs, "OSET")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            for (size_t i = 0; i < sz; ++i) {
                V tmp;
                ref() (ifs, tmp);
                vec.emplace(tmp);
            }
        }

    template <typename V>
522
        void operator () (ofile& ofs, std::set<V>& vec)
523
524
525
        {
            if (fourcc(ofs, "OSET")) { return; }
            write_size(ofs, vec.size());
526
            for (auto& e: vec) {
527
528
529
530
531
532
                ref() (ofs, e);
            }
        }


    template <typename V>
533
        void operator () (ifile& ifs, std::unordered_set<V>& vec)
534
535
536
537
538
539
540
541
542
543
544
545
        {
            if (fourcc(ifs, "USET")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            for (size_t i = 0; i < sz; ++i) {
                V tmp;
                ref() (ifs, tmp);
                vec.emplace(tmp);
            }
        }

    template <typename V>
546
        void operator () (ofile& ofs, std::unordered_set<V>& vec)
547
548
549
        {
            if (fourcc(ofs, "USET")) { return; }
            write_size(ofs, vec.size());
550
            for (auto& e: vec) {
551
552
553
554
                ref() (ofs, e);
            }
        }

555
    void operator () (ifile& ifs, std::vector<bool>::reference i) { i = !!read_char(ifs); }
556
557

    template <typename V, typename A>
558
        void operator () (ifile& ifs, std::vector<V, A>& vec)
559
560
561
562
563
564
565
566
567
568
569
570
        {
            if (fourcc(ifs, "VECT")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            vec.reserve(sz);
            for (size_t i = 0; i < sz; ++i) {
                vec.emplace_back();
                ref() (ifs, vec.back());
            }
        }

    template <typename V, typename A>
571
572
573
574
575
576
577
578
579
580
        void operator () (ofile& ofs, std::vector<V, A>& vec)
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (auto& e: vec) {
                ref() (ofs, e);
            }
        }

        void operator () (ofile& ofs, std::vector<bool>& vec)
581
582
583
584
585
586
587
588
589
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (const auto& e: vec) {
                ref() (ofs, e);
            }
        }

    template <typename V, typename A, typename RWElem>
590
        void operator () (ifile& ifs, std::vector<V, A>& vec, RWElem rw_elem)
591
592
593
594
595
596
597
598
599
600
601
602
        {
            if (fourcc(ifs, "VECT")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            vec.reserve(sz);
            for (size_t i = 0; i < sz; ++i) {
                vec.emplace_back();
                rw_elem(ifs, vec.back());
            }
        }

    template <typename V, typename A, typename RWElem>
603
        void operator () (ofile& ofs, std::vector<V, A>& vec, RWElem rw_elem)
604
605
606
607
608
609
610
611
612
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (auto& e: vec) {
                rw_elem(ofs, e);
            }
        }

    template <typename K, typename V, typename A, typename C>
613
        void operator () (ifile& ifs, std::map<K, V, A, C>& map)
614
615
616
617
618
619
620
621
622
623
624
625
626
627
        {
            if (fourcc(ifs, "MAP ")) { return; }
            size_t count = read_size(ifs);
            map.clear();
            for (size_t i = 0; i < count; ++i) {
                K key;
                V value;
                ref() (ifs, key);
                ref() (ifs, value);
                map.emplace(std::move(key), std::move(value));
            }
        }

    template <typename K, typename V, typename A, typename C>
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
        void operator () (ifile& ifs, std::unordered_map<K, V, A, C>& map)
        {
            if (fourcc(ifs, "MAP ")) { return; }
            size_t count = read_size(ifs);
            map.clear();
            for (size_t i = 0; i < count; ++i) {
                K key;
                V value;
                ref() (ifs, key);
                ref() (ifs, value);
                map.emplace(std::move(key), std::move(value));
            }
        }

    template <typename K, typename V, typename A, typename C>
        void operator () (ofile& ofs, std::map<K, V, A, C>& map)
644
645
646
        {
            if (fourcc(ofs, "MAP ")) { return; }
            write_size(ofs, map.size());
647
648
649
650
651
652
653
654
655
656
657
658
            for (auto& kv: map) {
                ref() (ofs, const_cast<K&>(kv.first));
                ref() (ofs, kv.second);
            }
        }

    template <typename K, typename V, typename A, typename C>
        void operator () (ofile& ofs, std::unordered_map<K, V, A, C>& map)
        {
            if (fourcc(ofs, "MAP ")) { return; }
            write_size(ofs, map.size());
            for (auto& kv: map) {
659
660
661
662
663
                ref() (ofs, kv.first);
                ref() (ofs, kv.second);
            }
        }

664
665
666
667
668
669
670
    template <typename IO, typename A, typename B>
        void operator () (IO& fs, std::pair<A, B>& p)
        {
            ref() (fs, p.first);
            ref() (fs, p.second);
        }

671
    template <typename SCALAR, int ROW, int COL, int C, int D, int E>
672
        void operator () (ifile& ifs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat) { read_matrix(ifs, mat); }
673
674

    template <typename SCALAR, int ROW, int COL, int C, int D, int E>
675
        void operator () (ofile& ofs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat) { write_matrix(ofs, mat); }
676

677
678
    void operator () (ifile& ifs, geno_matrix& mat) { read_geno_matrix(ifs, mat); }
    void operator () (ofile& ofs, geno_matrix& mat) { write_geno_matrix(ofs, mat); }
679

680
    void operator () (ifile& ifs, std::shared_ptr<geno_matrix>& ptr)
681
682
683
684
685
686
687
688
689
    {
        ptr.reset();
        ptr = std::make_shared<geno_matrix>();
        ref() (ifs, *ptr);
        if (!ptr->size()) {
            ptr.reset();
        }
    }

690
    void operator () (ofile& ofs, std::shared_ptr<geno_matrix>& ptr)
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
    {
        if (ptr) {
            ref() (ofs, *ptr.get());
        } else {
            geno_matrix _;
            ref() (ofs, _);
        }
    }
};


struct rw_base : public rw_any<rw_base> {
    virtual ~rw_base() {}
    using rw_any<rw_base>::fourcc;
    using rw_any<rw_base>::ref;
    using rw_any<rw_base>::operator ();
};



711
712
713
714
/** **/


struct LV_database {
715
716
717
    /* GEN / MARK / IND => LV_vec */
    std::map<std::string, std::map<std::string, std::vector<VectorXd>>> data_by_marker;
    /* CHROM / GEN / IND => LV_mat */
718
719
    std::map<std::string, std::map<std::string, std::vector<MatrixXd>>> data;

720
721
722
723
724
725
726
727
728
729
730
    LV_database() : data_by_marker(), data() {}

    void assemble_using_map(const std::vector<chromosome>& map)
    {
        data.clear();
        for (const auto& chr: map) {
            for (const auto& gen_dat: data_by_marker) {
                const auto& gen = gen_dat.first;
                const auto& mark_ind_lv = gen_dat.second;
                size_t n_states = mark_ind_lv.begin()->second.front().size();
                size_t n_mark = chr.raw.marker_name.size();
731
                size_t n_condensed_mark = chr.condensed.marker_name.size();
732
733
734
                size_t n_ind = mark_ind_lv.begin()->second.size();
                auto& ind_LVmat = data[chr.name][gen];
                ind_LVmat.resize(n_ind, {n_states, n_mark});
735
736
                /*MSG_DEBUG("[assemble.using.map] #haplo_size=" << chr.haplo_sizes.size());*/
                /*MSG_QUEUE_FLUSH();*/
Damien Leroux's avatar
Damien Leroux committed
737
738
                for (size_t ind = 0; ind < n_ind; ++ind) {
                    auto& mat = ind_LVmat[ind];
739
                    mat = MatrixXd::Ones(n_states, n_condensed_mark);
Damien Leroux's avatar
Damien Leroux committed
740
                    size_t m = 0;
741
742
                    int haplo_counter = 0;
                    auto haplo_size_i = chr.haplo_sizes.begin();
Damien Leroux's avatar
Damien Leroux committed
743
744
                    for (const auto& mark: chr.raw.marker_name) {
                        const auto& svec = mark_ind_lv.find(mark)->second;
745
746
747
748
749
750
751
752
753
754
755
756
757
758
                        /*MSG_DEBUG("[assemble.using.map] haplo_size=" << (*haplo_size_i) << " column=" << m);*/
                        /*MSG_QUEUE_FLUSH();*/
                        mat.col(m).array() *= svec[ind].array();
                        ++haplo_counter;
                        if (haplo_counter == *haplo_size_i) {
                            ++m;
                            ++haplo_size_i;
                            haplo_counter = 0;
                        }
                    }
                    auto hi = chr.begin();
                    for (int c = 0; c < mat.cols(); ++c, ++hi) {
                        double sum = mat.col(c).sum();
                        if (sum == 0) {
759
760
761
762
763
764
765
                            /*std::stringstream ss;*/
                            /*ss << "Detected inconsistent observations! Generation is " << gen << ", chromosome " << chr.name << ", individual #" << (ind + 1) << ", involving markers";*/
                            /*auto haplo = *hi;*/
                            /*for (size_t i = haplo.first; i < haplo.second; ++i) {*/
                                /*ss << ' ' << chr.raw.marker_name[i];*/
                            /*}*/
                            /*MSG_WARNING(ss.str());*/
766
767
768
                        } else {
                            mat.col(c) /= sum;
                        }
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
                    }
                }
            }
        }
    }

    MatrixXd operator () (
            std::vector<std::string>::const_iterator begin,
            std::vector<std::string>::const_iterator end,
            const std::string& gen, size_t ind) const
    {
        const auto& milv = data_by_marker.find(gen)->second;
        size_t n_states = milv.find(*begin)->second.front().size();
        MatrixXd LV(n_states, end - begin);
        for (size_t i = 0; i < n_states; ++i) {
            LV.col(i) = milv.find(*begin)->second[ind];
        }
        return LV;
    }

789
790
791
792
793
794
795
796
797
798
    MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind)
    {
        return data[chr][gen][ind];
    }

    const MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind) const
    {
        return data.find(chr)->second.find(gen)->second[ind];
    }

799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
    std::map<std::string, std::vector<MatrixXd>>
        extract(const std::string& gen, const std::vector<size_t> ind_vec) const
        {
            std::map<std::string, std::vector<MatrixXd>> ret;
            for (const auto& chr_gen_lv_vec: data) {
                const std::string& chr = chr_gen_lv_vec.first;
                const auto& lv_vec = chr_gen_lv_vec.second.find(gen)->second;
                auto& ret_lv_vec = ret[chr];
                ret_lv_vec.reserve(ind_vec.size());
                for (size_t i: ind_vec) {
                    ret_lv_vec.push_back(lv_vec[i]);
                }
            }
            return ret;
        }

815
816
817
818
819
820
821
822
823
824
825
826
827
828
    template <typename STREAM_TYPE>
        void
        file_io(STREAM_TYPE& fs)
        {
            rw_base rw;
            if (rw.fourcc(fs, "SMLV")) { return; }
            rw(fs, data_by_marker);
            rw(fs, data);
        }

    static
        LV_database
        load_from(const std::string& filename)
        {
829
            ifile ifs(filename);
830
831
832
833
834
835
836
837
            LV_database ret;
            ret.file_io(ifs);
            return ret;
        }

    void
        save_to(const std::string& filename)
        {
838
            ofile ofs(filename);
839
840
841
842
            file_io(ofs);
        }

    /*
843
    static
844
        bool lv_check_fourcc(ifile& ifs, const char* fourcc)
845
846
847
848
849
850
851
852
853
854
855
        {
            if (check_fourcc(ifs, fourcc)) {
                MSG_ERROR("FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again.");
                return true;
            }
            return false;
        }

    static
        LV_database load_from(const std::string& filename)
        {
856
            ifile ifs(filename);
857
858
859
860
            return load_from(ifs);
        }

    static
861
        LV_database load_from(ifile& ifs)
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
        {
            LV_database LV;

            if (lv_check_fourcc(ifs, "SMLV")) { return {}; }
            size_t n_chrom = read_size(ifs);
            size_t n_gen = read_size(ifs);
            for (size_t c = 0; c < n_chrom; ++c) {
                if (lv_check_fourcc(ifs, "SCHR")) { return {}; }
                std::string chr_name = read_str(ifs);
                for (size_t g = 0; g < n_gen; ++g) {
                    if (lv_check_fourcc(ifs, "SGEN")) { return {}; }
                    std::string gen_name = read_str(ifs);
                    size_t n_ind = read_size(ifs);
                    LV.data[chr_name][gen_name].resize(n_ind);
                    for (size_t i = 0; i < n_ind; ++i) {
                        if (lv_check_fourcc(ifs, "SLV_")) { return {}; }
                        read_matrix(ifs, LV.data[chr_name][gen_name][i]);
                    }
                }
            }
            return LV;
        }

    void save_to(const std::string& filename)
    {
887
        ofile ofs(filename);
888
889
890
        save_to(ofs);
    }

891
    void save_to(ofile& ofs)
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
    {
        write_fourcc(ofs, "SMLV");
        write_size(ofs, data.size());
        write_size(ofs, data.begin()->second.size());
        for (const auto& chr_gen_vec_lv: data) {
            write_fourcc(ofs, "SCHR");
            write_str(ofs, chr_gen_vec_lv.first);
            for (const auto& gen_vec_lv: chr_gen_vec_lv.second) {
                write_fourcc(ofs, "SGEN");
                write_str(ofs, gen_vec_lv.first);
                write_size(ofs, gen_vec_lv.second.size());
                for (const auto& lv: gen_vec_lv.second) {
                    write_fourcc(ofs, "SLV_");
                    write_matrix(ofs, lv);
                }
            }
        }
    }
910
    */
911
912
913
914
915

    friend
        std::ostream& operator << (std::ostream& os, const LV_database& LV)
        {
            for (const auto& chr_gen_vec_lv: LV.data) {
916
                os << "CHROMOSOME " << chr_gen_vec_lv.first << std::endl;
917
                for (const auto& gen_vec_lv: chr_gen_vec_lv.second) {
918
                    os << "* generation " << gen_vec_lv.first << std::endl;
919
920
                    size_t i = 0;
                    for (const auto& lv: gen_vec_lv.second) {
921
                        os << "  #" << i << std::endl;
922
                        ++i;
923
                        os << lv << std::endl;
924
925
926
927
928
929
930
931
932
933
934
                    }
                }
            }
            return os;
        }
};


/** **/


935
936
937
938
939
940
941
942
943
944
945
946
947
template <typename V>
std::vector<V>
filter_vector(const std::vector<V>& vec, const std::vector<size_t>& list)
{
    std::vector<V> ret;
    ret.reserve(list.size());
    for (size_t i: list) {
        ret.push_back(vec[i]);
    }
    return ret;
}


948
949
struct qtl_pop_type {
    std::string name;
Damien Leroux's avatar
Damien Leroux committed
950
    std::string qtl_generation_name;
951
    std::vector<size_t> indices;
952
    std::shared_ptr<geno_matrix> gen;
953
954
955
    std::map<std::string, std::vector<MatrixXd>> LV;
    std::string observed_traits_filename;
    std::vector<trait> observed_traits;
956
    std::map<char, std::string> ancestor_names;
957
    trait covariables;
958
959

    qtl_pop_type()
960
        : name(), qtl_generation_name(), indices(), gen(), LV(), observed_traits_filename(), observed_traits(), covariables()
961
962
    {}
    qtl_pop_type(const std::string& n, const std::string& qgn, const std::vector<size_t>& i, std::shared_ptr<geno_matrix> g,
963
964
965
                 const std::map<std::string, std::vector<MatrixXd>>& lv, const std::string& otf, const std::vector<trait>& ot,
                 const std::map<char, std::string>& anam, const trait& covar)
        : name(n), qtl_generation_name(qgn), indices(i), gen(g), LV(lv), observed_traits_filename(otf), observed_traits(ot), ancestor_names(anam), covariables(covar)
966
967
968
969
970
971
    {
        /*MSG_DEBUG("new qtl_pop with " << ancestor_names.size() << " ancestor names");*/
        /*for (const auto& kv: ancestor_names) {*/
            /*MSG_DEBUG(" * " << kv.first << ": " << kv.second);*/
        /*}*/
    }
972
973
974
975
976
977
978
979
980
981

    std::shared_ptr<qtl_pop_type>
        filter_clone(const trait& this_trait) const
        {
            const std::vector<size_t>& keep = this_trait.good_indices;
            std::shared_ptr<qtl_pop_type> ret = std::make_shared<qtl_pop_type>();
            ret->name = name;
            ret->qtl_generation_name = qtl_generation_name;
            ret->gen = gen;
            ret->observed_traits_filename = observed_traits_filename;
982
983
984
985
986
            for (const auto& t: observed_traits) {
                if (t.name == this_trait.name) {
                    ret->observed_traits.emplace_back(t);
                }
            }
987
            ret->ancestor_names = ancestor_names;
988
989
990
991
992
            auto i = indices.begin();
            auto j = indices.end();
            auto filt = keep.begin();
            auto filt_end = keep.end();

993
994
            ret->covariables = covariables % this_trait;

995
996
997
998
999
1000
            /*MSG_DEBUG("Filtering <" << indices << "> with <" << keep << '>');*/
            std::vector<size_t> keep_indices;
            keep_indices.reserve(keep.size());
            while (filt != filt_end && i != j) {
                if (*i == *filt) {
                    keep_indices.push_back(i - indices.begin());
For faster browsing, not all history is shown. View entire blame