output.h 42.3 KB
Newer Older
1
2
3
4
5
#ifndef _SPELL_BAYES_OUTPUT_H_
#define _SPELL_BAYES_OUTPUT_H_

#include <map>
#include <vector>
Damien Leroux's avatar
Damien Leroux committed
6
7
8
#include <map>
#include <set>
#include <unordered_set>
9
#include <unordered_map>
10
11
#include <string>
#include <iostream>
12
#include "file.h"
13
14
15
16
#include <cstring>

#include "eigen.h"
#include "error.h"
17
#include "data/chromosome.h"
18
/*#include "generation_rs_fwd.h"*/
19
#include "input/read_trait.h"
20
#include "tensor.h"
21
22
23
24

/** FOURCC **/

inline
25
bool check_fourcc(ifile& ifs, const char* fourcc)
26
27
28
29
30
31
32
{
    char buf[4] = {0, 0, 0, 0};
    ifs.read(buf, 4);
    return strncmp(fourcc, buf, 4);
}

inline
33
void write_fourcc(ofile& ofs, const char* fourcc)
34
35
36
37
38
39
40
{
    ofs.write(fourcc, 4);
}

/** SIZE_T **/

inline
41
void write_size(ofile& ofs, size_t sz)
42
43
44
45
46
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
47
size_t read_size(ifile& ifs)
48
49
50
51
52
53
{
    size_t ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

54
55
56
/** PTRDIFF_T **/

inline
57
void write_ptrdiff(ofile& ofs, ptrdiff_t sz)
58
59
60
61
62
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
63
ptrdiff_t read_ptrdiff(ifile& ifs)
64
65
66
67
68
69
{
    ptrdiff_t ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

70
71
72
/** STRING **/

inline
73
std::string read_str(ifile& ifs)
74
75
76
77
78
79
80
81
{
    size_t sz = read_size(ifs);
    std::vector<char> tmp(sz);
    ifs.read(&tmp[0], sz);
    return {tmp.begin(), tmp.end()};
}

inline
82
void write_str(ofile& ofs, const std::string& s)
83
84
85
86
87
88
89
90
{
    write_size(ofs, s.size());
    ofs.write(s.c_str(), s.size());
}

/** DOUBLE **/

inline
91
void write_double(ofile& ofs, double sz)
92
93
94
95
96
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
97
double read_double(ifile& ifs)
98
99
100
101
102
103
104
105
106
{
    double ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

/** INT **/

inline
107
void write_int(ofile& ofs, int sz)
108
109
110
111
112
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
113
int read_int(ifile& ifs)
114
115
116
117
118
119
{
    int ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

120
121
122
/** CHAR **/

inline
123
void write_char(ofile& ofs, char sz)
124
125
126
127
128
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
129
char read_char(ifile& ifs)
130
131
132
133
134
135
136
{
    char ret;
    ifs.read(&ret, sizeof ret);
    return ret;
}

#if 0
137
138
139
/** FAST_POLYNOM **/

inline
140
void write_fast_polynom(ofile& ofs, const fast_polynom& fp)
141
142
143
144
145
146
147
148
149
150
151
152
{
    impl::f_polynom f = fp;
    write_int(ofs, fp.value);
    write_int(ofs, f.r_exp);
    write_int(ofs, f.s_exp);
    write_size(ofs, f.P.size());
    for (double c: f.P) {
        write_double(ofs, c);
    }
}

inline
153
fast_polynom read_fast_polynom(ifile& ifs, int& original_key)
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
{
    original_key = read_int(ifs);
    impl::f_polynom ret = fast_polynom::zero;
    ret.r_exp = read_int(ifs);
    ret.s_exp = read_int(ifs);
    size_t sz = read_size(ifs);
    ret.P.resize(sz);
    for (size_t i = 0; i < sz; ++i) {
        ret.P[i] = read_double(ifs);
    }
    return ret;
}

/** ALGREBRAIC GENOTYPE **/

169
inline void write_algebraic_genotype(ofile& ofs, const algebraic_genotype& ag)
170
171
172
173
174
175
{
    ofs.write((const char*) &ag.genotype, sizeof ag.genotype);
    ofs.write((const char*) &ag.type, sizeof ag.type);
    write_int(ofs, ag.poly.value);
}

176
inline algebraic_genotype read_algebraic_genotype(ifile& ifs, const std::map<int, fast_polynom>& pt)
177
178
179
180
181
182
183
184
185
186
{
    algebraic_genotype ag;
    ifs.read((char*) &ag.genotype, sizeof ag.genotype);
    ifs.read((char*) &ag.type, sizeof ag.type);
    ag.poly = pt.find(read_int(ifs))->second;
    return ag;
}

/** GENOMATRIX **/

187
inline void write_genomatrix(ofile& ofs, const GenoMatrix& mat)
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
{
    write_fourcc(ofs, "SGEM");
    /*std::map<decltype(fast_polynom::value), impl::f_polynom>*/
    std::set<fast_polynom> poly_table;
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            poly_table.insert(mat(i, j).poly);
        }
    }
    write_size(ofs, poly_table.size());
    for (const auto& fp: poly_table) {
        write_fast_polynom(ofs, fp);
    }
    write_int(ofs, mat.cols());
    write_int(ofs, mat.rows());
    /*MSG_DEBUG("[write_genomatrix] cols=" << mat.cols() << " rows=" << mat.rows());*/
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            /*write_int(ofs, mat(i, j).value);*/
            write_algebraic_genotype(ofs, mat(i, j));
        }
    }
}

inline
213
void read_genomatrix(ifile& ifs, GenoMatrix& mat)
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
{
    if (check_fourcc(ifs, "SGEM")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
        return;
    }
    std::map<int, fast_polynom> poly_map;

    size_t table_size = read_size(ifs);
    int key;

    for (size_t i = 0; i < table_size; ++i) {
        auto f = read_fast_polynom(ifs, key);
        poly_map[key] = f;
    }

    int cols = read_int(ifs);
    int rows = read_int(ifs);
    /*MSG_DEBUG("[read_genomatrix] cols=" << cols << " rows=" << rows);*/
    mat.resize(rows, cols);
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            mat(i, j) = read_algebraic_genotype(ifs, poly_map);
        }
    }
}

/** GENERATION_RS **/

242
inline void write_geno_matrix(ofile& ofs, const geno_matrix* gen)
243
244
245
246
247
248
249
250
251
252
253
{
    write_fourcc(ofs, "SGRS");
    write_str(ofs, gen->name);
    write_size(ofs, gen->P.size());
    for (const auto& p: gen->P) {
        write_double(ofs, p.weight);
        write_genomatrix(ofs, p.G.data);
    }
}

inline
254
geno_matrix* read_geno_matrix(ifile& ifs)
255
256
257
258
259
260
261
{
    if (check_fourcc(ifs, "SGRS")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
    }
    /*MSG_DEBUG("pouet 1"); MSG_QUEUE_FLUSH();*/
    std::string name = read_str(ifs);
    /*MSG_DEBUG("pouet 2"); MSG_QUEUE_FLUSH();*/
262
    geno_matrix* ret = geno_matrix::blank(name);
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
    /*MSG_DEBUG("pouet 3"); MSG_QUEUE_FLUSH();*/
    size_t n_p = read_size(ifs);
    /*MSG_DEBUG("Have " << n_p << " processes"); MSG_QUEUE_FLUSH();*/
    ret->P.resize(n_p);
    for (size_t i = 0; i < n_p; ++i) {
        ret->P[i].weight = read_double(ifs);
        GenoMatrix tmp;
        read_genomatrix(ifs, tmp);
        ret->P[i].G = convert(tmp);
        /*MSG_DEBUG("Read process");*/
        /*MSG_DEBUG(ret->P[i]);*/
    }
    ret->precompute();
    return ret;
}
278
279
#endif

280
281
282
283
284
285
286
287
288

/** MATRIX<SCALAR, R, C> **/

template <typename MATRIX_TYPE>
struct resize_matrix_impl;

template <> struct resize_matrix_impl<VectorXd> { void operator () (VectorXd& v, size_t rows, size_t) { v.resize(rows); } };
template <> struct resize_matrix_impl<MatrixXd> { void operator () (MatrixXd& m, size_t rows, size_t cols) { m.resize(rows, cols); } };

289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
template <typename SCALAR, int ROW, int COL, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, ROW, COL, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& m, size_t r, size_t c) { m.resize(r, c); }
};

template <typename SCALAR, int ROW, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, ROW, 1, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, ROW, 1, C, D, E>& v, size_t r, size_t) { v.resize(r); }
};

template <typename SCALAR, int COL, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, 1, COL, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, 1, COL, C, D, E>& v, size_t, size_t c) { v.resize(c); }
};

304
305
306
307
template <typename MATRIX_TYPE>
void resize_matrix(MATRIX_TYPE& m, size_t r, size_t c) { resize_matrix_impl<MATRIX_TYPE>()(m, r, c); }

template <typename SCALAR, int ROW, int COL, int C, int D, int E>
308
void read_matrix(ifile& ifs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat)
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
{
    size_t scalar_sz = read_size(ifs);
    if (scalar_sz != sizeof(SCALAR)) {
        MSG_ERROR("WRONG SIZE OF SCALAR, CAN'T READ FILE", "Make sure spell-marker and spell-qtl are always executed on machines with same word size.");
    }
    size_t n_row = read_size(ifs);
    size_t n_col = read_size(ifs);
    if (ROW != Eigen::Dynamic && ((int) n_row) != ROW) {
        MSG_ERROR("WRONG ROW COUNT. FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again");
    }
    if (COL != Eigen::Dynamic && ((int) n_col) != COL) {
        MSG_ERROR("WRONG COLUMN COUNT. FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again");
    }
    resize_matrix(mat, n_row, n_col);
    ifs.read((char*) mat.data(), n_row * n_col * sizeof(SCALAR));
}

template <typename SCALAR, int ROW, int COL, int C, int D, int E>
327
void write_matrix(ofile& ofs, const Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat)
328
329
330
331
332
333
334
335
{
    write_size(ofs, sizeof(SCALAR));
    write_size(ofs, mat.rows());
    write_size(ofs, mat.cols());
    ofs.write((const char*) mat.data(), mat.size() * sizeof(SCALAR));
}


336
template <typename V, typename READ_ELEM_FUNC>
337
void read_vector(ifile& ifs, std::vector<V>& vec, READ_ELEM_FUNC read_elem)
338
339
340
341
342
343
344
345
346
347
348
{
    size_t sz = read_size(ifs);
    vec.clear();
    vec.reserve(sz);
    for (size_t i = 0; i < sz; ++i) {
        vec.emplace_back(read_elem(ifs));
    }
}


template <typename V, typename WRITE_ELEM_FUNC>
349
void write_vector(ofile& ofs, const std::vector<V>& vec, WRITE_ELEM_FUNC write_elem)
350
351
352
353
354
355
356
357
358
359
{
    write_size(ofs, vec.size());
    for (const auto& e: vec) {
        write_elem(ofs, e);
    }
}


/** LABEL_TYPE **/

360
inline
361
label_type read_label(ifile& ifs)
362
363
364
365
366
367
{
    char f, s;
    ifs >> f >> s;
    return {f, s};
}

368
inline
369
void write_label(ofile& ofs, const label_type& l)
370
371
372
373
374
375
376
{
    ofs << l.first() << l.second();
}


/** GENO_MATRIX **/

377
inline
378
void write_geno_matrix(ofile& ofs, const geno_matrix& mat)
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
{
    write_fourcc(ofs, "SGEM");
    write_str(ofs, mat.name);
    /* skip variant, it's deprecated. Or should be. */
    write_vector(ofs, mat.labels, write_label);
    write_matrix(ofs, mat.inf_mat);
    write_matrix(ofs, mat.p);
    write_matrix(ofs, mat.p_inv);
    write_matrix(ofs, mat.diag);
    write_matrix(ofs, mat.stat_dist);
    write_matrix(ofs, mat.collect);
    write_matrix(ofs, mat.dispatch);
    /* also skip symmetries for now. */
}

inline
395
void read_geno_matrix(ifile& ifs, geno_matrix& mat)
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
{
    if (check_fourcc(ifs, "SGEM")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
        return;
    }
    mat.name = read_str(ifs);
    /* skip variant. */
    read_vector(ifs, mat.labels, read_label);
    read_matrix(ifs, mat.inf_mat);
    read_matrix(ifs, mat.p);
    read_matrix(ifs, mat.p_inv);
    read_matrix(ifs, mat.diag);
    read_matrix(ifs, mat.stat_dist);
    read_matrix(ifs, mat.collect);
    read_matrix(ifs, mat.dispatch);
    /* also skip symmetries for now. */
}


415
inline
416
void write_geno_matrix(ofile& ofs, const geno_matrix* ptr)
417
418
419
420
{
    write_geno_matrix(ofs, *ptr);
}

421
inline
422
geno_matrix* read_geno_matrix(ifile& ifs)
423
424
425
426
427
428
429
{
    geno_matrix* ret = new geno_matrix();
    read_geno_matrix(ifs, *ret);
    return ret;
}


430
431
432

template <class DERIVED>
struct rw_any {
433
    bool fourcc(ifile& ifs, const char* cc)
434
435
436
437
438
439
440
441
    {
        if (check_fourcc(ifs, cc)) {
            MSG_ERROR("File is not valid or has been corrupted (expected " << cc << ')', "");
            return true;
        }
        return false;
    }

442
    bool fourcc(ofile& ofs, const char* cc)
443
444
445
446
447
448
449
450
451
    {
        write_fourcc(ofs, cc);
        return false;
    }

    virtual ~rw_any() {}

    DERIVED& ref() { return *dynamic_cast<DERIVED*>(this); }

452
453
    void operator () (ifile& ifs, std::string& s) { s = read_str(ifs); }
    void operator () (ofile& ofs, const std::string& s) { write_str(ofs, s); }
454

455
456
    void operator () (ifile& ifs, char& i) { i = read_char(ifs); }
    void operator () (ofile& ofs, char i) { write_char(ofs, i); }
457

458
459
    void operator () (ifile& ifs, int& i) { i = read_int(ifs); }
    void operator () (ofile& ofs, int i) { write_int(ofs, i); }
460

461
462
    void operator () (ifile& ifs, bool& i) { i = !!read_char(ifs); }
    void operator () (ofile& ofs, bool i) { write_char(ofs, i); }
463

464
465
    void operator () (ifile& ifs, double& i) { i = read_double(ifs); }
    void operator () (ofile& ofs, double i) { write_double(ofs, i); }
466

467
468
    void operator () (ifile& ifs, size_t& i) { i = read_size(ifs); }
    void operator () (ofile& ofs, size_t i) { write_size(ofs, i); }
469

470
471
    void operator () (ifile& ifs, ptrdiff_t& i) { i = read_ptrdiff(ifs); }
    void operator () (ofile& ofs, ptrdiff_t i) { write_ptrdiff(ofs, i); }
472

473
    void operator () (ifile& ifs, label_type& l) { l = {read_char(ifs), read_char(ifs)}; }
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
    void operator () (ofile& ofs, label_type& l) { write_char(ofs, l.first()); write_char(ofs, l.second()); }

    template <typename K>
        auto operator () (ifile& fs, K& has_file_io)
        -> decltype(has_file_io.file_io(fs), void())
        {
            has_file_io.file_io(fs);
        }

    template <typename K>
        auto operator () (ofile& fs, K& has_file_io)
        -> decltype(has_file_io.file_io(fs), void())
        {
            has_file_io.file_io(fs);
        }
489
490

    template <typename V>
491
        void operator () (ifile& ifs, std::set<V>& vec)
492
493
494
495
496
497
498
499
500
501
502
503
        {
            if (fourcc(ifs, "OSET")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            for (size_t i = 0; i < sz; ++i) {
                V tmp;
                ref() (ifs, tmp);
                vec.emplace(tmp);
            }
        }

    template <typename V>
504
        void operator () (ofile& ofs, std::set<V>& vec)
505
506
507
        {
            if (fourcc(ofs, "OSET")) { return; }
            write_size(ofs, vec.size());
508
            for (auto& e: vec) {
509
510
511
512
513
514
                ref() (ofs, e);
            }
        }


    template <typename V>
515
        void operator () (ifile& ifs, std::unordered_set<V>& vec)
516
517
518
519
520
521
522
523
524
525
526
527
        {
            if (fourcc(ifs, "USET")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            for (size_t i = 0; i < sz; ++i) {
                V tmp;
                ref() (ifs, tmp);
                vec.emplace(tmp);
            }
        }

    template <typename V>
528
        void operator () (ofile& ofs, std::unordered_set<V>& vec)
529
530
531
        {
            if (fourcc(ofs, "USET")) { return; }
            write_size(ofs, vec.size());
532
            for (auto& e: vec) {
533
534
535
536
                ref() (ofs, e);
            }
        }

537
    void operator () (ifile& ifs, std::vector<bool>::reference i) { i = !!read_char(ifs); }
538
539

    template <typename V, typename A>
540
        void operator () (ifile& ifs, std::vector<V, A>& vec)
541
542
543
544
545
546
547
548
549
550
551
552
        {
            if (fourcc(ifs, "VECT")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            vec.reserve(sz);
            for (size_t i = 0; i < sz; ++i) {
                vec.emplace_back();
                ref() (ifs, vec.back());
            }
        }

    template <typename V, typename A>
553
554
555
556
557
558
559
560
561
562
        void operator () (ofile& ofs, std::vector<V, A>& vec)
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (auto& e: vec) {
                ref() (ofs, e);
            }
        }

        void operator () (ofile& ofs, std::vector<bool>& vec)
563
564
565
566
567
568
569
570
571
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (const auto& e: vec) {
                ref() (ofs, e);
            }
        }

    template <typename V, typename A, typename RWElem>
572
        void operator () (ifile& ifs, std::vector<V, A>& vec, RWElem rw_elem)
573
574
575
576
577
578
579
580
581
582
583
584
        {
            if (fourcc(ifs, "VECT")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            vec.reserve(sz);
            for (size_t i = 0; i < sz; ++i) {
                vec.emplace_back();
                rw_elem(ifs, vec.back());
            }
        }

    template <typename V, typename A, typename RWElem>
585
        void operator () (ofile& ofs, std::vector<V, A>& vec, RWElem rw_elem)
586
587
588
589
590
591
592
593
594
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (auto& e: vec) {
                rw_elem(ofs, e);
            }
        }

    template <typename K, typename V, typename A, typename C>
595
        void operator () (ifile& ifs, std::map<K, V, A, C>& map)
596
597
598
599
600
601
602
603
604
605
606
607
608
609
        {
            if (fourcc(ifs, "MAP ")) { return; }
            size_t count = read_size(ifs);
            map.clear();
            for (size_t i = 0; i < count; ++i) {
                K key;
                V value;
                ref() (ifs, key);
                ref() (ifs, value);
                map.emplace(std::move(key), std::move(value));
            }
        }

    template <typename K, typename V, typename A, typename C>
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
        void operator () (ifile& ifs, std::unordered_map<K, V, A, C>& map)
        {
            if (fourcc(ifs, "MAP ")) { return; }
            size_t count = read_size(ifs);
            map.clear();
            for (size_t i = 0; i < count; ++i) {
                K key;
                V value;
                ref() (ifs, key);
                ref() (ifs, value);
                map.emplace(std::move(key), std::move(value));
            }
        }

    template <typename K, typename V, typename A, typename C>
        void operator () (ofile& ofs, std::map<K, V, A, C>& map)
626
627
628
        {
            if (fourcc(ofs, "MAP ")) { return; }
            write_size(ofs, map.size());
629
630
631
632
633
634
635
636
637
638
639
640
            for (auto& kv: map) {
                ref() (ofs, const_cast<K&>(kv.first));
                ref() (ofs, kv.second);
            }
        }

    template <typename K, typename V, typename A, typename C>
        void operator () (ofile& ofs, std::unordered_map<K, V, A, C>& map)
        {
            if (fourcc(ofs, "MAP ")) { return; }
            write_size(ofs, map.size());
            for (auto& kv: map) {
641
642
643
644
645
                ref() (ofs, kv.first);
                ref() (ofs, kv.second);
            }
        }

646
647
648
649
650
651
652
    template <typename IO, typename A, typename B>
        void operator () (IO& fs, std::pair<A, B>& p)
        {
            ref() (fs, p.first);
            ref() (fs, p.second);
        }

653
    template <typename SCALAR, int ROW, int COL, int C, int D, int E>
654
        void operator () (ifile& ifs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat) { read_matrix(ifs, mat); }
655
656

    template <typename SCALAR, int ROW, int COL, int C, int D, int E>
657
        void operator () (ofile& ofs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat) { write_matrix(ofs, mat); }
658

659
660
    void operator () (ifile& ifs, geno_matrix& mat) { read_geno_matrix(ifs, mat); }
    void operator () (ofile& ofs, geno_matrix& mat) { write_geno_matrix(ofs, mat); }
661

662
    void operator () (ifile& ifs, std::shared_ptr<geno_matrix>& ptr)
663
664
665
666
667
668
669
670
671
    {
        ptr.reset();
        ptr = std::make_shared<geno_matrix>();
        ref() (ifs, *ptr);
        if (!ptr->size()) {
            ptr.reset();
        }
    }

672
    void operator () (ofile& ofs, std::shared_ptr<geno_matrix>& ptr)
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
    {
        if (ptr) {
            ref() (ofs, *ptr.get());
        } else {
            geno_matrix _;
            ref() (ofs, _);
        }
    }
};


struct rw_base : public rw_any<rw_base> {
    virtual ~rw_base() {}
    using rw_any<rw_base>::fourcc;
    using rw_any<rw_base>::ref;
    using rw_any<rw_base>::operator ();
};



693
694
695
696
/** **/


struct LV_database {
697
698
699
    /* GEN / MARK / IND => LV_vec */
    std::map<std::string, std::map<std::string, std::vector<VectorXd>>> data_by_marker;
    /* CHROM / GEN / IND => LV_mat */
700
701
    std::map<std::string, std::map<std::string, std::vector<MatrixXd>>> data;

702
703
704
705
706
707
708
709
710
711
712
    LV_database() : data_by_marker(), data() {}

    void assemble_using_map(const std::vector<chromosome>& map)
    {
        data.clear();
        for (const auto& chr: map) {
            for (const auto& gen_dat: data_by_marker) {
                const auto& gen = gen_dat.first;
                const auto& mark_ind_lv = gen_dat.second;
                size_t n_states = mark_ind_lv.begin()->second.front().size();
                size_t n_mark = chr.raw.marker_name.size();
713
                size_t n_condensed_mark = chr.condensed.marker_name.size();
714
715
716
                size_t n_ind = mark_ind_lv.begin()->second.size();
                auto& ind_LVmat = data[chr.name][gen];
                ind_LVmat.resize(n_ind, {n_states, n_mark});
717
718
                /*MSG_DEBUG("[assemble.using.map] #haplo_size=" << chr.haplo_sizes.size());*/
                /*MSG_QUEUE_FLUSH();*/
Damien Leroux's avatar
Damien Leroux committed
719
720
                for (size_t ind = 0; ind < n_ind; ++ind) {
                    auto& mat = ind_LVmat[ind];
721
                    mat = MatrixXd::Ones(n_states, n_condensed_mark);
Damien Leroux's avatar
Damien Leroux committed
722
                    size_t m = 0;
723
724
                    int haplo_counter = 0;
                    auto haplo_size_i = chr.haplo_sizes.begin();
Damien Leroux's avatar
Damien Leroux committed
725
726
                    for (const auto& mark: chr.raw.marker_name) {
                        const auto& svec = mark_ind_lv.find(mark)->second;
727
728
729
730
731
732
733
734
735
736
737
738
739
740
                        /*MSG_DEBUG("[assemble.using.map] haplo_size=" << (*haplo_size_i) << " column=" << m);*/
                        /*MSG_QUEUE_FLUSH();*/
                        mat.col(m).array() *= svec[ind].array();
                        ++haplo_counter;
                        if (haplo_counter == *haplo_size_i) {
                            ++m;
                            ++haplo_size_i;
                            haplo_counter = 0;
                        }
                    }
                    auto hi = chr.begin();
                    for (int c = 0; c < mat.cols(); ++c, ++hi) {
                        double sum = mat.col(c).sum();
                        if (sum == 0) {
741
742
743
744
745
746
747
                            /*std::stringstream ss;*/
                            /*ss << "Detected inconsistent observations! Generation is " << gen << ", chromosome " << chr.name << ", individual #" << (ind + 1) << ", involving markers";*/
                            /*auto haplo = *hi;*/
                            /*for (size_t i = haplo.first; i < haplo.second; ++i) {*/
                                /*ss << ' ' << chr.raw.marker_name[i];*/
                            /*}*/
                            /*MSG_WARNING(ss.str());*/
748
749
750
                        } else {
                            mat.col(c) /= sum;
                        }
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
                    }
                }
            }
        }
    }

    MatrixXd operator () (
            std::vector<std::string>::const_iterator begin,
            std::vector<std::string>::const_iterator end,
            const std::string& gen, size_t ind) const
    {
        const auto& milv = data_by_marker.find(gen)->second;
        size_t n_states = milv.find(*begin)->second.front().size();
        MatrixXd LV(n_states, end - begin);
        for (size_t i = 0; i < n_states; ++i) {
            LV.col(i) = milv.find(*begin)->second[ind];
        }
        return LV;
    }

771
772
773
774
775
776
777
778
779
780
    MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind)
    {
        return data[chr][gen][ind];
    }

    const MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind) const
    {
        return data.find(chr)->second.find(gen)->second[ind];
    }

781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
    std::map<std::string, std::vector<MatrixXd>>
        extract(const std::string& gen, const std::vector<size_t> ind_vec) const
        {
            std::map<std::string, std::vector<MatrixXd>> ret;
            for (const auto& chr_gen_lv_vec: data) {
                const std::string& chr = chr_gen_lv_vec.first;
                const auto& lv_vec = chr_gen_lv_vec.second.find(gen)->second;
                auto& ret_lv_vec = ret[chr];
                ret_lv_vec.reserve(ind_vec.size());
                for (size_t i: ind_vec) {
                    ret_lv_vec.push_back(lv_vec[i]);
                }
            }
            return ret;
        }

797
798
799
800
801
802
803
804
805
806
807
808
809
810
    template <typename STREAM_TYPE>
        void
        file_io(STREAM_TYPE& fs)
        {
            rw_base rw;
            if (rw.fourcc(fs, "SMLV")) { return; }
            rw(fs, data_by_marker);
            rw(fs, data);
        }

    static
        LV_database
        load_from(const std::string& filename)
        {
811
            ifile ifs(filename);
812
813
814
815
816
817
818
819
            LV_database ret;
            ret.file_io(ifs);
            return ret;
        }

    void
        save_to(const std::string& filename)
        {
820
            ofile ofs(filename);
821
822
823
824
            file_io(ofs);
        }

    /*
825
    static
826
        bool lv_check_fourcc(ifile& ifs, const char* fourcc)
827
828
829
830
831
832
833
834
835
836
837
        {
            if (check_fourcc(ifs, fourcc)) {
                MSG_ERROR("FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again.");
                return true;
            }
            return false;
        }

    static
        LV_database load_from(const std::string& filename)
        {
838
            ifile ifs(filename);
839
840
841
842
            return load_from(ifs);
        }

    static
843
        LV_database load_from(ifile& ifs)
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
        {
            LV_database LV;

            if (lv_check_fourcc(ifs, "SMLV")) { return {}; }
            size_t n_chrom = read_size(ifs);
            size_t n_gen = read_size(ifs);
            for (size_t c = 0; c < n_chrom; ++c) {
                if (lv_check_fourcc(ifs, "SCHR")) { return {}; }
                std::string chr_name = read_str(ifs);
                for (size_t g = 0; g < n_gen; ++g) {
                    if (lv_check_fourcc(ifs, "SGEN")) { return {}; }
                    std::string gen_name = read_str(ifs);
                    size_t n_ind = read_size(ifs);
                    LV.data[chr_name][gen_name].resize(n_ind);
                    for (size_t i = 0; i < n_ind; ++i) {
                        if (lv_check_fourcc(ifs, "SLV_")) { return {}; }
                        read_matrix(ifs, LV.data[chr_name][gen_name][i]);
                    }
                }
            }
            return LV;
        }

    void save_to(const std::string& filename)
    {
869
        ofile ofs(filename);
870
871
872
        save_to(ofs);
    }

873
    void save_to(ofile& ofs)
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
    {
        write_fourcc(ofs, "SMLV");
        write_size(ofs, data.size());
        write_size(ofs, data.begin()->second.size());
        for (const auto& chr_gen_vec_lv: data) {
            write_fourcc(ofs, "SCHR");
            write_str(ofs, chr_gen_vec_lv.first);
            for (const auto& gen_vec_lv: chr_gen_vec_lv.second) {
                write_fourcc(ofs, "SGEN");
                write_str(ofs, gen_vec_lv.first);
                write_size(ofs, gen_vec_lv.second.size());
                for (const auto& lv: gen_vec_lv.second) {
                    write_fourcc(ofs, "SLV_");
                    write_matrix(ofs, lv);
                }
            }
        }
    }
892
    */
893
894
895
896
897

    friend
        std::ostream& operator << (std::ostream& os, const LV_database& LV)
        {
            for (const auto& chr_gen_vec_lv: LV.data) {
898
                os << "CHROMOSOME " << chr_gen_vec_lv.first << std::endl;
899
                for (const auto& gen_vec_lv: chr_gen_vec_lv.second) {
900
                    os << "* generation " << gen_vec_lv.first << std::endl;
901
902
                    size_t i = 0;
                    for (const auto& lv: gen_vec_lv.second) {
903
                        os << "  #" << i << std::endl;
904
                        ++i;
905
                        os << lv << std::endl;
906
907
908
909
910
911
912
913
914
915
916
                    }
                }
            }
            return os;
        }
};


/** **/


917
918
919
920
921
922
923
924
925
926
927
928
929
template <typename V>
std::vector<V>
filter_vector(const std::vector<V>& vec, const std::vector<size_t>& list)
{
    std::vector<V> ret;
    ret.reserve(list.size());
    for (size_t i: list) {
        ret.push_back(vec[i]);
    }
    return ret;
}


930
931
struct qtl_pop_type {
    std::string name;
Damien Leroux's avatar
Damien Leroux committed
932
    std::string qtl_generation_name;
933
    std::vector<size_t> indices;
934
    std::shared_ptr<geno_matrix> gen;
935
936
937
    std::map<std::string, std::vector<MatrixXd>> LV;
    std::string observed_traits_filename;
    std::vector<trait> observed_traits;
938
    std::map<char, std::string> ancestor_names;
939
940
941
942
943

    qtl_pop_type()
        : name(), qtl_generation_name(), indices(), gen(), LV(), observed_traits_filename(), observed_traits()
    {}
    qtl_pop_type(const std::string& n, const std::string& qgn, const std::vector<size_t>& i, std::shared_ptr<geno_matrix> g,
944
945
946
947
948
949
950
951
                 const std::map<std::string, std::vector<MatrixXd>>& lv, const std::string& otf, const std::vector<trait>& ot, const std::map<char, std::string>& anam)
        : name(n), qtl_generation_name(qgn), indices(i), gen(g), LV(lv), observed_traits_filename(otf), observed_traits(ot), ancestor_names(anam)
    {
        /*MSG_DEBUG("new qtl_pop with " << ancestor_names.size() << " ancestor names");*/
        /*for (const auto& kv: ancestor_names) {*/
            /*MSG_DEBUG(" * " << kv.first << ": " << kv.second);*/
        /*}*/
    }
952
953
954
955
956
957
958
959
960
961

    std::shared_ptr<qtl_pop_type>
        filter_clone(const trait& this_trait) const
        {
            const std::vector<size_t>& keep = this_trait.good_indices;
            std::shared_ptr<qtl_pop_type> ret = std::make_shared<qtl_pop_type>();
            ret->name = name;
            ret->qtl_generation_name = qtl_generation_name;
            ret->gen = gen;
            ret->observed_traits_filename = observed_traits_filename;
962
963
964
965
966
            for (const auto& t: observed_traits) {
                if (t.name == this_trait.name) {
                    ret->observed_traits.emplace_back(t);
                }
            }
967
            ret->ancestor_names = ancestor_names;
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
            auto i = indices.begin();
            auto j = indices.end();
            auto filt = keep.begin();
            auto filt_end = keep.end();

            /*MSG_DEBUG("Filtering <" << indices << "> with <" << keep << '>');*/
            std::vector<size_t> keep_indices;
            keep_indices.reserve(keep.size());
            while (filt != filt_end && i != j) {
                if (*i == *filt) {
                    keep_indices.push_back(i - indices.begin());
                    ++i;
                    ++filt;
                } else if (*i < *filt) {
                    ++i;
                } else {
                    ++filt;
                }
            }

            ret->indices = filter_vector(indices, keep_indices);
989
            /*MSG_DEBUG("Filtered indices " << ret->indices << " (" << ret->indices.size() << ')');*/
990
            for (const auto& kv: LV) {
991
                /*MSG_DEBUG("Filtering LV on " << kv.first << ", keeping " << keep_indices.size() << " elements");*/
992
993
                ret->LV[kv.first] = filter_vector(kv.second, keep_indices);
            }
994
995
996
997
998
999
1000
            /*MSG_DEBUG("Resulting LV (" << ret->LV.size() << ')');*/
            /*for (const auto& lv: ret->LV) {*/
                /*MSG_DEBUG("** " << lv.first << " (" << lv.second.size() << ')');*/
                /*auto ri = ret->indices.begin();*/
                /*for (const auto& v: lv.second) {*/
                    /*MSG_DEBUG("* " << (*ri++));*/
                    /*MSG_DEBUG("" << v);*/
For faster browsing, not all history is shown. View entire blame