output.h 40.6 KB
Newer Older
1
2
3
4
5
#ifndef _SPELL_BAYES_OUTPUT_H_
#define _SPELL_BAYES_OUTPUT_H_

#include <map>
#include <vector>
Damien Leroux's avatar
Damien Leroux committed
6
7
8
#include <map>
#include <set>
#include <unordered_set>
9
10
#include <string>
#include <iostream>
11
#include "file.h"
12
13
14
15
#include <cstring>

#include "eigen.h"
#include "error.h"
16
#include "data/chromosome.h"
17
/*#include "generation_rs_fwd.h"*/
18
#include "input/read_trait.h"
19
#include "tensor.h"
20
21
22
23

/** FOURCC **/

inline
24
bool check_fourcc(ifile& ifs, const char* fourcc)
25
26
27
28
29
30
31
{
    char buf[4] = {0, 0, 0, 0};
    ifs.read(buf, 4);
    return strncmp(fourcc, buf, 4);
}

inline
32
void write_fourcc(ofile& ofs, const char* fourcc)
33
34
35
36
37
38
39
{
    ofs.write(fourcc, 4);
}

/** SIZE_T **/

inline
40
void write_size(ofile& ofs, size_t sz)
41
42
43
44
45
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
46
size_t read_size(ifile& ifs)
47
48
49
50
51
52
{
    size_t ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

53
54
55
/** PTRDIFF_T **/

inline
56
void write_ptrdiff(ofile& ofs, ptrdiff_t sz)
57
58
59
60
61
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
62
ptrdiff_t read_ptrdiff(ifile& ifs)
63
64
65
66
67
68
{
    ptrdiff_t ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

69
70
71
/** STRING **/

inline
72
std::string read_str(ifile& ifs)
73
74
75
76
77
78
79
80
{
    size_t sz = read_size(ifs);
    std::vector<char> tmp(sz);
    ifs.read(&tmp[0], sz);
    return {tmp.begin(), tmp.end()};
}

inline
81
void write_str(ofile& ofs, const std::string& s)
82
83
84
85
86
87
88
89
{
    write_size(ofs, s.size());
    ofs.write(s.c_str(), s.size());
}

/** DOUBLE **/

inline
90
void write_double(ofile& ofs, double sz)
91
92
93
94
95
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
96
double read_double(ifile& ifs)
97
98
99
100
101
102
103
104
105
{
    double ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

/** INT **/

inline
106
void write_int(ofile& ofs, int sz)
107
108
109
110
111
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
112
int read_int(ifile& ifs)
113
114
115
116
117
118
{
    int ret;
    ifs.read((char*) &ret, sizeof ret);
    return ret;
}

119
120
121
/** CHAR **/

inline
122
void write_char(ofile& ofs, char sz)
123
124
125
126
127
{
    ofs.write((const char*) &sz, sizeof sz);
}

inline
128
char read_char(ifile& ifs)
129
130
131
132
133
134
135
{
    char ret;
    ifs.read(&ret, sizeof ret);
    return ret;
}

#if 0
136
137
138
/** FAST_POLYNOM **/

inline
139
void write_fast_polynom(ofile& ofs, const fast_polynom& fp)
140
141
142
143
144
145
146
147
148
149
150
151
{
    impl::f_polynom f = fp;
    write_int(ofs, fp.value);
    write_int(ofs, f.r_exp);
    write_int(ofs, f.s_exp);
    write_size(ofs, f.P.size());
    for (double c: f.P) {
        write_double(ofs, c);
    }
}

inline
152
fast_polynom read_fast_polynom(ifile& ifs, int& original_key)
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
{
    original_key = read_int(ifs);
    impl::f_polynom ret = fast_polynom::zero;
    ret.r_exp = read_int(ifs);
    ret.s_exp = read_int(ifs);
    size_t sz = read_size(ifs);
    ret.P.resize(sz);
    for (size_t i = 0; i < sz; ++i) {
        ret.P[i] = read_double(ifs);
    }
    return ret;
}

/** ALGREBRAIC GENOTYPE **/

168
inline void write_algebraic_genotype(ofile& ofs, const algebraic_genotype& ag)
169
170
171
172
173
174
{
    ofs.write((const char*) &ag.genotype, sizeof ag.genotype);
    ofs.write((const char*) &ag.type, sizeof ag.type);
    write_int(ofs, ag.poly.value);
}

175
inline algebraic_genotype read_algebraic_genotype(ifile& ifs, const std::map<int, fast_polynom>& pt)
176
177
178
179
180
181
182
183
184
185
{
    algebraic_genotype ag;
    ifs.read((char*) &ag.genotype, sizeof ag.genotype);
    ifs.read((char*) &ag.type, sizeof ag.type);
    ag.poly = pt.find(read_int(ifs))->second;
    return ag;
}

/** GENOMATRIX **/

186
inline void write_genomatrix(ofile& ofs, const GenoMatrix& mat)
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
{
    write_fourcc(ofs, "SGEM");
    /*std::map<decltype(fast_polynom::value), impl::f_polynom>*/
    std::set<fast_polynom> poly_table;
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            poly_table.insert(mat(i, j).poly);
        }
    }
    write_size(ofs, poly_table.size());
    for (const auto& fp: poly_table) {
        write_fast_polynom(ofs, fp);
    }
    write_int(ofs, mat.cols());
    write_int(ofs, mat.rows());
    /*MSG_DEBUG("[write_genomatrix] cols=" << mat.cols() << " rows=" << mat.rows());*/
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            /*write_int(ofs, mat(i, j).value);*/
            write_algebraic_genotype(ofs, mat(i, j));
        }
    }
}

inline
212
void read_genomatrix(ifile& ifs, GenoMatrix& mat)
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
{
    if (check_fourcc(ifs, "SGEM")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
        return;
    }
    std::map<int, fast_polynom> poly_map;

    size_t table_size = read_size(ifs);
    int key;

    for (size_t i = 0; i < table_size; ++i) {
        auto f = read_fast_polynom(ifs, key);
        poly_map[key] = f;
    }

    int cols = read_int(ifs);
    int rows = read_int(ifs);
    /*MSG_DEBUG("[read_genomatrix] cols=" << cols << " rows=" << rows);*/
    mat.resize(rows, cols);
    for (int j = 0; j < mat.cols(); ++j) {
        for (int i = 0; i < mat.rows(); ++i) {
            mat(i, j) = read_algebraic_genotype(ifs, poly_map);
        }
    }
}

/** GENERATION_RS **/

241
inline void write_geno_matrix(ofile& ofs, const geno_matrix* gen)
242
243
244
245
246
247
248
249
250
251
252
{
    write_fourcc(ofs, "SGRS");
    write_str(ofs, gen->name);
    write_size(ofs, gen->P.size());
    for (const auto& p: gen->P) {
        write_double(ofs, p.weight);
        write_genomatrix(ofs, p.G.data);
    }
}

inline
253
geno_matrix* read_geno_matrix(ifile& ifs)
254
255
256
257
258
259
260
{
    if (check_fourcc(ifs, "SGRS")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
    }
    /*MSG_DEBUG("pouet 1"); MSG_QUEUE_FLUSH();*/
    std::string name = read_str(ifs);
    /*MSG_DEBUG("pouet 2"); MSG_QUEUE_FLUSH();*/
261
    geno_matrix* ret = geno_matrix::blank(name);
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
    /*MSG_DEBUG("pouet 3"); MSG_QUEUE_FLUSH();*/
    size_t n_p = read_size(ifs);
    /*MSG_DEBUG("Have " << n_p << " processes"); MSG_QUEUE_FLUSH();*/
    ret->P.resize(n_p);
    for (size_t i = 0; i < n_p; ++i) {
        ret->P[i].weight = read_double(ifs);
        GenoMatrix tmp;
        read_genomatrix(ifs, tmp);
        ret->P[i].G = convert(tmp);
        /*MSG_DEBUG("Read process");*/
        /*MSG_DEBUG(ret->P[i]);*/
    }
    ret->precompute();
    return ret;
}
277
278
#endif

279
280
281
282
283
284
285
286
287

/** MATRIX<SCALAR, R, C> **/

template <typename MATRIX_TYPE>
struct resize_matrix_impl;

template <> struct resize_matrix_impl<VectorXd> { void operator () (VectorXd& v, size_t rows, size_t) { v.resize(rows); } };
template <> struct resize_matrix_impl<MatrixXd> { void operator () (MatrixXd& m, size_t rows, size_t cols) { m.resize(rows, cols); } };

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
template <typename SCALAR, int ROW, int COL, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, ROW, COL, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& m, size_t r, size_t c) { m.resize(r, c); }
};

template <typename SCALAR, int ROW, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, ROW, 1, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, ROW, 1, C, D, E>& v, size_t r, size_t) { v.resize(r); }
};

template <typename SCALAR, int COL, int C, int D, int E>
struct resize_matrix_impl<Eigen::Matrix<SCALAR, 1, COL, C, D, E>> {
    void operator () (Eigen::Matrix<SCALAR, 1, COL, C, D, E>& v, size_t, size_t c) { v.resize(c); }
};

303
304
305
306
template <typename MATRIX_TYPE>
void resize_matrix(MATRIX_TYPE& m, size_t r, size_t c) { resize_matrix_impl<MATRIX_TYPE>()(m, r, c); }

template <typename SCALAR, int ROW, int COL, int C, int D, int E>
307
void read_matrix(ifile& ifs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat)
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
{
    size_t scalar_sz = read_size(ifs);
    if (scalar_sz != sizeof(SCALAR)) {
        MSG_ERROR("WRONG SIZE OF SCALAR, CAN'T READ FILE", "Make sure spell-marker and spell-qtl are always executed on machines with same word size.");
    }
    size_t n_row = read_size(ifs);
    size_t n_col = read_size(ifs);
    if (ROW != Eigen::Dynamic && ((int) n_row) != ROW) {
        MSG_ERROR("WRONG ROW COUNT. FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again");
    }
    if (COL != Eigen::Dynamic && ((int) n_col) != COL) {
        MSG_ERROR("WRONG COLUMN COUNT. FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again");
    }
    resize_matrix(mat, n_row, n_col);
    ifs.read((char*) mat.data(), n_row * n_col * sizeof(SCALAR));
}

template <typename SCALAR, int ROW, int COL, int C, int D, int E>
326
void write_matrix(ofile& ofs, const Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat)
327
328
329
330
331
332
333
334
{
    write_size(ofs, sizeof(SCALAR));
    write_size(ofs, mat.rows());
    write_size(ofs, mat.cols());
    ofs.write((const char*) mat.data(), mat.size() * sizeof(SCALAR));
}


335
template <typename V, typename READ_ELEM_FUNC>
336
void read_vector(ifile& ifs, std::vector<V>& vec, READ_ELEM_FUNC read_elem)
337
338
339
340
341
342
343
344
345
346
347
{
    size_t sz = read_size(ifs);
    vec.clear();
    vec.reserve(sz);
    for (size_t i = 0; i < sz; ++i) {
        vec.emplace_back(read_elem(ifs));
    }
}


template <typename V, typename WRITE_ELEM_FUNC>
348
void write_vector(ofile& ofs, const std::vector<V>& vec, WRITE_ELEM_FUNC write_elem)
349
350
351
352
353
354
355
356
357
358
{
    write_size(ofs, vec.size());
    for (const auto& e: vec) {
        write_elem(ofs, e);
    }
}


/** LABEL_TYPE **/

359
inline
360
label_type read_label(ifile& ifs)
361
362
363
364
365
366
{
    char f, s;
    ifs >> f >> s;
    return {f, s};
}

367
inline
368
void write_label(ofile& ofs, const label_type& l)
369
370
371
372
373
374
375
{
    ofs << l.first() << l.second();
}


/** GENO_MATRIX **/

376
inline
377
void write_geno_matrix(ofile& ofs, const geno_matrix& mat)
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
{
    write_fourcc(ofs, "SGEM");
    write_str(ofs, mat.name);
    /* skip variant, it's deprecated. Or should be. */
    write_vector(ofs, mat.labels, write_label);
    write_matrix(ofs, mat.inf_mat);
    write_matrix(ofs, mat.p);
    write_matrix(ofs, mat.p_inv);
    write_matrix(ofs, mat.diag);
    write_matrix(ofs, mat.stat_dist);
    write_matrix(ofs, mat.collect);
    write_matrix(ofs, mat.dispatch);
    /* also skip symmetries for now. */
}

inline
394
void read_geno_matrix(ifile& ifs, geno_matrix& mat)
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
{
    if (check_fourcc(ifs, "SGEM")) {
        MSG_ERROR("File is not valid or has been corrupted", "");
        return;
    }
    mat.name = read_str(ifs);
    /* skip variant. */
    read_vector(ifs, mat.labels, read_label);
    read_matrix(ifs, mat.inf_mat);
    read_matrix(ifs, mat.p);
    read_matrix(ifs, mat.p_inv);
    read_matrix(ifs, mat.diag);
    read_matrix(ifs, mat.stat_dist);
    read_matrix(ifs, mat.collect);
    read_matrix(ifs, mat.dispatch);
    /* also skip symmetries for now. */
}


414
inline
415
void write_geno_matrix(ofile& ofs, const geno_matrix* ptr)
416
417
418
419
{
    write_geno_matrix(ofs, *ptr);
}

420
inline
421
geno_matrix* read_geno_matrix(ifile& ifs)
422
423
424
425
426
427
428
{
    geno_matrix* ret = new geno_matrix();
    read_geno_matrix(ifs, *ret);
    return ret;
}


429
430
431
432
433



template <class DERIVED>
struct rw_any {
434
    bool fourcc(ifile& ifs, const char* cc)
435
436
437
438
439
440
441
442
    {
        if (check_fourcc(ifs, cc)) {
            MSG_ERROR("File is not valid or has been corrupted (expected " << cc << ')', "");
            return true;
        }
        return false;
    }

443
    bool fourcc(ofile& ofs, const char* cc)
444
445
446
447
448
449
450
451
452
    {
        write_fourcc(ofs, cc);
        return false;
    }

    virtual ~rw_any() {}

    DERIVED& ref() { return *dynamic_cast<DERIVED*>(this); }

453
454
    void operator () (ifile& ifs, std::string& s) { s = read_str(ifs); }
    void operator () (ofile& ofs, const std::string& s) { write_str(ofs, s); }
455

456
457
    void operator () (ifile& ifs, char& i) { i = read_char(ifs); }
    void operator () (ofile& ofs, char i) { write_char(ofs, i); }
458

459
460
    void operator () (ifile& ifs, int& i) { i = read_int(ifs); }
    void operator () (ofile& ofs, int i) { write_int(ofs, i); }
461

462
463
    void operator () (ifile& ifs, bool& i) { i = !!read_char(ifs); }
    void operator () (ofile& ofs, bool i) { write_char(ofs, i); }
464

465
466
    void operator () (ifile& ifs, double& i) { i = read_double(ifs); }
    void operator () (ofile& ofs, double i) { write_double(ofs, i); }
467

468
469
    void operator () (ifile& ifs, size_t& i) { i = read_size(ifs); }
    void operator () (ofile& ofs, size_t i) { write_size(ofs, i); }
470

471
472
    void operator () (ifile& ifs, ptrdiff_t& i) { i = read_ptrdiff(ifs); }
    void operator () (ofile& ofs, ptrdiff_t i) { write_ptrdiff(ofs, i); }
473

474
475
    void operator () (ifile& ifs, label_type& l) { l = {read_char(ifs), read_char(ifs)}; }
    void operator () (ofile& ofs, const label_type& l) { write_char(ofs, l.first()); write_char(ofs, l.second()); }
476
477

    template <typename V>
478
        void operator () (ifile& ifs, std::set<V>& vec)
479
480
481
482
483
484
485
486
487
488
489
490
        {
            if (fourcc(ifs, "OSET")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            for (size_t i = 0; i < sz; ++i) {
                V tmp;
                ref() (ifs, tmp);
                vec.emplace(tmp);
            }
        }

    template <typename V>
491
        void operator () (ofile& ofs, const std::set<V>& vec)
492
493
494
495
496
497
498
499
500
501
        {
            if (fourcc(ofs, "OSET")) { return; }
            write_size(ofs, vec.size());
            for (const auto& e: vec) {
                ref() (ofs, e);
            }
        }


    template <typename V>
502
        void operator () (ifile& ifs, std::unordered_set<V>& vec)
503
504
505
506
507
508
509
510
511
512
513
514
        {
            if (fourcc(ifs, "USET")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            for (size_t i = 0; i < sz; ++i) {
                V tmp;
                ref() (ifs, tmp);
                vec.emplace(tmp);
            }
        }

    template <typename V>
515
        void operator () (ofile& ofs, const std::unordered_set<V>& vec)
516
517
518
519
520
521
522
523
        {
            if (fourcc(ofs, "USET")) { return; }
            write_size(ofs, vec.size());
            for (const auto& e: vec) {
                ref() (ofs, e);
            }
        }

524
    void operator () (ifile& ifs, std::vector<bool>::reference i) { i = !!read_char(ifs); }
525
526

    template <typename V, typename A>
527
        void operator () (ifile& ifs, std::vector<V, A>& vec)
528
529
530
531
532
533
534
535
536
537
538
539
        {
            if (fourcc(ifs, "VECT")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            vec.reserve(sz);
            for (size_t i = 0; i < sz; ++i) {
                vec.emplace_back();
                ref() (ifs, vec.back());
            }
        }

    template <typename V, typename A>
540
        void operator () (ofile& ofs, const std::vector<V, A>& vec)
541
542
543
544
545
546
547
548
549
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (const auto& e: vec) {
                ref() (ofs, e);
            }
        }

    template <typename V, typename A, typename RWElem>
550
        void operator () (ifile& ifs, std::vector<V, A>& vec, RWElem rw_elem)
551
552
553
554
555
556
557
558
559
560
561
562
        {
            if (fourcc(ifs, "VECT")) { return; }
            size_t sz = read_size(ifs);
            vec.clear();
            vec.reserve(sz);
            for (size_t i = 0; i < sz; ++i) {
                vec.emplace_back();
                rw_elem(ifs, vec.back());
            }
        }

    template <typename V, typename A, typename RWElem>
563
        void operator () (ofile& ofs, std::vector<V, A>& vec, RWElem rw_elem)
564
565
566
567
568
569
570
571
572
        {
            if (fourcc(ofs, "VECT")) { return; }
            write_size(ofs, vec.size());
            for (auto& e: vec) {
                rw_elem(ofs, e);
            }
        }

    template <typename K, typename V, typename A, typename C>
573
        void operator () (ifile& ifs, std::map<K, V, A, C>& map)
574
575
576
577
578
579
580
581
582
583
584
585
586
587
        {
            if (fourcc(ifs, "MAP ")) { return; }
            size_t count = read_size(ifs);
            map.clear();
            for (size_t i = 0; i < count; ++i) {
                K key;
                V value;
                ref() (ifs, key);
                ref() (ifs, value);
                map.emplace(std::move(key), std::move(value));
            }
        }

    template <typename K, typename V, typename A, typename C>
588
        void operator () (ofile& ofs, const std::map<K, V, A, C>& map)
589
590
591
592
593
594
595
596
597
598
        {
            if (fourcc(ofs, "MAP ")) { return; }
            write_size(ofs, map.size());
            for (const auto& kv: map) {
                ref() (ofs, kv.first);
                ref() (ofs, kv.second);
            }
        }

    template <typename SCALAR, int ROW, int COL, int C, int D, int E>
599
        void operator () (ifile& ifs, Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat) { read_matrix(ifs, mat); }
600
601

    template <typename SCALAR, int ROW, int COL, int C, int D, int E>
602
        void operator () (ofile& ofs, const Eigen::Matrix<SCALAR, ROW, COL, C, D, E>& mat) { write_matrix(ofs, mat); }
603

604
605
    void operator () (ifile& ifs, geno_matrix& mat) { read_geno_matrix(ifs, mat); }
    void operator () (ofile& ofs, geno_matrix& mat) { write_geno_matrix(ofs, mat); }
606

607
    void operator () (ifile& ifs, std::shared_ptr<geno_matrix>& ptr)
608
609
610
611
612
613
614
615
616
    {
        ptr.reset();
        ptr = std::make_shared<geno_matrix>();
        ref() (ifs, *ptr);
        if (!ptr->size()) {
            ptr.reset();
        }
    }

617
    void operator () (ofile& ofs, const std::shared_ptr<geno_matrix>& ptr)
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
    {
        if (ptr) {
            ref() (ofs, *ptr.get());
        } else {
            geno_matrix _;
            ref() (ofs, _);
        }
    }
};


struct rw_base : public rw_any<rw_base> {
    virtual ~rw_base() {}
    using rw_any<rw_base>::fourcc;
    using rw_any<rw_base>::ref;
    using rw_any<rw_base>::operator ();
};



638
639
640
641
/** **/


struct LV_database {
642
643
644
    /* GEN / MARK / IND => LV_vec */
    std::map<std::string, std::map<std::string, std::vector<VectorXd>>> data_by_marker;
    /* CHROM / GEN / IND => LV_mat */
645
646
    std::map<std::string, std::map<std::string, std::vector<MatrixXd>>> data;

647
648
649
650
651
652
653
654
655
656
657
    LV_database() : data_by_marker(), data() {}

    void assemble_using_map(const std::vector<chromosome>& map)
    {
        data.clear();
        for (const auto& chr: map) {
            for (const auto& gen_dat: data_by_marker) {
                const auto& gen = gen_dat.first;
                const auto& mark_ind_lv = gen_dat.second;
                size_t n_states = mark_ind_lv.begin()->second.front().size();
                size_t n_mark = chr.raw.marker_name.size();
658
                size_t n_condensed_mark = chr.condensed.marker_name.size();
659
660
661
                size_t n_ind = mark_ind_lv.begin()->second.size();
                auto& ind_LVmat = data[chr.name][gen];
                ind_LVmat.resize(n_ind, {n_states, n_mark});
662
663
                /*MSG_DEBUG("[assemble.using.map] #haplo_size=" << chr.haplo_sizes.size());*/
                /*MSG_QUEUE_FLUSH();*/
Damien Leroux's avatar
Damien Leroux committed
664
665
                for (size_t ind = 0; ind < n_ind; ++ind) {
                    auto& mat = ind_LVmat[ind];
666
                    mat = MatrixXd::Ones(n_states, n_condensed_mark);
Damien Leroux's avatar
Damien Leroux committed
667
                    size_t m = 0;
668
669
                    int haplo_counter = 0;
                    auto haplo_size_i = chr.haplo_sizes.begin();
Damien Leroux's avatar
Damien Leroux committed
670
671
                    for (const auto& mark: chr.raw.marker_name) {
                        const auto& svec = mark_ind_lv.find(mark)->second;
672
673
674
675
676
677
678
679
680
681
682
683
684
685
                        /*MSG_DEBUG("[assemble.using.map] haplo_size=" << (*haplo_size_i) << " column=" << m);*/
                        /*MSG_QUEUE_FLUSH();*/
                        mat.col(m).array() *= svec[ind].array();
                        ++haplo_counter;
                        if (haplo_counter == *haplo_size_i) {
                            ++m;
                            ++haplo_size_i;
                            haplo_counter = 0;
                        }
                    }
                    auto hi = chr.begin();
                    for (int c = 0; c < mat.cols(); ++c, ++hi) {
                        double sum = mat.col(c).sum();
                        if (sum == 0) {
686
687
688
689
690
691
692
                            /*std::stringstream ss;*/
                            /*ss << "Detected inconsistent observations! Generation is " << gen << ", chromosome " << chr.name << ", individual #" << (ind + 1) << ", involving markers";*/
                            /*auto haplo = *hi;*/
                            /*for (size_t i = haplo.first; i < haplo.second; ++i) {*/
                                /*ss << ' ' << chr.raw.marker_name[i];*/
                            /*}*/
                            /*MSG_WARNING(ss.str());*/
693
694
695
                        } else {
                            mat.col(c) /= sum;
                        }
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
                    }
                }
            }
        }
    }

    MatrixXd operator () (
            std::vector<std::string>::const_iterator begin,
            std::vector<std::string>::const_iterator end,
            const std::string& gen, size_t ind) const
    {
        const auto& milv = data_by_marker.find(gen)->second;
        size_t n_states = milv.find(*begin)->second.front().size();
        MatrixXd LV(n_states, end - begin);
        for (size_t i = 0; i < n_states; ++i) {
            LV.col(i) = milv.find(*begin)->second[ind];
        }
        return LV;
    }

716
717
718
719
720
721
722
723
724
725
    MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind)
    {
        return data[chr][gen][ind];
    }

    const MatrixXd& operator () (const std::string& chr, const std::string& gen, size_t ind) const
    {
        return data.find(chr)->second.find(gen)->second[ind];
    }

726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
    std::map<std::string, std::vector<MatrixXd>>
        extract(const std::string& gen, const std::vector<size_t> ind_vec) const
        {
            std::map<std::string, std::vector<MatrixXd>> ret;
            for (const auto& chr_gen_lv_vec: data) {
                const std::string& chr = chr_gen_lv_vec.first;
                const auto& lv_vec = chr_gen_lv_vec.second.find(gen)->second;
                auto& ret_lv_vec = ret[chr];
                ret_lv_vec.reserve(ind_vec.size());
                for (size_t i: ind_vec) {
                    ret_lv_vec.push_back(lv_vec[i]);
                }
            }
            return ret;
        }

742
743
744
745
746
747
748
749
750
751
752
753
754
755
    template <typename STREAM_TYPE>
        void
        file_io(STREAM_TYPE& fs)
        {
            rw_base rw;
            if (rw.fourcc(fs, "SMLV")) { return; }
            rw(fs, data_by_marker);
            rw(fs, data);
        }

    static
        LV_database
        load_from(const std::string& filename)
        {
756
            ifile ifs(filename);
757
758
759
760
761
762
763
764
            LV_database ret;
            ret.file_io(ifs);
            return ret;
        }

    void
        save_to(const std::string& filename)
        {
765
            ofile ofs(filename);
766
767
768
769
            file_io(ofs);
        }

    /*
770
    static
771
        bool lv_check_fourcc(ifile& ifs, const char* fourcc)
772
773
774
775
776
777
778
779
780
781
782
        {
            if (check_fourcc(ifs, fourcc)) {
                MSG_ERROR("FILE IS NOT A LOCUS VECTOR FILE OR IS CORRUPTED", "You may want to run spell-marker again.");
                return true;
            }
            return false;
        }

    static
        LV_database load_from(const std::string& filename)
        {
783
            ifile ifs(filename);
784
785
786
787
            return load_from(ifs);
        }

    static
788
        LV_database load_from(ifile& ifs)
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
        {
            LV_database LV;

            if (lv_check_fourcc(ifs, "SMLV")) { return {}; }
            size_t n_chrom = read_size(ifs);
            size_t n_gen = read_size(ifs);
            for (size_t c = 0; c < n_chrom; ++c) {
                if (lv_check_fourcc(ifs, "SCHR")) { return {}; }
                std::string chr_name = read_str(ifs);
                for (size_t g = 0; g < n_gen; ++g) {
                    if (lv_check_fourcc(ifs, "SGEN")) { return {}; }
                    std::string gen_name = read_str(ifs);
                    size_t n_ind = read_size(ifs);
                    LV.data[chr_name][gen_name].resize(n_ind);
                    for (size_t i = 0; i < n_ind; ++i) {
                        if (lv_check_fourcc(ifs, "SLV_")) { return {}; }
                        read_matrix(ifs, LV.data[chr_name][gen_name][i]);
                    }
                }
            }
            return LV;
        }

    void save_to(const std::string& filename)
    {
814
        ofile ofs(filename);
815
816
817
        save_to(ofs);
    }

818
    void save_to(ofile& ofs)
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
    {
        write_fourcc(ofs, "SMLV");
        write_size(ofs, data.size());
        write_size(ofs, data.begin()->second.size());
        for (const auto& chr_gen_vec_lv: data) {
            write_fourcc(ofs, "SCHR");
            write_str(ofs, chr_gen_vec_lv.first);
            for (const auto& gen_vec_lv: chr_gen_vec_lv.second) {
                write_fourcc(ofs, "SGEN");
                write_str(ofs, gen_vec_lv.first);
                write_size(ofs, gen_vec_lv.second.size());
                for (const auto& lv: gen_vec_lv.second) {
                    write_fourcc(ofs, "SLV_");
                    write_matrix(ofs, lv);
                }
            }
        }
    }
837
    */
838
839
840
841
842

    friend
        std::ostream& operator << (std::ostream& os, const LV_database& LV)
        {
            for (const auto& chr_gen_vec_lv: LV.data) {
843
                os << "CHROMOSOME " << chr_gen_vec_lv.first << std::endl;
844
                for (const auto& gen_vec_lv: chr_gen_vec_lv.second) {
845
                    os << "* generation " << gen_vec_lv.first << std::endl;
846
847
                    size_t i = 0;
                    for (const auto& lv: gen_vec_lv.second) {
848
                        os << "  #" << i << std::endl;
849
                        ++i;
850
                        os << lv << std::endl;
851
852
853
854
855
856
857
858
859
860
861
                    }
                }
            }
            return os;
        }
};


/** **/


862
863
864
865
866
867
868
869
870
871
872
873
874
template <typename V>
std::vector<V>
filter_vector(const std::vector<V>& vec, const std::vector<size_t>& list)
{
    std::vector<V> ret;
    ret.reserve(list.size());
    for (size_t i: list) {
        ret.push_back(vec[i]);
    }
    return ret;
}


875
876
struct qtl_pop_type {
    std::string name;
Damien Leroux's avatar
Damien Leroux committed
877
    std::string qtl_generation_name;
878
    std::vector<size_t> indices;
879
    std::shared_ptr<geno_matrix> gen;
880
881
882
    std::map<std::string, std::vector<MatrixXd>> LV;
    std::string observed_traits_filename;
    std::vector<trait> observed_traits;
883
    std::map<char, std::string> ancestor_names;
884
885
886
887
888

    qtl_pop_type()
        : name(), qtl_generation_name(), indices(), gen(), LV(), observed_traits_filename(), observed_traits()
    {}
    qtl_pop_type(const std::string& n, const std::string& qgn, const std::vector<size_t>& i, std::shared_ptr<geno_matrix> g,
889
890
891
892
893
894
895
896
                 const std::map<std::string, std::vector<MatrixXd>>& lv, const std::string& otf, const std::vector<trait>& ot, const std::map<char, std::string>& anam)
        : name(n), qtl_generation_name(qgn), indices(i), gen(g), LV(lv), observed_traits_filename(otf), observed_traits(ot), ancestor_names(anam)
    {
        /*MSG_DEBUG("new qtl_pop with " << ancestor_names.size() << " ancestor names");*/
        /*for (const auto& kv: ancestor_names) {*/
            /*MSG_DEBUG(" * " << kv.first << ": " << kv.second);*/
        /*}*/
    }
897
898
899
900
901
902
903
904
905
906

    std::shared_ptr<qtl_pop_type>
        filter_clone(const trait& this_trait) const
        {
            const std::vector<size_t>& keep = this_trait.good_indices;
            std::shared_ptr<qtl_pop_type> ret = std::make_shared<qtl_pop_type>();
            ret->name = name;
            ret->qtl_generation_name = qtl_generation_name;
            ret->gen = gen;
            ret->observed_traits_filename = observed_traits_filename;
907
908
909
910
911
            for (const auto& t: observed_traits) {
                if (t.name == this_trait.name) {
                    ret->observed_traits.emplace_back(t);
                }
            }
912
            ret->ancestor_names = ancestor_names;
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
            auto i = indices.begin();
            auto j = indices.end();
            auto filt = keep.begin();
            auto filt_end = keep.end();

            /*MSG_DEBUG("Filtering <" << indices << "> with <" << keep << '>');*/
            std::vector<size_t> keep_indices;
            keep_indices.reserve(keep.size());
            while (filt != filt_end && i != j) {
                if (*i == *filt) {
                    keep_indices.push_back(i - indices.begin());
                    ++i;
                    ++filt;
                } else if (*i < *filt) {
                    ++i;
                } else {
                    ++filt;
                }
            }

            ret->indices = filter_vector(indices, keep_indices);
934
            /*MSG_DEBUG("Filtered indices " << ret->indices << " (" << ret->indices.size() << ')');*/
935
            for (const auto& kv: LV) {
936
                /*MSG_DEBUG("Filtering LV on " << kv.first << ", keeping " << keep_indices.size() << " elements");*/
937
938
                ret->LV[kv.first] = filter_vector(kv.second, keep_indices);
            }
939
940
941
942
943
944
945
946
947
            /*MSG_DEBUG("Resulting LV (" << ret->LV.size() << ')');*/
            /*for (const auto& lv: ret->LV) {*/
                /*MSG_DEBUG("** " << lv.first << " (" << lv.second.size() << ')');*/
                /*auto ri = ret->indices.begin();*/
                /*for (const auto& v: lv.second) {*/
                    /*MSG_DEBUG("* " << (*ri++));*/
                    /*MSG_DEBUG("" << v);*/
                /*}*/
            /*}*/
948
949
950

            return ret;
        }
951
952
953

    size_t size() const
    {
954
955
        /*return observed_traits.front().values.size();*/
        return indices.size();
956
957
958
959
960
961
962
963
964
    }

    const MatrixXd& get_LV(const std::string& chr, size_t i) const { return LV.find(chr)->second[i]; }
};


/** **/


965
966
967
968
969
970
struct pop_data_type {
    std::string name;
    std::map<std::string, std::string> marker_observation_filenames;
    std::string genetic_map_filename;
    std::string pedigree_filename;
    std::string qtl_generation_name;
971
    std::vector<std::shared_ptr<geno_matrix>> generations;
972
    LV_database LV;
973
974
    std::map<std::string, std::vector<size_t>> families;
    std::map<size_t, std::shared_ptr<geno_matrix>> gen_by_id;
975
    std::map<char, std::string> ancestor_names;
976
977
978
979
980
981
982
983
984
985
986
987

    void
        assemble_chromosomes(const std::vector<chromosome>& map)
        {
            LV.assemble_using_map(map);
        }

    void
        set_qtl_generation(const std::string& name)
        {
            qtl_generation_name = name;
        }
988
989
990
991
992
993
994
995
996
997
998
999
1000

    std::string save()
    {
        static const char* forbidden = ":?*/\\";
        std::stringstream filename;
        for (char c: name) {
            if (strchr(forbidden, c)) {
                filename << '_';
            } else {
                filename << c;
            }
        }
        filename << ".popdata";
For faster browsing, not all history is shown. View entire blame