AltEst.cc 29.3 KB
Newer Older
Philippe Bardou's avatar
Philippe Bardou committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// ------------------------------------------------------------------
// Copyright (C) 2005 INRA <eugene@ossau.toulouse.inra.fr>
//
// This program is open source; you can redistribute it and/or modify
// it under the terms of the Artistic License (see LICENSE file).
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
// You should have received a copy of Artistic License along with
// this program; if not, please see http://www.opensource.org
//
// $Id$
// ------------------------------------------------------------------
// File:     AltEst.cc
// Contents: class Alternative Est
// ------------------------------------------------------------------

#include "AltEst.h"
21
#include <algorithm>
22
#include "./Hits.h"
23
extern Parameters   PAR;
Philippe Bardou's avatar
Philippe Bardou committed
24
25
26
27
28
29
30
31
32

/*************************************************************
 **                      OneAltEst
 *************************************************************/
// ----------------------
//  Default constructor.
// ----------------------
OneAltEst :: OneAltEst()
{
33
    id[0] = 0;
34
    strand = 0;
35
36
    start = end = index = exonsNumber = 0;
    totalLength = altSplicingEvidence = 0;
sallet's avatar
sallet committed
37
    toRemove = false;
Philippe Bardou's avatar
Philippe Bardou committed
38
39
}

40
41

OneAltEst :: OneAltEst(char *ID, int i, int j, char s)
Philippe Bardou's avatar
Philippe Bardou committed
42
{
43
44
45
    strcpy(id, ID);
    this->start               = i;
    this->index               = -1;
46
    this->strand              = s;
47
48
    this->exonsNumber         = 0;
    this->altSplicingEvidence = 0;
sallet's avatar
sallet committed
49
    this->toRemove = false;
50
51
    this->totalLength         = 0;
    this->AddExon(i, j);
Philippe Bardou's avatar
Philippe Bardou committed
52
53
}

54

Philippe Bardou's avatar
Philippe Bardou committed
55
56
57
58
59
60
61
62
63
64
65
66
// ----------------------
//  Default destructor.
// ----------------------
OneAltEst :: ~OneAltEst ()
{
}

// ------------------------
//  ResetEst
// ------------------------
void OneAltEst :: Reset ()
{
67
68
    this->start               = -1;
    this->end                 = -1;
69
    this->strand              = 0;
70
71
72
    this->index               = -1;
    this->exonsNumber         = 0;
    this->altSplicingEvidence = 0;
sallet's avatar
sallet committed
73
    this->toRemove            = false;
74
75
76
    this->totalLength         = 0;
    vi_ExonStart.clear();
    vi_ExonEnd.clear();
Philippe Bardou's avatar
Philippe Bardou committed
77
78
79
80
81
82
83
}

// ----------------------
//  Push an exon
// ----------------------
void OneAltEst :: AddExon(int i, int j)
{
84
85
86
87
88
    vi_ExonStart.push_back(i);
    vi_ExonEnd.push_back(j);
    this->end = j;
    this->exonsNumber++;
    this->totalLength += (j-i+1);
Philippe Bardou's avatar
Philippe Bardou committed
89
90
91
92
93
94
95
}

// ----------------------
//  Remove an exon
// ----------------------
void OneAltEst :: RemoveExon(int i)
{
96
97
98
99
100
101
    totalLength -= (vi_ExonEnd[i] - vi_ExonStart[i] + 1);
    vi_ExonStart.erase(vi_ExonStart.begin() + i);
    vi_ExonEnd.erase(vi_ExonEnd.begin() + i);
    exonsNumber--;
    if ((i==0) || (i==exonsNumber))
        UpdateBoundaries();
Philippe Bardou's avatar
Philippe Bardou committed
102
103
104
105
}

// ------------------------------------------
//  ExtremitiesTrim
106
107
//   To avoid alignment errors, clean the est
//   by "trimming" the extremities
Philippe Bardou's avatar
Philippe Bardou committed
108
109
110
111
//   like a pac-man or an exonuclease...
// ------------------------------------------
void OneAltEst :: ExtremitiesTrim(int exonuclen)
{
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
    if (exonuclen == 0) return;
    // shortening first exon
    //print();
    if (vi_ExonEnd[0] - vi_ExonStart[0] > exonuclen)
    {
        vi_ExonStart[0] += exonuclen;
        totalLength     -= exonuclen;
    }
    else   // tiny first exon, it has to be removed
    {
        //fprintf(stdout,"\n1 %s %d %d\n", id, vi_ExonStart[0], vi_ExonEnd[0]);
        if (exonsNumber>1) // security test
            RemoveExon(0);
    }

    // same shortening for the last exon
    if (vi_ExonEnd[exonsNumber-1]-vi_ExonStart[exonsNumber-1] > exonuclen)
    {
        vi_ExonEnd[exonsNumber-1] -= exonuclen;
        totalLength               -= exonuclen;
    }
    else   // tiny last exon, it has to be removed
    {
        //fprintf(stdout,"2 %s\n", id);
        if (exonsNumber>1) // security test
            RemoveExon(exonsNumber-1);
    }
    UpdateBoundaries();
    //Print();
Philippe Bardou's avatar
Philippe Bardou committed
141
142
143
144
145
146
147
148
149
}

// -------------------------------------------
//  IsFiltered
//   Return 0 if no filtered
//          1 if filtered because of unspliced
//          2 if filtered because of exon len
// -------------------------------------------
int OneAltEst :: IsFiltered(bool unspliced, bool extremelen, bool verbose,
150
151
                            int  minIn,     int  maxIn,      int  maxEx, int minEx,
                            int  minEstLen, int  maxEstLen)
Philippe Bardou's avatar
Philippe Bardou committed
152
{
153
154
155
156
157
    // remove if unspliced
    if ((unspliced) && (exonsNumber == 1))
    {
        if (verbose) fprintf(stderr,"\n%s removed (unspliced) ...", id);
        return 1;
Philippe Bardou's avatar
Philippe Bardou committed
158
    }
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
    if (extremelen)
    {
        // remove if est too short or too long (after trimming!)
        if (totalLength < minEstLen)
        {
            if (verbose) fprintf(stderr,"\n%s removed (too short) ...", id);
            return 2;
        }
        if (totalLength > maxEstLen)
        {
            if (verbose) fprintf(stderr,"\n%s removed (too long) ...", id);
            return 2;
        }
        if ((exonsNumber>1) && (vi_ExonEnd[0] - vi_ExonStart[0]-1 > maxEx))
        {
            if (verbose) fprintf(stderr,"\n%s removed (exon too long) ...", id);
            return 2;
        }

        // remove if one intron or exon is too short or too long,
        for (int j=1; j<exonsNumber;j++)
        {
            if ((vi_ExonStart[j] - vi_ExonEnd[j-1] -1) < minIn)
            {
                if (verbose) fprintf(stderr,"\n%s removed (intron too short) ...", id);
184
                //fprintf(stderr,"j : %d vi_ExonStart[j] %d vi_ExonEnd[j-1] %d", j, vi_ExonStart[j], vi_ExonEnd[j-1]);
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
                return 2;
            }
            if ((vi_ExonStart[j] - vi_ExonEnd[j-1] -1) > maxIn)
            {
                if (verbose) fprintf(stderr,"\n%s removed (intron too long) ...", id);
                return 2;
            }
            if ((vi_ExonEnd[j] - vi_ExonStart[j] -1) > maxEx)
            {
                if (verbose) fprintf(stderr,"\n%s removed (exon too long) ...", id);
                return 2;
            }
            if ((vi_ExonEnd[j] - vi_ExonStart[j] -1 < minEx) && (j<exonsNumber-1))
            {
                // internal exon too short
                if (verbose) fprintf(stderr,"\n%s removed (int. exon too short) ...", id);
                return 2;
            }
        }
Philippe Bardou's avatar
Philippe Bardou committed
204
    }
205
    return 0;
Philippe Bardou's avatar
Philippe Bardou committed
206
207
208
209
210
211
212
213
}

// -----------------------------------------------------
//  Test splice sites identity of 2 Est
//  they have to be sorted: "this" starts before "other"
// -----------------------------------------------------
bool OneAltEst :: IsInconsistentWith(OneAltEst *other)
{
214
215
216
217
218
219
    int i, j = 0, k;
    //printf("%s IsInconsistentWith %s ? ... ",this->GetId(),other->GetId());

    // if this and other are not overlaping they can't be inconsistent
    if (other->start >= this->end) return false;

220
221
222
	// if this and other are overlapping and, not on the same strand, they are inconsistent
    if ( PAR.getI("AltEst.strandSpecific") && (other->GetStrand() != this->strand) ) return true;

223
224
225
226
227
    // i reaches the first exon that ends after the begin of other,
    // that is the first exon overlaping other
    for (k=0; k<this->exonsNumber; k++)
    {
        if (vi_ExonEnd[k] >= other->start) break;
Philippe Bardou's avatar
Philippe Bardou committed
228
    }
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
    // control if other starts in an intron of this
    if (other->start < this->vi_ExonStart[k]) return true;

    for (i=k; i<this->exonsNumber; i++)
    {
        // for each exon i of this (other starts at exon j=0, because this is before other)
        if ((i < (this->exonsNumber-1)) && (j < (other->exonsNumber-1)))
        {
            // they have each at least one remaining exon
            if ( (this->vi_ExonEnd[i]     != other->vi_ExonEnd[j]) ||
                    (this->vi_ExonStart[i+1] != other->vi_ExonStart[j+1]))
                return true; // a difference in the pairs donor - acceptor
            // same coordinates :
            j++;
            continue; // OK, increment the next exon of this (loop "for" upper)
        }
        else   // at least one est is ending
        {
            if (i == this->exonsNumber-1)   // end of this
            {
                if ((other->vi_ExonEnd[j] >= this->end) || (j == other->exonsNumber-1)) return false;
                // current exon of other ends after the last exon of this, so there can't be any inconsistency
                // if not, it has to be the last exon of other
                return true;
                // an intron begin in other while i is still in its last exon
            }
            else   // only other is in its last exon
            {
                if (this->vi_ExonEnd[i] >= other->end) return false;
                // idem than upper, the last exon of other ends in an exon of this,
                // so no inconsistency is possible.
                // If not, the last exon of other continue in an intron of this
                return true;
            }
        }
Philippe Bardou's avatar
Philippe Bardou committed
264
    }
265
266
267
    // We're not supposed to reach this line!...
    fprintf(stderr, "\n\n INTERNAL ERROR in AlternativeEst::IsInconsistentWith\n");
    return false; // just to avoid a compilation warning
Philippe Bardou's avatar
Philippe Bardou committed
268
269
}

270
271
272
273
// -----------------------------------------------------------------
// Check compatibility of a prediction with an AltEst
// -----------------------------------------------------------------
bool OneAltEst :: CompatibleWith(Prediction *pred)
274
275
276
277
278
{
    int idxf, idxe=0;
    Gene *g;

    //locate gene
279
280
281
282
283
    if (PAR.getI("AltEst.strandSpecific"))
    	g = pred->FindGene(start,end, strand);
    else
    	g = pred->FindGene(start,end);
    
284
285
286
287
288
289
290
291
292
293
294
295
    // if no such gene then it is incompatible.
    if (g == NULL) return false;

    // check first exon start is in transcribed matured region
    int  nbFeature = 0;
    nbFeature=g->nbFea();

    for (idxf = 0; idxf < nbFeature; idxf++)
    {
        if ((g->vFea[idxf]->start-1 <= vi_ExonStart[idxe]) &&
                (g->vFea[idxf]->end-1   >= vi_ExonStart[idxe]))
        {
296
	    if (! g->vFea[idxf]->IsTranscribedAndUnspliced() )
297
298
299
300
301
302
303
304
305
306
307
308
                return false;
            else break;
        }
    }
    //   if (idxf == g->nbFea()) return false;
    idxf++;
    bool firstOk = false;
    // test des fronti�res suivantes
    while ((idxe < exonsNumber-1) && (idxf < nbFeature))
    {
        if (!firstOk && (g->vFea[idxf]->start-1 == vi_ExonEnd[idxe]+1))
            firstOk = true;
309
	if (!firstOk && ( ! g->vFea[idxf]->IsTranscribedAndUnspliced()) ) // IG or intron: broken
310
311
            return false;

312
313
        if (firstOk && (g->vFea[idxf]->end == vi_ExonStart[idxe+1]))
            if ( !g->vFea[idxf]->IsTranscribedAndUnspliced() ) // IF or intron
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
            {
                idxe++;
                idxf++;
                firstOk = false;
                continue;
            }
            else return false;
        idxf++;
    }

    // test de la fin
    for (; idxf < nbFeature; idxf++)
    {
        if ((g->vFea[idxf]->start-1 <= vi_ExonEnd[idxe]) &&
                (g->vFea[idxf]->end-1   >= vi_ExonEnd[idxe]))
329
	      return  (g->vFea[idxf]->IsTranscribedAndUnspliced());
330
    }
331
    return false;
332
}
333

334
Gene* OneAltEst :: GetUncompatibleGene(Prediction *pred, int margin)
335
336
337
{
    int idxf, idxe=0;
    Gene *g;
338
    bool VERBOSE = false;
339

sallet's avatar
sallet committed
340
341
342
343
344
    if (VERBOSE)
    {
        fprintf(stdout, "Recherche un gène incompatible pour l'AltEst suivante :\n ") ;
        this->Print();
    }
345
346
347
348
349
350
351
352
353
    //locate overlapping gene
    if (PAR.getI("AltEst.strandSpecific"))
    	g = pred->FindGene(start,end, strand);
    else
    	g = pred->FindGene(start,end);
    
    // no overlapping gene
    if (g == NULL) 
    {
sallet's avatar
sallet committed
354
        if (VERBOSE) fprintf(stdout, "Pas de gène chevauchat l'altEst (1) ! \n ") ;
355
356
357
358
        return NULL;
    }
    else 
    {
sallet's avatar
sallet committed
359
        // study g to be sure it is incompatible
360
361
362
363
364
365
366
367
        // check first exon start is in transcribed matured region
        int  nbFeature = g->nbFea();

        for (idxf = 0; idxf < nbFeature; idxf++)
        {
            if ((g->vFea[idxf]->start-1 <= vi_ExonStart[idxe]) &&
                (g->vFea[idxf]->end-1   >= vi_ExonStart[idxe]))
            {
368
                if (! g->vFea[idxf]->IsTranscribedAndUnspliced() && 
sallet's avatar
sallet committed
369
                     abs((g->vFea[idxf]->end-1) - vi_ExonStart[idxe]) >= margin)
370
                {
sallet's avatar
sallet committed
371
372
373
374
                    if (VERBOSE) { 
                        fprintf(stdout, "Gene incompatible trouve : EST debute dans intron ! \n ") ;
                        g->Print(); 
                    }
375
376
377
378
379
380
381
382
383
384
385
386
                    return g;
                }
                else break;
            }
        }
    

        idxf++;
        bool firstOk = false;
        // test des frontieres suivantes
        while ((idxe < exonsNumber-1) && (idxf < nbFeature))
        {
387
            if (!firstOk && (abs ((vi_ExonEnd[idxe]+1) - (g->vFea[idxf]->start-1)) <= margin)) 
388
                firstOk = true;
sallet's avatar
sallet committed
389
            // Test if the left border of an intron of the gen is incoherent with the EST
390
391
            if (!firstOk && ( ! g->vFea[idxf]->IsTranscribedAndUnspliced()) ) // IG or intron: broken
            {
sallet's avatar
sallet committed
392
393
394
395
                if (VERBOSE) { 
                        fprintf(stdout, "Gene incompatible trouve: AltEst incoherente avec un intron du gene de reference (left position) ! \n ") ;
                        g->Print(); 
                    }
396
397
                return g;
            }
sallet's avatar
sallet committed
398
399
400
401
402
403
404
405
406
407
408
409
            
            // Test if the right border of an intron of the gene is incoehrent with the EST
            if ( (abs(vi_ExonStart[idxe+1] - g->vFea[idxf]->end) > margin) && 
                !g->vFea[idxf]->IsTranscribedAndUnspliced() ) // IF or intron
            {
                if (VERBOSE) { 
                        fprintf(stdout, "Gene incompatible trouve: AltEst incoherente avec un intron du gene de reference (right position) ! \n ") ;
                        g->Print(); 
                }
                return g;
            }
            
410
            if (firstOk && (abs(vi_ExonStart[idxe+1] - g->vFea[idxf]->end) <= margin)) 
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
            {
                if ( !g->vFea[idxf]->IsTranscribedAndUnspliced() ) // IF or intron
                {
                    idxe++;
                    idxf++;
                    firstOk = false;
                    continue;
                }
            }
            idxf++;
        }

        // test de la fin
        for (; idxf < nbFeature; idxf++)
        {
            if ((g->vFea[idxf]->start-1 <= vi_ExonEnd[idxe]) &&
                (g->vFea[idxf]->end-1   >= vi_ExonEnd[idxe]))
            {
                //return  (g->vFea[idxf]->IsTranscribedAndUnspliced());
sallet's avatar
sallet committed
430
                if (!g->vFea[idxf]->IsTranscribedAndUnspliced() && abs( vi_ExonEnd[idxe] - (g->vFea[idxf]->start-1)) >= margin )
431
                {
sallet's avatar
sallet committed
432
433
434
435
                    if (VERBOSE) { 
                        fprintf(stdout, "Gene incompatible trouve (4) ! \n ") ;
                        g->Print(); 
                    }
436
437
438
439
                    return g;
                }
                else 
                {
sallet's avatar
sallet committed
440
441
442
                    if (VERBOSE) { 
                        fprintf(stdout, "Pas de gene incompatible ! \n ") ;
                    }
443
444
445
446
447
448
449
                    return NULL;
                }
            }
        }
    }
    
    //return false;
450
451
452
453
454
    if (VERBOSE) {
        fprintf(stdout, "je suis a la fin. Je retourne le gene trouve..\n");
    }
    
    return g;
455
456
}

457
458
459
460
461
// --------------------------------------
//  Penalize according to the EST
// --------------------------------------
void OneAltEst :: Penalize(int pos, DATA *Data, double altPenalty)
{
462
463
    int strandSpecific = PAR.getI("AltEst.strandSpecific");

464
    if ((pos < start) | (pos > end)) return;
465

466
    int idx, inExon = 1;
467

468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
    for (idx =0; idx < exonsNumber-1; idx++)
    {
        if ((vi_ExonStart[idx] <= pos) &&
                (vi_ExonEnd[idx] >= pos))
        {
            inExon = 1;
            break;
        }

        if ((vi_ExonEnd[idx] < pos) &&
                (vi_ExonStart[idx+1] > pos))
        {
            inExon = 0;
            break;
        }
483
    }
484

485
    if (inExon == 0) // in intron
486
    {
487
        // penalize UTR, UIR RNA regions and coding region
488
489
490
491
        Data->contents[DATA::UTR5F] -= altPenalty;
        Data->contents[DATA::UTR3F] -= altPenalty;
        Data->contents[DATA::UTR5R] -= altPenalty;
        Data->contents[DATA::UTR3R] -= altPenalty;
492
493
494
495
        Data->contents[DATA::UIRF]  -= altPenalty;
        Data->contents[DATA::UIRR]  -= altPenalty;
        Data->contents[DATA::RNAF]  -= altPenalty;
        Data->contents[DATA::RNAR]  -= altPenalty;
496
497
        for (int i=0; i<6; i++)
            Data->contents[i] -= altPenalty;
498
499
500
501
502
503
504
505
506
507
        if (strandSpecific && strand == '+')
        {
        	Data->contents[DATA::IntronR]    -= altPenalty;
        	Data->contents[DATA::IntronUTRR] -= altPenalty;
        }
        else if (strandSpecific && strand == '-')
        {
        	Data->contents[DATA::IntronF]    -= altPenalty;
        	Data->contents[DATA::IntronUTRF] -= altPenalty;
       }
508
    }
509
    else // exon
510
    {
511
        Data->contents[DATA::IntronF]    -= altPenalty;
512
        Data->contents[DATA::IntronUTRF] -= altPenalty;
513
        Data->contents[DATA::IntronR]    -= altPenalty;
514
        Data->contents[DATA::IntronUTRR] -= altPenalty;
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533

        if (strandSpecific && strand == '+') // penalize utr and coding on strand - 
        {
        	Data->contents[DATA::UTR5R] -= altPenalty;
        	Data->contents[DATA::UTR3R] -= altPenalty;
        	Data->contents[DATA::UIRR]  -= altPenalty;
        	Data->contents[DATA::RNAR]  -= altPenalty;
        	for (int i=3; i<6; i++)
            	Data->contents[i] -= altPenalty;
        }
        else if (strandSpecific && strand == '-') // penalize utr and coding on strand + 
        {
        	Data->contents[DATA::UTR5F] -= altPenalty;
        	Data->contents[DATA::UTR3F] -= altPenalty;
        	Data->contents[DATA::UIRF]  -= altPenalty;
        	Data->contents[DATA::RNAF]  -= altPenalty;
        	for (int i=0; i<3; i++)
            	Data->contents[i] -= altPenalty;
        }	
534
    }
535
    Data->contents[DATA::InterG] -= altPenalty;
536
537
538

}

Philippe Bardou's avatar
Philippe Bardou committed
539
540
541
542
543
// --------------------------------------
//  Print the OneAltEst (nuc coordinates)
// --------------------------------------
void OneAltEst :: Print()
{
544
545
546
547
548
549
    fprintf(stdout," %s:", id);
    for (int i=0; i<(int)vi_ExonStart.size(); i++)
    {
        fprintf(stdout,"\t%d - %d", vi_ExonStart[i]+1, vi_ExonEnd[i]+1);
    }
    fprintf(stdout,"\n");
Philippe Bardou's avatar
Philippe Bardou committed
550
551
}

552
553
554
555
bool StartAtLeft( OneAltEst A,  OneAltEst B)
{
    return (A.GetStart() < B.GetStart());
};
556
557
558
559
560
561
562
563
564
bool StartAtLeftElseEndAtLeft ( OneAltEst A,  OneAltEst B)
{
    return ( (A.GetStart() < B.GetStart()) ? true : (A.GetStart() ==  B.GetStart() && A.GetEnd() <= B.GetEnd()));
};
bool StartAtLeftElseEndAtRight ( OneAltEst A,  OneAltEst B)
{
    return ( (A.GetStart() < B.GetStart()) ? true : (A.GetStart() ==  B.GetStart() && A.GetEnd() > B.GetEnd()));
};

565
566
567
568
bool EndAtRight( OneAltEst A,  OneAltEst B)
{
    return (A.GetEnd() > B.GetEnd());
};
Philippe Bardou's avatar
Philippe Bardou committed
569
570
571
572
573
574
/*************************************************************
 **                        AltEst
 *************************************************************/
// ----------------------
//  Default constructor.
// ----------------------
575
AltEst :: AltEst(DNASeq *X)
Philippe Bardou's avatar
Philippe Bardou committed
576
{
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
    int i,nbIncomp,nbNoevidence,nbIncluded,nbUnspliced, nbExtremLen;
    char tempname[FILENAME_MAX+1];
    i=nbIncomp=nbNoevidence=nbIncluded=nbUnspliced=nbExtremLen=0;

    altPenalty = PAR.getD("AltEst.Penalty");
    includedEstFilter= PAR.getI("AltEst.includedEstFilter");
    compatibleEstFilter= PAR.getI("AltEst.compatibleEstFilter");
    unsplicedEstFilter= PAR.getI("AltEst.unsplicedEstFilter");
    extremeLengthFilter= PAR.getI("AltEst.extremeLengthFilter");
    maxEstLength= PAR.getI("AltEst.maxEstLength");
    minEstLength= PAR.getI("AltEst.minEstLength");
    maxIn= PAR.getI("AltEst.maxIn");
    minIn= PAR.getI("AltEst.minIn");
    maxEx= PAR.getI("AltEst.maxEx");
    minEx= PAR.getI("AltEst.minEx");
    exonucleasicLength= PAR.getI("AltEst.exonucleasicLength");
    altEstDisplay= PAR.getI("AltEst.altEstDisplay");
    verbose= PAR.getI("AltEst.verbose");
    totalAltEstNumber = 0;
596
    keptAltEstNumber  = 0;
597

598
    std::string inputFormat = to_string(PAR.getC("AltEst.format",0,1));	
599
600
    strcpy(tempname, PAR.getC("fstname"));
    strcat(tempname, ".alt.est");
601
602
603
604
605
606
607
608
609
610
611
    if ( inputFormat == "GFF3" )
    {
        strcat(tempname,".gff3");
    }

    if (!ProbeFile(NULL,tempname)) 
    {
        fprintf(stderr, "No alt. est file...");
        fflush(stderr);
        return;
    }
612
613
614

    fprintf(stderr, "Reading alt. spl. evidence...");
    fflush(stderr);
615

616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
    int NumEST=0;
    Hits * AllEST= NULL;
    if ( inputFormat == "GFF3" )
    {
        GeneFeatureSet * geneFeatureSet = new GeneFeatureSet (tempname);
        AllEST = AllEST->ReadFromGeneFeatureSet(*geneFeatureSet, &NumEST, -1, 0, X);
        delete geneFeatureSet;
    }
    else
    {
        FILE* fEST = FileOpen(NULL, tempname, "r", PAR.getI("EuGene.sloppy"));
        if (fEST)
        {
            //ReadFromFile (EstFile  EstNumber  Level  Margin)
            AllEST = AllEST->ReadFromFile(fEST, &NumEST, -1, 0,X->SeqLen);
            fclose(fEST);
        }
633
    }
634
    i=convertHitsToAltEst(AllEST,nbUnspliced,nbExtremLen,NumEST);
635

636
637
638
639
640

    fprintf(stderr, " %d read, ",i);
    fflush(stderr);

    // sort all Est by their begin coordinates
641
642
    sort(voae_AltEst.begin(), voae_AltEst.end(), StartAtLeftElseEndAtRight);
	//sort(voae_AltEst.begin(), voae_AltEst.end(), StartAtLeft);
643

644
645
646
647
648
    Compare(nbIncomp, nbNoevidence, nbIncluded);
    fprintf(stderr,"%d removed (%d incl., %d unsp., %d no alt.spl., %d len.), %d inc. pairs, ",
            nbIncluded+nbNoevidence+nbUnspliced+nbExtremLen,
            nbIncluded,nbUnspliced,nbNoevidence,nbExtremLen,nbIncomp);

649
    fprintf(stderr, "%d kept ...",keptAltEstNumber);
650
651
652
653
654
655
656
657
658
659
660
661
662
    fprintf(stderr, " done\n");
    fflush(stderr);

    // and set indexes
    for (i=0; i<totalAltEstNumber; i++)
    {
        voae_AltEst[i].PutIndex(i);
        // So, with the special AltEst "Init",
        // indexes are the same than the DAG[] indexes
        if (altEstDisplay) voae_AltEst[i].Print();
    }

    nextAdd = nextRemove = (totalAltEstNumber > 1);
663

Philippe Bardou's avatar
Philippe Bardou committed
664
665
666
667
668
669
670
671
672
}

// ----------------------
//  Default destructor.
// ----------------------
AltEst :: ~AltEst ()
{
}

673
674
675
676
677
// ----------------------
//  Read from Hits class
// ----------------------
int AltEst :: convertHitsToAltEst (Hits * AllEST, int &nbUnspliced, int &nbExtremLen, int &NumEST)
{
678
679
680
681
682
683
684
685
    int  read, deb, fin, poids, brin, EstDeb, EstFin, filtertype, totalread;
    OneAltEst oaetmp;
    bool first = 1;
    totalread = 0;
    int i;
    Hits *ThisEST = NULL;
    Block *ThisBlock = NULL;
    for (i = 0, ThisEST = AllEST; i < NumEST; i++, ThisEST = ThisEST->Next)
686
    {
687
688
689
690
691
692

        ThisBlock = ThisEST->Match;
        while (ThisBlock)
        {
            if (ThisBlock == ThisEST->Match) //si premier
            {
693
                oaetmp = OneAltEst(ThisEST->getName(), ThisBlock->Start, ThisBlock->End, ThisEST->Strand); 
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
            }
            else
            {
                oaetmp.AddExon(ThisBlock->Start, ThisBlock->End);
            }
            ThisBlock = ThisBlock->Next;
        }
        oaetmp.ExtremitiesTrim(exonucleasicLength);
        filtertype = oaetmp.IsFiltered(unsplicedEstFilter, extremeLengthFilter, verbose,
                                       minIn, maxIn, maxEx, minEx,
                                       minEstLength, maxEstLength);
        if (filtertype == 0)
        {
            voae_AltEst.push_back(oaetmp);
            totalAltEstNumber++;
        }
        else
        {
            if (filtertype == 1) nbUnspliced++;
            if (filtertype == 2) nbExtremLen++;
        }
        oaetmp.Reset();
        totalread++;
717
    }
718
719

    return totalread;
720
}
Philippe Bardou's avatar
Philippe Bardou committed
721
722
// ----------------------
//  Read .alt file.
723
// Note: never used
Philippe Bardou's avatar
Philippe Bardou committed
724
725
726
// ----------------------
int AltEst :: ReadAltFile (char name[FILENAME_MAX+1], int &nbUnspliced, int &nbExtremLen)
{
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
    int  read, deb, fin, poids, brin, EstDeb, EstFin, filtertype, totalread;
    int  PEstFin;
    char *EstId, *PEstId, *tmp;
    char A[128], B[128];
    FILE *fp;
    OneAltEst oaetmp;
    bool first = 1;

    A[0]   = B[0] = 0;
    EstId  = A;
    PEstId = B;
    PEstFin = -1;

    fp = FileOpen(NULL, name, "r");
    totalread = 0;

    while ((read=fscanf(fp,"%d %d %d %*s %d %s %d %d\n",
                        &deb, &fin, &poids, &brin, EstId, &EstDeb, &EstFin)) == 7)
    {

        if ((strcmp(EstId, PEstId) == 0) && (EstDeb > PEstFin))
        {
            //voae_AltEst[totalAltEstNumber-1].AddExon(deb-1,fin-1);
            oaetmp.AddExon(deb-1, fin-1);
        }
        else
        {
            totalread++;
            if (first) first = 0;
            else
            {
                oaetmp.ExtremitiesTrim(exonucleasicLength);
                filtertype = oaetmp.IsFiltered(unsplicedEstFilter, extremeLengthFilter, verbose,
                                               minIn, maxIn, maxEx, minEx,
                                               minEstLength, maxEstLength);
                if (filtertype == 0)
                {
                    voae_AltEst.push_back(oaetmp);
                    totalAltEstNumber++;
                }
                else
                {
                    if (filtertype == 1) nbUnspliced++;
                    if (filtertype == 2) nbExtremLen++;
                }
                oaetmp.Reset();
            }
774
            oaetmp = OneAltEst(EstId, deb-1, fin-1, brin);
775
776
777
778
779
            tmp    = PEstId;
            PEstId = EstId;
            EstId  = tmp;
        }
        PEstFin = EstFin;
Philippe Bardou's avatar
Philippe Bardou committed
780
    }
781
782
783
784
785
786
787
788
789
790
791
792
793
794
    // last est
    oaetmp.ExtremitiesTrim(exonucleasicLength);
    filtertype = oaetmp.IsFiltered(unsplicedEstFilter, extremeLengthFilter, verbose,
                                   minIn, maxIn, maxEx, minEx,
                                   minEstLength, maxEstLength);
    if (filtertype == 0)
    {
        voae_AltEst.push_back(oaetmp);
        totalAltEstNumber++;
    }
    else
    {
        if (filtertype == 1) nbUnspliced++;
        if (filtertype == 2) nbExtremLen++;
Philippe Bardou's avatar
Philippe Bardou committed
795
    }
796
797
798
799

    if (read != EOF) fprintf(stderr,"Incorrect ALTEST file !\n");
    fclose(fp);
    return totalread;
Philippe Bardou's avatar
Philippe Bardou committed
800
801
802
}

// -----------------------------------------------------------------
803
// Filters the AltEst:
Philippe Bardou's avatar
Philippe Bardou committed
804
//  Remove Est strictly included in another one,
805
806
//  TODO: Set a list of Est clusters,
//   (a cluster can be seen as a connex component in a graph
Philippe Bardou's avatar
Philippe Bardou committed
807
808
809
810
//   built with est as edges and overlap as vertices ;
//   it allows to reduce the time complexity by restricting the
//   pairwise est-est comparisons to est of a same cluster.)
//  And keep only Est confering an evidence of alternative splicing,
811
//  (that is every EST that is inconsistent with another one).
Philippe Bardou's avatar
Philippe Bardou committed
812
813
814
// -----------------------------------------------------------------
void AltEst :: Compare(int &nbIncomp, int &nbNoevidence, int &nbIncluded)
{
sallet's avatar
sallet committed
815
    int i, j = 0;
816
817
818
819
820
    // Until now, only one cluster is considered (time max)
    // test all pairwise est comparisons in the cluster
    // NxN comparisons could be tested, but it has been reduced to
    // ((NxN)-N)/2 , only one comparison per pair, without the diag. (cf. k)
    // WARNING : voae_AltEst[0] is the special INIT (counted in totalAltEstNumber)
821
822
	
    keptAltEstNumber = totalAltEstNumber;
823
824
825
    int strandSpecific = PAR.getI("AltEst.strandSpecific");	

    if (compatibleEstFilter || includedEstFilter)
826
    {
827
828
        for (i=0; i<totalAltEstNumber; i++)
        {
sallet's avatar
sallet committed
829
830
831
			if (voae_AltEst[i].IsToRemove()) continue;
			
            //fprintf(stderr, "\nEST i %d - %d", voae_AltEst[i].GetStart(), voae_AltEst[i].GetEnd());
832
            // Compare this est with the others to check incompatibility or inclusion
sallet's avatar
sallet committed
833
            for (j=1+i; j<totalAltEstNumber; j++)
834
            {
835
836
                // all the next Est have a higher position  than the current one
                if (voae_AltEst[j].GetStart() >  voae_AltEst[i].GetEnd()) break;
837
                if (voae_AltEst[j].IsToRemove()) continue;
838
                
839
840
                if (voae_AltEst[i].IsInconsistentWith(&voae_AltEst[j]))
                {
sallet's avatar
sallet committed
841
                    if (verbose) fprintf(stderr,"\nincompatibility: %s vs. %s ...", voae_AltEst[j].GetId(), voae_AltEst[i].GetId());
842
843
844
845
                    voae_AltEst[i].PutAltSplE(true);
                    voae_AltEst[j].PutAltSplE(true);
                    nbIncomp++;
                }
846
                else   // consistency
847
848
                {
                    // j strictly included in i
849
                    if (includedEstFilter) 
850
                    {
851
852
853
854
855
856
857
                        if ( (voae_AltEst[j].GetEnd() <= voae_AltEst[i].GetEnd() ) &&
                            ( !strandSpecific || (strandSpecific && (voae_AltEst[j].GetStrand() == voae_AltEst[i].GetStrand()) ) ) )	
                        {
                            if (verbose) fprintf(stderr,"\n%s removed (included in %s) ...", voae_AltEst[j].GetId(), voae_AltEst[i].GetId());
                            voae_AltEst[j].FlagToRemove(true);
                            nbIncluded++;
                        }
858
859
860
861
862
                    }
                }
            }
        }

863
        if (compatibleEstFilter)
864
        {
865
            for (i=0; i<totalAltEstNumber; i++)
866
            {
867
868
869
870
871
872
                if  (! voae_AltEst[i].GetAltSplE())
                {
                    if (verbose) fprintf(stderr,"\n%s removed (no alt.spl. evidence) ...", voae_AltEst[i].GetId());
                    voae_AltEst[i].FlagToRemove(true);
                    nbNoevidence++;
                }
873
874
            }
        }
875
876
877
878
879
880
        
        for (int i=0; i<totalAltEstNumber; i++)
        {
            if (voae_AltEst[i].IsToRemove()) keptAltEstNumber--;
        }
    }    
Philippe Bardou's avatar
Philippe Bardou committed
881
}
882

883
884
885
886
887
// -----------------------------------------------------------------
// Penalize according to EST number i
// -----------------------------------------------------------------
void AltEst :: Penalize(int i, int pos, DATA *Data)
{
888
    voae_AltEst[i].Penalize(pos,Data,altPenalty);
889
}