__init__.py 7.39 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
import glob
import sys
21
import logging
22

23
from ng6.ng6workflow import CasavaNG6Workflow
24
from ng6.utils import Utils
25 26


27
class RnaSeqQualityCheck (CasavaNG6Workflow):
28
    
Penom Nom's avatar
Penom Nom committed
29 30 31 32 33 34 35 36
    def get_name(self):
        return 'illumina_rnaseq'
    
    def get_description(self):
        return "rnaseq quality check pipeline"

    def define_parameters(self, function="process"):
        self.add_parameter("delete_bam", "The BAM are not stored", type=bool, default = False)
37
        self.add_parameter("n_threads", "Number of threads to use for the alignment with STAR", type='int', default = 8)
Penom Nom's avatar
Penom Nom committed
38 39 40 41
        self.add_input_file("reference_genome", "Which genome should the read being align on")
        self.add_input_file("reference_transcriptome", "Which transcriptome should the read being align on")
        self.add_input_file_list("annotation", "Which annotation file should be used for processing RNAseq quality")
    
42
    def process(self):
43
        fastqilluminafilter, filtered_read1_files, filtered_read2_files, concat_files, concatenatefastq = self.illumina_process()
44 45 46 47
        logging.getLogger("RnaSeqQualityCheck").debug("process. filtered_read1_files = "+",".join(filtered_read1_files))
        logging.getLogger("RnaSeqQualityCheck").debug("process. filtered_read2_files = "+",".join(filtered_read2_files))
        logging.getLogger("RnaSeqQualityCheck").debug("process. concat_files = "+",".join(concat_files))
        
Penom Nom's avatar
Penom Nom committed
48
        if self.reference_transcriptome != None :
Penom Nom's avatar
Penom Nom committed
49
            # index the reference transcriptome if not already indexed
Penom Nom's avatar
Penom Nom committed
50 51 52
            indexed_ref = self.reference_transcriptome
            if not os.path.exists( self.reference_transcriptome + ".bwt" ):
                bwaindex = self.add_component("BWAIndex", [self.reference_transcriptome])
Penom Nom's avatar
Penom Nom committed
53
                indexed_ref = bwaindex.databank
54
             
55
            # align reads against indexed genome
56
            sample_lane_prefixes = None
57
            
Penom Nom's avatar
fix :  
Penom Nom committed
58
            if self.group_prefix != None :
Penom Nom's avatar
Penom Nom committed
59
                sample_lane_prefixes = list((Utils.get_group_basenames(filtered_read1_files+filtered_read2_files, "lane")).keys())
60 61 62 63 64 65 66 67
            
            if self.align_subset_reads:
                subset = self.add_component("SubsetSeqFiles", [filtered_read1_files, filtered_read2_files], parent = fastqilluminafilter)
                filtered_read1_files = subset.subset_read1
                filtered_read2_files = subset.subset_read2
                bwa = self.add_component("BWA", [indexed_ref, filtered_read1_files, filtered_read2_files, sample_lane_prefixes, "mem", not self.delete_bam], parent = subset)
            else:
                bwa = self.add_component("BWA", [indexed_ref, filtered_read1_files, filtered_read2_files, sample_lane_prefixes, "mem", not self.delete_bam], parent = fastqilluminafilter)
68
       
69
            # make some statistic on the alignement
70
            alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files, self.is_paired_end()], parent = bwa, component_prefix="bwa")
71
            
72
        if self.reference_genome != None:
73
            reference_genome = self.reference_genome
74 75 76 77
            index_dir = os.path.dirname(self.reference_genome)
            if not os.path.exists(os.path.join(index_dir, 'SAindex' ))  :
                starindex = self.add_component("STARIndex", [self.reference_genome])
                index_dir =  starindex.index_directory
78
                reference_genome = starindex.normalized_fasta_file
79
            
Penom Nom's avatar
Penom Nom committed
80
            # spliced alignment of reads against indexed genome
Penom Nom's avatar
Penom Nom committed
81
            if self.is_paired_end() and (self.group_prefix != None):
82
                logging.getLogger("RnaSeqQualityCheck").debug("process. Dans self.reference_genome != None > if self.is_paired_end() and (self.group_prefix != None):")
83
                # split read 1 and read 2 from filtered files list
84
                [concat_read1_files, concat_read2_files] = Utils.split_pair(concat_files, (self.group_prefix != None))
85 86 87 88
                if self.align_subset_reads:
                    subset = self.add_component("SubsetSeqFiles", [concat_read1_files, concat_read2_files], component_prefix="star", parent = fastqilluminafilter)
                    concat_read1_files = subset.subset_read1
                    concat_read2_files = subset.subset_read2
Penom Nom's avatar
fix :  
Penom Nom committed
89
            elif self.group_prefix != None:
90
                logging.getLogger("RnaSeqQualityCheck").debug("process. Dans self.reference_genome != None > elif self.group_prefix != None:")
91
                concat_read1_files = concat_files
92
                concat_read2_files = []
93 94 95 96
                if self.align_subset_reads:
                    subset = self.add_component("SubsetSeqFiles", [concat_files],component_prefix="star", parent = fastqilluminafilter)
                    concat_read1_files = subset.subset_read1
                    concat_read2_files = subset.subset_read2
97
            else:
98
                logging.getLogger("RnaSeqQualityCheck").debug("process. Dans self.reference_genome != None > else")
99 100
                concat_read1_files = filtered_read1_files
                concat_read2_files = filtered_read2_files
101 102 103 104
                if self.align_subset_reads:
                    subset = self.add_component("SubsetSeqFiles", [concat_read1_files,concat_read2_files], component_prefix="star", parent = fastqilluminafilter)
                    concat_read1_files = subset.subset_read1
                    concat_read2_files = subset.subset_read2
105 106
            concat_read1_files = sorted(concat_read1_files)
            concat_read2_files = sorted(concat_read2_files)
107
             
Penom Nom's avatar
fix :  
Penom Nom committed
108 109
            sample_lane_prefixes = None
            if self.group_prefix != None :
110
                sample_lane_prefixes = sorted(list((Utils.get_group_basenames(concat_read1_files+concat_read2_files, "lane")).keys()))
111
                 
112 113 114 115
            if self.align_subset_reads:
                star = self.add_component("STAR", [reference_genome, index_dir, concat_read1_files, concat_read2_files,sample_lane_prefixes, not self.delete_bam, self.n_threads], parent = subset)
            else:
                star = self.add_component("STAR", [reference_genome, index_dir, concat_read1_files, concat_read2_files,sample_lane_prefixes, not self.delete_bam, self.n_threads], parent = fastqilluminafilter)
116
            # make some statistic on the alignment
Penom Nom's avatar
fix :  
Penom Nom committed
117
            alignmentstats = self.add_component("AlignmentStats", [star.output_bams, self.is_paired_end()], parent = star, component_prefix="star")
118
             
119
            #Quality RNA Seq analysis  
120 121 122 123
#            02/08/2018 Audrey Gibert (était-ce un boulet?)
#            C'est commenté parce qu'on a un bug à cause du script inner_distance.py "start must be smaller than end..." c'est embetant, on ne s'en sert jamais voilà voilà 
#             if self.annotation:
#                 rseqc = self.add_component("RSeQC", [star.output_bams, self.annotation], parent = star)