bwa.py 8.86 KB
Newer Older
Penom Nom's avatar
Penom Nom committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
from subprocess import Popen, PIPE
20
from jflow.utils import get_argument_pattern
Penom Nom's avatar
Penom Nom committed
21

Penom Nom's avatar
Penom Nom committed
22
23
from ng6.analysis import Analysis
from ng6.utils import Utils
Penom Nom's avatar
Penom Nom committed
24
25

class BWA (Analysis):
Penom Nom's avatar
Penom Nom committed
26
27

    def define_parameters(self, reference_genome, read1, read2=None, group_prefix=None, algorithm="aln",  keep_bam=True):
Penom Nom's avatar
Penom Nom committed
28
        """
Penom Nom's avatar
Penom Nom committed
29
30
31
32
33
34
          @param reference_genome : [str] Path to the reference genome (it must be indexed).
          @param read1 : [list] Paths to reads 1.
          @param read2 : [list] Paths to reads 2.
          @param group_prefix : [list] The component produces one bam by prefix. Each bam is a merge of all files with the correct prefix.
          @param algorithm : [str] Algorithm for the alignment (aln or bwasw or mem).
          @param keep_bam : [bool] True save bam in nG6
Penom Nom's avatar
Penom Nom committed
35
        """
Penom Nom's avatar
Penom Nom committed
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
        # Parameters
        self.add_parameter_list( "group_prefix", "The component produces one bam by prefix. Each bam is a merge of all files with the correct prefix.", default=group_prefix )
        self.add_parameter( "algorithm", "Algorithm for the alignment : aln or bwasw or mem.", default=algorithm, choices=["aln", "bwasw", "mem"] )
        self.add_parameter("keep_bam", "keep_bam", default=keep_bam, type=bool)
        # Files
        self.add_input_file("reference_genome", "Which reference file should be used", default=reference_genome, required=True)
        self.add_input_file_list( "read1", "Which read1 files should be used.", default=read1, required=True )
        self.add_input_file_list( "read2", "Which read2 files should be used.", default=read2 )
        self.sai1 = None
        self.sai2 = None
        if algorithm == "aln":
            self.add_output_file_list("sai1", "The BWA read1 sai files.", pattern='{basename_woext}.sai', items=self.read1)
            if read2 != None:
                self.add_output_file_list("sai2", "The BWA read2 sai files.", pattern='{basename_woext}.sai', items=self.read2)
        if group_prefix == None:
            self.add_output_file_list("bam_files", "The BWA bam files.", pattern='{basename_woext}.bam', items=self.read1, file_format="bam")
Penom Nom's avatar
Penom Nom committed
52
        else:
Penom Nom's avatar
Penom Nom committed
53
            self.add_output_file_list("bam_files", "The BWA bam files.", pattern='{basename_woext}.bam', items=group_prefix, file_format="bam")
54
55
56
57
58
59
60
61
        
        
        self.add_output_file_list("stderrs", "BWA stderrs files", pattern='{basename_woext}.bwa.stderr', items=self.read1)
        if self.algorithm == 'aln' :
            if self.read2 :
                self.add_output_file_list("stderrs_aln", "BWA stderrs files", pattern='{basename_woext}.bwa_aln.stderr', items=self.read1 + self.read2)
            else :
                self.add_output_file_list("stderrs_aln", "BWA stderrs files", pattern='{basename_woext}.bwa_aln.stderr', items=self.read1)
Penom Nom's avatar
Penom Nom committed
62
    
Penom Nom's avatar
Penom Nom committed
63
64
65
66
67
    def get_version(self):
        cmd = [self.get_exec_path("bwa")]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        return stderr.split()[7]
Penom Nom's avatar
Penom Nom committed
68
69
70
71
72

    def define_analysis(self):
        self.name = "Alignment"
        self.description = "Reads Alignment"
        self.software = "bwa"
Penom Nom's avatar
Penom Nom committed
73
        self.options = "Algorithm " + self.algorithm + " | Databank " + os.path.basename(self.reference_genome)
Penom Nom's avatar
Penom Nom committed
74
75
76
        source_file = self.reference_genome + '_source'    
        if os.path.exists(source_file) :
            source = open(source_file,'r')
Penom Nom's avatar
Penom Nom committed
77
78
79
            reference_desc = source.readline();
            source.close()
            self.options += " " + reference_desc
Penom Nom's avatar
Penom Nom committed
80
    
Penom Nom's avatar
Penom Nom committed
81
    def post_process(self):
Penom Nom's avatar
Penom Nom committed
82
83
84
        if self.keep_bam:
            # Finally create and add the archive to the analysis
            self._save_files(self.bam_files)
Penom Nom's avatar
Penom Nom committed
85
            
Penom Nom's avatar
Penom Nom committed
86
    def process(self):
87
        unmerged_bam = self.get_outputs( '{basename_woext}.bam', self.read1 )
Penom Nom's avatar
Penom Nom committed
88
89
90

        # Algorithm bwasw or mem
        if self.algorithm == "bwasw" or self.algorithm == "mem":
Penom Nom's avatar
Penom Nom committed
91
            # Paired-end
Penom Nom's avatar
Penom Nom committed
92
            if self.read2:
93
                self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
Penom Nom's avatar
Penom Nom committed
94
                                " $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " + 
Maxime Manno's avatar
Maxime Manno committed
95
                                self.get_exec_path("samtools") + " sort - -o $3 2>> $4;", 
96
97
                                cmd_format='{EXE} {IN} {OUT}' , map=True,
                                inputs=[self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
98
            # Single-end
Penom Nom's avatar
Penom Nom committed
99
            else:
100
                self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
Penom Nom's avatar
Penom Nom committed
101
                                    " $1 2>> $3 | " + self.get_exec_path("samtools") + " view -bS - | " + 
Maxime Manno's avatar
Maxime Manno committed
102
                                    self.get_exec_path("samtools") + " sort - -o $2 2>> $3 ;", 
103
104
105
                                cmd_format='{EXE} {IN} {OUT}' , map=True,
                                inputs=[self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)

Penom Nom's avatar
Penom Nom committed
106
        # Algorithm aln  
Penom Nom's avatar
Penom Nom committed
107
108
109
110
        else:
            reads, sais = [], []
            reads.extend(self.read1)
            sais.extend(self.sai1)
111
            
Penom Nom's avatar
Penom Nom committed
112
            # Paired-end
Penom Nom's avatar
Penom Nom committed
113
114
115
            if self.read2:
                reads.extend(self.read2)
                sais.extend(self.sai2)
116
117
118
119
                self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
                                         " $1 > $2 2>> $3 " , cmd_format='{EXE} {IN} {OUT}', map=True,
                                         inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
                self.add_shell_execution(self.get_exec_path("bwa") + " sampe " + self.reference_genome + 
Penom Nom's avatar
Penom Nom committed
120
                                         " $1 $2 $3 $4 2>> $6 | " + self.get_exec_path("samtools") + " view -bS - | " + 
Maxime Manno's avatar
Maxime Manno committed
121
                                         self.get_exec_path("samtools") + " sort - -o $5 2>> $6;",
122
123
                                         cmd_format='{EXE} {IN} {OUT}', map=True,
                                         inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
124
            # Single-end
Penom Nom's avatar
Penom Nom committed
125
            else:
126
127
128
129
                self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
                                         " $1 > $2 2>> $3 " , cmd_format='{EXE} {IN} {OUT}', map=True,
                                         inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
                self.add_shell_execution(self.get_exec_path("bwa") + " samse " + self.reference_genome + 
Penom Nom's avatar
Penom Nom committed
130
                                         " $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " + 
Maxime Manno's avatar
Maxime Manno committed
131
                                         self.get_exec_path("samtools") + " sort - -o $3 2>> $4;", 
132
133
                                         cmd_format='{EXE} {IN} {OUT}', map=True,  
                                         inputs=[self.sai1, self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
134
135

        if self.group_prefix != None:
Penom Nom's avatar
Penom Nom committed
136
137
            # Create dictionary : key = prefix and value = list of files to merge
            groups_path = Utils.get_filepath_by_prefix( unmerged_bam, self.group_prefix)
Penom Nom's avatar
Penom Nom committed
138

Penom Nom's avatar
Penom Nom committed
139
140
            # Create dictionary : key = prefix and value = the output bam
            outputs_path = Utils.get_filepath_by_prefix( self.bam_files, self.group_prefix)
Penom Nom's avatar
Penom Nom committed
141

142
            # Merges bam or rename
Penom Nom's avatar
Penom Nom committed
143
            for prefix in self.group_prefix:
144
                if len(groups_path[prefix]) > 1:
145
                    [cmd_inputs_pattern, next_arg_number] = get_argument_pattern(groups_path[prefix], 1)
146
147
148
                    self.add_shell_execution(self.get_exec_path("samtools") + ' merge ${' + str(next_arg_number) + '} ' + cmd_inputs_pattern , 
                                             cmd_format='{EXE} {IN} {OUT}', map=False,
                                             inputs=groups_path[prefix], outputs=outputs_path[prefix])
Penom Nom's avatar
Penom Nom committed
149
                elif groups_path[prefix] != outputs_path[prefix]:
150
151
                    self.add_shell_execution( "ln -fs ${1} ${2}", cmd_format='{EXE} {IN} {OUT}', map=False,
                                              inputs=groups_path[prefix], outputs=outputs_path[prefix])