bwa.py 8.62 KB
Newer Older
Penom Nom's avatar
Penom Nom committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
from subprocess import Popen, PIPE

from jflow.abstraction import MultiMap
22
from jflow.utils import get_argument_pattern
Penom Nom's avatar
Penom Nom committed
23

Penom Nom's avatar
Penom Nom committed
24
25
from weaver.function import ShellFunction

Penom Nom's avatar
Penom Nom committed
26
27
from ng6.analysis import Analysis
from ng6.utils import Utils
Penom Nom's avatar
Penom Nom committed
28
29

class BWA (Analysis):
Penom Nom's avatar
Penom Nom committed
30
31

    def define_parameters(self, reference_genome, read1, read2=None, group_prefix=None, algorithm="aln",  keep_bam=True):
Penom Nom's avatar
Penom Nom committed
32
        """
Penom Nom's avatar
Penom Nom committed
33
34
35
36
37
38
          @param reference_genome : [str] Path to the reference genome (it must be indexed).
          @param read1 : [list] Paths to reads 1.
          @param read2 : [list] Paths to reads 2.
          @param group_prefix : [list] The component produces one bam by prefix. Each bam is a merge of all files with the correct prefix.
          @param algorithm : [str] Algorithm for the alignment (aln or bwasw or mem).
          @param keep_bam : [bool] True save bam in nG6
Penom Nom's avatar
Penom Nom committed
39
        """
Penom Nom's avatar
Penom Nom committed
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
        # Parameters
        self.add_parameter_list( "group_prefix", "The component produces one bam by prefix. Each bam is a merge of all files with the correct prefix.", default=group_prefix )
        self.add_parameter( "algorithm", "Algorithm for the alignment : aln or bwasw or mem.", default=algorithm, choices=["aln", "bwasw", "mem"] )
        self.add_parameter("keep_bam", "keep_bam", default=keep_bam, type=bool)
        # Files
        self.add_input_file("reference_genome", "Which reference file should be used", default=reference_genome, required=True)
        self.add_input_file_list( "read1", "Which read1 files should be used.", default=read1, required=True )
        self.add_input_file_list( "read2", "Which read2 files should be used.", default=read2 )
        self.sai1 = None
        self.sai2 = None
        if algorithm == "aln":
            self.add_output_file_list("sai1", "The BWA read1 sai files.", pattern='{basename_woext}.sai', items=self.read1)
            if read2 != None:
                self.add_output_file_list("sai2", "The BWA read2 sai files.", pattern='{basename_woext}.sai', items=self.read2)
        if group_prefix == None:
            self.add_output_file_list("bam_files", "The BWA bam files.", pattern='{basename_woext}.bam', items=self.read1, file_format="bam")
Penom Nom's avatar
Penom Nom committed
56
        else:
Penom Nom's avatar
Penom Nom committed
57
            self.add_output_file_list("bam_files", "The BWA bam files.", pattern='{basename_woext}.bam', items=group_prefix, file_format="bam")
58
59
60
61
62
63
64
65
        
        
        self.add_output_file_list("stderrs", "BWA stderrs files", pattern='{basename_woext}.bwa.stderr', items=self.read1)
        if self.algorithm == 'aln' :
            if self.read2 :
                self.add_output_file_list("stderrs_aln", "BWA stderrs files", pattern='{basename_woext}.bwa_aln.stderr', items=self.read1 + self.read2)
            else :
                self.add_output_file_list("stderrs_aln", "BWA stderrs files", pattern='{basename_woext}.bwa_aln.stderr', items=self.read1)
Penom Nom's avatar
Penom Nom committed
66
    
Penom Nom's avatar
Penom Nom committed
67
68
69
70
71
    def get_version(self):
        cmd = [self.get_exec_path("bwa")]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        return stderr.split()[7]
Penom Nom's avatar
Penom Nom committed
72
73
74
75
76

    def define_analysis(self):
        self.name = "Alignment"
        self.description = "Reads Alignment"
        self.software = "bwa"
Penom Nom's avatar
Penom Nom committed
77
        self.options = "Algorithm " + self.algorithm + " | Databank " + os.path.basename(self.reference_genome)
Penom Nom's avatar
Penom Nom committed
78
79
80
        source_file = self.reference_genome + '_source'    
        if os.path.exists(source_file) :
            source = open(source_file,'r')
Penom Nom's avatar
Penom Nom committed
81
82
83
            reference_desc = source.readline();
            source.close()
            self.options += " " + reference_desc
Penom Nom's avatar
Penom Nom committed
84
    
Penom Nom's avatar
Penom Nom committed
85
    def post_process(self):
Penom Nom's avatar
Penom Nom committed
86
87
88
        if self.keep_bam:
            # Finally create and add the archive to the analysis
            self._save_files(self.bam_files)
Penom Nom's avatar
Penom Nom committed
89
            
Penom Nom's avatar
Penom Nom committed
90
    def process(self):
91
        unmerged_bam = self.get_outputs( '{basename_woext}.bam', self.read1 )
Penom Nom's avatar
Penom Nom committed
92
93
94

        # Algorithm bwasw or mem
        if self.algorithm == "bwasw" or self.algorithm == "mem":
Penom Nom's avatar
Penom Nom committed
95
            # Paired-end
Penom Nom's avatar
Penom Nom committed
96
            if self.read2:
97
                self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
Penom Nom's avatar
Penom Nom committed
98
                                " $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " + 
99
100
101
                                self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;", 
                                cmd_format='{EXE} {IN} {OUT}' , map=True,
                                inputs=[self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
102
            # Single-end
Penom Nom's avatar
Penom Nom committed
103
            else:
104
                self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
Penom Nom's avatar
Penom Nom committed
105
                                    " $1 2>> $3 | " + self.get_exec_path("samtools") + " view -bS - | " + 
106
107
108
109
                                    self.get_exec_path("samtools") + " sort - $2 2>> $3 ; mv $2.bam $2;", 
                                cmd_format='{EXE} {IN} {OUT}' , map=True,
                                inputs=[self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)

Penom Nom's avatar
Penom Nom committed
110
        # Algorithm aln  
Penom Nom's avatar
Penom Nom committed
111
112
113
114
        else:
            reads, sais = [], []
            reads.extend(self.read1)
            sais.extend(self.sai1)
Penom Nom's avatar
Penom Nom committed
115
            bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + 
116
                                " $1 > $2 2>> $3 " , cmd_format='{EXE} {IN} {OUT}')
Penom Nom's avatar
Penom Nom committed
117
            # Paired-end
Penom Nom's avatar
Penom Nom committed
118
119
120
            if self.read2:
                reads.extend(self.read2)
                sais.extend(self.sai2)
121
                bwa_aln = MultiMap(bwa, inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
122
123
124
                bwasampe = ShellFunction(self.get_exec_path("bwa") + " sampe " + self.reference_genome + 
                                         " $1 $2 $3 $4 2>> $6 | " + self.get_exec_path("samtools") + " view -bS - | " + 
                                         self.get_exec_path("samtools") + " sort - $5 2>> $6; mv $5.bam $5;", 
Jerome Mariette's avatar
Jerome Mariette committed
125
                                         cmd_format='{EXE} {IN} {OUT}')
126
                bwasampe = MultiMap(bwasampe, inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
127
            # Single-end
Penom Nom's avatar
Penom Nom committed
128
            else:
129
                bwa_aln = MultiMap(bwa, inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
130
131
132
                bwasamse = ShellFunction(self.get_exec_path("bwa") + " samse " + self.reference_genome + 
                                         " $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " + 
                                         self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;", 
Jerome Mariette's avatar
Jerome Mariette committed
133
                                         cmd_format='{EXE} {IN} {OUT}')
134
                bwasamse = MultiMap(bwasamse, inputs=[self.sai1, self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
Penom Nom's avatar
Penom Nom committed
135
136

        if self.group_prefix != None:
Penom Nom's avatar
Penom Nom committed
137
138
            # Create dictionary : key = prefix and value = list of files to merge
            groups_path = Utils.get_filepath_by_prefix( unmerged_bam, self.group_prefix)
Penom Nom's avatar
Penom Nom committed
139

Penom Nom's avatar
Penom Nom committed
140
141
            # Create dictionary : key = prefix and value = the output bam
            outputs_path = Utils.get_filepath_by_prefix( self.bam_files, self.group_prefix)
Penom Nom's avatar
Penom Nom committed
142

143
            # Merges bam or rename
Penom Nom's avatar
Penom Nom committed
144
            for prefix in self.group_prefix:
145
                if len(groups_path[prefix]) > 1:
146
                    [cmd_inputs_pattern, next_arg_number] = get_argument_pattern(groups_path[prefix], 1)
147
                    samtoolsmerge = ShellFunction( self.get_exec_path("samtools") + ' merge ${' + str(next_arg_number) + '} ' + cmd_inputs_pattern , cmd_format='{EXE} {IN} {OUT}')
148
                    samtoolsmerge(inputs=groups_path[prefix], outputs=outputs_path[prefix])
Penom Nom's avatar
Penom Nom committed
149
150
151
                elif groups_path[prefix] != outputs_path[prefix]:
                    link = ShellFunction( "ln -fs ${1} ${2}", cmd_format='{EXE} {IN} {OUT}')
                    link(inputs=groups_path[prefix], outputs=outputs_path[prefix])