bwa.py 4.93 KB
Newer Older
Jerome Mariette's avatar
Jerome Mariette committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#
# Copyright (C) 2012 INRA
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

import os
19
from subprocess import Popen, PIPE
Jerome Mariette's avatar
Jerome Mariette committed
20
21

from jflow.component import Component
Jerome Mariette's avatar
Jerome Mariette committed
22
from workflows.formats import Formats
Jerome Mariette's avatar
Jerome Mariette committed
23
24
25
26
27
28
29
30
31

from weaver.function import ShellFunction
from weaver.abstraction import Map
from jflow.abstraction import MultiMap


class BWA (Component):
    
    def define_parameters(self, reference_genome, read1, read2=None, algorithm="aln"):
Jerome Mariette's avatar
Jerome Mariette committed
32
        self.add_input_file("reference_genome", "Which reference file should be used", default=reference_genome, required=True)
33
34
        self.add_input_file_list("read1", "Which read1 files should be used", file_format=Formats.FASTQ, default=read1, required=True)
        self.add_input_file_list("read2", "Which read2 files should be used", file_format=Formats.FASTQ, default=read2)
Jerome Mariette's avatar
Jerome Mariette committed
35
        self.add_parameter("algorithm", "Which algorithm should be used to align the reads", default=algorithm, choices=["aln", "bwasw"])
Jerome Mariette's avatar
Jerome Mariette committed
36
        if algorithm == "aln":
37
            self.add_output_file_list("sai1", "The BWA sai1 file", pattern='{basename_woext}.sai', items=self.read1)
Jerome Mariette's avatar
Jerome Mariette committed
38
39
40
41
        else: 
            self.sai1 = None
        if read2: 
            if algorithm == "aln":
42
                self.add_output_file_list("sai2", "The BWA sai2 file", pattern='{basename_woext}.sai', items=self.read2)
Jerome Mariette's avatar
Jerome Mariette committed
43
44
            else: 
                self.sai2 = None
45
            self.add_output_file_list("bam_files", "The BWA bam file", pattern='{basename_woext}.bam', items=[self.read1, self.read2], file_format=Formats.BAM)
Jerome Mariette's avatar
Jerome Mariette committed
46
47
        else:
            self.sai2 = None
48
49
            self.add_output_file_list("bam_files", "The BWA bam file", pattern='{basename_woext}.bam', items=self.read1, file_format=Formats.BAM)
        self.add_output_file("stderr", "The BWA stderr file", filename='bwa.stderr')
Jerome Mariette's avatar
Jerome Mariette committed
50
51
52
53
54
55
        
    def process(self):        
        if self.algorithm=="bwasw":
            if self.read2:
                bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + \
                                " $1 $2 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - > $3 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
Jerome Mariette's avatar
Jerome Mariette committed
56
                bwasw = MultiMap(bwa, inputs=[self.read1, self.read2], outputs=self.bam_files, includes=self.reference_genome)
Jerome Mariette's avatar
Jerome Mariette committed
57
58
59
            else:
                bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + \
                                    " $1 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - > $2 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
Jerome Mariette's avatar
Jerome Mariette committed
60
                bwasw = Map(bwa, self.read1, self.bam_files, includes=self.reference_genome)
Jerome Mariette's avatar
Jerome Mariette committed
61
62
63
64
65
66
67
68
69
        else:
            reads, sais = [], []
            reads.extend(self.read1)
            sais.extend(self.sai1)
            bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + \
                                " $1 > $2 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
            if self.read2:
                reads.extend(self.read2)
                sais.extend(self.sai2)
Jerome Mariette's avatar
Jerome Mariette committed
70
                bwa_aln = Map(bwa, inputs=reads, outputs=sais, includes=self.reference_genome)
Jerome Mariette's avatar
Jerome Mariette committed
71
72
                bwasampe = ShellFunction(self.get_exec_path("bwa") + " sampe " + self.reference_genome + \
                                         " $1 $2 $3 $4 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - > $5 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
Jerome Mariette's avatar
Jerome Mariette committed
73
                bwasampe = MultiMap(bwasampe, inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=self.bam_files, includes=self.reference_genome)
Jerome Mariette's avatar
Jerome Mariette committed
74
            else:
Jerome Mariette's avatar
Jerome Mariette committed
75
                bwa_aln = Map(bwa, inputs=reads, outputs=sais, includes=self.reference_genome)
Jerome Mariette's avatar
Jerome Mariette committed
76
77
                bwasamse = ShellFunction(self.get_exec_path("bwa") + " samse " + self.reference_genome + \
                                         " $1 $2 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - > $3 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
Jerome Mariette's avatar
Jerome Mariette committed
78
                bwasamse = MultiMap(bwasamse, inputs=[self.sai1, self.read1], outputs=self.bam_files, includes=self.reference_genome)
Jerome Mariette's avatar
Jerome Mariette committed
79
                
80
81
82
83
84
    def get_version(self):
        cmd = [self.get_exec_path("bwa")]
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        return stderr.split()[7]