Commit 61c85434 authored by Jerome Mariette
Browse files

No commit message

No commit message
parent a0efe0ee
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from weaver.function import ShellFunction
from weaver.abstraction import Map
from jflow.abstraction import MultiMap
class BWA (Component):
    """Align FASTQ reads with bwa and store the alignments as BAM files.

    Every bwa command built here is piped into "samtools view -bS -" and the
    standard error of both tools is appended to a single bwa.stderr file in
    the component output directory.
    """

    def define_parameters(self, reference_genome, read1, read2=None, algorithm="aln"):
        """Declare the inputs and outputs of the alignment.

        reference_genome -- value inserted in every bwa command line right
                            after the sub-command name
        read1            -- FASTQ file(s) to align
        read2            -- optional second set of FASTQ file(s); when given,
                            the paired-end commands are built by process()
        algorithm        -- bwa sub-command to run; only "aln" declares the
                            intermediate .sai outputs
        """
        needs_sai = algorithm == "aln"
        sai_pattern = '{basename_woext}.sai'

        self.read1 = InputFileList(read1, Formats.FASTQ)
        self.read2 = None
        self.sai1 = OutputFileList(self.get_outputs(sai_pattern, self.read1)) if needs_sai else None

        if not read2:
            self.sai2 = None
            bam_sources = self.read1
        else:
            self.read2 = InputFileList(read2, Formats.FASTQ)
            self.sai2 = OutputFileList(self.get_outputs(sai_pattern, self.read2)) if needs_sai else None
            bam_sources = [self.read1, self.read2]
        self.bam_files = OutputFileList(self.get_outputs('{basename_woext}.bam', bam_sources), Formats.BAM)

        self.algorithm = algorithm
        self.reference_genome = reference_genome
        self.stderr = os.path.join(self.output_directory, 'bwa.stderr')

    def process(self):
        """Build the bwa / samtools commands matching the declared parameters.

        "bwasw" is run as a single command per sample. Any other algorithm
        goes through the two-step flow: one alignment command per FASTQ file
        producing a .sai file, then "bwa sampe" (paired) or "bwa samse"
        (single) to produce the BAM files.
        """
        shell_format = '{EXE} {IN} {OUT}'
        log = self.stderr

        def bwa_call(subcommand):
            # "<bwa> <subcommand> <reference>": the prefix shared by every command.
            return self.get_exec_path("bwa") + " " + subcommand + " " + self.reference_genome

        def to_bam(subcommand, input_slots, bam_slot):
            # The bwa output is converted on the fly by samtools; both sides
            # of the pipe append their stderr to the same log file.
            return ShellFunction(
                bwa_call(subcommand) + " " + input_slots + " 2>> " + log + " | " +
                self.get_exec_path("samtools") + " view -bS - > " + bam_slot + " 2>> " + log,
                cmd_format=shell_format)

        if self.algorithm == "bwasw":
            if self.read2:
                aligner = to_bam(self.algorithm, "$1 $2", "$3")
                bam_stage = MultiMap(aligner, inputs=[self.read1, self.read2], outputs=self.bam_files)
            else:
                aligner = to_bam(self.algorithm, "$1", "$2")
                bam_stage = Map(aligner, self.read1, self.bam_files)
            return

        # Two-step flow: the .sai lists are only declared for "aln", so any
        # other algorithm reaching this point fails on the None sai list.
        fastqs = list(self.read1)
        sai_files = list(self.sai1)
        aln = ShellFunction(bwa_call(self.algorithm) + " $1 > $2 2>> " + log, cmd_format=shell_format)

        if self.read2:
            fastqs.extend(self.read2)
            sai_files.extend(self.sai2)
            subcommand, input_slots, bam_slot = "sampe", "$1 $2 $3 $4", "$5"
            bam_inputs = [self.sai1, self.sai2, self.read1, self.read2]
        else:
            subcommand, input_slots, bam_slot = "samse", "$1 $2", "$3"
            bam_inputs = [self.sai1, self.read1]

        # First align every FASTQ file, then turn the .sai files into BAM.
        sai_stage = Map(aln, inputs=fastqs, outputs=sai_files)
        bam_stage = MultiMap(to_bam(subcommand, input_slots, bam_slot), inputs=bam_inputs, outputs=self.bam_files)
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, InputFile, Formats
from weaver.function import ShellFunction
class BWAIndex (Component):
    """Build a bwa index for a FASTA file inside the component output directory."""

    def __init__(self):
        """Initialise the component as a dynamic one (is_dynamic=True)."""
        Component.__init__(self, is_dynamic=True)

    def define_parameters(self, input_fasta, algorithm="bwtsw"):
        """Declare the inputs and outputs of the indexation.

        input_fasta -- path of the FASTA file to index
        algorithm   -- value given to the "-a" option of "bwa index"
        """
        self.input_fasta = InputFile(input_fasta, Formats.FASTA)
        self.algorithm = algorithm
        # The databank carries the same base name as the FASTA file, but
        # lives in the output directory of the component.
        self.databank = OutputFile(os.path.join(self.output_directory, os.path.basename(input_fasta)))
        self.stdout = OutputFile(os.path.join(self.output_directory, "bwaindex.stdout"))
        self.stderr = OutputFile(os.path.join(self.output_directory, "bwaindex.stderr"))

    def process(self):
        """Link the FASTA file into the output directory and index it with bwa."""
        # First make the symbolic link. os.symlink() raises when the link name
        # is already taken (e.g. the component is processed a second time), so
        # only create it when nothing is there yet. lexists() is used rather
        # than exists() so that a dangling link is detected as well.
        if not os.path.lexists(self.databank):
            os.symlink(self.input_fasta, self.databank)
        # cmd_format lists {OUT} before {IN}: presumably $1 is the databank
        # (used as index prefix) and $2 the FASTA file -- verify against weaver.
        bwaindex = ShellFunction(self.get_exec_path("bwa") + " index -a " + self.algorithm + " -p $1 $2 > " + \
                                 self.stdout + " 2> " + self.stderr, cmd_format='{EXE} {OUT} {IN}')
        bwaindex(inputs=self.input_fasta, outputs=self.databank)
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, InputFile, Formats
from weaver.function import ShellFunction
class FormatDB (Component):
    """Format a FASTA file as a databank with formatdb inside the component output directory."""

    def __init__(self):
        """Initialise the component as a dynamic one (is_dynamic=True)."""
        Component.__init__(self, is_dynamic=True)

    def define_parameters(self, input_fasta, protein=False):
        """Declare the inputs and outputs of the formatting.

        input_fasta -- path of the FASTA file to format
        protein     -- when true "-p T" is given to formatdb, "-p F" otherwise
        """
        self.input_fasta = InputFile(input_fasta, Formats.FASTA)
        self.protein = protein
        # The database carries the same base name as the FASTA file, but
        # lives in the output directory of the component.
        self.database = OutputFile(os.path.join(self.output_directory, os.path.basename(input_fasta)))

    def process(self):
        """Link the FASTA file into the output directory and run formatdb on it."""
        # First make the symbolic link. os.symlink() raises when the link name
        # is already taken (e.g. the component is processed a second time), so
        # only create it when nothing is there yet. lexists() is used rather
        # than exists() so that a dangling link is detected as well.
        if not os.path.lexists(self.database):
            os.symlink(self.input_fasta, self.database)
        p = "T" if self.protein else "F"
        formatdb = ShellFunction(self.get_exec_path("formatdb") + " -p " + p + " -i $1 -n $2", cmd_format='{EXE} {IN} {OUT}')
        formatdb(inputs=self.input_fasta, outputs=self.database)
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFileList, InputFileList, Formats
from weaver.abstraction import Merge
class MergeFiles (Component):
    """Merge input files, either all together or grouped by base name."""

    def define_parameters(self, input_files, output_file_name="merged.txt", on_basename=False):
        """Declare the inputs and outputs of the merge.

        input_files      -- files to merge
        output_file_name -- name of the merged file; when grouping on base
                            names it is used as suffix: <basename>_<output_file_name>
        on_basename      -- when true, files are grouped on the part of their
                            base name located before the first "."
        """
        self.input_files = InputFileList(input_files)
        self.on_basename = on_basename
        self.merged_files = OutputFileList([])
        if self.on_basename:
            # basenames:        base name -> input files sharing it
            # merged_basenames: base name -> merged file produced for it
            self.basenames = {}
            self.merged_basenames = {}
            for input_path in self.input_files:
                basename = os.path.basename(input_path).split(".")[0]
                # "in" replaces dict.has_key(), which no longer exists in Python 3.
                if basename not in self.basenames:
                    # First file of the group: declare the group and its output.
                    out_name = os.path.join(self.output_directory, basename+"_"+output_file_name)
                    self.basenames[basename] = []
                    self.merged_basenames[basename] = out_name
                    self.merged_files.append(out_name)
                self.basenames[basename].append(input_path)
        else:
            self.merged_files.append(os.path.join(self.output_directory, output_file_name))

    def process(self):
        """Build one Merge per group of base names, or a single Merge of all the inputs."""
        # NOTE(review): when on_basename is set but every input has its own
        # base name, the else branch merges all the inputs into the first
        # declared output only -- confirm this is the intended behaviour.
        if self.on_basename and len(self.basenames) < len(self.input_files):
            for basename in self.basenames:
                merge = Merge(self.basenames[basename], self.merged_basenames[basename], local=False)
        else:
            merge = Merge(self.input_files, self.merged_files[0], local=False)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment