Commit 39b0c4a0 authored by Penom Nom's avatar Penom Nom
Browse files

Add merge.

parent bcb6302d
......@@ -106,15 +106,15 @@ class Casava18 (NG6Workflow):
# fastq illumina filter
group_prefix = Utils.get_group_basenames(self.read1_files+self.read2_files, "read")
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.read1_files+self.read2_files, self.args["keep_reads"], group_prefix, self.runobj.name+"_fastqilluminafilter.tar.gz"])
# split read 1 and read 2 from filtered files list
[filtered_read1_files, filtered_read2_files] = Utils.split_pair(fastqilluminafilter.fastq_files_filtered, True)
filtered_read1_files.sort()
filtered_read1_files.sort()
group_prefix = Utils.get_group_basenames(filtered_read1_files+filtered_read2_files, "read")
filtered_read2_files.sort()
# concatenate fastq
concatenatefastq = self.add_component("ConcatenateFilesGroups", [filtered_read1_files+filtered_read2_files, group_prefix.keys()])
reads_prefixes = (Utils.get_group_basenames(filtered_read1_files+filtered_read2_files, "read")).keys()
concatenatefastq = self.add_component("ConcatenateFilesGroups", [filtered_read1_files+filtered_read2_files, reads_prefixes])
# archive the files
addrawfiles = self.add_component("AddRawFiles", [self.runobj, concatenatefastq.concat_files, self.args["compression"]])
......@@ -130,10 +130,11 @@ class Casava18 (NG6Workflow):
# index the reference genome if not already indexed
bwaindex = self.add_component("BWAIndex", [self.args["reference_genome"]])
# align reads against indexed genome
bwa = self.add_component("BWA", [bwaindex.databank, filtered_read1_files, filtered_read2_files], parent = fastqilluminafilter)
sample_lane_prefixes = (Utils.get_group_basenames(filtered_read1_files+filtered_read2_files, "lane")).keys()
bwa = self.add_component("BWA", [bwaindex.databank, filtered_read1_files, filtered_read2_files, sample_lane_prefixes], parent = fastqilluminafilter)
# make some statistic on the alignement
alignmentstats = self.add_component("AlignmentStats", [bwa.bam_files], parent = bwa)
if len(self.read2_files) > 0:
# process insert sizes
insertssizes = self.add_component("InsertsSizes",[bwa.bam_files, self.args["histogram_width"], self.args["min_pct"]], parent = bwa)
\ No newline at end of file
insertssizes = self.add_component("InsertsSizes", [bwa.bam_files, self.args["histogram_width"], self.args["min_pct"]], parent = bwa)
\ No newline at end of file
......@@ -21,7 +21,9 @@ from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from ng6.analysis import Analysis
from ng6.utils import Utils
from weaver.function import ShellFunction
from weaver.abstraction import Map
......@@ -29,7 +31,7 @@ from weaver.abstraction import Map
class BWA (Analysis):
def define_parameters(self, reference_genome, read1, read2=None, algorithm="aln"):
def define_parameters(self, reference_genome, read1, read2=None, group_prefix=None, algorithm="aln"):
self.read1 = InputFileList(read1, Formats.FASTQ)
self.read2=None
if algorithm=="aln":
......@@ -43,10 +45,15 @@ class BWA (Analysis):
else:
self.sai2 = None
self.bam_files = OutputFileList(self.get_outputs('{basename_woext}.bam', [self.read1, self.read2]), Formats.BAM)
if group_prefix is not None:
self.bam_files = OutputFileList(self.get_outputs('{basename_woext}.bam', group_prefix), Formats.BAM)
else:
self.sai2 = None
self.bam_files = OutputFileList(self.get_outputs('{basename_woext}.bam', self.read1), Formats.BAM)
if group_prefix is not None:
self.bam_files = OutputFileList(self.get_outputs('{basename_woext}.bam', group_prefix), Formats.BAM)
self.group_prefix = group_prefix
self.algorithm = algorithm
self.reference_genome = InputFile(reference_genome)
self.stderr = os.path.join(self.output_directory, 'bwa.stderr')
......@@ -67,24 +74,36 @@ class BWA (Analysis):
stdout, stderr = p.communicate()
return stderr.split()[7]
def process(self):
def process(self):
unmerged_bam = []
if self.group_prefix is None:
unmerged_bam = self.bam_files
else:
unmerged_bam = self.get_outputs('{basename_woext}.bam', self.read1)
# Algorithm bwasw
if self.algorithm=="bwasw":
# Paired-end
if self.read2:
bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + \
" $1 $2 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - | " + \
self.get_exec_path("samtools") + " sort - $3 2>> " + self.stderr + "; mv $3.bam $3;", cmd_format='{EXE} {IN} {OUT}')
bwasw = MultiMap(bwa, inputs=[self.read1, self.read2], outputs=self.bam_files, includes=self.reference_genome)
bwasw = MultiMap(bwa, inputs=[self.read1, self.read2], outputs=unmerged_bam, includes=self.reference_genome)
# Single-end
else:
bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + \
" $1 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - | " + \
self.get_exec_path("samtools") + " sort - $2 2>> " + self.stderr + "; mv $2.bam $2;", cmd_format='{EXE} {IN} {OUT}')
bwasw = Map(bwa, self.read1, self.bam_files, includes=self.reference_genome)
bwasw = Map(bwa, self.read1, unmerged_bam, includes=self.reference_genome)
# Algorithm aln
else:
reads, sais = [], []
reads.extend(self.read1)
sais.extend(self.sai1)
bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome + \
" $1 > $2 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
# Paired-end
if self.read2:
reads.extend(self.read2)
sais.extend(self.sai2)
......@@ -93,11 +112,26 @@ class BWA (Analysis):
" $1 $2 $3 $4 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - | " + \
self.get_exec_path("samtools") + " sort - $5 2>> " + self.stderr + "; mv $5.bam $5;",
cmd_format='{EXE} {IN} {OUT}')
bwasampe = MultiMap(bwasampe, inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=self.bam_files, includes=self.reference_genome)
bwasampe = MultiMap(bwasampe, inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=unmerged_bam, includes=self.reference_genome)
# Single-end
else:
bwa_aln = Map(bwa, inputs=reads, outputs=sais, includes=self.reference_genome)
bwasamse = ShellFunction(self.get_exec_path("bwa") + " samse " + self.reference_genome + \
" $1 $2 2>> " + self.stderr + " | " + self.get_exec_path("samtools") + " view -bS - | " + \
self.get_exec_path("samtools") + " sort - $3 2>> " + self.stderr + "; mv $3.bam $3;",
cmd_format='{EXE} {IN} {OUT}')
bwasamse = MultiMap(bwasamse, inputs=[self.sai1, self.read1], outputs=self.bam_files, includes=self.reference_genome)
bwasamse = MultiMap(bwasamse, inputs=[self.sai1, self.read1], outputs=unmerged_bam, includes=self.reference_genome)
if self.group_prefix is not None:
# Create dictionary : key = prefix and value = list of files to merge
groups_path = Utils.get_filepath_by_prefix( unmerged_bam, self.group_prefix)
# Create dictionary : key = prefix and value = the output bam
outputs_path = Utils.get_filepath_by_prefix( self.bam_files, self.group_prefix)
# Merges bam
for prefix in self.group_prefix:
[cmd_inputs_pattern, next_arg_number] = Utils.get_argument_pattern(groups_path[prefix], 1)
samtoolsmerge = ShellFunction( self.get_exec_path("samtools") + " merge $" + str(next_arg_number) + " " + cmd_inputs_pattern + " 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
samtoolsmerge(inputs=groups_path[prefix], outputs=outputs_path[prefix])
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment