Commit b6048c96 authored by Celine Noirot
Browse files

ADD usage of add_python_execution and add_shell_execution

parent c4323d66
......@@ -17,11 +17,6 @@
import os,re,sys
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
from weaver.abstraction import Map
from ng6.analysis import Analysis
class AlignmentStats (Analysis):
......@@ -156,20 +151,27 @@ class AlignmentStats (Analysis):
# Duplication stats
if self.search_dupl:
self.tmp_bam = self.get_outputs('{basename_woext}_noDupl.bam', self.bam_files)
duplicationstats = ShellFunction("java -Xmx4g -jar " + self.get_exec_path("MarkDuplicates") + " INPUT=$1 METRICS_FILE=$2 OUTPUT=$3" + self.duplication_options + " 2> $4", cmd_format='{EXE} {IN} {OUT}')
duplicationstats = MultiMap(duplicationstats, inputs=self.bam_files, outputs=[self.duplication_files, self.tmp_bam, self.dupl_stderrs])
deletetmp = ShellFunction("rm $1 2> $2", cmd_format='{EXE} {IN} {OUT}')
deletetmp = Map(deletetmp, self.tmp_bam, self.del_stderrs)
self.add_shell_execution("java -Xmx4g -jar " + self.get_exec_path("MarkDuplicates") + " INPUT=$1 METRICS_FILE=$2 OUTPUT=$3" + self.duplication_options + " 2> $4",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.bam_files, outputs=[self.duplication_files, self.tmp_bam, self.dupl_stderrs])
self.add_shell_execution("rm $1 2> $2", cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.tmp_bam, outputs=self.del_stderrs)
# Alignment quality stats
if self.csv_files_r2:
cigarlinestats = ShellFunction(self.get_exec_path("samtools") + " view -F0x0100 $1 | python " + self.get_exec_path("cigarlineGraph.py") + " -i - -t $2 $3 " + self.cigar_options + " 2> $4", cmd_format='{EXE} {IN} {OUT}')
cigarlinestats = MultiMap(cigarlinestats, inputs=self.bam_files, outputs=[self.csv_files_r1, self.csv_files_r2, self.cigar_stderrs])
else:
cigarlinestats = ShellFunction(self.get_exec_path("samtools") + " view -F0x0100 $1 | python " + self.get_exec_path("cigarlineGraph.py") + " -i - -t $2 " + self.cigar_options + " 2> $3", cmd_format='{EXE} {IN} {OUT}')
cigarlinestats = MultiMap(cigarlinestats, inputs=self.bam_files, outputs=[self.csv_files_r1, self.cigar_stderrs])
self.add_shell_execution(self.get_exec_path("samtools") + " view -F0x0100 $1 | python " + self.get_exec_path("cigarlineGraph.py") + " -i - -t $2 $3 " + self.cigar_options + " 2> $4",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.bam_files, outputs=[self.csv_files_r1, self.csv_files_r2, self.cigar_stderrs])
else:
self.add_shell_execution(self.get_exec_path("samtools") + " view -F0x0100 $1 | python " + self.get_exec_path("cigarlineGraph.py") + " -i - -t $2 " + self.cigar_options + " 2> $3",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.bam_files, outputs=[self.csv_files_r1, self.cigar_stderrs])
# Alignment summary
flagstats = ShellFunction(self.get_exec_path("samtools") + " view -F0x0100 -bh $1 | " + self.get_exec_path("samtools") + " flagstat - > $2", cmd_format='{EXE} {IN} {OUT}')
flagstats = Map(flagstats, self.bam_files, self.stat_files)
self.add_shell_execution(self.get_exec_path("samtools") + " view -F0x0100 -bh $1 | " + self.get_exec_path("samtools") + " flagstat - > $2",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.bam_files, outputs=self.stat_files)
def __parse_flagstat_file (self, flagstat_file):
......
......@@ -94,16 +94,19 @@ class BWA (Analysis):
if self.algorithm == "bwasw" or self.algorithm == "mem":
# Paired-end
if self.read2:
bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;", cmd_format='{EXE} {IN} {OUT}')
bwasw = MultiMap(bwa, inputs=[self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;",
cmd_format='{EXE} {IN} {OUT}' , map=True,
inputs=[self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
# Single-end
else:
bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 2>> $3 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $2 2>> $3 ; mv $2.bam $2;", cmd_format='{EXE} {IN} {OUT}')
bwasw = MultiMap(bwa, inputs=[self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
self.get_exec_path("samtools") + " sort - $2 2>> $3 ; mv $2.bam $2;",
cmd_format='{EXE} {IN} {OUT}' , map=True,
inputs=[self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
# Algorithm aln
else:
reads, sais = [], []
......
......@@ -56,5 +56,8 @@ class BWAIndex (Component):
return stderr.split()[7]
def process(self):
bwaindex = PythonFunction(bwa_index, cmd_format="{EXE} {ARG} {IN} {OUT}")
bwaindex(inputs=self.input_fasta, outputs=[self.databank, self.stdout, self.stderr], arguments=[self.get_exec_path("bwa"), self.algorithm])
\ No newline at end of file
self.add_python_execution(bwa_index,
cmd_format="{EXE} {ARG} {IN} {OUT}",
inputs=self.input_fasta, outputs=[self.databank, self.stdout, self.stderr], arguments=[self.get_exec_path("bwa"), self.algorithm],
map=False)
\ No newline at end of file
......@@ -20,8 +20,6 @@ from subprocess import Popen, PIPE
from ng6.analysis import Analysis
from weaver.function import PythonFunction
def inserts_metrics(bam_file, pairs_count_file, metrics_file, hist_file, log_file, samtools_path, collectinsertsizemetrics_path, options_dump_path):
"""
@param bam_file : path for bam
......@@ -137,9 +135,10 @@ class InsertsSizes (Analysis):
options_dump.close()
for i in range(len(self.bam_files)):
insertsSizes = PythonFunction(inserts_metrics, cmd_format="{EXE} {IN} {OUT} {ARG}")
insertsSizes(inputs=self.bam_files[i], outputs=[self.pairs_count_files[i], self.info_files[i], self.hist_files[i], self.log_files[i]], arguments=[self.get_exec_path("samtools"), self.get_exec_path("CollectInsertSizeMetrics"), options_dump_path])
self.add_python_execution(inserts_metrics,cmd_format="{EXE} {IN} {OUT} {ARG}",
inputs=self.bam_files[i], outputs=[self.pairs_count_files[i], self.info_files[i], self.hist_files[i], self.log_files[i]],
arguments=[self.get_exec_path("samtools"), self.get_exec_path("CollectInsertSizeMetrics"), options_dump_path])
def parse_pairs_count_file(self, input_file):
"""
@param input_file : the pairs count file path
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment