Commit aa5d986f authored by Celine Noirot's avatar Celine Noirot
Browse files

ADD usage of add_python_execution and add_shell_execution

parent c2b88099
......@@ -20,8 +20,6 @@ import pickle
from jflow.component import Component
from weaver.function import PythonFunction
def add_raw_files (run_dump_path, compression):
import pickle
# load the analysis object
......@@ -44,6 +42,6 @@ class AddRawFiles (Component):
run_dump = open(run_dump_path, "wb")
pickle.dump(self.runobj, run_dump)
run_dump.close()
addraw = PythonFunction(add_raw_files, cmd_format='{EXE} {ARG} > {OUT}')
addraw(outputs=self.stdout, includes=self.files_to_save, arguments=[run_dump_path, self.compression])
\ No newline at end of file
self.add_python_execution(add_raw_files, cmd_format='{EXE} {ARG} > {OUT}', map=False,
outputs=self.stdout, includes=self.files_to_save, arguments=[run_dump_path, self.compression] )
\ No newline at end of file
......@@ -17,12 +17,8 @@
import os
from subprocess import Popen, PIPE
from jflow.abstraction import MultiMap
from jflow.utils import get_argument_pattern
from weaver.function import ShellFunction
from ng6.analysis import Analysis
from ng6.utils import Utils
......@@ -112,26 +108,29 @@ class BWA (Analysis):
reads, sais = [], []
reads.extend(self.read1)
sais.extend(self.sai1)
bwa = ShellFunction(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 > $2 2>> $3 " , cmd_format='{EXE} {IN} {OUT}')
# Paired-end
if self.read2:
reads.extend(self.read2)
sais.extend(self.sai2)
bwa_aln = MultiMap(bwa, inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
bwasampe = ShellFunction(self.get_exec_path("bwa") + " sampe " + self.reference_genome +
self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 > $2 2>> $3 " , cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
self.add_shell_execution(self.get_exec_path("bwa") + " sampe " + self.reference_genome +
" $1 $2 $3 $4 2>> $6 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $5 2>> $6; mv $5.bam $5;",
cmd_format='{EXE} {IN} {OUT}')
bwasampe = MultiMap(bwasampe, inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
self.get_exec_path("samtools") + " sort - $5 2>> $6; mv $5.bam $5;",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=[self.sai1, self.sai2, self.read1, self.read2], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
# Single-end
else:
bwa_aln = MultiMap(bwa, inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
bwasamse = ShellFunction(self.get_exec_path("bwa") + " samse " + self.reference_genome +
self.add_shell_execution(self.get_exec_path("bwa") + " " + self.algorithm + " " + self.reference_genome +
" $1 > $2 2>> $3 " , cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=[reads], outputs=[sais, self.stderrs_aln], includes=self.reference_genome)
self.add_shell_execution(self.get_exec_path("bwa") + " samse " + self.reference_genome +
" $1 $2 2>> $4 | " + self.get_exec_path("samtools") + " view -bS - | " +
self.get_exec_path("samtools") + " sort - $3 2>> $4; mv $3.bam $3;",
cmd_format='{EXE} {IN} {OUT}')
bwasamse = MultiMap(bwasamse, inputs=[self.sai1, self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=[self.sai1, self.read1], outputs=[unmerged_bam, self.stderrs], includes=self.reference_genome)
if self.group_prefix != None:
# Create dictionary : key = prefix and value = list of files to merge
......@@ -144,8 +143,9 @@ class BWA (Analysis):
for prefix in self.group_prefix:
if len(groups_path[prefix]) > 1:
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(groups_path[prefix], 1)
samtoolsmerge = ShellFunction( self.get_exec_path("samtools") + ' merge ${' + str(next_arg_number) + '} ' + cmd_inputs_pattern , cmd_format='{EXE} {IN} {OUT}')
samtoolsmerge(inputs=groups_path[prefix], outputs=outputs_path[prefix])
self.add_shell_execution(self.get_exec_path("samtools") + ' merge ${' + str(next_arg_number) + '} ' + cmd_inputs_pattern ,
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs=groups_path[prefix], outputs=outputs_path[prefix])
elif groups_path[prefix] != outputs_path[prefix]:
link = ShellFunction( "ln -fs ${1} ${2}", cmd_format='{EXE} {IN} {OUT}')
link(inputs=groups_path[prefix], outputs=outputs_path[prefix])
\ No newline at end of file
self.add_shell_execution( "ln -fs ${1} ${2}", cmd_format='{EXE} {IN} {OUT}', map=False,
inputs=groups_path[prefix], outputs=outputs_path[prefix])
\ No newline at end of file
......@@ -20,8 +20,6 @@ from subprocess import Popen, PIPE
from jflow.component import Component
from weaver.function import PythonFunction
def bwa_index(exec_path, algorithm, input_fasta, databank, stdout_path, stderr_path):
from subprocess import Popen, PIPE
# first make the symbolic link
......
......@@ -20,7 +20,6 @@ import os
from jflow.component import Component
from jflow.utils import get_argument_pattern
from weaver.function import ShellFunction
class ConcatenateFiles (Component):
......@@ -39,9 +38,9 @@ class ConcatenateFiles (Component):
def process(self):
arg_pattern, next_number = get_argument_pattern(self.files_list, start_number=2)
if self.extension.endswith(".gz") :
concatenate_files = concatenate_files = ShellFunction('zcat ' + arg_pattern + ' | gzip - > $1', cmd_format='{EXE} {OUT} {IN}')
concatenate_files( outputs = self.output_file, inputs=self.files_list)
self.add_shell_execution('zcat ' + arg_pattern + ' | gzip - > $1', cmd_format='{EXE} {OUT} {IN}',
map=False, outputs = self.output_file, inputs=self.files_list)
else :
concatenate_files = concatenate_files = ShellFunction('cat ' + arg_pattern + ' > $1', cmd_format='{EXE} {OUT} {IN}')
concatenate_files( outputs = self.output_file, inputs=self.files_list)
self.add_shell_execution('cat ' + arg_pattern + ' > $1', cmd_format='{EXE} {OUT} {IN}',
map=False, outputs = self.output_file, inputs=self.files_list)
\ No newline at end of file
......@@ -20,8 +20,6 @@ import os
from jflow.utils import get_argument_pattern
from jflow.component import Component
from weaver.function import ShellFunction
from ng6.utils import Utils
......@@ -57,8 +55,11 @@ class ConcatenateFilesGroups (Component):
# If the file is not zip
if not self.files_list[0].endswith(".gz"):
concatenate_files_groups = ShellFunction('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}', cmd_format='{EXE} {IN} {OUT}')
self.add_shell_execution('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = concat_groups[prefix], outputs = outputs_path[prefix])
# If the file is zip
else:
concatenate_files_groups = ShellFunction('zcat ' + cmd_inputs_pattern + ' | gzip - > ${' + str(next_arg_number) + '}', cmd_format='{EXE} {IN} {OUT}')
concatenate_files_groups(inputs = concat_groups[prefix], outputs = outputs_path[prefix])
\ No newline at end of file
self.add_shell_execution('zcat ' + cmd_inputs_pattern + ' | gzip - > ${' + str(next_arg_number) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = concat_groups[prefix], outputs = outputs_path[prefix])
......@@ -19,10 +19,6 @@ import os
from subprocess import Popen, PIPE
from jflow.utils import get_argument_pattern
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
from weaver.abstraction import Map
from ng6.analysis import Analysis
from ng6.utils import Utils
......@@ -78,8 +74,11 @@ class ContaminationSearch (Analysis):
for current_databank in self.databanks:
databank_basename = os.path.basename(current_databank).split(".")[0]
unmerged_name_files = self.get_outputs('{basename_woext}_names.' + databank_basename + '.txt', self.sequence_files)
bwa = ShellFunction(self.get_exec_path("bwa") + ' aln ' + current_databank + ' $1 2>> $3 | ' + self.get_exec_path("bwa") + ' samse ' + current_databank + ' - $1 2>> $3 | ' + self.get_exec_path("samtools") + ' view -SF 260 - 2>> $3 | cut -f1 - 2>> $3 | sort - > $2 2>> $3', cmd_format='{EXE} {IN} {OUT}')
bwa = MultiMap(bwa, inputs=self.sequence_files, outputs=[unmerged_name_files, self.conta_stderr[databank_basename]])
self.add_shell_execution(self.get_exec_path("bwa") + ' aln ' + current_databank + ' $1 2>> $3 | ' +
self.get_exec_path("bwa") + ' samse ' + current_databank + ' - $1 2>> $3 | ' + self.get_exec_path("samtools") +
' view -SF 260 - 2>> $3 | cut -f1 - 2>> $3 | sort - > $2 2>> $3',
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.sequence_files, outputs=[unmerged_name_files, self.conta_stderr[databank_basename]])
if self.group_prefix :
# Create dictionary : key = prefix and value = list of files to merge
......@@ -91,9 +90,10 @@ class ContaminationSearch (Analysis):
# Merges tmp name files in output
for prefix in self.group_prefix:
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(groups_path[prefix], 1)
merge = ShellFunction('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}', cmd_format='{EXE} {IN} {OUT}')
merge(inputs=groups_path[prefix], outputs=outputs_path[prefix])
self.add_shell_execution('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs=groups_path[prefix], outputs=outputs_path[prefix])
else:
# Rename tmp to output
rename = ShellFunction('mv $1 $2', cmd_format='{EXE} {IN} {OUT}')
rename = Map(rename, inputs=unmerged_name_files, outputs=self.name_files[databank_basename])
\ No newline at end of file
self.add_shell_execution('mv $1 $2', cmd_format='{EXE} {IN} {OUT}', map=False,
inputs=unmerged_name_files, outputs=self.name_files[databank_basename])
\ No newline at end of file
......@@ -22,8 +22,6 @@ from subprocess import Popen, PIPE
from jflow.utils import get_argument_pattern
from weaver.function import ShellFunction
from ng6.analysis import Analysis
from ng6.utils import Utils
......@@ -199,6 +197,8 @@ class FastQC (Analysis):
# Create cmd
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(file_group, 1)
fastqc = ShellFunction(self.get_exec_path("fastqc") + ' -t ' + self.nb_threads + ' --extract --outdir ' + self.output_directory + ' ' + self.options + ' ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '} 2> ${' + str(next_arg_number+1) + '}', cmd_format='{EXE} {IN} {OUT}')
fastqc(inputs = file_group, outputs = [self.stdouts[output_pos], self.stderrs[output_pos]])
self.add_shell_execution(self.get_exec_path("fastqc") + ' -t ' + self.nb_threads + ' --extract --outdir ' + self.output_directory + ' '
+ self.options + ' ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '} 2> ${' + str(next_arg_number+1) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = file_group, outputs = [self.stdouts[output_pos], self.stderrs[output_pos]])
......@@ -18,13 +18,9 @@
import re, os
from subprocess import Popen, PIPE
from jflow.abstraction import MultiMap
from ng6.analysis import Analysis
from ng6.utils import Utils
from weaver.function import ShellFunction
class FastqIlluminaFilter (Analysis):
def define_parameters(self, fastq_files, keep_reads="pass_illumina_filters", group_prefix=None):
......@@ -102,8 +98,11 @@ class FastqIlluminaFilter (Analysis):
def process(self):
# If the file is not zip
if not self.fastq_files[0].endswith(".gz"):
fastq_illumina_filter = ShellFunction(self.get_exec_path("fastq_illumina_filter") + " --keep " + self.keep_option + " -v -o $1 $3 > $2", cmd_format='{EXE} {OUT} {IN}')
self.add_shell_execution(self.get_exec_path("fastq_illumina_filter") + " --keep " + self.keep_option + " -v -o $1 $3 > $2",
cmd_format='{EXE} {OUT} {IN}', map=True,
inputs = self.fastq_files, outputs = [self.fastq_files_filtered, self.stdout])
# If the file is zip
else:
fastq_illumina_filter = ShellFunction("zcat $3 | " + self.get_exec_path("fastq_illumina_filter") + " --keep " + self.keep_option + " -v 2> $2 | gzip > $1", cmd_format='{EXE} {OUT} {IN}')
fastq_illumina_filter = MultiMap(fastq_illumina_filter, inputs = self.fastq_files, outputs = [self.fastq_files_filtered, self.stdout])
self.add_shell_execution("zcat $3 | " + self.get_exec_path("fastq_illumina_filter") + " --keep " + self.keep_option + " -v 2> $2 | gzip > $1",
cmd_format='{EXE} {OUT} {IN}', map=True,
inputs = self.fastq_files, outputs = [self.fastq_files_filtered, self.stdout])
......@@ -18,8 +18,6 @@
import os
from subprocess import Popen, PIPE
from weaver.function import ShellFunction
from ng6.analysis import Analysis
from ng6.utils import Utils
import jflow.seqio as seqio
......@@ -152,5 +150,8 @@ class Flash (Analysis):
def process(self):
for i in range(0, len(self.prefixes)):
flash = ShellFunction(self.get_exec_path("flash") + " -z $1 $2 " + self.options + " -o " + os.path.basename(self.prefixes[i]) + " -d " + self.output_directory + " 2> $3", cmd_format='{EXE} {IN} {OUT}')
flash( inputs=[self.read1_files[i], self.read2_files[i]], outputs=[self.stderrs[i], self.extended_frags[i], self.not_combined_read_1[i], self.not_combined_read_2[i]] )
\ No newline at end of file
self.add_shell_execution(self.get_exec_path("flash") + " -z $1 $2 " + self.options +
" -o " + os.path.basename(self.prefixes[i]) + " -d " + self.output_directory + " 2> $3",
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs=[self.read1_files[i], self.read2_files[i]],
outputs=[self.stderrs[i], self.extended_frags[i], self.not_combined_read_1[i], self.not_combined_read_2[i]] )
\ No newline at end of file
......@@ -19,11 +19,6 @@ import os, sys
from subprocess import Popen, PIPE
from jflow.utils import get_argument_pattern
from jflow.component import Component
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction,PythonFunction
from weaver.abstraction import Map
from ng6.analysis import Analysis
from ng6.utils import Utils
......@@ -148,13 +143,14 @@ class SubsetAssignation (Analysis):
input_groups = Utils.get_filepath_by_prefix( self.sequence_files, self.group_prefixes )
sub_fasta_groups = Utils.get_filepath_by_prefix( self.sub_fasta_files, self.group_prefixes )
for prefix in self.group_prefixes:
file_extraction = PythonFunction(extract_random_seq, cmd_format="{EXE} " + " ".join([str(self.extract_rate),
str(self.min_nb_seq), str(self.max_nb_seq)]) + " {IN} {OUT}")
file_extraction(inputs = input_groups[prefix], outputs = sub_fasta_groups[prefix])
self.add_python_function(extract_random_seq,cmd_format="{EXE} " +
" ".join([str(self.extract_rate),str(self.min_nb_seq), str(self.max_nb_seq)]) + " {IN} {OUT}",
inputs = input_groups[prefix], outputs = sub_fasta_groups[prefix], map=False)
# Align on databank
blastn = ShellFunction(self.get_exec_path("blastn") + " -max_target_seqs " + str(self.max_target_seqs) + " -use_index " + self.use_index + " -outfmt 7 -db " + self.databank + " -query $1 -out $2",
cmd_format='{EXE} {IN} {OUT}')
blastn = Map(blastn, inputs=self.sub_fasta_files, outputs=self.blast_files)
self.add_shell_function(self.get_exec_path("blastn") + " -max_target_seqs " + str(self.max_target_seqs) + " -use_index " + self.use_index + " -outfmt 7 -db " + self.databank + " -query $1 -out $2",
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs=self.sub_fasta_files, outputs=self.blast_files)
# Create files groups
blast_groups = Utils.get_filepath_by_prefix( self.blast_files, self.group_prefixes )
blast_merges_groups = Utils.get_filepath_by_prefix( self.get_outputs('{basename}_merge.blast', self.group_prefixes), self.group_prefixes )
......@@ -164,9 +160,11 @@ class SubsetAssignation (Analysis):
for prefix in self.group_prefixes:
# Merge blast
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(blast_groups[prefix], 1)
concatenate = ShellFunction('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}', cmd_format='{EXE} {IN} {OUT}')
concatenate(inputs = blast_groups[prefix], outputs = blast_merges_groups[prefix])
self.add_shell_function('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}',
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = blast_groups[prefix], outputs = blast_merges_groups[prefix])
# Create krona
kronaImportBLAST = ShellFunction(self.get_exec_path("ktImportBLAST") + " -i -b $1 -o $2 > $3", cmd_format='{EXE} {IN} {OUT}')
kronaImportBLAST = MultiMap(kronaImportBLAST, inputs = blast_merges_groups[prefix], outputs = [html_groups[prefix], krona_groups[prefix]])
self.add_shell_function(self.get_exec_path("ktImportBLAST") + " -i -b $1 -o $2 > $3",
cmd_format='{EXE} {IN} {OUT}', map=False,
inputs = blast_merges_groups[prefix], outputs = [html_groups[prefix], krona_groups[prefix]])
\ No newline at end of file
......@@ -18,9 +18,7 @@
import os
from jflow.component import Component
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class FastxReverseComplement (Component):
......@@ -37,8 +35,10 @@ class FastxReverseComplement (Component):
def process(self):
if self.input_files[0].endswith(".gz"):
fastxreversecomplement = ShellFunction('zcat $1 2> $3 | ' + self.get_exec_path("fastx_reverse_complement") + ' ' + self.options + ' -z -i - -o $2 2>> $3', cmd_format="{EXE} {IN} {OUT}")
else:
fastxreversecomplement = ShellFunction(self.get_exec_path("fastx_reverse_complement") + ' ' + self.options + ' -i $1 -o $2 2> $3', cmd_format="{EXE} {IN} {OUT}")
fastxreversecomplement = MultiMap(fastxreversecomplement, inputs=self.input_files, outputs=[self.output_files, self.stderr])
\ No newline at end of file
self.add_shell_execution('zcat $1 2> $3 | ' + self.get_exec_path("fastx_reverse_complement") + ' ' + self.options + ' -z -i - -o $2 2>> $3',
cmd_format="{EXE} {IN} {OUT}", map=True,
inputs=self.input_files, outputs=[self.output_files, self.stderr])
else:
self.add_shell_execution(self.get_exec_path("fastx_reverse_complement") + ' ' + self.options + ' -i $1 -o $2 2> $3',
cmd_format="{EXE} {IN} {OUT}", map=True,
inputs=self.input_files, outputs=[self.output_files, self.stderr])
\ No newline at end of file
......@@ -19,13 +19,10 @@ import os
import re
from subprocess import Popen, PIPE
from jflow.abstraction import MultiMap
import jflow.seqio as seqio
from ng6.analysis import Analysis
from weaver.function import ShellFunction
class CutAdapt (Analysis):
def _get_length_table(self, input_file):
"""
......@@ -190,29 +187,29 @@ class CutAdapt (Analysis):
self.cmdline_options.extend(["--paired-output", "$3"])
# cutadapt forward
cutAdapt = ShellFunction(self.get_exec_path("cutadapt") + " " + " ".join(self.options_FWD + self.cmdline_options)
+ " --paired-output $4 -o $3 $1 $2 > $5", cmd_format = '{EXE} {IN} {OUT}')
MultiMap(cutAdapt, inputs = [self.input_files_R1, self.input_files_R2], outputs=[self.tmp_files_R1, self.tmp_files_R2, self.log_files_R1])
self.add_shell_execution(self.get_exec_path("cutadapt") + " " + " ".join(self.options_FWD + self.cmdline_options)
+ " --paired-output $4 -o $3 $1 $2 > $5", cmd_format = '{EXE} {IN} {OUT}', map=True,
inputs = [self.input_files_R1, self.input_files_R2], outputs=[self.tmp_files_R1, self.tmp_files_R2, self.log_files_R1])
#cutadapt reverse
cutAdapt = ShellFunction(self.get_exec_path("cutadapt") + " " + " ".join(self.options_REV + self.cmdline_options)
+ " --paired-output $3 -o $4 $1 $2 > $5", cmd_format = '{EXE} {IN} {OUT}')
MultiMap(cutAdapt, inputs = [self.tmp_files_R2, self.tmp_files_R1], outputs = [self.output_files_R1, self.output_files_R2, self.log_files_R2])
self.add_shell_execution(self.get_exec_path("cutadapt") + " " + " ".join(self.options_REV + self.cmdline_options)
+ " --paired-output $3 -o $4 $1 $2 > $5", cmd_format = '{EXE} {IN} {OUT}', map=True,
inputs = [self.tmp_files_R2, self.tmp_files_R1], outputs = [self.output_files_R1, self.output_files_R2, self.log_files_R2])
else :
#forward read
cutAdapt = ShellFunction(self.get_exec_path("cutadapt") + " " + " ".join(self.options_FWD + self.cmdline_options)
+ " -o $2 $1 > $3", cmd_format = '{EXE} {IN} {OUT}')
MultiMap(cutAdapt, inputs = [self.input_files_R1], outputs=[self.output_files_R1, self.log_files_R1])
self.add_shell_execution(self.get_exec_path("cutadapt") + " " + " ".join(self.options_FWD + self.cmdline_options)
+ " -o $2 $1 > $3", cmd_format = '{EXE} {IN} {OUT}', map=True,
inputs = [self.input_files_R1], outputs=[self.output_files_R1, self.log_files_R1])
# reverse read
cutAdapt = ShellFunction(self.get_exec_path("cutadapt") + " " + " ".join(self.options_REV + self.cmdline_options)
+ " -o $2 $1 > $3", cmd_format = '{EXE} {IN} {OUT}')
MultiMap(cutAdapt, inputs = [self.input_files_R2], outputs=[self.output_files_R2, self.log_files_R2])
self.add_shell_execution(self.get_exec_path("cutadapt") + " " + " ".join(self.options_REV + self.cmdline_options)
+ " -o $2 $1 > $3", cmd_format = '{EXE} {IN} {OUT}', map=True,
inputs = [self.input_files_R2], outputs=[self.output_files_R2, self.log_files_R2])
else :
cutAdapt = ShellFunction(self.get_exec_path("cutadapt") + " " + " ".join(self.options_FWD + self.cmdline_options)
+ " -o $2 $1 > $3", cmd_format = '{EXE} {IN} {OUT}')
MultiMap(cutAdapt, inputs = [self.input_files_R1], outputs=[self.output_files_R1, self.log_files_R1])
self.add_shell_execution(self.get_exec_path("cutadapt") + " " + " ".join(self.options_FWD + self.cmdline_options)
+ " -o $2 $1 > $3", cmd_format = '{EXE} {IN} {OUT}', map=True,
inputs = [self.input_files_R1], outputs=[self.output_files_R1, self.log_files_R1])
def parse_metrics_file(self, input_file):
"""
......
......@@ -19,11 +19,6 @@ import os, re, math, json, functools
from subprocess import Popen, PIPE
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction, PythonFunction
from weaver.abstraction import Map
from ng6.analysis import Analysis
from ng6.utils import Utils
......@@ -137,35 +132,38 @@ class RSeQC (Analysis):
mapped_bam = self.mapped_bams[index]
# get mapped bams only
samtools_view = ShellFunction( "{0} view -h -F4 -b $1 > $2 ".format(self.get_exec_path("samtools") ) , cmd_format='{EXE} {IN} {OUT}')
samtools_view( inputs = bam_file, outputs = mapped_bam)
self.add_shell_execution( "{0} view -h -F4 -b $1 > $2 ".format(self.get_exec_path("samtools") ) , cmd_format='{EXE} {IN} {OUT}',
map=False, inputs = bam_file, outputs = mapped_bam)
#InferExperiment
infer_exp = ShellFunction(self.get_exec_path("infer_experiment.py") + " -s $1 -i $2 -r $3 1> $4 2> $5", cmd_format="{EXE} {ARG} {IN} {OUT}")
infer_exp( arguments = self.sample_size, inputs=[mapped_bam, bed_file], outputs=[self.stdout_infer[index], self.stderr_infer[index]] )
self.add_shell_execution( self.get_exec_path("infer_experiment.py") + " -s $1 -i $2 -r $3 1> $4 2> $5", cmd_format="{EXE} {ARG} {IN} {OUT}",
map=False, arguments = self.sample_size, inputs=[mapped_bam, bed_file], outputs=[self.stdout_infer[index], self.stderr_infer[index]] )
#Inner distance
inner_dist = ShellFunction(self.get_exec_path('inner_distance.py') + " -o $1 -i $2 -r $3 1> $4 2> $5 " , cmd_format="{EXE} {ARG} {IN} {OUT}" )
inner_dist( arguments = output_prefix, inputs = [mapped_bam, bed_file] , outputs = [self.stdout_inner[index], self.stderr_inner[index], self.r_files_inner[index]])
self.add_shell_execution( self.get_exec_path('inner_distance.py') + " -o $1 -i $2 -r $3 1> $4 2> $5 " , cmd_format="{EXE} {ARG} {IN} {OUT}",
map=False, arguments = output_prefix, inputs = [mapped_bam, bed_file] , outputs = [self.stdout_inner[index], self.stderr_inner[index], self.r_files_inner[index]])
#Junction Annotation
junc_annot = ShellFunction(self.get_exec_path("junction_annotation.py") + " -i $1 -r $2 -o " + output_prefix + " -m " +
str(self.min_intron_size_junc_ann) + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}")
junc_annot(inputs=[mapped_bam, bed_file], outputs=[ self.stdout_junc_ann[index], self.stderr_junc_ann[index], self.r_files_junc_ann[index]])
self.add_shell_execution( self.get_exec_path("junction_annotation.py") + " -i $1 -r $2 -o " + output_prefix + " -m " +
str(self.min_intron_size_junc_ann) + " > $3 2> $4", cmd_format="{EXE} {IN} {OUT}", map=False,
inputs=[mapped_bam, bed_file], outputs=[ self.stdout_junc_ann[index], self.stderr_junc_ann[index], self.r_files_junc_ann[index]])
#Junction Saturation
junc_sat = ShellFunction(self.get_exec_path("junction_saturation.py") + " -i $1 -r $2 -o " + output_prefix + " -m " + str(self.min_intron_size_junc_sat) + " > $3 2> $4",
cmd_format="{EXE} {IN} {OUT}")
junc_sat(inputs=[mapped_bam, bed_file], outputs=[ self.stdout_junc_sat[index], self.stderr_junc_sat[index], self.r_files_junc_sat[index]])
self.add_shell_execution( self.get_exec_path("junction_saturation.py") + " -i $1 -r $2 -o " + output_prefix +
" -m " + str(self.min_intron_size_junc_sat) + " > $3 2> $4",
cmd_format="{EXE} {IN} {OUT}", map=False,
inputs=[mapped_bam, bed_file], outputs=[ self.stdout_junc_sat[index], self.stderr_junc_sat[index], self.r_files_junc_sat[index]])
#GeneBody Coverage
gene_b_cov = ShellFunction(self.get_exec_path("geneBody_coverage.py") + " -i $1 -o " + output_prefix + " -r $2 2> $3 > $4", cmd_format="{EXE} {IN} {OUT}")
gene_b_cov(inputs=[mapped_bam, bed_file], outputs=[ self.stderr_gbc[index], self.stdout_gbc[index], self.cov_files[index]])
self.add_shell_execution( self.get_exec_path("geneBody_coverage.py") + " -i $1 -o " + output_prefix + " -r $2 2> $3 > $4",
cmd_format="{EXE} {IN} {OUT}", map=False,
inputs=[mapped_bam, bed_file], outputs=[ self.stderr_gbc[index], self.stdout_gbc[index], self.cov_files[index]])
#RPKM Saturation
rpkm_sat = PythonFunction(rpkm_saturation, cmd_format="{EXE} {ARG} {IN} {OUT}")
rpkm_sat(arguments = [ self.get_exec_path('RPKM_saturation.py'), output_prefix], inputs = [ mapped_bam, bed_file, self.stdout_infer[index] ],
outputs = self.r_files_rpkm_sat[index] )
self.add_python_execution( rpkm_saturation, cmd_format="{EXE} {ARG} {IN} {OUT}", map=False,
arguments = [ self.get_exec_path('RPKM_saturation.py'), output_prefix],
inputs = [ mapped_bam, bed_file, self.stdout_infer[index] ],
outputs = self.r_files_rpkm_sat[index] )
def post_process(self):
......
......@@ -17,11 +17,8 @@
import os
from jflow.abstraction import MultiMap
from ng6.analysis import Analysis
from weaver.function import ShellFunction
class STAR (Analysis):
def define_parameters(self, fasta_file, genome_dir, read1, read2 = None, group_prefix=None, keep_bam = True, n_threads = 4 ):
......@@ -64,8 +61,9 @@ class STAR (Analysis):
"--runThreadN", str(self.n_threads),
"--outSAMunmapped Within",
"--readFilesIn", "$1", "$2" ]
star = ShellFunction( ' '.join(command), cmd_format='{EXE} {IN} {OUT}')
MultiMap(star, inputs = [self.read1 , self.read2], outputs = [self.output_prefixes, self.output_sams_no_md], includes = self.genome_dir)
self.aff_shell_execution( ' '.join(command), cmd_format='{EXE} {IN} {OUT}', map=True,
inputs = [self.read1 , self.read2], outputs = [self.output_prefixes, self.output_sams_no_md],
includes = self.genome_dir)
else :
command = [ "mkdir $2 ; " , self.get_exec_path("STAR"), "--outFileNamePrefix ","$2/",
"--readFilesCommand", "zcat",
......@@ -73,16 +71,18 @@ class STAR (Analysis):
"--outSAMunmapped Within",
"--runThreadN", str(self.n_threads),
"--readFilesIn", "$1" ]
star = ShellFunction( ' '.join(command), cmd_format='{EXE} {IN} {OUT}')
MultiMap(star, inputs = [self.read1], outputs = [self.output_prefixes, self.output_sams_no_md], includes = self.genome_dir)
self.aff_shell_execution( ' '.join(command), cmd_format='{EXE} {IN} {OUT}', map=True,
inputs = [self.read1], outputs = [self.output_prefixes, self.output_sams_no_md], includes = self.genome_dir)
# sort
sort_sam = ShellFunction( self.get_exec_path("samtools") + ' view -Sb $1 | ' + self.get_exec_path("samtools") + ' sort -@ ' + str(self.n_threads) + ' -f - $2 ' , cmd_format='{EXE} {IN} {OUT}')
MultiMap(sort_sam, inputs = [self.output_sams_no_md], outputs = [self.output_sorted_bams])
self.aff_shell_execution( self.get_exec_path("samtools") + ' view -Sb $1 | ' + self.get_exec_path("samtools") +
' sort -@ ' + str(self.n_threads) + ' -f - $2 ' , cmd_format='{EXE} {IN} {OUT}', map=True,
inputs = [self.output_sams_no_md], outputs = [self.output_sorted_bams])
#calmd and convert to bam
calmd = ShellFunction( self.get_exec_path("samtools") + ' calmd -b $1 ' + self.fasta_file_link + ' > $2' , cmd_format='{EXE} {IN} {OUT}')
MultiMap(calmd, inputs = [self.output_sorted_bams], outputs = [self.output_bams])
self.aff_shell_execution( self.get_exec_path("samtools") + ' calmd -b $1 ' + self.fasta_file_link + ' > $2' ,
cmd_format='{EXE} {IN} {OUT}', map=True,
inputs = [self.output_sorted_bams], outputs = [self.output_bams])
def post_process(self):
if self.keep_bam:
......
......@@ -19,8 +19,6 @@ import os
from jflow.component import Component
from weaver.function import ShellFunction
class STARIndex (Component):
def define_parameters(self, input_fasta):
......@@ -31,11 +29,11 @@ class STARIndex (Component):
def process(self):
# normalize fasta
normalize_fasta = ShellFunction( "java -Xmx4g -jar " + self.get_exec_path("NormalizeFasta.jar") + " I=$1 O=$2 ", cmd_format="{EXE} {IN} {OUT}")
normalize_fasta( inputs = self.input_fasta, outputs = self.normalized_fasta_file)
self.add_shell_execution( "java -Xmx4g -jar " + self.get_exec_path("NormalizeFasta.jar") + " I=$1 O=$2 ",
cmd_format="{EXE} {IN} {OUT}", map=False,
inputs = self.input_fasta, outputs = self.normalized_fasta_file)
# index for star
genomeBuild = ShellFunction( "mkdir $2 ; " + self.get_exec_path("STAR") + " --runMode genomeGenerate --genomeFastaFiles $1 --genomeDir $2",
cmd_format='{EXE} {IN} {OUT}' )
genomeBuild(inputs = self.normalized_fasta_file, outputs = self.index_directory)
self.add_shell_execution( "mkdir $2 ; " + self.get_exec_path("STAR") + " --runMode genomeGenerate --genomeFastaFiles $1 --genomeDir $2",
cmd_format='{EXE} {IN} {OUT}' , map=True, inputs = self.normalized_fasta_file, outputs = self.index_directory)
......@@ -20,7 +20,7 @@ all
--reference-transcriptome
workflows/illumina_rnaseq/data/Danio_cdna_sample.fa
--reference-genome
/home/inabihoudin/workspaces/ng6/nG6/workflows/illumina_rnaseq/data/Bos_taurus.dna.ch1.fa
workflows/illumina_rnaseq/data/Bos_taurus.dna.ch1.fa
--casava
directory=workflows/illumina_rnaseq/data/casava_directory_test/
......
......@@ -111,7 +111,7 @@ class Methylseq (CasavaNG6Workflow):
bam_for_next_step = bismarkReference.output_bam