Commit 7354e85e authored by Penom Nom
Browse files

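Fix bug with large datasets (bash argument syntax): reference positional arguments as ${N} instead of $N, because bash parses $10 and above as $1 followed by a literal digit.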

parent 9c539ff0
......@@ -304,7 +304,7 @@ class Utils(object):
next_number = start_number
for elt in list:
arg_pattern += " $" + str(next_number)
arg_pattern += ' ${' + str(next_number) + '}'
next_number += 1
return [arg_pattern, next_number]
......
......@@ -104,7 +104,7 @@ class Casava18 (NG6Workflow):
def process(self):
# fastq illumina filter
group_prefix = Utils.get_group_basenames(self.read1_files+self.read2_files, "read")
group_prefix = (Utils.get_group_basenames(self.read1_files+self.read2_files, "read")).keys()
fastqilluminafilter = self.add_component("FastqIlluminaFilter", [self.read1_files+self.read2_files, self.args["keep_reads"], group_prefix, self.runobj.name+"_fastqilluminafilter.tar.gz"])
# split read 1 and read 2 from filtered files list
......
......@@ -133,5 +133,5 @@ class BWA (Analysis):
# Merges bam
for prefix in self.group_prefix:
[cmd_inputs_pattern, next_arg_number] = Utils.get_argument_pattern(groups_path[prefix], 1)
samtoolsmerge = ShellFunction( self.get_exec_path("samtools") + " merge $" + str(next_arg_number) + " " + cmd_inputs_pattern + " 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
samtoolsmerge = ShellFunction( self.get_exec_path("samtools") + ' merge ${' + str(next_arg_number) + '} ' + cmd_inputs_pattern + " 2>> " + self.stderr, cmd_format='{EXE} {IN} {OUT}')
samtoolsmerge(inputs=groups_path[prefix], outputs=outputs_path[prefix])
\ No newline at end of file
......@@ -30,8 +30,7 @@ class ConcatenateFilesGroups (Component):
def define_parameters(self, files_list, group_prefix, archive_name=None):
self.files_list = InputFileList(files_list)
self.group_prefix = group_prefix
self.group_prefix.sort()
self.group_prefix = group_prefix
extensions = os.path.basename(files_list[0]).split(".")[1:]
self.concat_files = OutputFileList(self.get_outputs('{basename_woext}.'+".".join(extensions), self.group_prefix))
self.archive_name = archive_name
......@@ -39,16 +38,12 @@ class ConcatenateFilesGroups (Component):
def process(self):
# Create dictionary : key = prefix and value = list of files to concatenate
concat_groups = {}
for file in self.files_list:
for prefix in self.group_prefix:
if os.path.basename(file).startswith(prefix):
if concat_groups.has_key(prefix):
concat_groups[prefix].append(file)
else:
concat_groups[prefix] = [file]
concat_groups = Utils.get_filepath_by_prefix(self.files_list, self.group_prefix)
# Create dictionary : key = prefix and value = the output file
outputs_path = Utils.get_filepath_by_prefix(self.concat_files, self.group_prefix)
output_pos = 0
for prefix in self.group_prefix:
# Sort list of files to concatenate
concat_groups[prefix].sort()
......@@ -57,9 +52,8 @@ class ConcatenateFilesGroups (Component):
# If the file is not zip
if not self.files_list[0].endswith(".gz"):
concatenate_files_groups = ShellFunction("cat " + cmd_inputs_pattern + " > $" + str(next_arg_number), cmd_format='{EXE} {IN} {OUT}')
concatenate_files_groups = ShellFunction('cat ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '}', cmd_format='{EXE} {IN} {OUT}')
# If the file is zip
else:
concatenate_files_groups = ShellFunction("zcat " + cmd_inputs_pattern + " | gzip - > $" + str(next_arg_number), cmd_format='{EXE} {IN} {OUT}')
concatenate_files_groups(inputs = concat_groups[prefix], outputs = self.concat_files[output_pos])
output_pos += 1
\ No newline at end of file
concatenate_files_groups = ShellFunction('zcat ' + cmd_inputs_pattern + ' | gzip - > ${' + str(next_arg_number) + '}', cmd_format='{EXE} {IN} {OUT}')
concatenate_files_groups(inputs = concat_groups[prefix], outputs = outputs_path[prefix])
\ No newline at end of file
......@@ -168,7 +168,7 @@ class FastQC (Analysis):
# Create cmd
[cmd_inputs_pattern, next_arg_number] = Utils.get_argument_pattern(file_group, 1)
fastqc = ShellFunction(self.get_exec_path("fastqc") + " --outdir " + self.output_directory + " " + self.options + " " + cmd_inputs_pattern + " > $" + str(next_arg_number) + " 2> $" + str(next_arg_number+1), cmd_format='{EXE} {IN} {OUT}')
fastqc = ShellFunction(self.get_exec_path("fastqc") + ' --outdir ' + self.output_directory + ' ' + self.options + ' ' + cmd_inputs_pattern + ' > ${' + str(next_arg_number) + '} 2> ${' + str(next_arg_number+1) + '}', cmd_format='{EXE} {IN} {OUT}')
fastqc(inputs = file_group, outputs = [self.stdouts[output_pos], self.stderrs[output_pos]])
output_pos += 1
\ No newline at end of file
......@@ -23,7 +23,9 @@ from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFileList, InputFileList, OutputFile, Formats
from jflow.abstraction import MultiMap
from ng6.analysis import Analysis
from ng6.utils import Utils
from weaver.function import ShellFunction
......@@ -54,17 +56,10 @@ class FastqIlluminaFilter (Analysis):
def post_process(self):
files = {}
# Create dictionary : key = file name or prefix, value = files path
files = {}
if self.group_prefix is not None:
for file in self.stdout:
for prefix in self.group_prefix:
if os.path.basename(file).startswith(prefix):
if files.has_key(prefix):
files[prefix].append(file)
else:
files[prefix] = [file]
files = Utils.get_filepath_by_prefix( self.stdout, self.group_prefix )
else:
for file in self.stdout:
file_name = os.path.splitext(os.path.basename(file))[0]
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment