Commit 51fb579f authored by Penom Nom
Browse files

Clean output files names.

parent b5884e18
......@@ -29,26 +29,60 @@ class ConcatenateFiles (Component):
@summary : Concatenates files.
"""
def define_parameters(self, files_list, output_name_woext):
def define_parameters(self, files_list, output_name_woext=None, output_name=None):
"""
@param files_list : path of files that will be concatenated
@param output_name_woext : filename for the output file
@param files_list : [list] paths of the files to concatenate. Provide a flat list to create one output file, or a list
of lists to create one output file per inner list.
@param output_name_woext : [list or str] filename(s) without extension for the output file(s). The extensions are automatically
taken from the first file of each list of inputs and appended.
@param output_name : [list or str] filename(s) for the output file(s).
@note : set exactly one of output_name_woext or output_name; when files_list is not empty, any other combination raises a ValueError.
@example :
usage with single concatenation : define_parameters(["file_1.txt", "file_2.txt"], None, "concat.txt")
usage with lists : define_parameters([["file_1.txt", "file_2.txt"], ["file_A.txt", "file_B.txt"]], None, ["concat_num.txt", "concat_letter.txt"])
"""
self.files_list = InputFileList(files_list)
if len(files_list) == 0 :
extensions = ""
if len(files_list) == 0:
self.files_list = list()
self.output_files = OutputFileList( list() )
self.output_file = OutputFile( "" )
else:
extensions = os.path.basename(files_list[0]).split(".")[1:]
self.output_file = OutputFileList(os.path.join(self.output_directory, output_name_woext + "." + ".".join(extensions)))
# Cast single usage to list with only one concatenation
files_list_tmp = files_list
output_name_woext_tmp = output_name_woext
output_name_tmp = output_name
if not isinstance(files_list[0], list):
files_list_tmp = [files_list]
if output_name_woext is not None:
output_name_woext_tmp = [output_name_woext]
if output_name is not None:
output_name_tmp = [output_name]
# Files
self.files_list = list()
output_files = list()
for i in range( len(files_list_tmp) ):
files_to_concatenate = files_list_tmp[i]
self.files_list.append( InputFileList(files_list_tmp[i]) )
format = ( files_to_concatenate.format if hasattr(files_to_concatenate, "format") else Formats.ANY )
if output_name_woext is not None and output_name is None:
extensions = os.path.basename(files_to_concatenate[0]).split(".")[1:]
output_files.append( os.path.join(self.output_directory, output_name_woext_tmp[i] + "." + ".".join(extensions)) )
elif output_name is not None and output_name_woext is None:
output_files.append( os.path.join(self.output_directory, output_name_tmp[i]) )
else:
raise ValueError( "An 'output_name_woext' OR an 'output_name' must be set." )
self.output_files = OutputFileList( output_files, format )
if not isinstance(files_list[0], list):
self.output_file = OutputFile( output_files[0], format )
def process(self):
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(self.files_list, 1)
files_list_str = " ".join( self.files_list )
# If the file is not zip
if len(self.files_list) != 0 and not self.files_list[0].endswith(".gz"):
concatenate_files = ShellFunction('cat ' + files_list_str + ' > $1', cmd_format='{EXE} {OUT}')
# If the file is zip
else:
concatenate_files = ShellFunction('zcat ' + files_list_str + ' | gzip - > $1', cmd_format='{EXE} {OUT}')
concatenate_files(outputs = self.output_file, includes = self.files_list)
\ No newline at end of file
# For each concatenation list
for i in range( len(self.files_list) ):
[cmd_inputs_pattern, next_arg_number] = get_argument_pattern(self.files_list[i], 1)
files_list_str = " ".join( self.files_list[i] )
# If the files are not gzip-compressed (the first file name does not end with ".gz")
if len(self.files_list[i]) != 0 and not self.files_list[i][0].endswith(".gz"):
concatenate_files = ShellFunction('cat ' + files_list_str + ' > $1', cmd_format='{EXE} {OUT}')
# If the files are gzip-compressed (this branch is also taken when the list is empty)
else:
concatenate_files = ShellFunction('zcat ' + files_list_str + ' | gzip - > $1', cmd_format='{EXE} {OUT}')
concatenate_files(outputs = self.output_files[i], includes = self.files_list[i])
\ No newline at end of file
......@@ -143,7 +143,7 @@ class GeneDiversity (NG6Workflow):
rename_clusters = self.add_component("AddSamplesNames", [framebot.corrected_proteins, '|', ';size=', new_samples_names])
# Merge sequences
merge = self.add_component("ConcatenateFiles", [rename_clusters.output_files, "all_trad_sequences.fasta"])
merge = self.add_component("ConcatenateFiles", [rename_clusters.output_files, None, "all_trad_sequences.fasta"])
# Create OTU
cdhit = self.add_component("Cdhit", [merge.output_file, self.args["otu_identity_threshold"], self.args["otu_length_diff_cutoff"],
......
......@@ -34,15 +34,15 @@ def to_biom( output_biom, clusters_file, precluster_sample_sep, precluster_size_
@summary : Write a biom file from cdhit results.
@param output_biom : [str] path to the output file.
@param clusters_file : [str] path to the '.clstr' file.
@param precluster_size_sep : [str] used if sequences provided to Cdhit are pre-clusters (ex : dereplication step before cd-hit). The number of sequences
represented by each pre-cluster can be added to the end of its ID. In this place the number is separated by the character
precluster_size_sep.
Example : precluster_size_sep=';' where sequence ID = 'seq10001;83'.
@param precluster_size_sep : [str] used if sequences provided to Cdhit come from differents samples ("none" otherwise). The sample name is stored in each
@param precluster_sample_sep : [str] used if sequences provided to Cdhit come from different samples ("none" otherwise). The sample name is stored in each
sequence ID. It is separated from the rest of the ID by the character precluster_sample_sep and is placed before the pre-cluster size information.
Example : sequence ID = 'seq10001|lake_spot_1;83'.
OR
sequence ID = 'seq10001|lake_spot_1'.
@param precluster_size_sep : [str] used if sequences provided to Cdhit are pre-clusters (e.g. a dereplication step before cd-hit). The number of sequences
represented by each pre-cluster can be appended to its ID; in that case the number is separated from the rest of the ID by the character
precluster_size_sep.
Example : precluster_size_sep=';' where sequence ID = 'seq10001;83'.
"""
from workflows.gene_diversity.lib.Biom import Biom, BiomIO
......@@ -169,9 +169,9 @@ class Cdhit (Analysis):
# Files
self.input_fasta = InputFileList( input_fasta )
self.output_files = OutputFileList( self.get_outputs('{basename_woext}.fasta', self.input_fasta), Formats.FASTA )
self.output_files = OutputFileList( self.get_outputs('{basename_woext}_cdhit.fasta', self.input_fasta), Formats.FASTA )
self.cluster_files = OutputFileList( self.get_outputs('{basename_woext}.clstr', self.input_fasta) )
self.biom_files = OutputFileList( self.get_outputs('{basename_woext}.biom', self.input_fasta) )
self.biom_files = OutputFileList( self.get_outputs('{basename_woext}_cdhit.biom', self.input_fasta) )
self.depth_files = OutputFileList( self.get_outputs('{basename_woext}_depth.tsv', self.input_fasta) )
self.hclust_files = OutputFileList( self.get_outputs('{basename_woext}_hclust.json', self.input_fasta) )
self.stderr = os.path.join(self.output_directory, 'cdhit.stderr')
......
......@@ -17,7 +17,7 @@
[global]
name = gene_diversity
description = [DEV] Analysis the composition and function of a microbial community from a functional gene.
description = Analysis the composition and function of a microbial community from a functional gene.
#
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment