Commit 598705b2 authored by Penom Nom
Browse files

Add multi-threading management for cluster.split.

parent cd1b4d94
......@@ -131,7 +131,7 @@ class MiSeqDiversity (NG6Workflow):
classifyseqs = self.add_component("MothurClassifySeqs",kwargs={'fasta_files':chimerauchime.pick_fasta_files,'template_files':self.args["classify_template"],'taxonomy_files':self.args["classify_taxonomy"],'count_table_files':chimerauchime.good_count_table_files,'without_krona':self.args["without_krona"],'processors':self.args["processors"]},parent=chimerauchime)
# OTUs approach
if self.args['otu_with_tax']:
cluster = self.add_component("MothurClusterSplit", kwargs={'fasta_files':chimerauchime.pick_fasta_files, 'count_table_files':chimerauchime.good_count_table_files, 'taxonomy_files':classifyseqs.taxonomy_files, 'taxonomy_level':4})
cluster = self.add_component("MothurClusterSplit", kwargs={'fasta_files':chimerauchime.pick_fasta_files, 'count_table_files':chimerauchime.good_count_table_files, 'taxonomy_files':classifyseqs.taxonomy_files, 'taxonomy_level':4, 'processors':self.args["processors"]})
else:
distseqs = self.add_component("MothurDistSeqs", kwargs={'fasta_files':chimerauchime.pick_fasta_files,'processors':self.args["processors"]})
cluster = self.add_component("MothurCluster", kwargs={'dist_files':distseqs.dist_files,'count_table_files':chimerauchime.good_count_table_files})
......
......@@ -27,31 +27,32 @@ class MothurClusterSplit(Component):
@summary : Assign sequences to OTUs in two step : splitting of your files into distinct groupings and the clustering of these groupings.
"""
def define_parameters(self, fasta_files, taxonomy_files, names_files=None, count_table_files=None, taxonomy_level=4, cutoff=None, method="average"):
def define_parameters(self, fasta_files, taxonomy_files, names_files=None, count_table_files=None, taxonomy_level=4, cutoff=None, method="average", processors=1):
"""
/!\ si on realise le cluster avec un fichier en colonne il faut le fichier names
@param fasta_files : Sequences processed.
@param taxonomy_files : The taxonomy file for your sequences.
@param names_files : The first column contains the name of a reference sequence that is in
@param fasta_files : [string] Sequences processed.
@param taxonomy_files : [string] The taxonomy file for your sequences.
@param names_files : [string] The first column contains the name of a reference sequence that is in
a distance matrix and the second column contains the names of the
sequences (separated by commas) that the reference sequence
represents.
@param count_table_files : This file is used to represent the number of duplicate sequences
@param count_table_files : [string] This file is used to represent the number of duplicate sequences
for a given representative sequence. Mothur will use this
information to form the correct OTU's.
@param taxonomy_level : The taxonomy level you want to use to split the distance file,
@param taxonomy_level : [string] The taxonomy level you want to use to split the distance file,
3 meaning use the third taxon in each list.
@paramm cutoff : These numbers indicate the cutoff levels for sequence clusters.
@paramm cutoff : [float] These numbers indicate the cutoff levels for sequence clusters.
'unique' means that a cluster is defined by all sequences that are identical along every single base in the sequence.
'0.01' indicates less than 1% difference.
@param method : Three clustering methods (Nearest neighbor, Furthest neighbor, Average
@param method : [string] Three clustering methods (Nearest neighbor, Furthest neighbor, Average
neighbor).
@param processors : [int] Number of processors used.
"""
# define parameters
self.cutoff = cutoff
self.method = method
self.split_method = "classify"
self.tax_level = taxonomy_level
self.processors_nb = processors
# define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.taxonomy_files = InputFileList(taxonomy_files, Formats.ANY)
......@@ -74,9 +75,9 @@ class MothurClusterSplit(Component):
cutoff_option = ',cutoff='+str(self.cutoff)
if self.names_files :
cluster = ShellFunction(self.get_exec_path("mothur") + ' "#cluster.split(fasta=$1, name=$2, taxonomy=$3, outputdir=' + self.output_directory + '/' + cutoff_option +\
', taxlevel=' + str(self.tax_level) + ', method=' + self.method + ', splitmethod=' + self.split_method + ')" > $4',cmd_format='{EXE} {IN} {OUT}')
', taxlevel=' + str(self.tax_level) + ', method=' + self.method + ', splitmethod=' + self.split_method + ', processor=' + processors_nb + ')" > $4',cmd_format='{EXE} {IN} {OUT}')
cluster = MultiMap(cluster, inputs=[self.fasta_files, self.names_files, self.taxonomy_files], outputs=[self.stdout, self.an_sabund_files, self.an_rabund_files, self.an_list_files])
if self.count_table_files :
cluster = ShellFunction(self.get_exec_path("mothur") + ' "#cluster.split(fasta=$1, count=$2, taxonomy=$3, outputdir=' + self.output_directory + '/' + cutoff_option +\
', taxlevel=' + str(self.tax_level) + ', method=' + self.method + ', splitmethod=' + self.split_method + ')" > $4',cmd_format='{EXE} {IN} {OUT}')
', taxlevel=' + str(self.tax_level) + ', method=' + self.method + ', splitmethod=' + self.split_method + ', processor=' + processors_nb + ')" > $4',cmd_format='{EXE} {IN} {OUT}')
cluster = MultiMap(cluster, inputs=[self.fasta_files, self.count_table_files, self.taxonomy_files], outputs=[self.stdout, self.an_list_files])
\ No newline at end of file
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment