Commit cd66b796 authored by Jerome Mariette
Browse files

No commit message

No commit message
parent a8af9adb
......@@ -38,26 +38,23 @@ class RNAdiversity (Workflow):
# then process the workflow
sffinfo = self.add_component("MothurSffinfo", [self.args["sff_files"]])
summaryseqs = self.add_component("MothurSummarySeqs", [sffinfo.fasta_files])
shhhflows = self.add_component("MothurShhhFlows",[sffinfo.flow_files])
trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.fasta_files,shhhflows.names_files, oligos_fullpath])
summaryseqs_trim = self.add_component("MothurSummarySeqs", [trimseqs.trim_fasta_files,trimseqs.trim_names_files],{},"trim")
uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.trim_fasta_files,trimseqs.trim_names_files])
summaryseqs_unique = self.add_component("MothurSummarySeqs", [uniqueseqs.unique_fasta_files,uniqueseqs.names_files],{},"unique")
trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.fasta_files, oligos_fullpath, shhhflows.names_files])
uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.trim_fasta_files, trimseqs.trim_names_files])
alignseqs = self.add_component("MothurAlignSeqs",[uniqueseqs.unique_fasta_files,self.args["reference_alignment"]])
summaryseqs_align = self.add_component("MothurSummarySeqs", [alignseqs.align_files,uniqueseqs.names_files],{},"align")
screenseqs = self.add_component("MothurScreenSeqs",[alignseqs.align_files,uniqueseqs.names_files,trimseqs.groups_files])
summaryseqs_screen = self.add_component("MothurSummarySeqs", [screenseqs.good_align_files,screenseqs.good_names_files],{},"screen")
filterseqs = self.add_component("MothurFilterSeqs",[screenseqs.good_align_files])
uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.filter_fasta_files,screenseqs.good_names_files],{},"filter")
precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.unique_fasta_files,uniqueseqs_filter.names_files,screenseqs.good_groups_files])
summaryseqs_precluster = self.add_component("MothurSummarySeqs", [precluster.precluster_fasta_files,precluster.precluster_names_files],{},"precluster")
chimerauchime = self.add_component("MothurChimeraUchime",[precluster.precluster_fasta_files,precluster.precluster_names_files,screenseqs.good_groups_files])
removeseqs = self.add_component("MothurRemoveSeqs",[chimerauchime.uchime_accnos_files,precluster.precluster_fasta_files,precluster.precluster_names_files,\
screenseqs.good_groups_files])
summaryseqs_removeseqs = self.add_component("MothurSummarySeqs", [removeseqs.pick_fasta_files,removeseqs.pick_names_files],{},"removeseqs")
classifyseqs = self.add_component("MothurClassifySeqs",[removeseqs.pick_fasta_files,removeseqs.pick_names_files,removeseqs.pick_groups_files,self.args["template"],\
self.args["taxonomy"]])
#screenseqs = self.add_component("MothurScreenSeqs",[alignseqs.align_files,uniqueseqs.names_files,trimseqs.groups_files])
#summaryseqs_screen = self.add_component("MothurSummarySeqs", [screenseqs.good_align_files,screenseqs.good_names_files],{},"screen")
#filterseqs = self.add_component("MothurFilterSeqs",[screenseqs.good_align_files])
#uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.filter_fasta_files,screenseqs.good_names_files],{},"filter")
#precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.unique_fasta_files,uniqueseqs_filter.names_files,screenseqs.good_groups_files])
#summaryseqs_precluster = self.add_component("MothurSummarySeqs", [precluster.precluster_fasta_files,precluster.precluster_names_files],{},"precluster")
#chimerauchime = self.add_component("MothurChimeraUchime",[precluster.precluster_fasta_files,precluster.precluster_names_files,screenseqs.good_groups_files])
#removeseqs = self.add_component("MothurRemoveSeqs",[chimerauchime.uchime_accnos_files,precluster.precluster_fasta_files,precluster.precluster_names_files,\
# screenseqs.good_groups_files])
#summaryseqs_removeseqs = self.add_component("MothurSummarySeqs", [removeseqs.pick_fasta_files,removeseqs.pick_names_files],{},"removeseqs")
#classifyseqs = self.add_component("MothurClassifySeqs",[removeseqs.pick_fasta_files,removeseqs.pick_names_files,removeseqs.pick_groups_files,self.args["template"],\
# self.args["taxonomy"]])
# removegroups = self.add_component("MothurRemoveGroups", [removeseqs.pick_fasta_files,removeseqs.pick_names_files,removeseqs.pick_groups_files,classifyseqs.taxonomy_files])
# krona = self.add_component("MothurKrona", [classifyseqs.taxonomy_files])
# distseqs = self.add_component("MothurDistSeqs", [removeseqs.pick_fasta_files, processors])
......
......@@ -10,29 +10,33 @@ class MothurAlignSeqs(Component):
Alignment of our data using the align.seqs command by aligning our data to the SILVA-compatible alignment database reference alignment
"""
def define_parameters(self, fasta_files, reference_alignment_files, processors=4):
def define_parameters(self, fasta_files, reference_alignment_files, search="kmers", ksize=8, align="needleman", processors=1):
"""
Define align.seqs component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
:param reference_alignment_files: reference alignment files to process
:type reference_alignment_files: str
:param search: mothur offers three methods of finding the template sequence - kmer searching, blast, and suffix tree searching
:type search: str
:param ksize: with the kmer option, change the size of kmers that are used
:type ksize: int
:param align: three alignment methods - blastn, gotoh, and needleman
:type align: str
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
# define parameters
self.processors = processors
#define input files
# define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.reference_alignment_files = InputFileList(reference_alignment_files, Formats.FASTA)
#define output files
# define output files
self.align_files = OutputFileList(self.get_outputs('{basename_woext}.align', self.fasta_files), Formats.FASTA)
self.align_report_files = OutputFileList(self.get_outputs('{basename_woext}.align.report', self.fasta_files))
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process align.seqs"
alignseqs = ShellFunction(self.get_exec_path("mothur") + ' "#align.seqs(fasta=$1,reference=$2,outputdir='+self.output_directory+\
alignseqs = ShellFunction(self.get_exec_path("mothur") + ' "#align.seqs(fasta=$1,reference=$2,outputdir='+self.output_directory + \
'/,processors='+str(self.processors)+')" > $3', cmd_format='{EXE} {IN} {OUT}')
alignseqs = MultiMap(alignseqs, inputs=[self.fasta_files,self.reference_alignment_files], outputs=[self.stdout,self.align_files,self.align_report_files])
......@@ -9,8 +9,7 @@ class MothurSummarySeqs(Component):
"""
"""
def define_parameters(self, fasta_files, names_files=None, processors=4):
def define_parameters(self, fasta_files, names_files=None, processors=1):
"""
Define summary.seqs component parameters.
:param fasta_files: fasta files to process
......@@ -19,8 +18,7 @@ class MothurSummarySeqs(Component):
:type processors: int
:param names_files: names files to process
:type names_files: str
"""
"""
#define parameters
self.processors = processors
#define input files
......@@ -33,7 +31,6 @@ class MothurSummarySeqs(Component):
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process summary.seqs"
if self.names_files:
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#summary.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $3',\
cmd_format='{EXE} {IN} {OUT}')
......@@ -41,4 +38,4 @@ class MothurSummarySeqs(Component):
else:
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#summary.seqs(fasta=$1,outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $2',\
cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.fasta_files], outputs=[self.stdout,self.summary_files])
filterseqs = MultiMap(filterseqs, inputs=[self.fasta_files], outputs=[self.stdout,self.summary_files])
\ No newline at end of file
......@@ -12,7 +12,7 @@ class MothurTrimSeqs(Component):
based on sequence length and the presence of ambiguous bases and get the reverse complement of your sequences.
"""
def define_parameters(self, fasta_files, names_files, oligos_file, flip=True, qaverage=25, maxambig=0, maxhomop=8,
def define_parameters(self, fasta_files, oligos_file, names_files=None, flip=True, qaverage=25, maxambig=0, maxhomop=8,
minlength=200, maxlength=600, bdiffs=0, pdiffs=2, processors=4):
"""
Define trim.seqs component parameters.
......@@ -53,22 +53,33 @@ class MothurTrimSeqs(Component):
self.pdiffs = pdiffs
# define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
if names_files: self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
else: self.names_files = None
self.oligos_file = InputFileList(oligos_file, Formats.MOTHUR_OLIGOS)
# define output files
self.trim_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.trim.fasta', self.fasta_files), Formats.FASTA)
self.trim_names_files = OutputFileList(self.get_outputs('{basename_woext}.trim.names', self.names_files), Formats.MOTHUR_NAMES)
if names_files:
self.trim_names_files = OutputFileList(self.get_outputs('{basename_woext}.trim.names', self.fasta_files), Formats.MOTHUR_NAMES)
self.scrap_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.scrap.fasta', self.fasta_files), Formats.FASTA)
self.scrap_names_files = OutputFileList(self.get_outputs('{basename_woext}.scrap.names', self.names_files), Formats.MOTHUR_NAMES)
if names_files:
self.scrap_names_files = OutputFileList(self.get_outputs('{basename_woext}.scrap.names', self.fasta_files), Formats.MOTHUR_NAMES)
self.groups_files = OutputFileList(self.get_outputs('{basename_woext}.groups', self.fasta_files), Formats.MOTHUR_GROUPS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
if self.flip: flip = "T"
else: flip = "F"
trimseqs = ShellFunction(self.get_exec_path("mothur") + ' "#trim.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+ \
'/,oligos=$3,processors='+str(self.processors)+',flip='+flip+',qaverage='+str(self.qaverage)+',maxambig='+ \
str(self.maxambig)+',maxhomop='+str(self.maxhomop)+',minlength='+str(self.minlength)+",maxlength="+ \
str(self.maxlength)+',bdiffs='+str(self.bdiffs)+',pdiffs='+str(self.pdiffs)+')" > $4', cmd_format='{EXE} {IN} {OUT}')
trimseqs = MultiMap(trimseqs, inputs=[self.fasta_files,self.names_files,self.oligos_file], \
outputs=[self.stdout,self.trim_fasta_files,self.trim_names_files,self.scrap_fasta_files,self.scrap_names_files,self.groups_files])
if self.names_files:
trimseqs = ShellFunction(self.get_exec_path("mothur") + ' "#trim.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+ \
'/,oligos=$3,processors='+str(self.processors)+',flip='+flip+',qaverage='+str(self.qaverage)+',maxambig='+ \
str(self.maxambig)+',maxhomop='+str(self.maxhomop)+',minlength='+str(self.minlength)+",maxlength="+ \
str(self.maxlength)+',bdiffs='+str(self.bdiffs)+',pdiffs='+str(self.pdiffs)+')" > $4', cmd_format='{EXE} {IN} {OUT}')
trimseqs = MultiMap(trimseqs, inputs=[self.fasta_files,self.names_files,self.oligos_file], \
outputs=[self.stdout,self.trim_fasta_files,self.trim_names_files,self.scrap_fasta_files,self.scrap_names_files,self.groups_files])
else:
trimseqs = ShellFunction(self.get_exec_path("mothur") + ' "#trim.seqs(fasta=$1,outputdir='+self.output_directory+ \
'/,oligos=$2,processors='+str(self.processors)+',flip='+flip+',qaverage='+str(self.qaverage)+',maxambig='+ \
str(self.maxambig)+',maxhomop='+str(self.maxhomop)+',minlength='+str(self.minlength)+",maxlength="+ \
str(self.maxlength)+',bdiffs='+str(self.bdiffs)+',pdiffs='+str(self.pdiffs)+')" > $3', cmd_format='{EXE} {IN} {OUT}')
trimseqs = MultiMap(trimseqs, inputs=[self.fasta_files,self.oligos_file], \
outputs=[self.stdout,self.trim_fasta_files,self.scrap_fasta_files,self.groups_files])
......@@ -11,7 +11,7 @@ class MothurUniqueSeqs(Component):
and a file that indicates those sequences that are identical to the reference sequence.
"""
def define_parameters(self, fasta_files, names_files):
def define_parameters(self, fasta_files, names_files=None):
"""
Define unique.seqs component parameters.
:param fasta_files: fasta files to process
......@@ -19,16 +19,19 @@ class MothurUniqueSeqs(Component):
:param names_files: names files to process
:type names_files: str
"""
#define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.trim_names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
if names_files: self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
else: self.names_files = None
#define output files
self.unique_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.unique.fasta', self.fasta_files), Formats.FASTA)
self.names_files = OutputFileList(self.get_outputs('{basename_woext}.names', self.fasta_files), Formats.MOTHUR_NAMES)
self.unique_names_files = OutputFileList(self.get_outputs('{basename_woext}.names', self.fasta_files), Formats.MOTHUR_NAMES)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process unique.seqs"
uniqueseqs = ShellFunction(self.get_exec_path("mothur") + ' "#unique.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+'/)" > $3', cmd_format='{EXE} {IN} {OUT}')
uniqueseqs = MultiMap(uniqueseqs, inputs=[self.fasta_files,self.trim_names_files], outputs=[self.stdout,self.unique_fasta_files,self.names_files])
\ No newline at end of file
if self.names_files:
uniqueseqs = ShellFunction(self.get_exec_path("mothur") + ' "#unique.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+'/)" > $3', cmd_format='{EXE} {IN} {OUT}')
uniqueseqs = MultiMap(uniqueseqs, inputs=[self.fasta_files,self.names_files], outputs=[self.stdout,self.unique_fasta_files,self.unique_names_files])
else:
uniqueseqs = ShellFunction(self.get_exec_path("mothur") + ' "#unique.seqs(fasta=$1,outputdir='+self.output_directory+'/)" > $2', cmd_format='{EXE} {IN} {OUT}')
uniqueseqs = MultiMap(uniqueseqs, inputs=self.fasta_files, outputs=[self.stdout,self.unique_fasta_files,self.unique_names_files])
\ No newline at end of file
......@@ -45,7 +45,7 @@ reference_alignment.required = True
forward_primer.name = forward_primer
forward_primer.flag = --forward-primer
forward_primer.help = Which forward primer has been used in the experimentation : "primerString"
#forward_primer.required = True
forward_primer.required = True
reverse_primer.name = reverse_primer
reverse_primer.flag = --reverse-primer
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment