Commit 1a1f704d authored by Jerome Mariette
Browse files

some updates on r454_diversity

parent 96685eb4
......@@ -41,7 +41,7 @@ class RNAdiversity (Workflow):
sffinfo = self.add_component("MothurSffinfo", [self.args["sff_files"]])
#summaryseqs = self.add_component("MothurSummarySeqs", [sffinfo.output_fasta, processors])
shhhflows = self.add_component("MothurShhhFlows",[sffinfo.flow_files])
#trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.output_shhh_fasta,shhhflows.output_shhh_names, oligos_fullpath, processors])
trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.fasta_files,shhhflows.names_files, oligos_fullpath])
#summaryseqs_trim = self.add_component("MothurSummarySeqs", [trimseqs.output_trim_fasta,processors,trimseqs.output_trim_names],{},"trim")
#uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.output_trim_fasta,trimseqs.output_trim_names])
#summaryseqs_unique = self.add_component("MothurSummarySeqs", [uniqueseqs.output_unique_fasta,processors,uniqueseqs.output_names],{},"unique")
......
......@@ -23,15 +23,15 @@ class MothurShhhFlows(Component):
"""
self.processors = processors
self.flow_files = InputFileList(flow_files, Formats.FLOW)
self.shhh_qual = OutputFileList(self.get_outputs('{basename_woext}.shhh.qual', self.flow_files), Formats.QUAL)
self.shhh_fasta = OutputFileList(self.get_outputs('{basename_woext}.shhh.fasta', self.flow_files), Formats.FASTA)
self.shhh_names = OutputFileList(self.get_outputs('{basename_woext}.shhh.names', self.flow_files), Formats.MOTHUR_NAMES)
self.shhh_counts = OutputFileList(self.get_outputs('{basename_woext}.shhh.counts', self.flow_files))
self.shhh_groups = OutputFileList(self.get_outputs('{basename_woext}.shhh.groups', self.flow_files), Formats.MOTHUR_GROUPS)
self.qual_files = OutputFileList(self.get_outputs('{basename_woext}.shhh.qual', self.flow_files), Formats.QUAL)
self.fasta_files = OutputFileList(self.get_outputs('{basename_woext}.shhh.fasta', self.flow_files), Formats.FASTA)
self.names_files = OutputFileList(self.get_outputs('{basename_woext}.shhh.names', self.flow_files), Formats.MOTHUR_NAMES)
self.counts_files = OutputFileList(self.get_outputs('{basename_woext}.shhh.counts', self.flow_files))
self.groups_files = OutputFileList(self.get_outputs('{basename_woext}.shhh.groups', self.flow_files), Formats.MOTHUR_GROUPS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.flow_files))
def process(self):
shhhflows = ShellFunction(self.get_exec_path("mothur") + ' "#shhh.flows(flow=$1,outputdir='+self.output_directory + \
'/,processors=' + str(self.processors) + ')" > $2', cmd_format='{EXE} {IN} {OUT}')
shhhflows = MultiMap(shhhflows, inputs=[self.flow_files], \
outputs=[self.stdout,self.shhh_qual,self.shhh_fasta,self.shhh_names,self.shhh_counts,self.shhh_groups])
outputs=[self.stdout,self.qual_files,self.fasta_files,self.names_files,self.counts_files,self.groups_files])
......@@ -15,26 +15,43 @@ class MothurTrimSeqs(Component):
based on sequence length and the presence of ambiguous bases and get the reverse complement of your sequences.
"""
def define_parameters(self, shhh_fasta_file, shhh_names_file, oligos, processors=1):
def define_parameters(self, fasta_files, names_files, oligos_file, flip=True, qaverage=25, maxambig=0, maxhomop=8,
minlength=200, maxlength=600, bdiffs=0, pdiffs=2, processors=1):
"""
Define shh.flows component parameters.
:param flow_file: a flow file to process
:type sff_files: str
:param processors: the number of processors to use
:type processors: int
"""
# define parameters
self.processors = processors
self.flip = flip
self.qaverage = qaverage
self.maxambig = maxambig
self.maxhomop = maxhomop
self.minlength = minlength
self.maxlength = maxlength
self.bdiffs = bdiffs
self.pdiffs = pdiffs
# define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
self.oligos_file = InputFileList(oligos_file, Formats.MOTHUR_OLIGOS)
# define output files
self.trim_fasta = OutputFileList(self.get_outputs('{basename_woext}.trim.fasta', self.fasta_files), Formats.FASTA)
self.trim_names = OutputFileList(self.get_outputs('{basename_woext}.trim.names', self.names_files), Formats.MOTHUR_NAMES)
self.scrap_fasta = OutputFileList(self.get_outputs('{basename_woext}.scrap.fasta', self.fasta_files), Formats.FASTA)
self.scrap_names = OutputFileList(self.get_outputs('{basename_woext}.scrap.names', self.names_files), Formats.MOTHUR_NAMES)
self.groups = OutputFileList(self.get_outputs('{basename_woext}.groups', self.fasta_files), Formats.MOTHUR_GROUPS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
self.input_shhh_fasta = InputFileList(shhh_fasta_file, Formats.FASTA)
self.input_shhh_names = InputFileList(shhh_names_file, Formats.MOTHUR_NAMES)
self.input_oligos = InputFileList(oligos, Formats.MOTHUR_OLIGOS)
self.output_trim_fasta = OutputFileList(self.get_outputs('{basename_woext}.trim.fasta', self.input_shhh_fasta), format="fasta")
self.output_trim_names = OutputFileList(self.get_outputs('{basename_woext}.trim.names', self.input_shhh_names), format="names")
self.output_scrap_fasta = OutputFileList(self.get_outputs('{basename_woext}.scrap.fasta', self.input_shhh_fasta), format="fasta")
self.output_scrap_names = OutputFileList(self.get_outputs('{basename_woext}.scrap.names', self.input_shhh_names), format="names")
self.output_groups = OutputFileList(self.get_outputs('{basename_woext}.groups', self.input_shhh_fasta), format="groups")
def process(self):
print ">>>Process trim.seqs"
print ">>>> With samples barcodes in oligos file"
trimseqs = ShellFunction(self.get_exec_path("mothur") + ' "#trim.seqs(fasta=$1,name=$2,outputdir='+self.output_directory +\
'/,oligos=$3,processors='+self.processors+')"', cmd_format='{EXE} {IN} {OUT}')
trimseqs = MultiMap(trimseqs, inputs=[self.input_shhh_fasta,self.input_shhh_names,self.input_oligos], \
outputs=[self.output_trim_fasta,self.output_scrap_fasta,self.output_trim_names,self.output_scrap_names,self.output_groups])
if self.flip: flip = "T"
else: flip = "F"
trimseqs = ShellFunction(self.get_exec_path("mothur") + ' "#trim.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+ \
'/,oligos=$3,processors='+str(self.processors)+',flip='+flip+',qaverage='+str(self.qaverage)+',maxambig='+ \
str(self.maxambig)+',maxhomop='+str(self.maxhomop)+',minlength='+str(self.minlength)+",maxlength="+ \
str(self.maxlength)+',bdiffs='+str(self.bdiffs)+',pdiffs='+str(self.pdiffs)+')" > $4', cmd_format='{EXE} {IN} {OUT}')
trimseqs = MultiMap(trimseqs, inputs=[self.fasta_files,self.names_files,self.oligos_file], \
outputs=[self.stdout,self.trim_fasta,self.trim_names,self.scrap_fasta,self.scrap_names,self.groups])
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment