Commit a8af9adb authored by Penom Nom's avatar Penom Nom
Browse files

add new components

parent daacdefc
......@@ -19,7 +19,6 @@ from jflow.workflow import Workflow
import re
class RNAdiversity (Workflow):
def process(self):
......@@ -32,25 +31,34 @@ class RNAdiversity (Workflow):
if self.args["reverse_primer"]:
f.write('reverse\t%s\n' % self.args["reverse_primer"])
if self.args["sample_barcodes"]:
dict_barcode = dict(re.split(':|;',self.args["sample_barcodes"])[i:i+2] for i in range(0, len(re.split(':|;',self.args["sample_barcodes"])), 2))
dict_barcode = dict(re.findall("[\w.]+", self.args["sample_barcodes"])[i:i+2] for i in range(0, len(re.findall("[\w.]+", self.args["sample_barcodes"])), 2))
for i,j in dict_barcode.items():
f.write('barcode\t%s\t%s\n' % (j,i))
f.close()
# then process the workflow
sffinfo = self.add_component("MothurSffinfo", [self.args["sff_files"]])
#summaryseqs = self.add_component("MothurSummarySeqs", [sffinfo.output_fasta, processors])
summaryseqs = self.add_component("MothurSummarySeqs", [sffinfo.fasta_files])
shhhflows = self.add_component("MothurShhhFlows",[sffinfo.flow_files])
trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.fasta_files,shhhflows.names_files, oligos_fullpath])
#summaryseqs_trim = self.add_component("MothurSummarySeqs", [trimseqs.output_trim_fasta,processors,trimseqs.output_trim_names],{},"trim")
#uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.output_trim_fasta,trimseqs.output_trim_names])
#summaryseqs_unique = self.add_component("MothurSummarySeqs", [uniqueseqs.output_unique_fasta,processors,uniqueseqs.output_names],{},"unique")
#alignseqs = self.add_component("MothurAlignSeqs",[uniqueseqs.output_unique_fasta,self.args["reference_alignment"], processors])
#summaryseqs_align = self.add_component("MothurSummarySeqs", [alignseqs.output_align,processors,uniqueseqs.output_names],{},"align")
#screenseqs = self.add_component("MothurScreenSeqs",[alignseqs.output_align,trimseqs.output_trim_names,trimseqs.output_groups,processors])
#summaryseqs_screen = self.add_component("MothurSummarySeqs", [screenseqs.output_good_align,processors,screenseqs.output_good_names],{},"screen")
#filterseqs = self.add_component("MothurFilterSeqs",[screenseqs.output_good_align,processors])
#uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.output_filter_fasta,screenseqs.output_good_names],{},"filter")
#precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.output_unique_fasta,uniqueseqs_filter.output_names,screenseqs.output_good_groups])
#summaryseqs_precluster = self.add_component("MothurSummarySeqs", [precluster.output_precluster_fasta,processors,precluster.output_precluster_names],{},"precluster")
#chimerauchime = self.add_component("MothurChimeraUchime",[precluster.output_precluster_fasta,precluster.output_precluster_names,screenseqs.output_good_groups,processors])
summaryseqs_trim = self.add_component("MothurSummarySeqs", [trimseqs.trim_fasta_files,trimseqs.trim_names_files],{},"trim")
uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.trim_fasta_files,trimseqs.trim_names_files])
summaryseqs_unique = self.add_component("MothurSummarySeqs", [uniqueseqs.unique_fasta_files,uniqueseqs.names_files],{},"unique")
alignseqs = self.add_component("MothurAlignSeqs",[uniqueseqs.unique_fasta_files,self.args["reference_alignment"]])
summaryseqs_align = self.add_component("MothurSummarySeqs", [alignseqs.align_files,uniqueseqs.names_files],{},"align")
screenseqs = self.add_component("MothurScreenSeqs",[alignseqs.align_files,uniqueseqs.names_files,trimseqs.groups_files])
summaryseqs_screen = self.add_component("MothurSummarySeqs", [screenseqs.good_align_files,screenseqs.good_names_files],{},"screen")
filterseqs = self.add_component("MothurFilterSeqs",[screenseqs.good_align_files])
uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.filter_fasta_files,screenseqs.good_names_files],{},"filter")
precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.unique_fasta_files,uniqueseqs_filter.names_files,screenseqs.good_groups_files])
summaryseqs_precluster = self.add_component("MothurSummarySeqs", [precluster.precluster_fasta_files,precluster.precluster_names_files],{},"precluster")
chimerauchime = self.add_component("MothurChimeraUchime",[precluster.precluster_fasta_files,precluster.precluster_names_files,screenseqs.good_groups_files])
removeseqs = self.add_component("MothurRemoveSeqs",[chimerauchime.uchime_accnos_files,precluster.precluster_fasta_files,precluster.precluster_names_files,\
screenseqs.good_groups_files])
summaryseqs_removeseqs = self.add_component("MothurSummarySeqs", [removeseqs.pick_fasta_files,removeseqs.pick_names_files],{},"removeseqs")
classifyseqs = self.add_component("MothurClassifySeqs",[removeseqs.pick_fasta_files,removeseqs.pick_names_files,removeseqs.pick_groups_files,self.args["template"],\
self.args["taxonomy"]])
# removegroups = self.add_component("MothurRemoveGroups", [removeseqs.pick_fasta_files,removeseqs.pick_names_files,removeseqs.pick_groups_files,classifyseqs.taxonomy_files])
# krona = self.add_component("MothurKrona", [classifyseqs.taxonomy_files])
# distseqs = self.add_component("MothurDistSeqs", [removeseqs.pick_fasta_files, processors])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -13,20 +10,29 @@ class MothurAlignSeqs(Component):
Align the input sequences against a SILVA-compatible reference alignment using the mothur align.seqs command
"""
def define_parameters(self, shhh_trim_unique_fasta_file, reference_alignment_file, processors=1):
def define_parameters(self, fasta_files, reference_alignment_files, processors=4):
"""
Define align.seqs component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
:param reference_alignment_files: reference alignment files to process
:type reference_alignment_files: str
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
self.processors = processors
self.input_shhh_trim_unique_fasta = InputFileList(shhh_trim_unique_fasta_file, Formats.FASTA)
self.input_reference_alignment = InputFileList(reference_alignment_file, Formats.FASTA)
self.output_align = OutputFileList(self.get_outputs('{basename_woext}.align', self.input_shhh_trim_unique_fasta), format="fasta")
self.output_align_report = OutputFileList(self.get_outputs('{basename_woext}.align.report', self.input_shhh_trim_unique_fasta))
#define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.reference_alignment_files = InputFileList(reference_alignment_files, Formats.FASTA)
#define output files
self.align_files = OutputFileList(self.get_outputs('{basename_woext}.align', self.fasta_files), Formats.FASTA)
self.align_report_files = OutputFileList(self.get_outputs('{basename_woext}.align.report', self.fasta_files))
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process align.seqs"
alignseqs = ShellFunction(self.get_exec_path("mothur") + ' "#align.seqs(fasta=$1,reference=$2,outputdir='+self.output_directory+\
'/,processors='+self.processors+')";', cmd_format='{EXE} {IN} {OUT}')
alignseqs = MultiMap(alignseqs, inputs=[self.input_shhh_trim_unique_fasta,self.input_reference_alignment], outputs=[self.output_align,self.output_align_report])
'/,processors='+str(self.processors)+')" > $3', cmd_format='{EXE} {IN} {OUT}')
alignseqs = MultiMap(alignseqs, inputs=[self.fasta_files,self.reference_alignment_files], outputs=[self.stdout,self.align_files,self.align_report_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -12,22 +9,33 @@ class MothurChimeraUchime(Component):
"""
"""
def define_parameters(self, shhh_trim_unique_good_filter_unique_precluster_fasta_file, shhh_trim_unique_good_filter_unique_precluster_names_file,shhh_good_groups_file,processors):
def define_parameters(self, fasta_files, names_files, groups_files, processors=4):
"""
Define chimera.uchime component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
:param names_files: names files to process
:type names_files: str
:param groups_files: groups files to process
:type groups_files: str
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
self.processors = processors
self.input_shhh_trim_unique_good_filter_unique_precluster_fasta = InputFileList(shhh_trim_unique_good_filter_unique_precluster_fasta_file, Formats.FASTA)
self.input_shhh_trim_unique_good_filter_unique_precluster_names = InputFileList(shhh_trim_unique_good_filter_unique_precluster_names_file, Formats.MOTHUR_NAMES)
self.input_shhh_good_groups = InputFileList(shhh_good_groups_file, Formats.ANY)
self.output_uchime_chimera = OutputFileList(self.get_outputs('{basename_woext}.uchime.chimeras', self.input_shhh_trim_unique_good_filter_unique_precluster_fasta), format="chimera")
self.output_uchime_accnos = OutputFileList(self.get_outputs('{basename_woext}.uchime.accnos', self.input_shhh_trim_unique_good_filter_unique_precluster_fasta))
#define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS)
#define output files
self.uchime_chimera_files = OutputFileList(self.get_outputs('{basename_woext}.uchime.chimeras', self.fasta_files), Formats.FASTA)
self.uchime_accnos_files = OutputFileList(self.get_outputs('{basename_woext}.uchime.accnos', self.fasta_files), Formats.MOTHUR_ACCNOS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process chimera.uchime"
chimerauchime = ShellFunction(self.get_exec_path("mothur") + ' "#chimera.uchime(fasta=$1,name=$2,group=$3,outputdir='+self.output_directory+\
'/,processors='+self.processors+')"', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.input_shhh_trim_unique_good_filter_unique_precluster_fasta,self.input_shhh_trim_unique_good_filter_unique_precluster_names,\
self.input_shhh_good_groups], outputs=[self.output_uchime_chimera,self.output_uchime_accnos])
'/,processors='+str(self.processors)+')" > $4', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.fasta_files,self.names_files,self.groups_files], \
outputs=[self.stdout,self.uchime_chimera_files,self.uchime_accnos_files])
import os
from subprocess import Popen, PIPE
import re
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
......@@ -12,26 +11,44 @@ class MothurClassifySeqs(Component):
"""
"""
def define_parameters(self, shhh_trim_unique_good_filter_unique_precluster_pick_fasta_file, shhh_trim_unique_good_filter_unique_precluster_pick_names_file,shhh_good_pick_groups_file,template_file,taxonomy_file,processors):
def define_parameters(self, fasta_files, names_files, groups_files, template_files, taxonomy_files, cutoff=80, processors=4):
"""
/!\ the generated files vary depending on the names of the template and taxonomy files
Define classify.seqs component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
:param names_files: names files to process
:type names_files: str
:param groups_files: groups files to process
:type groups_files: str
:param cutoff: bootstrap confidence threshold (percent) a taxonomic assignment must reach to be kept
:type cutoff: int
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
self.cutoff = cutoff
self.processors = processors
self.input_shhh_trim_unique_good_filter_unique_precluster_pick_fasta = InputFileList(shhh_trim_unique_good_filter_unique_precluster_pick_fasta_file, Formats.ANY)
self.input_shhh_trim_unique_good_filter_unique_precluster_pick_names = InputFileList(shhh_trim_unique_good_filter_unique_precluster_pick_names_file, Formats.ANY)
self.input_shhh_good_pick_groups = InputFileList(shhh_good_pick_groups_file, Formats.ANY)
self.input_template = InputFileList(shhh_good_groups_file, Formats.ANY)
self.input_taxonomy = InputFileList(shhh_good_groups_file, Formats.ANY)
files_woext=[]
for file in shhh_trim_unique_good_filter_unique_precluster_fasta_file:
files_woext.append(file.split(".")[0])
self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names = OutputFileList(self.get_outputs('{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.names', files_woext))
self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta = OutputFileList(self.get_outputs('{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.fasta', files_woext))
self.output_shhh_good_pick_groups = OutputFileList(self.get_outputs('{basename}.shhh.good.pick.groups', files_woext))
#define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS)
self.template_files = InputFileList(template_files, Formats.ANY)
self.database_taxonomy_files = InputFileList(taxonomy_files, Formats.ANY)
#define output files
database_prefix = re.split("/|\.",template_files)[-2]
self.taxonomy_files = OutputFileList(self.get_outputs('{basename_woext}.'+database_prefix+'.wang.taxonomy', self.fasta_files), Formats.MOTHUR_TAXONOMY)
# self.pick_pds_flip_accnos = OutputFileList(self.get_outputs('{basename_woext}.pick.pds.flip.accnos', self.pick_fasta))
self.tax_summary_files = OutputFileList(self.get_outputs('{basename_woext}.'+database_prefix+'.wang.tax.summary', self.fasta_files), Formats.MOTHUR_TAX_SUMMARY)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process chimera.uchime"
chimerauchime = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,name=$3,group=$4,outputdir='+self.output_directory+'/,processors='+self.processors+')"', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.input_shhh_trim_unique_good_filter_unique_precluster_uchine_accnos,self.input_shhh_trim_unique_good_filter_unique_precluster_fasta,self.input_shhh_trim_unique_good_filter_unique_precluster_names,self.input_shhh_good_groups], outputs=[self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names,self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,self.output_shhh_good_pick_groups])
print ">>>Process classify.seqs"
classifyseqs = ShellFunction(self.get_exec_path("mothur") + ' "#classify.seqs(fasta=$1,name=$2,group=$3,template=$4,taxonomy=$5,cutoff='+str(self.cutoff)+\
',outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $6', cmd_format='{EXE} {IN} {OUT}')
classifyseqs = MultiMap(classifyseqs, inputs=[self.fasta_files,self.names_files,self.groups_files,self.template_files,self.database_taxonomy_files],\
outputs=[self.stdout,self.taxonomy_files,self.tax_summary_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -12,18 +9,32 @@ class MothurFilterSeqs(Component):
"""
"""
def define_parameters(self, shhh_trim_unique_good_align, processors=1):
def define_parameters(self, align_files, vertical="T", trump="'.'",processors=4):
"""
Define filter.seqs component parameters.
:param align_files: align files to process
:type align_files: str
:param vertical: ignore any column that only contains gap characters (i.e. '-' or '.')
:type vertical: string
:param trump: remove a column if the trump character is found at that position ('.', '-', 'N', etc)
:type trump: string
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
self.vertical = vertical
self.trump = trump
self.processors = processors
self.input_shhh_trim_unique_good_align = InputFileList(shhh_trim_unique_good_align, Formats.FASTA)
self.output_filter = OutputFileList(self.get_outputs('{basename_woext}.filter', map(lambda x:x.split('.')[0],self.input_shhh_trim_unique_good_align)))
self.output_filter_fasta = OutputFileList(self.get_outputs('{basename_woext}.filter.fasta', self.input_shhh_trim_unique_good_align), format="fasta")
#define input files
self.align_files = InputFileList(align_files, Formats.FASTA)
#define output files
self.filter_files = OutputFileList(self.get_outputs('{basename_woext}.filter', map(lambda x:x.split('.')[0],self.align_files)))
self.filter_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.filter.fasta', self.align_files), Formats.FASTA)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.align_files))
def process(self):
print ">>>Process align.seqs"
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#filter.seqs(fasta=$1,outputdir='+self.output_directory+'/,processors='+self.processors+')";', cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.input_shhh_trim_unique_good_align], outputs=[self.output_filter,self.output_filter_fasta])
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#filter.seqs(fasta=$1,outputdir='+self.output_directory+'/,vertical='+self.vertical+\
',trump='+self.trump+',processors='+str(self.processors)+')" > $2',cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.align_files], outputs=[self.stdout,self.filter_files,self.filter_fasta_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -12,20 +9,32 @@ class MothurPreCluster(Component):
"""
"""
def define_parameters(self, shhh_trim_unique_good_filter_unique_fasta_file,shhh_trim_unique_good_filter_names_file,shhh_good_groups_file):
def define_parameters(self, fasta_files,names_files, groups_files, diffs=2):
"""
Define pre.cluster component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
:param names_files: names files to process
:type names_files: str
:param groups_files: groups files to process
:type groups_files: str
:param diffs: mismatch threshold: sequences within this number of differences of each other are clustered together
:type diffs: int
"""
self.input_shhh_trim_unique_good_filter_unique_fasta = InputFileList(shhh_trim_unique_good_filter_unique_fasta_file, Formats.FASTA)
self.input_shhh_trim_unique_good_filter_names = InputFileList(shhh_trim_unique_good_filter_names_file, Formats.MOTHUR_NAMES)
self.input_shhh_good_groups = InputFileList(shhh_good_groups_file, Formats.MOTHUR_GROUPS)
self.output_precluster_fasta = OutputFileList(self.get_outputs('{basename_woext}.precluster.fasta', self.input_shhh_trim_unique_good_filter_unique_fasta), format="fasta")
self.output_precluster_names = OutputFileList(self.get_outputs('{basename_woext}.precluster.names', self.input_shhh_trim_unique_good_filter_unique_fasta), format="names")
#define parameters
self.diffs = diffs
#define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS)
#define output files
self.precluster_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.precluster.fasta', self.fasta_files), Formats.FASTA)
self.precluster_names_files = OutputFileList(self.get_outputs('{basename_woext}.precluster.names', self.fasta_files), Formats.MOTHUR_NAMES)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process Pre Cluster"
precluster = ShellFunction(self.get_exec_path("mothur") + ' "#pre.cluster(fasta=$1,name=$2,group=$3,outputdir='+self.output_directory+'/)"', cmd_format='{EXE} {IN} {OUT}')
precluster = MultiMap(precluster, inputs=[self.input_shhh_trim_unique_good_filter_unique_fasta,self.input_shhh_trim_unique_good_filter_names,self.input_shhh_good_groups],\
outputs=[self.output_precluster_fasta,self.output_precluster_names])
precluster = ShellFunction(self.get_exec_path("mothur") + ' "#pre.cluster(fasta=$1,name=$2,group=$3,outputdir='+self.output_directory+'/,diffs='+str(self.diffs)+')" > $4',\
cmd_format='{EXE} {IN} {OUT}')
precluster = MultiMap(precluster, inputs=[self.fasta_files,self.names_files,self.groups_files],outputs=[self.stdout,self.precluster_fasta_files,self.precluster_names_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -12,22 +9,37 @@ class MothurRemoveSeqs(Component):
"""
"""
def define_parameters(self, shhh_trim_unique_good_filter_unique_precluster_uchime_accnos_file, shhh_trim_unique_good_filter_unique_precluster_fasta_file, shhh_trim_unique_good_filter_unique_precluster_names_file,shhh_good_groups_file):
def define_parameters(self, uchime_accnos_files, fasta_files, names_files, groups_files, dups="T"):
"""
Define remove.seqs component parameters.
:param uchime_accnos_files: uchime accnos files to process
:type uchime_accnos_files: str
:param fasta_files: fasta files to process
:type fasta_files: str
:param names_files: names files to process
:type names_files: str
:param groups_files: groups files to process
:type groups_files: str
:param dups: if any sequence in a specific line of the names file is in your .accnos file all sequences in that line will be removed
:type dups: string
"""
self.input_shhh_trim_unique_good_filter_unique_precluster_uchime_accnos = InputFileList(shhh_trim_unique_good_filter_unique_precluster_uchime_accnos_file, Formats.ANY)
self.input_shhh_trim_unique_good_filter_unique_precluster_fasta = InputFileList(shhh_trim_unique_good_filter_unique_precluster_fasta_file, Formats.ANY)
self.input_shhh_trim_unique_good_filter_unique_precluster_names = InputFileList(shhh_trim_unique_good_filter_unique_precluster_names_file, Formats.ANY)
self.input_shhh_good_groups = InputFileList(shhh_good_groups_file, Formats.ANY)
files_woext=[]
for file in shhh_trim_unique_good_filter_unique_precluster_fasta_file:
files_woext.append(file.split(".")[0])
self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names = OutputFileList(self.get_outputs('{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.names', files_woext))
self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta = OutputFileList(self.get_outputs('{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.fasta', files_woext))
self.output_shhh_good_pick_groups = OutputFileList(self.get_outputs('{basename}.shhh.good.pick.groups', files_woext))
#define parameters
self.dups = dups
#define input files
self.uchime_accnos_files = InputFileList(uchime_accnos_files, Formats.ANY)
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS)
#define output files
self.pick_names_files = OutputFileList(self.get_outputs('{basename_woext}.pick.names', self.fasta_files), Formats.MOTHUR_NAMES)
self.pick_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.pick.fasta', self.fasta_files), Formats.FASTA)
self.pick_groups_files = OutputFileList(self.get_outputs('{basename_woext}.pick.groups', self.groups_files), Formats.MOTHUR_GROUPS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
removeseqs = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,name=$3,group=$4,outputdir='+self.output_directory+'/)"', cmd_format='{EXE} {IN} {OUT}')
removeseqs = MultiMap(removeseqs, inputs=[self.input_shhh_trim_unique_good_filter_unique_precluster_uchime_accnos,self.input_shhh_trim_unique_good_filter_unique_precluster_fasta,self.input_shhh_trim_unique_good_filter_unique_precluster_names,self.input_shhh_good_groups], outputs=[self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names,self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,self.output_shhh_good_pick_groups])
print ">>>Process remove.seqs"
removeseqs = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,name=$3,group=$4,dups='+self.dups+',outputdir='+self.output_directory+'/)" > $5',\
cmd_format='{EXE} {IN} {OUT}')
removeseqs = MultiMap(removeseqs, inputs=[self.uchime_accnos_files,self.fasta_files,self.names_files,self.groups_files],\
outputs=[self.stdout,self.pick_names_files,self.pick_fasta_files,self.pick_groups_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -13,23 +10,45 @@ class MothurScreenSeqs(Component):
"""
def define_parameters(self, shhh_trim_unique_align_file, shhh_trim_names_file, shhh_groups_file, processors=1):
def define_parameters(self, align_files, names_files, groups_files, end=27659, optimize="start",criteria=95, processors=4):
"""
Define screen.seqs component parameters.
:param align_files: align files to process
:type align_files: str
:param names_files: names files to process
:type names_files: str
:param groups_files: groups files to process
:type groups_files: str
:param end: remove sequences that end before this position
:type end: int
:param optimize: remove all sequences that started after the position that criteria percent of the sequences do
:type optimize: string
:param criteria: percentage of the sequences that the optimized value must match
:type criteria: int
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
self.end = end
self.optimize = optimize
self.criteria = criteria
self.processors = processors
self.input_shhh_trim_unique_align = InputFileList(shhh_trim_unique_align_file, Formats.FASTA)
self.input_shhh_trim_names = InputFileList(shhh_trim_names_file, Formats.MOTHUR_NAMES)
self.input_shhh_groups = InputFileList(shhh_groups_file, Formats.MOTHUR_GROUPS)
self.output_good_align = OutputFileList(self.get_outputs('{basename_woext}.good.align', self.input_shhh_trim_unique_align), format="fasta")
self.output_bad_accnos = OutputFileList(self.get_outputs('{basename_woext}.bad.accnos', self.input_shhh_trim_unique_align), format="accnos")
self.output_good_names = OutputFileList(self.get_outputs('{basename_woext}.good.names', self.input_shhh_trim_names), format="names")
self.output_good_groups = OutputFileList(self.get_outputs('{basename_woext}.good.groups', self.input_shhh_groups), format="groups")
#define input files
self.align_files = InputFileList(align_files, Formats.FASTA)
self.shhh_trim_names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS)
#define output files
self.good_align_files = OutputFileList(self.get_outputs('{basename_woext}.good.align', self.align_files), Formats.FASTA)
self.bad_accnos_files = OutputFileList(self.get_outputs('{basename_woext}.bad.accnos', self.align_files), Formats.MOTHUR_ACCNOS)
self.good_names_files = OutputFileList(self.get_outputs('{basename_woext}.good.names', self.shhh_trim_names_files), Formats.MOTHUR_NAMES)
self.good_groups_files = OutputFileList(self.get_outputs('{basename_woext}.good.groups', self.groups_files), Formats.MOTHUR_GROUPS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.align_files))
def process(self):
print ">>>Process screen.seqs"
screenseqs = ShellFunction(self.get_exec_path("mothur") + ' "#screen.seqs(fasta=$1,name=$2,group=$3,outputdir='+self.output_directory+'/,processors='+self.processors+')";', cmd_format='{EXE} {IN} {OUT}')
screenseqs = MultiMap(screenseqs, inputs=[self.input_shhh_trim_unique_align,self.input_shhh_trim_names,self.input_shhh_groups], \
outputs=[self.output_good_align,self.output_bad_accnos,self.output_good_names,self.output_good_groups])
screenseqs = ShellFunction(self.get_exec_path("mothur") + ' "#screen.seqs(fasta=$1,name=$2,group=$3,outputdir='+self.output_directory+'/,end='+str(self.end)\
+',optimize='+self.optimize+',criteria='+str(self.criteria)+',processors='+str(self.processors)+')" > $4',cmd_format='{EXE} {IN} {OUT}')
screenseqs = MultiMap(screenseqs, inputs=[self.align_files,self.shhh_trim_names_files,self.groups_files], \
outputs=[self.stdout,self.good_align_files,self.bad_accnos_files,self.good_names_files,self.good_groups_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -13,11 +10,11 @@ class MothurShhhFlows(Component):
The mothur implementation of the PyroNoise component of the AmpliconNoise suite of programs.
"""
def define_parameters(self, flow_files, processors=1):
def define_parameters(self, flow_files, processors=4):
"""
Define shh.flows component parameters.
Define shhh.flows component parameters.
:param flow_files: flow files to process
:type sff_files: str
:type flow_files: str
:param processors: the number of processors to use
:type processors: int
"""
......
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -12,25 +9,36 @@ class MothurSummarySeqs(Component):
"""
"""
def define_parameters(self, fasta_file, processors="1", names_file=None, prefix=None, ):
def define_parameters(self, fasta_files, names_files=None, processors=4):
"""
Define summary.seqs component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
:param processors: the number of processors to use
:type processors: int
:param names_files: names files to process
:type names_files: str
"""
self.processors = processors
self.input_fasta = InputFileList(fasta_file, Formats.FASTA)
self.names_file = names_file
if self.names_file:
self.input_names = InputFileList(self.names_file, Formats.MOTHUR_NAMES)
#define parameters
self.processors = processors
#define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.names_files = names_files
if names_files:
self.names_files = InputFileList(self.names_files, Formats.MOTHUR_NAMES)
#define output files
self.summary_files = OutputFileList(self.get_outputs('{basename_woext}.summary', self.fasta_files), Formats.MOTHUR_SUMMARY)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
self.output_summary = OutputFileList(self.get_outputs('{basename_woext}.summary', self.input_fasta), format="summary")
def process(self):
print ">>>Process summary.seqs"
if self.names_file:
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#summary.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+'/,processors='+self.processors+')"', cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.input_fasta,self.input_names], outputs=[self.output_summary])
if self.names_files:
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#summary.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $3',\
cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.fasta_files,self.names_files], outputs=[self.stdout,self.summary_files])
else:
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#summary.seqs(fasta=$1,outputdir='+self.output_directory+'/,processors='+self.processors+')"', cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.input_fasta], outputs=[self.output_summary])
filterseqs = ShellFunction(self.get_exec_path("mothur") + ' "#summary.seqs(fasta=$1,outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $2',\
cmd_format='{EXE} {IN} {OUT}')
filterseqs = MultiMap(filterseqs, inputs=[self.fasta_files], outputs=[self.stdout,self.summary_files])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -16,11 +13,11 @@ class MothurTrimSeqs(Component):
"""
def define_parameters(self, fasta_files, names_files, oligos_file, flip=True, qaverage=25, maxambig=0, maxhomop=8,
minlength=200, maxlength=600, bdiffs=0, pdiffs=2, processors=1):
minlength=200, maxlength=600, bdiffs=0, pdiffs=2, processors=4):