Commit 6bfe815a authored by Penom Nom's avatar Penom Nom
Browse files

roche 454 pipeline to assess diversity

parent 59c2cf73
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
RNAdiversity pipeline mothur 454
================================
Parameters required
--------------------
"""
from jflow.workflow import Workflow
import re
class RNAdiversity (Workflow):
    """Workflow that writes a mothur oligos file and chains the Mothur* components."""

    @staticmethod
    def _parse_sample_barcodes(spec):
        """
        Turn a "name:barcode;name:barcode" string into a {name: barcode} dict.

        Empty entries (for example a trailing ';') are ignored. When a sample
        name appears twice the last barcode wins.

        :param spec: value of the --sample-barcodes option
        :raises ValueError: if an entry is not exactly one "name:barcode" pair
        """
        barcodes = {}
        for entry in spec.split(";"):
            entry = entry.strip()
            if not entry:
                continue
            name, sep, barcode = entry.partition(":")
            if not sep or not name or not barcode or ":" in barcode:
                raise ValueError(
                    "Invalid sample barcode entry %r: expected 'name:barcode'" % entry)
            barcodes[name] = barcode
        return barcodes

    def _write_oligos_file(self, oligos_path):
        """Write the primer and barcode lines read from self.args into oligos_path."""
        # The context manager closes the file even if a write or the parsing fails.
        with open(oligos_path, "w") as oligos:
            if self.args["forward_primer"]:
                oligos.write('forward\t%s\tGroup1\n' % self.args["forward_primer"])
            if self.args["reverse_primer"]:
                oligos.write('reverse\t%s\n' % self.args["reverse_primer"])
            if self.args["sample_barcodes"]:
                barcodes = self._parse_sample_barcodes(self.args["sample_barcodes"])
                for name, barcode in barcodes.items():
                    oligos.write('barcode\t%s\t%s\n' % (barcode, name))

    def process(self):
        """
        Test
        ----
        python ~/workspace/nG6/bin/ng6_cli.py r454_diversity --sff-files ~/files_test_mothur/all_sop/cluster/test.sff --sample-barcodes "F003D000:AATGGTAC;F003D002:AACCTGGC;F003D004:TTCGTGGC;F003D006:TTCTTGAC;F003D008:TTCGCGAC;F003D142:TCCAGAAC;F003D144:AAGGCCTC;F003D146:TGACCGTC;F003D148:AGGTTGTC;F003D150:TGGTGAAC;MOCK.GQY1XT001:AACCGTGTC" --forward-primer "CCGTCAATTCMTTTRAGT" --reference-alignment ~/files_test_mothur/silva.bacteria/silva.bacteria.fasta
        Create a oligos file
        --------------------
        Run the workflow
        ----------------
        sffinfo
        shhhflows
        trimseqs
        uniqueseqs
        alignseqs
        screenseqs
        filterseqs
        uniqueseqs_filter
        precluster
        chimerauchime
        """
        print(">Run the workflow RNAdiversity ")
        print("")
        # Passed to the components as a string.
        processors = "4"

        print(">> Create oligos file")
        # Create a temporary oligos file.
        oligos_fullpath = self.get_temporary_file(suffix=".oligos")
        self._write_oligos_file(oligos_fullpath)

        print(">>Getting started")
        sffinfo = self.add_component("MothurSffinfo", [self.args["sff_files"]])
        print(">>Summary Seqs : sffinfo")
        self.add_component("MothurSummarySeqs", [sffinfo.output_fasta, processors])

        print(">>Reducing sequencing error : Using shhh.flows")
        shhhflows = self.add_component("MothurShhhFlows", [sffinfo.output_flow, processors])
        trimseqs = self.add_component(
            "MothurTrimSeqs",
            [shhhflows.output_shhh_fasta, shhhflows.output_shhh_names, oligos_fullpath, processors])
        print(">>Summary Seqs : trimseqs")
        self.add_component(
            "MothurSummarySeqs",
            [trimseqs.output_trim_fasta, processors, trimseqs.output_trim_names], {}, "trim")

        print(">>Processing improved sequences")
        uniqueseqs = self.add_component(
            "MothurUniqueSeqs", [trimseqs.output_trim_fasta, trimseqs.output_trim_names])
        print(">>Summary Seqs : uniqueseqs")
        self.add_component(
            "MothurSummarySeqs",
            [uniqueseqs.output_unique_fasta, processors, uniqueseqs.output_names], {}, "unique")

        alignseqs = self.add_component(
            "MothurAlignSeqs",
            [uniqueseqs.output_unique_fasta, self.args["reference_alignment"], processors])
        print(">>Summary Seqs : alignseqs")
        self.add_component(
            "MothurSummarySeqs",
            [alignseqs.output_align, processors, uniqueseqs.output_names], {}, "align")

        screenseqs = self.add_component(
            "MothurScreenSeqs",
            [alignseqs.output_align, trimseqs.output_trim_names, trimseqs.output_groups, processors])
        print(">>Summary Seqs : screenseqs")
        self.add_component(
            "MothurSummarySeqs",
            [screenseqs.output_good_align, processors, screenseqs.output_good_names], {}, "screen")

        filterseqs = self.add_component(
            "MothurFilterSeqs", [screenseqs.output_good_align, processors])
        uniqueseqs_filter = self.add_component(
            "MothurUniqueSeqs",
            [filterseqs.output_filter_fasta, screenseqs.output_good_names], {}, "filter")
        precluster = self.add_component(
            "MothurPreCluster",
            [uniqueseqs_filter.output_unique_fasta, uniqueseqs_filter.output_names,
             screenseqs.output_good_groups])
        print(">>Summary Seqs : precluster")
        self.add_component(
            "MothurSummarySeqs",
            [precluster.output_precluster_fasta, processors, precluster.output_precluster_names],
            {}, "precluster")

        print(">>Removing chimeras")
        chimerauchime = self.add_component(
            "MothurChimeraUchime",
            [precluster.output_precluster_fasta, precluster.output_precluster_names,
             screenseqs.output_good_groups, processors])
# # removeseqs = self.add_component("RemoveSeqs",[chimerauchime.output_shhh_trim_unique_good_filter_unique_precluster_uchime_accnos ,precluster.output_shhh_trim_unique_good_filter_unique_precluster_fasta,precluster.output_shhh_trim_unique_good_filter_unique_precluster_names,screenseqs.output_shhh_good_groups])
# print ">>Removing \"contaminants\""
# classyseqs = self.add_component("ClassifySeqs",[removeseqs.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,removeseqs.output_shhh_trim_unique_good_filter_unique_precluster_pick_names,removeseqs.output_shhh_good_pick_groups,self.args['template'],self.agrs['taxonomy'],processors])
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
\ No newline at end of file
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurAlignSeqs(Component):
    """
    Alignment of our data using the align.seqs command by aligning our data to the SILVA-compatible alignment database reference alignment
    """

    def define_parameters(self, shhh_trim_unique_fasta_file, reference_alignment_file, processors=1):
        """
        Declare the inputs and outputs of the align.seqs step.

        :param shhh_trim_unique_fasta_file: FASTA file(s) to align
        :param reference_alignment_file: FASTA file(s) passed to mothur as ``reference=``
        :param processors: value inserted after ``processors=`` in the command (int or str)
        """
        self.processors = processors
        self.input_shhh_trim_unique_fasta = InputFileList(shhh_trim_unique_fasta_file, Formats.FASTA)
        self.input_reference_alignment = InputFileList(reference_alignment_file, Formats.FASTA)
        self.output_align = OutputFileList(
            self.get_outputs('{basename_woext}.align', self.input_shhh_trim_unique_fasta),
            format="fasta")
        self.output_align_report = OutputFileList(
            self.get_outputs('{basename_woext}.align.report', self.input_shhh_trim_unique_fasta))

    def process(self):
        """Build the mothur align.seqs shell command and map it over the inputs."""
        print(">>>Process align.seqs")
        # %s formatting accepts the integer default of `processors`; plain string
        # concatenation raised TypeError when the default (1) was used.
        command = '%s "#align.seqs(fasta=$1,reference=$2,outputdir=%s/,processors=%s)";' % (
            self.get_exec_path("mothur"), self.output_directory, self.processors)
        alignseqs = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(alignseqs,
                 inputs=[self.input_shhh_trim_unique_fasta, self.input_reference_alignment],
                 outputs=[self.output_align, self.output_align_report])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurChimeraUchime(Component):
    """
    Wrapper around the mothur chimera.uchime command.
    Declared output files : *.uchime.chimeras and *.uchime.accnos
    """

    def define_parameters(self, shhh_trim_unique_good_filter_unique_precluster_fasta_file, shhh_trim_unique_good_filter_unique_precluster_names_file,shhh_good_groups_file,processors):
        """
        Declare the fasta, names and groups inputs and the two uchime outputs.

        :param processors: text inserted after ``processors=`` in the command
        """
        self.processors = processors

        fasta_inputs = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_fasta_file, Formats.FASTA)
        self.input_shhh_trim_unique_good_filter_unique_precluster_fasta = fasta_inputs
        self.input_shhh_trim_unique_good_filter_unique_precluster_names = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_names_file, Formats.MOTHUR_NAMES)
        self.input_shhh_good_groups = InputFileList(shhh_good_groups_file, Formats.ANY)

        # Both outputs are named after the input fasta files.
        chimera_paths = self.get_outputs('{basename_woext}.uchime.chimeras', fasta_inputs)
        self.output_uchime_chimera = OutputFileList(chimera_paths, format="chimera")
        accnos_paths = self.get_outputs('{basename_woext}.uchime.accnos', fasta_inputs)
        self.output_uchime_accnos = OutputFileList(accnos_paths)

    def process(self):
        """Assemble the chimera.uchime command line and map it over the inputs."""
        print(">>>Process chimera.uchime")
        command_parts = [
            self.get_exec_path("mothur"),
            ' "#chimera.uchime(fasta=$1,name=$2,group=$3,outputdir=',
            self.output_directory,
            '/,processors=',
            self.processors,
            ')"',
        ]
        uchime_call = ShellFunction("".join(command_parts), cmd_format='{EXE} {IN} {OUT}')
        sources = [
            self.input_shhh_trim_unique_good_filter_unique_precluster_fasta,
            self.input_shhh_trim_unique_good_filter_unique_precluster_names,
            self.input_shhh_good_groups,
        ]
        targets = [self.output_uchime_chimera, self.output_uchime_accnos]
        MultiMap(uchime_call, inputs=sources, outputs=targets)
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurClassifySeqs(Component):
    """
    Wrapper around the mothur classify.seqs command, run on the picked
    fasta, names and groups files with a template and a taxonomy file.
    """

    def define_parameters(self, shhh_trim_unique_good_filter_unique_precluster_pick_fasta_file, shhh_trim_unique_good_filter_unique_precluster_pick_names_file,shhh_good_pick_groups_file,template_file,taxonomy_file,processors):
        """
        Declare the inputs and outputs of the classification step.

        :param shhh_trim_unique_good_filter_unique_precluster_pick_fasta_file: fasta file(s) to classify
        :param shhh_trim_unique_good_filter_unique_precluster_pick_names_file: matching names file(s)
        :param shhh_good_pick_groups_file: matching groups file(s)
        :param template_file: reference sequences file(s)
        :param taxonomy_file: taxonomy file(s) for the reference sequences
        :param processors: value inserted after ``processors=`` in the command (int or str)
        """
        self.processors = processors
        self.input_shhh_trim_unique_good_filter_unique_precluster_pick_fasta = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_pick_fasta_file, Formats.ANY)
        self.input_shhh_trim_unique_good_filter_unique_precluster_pick_names = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_pick_names_file, Formats.ANY)
        self.input_shhh_good_pick_groups = InputFileList(shhh_good_pick_groups_file, Formats.ANY)
        # These two used to be built from an undefined name (shhh_good_groups_file),
        # which raised NameError as soon as the component was configured.
        self.input_template = InputFileList(template_file, Formats.ANY)
        self.input_taxonomy = InputFileList(taxonomy_file, Formats.ANY)

        # Keep only the text before the first '.' of each input fasta path.
        files_woext = [
            path.split(".")[0]
            for path in shhh_trim_unique_good_filter_unique_precluster_pick_fasta_file
        ]
        # NOTE(review): these output names look copied from the remove.seqs
        # component; confirm which files classify.seqs really writes (taxonomy
        # and summary files are expected) and adjust before relying on them.
        self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names = OutputFileList(
            self.get_outputs('{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.names', files_woext))
        self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta = OutputFileList(
            self.get_outputs('{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.fasta', files_woext))
        self.output_shhh_good_pick_groups = OutputFileList(
            self.get_outputs('{basename}.shhh.good.pick.groups', files_woext))

    def process(self):
        """Build the mothur classify.seqs shell command and map it over the inputs."""
        print(">>>Process classify.seqs")
        # The previous body ran remove.seqs on attributes this class never defines.
        # NOTE(review): `reference=` mirrors the align.seqs component of this
        # pipeline; older mothur releases spell it `template=` - confirm against
        # the installed version.
        command = ('%s "#classify.seqs(fasta=$1,name=$2,group=$3,reference=$4,taxonomy=$5,'
                   'outputdir=%s/,processors=%s)"') % (
            self.get_exec_path("mothur"), self.output_directory, self.processors)
        classifyseqs = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(classifyseqs,
                 inputs=[self.input_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,
                         self.input_shhh_trim_unique_good_filter_unique_precluster_pick_names,
                         self.input_shhh_good_pick_groups,
                         self.input_template,
                         self.input_taxonomy],
                 outputs=[self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names,
                          self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,
                          self.output_shhh_good_pick_groups])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurFilterSeqs(Component):
    """
    Wrapper around the mothur filter.seqs command.
    Declared output files : *.filter and *.filter.fasta
    """

    def define_parameters(self, shhh_trim_unique_good_align, processors=1):
        """
        Declare the alignment input and the filter outputs.

        :param shhh_trim_unique_good_align: alignment file(s) to filter
        :param processors: value inserted after ``processors=`` in the command (int or str)
        """
        self.processors = processors
        self.input_shhh_trim_unique_good_align = InputFileList(shhh_trim_unique_good_align, Formats.FASTA)
        # The .filter output is named from the text before the first '.' of each input path.
        filter_roots = [path.split('.')[0] for path in self.input_shhh_trim_unique_good_align]
        self.output_filter = OutputFileList(
            self.get_outputs('{basename_woext}.filter', filter_roots))
        self.output_filter_fasta = OutputFileList(
            self.get_outputs('{basename_woext}.filter.fasta', self.input_shhh_trim_unique_good_align),
            format="fasta")

    def process(self):
        """Build the mothur filter.seqs shell command and map it over the inputs."""
        # The log line used to announce "align.seqs" although this step runs filter.seqs.
        print(">>>Process filter.seqs")
        # %s formatting accepts the integer default of `processors`; plain string
        # concatenation raised TypeError when the default (1) was used.
        command = '%s "#filter.seqs(fasta=$1,outputdir=%s/,processors=%s)";' % (
            self.get_exec_path("mothur"), self.output_directory, self.processors)
        filterseqs = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(filterseqs,
                 inputs=[self.input_shhh_trim_unique_good_align],
                 outputs=[self.output_filter, self.output_filter_fasta])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurPreCluster(Component):
    """
    Wrapper around the mothur pre.cluster command.
    Declared output files : *.precluster.fasta and *.precluster.names
    """

    def define_parameters(self, shhh_trim_unique_good_filter_unique_fasta_file,shhh_trim_unique_good_filter_names_file,shhh_good_groups_file):
        """
        Declare the fasta, names and groups inputs and the two precluster outputs.
        """
        fasta_inputs = InputFileList(shhh_trim_unique_good_filter_unique_fasta_file, Formats.FASTA)
        self.input_shhh_trim_unique_good_filter_unique_fasta = fasta_inputs
        self.input_shhh_trim_unique_good_filter_names = InputFileList(
            shhh_trim_unique_good_filter_names_file, Formats.MOTHUR_NAMES)
        self.input_shhh_good_groups = InputFileList(shhh_good_groups_file, Formats.MOTHUR_GROUPS)

        # Both outputs are named after the input fasta files.
        fasta_paths = self.get_outputs('{basename_woext}.precluster.fasta', fasta_inputs)
        self.output_precluster_fasta = OutputFileList(fasta_paths, format="fasta")
        names_paths = self.get_outputs('{basename_woext}.precluster.names', fasta_inputs)
        self.output_precluster_names = OutputFileList(names_paths, format="names")

    def process(self):
        """Assemble the pre.cluster command line and map it over the inputs."""
        print(">>>Process Pre Cluster")
        command_parts = [
            self.get_exec_path("mothur"),
            ' "#pre.cluster(fasta=$1,name=$2,group=$3,outputdir=',
            self.output_directory,
            '/)"',
        ]
        precluster_call = ShellFunction("".join(command_parts), cmd_format='{EXE} {IN} {OUT}')
        sources = [
            self.input_shhh_trim_unique_good_filter_unique_fasta,
            self.input_shhh_trim_unique_good_filter_names,
            self.input_shhh_good_groups,
        ]
        targets = [self.output_precluster_fasta, self.output_precluster_names]
        MultiMap(precluster_call, inputs=sources, outputs=targets)
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurRemoveSeqs(Component):
    """
    Wrapper around the mothur remove.seqs command.
    Declared output files : *.pick.names, *.pick.fasta and *.pick.groups
    """

    def define_parameters(self, shhh_trim_unique_good_filter_unique_precluster_uchime_accnos_file, shhh_trim_unique_good_filter_unique_precluster_fasta_file, shhh_trim_unique_good_filter_unique_precluster_names_file,shhh_good_groups_file):
        """
        Declare the accnos, fasta, names and groups inputs and the three pick outputs.
        """
        self.input_shhh_trim_unique_good_filter_unique_precluster_uchime_accnos = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_uchime_accnos_file, Formats.ANY)
        self.input_shhh_trim_unique_good_filter_unique_precluster_fasta = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_fasta_file, Formats.ANY)
        self.input_shhh_trim_unique_good_filter_unique_precluster_names = InputFileList(
            shhh_trim_unique_good_filter_unique_precluster_names_file, Formats.ANY)
        self.input_shhh_good_groups = InputFileList(shhh_good_groups_file, Formats.ANY)

        # Keep only the text before the first '.' of each input fasta path.
        files_woext = [
            fasta_path.split(".")[0]
            for fasta_path in shhh_trim_unique_good_filter_unique_precluster_fasta_file
        ]

        names_paths = self.get_outputs(
            '{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.names', files_woext)
        self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names = OutputFileList(names_paths)
        fasta_paths = self.get_outputs(
            '{basename}.shhh.trim.unique.good.filter.unique.precluster.pick.fasta', files_woext)
        self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta = OutputFileList(fasta_paths)
        groups_paths = self.get_outputs('{basename}.shhh.good.pick.groups', files_woext)
        self.output_shhh_good_pick_groups = OutputFileList(groups_paths)

    def process(self):
        """Assemble the remove.seqs command line and map it over the inputs."""
        command_parts = [
            self.get_exec_path("mothur"),
            ' "#remove.seqs(accnos=$1,fasta=$2,name=$3,group=$4,outputdir=',
            self.output_directory,
            '/)"',
        ]
        remove_call = ShellFunction("".join(command_parts), cmd_format='{EXE} {IN} {OUT}')
        sources = [
            self.input_shhh_trim_unique_good_filter_unique_precluster_uchime_accnos,
            self.input_shhh_trim_unique_good_filter_unique_precluster_fasta,
            self.input_shhh_trim_unique_good_filter_unique_precluster_names,
            self.input_shhh_good_groups,
        ]
        targets = [
            self.output_shhh_trim_unique_good_filter_unique_precluster_pick_names,
            self.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,
            self.output_shhh_good_pick_groups,
        ]
        MultiMap(remove_call, inputs=sources, outputs=targets)
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurScreenSeqs(Component):
    """
    Wrapper around the mothur screen.seqs command.
    Declared output files : *.good.align, *.bad.accnos, *.good.names and *.good.groups
    """

    def define_parameters(self, shhh_trim_unique_align_file, shhh_trim_names_file, shhh_groups_file, processors=1):
        """
        Declare the alignment, names and groups inputs and the screened outputs.

        :param shhh_trim_unique_align_file: alignment file(s) to screen
        :param shhh_trim_names_file: matching names file(s)
        :param shhh_groups_file: matching groups file(s)
        :param processors: value inserted after ``processors=`` in the command (int or str)
        """
        self.processors = processors
        self.input_shhh_trim_unique_align = InputFileList(shhh_trim_unique_align_file, Formats.FASTA)
        self.input_shhh_trim_names = InputFileList(shhh_trim_names_file, Formats.MOTHUR_NAMES)
        self.input_shhh_groups = InputFileList(shhh_groups_file, Formats.MOTHUR_GROUPS)
        self.output_good_align = OutputFileList(
            self.get_outputs('{basename_woext}.good.align', self.input_shhh_trim_unique_align),
            format="fasta")
        self.output_bad_accnos = OutputFileList(
            self.get_outputs('{basename_woext}.bad.accnos', self.input_shhh_trim_unique_align),
            format="accnos")
        self.output_good_names = OutputFileList(
            self.get_outputs('{basename_woext}.good.names', self.input_shhh_trim_names),
            format="names")
        self.output_good_groups = OutputFileList(
            self.get_outputs('{basename_woext}.good.groups', self.input_shhh_groups),
            format="groups")

    def process(self):
        """Build the mothur screen.seqs shell command and map it over the inputs."""
        print(">>>Process screen.seqs")
        # %s formatting accepts the integer default of `processors`; plain string
        # concatenation raised TypeError when the default (1) was used.
        command = '%s "#screen.seqs(fasta=$1,name=$2,group=$3,outputdir=%s/,processors=%s)";' % (
            self.get_exec_path("mothur"), self.output_directory, self.processors)
        screenseqs = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(screenseqs,
                 inputs=[self.input_shhh_trim_unique_align, self.input_shhh_trim_names,
                         self.input_shhh_groups],
                 outputs=[self.output_good_align, self.output_bad_accnos,
                          self.output_good_names, self.output_good_groups])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurSffinfo(Component):
    """
    The sffinfo class extracts sequence reads from a .sff file.
    Generated output files : *.fasta, *.qual and *.flow
    """

    def define_parameters(self, sff_files):
        """
        Define the parameters of the sffinfo component.

        :param sff_files: Output results of pyrosequencing from the 454
        :type sff_files: Binary file
        """
        sff_inputs = InputFileList(sff_files, Formats.SFF)
        self.input_sff = sff_inputs

        # Three output files are declared (fasta, qual and flow), each named
        # after the input file's basename with its own extension.
        fasta_paths = self.get_outputs('{basename_woext}.fasta', sff_inputs)
        self.output_fasta = OutputFileList(fasta_paths, format="fasta")
        qual_paths = self.get_outputs('{basename_woext}.qual', sff_inputs)
        self.output_qual = OutputFileList(qual_paths)
        flow_paths = self.get_outputs('{basename_woext}.flow', sff_inputs)
        self.output_flow = OutputFileList(flow_paths, format="flow")

    def process(self):
        """Assemble the sffinfo command line and map it over the sff inputs."""
        print(">>>Process sffinfo")
        command_parts = [
            self.get_exec_path("mothur"),
            ' "#sffinfo(sff=$1,outputdir=',
            self.output_directory,
            '/)"',
        ]
        sffinfo_call = ShellFunction("".join(command_parts), cmd_format='{EXE} {IN} {OUT}')
        targets = [self.output_fasta, self.output_qual, self.output_flow]
        MultiMap(sffinfo_call, inputs=[self.input_sff], outputs=targets)
\ No newline at end of file
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurShhhFlows(Component):
    """
    The mothur implementation of the PyroNoise component of the AmpliconNoise suite of programs
    """

    def define_parameters(self, flow_file, processors="1"):
        """
        Declare the flow input and the five shhh.* outputs named after it.

        :param flow_file: flow file(s) handed to shhh.flows
        :param processors: text inserted after ``processors=`` in the command
        """
        self.processors = processors
        flow_inputs = InputFileList(flow_file, Formats.MOTHUR_FLOW)
        self.input_flow_file = flow_inputs

        qual_paths = self.get_outputs('{basename_woext}.shhh.qual', flow_inputs)
        self.output_shhh_qual = OutputFileList(qual_paths)
        fasta_paths = self.get_outputs('{basename_woext}.shhh.fasta', flow_inputs)
        self.output_shhh_fasta = OutputFileList(fasta_paths, format="fasta")
        names_paths = self.get_outputs('{basename_woext}.shhh.names', flow_inputs)
        self.output_shhh_names = OutputFileList(names_paths, format="names")
        counts_paths = self.get_outputs('{basename_woext}.shhh.counts', flow_inputs)
        self.output_shhh_counts = OutputFileList(counts_paths)
        groups_paths = self.get_outputs('{basename_woext}.shhh.groups', flow_inputs)
        self.output_shhh_groups = OutputFileList(groups_paths, format="groups")

    def process(self):
        """Assemble the shhh.flows command line and map it over the flow inputs."""
        print(">>>Process shhh.flows")
        command_parts = [
            self.get_exec_path("mothur"),
            ' "#shhh.flows(flow=$1,outputdir=',
            self.output_directory,
            '/,processors=',
            self.processors,
            ')"',
        ]
        shhh_call = ShellFunction("".join(command_parts), cmd_format='{EXE} {IN} {OUT}')
        targets = [
            self.output_shhh_qual,
            self.output_shhh_fasta,
            self.output_shhh_names,
            self.output_shhh_counts,
            self.output_shhh_groups,
        ]
        MultiMap(shhh_call, inputs=[self.input_flow_file], outputs=targets)
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurSummarySeqs(Component):
    """
    Wrapper around the mothur summary.seqs command.
    Declared output file : *.summary
    """

    def define_parameters(self, fasta_file, processors="1", names_file=None, prefix=None):
        """
        Declare the fasta input, the optional names input and the summary output.

        :param fasta_file: fasta file(s) to summarise
        :param processors: text inserted after ``processors=`` in the command
        :param names_file: optional names file(s); when given, passed as ``name=``
        :param prefix: not used by this method
        """
        self.processors = processors
        self.input_fasta = InputFileList(fasta_file, Formats.FASTA)
        self.names_file = names_file
        # input_names only exists when a names file was supplied.
        if self.names_file:
            self.input_names = InputFileList(self.names_file, Formats.MOTHUR_NAMES)
        summary_paths = self.get_outputs('{basename_woext}.summary', self.input_fasta)
        self.output_summary = OutputFileList(summary_paths, format="summary")

    def process(self):
        """Assemble the summary.seqs command line, with or without ``name=``, and map it."""
        print(">>>Process summary.seqs")
        mothur = self.get_exec_path("mothur")
        # Tail shared by both variants of the command.
        tail = 'outputdir=' + self.output_directory + '/,processors=' + self.processors + ')"'
        if self.names_file:
            command = mothur + ' "#summary.seqs(fasta=$1,name=$2,' + tail
            sources = [self.input_fasta, self.input_names]
        else:
            command = mothur + ' "#summary.seqs(fasta=$1,' + tail
            sources = [self.input_fasta]
        summary_call = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(summary_call, inputs=sources, outputs=[self.output_summary])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurTrimSeqs(Component):
    """
    The command will enable you to trim off primer sequences and barcodes, use the barcode information to generate a group file
    and split a fasta file into sub-files, screen sequences based on the qual file that comes from 454 sequencers, cull sequences
    based on sequence length and the presence of ambiguous bases and get the reverse complement of your sequences.
    """

    def define_parameters(self, shhh_fasta_file, shhh_names_file, oligos, processors=1):
        """
        Declare the fasta, names and oligos inputs and the trim/scrap/groups outputs.

        :param shhh_fasta_file: fasta file(s) to trim
        :param shhh_names_file: matching names file(s)
        :param oligos: oligos file(s) passed to mothur as ``oligos=``
        :param processors: value inserted after ``processors=`` in the command (int or str)
        """
        self.processors = processors
        self.input_shhh_fasta = InputFileList(shhh_fasta_file, Formats.FASTA)
        self.input_shhh_names = InputFileList(shhh_names_file, Formats.MOTHUR_NAMES)
        self.input_oligos = InputFileList(oligos, Formats.MOTHUR_OLIGOS)
        self.output_trim_fasta = OutputFileList(
            self.get_outputs('{basename_woext}.trim.fasta', self.input_shhh_fasta), format="fasta")
        self.output_trim_names = OutputFileList(
            self.get_outputs('{basename_woext}.trim.names', self.input_shhh_names), format="names")
        self.output_scrap_fasta = OutputFileList(
            self.get_outputs('{basename_woext}.scrap.fasta', self.input_shhh_fasta), format="fasta")
        self.output_scrap_names = OutputFileList(
            self.get_outputs('{basename_woext}.scrap.names', self.input_shhh_names), format="names")
        self.output_groups = OutputFileList(
            self.get_outputs('{basename_woext}.groups', self.input_shhh_fasta), format="groups")

    def process(self):
        """Build the mothur trim.seqs shell command and map it over the inputs."""
        print(">>>Process trim.seqs")
        print(">>>> With samples barcodes in oligos file")
        # %s formatting accepts the integer default of `processors`; plain string
        # concatenation raised TypeError when the default (1) was used.
        command = '%s "#trim.seqs(fasta=$1,name=$2,outputdir=%s/,oligos=$3,processors=%s)"' % (
            self.get_exec_path("mothur"), self.output_directory, self.processors)
        trimseqs = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(trimseqs,
                 inputs=[self.input_shhh_fasta, self.input_shhh_names, self.input_oligos],
                 outputs=[self.output_trim_fasta, self.output_scrap_fasta, self.output_trim_names,
                          self.output_scrap_names, self.output_groups])
import os
from subprocess import Popen, PIPE
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurUniqueSeqs(Component):
"""
The unique.seqs command returns only the unique sequences found in a fasta-formatted sequence file
and a file that indicates those sequences that are identical to the reference sequence.
"""
def define_parameters(self, shhh_trim_fasta_file, shhh_trim_names_file, prefix=None):
"""
"""
self.input_shhh_trim_fasta = InputFileList(shhh_trim_fasta_file, Formats.FASTA)
self.input_shhh_trim_names = InputFileList(shhh_trim_names_file, Formats.MOTHUR_NAMES)
self.output_unique_fasta = OutputFileList(self.get_outputs('{basename_woext}.unique.fasta', self.input_shhh_trim_fasta), format="fasta")
self.output_names = OutputFileList(self.get_outputs('{basename_woext}.names', self.input_shhh_trim_fasta