Commit 96685eb4 authored by Jerome Mariette's avatar Jerome Mariette
Browse files

some updates on r454_diversity

parent 949ed371
......@@ -53,6 +53,7 @@ kronaImportBLAST = /usr/local/bin/ktImportBLAST
blastn = /usr/bin/blastn
fastq_illumina_filter = /usr/bin/fastq_illumina_filter
CollectInsertSizeMetrics = /usr/bin/CollectInsertSizeMetrics.jar
mothur = /usr/bin/mothur
[454_mids]
MID1 = ACGAGTGCGT
......
......@@ -23,6 +23,11 @@ class Formats(object):
FASTQ = "fastq"
FASTA = "fasta"
SFF = "sff"
QUAL = "qual"
FLOW = "flow"
MOTHUR_NAMES = "mothur_names"
MOTHUR_OLIGOS = "mothur_oligos"
MOTHUR_GROUPS = "mothur_groups"
#
# Inputs classes
......
......@@ -14,118 +14,43 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
"""
RNAdiversity pipeline mothur 454
================================
Parameters required
--------------------
"""
from jflow.workflow import Workflow
import re
class RNAdiversity (Workflow):
def process(self):
"""
Test
----
python ~/workspace/nG6/bin/ng6_cli.py r454_diversity --sff-files ~/files_test_mothur/all_sop/cluster/test.sff --sample-barcodes "F003D000:AATGGTAC;F003D002:AACCTGGC;F003D004:TTCGTGGC;F003D006:TTCTTGAC;F003D008:TTCGCGAC;F003D142:TCCAGAAC;F003D144:AAGGCCTC;F003D146:TGACCGTC;F003D148:AGGTTGTC;F003D150:TGGTGAAC;MOCK.GQY1XT001:AACCGTGTC" --forward-primer "CCGTCAATTCMTTTRAGT" --reference-alignment ~/files_test_mothur/silva.bacteria/silva.bacteria.fasta
Create an oligos file
--------------------
Run the workflow
----------------
sffinfo
shhhflows
trimseqs
uniqueseqs
alignseqs
screenseqs
filterseqs
uniqueseqs_filter
precluster
chimerauchime
"""
print ">Run the workflow RNAdiversity "
print
processors = "4"
print ">> Create oligos file"
#create a oligos temp file
# first create a oligos file to give as input to mothur
oligos_fullpath = self.get_temporary_file(suffix=".oligos")
f = open(oligos_fullpath,"w")
if self.args["forward_primer"]:
f.write('forward\t%s\tGroup1\n' % self.args["forward_primer"])
if self.args["reverse_primer"]:
f.write('reverse\t%s\n' % self.args["reverse_primer"])
if self.args["sample_barcodes"]:
dict_barcode = dict(re.split(':|;',self.args["sample_barcodes"])[i:i+2] for i in range(0, len(re.split(':|;',self.args["sample_barcodes"])), 2))
# if len(dict_barcode.keys())%2:
# print "It's odd-numbered please check if you have a name for each barcode."
# else:
for i,j in dict_barcode.items():
f.write('barcode\t%s\t%s\n' % (j,i))
f.close()
print ">>Getting started"
# then process the workflow
sffinfo = self.add_component("MothurSffinfo", [self.args["sff_files"]])
print ">>Summary Seqs : sffinfo"
summaryseqs = self.add_component("MothurSummarySeqs", [sffinfo.output_fasta, processors])
print ">>Reducing sequencing error : Using shhh.flows"
shhhflows = self.add_component("MothurShhhFlows",[sffinfo.output_flow, processors])
trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.output_shhh_fasta,shhhflows.output_shhh_names, oligos_fullpath, processors])
print ">>Summary Seqs : trimseqs"
summaryseqs_trim = self.add_component("MothurSummarySeqs", [trimseqs.output_trim_fasta,processors,trimseqs.output_trim_names],{},"trim")
print ">>Processing improved sequences"
uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.output_trim_fasta,trimseqs.output_trim_names])
print ">>Summary Seqs : uniqueseqs"
summaryseqs_unique = self.add_component("MothurSummarySeqs", [uniqueseqs.output_unique_fasta,processors,uniqueseqs.output_names],{},"unique")
alignseqs = self.add_component("MothurAlignSeqs",[uniqueseqs.output_unique_fasta,self.args["reference_alignment"], processors])
print ">>Summary Seqs : alignseqs"
summaryseqs_align = self.add_component("MothurSummarySeqs", [alignseqs.output_align,processors,uniqueseqs.output_names],{},"align")
screenseqs = self.add_component("MothurScreenSeqs",[alignseqs.output_align,trimseqs.output_trim_names,trimseqs.output_groups,processors])
print ">>Summary Seqs : screenseqs"
summaryseqs_screen = self.add_component("MothurSummarySeqs", [screenseqs.output_good_align,processors,screenseqs.output_good_names],{},"screen")
filterseqs = self.add_component("MothurFilterSeqs",[screenseqs.output_good_align,processors])
uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.output_filter_fasta,screenseqs.output_good_names],{},"filter")
precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.output_unique_fasta,uniqueseqs_filter.output_names,screenseqs.output_good_groups])
print ">>Summary Seqs : precluster"
summaryseqs_precluster = self.add_component("MothurSummarySeqs", [precluster.output_precluster_fasta,processors,precluster.output_precluster_names],{},"precluster")
#
print ">>Removing chimeras"
chimerauchime = self.add_component("MothurChimeraUchime",[precluster.output_precluster_fasta,precluster.output_precluster_names,screenseqs.output_good_groups,processors])
# # removeseqs = self.add_component("RemoveSeqs",[chimerauchime.output_shhh_trim_unique_good_filter_unique_precluster_uchime_accnos ,precluster.output_shhh_trim_unique_good_filter_unique_precluster_fasta,precluster.output_shhh_trim_unique_good_filter_unique_precluster_names,screenseqs.output_shhh_good_groups])
# print ">>Removing \"contaminants\""
# classyseqs = self.add_component("ClassifySeqs",[removeseqs.output_shhh_trim_unique_good_filter_unique_precluster_pick_fasta,removeseqs.output_shhh_trim_unique_good_filter_unique_precluster_pick_names,removeseqs.output_shhh_good_pick_groups,self.args['template'],self.agrs['taxonomy'],processors])
#summaryseqs = self.add_component("MothurSummarySeqs", [sffinfo.output_fasta, processors])
shhhflows = self.add_component("MothurShhhFlows",[sffinfo.flow_files])
#trimseqs = self.add_component("MothurTrimSeqs",[shhhflows.output_shhh_fasta,shhhflows.output_shhh_names, oligos_fullpath, processors])
#summaryseqs_trim = self.add_component("MothurSummarySeqs", [trimseqs.output_trim_fasta,processors,trimseqs.output_trim_names],{},"trim")
#uniqueseqs = self.add_component("MothurUniqueSeqs",[trimseqs.output_trim_fasta,trimseqs.output_trim_names])
#summaryseqs_unique = self.add_component("MothurSummarySeqs", [uniqueseqs.output_unique_fasta,processors,uniqueseqs.output_names],{},"unique")
#alignseqs = self.add_component("MothurAlignSeqs",[uniqueseqs.output_unique_fasta,self.args["reference_alignment"], processors])
#summaryseqs_align = self.add_component("MothurSummarySeqs", [alignseqs.output_align,processors,uniqueseqs.output_names],{},"align")
#screenseqs = self.add_component("MothurScreenSeqs",[alignseqs.output_align,trimseqs.output_trim_names,trimseqs.output_groups,processors])
#summaryseqs_screen = self.add_component("MothurSummarySeqs", [screenseqs.output_good_align,processors,screenseqs.output_good_names],{},"screen")
#filterseqs = self.add_component("MothurFilterSeqs",[screenseqs.output_good_align,processors])
#uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.output_filter_fasta,screenseqs.output_good_names],{},"filter")
#precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.output_unique_fasta,uniqueseqs_filter.output_names,screenseqs.output_good_groups])
#summaryseqs_precluster = self.add_component("MothurSummarySeqs", [precluster.output_precluster_fasta,processors,precluster.output_precluster_names],{},"precluster")
#chimerauchime = self.add_component("MothurChimeraUchime",[precluster.output_precluster_fasta,precluster.output_precluster_names,screenseqs.output_good_groups,processors])
......@@ -10,30 +10,24 @@ from weaver.function import ShellFunction
class MothurSffinfo(Component):
"""
The sffinfo class extracts sequence reads from a .sff file.
Generated output files : *.fasta, *.qual and *.flow
The sffinfo class extracts sequence reads from a .sff file.
Generated output files : *.fasta, *.qual and *.flow
"""
def define_parameters(self, sff_files):
"""
Define parameters sffinfo component.
:param sff_files: Output results of pyrosequencing from the 454
:type sff_files: Binary file
Define sffinfo component parameters.
:param sff_files: a sff file to process
:type sff_files: str
"""
self.input_sff = InputFileList(sff_files, Formats.SFF)
#Generates 3 output files: fasta, qual and flow; by default these are the trimmed ones, named with the file basename and a specific extension
self.output_fasta = OutputFileList(self.get_outputs('{basename_woext}.fasta', self.input_sff), format="fasta")
self.output_qual = OutputFileList(self.get_outputs('{basename_woext}.qual', self.input_sff))
self.output_flow = OutputFileList(self.get_outputs('{basename_woext}.flow', self.input_sff), format="flow")
self.sff_files = InputFileList(sff_files, Formats.SFF)
# generates 3 output files: fasta, qual and flow
self.fasta_files = OutputFileList(self.get_outputs('{basename_woext}.fasta', self.sff_files), Formats.FASTA)
self.qual_files = OutputFileList(self.get_outputs('{basename_woext}.qual', self.sff_files), Formats.QUAL)
self.flow_files = OutputFileList(self.get_outputs('{basename_woext}.flow', self.sff_files), Formats.FLOW)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.sff_files))
def process(self):
print ">>>Process sffinfo"
sffinfo = ShellFunction(self.get_exec_path("mothur") + ' "#sffinfo(sff=$1,outputdir='+self.output_directory+'/)"', cmd_format='{EXE} {IN} {OUT}')
sffinfo = MultiMap(sffinfo, inputs=[self.input_sff], outputs=[self.output_fasta,self.output_qual,self.output_flow])
sffinfo = ShellFunction(self.get_exec_path("mothur") + ' "#sffinfo(sff=$1,outputdir='+self.output_directory+'/)" > $2', cmd_format='{EXE} {IN} {OUT}')
sffinfo = MultiMap(sffinfo, inputs=[self.sff_files], outputs=[self.stdout,self.fasta_files,self.qual_files,self.flow_files])
\ No newline at end of file
......@@ -10,28 +10,28 @@ from weaver.function import ShellFunction
class MothurShhhFlows(Component):
"""
The mothur implementation of the PyroNoise component of the AmpliconNoise suite of programs
The mothur implementation of the PyroNoise component of the AmpliconNoise suite of programs.
"""
def define_parameters(self, flow_file, processors="1"):
def define_parameters(self, flow_files, processors=1):
"""
Define shhh.flows component parameters.
:param flow_files: the flow files to process
:type flow_files: str
:param processors: the number of processors to use
:type processors: int
"""
self.processors = processors
self.input_flow_file = InputFileList(flow_file, Formats.MOTHUR_FLOW)
self.flow_files = InputFileList(flow_files, Formats.FLOW)
self.shhh_qual = OutputFileList(self.get_outputs('{basename_woext}.shhh.qual', self.flow_files), Formats.QUAL)
self.shhh_fasta = OutputFileList(self.get_outputs('{basename_woext}.shhh.fasta', self.flow_files), Formats.FASTA)
self.shhh_names = OutputFileList(self.get_outputs('{basename_woext}.shhh.names', self.flow_files), Formats.MOTHUR_NAMES)
self.shhh_counts = OutputFileList(self.get_outputs('{basename_woext}.shhh.counts', self.flow_files))
self.shhh_groups = OutputFileList(self.get_outputs('{basename_woext}.shhh.groups', self.flow_files), Formats.MOTHUR_GROUPS)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.flow_files))
self.output_shhh_qual = OutputFileList(self.get_outputs('{basename_woext}.shhh.qual', self.input_flow_file))
self.output_shhh_fasta = OutputFileList(self.get_outputs('{basename_woext}.shhh.fasta', self.input_flow_file), format="fasta")
self.output_shhh_names = OutputFileList(self.get_outputs('{basename_woext}.shhh.names', self.input_flow_file), format="names")
self.output_shhh_counts = OutputFileList(self.get_outputs('{basename_woext}.shhh.counts', self.input_flow_file))
self.output_shhh_groups = OutputFileList(self.get_outputs('{basename_woext}.shhh.groups', self.input_flow_file), format="groups")
def process(self):
print ">>>Process shhh.flows"
shhhflows = ShellFunction(self.get_exec_path("mothur") + ' "#shhh.flows(flow=$1,outputdir='+self.output_directory+\
'/,processors='+self.processors+')"', cmd_format='{EXE} {IN} {OUT}')
shhhflows = MultiMap(shhhflows, inputs=[self.input_flow_file], \
outputs=[self.output_shhh_qual,self.output_shhh_fasta,self.output_shhh_names,self.output_shhh_counts,self.output_shhh_groups])
shhhflows = ShellFunction(self.get_exec_path("mothur") + ' "#shhh.flows(flow=$1,outputdir='+self.output_directory + \
'/,processors=' + str(self.processors) + ')" > $2', cmd_format='{EXE} {IN} {OUT}')
shhhflows = MultiMap(shhhflows, inputs=[self.flow_files], \
outputs=[self.stdout,self.shhh_qual,self.shhh_fasta,self.shhh_names,self.shhh_counts,self.shhh_groups])
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment