Commit d8bb72ff authored by Jerome Mariette
Browse files

No commit message

No commit message
parent a460e703
......@@ -29,6 +29,9 @@ class Formats(object):
MOTHUR_OLIGOS = "mothur_oligos"
MOTHUR_GROUPS = "mothur_groups"
MOTHUR_ACCNOS = "mothur_accnos"
MOTHUR_TAXONOMY = "mothur_taxonomy"
MOTHUR_TAX_SUMMARY = "mothur_tax_summary"
MOTHUR_DIST = "mothur_dist"
#
# Inputs classes
......
......@@ -23,7 +23,7 @@ class RNAdiversity (Workflow):
def process(self):
# first create a oligos file to give as input to mothur
# create an oligos file to give as input to mothur
oligos_fullpath = self.get_temporary_file(suffix=".oligos")
f = open(oligos_fullpath,"w")
if self.args["forward_primer"]:
......@@ -36,7 +36,7 @@ class RNAdiversity (Workflow):
f.write('barcode\t%s\t%s\n' % (j,i))
f.close()
# then process the workflow
# clean reads and alignment
sffinfo = self.add_component("MothurSffinfo", [self.args["sff_files"]])
shhhflows = self.add_component("MothurShhhFlows", [sffinfo.flow_files])
trimseqs = self.add_component("MothurTrimSeqs", [shhhflows.fasta_files, oligos_fullpath, shhhflows.names_files])
......@@ -53,15 +53,24 @@ class RNAdiversity (Workflow):
'groups_files':screenseqs.good_groups_files})
removeseqs = self.add_component("MothurRemoveSeqs",[chimerauchime.uchime_accnos_files,precluster.precluster_fasta_files,precluster.precluster_names_files,\
screenseqs.good_groups_files])
#Analysis MothurClassify
classifyseqs = self.add_component("MothurClassifySeqs",[removeseqs.pick_fasta_files,self.args["template"],self.args["taxonomy"],removeseqs.pick_names_files,\
# classify reads with the provided taxonomy
classifyseqs = self.add_component("MothurClassifySeqs",[removeseqs.pick_fasta_files,self.args["classify_template"],self.args["classify_taxonomy"],removeseqs.pick_names_files,\
removeseqs.pick_groups_files])
krona_classifyseqs = self.add_component("MothurKrona", [classifyseqs.taxonomy_files,removeseqs.pick_names_files,removeseqs.pick_groups_files],{},"classifyseqs")
# removegroups = self.add_component("MothurRemoveGroups", [removeseqs.pick_fasta_files,removeseqs.pick_names_files,removeseqs.pick_groups_files,chimerauchime.uchime_accnos_files])
#Analysis OTUAnalysis
# krona_classifyseqs = self.add_component("MothurKrona", [classifyseqs.taxonomy_files,removeseqs.pick_names_files,removeseqs.pick_groups_files],component_prefix="classifyseqs")
# OTUs approach
distseqs = self.add_component("MothurDistSeqs", [removeseqs.pick_fasta_files])
cluster = self.add_component("MothurCluster", [distseqs.dist_files,removeseqs.pick_names_files])
makeshared = self.add_component("MothurMakeShared", [cluster.an_list_files,removeseqs.pick_groups_files])
treeshared = self.add_component("MothurTreeShared", [makeshared.an_shared_files])
classifyotu = self.add_component("MothurClassifyOtu", [cluster.an_list_files,classifyseqs.taxonomy_files,removeseqs.pick_names_files,removeseqs.pick_groups_files])
krona_classifyotu = self.add_component("MothurKrona", [classifyotu.cons_taxonomy_files],{},"classifyotu")
# TODO: missing the sub-sampling step (using the counts) to normalize the OTUs
# makeshared = self.add_component("MothurMakeShared", [cluster.an_list_files,removeseqs.pick_groups_files])
# classifyotu = self.add_component("MothurClassifyOtu", [cluster.an_list_files,classifyseqs.taxonomy_files,removeseqs.pick_names_files,removeseqs.pick_groups_files])
# krona_classifyotu = self.add_component("MothurKrona", [classifyotu.cons_taxonomy_files],{},"classifyotu")
# Alpha diversity analysis
# TODO: missing summary.single
# TODO: missing rarefaction.single
# Beta diversity analysis
# treeshared = self.add_component("MothurTreeShared", [makeshared.an_shared_files])
# for the OTU analysis, maybe set up EspritTree directly
\ No newline at end of file
......@@ -36,7 +36,6 @@ class MothurChimeraUchime(Component):
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
self.processors = processors
#define input files
......
import re
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
......@@ -7,14 +24,10 @@ from jflow.abstraction import MultiMap
from weaver.function import PythonFunction, ShellFunction
class MothurClassifySeqs(Component):
"""
"""
def define_parameters(self, fasta_files, template_files, taxonomy_files, names_files=None, groups_files=None, cutoff=80, processors=4):
def define_parameters(self, fasta_files, template_files, taxonomy_files, names_files=None, groups_files=None, cutoff=80, processors=1):
"""
/!\ the names of the generated files vary depending on the names of the template and taxonomy files
Define classify.seqs component parameters.
:param fasta_files: fasta files to process
:type fasta_files: str
......@@ -27,11 +40,10 @@ class MothurClassifySeqs(Component):
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
# define parameters
self.cutoff = cutoff
self.processors = processors
#define input files
# define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
self.template_files = InputFile(template_files, Formats.ANY)
self.database_taxonomy_files = InputFile(taxonomy_files, Formats.ANY)
......@@ -39,15 +51,13 @@ class MothurClassifySeqs(Component):
else: self.names_files = None
if groups_files : self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS)
else: self.groups_files = None
#define output files
database_prefix = re.split("/|\.",template_files)[-2]
# define output files
database_prefix = os.path.basename(taxonomy_files).split(".")[-2]
self.taxonomy_files = OutputFileList(self.get_outputs('{basename_woext}.'+database_prefix+'.wang.taxonomy', self.fasta_files), Formats.MOTHUR_TAXONOMY)
# self.pick_pds_flip_accnos = OutputFileList(self.get_outputs('{basename_woext}.pick.pds.flip.accnos', self.pick_fasta))
self.tax_summary_files = OutputFileList(self.get_outputs('{basename_woext}.'+database_prefix+'.wang.tax.summary', self.fasta_files), Formats.MOTHUR_TAX_SUMMARY)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process classify.seqs"
if self.names_files and not self.groups_files:
classifyseqs = ShellFunction(self.get_exec_path("mothur") + ' "#classify.seqs(fasta=$1,template='+self.template_files+',taxonomy='+self.database_taxonomy_files+\
',name=$2,cutoff='+str(self.cutoff)+',outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $3', cmd_format='{EXE} {IN} {OUT}')
......@@ -60,6 +70,3 @@ class MothurClassifySeqs(Component):
classifyseqs = ShellFunction(self.get_exec_path("mothur") + ' "#classify.seqs(fasta=$1,template='+self.template_files+',taxonomy='+self.database_taxonomy_files+\
',cutoff='+str(self.cutoff)+',outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $2', cmd_format='{EXE} {IN} {OUT}')
classifyseqs = MultiMap(classifyseqs, inputs=[self.fasta_files],outputs=[self.stdout,self.taxonomy_files,self.tax_summary_files])
......@@ -36,22 +36,20 @@ class MothurCluster(Component):
:type cutoff: int
:param method: three clustering methods (Nearest neighbor, Furthest neighbor, Average neighbor)
:type method: string
"""
#define parameters
"""
# define parameters
self.cutoff = cutoff
self.method = method
#define input files
# define input files
self.dist_files = InputFileList(dist_files, Formats.MOTHUR_DIST)
self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES)
#define output files
# define output files
self.an_sabund_files = OutputFileList(self.get_outputs('{basename_woext}.an.sabund', self.dist_files))
self.an_rabund_files = OutputFileList(self.get_outputs('{basename_woext}.an.rabund', self.dist_files))
self.an_list_files = OutputFileList(self.get_outputs('{basename_woext}.an.list', self.dist_files))
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.dist_files))
def process(self):
print ">>>Process cluster"
cluster = ShellFunction(self.get_exec_path("mothur") + ' "#cluster(column=$1,name=$2,outputdir='+self.output_directory+'/,cutoff='+str(self.cutoff)+',method='+self.method+')"\
> $3',cmd_format='{EXE} {IN} {OUT}')
cluster = MultiMap(cluster, inputs=[self.dist_files,self.names_files], outputs=[self.stdout,self.an_sabund_files,self.an_rabund_files,self.an_list_files])
......@@ -23,8 +23,6 @@ from weaver.function import ShellFunction
class MothurDistSeqs(Component):
"""
"""
def define_parameters(self, fasta_files, cutoff=0.10, processors=1):
"""
......@@ -34,18 +32,16 @@ class MothurDistSeqs(Component):
:type cutoff: int
:param processors: the number of processors to use
:type processors: int
"""
#define parameters
"""
# define parameters
self.processors = processors
#define input files
# define input files
self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
#define output files
# define output files
self.dist_files = OutputFileList(self.get_outputs('{basename_woext}.dist', self.fasta_files), Formats.MOTHUR_DIST)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
def process(self):
print ">>>Process dist.seqs"
distseqs = ShellFunction(self.get_exec_path("mothur") + ' "#dist.seqs(fasta=$1,outputdir='+self.output_directory+'/,processors='+str(self.processors)+')" > $2', \
cmd_format='{EXE} {IN} {OUT}')
distseqs = MultiMap(distseqs, inputs=[self.fasta_files], outputs=[self.stdout,self.dist_files])
\ No newline at end of file
......@@ -55,16 +55,16 @@ sample_barcodes.name = sample_barcodes
sample_barcodes.flag = --sample-barcodes
sample_barcodes.help = Which barcodes were used in the experimentation : "sampleName1:barcodeString1;sampleName2:barcodeString2"
template.name = template
template.flag = --template
template.help = template
template.required = True
taxonomy.name = taxonomy
taxonomy.flag = --taxonomy
taxonomy.help = taxonomy
taxonomy.required = True
reference_chimera.name = reference_chimera
reference_chimera.flag = --reference-chimera
reference_chimera.help = Where is stored the reference chimera
\ No newline at end of file
reference_chimera.help = Which reference databank should be used to detect chimeras
classify_template.name = classify_template
classify_template.flag = --classify-template
classify_template.help = Which template file should be used to classify reads
classify_template.required = True
classify_taxonomy.name = classify_taxonomy
classify_taxonomy.flag = --classify-taxonomy
classify_taxonomy.help = Which taxonomy file should be used to classify reads
classify_taxonomy.required = True
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment