Commit c39beb06 authored by Penom Nom's avatar Penom Nom
Browse files

Fix bug with empty accnos file in the chimera component

Modify the optimize end in the align component
Split the sff pre-treatment: trimseqs
parent 91da3aa8
......@@ -33,10 +33,13 @@ def screen_seqs(exec_path, output_directory, processors, criteria, optmize_start
param_cmd = ""
if optmize_start_end_log == "True":
log_summary_files = files[1]
criteria_end = 100-float(criteria)
with open(log_summary_files) as log:
for line in log:
if line.startswith(str(criteria)+"%-tile:"):
optimize_start, optimize_end = line.split()[1:3]
optimize_start = line.split()[1]
if line.startswith(str(criteria_end)+"%-tile:"):
optimize_end = line.split()[2]
# if names files and groups files are True
if len (files) == 9:
trim_names_path = files[2]
......
......@@ -19,10 +19,58 @@ from subprocess import Popen, PIPE
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
from weaver.function import PythonFunction, ShellFunction
from ng6.analysis import Analysis
def remove_chimeras(exec_path, output_directory, dups, *files):
    """Drop the sequences listed in an accnos file using mothur's remove.seqs.

    When the accnos file is empty there is nothing to remove, so mothur is
    not called at all: every expected output is created as a symbolic link
    to its matching input instead.

    Parameters:
        exec_path: path of the executable that receives the remove.seqs
            batch command as its single argument.
        output_directory: directory handed to mothur as ``outputdir`` and in
            which the ``<fasta basename>.removeseq.stdout`` log is written.
        dups: value forwarded verbatim to the ``dups`` option.
        files: inputs followed by outputs, in one of three layouts:
            3 items - accnos, fasta, pick_fasta
            5 items - accnos, fasta, companion, pick_fasta, pick_companion
                      (companion is a ``.count_table``, ``.groups`` or
                      ``.names`` file)
            7 items - accnos, fasta, names, groups,
                      pick_fasta, pick_names, pick_groups

    Raises:
        ValueError: if ``files`` does not follow one of the layouts above, or
            if the accnos file is not empty and the 5-item companion has an
            unsupported extension.
    """
    # NOTE(review): the imports are kept inside the function, as in the
    # original; presumably PythonFunction runs this function outside of its
    # module -- confirm before moving them to file level.
    import os, subprocess

    if len(files) not in (3, 5, 7):
        raise ValueError("remove_chimeras expects 3, 5 or 7 files, got %d" % len(files))

    accnos_path = files[0]
    fasta_path = files[1]
    # Inputs (accnos excluded) and outputs come in equal numbers, in the
    # same order: files[1:1 + paired] are inputs, files[1 + paired:] outputs.
    paired = (len(files) - 1) // 2
    sources = files[1:1 + paired]
    targets = files[1 + paired:]

    # Empty accnos file: link the outputs straight to the untouched inputs.
    if os.path.getsize(accnos_path) == 0:
        for source, target in zip(sources, targets):
            if not os.path.islink(target):
                os.symlink(source, target)
        return

    # Work out the companion-file options before touching the filesystem so
    # an unsupported layout fails with a clear message and no stray log file.
    if len(files) == 3:
        companion_options = []
    elif len(files) == 5:
        companion_path = files[2]
        if companion_path.endswith('.count_table'):
            companion_options = ['count=' + companion_path]
        elif companion_path.endswith('.groups'):
            companion_options = ['group=' + companion_path]
        elif companion_path.endswith('.names'):
            # Names-only runs: same option the former shell command used.
            companion_options = ['name=' + companion_path]
        else:
            raise ValueError("unsupported companion file for remove.seqs: " + companion_path)
    else:
        companion_options = ['name=' + files[2], 'group=' + files[3]]

    options = ['accnos=' + accnos_path, 'fasta=' + fasta_path]
    options.extend(companion_options)
    options.append('outputdir=' + output_directory + '/')
    options.append('dups=' + dups)
    cmd = '#remove.seqs(' + ','.join(options) + ')'

    log_path = output_directory + "/" + os.path.splitext(os.path.basename(fasta_path))[0] + '.removeseq.stdout'
    # Binary mode: the pipe output is copied to the log untouched, which
    # works the same whether the pipe yields str (Python 2) or bytes.
    with open(log_path, 'wb') as logfile:
        proc = subprocess.Popen([exec_path, cmd], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        for line in proc.stdout:
            logfile.write(line)
        proc.stdout.close()
        # Reap the child so it is finished before this function returns.
        proc.wait()
class MothurChimeraUchime(Analysis):
def define_parameters(self, fasta_files, reference_files=None, names_files=None, groups_files=None, count_table_files=None, dereplicate= 't', dups="T", processors=1):
......@@ -62,19 +110,19 @@ class MothurChimeraUchime(Analysis):
#define output files
self.uchime_chimera_files = OutputFileList(self.get_outputs('{basename_woext}.uchime.chimeras', self.fasta_files), Formats.FASTA)
self.uchime_accnos_files = OutputFileList(self.get_outputs('{basename_woext}.uchime.accnos', self.fasta_files), Formats.MOTHUR_ACCNOS)
if count_table_files:
self.uchime_count_table_files = OutputFileList(self.get_outputs('{basename_woext}.uchime.pick.count_table', self.fasta_files), Formats.MOTHUR_COUNT_TABLE)
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))
if count_table_files: self.uchime_count_table_files = OutputFileList(self.get_outputs('{basename_woext}.uchime.pick.count_table', self.fasta_files), Formats.MOTHUR_COUNT_TABLE)
else: self.uchime_count_table_files = None
self.stdout = OutputFileList(self.get_outputs('{basename_woext}.removeseqs.stdout', self.fasta_files))
# self.stderr = OutputFileList(self.get_outputs('{basename_woext}.stderr', self.fasta_files))
self.pick_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.pick.fasta', self.fasta_files), Formats.FASTA)
if names_files :
self.pick_names_files = OutputFileList(self.get_outputs('{basename_woext}.pick.names', self.fasta_files), Formats.MOTHUR_NAMES)
if groups_files:
self.pick_groups_files = OutputFileList(self.get_outputs('{basename_woext}.pick.groups', self.groups_files), Formats.MOTHUR_GROUPS)
if count_table_files :
self.good_count_table_files = OutputFileList(self.get_outputs('{basename_woext}.pick.count_table', self.uchime_count_table_files), Formats.MOTHUR_COUNT_TABLE)
self.removeseq_stdout = OutputFileList(self.get_outputs('{basename_woext}.removeseq.stdout', self.fasta_files))
if names_files : self.pick_names_files = OutputFileList(self.get_outputs('{basename_woext}.pick.names', self.fasta_files), Formats.MOTHUR_NAMES)
else : self.pick_names_files = None
if groups_files: self.pick_groups_files = OutputFileList(self.get_outputs('{basename_woext}.pick.groups', self.groups_files), Formats.MOTHUR_GROUPS)
else : self.pick_groups_files = None
if count_table_files : self.good_count_table_files = OutputFileList(self.get_outputs('{basename_woext}.pick.count_table', self.uchime_count_table_files), Formats.MOTHUR_COUNT_TABLE)
else : self.good_count_table_files = None
# self.removeseq_stdout = OutputFileList(self.get_outputs('{basename_woext}.removeseq.stdout', self.fasta_files))
def define_analysis(self):
self.name = "ChimeraUchime"
......@@ -151,34 +199,25 @@ class MothurChimeraUchime(Analysis):
chimerauchime = ShellFunction(self.get_exec_path("mothur") + ' "#chimera.uchime(fasta=$1,reference='+self.reference_files+',outputdir='+self.output_directory+\
'/,dereplicate='+self.dereplicate+',processors='+str(self.processors)+')" >> $2 2>&1', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.fasta_files], outputs=[self.stdout,self.uchime_chimera_files,self.uchime_accnos_files])
removeseqs = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,dups='+self.dups+',outputdir='+self.output_directory+\
'/)" > $3',cmd_format='{EXE} {IN} {OUT}')
removeseqs = MultiMap(removeseqs, inputs=[self.uchime_accnos_files,self.fasta_files],outputs=[self.removeseq_stdout,self.pick_fasta_files])
elif self.names_files and not self.groups_files:
chimerauchime = ShellFunction(self.get_exec_path("mothur") + ' "#chimera.uchime(fasta=$1,name=$2,outputdir='+self.output_directory+\
'/,dereplicate='+self.dereplicate+',processors='+str(self.processors)+')" >> $3 2>&1', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.fasta_files,self.names_files], \
outputs=[self.stdout,self.uchime_chimera_files,self.uchime_accnos_files])
removeseqs = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,name=$3,dups='+self.dups+',outputdir='+self.output_directory+\
'/)" > $4',cmd_format='{EXE} {IN} {OUT}')
removeseqs = MultiMap(removeseqs, inputs=[self.uchime_accnos_files,self.fasta_files,self.names_files],\
outputs=[self.removeseq_stdout,self.pick_fasta_files,self.pick_names_files])
elif self.count_table_files :
chimerauchime = ShellFunction(self.get_exec_path("mothur") + ' "#chimera.uchime(fasta=$1,count=$2,outputdir='+self.output_directory+\
'/,dereplicate='+self.dereplicate+',processors='+str(self.processors)+')" >> $3 2>&1', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.fasta_files,self.count_table_files], \
outputs=[self.stdout,self.uchime_chimera_files,self.uchime_accnos_files,self.uchime_count_table_files])
removeseqs = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,count=$3,dups='+self.dups+',outputdir='+self.output_directory+\
'/)" > $4',cmd_format='{EXE} {IN} {OUT}')
removeseqs = MultiMap(removeseqs, inputs=[self.uchime_accnos_files,self.fasta_files,self.uchime_count_table_files],outputs=[self.removeseq_stdout,self.pick_fasta_files,\
self.good_count_table_files])
else:
chimerauchime = ShellFunction(self.get_exec_path("mothur") + ' "#chimera.uchime(fasta=$1,name=$2,group=$3,outputdir='+self.output_directory+\
'/,dereplicate='+self.dereplicate+',processors='+str(self.processors)+')" >> $4 2>&1', cmd_format='{EXE} {IN} {OUT}')
chimerauchime = MultiMap(chimerauchime, inputs=[self.fasta_files,self.names_files,self.groups_files], \
outputs=[self.stdout,self.uchime_chimera_files,self.uchime_accnos_files])
removeseqs = ShellFunction(self.get_exec_path("mothur") + ' "#remove.seqs(accnos=$1,fasta=$2,name=$3,group=$4,dups='+self.dups+',outputdir='+self.output_directory+\
'/)" > $5',cmd_format='{EXE} {IN} {OUT}')
removeseqs = MultiMap(removeseqs, inputs=[self.uchime_accnos_files,self.fasta_files,self.names_files,self.groups_files],\
outputs=[self.removeseq_stdout,self.pick_names_files,self.pick_fasta_files,self.pick_groups_files])
inputs_filter = [input for input in [self.uchime_accnos_files,self.fasta_files,self.names_files,self.groups_files,self.uchime_count_table_files] if input is not None]
outputs_filter = [output for output in [self.pick_fasta_files,self.pick_names_files,self.pick_groups_files,self.good_count_table_files]\
if output is not None]
removeseqs = PythonFunction(remove_chimeras, cmd_format="{EXE} {ARG} {IN} {OUT}")
removeseqs(arguments=[self.get_exec_path("mothur"),self.output_directory,self.dups],inputs=inputs_filter,outputs=outputs_filter)
import os
from subprocess import Popen, PIPE
from ng6.analysis import Analysis
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.abstraction import Map
from weaver.function import PythonFunction, ShellFunction
from ng6.analysis import Analysis
def get_taxonomy_to_Krona(exec_krona,output_directory,*files):
import glob,re,os
tax_stdout = files[0]
......
......@@ -15,10 +15,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os,re
from subprocess import Popen, PIPE
# from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
......@@ -31,7 +29,7 @@ class MothurTrimSeqs(Analysis):
"""
"""
def define_parameters(self, fasta_files, names_files, groups_files, forward_primer, reverse_primer=None, sample_barcodes=None, flip=False, qaverage=25, maxambig=0, maxhomop=8, \
def define_parameters(self, fasta_files, names_files, groups_files, forward_primer, sample_barcodes=None, reverse_primer=None, flip=False, qaverage=25, maxambig=0, maxhomop=8, \
minlength=200, maxlength=600, pdiffs=2,processors=1):
"""
Define sffinfo component parameters.
......@@ -155,10 +153,11 @@ class MothurTrimSeqs(Analysis):
for name,sequence in dict_barcode.items():
f_primers.write('barcode\t%s\t%s\n' % (sequence,name))
f_primers.close()
trimseqs = ShellFunction(self.get_exec_path("mothur") + ' "#trim.seqs(fasta=$1,name=$2,outputdir='+self.output_directory+ \
'/,oligos='+self.primers_file+',flip='+flip+',qaverage='+str(self.qaverage)+',maxambig='+ \
str(self.maxambig)+',maxhomop='+str(self.maxhomop)+',minlength='+str(self.minlength)+",maxlength="+ \
str(self.maxlength)+',pdiffs='+str(self.pdiffs)+')" > $3', cmd_format='{EXE} {IN} {OUT}')
'/,oligos='+self.primers_file+',flip='+flip+',qaverage='+str(self.qaverage)+',maxambig='+ \
str(self.maxambig)+',maxhomop='+str(self.maxhomop)+',minlength='+str(self.minlength)+",maxlength="+ \
str(self.maxlength)+',pdiffs='+str(self.pdiffs)+')" > $3', cmd_format='{EXE} {IN} {OUT}')
trimseqs = MultiMap(trimseqs, inputs=[self.fasta_files,self.names_files], outputs=[self.p_trim_stdout,self.p_trim_fasta_files,self.p_trim_names_files,\
self.p_scrap_fasta_files,self.p_scrap_names_files,self.p_groups_files])
\ No newline at end of file
self.p_scrap_fasta_files,self.p_scrap_names_files,self.p_groups_files])
\ No newline at end of file
......@@ -23,27 +23,26 @@ class RNAdiversity (NG6Workflow):
# clean reads and alignment
if self.args["input_files"].endswith('.sff'):
sff_pre_treatment = self.add_component("MothurShhhFlows", kwargs={'sff_files':self.args["input_files"],'sample_barcodes':self.args["sample_barcodes"]})
trim_fasta = self.add_component("MothurTrimSeqs", kwargs={'fasta_files':sff_pre_treatment.shhh_fasta_files,'names_files':sff_pre_treatment.names_files,\
'groups_files':sff_pre_treatment.groups_files,'forward_primer':self.args["forward_primer"],'reverse_primer':self.args["reverse_primer"],\
'sample_barcodes':self.args["sample_barcodes"]},parent = sff_pre_treatment)
trim_fasta = self.add_component("MothurTrimSeqs", kwargs={'fasta_files':sff_pre_treatment.shhh_fasta_files,'names_files':sff_pre_treatment.names_files,\
'groups_files':sff_pre_treatment.groups_files,'forward_primer':self.args["forward_primer"],'reverse_primer':self.args["reverse_primer"],'sample_barcodes':self.args["sample_barcodes"]}\
,parent = sff_pre_treatment)
# elif self.args["input_files"].endswith('.fasta'):
# trim_fasta = self.add_component("MothurTrimSeqs", kwargs={'fasta_files':self.args["input_files"],'forward_primer':self.args["forward_primer"],\
# 'reverse_primer':self.args["reverse_primer"]}, component_prefix="fasta")
uniqueseqs = self.add_component("MothurUniqueSeqs", [trim_fasta.p_trim_fasta_files, trim_fasta.p_trim_names_files])
alignseqs = self.add_component("MothurAlign", kwargs={'fasta_files':uniqueseqs.unique_fasta_files,'reference_alignment_files':self.args["reference_alignment"],\
'names_files':uniqueseqs.unique_names_files,'groups_files':trim_fasta.p_groups_files},parent = trim_fasta)
filterseqs = self.add_component("MothurFilterSeqs",[alignseqs.good_fasta_files])
uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.filtered_fasta_files,alignseqs.good_names_files],component_prefix="filter")
precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.unique_fasta_files,uniqueseqs_filter.unique_names_files,alignseqs.good_groups_files])
chimerauchime = self.add_component("MothurChimeraUchime",kwargs={'fasta_files':precluster.precluster_fasta_files,'names_files':precluster.precluster_names_files,\
'groups_files':alignseqs.good_groups_files}, parent = alignseqs)
# classify reads with the provided taxonomy
classifyseqs = self.add_component("MothurClassifySeqs",[chimerauchime.pick_fasta_files,self.args["classify_template"],self.args["classify_taxonomy"],\
chimerauchime.pick_names_files,chimerauchime.pick_groups_files],parent=chimerauchime)
# OTUs approach
distseqs = self.add_component("MothurDistSeqs", [chimerauchime.pick_fasta_files])
cluster = self.add_component("MothurCluster", [distseqs.dist_files,chimerauchime.pick_names_files])
otuanalysis = self.add_component("MothurOTUAnalysis", kwargs={'an_list_files':cluster.an_list_files,'groups_files':chimerauchime.pick_groups_files,\
'names_files':chimerauchime.pick_names_files,'taxonomy_files':classifyseqs.taxonomy_files,'label':self.args["label"],'tree_label':self.args["label"]},parent=chimerauchime)
# for the OTU analysis, maybe set up EspritTree directly
\ No newline at end of file
uniqueseqs = self.add_component("MothurUniqueSeqs", [trim_fasta.p_trim_fasta_files, trim_fasta.p_trim_names_files])
alignseqs = self.add_component("MothurAlign", kwargs={'fasta_files':uniqueseqs.unique_fasta_files,'reference_alignment_files':self.args["reference_alignment"],\
'names_files':uniqueseqs.unique_names_files,'groups_files':trim_fasta.p_groups_files},parent = trim_fasta)
filterseqs = self.add_component("MothurFilterSeqs",[alignseqs.good_fasta_files])
uniqueseqs_filter = self.add_component("MothurUniqueSeqs",[filterseqs.filtered_fasta_files,alignseqs.good_names_files],component_prefix="filter")
precluster = self.add_component("MothurPreCluster",[uniqueseqs_filter.unique_fasta_files,uniqueseqs_filter.unique_names_files,alignseqs.good_groups_files])
chimerauchime = self.add_component("MothurChimeraUchime",kwargs={'fasta_files':precluster.precluster_fasta_files,'names_files':precluster.precluster_names_files,\
'groups_files':alignseqs.good_groups_files}, parent = alignseqs)
# classify reads with the provided taxonomy
classifyseqs = self.add_component("MothurClassifySeqs",[chimerauchime.pick_fasta_files,self.args["classify_template"],self.args["classify_taxonomy"],\
chimerauchime.pick_names_files,chimerauchime.pick_groups_files],parent=chimerauchime)
# OTUs approach
distseqs = self.add_component("MothurDistSeqs", [chimerauchime.pick_fasta_files])
cluster = self.add_component("MothurCluster", [distseqs.dist_files,chimerauchime.pick_names_files])
otuanalysis = self.add_component("MothurOTUAnalysis", kwargs={'an_list_files':cluster.an_list_files,'groups_files':chimerauchime.pick_groups_files,\
'names_files':chimerauchime.pick_names_files,'taxonomy_files':classifyseqs.taxonomy_files,'label':self.args["label"],'tree_label':self.args["label"]},parent=chimerauchime)
# for the OTU analysis, maybe set up EspritTree directly
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment