Commit ab5b8634 authored by Penom Nom
Browse files

delete old mothur component

parent 9b606774
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurAlignSeqs(Component):
    """
    Align fasta sequences against a reference alignment (e.g. a SILVA-compatible
    alignment database) with mothur's align.seqs command.
    """

    def define_parameters(self, fasta_files, reference_alignment_files, search="kmers", ksize=8, align="needleman", processors=1):
        """
        Define align.seqs component parameters.
          :param fasta_files: fasta files to process
          :type fasta_files: str
          :param reference_alignment_files: reference alignment files to process
          :type reference_alignment_files: str
          :param search: method used to find the template sequence (kmer, blast or suffix tree searching);
                         the historical default spelling "kmers" is accepted and sent to mothur as "kmer"
          :type search: str
          :param ksize: with the kmer option, size of the kmers that are used
          :type ksize: int
          :param align: alignment method - blastn, gotoh or needleman
          :type align: str
          :param processors: the number of processors to use
          :type processors: int
        """
        # define parameters
        # search/ksize/align used to be accepted and then dropped; they are now kept
        # so that process() can forward them to mothur.
        self.search = "kmer" if search == "kmers" else search
        self.ksize = ksize
        self.align = align
        self.processors = processors
        # define input files
        self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
        self.reference_alignment_files = InputFileList(reference_alignment_files, Formats.FASTA)
        # define output files (all named after the input fasta files)
        self.align_files = OutputFileList(self.get_outputs('{basename_woext}.align', self.fasta_files), Formats.FASTA)
        self.align_report_files = OutputFileList(self.get_outputs('{basename_woext}.align.report', self.fasta_files))
        self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))

    def process(self):
        """
        Declare the align.seqs command and map it over the fasta/reference inputs.

        $1 and $2 are the fasta and reference inputs; $3 is the first output
        (the .stdout file), which receives mothur's console output.
        """
        command = ''.join([
            self.get_exec_path("mothur"),
            ' "#align.seqs(fasta=$1,reference=$2',
            ',search=', str(self.search),
            ',ksize=', str(self.ksize),
            ',align=', str(self.align),
            ',outputdir=', self.output_directory, '/',
            ',processors=', str(self.processors),
            ')" > $3',
        ])
        alignseqs = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(alignseqs,
                 inputs=[self.fasta_files, self.reference_alignment_files],
                 outputs=[self.stdout, self.align_files, self.align_report_files])
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurClassifyOtu(Component):
    """
    Assign a consensus taxonomy to the sequences of each OTU (mothur classify.otu).
    """

    def define_parameters(self, an_list_files, taxonomy_files, names_files=None, groups_files=None, count_table_files=None, label=0.01):
        """
          :param an_list_files: an_list files to process
          :type an_list_files: str
          :param taxonomy_files: taxonomy files to process
          :type taxonomy_files: str
          :param names_files: names files to process (optional)
          :type names_files: str
          :param groups_files: groups files to process (optional)
          :type groups_files: str
          :param count_table_files: count table files to process (optional)
          :type count_table_files: str
          :param label: label passed to classify.otu and embedded in the output file names
          :type label: float
        """
        # parameters
        self.label = label

        # mandatory inputs
        self.an_list_files = InputFileList(an_list_files)
        self.taxonomy_files = InputFileList(taxonomy_files, Formats.MOTHUR_TAXONOMY)

        # optional inputs stay None when they are not supplied
        self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES) if names_files else None
        self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS) if groups_files else None
        self.count_table_files = InputFileList(count_table_files, Formats.MOTHUR_COUNT_TABLE) if count_table_files else None

        # outputs, named after the list files and the label
        label_suffix = '{basename_woext}.' + str(self.label)
        self.cons_taxonomy_files = OutputFileList(self.get_outputs(label_suffix + '.cons.taxonomy', self.an_list_files), Formats.MOTHUR_TAXONOMY)
        self.cons_tax_taxonomy_files = OutputFileList(self.get_outputs(label_suffix + '.cons.tax.summary', self.an_list_files))
        self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.an_list_files))

    def process(self):
        """
        Declare the classify.otu command for whichever optional inputs are set.

        Names files take precedence (with the groups files when both are set);
        the count table is only used when no names files were given; groups
        files without names files are not passed to mothur.
        """
        print(">>>Process classify.otu")

        # (extra mothur options, extra input lists, stdout redirection) per case
        if self.names_files and self.groups_files:
            extra_options = 'name=$3,group=$4,'
            extra_inputs = [self.names_files, self.groups_files]
            redirect = ' > $5'
        elif self.names_files:
            extra_options = 'name=$3,'
            extra_inputs = [self.names_files]
            redirect = ' > $4'
        elif self.count_table_files:
            extra_options = 'count=$3,'
            extra_inputs = [self.count_table_files]
            redirect = ' > $4'
        else:
            extra_options = ''
            extra_inputs = []
            redirect = '> $3'

        command = (self.get_exec_path("mothur")
                   + ' "#classify.otu(list=$1,taxonomy=$2,' + extra_options
                   + 'outputdir=' + self.output_directory + '/,label=' + str(self.label) + ')"'
                   + redirect)
        cluster = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        cluster = MultiMap(cluster,
                           inputs=[self.an_list_files, self.taxonomy_files] + extra_inputs,
                           outputs=[self.stdout, self.cons_taxonomy_files, self.cons_tax_taxonomy_files])
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import PythonFunction, ShellFunction
def classifyseqs_with_sample(exec_path, output_directory, *files):
    """
    Put names/groups files next to a taxonomy file, then run the import command on it.

    The names and groups files are made available as <taxonomy prefix>.names and
    <taxonomy prefix>.groups. Without a sequence list they are symlinked as is.
    With one, they are rewritten so that the names file only keeps the
    representative sequences found in the list, and the groups file only keeps
    the sequences still referenced by the remaining names entries.

    Imports are local because the only caller visible here wraps this function
    in a PythonFunction -- presumably it is run as a standalone script; confirm
    before hoisting them to module level.

    :param exec_path: command to run as "<exec_path> <taxonomy> -o <krona output>"
    :type exec_path: str
    :param output_directory: unused, kept for the caller's argument layout
    :type output_directory: str
    :param files: krona output, taxonomy, names, groups and, optionally, a file
                  listing one sequence identifier to keep per line
    :raises RuntimeError: if the command exits with a non-zero status
    """
    import os
    import subprocess

    krona_files = files[0]
    taxonomy_files = files[1]
    names_files = files[2]
    groups_files = files[3]
    prefix = os.path.splitext(taxonomy_files)[0]
    dst_names_files = prefix + '.names'
    dst_groups_files = prefix + '.groups'

    if len(files) == 4:
        # names and groups files are already up to date: just link them
        for source, link in ((names_files, dst_names_files), (groups_files, dst_groups_files)):
            if not os.path.islink(link):
                os.symlink(source, link)
    else:
        # names and groups files are not up to date: filter them on the sequence list
        list_seqs_files = files[4]
        with open(list_seqs_files) as handle:
            unique_seqs_final = set(handle.read().splitlines())

        dico_names = {}
        with open(names_files) as handle:
            for line in handle:
                fields = line.split()
                if fields and fields[0] in unique_seqs_final:
                    dico_names[fields[0]] = fields[1].split(',')

        allsequences = set(item for sublist in dico_names.values() for item in sublist)
        dico_groups = {}
        with open(groups_files) as handle:
            for line in handle:
                fields = line.split()
                if fields and fields[0] in allsequences:
                    dico_groups[fields[0]] = fields[1]

        # A link left by a previous link-only run would make the writes below
        # overwrite the source names/groups files, so drop it first.
        for link in (dst_names_files, dst_groups_files):
            if os.path.islink(link):
                os.unlink(link)

        with open(dst_names_files, 'w') as names_final_files:
            for key, members in dico_names.items():
                names_final_files.write('%s\t%s\n' % (key, ','.join(members)))
        with open(dst_groups_files, 'w') as groups_final_files:
            for id_seq, sample in dico_groups.items():
                groups_final_files.write('%s\t%s\n' % (id_seq, sample))

    cmd = str(exec_path) + " " + str(taxonomy_files) + ' -o ' + str(krona_files)
    # Drain the command's stdout and wait for it, so a failure is reported
    # instead of being silently dropped.
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('command failed with exit status %s: %s' % (proc.returncode, cmd))
class MothurKrona(Component):
    """
    Build Krona HTML reports from mothur taxonomy files with ImportMothurTaxonomy.pl.
    """

    def define_parameters(self, taxonomy_files, names_files=None, groups_files=None, fasta_files=None):
        """
          :param taxonomy_files: taxonomy files to process
          :type taxonomy_files: str
          :param names_files: names files to process (optional)
          :type names_files: str
          :param groups_files: groups files to process (optional)
          :type groups_files: str
          :param fasta_files: fasta files whose sequence identifiers are listed with list.seqs (optional)
          :type fasta_files: str
        """
        # inputs; the optional ones stay None when they are not supplied
        self.taxonomy_files = InputFileList(taxonomy_files, Formats.MOTHUR_TAXONOMY)
        self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES) if names_files else None
        self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS) if groups_files else None
        self.fasta_files = InputFileList(fasta_files, Formats.FASTA) if fasta_files else None

        # outputs
        self.krona_files = OutputFileList(self.get_outputs('{basename_woext}.html', self.taxonomy_files), Formats.HTML)
        self.listseqs_files = OutputFileList(self.get_outputs('{basename_woext}.accnos', self.fasta_files)) if fasta_files else None
        # both stdout lists are derived from the taxonomy files with the same pattern
        self.stdout_listseqs = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.taxonomy_files))
        self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.taxonomy_files))

    def process(self):
        """
        Declare the Krona import.

        Without both names and groups files the Perl script is run directly on
        the taxonomy files. Otherwise classifyseqs_with_sample is used, after an
        optional list.seqs step on the fasta files.
        """
        if not (self.names_files and self.groups_files):
            # $1 = taxonomy file, $2 = stdout file, $3 = krona html file
            krona = ShellFunction(self.get_exec_path("ImportMothurTaxonomy.pl") + ' $1 -o $3 > $2', cmd_format='{EXE} {IN} {OUT}')
            krona = MultiMap(krona, inputs=[self.taxonomy_files], outputs=[self.stdout, self.krona_files])
            return

        if self.fasta_files:
            listseqs_command = self.get_exec_path("mothur") + ' "#list.seqs(fasta=$1,outputdir=' + self.output_directory + '/)" > $2'
            listseqs = ShellFunction(listseqs_command, cmd_format='{EXE} {IN} {OUT}')
            listseqs = MultiMap(listseqs, inputs=[self.fasta_files], outputs=[self.stdout_listseqs, self.listseqs_files])

        # listseqs_files is None when no fasta files were given and is then left out
        candidates = [self.taxonomy_files, self.names_files, self.groups_files, self.listseqs_files]
        inputs_filter = [candidate for candidate in candidates if candidate is not None]
        krona = PythonFunction(classifyseqs_with_sample, cmd_format="{EXE} {ARG} {OUT} {IN}")
        krona(arguments=[self.get_exec_path("ImportMothurTaxonomy.pl"), self.output_directory],
              outputs=[self.krona_files],
              inputs=inputs_filter)
\ No newline at end of file
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurMakeShared(Component):
    """
    Create a table that indicates the number of times an OTU shows up in each sample
    (mothur make.shared).
    """

    def define_parameters(self, an_list_files, groups_files=None, count_table_files=None, label="unique-0.01"):
        """
          :param an_list_files: an list files to process
          :type an_list_files: str
          :param groups_files: groups files to process (optional)
          :type groups_files: str
          :param count_table_files: count table files to process (optional, used when no groups files are given)
          :type count_table_files: str
          :param label: label(s) passed to make.shared
          :type label: str
        """
        # define parameters
        self.label = label
        # define input files; the optional ones stay None when they are not supplied
        self.an_list_files = InputFileList(an_list_files)
        self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS) if groups_files else None
        self.count_table_files = InputFileList(count_table_files, Formats.MOTHUR_COUNT_TABLE) if count_table_files else None
        # define output files
        self.an_shared_files = OutputFileList(self.get_outputs('{basename_woext}.shared', self.an_list_files))
        self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.an_list_files))

    def process(self):
        """
        Declare the make.shared command, using the groups files or else the count table.

        Exactly one command is declared: the two variants write the same output
        files, so declaring both would produce them twice.

        :raises ValueError: if neither groups files nor count table files were given
        """
        print(">>>Process make.shared")
        if self.groups_files:
            option, second_input = 'group', self.groups_files
        elif self.count_table_files:
            option, second_input = 'count', self.count_table_files
        else:
            raise ValueError("make.shared requires either groups_files or count_table_files")

        # $1 = list file, $2 = groups or count table file, $3 = stdout file
        command = (self.get_exec_path("mothur")
                   + ' "#make.shared(list=$1,' + option + '=$2,label=' + self.label
                   + ',outputdir=' + self.output_directory + '/)" > $3')
        makeshared = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(makeshared,
                 inputs=[self.an_list_files, second_input],
                 outputs=[self.stdout, self.an_shared_files])
\ No newline at end of file
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import ShellFunction
class MothurRarefactionSingle(Component):
    """
    Run mothur rarefaction.single on shared files.
    """

    def define_parameters(self, shared_files, label="unique-0.10-0.05-0.03", freq=100):
        """
          :param shared_files: shared files to process
          :type shared_files: str
          :param label: label(s) passed to rarefaction.single
          :type label: str
          :param freq: freq value passed to rarefaction.single
          :type freq: int
        """
        # parameters
        self.label, self.freq = label, freq
        # inputs
        self.shared_files = InputFileList(shared_files)
        # outputs, named after the shared files
        self.rarefaction_single_files = OutputFileList(self.get_outputs('{basename_woext}.groups.rarefaction', self.shared_files))
        self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.shared_files))

    def process(self):
        """
        Declare the rarefaction.single command; $1 is the shared file and $2 the stdout file.
        """
        print(">>>Process rarefaction.single")
        command = ''.join([
            self.get_exec_path("mothur"),
            ' "#rarefaction.single(shared=$1,label=', self.label,
            ',freq=', str(self.freq),
            ',outputdir=', self.output_directory, '/)" > $2',
        ])
        rarefactionsingle = ShellFunction(command, cmd_format='{EXE} {IN} {OUT}')
        rarefactionsingle = MultiMap(rarefactionsingle,
                                     inputs=[self.shared_files],
                                     outputs=[self.stdout, self.rarefaction_single_files])
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# from jflow.component import Component
import os
from subprocess import Popen, PIPE
from collections import Counter
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap
from weaver.function import PythonFunction, ShellFunction
from ng6.analysis import Analysis
def screen_seqs(exec_path, output_directory, processors, criteria, optmize_start_end_log, param_cmd, *files):
    """
    Build and run one mothur screen.seqs command.

    All arguments are handled as text: they are compared with string literals or
    concatenated into the command. `files` holds the inputs followed by the
    outputs: the fasta file, the summary log (only when optmize_start_end_log
    is "True"), the optional names/groups/count table inputs, then the good
    fasta and bad accnos outputs plus one output per optional input. The
    optional inputs are recognised from the number of entries, and a single
    optional input from its extension (.names, .groups, anything else is
    treated as a count table).

    Imports are local because the only caller visible here wraps this function
    in a PythonFunction -- presumably it is run as a standalone script; confirm
    before hoisting them to module level.

    :param exec_path: mothur command
    :param output_directory: directory passed to mothur as outputdir
    :param processors: number of processors passed to mothur
    :param criteria: percentile whose "<criteria>%-tile:" line is looked up in the summary log
    :param optmize_start_end_log: "True" to read start/end from the summary log
    :param param_cmd: extra options, already formatted as ",key=value,..." ("," means none)
    :param files: input files followed by output files (see above)
    :raises ValueError: if the summary log has no line for the requested percentile
    :raises RuntimeError: if mothur exits with a non-zero status
    """
    import os
    import subprocess

    fasta_path = files[0]
    if param_cmd == ",":
        # a lone comma means no optional screening criterion was supplied
        param_cmd = ""

    use_summary_log = optmize_start_end_log == "True"
    # index of the first optional input: after the fasta file and, if present, the log
    first_optional = 2 if use_summary_log else 1
    # the good fasta and bad accnos outputs are always present; whatever remains
    # is one input plus one output per optional names/groups/count table file
    optional_entries = len(files) - (first_optional + 2)

    options = ['fasta=' + str(fasta_path)]
    if optional_entries == 4:
        # names files and groups files are both set
        options.append('name=' + str(files[first_optional]))
        options.append('group=' + str(files[first_optional + 1]))
    elif optional_entries == 2:
        # a single optional input: names, groups, or otherwise a count table
        extension = os.path.splitext(files[first_optional])[1]
        keyword = {'.names': 'name', '.groups': 'group'}.get(extension, 'count')
        options.append(keyword + '=' + str(files[first_optional]))
    options.append('outputdir=' + str(output_directory) + '/')

    if use_summary_log:
        marker = str(criteria) + "%-tile:"
        optimize_start = optimize_end = None
        with open(files[1]) as log:
            for line in log:
                if line.startswith(marker):
                    # second and third columns are used as start and end
                    optimize_start, optimize_end = line.split()[1:3]
        if optimize_start is None:
            raise ValueError('no "%s" line found in %s' % (marker, files[1]))
        options.append('start=' + str(optimize_start))
        options.append('end=' + str(optimize_end))

    options.append('processors=' + str(processors))
    cmd = str(exec_path) + ' "#screen.seqs(' + ','.join(options) + param_cmd + ')"'

    # Drain mothur's stdout and wait for it, so a failure is reported instead
    # of being silently dropped.
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('screen.seqs command failed with exit status %s: %s' % (proc.returncode, cmd))
class MothurScreenSeqs(Analysis):
    """
    Align reads against a reference alignment (align.seqs), then screen the
    aligned reads (screen.seqs), optionally taking the start/end positions from
    a summary.seqs log.
    """

    def define_parameters(self, fasta_files, reference_alignment_files, names_files=None, groups_files=None, count_table_files=None, optmize_start_end_log=True, start=None,
                          end=None, maxambig=None, maxlength=None, maxhomop=None, criteria=97.5, search="kmer", ksize=8, align="needleman", processors=1):
        """
        Define screen.seqs component parameters.
          :param fasta_files: fasta files to process
          :type fasta_files: str
          :param reference_alignment_files: reference alignment files to process
          :type reference_alignment_files: str
          :param names_files: names files to process
          :type names_files: str
          :param groups_files: groups files to process
          :type groups_files: str
          :param count_table_files: count table files to process
          :type count_table_files: str
          :param optmize_start_end_log: use the "start" and "end" values of the summary.seqs log
          :type optmize_start_end_log: boolean
          :param start: remove sequences that start after position "start"
          :type start: int
          :param end: remove sequences that end before position "end"
          :type end: int
          :param maxambig: maximum number of ambiguous bases allowed in a sequence
          :type maxambig: int
          :param maxlength: remove sequences longer than the value "maxlength"
          :type maxlength: int
          :param maxhomop: remove any sequences with excessively long homopolymers
          :type maxhomop: int
          :param criteria: percentile of the summary.seqs log to use: 2.5, 25, 75, 97.5
          :type criteria: float
          :param search: method used to find the template sequence - kmer searching, blast, and suffix tree searching
          :type search: str
          :param ksize: with the kmer option, size of the kmers that are used
          :type ksize: int
          :param align: three alignment methods - blastn, gotoh, and needleman
          :type align: str
          :param processors: the number of processors to use
          :type processors: int
        """
        # define parameters
        self.optmize_start_end_log = optmize_start_end_log
        self.start = start
        self.end = end
        self.criteria = criteria
        self.maxambig = maxambig
        self.maxlength = maxlength
        self.maxhomop = maxhomop
        self.search = search
        self.ksize = ksize
        self.align = align
        self.processors = processors
        # optional screen.seqs criteria; entries left to None are not sent to mothur
        self.parameters = {'start': self.start, 'end': self.end, 'maxambig': self.maxambig,
                           'maxlength': self.maxlength, 'maxhomop': self.maxhomop}

        # define input files; the optional ones stay None when they are not supplied
        self.reference_alignment_files = InputFileList(reference_alignment_files, Formats.FASTA)
        self.fasta_files = InputFileList(fasta_files, Formats.FASTA)
        self.names_files = InputFileList(names_files, Formats.MOTHUR_NAMES) if names_files else None
        self.groups_files = InputFileList(groups_files, Formats.MOTHUR_GROUPS) if groups_files else None
        self.count_table_files = InputFileList(count_table_files, Formats.MOTHUR_COUNT_TABLE) if count_table_files else None

        # define align output files
        self.align_files = OutputFileList(self.get_outputs('{basename_woext}.align', self.fasta_files), Formats.FASTA)
        self.align_report_files = OutputFileList(self.get_outputs('{basename_woext}.align.report', self.fasta_files))
        self.stdout = OutputFileList(self.get_outputs('{basename_woext}.stdout', self.fasta_files))

        # define screenseqs output files
        if self.optmize_start_end_log:
            self.log = OutputFileList(self.get_outputs('{basename_woext}.log', self.fasta_files))
        else:
            self.log = None
        # the good fasta files keep the extension of the first aligned file
        # (a list rather than map() so that indexing also works on Python 3)
        extension_file = [os.path.basename(path).split('.')[-1] for path in self.align_files]
        self.good_fasta_files = OutputFileList(self.get_outputs('{basename_woext}.good.' + extension_file[0], self.align_files), Formats.FASTA)
        self.bad_accnos_files = OutputFileList(self.get_outputs('{basename_woext}.bad.accnos', self.align_files), Formats.MOTHUR_ACCNOS)
        if names_files:
            self.good_names_files = OutputFileList(self.get_outputs('{basename_woext}.good.names', self.names_files), Formats.MOTHUR_NAMES)
        else:
            self.good_names_files = None
        if groups_files:
            self.good_groups_files = OutputFileList(self.get_outputs('{basename_woext}.good.groups', self.groups_files), Formats.MOTHUR_GROUPS)
        else:
            self.good_groups_files = None
        if count_table_files:
            self.good_count_table_files = OutputFileList(self.get_outputs('{basename_woext}.good.count_table', self.count_table_files), Formats.MOTHUR_COUNT_TABLE)
        else:
            self.good_count_table_files = None

    def define_analysis(self):
        """Set the name, description, software and options of this analysis."""
        self.name = "MothurAlign"
        self.description = "Align reads against a 16S bank and clean aligned reads."
        self.software = "mothur"
        self.options = ""

    def get_version(self):
        """
        Return the mothur version.

        Runs "mothur -version" and returns the text after "=" in the second
        whitespace-separated token of its standard output.
        """
        cmd = [self.get_exec_path("mothur"), "-version"]
        # text mode so that the output can be split with str separators on Python 3 too
        p = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = p.communicate()
        return stdout.split()[1].split('=')[1]

    @staticmethod
    def _count_reads_per_group(groups_path):
        """Count, per sample, the sequences of a groups file (one "sequence sample" pair per line)."""
        with open(groups_path) as handle:
            sample_by_sequence = dict(line.split() for line in handle)
        return Counter(sample_by_sequence.values())

    @staticmethod
    def _sum_count_table(count_table_path):
        """Sum, per sample, the columns of a count table (header line, then rows whose columns from the third on are counts)."""
        with open(count_table_path) as handle:
            lines = handle.readlines()
        samples = lines.pop(0).split()[2:]
        counts = [[int(value) for value in line.split()[2:]] for line in lines]
        return dict(zip(samples, [sum(column) for column in zip(*counts)]))

    def post_process(self):
        """
        Record, per sample, the number of reads before ("nb_aligned_reads") and
        after ("nb_after_screen") screening, from the groups files and/or the
        count tables.
        """
        if self.groups_files:
            for group, good_group in zip(self.groups_files, self.good_groups_files):
                count_group = self._count_reads_per_group(group)
                good_count_group = self._count_reads_per_group(good_group)
                for sample, count in count_group.items():
                    self._add_result_element(sample, "nb_aligned_reads", count)
                for sample, count in good_count_group.items():
                    self._add_result_element(sample, "nb_after_screen", count)
        if self.count_table_files:
            for count_file, good_count_file in zip(self.count_table_files, self.good_count_table_files):
                group_count = self._sum_count_table(count_file)
                good_group_count = self._sum_count_table(good_count_file)
                for sample, count in group_count.items():
                    self._add_result_element(sample, "nb_aligned_reads", count)
                for sample, count in good_group_count.items():
                    self._add_result_element(sample, "nb_after_screen", count)

    def process(self):
        """
        Declare the align.seqs step, the optional summary.seqs step and the
        screen.seqs step (run through the screen_seqs function).
        """
        mothur = self.get_exec_path("mothur")

        # $1 = fasta file, $2 = reference alignment, $3 = stdout file
        alignseqs = ShellFunction(mothur + ' "#align.seqs(fasta=$1,reference=$2,search=' + self.search + ',ksize=' + str(self.ksize) + ',align=' + self.align +
                                  ',outputdir=' + self.output_directory + '/,processors=' + str(self.processors) + ')" > $3', cmd_format='{EXE} {IN} {OUT}')
        alignseqs = MultiMap(alignseqs, inputs=[self.fasta_files, self.reference_alignment_files], outputs=[self.stdout, self.align_files, self.align_report_files])

        if self.optmize_start_end_log:
            # the summary.seqs console output is the log later read by screen_seqs
            if self.names_files:
                summary = ShellFunction(mothur + ' "#summary.seqs(fasta=$1,name=$2,outputdir=' + self.output_directory + '/,processors=' + str(self.processors) + ')" > $3',
                                        cmd_format='{EXE} {IN} {OUT}')
                summary = MultiMap(summary, inputs=[self.align_files, self.names_files], outputs=[self.log])
            else:
                summary = ShellFunction(mothur + ' "#summary.seqs(fasta=$1,outputdir=' + self.output_directory + '/,processors=' + str(self.processors) + ')" > $2',
                                        cmd_format='{EXE} {IN} {OUT}')
                summary = MultiMap(summary, inputs=[self.align_files], outputs=[self.log])

        # only the inputs/outputs that are actually set are handed to screen_seqs,
        # which relies on their number and order
        candidate_inputs = [self.align_files, self.log, self.names_files, self.groups_files, self.count_table_files]
        candidate_outputs = [self.good_fasta_files, self.bad_accnos_files, self.good_names_files, self.good_groups_files, self.good_count_table_files]
        inputs_filter = [candidate for candidate in candidate_inputs if candidate is not None]
        outputs_filter = [candidate for candidate in candidate_outputs if candidate is not None]

        # always starts with a comma; a lone "," tells screen_seqs there is no extra option
        params = ',' + ','.join(['%s=%s' % (key, value) for (key, value) in self.parameters.items() if value is not None])
        ARG = " ".join([str(arg) for arg in [mothur, self.output_directory, self.processors, self.criteria, self.optmize_start_end_log, params]])
        screenseqs = PythonFunction(screen_seqs, cmd_format="{EXE} " + ARG + " {IN} {OUT}")
        screenseqs = MultiMap(screenseqs, inputs=inputs_filter, outputs=outputs_filter)
\ No newline at end of file
#
# Copyright (C) 2012 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from jflow.component import Component
from jflow.iotypes import OutputFile, OutputFileList, InputFile, InputFileList, Formats
from jflow.abstraction import MultiMap