Commit 8165af41 authored by Penom Nom

- Clean krona call.

- Add error file.
parent 6aa7770f
@@ -27,8 +27,76 @@ from weaver.function import PythonFunction, ShellFunction
from ng6.analysis import Analysis
def get_taxonomy_to_Krona(exec_krona,output_directory,*files):
import glob,re,os
def get_taxonomy_to_Krona( exec_krona, output_directory, *files ):
import glob,re,os,sys
def get_otu_count_by_sample_cutoff( shared_files ):
"""
@summary : Returns the count for each OTU by cutoff by sample.
@param shared_files : [string] the mothur shared file (@see mothur make.shared).
@return : [dict] the count for each OTU by cutoff by sample. Example :
{
'sample1' : {
'0.03':["otu1;12", "otu3;18"],
'0.05':["otu1;2", "otu3;30"]
},
'sample2' : {
'0.03':["otu1;8", "otu3;1"],
'0.05':["otu1;80", "otu3;3"]
}
}
"""
dico_otus = {}
lines = open(shared_files).readlines()
firstline = lines.pop(0).split()[3:]
for i, otu in enumerate( firstline ):
dico_otus[i+4] = otu
dico_samples_level = {}
for line in lines:
temp = {}
line = line.split()
label = line[0]
group = line[1]
nb_otus = line[3:]
list_otus = []
for i in range( len(nb_otus) ):
if nb_otus[i] != '0':
list_otus.append( dico_otus[i+4]+";"+nb_otus[i] )
temp[label] = list_otus
if dico_samples_level.has_key(group):
dico_samples_level[group].update(temp)
else:
dico_samples_level[group] = temp
return dico_samples_level
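# Illustration only (hypothetical data, not read by the pipeline): a mothur shared file
# starts with the columns "label Group numOtus Otu001 Otu002 ...", one row per sample and cutoff.
# For a file containing:
#   label  Group    numOtus  Otu001  Otu002
#   0.03   sample1  2        12      0
#   0.03   sample2  2        8       1
# get_otu_count_by_sample_cutoff() would return:
#   {'sample1': {'0.03': ['Otu001;12']},
#    'sample2': {'0.03': ['Otu001;8', 'Otu002;1']}}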
def get_otu_tax_by_cutoff( taxonomy_files ):
"""
@summary : Returns the taxonomy of each OTU by cutoff.
@param taxonomy_files : [list] the mothur taxonomy files (@see mothur classify.otu).
@return : [dict] the taxonomy of each OTU by cutoff. Example :
{
'0.03' : {
'otu1' : "Bacteria",
'otu3' : "Nostoc"
},
'0.05' : {
'otu1' : "Bacteria",
'otu3': "Nostoc"
}
}
"""
otu_taxonomy_by_cutoff = {}
for current_tax_file in taxonomy_files:
label = os.path.splitext(os.path.basename(current_tax_file))[0]
lines = open(current_tax_file).readlines()
header = lines.pop(0)
temp = {}
for line in lines:
otu_id, otu_count, otu_tax = line.split()
temp[otu_id] = otu_tax
otu_taxonomy_by_cutoff[label] = temp
return otu_taxonomy_by_cutoff
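# Illustration only (hypothetical data): once the classify.otu outputs have been renamed to
# "<label>.taxonomy" below, a file "0.03.taxonomy" containing:
#   OTU     Size  Taxonomy
#   Otu001  12    Bacteria;Cyanobacteria;
#   Otu002  1     Bacteria;Proteobacteria;
# would make get_otu_tax_by_cutoff(["0.03.taxonomy"]) return:
#   {'0.03': {'Otu001': 'Bacteria;Cyanobacteria;', 'Otu002': 'Bacteria;Proteobacteria;'}}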
tax_stdout = files[0]
shared_files = files[1]
taxonomy_files = glob.glob(output_directory+"/*.taxonomy")
@@ -44,50 +112,21 @@ def get_taxonomy_to_Krona(exec_krona,output_directory,*files):
taxonomy_outputs = []
for label, tax_file in result.items():
source = tax_file
destination = os.path.join(output_directory,label+".taxonomy")
destination = os.path.join(output_directory, label+".taxonomy")
taxonomy_outputs.append(destination)
os.rename(source, destination)
#get all otus for each samples and each levels
lines = open(shared_files).readlines()
firstline = lines.pop(0).split()[3:]
dico_otus = {}
for otu in firstline:
dico_otus[firstline.index(otu)+4] = otu
dico_samples_level = {}
for line in lines:
temp = {}
line = line.split()
label = line[0]
group = line[1]
nb_otus = line [3:]
list_otus = []
for i in range(len(nb_otus)):
if nb_otus[i] !='0':
list_otus.append(dico_otus[i+4]+";"+nb_otus[i])
temp[label] = list_otus
if dico_samples_level.has_key(group):
dico_samples_level[group].update(temp)
else:
dico_samples_level[group] = temp
#get all taxonomy for each levels
dico_level_otu_taxonomy = {}
for taxanomy_file in taxonomy_outputs:
label = os.path.splitext(os.path.basename(taxanomy_file))[0]
lines = open(taxanomy_file).readlines()
header = lines.pop(0)
temp = {}
for line in lines:
line = line.split()
num_otu = line[0]
lineage = line[2]
temp[num_otu] = lineage
dico_level_otu_taxonomy[label] = temp
#write level taxonomy files for each samples
# Get count for each OTU by cutoff by sample
otu_count_by_sample = get_otu_count_by_sample_cutoff( shared_files )
# Get the taxonomy of each OTU for each level
otu_taxonomy_by_cutoff = get_otu_tax_by_cutoff( taxonomy_outputs )
# Write per-level taxonomy files for each sample
dico_files = {}
for sample,levels in dico_samples_level.items():
for sample,levels in otu_count_by_sample.items():
for level,otus_desc in levels.items():
name_file = os.path.join(output_directory,sample+"-"+level+".taxonomy")
name_file = os.path.join( output_directory, sample+"-"+level+".taxonomy" )
if dico_files.has_key(sample):
dico_files[sample].append(name_file)
else:
@@ -96,13 +135,13 @@ def get_taxonomy_to_Krona(exec_krona,output_directory,*files):
fic_taxonomy = open(name_file,'w')
fic_taxonomy.write('%s\t%s\t%s\n' % ('OTU','Size','Taxonomy'))
for item in otus_desc:
item = item.split(';')
otu = item[0]
nb_otu = item[1]
fic_taxonomy.write('%s\t%s\t%s\n' % (otu,nb_otu,dico_level_otu_taxonomy[level][otu]))
otu_id, otu_count = item.split(';')
fic_taxonomy.write( '%s\t%s\t%s\n' % (otu_id, otu_count, otu_taxonomy_by_cutoff[level][otu_id]) )
fic_taxonomy.close()
# Run the Krona mothur-taxonomy import for each sample
for samp,name in dico_files.items():
cmd = exec_krona+" "+" ".join(name)+" -o "+ os.path.join(output_directory,samp+".krona.html")
cmd = exec_krona+" "+" ".join(name)+" -o "+ os.path.join(output_directory, samp+".krona.html")
p = os.popen(cmd)
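# Illustration only (assumed file names): for a sample "sample1" analysed at cutoffs 0.03 and
# 0.05, the command built above should look like
#   <exec_krona> <outdir>/sample1-0.03.taxonomy <outdir>/sample1-0.05.taxonomy -o <outdir>/sample1.krona.html
# where exec_krona is the ImportMothurTaxonomy.pl path passed as first argument by the workflow.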
class MothurOTUAnalysis(Analysis):
@@ -159,6 +198,8 @@ class MothurOTUAnalysis(Analysis):
user_labels = [lines.split('\t')[0] for lines in open(self.an_list_files[0]).readlines()]
good_labels = list(set(user_labels).intersection(set(labels)))
self.label = good_labels
# define stderr
self.stderr = os.path.join(self.output_directory, "otuanalysis.stderr")
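# Note: this single file is shared by every mothur command below; each of them appends its
# stderr to it with "2>>", so otuanalysis.stderr accumulates the errors of the whole analysis.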
# define makeshared output files
self.shared_files = OutputFileList(self.get_outputs('{basename_woext}.shared', self.an_list_files))
self.makeshared_stdout = OutputFileList(self.get_outputs('{basename_woext}.makeshared.stdout', self.an_list_files))
@@ -322,6 +363,7 @@ class MothurOTUAnalysis(Analysis):
biom_files.append(os.path.join(self.output_directory, file))
# First add OTU's values
self._save_file(shared_file, "OTU.shared")
[otus, nb_seq, labels] = self._get_otu_values(shared_file)
for sample in otus.keys():
for level in otus[sample].keys():
@@ -391,57 +433,57 @@ class MothurOTUAnalysis(Analysis):
# Diversity
if self.groups_files :
makeshared = ShellFunction(self.get_exec_path("mothur") + ' "#make.shared(list=$1,group=$2,label='+label+',outputdir='+self.output_directory+'/)" > $3',\
cmd_format='{EXE} {IN} {OUT}')
cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
makeshared = MultiMap(makeshared, inputs=[self.an_list_files,self.groups_files], outputs=[self.makeshared_stdout,self.shared_files])
if self.count_table_files :
makeshared = ShellFunction(self.get_exec_path("mothur") + ' "#make.shared(list=$1,count=$2,label='+label+',outputdir='+self.output_directory+'/)" > $3',\
cmd_format='{EXE} {IN} {OUT}')
cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
makeshared = MultiMap(makeshared, inputs=[self.an_list_files,self.count_table_files], outputs=[self.makeshared_stdout,self.shared_files])
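# Illustration only (assumed expansion of the ShellFunction command templates above): each
# make.shared run is expected to be executed roughly as
#   mothur "#make.shared(list=<prefix>.list,group=<prefix>.groups,label=<label>,outputdir=<outdir>/)" > <prefix>.makeshared.stdout 2>> <outdir>/otuanalysis.stderr
# i.e. mothur's own report goes to a per-input .stdout file while stderr is appended to the
# shared error file defined above.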
for l in self.label:
makebiom = ShellFunction(self.get_exec_path("mothur") + ' "#make.biom(shared=$1,label='+l+',outputdir='+self.output_directory+'/)" > $2',\
cmd_format='{EXE} {IN} {OUT}')
cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
makebiom(inputs=[self.shared_files], outputs=[self.ho_files[l], self.hb_files[l]])
# Alpha diversity analysis
summarysingle = ShellFunction(self.get_exec_path("mothur") + ' "#summary.single(shared=$1,calc='+self.calc+',label='+label+',outputdir='+\
self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT}')
self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
summarysingle = MultiMap(summarysingle, inputs=[self.shared_files], outputs=[self.summarysingle_stdout,self.summary_single_files])
rarefactionsingle = ShellFunction(self.get_exec_path("mothur") + ' "#rarefaction.single(shared=$1,label='+label+',freq='+str(self.freq)+',outputdir='+\
self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT}')
self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
rarefactionsingle = MultiMap(rarefactionsingle, inputs=[self.shared_files], outputs=[self.rarefactionsingle_stdout,self.rarefaction_single_files])
# Beta diversity analysis
# Classify OTUs
if self.fasta_subsample_files and self.count_table_files:
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(fasta=$1,count=$2,outputdir='+self.output_directory+'/)" > $3',cmd_format='{EXE} {IN} {OUT}')
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(fasta=$1,count=$2,outputdir='+self.output_directory+'/)" > $3',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
subsample = MultiMap(subsample, inputs=[self.fasta_files,self.count_table_files], outputs=[self.subsample_stdout,self.subsample_fasta_files,self.subsample_count_files])
elif self.an_list_files and self.count_table_files:
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(list=$1,count=$2,outputdir='+self.output_directory+'/)" > $3',cmd_format='{EXE} {IN} {OUT}')
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(list=$1,count=$2,outputdir='+self.output_directory+'/)" > $3',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
subsample = MultiMap(subsample, inputs=[self.an_list_files,self.count_table_files], outputs=[self.subsample_stdout,self.subsample_list_files,self.subsample_count_files])
elif self.fasta_subsample_files and not self.count_table_files:
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(fasta=$1,outputdir='+self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT}')
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(fasta=$1,outputdir='+self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
subsample = MultiMap(subsample, inputs=[self.fasta_files], outputs=[self.subsample_stdout,self.subsample_fasta_files])
elif self.an_list_files and not self.count_table_files:
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(list=$1,outputdir='+self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT}')
subsample = ShellFunction(self.get_exec_path("mothur") + ' "#sub.sample(list=$1,outputdir='+self.output_directory+'/)" > $2',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
subsample = MultiMap(subsample, inputs=[self.an_list_files], outputs=[self.subsample_stdout,self.subsample_list_files])
if self.names_files and not self.groups_files:
classifyotu = ShellFunction(self.get_exec_path("mothur") + ' "#classify.otu(list=$1,taxonomy=$2,name=$3,outputdir='+self.output_directory+'/,\
label='+str(label)+')" > $4',cmd_format='{EXE} {IN} {OUT}')
label='+str(label)+')" > $4',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
classifyotu = MultiMap(classifyotu, inputs=[self.an_list_files,self.taxonomy_files,self.names_files,], outputs=[self.classifyotu_stdout])
elif self.names_files and self.groups_files:
classifyotu = ShellFunction(self.get_exec_path("mothur") + ' "#classify.otu(list=$1,taxonomy=$2,name=$3,group=$4,outputdir='+self.output_directory+'/,\
label='+str(label)+')" > $5',cmd_format='{EXE} {IN} {OUT}')
label='+str(label)+')" > $5',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
classifyotu = MultiMap(classifyotu, inputs=[self.an_list_files,self.taxonomy_files,self.names_files,self.groups_files], outputs=[self.classifyotu_stdout])
elif self.count_table_files:
classifyotu = ShellFunction(self.get_exec_path("mothur") + ' "#classify.otu(list=$1,taxonomy=$2,count=$3,outputdir='+self.output_directory+'/,\
label='+str(label)+')" > $4',cmd_format='{EXE} {IN} {OUT}')
label='+str(label)+')" > $4',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
classifyotu = MultiMap(classifyotu, inputs=[self.an_list_files,self.taxonomy_files,self.count_table_files,], outputs=[self.classifyotu_stdout])
else:
classifyotu = ShellFunction(self.get_exec_path("mothur") + ' "#classify.otu(list=$1,taxonomy=$2,outputdir='+self.output_directory+'/,label='+str(label)+')"\
> $3',cmd_format='{EXE} {IN} {OUT}')
> $3',cmd_format='{EXE} {IN} {OUT} 2>> ' + self.stderr)
classifyotu = MultiMap(classifyotu, inputs=[self.an_list_files,self.taxonomy_files], outputs=[self.classifyotu_stdout])
if not self.without_krona:
krona = PythonFunction(get_taxonomy_to_Krona, cmd_format="{EXE} {ARG} {IN} {OUT}")
krona = PythonFunction( get_taxonomy_to_Krona, cmd_format="{EXE} {ARG} {IN} {OUT} 2>> " + self.stderr )
krona(arguments=[self.get_exec_path("ImportMothurTaxonomy.pl"),self.output_directory],inputs=[self.classifyotu_stdout,self.shared_files],\
outputs=[self.cons_taxonomy_files])
\ No newline at end of file