Commit 54df5b41 authored by Penom Nom's avatar Penom Nom
Browse files

Fix some bugs.

parent fc034270
......@@ -62,7 +62,7 @@ class GeneDiversity (NG6Workflow):
# Stat on OTU
blast_index = self.add_component("BlastIndex", [self.args["database"], "prot"])
otu_classify = self.add_component("GeneOTUClassify", [cdhit.biom_files, [merge.output_file], self.args["taxonomy"], blast_index.databank], parent=cdhit)
otu_classify = self.add_component("GeneOTUClassify", [cdhit.biom_files, cdhit.output_files, self.args["taxonomy"], blast_index.databank], parent=cdhit)
# Normalisation
normalisation = self.add_component("BiomNormalisation", [cdhit.biom_files, 1000, 3000, 100, 1], parent=cdhit)
\ No newline at end of file
......@@ -38,6 +38,8 @@ def filter_and_bootstrap( input_biom, output_biom, observation_threshold, nb_del
biom.filter_OTU_by_count( int(observation_threshold) )
# Normalisation
biom.bootstrap_by_sample( int(nb_selected), int(nb_deleted), int(nb_round) )
# Remove the empty observations
biom.filter_OTU_by_count( 1 )
# Write
BiomIO.write( output_biom, biom )
......@@ -89,7 +91,7 @@ class BiomNormalisation (Analysis):
return "-"
def post_process(self):
self._save_files( self.depth_files + self.hclust_files )
self._save_files( self.output_files + self.depth_files + self.hclust_files )
# Parse depths
for filepath in self.depth_files:
[depths, counts, sum, upper_quartile, median, lower_quartile] = observations_depth_to_stat(filepath)
......
......@@ -24,7 +24,8 @@ from weaver.function import ShellFunction, PythonFunction
from weaver.abstraction import Map
from ng6.analysis import Analysis
def biom_to_krona( exec_path, biom_file, krona_data_file, krona_view_file ):
"""
@summary : Create a krona visualisation for the taxonomy data of the biom file.
......@@ -35,6 +36,7 @@ def biom_to_krona( exec_path, biom_file, krona_data_file, krona_view_file ):
"""
from workflows.gene_diversity.lib.Biom import Biom, BiomIO
from subprocess import Popen, PIPE
import sys
# Format data
biom = BiomIO.from_json( biom_file )
......@@ -44,13 +46,9 @@ def biom_to_krona( exec_path, biom_file, krona_data_file, krona_view_file ):
p = Popen(cmd, stdout=PIPE, stderr=PIPE)
stdout, stderr = p.communicate()
# write down the stdout
stdoh = open(stdout_path, "w")
stdoh.write(stdout)
stdoh.close()
sys.stdout.write(stdout)
# write down the stderr
stdeh = open(stderr_path, "w")
stdeh.write(stderr)
stdeh.close()
sys.stderr.write(stderr)
def add_tax_metadata( biom_file, blast_file, taxonomy_file, output_file ):
"""
......@@ -89,17 +87,18 @@ def add_tax_metadata( biom_file, blast_file, taxonomy_file, output_file ):
# Add metadata to biom
biom = BiomIO.from_json( biom_file )
for cluster_id in biom.rows:
for cluster in biom.rows:
cluster_id = cluster['id']
biom.add_metadata( cluster_id, "taxonomy", cluster_annot[cluster_id]['tax'], "observation")
biom.add_metadata( cluster_id, "evalue", cluster_annot[cluster_id]['evalue'], "observation")
biom.add_metadata( cluster_id, "identity", cluster_annot[cluster_id]['identity'], "observation")
biom.add_metadata( cluster_id, "aln_length", cluster_annot[cluster_id]['aln_length'], "observation")
BioIO.write( output_file, biom )
BiomIO.write( output_file, biom )
class GeneOTUClassify( Analysis ):
def define_parameters(self, biom_files, input_fasta, taxonomy_file, databank, blast_used="blastp", evalue="1e-5"):
def define_parameters(self, biom_files, input_fasta, taxonomy_file, databank, blast_used="blastp", evalue="1e-5", word_size=None):
"""
@param biom_files : [list] the Biom files with OTU to process.
@param input_fasta : [list] the fasta file containing the sequences of OTUs. Each biom file corresponds to one input fasta.
......@@ -112,6 +111,8 @@ class GeneOTUClassify( Analysis ):
self.blast_used = blast_used
self.databank = databank
self.blast_options = " -max_target_seqs 1 -evalue " + str(evalue) + " -outfmt 6"
if word_size is not None:
self.blast_options += " -word_size " + str(word_size)
# Files
self.biom_files = InputFileList( biom_files )
......
......@@ -661,7 +661,12 @@ class BiomIO:
for idx in range(len(biom.rows)):
count = biom.data.get_row_sum( idx )######################## TO DO wrapping
tax = biom.rows[idx]["metadata"]["taxonomy"]
out_fh.write( str(count) + "\t" + "\t".join(map(str, tax)) + "\n" )
if isinstance(tax, list) or isinstance(tax, tuple):
tax = "\t".join( map(str, tax) )
else:
tax = str( tax )
tax = "\t".join( map(str.strip, tax.split(";")) ) # Replace the ";" separator (and any surrounding whitespace) between ranks by a tabulation
out_fh.write( str(count) + "\t" + tax + "\n" )
out_fh.close()
@staticmethod
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment