Commit 0dbc3a98 authored by Penom Nom
Browse files

No commit message

No commit message
parent 071951bf
......@@ -27,7 +27,8 @@ from ng6.analysis import Analysis
def gene_OTU_write( trace_file, cdhit_file, blast_file, taxonomy_file, stat_file ):
"""
@param cdhit_file : [string] the list of '.clstr' files produced by cdhit.
@param trace_file : [string] each line of this file is a link between cluster ID and sample file ('OLD_ID<tab>FILE_PATH<tab>CLUSTER_ID').
@param cdhit_file : [string] a '.clstr' file produced by cdhit.
@param blast_file : [string] the blastp result in tabular format (outfmt 6 with NCBI Blast+).
@param taxonomy_file : [string] the taxonomy file.
@param stat_file : [string] the output file.
......@@ -71,23 +72,23 @@ def gene_OTU_write( trace_file, cdhit_file, blast_file, taxonomy_file, stat_file
# Process stat
out_fh = open(stat_file, "w")
out_fh.write( "#ClusterID\t" + "\t".join(sample_names) + "\tTaxonomy\tEvalue\tIdentity\n" )
out_fh.write( "#Cluster_ID\t" + "\t".join(sample_names) + "\tTaxonomy\tTaxonomy_Evalue\tTaxonomy_Identity\n" )
cluster_count = init_cluster_count(sample_names)
cluster_name, cluster_taxa, taxa_evalue, taxa_identity = None, None, None, None
for line in open(cdhit_file).readlines():
if line.startswith(">"):
if line.startswith(">"): # Line example : '>Cluster 0' => 'Cluster_0'
if cluster_name is not None:
to_print = cluster_name
for sample in sample_names:
to_print += "\t" + str(cluster_count[sample])
out_fh.write( to_print + "\t" + str(cluster_taxa) + "\t" + str(taxa_evalue) + "\t" + str(taxa_identity) + "\n" )
cluster_count = init_cluster_count(sample_names)
cluster_name = line.strip()[1:].replace(" ", "_") #Example : '>Cluster 0' => 'Cluster_0'
else:
pre_cluster_sample = cluster_samples[ line.strip().split()[2][1:-3] ] # Sample line example : '0 126aa, >Cluster33;size=20... at 99.21%'
pre_cluster_count = int(line.strip().split()[2][1:-3].split(";")[-1]) # Sample line example : '0 126aa, >Cluster33;20... at 99.21%'
pre_cluster_name = line.strip().split()[2][1:-3].split(";")[0] # Sample line example : '0 126aa, >Cluster33;size=20... at 99.21%'
cluster_name = line.strip()[1:].replace(" ", "_")
else: # Line example : '60 130aa, >c6104.0;1... at 99.23%'
pre_cluster_sample = cluster_samples[ line.strip().split()[2][1:-3] ]
pre_cluster_count = int(line.strip().split()[2][1:-3].split(";")[-1])
pre_cluster_name = line.strip().split()[2][1:-3].split(";")[0]
# if current pre-cluster is the representative of final cluster
if cluster_annot.has_key(pre_cluster_name):
cluster_taxa = cluster_annot[pre_cluster_name]['tax']
......@@ -105,6 +106,7 @@ class GeneOTUStat (Analysis):
def define_parameters(self, cluster_trace_file, cdhit_cluster_file, blast_file, taxonomy_file):
"""
@param cluster_trace_file : [list] each line of this file is a link between cluster ID and sample file ('OLD_ID<tab>FILE_PATH<tab>CLUSTER_ID').
@param cdhit_cluster_file : [list] the list of '.clstr' files produced by cdhit.
@param blast_file : [list] the blastp results in tabular format (outfmt 6 with NCBI Blast+).
@param taxonomy_file : [list] the taxonomy files.
......@@ -119,7 +121,7 @@ class GeneOTUStat (Analysis):
self.name = "GeneOTUAnalysis"
self.description = "Organizational Taxon Unit analysis."
self.software = "-"
self.options = "-"
self.options = ""
def get_version(self):
return "-"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment