Skip to content
Snippets Groups Projects
Commit 5636099e authored by Robert Bossy
Browse files

compute cliques

parent ff908040
No related branches found
No related tags found
No related merge requests found
import taxutils
import collections
import os
import os.path
eppo_taxonomy = taxutils.EPPOParser.read_xmlfull_file('output/EPPO/fullcodes.xml')
......@@ -50,11 +51,17 @@ for eppo_taxon in eppo_taxonomy.taxa():
mapping.add_alignment(ncbi_taxon, eppo_taxon, 'with-ancestor')
os.makedirs('output/EPPO-match', exist_ok=True)
OUTDIR = os.path.join('output', 'EPPO-match')
os.makedirs(OUTDIR, exist_ok=True)
taxutils.Logger.info('writing output/EPPO-match/matched-codes.csv')
with open('output/EPPO-match/matched-codes.csv', 'w') as f:
def outfile(filename):
    """Open ``filename`` inside ``OUTDIR`` for writing and log the target path.

    The returned file object is opened in text write mode (``'w'``); the
    caller is expected to close it, typically by using it in a ``with``
    statement.
    """
    target = os.path.join(OUTDIR, filename)
    # Announce the destination before the file is created/truncated.
    taxutils.Logger.info('writing %s' % target)
    handle = open(target, 'w')
    return handle
with outfile('matched-codes.csv') as f:
f.write('\t'.join((
'EPPO code',
'EPPO name',
......@@ -87,8 +94,7 @@ with open('output/EPPO-match/matched-codes.csv', 'w') as f:
f.write('\n')
taxutils.Logger.info('writing output/EPPO-match/unmatched-codes.csv')
with open('output/EPPO-match/unmatched-codes.csv', 'w') as f:
with outfile('unmatched-codes.csv') as f:
f.write('\t'.join((
'EPPO code',
'EPPO name',
......@@ -105,4 +111,55 @@ with open('output/EPPO-match/unmatched-codes.csv', 'w') as f:
f.write('\n')
# Group EPPO taxa together with the NCBI taxa they are aligned to.
#   CLIQUES    : clique id -> (list of NCBI taxa, list of EPPO taxa)
#   CLIQUE_MAP : taxid (EPPO or NCBI) -> id of the clique holding that taxon
# Clique ids are the enumeration index of the EPPO taxon that opened the clique.
CLIQUES = {}
CLIQUE_MAP = {}
for next_clique, eppo_taxon in enumerate(eppo_taxonomy.taxa()):
    if eppo_taxon.taxid not in CLIQUE_MAP:
        # Not registered yet: open a new clique containing only this EPPO taxon.
        CLIQUES[next_clique] = ([], [eppo_taxon])
        CLIQUE_MAP[eppo_taxon.taxid] = next_clique
    eppo_clique = CLIQUE_MAP[eppo_taxon.taxid]
    eppo_clique_ncbi_taxa, eppo_clique_eppo_taxa = CLIQUES[eppo_clique]

    if not mapping.has_21(eppo_taxon):
        continue

    # alignment_type is unpacked from each pair but not used for grouping.
    for ncbi_taxon, alignment_type in mapping.get_21(eppo_taxon):
        if ncbi_taxon.taxid not in CLIQUE_MAP:
            # NCBI taxon seen for the first time: it joins the current clique.
            CLIQUE_MAP[ncbi_taxon.taxid] = eppo_clique
            eppo_clique_ncbi_taxa.append(ncbi_taxon)
            continue

        ncbi_clique = CLIQUE_MAP[ncbi_taxon.taxid]
        if ncbi_clique == eppo_clique:
            # Already grouped with the current EPPO taxon; nothing to do.
            continue

        # The NCBI taxon belongs to a different clique: absorb that clique
        # into the current one, re-point all of its members, then drop it.
        ncbi_clique_ncbi_taxa, ncbi_clique_eppo_taxa = CLIQUES[ncbi_clique]
        eppo_clique_ncbi_taxa.extend(ncbi_clique_ncbi_taxa)
        eppo_clique_eppo_taxa.extend(ncbi_clique_eppo_taxa)
        for t in ncbi_clique_ncbi_taxa + ncbi_clique_eppo_taxa:
            CLIQUE_MAP[t.taxid] = eppo_clique
        del CLIQUES[ncbi_clique]
# Report every non-trivial clique and track the largest sizes seen.
#   '>' : one EPPO taxon grouped with two or more NCBI taxa
#   '<' : several EPPO taxa grouped with exactly one NCBI taxon
#   '*' : several EPPO taxa grouped with a number of NCBI taxa other than one
# Cliques with a single EPPO taxon and fewer than two NCBI taxa are counted
# in the maxima but not printed.
eppo_max = 0
ncbi_max = 0
both_max = 0
for ncbi_taxa, eppo_taxa in CLIQUES.values():
    n_eppo = len(eppo_taxa)
    n_ncbi = len(ncbi_taxa)

    if n_eppo > eppo_max:
        eppo_max = n_eppo
    if n_ncbi > ncbi_max:
        ncbi_max = n_ncbi
    if n_eppo + n_ncbi > both_max:
        both_max = n_eppo + n_ncbi

    if n_eppo == 1 and n_ncbi < 2:
        continue

    if n_eppo == 1:
        prefix = '>'
    elif n_ncbi == 1:
        prefix = '<'
    else:
        prefix = '*'

    eppo_ids = ', '.join(t.taxid for t in eppo_taxa)
    ncbi_ids = ', '.join(t.taxid for t in ncbi_taxa)
    print('%s %s = %s' % (prefix, eppo_ids, ncbi_ids))

print('eppo_max = %d' % eppo_max)
print('ncbi_max = %d' % ncbi_max)
print('both_max = %d' % both_max)
taxutils.Logger.info('Done')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment