From 5deb245ff9cd01c57c8ae9ac353950e57244a378 Mon Sep 17 00:00:00 2001
From: Robert Bossy <Robert.Bossy@inra.fr>
Date: Wed, 20 Oct 2021 11:48:57 +0200
Subject: [PATCH] BacDive taxon entries ids in the form `bd:`

---
 README.md     | 9 +++++++++
 dsmz-match.py | 7 +------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 5a00867..c6290e7 100644
--- a/README.md
+++ b/README.md
@@ -67,4 +67,13 @@ snakemake -j 1 -s rewrite-taxonomy.snakefile
 
 This will write the merged taxonomy in a format suitable for text projection.
 
+## Resulting taxonomy
 
+### Namespace prefixes
+
+| Prefix | Domain | URL construct |
+|--------|--------|---------------|
+| `ncbi` | NCBI Taxonomy | `https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id={ID}` |
+| | | `https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=taxonomy&id={ID}` |
+| `bd`   | BacDive | `https://bacdive.dsmz.de/strain/{ID}` |
+| | | `https://api.bacdive.dsmz.de/example/fetch/{ID}` (authentication required) |
diff --git a/dsmz-match.py b/dsmz-match.py
index db0a274..0c06a34 100755
--- a/dsmz-match.py
+++ b/dsmz-match.py
@@ -102,12 +102,7 @@ class BacDiveEntry:
         self.domain = tax.findtext('./domain')
 
     def _get_strain_taxid(self):
-        for sn in self.strain_number:
-            if sn.startswith('DSM'):
-                return sn.lower().replace(' ', ':')
-        if len(self.strain_number) > 0:
-            return self.strain_number[0].lower().replace(' ', ':')
-        return 'bacdive:%s' % os.path.basename(self.filename).replace('.xml', '')
+        return 'bd:' + self.bacdive_id
 
     def _get_canonical(self):
         for sn in self.strain_number:
-- 
GitLab