diff --git a/dsmz-match.py b/dsmz-match.py
index 8294da61e1dc5c263114f51282886622876099d4..03379ddf7af2cc20d73cb3de90963a40761a5fcf 100755
--- a/dsmz-match.py
+++ b/dsmz-match.py
@@ -10,12 +10,13 @@ import collections
 
 
 class NCBITaxon:
-    def __init__(self, taxid, rank):
+    def __init__(self, taxid, rank, division_id):
         self.taxid = taxid
         self.rank = rank
+        self.division_id = division_id
 
-    def q(self, taxid=None, rank=None):
-        return (taxid is None or taxid == self.taxid) and (rank is None or rank == self.rank)
+    def q(self, taxid=None, rank=None, division_id='0'):
+        return (taxid is None or taxid == self.taxid) and (rank is None or rank == self.rank) and (division_id is None or division_id == self.division_id)
 
 
 class NCBIName:
@@ -32,9 +33,10 @@ def read_ncbi_nodes(filename):
     with open(filename) as f:
         for line in f:
             cols = line.split('\t|\t')
-            taxid = cols[0]
+            taxid = 'ncbi:' + cols[0]
             rank = cols[2]
-            result[taxid] = NCBITaxon(taxid, rank)
+            division_id = cols[4]
+            result[taxid] = NCBITaxon(taxid, rank, division_id)
     return result
 
 
@@ -43,7 +45,7 @@ def read_ncbi_names(nodes, filename):
     with open(filename) as f:
         for line in f:
             cols = line.split('\t|\t')
-            taxid = cols[0]
+            taxid = 'ncbi:' + cols[0]
             name = cols[1]
             name_type = cols[3].replace('\t|\n', '')
             result[name] = NCBIName(nodes[taxid], name_type)
@@ -53,6 +55,7 @@ def read_ncbi_names(nodes, filename):
 class BacDiveEntry:
     def __init__(self, ncbi_taxa, filename):
         self.filename = filename
+        self.bacdive_id = os.path.basename(filename).replace('.xml', '')
         self.root = ET.parse(filename).getroot()
         self.strain_number = []
         self.species = None
@@ -66,7 +69,7 @@ class BacDiveEntry:
         self.strain_number = tuple(self._get_strain_numbers())
         self._set_taxonomy()
         self.complete_name = tuple('%s %s' % (self.species, sn) for sn in self.strain_number)
-        self.dispatch, self.ncbi_taxid = self._dispatch(ncbi_taxa)
+        self.dispatch, self.ncbi_taxon = self._dispatch(ncbi_taxa)
         self.strain_taxid = self._get_strain_taxid()
         self.canonical = self._get_canonical()
 
@@ -133,39 +136,39 @@ class BacDiveEntry:
 
         name = self._match_first(ncbi_taxa, 'no rank')
         if name is not None:
-            return 'equivalent', name.taxon.taxid
+            return 'equivalent', name.taxon
 
         name = self._match_first(ncbi_taxa, name_type='type material')
         if name is not None:
-            return 'type-material-of-' + name.taxon.rank, name.taxon.taxid
+            return 'type-material-of-' + name.taxon.rank, name.taxon
 
         rank = 'subspecies' if self.is_subspecies else 'species'
         name = self._match(ncbi_taxa, self.species, rank)
         if name is not None:
-            return 'append-to-' + rank, name.taxon.taxid
+            return 'append-to-' + rank, name.taxon
 
         if rank == 'subspecies':
             name = self._match(ncbi_taxa, self.genus, 'species')
             if name is not None:
-                return 'append-to-species', name.taxon.taxid
+                return 'append-to-species', name.taxon
         name = self._match(ncbi_taxa, self.genus, 'genus')
         if name is not None:
-            return 'append-to-genus', name.taxon.taxid
+            return 'append-to-genus', name.taxon
         name = self._match(ncbi_taxa, self.family, 'family')
         if name is not None:
-            return 'append-to-family', name.taxon.taxid
+            return 'append-to-family', name.taxon
         name = self._match(ncbi_taxa, self.ordo, 'order')
         if name is not None:
-            return 'append-to-order', name.taxon.taxid
+            return 'append-to-order', name.taxon
         name = self._match(ncbi_taxa, self.class_, 'class')
         if name is not None:
-            return 'append-to-class', name.taxon.taxid
+            return 'append-to-class', name.taxon
         name = self._match(ncbi_taxa, self.phylum, 'phylum')
         if name is not None:
-            return 'append-phylum', name.taxon.taxid
+            return 'append-phylum', name.taxon
         name = self._match(ncbi_taxa, self.domain, 'superkingdom')
         if name is not None:
-            return 'append-to-superkingdom', name.taxon.taxid
+            return 'append-to-superkingdom', name.taxon
         return 'fail', None
 
 
@@ -186,7 +189,7 @@ def line(f, sep, *cols, tail=None):
 sys.stderr.write('Reading NCBI taxonomy: %s %s\n' % (NCBI_NODES_FILE, NCBI_NAMES_FILE))
 NCBI_TAXA = read_ncbi_names(read_ncbi_nodes(NCBI_NODES_FILE), NCBI_NAMES_FILE)
 os.makedirs(OUTPUT_DIR, exist_ok=True)
-with of('dispatch-report.txt') as out_dispatch, of('dsmz-nodes.dmp') as out_nodes, of('dsmz-names.dmp') as out_names, of('warnings.txt') as out_warn:
+with of('dispatch-report.txt') as out_dispatch, of('dsmz-nodes.dmp') as out_nodes, of('dsmz-names.dmp') as out_names, of('bacdive-to-taxid.txt') as out_link, of('warnings.txt') as out_warn:
     sys.stderr.write('Reading BacDive entries: %s\n' % INPUT_DIR)
     count_entries = 0
     count_dispatches = collections.defaultdict(int)
@@ -201,27 +204,30 @@ with of('dispatch-report.txt') as out_dispatch, of('dsmz-nodes.dmp') as out_node
             e = BacDiveEntry(NCBI_TAXA, os.path.join(dirpath, fn))
             count_entries += 1
             count_dispatches[e.dispatch] += 1
-            line(out_dispatch, '\t', e.filename, e.dispatch, e.ncbi_taxid)
+            line(out_dispatch, '\t', e.filename, e.dispatch, '' if e.ncbi_taxon is None else e.ncbi_taxon.taxid)
             if e.dispatch.startswith('append-to-'):
                 if e.strain_taxid in new_ids:
                     line(out_warn, '\t', e.filename, 'duplicate id ' + e.strain_taxid)
                     continue
+                if e.ncbi_taxon is not None and e.ncbi_taxon.division_id != '0':
+                    line(out_warn, '\t', e.filename, 'not bacteria (div %s) (strain %s) (taxid %s)' % (e.ncbi_taxon.division_id, e.strain_taxid, e.ncbi_taxon.taxid))
                 new_ids.add(e.strain_taxid)
                 line(out_nodes, '\t|\t',
                      e.strain_taxid,  # taxid
-                     e.ncbi_taxid,  # parent taxid
+                     e.ncbi_taxon.taxid,  # parent taxid
                      'no rank',  # rank
                      '',  # embl code
-                     0,  # division id
-                     1,  # inherited div flag
+                     e.ncbi_taxon.division_id,   # division id
+                     1,   # inherited div flag
                      11,  # genetic code id
-                     1,  # inherited GC  flag
-                     0,  # mitochondrial genetic code id
-                     1,  # inherited MGC flag
-                     1,  # GenBank hidden flag
-                     0,  # hidden subtree root flag
+                     1,   # inherited GC  flag
+                     0,   # mitochondrial genetic code id
+                     1,   # inherited MGC flag
+                     1,   # GenBank hidden flag
+                     0,   # hidden subtree root flag
                      '',  # comments
                      tail='\t|')
+                line(out_link, '\t', e.bacdive_id, e.strain_taxid)
                 count_new_nodes += 1
                 if e.canonical is None:
                     line(out_warn, '\t', e.filename, 'no canonical')
@@ -230,9 +236,10 @@ with of('dispatch-report.txt') as out_dispatch, of('dsmz-nodes.dmp') as out_node
                         line(out_names, '\t|\t', e.strain_taxid, name, '', 'scientific name' if name == e.canonical else 'strain number', tail='\t|')
                         count_names_of_new_nodes += 1
             elif e.dispatch == 'equivalent' or e.dispatch.startswith('type-material-of-'):
+                line(out_link, '\t', e.bacdive_id, e.ncbi_taxon.taxid)
                 for name in itertools.chain(e.strain_number, e.complete_name):
                     if name not in NCBI_TAXA:
-                        line(out_names, '\t|\t', e.ncbi_taxid, name, '', 'strain number', tail='\t|')
+                        line(out_names, '\t|\t', e.ncbi_taxon.taxid, name, '', 'strain number', tail='\t|')
                         count_new_synonyms += 1
     sys.stderr.write('Entries: %d\n' % count_entries)
     sys.stderr.write('Dispatches:\n')