# Patch summary (commentary preamble; everything from the first "diff --git" header on is the patch itself).
#
# config.yaml
#   - Removes the "# NCBI Taxonomy files" comment and the dated entry NCBI_DIR: 'ncbi-taxonomy_2021-03-26'.
#   - Re-adds NCBI_DIR as 'ncbi-taxonomy' after the DSMZ_* directory keys, and adds
#     NCBI_ZIP_URL pointing at the taxdmp.zip archive on ftp.ncbi.nlm.nih.gov.
#   - Also adds five blank lines after OUTDIR.
#
# dsmz-match.snakefile
#   - Drops "import glob".
#     NOTE(review): only the first lines of that file are visible in this hunk -- confirm glob is unused elsewhere.
#
# ncbi-download.snakefile (new file)
#   - rule download: runs curl to write NCBI_ZIP_URL to <OUTDIR>/<NCBI_DIR>/taxdmp.zip.
#   - rule unzip: takes that zip as input and runs "unzip -d <OUTDIR>/<NCBI_DIR>"; its only declared
#     output is <OUTDIR>/<NCBI_DIR>/nodes.dmp.
#     NOTE(review): curl is invoked without --fail/--location, so an HTTP error body would presumably be
#     saved as taxdmp.zip -- confirm whether that is acceptable.
#     NOTE(review): unzip is invoked without -o; behaviour on a re-run over already-extracted files
#     should be checked.
#     NOTE(review): the archive presumably contains more than nodes.dmp; any other extracted file a
#     downstream rule needs is not declared as an output here -- verify.
#
diff --git a/config.yaml b/config.yaml index 96db8a16d94651b34c94045d8ae23a1293712f57..ffa7e07cea268a86a0823603877237975263dae9 100644 --- a/config.yaml +++ b/config.yaml @@ -8,12 +8,15 @@ ALVISNLP: '~/code/alvisnlp/.test/alvisnlp/bin/alvisnlp' REWRITE_TAXONOMY: '~/code/bibliome-java-utils/test/install/bin/rewrite-taxonomy' -# NCBI Taxonomy files -NCBI_DIR: 'ncbi-taxonomy_2021-03-26' - - # Output and working directories OUTDIR: 'test' + + + + + DSMZ_STRAINS_DIR: 'dsmz-strains' DSMZ_MATCH_DIR: 'dsmz-match' +NCBI_DIR: 'ncbi-taxonomy' +NCBI_ZIP_URL: 'https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip' diff --git a/dsmz-match.snakefile b/dsmz-match.snakefile index 30dd51eefe80f9855879cf9ac1a6f31e3bbf39f4..672a553659e3b8342f78371a6d134ca1961b34d0 100644 --- a/dsmz-match.snakefile +++ b/dsmz-match.snakefile @@ -1,6 +1,5 @@ configfile: 'config.yaml' -import glob rule match: ''' diff --git a/ncbi-download.snakefile b/ncbi-download.snakefile new file mode 100644 index 0000000000000000000000000000000000000000..5dd6f111f4915c049a43396ed01212b4933ea8a1 --- /dev/null +++ b/ncbi-download.snakefile @@ -0,0 +1,19 @@ +configfile: 'config.yaml' + + +rule unzip: + output: + config['OUTDIR'] + '/' + config['NCBI_DIR'] + '/nodes.dmp' + + input: + config['OUTDIR'] + '/' + config['NCBI_DIR'] + '/taxdmp.zip' + + shell: + '''unzip -d {config[OUTDIR]}/{config[NCBI_DIR]} {input}''' + +rule download: + output: + config['OUTDIR'] + '/' + config['NCBI_DIR'] + '/taxdmp.zip' + + shell: + '''curl -o {output} '{config[NCBI_ZIP_URL]}' '''