# Patch summary for rewrite-taxonomy.snakefile (descriptive preamble; the patch itself starts at "diff --git").
#
# Hunk 1 (top of file): adds a `rule all` whose inputs are OUTDIR/finish.txt and
#   OUTDIR/taxid_microorganisms.txt, placed before the existing `rule check`.
#   NOTE(review): the rule that produces finish.txt is not visible in these hunks
#   (presumably `rule check`) -- confirm.
# Hunk 2 (`rule microorganisms`): replaces the fixed `taxa+id` file-name prefix with the
#   wildcard `{p}` in both the output (`{p}_microorganisms.txt`) and the `full` input
#   (`{p}_full.txt`), and renames `rule full` to `rule taxaid_full` (its output path,
#   OUTDIR/taxa+id_full.txt, is unchanged).
# Hunk 3 (end of file): adds `rule taxid_full`, which writes OUTDIR/taxid_full.txt by
#   running the same REWRITE_TAXONOMY command as `taxaid_full` with an extra `-taxaDict`
#   flag and a four-field pattern (TAXID, CANONICAL, TAXID_PATH, RANK).
#
# NOTE(review): both `*_full` rules pass the relative paths reject.txt and saturate.txt,
#   and neither path is declared in the rule sections visible here. Whether the tool
#   reads or writes them cannot be told from this patch -- confirm before running the
#   two rules in parallel.
# NOTE(review): this patch is stored on a single physical line; the original line breaks
#   and indentation appear to have been lost and presumably must be restored before the
#   patch can be applied -- verify against the source commit.
diff --git a/rewrite-taxonomy.snakefile b/rewrite-taxonomy.snakefile index f848e6cbf01a6da22e06302a8f721bdc346e1ebd..40c22f63b8e7c5660ef2de71010e6422052ed323 100644 --- a/rewrite-taxonomy.snakefile +++ b/rewrite-taxonomy.snakefile @@ -1,6 +1,12 @@ configfile: 'config.yaml' +rule all: + input: + config['OUTDIR'] + '/finish.txt', + config['OUTDIR'] + '/taxid_microorganisms.txt' + + rule check: input: config['OUTDIR'] + '/taxa+id_microorganisms.txt' @@ -15,17 +21,17 @@ rule check: rule microorganisms: output: - config['OUTDIR'] + '/taxa+id_microorganisms.txt' + config['OUTDIR'] + '/{p}_microorganisms.txt' input: - full=config['OUTDIR'] + '/taxa+id_full.txt', + full=config['OUTDIR'] + '/{p}_full.txt', roots='microorganisms-roots' shell: '''./cut-root.py {input.roots}/* <{input.full} >{output}''' -rule full: +rule taxaid_full: output: config['OUTDIR'] + '/taxa+id_full.txt' @@ -34,3 +40,14 @@ rule full: shell: '''{config[REWRITE_TAXONOMY]} -namesFile {config[OUTDIR]}/{config[NCBI_DIR]}/names.dmp -namesFile {input}/dsmz-names.dmp -prefix ncbi: -rejectionFile reject.txt -saturationFile saturate.txt -pattern '{{NAME}}\t{{TAXID}}\t{{CANONICAL}}\t{{TAXID_PATH}}\t{{POS_TAG}}\t{{RANK}}\t{{SPECIES_TAXID}}\t{{SPECIES_NAME}}\n' {config[OUTDIR]}/{config[NCBI_DIR]}/nodes.dmp {input}/dsmz-nodes.dmp >{output}''' + + +rule taxid_full: + output: + config['OUTDIR'] + '/taxid_full.txt' + + input: + config['OUTDIR'] + '/' + config['DSMZ_MATCH_DIR'] + + shell: + '''{config[REWRITE_TAXONOMY]} -taxaDict -namesFile {config[OUTDIR]}/{config[NCBI_DIR]}/names.dmp -namesFile {input}/dsmz-names.dmp -prefix ncbi: -rejectionFile reject.txt -saturationFile saturate.txt -pattern '{{TAXID}}\t{{CANONICAL}}\t{{TAXID_PATH}}\t{{RANK}}\n' {config[OUTDIR]}/{config[NCBI_DIR]}/nodes.dmp {input}/dsmz-nodes.dmp >{output}'''