diff --git a/snakecnv/align.snk b/snakecnv/align.snk index d8067cc7b08c64257b3172d2a7698540586f6a5e..f36b4111e1e092d240ccfa7bdbbacc6cd28999e3 100644 --- a/snakecnv/align.snk +++ b/snakecnv/align.snk @@ -21,7 +21,7 @@ workdir: wdir snakemake_utils = SnakemakeUtils(samples) -include: "tools/threads.snk" +include: "tools/utils.snk" localrules: all_align @@ -91,4 +91,4 @@ rule markduplicate: """ picard MarkDuplicates I={input.bam} O={output.bam} M={output.metrics} REMOVE_DUPLICATES=true 1> {log.stdout} \ 2> {log.stderr}; - """ \ No newline at end of file + """ diff --git a/snakecnv/popsim.snk b/snakecnv/popsim.snk index f6fdc64e55c3d2b6ef05b718b7fba86c8a8958d1..46c37332d71098a2ecb20069f138c0d1c753019f 100644 --- a/snakecnv/popsim.snk +++ b/snakecnv/popsim.snk @@ -1,6 +1,9 @@ import sys import os +include: "tools/utils.snk" + + ROOTPATH = os.path.dirname(os.path.dirname(workflow.snakefile)) sys.path.insert(0, os.path.join(ROOTPATH, "lib")) os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib") @@ -70,7 +73,6 @@ bp_inputs, bp_params = buildpop_inputs() bp_outputs = buildpop_outputs() -include: "tools/threads.snk" include: "align.snk" include: "referencebundle.snk" include: "detection.snk" @@ -229,4 +231,3 @@ rule buildresults: touch results/{wildcards.svtype}/Summarized_results_{wildcards.svtype}.ipynb touch results/{wildcards.svtype}/Summarized_results_{wildcards.svtype}.html """ - diff --git a/snakecnv/referencebundle.snk b/snakecnv/referencebundle.snk index 6e2a44fab7aa38aba00d3933d62496d885959ba4..8c2d6b944cc45d5bf6acf976ac5382c772fb7be3 100644 --- a/snakecnv/referencebundle.snk +++ b/snakecnv/referencebundle.snk @@ -8,6 +8,8 @@ from os.path import join from pyfaidx import Fasta # from natsort import natsorted +include: "tools/utils.snk" + ROOTPATH = os.path.dirname(workflow.snakefile) sys.path.insert(0, os.path.join(ROOTPATH, "lib")) os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib") @@ -30,15 +32,11 @@ SUFF_TYPES = ("gcmask", "svmask", "lcmask") p_repeats = re.compile('.*(Satellite|Simple_repeat|Low_complexity).*') p_repeats_all = re.compile('.*(Satellite|Simple_repeat|Line|SINE|LTR).*') -def message(mes): - sys.stderr.write("|--- " + mes + "\n") - -include: "tools/threads.snk" - CHROMS = config['chromosomes'] +PERL5LIB = get_perl5lib() -shell.prefix("export PATH=%s/bin:\"$PATH\";" % ROOTPATH) +shell.prefix("export PATH=%s/bin:\"$PATH\"; export PERL5LIB=%s; " % (ROOTPATH, PERL5LIB)) rule final: input: @@ -54,19 +52,24 @@ rule final: rule get_fasta: input: - SUBDIR + PREFIX+"_raw.fasta" + fasta=SUBDIR + PREFIX+"_raw.fasta", + fai=SUBDIR + PREFIX+"_raw.fasta.fai" output: SUBDIR + PREFIX+".fasta" - run: - from Bio import SeqIO - with open(input[0], "rU") as handle, open(output[0], "w") as f: - for seq in SeqIO.parse(handle, "fasta"): - if seq.id in CHROMS: - SeqIO.write([seq], f, "fasta") - os.remove(input[0]) - i_fai = input[0] + ".fai" - if os.path.exists(i_fai): - os.remove(i_fai) + params: + chromosomes=CHROMS + shell: + "samtools faidx {input.fasta} {params.chromosomes} > {output}" + # run: + # from Bio import SeqIO + # with open(input[0], "rU") as handle, open(output[0], "w") as f: + # for seq in SeqIO.parse(handle, "fasta"): + # if seq.id in CHROMS: + # SeqIO.write([seq], f, "fasta") + # os.remove(input[0]) + # i_fai = input[0] + ".fai" + # if os.path.exists(i_fai): + # os.remove(i_fai) rule index: input: @@ -74,7 +77,7 @@ rule index: output: SUBDIR + "{type}.fasta.fai" log: - "logs/index/{type}.log" + SUBDIR + "logs/index/{type}.log" shell: "samtools faidx {input} &> {log}" @@ -100,6 +103,8 @@ rule preparesvmask: "export PATH=$SV_DIR/bwa:\"$PATH\"; export LD_LIBRARY_PATH=$SV_DIR/bwa:\"$LD_LIBRARY_PATH\"; " "ln {input.fasta} {output.seq}; " "samtools faidx {output.seq}; " + # Checking genome size for bwa index + #"genomesize=`cat {output.seq}.fai | awk '{ totsize = totsize +$0}END{ print totsize}'`; " "bwa index -a bwtsw {output.seq} 2> {log} || bwa index -a is {output.seq} 2> {log}" rule mergesvmask: @@ -151,13 +156,16 @@ rule repeatmask: output: SUBDIR + "reference.fasta.out" params: - sp = config['species'] + sp = config['species'], + perl5lib = PERL5LIB threads: get_threads("repeatmask", 12) shell: - "export PATH=$SV_DIR/bwa:\"$PATH\"; export LD_LIBRARY_PATH=$SV_DIR/bwa:\"$LD_LIBRARY_PATH\"; " - "RepeatMasker -pa {threads} -species {params.sp} -xsmall {input.fasta}" - " > {input.fasta}.repeatmasker.log" + """ + export PERL5LIB={params.perl5lib} + RepeatMasker -pa {threads} -species {params.sp} -xsmall {input.fasta} + > {input.fasta}.repeatmasker.log + """ rule rdmask: input: diff --git a/snakecnv/tools/utils.snk b/snakecnv/tools/utils.snk new file mode 100644 index 0000000000000000000000000000000000000000..19a2cf7e71e0e12d2e7bcbab3dd04da08bf5c2fa --- /dev/null +++ b/snakecnv/tools/utils.snk @@ -0,0 +1,21 @@ +import os +import shutil + +ROOTPATH = os.path.dirname(os.path.dirname(workflow.snakefile)) +sys.path.insert(0, os.path.join(ROOTPATH, "lib")) +os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib") + + +def get_threads(rule, default): + cluster_config = snakemake.workflow.cluster_config + if rule in cluster_config and "threads" in cluster_config[rule]: + return cluster_config[rule]["threads"] + if "default" in cluster_config and "threads" in cluster_config["default"]: + return cluster_config["default"]["threads"] + return default + + +def get_perl5lib(): + condaprefix = os.environ["CONDA_PREFIX"] + perllibpath = os.path.join(condaprefix, "lib", "perl5") + return perllibpath