From fd66bc96ab384280c3c247f845d6461c18f537e2 Mon Sep 17 00:00:00 2001 From: tfaraut <Thomas.Faraut@inra.fr> Date: Tue, 4 Dec 2018 16:41:37 +0100 Subject: [PATCH] - delly threads downgraded to 6 - genomestrip .Nstrech file not any more required - pindel max_xrange 2 removed - excluded is now preformed for each sample --- snakecnv/detection.snk | 48 +++++++++++++++++++++++++++------- snakecnv/tools/delly.snk | 8 +++--- snakecnv/tools/genomestrip.snk | 2 +- snakecnv/tools/pindel.snk | 5 ++-- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/snakecnv/detection.snk b/snakecnv/detection.snk index 6ea9bf0..5147ca4 100644 --- a/snakecnv/detection.snk +++ b/snakecnv/detection.snk @@ -4,15 +4,17 @@ import sys import re import os import pysam -from functions import get_samples from os.path import join, isfile +from itertools import chain +import glob + +from functions import get_samples ROOTPATH = os.path.dirname(workflow.snakefile) sys.path.insert(0, os.path.join(ROOTPATH, "lib")) os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib") - from svsnakemake_utils import SnakemakeUtils size_batches = int(config["batches"] if "batches" in config else 1) @@ -48,7 +50,18 @@ def get_excluded(config): return "" +def get_all_samples(wdir): + files = glob.glob(wdir + "/samples.list.*") + samples = [] + for f in files: + with open(f) as fin: + for line in fin: + samples.append(line.rstrip()) + return samples + + def get_chr_batches(ref_file, chr, chr_batchsize=5000000, overlap=50000): + fa = pysam.FastaFile(ref_file) len_chr = len(fa.fetch(chr)) @@ -84,7 +97,10 @@ EMPTY_TEMPLATE = os.path.join(ROOTPATH, "lib", "svreader", "resources", "templat config['read_length'] = 100 -# list of samples +# list of all samples +all_samples = get_all_samples(WDIR) + +# list of samples as a dictionnary with batch as keys samples = get_samples(config['sample_file'], size_batches, config['start_batch_nb'] if 'start_batch_nb' in config else 1) # list of tools @@ -207,23 +223,35 @@ if "no_detection_index" not in config or not config["no_detection_index"]: shell: "samtools index -@{threads} {input}" +rule mergeexcluded: + input: + expand("exclusion/excluded_{sample}.bed", sample=all_samples), + output: + "exclusion/excluded.bed" + log: + stdout = "logs/exclusion/exclusion.o", + stderr = "logs/exclusion/exclusion.e" + shell: + "cat {input} | bedtools sort | bedtools merge > {output}" + + rule excluded: input: - single_bam = get_first_bam(samples), - single_bai = get_first_bam(samples) + ".bai", + bam = "data/bams/{sample}.bam", + index = "data/bams/{sample}.bam.bai", reference = REFERENCE, fai = REFERENCE + ".fai" output: - "exclusion/excluded.bed" + "exclusion/excluded_{sample}.bed" params: chromosomes = ",".join(chromosomes) threads: - get_threads("excluded", 8) + get_threads("excluded", 4) log: - stdout = "logs/exclusion/exclusion.o", - stderr = "logs/exclusion/exclusion.e" + stdout = "logs/exclusion/exclusion_{sample}.o", + stderr = "logs/exclusion/exclusion_{sample}.e" shell: - "detect_regions_to_exclude.py -b {input.single_bam}" + "detect_regions_to_exclude.py -b {input.bam}" " -f {input.reference}" " -c {params.chromosomes}" " -o {output}" diff --git a/snakecnv/tools/delly.snk b/snakecnv/tools/delly.snk index 8e94aae..31b3024 100644 --- a/snakecnv/tools/delly.snk +++ b/snakecnv/tools/delly.snk @@ -1,9 +1,9 @@ -delly_sv=['DEL', 'INV', 'DUP'] +delly_sv = ['DEL', 'INV', 'DUP'] rule delly: input: - bamlist="{batch}/bamlist.list", - excluded="exclusion/excluded.bed", + bamlist = "{batch}/bamlist.list", + excluded = "exclusion/excluded.bed", reference = REFERENCE, fai = REFERENCE + ".fai" output: @@ -12,7 +12,7 @@ rule delly: template = EMPTY_TEMPLATE, reference = os.path.abspath(REFERENCE) threads: - get_threads("delly", 8) + get_threads("delly", 6) log: stdout = "{batch}/logs/delly/{chrom}_{svtype}.o", stderr = "{batch}/logs/delly/{chrom}_{svtype}.e" diff --git a/snakecnv/tools/genomestrip.snk b/snakecnv/tools/genomestrip.snk index 3a35223..eaf897d 100644 --- a/snakecnv/tools/genomestrip.snk +++ b/snakecnv/tools/genomestrip.snk @@ -25,7 +25,7 @@ if "genomestrip" in tools: PREFIX+".dict", PREFIX+".ploidymap.txt", PREFIX+".gendermask.bed", - PREFIX+".Nstretch.bed", + # PREFIX+".Nstretch.bed", unpack(snakemake_utils.get_inputs_bams), bamlist = "{batch}/bamlist.list", gendermap = "{batch}/genomestrip/gendermap.txt", diff --git a/snakecnv/tools/pindel.snk b/snakecnv/tools/pindel.snk index 4498c92..d290f74 100644 --- a/snakecnv/tools/pindel.snk +++ b/snakecnv/tools/pindel.snk @@ -34,16 +34,17 @@ rule pindel: "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_INV.gz", "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_TD.gz", threads: - get_threads("pindel", 4) + get_threads("pindel", 5) log: stdout = "{batch}/logs/pindel/{chrom}_{chrbatch}.o", stderr = "{batch}/logs/pindel/{chrom}_{chrbatch}.e" shell: "pindel -f {input.genome} -i {input.statfile}" - " --min_perfect_match_around_BP 20 --max_range_index 2" + " --min_perfect_match_around_BP 20 " " --exclude {input.excluded}" " --number_of_threads {threads}" " --report_interchromosomal_events false" + " --window_size 1" # 1Mb " --minimum_support_for_event 3 -c {wildcards.chrom}:{wildcards.chrbatch}" " -o {wildcards.batch}/pindel/{wildcards.chrom}/{wildcards.chrbatch}/pindel_{wildcards.chrom}" " 1>{log.stdout} 2>{log.stderr}" -- GitLab