diff --git a/snakecnv/detection.snk b/snakecnv/detection.snk index 6ea9bf0f646de5dff5e27b9b52a4526d943e0fe8..5147ca4b8ba8d778bc9e40bf97ffe783c538e0a7 100644 --- a/snakecnv/detection.snk +++ b/snakecnv/detection.snk @@ -4,15 +4,17 @@ import sys import re import os import pysam -from functions import get_samples from os.path import join, isfile +from itertools import chain +import glob + +from functions import get_samples ROOTPATH = os.path.dirname(workflow.snakefile) sys.path.insert(0, os.path.join(ROOTPATH, "lib")) os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib") - from svsnakemake_utils import SnakemakeUtils size_batches = int(config["batches"] if "batches" in config else 1) @@ -48,7 +50,18 @@ def get_excluded(config): return "" +def get_all_samples(wdir): + files = glob.glob(wdir + "/samples.list.*") + samples = [] + for f in files: + with open(f) as fin: + for line in fin: + samples.append(line.rstrip()) + return samples + + def get_chr_batches(ref_file, chr, chr_batchsize=5000000, overlap=50000): + fa = pysam.FastaFile(ref_file) len_chr = len(fa.fetch(chr)) @@ -84,7 +97,10 @@ EMPTY_TEMPLATE = os.path.join(ROOTPATH, "lib", "svreader", "resources", "templat config['read_length'] = 100 -# list of samples +# list of all samples +all_samples = get_all_samples(WDIR) + +# list of samples as a dictionnary with batch as keys samples = get_samples(config['sample_file'], size_batches, config['start_batch_nb'] if 'start_batch_nb' in config else 1) # list of tools @@ -207,23 +223,35 @@ if "no_detection_index" not in config or not config["no_detection_index"]: shell: "samtools index -@{threads} {input}" +rule mergeexcluded: + input: + expand("exclusion/excluded_{sample}.bed", sample=all_samples), + output: + "exclusion/excluded.bed" + log: + stdout = "logs/exclusion/exclusion.o", + stderr = "logs/exclusion/exclusion.e" + shell: + "cat {input} | bedtools sort | bedtools merge > {output}" + + rule excluded: input: - single_bam = get_first_bam(samples), - single_bai = get_first_bam(samples) + ".bai", + bam = "data/bams/{sample}.bam", + index = "data/bams/{sample}.bam.bai", reference = REFERENCE, fai = REFERENCE + ".fai" output: - "exclusion/excluded.bed" + "exclusion/excluded_{sample}.bed" params: chromosomes = ",".join(chromosomes) threads: - get_threads("excluded", 8) + get_threads("excluded", 4) log: - stdout = "logs/exclusion/exclusion.o", - stderr = "logs/exclusion/exclusion.e" + stdout = "logs/exclusion/exclusion_{sample}.o", + stderr = "logs/exclusion/exclusion_{sample}.e" shell: - "detect_regions_to_exclude.py -b {input.single_bam}" + "detect_regions_to_exclude.py -b {input.bam}" " -f {input.reference}" " -c {params.chromosomes}" " -o {output}" diff --git a/snakecnv/tools/delly.snk b/snakecnv/tools/delly.snk index 8e94aae6794cc3e39c84ec306474e136b253309e..31b3024829a1d7db4cbd90db7f19cff349ab5cf3 100644 --- a/snakecnv/tools/delly.snk +++ b/snakecnv/tools/delly.snk @@ -1,9 +1,9 @@ -delly_sv=['DEL', 'INV', 'DUP'] +delly_sv = ['DEL', 'INV', 'DUP'] rule delly: input: - bamlist="{batch}/bamlist.list", - excluded="exclusion/excluded.bed", + bamlist = "{batch}/bamlist.list", + excluded = "exclusion/excluded.bed", reference = REFERENCE, fai = REFERENCE + ".fai" output: @@ -12,7 +12,7 @@ rule delly: template = EMPTY_TEMPLATE, reference = os.path.abspath(REFERENCE) threads: - get_threads("delly", 8) + get_threads("delly", 6) log: stdout = "{batch}/logs/delly/{chrom}_{svtype}.o", stderr = "{batch}/logs/delly/{chrom}_{svtype}.e" diff --git a/snakecnv/tools/genomestrip.snk b/snakecnv/tools/genomestrip.snk index 3a3522369c0066583e8858cf2f51ad930ce06450..eaf897d6a9e838926949901a3d422319f9e9a48a 100644 --- a/snakecnv/tools/genomestrip.snk +++ b/snakecnv/tools/genomestrip.snk @@ -25,7 +25,7 @@ if "genomestrip" in tools: PREFIX+".dict", PREFIX+".ploidymap.txt", PREFIX+".gendermask.bed", - PREFIX+".Nstretch.bed", + # PREFIX+".Nstretch.bed", unpack(snakemake_utils.get_inputs_bams), bamlist = "{batch}/bamlist.list", gendermap = "{batch}/genomestrip/gendermap.txt", diff --git a/snakecnv/tools/pindel.snk b/snakecnv/tools/pindel.snk index 4498c9227048fd5808a71717c4b79eaf4f234d85..d290f7407c7379d1ca23931f7bdbec78a0badb6e 100644 --- a/snakecnv/tools/pindel.snk +++ b/snakecnv/tools/pindel.snk @@ -34,16 +34,17 @@ rule pindel: "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_INV.gz", "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_TD.gz", threads: - get_threads("pindel", 4) + get_threads("pindel", 5) log: stdout = "{batch}/logs/pindel/{chrom}_{chrbatch}.o", stderr = "{batch}/logs/pindel/{chrom}_{chrbatch}.e" shell: "pindel -f {input.genome} -i {input.statfile}" - " --min_perfect_match_around_BP 20 --max_range_index 2" + " --min_perfect_match_around_BP 20 " " --exclude {input.excluded}" " --number_of_threads {threads}" " --report_interchromosomal_events false" + " --window_size 1" # 1Mb " --minimum_support_for_event 3 -c {wildcards.chrom}:{wildcards.chrbatch}" " -o {wildcards.batch}/pindel/{wildcards.chrom}/{wildcards.chrbatch}/pindel_{wildcards.chrom}" " 1>{log.stdout} 2>{log.stderr}"