From fd66bc96ab384280c3c247f845d6461c18f537e2 Mon Sep 17 00:00:00 2001
From: tfaraut <Thomas.Faraut@inra.fr>
Date: Tue, 4 Dec 2018 16:41:37 +0100
Subject: [PATCH] - delly threads downgraded to 6 - genomestrip .Nstrech file
 not any more required - pindel max_xrange 2 removed - excluded is now
 preformed for each sample

---
 snakecnv/detection.snk         | 48 +++++++++++++++++++++++++++-------
 snakecnv/tools/delly.snk       |  8 +++---
 snakecnv/tools/genomestrip.snk |  2 +-
 snakecnv/tools/pindel.snk      |  5 ++--
 4 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/snakecnv/detection.snk b/snakecnv/detection.snk
index 6ea9bf0..5147ca4 100644
--- a/snakecnv/detection.snk
+++ b/snakecnv/detection.snk
@@ -4,15 +4,17 @@ import sys
 import re
 import os
 import pysam
-from functions import get_samples
 
 from os.path import join, isfile
+from itertools import chain
+import glob
+
+from functions import get_samples
 
 ROOTPATH = os.path.dirname(workflow.snakefile)
 sys.path.insert(0, os.path.join(ROOTPATH, "lib"))
 os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib")
 
-
 from svsnakemake_utils import SnakemakeUtils
 
 size_batches = int(config["batches"] if "batches" in config else 1)
@@ -48,7 +50,18 @@ def get_excluded(config):
         return ""
 
 
+def get_all_samples(wdir):
+    files = glob.glob(wdir + "/samples.list.*")
+    samples = []
+    for f in files:
+        with open(f) as fin:
+            for line in fin:
+                samples.append(line.rstrip())
+    return samples
+
+
 def get_chr_batches(ref_file, chr, chr_batchsize=5000000, overlap=50000):
+
     fa = pysam.FastaFile(ref_file)
     len_chr = len(fa.fetch(chr))
 
@@ -84,7 +97,10 @@ EMPTY_TEMPLATE = os.path.join(ROOTPATH, "lib", "svreader", "resources", "templat
 
 config['read_length'] = 100
 
-#  list of samples
+# list of all samples
+all_samples = get_all_samples(WDIR)
+
+#  list of samples as a dictionnary with batch as keys
 samples = get_samples(config['sample_file'], size_batches, config['start_batch_nb'] if 'start_batch_nb' in config else 1)
 
 # list of tools
@@ -207,23 +223,35 @@ if "no_detection_index" not in config or not config["no_detection_index"]:
         shell:
             "samtools index -@{threads} {input}"
 
+rule mergeexcluded:
+    input:
+        expand("exclusion/excluded_{sample}.bed", sample=all_samples),
+    output:
+        "exclusion/excluded.bed"
+    log:
+        stdout = "logs/exclusion/exclusion.o",
+        stderr = "logs/exclusion/exclusion.e"
+    shell:
+        "cat {input} | bedtools sort | bedtools merge > {output}"
+
+
 rule excluded:
     input:
-        single_bam = get_first_bam(samples),
-        single_bai = get_first_bam(samples) + ".bai",
+        bam = "data/bams/{sample}.bam",
+        index = "data/bams/{sample}.bam.bai",
         reference = REFERENCE,
         fai = REFERENCE + ".fai"
     output:
-        "exclusion/excluded.bed"
+        "exclusion/excluded_{sample}.bed"
     params:
         chromosomes = ",".join(chromosomes)
     threads:
-        get_threads("excluded", 8)
+        get_threads("excluded", 4)
     log:
-        stdout = "logs/exclusion/exclusion.o",
-        stderr = "logs/exclusion/exclusion.e"
+        stdout = "logs/exclusion/exclusion_{sample}.o",
+        stderr = "logs/exclusion/exclusion_{sample}.e"
     shell:
-        "detect_regions_to_exclude.py -b {input.single_bam}"
+        "detect_regions_to_exclude.py -b {input.bam}"
         " -f {input.reference}"
         " -c {params.chromosomes}"
         " -o {output}"
diff --git a/snakecnv/tools/delly.snk b/snakecnv/tools/delly.snk
index 8e94aae..31b3024 100644
--- a/snakecnv/tools/delly.snk
+++ b/snakecnv/tools/delly.snk
@@ -1,9 +1,9 @@
-delly_sv=['DEL', 'INV', 'DUP']
+delly_sv = ['DEL', 'INV', 'DUP']
 
 rule delly:
     input:
-        bamlist="{batch}/bamlist.list",
-        excluded="exclusion/excluded.bed",
+        bamlist = "{batch}/bamlist.list",
+        excluded = "exclusion/excluded.bed",
         reference = REFERENCE,
         fai = REFERENCE + ".fai"
     output:
@@ -12,7 +12,7 @@ rule delly:
         template = EMPTY_TEMPLATE,
         reference = os.path.abspath(REFERENCE)
     threads:
-        get_threads("delly", 8)
+        get_threads("delly", 6)
     log:
         stdout = "{batch}/logs/delly/{chrom}_{svtype}.o",
         stderr = "{batch}/logs/delly/{chrom}_{svtype}.e"
diff --git a/snakecnv/tools/genomestrip.snk b/snakecnv/tools/genomestrip.snk
index 3a35223..eaf897d 100644
--- a/snakecnv/tools/genomestrip.snk
+++ b/snakecnv/tools/genomestrip.snk
@@ -25,7 +25,7 @@ if "genomestrip" in tools:
             PREFIX+".dict",
             PREFIX+".ploidymap.txt",
             PREFIX+".gendermask.bed",
-            PREFIX+".Nstretch.bed",
+            # PREFIX+".Nstretch.bed",
             unpack(snakemake_utils.get_inputs_bams),
             bamlist = "{batch}/bamlist.list",
             gendermap = "{batch}/genomestrip/gendermap.txt",
diff --git a/snakecnv/tools/pindel.snk b/snakecnv/tools/pindel.snk
index 4498c92..d290f74 100644
--- a/snakecnv/tools/pindel.snk
+++ b/snakecnv/tools/pindel.snk
@@ -34,16 +34,17 @@ rule pindel:
         "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_INV.gz",
         "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_TD.gz",
     threads:
-        get_threads("pindel", 4)
+        get_threads("pindel", 5)
     log:
         stdout = "{batch}/logs/pindel/{chrom}_{chrbatch}.o",
         stderr = "{batch}/logs/pindel/{chrom}_{chrbatch}.e"
     shell:
         "pindel -f {input.genome} -i {input.statfile}"
-        "       --min_perfect_match_around_BP 20 --max_range_index 2"
+        "       --min_perfect_match_around_BP 20 "
         "       --exclude {input.excluded}"
         "       --number_of_threads {threads}"
         "       --report_interchromosomal_events false"
+        "       --window_size 1" # 1Mb
         "       --minimum_support_for_event 3 -c {wildcards.chrom}:{wildcards.chrbatch}"
         "       -o {wildcards.batch}/pindel/{wildcards.chrom}/{wildcards.chrbatch}/pindel_{wildcards.chrom}"
         "       1>{log.stdout} 2>{log.stderr}"
-- 
GitLab