From fc5c8906bf44edd1d783215c3c6dbc891e9e0c96 Mon Sep 17 00:00:00 2001
From: tfaraut <Thomas.Faraut@inra.fr>
Date: Thu, 12 Apr 2018 16:04:29 +0200
Subject: [PATCH] readlength corrected

---
 snakecnv/Snakefile                        |  5 +++--
 snakecnv/bin/detect_regions_to_exclude.py | 12 +++++++-----
 snakecnv/bin/lumpy.py                     | 11 ++++++++---
 snakecnv/lib/svsnakemake_utils.py         |  5 ++---
 4 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/snakecnv/Snakefile b/snakecnv/Snakefile
index a5f4415..df897d0 100644
--- a/snakecnv/Snakefile
+++ b/snakecnv/Snakefile
@@ -102,7 +102,7 @@ else:
     chromosomes = config["chromosomes"].split(",")
 
 # list of variants to be detected
-varianttypes = ['DEL', 'INV', 'DUP']
+varianttypes = ['DEL', 'INV']
 
 snakemake_utils = SnakemakeUtils(tools)
 
@@ -179,7 +179,8 @@ rule index:
 
 rule excluded:
     input:
-        expand("data/bams/{sample}.bam", sample=samples)
+        expand("data/bams/{sample}.bam", sample=samples),
+        expand("data/bams/{sample}.bam.bai", sample=samples)
     output:
         "exclusion/excluded.bed"
     params:
diff --git a/snakecnv/bin/detect_regions_to_exclude.py b/snakecnv/bin/detect_regions_to_exclude.py
index e70d33d..c19e1ab 100755
--- a/snakecnv/bin/detect_regions_to_exclude.py
+++ b/snakecnv/bin/detect_regions_to_exclude.py
@@ -7,7 +7,9 @@ from shutil import rmtree
 
 import multiprocessing
 from functools import partial
+
 import numpy as np
+from scipy import stats
 
 import pysam
 import pysamstats
@@ -25,11 +27,11 @@ def get_bamcoverage(bamfile, fafile, chrom, window_size=20):
 
 def get_coverage_cutoff(coverage):
     counts = coverage.reads_all[np.where(coverage.reads_all)]
-    # min_c = min(counts)
-    # max_c = max(counts)
-    mean_c = np.average(counts)
-    # stdev_c = np.std(counts)
-    return 6*mean_c
+
+    mode = stats.mode(counts)
+    stdev = np.std(counts)
+    # recommanded by SpeedSeq
+    return mode[0] + 3 * stdev
 
 
 def dump_to_bed(overcovered_regions, fout, window_size):
diff --git a/snakecnv/bin/lumpy.py b/snakecnv/bin/lumpy.py
index 85555d8..17885f4 100755
--- a/snakecnv/bin/lumpy.py
+++ b/snakecnv/bin/lumpy.py
@@ -49,7 +49,8 @@ def get_sample_desc_files(discsplit_files, insert_sizes):
         samples[sample]['lumpy'] = {'id': sample,
                                     'mean': mean,
                                     'stdev': stdev,
-                                    'readlength': readlength,
+                                    'read_length': readlength,
+                                    'min_non_overlap': readlength,
                                     'histo_file': histo_file}
     return samples
 
@@ -93,9 +94,9 @@ def split_string(split_file, sample_info,
 def runLumpy(args):
     bamlist = args.bamlist
     chrom = args.chrom
-    readlength = args.readlength
     output = file_abs_path(args.output)
     threads = args.threads
+    verbose = args.verbose
 
     bamfiles = get_bamfiles(bamlist)
 
@@ -120,13 +121,15 @@ def runLumpy(args):
         sr_str = split_string(infos['split'], infos['lumpy'])
         lumpy_str += " -pe " + pe_str + " -sr " + sr_str
     lumpy_str += " | vcf-sort > " + temp_output
+    if verbose:
+        eprint(lumpy_str)
     completed = run(lumpy_str, shell=True)
 
     tabix_compress(temp_output, output, force=True)
     tabix_index(output, force=True, preset="vcf")
 
     os.chdir(oldir)
-    rmtree(tempdir)
+    # rmtree(tempdir)
 
     return completed.returncode
 
@@ -147,6 +150,8 @@ def parse_arguments():
     parser.add_argument("-t", "--threads",
                         default=1, type=int,
                         help="number of threads")
+    parser.add_argument("-v", "--verbose", action="store_true", default=False,
+                        help="increase verbosity")
 
     args = parser.parse_args()
 
diff --git a/snakecnv/lib/svsnakemake_utils.py b/snakecnv/lib/svsnakemake_utils.py
index 5d2e920..3eb6ecf 100644
--- a/snakecnv/lib/svsnakemake_utils.py
+++ b/snakecnv/lib/svsnakemake_utils.py
@@ -52,8 +52,7 @@ class SnakemakeUtils:
     def getToolOuputFile(self, wildcards):
         tool_prefix = self.get_tool_prefix(wildcards.tool,
                                            wildcards.chrom, wildcards.svtype)
-        return {'tooloutput': os.path.join(wildcards.svtype,
-                                           wildcards.tool, tool_prefix)}
+        return {'tooloutput': os.path.join(wildcards.tool, tool_prefix)}
 
     def set_tool_svtypes(self, tool, svtypes):
         self.tools_sv_types[tool] = svtypes
@@ -62,7 +61,7 @@ class SnakemakeUtils:
         inputs = []
         for tool in self.tools:
             if wildcards.svtype in self.tools_sv_types[tool]:
-                inputs.append("parse/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format(
+                inputs.append("parse/{svtype}/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format(
                     tool=tool, chrom=wildcards.chrom, svtype=wildcards.svtype
                 ))
         return inputs
-- 
GitLab