From fc5c8906bf44edd1d783215c3c6dbc891e9e0c96 Mon Sep 17 00:00:00 2001 From: tfaraut <Thomas.Faraut@inra.fr> Date: Thu, 12 Apr 2018 16:04:29 +0200 Subject: [PATCH] readlength corrected --- snakecnv/Snakefile | 5 +++-- snakecnv/bin/detect_regions_to_exclude.py | 12 +++++++----- snakecnv/bin/lumpy.py | 11 ++++++++--- snakecnv/lib/svsnakemake_utils.py | 5 ++--- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/snakecnv/Snakefile b/snakecnv/Snakefile index a5f4415..df897d0 100644 --- a/snakecnv/Snakefile +++ b/snakecnv/Snakefile @@ -102,7 +102,7 @@ else: chromosomes = config["chromosomes"].split(",") # list of variants to be detected -varianttypes = ['DEL', 'INV', 'DUP'] +varianttypes = ['DEL', 'INV'] snakemake_utils = SnakemakeUtils(tools) @@ -179,7 +179,8 @@ rule index: rule excluded: input: - expand("data/bams/{sample}.bam", sample=samples) + expand("data/bams/{sample}.bam", sample=samples), + expand("data/bams/{sample}.bam.bai", sample=samples) output: "exclusion/excluded.bed" params: diff --git a/snakecnv/bin/detect_regions_to_exclude.py b/snakecnv/bin/detect_regions_to_exclude.py index e70d33d..c19e1ab 100755 --- a/snakecnv/bin/detect_regions_to_exclude.py +++ b/snakecnv/bin/detect_regions_to_exclude.py @@ -7,7 +7,9 @@ from shutil import rmtree import multiprocessing from functools import partial + import numpy as np +from scipy import stats import pysam import pysamstats @@ -25,11 +27,11 @@ def get_bamcoverage(bamfile, fafile, chrom, window_size=20): def get_coverage_cutoff(coverage): counts = coverage.reads_all[np.where(coverage.reads_all)] - # min_c = min(counts) - # max_c = max(counts) - mean_c = np.average(counts) - # stdev_c = np.std(counts) - return 6*mean_c + + mode = stats.mode(counts) + stdev = np.std(counts) + # recommanded by SpeedSeq + return mode[0] + 3 * stdev def dump_to_bed(overcovered_regions, fout, window_size): diff --git a/snakecnv/bin/lumpy.py b/snakecnv/bin/lumpy.py index 85555d8..17885f4 100755 --- a/snakecnv/bin/lumpy.py +++ b/snakecnv/bin/lumpy.py @@ -49,7 +49,8 @@ def get_sample_desc_files(discsplit_files, insert_sizes): samples[sample]['lumpy'] = {'id': sample, 'mean': mean, 'stdev': stdev, - 'readlength': readlength, + 'read_length': readlength, + 'min_non_overlap': readlength, 'histo_file': histo_file} return samples @@ -93,9 +94,9 @@ def split_string(split_file, sample_info, def runLumpy(args): bamlist = args.bamlist chrom = args.chrom - readlength = args.readlength output = file_abs_path(args.output) threads = args.threads + verbose = args.verbose bamfiles = get_bamfiles(bamlist) @@ -120,13 +121,15 @@ def runLumpy(args): sr_str = split_string(infos['split'], infos['lumpy']) lumpy_str += " -pe " + pe_str + " -sr " + sr_str lumpy_str += " | vcf-sort > " + temp_output + if verbose: + eprint(lumpy_str) completed = run(lumpy_str, shell=True) tabix_compress(temp_output, output, force=True) tabix_index(output, force=True, preset="vcf") os.chdir(oldir) - rmtree(tempdir) + # rmtree(tempdir) return completed.returncode @@ -147,6 +150,8 @@ def parse_arguments(): parser.add_argument("-t", "--threads", default=1, type=int, help="number of threads") + parser.add_argument("-v", "--verbose", action="store_true", default=False, + help="increase verbosity") args = parser.parse_args() diff --git a/snakecnv/lib/svsnakemake_utils.py b/snakecnv/lib/svsnakemake_utils.py index 5d2e920..3eb6ecf 100644 --- a/snakecnv/lib/svsnakemake_utils.py +++ b/snakecnv/lib/svsnakemake_utils.py @@ -52,8 +52,7 @@ class SnakemakeUtils: def getToolOuputFile(self, wildcards): tool_prefix = self.get_tool_prefix(wildcards.tool, wildcards.chrom, wildcards.svtype) - return {'tooloutput': os.path.join(wildcards.svtype, - wildcards.tool, tool_prefix)} + return {'tooloutput': os.path.join(wildcards.tool, tool_prefix)} def set_tool_svtypes(self, tool, svtypes): self.tools_sv_types[tool] = svtypes @@ -62,7 +61,7 @@ class SnakemakeUtils: inputs = [] for tool in self.tools: if wildcards.svtype in self.tools_sv_types[tool]: - inputs.append("parse/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format( + inputs.append("parse/{svtype}/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format( tool=tool, chrom=wildcards.chrom, svtype=wildcards.svtype )) return inputs -- GitLab