Commit fc5c8906 authored by Thomas Faraut

readlength corrected

parent 22f3bb62
@@ -102,7 +102,7 @@ else:
     chromosomes = config["chromosomes"].split(",")
 # list of variants to be detected
-varianttypes = ['DEL', 'INV', 'DUP']
+varianttypes = ['DEL', 'INV']
 snakemake_utils = SnakemakeUtils(tools)
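
Dropping 'DUP' means any rule that expands over varianttypes stops generating duplication targets. A hypothetical illustration (the rule is assumed, not part of this diff; the path pattern is the one used by getInputs further down):

# Hypothetical sketch, not in this diff: any expansion over varianttypes
# now yields DEL and INV targets only.
rule all:
    input:
        expand("parse/{svtype}/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz",
               svtype=varianttypes, tool=tools, chrom=chromosomes)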
@@ -179,7 +179,8 @@ rule index:
 rule excluded:
     input:
-        expand("data/bams/{sample}.bam", sample=samples)
+        expand("data/bams/{sample}.bam", sample=samples),
+        expand("data/bams/{sample}.bam.bai", sample=samples)
     output:
         "exclusion/excluded.bed"
     params:
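
The hunk header shows this sits just after rule index, which presumably builds the .bam.bai files now declared as inputs, so indexing is forced to run before exclusion regions are computed. A minimal sketch of such a rule, assuming samtools (only the rule name appears in this diff):

# Assumed indexing rule; its body is not shown in the diff.
rule index:
    input:
        "data/bams/{sample}.bam"
    output:
        "data/bams/{sample}.bam.bai"
    shell:
        "samtools index {input}"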
@@ -7,7 +7,9 @@ from shutil import rmtree
 import multiprocessing
 from functools import partial
 import numpy as np
+from scipy import stats
 import pysam
 import pysamstats
@@ -25,11 +27,11 @@ def get_bamcoverage(bamfile, fafile, chrom, window_size=20):
 def get_coverage_cutoff(coverage):
     counts = coverage.reads_all[np.where(coverage.reads_all)]
-    # min_c = min(counts)
-    # max_c = max(counts)
-    mean_c = np.average(counts)
-    # stdev_c = np.std(counts)
-    return 6*mean_c
+    mode = stats.mode(counts)
+    stdev = np.std(counts)
+    # recommended by SpeedSeq
+    return mode[0] + 3 * stdev

 def dump_to_bed(overcovered_regions, fout, window_size):
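
The cutoff thus moves from 6x the mean depth to mode + 3 sd, the heuristic SpeedSeq recommends for flagging over-covered regions. A self-contained sketch of the same computation on synthetic depths (assuming the pre-1.9 SciPy mode API, which returns arrays and matches the mode[0] indexing above):

# Sketch of the new cutoff; on SciPy >= 1.9 use stats.mode(counts,
# keepdims=True) or mode.mode instead of indexing with mode[0].
import numpy as np
from scipy import stats

def coverage_cutoff(counts):
    counts = counts[np.nonzero(counts)]  # keep covered windows only
    mode = stats.mode(counts)            # most frequent depth
    stdev = np.std(counts)
    return mode[0] + 3 * stdev           # mode + 3 sd, as recommended by SpeedSeq

depths = np.array([0, 9, 10, 10, 11, 10, 12, 10, 200])
print(coverage_cutoff(depths))           # the 200x window exceeds the cutoff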
@@ -49,7 +49,8 @@ def get_sample_desc_files(discsplit_files, insert_sizes):
         samples[sample]['lumpy'] = {'id': sample,
                                     'mean': mean,
                                     'stdev': stdev,
-                                    'readlength': readlength,
+                                    'read_length': readlength,
+                                    'min_non_overlap': readlength,
                                     'histo_file': histo_file}
     return samples
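
read_length and min_non_overlap are the key names LUMPY expects in its -pe sample descriptor, where options are passed as comma-separated key:value pairs, so the old readlength key would not have been recognized. A hedged sketch of how such a descriptor could be assembled from this dict (the helper name and the extra fields are assumptions, not shown in this diff):

# Hypothetical helper; bam_file and the numeric defaults are illustrative.
def pe_string(lumpy_info, bam_file):
    fields = dict(lumpy_info, bam_file=bam_file, min_mapping_threshold=20,
                  discordant_z=5, back_distance=10, weight=1)
    return ",".join("{}:{}".format(k, v) for k, v in fields.items())

# e.g. -pe id:sampleA,mean:350,stdev:50,read_length:100,min_non_overlap:100,...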
@@ -93,9 +94,9 @@ def split_string(split_file, sample_info,
 def runLumpy(args):
     bamlist = args.bamlist
     chrom = args.chrom
-    readlength = args.readlength
     output = file_abs_path(args.output)
     threads = args.threads
+    verbose = args.verbose
     bamfiles = get_bamfiles(bamlist)
@@ -120,13 +121,15 @@ def runLumpy(args):
     sr_str = split_string(infos['split'], infos['lumpy'])
     lumpy_str += " -pe " + pe_str + " -sr " + sr_str
     lumpy_str += " | vcf-sort > " + temp_output
+    if verbose:
+        eprint(lumpy_str)
     completed = run(lumpy_str, shell=True)
     tabix_compress(temp_output, output, force=True)
     tabix_index(output, force=True, preset="vcf")
     os.chdir(oldir)
-    rmtree(tempdir)
+    # rmtree(tempdir)
     return completed.returncode
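
eprint is not defined in this diff; a common definition, assumed here, writes to stderr so the echoed LUMPY command line does not mix with anything the pipeline emits on stdout:

# Assumed helper, not shown in the diff.
import sys

def eprint(*args, **kwargs):
    print(*args, file=sys.stderr, **kwargs)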
@@ -147,6 +150,8 @@ def parse_arguments():
     parser.add_argument("-t", "--threads",
                         default=1, type=int,
                         help="number of threads")
+    parser.add_argument("-v", "--verbose", action="store_true", default=False,
+                        help="increase verbosity")
     args = parser.parse_args()
@@ -52,8 +52,7 @@ class SnakemakeUtils:
     def getToolOuputFile(self, wildcards):
         tool_prefix = self.get_tool_prefix(wildcards.tool,
                                            wildcards.chrom, wildcards.svtype)
-        return {'tooloutput': os.path.join(wildcards.svtype,
-                                           wildcards.tool, tool_prefix)}
+        return {'tooloutput': os.path.join(wildcards.tool, tool_prefix)}

     def set_tool_svtypes(self, tool, svtypes):
         self.tools_sv_types[tool] = svtypes
@@ -62,7 +61,7 @@ class SnakemakeUtils:
         inputs = []
         for tool in self.tools:
             if wildcards.svtype in self.tools_sv_types[tool]:
-                inputs.append("parse/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format(
+                inputs.append("parse/{svtype}/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format(
                     tool=tool, chrom=wildcards.chrom, svtype=wildcards.svtype
                 ))
         return inputs
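
Together with the getToolOuputFile change above, the svtype level moves out of the raw tool output path and into the parsed-VCF path. A concrete illustration with hypothetical wildcard values:

# Hypothetical values; tool and chromosome names are illustrative.
tool, chrom, svtype = "lumpy", "chr1", "DEL"
print("parse/{svtype}/{tool}/{tool}_{chrom}_{svtype}_parsed.vcf.gz".format(
    tool=tool, chrom=chrom, svtype=svtype))
# -> parse/DEL/lumpy/lumpy_chr1_DEL_parsed.vcf.gz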