Skip to content
Snippets Groups Projects
Commit fd66bc96 authored by Thomas Faraut's avatar Thomas Faraut
Browse files

- delly threads downgraded to 6

- genomestrip .Nstrech file not any more required
- pindel max_xrange 2 removed
- excluded is now preformed for each sample
parent 9f69701c
No related branches found
No related tags found
No related merge requests found
......@@ -4,15 +4,17 @@ import sys
import re
import os
import pysam
from functions import get_samples
from os.path import join, isfile
from itertools import chain
import glob
from functions import get_samples
ROOTPATH = os.path.dirname(workflow.snakefile)
sys.path.insert(0, os.path.join(ROOTPATH, "lib"))
os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib")
from svsnakemake_utils import SnakemakeUtils
size_batches = int(config["batches"] if "batches" in config else 1)
......@@ -48,7 +50,18 @@ def get_excluded(config):
return ""
def get_all_samples(wdir):
files = glob.glob(wdir + "/samples.list.*")
samples = []
for f in files:
with open(f) as fin:
for line in fin:
samples.append(line.rstrip())
return samples
def get_chr_batches(ref_file, chr, chr_batchsize=5000000, overlap=50000):
fa = pysam.FastaFile(ref_file)
len_chr = len(fa.fetch(chr))
......@@ -84,7 +97,10 @@ EMPTY_TEMPLATE = os.path.join(ROOTPATH, "lib", "svreader", "resources", "templat
config['read_length'] = 100
# list of samples
# list of all samples
all_samples = get_all_samples(WDIR)
# list of samples as a dictionnary with batch as keys
samples = get_samples(config['sample_file'], size_batches, config['start_batch_nb'] if 'start_batch_nb' in config else 1)
# list of tools
......@@ -207,23 +223,35 @@ if "no_detection_index" not in config or not config["no_detection_index"]:
shell:
"samtools index -@{threads} {input}"
rule mergeexcluded:
input:
expand("exclusion/excluded_{sample}.bed", sample=all_samples),
output:
"exclusion/excluded.bed"
log:
stdout = "logs/exclusion/exclusion.o",
stderr = "logs/exclusion/exclusion.e"
shell:
"cat {input} | bedtools sort | bedtools merge > {output}"
rule excluded:
input:
single_bam = get_first_bam(samples),
single_bai = get_first_bam(samples) + ".bai",
bam = "data/bams/{sample}.bam",
index = "data/bams/{sample}.bam.bai",
reference = REFERENCE,
fai = REFERENCE + ".fai"
output:
"exclusion/excluded.bed"
"exclusion/excluded_{sample}.bed"
params:
chromosomes = ",".join(chromosomes)
threads:
get_threads("excluded", 8)
get_threads("excluded", 4)
log:
stdout = "logs/exclusion/exclusion.o",
stderr = "logs/exclusion/exclusion.e"
stdout = "logs/exclusion/exclusion_{sample}.o",
stderr = "logs/exclusion/exclusion_{sample}.e"
shell:
"detect_regions_to_exclude.py -b {input.single_bam}"
"detect_regions_to_exclude.py -b {input.bam}"
" -f {input.reference}"
" -c {params.chromosomes}"
" -o {output}"
......
delly_sv=['DEL', 'INV', 'DUP']
delly_sv = ['DEL', 'INV', 'DUP']
rule delly:
input:
bamlist="{batch}/bamlist.list",
excluded="exclusion/excluded.bed",
bamlist = "{batch}/bamlist.list",
excluded = "exclusion/excluded.bed",
reference = REFERENCE,
fai = REFERENCE + ".fai"
output:
......@@ -12,7 +12,7 @@ rule delly:
template = EMPTY_TEMPLATE,
reference = os.path.abspath(REFERENCE)
threads:
get_threads("delly", 8)
get_threads("delly", 6)
log:
stdout = "{batch}/logs/delly/{chrom}_{svtype}.o",
stderr = "{batch}/logs/delly/{chrom}_{svtype}.e"
......
......@@ -25,7 +25,7 @@ if "genomestrip" in tools:
PREFIX+".dict",
PREFIX+".ploidymap.txt",
PREFIX+".gendermask.bed",
PREFIX+".Nstretch.bed",
# PREFIX+".Nstretch.bed",
unpack(snakemake_utils.get_inputs_bams),
bamlist = "{batch}/bamlist.list",
gendermap = "{batch}/genomestrip/gendermap.txt",
......
......@@ -34,16 +34,17 @@ rule pindel:
"{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_INV.gz",
"{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_TD.gz",
threads:
get_threads("pindel", 4)
get_threads("pindel", 5)
log:
stdout = "{batch}/logs/pindel/{chrom}_{chrbatch}.o",
stderr = "{batch}/logs/pindel/{chrom}_{chrbatch}.e"
shell:
"pindel -f {input.genome} -i {input.statfile}"
" --min_perfect_match_around_BP 20 --max_range_index 2"
" --min_perfect_match_around_BP 20 "
" --exclude {input.excluded}"
" --number_of_threads {threads}"
" --report_interchromosomal_events false"
" --window_size 1" # 1Mb
" --minimum_support_for_event 3 -c {wildcards.chrom}:{wildcards.chrbatch}"
" -o {wildcards.batch}/pindel/{wildcards.chrom}/{wildcards.chrbatch}/pindel_{wildcards.chrom}"
" 1>{log.stdout} 2>{log.stderr}"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment