Skip to content
Snippets Groups Projects
Commit fd66bc96 authored by Thomas Faraut's avatar Thomas Faraut
Browse files

- delly threads downgraded to 6

- genomestrip .Nstrech file not any more required
- pindel max_xrange 2 removed
- excluded is now preformed for each sample
parent 9f69701c
No related branches found
No related tags found
No related merge requests found
...@@ -4,15 +4,17 @@ import sys ...@@ -4,15 +4,17 @@ import sys
import re import re
import os import os
import pysam import pysam
from functions import get_samples
from os.path import join, isfile from os.path import join, isfile
from itertools import chain
import glob
from functions import get_samples
ROOTPATH = os.path.dirname(workflow.snakefile) ROOTPATH = os.path.dirname(workflow.snakefile)
sys.path.insert(0, os.path.join(ROOTPATH, "lib")) sys.path.insert(0, os.path.join(ROOTPATH, "lib"))
os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib") os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib")
from svsnakemake_utils import SnakemakeUtils from svsnakemake_utils import SnakemakeUtils
size_batches = int(config["batches"] if "batches" in config else 1) size_batches = int(config["batches"] if "batches" in config else 1)
...@@ -48,7 +50,18 @@ def get_excluded(config): ...@@ -48,7 +50,18 @@ def get_excluded(config):
return "" return ""
def get_all_samples(wdir):
files = glob.glob(wdir + "/samples.list.*")
samples = []
for f in files:
with open(f) as fin:
for line in fin:
samples.append(line.rstrip())
return samples
def get_chr_batches(ref_file, chr, chr_batchsize=5000000, overlap=50000): def get_chr_batches(ref_file, chr, chr_batchsize=5000000, overlap=50000):
fa = pysam.FastaFile(ref_file) fa = pysam.FastaFile(ref_file)
len_chr = len(fa.fetch(chr)) len_chr = len(fa.fetch(chr))
...@@ -84,7 +97,10 @@ EMPTY_TEMPLATE = os.path.join(ROOTPATH, "lib", "svreader", "resources", "templat ...@@ -84,7 +97,10 @@ EMPTY_TEMPLATE = os.path.join(ROOTPATH, "lib", "svreader", "resources", "templat
config['read_length'] = 100 config['read_length'] = 100
# list of samples # list of all samples
all_samples = get_all_samples(WDIR)
# list of samples as a dictionnary with batch as keys
samples = get_samples(config['sample_file'], size_batches, config['start_batch_nb'] if 'start_batch_nb' in config else 1) samples = get_samples(config['sample_file'], size_batches, config['start_batch_nb'] if 'start_batch_nb' in config else 1)
# list of tools # list of tools
...@@ -207,23 +223,35 @@ if "no_detection_index" not in config or not config["no_detection_index"]: ...@@ -207,23 +223,35 @@ if "no_detection_index" not in config or not config["no_detection_index"]:
shell: shell:
"samtools index -@{threads} {input}" "samtools index -@{threads} {input}"
rule mergeexcluded:
input:
expand("exclusion/excluded_{sample}.bed", sample=all_samples),
output:
"exclusion/excluded.bed"
log:
stdout = "logs/exclusion/exclusion.o",
stderr = "logs/exclusion/exclusion.e"
shell:
"cat {input} | bedtools sort | bedtools merge > {output}"
rule excluded: rule excluded:
input: input:
single_bam = get_first_bam(samples), bam = "data/bams/{sample}.bam",
single_bai = get_first_bam(samples) + ".bai", index = "data/bams/{sample}.bam.bai",
reference = REFERENCE, reference = REFERENCE,
fai = REFERENCE + ".fai" fai = REFERENCE + ".fai"
output: output:
"exclusion/excluded.bed" "exclusion/excluded_{sample}.bed"
params: params:
chromosomes = ",".join(chromosomes) chromosomes = ",".join(chromosomes)
threads: threads:
get_threads("excluded", 8) get_threads("excluded", 4)
log: log:
stdout = "logs/exclusion/exclusion.o", stdout = "logs/exclusion/exclusion_{sample}.o",
stderr = "logs/exclusion/exclusion.e" stderr = "logs/exclusion/exclusion_{sample}.e"
shell: shell:
"detect_regions_to_exclude.py -b {input.single_bam}" "detect_regions_to_exclude.py -b {input.bam}"
" -f {input.reference}" " -f {input.reference}"
" -c {params.chromosomes}" " -c {params.chromosomes}"
" -o {output}" " -o {output}"
......
delly_sv=['DEL', 'INV', 'DUP'] delly_sv = ['DEL', 'INV', 'DUP']
rule delly: rule delly:
input: input:
bamlist="{batch}/bamlist.list", bamlist = "{batch}/bamlist.list",
excluded="exclusion/excluded.bed", excluded = "exclusion/excluded.bed",
reference = REFERENCE, reference = REFERENCE,
fai = REFERENCE + ".fai" fai = REFERENCE + ".fai"
output: output:
...@@ -12,7 +12,7 @@ rule delly: ...@@ -12,7 +12,7 @@ rule delly:
template = EMPTY_TEMPLATE, template = EMPTY_TEMPLATE,
reference = os.path.abspath(REFERENCE) reference = os.path.abspath(REFERENCE)
threads: threads:
get_threads("delly", 8) get_threads("delly", 6)
log: log:
stdout = "{batch}/logs/delly/{chrom}_{svtype}.o", stdout = "{batch}/logs/delly/{chrom}_{svtype}.o",
stderr = "{batch}/logs/delly/{chrom}_{svtype}.e" stderr = "{batch}/logs/delly/{chrom}_{svtype}.e"
......
...@@ -25,7 +25,7 @@ if "genomestrip" in tools: ...@@ -25,7 +25,7 @@ if "genomestrip" in tools:
PREFIX+".dict", PREFIX+".dict",
PREFIX+".ploidymap.txt", PREFIX+".ploidymap.txt",
PREFIX+".gendermask.bed", PREFIX+".gendermask.bed",
PREFIX+".Nstretch.bed", # PREFIX+".Nstretch.bed",
unpack(snakemake_utils.get_inputs_bams), unpack(snakemake_utils.get_inputs_bams),
bamlist = "{batch}/bamlist.list", bamlist = "{batch}/bamlist.list",
gendermap = "{batch}/genomestrip/gendermap.txt", gendermap = "{batch}/genomestrip/gendermap.txt",
......
...@@ -34,16 +34,17 @@ rule pindel: ...@@ -34,16 +34,17 @@ rule pindel:
"{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_INV.gz", "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_INV.gz",
"{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_TD.gz", "{batch}/pindel/{chrom}/{chrbatch}/pindel_{chrom}_TD.gz",
threads: threads:
get_threads("pindel", 4) get_threads("pindel", 5)
log: log:
stdout = "{batch}/logs/pindel/{chrom}_{chrbatch}.o", stdout = "{batch}/logs/pindel/{chrom}_{chrbatch}.o",
stderr = "{batch}/logs/pindel/{chrom}_{chrbatch}.e" stderr = "{batch}/logs/pindel/{chrom}_{chrbatch}.e"
shell: shell:
"pindel -f {input.genome} -i {input.statfile}" "pindel -f {input.genome} -i {input.statfile}"
" --min_perfect_match_around_BP 20 --max_range_index 2" " --min_perfect_match_around_BP 20 "
" --exclude {input.excluded}" " --exclude {input.excluded}"
" --number_of_threads {threads}" " --number_of_threads {threads}"
" --report_interchromosomal_events false" " --report_interchromosomal_events false"
" --window_size 1" # 1Mb
" --minimum_support_for_event 3 -c {wildcards.chrom}:{wildcards.chrbatch}" " --minimum_support_for_event 3 -c {wildcards.chrom}:{wildcards.chrbatch}"
" -o {wildcards.batch}/pindel/{wildcards.chrom}/{wildcards.chrbatch}/pindel_{wildcards.chrom}" " -o {wildcards.batch}/pindel/{wildcards.chrom}/{wildcards.chrbatch}/pindel_{wildcards.chrom}"
" 1>{log.stdout} 2>{log.stderr}" " 1>{log.stdout} 2>{log.stderr}"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment