Newer
Older
ROOTPATH = os.path.dirname(os.path.dirname(workflow.snakefile))
sys.path.insert(0, os.path.join(ROOTPATH, "lib"))
os.environ["PYTHONPATH"] = os.path.join(ROOTPATH, "lib")
os.environ["PATH"] = ":".join([os.path.join(ROOTPATH, "bin"), os.path.join(ROOTPATH, "popsim"), os.environ["PATH"]])
WDIR = config['wdir']
workdir: WDIR
NB_INDS = config["nb_inds"]
REFERENCE = config["reference"]
REFBUNDLE = config["refbundle"] if "refbundle" in config else None
NSTRETCHES = "exclusion/gaps.bed"
SV_LIST = config["sv_list"] if "sv_list" in config else None
COVERAGE = config["coverage"]
FORCE_POLYMORPHISME = config["force_polymorphism"]
HAPLOID = config["haploid"]
PROBA_DEL = config["proba_del"]
PROBA_INV = config["proba_inv"]
Mathieu Charles
committed
PROBA_DUP = config["proba_dup"]
READ_LEN = config["read_len"]
INSERT_LEN_MEAN = config["insert_len_mean"]
INSERT_LEN_SD = config["insert_len_sd"]
MIN_DELETIONS = config["min_deletions"]
MIN_INVERSIONS = config["min_inversions"]
Mathieu Charles
committed
MIN_DUPLICATIONS = config["min_duplications"]
FREQ_DEL = config["freq_del"]
FREQ_INV = config["freq_inv"]
Mathieu Charles
committed
FREQ_DUP = config["freq_dup"]
MAX_TRY = config["max_try"]
GENOTYPES = config["genotypes"] if "genotypes" in config else None
OVERLAP_CUTOFF = config["overlap_cutoff"]
LEFT_PRECISION = config["left_precision"]
RIGHT_PRECISION = config["right_precision"]
CHROMS = config['chromosomes']
# Functions here
def buildpop_inputs():
inputs = {
"reference": REFERENCE
}
params = {
"nstretches": False,
"genotypes": False,
"sv_list": False
}
if NSTRETCHES is not None:
inputs["nstretches"] = NSTRETCHES
params["nstretches"] = True
if GENOTYPES is not None:
inputs["genotypes"] = GENOTYPES
params["genotypes"] = True
if SV_LIST is not None:
inputs["sv_list"] = SV_LIST
params["sv_list"] = True
return inputs, params
def buildpop_outputs():
outputs = []
for n in range(1, NB_INDS+1):
outputs += [os.path.abspath(os.path.join("pop", "indiv%d_1.fq.gz" % n)), os.path.abspath(os.path.join("pop", "indiv%d_2.fq.gz" % n))]
return outputs
bp_inputs, bp_params = buildpop_inputs()
bp_outputs = buildpop_outputs()
include: "referencebundle.snk"
include: "detection.snk"
expand("results/{svtype}/Summarized_results_{svtype}.ipynb", svtype=config["varianttypes"]),
expand("results/{svtype}/Summarized_results_{svtype}.html", svtype=config["varianttypes"]),
#set_all_inputs
#expand("bams/{sample}.bam", sample=["indiv%d" % i for i in range(1, NB_INDS+1)])
run:
from Bio import SeqIO
with open(input[0], "rU") as handle, open(output[0], "w") as f:
for seq in SeqIO.parse(handle, "fasta"):
if seq.id in CHROMS:
SeqIO.write([seq], f, "fasta")
os.remove(input[0])
i_fai = input[0] + ".fai"
if os.path.exists(i_fai):
os.remove(i_fai)
rule index_fasta:
input:
"reference.fasta"
output:
"reference.fasta.fai"
log:
"logs/index_fasta.log"
shell:
"samtools faidx {input} &> {log}"
*bp_outputs,
os.path.join("pop", "genotypes.vcf.gz")
nb_inds = NB_INDS,
out_dir = "pop",
coverage = COVERAGE,
force_polymorphism = FORCE_POLYMORPHISME,
haploid = HAPLOID,
proba_del = PROBA_DEL,
proba_inv = PROBA_INV,
Mathieu Charles
committed
proba_dup = PROBA_DUP,
read_len = READ_LEN,
insert_len_mean = INSERT_LEN_MEAN,
insert_len_sd = INSERT_LEN_SD,
min_deletions = MIN_DELETIONS,
min_inversions = MIN_INVERSIONS,
Mathieu Charles
committed
min_duplications = MIN_DUPLICATIONS,
freq_del = FREQ_DEL,
Mathieu Charles
committed
freq_dup = FREQ_DUP,
max_try = MAX_TRY
threads:
8
log:
stdout = "logs/popsim.o",
stderr = "logs/popsim.e"
run:
command = ["build_pop.py",
"-n", str(params.nb_inds),
"-r", input.reference,
"-c", str(params.coverage),
"-o", params.out_dir,
"-pd", str(params.proba_del),
"-pi", str(params.proba_inv),
Mathieu Charles
committed
"-pu", str(params.proba_dup),
"-l", str(params.read_len),
"-m", str(params.insert_len_mean),
"-v", str(params.insert_len_sd),
"-q",
"-md", str(params.min_deletions),
"-mi", str(params.min_inversions),
Mathieu Charles
committed
"-mu", str(params.min_duplications),
"-fd", " ".join(map(str,params.freq_del)),
"-fi", " ".join(map(str,params.freq_inv)),
Mathieu Charles
committed
"-fu", " ".join(map(str,params.freq_dup)),
"--max-try", str(params.max_try),
"-t", str(threads),
"-e"]
if hasattr(input, "nstretches"):
command += ["-ns", input.nstretches]
if hasattr(input, "sv_list"):
command += ["-s", input.sv_list]
if params.force_polymorphism:
command.append("-f")
if params.haploid:
command.append("-a")
shell(" ".join(command) + " 1> %s 2> %s" % (log.stdout, log.stderr))
rule linkdatabams:
input:
bam="bams/{sample}.bam",
bai="bams/{sample}.bam.bai"
output:
bam="data/bams/{sample}.bam",
bai="data/bams/{sample}.bam.bai"
run:
os.symlink(os.path.abspath(input.bam), output.bam)
os.symlink(os.path.abspath(input.bai), output.bai)
rule buildinfilesresults:
input:
genotypes = expand("{batch}/genotypes/{{svtype}}/svtyper_{chrom}_{{svtype}}_genotypes.vcf.gz",
chrom=chromosomes,
batch=samples.keys()),
filtered = expand("{batch}/filtered/{{svtype}}/svtyper_{chrom}_{{svtype}}_genotypes_filtered.vcf.gz",
chrom=chromosomes,
batch=samples.keys())
output:
genotypes = "list_files/tools_results_files_{svtype}.list",
filtered = "list_files/filtered_results_files_{svtype}.list"
threads:
1
run:
with open(output.genotypes, "w") as o_gt:
for i_gt in input.genotypes:
o_gt.write("%s\n" % i_gt)
with open(output.filtered, "w") as o_ft:
for i_ft in input.filtered:
o_ft.write("%s\n" % i_ft)
rule buildresults:
input:
genotypes = "list_files/tools_results_files_{svtype}.list",
filtered = "list_files/filtered_results_files_{svtype}.list",
true_vcf = "pop/genotypes.vcf.gz"
output:
"results/{svtype}/Summarized_results_{svtype}.ipynb",
"results/{svtype}/Summarized_results_{svtype}.html"
params:
overlap_cutoff = OVERLAP_CUTOFF,
left_precision = LEFT_PRECISION,
right_precision = RIGHT_PRECISION,
svlist = SV_LIST,
haploid = HAPLOID
threads:
1
log:
stdout = "logs/results_{svtype}.o",
stderr = "logs/results_{svtype}.e"
Mathieu Charles
committed
shell:
"""
touch results/{wildcards.svtype}/Summarized_results_{wildcards.svtype}.ipynb
touch results/{wildcards.svtype}/Summarized_results_{wildcards.svtype}.html
"""