From b58a72b98b05cb1e23e7cfa5adceeac6126ebbfa Mon Sep 17 00:00:00 2001 From: Floreal Cabanettes <floreal.cabanettes@inra.fr> Date: Wed, 18 Jul 2018 11:21:33 +0200 Subject: [PATCH] Fix bugs due to new chromosomes filtering --- cnvpipelines.py | 2 +- snakecnv/popsim.snk | 20 ++++++++++++++++++++ snakecnv/referencebundle.snk | 12 ++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/cnvpipelines.py b/cnvpipelines.py index 8b7e1d9..46ceaac 100755 --- a/cnvpipelines.py +++ b/cnvpipelines.py @@ -700,7 +700,7 @@ class CnvPipeline: # Get absolute paths reference = os.path.abspath(reference) - final_reference = os.path.join(self.wdir, "reference.fasta") + final_reference = os.path.join(self.wdir, "reference_raw.fasta") final_fai = final_reference + ".fai" if not os.path.exists(final_reference): os.symlink(reference, final_reference) diff --git a/snakecnv/popsim.snk b/snakecnv/popsim.snk index a37a552..f314f9e 100644 --- a/snakecnv/popsim.snk +++ b/snakecnv/popsim.snk @@ -29,6 +29,7 @@ GENOTYPES = config["genotypes"] if "genotypes" in config else None OVERLAP_CUTOFF = config["overlap_cutoff"] LEFT_PRECISION = config["left_precision"] RIGHT_PRECISION = config["right_precision"] +CHROMS = config['chromosomes'] # Functions here @@ -79,8 +80,27 @@ rule all_popsim: #expand("bams/{sample}.bam", sample=["indiv%d" % i for i in range(1, NB_INDS+1)]) #*bp_outputs +rule get_fasta: + input: + ref = REFERENCE + output: + touch(".filter-chrs") + run: + import os + import shutil + from Bio import SeqIO + + ref_raw = "raw_" + ref + shutil.move(ref, ref_raw) + with open(ref_raw, "rU") as handle, open(ref, "w") as f: + for seq in SeqIO.parse(handle, "fasta"): + if seq.id in CHROMS: + SeqIO.write([seq], f, "fasta") + os.remove(ref_raw) + rule buildpop: input: + ".filter-chrs" **bp_inputs output: *bp_outputs, diff --git a/snakecnv/referencebundle.snk b/snakecnv/referencebundle.snk index ac792f5..fa15bec 100644 --- a/snakecnv/referencebundle.snk +++ b/snakecnv/referencebundle.snk @@ -52,6 +52,18 @@ rule final: SUBDIR + PREFIX+".gendermask.bed", SUBDIR + PREFIX+".Nstretch.bed" +rule get_fasta: + input: + SUBDIR + PREFIX+"_raw.fasta" + output: + SUBDIR + PREFIX+".fasta" + run: + from Bio import SeqIO + with open(input[0], "rU") as handle, open(output[0], "w") as f: + for seq in SeqIO.parse(handle, "fasta"): + if seq.id in CHROMS: + SeqIO.write([seq], f, "fasta") + rule index: input: SUBDIR + "{type}.fasta" -- GitLab