From b58a72b98b05cb1e23e7cfa5adceeac6126ebbfa Mon Sep 17 00:00:00 2001
From: Floreal Cabanettes <floreal.cabanettes@inra.fr>
Date: Wed, 18 Jul 2018 11:21:33 +0200
Subject: [PATCH] Fix bugs due to new chromosomes filtering

---
 cnvpipelines.py              |  2 +-
 snakecnv/popsim.snk          | 20 ++++++++++++++++++++
 snakecnv/referencebundle.snk | 12 ++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/cnvpipelines.py b/cnvpipelines.py
index 8b7e1d9..46ceaac 100755
--- a/cnvpipelines.py
+++ b/cnvpipelines.py
@@ -700,7 +700,7 @@ class CnvPipeline:
             # Get absolute paths
             reference = os.path.abspath(reference)
 
-            final_reference = os.path.join(self.wdir, "reference.fasta")
+            final_reference = os.path.join(self.wdir, "reference_raw.fasta")
             final_fai = final_reference + ".fai"
             if not os.path.exists(final_reference):
                 os.symlink(reference, final_reference)
diff --git a/snakecnv/popsim.snk b/snakecnv/popsim.snk
index a37a552..f314f9e 100644
--- a/snakecnv/popsim.snk
+++ b/snakecnv/popsim.snk
@@ -29,6 +29,7 @@ GENOTYPES = config["genotypes"] if "genotypes" in config else None
 OVERLAP_CUTOFF = config["overlap_cutoff"]
 LEFT_PRECISION = config["left_precision"]
 RIGHT_PRECISION = config["right_precision"]
+CHROMS = config['chromosomes']
 
 
 # Functions here
@@ -79,8 +80,27 @@ rule all_popsim:
         #expand("bams/{sample}.bam", sample=["indiv%d" % i for i in range(1, NB_INDS+1)])
         #*bp_outputs
 
+rule get_fasta:
+    input:
+        ref = REFERENCE
+    output:
+        touch(".filter-chrs")
+    run:
+        import os
+        import shutil
+        from Bio import SeqIO
+
+        ref_raw = "raw_" + ref
+        shutil.move(ref, ref_raw)
+        with open(ref_raw, "rU") as handle, open(ref, "w") as f:
+            for seq in SeqIO.parse(handle, "fasta"):
+                if seq.id in CHROMS:
+                    SeqIO.write([seq], f, "fasta")
+        os.remove(ref_raw)
+
 rule buildpop:
     input:
+        ".filter-chrs"
         **bp_inputs
     output:
         *bp_outputs,
diff --git a/snakecnv/referencebundle.snk b/snakecnv/referencebundle.snk
index ac792f5..fa15bec 100644
--- a/snakecnv/referencebundle.snk
+++ b/snakecnv/referencebundle.snk
@@ -52,6 +52,18 @@ rule final:
         SUBDIR + PREFIX+".gendermask.bed",
         SUBDIR + PREFIX+".Nstretch.bed"
 
+rule get_fasta:
+    input:
+        SUBDIR + PREFIX+"_raw.fasta"
+    output:
+        SUBDIR + PREFIX+".fasta"
+    run:
+        from Bio import SeqIO
+        with open(input[0], "rU") as handle, open(output[0], "w") as f:
+            for seq in SeqIO.parse(handle, "fasta"):
+                if seq.id in CHROMS:
+                    SeqIO.write([seq], f, "fasta")
+
 rule index:
     input:
         SUBDIR + "{type}.fasta"
-- 
GitLab