From 2e9b9abb0daba2abb191f493662fe5c7771e27ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Flor=C3=A9al=20Cabanettes?= <floreal.cabanettes@inra.fr>
Date: Thu, 6 Apr 2017 11:32:05 +0200
Subject: [PATCH] Bgzip and tabix-index the simulated genotypes VCF file

---
 build_pop.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/build_pop.py b/build_pop.py
index 04f1335..f03e253 100755
--- a/build_pop.py
+++ b/build_pop.py
@@ -112,7 +112,8 @@ with open(os.path.join(prg_path, "template.vcf"), "r") as template:
 
 with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed:
     vcf_reader = vcf.Reader(filename=os.path.join(tmp_dir, 'template.vcf'))
-    vcf_writer = vcf.Writer(open(os.path.join(output_dir, "genotypes.vcf"), "w"), vcf_reader)
+    output_vcf = os.path.join(output_dir, "genotypes.vcf")
+    vcf_writer = vcf.Writer(open(output_vcf, "w"), vcf_reader)
     for line in bed:
         parts = line.replace("\n", "").split("\t")
         freq = float(parts[4])
@@ -136,6 +137,13 @@ with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed:
         vcf_writer.write_record(vcf_record)
     vcf_writer.close()
 
+    # Bgzip + tabix (no shell; a failed step raises before the plain VCF is deleted):
+    import subprocess
+    with open(output_vcf + ".gz", "wb") as gz_out:
+        subprocess.check_call(["bgzip", "-c", output_vcf], stdout=gz_out)
+    os.unlink(output_vcf)
+    output_vcf += ".gz"
+    subprocess.check_call(["tabix", "-p", "vcf", output_vcf])
 
 ###############################################
 # Build fasta chromosomes for each individual #
-- 
GitLab