From 2e9b9abb0daba2abb191f493662fe5c7771e27ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Flor=C3=A9al=20Cabanettes?= <floreal.cabanettes@inra.fr> Date: Thu, 6 Apr 2017 11:32:05 +0200 Subject: [PATCH] Bgzip+tabix genotypes simulated vcf file --- build_pop.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build_pop.py b/build_pop.py index 04f1335..f03e253 100755 --- a/build_pop.py +++ b/build_pop.py @@ -112,7 +112,8 @@ with open(os.path.join(prg_path, "template.vcf"), "r") as template: with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed: vcf_reader = vcf.Reader(filename=os.path.join(tmp_dir, 'template.vcf')) - vcf_writer = vcf.Writer(open(os.path.join(output_dir, "genotypes.vcf"), "w"), vcf_reader) + output_vcf = os.path.join(output_dir, "genotypes.vcf") + vcf_writer = vcf.Writer(open(output_vcf, "w"), vcf_reader) for line in bed: parts = line.replace("\n", "").split("\t") freq = float(parts[4]) @@ -136,6 +137,13 @@ with open(os.path.join(tmp_dir, "reference-sv.bed"), "r") as bed: vcf_writer.write_record(vcf_record) vcf_writer.close() + # Bgzip + tabix: + os.system("bgzip -c " + output_vcf + " > " + output_vcf + ".gz") + os.unlink(output_vcf) + output_vcf += ".gz" + os.system("tabix -p vcf " + output_vcf) + + ############################################### # Build fasta chromosomes for each individual # -- GitLab