From eb62440103fee2283569dd637b39f72781ff3a59 Mon Sep 17 00:00:00 2001 From: Floreal Cabanettes <floreal.cabanettes@inra.fr> Date: Fri, 23 Mar 2018 14:52:11 +0100 Subject: [PATCH] Fix for pysam 0.14: sort vcf file --- build_pop.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/build_pop.py b/build_pop.py index 3647095..1ea4a0b 100755 --- a/build_pop.py +++ b/build_pop.py @@ -189,8 +189,7 @@ def build_genotypes_vcf_list(deletions: dict, inversions: dict, output_vcf, hapl # Build VCF header: _build_vcf_header(vcf_file, prg_path, tmp_dir, nb_inds) - vcf_reader = vcf.Reader(filename=vcf_file) - vcf_writer = vcf.Writer(open(output_vcf, "w"), vcf_reader) + records = [] # Deletions: for chrm, deletes in deletions.items(): @@ -213,7 +212,7 @@ def build_genotypes_vcf_list(deletions: dict, inversions: dict, output_vcf, hapl info = {"END": delete["end"], "AF": delete["freq"]} vcf_record = vcf.model._Record(chrm, delete["start"], delete["name"], "N", [vcf.model._SV("DEL")], ".", ".", info, "GT", [0], genotypes) - vcf_writer.write_record(vcf_record) + records.append(vcf_record) # Inversions: genotypes_for_inds_INV = OrderedDict() @@ -237,9 +236,16 @@ def build_genotypes_vcf_list(deletions: dict, inversions: dict, output_vcf, hapl info = {"END": inversion["end"], "AF": inversion["freq"]} vcf_record = vcf.model._Record(chrm, inversion["start"], inversion["name"], "N", [vcf.model._SV("INV")], ".", ".", info, "GT", [0], genotypes) - vcf_writer.write_record(vcf_record) + records.append(vcf_record) - vcf_writer.close() + records.sort(key=lambda r: (r.CHROM, r.start)) + + with open(output_vcf, "w") as o_vcf: + vcf_reader = vcf.Reader(filename=vcf_file) + vcf_writer = vcf.Writer(o_vcf, vcf_reader) + for record in records: + vcf_writer.write_record(record) + vcf_writer.close() tabix_compress(output_vcf, output_vcf + ".gz", True) tabix_index(output_vcf + ".gz", force=True, preset="vcf") -- GitLab