diff --git a/build_pop.py b/build_pop.py index 364709523562da96c8a5190a733281e630eee5af..1ea4a0b0352ec8c20d30ae65c9cb92238971e51a 100755 --- a/build_pop.py +++ b/build_pop.py @@ -189,8 +189,7 @@ def build_genotypes_vcf_list(deletions: dict, inversions: dict, output_vcf, hapl # Build VCF header: _build_vcf_header(vcf_file, prg_path, tmp_dir, nb_inds) - vcf_reader = vcf.Reader(filename=vcf_file) - vcf_writer = vcf.Writer(open(output_vcf, "w"), vcf_reader) + records = [] # Deletions: for chrm, deletes in deletions.items(): @@ -213,7 +212,7 @@ def build_genotypes_vcf_list(deletions: dict, inversions: dict, output_vcf, hapl info = {"END": delete["end"], "AF": delete["freq"]} vcf_record = vcf.model._Record(chrm, delete["start"], delete["name"], "N", [vcf.model._SV("DEL")], ".", ".", info, "GT", [0], genotypes) - vcf_writer.write_record(vcf_record) + records.append(vcf_record) # Inversions: genotypes_for_inds_INV = OrderedDict() @@ -237,9 +236,16 @@ def build_genotypes_vcf_list(deletions: dict, inversions: dict, output_vcf, hapl info = {"END": inversion["end"], "AF": inversion["freq"]} vcf_record = vcf.model._Record(chrm, inversion["start"], inversion["name"], "N", [vcf.model._SV("INV")], ".", ".", info, "GT", [0], genotypes) - vcf_writer.write_record(vcf_record) + records.append(vcf_record) - vcf_writer.close() + records.sort(key=lambda r: (r.CHROM, r.start)) + + with open(output_vcf, "w") as o_vcf: + vcf_reader = vcf.Reader(filename=vcf_file) + vcf_writer = vcf.Writer(o_vcf, vcf_reader) + for record in records: + vcf_writer.write_record(record) + vcf_writer.close() tabix_compress(output_vcf, output_vcf + ".gz", True) tabix_index(output_vcf + ".gz", force=True, preset="vcf")