diff --git a/build_results.py b/build_results.py index a1407f7478fe9117f4eb2a71351af757066670f9..aae6c3cdf1bf99878a1006c406527d9f88e5eef1 100755 --- a/build_results.py +++ b/build_results.py @@ -196,22 +196,19 @@ def get_genotypes(genotypes_file, true_vcf_file): """ genotypes = {} gt_quality = {} - nb_inds = 0 - # Genotypes: - genotypes_raw = os.popen("zcat " + genotypes_file + " " + true_vcf_file + " | " + - os.path.join(prg_path, "vawk") + " '{ print $3,S$*$GT}'").read().split("\n") - for gt in genotypes_raw: - if gt != "": - gt_l = gt.split("\t") - genotypes[gt_l[0]] = gt_l[1:] - nb_inds = len(gt_l[1:]) - - gt_quality_raw = os.popen("zcat " + genotypes_file + " | " + - os.path.join(prg_path, "vawk") + " '{ print $3,S$*$GQ}'").read().split("\n") - for gq in gt_quality_raw: - if gq != "": - gq_l = gq.split("\t") - gt_quality[gq_l[0]] = gq_l[1:] + + # Real data: + reader_t = vcf.VCFReader(filename=true_vcf_file) + for rec_t in reader_t: + genotypes[rec_t.ID] = [x.data.GT for x in rec_t.samples] + + nb_inds = len(list(genotypes.values())[0]) + + # Samples: + reader = vcf.VCFReader(filename=genotypes_file) + for rec in reader: + genotypes[rec.ID] = [x.data.GT for x in rec.samples] + gt_quality[rec.ID] = [x.data.GQ for x in rec.samples] return genotypes, gt_quality, nb_inds @@ -738,7 +735,7 @@ def main(): if args.genotypes: # Genotype (sheets 2&3): cells_gt, cells_gq = fill_genotypes_data(i, 2 + ((nb_tools + 1) * nb_inds), cells_gt, cells_gq, - record["tools"][tool], my_genotypes, args.haploid) + record["tools"][tool], my_genotypes, args.haploid) else: sv_format = {}