diff --git a/build_results.py b/build_results.py
index c79c34c2cd0c77990e41c092390597fa18374846..a25e24382b029d36d79aa15c8b931e2ad8244058 100755
--- a/build_results.py
+++ b/build_results.py
@@ -38,6 +38,8 @@ COLOR_IS_KEPT = "#81F781"
 COLOR_FALSE_POSITIVE = "#FE642E"
 COLOR_WRONG_GT = "#B40404"
 
+ALLOW_VARIANTS = ['del', 'inv']
+
 
 def get_args():
     """
@@ -51,6 +53,7 @@ description: Build results of the simulated data detection")
 
     parser.add_argument('-t', '--true-vcf', type=str, required=True, help='VCF file containing the simulated deletions')
     parser.add_argument('-f', '--filtered-vcf', type=str, required=False, help='File listing VCF files containing the filtered results')
+    parser.add_argument('--type', type=str, choices=ALLOW_VARIANTS, default="del", help="Type of variant")
     parser.add_argument('--overlap_cutoff', type=float, default=0.5, help='cutoff for reciprocal overlap')
     parser.add_argument('--left_precision', type=int, default=-1, help='left breakpoint precision')
     parser.add_argument('--right_precision', type=int, default=-1, help='right breakpoint precision')
@@ -86,19 +89,23 @@ def passed_variant(record):
     return record.filter is None or len(record.filter) == 0 or "PASS" in record.filter
 
 
-def read_vcf_file(infile):
+def read_vcf_file(infile, type_v):
     """
     Read a vcf file
     :param infile: vcf file path
+    :param type_v: type of variant ("del" or "inv"), case-insensitive
     :return: set or records, list of records ids
     """
+    if type_v.lower() not in ALLOW_VARIANTS:
+        raise ValueError("Invalid variant type: %s" % type_v)
     SVSet=[]
     ids = []
     for record in VCFReader(infile):
-        if not passed_variant(record):
-            continue
-        SVSet.append(record)
-        ids.append(record.id)
+        if record.sv_type.lower() == type_v.lower():
+            if not passed_variant(record):
+                continue
+            SVSet.append(record)
+            ids.append(record.id)
     return SVSet, ids
 
 
@@ -811,8 +818,9 @@ def build_xlsx_cols():
             XLSX_COLS.append(alp + j)
 
 
-def init(output, vcf_files, true_vcf, filtered_vcfs=None, overlap_cutoff=0.5,
+def init(output, vcf_files, true_vcf, filtered_vcfs=None, type_v="del", overlap_cutoff=0.5,
          left_precision=sys.maxsize, right_precision=sys.maxsize, no_xls=False, haploid=False):
+
     build_xlsx_cols()
 
     genotypes = {}
@@ -837,12 +845,12 @@ def init(output, vcf_files, true_vcf, filtered_vcfs=None, overlap_cutoff=0.5,
     for infile in vcf_files:
         eprint(" Reading file %s" % infile)
         try:
-            sv_set += read_vcf_file(infile)[0]
+            sv_set += read_vcf_file(infile, type_v)[0]
         except:
             print("Ignoreing file %s" % infile)
 
     eprint(" Reading file %s" % true_vcf)
-    sv_set_to, true_ones_records = read_vcf_file(true_vcf)
+    sv_set_to, true_ones_records = read_vcf_file(true_vcf, type_v)
     sv_set += sv_set_to
 
     # Compute connected components:
@@ -924,8 +932,16 @@ def main():
     """
     # parse the command line args
     args = get_args()
-    init(args.output, get_vcf_files(args.vcfs), args.true_vcf, get_vcf_files(args.filtered_vcf), args.overlap_cutoff,
-         args.left_precision, args.right_precision, args.no_xls, args.haploid)
+    init(output=args.output,
+         vcf_files=get_vcf_files(args.vcfs),
+         true_vcf=args.true_vcf,
+         filtered_vcfs=get_vcf_files(args.filtered_vcf),
+         type_v=args.type,
+         overlap_cutoff=args.overlap_cutoff,
+         left_precision=args.left_precision,
+         right_precision=args.right_precision,
+         no_xls=args.no_xls,
+         haploid=args.haploid)
 
 
 # initialize the script
diff --git a/lib b/lib
index 3cc040bf648e76d911ef7f65ccef78a6ad8695f4..280518a443d37a579762928940355aafb571b671 160000
--- a/lib
+++ b/lib
@@ -1 +1 @@
-Subproject commit 3cc040bf648e76d911ef7f65ccef78a6ad8695f4
+Subproject commit 280518a443d37a579762928940355aafb571b671