Skip to content
Snippets Groups Projects
Commit 6e6d44c1 authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Add type of variant to build_results script

parent 0f6750eb
No related branches found
No related tags found
No related merge requests found
......@@ -38,6 +38,8 @@ COLOR_IS_KEPT = "#81F781"
COLOR_FALSE_POSITIVE = "#FE642E"
COLOR_WRONG_GT = "#B40404"
ALLOW_VARIANTS = ['del', 'inv']
def get_args():
"""
......@@ -51,6 +53,7 @@ description: Build results of the simulated data detection")
parser.add_argument('-t', '--true-vcf', type=str, required=True, help='VCF file containing the simulated deletions')
parser.add_argument('-f', '--filtered-vcf', type=str, required=False,
help='File listing VCF files containing the filtered results')
parser.add_argument('-t', '--type', type="str", choices=ALLOW_VARIANTS, help="Type of variant")
parser.add_argument('--overlap_cutoff', type=float, default=0.5, help='cutoff for reciprocal overlap')
parser.add_argument('--left_precision', type=int, default=-1, help='left breakpoint precision')
parser.add_argument('--right_precision', type=int, default=-1, help='right breakpoint precision')
......@@ -86,19 +89,23 @@ def passed_variant(record):
return record.filter is None or len(record.filter) == 0 or "PASS" in record.filter
def read_vcf_file(infile, type_v):
    """
    Read a vcf file, keeping only the records of the requested variant type
    that pass the filter check done by passed_variant()

    :param infile: vcf file path
    :param type_v: type of variant ("del" or "inv"), matched case-insensitively
    :return: list of records, list of records ids
    :raises ValueError: if type_v is not one of ALLOW_VARIANTS (this includes
                        a non-string value such as None)
    """
    # Normalise once so that the validation below and the per-record
    # comparison use the same value; comparing against the raw argument made
    # an upper-case type (e.g. "DEL") pass validation and then match nothing.
    wanted = type_v.lower() if isinstance(type_v, str) else None
    if wanted not in ALLOW_VARIANTS:
        raise ValueError("Invalid variant type: %s" % type_v)

    SVSet = []
    ids = []
    for record in VCFReader(infile):
        # Skip records of another variant type.
        if record.sv_type.lower() != wanted:
            continue
        # Skip records rejected by the filter check.
        if not passed_variant(record):
            continue
        SVSet.append(record)
        ids.append(record.id)
    return SVSet, ids
......@@ -811,8 +818,9 @@ def build_xlsx_cols():
XLSX_COLS.append(alp + j)
def init(output, vcf_files, true_vcf, filtered_vcfs=None, overlap_cutoff=0.5,
def init(output, vcf_files, true_vcf, filtered_vcfs=None, type_v="del", overlap_cutoff=0.5,
left_precision=sys.maxsize, right_precision=sys.maxsize, no_xls=False, haploid=False):
build_xlsx_cols()
genotypes = {}
......@@ -837,12 +845,12 @@ def init(output, vcf_files, true_vcf, filtered_vcfs=None, overlap_cutoff=0.5,
for infile in vcf_files:
eprint(" Reading file %s" % infile)
try:
sv_set += read_vcf_file(infile)[0]
sv_set += read_vcf_file(infile, type_v)[0]
except:
print("Ignoreing file %s" % infile)
eprint(" Reading file %s" % true_vcf)
sv_set_to, true_ones_records = read_vcf_file(true_vcf)
sv_set_to, true_ones_records = read_vcf_file(true_vcf, type_v)
sv_set += sv_set_to
# Compute connected components:
......@@ -924,8 +932,16 @@ def main():
"""
# parse the command line args
args = get_args()
init(args.output, get_vcf_files(args.vcfs), args.true_vcf, get_vcf_files(args.filtered_vcf), args.overlap_cutoff,
args.left_precision, args.right_precision, args.no_xls, args.haploid)
init(output=args.output,
vcf_files=get_vcf_files(args.vcfs),
true_vcf=args.true_vcf,
filtered_vcfs=get_vcf_files(args.filtered_vcf),
type_v=args.type,
overlap_cutoff=args.overlap_cutoff,
left_precision=args.left_precision,
right_precision=args.right_precision,
no_xls=args.no_xls,
haploid=args.haploid)
# initialize the script
......
lib @ 280518a4
Subproject commit 3cc040bf648e76d911ef7f65ccef78a6ad8695f4
Subproject commit 280518a443d37a579762928940355aafb571b671
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment