diff --git a/svreader/annotation.py b/svreader/annotation.py index 51ac2d190ecf0a3f37ec6eeea1192bc4241c5fe6..c1aa1cf11170239ecbe6bb856e62c46048f0c4d2 100644 --- a/svreader/annotation.py +++ b/svreader/annotation.py @@ -178,7 +178,7 @@ class AnnotateRecord(VCFRecord): filters = [f for f in record.filter] # We make the assumption when a "." is present no other filter # are present - if len(filters) == 0 or "." in filters: + if not filters or "." in filters: record.filter.clear() record.filter.add("PASS") @@ -416,7 +416,9 @@ def add_redundancy_infos_header(reader): def redundancy_annotator(SVSet, reader, - duplicatescore_threshold=-2, genotyper="svtyper"): + overlap_cutoff, + duplicatescore_threshold=-2, + genotyper="svtyper"): """ Annotating duplicate candidates based on the genotype likelihoods - genotype likelihoods can be provided by svtyper or genomestrip """ @@ -430,7 +432,7 @@ def redundancy_annotator(SVSet, reader, pybed_variants = vcf_to_pybed(SVSet) self_overlap = pybed_variants.intersect(pybed_variants, - f=0.5, r=True, wo=True) + f=overlap_cutoff, r=True, wo=True) seen = defaultdict(tuple) duplicates = defaultdict(list) @@ -519,7 +521,7 @@ def add_filter_infos_header(reader): reader.addFilter("ABFREQ", "AB frequency <0.3 for >50% heterosamples") -def variant_filtration(variants, reader, filter_monomorph=False, +def variant_filtration(variant_set, reader, filter_monomorph=False, filter_callrate=False): """ Filtering the candidate CNVs according to the following criteria - non duplicate sites @@ -534,7 +536,7 @@ def variant_filtration(variants, reader, filter_monomorph=False, add_callrate_infos_header(reader) add_filter_infos_header(reader) - for sv in variants: + for sv in variant_set: info = sv.record.info sv.record.info['CALLRATE'] = sv.call_rate(13) sv.record.info['VARIANTCALLRATE'] = sv.variant_call_rate(13) @@ -563,14 +565,14 @@ def AB_filtering(variant_set): sv.filter.add("ABFREQ") -def get_connected_duplicates(SVSet): +def get_connected_duplicates(variant_set): """ Construct connected components of duplicates and rename the variants """ undirected = Graph() variant_dict = defaultdict() representatives = defaultdict() - for s in SVSet: + for s in variant_set: variant_dict[s.id] = s if "DUPLICATE" in s.filter: for dupli_repr in s.record.info["DUPLICATEOF"]: @@ -604,8 +606,8 @@ def get_tool_name(sv_ident): return sv_ident.split("_")[0] -def set_supporting_tools(SVSet): - for sv in SVSet: +def set_supporting_tools(variant_set): + for sv in variant_set: tools = {get_tool_name(sv.id)} if "DUPLICATES" in sv.record.info: duplicates = sv.record.info['DUPLICATES']