diff --git a/svreader/annotation.py b/svreader/annotation.py index af72a34f57f1b2c0adb6917fd53f36ddfc657cb0..51ac2d190ecf0a3f37ec6eeea1192bc4241c5fe6 100644 --- a/svreader/annotation.py +++ b/svreader/annotation.py @@ -204,7 +204,6 @@ class AnnotateReader(VCFReader): def getHeader(self): return self.vcf_reader.header - def add_annotation_metadata(self): self.addInfo("SOURCEID", 1, "String", "The source sv identifier") @@ -520,7 +519,8 @@ def add_filter_infos_header(reader): reader.addFilter("ABFREQ", "AB frequency <0.3 for >50% heterosamples") -def variant_filtration(SVSet, reader): +def variant_filtration(variants, reader, filter_monomorph=False, + filter_callrate=False): """ Filtering the candidate CNVs according to the following criteria - non duplicate sites - variant sites @@ -534,15 +534,15 @@ def variant_filtration(SVSet, reader): add_callrate_infos_header(reader) add_filter_infos_header(reader) - for sv in SVSet: + for sv in variants: info = sv.record.info sv.record.info['CALLRATE'] = sv.call_rate(13) sv.record.info['VARIANTCALLRATE'] = sv.variant_call_rate(13) - if sv.call_rate(13) < 0.75: + if sv.call_rate(13) < 0.75 and filter_callrate: sv.filter.add("CALLRATE") - if not sv.polymorph(): + if not sv.polymorph() and filter_monomorph: sv.filter.add("MONOMORPH") - if 'NONDUPLICATEOVERLAP' in info and info['NONDUPLICATEOVERLAP'] > 0.7: + if 'NONDUPLICATEOVERLAP' in info and info['NONDUPLICATEOVERLAP'] > 0.8: sv.filter.add("OVERLAP") if "DUPLICATESCORE" in info is not None and info['DUPLICATESCORE'] > -2: sv.filter.add("DUPLICATE") @@ -559,7 +559,7 @@ def AB_filtering(variant_set): if Heterozygote(s): valid_AB_freq.append((s.get('AB')[0] > 0.3)) if (len(valid_AB_freq) > 0 and - sum(valid_AB_freq) < len(valid_AB_freq) / 2): + sum(valid_AB_freq) < len(valid_AB_freq) / 2): sv.filter.add("ABFREQ")