From cc7adfecc03539b058546e6342a48abcbfcb963d Mon Sep 17 00:00:00 2001 From: Thomas Faraut <Thomas.Faraut@inra.fr> Date: Wed, 27 Nov 2019 11:36:47 +0100 Subject: [PATCH] modify pindel specific filter --- svreader/__init__.py | 2 +- svreader/lumpy.py | 2 ++ svreader/pindel.py | 25 ++++++++++++++++++++----- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/svreader/__init__.py b/svreader/__init__.py index 8de6c28..f9482a5 100644 --- a/svreader/__init__.py +++ b/svreader/__init__.py @@ -147,7 +147,7 @@ class SVReader(object): svs_supported = set(["DEL", "INS", "DUP", "INV"]) - def __init__(self, file_name, tool_name, reference_handle=None): + def __init__(self, file_name, tool_name="default", reference_handle=None): self.file_name = file_name self.reference_handle = reference_handle self.__tool_name = tool_name diff --git a/svreader/lumpy.py b/svreader/lumpy.py index 17dc75e..618f6a4 100644 --- a/svreader/lumpy.py +++ b/svreader/lumpy.py @@ -137,6 +137,8 @@ class LumpyRecord(SVRecord): return sorted(variant_samples) def MaxIndSupportingRP(self): + # Among all the individuals returns the support of the individual with + # the maximum support sv = self.sv max_ind_supp = 0 for sample in sv: diff --git a/svreader/pindel.py b/svreader/pindel.py index ef65939..fa5ccb8 100644 --- a/svreader/pindel.py +++ b/svreader/pindel.py @@ -75,7 +75,7 @@ reads supporting the SV (in practice, these numbers are the same) total number of unique supporting reads whose anchors are upstream, the total number of supporting reads whose anchors are downstream, and finally the total number of unique supporting reads whose anchors are downstream. -WARNING see below +WARNING see below WARNING WARNING see below ---- @@ -100,7 +100,7 @@ Following lines are repeated for each sample ''' -''' +''' PINDEL Sample specific number of supporting reads New version of pindel changes the output starting from 32+ Check https://trac.nbic.nl/pipermail/pindel-users/2013-March/000229.html @@ -215,8 +215,12 @@ class PindelRecord(SVRecord): start_pos = int(fields[9]) end_pos = int(fields[10]) bp_range = (int(fields[12]), int(fields[13])) + # The number of reads supporting the SV read_supp = int(fields[15]) + # The number of unique reads supporting the SV + # (so not counting duplicate reads) uniq_read_supp = int(fields[16]) + up_read_supp = int(fields[18]) up_uniq_read_supp = int(fields[19]) down_read_supp = int(fields[21]) @@ -226,6 +230,9 @@ class PindelRecord(SVRecord): num_sample_supp = int(fields[29]) # num_sample_uniq_supp = int(fields[30]) + + # Sample section + #http://seqanswers.com/forums/showthread.php?t=41121 sv = {} samples = [] for i in range(31, len(fields), 7): @@ -239,6 +246,7 @@ class PindelRecord(SVRecord): } samples.append(sv[fields[i]]) + self.su = read_supp self.su = read_supp self.pindel_sv_type = sv_type self.up_read_supp = up_read_supp @@ -274,6 +282,10 @@ class PindelRecord(SVRecord): "NUM_SUPP_SAMPLES": num_sample_supp } + @property + def sv_len(self): + return self.__svlen + def addbatch2Id(self, batch=None): if batch: self.id += "_" + batch @@ -373,16 +385,18 @@ class PindelReader(SVReader): # def SpecificFilterPass(self, record): # # Filtering criteria more than 4 PE # # see - # # http://bcb.io/2014/08/12/validated-whole-genome-structural-variation-detection-using-multiple-callers + # # Sudmant et al 2015 SuppInfo # return (record.length() > 60) def SpecificFilterPass(self, record): - # fILTER if (abs(record.start-record.end+1) >= 2000 or - record.MaxIndSupportingRP() <= 3): + record.MaxIndSupportingRP() <= 4): return False else: return True + +###### Old Stuff ############################ + def remove_duplicate(self, records): """ returns a vector of records where duplicates were removed @@ -416,6 +430,7 @@ class PindelReader(SVReader): return True return False +###### Old Stuff ############################ class PindelWriter(SVWriter): def __init__(self, file_name, reference_contigs, template_reader): -- GitLab