Skip to content
Snippets Groups Projects
Commit cc7adfec authored by Thomas Faraut's avatar Thomas Faraut
Browse files

modify pindel specific filter

parent 8d032924
No related branches found
No related tags found
No related merge requests found
...@@ -147,7 +147,7 @@ class SVReader(object): ...@@ -147,7 +147,7 @@ class SVReader(object):
svs_supported = set(["DEL", "INS", "DUP", "INV"]) svs_supported = set(["DEL", "INS", "DUP", "INV"])
def __init__(self, file_name, tool_name, reference_handle=None): def __init__(self, file_name, tool_name="default", reference_handle=None):
self.file_name = file_name self.file_name = file_name
self.reference_handle = reference_handle self.reference_handle = reference_handle
self.__tool_name = tool_name self.__tool_name = tool_name
......
...@@ -137,6 +137,8 @@ class LumpyRecord(SVRecord): ...@@ -137,6 +137,8 @@ class LumpyRecord(SVRecord):
return sorted(variant_samples) return sorted(variant_samples)
def MaxIndSupportingRP(self): def MaxIndSupportingRP(self):
# Return the support of the individual that has the highest support
# among all individuals
sv = self.sv sv = self.sv
max_ind_supp = 0 max_ind_supp = 0
for sample in sv: for sample in sv:
......
...@@ -75,7 +75,7 @@ reads supporting the SV (in practice, these numbers are the same) ...@@ -75,7 +75,7 @@ reads supporting the SV (in practice, these numbers are the same)
total number of unique supporting reads whose anchors are upstream, the total number of supporting reads whose anchors total number of unique supporting reads whose anchors are upstream, the total number of supporting reads whose anchors
are downstream, and finally the total number of unique supporting reads whose anchors are downstream. are downstream, and finally the total number of unique supporting reads whose anchors are downstream.
WARNING see below WARNING see below WARNING WARNING see below
---- ----
...@@ -100,7 +100,7 @@ Following lines are repeated for each sample ...@@ -100,7 +100,7 @@ Following lines are repeated for each sample
''' '''
''' ''' PINDEL Sample specific number of supporting reads
New version of pindel changes the output starting from 32+ New version of pindel changes the output starting from 32+
Check https://trac.nbic.nl/pipermail/pindel-users/2013-March/000229.html Check https://trac.nbic.nl/pipermail/pindel-users/2013-March/000229.html
...@@ -215,8 +215,12 @@ class PindelRecord(SVRecord): ...@@ -215,8 +215,12 @@ class PindelRecord(SVRecord):
start_pos = int(fields[9]) start_pos = int(fields[9])
end_pos = int(fields[10]) end_pos = int(fields[10])
bp_range = (int(fields[12]), int(fields[13])) bp_range = (int(fields[12]), int(fields[13]))
# The number of reads supporting the SV
read_supp = int(fields[15]) read_supp = int(fields[15])
# The number of unique reads supporting the SV
# (so not counting duplicate reads)
uniq_read_supp = int(fields[16]) uniq_read_supp = int(fields[16])
up_read_supp = int(fields[18]) up_read_supp = int(fields[18])
up_uniq_read_supp = int(fields[19]) up_uniq_read_supp = int(fields[19])
down_read_supp = int(fields[21]) down_read_supp = int(fields[21])
...@@ -226,6 +230,9 @@ class PindelRecord(SVRecord): ...@@ -226,6 +230,9 @@ class PindelRecord(SVRecord):
num_sample_supp = int(fields[29]) num_sample_supp = int(fields[29])
# num_sample_uniq_supp = int(fields[30]) # num_sample_uniq_supp = int(fields[30])
# Sample section
# see http://seqanswers.com/forums/showthread.php?t=41121
sv = {} sv = {}
samples = [] samples = []
for i in range(31, len(fields), 7): for i in range(31, len(fields), 7):
...@@ -239,6 +246,7 @@ class PindelRecord(SVRecord): ...@@ -239,6 +246,7 @@ class PindelRecord(SVRecord):
} }
samples.append(sv[fields[i]]) samples.append(sv[fields[i]])
self.su = read_supp
self.su = read_supp self.su = read_supp
self.pindel_sv_type = sv_type self.pindel_sv_type = sv_type
self.up_read_supp = up_read_supp self.up_read_supp = up_read_supp
...@@ -274,6 +282,10 @@ class PindelRecord(SVRecord): ...@@ -274,6 +282,10 @@ class PindelRecord(SVRecord):
"NUM_SUPP_SAMPLES": num_sample_supp "NUM_SUPP_SAMPLES": num_sample_supp
} }
@property
def sv_len(self):
return self.__svlen
def addbatch2Id(self, batch=None): def addbatch2Id(self, batch=None):
if batch: if batch:
self.id += "_" + batch self.id += "_" + batch
...@@ -373,16 +385,18 @@ class PindelReader(SVReader): ...@@ -373,16 +385,18 @@ class PindelReader(SVReader):
# def SpecificFilterPass(self, record): # def SpecificFilterPass(self, record):
# # Filtering criteria more than 4 PE # # Filtering criteria more than 4 PE
# # see # # see
# # http://bcb.io/2014/08/12/validated-whole-genome-structural-variation-detection-using-multiple-callers # # Sudmant et al 2015 SuppInfo
# return (record.length() > 60) # return (record.length() > 60)
def SpecificFilterPass(self, record): def SpecificFilterPass(self, record):
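# Filter: reject records whose start/end span is 2000 or more, or whose best-supported individual has 4 or fewer supporting reads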
if (abs(record.start-record.end+1) >= 2000 or if (abs(record.start-record.end+1) >= 2000 or
record.MaxIndSupportingRP() <= 3): record.MaxIndSupportingRP() <= 4):
return False return False
else: else:
return True return True
###### Old Stuff ############################
def remove_duplicate(self, records): def remove_duplicate(self, records):
""" """
returns a vector of records where duplicates were removed returns a vector of records where duplicates were removed
...@@ -416,6 +430,7 @@ class PindelReader(SVReader): ...@@ -416,6 +430,7 @@ class PindelReader(SVReader):
return True return True
return False return False
###### Old Stuff ############################
class PindelWriter(SVWriter): class PindelWriter(SVWriter):
def __init__(self, file_name, reference_contigs, template_reader): def __init__(self, file_name, reference_contigs, template_reader):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment