Skip to content
Snippets Groups Projects
Commit cc7adfec authored by Thomas Faraut's avatar Thomas Faraut
Browse files

modify pindel specific filter

parent 8d032924
No related branches found
No related tags found
No related merge requests found
......@@ -147,7 +147,7 @@ class SVReader(object):
svs_supported = set(["DEL", "INS", "DUP", "INV"])
def __init__(self, file_name, tool_name, reference_handle=None):
def __init__(self, file_name, tool_name="default", reference_handle=None):
self.file_name = file_name
self.reference_handle = reference_handle
self.__tool_name = tool_name
......
......@@ -137,6 +137,8 @@ class LumpyRecord(SVRecord):
return sorted(variant_samples)
def MaxIndSupportingRP(self):
# Return the support of the individual that has the maximum support
# among all individuals
sv = self.sv
max_ind_supp = 0
for sample in sv:
......
......@@ -75,7 +75,7 @@ reads supporting the SV (in practice, these numbers are the same)
total number of unique supporting reads whose anchors are upstream, the total number of supporting reads whose anchors
are downstream, and finally the total number of unique supporting reads whose anchors are downstream.
WARNING see below
WARNING see below WARNING WARNING see below
----
......@@ -100,7 +100,7 @@ Following lines are repeated for each sample
'''
'''
''' PINDEL Sample specific number of supporting reads
New version of pindel changes the output starting from 32+
Check https://trac.nbic.nl/pipermail/pindel-users/2013-March/000229.html
......@@ -215,8 +215,12 @@ class PindelRecord(SVRecord):
start_pos = int(fields[9])
end_pos = int(fields[10])
bp_range = (int(fields[12]), int(fields[13]))
# The number of reads supporting the SV
read_supp = int(fields[15])
# The number of unique reads supporting the SV
# (so not counting duplicate reads)
uniq_read_supp = int(fields[16])
up_read_supp = int(fields[18])
up_uniq_read_supp = int(fields[19])
down_read_supp = int(fields[21])
......@@ -226,6 +230,9 @@ class PindelRecord(SVRecord):
num_sample_supp = int(fields[29])
# num_sample_uniq_supp = int(fields[30])
# Sample section
#http://seqanswers.com/forums/showthread.php?t=41121
sv = {}
samples = []
for i in range(31, len(fields), 7):
......@@ -239,6 +246,7 @@ class PindelRecord(SVRecord):
}
samples.append(sv[fields[i]])
self.su = read_supp
self.su = read_supp
self.pindel_sv_type = sv_type
self.up_read_supp = up_read_supp
......@@ -274,6 +282,10 @@ class PindelRecord(SVRecord):
"NUM_SUPP_SAMPLES": num_sample_supp
}
# Property accessor: exposes the value held in the private (name-mangled)
# attribute ``__svlen`` under the public name ``sv_len``.
# NOTE(review): no setter is visible in this excerpt, so the property
# appears to be read-only -- confirm against the full class.
@property
def sv_len(self):
return self.__svlen
def addbatch2Id(self, batch=None):
if batch:
self.id += "_" + batch
......@@ -373,16 +385,18 @@ class PindelReader(SVReader):
# def SpecificFilterPass(self, record):
# # Filtering criteria more than 4 PE
# # see
# # http://bcb.io/2014/08/12/validated-whole-genome-structural-variation-detection-using-multiple-callers
# # Sudmant et al 2015 SuppInfo
# return (record.length() > 60)
def SpecificFilterPass(self, record):
    """Return True when ``record`` passes the Pindel-specific filter.

    A record is rejected (``False``) as soon as one of these holds:

    * ``abs(record.start - record.end + 1)`` is 2000 or more;
    * ``record.MaxIndSupportingRP()`` is 4 or less.

    Only the current support cut-off (``<= 4``) is applied; the
    superseded ``<= 3`` condition is not kept.

    Args:
        record: object exposing ``start``, ``end`` and a
            ``MaxIndSupportingRP()`` method.

    Returns:
        bool: ``True`` if the record is kept, ``False`` otherwise.
    """
    # Filter on size first; the support is only queried for records that
    # pass the size test.
    # NOTE(review): abs(start - end + 1) equals end - start - 1 when
    # end > start, which differs from the inclusive span end - start + 1.
    # The expression is kept unchanged so the filter's behaviour does not
    # move -- confirm the intended size measure.
    span = abs(record.start - record.end + 1)
    if span >= 2000:
        return False
    if record.MaxIndSupportingRP() <= 4:
        return False
    return True
###### Old Stuff ############################
def remove_duplicate(self, records):
"""
returns a vector of records where duplicates were removed
......@@ -416,6 +430,7 @@ class PindelReader(SVReader):
return True
return False
###### Old Stuff ############################
class PindelWriter(SVWriter):
def __init__(self, file_name, reference_contigs, template_reader):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment