diff --git a/svreader/__init__.py b/svreader/__init__.py index 8de6c2832b0810d862a13194db96d4c2eeb4f724..f9482a5cf90ef79eed57abddf704a2783f698b48 100644 --- a/svreader/__init__.py +++ b/svreader/__init__.py @@ -147,7 +147,7 @@ class SVReader(object): svs_supported = set(["DEL", "INS", "DUP", "INV"]) - def __init__(self, file_name, tool_name, reference_handle=None): + def __init__(self, file_name, tool_name="default", reference_handle=None): self.file_name = file_name self.reference_handle = reference_handle self.__tool_name = tool_name diff --git a/svreader/lumpy.py b/svreader/lumpy.py index 17dc75e0b9f2fb633b952e76f6db1d499f30ed65..618f6a402b2e640658c50a364749a411ea001558 100644 --- a/svreader/lumpy.py +++ b/svreader/lumpy.py @@ -137,6 +137,8 @@ class LumpyRecord(SVRecord): return sorted(variant_samples) def MaxIndSupportingRP(self): + # Among all the individuals returns the support of the individual with + # the maximum support sv = self.sv max_ind_supp = 0 for sample in sv: diff --git a/svreader/pindel.py b/svreader/pindel.py index ef65939ed644979847b3fb479b8dd137b6a65f1b..fa5ccb82c9aafa1e2df4d0337c10e8413b0cc56d 100644 --- a/svreader/pindel.py +++ b/svreader/pindel.py @@ -75,7 +75,7 @@ reads supporting the SV (in practice, these numbers are the same) total number of unique supporting reads whose anchors are upstream, the total number of supporting reads whose anchors are downstream, and finally the total number of unique supporting reads whose anchors are downstream. -WARNING see below +WARNING see below WARNING WARNING see below ---- @@ -100,7 +100,7 @@ Following lines are repeated for each sample ''' -''' +''' PINDEL Sample specific number of supporting reads New version of pindel changes the output starting from 32+ Check https://trac.nbic.nl/pipermail/pindel-users/2013-March/000229.html @@ -215,8 +215,12 @@ class PindelRecord(SVRecord): start_pos = int(fields[9]) end_pos = int(fields[10]) bp_range = (int(fields[12]), int(fields[13])) + # The number of reads supporting the SV read_supp = int(fields[15]) + # The number of unique reads supporting the SV + # (so not counting duplicate reads) uniq_read_supp = int(fields[16]) + up_read_supp = int(fields[18]) up_uniq_read_supp = int(fields[19]) down_read_supp = int(fields[21]) @@ -226,6 +230,9 @@ class PindelRecord(SVRecord): num_sample_supp = int(fields[29]) # num_sample_uniq_supp = int(fields[30]) + + # Sample section + #http://seqanswers.com/forums/showthread.php?t=41121 sv = {} samples = [] for i in range(31, len(fields), 7): @@ -239,6 +246,7 @@ class PindelRecord(SVRecord): } samples.append(sv[fields[i]]) + self.su = read_supp self.su = read_supp self.pindel_sv_type = sv_type self.up_read_supp = up_read_supp @@ -274,6 +282,10 @@ class PindelRecord(SVRecord): "NUM_SUPP_SAMPLES": num_sample_supp } + @property + def sv_len(self): + return self.__svlen + def addbatch2Id(self, batch=None): if batch: self.id += "_" + batch @@ -373,16 +385,18 @@ class PindelReader(SVReader): # def SpecificFilterPass(self, record): # # Filtering criteria more than 4 PE # # see - # # http://bcb.io/2014/08/12/validated-whole-genome-structural-variation-detection-using-multiple-callers + # # Sudmant et al 2015 SuppInfo # return (record.length() > 60) def SpecificFilterPass(self, record): - # fILTER if (abs(record.start-record.end+1) >= 2000 or - record.MaxIndSupportingRP() <= 3): + record.MaxIndSupportingRP() <= 4): return False else: return True + +###### Old Stuff ############################ + def remove_duplicate(self, records): """ returns a vector of records where duplicates were removed @@ -416,6 +430,7 @@ class PindelReader(SVReader): return True return False +###### Old Stuff ############################ class PindelWriter(SVWriter): def __init__(self, file_name, reference_contigs, template_reader):