From cc7adfecc03539b058546e6342a48abcbfcb963d Mon Sep 17 00:00:00 2001
From: Thomas Faraut <Thomas.Faraut@inra.fr>
Date: Wed, 27 Nov 2019 11:36:47 +0100
Subject: [PATCH] modify pindel specific filter

---
 svreader/__init__.py |  2 +-
 svreader/lumpy.py    |  2 ++
 svreader/pindel.py   | 25 ++++++++++++++++++++-----
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/svreader/__init__.py b/svreader/__init__.py
index 8de6c28..f9482a5 100644
--- a/svreader/__init__.py
+++ b/svreader/__init__.py
@@ -147,7 +147,7 @@ class SVReader(object):
 
     svs_supported = set(["DEL", "INS", "DUP", "INV"])
 
-    def __init__(self, file_name, tool_name, reference_handle=None):
+    def __init__(self, file_name, tool_name="default", reference_handle=None):
         self.file_name = file_name
         self.reference_handle = reference_handle
         self.__tool_name = tool_name
diff --git a/svreader/lumpy.py b/svreader/lumpy.py
index 17dc75e..618f6a4 100644
--- a/svreader/lumpy.py
+++ b/svreader/lumpy.py
@@ -137,6 +137,8 @@ class LumpyRecord(SVRecord):
         return sorted(variant_samples)
 
     def MaxIndSupportingRP(self):
+        # Among all individuals, return the support of the individual with
+        # the maximum support.
         sv = self.sv
         max_ind_supp = 0
         for sample in sv:
diff --git a/svreader/pindel.py b/svreader/pindel.py
index ef65939..fa5ccb8 100644
--- a/svreader/pindel.py
+++ b/svreader/pindel.py
@@ -75,7 +75,7 @@ reads supporting the SV (in practice, these numbers are the same)
 total number of unique supporting reads whose anchors are upstream, the total number of supporting reads whose anchors
  are downstream, and finally the total number of unique supporting reads whose anchors are downstream.
 
-WARNING see below
+WARNING: see below
 
 ----
 
@@ -100,7 +100,7 @@ Following lines are repeated for each sample
 
 '''
 
-'''
+''' PINDEL sample-specific number of supporting reads
 New version of pindel changes the output starting from 32+
  Check https://trac.nbic.nl/pipermail/pindel-users/2013-March/000229.html
 
@@ -215,8 +215,12 @@ class PindelRecord(SVRecord):
         start_pos = int(fields[9])
         end_pos = int(fields[10])
         bp_range = (int(fields[12]), int(fields[13]))
+        # The number of reads supporting the SV
         read_supp = int(fields[15])
+        # The number of unique reads supporting the SV
+        # (so not counting duplicate reads)
         uniq_read_supp = int(fields[16])
+
         up_read_supp = int(fields[18])
         up_uniq_read_supp = int(fields[19])
         down_read_supp = int(fields[21])
@@ -226,6 +230,9 @@ class PindelRecord(SVRecord):
         num_sample_supp = int(fields[29])
         # num_sample_uniq_supp = int(fields[30])
 
+
+        # Sample section
+        # See http://seqanswers.com/forums/showthread.php?t=41121
         sv = {}
         samples = []
         for i in range(31, len(fields), 7):
@@ -239,6 +246,7 @@ class PindelRecord(SVRecord):
                              }
             samples.append(sv[fields[i]])
 
+        self.su = read_supp
         self.su = read_supp
         self.pindel_sv_type = sv_type
         self.up_read_supp = up_read_supp
@@ -274,6 +282,10 @@ class PindelRecord(SVRecord):
             "NUM_SUPP_SAMPLES": num_sample_supp
         }
 
+    @property
+    def sv_len(self):
+        return self.__svlen
+
     def addbatch2Id(self, batch=None):
         if batch:
             self.id += "_" + batch
@@ -373,16 +385,18 @@ class PindelReader(SVReader):
     # def SpecificFilterPass(self, record):
     #     # Filtering criteria more than 4 PE
     #     # see
-    #     # http://bcb.io/2014/08/12/validated-whole-genome-structural-variation-detection-using-multiple-callers
+    #     # Sudmant et al. 2015, Supplementary Information
     #     return (record.length() > 60)
     def SpecificFilterPass(self, record):
-        # fILTER
         if (abs(record.start-record.end+1) >= 2000 or
-            record.MaxIndSupportingRP() <= 3):
+            record.MaxIndSupportingRP() <= 4):
             return False
         else:
             return True
 
+
+###### Old Stuff (begin) ####################
+
     def remove_duplicate(self, records):
         """
            returns a vector of records where duplicates were removed
@@ -416,6 +430,7 @@ class PindelReader(SVReader):
             return True
         return False
 
+###### Old Stuff (end) ######################
 
 class PindelWriter(SVWriter):
     def __init__(self, file_name, reference_contigs, template_reader):
-- 
GitLab