Add filter information on recall and precision first graphs

31b9c3bb · Floreal Cabanettes · 2e1554a4 · 31b9c3bb · 31b9c3bb
Commit 31b9c3bb authored 6 years ago by Floreal Cabanettes
--- a/Summarized_results.ipynb
+++ b/Summarized_results.ipynb
@@ -58,34 +58,54 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "def compute_tp_fp_fn(svs, tool):\n",
+    "def compute_tp_fp_fn(svs, tool, only_filter=False, include_duplicates=True):\n",
    "    tp = 0\n",
    "    fp = 0\n",
    "    fn = 0\n",
    "    start_values = svs[\"{0}__Start\".format(tool)]\n",
    "    for i in range(0, len(start_values)):\n",
-    "        if math.isnan(start_values[i]) and not math.isnan(svs[\"Real_data__Start\"][i]):\n",
+    "        if not only_filter or (svs[\"{0}__Filters\".format(tool)][i] == \"PASS\" or (include_duplicates \n",
+    "                and svs[\"{0}__Filters\".format(tool)][i] == \"DUPLICATE\")):\n",
+    "            if math.isnan(start_values[i]) and not math.isnan(svs[\"Real_data__Start\"][i]):\n",
+    "                fn += 1\n",
+    "            elif not math.isnan(start_values[i]) and not math.isnan(svs[\"Real_data__Start\"][i]):\n",
+    "                tp += 1\n",
+    "            elif not math.isnan(start_values[i]) and math.isnan(svs[\"Real_data__Start\"][i]):\n",
+    "                fp += 1\n",
+    "        elif not math.isnan(svs[\"Real_data__Start\"][i]):\n",
    "            fn += 1\n",
-    "        elif not math.isnan(start_values[i]) and not math.isnan(svs[\"Real_data__Start\"][i]):\n",
-    "            tp += 1\n",
-    "        elif not math.isnan(start_values[i]) and math.isnan(svs[\"Real_data__Start\"][i]):\n",
-    "            fp += 1\n",
    "    return tp, fp, fn\n",
    "\n",
    "recall = OrderedDict()\n",
+    "recall_f = OrderedDict()\n",
    "precision = OrderedDict()\n",
+    "precision_f = OrderedDict()\n",
+    "all_colors = [\"#405f93\", \"#468a56\", \"#be4447\", \"#6553a0\", \"#DF3A01\", \"#868A08\"]\n",
+    "all_colors_f = [\"#072352\", \"#0a3614\", \"#450a0b\", \"#20124b\", \"#461707\", \"#2d2e05\"]\n",
+    "colors = {}\n",
+    "i = 0\n",
    "for tool in tools:\n",
    "    if tool + \"__Start\" in results_df:\n",
    "        tp, fp, fn = compute_tp_fp_fn(results_df, tool)\n",
    "        recall[tool] = [tp / (tp+fn) * 100]\n",
    "        precision[tool] = [tp / (tp+fp) * 100]\n",
+    "        colors[tool] = all_colors[i]\n",
+    "        # Filtered ones:\n",
+    "        tp, fp, fn = compute_tp_fp_fn(results_df, tool, True)\n",
+    "        recall_f[tool] = [tp / (tp+fn) * 100]\n",
+    "        precision_f[tool] = [tp / (tp+fp) * 100]\n",
+    "        i += 1\n",
    "        \n",
    "plt.figure(1, figsize=(20,10))\n",
    "\n",
+    "# clrs = ['grey' if (x < max(values)) else 'red' for x in values ]\n",
+    "\n",
    "# Plot recall\n",
    "plt.subplot(121)\n",
    "recall_df = pd.DataFrame.from_dict(recall, orient=\"columns\")\n",
-    "plot = sns.barplot(data=recall_df)\n",
+    "plot = sns.barplot(data=recall_df, palette=all_colors)\n",
+    "recall_f_df = pd.DataFrame.from_dict(recall_f, orient=\"columns\")\n",
+    "plot = sns.barplot(data=recall_f_df, palette=all_colors_f)\n",
    "plot.set_title(\"Recall\", fontsize=30)\n",
    "plot.set_ylabel(\"Recall (%)\", fontsize=20)\n",
    "plot.set_xlabel(\"Tool\", fontsize=20)\n",
@@ -94,8 +114,10 @@
    "\n",
    "# Plot precision\n",
    "plt.subplot(122)\n",
+    "precision_f_df = pd.DataFrame.from_dict(precision_f, orient=\"columns\")\n",
+    "plot2 = sns.barplot(data=precision_f_df, palette=all_colors_f)\n",
    "precision_df = pd.DataFrame.from_dict(precision, orient=\"columns\")\n",
-    "plot2 = sns.barplot(data=precision_df)\n",
+    "plot2 = sns.barplot(data=precision_df, palette=all_colors)\n",
    "plot2.set_title(\"Precision\", fontsize=30)\n",
    "plot2.set_ylabel(\"Precision (%)\", fontsize=20)\n",
    "plot2.set_xlabel(\"Tool\", fontsize=20)\n",

 %% Cell type:markdown id: tags:

 # CNV detection benchmark

 %% Cell type:code id: tags:

 ``` python
 import pandas as pd
 import numpy as np
 from collections import OrderedDict

 import math

 import re
 import os
 import json
 import pylab as P
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 import seaborn as sns
 sns.set(style="whitegrid", color_codes=True)
 %matplotlib inline

 from pysam import VariantFile
 from collections import defaultdict
 from collections import Counter

 from IPython.display import display, HTML

 # Read tsv file
 results_df = pd.read_table("results_sv_per_tools.tsv", header=0, index_col=0)

 # Retrieve list of tools
 tools = set()
 for col in results_df.columns:
    if col.endswith("__Start") and col != "Real_data__Start" and col != "Filtered_results__Start":
        tools.add(col.split("__")[0])
 ```

 %% Cell type:markdown id: tags:

 ### Recall & Precision

 %% Cell type:code id: tags:

 ``` python
-def compute_tp_fp_fn(svs, tool):
+def compute_tp_fp_fn(svs, tool, only_filter=False, include_duplicates=True):
    tp = 0
    fp = 0
    fn = 0
    start_values = svs["{0}__Start".format(tool)]
    for i in range(0, len(start_values)):
-        if math.isnan(start_values[i]) and not math.isnan(svs["Real_data__Start"][i]):
+        if not only_filter or (svs["{0}__Filters".format(tool)][i] == "PASS" or (include_duplicates
+                and svs["{0}__Filters".format(tool)][i] == "DUPLICATE")):
+            if math.isnan(start_values[i]) and not math.isnan(svs["Real_data__Start"][i]):
+                fn += 1
+            elif not math.isnan(start_values[i]) and not math.isnan(svs["Real_data__Start"][i]):
+                tp += 1
+            elif not math.isnan(start_values[i]) and math.isnan(svs["Real_data__Start"][i]):
+                fp += 1
+        elif not math.isnan(svs["Real_data__Start"][i]):
            fn += 1
-        elif not math.isnan(start_values[i]) and not math.isnan(svs["Real_data__Start"][i]):
-            tp += 1
-        elif not math.isnan(start_values[i]) and math.isnan(svs["Real_data__Start"][i]):
-            fp += 1
    return tp, fp, fn

 recall = OrderedDict()
+recall_f = OrderedDict()
 precision = OrderedDict()
+precision_f = OrderedDict()
+all_colors = ["#405f93", "#468a56", "#be4447", "#6553a0", "#DF3A01", "#868A08"]
+all_colors_f = ["#072352", "#0a3614", "#450a0b", "#20124b", "#461707", "#2d2e05"]
+colors = {}
+i = 0
 for tool in tools:
    if tool + "__Start" in results_df:
        tp, fp, fn = compute_tp_fp_fn(results_df, tool)
        recall[tool] = [tp / (tp+fn) * 100]
        precision[tool] = [tp / (tp+fp) * 100]
+        colors[tool] = all_colors[i]
+        # Filtered ones:
+        tp, fp, fn = compute_tp_fp_fn(results_df, tool, True)
+        recall_f[tool] = [tp / (tp+fn) * 100]
+        precision_f[tool] = [tp / (tp+fp) * 100]
+        i += 1

 plt.figure(1, figsize=(20,10))

+# clrs = ['grey' if (x < max(values)) else 'red' for x in values ]
+
 # Plot recall
 plt.subplot(121)
 recall_df = pd.DataFrame.from_dict(recall, orient="columns")
-plot = sns.barplot(data=recall_df)
+plot = sns.barplot(data=recall_df, palette=all_colors)
+recall_f_df = pd.DataFrame.from_dict(recall_f, orient="columns")
+plot = sns.barplot(data=recall_f_df, palette=all_colors_f)
 plot.set_title("Recall", fontsize=30)
 plot.set_ylabel("Recall (%)", fontsize=20)
 plot.set_xlabel("Tool", fontsize=20)
 plot.tick_params(labelsize=14)
 plot.set_ylim([0,100])

 # Plot precision
 plt.subplot(122)
+precision_f_df = pd.DataFrame.from_dict(precision_f, orient="columns")
+plot2 = sns.barplot(data=precision_f_df, palette=all_colors_f)
 precision_df = pd.DataFrame.from_dict(precision, orient="columns")
-plot2 = sns.barplot(data=precision_df)
+plot2 = sns.barplot(data=precision_df, palette=all_colors)
 plot2.set_title("Precision", fontsize=30)
 plot2.set_ylabel("Precision (%)", fontsize=20)
 plot2.set_xlabel("Tool", fontsize=20)
 plot2.tick_params(labelsize=14)

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ### Influence of variant size on recall

 %% Cell type:code id: tags:

 ``` python
 groups = []
 with open("rules.sim", "r") as rules:
    for line in rules:
        line = line.rstrip()
        if line != "":
            parts = re.split(r"\s+", line)
            groups.append((int(parts[1]), int(parts[2])))

 groups.sort(key=lambda x: x[0])
 ```

 %% Cell type:markdown id: tags:

 #### By tool

 %% Cell type:code id: tags:

 ``` python
 nrows = math.ceil(len(tools)/2)
 ncols = min(2, len(tools))

 palettes = ["Blues_d", "Greens_d", "Reds_d", "Purples_d", "YlOrBr_d", "PuBu_d"]

 plt.figure(1, figsize=(20,nrows * 8))

 nplot=0

 for tool in tools:
    npalette = nplot
    while npalette >= len(palettes):
        npalette -= len(palettes)
    nplot += 1
    results_by_group = OrderedDict()
    for group in groups:
        tmp_res = results_df[(results_df.Real_data__Length >= group[0]) & (results_df.Real_data__Length < group[1])]
        tp, fp, fn = compute_tp_fp_fn(tmp_res, tool)
        results_by_group["-".join(map(str,group))] = [tp / (tp+fn) * 100 if tp+fn >0 else 0]

    recall_df = pd.DataFrame.from_dict(results_by_group, orient="columns")

    plt.subplot(nrows, ncols, nplot)
    plot = sns.barplot(data=recall_df, palette=palettes[npalette])
    plot.set_title(tool, fontsize=25)
    plot.set_ylabel("Recall (%)", fontsize=20)
    plot.set_xlabel("Variant size (bp)", fontsize=20)
    plot.tick_params(labelsize=14)
    plot.set_ylim([0,100])

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 #### Global

 %% Cell type:code id: tags:

 ``` python
 results_by_group = OrderedDict()

 for group in groups:
    tmp_res = results_df[(results_df.Real_data__Length >= group[0]) & (results_df.Real_data__Length < group[1])]
    tp, fp, fn = compute_tp_fp_fn(tmp_res, "Filtered_results")
    results_by_group["-".join(map(str,group))] = [tp / (tp+fn) * 100 if tp+fn >0 else 0]

 plt.figure(1, figsize=(15,8))

 recall_df = pd.DataFrame.from_dict(results_by_group, orient="columns")
 plot = sns.barplot(data=recall_df, color='black')
 plot.set_title("Recall", fontsize=30)
 plot.set_ylabel("Recall (%)", fontsize=20)
 plot.set_xlabel("Variant size (bp)", fontsize=20)
 plot.tick_params(labelsize=14)
 plot.set_ylim([0,100])

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ### Shared variant by tool

 %% Cell type:code id: tags:

 ``` python
 def compute_found_sv(svs: pd.DataFrame, tool: str):
    variants = []
    start_values = svs["{0}__Start".format(tool)]
    for idx in svs.index:
        if not math.isnan(start_values[idx]) and not math.isnan(svs["Real_data__Start"][idx]):
            variants.append(idx)
    return variants

 variants_by_tool = []

 for tool in tools:
    variants_by_tool.append({"name": tool, "data": compute_found_sv(results_df, tool)})
 ```

 %% Cell type:code id: tags:

 ``` python
 html=HTML("<script type='text/javascript' src='http://jvenn.toulouse.inra.fr/app/js/canvas2svg.js'></script> \
 <script type='text/javascript' src='http://jvenn.toulouse.inra.fr/app/js/jvenn.min.js'></script> \
 <div id='draw'></div> \
 <script type='text/javascript'> \
  $(document).ready(function(){ \
     $('#draw').jvenn({ \
       series: " + json.dumps(variants_by_tool) + " \
     }); \
  }); \
 </script>")

 display(html)
 ```

 %% Cell type:markdown id: tags:

 ### Breakpoints precision

 %% Cell type:code id: tags:

 ``` python
 plt.figure(1, figsize=(20,8))

 # Start precision
 df_diffs=pd.read_table("results_sv_diffs_per_tools.tsv", header=0, index_col=0)
 all_diffs_soft_start = pd.DataFrame()
 for tool in tools:
    all_diffs_soft_start[tool] = df_diffs[tool + "__Start"].abs()

 plt.subplot(121)
 plot = sns.stripplot(data=all_diffs_soft_start, jitter=True)
 plot.set_ylim([0,150])
 plot.tick_params(labelsize=15)
 plot.set_title("Start position", fontsize=28, y=1.04)
 plot.set_ylabel("Diff from real data (abs)", fontsize=20)

 # End precision
 df_diffs=pd.read_table("results_sv_diffs_per_tools.tsv", header=0, index_col=0)
 all_diffs_soft_end = pd.DataFrame()
 for tool in tools:
    all_diffs_soft_end[tool] = df_diffs[tool + "__End"].abs()

 plt.subplot(122)
 plot = sns.stripplot(data=all_diffs_soft_end, jitter=True)
 plot.set_ylim([0,150])
 plot.tick_params(labelsize=15)
 plot.set_title("End position", fontsize=28, y=1.04)
 plot.set_ylabel("Diff from real data (abs)", fontsize=20)

 plt.show()
 ```

 %% Cell type:markdown id: tags:

 ## 2. CNV Genotyping

 We compare quality of genotyping

 %% Cell type:code id: tags:

 ``` python
 # Simulated deletions
 total = len(results_df[results_df.Real_data__Start.notnull()])
 ```

 %% Cell type:code id: tags:

 ``` python
 # Predicted deletions genotyped

 df_svtyper=pd.read_csv("results_genotype.tsv",sep='\t',index_col=False,names=['del','software','delsize','recall','left','right'])
 df_svtyper['precision'] = [ "precise" if (x+y)<20 else "unprecise" for (x,y) in zip(df_svtyper.left,df_svtyper.right)]
 df_svtyper['deltype'] = ['small' if x<200 else 'medium' for x in df_svtyper.delsize]
 counts_svtyper = np.unique(df_svtyper.software,return_counts=True)[1]
 ```

 %% Cell type:markdown id: tags:

 #### Genotype recall for each software prediction

 %% Cell type:code id: tags:

 ``` python
 gt_tools = list(tools) + ["pass"]
 plt.figure(1, figsize=(8,5))
 sns.stripplot(x="software", y="recall", jitter=True, palette="Set2", dodge=True, linewidth=1, edgecolor='gray', order=gt_tools,data=df_svtyper)
 axes = sns.boxplot(x="software", y="recall", palette="Set2", order=gt_tools,data=df_svtyper)
 axes.title.set_position([.5, 1.2])
 axes.set_title(str(total)+" simulated variants",size=25)
 axes.axes.xaxis.label.set_size(20)
 axes.axes.yaxis.label.set_size(20)
 axes.tick_params(labelsize=15)
 ymax = axes.get_ylim()[1]
 for i in range(0, len(counts_svtyper)):
    t1=axes.text(-0.1 + (i * 1), ymax+ymax/100, counts_svtyper[i], fontsize=15)
 ```

 %% Cell type:markdown id: tags:

 #### Inluence of precision : precise means less than 20bp

 %% Cell type:code id: tags:

 ``` python
 plt.figure(1, figsize=(8,5))
 sns.stripplot(x="software", y="recall", jitter=True, hue="precision", palette="Set2", dodge=True, linewidth=1, edgecolor='gray', order=gt_tools,data=df_svtyper)
 axes = sns.boxplot(x="software", y="recall",hue="precision",palette="Set2", order=gt_tools,data=df_svtyper)
 axes.title.set_position([.5, 1.2])
 axes.set_ylim(0.15, 1.05)
 axes.set_title(str(total)+" simulated variants",size=25)
 axes.axes.xaxis.label.set_size(20)
 axes.axes.yaxis.label.set_size(20)
 axes.tick_params(labelsize=15)
 ymax = axes.get_ylim()[1]
 for i in range(0, len(counts_svtyper)):
    t1=axes.text(-0.1 + (i * 1), ymax+ymax/100, counts_svtyper[i], fontsize=15)
 plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
 ```

 %% Cell type:markdown id: tags:

 #### Inluence of deletion size : small means less than 200bp

 %% Cell type:code id: tags:

 ``` python
 plt.figure(1, figsize=(8,5))
 sns.stripplot(x="software", y="recall", jitter=True, hue="deltype", palette="Set2", dodge=True, linewidth=1, edgecolor='gray', order=gt_tools,data=df_svtyper)
 axes = sns.boxplot(x="software", y="recall",hue="deltype",palette="Set2", order=gt_tools,data=df_svtyper)
 axes.title.set_position([.5, 1.2])
 axes.set_ylim(0.15, 1.05)
 axes.set_title(str(total)+" simulated variants",size=25)
 axes.axes.xaxis.label.set_size(20)
 axes.axes.yaxis.label.set_size(20)
 axes.tick_params(labelsize=15)
 ymax = axes.get_ylim()[1]
 for i in range(0, len(counts_svtyper)):
    t1=axes.text(-0.1 + (i * 1), ymax+ymax/100, counts_svtyper[i], fontsize=15)
 plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
 ```

 %% Cell type:markdown id: tags:

 ##### Size VS Precision

 %% Cell type:code id: tags:

 ``` python
 plt.figure(1, figsize=(8,5))
 sns.stripplot(x="precision", y="delsize", jitter=True, palette="Set2", dodge=True, linewidth=1, edgecolor='gray', order=['precise', 'unprecise'],data=df_svtyper)
 axes = sns.boxplot(x="precision", y="delsize",palette="Set2", order=['precise', 'unprecise'],data=df_svtyper)
 axes.axes.xaxis.label.set_size(20)
 axes.axes.yaxis.label.set_size(20)
 axes.tick_params(labelsize=15)
 ```

--- a/build_results.py
+++ b/build_results.py
@@ -175,7 +175,7 @@ def get_genotypes(genotypes_files, true_vcf_file):
    return genotypes, gt_quality, nb_inds


-def build_records(genotypes, SVSet, true_ones_records, filtered_records, gt_quality):
+def build_records(genotypes, SVSet, true_ones_records, filtered_records, gt_quality, all_variants_filter):
    """
    Build records for each SV
    :param genotypes: list of genotypes of each individual for each SV
@@ -216,9 +216,13 @@ def build_records(genotypes, SVSet, true_ones_records, filtered_records, gt_qual
                    "end": node.end,
                    "length": int(node.end) - int(node.start),
                    "filtered": (node.id in filtered_records) if filtered_records is not None else False,
+                    "filters": ",".join([x for x in all_variants_filter[node.id].filter]) if
+                               node.id in all_variants_filter else "",
                    "genotypes": genotypes[node.id] if node.id in genotypes else None,
                    "qualities": gt_quality[node.id] if node.id in gt_quality else None
                }
+                if results[tool_name]["filters"] == "":
+                    results[tool_name]["filters"] = "PASS"
                tools.add(tool_name)
                chromosome = node.chrom

@@ -295,6 +299,12 @@ def build_header(tools, filtered_records, nb_records, max_col_len, nb_inds):
                                                                                             "format": l_format}
        max_col_len[i + 2] = get_max_len("Length", i + 2, max_col_len)

+        if tool not in ["Real data", "Filtered results"]:
+            cells[XLSX_COLS[i + 3] + "2"] = cells[XLSX_COLS[i + 3] + str(2 + nb_records + 3)] = {"text": "Filters",
+                                                                                                 "format": l_format}
+            max_col_len[i + 3] = get_max_len("Filters", i + 3, max_col_len)
+            i += 1
+
        # Genotypes:
        for k in range(0, nb_inds):
            cells_gt[XLSX_COLS[j + k] + "2"] = cells_gq[XLSX_COLS[j + k] + "2"] = \
@@ -352,7 +362,7 @@ def fill_real_data(row, cells, cells_gt, cells_gq, max_col_len, record, rec_id,


 def fill_tool_data(row, col, cells, max_col_len, record, nb_records, my_start, my_end, my_length,
-                   sv_format=None):
+                   sv_format=None, no_filter=False):
    """
    Fill cells for a tool and a record
    :param row: row position
@@ -387,6 +397,10 @@ def fill_tool_data(row, col, cells, max_col_len, record, nb_records, my_start, m
    cells[XLSX_COLS[col + 2] + str(row)] = {"text": record["length"], "format": sv_format}
    max_col_len[col + 2] = get_max_len(record["length"], col + 2, max_col_len)

+    if "filters" in record and not no_filter:
+        cells[XLSX_COLS[col + 3] + str(row)] = {"text": record["filters"], "format": sv_format}
+        max_col_len[col + 3] = get_max_len(record["filters"], col + 3, max_col_len)
+
    ###########
    #  DIFFS: #
    ###########
@@ -408,6 +422,11 @@ def fill_tool_data(row, col, cells, max_col_len, record, nb_records, my_start, m
    cells[XLSX_COLS[col + 2] + str(row + nb_records + 3)] = {"text": length,
                                                             "format": sv_format}

+    # FILTERS:
+    if "filters" in record and not no_filter:
+        cells[XLSX_COLS[col + 3] + str(row + nb_records + 3)] = {"text": record["filters"],
+                                                                 "format": sv_format}
+
    return cells, max_col_len


@@ -454,18 +473,26 @@ def write_headers(workbook, worksheet, headers, cell_len, rows, filtered_records
    :param rows: (list) rows into write the header
    :param filtered_records: (bool) is there filtered records
    """
+    orig_cell_len = cell_len
    for row in rows:
        merge_format = workbook.add_format({'bold': True})
        worksheet.write(XLSX_COLS[1] + str(row + 1) + ":" + XLSX_COLS[cell_len] + str(row + 1), headers[0], merge_format)
    i = 0
+    left_cell_lenth = 2
    for header in headers[1:]:
+        if header not in ["Real data", "Filtered results"]:
+            cell_len = orig_cell_len + 1
+        else:
+            cell_len = orig_cell_len
        if i < len(headers) - 2 or not filtered_records:
            merge_format = workbook.add_format({'align': 'center'})
        else:
            merge_format = workbook.add_format({'align': 'center', 'bg_color': COLOR_COL_FILTER})
        for row in rows:
-            worksheet.merge_range(XLSX_COLS[2 + i * cell_len] + str(row) + ":" + XLSX_COLS[1 + cell_len + i * cell_len] + str(row),
+            worksheet.merge_range(XLSX_COLS[left_cell_lenth] + str(row) + ":" +
+                                  XLSX_COLS[left_cell_lenth + cell_len - 1] + str(row),
                                  header, merge_format)
+        left_cell_lenth += cell_len
        i += 1


@@ -560,15 +587,24 @@ def create_tsv_file(filename: str, headers: list, cells: dict, nb_tools: int, nb
    head = ["", headers[0]]
    top_headers = {}
    h = 2
+    nb_only_tools = nb_tools # Number of tools, except real data and filtered results
+    nb_others = 0
    for header in headers[1:]:
        # Define top headers to each column:
        for i in range(0, nb_per_tool):
            top_headers[h] = header
            head.append("")
            h += 1
+        if header not in ["Real data", "Filtered results"]:
+            top_headers[h] = header
+            head.append("")
+            h += 1
+        else:
+            nb_only_tools -= 1
+            nb_others += 1
    rows = [head]
    for i in range(0, records_range[1]-records_range[0]):
-        rows.append(["" for x in range(0, (nb_tools * nb_per_tool) + 2)])
+        rows.append(["" for x in range(0, (nb_only_tools * (nb_per_tool + 1)) + (nb_others * nb_per_tool) + 2)])

    # Fill content:
    for id_cell, cell in cells.items():
@@ -679,17 +715,17 @@ def apply_style_of_filter_cells(cells, cells_gt, cells_gq, i, is_kept, nb_record
            cells[XLSX_COLS[0] + str(i + nb_records + 3)]["format"]["bg_color"] = COLOR_IS_KEPT
    elif filtered_records is not None:
        # SV does not pass the filter: color bg in red in the filter column
-        cells[XLSX_COLS[2 + ((nb_tools + 1) * 3)] + str(i)] = {"text": "", "format": {"bg_color": COLOR_NOT_FOUND}}
-        cells[XLSX_COLS[2 + ((nb_tools + 1) * 3) + 1] + str(i)] = {"text": "",
+        cells[XLSX_COLS[2 + ((nb_tools * 4) + 3)] + str(i)] = {"text": "", "format": {"bg_color": COLOR_NOT_FOUND}}
+        cells[XLSX_COLS[2 + ((nb_tools * 4) + 3) + 1] + str(i)] = {"text": "",
                                                                   "format": {"bg_color": COLOR_NOT_FOUND}}
-        cells[XLSX_COLS[2 + ((nb_tools + 1) * 3) + 2] + str(i)] = {"text": "",
+        cells[XLSX_COLS[2 + ((nb_tools * 4) + 3) + 2] + str(i)] = {"text": "",
                                                                   "format": {"bg_color": COLOR_NOT_FOUND}}
-        cells[XLSX_COLS[2 + ((nb_tools + 1) * 3)] + str(i + nb_records + 3)] = {"text": "",
+        cells[XLSX_COLS[2 + ((nb_tools * 4) + 3)] + str(i + nb_records + 3)] = {"text": "",
                                                                                "format": {
                                                                                    "bg_color": COLOR_NOT_FOUND}}
-        cells[XLSX_COLS[2 + ((nb_tools + 1) * 3) + 1] + str(i + nb_records + 3)] = {
+        cells[XLSX_COLS[2 + ((nb_tools * 4) + 3) + 1] + str(i + nb_records + 3)] = {
            "text": "", "format": {"bg_color": COLOR_NOT_FOUND}}
-        cells[XLSX_COLS[2 + ((nb_tools + 1) * 3) + 2] + str(i + nb_records + 3)] = {
+        cells[XLSX_COLS[2 + ((nb_tools * 4) + 3) + 2] + str(i + nb_records + 3)] = {
            "text": "", "format": {"bg_color": COLOR_NOT_FOUND}}

        # Genotype:
@@ -743,9 +779,9 @@ def build_body_cells(rec_keys, records, nb_records, nb_inds, tools, cells, cells
                    sv_format = {"bg_color": COLOR_IS_KEPT}

                    # SV data (sheet 1):
-                    cells, max_col_len = fill_tool_data(i, 2 + ((nb_tools + 1) * 3), cells, max_col_len,
+                    cells, max_col_len = fill_tool_data(i, 2 + ((nb_tools * 4) + 3), cells, max_col_len,
                                                        record["tools"][tool], nb_records, my_start, my_end, my_length,
-                                                        {"bg_color": COLOR_COL_FILTER})
+                                                        {"bg_color": COLOR_COL_FILTER}, True)

                    if do_genotype:
                        # Genotype (sheets 2&3):
@@ -775,7 +811,7 @@ def build_body_cells(rec_keys, records, nb_records, nb_inds, tools, cells, cells
                cells, cells_gt, cells_gq = fill_cells_no_tools(cells, cells_gt, cells_gq, i, j, g, nb_records, nb_inds,
                                                                do_genotype)

-            j += 3
+            j += 4
            g += nb_inds

        ###############################################################################
@@ -843,7 +879,8 @@ def init(output, vcf_files, true_vcf, rules, filtered_vcfs=None, type_v="del", o
    construct_overlap_graph(sv_set, overlap_cutoff, left_precision, right_precision)

    # Build records:
-    records, tools, orphans = build_records(genotypes, sv_set, true_variants.keys(), filtered_records, gt_quality)
+    records, tools, orphans = build_records(genotypes, sv_set, true_variants.keys(), filtered_records, gt_quality,
+                                            filtered_all)

    nb_records = len(records)