Add tsv outputs + change colors

73d5978e · Floreal Cabanettes · c40e6856 · 73d5978e
Commit 73d5978e authored 7 years ago by Floreal Cabanettes
--- a/build_xls_results.py
+++ b/build_xls_results.py
@@ -35,6 +35,7 @@ for alp in alphabet:
        xlsx_cols.append(alp + j)

 color_not_found = "#FE2E2E"
+color_not_found_2 = "#dddddd"
 color_col_filter = "#BEF781"
 color_is_kept = "#81F781"
 color_false_positive = "#FE642E"
@@ -54,7 +55,7 @@ description: Merge SV based on reciprocal overlap")
    parser.add_argument('--overlap_cutoff',  type=float, default=0.5, help='cutoff for reciprocal overlap')
    parser.add_argument('--left_precision',  type=int, default=-1, help='left breakpoint precision')
    parser.add_argument('--right_precision',  type=int, default=-1, help='right breakpoint precision')
-    parser.add_argument('-o', '--output', type=str, default="results.xlsx", help='output Excel file')
+    parser.add_argument('-o', '--output', type=str, default="results", help='output prefix')
    # parse the arguments
    args = parser.parse_args()

@@ -483,7 +484,7 @@ def create_xls_document(args, headers, filtered_records, nb_records, nb_inds, ce
    :param cells_gq: cells for third sheet (genotype quality)
    :param max_col_len: max content length for each column
    """
-    with xlsxwriter.Workbook(args.output) as workbook:
+    with xlsxwriter.Workbook(args.output + ".xslx") as workbook:

        #################################
        # First sheet (SV description): #
@@ -536,6 +537,42 @@ def create_xls_document(args, headers, filtered_records, nb_records, nb_inds, ce
            worksheet_gq.set_column(0, 0, max_col_len[0]+1)


+def create_tsv_file(filename: str, headers: list, cells: dict, nb_tools: int, nb_per_tool: int, records_range: tuple):
+    """
+    Write the rows records_range[0]..records_range[1] (inclusive) of a sheet as a tab-separated file
+
+    :param filename: path of the TSV file to write
+    :param headers: top headers, each one spanning nb_per_tool consecutive columns starting at column 1
+    :param cells: sheet cells keyed by reference (column letters + row number); only "text" is used
+    :param nb_tools: number of column groups (data rows have nb_tools * nb_per_tool + 1 columns)
+    :param nb_per_tool: number of columns per group
+    :param records_range: (first, last) row numbers to export; the first row is written as the header line
+    """
+    first_row, last_row = records_range[0], records_range[1]
+    # Top header of each column (column 0 has none):
+    top_headers = {}
+    for h, header in enumerate(headers):
+        for i in range(nb_per_tool):
+            top_headers[1 + h * nb_per_tool + i] = header
+    # The first exported row lands in rows[0], so only last_row - first_row more rows are needed
+    rows = [[""] * (len(top_headers) + 1)]
+    rows.extend([""] * (nb_tools * nb_per_tool + 1) for _ in range(last_row - first_row))
+
+    # Fill content:
+    for id_cell, cell in cells.items():
+        id_m = re.match(r"^([A-Z]+)(\d+)$", id_cell)
+        col, row = xlsx_cols.index(id_m.group(1)), int(id_m.group(2))
+        if first_row <= row <= last_row:
+            # str() for both branches, so a non-string header text cannot raise TypeError
+            text = str(cell["text"])
+            is_header = row == first_row and col > 0
+            rows[row - first_row][col] = (top_headers[col] + " / " + text) if is_header else text
+
+    with open(filename, "w") as tsv_file:
+        tsv_file.write("\n".join("\t".join(row) for row in rows))
+
+
+
 # noinspection PyUnresolvedReferences
 def main():
    # parse the command line args
@@ -695,7 +732,7 @@ def main():
                    for gt in range(0, nb_inds):
                        # noinspection PyUnresolvedReferences
                        cells_gt[xlsx_cols[g + gt] + str(i)] = cells_gq[xlsx_cols[g + gt] + str(i)] = \
-                            {"text": "", "format": {"bg_color": "#000000"}}
+                            {"text": "", "format": {"bg_color": color_not_found_2}}
            j += 3
            g += nb_inds

@@ -724,7 +761,7 @@ def main():
            for gt in range(0, nb_inds):
                cells_gt[xlsx_cols[1 + ((nb_tools + 1) * nb_inds) + gt] + str(i)] = \
                    cells_gq[xlsx_cols[1 + ((nb_tools + 1) * nb_inds) + gt] + str(i)] = {"text": "", "format":
-                                                                                        {"bg_color": "#000000"}}
+                                                                                        {"bg_color": color_not_found_2}}

        # False positives (orphans) in orange:
        if re.match(r"^orphan_\d+$", rec_id):
@@ -737,6 +774,20 @@ def main():
    create_xls_document(args, headers, filtered_records is not None, nb_records, nb_inds, cells, cells_gt, cells_gq,
                        max_col_len)

+    # Create TSV files:
+    create_tsv_file(args.output + "_sv_per_tools.tsv", headers, cells,
+                    nb_tools + (2 if filtered_records is not None else 1),
+                    3, (2, nb_records+2))
+    create_tsv_file(args.output + "_sv_diffs_per_tools.tsv", headers, cells,
+                    nb_tools + (2 if filtered_records is not None else 1),
+                    3, (2+nb_records+3, nb_records * 2 + 5))
+    create_tsv_file(args.output + "_sv_genotypes_per_tools.tsv", headers, cells_gt,
+                    nb_tools + (2 if filtered_records is not None else 1),
+                    nb_inds, (2, nb_records + 2))
+    create_tsv_file(args.output + "_sv_genotypes_quality_per_tools.tsv", headers, cells_gq,
+                    nb_tools + (2 if filtered_records is not None else 1),
+                    nb_inds, (2, nb_records + 2))
+
    print("")
    print("###########")
    print("# RESULTS #")
@@ -745,7 +796,13 @@ def main():
    print(str(nb_records) + " Results found")
    print(str(orphans) + " False Positive")
    print("")
-    print("Results saved in " + args.output)
+    print("Results saved in :\n\t- " + args.output + ".xslx")
+    print("")
+    print("TSV files:")
+    print("\t- " + args.output + "_sv_per_tools.tsv")
+    print("\t- " + args.output + "_sv_diffs_per_tools.tsv")
+    print("\t- " + args.output + "_sv_genotypes_per_tools.tsv")
+    print("\t- " + args.output + "_sv_genotypes_quality_per_tools.tsv")
    print("")

 # initialize the script