def create_tsv_file(filename: str, headers: list, cells: dict, nb_tools: int, nb_per_tool: int,
                    records_range: tuple, column_names: list = None):
    """
    Export a range of spreadsheet rows to a tab-separated file

    The first exported row (sheet row ``records_range[0]``) is treated as the sub-header row: for every
    column except the first, its text is prefixed with the matching top-level header as
    "<top header> / <sub header>". All following rows are written as plain text.

    :param filename: path of the TSV file to write (overwritten if it already exists)
    :param headers: top-level header labels; each one spans nb_per_tool consecutive columns, starting at column 1
    :param cells: mapping of cell id (upper-case column letters + row number, e.g. "B3") to a dict with a "text" key
    :param nb_tools: number of column groups, used to size the data rows
    :param nb_per_tool: number of columns in each group
    :param records_range: (first, last) inclusive sheet row numbers to export
    :param column_names: ordered column letters used to turn a column name into an index; defaults to the
        module-level xlsx_cols list
    :raises ValueError: if a cell id is malformed or refers to an unknown column
    """
    if column_names is None:
        column_names = xlsx_cols

    # Build the name -> index table once instead of scanning the list for every cell.
    # setdefault keeps the first occurrence, which is what list.index() would return.
    col_index = {}
    for position, name in enumerate(column_names):
        col_index.setdefault(name, position)

    first_row, last_row = records_range[0], records_range[1]

    # top_headers[col] is the top-level header covering column col (column 0 has none).
    top_headers = [""] + [header for header in headers for _ in range(nb_per_tool)]

    # One header row plus one row per remaining sheet row. The header row already accounts for
    # sheet row first_row, so only (last_row - first_row) data rows are needed; allocating one more
    # would leave a trailing line made only of tabs in the output.
    width = (nb_tools * nb_per_tool) + 1
    rows = [[""] * len(top_headers)]
    rows.extend([""] * width for _ in range(last_row - first_row))

    # Fill content:
    cell_id_pattern = re.compile(r"^([A-Z]+)(\d+)$")
    for id_cell, cell in cells.items():
        id_m = cell_id_pattern.match(id_cell)
        if id_m is None:
            raise ValueError("Invalid cell id: %r" % (id_cell,))
        try:
            col = col_index[id_m.group(1)]
        except KeyError:
            raise ValueError("Unknown column in cell id: %r" % (id_cell,)) from None
        row = int(id_m.group(2))
        if not first_row <= row <= last_row:
            continue
        # Cell text may be a number; coerce it in both branches so joining never fails.
        text = str(cell["text"])
        r = row - first_row
        if r == 0 and col > 0:
            rows[r][col] = str(top_headers[col]) + " / " + text
        else:
            rows[r][col] = text

    # List as text:
    tsv = "\n".join("\t".join(row) for row in rows)
    with open(filename, "w") as tsv_file:
        tsv_file.write(tsv)
(2, nb_records + 2)) + create_tsv_file(args.output + "_sv_genotypes_quality_per_tools.tsv", headers, cells_gq, + nb_tools + (2 if filtered_records is not None else 1), + nb_inds, (2, nb_records + 2)) + print("") print("###########") print("# RESULTS #") @@ -745,7 +796,13 @@ def main(): print(str(nb_records) + " Results found") print(str(orphans) + " False Positive") print("") - print("Results saved in " + args.output) + print("Results saved in :\n\t- " + args.output + ".xslx") + print("") + print("TSV files:") + print("\t- " + args.output + "_sv_per_tools.tsv") + print("\t- " + args.output + "_sv_diffs_per_tools.tsv") + print("\t- " + args.output + "_sv_genotypes_per_tools.tsv") + print("\t- " + args.output + "_sv_genotypes_quality_per_tools.tsv") print("") # initialize the script