Skip to content
Snippets Groups Projects
Commit 73d5978e authored by Floreal Cabanettes's avatar Floreal Cabanettes
Browse files

Add tsv outputs + change colors

parent c40e6856
No related branches found
No related tags found
No related merge requests found
......@@ -35,6 +35,7 @@ for alp in alphabet:
xlsx_cols.append(alp + j)
color_not_found = "#FE2E2E"
color_not_found_2 = "#dddddd"
color_col_filter = "#BEF781"
color_is_kept = "#81F781"
color_false_positive = "#FE642E"
......@@ -54,7 +55,7 @@ description: Merge SV based on reciprocal overlap")
parser.add_argument('--overlap_cutoff', type=float, default=0.5, help='cutoff for reciprocal overlap')
parser.add_argument('--left_precision', type=int, default=-1, help='left breakpoint precision')
parser.add_argument('--right_precision', type=int, default=-1, help='right breakpoint precision')
parser.add_argument('-o', '--output', type=str, default="results.xlsx", help='output Excel file')
parser.add_argument('-o', '--output', type=str, default="results", help='output prefix')
# parse the arguments
args = parser.parse_args()
......@@ -483,7 +484,7 @@ def create_xls_document(args, headers, filtered_records, nb_records, nb_inds, ce
:param cells_gq: cells for third sheet (genotype quality)
:param max_col_len: max content length for each column
"""
with xlsxwriter.Workbook(args.output) as workbook:
with xlsxwriter.Workbook(args.output + ".xslx") as workbook:
#################################
# First sheet (SV description): #
......@@ -536,6 +537,42 @@ def create_xls_document(args, headers, filtered_records, nb_records, nb_inds, ce
worksheet_gq.set_column(0, 0, max_col_len[0]+1)
def create_tsv_file(filename: str, headers: list, cells: dict, nb_tools: int, nb_per_tool: int,
                    records_range: tuple):
    """
    Export a range of rows of a sheet's cells to a tab-separated file

    :param filename: path of the TSV file to write
    :param headers: top headers; each one covers nb_per_tool consecutive columns, starting at column 1
    :param cells: cells of the sheet, keyed by spreadsheet reference (column letters + row number,
        e.g. "B3"); each value is a dict whose "text" entry holds the cell content
    :param nb_tools: number of column groups in each data row
    :param nb_per_tool: number of columns in each group
    :param records_range: (first, last) sheet rows to export, both inclusive; the first row of the
        range is written as the header line
    """
    first_row, last_row = records_range[0], records_range[1]

    # Top header covering each column (column 0 has none):
    top_headers = {}
    col_idx = 1
    for header in headers:
        for _ in range(nb_per_tool):
            top_headers[col_idx] = header
            col_idx += 1

    # Init rows: the header line first, then one line per remaining sheet row of the range.
    # Allocating one more line than that would leave a tabs-only line at the end of the file.
    row_width = (nb_tools * nb_per_tool) + 1
    rows = [[""] * col_idx]
    rows.extend([""] * row_width for _ in range(last_row - first_row))

    # Fill content:
    cell_ref = re.compile(r"^([A-Z]+)(\d+)$")
    for id_cell, cell in cells.items():
        id_m = cell_ref.match(id_cell)
        col = xlsx_cols.index(id_m.group(1))
        row = int(id_m.group(2))
        if not first_row <= row <= last_row:
            # Cell belongs to another part of the sheet
            continue
        r = row - first_row
        # Cell content is not always a string: convert it in the header line too
        text = str(cell["text"])
        if r == 0 and col > 0:
            # Header line: prefix the column name with the top header of its group
            text = top_headers[col] + " / " + text
        rows[r][col] = text

    # List as text:
    tsv = "\n".join("\t".join(row) for row in rows)

    with open(filename, "w") as tsv_file:
        tsv_file.write(tsv)
# noinspection PyUnresolvedReferences
def main():
# parse the command line args
......@@ -695,7 +732,7 @@ def main():
for gt in range(0, nb_inds):
# noinspection PyUnresolvedReferences
cells_gt[xlsx_cols[g + gt] + str(i)] = cells_gq[xlsx_cols[g + gt] + str(i)] = \
{"text": "", "format": {"bg_color": "#000000"}}
{"text": "", "format": {"bg_color": color_not_found_2}}
j += 3
g += nb_inds
......@@ -724,7 +761,7 @@ def main():
for gt in range(0, nb_inds):
cells_gt[xlsx_cols[1 + ((nb_tools + 1) * nb_inds) + gt] + str(i)] = \
cells_gq[xlsx_cols[1 + ((nb_tools + 1) * nb_inds) + gt] + str(i)] = {"text": "", "format":
{"bg_color": "#000000"}}
{"bg_color": color_not_found_2}}
# False positives (orphans) in orange:
if re.match(r"^orphan_\d+$", rec_id):
......@@ -737,6 +774,20 @@ def main():
create_xls_document(args, headers, filtered_records is not None, nb_records, nb_inds, cells, cells_gt, cells_gq,
max_col_len)
# Create CSV files:
create_tsv_file(args.output + "_sv_per_tools.tsv", headers, cells,
nb_tools + (2 if filtered_records is not None else 1),
3, (2, nb_records+2))
create_tsv_file(args.output + "_sv_diffs_per_tools.tsv", headers, cells,
nb_tools + (2 if filtered_records is not None else 1),
3, (2+nb_records+3, nb_records * 2 + 5))
create_tsv_file(args.output + "_sv_genotypes_per_tools.tsv", headers, cells_gt,
nb_tools + (2 if filtered_records is not None else 1),
nb_inds, (2, nb_records + 2))
create_tsv_file(args.output + "_sv_genotypes_quality_per_tools.tsv", headers, cells_gq,
nb_tools + (2 if filtered_records is not None else 1),
nb_inds, (2, nb_records + 2))
print("")
print("###########")
print("# RESULTS #")
......@@ -745,7 +796,13 @@ def main():
print(str(nb_records) + " Results found")
print(str(orphans) + " False Positive")
print("")
print("Results saved in " + args.output)
print("Results saved in :\n\t- " + args.output + ".xslx")
print("")
print("TSV files:")
print("\t- " + args.output + "_sv_per_tools.tsv")
print("\t- " + args.output + "_sv_diffs_per_tools.tsv")
print("\t- " + args.output + "_sv_genotypes_per_tools.tsv")
print("\t- " + args.output + "_sv_genotypes_quality_per_tools.tsv")
print("")
# initialize the script
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment