# NOTE(review): two stray "Newer" / "Older" merge-artifact lines stood here;
# they are not Python (they would raise NameError at import) and are kept
# only as this comment.
"""
This module is designed to process the data obtained during metabolite mapping.
The main function is c_p_o_m_r
"""
import sys
from math import log
from time import time
import pandas as pd
import py4cytoscape as p4c
sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données')
sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\chebi-ids.git')
from Recovery_of_associated_Chebi_IDs import chebi_horizontal
from utils import excel_file_writer, pre_cut, recup_all_inf_excel
from complete_processing_of_mapping_results import c_p_o_m_r
from Mapping_using_the_API import opti_multimapping
from network_visualization import paths_link_cpdb, network_visu
# Root folder (Windows path) into which every output file of this run is written.
FOLDER = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\main\\patho_oeil_06-03-2024\\"
def shapping_data(file, folder):
    """
    Take data from an Excel file and format it for further workflow steps.

    Args:
        file: Excel file with the data obtained after analysis.  Expected
            layout (inferred from the code below): col 0 = metabolite name,
            col 1 = comma-separated database IDs ("chebi:...", "hmdb:...",
            "kegg:..."), col 2 = control intensity, col 3 = case intensity.
        folder: folder in which the Excel files produced here are saved.

    Returns:
        tuple(list, list): (table formatted for the mapping step,
        chebi-only table for the pdf report).  Also writes two .xlsx
        files into `folder`.
    """
    beg_datas = recup_all_inf_excel(file)
    all_id_type = ['chebi', 'hmdb', 'kegg']
    l_without_id = []   # names of metabolites carrying no usable ID
    first_case = []     # parsed rows: [name, ids-per-db, counts, log-ratio]
    n_max_id = [0 for ait in range(len(all_id_type))]
    for line in beg_datas[1:]:   # skip the header row
        # NOTE(review): the skip condition was lost in the corrupted source;
        # an empty / "NA" ID cell is the natural guess -- confirm against VCS.
        if str(line[1]).strip() in ("", "NA", "nan"):
            l_without_id.append(line[0])
        else:
            new_line = [line[0]]
            new_metas = [[] for ait in range(len(all_id_type))]  # IDs grouped per database
            count_id = [0 for ait in range(len(all_id_type))]    # ID count per database
            if "," in line[1]:
                splited = line[1].split(",")
                for i_bdd, bdd in enumerate(all_id_type):
                    for entree in splited:
                        if bdd in entree:
                            new_metas[i_bdd].append(entree)
                            count_id[i_bdd] += 1
            else:
                one_id_clean = line[1].strip()
                # The prefix before ':' names the database ("chebi:12345").
                id_type = one_id_clean[:one_id_clean.index(":")]
                index_type = all_id_type.index(id_type)
                new_metas[index_type].append(one_id_clean)
                count_id[index_type] = 1
            # Track, per database, the largest ID count seen on any row.
            for i_val_id, val_id in enumerate(count_id):
                if n_max_id[i_val_id] < val_id:
                    n_max_id[i_val_id] = val_id
            new_line.append(new_metas)
            new_line.append(count_id)
            # log(case / control) intensity ratio.
            new_line.append(log(float(line[3])/float(line[2])))
            first_case.append(new_line)
    # Build the header rows: one column per possible ID of each database.
    out_table = [['Name']]
    for_pdf_ch = [['Name']]
    for_pdf_ch[0] += ["chebi" for ji in range(n_max_id[0])]
    print(for_pdf_ch)
    # NOTE(review): this loop header was lost in the corrupted source; it is
    # reconstructed from the `i_vid` / `vid` uses on the next line.
    for i_vid, vid in enumerate(n_max_id):
        out_table[0] += [all_id_type[i_vid] for ji in range(vid)]
    print(out_table[0])
    out_table[0].append('log(cas/temoin)')
    chebi_initial = []
    for intrem in first_case:
        int_line = [intrem[0]]
        # Keep the first chebi ID of every metabolite (or "NA") for the
        # ontology-based enrichment below.
        if intrem[2][0] != 0:
            chebi_initial.append(intrem[1][0][0])
        else:
            chebi_initial.append("NA")
        # Pad every database's columns with "NA" up to its maximum width.
        for i_values, values in enumerate(n_max_id):
            for colu in range(values):
                if intrem[2][i_values] > colu:
                    int_line.append(intrem[1][i_values][colu])
                else:
                    int_line.append("NA")
        int_line.append(intrem[3])
        out_table.append(int_line)
        for_pdf_ch.append(int_line[:n_max_id[0]+1])
    print("le nombre d'identifiant maximal pour 1 métabolite: ", n_max_id)
    # Enrich every row with chebi IDs related through the ontology
    # (chebi_horizontal returns 4 extra columns per row).
    chebi_prefix_cut = pre_cut(chebi_initial)
    outf_horiz = folder + "chebi add using the ontology.xlsx"
    horizontal = chebi_horizontal(chebi_prefix_cut, outf_horiz, flow=True)
    inci = n_max_id[0] + 1   # index of the first column after the chebi block
    for ifline, fline in enumerate(out_table[1:]):
        out_table[ifline+1] = fline[:inci] + horizontal[ifline] + fline[inci:]
        for_pdf_ch[ifline+1] = fline[:inci] + horizontal[ifline]
    l_lchebi = ["chebi" for ait in range(4)] + out_table[0][inci:]
    out_table[0] = out_table[0][:inci] + l_lchebi
    for_pdf_ch[0] = for_pdf_ch[0][:inci] + ["chebi" for ait in range(4)]
    # NOTE(review): `datas_for_mapping` was used but never defined in the
    # corrupted source; the enriched `out_table` is the only candidate.
    datas_for_mapping = out_table
    df_dfm = pd.DataFrame(data=datas_for_mapping)
    n_o_f = folder + "Datas_mis_en_forme_pour_le_mapping.xlsx"
    excel_file_writer(df_dfm, n_o_f)
    return (datas_for_mapping, for_pdf_ch)
"""
Execute the all workflow of mapping on RAMP and CPDB.
Parameters:
infile = Excel data file
out_folder = Path to the folder in which you want your data to be written
"""
datas_f_map, chebi_hori = shapping_data(infile, out_folder)
data_input = recup_all_inf_excel(infile)
for i_dat_i, dat_i in enumerate(data_input):
for j in range(len(data_input[0])):
data_input[i_dat_i][j] = str(dat_i[j])
result_cpdb, result_ramp, recap = opti_multimapping(datas_f_map, out_folder,
# ajouter la mention ID exact !
l_visu = c_p_o_m_r(result_ramp, out_folder, "RAMP", f_view_sav=out_folder,
modul="flow", f_modul=recap)
l_visu_c = c_p_o_m_r(result_cpdb, out_folder, "CPDB", f_view_sav=out_folder,
modul="flow", f_modul=recap)
result_ramp_pdf = []
result_cpdb_pdf = []
recap_pdf = []
liste_tc = [result_ramp, result_cpdb, recap]
liste_reverse = [result_ramp_pdf, result_cpdb_pdf, recap_pdf]
for i_liste, liste in enumerate(liste_tc):
for i_col in range(len(liste[0])):
new_line = []
for i_line in liste:
new_line.append(str(i_line[i_col]))
liste_reverse[i_liste].append(new_line)
file_path = "Modele_de_pdf_feneratio_en_anglais_rev_19-02-2024.docx"
out_pdf_mapping(file_path, data_input, chebi_hori, recap_pdf,
result_ramp_pdf[:8], result_cpdb_pdf[:4],
out_folder, l_visu)
l_bdd = ["Wikipathways", "KEGG", "EHMN",
"HumanCyc", "INOH", "Reactome"] # "SMPDB" plantage
print("le temps nécessaires pour effectuer le mapping optimal a était de ",
t2-t1, "secondes.")
t_i_name = [recap[0] + recap[0], recap[1] + recap[2]] #not opti
print(t_i_name)
out_links = out_folder + "Network CPDB with only "+ bddnow + " pathways.xlsx"
edge_data, nodes_data = paths_link_cpdb(result_cpdb, out_links, recap,
bdd=bddnow, flow=True,
tab_id_name=t_i_name)
if bddnow == "Reactome":
print(network_visu(edge_data[0:3], nodes_data,
bdd=bddnow, sav_fol=out_folder))
else:
print(network_visu(edge_data[0:3], nodes_data, bdd=bddnow))
print("le temps nécessaires pour effectuer les visualisation a était de ",
t3-t1, "secondes.")
print("le temps total pour faire tourner le programme est a était ",
t3-t1, "secondes.")
if __name__ == "__main__":
INFILE = FOLDER + "chebi_intensite_patho_oeil_donnes_estelles_rev_06-03-2024.xlsx"