From ef991720e365b9c1826688fcf7887611dc0fed3d Mon Sep 17 00:00:00 2001 From: local_comparaison <mathieu.umec@inrae.fr> Date: Fri, 2 Feb 2024 17:01:04 +0100 Subject: [PATCH] change for optimize the workflow --- .gitignore.txt | 3 +- Mapping_using_the_API.py | 58 ++++++-- Tests_unitaires/test_Mapping_using_the_API.py | 58 ++------ ..._complete_processing_of_mapping_results.py | 42 ------ ...tion_des_donnes_de_mapping.cpython-310.pyc | Bin 7532 -> 7532 bytes main.py | 108 +++++++++++--- network_visualization.py | 137 ++++++++++-------- utils.py | 9 +- 8 files changed, 237 insertions(+), 178 deletions(-) diff --git a/.gitignore.txt b/.gitignore.txt index 7cffe85..0c10313 100644 --- a/.gitignore.txt +++ b/.gitignore.txt @@ -1,3 +1,4 @@ __pycache__/ logs/ -.cpython-310 \ No newline at end of file +.cpython-310 +.pyc \ No newline at end of file diff --git a/Mapping_using_the_API.py b/Mapping_using_the_API.py index 5f7041f..ab5d763 100644 --- a/Mapping_using_the_API.py +++ b/Mapping_using_the_API.py @@ -4,18 +4,16 @@ the functions for ConsensusPathDB mapping are translation of BRGEnrichment. """ import json -from urllib import request +from urllib import request, error import xmltodict import pandas as pd from utils import excel_file_writer, pre_cut, recup_all_inf_excel FOLDER = "C:\\Users\\mumec\\Desktop\\Mini_codes\\" - +""" def send_request_to_mapping_api(url, data_json, head, met='POST'): - """ - This function gives the result of mapping of a metabolites list from RAMP. - Here's an example of 4 metabolites giving 505 lines. - ["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"] + + Give the result from the API Arg: url = the url to use @@ -25,12 +23,41 @@ def send_request_to_mapping_api(url, data_json, head, met='POST'): Returns: Type of return: 1 excel file whith 5 columns - """ + req = request.Request(url, data=data_json, headers=head, method=met) with request.urlopen(req) as response: result = response.read() out_data = result.decode('utf-8') return out_data +""" + +def send_request_to_mapping_api(url, data_json, head, met='POST'): + """ + This function gives the result of mapping of a metabolites list from RAMP. + Here's an example of 4 metabolites giving 505 lines. + ["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"] + + Arg: + url = the url to use + data_json = the data to post + head = headers to use + met = 'POST' + + Returns: + Type of return: 1 excel file with 5 columns + """ + try: + req = request.Request(url, data=data_json, headers=head, method=met) + with request.urlopen(req) as response: + result = response.read() + out_data = result.decode('utf-8') + return out_data + except error.URLError as e: + print(f"Error: Unable to connect to the server. {e}") + except error.HTTPError as e: + print(f"Error: The server couldn't fulfill the request. {e}") + except Exception as e: + print(f"An unexpected error occurred: {e}") def mapping_ramp_api(metabolites_list, outfile, inf="flow", flow=False): @@ -108,9 +135,14 @@ def mapping_ramp_api(metabolites_list, outfile, inf="flow", flow=False): def mapping_ramp_api_enrichment(metabolites_list, outfile, inf="all"): """ + This function gives the result of enrichment of a metabolites list from RAMP. Arg: - Returns: + metabolites_list = a list of métabolits id to map + outfile = the name of the xlsx file to write + inf = "all" + + Returns : int and list """ if len(metabolites_list) == 0: badend = " Your metabolite list is empty. Here's an example" @@ -703,9 +735,11 @@ def opti_multimapping(file, outfolder, mapping="flow"): if __name__ == "__main__": - F_ENTER = FOLDER+"Donnees_oeil_mis_en_forme_opti_mapping.xlsx" - opti_multimapping(F_ENTER, FOLDER) - F_O = FOLDER + "test_enrichment_ramp.xlsx" + #F_ENTER = FOLDER+"Donnees_oeil_mis_en_forme_opti_mapping.xlsx" + #opti_multimapping(F_ENTER, FOLDER) + #F_O = FOLDER + "test_enrichment_ramp.xlsx" #a, b = mapping_ramp_api(["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"], F_O, inf="all") + #print(a, b) #b = pd.DataFrame(data=b).transpose() - #excel_file_writer(b, F_O, sheetname="Resultats") \ No newline at end of file + #excel_file_writer(b, F_O, sheetname="Resultats") + m_ora_cpdb(['C01157','C00002','C00002'], 'kegg', pthreshold=0.05, infos=None, ofile="C:\\Users\\mumec\\Desktop\\test_out_cpdb.xlsx") diff --git a/Tests_unitaires/test_Mapping_using_the_API.py b/Tests_unitaires/test_Mapping_using_the_API.py index 391027b..591a41e 100644 --- a/Tests_unitaires/test_Mapping_using_the_API.py +++ b/Tests_unitaires/test_Mapping_using_the_API.py @@ -4,39 +4,12 @@ sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des from Mapping_using_the_API import * import os -class Testutils(unittest.TestCase): - """ - recup_all_inf_excel(file) NON : Nécessite un fichier xlsx - - send_request_to_mapping_api(url, data_json, head, met='POST') ok - - excel_file_writer(dataframe, name_out_file, sheetname="Resultats") ok - - pre_cut(listed) ok - """ - - - def test_send_request_to_mapping_api(self): - result = send_request_to_mapping_api('https://rampdb.nih.gov/api/pathways-from-analytes',json.dumps({"analytes": ["hmdb:HMDB0000064"]}).encode('utf-8'), {'Accept': '*/*', 'Content-Type': 'application/json'}) - self.assertIsInstance(result, str) - self.assertNotEqual(len(result),0) - - - def test_excel_file_writer(self): - dataframe = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) - name_out_file='test_default_sheetname.xlsx' - excel_file_writer(dataframe, name_out_file,sheetname="test_1") - self.assertTrue(os.path.exists(name_out_file)) - os.remove(name_out_file) - - - def test_pre_cut(self): - result = pre_cut(["kegg:C00085", "kegg:C00075", "NA", "kegg:C00083"]) - self.assertListEqual(result,["C00085", "C00075", "NA", "C00083"]) class TestMappingAPI(unittest.TestCase): """ + send_request_to_mapping_api(url, data_json, head, met='POST') ok + mapping_ramp_api(metabolites_list, outfile, inf="opti") OK m_ora_cpdb(accnumbers, acctype, cpdbidsbg=None, @@ -46,6 +19,11 @@ class TestMappingAPI(unittest.TestCase): equiv_from_ma_api(metabolites_list) OK """ + def test_send_request_to_mapping_api(self): + result = send_request_to_mapping_api('https://rampdb.nih.gov/api/pathways-from-analytes',json.dumps({"analytes": ["hmdb:HMDB0000064"]}).encode('utf-8'), {'Accept': '*/*', 'Content-Type': 'application/json'}) + self.assertIsInstance(result, str) + self.assertNotEqual(len(result),0) + def test_mapping_ramp_api_opti(self): len, l_opti = mapping_ramp_api(["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"],"C:\\Users\\mumec\\Desktop\\Mini_codes\\r_unittest.xlsx", inf="opti") self.assertGreater(len, 0) @@ -57,7 +35,8 @@ class TestMappingAPI(unittest.TestCase): self.assertIsInstance(l_opti, list) self.assertTrue(os.path.exists("C:\\Users\\mumec\\Desktop\\Mini_codes\\r_unittest.xlsx")) os.remove("C:\\Users\\mumec\\Desktop\\Mini_codes\\r_unittest.xlsx") - + """ + # fail because API down def test_m_ora_cpdb_opti(self): l_result = m_ora_cpdb(["C00037", "C00041", "C00099"], "kegg", infos="opti", ofile="C:\\Users\\mumec\\Desktop\\Mini_codes\\cpdb_unittest.xlsx") self.assertIsInstance(l_result, list) @@ -67,7 +46,7 @@ class TestMappingAPI(unittest.TestCase): self.assertIsInstance(l_result, list) self.assertTrue(os.path.exists("C:\\Users\\mumec\\Desktop\\Mini_codes\\cpdb_unittest.xlsx")) os.remove("C:\\Users\\mumec\\Desktop\\Mini_codes\\cpdb_unittest.xlsx") - + """ def test_equiv_from_ma_api(self): result = equiv_from_ma_api(["chebi:15428", "chebi:16977"]) self.assertIsInstance(result, str) @@ -83,10 +62,12 @@ class TestCPDBannexe(unittest.TestCase): get_cpdb_version() ok """ + def test_get_cpdb_available_fset_types_other(self): with self.assertRaises(ValueError): get_cpdb_available_fset_types("invalid_entity_type") - + """ + #fail because API down def test_valid_entity_type_genes(self): result = get_cpdb_available_fset_types("genes") self.assertIsInstance(result, dict) @@ -98,11 +79,12 @@ class TestCPDBannexe(unittest.TestCase): self.assertIsInstance(result, dict) self.assertIn("ID", result) self.assertIn("description", result) - + """ def test_get_cpdb_available_accesion_types_others(self): with self.assertRaises(ValueError): get_cpdb_available_fset_types("invalid_entity_type") - + """ + #fail because API down def test_get_cpdb_available_accesion_types_metabolites(self): result = get_cpdb_available_accesion_types("genes") self.assertIsInstance(result, list) @@ -111,7 +93,6 @@ class TestCPDBannexe(unittest.TestCase): result = get_cpdb_available_accesion_types("genes") self.assertIsInstance(result, list) - def test_get_cpdb_available_accesion_id(self): result = get_cpdb_available_accesion_id('kegg', ["C00037", "C00041", "C00099"]) self.assertIsInstance(result, dict) @@ -119,13 +100,6 @@ class TestCPDBannexe(unittest.TestCase): def test_get_cpdb_version(self): result = get_cpdb_version() self.assertIsInstance(result, str) - - -class TestMultiMapping(unittest.TestCase): - """ - multimapping_ramp(file, num_col, outfiles, infpath="Yes") NON : Nécessite un fichier xlsx - - opti_multimapping(file, outfolder, mapping="YES") NON : Nécessite un fichier xlsx """ if __name__=='__main__': diff --git a/Tests_unitaires/test_complete_processing_of_mapping_results.py b/Tests_unitaires/test_complete_processing_of_mapping_results.py index 0c9f1a2..1f196c1 100644 --- a/Tests_unitaires/test_complete_processing_of_mapping_results.py +++ b/Tests_unitaires/test_complete_processing_of_mapping_results.py @@ -41,18 +41,6 @@ class Test_treatment_result_smapping(unittest.TestCase): class Test_import_function(unittest.TestCase): - def test_column_recovery(self): - self.temp_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') - self.temp_file.write("1;it;Doe\n") - self.temp_file.write("2;is;Smith\n") - self.temp_file.write("3;good;Johnson\n") - self.temp_file.close() - result = column_recovery(self.temp_file.name, 1) - expected_result = ['it', 'is', 'good'] - os.remove(self.temp_file.name) - self.assertEqual(result, expected_result) - - def test_recup_ramp_pathways_list(self): self.temp_ramp = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') self.temp_ramp.write("pathwayName;pathwaySource;pathwayId;inputId;commonName\n") @@ -272,35 +260,5 @@ class Testc_p_o_m_r_mappeurs(unittest.TestCase): os.remove(self.temp_cpdb.name) -class Test_functiosnutils(unittest.TestCase): - - def cor_index(self): - result_corres=cor_index(['t','x','a'],['a','b','c','t','x'],['1','2','3','20','24']) - self.assertIsInstance(result_corres, list) - self.assertListEqual(result_corres,['20','24','1']) - - def test_comma_cleaning(self): - str_clean=comma_cleaning ('this, str , have, four, comma') - self.assertIsInstance(str_clean, str) - self.assertMultiLineEqual(str_clean,'this_ str _ have_ four_ comma') - - def test_excel_file_writer(self): - dataframe = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) - name_out_file='test_default_sheetname.xlsx' - excel_file_writer(dataframe, name_out_file,sheetname="test_1") - self.assertTrue(os.path.exists(name_out_file)) - os.remove(name_out_file) - - def excel_m_file_writer(self): - list_of_dataframe = [pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'X': ['a', 'b', 'c'], 'Y': ['d', 'e', 'f']})] - name_outfile = 'test_output.xlsx' - excel_m_file_writer(list_of_dataframe, name_outfile, ['Sheet1', 'Sheet2']) - self.assertTrue(os.path.exists(name_outfile)) - excel_file = pd.ExcelFile(name_outfile) - sheet_names = excel_file.sheet_names - self.assertEqual(len(sheet_names),2) - excel_file.close() - os.remove(name_outfile) - if __name__=='__main__': unittest.main() \ No newline at end of file diff --git a/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc b/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc index a1fe174ff1b8ff659683d23c083016ede401ef4f..54ccbe5cd63b9c590c5c9690a006520f6442329a 100644 GIT binary patch delta 14 WcmaE3^~P$$8cD`qo7YP2X9fT{3<mfB delta 14 WcmaE3^~P$$8cD`mo7YP2X9fT`Oa|Bh diff --git a/main.py b/main.py index b791e2c..011c722 100644 --- a/main.py +++ b/main.py @@ -17,7 +17,7 @@ from Recovery_of_associated_Chebi_IDs import chebi_horizontal, chebi_in_outgouin from Visualisation_des_donnes_de_mapping import up_down_path_plot, barplot from complete_processing_of_mapping_results import recup_ramp_pathways_list, recup_cpdb_pathways_list, recup_me_path_list, recup_ma_pathways_list, pathways_selection, list_f_1, pa_metabo, recov_pos_path_name, df_matrix_r, c_p_o_m_r from Mapping_using_the_API import send_request_to_mapping_api, mapping_ramp_api, m_ora_cpdb, opti_multimapping -from network_visualization import Paths_link_CPDB, network_visu +from network_visualization import paths_link_cpdb, network_visu FOLDER = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\main\\" @@ -28,35 +28,103 @@ def shapping_data(file, folder): Arg: file : file with data obtain after analysis - folder : folder in which the Excel file containing the modification results will be saved + folder : folder in which the Excel file + containing the modification results will be saved Returns: Type of return: list and 1 file .xlsx """ beg_datas = recup_all_inf_excel(file) - """ - if "chebi" in beg_datas[0]: - i_c_chebi = beg_datas.find("chebi") - chebi_increased = chebi_horizontal(beg_datas[i_c_chebi]) # soit modifier pour sortir la liste soit créer une fonction qui fait les 2 directement - chebi_increased.append(chebi_in_outgouing(beg_datas[i_c_chebi])) - datas_for_mapping = chebi_increased + beg_datas[1:i_c_chebi] + beg_datas[i_c_chebi+1:] - """ - datas_for_mapping = beg_datas + first_case = [] + all_id_type =['chebi','hmdb','kegg'] + l_without_id = [] + n_max_id = [0 for ait in range(len(all_id_type))] + for line in beg_datas[1:]: + if line[1]=="NA": + l_without_id.append(line[0]) + else: + new_line = [line[0]] + new_metas = [[] for ait in range(len(all_id_type))] + count_id = [0 for ait in range(len(all_id_type))] + if "," in line[1]: + splited = line[1].split(",") + for i_bdd, bdd in enumerate(all_id_type): + for entree in splited: + if bdd in entree: + new_metas[i_bdd].append(entree) + count_id[i_bdd] += 1 + else: + one_id_clean = line[1].strip() + id_type = one_id_clean[:one_id_clean.index(":")] + index_type = all_id_type.index(id_type) + new_metas[index_type].append(one_id_clean) + count_id[index_type] = 1 + for i_val_id, val_id in enumerate(count_id): + if n_max_id[i_val_id] < val_id: + n_max_id[i_val_id] = val_id + new_line.append(new_metas) + new_line.append(count_id) + new_line.append(log(float(line[3])/float(line[2]))) + first_case.append(new_line) + out_table = [['Name']] + for i_vid, vid in enumerate(n_max_id): + for count_v in range(vid): + out_table[0].append(all_id_type[i_vid]) + out_table[0].append('log(cas/temoin)') + chebi_initial = [] + for i_intrem, intrem in enumerate(first_case): + int_line = [intrem[0]] + + if intrem[2][0] != 0: + chebi_initial.append(intrem[1][0][0]) + else: + chebi_initial.append("NA") + for i_values, values in enumerate(n_max_id): + for colu in range(values): + if intrem[2][i_values]>colu: + int_line.append(intrem[1][i_values][colu]) + else: + int_line.append("NA") + int_line.append(intrem[3]) + out_table.append(int_line) + print("le nombre d'identifiant maximal pour 1 métabolite: ", n_max_id) + chebi_prefix_cut = pre_cut(chebi_initial) + outf_horiz = FOLDER + "chebi_horizontaux.xlsx" + horizontal = chebi_horizontal(chebi_prefix_cut, outf_horiz, flow=True) + inci= n_max_id[0] + 1 + for i_fline, fline in enumerate(out_table[1:]): + out_table[i_fline+1] = fline[:inci] + horizontal[i_fline] + fline[inci:] + out_table[0] = out_table[0][:inci] + ["chebi" for ait in range(4)] + out_table[0][inci:] + datas_for_mapping = out_table df_dfm = pd.DataFrame(data=datas_for_mapping) n_o_f = folder + "Datas_mis_en_forme_pour_le_mapping.xlsx" excel_file_writer(df_dfm, n_o_f) + print("les métabolites suivant n'ont pas d'ID ", l_without_id) return(datas_for_mapping) +def workflow(infile, out_folder): + datas_f_map = shapping_data(infile, out_folder) + result_cpdb, result_ramp, recap = opti_multimapping(datas_f_map, FOLDER, + mapping="flow") + c_p_o_m_r(result_ramp, FOLDER, "RAMP", fold_of_visu_sav=FOLDER, + modul="flow", f_modul=recap) + c_p_o_m_r(result_cpdb, FOLDER, "CPDB", fold_of_visu_sav=FOLDER, + modul="flow", f_modul=recap) + l_bdd = ["Reactome", "Wikipathways", "KEGG", "EHMN", + "HumanCyc", "SMPDB", "INOH"] + for bddnow in l_bdd: + out_path_links = FOLDER + "CPDB_links_network"+ bddnow+"datas_base.xlsx" + edge_data, nodes_data = paths_link_cpdb(result_cpdb, out_path_links, + recap, bdd= bddnow, flow=True) + print(network_visu(edge_data[0:3], nodes_data, bdd=bddnow)) + + if __name__ == "__main__": - INFILE = FOLDER + "Donnees_oeil_mis_en_forme_opti_mapping.xlsx" - datas_f_map = shapping_data(INFILE, FOLDER) - result_cpdb, result_ramp, recap = opti_multimapping(datas_f_map, FOLDER, mapping="flow") - #c_p_o_m_r(result_ramp, FOLDER, "RAMP", fold_of_visu_sav=FOLDER, modul="flow", f_modul=recap) - #c_p_o_m_r(result_cpdb, FOLDER, "CPDB", fold_of_visu_sav=FOLDER, modul="flow", f_modul=recap) - l_bdd = ["Reactome", "Wikipathways", "KEGG", "EHMN", "HumanCyc", "SMPDB", "INOH"] - for bdd in l_bdd: - out_path_links = FOLDER + "CPDB_links_network"+ bdd+"datas_base.xlsx" - edge_data, nodes_data = Paths_link_CPDB(result_cpdb, out_path_links , recap, bdd= bdd, flow=True) - print(network_visu(edge_data[0:3], nodes_data, bdd="HumanCyc")) + INFILE = FOLDER + "shapping\\entree_test_shapping.xlsx" + workflow(INFILE, FOLDER) + + + + diff --git a/network_visualization.py b/network_visualization.py index 3a6692e..19431aa 100644 --- a/network_visualization.py +++ b/network_visualization.py @@ -1,56 +1,66 @@ -import re -import csv -import matplotlib.pyplot as plt -import seaborn as sns -import numpy as np +""" +This module is designed to draw network from CPDB mapping using Cytoscape +""" +import sys import pandas as pd import py4cytoscape as p4c -from py4cytoscape import palette_color_brewer_d_RdBu -from math import log, floor -import sys -sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') from utils import excel_file_writer, column_recovery +sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') LOCAL = "C:\\Users\\mumec\\Desktop\\Mini_codes\\" -def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome", flow=None): - if flow==None: - all_l_paths = column_recovery(csv_file, 2) - all_l_len_path = column_recovery(csv_file, 8) - all_l_meta_in = column_recovery(csv_file, 5) - all_l_p_value = column_recovery(csv_file, 0) - source = column_recovery(csv_file, 3) - l_all_meta = column_recovery(int_file, 0)[1:] - int_cas = column_recovery(int_file, 1)[1:] - int_tem = column_recovery(int_file, 2)[1:] +def paths_link_cpdb(map_data, out_file, mod_data, bdd="Reactome", flow=None): + """ + Takes datas from CPDB mapping to give data for draw network + + Arg: + map_data : list or csv file from CPDB mapping + out_file : Name of the file to write + mod_data : a list or a csv. file with modulation informations + bdd : he database you wish to use + flow : None if the modulation is not in a workflow + + Returns: + Type of return: 1 xlsx file and 2 list + + """ + if flow is None: + all_l_paths = column_recovery(map_data, 2) + all_l_len_path = column_recovery(map_data, 8) + all_l_meta_in = column_recovery(map_data, 5) + all_l_p_value = column_recovery(map_data, 0) + source = column_recovery(map_data, 3) + l_all_meta = column_recovery(mod_data, 0)[1:] + int_cas = column_recovery(mod_data, 1)[1:] + int_tem = column_recovery(mod_data, 2)[1:] modul = [] for i_cas, cas in enumerate(int_cas): modul.append(float(cas) - float(int_tem[i_cas])) l_all_meta[i_cas] = l_all_meta[i_cas].strip() else: - all_l_paths = csv_file[2] - all_l_len_path = csv_file[8] - all_l_meta_in = csv_file[5] - all_l_p_value = csv_file[0] - source = csv_file[3] + all_l_paths = map_data[2] + all_l_len_path = map_data[8] + all_l_meta_in = map_data[5] + all_l_p_value = map_data[0] + source = map_data[3] if 'HMDB' in all_l_meta_in[1][0]: - l_all_meta = int_file[2][1:]# output cpdb ID probléme bientot + l_all_meta = mod_data[2][1:] # output cpdb ID probléme bientot else: - l_all_meta = int_file[1][1:] - modul = int_file[-1][1:] + l_all_meta = mod_data[1][1:] + modul = mod_data[-1][1:] l_paths = [] l_len_path = [] l_p_value = [] l_meta_in = [] - for ip, np in enumerate(all_l_paths): + for ip, n_p in enumerate(all_l_paths): if source[ip] == bdd: - l_paths.append(np.replace(",",";")) + l_paths.append(n_p.replace(",", ";")) l_p_value.append(all_l_p_value[ip]) l_len_path.append(all_l_len_path[ip]) l_meta_in.append(all_l_meta_in[ip]) for i_lpval, lpval in enumerate(l_p_value[1:]): if "e" in lpval: - pvalac = '0.'+ (int(lpval[-2:])-1)*'0' +lpval[0] +lpval[2:-4] + pvalac = '0.' + (int(lpval[-2:])-1)*'0' + lpval[0] + lpval[2:-4] print(pvalac) l_p_value[i_lpval+1] = float(pvalac) else: @@ -61,33 +71,34 @@ def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome", flow=None): for index_p, act_path in enumerate(l_paths): if index_p != 0 and act_path != l_paths[-1]: edge_now = [] - if flow==None: + if flow is None: splited = l_meta_in[index_p].split(",") else: splited = l_meta_in[index_p] for index_m, try_met in enumerate(l_meta_in[index_p+1:]): mod = 0 links = 0 - for i in range(len(splited)): - splited[i] = splited[i].strip() + for i_spli, spli in enumerate(splited): + splited[i_spli] = spli.strip() for met in splited: - mod += modul[l_all_meta.index(met.strip())] + mod += float(modul[l_all_meta.index(met.strip())]) if met in try_met: links += 1 - edge_now.append([l_paths[index_p+1+index_m], links, mod, len(splited)]) + edge_now.append([l_paths[index_p+1+index_m], + links, mod, len(splited)]) edge.append(edge_now) n_meta_int_in.append(len(splited)) modul_path.append(mod) - elif act_path==l_paths[-1]: + elif act_path == l_paths[-1]: mod = 0 - if flow==None: + if flow is None: splited = l_meta_in[index_p].split(",") else: splited = l_meta_in[index_p] - for i in range(len(splited)): - splited[i] = splited[i].strip() + for i_spl, spl in enumerate(splited): + splited[i_spl] = spl.strip() for met in splited: - mod += modul[l_all_meta.index(met.strip())] + mod += float(modul[l_all_meta.index(met.strip())]) edge.append([[act_path, 0, mod, len(splited)]]) n_meta_int_in.append(len(splited)) modul_path.append(mod) @@ -107,15 +118,28 @@ def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome", flow=None): n_meta_map.append(new_entree[3]) len_path.append(l_len_path[index_edge+1]) p_value.append(l_p_value[index_edge+1]) - out_data = [source, target, n_edge, modulation, n_meta_map, len_path, p_value] + out_data = [source, target, n_edge, modulation, + n_meta_map, len_path, p_value] nodes = [l_paths, l_p_value, n_meta_int_in, l_len_path, modul_path] - print(len(l_paths), len(l_p_value), len(n_meta_int_in), len(l_len_path), len(modul_path)) - network = pd.DataFrame(data = out_data).transpose() + network = pd.DataFrame(data=out_data).transpose() excel_file_writer(network, out_file, sheetname="Network links") return out_data, nodes def network_visu(edge, nodes, bdd="Reactome"): + """ + Takes datas from paths_link_cpdb to draw network on cytoscape + + Arg: + edge : list with first source, second target and third edge weight + nodes : list with first source, second p_value, third + number of metabolites mapped in channels, and fifth channel modulation + bdd : the database you wish to keep + + Returns: + Type of return: cytoscape plot and str + + """ source = nodes[0][1:] p_value = nodes[1][1:] n_meta_in_path = nodes[2][1:] @@ -130,19 +154,17 @@ def network_visu(edge, nodes, bdd="Reactome"): 'N metabolites mapped': n_meta_in_path, 'N metabolites in pathway': len_tot_path, 'Pathway modulation': modul_path}) - df_edges = pd.DataFrame(data={'source': source_for_target, 'target': target, - 'weight': weight_ege}) + df_edges = pd.DataFrame(data={'source': source_for_target, + 'target': target, 'weight': weight_ege}) p4c.create_network_from_data_frames(nodes=df_nodes, edges=df_edges, - title="CPDB_network_"+ bdd, + title="CPDB_network_" + bdd, collection="Network_from_mapping") - #mise en place de paramétres fixe - p4c.set_node_shape_default('ELLIPSE') p4c.set_node_font_size_default(17) - nmm_min = min(n_meta_in_path) - nmm_max = max(n_meta_in_path) - nmm_c = nmm_min + (nmm_max - nmm_min)/2 - p4c.set_node_color_mapping('N metabolites mapped', [nmm_min, nmm_c, nmm_max], + nmmmin = min(n_meta_in_path) + nmmmax = max(n_meta_in_path) + nmmc = nmm_min + (nmm_max - nmm_min)/2 + p4c.set_node_color_mapping('N metabolites mapped', [nmmmin, nmmc, nmmmax], ['#e6eeff', '#6699ff', '#000099'], mapping_type='c') pv_min = min(p_value) @@ -163,11 +185,12 @@ def network_visu(edge, nodes, bdd="Reactome"): p4c.set_node_width_bypass(source, len_tot_path) p4c.layout_network('degree-circle') - return([pv_min, pv_c, pv_max]) + return "Drawing ok" + if __name__ == "__main__": - csv_f = LOCAL + "ora_cpdb_data_yeux_reactome_rev_18-01-2024.csv" - out_file = LOCAL + "reseax_edge_tab_data_oeil_cpdb_reactome_v2_rev_19-01-2024.xlsx" - intens = LOCAL + "chebi_intensite_patho_oeil_donnes_estelles_rev_17-01-2024.csv" - edge_data, nodes_data = Paths_link_CPDB(csv_f, out_file, intens) + CSV_F = LOCAL + "ora_cpdb_data_yeux_reactome_rev_18-01-2024.csv" + OUTFILE = LOCAL + "reseax_edge_tab_data_oeil_cpdb_reactome_v2_rev_19-01-2024.xlsx" + INTENS = LOCAL + "chebi_intensite_patho_oeil_donnes_estelles_rev_17-01-2024.csv" + edge_data, nodes_data = paths_link_cpdb(CSV_F, OUTFILE, INTENS) print(network_visu(edge_data[0:3], nodes_data)) diff --git a/utils.py b/utils.py index c3ac355..4424dac 100644 --- a/utils.py +++ b/utils.py @@ -1,14 +1,14 @@ """ all function utils """ -import pandas as pd import csv import re +import pandas as pd def excel_file_writer(dataframe, n_o_f, sheetname="Resultats"): """ - Take a dataframe and write an excel file with this data + Take a dataframe and write an excel file with these data Arg: dataframe = dataframe of data to write @@ -40,7 +40,7 @@ def column_recovery(file, n, sep=";", enc=None): for line in lines: if line[n].strip() != '': res.append(line[n].strip()) - return(res) + return res def excel_m_file_writer(list_of_dataframe, n_outf, list_of_sheetname): @@ -104,6 +104,7 @@ def recup_all_inf_excel(file): Arg: file = the file to read + Returns: Type of return: 1 list of list line """ @@ -127,4 +128,4 @@ def cor_index(list_objects_to_convert, l_all_obj, l_all_equ): l_to_return = [] for item_to_replace in (list_objects_to_convert): l_to_return.append(l_all_equ[l_all_obj.index(item_to_replace.strip())]) - return l_to_return \ No newline at end of file + return l_to_return -- GitLab