From ef991720e365b9c1826688fcf7887611dc0fed3d Mon Sep 17 00:00:00 2001
From: local_comparaison <mathieu.umec@inrae.fr>
Date: Fri, 2 Feb 2024 17:01:04 +0100
Subject: [PATCH] change for optimize the workflow

---
 .gitignore.txt                                |   3 +-
 Mapping_using_the_API.py                      |  58 ++++++--
 Tests_unitaires/test_Mapping_using_the_API.py |  58 ++------
 ..._complete_processing_of_mapping_results.py |  42 ------
 ...tion_des_donnes_de_mapping.cpython-310.pyc | Bin 7532 -> 7532 bytes
 main.py                                       | 108 +++++++++++---
 network_visualization.py                      | 137 ++++++++++--------
 utils.py                                      |   9 +-
 8 files changed, 237 insertions(+), 178 deletions(-)

diff --git a/.gitignore.txt b/.gitignore.txt
index 7cffe85..0c10313 100644
--- a/.gitignore.txt
+++ b/.gitignore.txt
@@ -1,3 +1,4 @@
 __pycache__/
 logs/
-.cpython-310
\ No newline at end of file
+.cpython-310
+.pyc
\ No newline at end of file
diff --git a/Mapping_using_the_API.py b/Mapping_using_the_API.py
index 5f7041f..ab5d763 100644
--- a/Mapping_using_the_API.py
+++ b/Mapping_using_the_API.py
@@ -4,18 +4,16 @@ the functions for ConsensusPathDB mapping are translation of BRGEnrichment.
 """
 
 import json
-from urllib import request
+from urllib import request, error
 import xmltodict
 import pandas as pd
 from utils import excel_file_writer, pre_cut, recup_all_inf_excel
 FOLDER = "C:\\Users\\mumec\\Desktop\\Mini_codes\\"
 
-
+"""
 def send_request_to_mapping_api(url, data_json, head, met='POST'):
-    """
-    This function gives the result of mapping of a metabolites list from RAMP.
-    Here's an example of 4 metabolites giving 505 lines.
-    ["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"]
+
+    Give the result from the API
 
     Arg:
         url = the url to use
@@ -25,12 +23,41 @@ def send_request_to_mapping_api(url, data_json, head, met='POST'):
 
     Returns:
         Type of return: 1 excel file whith 5 columns
-    """
+
     req = request.Request(url, data=data_json, headers=head, method=met)
     with request.urlopen(req) as response:
         result = response.read()
     out_data = result.decode('utf-8')
     return out_data
+"""
+
+def send_request_to_mapping_api(url, data_json, head, met='POST'):
+    """
+    This function gives the result of mapping of a metabolites list from RAMP.
+    Here's an example of 4 metabolites giving 505 lines.
+    ["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"]
+
+    Arg:
+        url = the url to use
+        data_json = the data to post
+        head = headers to use
+        met = 'POST'
+
+    Returns:
+        Type of return: 1 excel file with 5 columns
+    """
+    try:
+        req = request.Request(url, data=data_json, headers=head, method=met)
+        with request.urlopen(req) as response:
+            result = response.read()
+        out_data = result.decode('utf-8')
+        return out_data
+    except error.URLError as e:
+        print(f"Error: Unable to connect to the server. {e}")
+    except error.HTTPError as e:
+        print(f"Error: The server couldn't fulfill the request. {e}")
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
 
 
 def mapping_ramp_api(metabolites_list, outfile, inf="flow", flow=False):
@@ -108,9 +135,14 @@ def mapping_ramp_api(metabolites_list, outfile, inf="flow", flow=False):
 
 def mapping_ramp_api_enrichment(metabolites_list, outfile, inf="all"):
     """
+    This function gives the result of enrichment of a metabolites list from RAMP.
 
     Arg:
-    Returns:
+    metabolites_list = a list of mÃ©tabolits id to map
+    outfile = the name of the xlsx file to write
+    inf = "all"
+
+    Returns : int and list
     """
     if len(metabolites_list) == 0:
         badend = " Your metabolite list is empty. Here's an example"
@@ -703,9 +735,11 @@ def opti_multimapping(file, outfolder, mapping="flow"):
 
 
 if __name__ == "__main__":
-    F_ENTER = FOLDER+"Donnees_oeil_mis_en_forme_opti_mapping.xlsx"
-    opti_multimapping(F_ENTER, FOLDER)
-    F_O = FOLDER + "test_enrichment_ramp.xlsx"
+    #F_ENTER = FOLDER+"Donnees_oeil_mis_en_forme_opti_mapping.xlsx"
+    #opti_multimapping(F_ENTER, FOLDER)
+    #F_O = FOLDER + "test_enrichment_ramp.xlsx"
     #a, b = mapping_ramp_api(["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"], F_O, inf="all")
+    #print(a, b)
     #b = pd.DataFrame(data=b).transpose()
-    #excel_file_writer(b, F_O, sheetname="Resultats")
\ No newline at end of file
+    #excel_file_writer(b, F_O, sheetname="Resultats")
+    m_ora_cpdb(['C01157','C00002','C00002'], 'kegg', pthreshold=0.05, infos=None, ofile="C:\\Users\\mumec\\Desktop\\test_out_cpdb.xlsx")
diff --git a/Tests_unitaires/test_Mapping_using_the_API.py b/Tests_unitaires/test_Mapping_using_the_API.py
index 391027b..591a41e 100644
--- a/Tests_unitaires/test_Mapping_using_the_API.py
+++ b/Tests_unitaires/test_Mapping_using_the_API.py
@@ -4,39 +4,12 @@ sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des
 from Mapping_using_the_API import  *
 import os
 
-class Testutils(unittest.TestCase):
-   """
-   recup_all_inf_excel(file) NON : NÃ©cessite un fichier xlsx
-
-   send_request_to_mapping_api(url, data_json, head, met='POST') ok
-
-   excel_file_writer(dataframe, name_out_file, sheetname="Resultats") ok
-
-   pre_cut(listed) ok
-   """
-
-
-   def test_send_request_to_mapping_api(self):
-      result = send_request_to_mapping_api('https://rampdb.nih.gov/api/pathways-from-analytes',json.dumps({"analytes": ["hmdb:HMDB0000064"]}).encode('utf-8'), {'Accept': '*/*', 'Content-Type': 'application/json'})
-      self.assertIsInstance(result, str)
-      self.assertNotEqual(len(result),0)
-
-
-   def test_excel_file_writer(self):
-      dataframe = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
-      name_out_file='test_default_sheetname.xlsx'
-      excel_file_writer(dataframe, name_out_file,sheetname="test_1")
-      self.assertTrue(os.path.exists(name_out_file))
-      os.remove(name_out_file)
-
-
-   def test_pre_cut(self):
-      result = pre_cut(["kegg:C00085", "kegg:C00075", "NA", "kegg:C00083"])
-      self.assertListEqual(result,["C00085", "C00075", "NA", "C00083"])
 
 class TestMappingAPI(unittest.TestCase):
 
    """
+   send_request_to_mapping_api(url, data_json, head, met='POST') ok
+
    mapping_ramp_api(metabolites_list, outfile, inf="opti") OK
 
    m_ora_cpdb(accnumbers, acctype, cpdbidsbg=None,
@@ -46,6 +19,11 @@ class TestMappingAPI(unittest.TestCase):
    equiv_from_ma_api(metabolites_list) OK
    """
 
+   def test_send_request_to_mapping_api(self):
+      result = send_request_to_mapping_api('https://rampdb.nih.gov/api/pathways-from-analytes',json.dumps({"analytes": ["hmdb:HMDB0000064"]}).encode('utf-8'), {'Accept': '*/*', 'Content-Type': 'application/json'})
+      self.assertIsInstance(result, str)
+      self.assertNotEqual(len(result),0)
+
    def test_mapping_ramp_api_opti(self):
       len, l_opti = mapping_ramp_api(["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"],"C:\\Users\\mumec\\Desktop\\Mini_codes\\r_unittest.xlsx", inf="opti")
       self.assertGreater(len, 0)
@@ -57,7 +35,8 @@ class TestMappingAPI(unittest.TestCase):
       self.assertIsInstance(l_opti, list)
       self.assertTrue(os.path.exists("C:\\Users\\mumec\\Desktop\\Mini_codes\\r_unittest.xlsx"))
       os.remove("C:\\Users\\mumec\\Desktop\\Mini_codes\\r_unittest.xlsx")
-
+   """
+   # fail because API down
    def test_m_ora_cpdb_opti(self):
       l_result = m_ora_cpdb(["C00037", "C00041", "C00099"], "kegg", infos="opti", ofile="C:\\Users\\mumec\\Desktop\\Mini_codes\\cpdb_unittest.xlsx")
       self.assertIsInstance(l_result, list)
@@ -67,7 +46,7 @@ class TestMappingAPI(unittest.TestCase):
       self.assertIsInstance(l_result, list)
       self.assertTrue(os.path.exists("C:\\Users\\mumec\\Desktop\\Mini_codes\\cpdb_unittest.xlsx"))
       os.remove("C:\\Users\\mumec\\Desktop\\Mini_codes\\cpdb_unittest.xlsx")
-
+   """
    def test_equiv_from_ma_api(self):
       result = equiv_from_ma_api(["chebi:15428", "chebi:16977"])
       self.assertIsInstance(result, str)
@@ -83,10 +62,12 @@ class TestCPDBannexe(unittest.TestCase):
 
    get_cpdb_version() ok
    """
+
    def test_get_cpdb_available_fset_types_other(self):
       with self.assertRaises(ValueError):
             get_cpdb_available_fset_types("invalid_entity_type")
-
+   """
+   #fail because API down
    def test_valid_entity_type_genes(self):
       result = get_cpdb_available_fset_types("genes")
       self.assertIsInstance(result, dict)
@@ -98,11 +79,12 @@ class TestCPDBannexe(unittest.TestCase):
       self.assertIsInstance(result, dict)
       self.assertIn("ID", result)
       self.assertIn("description", result)
-
+   """
    def test_get_cpdb_available_accesion_types_others(self):
       with self.assertRaises(ValueError):
             get_cpdb_available_fset_types("invalid_entity_type")
-
+   """
+   #fail because API down
    def test_get_cpdb_available_accesion_types_metabolites(self):
       result = get_cpdb_available_accesion_types("genes")
       self.assertIsInstance(result, list)
@@ -111,7 +93,6 @@ class TestCPDBannexe(unittest.TestCase):
       result = get_cpdb_available_accesion_types("genes")
       self.assertIsInstance(result, list)
 
-
    def test_get_cpdb_available_accesion_id(self):
       result = get_cpdb_available_accesion_id('kegg', ["C00037", "C00041", "C00099"])
       self.assertIsInstance(result, dict)
@@ -119,13 +100,6 @@ class TestCPDBannexe(unittest.TestCase):
    def test_get_cpdb_version(self):
       result = get_cpdb_version()
       self.assertIsInstance(result, str)
-
-
-class TestMultiMapping(unittest.TestCase):
-   """
-   multimapping_ramp(file, num_col, outfiles, infpath="Yes")  NON : NÃ©cessite un fichier xlsx
-
-   opti_multimapping(file, outfolder, mapping="YES")  NON : NÃ©cessite un fichier xlsx
    """
 
 if __name__=='__main__':
diff --git a/Tests_unitaires/test_complete_processing_of_mapping_results.py b/Tests_unitaires/test_complete_processing_of_mapping_results.py
index 0c9f1a2..1f196c1 100644
--- a/Tests_unitaires/test_complete_processing_of_mapping_results.py
+++ b/Tests_unitaires/test_complete_processing_of_mapping_results.py
@@ -41,18 +41,6 @@ class Test_treatment_result_smapping(unittest.TestCase):
 class Test_import_function(unittest.TestCase):
 
 
-   def test_column_recovery(self):
-      self.temp_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv')
-      self.temp_file.write("1;it;Doe\n")
-      self.temp_file.write("2;is;Smith\n")
-      self.temp_file.write("3;good;Johnson\n")
-      self.temp_file.close()
-      result = column_recovery(self.temp_file.name, 1)
-      expected_result = ['it', 'is', 'good']
-      os.remove(self.temp_file.name)
-      self.assertEqual(result, expected_result)
-
-
    def test_recup_ramp_pathways_list(self):
       self.temp_ramp = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv')
       self.temp_ramp.write("pathwayName;pathwaySource;pathwayId;inputId;commonName\n")
@@ -272,35 +260,5 @@ class Testc_p_o_m_r_mappeurs(unittest.TestCase):
       os.remove(self.temp_cpdb.name)
 
 
-class Test_functiosnutils(unittest.TestCase):
-
-   def cor_index(self):
-      result_corres=cor_index(['t','x','a'],['a','b','c','t','x'],['1','2','3','20','24'])
-      self.assertIsInstance(result_corres, list)
-      self.assertListEqual(result_corres,['20','24','1'])
-
-   def test_comma_cleaning(self):
-      str_clean=comma_cleaning ('this, str , have, four, comma')
-      self.assertIsInstance(str_clean, str)
-      self.assertMultiLineEqual(str_clean,'this_ str _ have_ four_ comma')
-
-   def test_excel_file_writer(self):
-      dataframe = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
-      name_out_file='test_default_sheetname.xlsx'
-      excel_file_writer(dataframe, name_out_file,sheetname="test_1")
-      self.assertTrue(os.path.exists(name_out_file))
-      os.remove(name_out_file)
-
-   def excel_m_file_writer(self):
-      list_of_dataframe = [pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'X': ['a', 'b', 'c'], 'Y': ['d', 'e', 'f']})]
-      name_outfile = 'test_output.xlsx'
-      excel_m_file_writer(list_of_dataframe, name_outfile, ['Sheet1', 'Sheet2'])
-      self.assertTrue(os.path.exists(name_outfile))
-      excel_file = pd.ExcelFile(name_outfile)
-      sheet_names = excel_file.sheet_names
-      self.assertEqual(len(sheet_names),2)
-      excel_file.close()
-      os.remove(name_outfile)
-
 if __name__=='__main__':
     unittest.main()
\ No newline at end of file
diff --git a/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc b/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc
index a1fe174ff1b8ff659683d23c083016ede401ef4f..54ccbe5cd63b9c590c5c9690a006520f6442329a 100644
GIT binary patch
delta 14
WcmaE3^~P$$8cD`qo7YP2X9fT{3<mfB

delta 14
WcmaE3^~P$$8cD`mo7YP2X9fT`Oa|Bh

diff --git a/main.py b/main.py
index b791e2c..011c722 100644
--- a/main.py
+++ b/main.py
@@ -17,7 +17,7 @@ from Recovery_of_associated_Chebi_IDs import chebi_horizontal, chebi_in_outgouin
 from Visualisation_des_donnes_de_mapping import up_down_path_plot, barplot
 from complete_processing_of_mapping_results import recup_ramp_pathways_list, recup_cpdb_pathways_list, recup_me_path_list, recup_ma_pathways_list, pathways_selection, list_f_1, pa_metabo, recov_pos_path_name, df_matrix_r, c_p_o_m_r
 from Mapping_using_the_API import send_request_to_mapping_api, mapping_ramp_api, m_ora_cpdb, opti_multimapping
-from network_visualization import Paths_link_CPDB, network_visu
+from network_visualization import paths_link_cpdb, network_visu
 
 FOLDER = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\main\\"
 
@@ -28,35 +28,103 @@ def shapping_data(file, folder):
 
     Arg:
         file : file with data obtain after analysis
-        folder : folder in which the Excel file containing the modification results will be saved
+        folder : folder in which the Excel file
+                 containing the modification results will be saved
 
     Returns:
         Type of return: list and 1 file .xlsx
 
     """
     beg_datas = recup_all_inf_excel(file)
-    """
-    if "chebi" in beg_datas[0]:
-        i_c_chebi = beg_datas.find("chebi")
-        chebi_increased = chebi_horizontal(beg_datas[i_c_chebi]) # soit modifier pour sortir la liste soit crÃ©er une fonction qui fait les 2 directement
-        chebi_increased.append(chebi_in_outgouing(beg_datas[i_c_chebi]))
-        datas_for_mapping = chebi_increased + beg_datas[1:i_c_chebi] + beg_datas[i_c_chebi+1:]
-    """
-    datas_for_mapping = beg_datas
+    first_case = []
+    all_id_type =['chebi','hmdb','kegg']
+    l_without_id = []
+    n_max_id = [0 for ait in range(len(all_id_type))]
+    for line in beg_datas[1:]:
+        if line[1]=="NA":
+            l_without_id.append(line[0])
+        else:
+            new_line = [line[0]]
+            new_metas = [[] for ait in range(len(all_id_type))]
+            count_id = [0 for ait in range(len(all_id_type))]
+            if "," in line[1]:
+                splited = line[1].split(",")
+                for i_bdd, bdd in enumerate(all_id_type):
+                    for entree in splited:
+                        if bdd in entree:
+                            new_metas[i_bdd].append(entree)
+                            count_id[i_bdd] += 1
+            else:
+                one_id_clean = line[1].strip()
+                id_type = one_id_clean[:one_id_clean.index(":")]
+                index_type = all_id_type.index(id_type)
+                new_metas[index_type].append(one_id_clean)
+                count_id[index_type] = 1
+            for i_val_id, val_id in enumerate(count_id):
+                if n_max_id[i_val_id] < val_id:
+                    n_max_id[i_val_id] = val_id
+            new_line.append(new_metas)
+            new_line.append(count_id)
+            new_line.append(log(float(line[3])/float(line[2])))
+            first_case.append(new_line)
+    out_table = [['Name']]
+    for i_vid, vid in enumerate(n_max_id):
+        for count_v in range(vid):
+            out_table[0].append(all_id_type[i_vid])
+    out_table[0].append('log(cas/temoin)')
+    chebi_initial = []
+    for i_intrem, intrem in enumerate(first_case):
+        int_line = [intrem[0]]
+
+        if intrem[2][0] != 0:
+            chebi_initial.append(intrem[1][0][0])
+        else:
+            chebi_initial.append("NA")
+        for i_values, values in enumerate(n_max_id):
+            for colu in range(values):
+                if intrem[2][i_values]>colu:
+                    int_line.append(intrem[1][i_values][colu])
+                else:
+                    int_line.append("NA")
+        int_line.append(intrem[3])
+        out_table.append(int_line)
+    print("le nombre d'identifiant maximal pour 1 mÃ©tabolite: ", n_max_id)
+    chebi_prefix_cut = pre_cut(chebi_initial)
+    outf_horiz = FOLDER + "chebi_horizontaux.xlsx"
+    horizontal = chebi_horizontal(chebi_prefix_cut, outf_horiz, flow=True)
+    inci= n_max_id[0] + 1
+    for i_fline, fline in enumerate(out_table[1:]):
+        out_table[i_fline+1] = fline[:inci] + horizontal[i_fline] + fline[inci:]
+    out_table[0] = out_table[0][:inci] + ["chebi" for ait in range(4)] + out_table[0][inci:]
+    datas_for_mapping = out_table
     df_dfm = pd.DataFrame(data=datas_for_mapping)
     n_o_f = folder + "Datas_mis_en_forme_pour_le_mapping.xlsx"
     excel_file_writer(df_dfm, n_o_f)
+    print("les mÃ©tabolites suivant n'ont pas d'ID ", l_without_id)
     return(datas_for_mapping)
 
 
+def workflow(infile, out_folder):
+    datas_f_map = shapping_data(infile, out_folder)
+    result_cpdb, result_ramp, recap = opti_multimapping(datas_f_map, FOLDER,
+                                                        mapping="flow")
+    c_p_o_m_r(result_ramp, FOLDER, "RAMP", fold_of_visu_sav=FOLDER,
+              modul="flow", f_modul=recap)
+    c_p_o_m_r(result_cpdb, FOLDER, "CPDB", fold_of_visu_sav=FOLDER,
+              modul="flow", f_modul=recap)
+    l_bdd = ["Reactome", "Wikipathways", "KEGG", "EHMN",
+             "HumanCyc", "SMPDB", "INOH"]
+    for bddnow in l_bdd:
+        out_path_links = FOLDER + "CPDB_links_network"+ bddnow+"datas_base.xlsx"
+        edge_data, nodes_data = paths_link_cpdb(result_cpdb, out_path_links,
+                                                recap, bdd= bddnow, flow=True)
+        print(network_visu(edge_data[0:3], nodes_data, bdd=bddnow))
+
+
 if __name__ == "__main__":
-    INFILE = FOLDER + "Donnees_oeil_mis_en_forme_opti_mapping.xlsx"
-    datas_f_map = shapping_data(INFILE, FOLDER)
-    result_cpdb, result_ramp, recap = opti_multimapping(datas_f_map, FOLDER,  mapping="flow")
-    #c_p_o_m_r(result_ramp, FOLDER, "RAMP", fold_of_visu_sav=FOLDER, modul="flow", f_modul=recap)
-    #c_p_o_m_r(result_cpdb, FOLDER, "CPDB", fold_of_visu_sav=FOLDER, modul="flow", f_modul=recap)
-    l_bdd = ["Reactome", "Wikipathways", "KEGG", "EHMN", "HumanCyc", "SMPDB", "INOH"]
-    for bdd in l_bdd:
-        out_path_links = FOLDER + "CPDB_links_network"+ bdd+"datas_base.xlsx"
-        edge_data, nodes_data = Paths_link_CPDB(result_cpdb, out_path_links , recap, bdd= bdd, flow=True)
-        print(network_visu(edge_data[0:3], nodes_data, bdd="HumanCyc"))
+    INFILE = FOLDER + "shapping\\entree_test_shapping.xlsx"
+    workflow(INFILE, FOLDER)
+
+
+
+
diff --git a/network_visualization.py b/network_visualization.py
index 3a6692e..19431aa 100644
--- a/network_visualization.py
+++ b/network_visualization.py
@@ -1,56 +1,66 @@
-import re
-import csv
-import matplotlib.pyplot as plt
-import seaborn as sns
-import numpy as np
+"""
+This module is designed to draw network from CPDB mapping using Cytoscape
+"""
+import sys
 import pandas as pd
 import py4cytoscape as p4c
-from py4cytoscape import palette_color_brewer_d_RdBu
-from math import log, floor
-import sys
-sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_donnÃ©es')
 from utils import excel_file_writer, column_recovery
+sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_donnÃ©es')
 LOCAL = "C:\\Users\\mumec\\Desktop\\Mini_codes\\"
 
 
-def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome",  flow=None):
-    if flow==None:
-        all_l_paths = column_recovery(csv_file, 2)
-        all_l_len_path = column_recovery(csv_file, 8)
-        all_l_meta_in = column_recovery(csv_file, 5)
-        all_l_p_value = column_recovery(csv_file, 0)
-        source = column_recovery(csv_file, 3)
-        l_all_meta = column_recovery(int_file, 0)[1:]
-        int_cas = column_recovery(int_file, 1)[1:]
-        int_tem = column_recovery(int_file, 2)[1:]
+def paths_link_cpdb(map_data, out_file, mod_data, bdd="Reactome", flow=None):
+    """
+    Takes datas from CPDB mapping to give data for draw network
+
+    Arg:
+        map_data : list or csv file from CPDB mapping
+        out_file : Name of the file to write
+        mod_data : a list or a csv. file with modulation informations
+        bdd : he database you wish to use
+        flow : None if the modulation is not in a workflow
+
+    Returns:
+        Type of return: 1 xlsx file and 2 list
+
+    """
+    if flow is None:
+        all_l_paths = column_recovery(map_data, 2)
+        all_l_len_path = column_recovery(map_data, 8)
+        all_l_meta_in = column_recovery(map_data, 5)
+        all_l_p_value = column_recovery(map_data, 0)
+        source = column_recovery(map_data, 3)
+        l_all_meta = column_recovery(mod_data, 0)[1:]
+        int_cas = column_recovery(mod_data, 1)[1:]
+        int_tem = column_recovery(mod_data, 2)[1:]
         modul = []
         for i_cas, cas in enumerate(int_cas):
             modul.append(float(cas) - float(int_tem[i_cas]))
             l_all_meta[i_cas] = l_all_meta[i_cas].strip()
     else:
-        all_l_paths = csv_file[2]
-        all_l_len_path = csv_file[8]
-        all_l_meta_in = csv_file[5]
-        all_l_p_value = csv_file[0]
-        source = csv_file[3]
+        all_l_paths = map_data[2]
+        all_l_len_path = map_data[8]
+        all_l_meta_in = map_data[5]
+        all_l_p_value = map_data[0]
+        source = map_data[3]
         if 'HMDB' in all_l_meta_in[1][0]:
-            l_all_meta = int_file[2][1:]# output cpdb ID problÃ©me bientot
+            l_all_meta = mod_data[2][1:]   # output cpdb ID problÃ©me bientot
         else:
-             l_all_meta = int_file[1][1:]
-        modul = int_file[-1][1:]
+            l_all_meta = mod_data[1][1:]
+        modul = mod_data[-1][1:]
     l_paths = []
     l_len_path = []
     l_p_value = []
     l_meta_in = []
-    for ip, np in enumerate(all_l_paths):
+    for ip, n_p in enumerate(all_l_paths):
         if source[ip] == bdd:
-            l_paths.append(np.replace(",",";"))
+            l_paths.append(n_p.replace(",", ";"))
             l_p_value.append(all_l_p_value[ip])
             l_len_path.append(all_l_len_path[ip])
             l_meta_in.append(all_l_meta_in[ip])
     for i_lpval, lpval in enumerate(l_p_value[1:]):
         if "e" in lpval:
-            pvalac = '0.'+ (int(lpval[-2:])-1)*'0'  +lpval[0] +lpval[2:-4]
+            pvalac = '0.' + (int(lpval[-2:])-1)*'0' + lpval[0] + lpval[2:-4]
             print(pvalac)
             l_p_value[i_lpval+1] = float(pvalac)
         else:
@@ -61,33 +71,34 @@ def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome",  flow=None):
     for index_p, act_path in enumerate(l_paths):
         if index_p != 0 and act_path != l_paths[-1]:
             edge_now = []
-            if flow==None:
+            if flow is None:
                 splited = l_meta_in[index_p].split(",")
             else:
                 splited = l_meta_in[index_p]
             for index_m, try_met in enumerate(l_meta_in[index_p+1:]):
                 mod = 0
                 links = 0
-                for i in range(len(splited)):
-                    splited[i] = splited[i].strip()
+                for i_spli, spli in enumerate(splited):
+                    splited[i_spli] = spli.strip()
                 for met in splited:
-                    mod += modul[l_all_meta.index(met.strip())]
+                    mod += float(modul[l_all_meta.index(met.strip())])
                     if met in try_met:
                         links += 1
-                edge_now.append([l_paths[index_p+1+index_m], links, mod, len(splited)])
+                edge_now.append([l_paths[index_p+1+index_m],
+                                 links, mod, len(splited)])
             edge.append(edge_now)
             n_meta_int_in.append(len(splited))
             modul_path.append(mod)
-        elif act_path==l_paths[-1]:
+        elif act_path == l_paths[-1]:
             mod = 0
-            if flow==None:
+            if flow is None:
                 splited = l_meta_in[index_p].split(",")
             else:
                 splited = l_meta_in[index_p]
-            for i in range(len(splited)):
-                splited[i] = splited[i].strip()
+            for i_spl, spl in enumerate(splited):
+                splited[i_spl] = spl.strip()
             for met in splited:
-                mod += modul[l_all_meta.index(met.strip())]
+                mod += float(modul[l_all_meta.index(met.strip())])
             edge.append([[act_path, 0, mod, len(splited)]])
             n_meta_int_in.append(len(splited))
             modul_path.append(mod)
@@ -107,15 +118,28 @@ def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome",  flow=None):
             n_meta_map.append(new_entree[3])
             len_path.append(l_len_path[index_edge+1])
             p_value.append(l_p_value[index_edge+1])
-    out_data = [source, target, n_edge, modulation, n_meta_map, len_path, p_value]
+    out_data = [source, target, n_edge, modulation,
+                n_meta_map, len_path, p_value]
     nodes = [l_paths, l_p_value, n_meta_int_in, l_len_path, modul_path]
-    print(len(l_paths), len(l_p_value), len(n_meta_int_in), len(l_len_path), len(modul_path))
-    network = pd.DataFrame(data = out_data).transpose()
+    network = pd.DataFrame(data=out_data).transpose()
     excel_file_writer(network, out_file, sheetname="Network links")
     return out_data, nodes
 
 
 def network_visu(edge, nodes, bdd="Reactome"):
+    """
+    Takes datas from paths_link_cpdb to draw network on cytoscape
+
+    Arg:
+        edge : list with first source, second target and third edge weight
+        nodes : list with first source, second p_value, third
+        number of metabolites mapped in channels, and fifth channel modulation
+        bdd : the database you wish to keep
+
+    Returns:
+        Type of return: cytoscape plot and str
+
+    """
     source = nodes[0][1:]
     p_value = nodes[1][1:]
     n_meta_in_path = nodes[2][1:]
@@ -130,19 +154,17 @@ def network_visu(edge, nodes, bdd="Reactome"):
                                   'N metabolites mapped': n_meta_in_path,
                                   'N metabolites in pathway': len_tot_path,
                                   'Pathway modulation': modul_path})
-    df_edges = pd.DataFrame(data={'source': source_for_target, 'target': target,
-                                  'weight': weight_ege})
+    df_edges = pd.DataFrame(data={'source': source_for_target,
+                                  'target': target, 'weight': weight_ege})
     p4c.create_network_from_data_frames(nodes=df_nodes, edges=df_edges,
-                                        title="CPDB_network_"+ bdd,
+                                        title="CPDB_network_" + bdd,
                                         collection="Network_from_mapping")
-    #mise en place de paramÃ©tres fixe
-
     p4c.set_node_shape_default('ELLIPSE')
     p4c.set_node_font_size_default(17)
-    nmm_min = min(n_meta_in_path)
-    nmm_max = max(n_meta_in_path)
-    nmm_c = nmm_min + (nmm_max - nmm_min)/2
-    p4c.set_node_color_mapping('N metabolites mapped', [nmm_min, nmm_c, nmm_max],
+    nmmmin = min(n_meta_in_path)
+    nmmmax = max(n_meta_in_path)
+    nmmc = nmm_min + (nmm_max - nmm_min)/2
+    p4c.set_node_color_mapping('N metabolites mapped', [nmmmin, nmmc, nmmmax],
                                ['#e6eeff', '#6699ff', '#000099'],
                                mapping_type='c')
     pv_min = min(p_value)
@@ -163,11 +185,12 @@ def network_visu(edge, nodes, bdd="Reactome"):
     p4c.set_node_width_bypass(source, len_tot_path)
 
     p4c.layout_network('degree-circle')
-    return([pv_min, pv_c, pv_max])
+    return "Drawing ok"
+
 
 if __name__ == "__main__":
-    csv_f = LOCAL + "ora_cpdb_data_yeux_reactome_rev_18-01-2024.csv"
-    out_file = LOCAL + "reseax_edge_tab_data_oeil_cpdb_reactome_v2_rev_19-01-2024.xlsx"
-    intens = LOCAL + "chebi_intensite_patho_oeil_donnes_estelles_rev_17-01-2024.csv"
-    edge_data, nodes_data = Paths_link_CPDB(csv_f, out_file, intens)
+    CSV_F = LOCAL + "ora_cpdb_data_yeux_reactome_rev_18-01-2024.csv"
+    OUTFILE = LOCAL + "reseax_edge_tab_data_oeil_cpdb_reactome_v2_rev_19-01-2024.xlsx"
+    INTENS = LOCAL + "chebi_intensite_patho_oeil_donnes_estelles_rev_17-01-2024.csv"
+    edge_data, nodes_data = paths_link_cpdb(CSV_F, OUTFILE, INTENS)
     print(network_visu(edge_data[0:3], nodes_data))
diff --git a/utils.py b/utils.py
index c3ac355..4424dac 100644
--- a/utils.py
+++ b/utils.py
@@ -1,14 +1,14 @@
 """
 all function utils
 """
-import pandas as pd
 import csv
 import re
+import pandas as pd
 
 
 def excel_file_writer(dataframe, n_o_f,  sheetname="Resultats"):
     """
-    Take a dataframe and write an excel file with this data
+    Take a dataframe and write an excel file with these data
 
     Arg:
         dataframe = dataframe of data to write
@@ -40,7 +40,7 @@ def column_recovery(file, n, sep=";", enc=None):
             for line in lines:
                 if line[n].strip() != '':
                     res.append(line[n].strip())
-    return(res)
+    return res
 
 
 def excel_m_file_writer(list_of_dataframe, n_outf, list_of_sheetname):
@@ -104,6 +104,7 @@ def recup_all_inf_excel(file):
 
     Arg:
         file = the file to read
+
     Returns:
         Type of return: 1 list of list line
     """
@@ -127,4 +128,4 @@ def cor_index(list_objects_to_convert, l_all_obj, l_all_equ):
     l_to_return = []
     for item_to_replace in (list_objects_to_convert):
         l_to_return.append(l_all_equ[l_all_obj.index(item_to_replace.strip())])
-    return l_to_return
\ No newline at end of file
+    return l_to_return
-- 
GitLab