From 1858b5dd6b4c6476c116ee56245921c2d8a10f4f Mon Sep 17 00:00:00 2001
From: local_comparaison <mathieu.umec@inrae.fr>
Date: Mon, 29 Jan 2024 14:26:08 +0100
Subject: [PATCH] addition of main file and network visualization as well as
 adjustments to other programs to facilitate workflow

---
 Mapping_using_the_API.py                      | 117 +++------
 Visualisation_des_donnes_de_mapping.py        |  25 ++
 ...tion_des_donnes_de_mapping.cpython-310.pyc | Bin 5713 -> 7532 bytes
 complete_processing_of_mapping_results.py     | 246 +++++++-----------
 main.py                                       |  62 +++++
 network_visualization.py                      | 173 ++++++++++++
 utils.py                                      | 130 +++++++++
 7 files changed, 525 insertions(+), 228 deletions(-)
 create mode 100644 main.py
 create mode 100644 network_visualization.py
 create mode 100644 utils.py

diff --git a/Mapping_using_the_API.py b/Mapping_using_the_API.py
index 4884197..5f7041f 100644
--- a/Mapping_using_the_API.py
+++ b/Mapping_using_the_API.py
@@ -7,23 +7,10 @@ import json
 from urllib import request
 import xmltodict
 import pandas as pd
+from utils import excel_file_writer, pre_cut, recup_all_inf_excel
 FOLDER = "C:\\Users\\mumec\\Desktop\\Mini_codes\\"
 
 
-def recup_all_inf_excel(file):
-    """
-    This function takes infos from a .xlsx
-
-    Arg:
-        file = the file to read
-    Returns:
-        Type of return: 1 list of list line
-    """
-    datas = pd.read_excel(file, header=None, na_filter=False)
-    l_datas = datas.values.tolist()
-    return l_datas
-
-
 def send_request_to_mapping_api(url, data_json, head, met='POST'):
     """
     This function gives the result of mapping of a metabolites list from RAMP.
@@ -46,47 +33,7 @@ def send_request_to_mapping_api(url, data_json, head, met='POST'):
     return out_data
 
 
-def excel_file_writer(dataframe, name_out_file, sheetname="Resultats"):
-    """
-    write an excel file
-
-    Arg:
-        dataframe = the data to write on dataframe shape
-        name_out_file = name of the outfile to write
-        sheetname = name of the sheet to write
-
-    Returns:
-        Type of return: 1 excel file whith 5 columns
-    """
-    ex_f = pd.ExcelWriter(name_out_file)
-    dataframe.to_excel(ex_f, sheet_name=sheetname, index=False, header=False)
-    ex_f.close()
-
-
-def pre_cut(listed):
-    """
-    cut only 1 type of ID by the first entree
-
-    Arg:
-        list: 1 list of id
-
-    Returns:
-        Type of return: 1 list
-    """
-    clean_list = []
-    cump = 0
-    while listed[cump] == "NA":
-        cump += 1
-    pos_cut = listed[cump].index(":")
-    for elem in listed:
-        if elem == "NA":
-            clean_list.append("NA")
-        else:
-            clean_list.append(elem[pos_cut+1:])
-    return clean_list
-
-
-def mapping_ramp_api(metabolites_list, outfile, inf="opti"):
+def mapping_ramp_api(metabolites_list, outfile, inf="flow", flow=False):
     """
     This function gives the result of mapping of a metabolites list from RAMP.
     Here's an example of 4 metabolites giving 505 lines.
@@ -94,7 +41,7 @@ def mapping_ramp_api(metabolites_list, outfile, inf="opti"):
 
     Arg:
         metabolites_list = a list of metabolites id
-        outfiles = name of the outfile to write
+        outfile = name of the outfile to write
         inf = if all give the full information
 
     Returns:
@@ -132,7 +79,7 @@ def mapping_ramp_api(metabolites_list, outfile, inf="opti"):
                 break
         print(str(len(l_met_map))+" metabolites were found")
         return (len(l_met_map), l_met_map)
-    if inf == "all":
+    if inf in ("all", "flow"):
         psource = []
         pathwayid = []
         commonname = []
@@ -141,13 +88,13 @@ def mapping_ramp_api(metabolites_list, outfile, inf="opti"):
         onel = datas_to_treat[i_b_l[index_pos]:i_b_l[index_pos+1]]
         pathwayname.append(onel[16:onel.find("pathwaySource")-3])
         inputid.append(onel[onel.find("inputId")+10:onel.find("commonName")-3])
-        if inf == "all":
+        if inf in ("all", "flow"):
             psource.append(onel[onel.find("pathwaySource")+16:onel.find("pathwayId")-3])
             pathwayid.append(onel[onel.find("pathwayId")+12:onel.find("inputId")-3])
             commonname.append(onel[onel.find("commonName")+13:len(onel)-3])
     pathwayname.insert(0, "pathwayName")
     inputid.insert(0, "inputid")
-    if inf == "all":
+    if inf in ("all", "flow"):
         psource.insert(0, "pathway_source")
         pathwayid.insert(0, "pathwayid")
         commonname.insert(0, "commonname")
@@ -431,7 +378,7 @@ def get_cpdb_version():
 
 
 def m_ora_cpdb(accnumbers, acctype, cpdbidsbg=None,
-               pthreshold=0.05, infos="all",
+               pthreshold=0.05, infos="flow",
                ofile="C:\\Users\\mumec\\Desktop\\test_out_cpdb.xlsx"):
     """
     Give the result of id mapping on CPDB
@@ -522,7 +469,7 @@ def m_ora_cpdb(accnumbers, acctype, cpdbidsbg=None,
             id_cln = tab_cor[1].index(id_t)
             l_map_cor.append(tab_cor[0][id_cln])
         return l_map_cor
-    if infos == "all":
+    if infos in ("all", "flow"):
         splited = details.split("',")
         fsetid = ["fsetId"]
         cpdburl = ["URLCPDB"]
@@ -596,6 +543,8 @@ def m_ora_cpdb(accnumbers, acctype, cpdbidsbg=None,
              overlapping, size, e_size, fsetid, pmids, cpdburl]
     out_df = pd.DataFrame(data=out_f).transpose()
     excel_file_writer(out_df, ofile, sheetname="Resultats")
+    if infos == "flow":
+        return out_f
     return ovlent_c
 
 
@@ -628,7 +577,7 @@ def multimapping_ramp(file, num_col, outfiles, infpath="Yes"):
     return l_o_d
 
 
-def opti_multimapping(file, outfolder, mapping="YES"):
+def opti_multimapping(file, outfolder, mapping="flow"):
     """
     Processe optimal mapping of RAMP and CPDB
 
@@ -640,13 +589,19 @@ def opti_multimapping(file, outfolder, mapping="YES"):
     Returns:
         Type of return: 2 excel files
     """
-    inf = recup_all_inf_excel(file)
+    if mapping == "flow":
+        n_mapped = []
+        inf = file
+    else:
+        inf = recup_all_inf_excel(file)
     to_test = []
     recap = [[]]
     id_dif = []
     col_id = []
+    modulation =[]
     for line in inf:
         recap[0].append(line[0])
+        modulation.append(line[-1])
     for ind_head, headers in enumerate(inf[0][1:-1]):
         if headers not in id_dif:
             id_dif.append(headers)
@@ -681,18 +636,24 @@ def opti_multimapping(file, outfolder, mapping="YES"):
                     if col_actu[index_change] != "NA":
                         to_test.remove(col_actu[index_change])
             if len(cpdb_o_opti) == len(inf[1:]) or n_col == col_id[i_t_i][-1]:
-                if mapping == "YES":
+                if mapping == "all":
                     cpdbf = outfolder+acctype+"_mapping_opti_cpdb.xlsx"
                     m_ora_cpdb(cpdb_o_opti, acctype, infos="all", ofile=cpdbf)
                 l_opti_for_this_id[0] = "CPDB "+acctype
+                if mapping == "flow":
+                    n_mapped.append(len(cpdb_o_opti))
                 recap.append(l_opti_for_this_id)
                 break
+    if mapping == "flow":
+        i_map_opt = n_mapped.index(max(n_mapped)) + 1
+        cpdbf = outfolder+recap[i_map_opt][0]+"_mapping_opti.xlsx"
+        datas_cpdb = m_ora_cpdb(cpdb_o_opti, acctype, infos="flow", ofile=cpdbf)
     for line in inf[1:]:
         to_test.append(line[1])
     l_opt_ramp = []
     l_opt_ramp_tri = ["NA" for i in range(len(inf))]
     n_meta_map = 0
-    ramp_outf = FOLDER+"optimapping_ramp.xlsx"
+    ramp_outf = outfolder+"optimapping_ramp.xlsx"
     n_meta_map, l_opt_ramp = mapping_ramp_api(to_test, ramp_outf, inf="opti")
     if n_meta_map == len(inf)-1:
         mapping_ramp_api(l_opt_ramp, ramp_outf, inf="all")
@@ -708,7 +669,6 @@ def opti_multimapping(file, outfolder, mapping="YES"):
             l_opt_ramp_tri[index_l+1] = li[1]
     input_col = 2
     while n_meta_map != len(inf)-1 and input_col != (len(inf[0])-1):
-        # prend en compte la derniÃ©re colones de fold-change
         if len(to_test) != 0:
             n_sup, s_map = mapping_ramp_api(to_test, ramp_outf, inf="opti")
             n_meta_map += n_sup
@@ -725,20 +685,27 @@ def opti_multimapping(file, outfolder, mapping="YES"):
         for ind_ind in index_still:
             if inf[ind_ind][input_col] != "NA":
                 to_test.append(inf[ind_ind][input_col])
-    if mapping == "YES":
-        mapping_ramp_api(l_opt_ramp, ramp_outf, inf="all")
+    if mapping == "all":
+        n_map, datas_ramp = mapping_ramp_api(l_opt_ramp, ramp_outf, inf="all")
+    if mapping == "flow":
+        ramp_outf = outfolder + "ramp_mapping_opti.xlsx"
+        n_map, datas_ramp = mapping_ramp_api(l_opt_ramp, ramp_outf, inf="flow")
+    print("lines Ramp", n_map)
     l_opt_ramp_tri[0] = "RAMP"
     recap.append(l_opt_ramp_tri)
-    recap = pd.DataFrame(data=recap).transpose()
-    n_out_f = outfolder+"recap_mapping_opti_oeil.xlsx"
-    excel_file_writer(recap, n_out_f, sheetname="Resultats")
+    recap.append(modulation)
+    df_recap = pd.DataFrame(data=recap).transpose()
+    n_out_f = outfolder+"recap_multimapping.xlsx"
+    excel_file_writer(df_recap, n_out_f, sheetname="Resultats")
+    if mapping == "flow":
+        return datas_cpdb, datas_ramp, recap
     return "all is ok"
 
 
 if __name__ == "__main__":
     F_ENTER = FOLDER+"Donnees_oeil_mis_en_forme_opti_mapping.xlsx"
-    #opti_multimapping(F_ENTER, FOLDER)
+    opti_multimapping(F_ENTER, FOLDER)
     F_O = FOLDER + "test_enrichment_ramp.xlsx"
-    a, b = mapping_ramp_api(["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"], F_O, inf="all")
-    b = pd.DataFrame(data=b).transpose()
-    excel_file_writer(b, F_O, sheetname="Resultats")
\ No newline at end of file
+    #a, b = mapping_ramp_api(["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"], F_O, inf="all")
+    #b = pd.DataFrame(data=b).transpose()
+    #excel_file_writer(b, F_O, sheetname="Resultats")
\ No newline at end of file
diff --git a/Visualisation_des_donnes_de_mapping.py b/Visualisation_des_donnes_de_mapping.py
index 926f7f4..2ebdf96 100644
--- a/Visualisation_des_donnes_de_mapping.py
+++ b/Visualisation_des_donnes_de_mapping.py
@@ -175,6 +175,31 @@ def boite_a_metabolites(file, title_plot="boÃ®te Ã  moustache", num_col_plot=1):
     plt.show()
 
 
+def barplot(column_x, column_y, df_data, title="barplot", figure_size=(30, 5),
+            ax_x_label="voies mÃ©taboliques", ax_y_label='Recouvrement moyen',
+            colors='Spectral', decimal='%.1f', size_of_labels=6):
+    """
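+    Draw a seaborn barplot from a dataframe
+
+    Arg:
+        column_x = data or df_data column name for the abscissa (x) axis
+        column_y = data or df_data column name for the ordinate (y) axis
+        df_data = dataframe of data to plot
+
+    Returns:
+        Type of return: the object returned by sns.barplot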
+    """
+    plt.subplots(1, 1, figsize=figure_size)
+    p1 = sns.barplot(x=column_x, y=column_y, data=df_data, palette=colors)
+    plt.subplots_adjust(top=0.90, bottom=0.26)
+    p1.set(title=title)
+    plt.xlabel(ax_x_label)
+    plt.ylabel(ax_y_label)
+    p1.bar_label(p1.containers[0], fontsize=7, fmt=decimal)
+    p1.tick_params(axis='x', rotation=90, size=0.05, labelsize=size_of_labels)
+    return p1
+
+
 def up_down_path_plot(l_path, up, down, log_p):
     """
     plot regulation of pathways
diff --git a/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc b/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc
index e273d9b1eabea1642dccb1dcff7fafa4f1eeb334..a1fe174ff1b8ff659683d23c083016ede401ef4f 100644
GIT binary patch
literal 7532
zcmc&(TZ|)TR(4%&m)-9Bb&`p9!a%$svrJZE!zjsgHp3=C!(=v>U8o&YRQoTx%Tum$
z|5cv8q%0Oq*tga`2YBdV5mLV_0)d3Y8}rB_ka$>8FMzm2Ezn8`<t0KHzH@%t-RVpq
zZ(A+@_e<6J&-u>xpH60GoD!b?pZ{_2oo1=@FH|}ESg35`m;MNeD7ht}2|d#Kx~pqc
zHX_3{Ik#M!^K!r9R>ysgTN6JcY*Bt+bL-VgRTGzmBWkG4h@TZRqJdJQI*az76Z2vL
zwOR3uSQ5)9%~e-1<AP|3C&UWs^WsTy@%@s!5G;zP?&?x>OQQJ)n!AMjOURdzUqZfu
z{AuK?!9uVUEC(yW>V4B)3#v!!4@!71<8A)6zI-rn-q{Y*W<L?bC}^T2f;8;LfoNt)
zb0Cv;kfzOSJ7@|&^P9<5=7-cNhBA!1&3=&iTS*jVL9_1<257p1^{5iY&CW1xXJHc4
zs%>xJC0^f?&Ix2#YH7vlE8T9C^aJT<;l1G1XqaXJt$FLi-~9m3`Wu@kB=@#K<HyfM
z<YoL`#V<Y3OQo#T({etJLw#gqdTtyU4@&nnt(2L$(X;MrBg$=I2=h=ISr}pU%GBR2
zjcokN_*Jq>R?RJ;tz+hW?MCT0o*s?sd3C*%YY_C;^U9Hvn^b>**<7nqY3JG8Mh}<s
z^4T#v&tg>@Z{TFLqk8V-PDg*BjcP(0)$yD8%&uP<HS$JYxmS`qd0kjvD~;+qoyU$`
zAI;{oJo0<_43C`Ic`vu}2HtiSzZ%x5ua&5l&#aYREbY9v)5-PGTsD)>kt&MHp>fp6
zYj~gee7-k3T7YC0#&TJNOqQ}rKF6|Xji6K`DAZ^*o1-z1&(a-zv^HAL*Yjn_=5O;A
ze0Mot8_Q-DwKbN_U*?M_udu|HP+H~IKgs5M3!*BVLprC)KCh29u*U^a8$H32S<E-i
zoX3;-1<bGIOVmmyg>U0>{v?%YU3|gk_S2W`{C75gw3t7^W9N^Sa^4}o@Bl`PH(C~T
zGDa~g<_=Aa{x^(X5%bw<uHV;I)eiGVYivFXZ<kt&2mk0a@tkUI(~m{-UKs2udkNFb
zkK3@RxOuhNR{HJmlviZ;8W+NNFwDG82+O<H<Sgm1p>gj$Kk~x36UHHWQ5gnJbdOuO
z&ckAFym*RX@vy%oQyRDH#~BQD(x-NQE69d2PSx)8_V4TucvZ<ASm0`N5G5HeEH#)J
zEVUZOBG~iVNmN*!DDgA2St7&C6B^;x1~O<TeSAtFiVBsYAPZVX;her&VFM~;*e=RZ
z7~l1~K}$cG#pDw*?_;efcfxKO9t0-|>V>v<;v(<9xAeo;{`ug~uAh+6ADe~g?}cem
zkx2&7NaDhzDMdB%w}Oa@2OCiU1PeQzKn8IJ*im9noAFDxTI+>A5QQTHUwFY@JHY1*
zfH+xUZlkF%Bb-=K#%B)uF?Puk57Hd$7goCM4}!vye%uWTBMRce2G|9$DD-$xSV4b~
z?Mt9hsW3WW=9bm_ctth90t9fFHHfmp#zziiP?Xc*7D>4<Ijv=3`>r?erQgT4>2Q#R
zS%fKRCh>*a$!_bS>zt~J>zpcvB*#@$37b2Z9+3E>7=;v$2#sFmE=L6C-XIxDFOxo?
zx)0H~3#ZVXdg_JU3uoKJSbxVVSX2f{8j{rAnK*%x1zxh%3$oNLXOL*-^<BH;;cx@j
z>X5FucF5Jj3c0#Ssu{@COVB+&7S|b<fcx%(4ANngrITK2TZ}yV8Z`OScF5PmlF`tu
zjc0ptvg=l($wYUS?1oiV?c<ULhwZ?V?LwDr*TSoKgRz7L?jjA2v#=AwWW28Q2it*L
z9->it;c-d+B4qqg56P?7?!24g6YlhJtnE8Dg7j{d4DQ@W(lo@_E{@IL@}i{eM|Vy|
z?ZKmZB8lUN$3c4MTVXo%p=qqZ(=HkjcoP>2o!gj-zwg6tV14RSAe1`ZB;7;On5k=y
zMn9APmb5voL3MpjtDr=!ODLgVSNTO<{~yybH64HEC$_GCVjC46SLamCb2R<Cv@ZRr
zH7;qFu*w`(TR@w90h@ok@D_}Icu%rAfj{gA@d?>L+e9*Xswhky_IFXqN=I<ma62a8
z+Uyx{k5*>(;C%6?!%dT;w%}guQ8}}FWjHkhP;ccrQd3xYS=fixsFGKDm0S~Lav>;J
z@pD84t^saE)D8`9t&JhQDrO1Na~rTd=QjXajBjLoO~1M2OU66`&4Hh7@A~^q0<?R9
z+&_ocpNG_JupX~>jmynWBH0VDHYu2f&cQ6kqqtL91no-%O92nlt_cE@p3)q)Pk=KG
zd+a1Ep=|hz!@R;G1@sl13;_8VWRi~W8!fA_Vwvow$nXNGoI`720|;fXH_rEq%G-mW
z4MmSymRz6#+J0dY>=fmJ4<7*B)BQc5yTw9be{y1D;K%0WD_1+mdQs^lfN278qtnlL
z5xIx~iUWRdd|fW1)~d@5Wby(f!~x|~l(di#UNV-Zz)PTZ8K=phYNSC{R8dpHE1WiV
zBJL_v`5Y#;s)b3%SvaSMSJWp0=Otz)H5RSBN_##}djjRVh2igAg+sZ@@Su{fQRfUX
z0B@o|@1iMPL*g_XHdN=)oNjCOqq@EVV=^^Kj_z1LAt0A`khF>QP97@L;V|LdM1ct}
z?ix&;N<BRXJr$t6IoXc+eo0t|Ht0;LXN+kck(<(y$+V>k{dW%j^{n!@BPi-149+P!
zll^N=@`$9KR84uB(lV&xq-(5p#3i683Ao2;pK9QF!5%~^h~}`BOsd#C9<z-OsR8;q
zBU2<J;JN<~X}P90rmtZ3#d9Q)i*ARXf4|-)o-%o;OiKPXekrMKR?7GiaUVoa=&R^x
z8Mh>f{bTKNOP60k3En?}M`&qEN696eIrGYkk>B4E{>#6G_LNX?p>%M8Ki|V^U3rmx
zLi#caY8>p#R+1}fgLXCn`=UG+Okpe86=i=27Xu@s*J1r=>7XD<TmA|fZo%-#3gnlO
zfMbLKmJPaKCh`~1g5IxF?{8Alreq5VNF2%dyxvx)uS*mxB`;HN)ZsqtM-3gIAEMzS
zvcQgfc)T+Vh}y!lHTw^bCwIV<-||xqfWV(Do)tJo;qx58IiSZu9GW9Lw|h1e++dg|
zcVPD_!UW7)q}~kkcCRWb3id^vU_Nt>YOL-vILOAqf1Ok9$6&Zp?Nc0p7XGwR{EI1m
zM+Q4Mi2bP#Q@XziIUk!!Q(r+@X#pL9{AEhW0_1B*4vfoJI`R#wk|8kk%9~Uo17YmF
z^*P{siw3+&$!$u$Ny%H35V$I(WECt+vNQP(B?N}@Ta-+(RQ@_u&zaL4LnwWmrx@w`
z7{sPZC^`q!oZu()GD6=+<TJLChd)Fd;^7g<2!aWIdpr14@PO2i@bnCpj=2a24w#FW
zii-eGO=28SLStkhWH432tb9~q<uajM*1<opz!}oO(<Y&itz_?~s&vj5#7$q_RsptC
zi8bln3CxRXH;F)tWg>8svltwd9<fG|9^GnHAM2M%HQ<O7g8WRW2ZI#rgMD5frm9fr
zTc2^Ji(3gUES^8s0}3>yzmMh91`<*Rz!NJ26hWeiH4@&GNdoCGYsS4KJPT9+uT*Z*
z=+`MBLuixxojf%ZrkItZLQa<mDhMbP0tln`4061=4mxT8ryPth1?VZrDI9L@^gvq`
zIR)O?xy3Pu(h=~BO6afP2izjUnpY4BA;Ri_!ZKyetD?b#)q#@D9u#~aLi)Qg89HlQ
zM<#t0Dd5g&M1hVDXN4QAx7U101?`B7@+5yN4O2*hZqYpL%;^2kj83EoW87UoJzR7=
zCI@P5zI09)!j!u5+K>0zpa2Lp;4Z2{5X!BQua<X_eE2s%qUS%acmHtklJnyq{M~hM
zL<0`6u(y&dOZsQ<Yl$X34!W2z))WN&Dh--KRWUmui=Hofzz9~(!ro(7T-f`ZpTn|N
z&815<OLSV=cgtfz$ZueaqNdoiM~>BX{5@|^(M~S!t1^4wqAY@T*!Ls1L1XDMta!lV
zfRMHMPq98@-&|usUGKmn6?goUr>$b#lq&uNm9f_9hZ?R^Cazhai$oGZ5h-kKkL=7q
zloX@~moQwkptEIYG)D(T_0WPwsx~dRK`&W1iCpH^ku7GFM)%4TF=Ov+5is(_tP0yO
z#2f{)#iCeJg!IS}%dGpAqnc0=!3x${n69&$)pJ}2#oB5~tg{uFy&16~E{G?%W+7gw
zVy{+j7BOE0!o^df$#z0u{3ZWs_%&8NR(2V5hY{bh&s^+Mg^af;)`A!nu~nXN6XBO^
z8-D-hnDLG;f(RT@Fonkq2k2L~FAlJs36?Z>L(Im2(@lAbNMN%6_aq*2Zd#OdJ=*Lv
z==U6&o*7JIfOHR>yrKG{bMZU+<kLk@$CAP0qPGI|I3WmSn*5=n$v?T?{VG3ye_igP
zugh`x4^FNhG;Yv$jMqbq&xR%XSwencHx$`+VJX8Q;&@PhcPa)9o$UuA9QF})+!-bS
zpOz`_)4uzZkXx5LY5g+2@mY!TKO}%YCjx^&yD8vNXi1?xcThhgjaE@tI}C;ZMl6p&
zqc86U5>Y{1_R?%W3eN1acw1fY$N%tP^>8bbMkYpGY=q~Zu7mmWThh)4wIKq2vMIyv
zc7~37Xp_H<1myr((b$5&^AIvb$ag!D<3%<$!#{fT=n>dF;!A;S8IrDoa+PN2+kR{A
z3`!kP&m~HprfvgBUpTnLdJ2^ab3Y3ExJmDZ@t*v78pp`PL&gAR^!+{FT20)exMB*>
z!y;T(NYkpiHo_1PM4=A{xUl1Jae}0C^4qlTuOV@*$io{jEu^dN^XEC*{BVGv)^5xT
zaZuy0F)gJRo2ygosg7t7#gFVq4z3TCX_>CUD*!S_U4MTM6N-w5*jE^PUdwQ+6UDxI
zT73Sr_*Dpz9Qz4j-3ep8aYFRsWQGz#!Ishei;KVWAK!f8+0E{ce?X4psm<>9|LUV>
z?tFh?69;Bs46lKueu>K0C?QhZ(r?M{P;ST`weh7(b;$mtp;cDAH>HLlCA`Qcm8OKE
zwNaEk2&nCO?reHD3Vaz~8OQKcsMsFh-vxfk3<^;eiZsj1SfiMSZ4rV+;ntPGK9vev
z<*Y4;9tR+rGFE==1=^Nk2z)_fHhjv2<V3n0MD<+zrg!_5cV2h(cW=AqjW@r2%e8L4
z`I?6c{8yLnjAzc7SvkivDle-4557#YNNG#6R5vSps)#Qd`Ww3DJaRVlWzN_16^bxA
SpH$W>muM9IQDoBm_<sS4C;Y$w

delta 2693
zcmZ`)O^n;d73NSBMNyPQ{jUCOE5D8W6W6U>2ZiCJ^(LvE&9<?dq!!Q!lxJ4U4JArL
zYO^t@sOkC;q;QL3u08DL<^Tl><kFmS$hAKx+7r>93j?_Z1p*iCdn0+hS+o-P&71e$
zynhbA`0ig$=lpEeRN(hu`Y{Wezs|Mhzk?lBDDjjb=@Npp>Z#p?T(xdeuKF<5HI}k;
z*Q70)r1~Aw&Cp{sO--OIdYoD`3sjb#pf)W4m7{H1qGh1+v_h+Ql&;MRbmeW8zcEl~
z?IY4H4lXLR4nzqU4Oq*tHeszWn-y7!m04w1>sFbux3aH50O}3-bRH8Ua+UmC{GMFZ
z2Fffgh+3MPQIe9n5}EK2@Uj{iduF8JejhTEI}@qNnMenTq@&bfoynOn9qGGdmI*U^
zR+Nb{8|prpS(MDO@Z>@R<kHJ7*(IyWPtVTsQ9d%Z6&~cGEKOciX4%Oms)~IG$&PF(
z{2<CnS#C0ml2IN4wBfPfXtt){F3Q!EOUh(6*^Ja#A<RYvs?!uT?k4tfkp&?XqvBS6
zRtnXqv<oRf(om~eB{ZUf4CK_T3i}m!YBEUB+9lT^!t$&hm7+R0sX|Tu8m&OSDcsAN
z)L>VaHTf(m1HB@-6`*R8`$=eT6|l#5!6`Zu?<Xqra@43R%L+U(I~FxT--;^8#SVZk
zNk_+!MmrGp|CM7plYfQwUNLG(U3;$-Ne@wTUxl;wRvBlD=4hVUcQv9sF}V-w6<P?Z
zk-AG(7B1}Nj3_shPH|n-l7CV=HE};V7F6eHgF2zp3!T3BhyDY-d*YVw^@N$aFjyHn
zA$M=F-q@e=UdSCcWJ5LzySB464Hk6Ia|a{VEgs^)d4B{v%N<cXz0k-lcMqk-kw5Hn
z)}u^JQYYu_H5V+xt_OVZ<ucw27#Ql^^3WIWKEBlGEe-uI9eb(VV{i_F4!WhqsgVnj
zADC(!W=eA>4}`J4u-{FOro-NVJL64wg)yOEu@nDERqagRPlIJ7_Y{6!JZ6ORRY<`Z
zLB_IfI}hEtqk#ceNiYV;sTMJbrJ6)bXaxUd)h0OtTv!dDG!=LW1GF@?3>=djSJekv
zs!Vj%kWvdYG-AV<97xrlfjwd+EaFIW<Z<|$ATE<42>5rv>z7vmt~02}HkTc25B+!9
zs7+bWrtZeZ=l3|YHE55WaPuAK-JtFFLpGYCblM&|<1wTF%58BqeS&--w(?DJCr$IA
zwr9z^2j|LO7bG(ww?&XSOR}O}sE}vGr<v#Gr@=Qr3(!f&+Dop-;$rBA*!k(ApzdVK
z;^R<l#(f+B&w<{_5$ZV$4aVaFcH<%2bNbBV-$cXb0OG`kJBSm`&be3}pX29|FW~}E
zSK`LD8%!O~4V=)0dE$ra@H!|2Hh>8wtC1FTr@AQHo4~xM-p5Y@zbSrUy`^K4>2>k9
z?AqyPfg2~eKONBy;YHN>y3^RW<T=AWbuJIYTJEj+O;G8mal-R&@t2WK@@Mc=2Ov(2
zT^8#e8?X^w6e<~OtME>C^)*a1=;~LX9x%+Q7k9><Pg$JY2;AH3sM9pohxfpo19k(9
z%Yq3~Nl69JiT)s$gozjc|I3TwbpCj&O~2ueLTLZzA4B_(H2Yfqm+P2xcAZ~_BOPs#
z>GMGI7ZAROfTP6;!ixa66Hh(6!C%6yEK3Qgs|Xr^_)VrYe+74I2(KcnBm59S4vH*`
zoR$>-5prKcxQ=iG;h^dK4cs0n`;%X9^#*4!Tku8(y&;1O&iCA~$%41xWRL^Et~Va_
z+!db{wohTo+IpNG_#R~=?$b^pPRhzIvX=Q{)}1s?xy&a{^?htjxX9^I68h034&sm1
z?;jo2xYYOIiiN2^h?WKKV6a$A3|50a%1-{<(x8(*us`{<dQ;8gaE1Oj*86@K`a`i&
z{}%DYwfa-`Ht6sW0WEP)%<E4Z0gy5AzY@Q#pVycCf7ZW!QtF}bc4j&rGv4OzU^DEf
zgFpTOe_uS$IS4x{?~4~2ml`t9YiKJUO#7IB&~qq$8^ni=PU{p%_$rFcfQ7vs`P#;b
zp|iuO_;cfu{$o^lT@;#?%%Mh#bIt16_dygq31C}rC&EQmhBp&$Izz5ljH<l<iXi2`
z67MzNyKoWBJ%@nv*iqN{PjF4}P2?Gm1bpiu13rVj)+=W)kvJ7F7!f|=azgRm<ZmM2
z`@&rWtT*`jd+W?=IHHH9Plk`z4ZQh7pH4k?8Dj{Z2Cx(ZqQ=P0g!!sznWkwOrqM<&
K{_*zLzWOhf6P=s@

diff --git a/complete_processing_of_mapping_results.py b/complete_processing_of_mapping_results.py
index c0eb5d3..28c5e32 100644
--- a/complete_processing_of_mapping_results.py
+++ b/complete_processing_of_mapping_results.py
@@ -11,53 +11,11 @@ import pandas as pd
 from math import log, floor
 import sys
 sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_donnÃ©es')
-from Visualisation_des_donnes_de_mapping import up_down_path_plot
+from Visualisation_des_donnes_de_mapping import up_down_path_plot, barplot
+from utils import column_recovery, comma_cleaning, cor_index, excel_file_writer, excel_m_file_writer
 LOCAL = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\"
 
-
-def column_recovery(file, n, sep=";", enc=None):
-    """
-    Put the culomn n of the file in list
-
-    Arg:
-        file : A csv file to read
-        n : the number of column to read
-        sep : type of separator
-
-    Returns:
-        Type of return: list
-    """
-    with open(file, "r", encoding=enc) as f:
-        r = csv.reader(f, delimiter=sep)
-        lines = list(r)
-        res = []
-        if abs(n) < len(lines[0]):
-            for line in lines:
-                if line[n].strip() != '':
-                    res.append(line[n].strip())
-    return res
-
-
-def cor_index(list_objects_to_convert, l_all_obj, l_all_equ):
-    """
-    Change elements of a list by the correspondance elements
-
-    Arg:
-        list_objects_to_convert : list of object
-        l_all_obj : list who countain all objet to convert
-        l_all_equ : correspondance list of all object
-
-    Returns:
-        Type of return: list
-    """
-    l_to_return = []
-    for item_to_replace in (list_objects_to_convert):
-        l_to_return.append(l_all_equ[l_all_obj.index(item_to_replace.strip())])
-
-    return l_to_return
-
-
-def recup_ramp_pathways_list(ramp_mapping_result, correspondence_file):
+def recup_ramp_pathways_list(ramp_mapping_result, correspondence_file, flow=False):
     """
     Give a list of pathways with the correspondent metabolites names
 
@@ -68,10 +26,16 @@ def recup_ramp_pathways_list(ramp_mapping_result, correspondence_file):
     Returns:
         Type of return: list
     """
-    column_pathways_name = column_recovery(ramp_mapping_result, 0)
-    c_input_id = column_recovery(ramp_mapping_result, 3)
-    associated_name = column_recovery(correspondence_file, 0)
-    list_aso = column_recovery(correspondence_file, 1)
+    if flow == True:
+        column_pathways_name = ramp_mapping_result[0]
+        c_input_id = ramp_mapping_result[3]
+        list_aso = correspondence_file[-2]
+        associated_name = correspondence_file[0]
+    else:
+        column_pathways_name = column_recovery(ramp_mapping_result, 0)
+        c_input_id = column_recovery(ramp_mapping_result, 3)
+        associated_name = column_recovery(correspondence_file, 0)
+        list_aso = column_recovery(correspondence_file, 1)
 
     all_pathways = []
     m_id_asso_p = []
@@ -84,14 +48,14 @@ def recup_ramp_pathways_list(ramp_mapping_result, correspondence_file):
             m_id_asso_p[all_pathways.index(c_p_n)].append(c_input_id[number])
 
     for path_num, pathways in enumerate(all_pathways):
-        pat = cor_index(m_id_asso_p[path_num], list_aso, associated_name)
+        pat = cor_index(m_id_asso_p[path_num], list_aso, associated_name)  # NOTE: this blocks because of the multi-mapping output ['....']
         pat.insert(0, pathways)
         l_to_return.append(pat)
 
     return l_to_return
 
 
-def recup_cpdb_pathways_list(cpdb_mapping_result, correspondence_file):
+def recup_cpdb_pathways_list(cpdb_mapping_result, correspondence_file, flow=False):
     """
     Give a list of pathways with the correspondent metabolites names
 
@@ -102,21 +66,49 @@ def recup_cpdb_pathways_list(cpdb_mapping_result, correspondence_file):
     Returns:
         Type of return: list
     """
+    if flow == True:
+        l_pathways = cpdb_mapping_result[2]
+        l_path_metabo_whith_top = cpdb_mapping_result[5]
+        id_use = l_path_metabo_whith_top[1][0]
+        for itraverse, traverse in enumerate(correspondence_file):
+            if id_use in traverse:
+                associated_chebi = correspondence_file[itraverse]
+                break
+        associated_name = correspondence_file[0]
+        p_value = cpdb_mapping_result[0]
+        m_inp_ol = cpdb_mapping_result[8]
+        l_pathways = l_pathways[1:]
+        l_path_metabo = l_path_metabo_whith_top[1:]
+        l_to_return = []
+
+        for num_path_t, l_p_m in enumerate(l_path_metabo):
+            path_cont = l_p_m
+            paths_to_rec = cor_index(path_cont, associated_chebi, associated_name)
+            paths_to_rec.insert(0, l_pathways[num_path_t])
+            paths_to_rec.insert(0, m_inp_ol[num_path_t + 1])
+            paths_to_rec.insert(0, p_value[num_path_t + 1])
+            l_to_return.append(paths_to_rec)
+            print(paths_to_rec)
+        return l_to_return
+
     associated_name = column_recovery(correspondence_file, 0)
     associated_chebi = column_recovery(correspondence_file, 1)
     l_pathways = column_recovery(cpdb_mapping_result, 2)
-    l_pathways = l_pathways[1:]
     l_path_metabo_whith_top = column_recovery(cpdb_mapping_result, 5)
-    l_path_metabo = l_path_metabo_whith_top[1:]
     p_value = column_recovery(cpdb_mapping_result, 0)
     m_inp_ol = column_recovery(cpdb_mapping_result, 8)
+    l_pathways = l_pathways[1:]
+    l_path_metabo = l_path_metabo_whith_top[1:]
     l_to_return = []
+
     for num_path_t, l_p_m in enumerate(l_path_metabo):
+        print(l_p_m)
         path_cont = []
         if (len(l_p_m)) > 6: # regulation a vÃ©rifier
             comma_pos = []
             for index, t_l_p_m in enumerate(l_p_m):
-                if t_l_p_m == ",": # probleme entre ; et ,
+                print(t_l_p_m)
+                if t_l_p_m == ";":  # problem between ';' and ','
                     comma_pos.append(index)
             for n_comma in range(len(comma_pos)+1):
                 if n_comma == 0:
@@ -135,6 +127,7 @@ def recup_cpdb_pathways_list(cpdb_mapping_result, correspondence_file):
         paths_to_rec.insert(0, m_inp_ol[num_path_t + 1])
         paths_to_rec.insert(0, p_value[num_path_t + 1])
         l_to_return.append(paths_to_rec)
+        print(paths_to_rec)
     return l_to_return
 
 
@@ -192,52 +185,6 @@ def recup_ma_pathways_list(ma_mapping_result, number_of_columns):
     return l_to_return
 
 
-def comma_cleaning(str_to_clean):
-    """
-    Replace potential ',' by '_'
-
-    Arg:
-        str_to_clean =  list of character with potentialy ','
-
-    Returns:
-        Type of return: character
-    """
-    if ',' in str_to_clean:
-        while ',' in str_to_clean:
-            str_to_clean = re.sub(",", "_", str(str_to_clean))
-    return str_to_clean
-
-
-def excel_file_writer(dataframe, n_o_f,  sheetname="Resultats"):
-    """
-    Take a dataframe and write an excel file with this data
-
-    Arg:
-        dataframe = dataframe of data to write
-        n_o_f = name and acces path of the new excel file
-        sheetname = The name of the new sheet
-    """
-    ex_f = pd.ExcelWriter(n_o_f)  # pylint: disable=abstract-class-instantiated
-    dataframe.to_excel(ex_f, sheet_name=sheetname, header=False, index=False)
-    ex_f.close()
-
-
-def excel_m_file_writer(list_of_dataframe, n_outf, list_of_sheetname):
-    """
-    Take a list of dataframe and write an excel file with these data
-
-    Arg:
-        list_of_dataframe = list of dataframe to write
-        n_outf = name and acces path of the new excel file
-        list_of_sheetname = list of sheets names to write
-    """
-    e_f = pd.ExcelWriter(n_outf)  # pylint: disable=abstract-class-instantiated
-    for df_index, l_o_d in enumerate(list_of_dataframe):
-        s_n = list_of_sheetname[df_index]
-        l_o_d.to_excel(e_f, sheet_name=s_n, header=False, index=False)
-    e_f.close()
-
-
 def pathways_selection(list_of_list_to_select, list_of_object_to_filter):
     """
     Only keep the object they are not in the filter list
@@ -342,35 +289,9 @@ def df_matrix_r(sim_matrix):
     return look_like
 
 
-def barplot(column_x, column_y, df_data, title="barplot", figure_size=(30, 5),
-            ax_x_label="voies mÃ©taboliques", ax_y_label='Recouvrement moyen',
-            colors='Spectral', decimal='%.1f', size_of_labels=6):
-    """
-    drawn barplot from data
-
-    Arg:
-        column_x = data for plot absises axis
-        column_y = data for plot ordinate axis
-        df_data = dataframe of data to plot
-
-    Returns:
-        Type de retour:
-    """
-    plt.subplots(1, 1, figsize=figure_size)
-    p1 = sns.barplot(x=column_x, y=column_y, data=df_data, palette=colors)
-    plt.subplots_adjust(top=0.90, bottom=0.26)
-    p1.set(title=title)
-    plt.xlabel(ax_x_label)
-    plt.ylabel(ax_y_label)
-    p1.bar_label(p1.containers[0], fontsize=7, fmt=decimal)
-    p1.tick_params(axis='x', rotation=90, size=0.05, labelsize=size_of_labels)
-    return p1
-
-
 def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
               fold_of_visu_sav=LOCAL,
               midfile="Yes",
-              midfile_name=LOCAL+"\\mid_file.xlsx",
               n_path_to_filt="nothing", modul=None, f_modul=None):
     """
     Do the complet treatement of mapping results
@@ -384,16 +305,21 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
         save_plot = possibility to specify or not the 3 plots
         fold_of_visu_sav = folder where save plot(s)
         midfile = if yes the midfile while be write
-        midfile_name = name of the output midfile
         n_path_to_filt = list of object to filter
 
     """
     if mapper == "CPDB":
-        c_file = input("In which file is the correspondence table?")
-        l_of_pathways_list = recup_cpdb_pathways_list(file, c_file)
+        if modul == "flow":
+            l_of_pathways_list = recup_cpdb_pathways_list(file, f_modul, flow=True)
+        else:
+            c_file = input("In which file is the correspondence table?")
+            l_of_pathways_list = recup_cpdb_pathways_list(file, c_file)
     elif mapper == "RAMP":
-        c_file = input("In which file is the correspondence table ?")
-        l_of_pathways_list = recup_ramp_pathways_list(file, c_file)
+        if modul == "flow":
+            l_of_pathways_list = recup_ramp_pathways_list(file, f_modul, flow=True)
+        else:
+            c_file = input("In which file is the correspondence table ?")
+            l_of_pathways_list = recup_ramp_pathways_list(file, c_file)
     elif mapper == "ME":
         fold = input("in which folder are the files?")  # no "" around acces
         n_files = int(input("how many files you have in the folder?"))
@@ -409,16 +335,24 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
         log_p = []
         metabo = column_recovery(f_modul, 0)
         value_modul = column_recovery(f_modul, 1)
-    #print(l_of_pathways_list)
+    if modul == "flow":
+        list_path = []
+        up = []
+        down = []
+        log_p = []
+        metabo = f_modul[0]
+        value_modul = f_modul[-1]
     for i_p_l, path_l in enumerate(l_of_pathways_list):
-        l_of_pathways_list[i_p_l][2] = comma_cleaning(path_l[2])
-        if modul == True:
+        if mapper != "RAMP":
+            l_of_pathways_list[i_p_l][2] = comma_cleaning(path_l[2])
+        else:
+            l_of_pathways_list[i_p_l][0] = comma_cleaning(path_l[0])
+        if modul in (True, "flow") and mapper != "RAMP":
             actu_up = 0
             actu_down = 0
             list_path.append(l_of_pathways_list[i_p_l][2])
             for path_meta in path_l[3:]:
                 # print(comma_cleaning(path_meta)) Probable problÃ©me de version entre ME et les autres (a vÃ©riifer)
-                #print(path_meta)
                 if mapper == "ME":
                     if float(value_modul[metabo.index(comma_cleaning(path_meta))]) >= 0:
                         actu_up += 1
@@ -432,7 +366,7 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
             up.append((actu_up/int(path_l[1]))*100)
             down.append((actu_down/int(path_l[1]))*100)
             log_p.append(-log(float(path_l[0])))
-    if modul == True:
+    if modul in (True, "flow") and mapper != "RAMP":
         n_m_i_p = 200
         if len(log_p) > n_m_i_p:
             print(len(log_p))
@@ -454,13 +388,14 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
             plt.savefig(fold_of_visu_sav+"up_down_path_plot"+str(under_plot + 1)+".png")
         else :
             plot = up_down_path_plot(list_path, up, down, log_p)
-            plt.savefig(fold_of_visu_sav+"up_down_path_plot.png")
+            plt.savefig(fold_of_visu_sav+ mapper +"up_down_path_plot.png")
     if midfile == "Yes":
+        midfile_name = outf + mapper + "fmid_file.xlsx"
         mid_data = pd.DataFrame(l_of_pathways_list, dtype=object)
         excel_file_writer(mid_data, midfile_name, sheetname="Resultats")
-    for index_cleaning, full in enumerate(l_of_pathways_list):
-        l_of_pathways_list[index_cleaning] = full[2:]
-    #print(l_of_pathways_list)
+    if mapper != "RAMP":
+        for index_cleaning, full in enumerate(l_of_pathways_list):
+            l_of_pathways_list[index_cleaning] = full[2:]
     if n_path_to_filt != "nothing":
         l_path_l_treat = pathways_selection(l_of_pathways_list, n_path_to_filt)
     else:
@@ -481,6 +416,7 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
         sum_l = 0
         for num_col in range(n_path_to_treat):
             shared = 0
+
             p1 = l_path_l_treat[num_line][1:]
             p2 = l_path_l_treat[num_col][1:]
             for metabolite_search in (p1):
@@ -489,9 +425,12 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
             mir_table[num_line, num_col] = shared
             sum_l += shared
         if len(p1) == 1:
+            print(p1)
             one_metabo_path.append(l_path_l_treat[num_line][0])
             met_one_met_path.append(l_path_l_treat[num_line][1])
+            print(l_path_l_treat[num_line])
         pathways_names.append(l_path_l_treat[num_line][0])
+
         for metabolite_of_p1 in p1:
             if metabolite_of_p1 not in all_metabolites:
                 all_metabolites.append(metabolite_of_p1)
@@ -513,9 +452,10 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
         all_metabolites[n_metab] = comma_cleaning(a_m)
     approximate_table = np.array(mir_table, dtype=object)
 
-    metabo_f1, path_metabo_f1 = list_f_1(metabolite_frequency, all_metabolites,
+    metabo_f1, path_metabo_f1 = list_f_1(metabolite_frequency, all_metabolites, # problÃ©me a rÃ©gler
                                          pathways_of_metabo, pathways_names,
                                          l_path_l_treat)
+
     meta_and_path_p = pa_metabo(all_metabolites, pathways_of_metabo)
 
     all_metabolites.insert(0, "Ensemble des mÃ©tabolites")
@@ -533,10 +473,12 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
     for index_fm in range(1, len(all_metabolites)):
         all_metabolites[index_fm] = f_metabo[index_fm-1][1]
         metabolite_frequency[index_fm] = f_metabo[index_fm-1][0]
+
     inf_shap = [[len(one_metabo_path), one_metabo_path,
                  met_one_met_path],
                 [len(path_metabo_f1), metabo_f1, path_metabo_f1],
                 [len(all_metabolites), all_metabolites, metabolite_frequency]]
+
     inf_shap.sort()
     counter = 0
     metabo_f_order = []
@@ -558,19 +500,19 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
     metabo_f_order_for_export = np.array(metabo_f_order, dtype=object)
     metabo_f_order_for_export = pd.DataFrame(data=metabo_f_order_for_export)
 
-    patways_reco_order = recov_pos_path_name(totale_recovery, average_recovery,
-                                             pathways_names)
+    patways_reco_order = recov_pos_path_name(totale_recovery, average_recovery, pathways_names)
     patways_reco_order_for_export = pd.DataFrame(data=patways_reco_order)
     df_matrix_table = df_matrix_r(approximate_table)
 
+    result_out_file = outf+ mapper+"resultats_traitment_mapping.xlsx"
     excel_m_file_writer([patways_reco_order_for_export,
                          df_matrix_table, metabo_f_order_for_export,
-                         meta_and_path_p],  outf,
+                         meta_and_path_p],  result_out_file,
                         ["Voies mÃ©taboliques", "Table de ressemblance",
                          "FrÃ©quence mÃ©tabolites", "MÃ©tabolites et leurs P"])
-
     data_for_recovery_visualization = pd.DataFrame(data=patways_reco_order[1:])
     colnames_recovery = list(data_for_recovery_visualization.columns)
+    print(data_for_recovery_visualization)
 
     if type_of_view in ("all", "bar_plot", "bar_plot_r", "bar_r_meta_p"):
         barplot(colnames_recovery[2], colnames_recovery[1],
@@ -578,7 +520,7 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
                 title="Recouvrement moyen des diffÃ©rentes voies mÃ©taboliques",
                 figure_size=(22, 10), size_of_labels=6)
         if save_plot in ("all", "bar_plot", "bar_plot_r", "bar_r_meta_p"):
-            plt.savefig(fold_of_visu_sav+"bar_plot_of_recovery.png")
+            plt.savefig(fold_of_visu_sav+mapper+"bar_plot_of_recovery.png")
         plt.show()
 
     just_frequency = []
@@ -598,7 +540,7 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
                 figure_size=(22, 10), ax_x_label="MÃ©tabolites d'intÃ©rÃªt",
                 ax_y_label='FrÃ©quence', decimal='%.0f', size_of_labels=7)
         if save_plot in ("all", "bar_plot", "bar_plot_f", "bar_f_meta_p"):
-            plt.savefig(fold_of_visu_sav+"bar_plot_of_metabolites.png")
+            plt.savefig(fold_of_visu_sav+mapper+"bar_plot_of_metabolites.png")
         plt.show()
 
     if type_of_view in ("all", "meta_box", "bar_f_meta_p", "bar_r_meta_p"):
@@ -607,20 +549,18 @@ def c_p_o_m_r(file, outf, mapper, type_of_view="all", save_plot="all",
         b1.set(title="BoÃ®te Ã  moustache des frÃ©quences des mÃ©tabolites")
         plt.ylabel("frÃ©quence des mÃ©tabolites")
         if save_plot in ("all", "meta_box", "bar_f_meta_p", "bar_r_meta_p"):
-            plt.savefig(fold_of_visu_sav+"metabolites_bo_of_frequency.png")
+            plt.savefig(fold_of_visu_sav+ mapper+"metabolites_bo_of_frequency.png")
         plt.show()
 
-
 if __name__ == "__main__":
     #MAP = 'RAMP'
     MAP = "CPDB"
     #MAP = "ME"
     VIEW = "all"
     SAVE = "all"
-    INFILE = LOCAL + "ora_cpdb_data_yeux_reactome_rev_18-01-2024.csv"
+    INFILE = LOCAL + "CPDB\\Resultats_mapping_Chebi_ID_L100_CPDB.csv"
     #INFILE = "ExportExcel_6843"
     #INFILE = LOCAL + "RAMP\\sortie_Mapping_RAMP_L100_CheEBI.csv"
-    FINISHFILE = LOCAL + "test_oeil.xlsx"
-    FILE_MODUL = LOCAL + "chebi_modulation_intensite_patho_oeil_donnes_estelles_rev_19-01-2024.csv"
-    #FILE_MODUL = LOCAL + "CPDB\\liste_Chebi_des_100_chebi_ConsensusPAthDB_modul.csv"
+    FINISHFILE = LOCAL + "test.xlsx"
+    FILE_MODUL = LOCAL + "CPDB\\liste_Chebi_des_100_chebi_ConsensusPAthDB_modul.csv"
     c_p_o_m_r(INFILE, FINISHFILE, MAP, type_of_view=VIEW, save_plot=SAVE, modul=True, f_modul=FILE_MODUL)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..b791e2c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,62 @@
+"""
+This module is the entry point of the workflow: it shapes the input data,
+runs the mapping and builds the CPDB pathway networks.
+"""
+import re
+import csv
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import pandas as pd
+from math import log, floor
+import sys
+sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_donnÃ©es')
+sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\chebi-ids.git')
+from utils import excel_file_writer, column_recovery, excel_m_file_writer, comma_cleaning, pre_cut, cor_index, recup_all_inf_excel
+from Recovery_of_associated_Chebi_IDs import chebi_horizontal, chebi_in_outgouing
+from Visualisation_des_donnes_de_mapping import up_down_path_plot, barplot
+from complete_processing_of_mapping_results import recup_ramp_pathways_list, recup_cpdb_pathways_list, recup_me_path_list, recup_ma_pathways_list, pathways_selection, list_f_1, pa_metabo, recov_pos_path_name, df_matrix_r, c_p_o_m_r
+from Mapping_using_the_API import send_request_to_mapping_api, mapping_ramp_api, m_ora_cpdb, opti_multimapping
+from network_visualization import Paths_link_CPDB, network_visu
+
+FOLDER = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\main\\"
+
+
+def shapping_data(file, folder):
+    """
+    Read the analysis workbook and save its rows as the mapping input file
+
+    Arg:
+        file : excel file with the data obtained after analysis
+        folder : folder path, used as a prefix, in which
+                 "Datas_mis_en_forme_pour_le_mapping.xlsx" is written
+
+    Returns:
+        Type of return: list of rows (unchanged for now) and 1 file .xlsx
+
+    """
+    rows = recup_all_inf_excel(file)
+    # TODO: the ChEBI enrichment below is still disabled. Planned steps:
+    #   - when "chebi" is in the first row, locate that column
+    #   - extend its IDs with chebi_horizontal() and chebi_in_outgouing()
+    #   - put the enriched column in front of the other ones
+    #   Open point: either make chebi_horizontal return the list or write
+    #   one function doing both steps directly.
+    # the rows are exported as read, without any reshaping yet
+    sheet = pd.DataFrame(data=rows)
+    export_path = folder + "Datas_mis_en_forme_pour_le_mapping.xlsx"
+    excel_file_writer(sheet, export_path)
+    return rows
+
+
+if __name__ == "__main__":
+    INFILE = FOLDER + "Donnees_oeil_mis_en_forme_opti_mapping.xlsx"
+    datas_f_map = shapping_data(INFILE, FOLDER)
+    result_cpdb, result_ramp, recap = opti_multimapping(datas_f_map, FOLDER,  mapping="flow")
+    #c_p_o_m_r(result_ramp, FOLDER, "RAMP", fold_of_visu_sav=FOLDER, modul="flow", f_modul=recap)
+    #c_p_o_m_r(result_cpdb, FOLDER, "CPDB", fold_of_visu_sav=FOLDER, modul="flow", f_modul=recap)
+    l_bdd = ["Reactome", "Wikipathways", "KEGG", "EHMN", "HumanCyc", "SMPDB", "INOH"]  # one network each
+    for bdd in l_bdd:
+        out_path_links = FOLDER + "CPDB_links_network"+ bdd+"datas_base.xlsx"
+        edge_data, nodes_data = Paths_link_CPDB(result_cpdb, out_path_links , recap, bdd= bdd, flow=True)
+        print(network_visu(edge_data[0:3], nodes_data, bdd=bdd))  # title follows the database drawn
diff --git a/network_visualization.py b/network_visualization.py
new file mode 100644
index 0000000..3a6692e
--- /dev/null
+++ b/network_visualization.py
@@ -0,0 +1,173 @@
+import re
+import csv
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+import pandas as pd
+import py4cytoscape as p4c
+from py4cytoscape import palette_color_brewer_d_RdBu
+from math import log, floor
+import sys
+sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_donnÃ©es')
+from utils import excel_file_writer, column_recovery
+LOCAL = "C:\\Users\\mumec\\Desktop\\Mini_codes\\"
+
+
+def Paths_link_CPDB(csv_file, out_file, int_file, bdd="Reactome",  flow=None):
+    """
+    Build the edge and node tables of the pathway network of one database
+
+    Two pathways are linked by the number of mapped metabolites they share.
+    Index 0 of every input column is handled as a header label.
+
+    Arg:
+        csv_file : CPDB results; path of a ';' csv when flow is None,
+                   otherwise a list of columns already in memory
+        out_file : name and access path of the excel file of edges to write
+        int_file : intensities; csv path when flow is None (column 1 minus
+                   column 2 gives the modulation), otherwise a list of
+                   columns whose last one is the modulation
+        bdd : pathway database to keep (value of the source column)
+        flow : None to read csv files, any other value for in-memory data
+
+    Returns:
+        Type of return: 2 lists, the edge columns and the node columns,
+        each column starting with a header label
+    """
+    if flow is None:
+        all_l_paths = column_recovery(csv_file, 2)
+        all_l_len_path = column_recovery(csv_file, 8)
+        all_l_meta_in = column_recovery(csv_file, 5)
+        all_l_p_value = column_recovery(csv_file, 0)
+        all_l_source = column_recovery(csv_file, 3)
+        l_all_meta = [meta.strip() for meta in column_recovery(int_file, 0)[1:]]
+        int_cas = column_recovery(int_file, 1)[1:]
+        int_tem = column_recovery(int_file, 2)[1:]
+        modul = [float(cas) - float(tem) for cas, tem in zip(int_cas, int_tem)]
+    else:
+        all_l_paths = csv_file[2]
+        all_l_len_path = csv_file[8]
+        all_l_meta_in = csv_file[5]
+        all_l_p_value = csv_file[0]
+        all_l_source = csv_file[3]
+        # NOTE: the ID column is guessed from the first mapped ID (fragile)
+        if 'HMDB' in all_l_meta_in[1][0]:
+            l_all_meta = int_file[2][1:]
+        else:
+            l_all_meta = int_file[1][1:]
+        modul = int_file[-1][1:]
+    l_paths = []
+    l_len_path = []
+    l_p_value = []
+    l_meta_in = []
+    for ip, path_name in enumerate(all_l_paths):
+        # row 0 is always kept: index 0 of each list must stay the header
+        if ip == 0 or all_l_source[ip] == bdd:
+            l_paths.append(path_name.replace(",", ";"))
+            l_p_value.append(all_l_p_value[ip])
+            l_len_path.append(all_l_len_path[ip])
+            l_meta_in.append(all_l_meta_in[ip])
+    # float() reads plain and scientific notation alike
+    l_p_value[1:] = [float(lpval) for lpval in l_p_value[1:]]
+    # metabolite IDs of each pathway, split and stripped once so that
+    # shared metabolites are matched on whole IDs, not on substrings
+    members = [[]]
+    for meta_in in l_meta_in[1:]:
+        raw_ids = meta_in.split(",") if flow is None else meta_in
+        members.append([met.strip() for met in raw_ids])
+    edge = []
+    modul_path = ["modulation de la voie"]
+    n_meta_int_in = ["numbers of metabolite of interest"]
+    for index_p in range(1, len(l_paths)):
+        splited = members[index_p]
+        mod = sum(modul[l_all_meta.index(met)] for met in splited)
+        if index_p == len(l_paths) - 1:
+            # no pathway left to compare with: single edge to itself
+            edge_now = [[l_paths[index_p], 0, mod, len(splited)]]
+        else:
+            edge_now = []
+            for index_m, other in enumerate(members[index_p+1:]):
+                links = sum(1 for met in splited if met in other)
+                edge_now.append([l_paths[index_p+1+index_m], links, mod, len(splited)])
+        edge.append(edge_now)
+        n_meta_int_in.append(len(splited))
+        modul_path.append(mod)
+    source = ["Source"]
+    target = ["Target"]
+    n_edge = ["n_edge"]
+    modulation = ["Modulation"]
+    n_meta_map = ["Metabo_map"]
+    len_path = ["Number of metabolites in Pathway"]
+    p_value = ["p-value"]
+    for index_edge, edges_of_path in enumerate(edge, start=1):
+        for new_entree in edges_of_path:
+            source.append(l_paths[index_edge])
+            target.append(new_entree[0])
+            n_edge.append(new_entree[1])
+            modulation.append(new_entree[2])
+            n_meta_map.append(new_entree[3])
+            len_path.append(l_len_path[index_edge])
+            p_value.append(l_p_value[index_edge])
+    out_data = [source, target, n_edge, modulation, n_meta_map, len_path, p_value]
+    nodes = [l_paths, l_p_value, n_meta_int_in, l_len_path, modul_path]
+    network = pd.DataFrame(data=out_data).transpose()
+    excel_file_writer(network, out_file, sheetname="Network links")
+    return out_data, nodes
+
+
+def network_visu(edge, nodes, bdd="Reactome"):
+    """
+    Send the pathway network to Cytoscape and apply its visual style
+
+    Arg:
+        edge : [sources, targets, weights] columns, header label first
+        nodes : [id, p value, N mapped, N in pathway, modulation] columns
+        bdd : database name, used in the title of the network
+
+    Returns:
+        Type of return: list, the 3 p-value breakpoints of the label colors
+    """
+    source = nodes[0][1:]
+    p_value = nodes[1][1:]
+    n_meta_in_path = nodes[2][1:]
+    len_tot_path = nodes[3][1:]
+    modul_path = nodes[4][1:]
+    source_for_target = edge[0][1:-1]
+    target = edge[1][1:-1]
+    weight_ege = edge[2][1:-1]
+    p4c.cytoscape_ping()
+    p4c.cytoscape_version_info()
+    df_nodes = pd.DataFrame(data={
+        'id': source, 'p value': p_value, 'N metabolites mapped': n_meta_in_path,
+        'N metabolites in pathway': len_tot_path, 'Pathway modulation': modul_path})
+    df_edges = pd.DataFrame(data={'source': source_for_target, 'target': target,
+                                  'weight': weight_ege})
+    p4c.create_network_from_data_frames(nodes=df_nodes, edges=df_edges,
+                                        title="CPDB_network_"+ bdd, collection="Network_from_mapping")
+    # fixed style parameters
+    p4c.set_node_shape_default('ELLIPSE')
+    p4c.set_node_font_size_default(17)
+    nmm_min, nmm_max = min(n_meta_in_path), max(n_meta_in_path)
+    nmm_c = nmm_min + (nmm_max - nmm_min)/2
+    p4c.set_node_color_mapping('N metabolites mapped', [nmm_min, nmm_c, nmm_max],
+                               ['#e6eeff', '#6699ff', '#000099'], mapping_type='c')
+    pv_min, pv_max = min(p_value), max(p_value)
+    pv_c = pv_min + (pv_max - pv_min)/3
+    p4c.set_node_label_color_mapping('p value', [pv_min, pv_c, pv_max],
+                                     ['#145214', '#ffb3ff', '#4d004d'], mapping_type='c')
+    if weight_ege:  # width scaled on the edge weights; skipped when there is no edge
+        w_min, w_max = min(weight_ege), max(weight_ege)
+        w_c = w_min + (w_max - w_min)/2
+        p4c.set_edge_line_width_mapping('weight', [w_min, w_c, w_max], [0.5, 1.75, 3], mapping_type='c')
+    node_size = [int(ltp)/2 for ltp in len_tot_path]
+    p4c.set_node_height_bypass(source, node_size)
+    p4c.set_node_width_bypass(source, node_size)
+    p4c.layout_network('degree-circle')
+    return [pv_min, pv_c, pv_max]
+
+if __name__ == "__main__":  # demo run on local files, default database
+    edge_data, nodes_data = Paths_link_CPDB(
+        LOCAL + "ora_cpdb_data_yeux_reactome_rev_18-01-2024.csv",
+        LOCAL + "reseax_edge_tab_data_oeil_cpdb_reactome_v2_rev_19-01-2024.xlsx",
+        LOCAL + "chebi_intensite_patho_oeil_donnes_estelles_rev_17-01-2024.csv")
+    print(network_visu(edge_data[0:3], nodes_data))
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..c3ac355
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,130 @@
+"""
+Utility functions shared by the other modules of the workflow
+"""
+import pandas as pd
+import csv
+import re
+
+
+def excel_file_writer(dataframe, n_o_f,  sheetname="Resultats"):
+    """
+    Write a dataframe, without header nor index, in a new excel file
+
+    Arg:
+        dataframe = dataframe of data to write
+        n_o_f = name and access path of the new excel file
+        sheetname = The name of the new sheet
+    """
+    # the context manager saves and closes the workbook, even on error
+    with pd.ExcelWriter(n_o_f) as ex_f:  # pylint: disable=abstract-class-instantiated
+        dataframe.to_excel(ex_f, sheet_name=sheetname, header=False, index=False)
+
+
+def column_recovery(file, n, sep=";", enc=None):
+    """
+    Put the column n of the file in a list
+
+    Empty cells and rows lacking column n are skipped: positions are not kept.
+
+    Arg:
+        file : A csv file to read
+        n : the number of column to read
+        sep : type of separator
+        enc : encoding given to open(), None for the platform default
+
+    Returns:
+        Type of return: list, empty when the file or its first row has no column n
+    """
+    with open(file, "r", encoding=enc, newline="") as f:
+        lines = list(csv.reader(f, delimiter=sep))
+    if not lines or abs(n) >= len(lines[0]):
+        return []
+    cells = (line[n].strip() for line in lines if abs(n) < len(line))
+    return [cell for cell in cells if cell != '']
+
+
+def excel_m_file_writer(list_of_dataframe, n_outf, list_of_sheetname):
+    """
+    Write each dataframe of a list in its own sheet of one excel file
+
+    Arg:
+        list_of_dataframe = list of dataframe to write
+        n_outf = name and access path of the new excel file
+        list_of_sheetname = sheet names, in the same order as the dataframes
+    """
+    # closing through the context manager also happens when a sheet fails
+    with pd.ExcelWriter(n_outf) as e_f:  # pylint: disable=abstract-class-instantiated
+        for df_index, l_o_d in enumerate(list_of_dataframe):
+            s_n = list_of_sheetname[df_index]
+            l_o_d.to_excel(e_f, sheet_name=s_n, header=False, index=False)
+
+
+def comma_cleaning(str_to_clean):
+    """
+    Swap every ',' of a text for '_'
+
+    Arg:
+        str_to_clean =  text that may hold some ','
+
+    Returns:
+        Type of return: character, the input itself when it holds no ','
+    """
+    if ',' not in str_to_clean:
+        return str_to_clean
+    # str() so that a non-text container is cleaned through its text form
+    return str(str_to_clean).replace(",", "_")
+
+
+def pre_cut(listed):
+    """
+    Remove the ID prefix, whose length is read on the first real entry
+
+    Every entry other than "NA" loses as many characters as the first
+    non "NA" entry holds up to its first ":" (included).
+
+    Arg:
+        listed: 1 list of id, "NA" for a missing one
+
+    Returns:
+        Type of return: 1 list; without any real entry the list is copied
+
+    Raises:
+        ValueError: the first real entry holds no ":"
+    """
+    first_id = next((elem for elem in listed if elem != "NA"), None)
+    if first_id is None:
+        return list(listed)
+    pos_cut = first_id.index(":")
+    return [elem if elem == "NA" else elem[pos_cut+1:] for elem in listed]
+
+
+def recup_all_inf_excel(file):
+    """
+    Read a .xlsx and give back its content line by line
+
+    Arg:
+        file = the file to read
+    Returns:
+        Type of return: 1 list of list line, first row included as no
+        header is expected (header=None)
+    """
+    sheet = pd.read_excel(file, header=None, na_filter=False)
+    return sheet.values.tolist()
+
+
+def cor_index(list_objects_to_convert, l_all_obj, l_all_equ):
+    """
+    Translate each element of a list through two parallel lists
+
+    The first position of the element in l_all_obj is the one used.
+
+    Arg:
+        list_objects_to_convert : list of texts, stripped before the lookup
+        l_all_obj : list holding every object that can be converted
+        l_all_equ : equivalents, at the same positions as l_all_obj
+
+    Returns:
+        Type of return: list
+    """
+    positions = (l_all_obj.index(item.strip()) for item in list_objects_to_convert)
+    return [l_all_equ[position] for position in positions]
\ No newline at end of file
-- 
GitLab