From 5f86a374d6f360aa6fd7471aecf3bcc4696f3a51 Mon Sep 17 00:00:00 2001
From: local_comparaison <mathieu.umec@inrae.fr>
Date: Fri, 17 Nov 2023 16:57:08 +0100
Subject: [PATCH] Sort metabolite frequencies, add pathway sizes, enable Excel export

---
 complete_processing_of_mapping_results.py | 48 +++++++++++------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/complete_processing_of_mapping_results.py b/complete_processing_of_mapping_results.py
index 6707b85..912a755 100644
--- a/complete_processing_of_mapping_results.py
+++ b/complete_processing_of_mapping_results.py
@@ -68,7 +68,6 @@ def recup_CPDB_pathways_list(CPDB_mapping_result,correspondence_file):
     L_Pathways=L_Pathways[1:]
     L_pathways_metabolites_whith_top=column_recovery(CPDB_mapping_result, 5)
     L_pathways_metabolites=L_pathways_metabolites_whith_top[1:]
-    print(L_pathways_metabolites)
     Chebi_aso_clean=strip_list(associated_chebi)
     L_to_return=[]
     """
@@ -206,13 +205,16 @@ def similarity_matrix(list_of_pathways):
     return(np_table,just_one_interest_metabo_pathways,
     Metabolite_just_one_interest_metabo_pathways, All_metabo, f_of_metabolite, pathways_of_metabo,Names_of_pathways, all_recovery, average_recov)
 
-def list_frequency_1(frequency_of_metabolite,All_metabolites,pathways_of_metabolites) :
+def list_frequency_1(frequency_of_metabolite,All_metabolites,pathways_of_metabolites,list_of_pathways_name,list_of_pathways_list) :
     metabolites_f1=[]
     pathway_metabolites_f1=[]
     for frequency_number in range (len(frequency_of_metabolite)) :
         if frequency_of_metabolite[frequency_number]==1 :
             metabolites_f1.append(All_metabolites[frequency_number])
-            pathway_metabolites_f1.append(pathways_of_metabolites[frequency_number][0])
+            pat=pathways_of_metabolites[frequency_number][0]
+            pos_path=list_of_pathways_name.index(pat)
+            number_of_metabolites_of_this_pathways=len(list_of_pathways_list[pos_path])-1
+            pathway_metabolites_f1.append(pat+'('+str(number_of_metabolites_of_this_pathways)+')')
     return(metabolites_f1,pathway_metabolites_f1)
 
 def pathways_of_metabolite(metabolites_list,pathways_of_metabolites) :
@@ -223,10 +225,18 @@ def pathways_of_metabolite(metabolites_list,pathways_of_metabolites) :
     return(p_of_meta)
 
 def order_frequency(unique_metabo_pathways, Metabolite_unique_metabo_pathways, pathway_metabo_f1, metabo_f1, All_metabo, frequency_of_metabolite) :
-    frequency_of_metabolites_order=[]
+    frequency_metabolite=[]
+    for index_mf in range (len(All_metabo)):
+        frequency_metabolite.append([frequency_of_metabolite[index_mf],All_metabo[index_mf]])
+    f_metabo=frequency_metabolite[1:]
+    f_metabo.sort(reverse=True)
+    for index_fm in range (1, len(All_metabo)):
+        All_metabo[index_fm]=f_metabo[index_fm-1][1]
+        frequency_of_metabolite[index_fm]=f_metabo[index_fm-1][0]
     infos_for_shapping=[[len(unique_metabo_pathways),unique_metabo_pathways,Metabolite_unique_metabo_pathways],[len(pathway_metabo_f1),metabo_f1, pathway_metabo_f1],[len(All_metabo),All_metabo,frequency_of_metabolite]]
     infos_for_shapping.sort()
     counter=0
+    frequency_of_metabolites_order=[]
     while counter<infos_for_shapping[2][0]:
         if counter>=infos_for_shapping[1][0]:
             shape_ok=[infos_for_shapping[2][1][counter],infos_for_shapping[2][2][counter],"","","",""]
@@ -283,7 +293,6 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi
     """
     if mapper=="CPDB" :
         c_file=input('In which file is the correspondence table between ID and current name located? (Current names must be in the first column and ID in the second)')
-        print(c_file)
         L_of_pathways_list=recup_CPDB_pathways_list(file,c_file)
     elif mapper=="RAMP":  #Resultas_Ramp
         c_file=input('In which file is the correspondence table between ID and current name located? (Current names must be in the first column and ID in the second)')
@@ -310,7 +319,7 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi
 
     approximate_table, only_one_interest_metabolite_pathways, Metabolite_only_one_interest_metabolite_pathways, All_metabolites, metabolite_frequency, pathways_of_metabo, pathways_Names ,totale_recovery,average_recovery=similarity_matrix(L_of_pathways_list_to_treat)
 
-    metabolites_f_equal_1,pathway_metabolites_f_equal_1=list_frequency_1(metabolite_frequency,All_metabolites,pathways_of_metabo)
+    metabolites_f_equal_1,pathway_metabolites_f_equal_1=list_frequency_1(metabolite_frequency,All_metabolites,pathways_of_metabo,pathways_Names, L_of_pathways_list_to_treat)
     Metabolites_and_pathways=pathways_of_metabolite(All_metabolites,pathways_of_metabo)
 
     All_metabolites.insert(0,"Ensemble des métabolites")
@@ -327,10 +336,9 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi
     patways_recovery_order=recovery_position_and_pathways_name(totale_recovery,average_recovery,pathways_Names)
     patways_recovery_order_for_export = pd.DataFrame(data=patways_recovery_order)
     df_matrix_table= df_matrix_r (approximate_table)
-    """
+
     excel_multi_file_writer([patways_recovery_order_for_export, df_matrix_table,metabolite_frequency_order_for_export, Metabolites_and_pathways], outf, ["Noms_tables de ressemblance","Table de ressemblance","Fréquence des métabolites","Métabolites et leurs P"])
-    """
-### fonction 7 et 8 les visualisation + possibiliter de sauvegarder les visus
+
 
     data_for_recovery_visualization = pd.DataFrame(data=patways_recovery_order[1:])
     colnames_recovery = list(data_for_recovery_visualization.columns)
@@ -341,17 +349,14 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi
             plt.savefig(folder_of_saving+"bar_plot_of_recovery.png")
         plt.show()
 
-    """
-    the little part of the code under this lines wille no longer be usseful when the issu whith the return of frequency in ordre will be resolved.
-    It's why it's not in function
-    """
     just_frequency=[]
-    for count_index in range (len(metabolite_frequency_order)-1):
-        just_frequency.append([metabolite_frequency_order[1:][count_index][1],metabolite_frequency_order[1:][count_index][0]])
-    just_frequency.sort(reverse=True)
+    pos_all_meta=metabolite_frequency_order[0].index('Ensemble des métabolites')
+    count_index=0
+    while count_index<len(metabolite_frequency_order)-1 and metabolite_frequency_order[1:][count_index][pos_all_meta]!='':
+        just_frequency.append([metabolite_frequency_order[1:][count_index][pos_all_meta+1],metabolite_frequency_order[1:][count_index][pos_all_meta]])
+        count_index+=1
     dataframe_frequency= pd.DataFrame(data=just_frequency) # trouver une méthode qui marche tout le temps
     colnames_frequency = list(dataframe_frequency.columns)
-
     if type_of_view=="all" or type_of_view=="bar_plot" or type_of_view=="bar_plot_f" or type_of_view=="bar_f_meta_plot" :
         barplot_r=barplot(colnames_frequency[1], colnames_frequency[0], dataframe_frequency, title="Fréquence des métabolites", figure_size=(22, 10),ax_x_label="Métabolites d'intérêt", ax_y_label='Fréquence', decimal='%.0f',size_of_labels=7)
         if save_graph=="all" or save_graph=="bar_plot" or save_graph=="bar_plot_f" or save_graph=="bar_f_meta_plot" :
@@ -365,14 +370,9 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi
         plt.show()
 
 if __name__=="__main__":
-    map="ME" #CPDB, MA, ME, RAMP
-    sortie="test.excel"
+    map="RAMP" # RAMP
     view="all" #all, meta_box, bar_plot_f, bar_plot_r,bar_plot, bar_f_meta_plot, bar_r_meta_plot
     save="all" #all, meta_box, bar_plot_f, bar_plot_r,bar_plot, bar_f_meta_plot, bar_r_meta_plot
-    infile="ExportExcel_6843"
-    #infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\CPDB\\Resultats_mapping_Chebi_ID_L100_CPDB.csv"
-    #infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\RAMP\\sortie_Mapping_RAMP_L100_CheEBI.csv"
-    #infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\MA\\MA_EA_Metabolites_L100_mis_en_forme_pour_le_programme.csv"
+    infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\RAMP\\sortie_Mapping_RAMP_L100_CheEBI.csv"
     finishfile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\test.xlsx"
-    #corsfile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\liste_Chebi_des_100.csv" plus nécessaire
     complete_processing_of_mapping_results(infile,finishfile,map,type_of_view="all",save_graph="all")
\ No newline at end of file
-- 
GitLab