From 5f86a374d6f360aa6fd7471aecf3bcc4696f3a51 Mon Sep 17 00:00:00 2001 From: local_comparaison <mathieu.umec@inrae.fr> Date: Fri, 17 Nov 2023 16:57:08 +0100 Subject: [PATCH] fixing code --- complete_processing_of_mapping_results.py | 48 +++++++++++------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/complete_processing_of_mapping_results.py b/complete_processing_of_mapping_results.py index 6707b85..912a755 100644 --- a/complete_processing_of_mapping_results.py +++ b/complete_processing_of_mapping_results.py @@ -68,7 +68,6 @@ def recup_CPDB_pathways_list(CPDB_mapping_result,correspondence_file): L_Pathways=L_Pathways[1:] L_pathways_metabolites_whith_top=column_recovery(CPDB_mapping_result, 5) L_pathways_metabolites=L_pathways_metabolites_whith_top[1:] - print(L_pathways_metabolites) Chebi_aso_clean=strip_list(associated_chebi) L_to_return=[] """ @@ -206,13 +205,16 @@ def similarity_matrix(list_of_pathways): return(np_table,just_one_interest_metabo_pathways, Metabolite_just_one_interest_metabo_pathways, All_metabo, f_of_metabolite, pathways_of_metabo,Names_of_pathways, all_recovery, average_recov) -def list_frequency_1(frequency_of_metabolite,All_metabolites,pathways_of_metabolites) : +def list_frequency_1(frequency_of_metabolite,All_metabolites,pathways_of_metabolites,list_of_pathways_name,list_of_pathways_list) : metabolites_f1=[] pathway_metabolites_f1=[] for frequency_number in range (len(frequency_of_metabolite)) : if frequency_of_metabolite[frequency_number]==1 : metabolites_f1.append(All_metabolites[frequency_number]) - pathway_metabolites_f1.append(pathways_of_metabolites[frequency_number][0]) + pat=pathways_of_metabolites[frequency_number][0] + pos_path=list_of_pathways_name.index(pat) + number_of_metabolites_of_this_pathways=len(list_of_pathways_list[pos_path])-1 + pathway_metabolites_f1.append(pat+'('+str(number_of_metabolites_of_this_pathways)+')') return(metabolites_f1,pathway_metabolites_f1) def pathways_of_metabolite(metabolites_list,pathways_of_metabolites) : @@ -223,10 +225,18 @@ def pathways_of_metabolite(metabolites_list,pathways_of_metabolites) : return(p_of_meta) def order_frequency(unique_metabo_pathways, Metabolite_unique_metabo_pathways, pathway_metabo_f1, metabo_f1, All_metabo, frequency_of_metabolite) : - frequency_of_metabolites_order=[] + frequency_metabolite=[] + for index_mf in range (len(All_metabo)): + frequency_metabolite.append([frequency_of_metabolite[index_mf],All_metabo[index_mf]]) + f_metabo=frequency_metabolite[1:] + f_metabo.sort(reverse=True) + for index_fm in range (1, len(All_metabo)): + All_metabo[index_fm]=f_metabo[index_fm-1][1] + frequency_of_metabolite[index_fm]=f_metabo[index_fm-1][0] infos_for_shapping=[[len(unique_metabo_pathways),unique_metabo_pathways,Metabolite_unique_metabo_pathways],[len(pathway_metabo_f1),metabo_f1, pathway_metabo_f1],[len(All_metabo),All_metabo,frequency_of_metabolite]] infos_for_shapping.sort() counter=0 + frequency_of_metabolites_order=[] while counter<infos_for_shapping[2][0]: if counter>=infos_for_shapping[1][0]: shape_ok=[infos_for_shapping[2][1][counter],infos_for_shapping[2][2][counter],"","","",""] @@ -283,7 +293,6 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi """ if mapper=="CPDB" : c_file=input('In which file is the correspondence table between ID and current name located? (Current names must be in the first column and ID in the second)') - print(c_file) L_of_pathways_list=recup_CPDB_pathways_list(file,c_file) elif mapper=="RAMP": #Resultas_Ramp c_file=input('In which file is the correspondence table between ID and current name located? (Current names must be in the first column and ID in the second)') @@ -310,7 +319,7 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi approximate_table, only_one_interest_metabolite_pathways, Metabolite_only_one_interest_metabolite_pathways, All_metabolites, metabolite_frequency, pathways_of_metabo, pathways_Names ,totale_recovery,average_recovery=similarity_matrix(L_of_pathways_list_to_treat) - metabolites_f_equal_1,pathway_metabolites_f_equal_1=list_frequency_1(metabolite_frequency,All_metabolites,pathways_of_metabo) + metabolites_f_equal_1,pathway_metabolites_f_equal_1=list_frequency_1(metabolite_frequency,All_metabolites,pathways_of_metabo,pathways_Names, L_of_pathways_list_to_treat) Metabolites_and_pathways=pathways_of_metabolite(All_metabolites,pathways_of_metabo) All_metabolites.insert(0,"Ensemble des métabolites") @@ -327,10 +336,9 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi patways_recovery_order=recovery_position_and_pathways_name(totale_recovery,average_recovery,pathways_Names) patways_recovery_order_for_export = pd.DataFrame(data=patways_recovery_order) df_matrix_table= df_matrix_r (approximate_table) - """ + excel_multi_file_writer([patways_recovery_order_for_export, df_matrix_table,metabolite_frequency_order_for_export, Metabolites_and_pathways], outf, ["Noms_tables de ressemblance","Table de ressemblance","Fréquence des métabolites","Métabolites et leurs P"]) - """ -### fonction 7 et 8 les visualisation + possibiliter de sauvegarder les visus + data_for_recovery_visualization = pd.DataFrame(data=patways_recovery_order[1:]) colnames_recovery = list(data_for_recovery_visualization.columns) @@ -341,17 +349,14 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi plt.savefig(folder_of_saving+"bar_plot_of_recovery.png") plt.show() - """ - the little part of the code under this lines wille no longer be usseful when the issu whith the return of frequency in ordre will be resolved. - It's why it's not in function - """ just_frequency=[] - for count_index in range (len(metabolite_frequency_order)-1): - just_frequency.append([metabolite_frequency_order[1:][count_index][1],metabolite_frequency_order[1:][count_index][0]]) - just_frequency.sort(reverse=True) + pos_all_meta=metabolite_frequency_order[0].index('Ensemble des métabolites') + count_index=0 + while count_index<len(metabolite_frequency_order)-1 and metabolite_frequency_order[1:][count_index][pos_all_meta]!='': + just_frequency.append([metabolite_frequency_order[1:][count_index][pos_all_meta+1],metabolite_frequency_order[1:][count_index][pos_all_meta]]) + count_index+=1 dataframe_frequency= pd.DataFrame(data=just_frequency) # trouver une méthode qui marche tout le temps colnames_frequency = list(dataframe_frequency.columns) - if type_of_view=="all" or type_of_view=="bar_plot" or type_of_view=="bar_plot_f" or type_of_view=="bar_f_meta_plot" : barplot_r=barplot(colnames_frequency[1], colnames_frequency[0], dataframe_frequency, title="Fréquence des métabolites", figure_size=(22, 10),ax_x_label="Métabolites d'intérêt", ax_y_label='Fréquence', decimal='%.0f',size_of_labels=7) if save_graph=="all" or save_graph=="bar_plot" or save_graph=="bar_plot_f" or save_graph=="bar_f_meta_plot" : @@ -365,14 +370,9 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_vi plt.show() if __name__=="__main__": - map="ME" #CPDB, MA, ME, RAMP - sortie="test.excel" + map="RAMP" # RAMP view="all" #all, meta_box, bar_plot_f, bar_plot_r,bar_plot, bar_f_meta_plot, bar_r_meta_plot save="all" #all, meta_box, bar_plot_f, bar_plot_r,bar_plot, bar_f_meta_plot, bar_r_meta_plot - infile="ExportExcel_6843" - #infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\CPDB\\Resultats_mapping_Chebi_ID_L100_CPDB.csv" - #infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\RAMP\\sortie_Mapping_RAMP_L100_CheEBI.csv" - #infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\MA\\MA_EA_Metabolites_L100_mis_en_forme_pour_le_programme.csv" + infile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\RAMP\\sortie_Mapping_RAMP_L100_CheEBI.csv" finishfile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\test.xlsx" - #corsfile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\liste_Chebi_des_100.csv" plus nécessaire complete_processing_of_mapping_results(infile,finishfile,map,type_of_view="all",save_graph="all") \ No newline at end of file -- GitLab