Skip to content
Snippets Groups Projects
Commit 5d2a9744 authored by UMEC Mathieu's avatar UMEC Mathieu
Browse files

implementation of the visualization section

parent 9be43264
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,8 @@ import csv
jpype.startJVM()
from asposecells.api import *
import re
import matplotlib.pyplot as plt
import seaborn as sns
def column_recovery(file, n, sep=";"):
'''
......@@ -21,14 +23,12 @@ def column_recovery(file, n, sep=";"):
res.append(l[n].strip())
return res
def complete_processing_of_mapping_results (file,outf, mapper,sep=";",view="all",correspondence_file="" ,midfile="oui",midfile_name="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\mid_file.xlsx",name_of_pathways_to_filter=[]):# outfolder can be get by using the file
def complete_processing_of_mapping_results (file,outf, mapper,sep=";",type_of_view="all",graph_title="Graphique 1 : Recouvrement moyen des différentes voies métaboliques obtenues. Graphique 2 : fréquence des métabolites.",correspondence_file="" ,midfile="oui",midfile_name="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\mid_file.xlsx",name_of_pathways_to_filter=[]):# outfolder can be get by using the file
"""
This function takes a file of mapping results and returns a processed summary of the mapped pathways with total coverage, average coverage and metabolite frequency. This data can also be visualised.
file is the results of mapping .csv
mapper is the name of the mapper used; it can be CPDB (ConsensusPathDB), MA (MetaboAnalyst), ME (MetExplore) or RAMP
view takes the option to outputting data visualization such as metabolites frequencies.
type_of_view selects which data visualizations to output, such as metabolite frequencies.
midfile selects whether to output a file of pathway descriptions.
For results of MetExplore mapping it is a bit different from the 3 others: you need a directory with all your files named 'name (n)', with n between 1 and the number of Excel files you have. In file, enter the name of your file. For the folder, you need to write it without "", and for the number of files and the column number do not subtract 1.
"""
......@@ -141,6 +141,8 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",view="all"
frequency_of_metabolite=[]
pathways_of_metabolites=[]
Names_of_pathways=[]
totale_recovery=[]
average_recovery=[]
P1=[]
P2=[]
for num_line in range(Number_of_pathways_to_treat):
......@@ -168,15 +170,15 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",view="all"
frequency_of_metabolite[index_of_this_metabolite]+=1
pathways_of_metabolites[index_of_this_metabolite].append(L_of_pathways_list_to_treat[num_line][0])
mirror_table[num_line,Number_of_pathways_to_treat]=sum_of_list
totale_recovery.append(sum_of_list)
mirror_table[num_line,Number_of_pathways_to_treat+1]=sum_of_list/(Number_of_pathways_to_treat-1)
average_recovery.append(sum_of_list/(Number_of_pathways_to_treat-1))
for n_metab in range (len(All_metabolites)):
metabolite_name_clean=All_metabolites[n_metab]
if ',' in metabolite_name_clean:
while ',' in metabolite_name_clean:
metabolite_name_clean= re.sub(",", "_", str(metabolite_name_clean))
All_metabolites[n_metab]=metabolite_name_clean
approximate_table=np.array(mirror_table, dtype=object)
metabolites_f_equal_1=[]
pathway_metabolites_f_equal_1=[]
......@@ -185,10 +187,9 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",view="all"
metabolites_f_equal_1.append(All_metabolites[frequency_number])
pathway_metabolites_f_equal_1.append(pathways_of_metabolites[frequency_number][0])
Names_of_pathways.append("recouvrement_totale")
Names_of_pathways.append("recouvrement_moyen")
Metabolites_and_pathways=[All_metabolites,pathways_of_metabolites]
Metabolites_and_pathways=[]
for n_metabo in range (len(All_metabolites)):
Metabolites_and_pathways.append([All_metabolites[n_metabo],pathways_of_metabolites[n_metabo]])
Metabolites_and_pathways = pd.DataFrame(data=Metabolites_and_pathways)
All_metabolites.insert(0,"Ensemble des métabolites")
......@@ -198,34 +199,75 @@ def complete_processing_of_mapping_results (file,outf, mapper,sep=";",view="all"
only_one_interest_metabolite_pathways.insert(0,"Voie métabolique présentant qu'un seul métabolite d'intérêt")
Metabolite_only_one_interest_metabolite_pathways.insert(0,"Unique métabolite d'intérêt de la voie métabolique'")
frequency_of_metabolites_for_export=np.array([All_metabolites, frequency_of_metabolite,metabolites_f_equal_1, pathway_metabolites_f_equal_1,only_one_interest_metabolite_pathways,Metabolite_only_one_interest_metabolite_pathways],dtype=object)
frequency_of_metabolites=[]
infos_for_shapping=[[len(only_one_interest_metabolite_pathways),only_one_interest_metabolite_pathways,Metabolite_only_one_interest_metabolite_pathways],[len(pathway_metabolites_f_equal_1),metabolites_f_equal_1, pathway_metabolites_f_equal_1],[len(All_metabolites),All_metabolites, frequency_of_metabolite]]
infos_for_shapping.sort()
counter=0
while counter<infos_for_shapping[2][0]:
if counter>=infos_for_shapping[1][0]:
shape_ok=[infos_for_shapping[2][1][counter],infos_for_shapping[2][2][counter],"","","",""]
elif counter>=infos_for_shapping[0][0]:
shape_ok=[infos_for_shapping[2][1][counter],infos_for_shapping[2][2][counter],infos_for_shapping[1][1][counter],infos_for_shapping[1][2][counter],"",""]
else :
shape_ok=[infos_for_shapping[2][1][counter],infos_for_shapping[2][2][counter],infos_for_shapping[1][1][counter],infos_for_shapping[1][2][counter],infos_for_shapping[0][1][counter],infos_for_shapping[0][2][counter]]
counter+=1
frequency_of_metabolites.append(shape_ok)
frequency_of_metabolites_for_export=np.array(frequency_of_metabolites,dtype=object)
frequency_of_metabolites_for_export = pd.DataFrame(data=frequency_of_metabolites_for_export)
pathways_recovery=[]
for index_p_recov in range (len(totale_recovery)):
pathways_recovery.append([totale_recovery[index_p_recov],average_recovery[index_p_recov],Names_of_pathways[index_p_recov],index_p_recov])
pathways_recovery.sort(reverse=True)
pathways_recovery.insert(0,["Recouvrement total","Recouvrement moyen","Nom de la voie métabolique","Ordre pour la table de ressemblance"])
look_likes_pathways=Names_of_pathways
look_likes_pathways_2=[]
look_likes_pathways=[]
for indice in range(len(approximate_table)):
look_likes_pathways_2.append(approximate_table[indice])
look_likes_pathways_2 = np.array(look_likes_pathways_2,dtype=object)
look_likes_pathways_2 = pd.DataFrame(data=look_likes_pathways_2)
look_likes_pathways.append(approximate_table[indice])
look_likes_pathways = np.array(look_likes_pathways,dtype=object)
look_likes_pathways = pd.DataFrame(data=look_likes_pathways)
pathways_recovery_for_export = pd.DataFrame(data=pathways_recovery)
excel_file = pd.ExcelWriter(outf)
look_likes_pathways.to_excel(excel_file, sheet_name="Noms_tables de ressemblance")
look_likes_pathways_2.to_excel(excel_file, sheet_name="Table de ressemblance")
pathways_recovery_for_export.to_excel(excel_file, sheet_name="Noms_tables de ressemblance")
look_likes_pathways.to_excel(excel_file, sheet_name="Table de ressemblance")
frequency_of_metabolites_for_export.to_excel(excel_file,sheet_name="Fréquence des métabolites")
Metabolites_and_pathways.to_excel(excel_file,sheet_name='Métabolites et leurs P')
excel_file.close()
data_for_recovery_visualization = pd.DataFrame(data=pathways_recovery[1:])
colnames_recovery = list(data_for_recovery_visualization.columns)
just_frequency=[]
for count_index in range (len(frequency_of_metabolites)-1):
just_frequency.append([frequency_of_metabolites[1:][count_index][1],frequency_of_metabolites[1:][count_index][0]])
just_frequency.sort(reverse=True)
dataframe_frequency= pd.DataFrame(data=just_frequency) # trouver une méthode qui marche tout le temps
colnames_frequency = list(dataframe_frequency.columns)
if type_of_view=="all" or type_of_view=="bar_plot":
fig, ax = plt.subplots(2,1,figsize=(30, 5))
p1 = sns.barplot(x=colnames_recovery[2], y=colnames_recovery[1], data=data_for_recovery_visualization, palette='Spectral', ax=ax[1])
p1.bar_label(p1.containers[0], fontsize=5, fmt='%.1f') # add annotation
p1.tick_params(axis='x',rotation=90, size=0.05,labelsize = 5) #rotation des ticklabels
p2 = sns.barplot(x=colnames_frequency[1], y=colnames_frequency[0], data=dataframe_frequency, palette='Spectral', ax=ax[0])
p2.bar_label(p2.containers[0], fontsize=6, fmt='%.f')
p2.tick_params(axis='x', rotation=90, size=0.05,labelsize = 5)
p2.set(title=graph_title)
plt.show()
if type_of_view=="all" or type_of_view=="meta_box" :
B1= sns.boxplot(
color='goldenrod',
data=dataframe_frequency,
y=colnames_frequency[0]);
B1.set(title="boîte à métabolites")
plt.show()
if __name__=="__main__":
mapping_results=""
map="ME" #CPDB, MA, ME, RAMP
sortie="test.excel"
visu="all" #all, frequence
view="all" #all, meta_box, bar_plot
infile="ExportExcel_6843"
finishfile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\test.xlsx"
corsfile="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\liste_Chebi_des_100.csv"
complete_processing_of_mapping_results(infile,finishfile,map,correspondence_file=corsfile,view=visu)
\ No newline at end of file
complete_processing_of_mapping_results(infile,finishfile,map,correspondence_file=corsfile,type_of_view=view)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment