Skip to content
Snippets Groups Projects
Commit 453f00c9 authored by UMEC Mathieu's avatar UMEC Mathieu
Browse files

begin kegg and chebi ontology work

parent ecfd9f4f
No related branches found
No related tags found
No related merge requests found
......@@ -6,8 +6,8 @@ import time
import sys
import pandas as pd
from bs4 import BeautifulSoup
sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données')
from utils import column_recovery
sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données\\src\\processing_mapping_results')
from utils import column_recovery, excel_file_writer
def chebi_horizontal(file, outfile_ful_name, n=0, sep=";", flow=None):
......@@ -462,11 +462,88 @@ def chebi_in_outgouing(file, n, outfill_ful_name, sep=";"):
print(time_of_running)
def _split_kegg_names(name_block):
    """
    Split the raw name field of a KEGG entry into a list of names.

    Names are expected to be separated by ";". A field without separator is
    returned as a one-element list stripped of surrounding whitespace;
    otherwise every name is returned with its surrounding newlines removed.
    """
    synonyms = name_block.split(";")
    if len(synonyms) == 1:
        return [name_block.strip()]
    return [synonym.strip("\n") for synonym in synonyms]


def _split_chebi_ids(chebi_block):
    """
    Return the ChEBI cross-reference(s) held in an already stripped text block.

    A block without any space is returned unchanged as a string; a block
    holding several space-separated identifiers is returned as a list of
    stripped identifiers.
    """
    identifiers = [piece.strip() for piece in chebi_block.split(" ")]
    if len(identifiers) == 1:
        return chebi_block
    return identifiers


def ontologie_gap_kegg_chebi(csv_file, out_folder):
    """
    Collect KEGG names and KEGG-side ChEBI cross-references for rows
    that have no ChEBI ID, and write them to an Excel file.

    For every row whose ChEBI ID is "NA", the page
    https://www.genome.jp/entry/<KEGG ID> is downloaded and its text is
    scanned for the entry names and the ChEBI cross-reference(s).
    The result is written to "KEGG_and_ChEBI_dif_ontologie_test.xlsx"
    (sheet "resume_ontology").

    Arg:
        csv_file = file handed to column_recovery; column 0 is read as the
                   KEGG IDs and column 1 as the ChEBI IDs
        out_folder = prefix of the output file; the file name is appended
                     as-is, so it has to end with a path separator

    Returns:
        None
    """
    l_kegg = column_recovery(csv_file, 0)
    l_cheb = column_recovery(csv_file, 1)
    name_kegg = ["associated name of KEGG ID"]
    name_chebi = ["associated name of ChEBI ID"]
    ontologie_level_chebi = ["associated ontologie level of ChEBI ID"]
    ontologie_level_equiv_kegg = ["positional difference betwen kegg et chebi ID"]
    chebi_on_kegg = ["ID chebi from KEGG"]
    for ac_kegg, ac_cheb in zip(l_kegg, l_cheb):
        accheb = ac_cheb.strip().upper()
        ackegg = ac_kegg.strip().upper()
        if accheb != "NA":
            # TODO: rows that already have a ChEBI ID are not processed yet.
            # Planned: "NA" placeholders when the KEGG ID is "NA", otherwise
            # the ChEBI name read from
            # https://www.ebi.ac.uk/chebi/searchId.do?chebiId=<ChEBI ID>.
            # NOTE(review): such rows add nothing to the result columns, so
            # those columns stop lining up with the ID columns - confirm the
            # input only holds "NA" ChEBI IDs.
            continue
        url_kegg = "https://www.genome.jp/entry/"+ackegg
        print(url_kegg)
        # The context manager closes the HTTP response once the page is read.
        with urllib.request.urlopen(url_kegg) as response:
            page = response.read().decode("utf-8")
        textk = BeautifulSoup(page, "html.parser").get_text()
        # The names are taken from the text between "Name" and "Formula".
        # NOTE(review): str.find returns -1 when a marker is missing, in which
        # case this slice does not delimit the name field.
        i_bnamek = textk.find("Name") + 5
        i_enamek = textk.find("Formula")
        name_kegg.append(_split_kegg_names(textk[i_bnamek:i_enamek]))
        i_chebi = textk.find("ChEBI")
        if i_chebi != -1:
            # The cross-reference ends at "LIPIDMAPS" when that marker is
            # present, at "KCF data" otherwise.
            i_end_chebi = textk.find("LIPIDMAPS")
            if i_end_chebi == -1:
                i_end_chebi = textk.find("KCF data")
            # + 7 presumably skips "ChEBI: " - TODO confirm on a real page
            chebi_aso = textk[i_chebi + 7:i_end_chebi].strip()
            print(chebi_aso)
            chebi_on_kegg.append(_split_chebi_ids(chebi_aso))
        else:
            chebi_on_kegg.append("NA")
        name_chebi.append("NA")
        ontologie_level_chebi.append("NA")
        ontologie_level_equiv_kegg.append("NA")
    print(chebi_on_kegg)
    out_df = pd.DataFrame([l_kegg, name_kegg, chebi_on_kegg, l_cheb, name_chebi,
                           ontologie_level_chebi, ontologie_level_equiv_kegg]).transpose()
    outf_name = out_folder + "KEGG_and_ChEBI_dif_ontologie_test.xlsx"
    excel_file_writer(out_df, outf_name, sheetname="resume_ontology")
# Script entry point.
if __name__ == "__main__":
# NOTE(review): this span comes from a diff view, so pre-change and post-change
# lines presumably appear together - confirm which ones are still live.
# This first LOCAL has no trailing path separator: the file names below are
# appended directly to the folder name "recovery_asso_chebi".
LOCAL = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\recovery_asso_chebi"
OUTF1 = LOCAL + "outgoing_test.xlsx"
OUTF2 = LOCAL + "horizontal_test.xlsx"
INF = LOCAL + "l_chebi_to_change_oeil_for_test.csv"
# COL is not used by any of the calls visible here.
COL = [1, 3]
chebi_in_outgouing(INF, 0, OUTF1)
print(chebi_horizontal(INF, OUTF2))
# LOCAL is reassigned: from here on it is both the input folder and the output folder.
LOCAL = "C:\\Users\\mumec\\Desktop\\Mini_codes\\ontologie_kegg_chebi\\"
infile = LOCAL + "ontologie_gap_chebi_na.csv"
ontologie_gap_kegg_chebi(infile, LOCAL)
\ No newline at end of file
......@@ -16,7 +16,7 @@ import urlopen
#from metanetx_sdk import et1_table
#import rpy2.robjects as robjects
LOCAL = "C:\\Users\\mumec\\Desktop\\conversion_ids\\Liste_test_chebi\\"
LOCAL = "C:\\Users\\mumec\\Desktop\\conversion_ids\\c18\\"
def send_request_to_mapping_api(url, data_json, head, met='POST'):
"""
......@@ -152,6 +152,7 @@ def id_conv_opti(out_folder, cts_data=None, ma_data=None,
ramp_data=None, mtx_data=None, start_type_id="chebi"):
"""
Take the csv files of ID conversions and give the best consensus conversion.
Careful: all input files need exactly the same number of lines, with matching rows across files.
cts_data = csv file of CTS conversion site data
ma_data = csv file of MetaboAnalyst conversion site data
......@@ -308,7 +309,7 @@ if __name__ == "__main__":
"Cholic acid", "Citric acid", "Deoxycholic acid"]
intyp = 'name'
print(equiv_from_ma_api(meta_test_ma, intyp, out_fold))
"""
cts_path = LOCAL + "L100_chebi_cts_rev_23-04-2024.csv"
ma_path = LOCAL + "L100_chebi_metaboanalyst_rev_23-04-2024.csv"
ramp_path = LOCAL + "L100_chebi_ramp_rev_23-04-2024.csv"
......@@ -317,8 +318,7 @@ if __name__ == "__main__":
id_conv_opti(out_fold, cts_data=cts_path, ma_data=ma_path,
ramp_data=ramp_path, mtx_data=mtx_path)
"""
cts_path = LOCAL + "L100_inchikey_cts_rev_23-04-2024.csv"
mtx_path = LOCAL + "L100_inchikey_metanetx_rev_23-04-2024.csv"
cts_path = LOCAL + "CTS_c18_pos_rev_29-04-2024.csv"
ma_path = LOCAL + "MA_c18_pos_rev_29-04-2024.csv"
id_conv_opti(out_fold, cts_data=cts_path, mtx_data=mtx_path)
"""
\ No newline at end of file
id_conv_opti(out_fold, cts_data=cts_path, ma_data=ma_path)
\ No newline at end of file
......@@ -304,13 +304,13 @@ def workflow(infile, out_folder):
val_matching = [0, 0, 0]
for id_map in l_id_map[1:]:
if id_map == "NA":
all_verif[pos_l_id_map].append("Not matched")
all_verif[pos_l_id_map].append("not matched")
val_matching[2] += 1
elif id_map in exa_id:
all_verif[pos_l_id_map].append("Exact matching")
all_verif[pos_l_id_map].append("Exact match")
val_matching[0] += 1
else:
all_verif[pos_l_id_map].append("Non-exact matching")
all_verif[pos_l_id_map].append("Non-exact match")
val_matching[1] += 1
name_sav_vis = vis_dir + "couverture_mapping " + title + ".png"
cam_vis(l_matching, val_matching, title, sav=name_sav_vis)
......
......@@ -104,7 +104,7 @@ def view_recovery(file, title_plot="Recouvrement moyen",
return fig
def color_pie(data, labels, colors, explode=None, fsize=(10, 10), pourcent=True):
def color_pie(data, colors, labels=None, explode=None, fsize=(10, 10), pourcent=True):
"""
plot a color pie
......@@ -125,16 +125,25 @@ def color_pie(data, labels, colors, explode=None, fsize=(10, 10), pourcent=True)
for posdata, xdata in enumerate(data):
if xdata != 0:
data_clean.append(xdata)
labels_clean.append(labels[posdata])
if labels is not None:
labels_clean.append(labels[posdata])
explode_clean.append(explode[posdata])
colors_clean.append(colors[posdata])
plt.figure(figsize=fsize)
if pourcent is True:
plt.pie(data_clean, labels=labels_clean, pctdistance=0.8, explode=explode_clean,
colors=colors_clean, autopct=lambda x: str(round(x, 1)) + '%', textprops={'fontsize': 14})
if labels is not None:
plt.pie(data_clean, labels=labels_clean, pctdistance=0.9, explode=explode_clean,
colors=colors_clean, autopct=lambda x: str(round(x, 1)) + '%', textprops={'fontsize': 16})
else:
plt.pie(data_clean, pctdistance=0.80, explode=explode_clean, colors=colors_clean,
autopct=lambda x: str(round(x, 1)) + '%', textprops={'fontsize': 18})
else:
plt.pie(data_clean, labels=labels_clean, pctdistance=0.8, explode=explode_clean,
colors=colors_clean, autopct='%d', textprops={'fontsize': 16})
if labels is not False:
plt.pie(data_clean, labels=labels_clean, pctdistance=0.8, explode=explode_clean,
colors=colors_clean, autopct='%.f', textprops={'fontsize': 16})
else:
plt.pie(data_clean, pctdistance=0.8, explode=explode_clean,
colors=colors_clean, autopct='%.f', textprops={'fontsize': 16})
plt.legend(loc=1, fontsize=8)
plt.show()
......@@ -315,6 +324,84 @@ def cam_vis(name, values, title, sav=None):
plt.savefig(sav)
plt.show()
def barplot_temp(column_x, column_y, df_data, title="barplot", figure_size=(15, 30),
                 ax_x_label="voies métaboliques", ax_y_label='Recouvrement moyen',
                 colors='Spectral', decimal='%.1f', y_lim=None):
    """
    Draw a bar plot from a dataframe, with the value written next to each bar.

    column_y is handed to seaborn as the x variable and column_x as the
    y variable; the axis labels and the optional limit follow that swap.

    Arg:
        column_x = name of the df_data column drawn along the vertical axis
        column_y = name of the df_data column drawn along the horizontal axis
        df_data = dataframe of data to plot
        title = title of the plot
        figure_size = size of the figure given to plt.subplots
        ax_x_label = label written on the vertical axis
        ax_y_label = label written on the horizontal axis
        colors = palette given to sns.barplot
        decimal = format string; accepted but not used by this function
        y_lim = upper limit of the horizontal axis, None to keep the default

    Returns:
        the object returned by sns.barplot
    """
    # Creates the figure the seaborn plot is drawn on.
    plt.subplots(1, 1, figsize=figure_size)
    p1 = sns.barplot(x=column_y, y=column_x, data=df_data, palette=colors)
    p1.set(title=title)
    plt.xlabel(ax_y_label)
    plt.ylabel(ax_x_label)
    if y_lim is not None:
        plt.xlim(0, y_lim)
    # Only the bars of the first container are labelled.
    p1.bar_label(p1.containers[0], fontsize=16)
    # Wide left margin, leaving room for the tick labels of the vertical axis.
    plt.subplots_adjust(top=0.95, bottom=0.05, right=0.95, left=0.25)
    p1.tick_params(axis='y', size=0.05, labelsize=16)
    return p1
def up_down_path_plot_temp(l_path, up, down, log_p):
    """
    Plot the regulation of pathways: stacked bars of down/up regulated
    metabolites and, on a twin y axis, the -log(pvalue) of each pathway.

    Parameters :
        l_path : list of pathway names, used as x tick labels
        up : numbers of metabolites with up regulation (stacked on top of down)
        down : numbers of metabolites with down regulation
        log_p : value of -log(pvalue) of the pathways

    return : the figure holding the plot of regulation pathways
    """
    fig, ax1 = plt.subplots(figsize=(22, 14))
    # The bottom margin grows with the longest pathway name, because the
    # tick labels are written vertically under the bars.
    max_bot = max((len(path_name) for path_name in l_path), default=0)
    if max_bot > 20:
        plt.subplots_adjust(top=0.95, bottom=0.40*(max_bot/100))
    else:
        plt.subplots_adjust(top=0.95, bottom=0.20)
    l_bar = 0.8
    x = range(len(down))
    ax1.bar(x, down, width=l_bar, label="Down regulated", color="tomato")
    ax1.bar(x, up, width=l_bar, bottom=down,
            label="Up regulated", color="mediumturquoise")
    ax1.set_ylabel("% of metabolites modulate", size=16)
    # The x label is set on ax1: the x axis of the twin axes created below
    # is invisible, so a label set there would never be drawn.
    ax1.set_xlabel("Pathways", size=16)
    plt.ylim(top=100)
    ax2 = ax1.twinx()
    ax2.plot(l_path, log_p, marker='o', linestyle='-',
             label="-log(pvalue)", color="mediumpurple")
    plt.title("Pathways modulation", size=16)
    ax2.set_ylabel("-log(pvalue)", size=16)
    ax2.yaxis.set_tick_params(labelsize=16)
    ax1.yaxis.set_tick_params(labelsize=16)
    plt.ylim(bottom=0, top=max(log_p)+0.5)
    fig.legend(loc='upper right', bbox_to_anchor=(0.9, 0.95))
    ax1.set_xticks(l_path)
    ax1.set_xticklabels(l_path, size=11, rotation=-90, ha='center')
    return fig
if __name__ == "__main__":
"""
L_PATH = ["pathways1", "pathways2ffffffffffffffffffffffffffffffffffffffffff", "pathways3"]
......@@ -336,9 +423,30 @@ if __name__ == "__main__":
title = "mapping coverage of MetaboAnalyst on KEGG"
cam_vis(name, values, title)
"""
x = [4, 43, 0, 5, 48, 0]
y=["Identical", "different", "CTS", "MetaboAnalyst", "MetaNetx", "RaMP"]
"""
x = [28, 24, 48, 1, 1, 1]
y=["Identique", "different", "CTS", "MetaboAnalyst", "MetaNetx", "RaMP"]
exp=[0.02, 0.02, 0.02, 0.02, 0.02, 0.02]
co=["steelblue", "firebrick", "darkorange", "limegreen", "yellow", "darkviolet"]
color_pie(x, y, co, explode=exp, fsize=(15, 15), pourcent=False)
x = [2671, 363, 49613, 1377]
y=["Reactome","KEGG","HMDB","Wikipathways"]
exp=[0.02, 0.04,0.04, 0.04]
co=["darkorange","firebrick","wheat","steelblue"]
color_pie(x, co, labels=False, explode=exp, fsize=(15, 15), pourcent=True)
l_p = ["Glycine_ serine_ alanine and threonine metabolism", "Valine_ leucine and isoleucine degradation", "Urea cycle and metabolism of arginine_etc", "Aminosugars metabolism", "Vitamin B3 (nicotinate and nicotinamide) metabolis", "Pentose phosphate pathway", "Lysine metabolism", "Methionine and cysteine metabolism", "Galactose metabolism"]
upp =[5.681818182, 8.064516129, 3.2, 7.143, 2.857, 8.108, 3.846, 2.564, 2.128]
downn = [4.545454545, 0, 1.6, 2.381, 5.714, 0, 1.923, 1.282, 2.128]
llog_p = [20.31115619, 10.35604427, 9.295, 9.106, 6.725, 6.557, 5.575, 4.448, 3.402]
up_down_path_plot_temp(l_p, upp, downn, llog_p)
"""
x = [14, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
y=["amino acids and derivatives", "FA Carn / FA and derivatives", "carbohydrates and derivatives",
"MonoSGL", "LPC", "organic acids", "PC", "PE", "peptides", "PS", "SM", "steroids", "TG", "Autres"]
exp=[0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02]
co=["steelblue", "firebrick", "darkorange", "limegreen", "yellow", "darkviolet", "lightsteelblue", "lightcoral", "bisque", "aquamarine", "lightyellow", "plum", "pink", "grey"]
color_pie(x, co, labels=None, explode=exp, fsize=(15, 15), pourcent=True)
plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment