From 5dfe64f7b19c3ab08f3f445a64dc28b81e930a94 Mon Sep 17 00:00:00 2001
From: local_comparaison <mathieu.umec@inrae.fr>
Date: Tue, 14 Nov 2023 16:04:37 +0100
Subject: [PATCH] added code for using mapping by API

---
 Mapping_using_the_API.py | 87 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 Mapping_using_the_API.py

diff --git a/Mapping_using_the_API.py b/Mapping_using_the_API.py
new file mode 100644
index 0000000..f75ec70
--- /dev/null
+++ b/Mapping_using_the_API.py
@@ -0,0 +1,87 @@
+import pandas as pd
+import json
+from urllib import request, parse
+
+
+def send_request_to_Mapping_API(URL, metabolites_list, head,method='POST'):
+    """
+    Send ``{"analytes": metabolites_list}`` as JSON to URL and return the decoded reply.
+    Returns the string "Stop", without sending anything, when the list is empty.
+    """
+    if len(metabolites_list)==0 :
+        return "Stop"
+    # The identifiers are sent under the "analytes" key of a JSON object.
+    json_data = json.dumps({"analytes": metabolites_list}).encode('utf-8')
+    # Use the caller's HTTP method; it was hard-coded to 'POST', ignoring the argument.
+    req = request.Request(URL, data=json_data, headers=head, method=method)
+    with request.urlopen(req) as response:
+        return response.read().decode('utf-8')
+
+def excel_file_writer(dataframe,name_out_file, sheetname="Resultats"):
+    """
+    Write a single dataframe to sheet `sheetname` of the Excel file `name_out_file`.
+    """
+    # The context manager saves and closes the workbook even if to_excel raises.
+    with pd.ExcelWriter(name_out_file) as excel_file:
+        dataframe.to_excel(excel_file, sheet_name=sheetname)
+
+
+def mapping_from_RAMP_API(metabolites_list, outfile,infos="all"):
+    """
+    Map metabolites to pathways through the RaMP API and write the result to Excel.
+
+    Parameters:
+        metabolites_list: prefixed identifiers, for example
+            ["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"]
+            (these 4 metabolites gave 505 lines). Identifiers with the wrong
+            form will not be mapped.
+        outfile: path of the Excel file to write; the sheet is named
+            "Resultats du mapping".
+        infos: "all" writes pathwayName, pathwaySource, pathwayId, inputId and
+            commonName; any other value writes only pathwayName and inputId.
+
+    Each output line associates one metabolite with one metabolic pathway. The
+    column names are written as the first data row, below the numeric header
+    that pandas adds, so the sheet layout is the same as before.
+
+    Returns None. An empty list only prints a message; no file is written.
+    """
+    API_Datas=send_request_to_Mapping_API('https://rampdb.nih.gov/api/pathways-from-analytes',metabolites_list,{'Accept': '*/*','Content-Type': 'application/json'})
+    if API_Datas=="Stop":
+        print (" Your metabolite list is empty. Please restart mapping with a non-empty metabolite list. Here's an example ['KEGG:C01157','hmdb:HMDB0000064','hmdb:HMDB0000148','chebi:16015']")
+        return
+
+    # The original printed the third character from the end of the response as
+    # the count, so the body presumably ends with "<count>]}" - TODO confirm.
+    # Reading the whole trailing digit run keeps counts of 10 or more intact.
+    tail=API_Datas.rstrip("]} \r\n")
+    print(tail[len(tail.rstrip("0123456789")):]+" metabolites were found")
+
+    if infos=="all" :
+        columns=["pathwayName","pathwaySource","pathwayId","inputId","commonName"]
+    else:
+        # Only these two lists were built in this mode; inserting the other
+        # headers used to raise NameError.
+        columns=["pathwayName","inputId"]
+
+    # Decode the JSON instead of slicing the raw text at fixed offsets, which
+    # broke on values containing "{", "]" or a field name and left JSON
+    # escapes undecoded.
+    payload=json.loads(API_Datas)
+    # The records are the first array of the response, as the slicing assumed.
+    if isinstance(payload, list):
+        records=payload
+    else:
+        records=next((v for v in payload.values() if isinstance(v, list)), [])
+
+    rows=[columns]
+    rows.extend([record.get(name) for name in columns]
+                for record in records if isinstance(record, dict))
+    df_result=pd.DataFrame(data=rows)
+    excel_file_writer(df_result,outfile, sheetname="Resultats du mapping")
+
+if __name__ == "__main__":
+    met_L=["KEGG:C01157","hmdb:HMDB0000064","hmdb:HMDB0000148","chebi:16015"]
+    inf="all" # all,only midfile
+    outf="C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\API_RAMP_test.xlsx"  # example path: adapt it to your machine
+    mapping_from_RAMP_API(met_L,outf,infos=inf)  # inf was assigned but never passed
\ No newline at end of file
-- 
GitLab