diff --git a/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc b/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc deleted file mode 100644 index 29488e81ca132689de06d97aa4b1dff35665d9c8..0000000000000000000000000000000000000000 Binary files a/__pycache__/Visualisation_des_donnes_de_mapping.cpython-310.pyc and /dev/null differ diff --git a/Tests_unitaires/Cors_file_for_test/Corres_file_for_CPDB_unittest.csv b/src/processing_mapping_results/Tests_unitaires/Cors_file_for_test/Corres_file_for_CPDB_unittest.csv similarity index 100% rename from Tests_unitaires/Cors_file_for_test/Corres_file_for_CPDB_unittest.csv rename to src/processing_mapping_results/Tests_unitaires/Cors_file_for_test/Corres_file_for_CPDB_unittest.csv diff --git a/Tests_unitaires/Cors_file_for_test/Corres_file_for_RAMP_unittest.csv b/src/processing_mapping_results/Tests_unitaires/Cors_file_for_test/Corres_file_for_RAMP_unittest.csv similarity index 100% rename from Tests_unitaires/Cors_file_for_test/Corres_file_for_RAMP_unittest.csv rename to src/processing_mapping_results/Tests_unitaires/Cors_file_for_test/Corres_file_for_RAMP_unittest.csv diff --git a/main.py b/src/processing_mapping_results/Tests_unitaires/main.py similarity index 100% rename from main.py rename to src/processing_mapping_results/Tests_unitaires/main.py diff --git a/Tests_unitaires/test_Mapping_using_the_API.py b/src/processing_mapping_results/Tests_unitaires/test_Mapping_using_the_API.py similarity index 100% rename from Tests_unitaires/test_Mapping_using_the_API.py rename to src/processing_mapping_results/Tests_unitaires/test_Mapping_using_the_API.py diff --git a/Tests_unitaires/test_complete_processing_of_mapping_results.py b/src/processing_mapping_results/Tests_unitaires/test_complete_processing_of_mapping_results.py similarity index 100% rename from Tests_unitaires/test_complete_processing_of_mapping_results.py rename to src/processing_mapping_results/Tests_unitaires/test_complete_processing_of_mapping_results.py diff --git a/src/processing_mapping_results/Tests_unitaires/test_main.py b/src/processing_mapping_results/Tests_unitaires/test_main.py new file mode 100644 index 0000000000000000000000000000000000000000..ccb3337b9031e4c19369ca8c4612e13d51c34b22 --- /dev/null +++ b/src/processing_mapping_results/Tests_unitaires/test_main.py @@ -0,0 +1,54 @@ +import unittest +import os +import tempfile +import sys +import openpyxl +sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') +from main import * + +class Test_shapping_data(unittest.TestCase): + """ + Tests for shapping_data, which returns (datas_for_mapping, for_pdf_ch) + and writes Datas_mis_en_forme_pour_le_mapping.xlsx to the output folder. + """ + def test_shapping_data(self): + # Build a minimal input workbook: name, IDs, case and control intensities. + with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as tmp_file: + wb = openpyxl.Workbook() + ws = wb.active + ws['A1'] = 'C_N' + ws['B1'] = 'ID' + ws['C1'] = 'Cas' + ws['D1'] = 'Temoin' + ws['A2'] = 'choline' + ws['B2'] = 'chebi:15354, hmdb:HMDB0000097' + ws['C2'] = '0.41' + ws['D2'] = '1.59' + ws['A3'] = 'L-Kynurenine' + ws['B3'] = 'NA' + ws['C3'] = '0.24' + ws['D3'] = '1.74' + ws['A4'] = 'Leucylleucine' + ws['B4'] = 'chebi:191208' + ws['C4'] = '3.01' + ws['D4'] = '5.01' + wb.save(tmp_file.name) + tab_to_map, list_fig = shapping_data(tmp_file.name, "C:\\Users\\mumec\\Desktop\\") + os.remove(tmp_file.name) + self.assertIsInstance(tab_to_map, list) + self.assertIsInstance(list_fig, list) + self.assertTrue(os.path.exists("C:\\Users\\mumec\\Desktop\\Datas_mis_en_forme_pour_le_mapping.xlsx")) + os.remove("C:\\Users\\mumec\\Desktop\\Datas_mis_en_forme_pour_le_mapping.xlsx") + +if __name__ == "__main__": + unittest.main() diff --git a/src/processing_mapping_results/Tests_unitaires/test_network_visualization.py b/src/processing_mapping_results/Tests_unitaires/test_network_visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..5069f1f40cc76323fd8c75d348db651c135b9b11 --- /dev/null +++ b/src/processing_mapping_results/Tests_unitaires/test_network_visualization.py @@ -0,0 +1,60 @@ +import unittest +import os +import tempfile +import sys +sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') +from network_visualization import * + +class Test_paths_link_cpdb(unittest.TestCase): + """ + paths_link_cpdb(map_data, out_file, mod_data, bdd="Reactome", flow=None) + """ + + def test_paths_link_cpdb_flow(self): + # flow=True: mapping and modulation data are passed as in-memory lists. + mdata = [["1.5","3.5","1.9", "1.9"], ["5.5","7.7","5.2", "5.2"], + ["amine_path","SLC_path","Glutamate", "tryptophan"], + ["Reactome","Reactome","HumanCyc", "Reactome"], + ["R-HA","R-HA","R-HA", "R-HA"], + [["17295", "15729"],["17295"],["15729"], ["15729"]], + ["k:C","k:C","k:C","k:C"],["32","164","34","34"], + ["32","164","34","34"]] + modata = [["name","meta1","meta2"], + ["chebi","17295","15729"],["hmdb","15729","17295"], + ["logp","-0.35", "0.35"]] + name_out_file = 'test_default_paths_link_cpdb.xlsx' + edges, nodes = paths_link_cpdb(mdata, name_out_file, modata, + bdd="Reactome", flow=True) + self.assertTrue(os.path.exists(name_out_file)) + self.assertEqual(len(edges), 7) + self.assertEqual(len(nodes), 5) + os.remove(name_out_file) + + + def test_paths_link_cpdb_from_files(self): + # Default flow=None: mapping and modulation data are read from csv files. + self.temp_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') + self.temp_file.write("1.5;5.5;amine_path;Reactome;R-HA;17295,15729;k:C;32;32\n") + self.temp_file.write("3.5;7.7;SLC_path;Reactome;R-HA;17295;k:C;164;164\n") + self.temp_file.write("1.9;5.2;Glutamate;HumanCyc;R-HA;15729;k:C;34;34\n") + self.temp_file.write("1.9;5.2;tryptophan;Reactome;R-HA;15729;k:C;34;34\n") + self.temp_file.close() + self.temp_file1 = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') + self.temp_file1.write("Chebi;intensité_cas;intensité_témoin\n") + self.temp_file1.write("17295;25;23\n") + self.temp_file1.write("15729;23;25\n") + self.temp_file1.close() + name_out_file = 'test_default_paths_link_cpdb.xlsx' + edges, nodes = paths_link_cpdb(self.temp_file.name, name_out_file, + self.temp_file1.name, bdd="Reactome") + os.remove(self.temp_file.name) + os.remove(self.temp_file1.name) + self.assertTrue(os.path.exists(name_out_file)) + self.assertEqual(len(edges), 7) + self.assertEqual(len(nodes), 5) + os.remove(name_out_file) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/src/processing_mapping_results/Tests_unitaires/test_pdf_generation_toolbox.py b/src/processing_mapping_results/Tests_unitaires/test_pdf_generation_toolbox.py new file mode 100644 index
0000000000000000000000000000000000000000..c8efe276c9a499e09b4b88deec8802f1c5019963 --- /dev/null +++ b/src/processing_mapping_results/Tests_unitaires/test_pdf_generation_toolbox.py @@ -0,0 +1,126 @@ +import unittest +import pandas as pd +import os +import tempfile +import sys +import openpyxl +from unittest.mock import patch +from docx import Document +sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') +from pdf_generation_toolbox import * + + + +class Testread_docx(unittest.TestCase): + + def test_read_docx(self): + doc = Document() + doc.add_heading('Sample Document', 0) + doc.add_paragraph('This is a sample paragraph.') + temp_file = tempfile.NamedTemporaryFile(suffix='.docx', delete=False) + doc.save(temp_file.name) + temp_file.close() + csv_list = read_docx(temp_file.name) + os.remove(temp_file.name) + self.assertIsInstance(csv_list, list) + + +""" +class Testout_pdf_mapping(unittest.TestCase): +""" +""" +def out_pdf_mapping(file_path, data_input, chebi_hori, recap, results_map_ramp, + results_map_cpdb, out_folder, l_visu): + + result_pdf = PDF() + result_pdf.add_font('DejaVu', 'I', 'DejaVuSansCondensed.ttf') + result_pdf.add_font('DejaVu', 'B', 'DejaVuSansCondensed.ttf') + result_pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf') + result_pdf.set_font('DejaVu', '', 14) + content = read_docx(file_path) + index_toc = content.index('Table of contents') + l_title = [] + titles = [] + for i_strr, strr in enumerate(content[index_toc+1:]): + if strr != '': + if strr[1] == ' ': + l_title.append(strr) + titles.append(strr[2:]) + else: + index_end_toc = i_strr + break + begin_pages = [0, index_toc] + for i_sbp, search_begin_page in enumerate(content[index_end_toc:]): + if search_begin_page in titles: + begin_pages.append(index_end_toc+i_sbp) + result_pdf.guard_page(content[0:begin_pages[1]]) + result_pdf.shaped_page() + result_pdf.summary(l_title) + recap_cpdb = [] + recap_ramp = [] + for line in recap: + recap_ramp.append(line[:2]) + recap_cpdb.append([line[0]]+line[2:-1]) + l_l_tables = [["NA"], ["NA", data_input], ["NA", chebi_hori], + ["NA", results_map_ramp, "NA", "NA", "NA", "NA", recap_ramp], + ["NA"], ["NA", recap_cpdb, "NA", "NA", results_map_cpdb], + ["NA"], ["NA"], ["NA"]] + l_l_images = [["NA"], ["NA"], ["NA"], ["NA"], + ["NA", l_visu[0], "NA", "NA", l_visu[1], "NA", l_visu[2]], + ["NA"], ["NA", "NA", l_visu[3], l_visu[4], "NA", "NA", + l_visu[5], "NA", "NA", "NA", "NA", "NA", l_visu[6]], + ["NA"], ["NA"]] + for p_index, begin in enumerate(begin_pages[2:-1]): + result_pdf.chapter_s_t_i(p_index+1, titles[p_index], + content[begin+1: begin_pages[p_index+3]], + l_l_tables[p_index], l_l_images[p_index]) + result_pdf.chapter_s(len(l_title), titles[-1], content[begin_pages[-1]+1:]) + result_pdf.shaped_page() + result_pdf.end_doc("C:\\Users\\mumec\\Desktop\\Mini_codes\\pdf_PyPDF2\\end_page_for_pdf.docx") + result_pdf.output(out_folder+"test_tab_sortie_pdf_mapping_results.pdf") + + +if __name__ == "__main__": + dinput = [["Current_name", "ID", "Cas", "Temoin"], + ["choline", "chebi:15554, hmdb:HMDB0000267", "0.41", "1.59"], + ["choline", "chebi:15554, hmdb:HMDB0000267", "0.41", "1.59"], + ["choline", "chebi:15554, hmdb:HMDB0000267", "0.41", "1.59"]] + + Chebi_optimal = [["Chebi ex", "is conjugate b of", "is conjugate acide of", + "is tautomer of", "is enantiomer of"], + ["Choline", "15354", "NA", "NA", "NA"], + ["Choline", "15354", "NA", "NA", "NA"], + ["Choline", "15354", "NA", "NA", "NA"]] + + reca = [["Name", "RAMP", "CPDB chebi", "CPDB hmdb"], 
+ ["Choline", "chebi:15554", "NA", "15554"], + ["Ornithine", "NA", "15354", "HMDB0000097"]] + + r_m_r = [["path_Name", "path_source", "pathwayid", "inputid", "commoname"], + ["Choline", "wiki", "WP5552", "chebi:15554", "cho,choline"], + ["Choline", "wiki", "WP5552", "chebi:15554", "cho,choline"], + ["Choline", "wiki", "WP5552", "chebi:15554", "cho,choline"]] + + r_m_c = [["p-value", "q-value", "Pathways", "source", "pathwats links", + "menber input", "input overlap", "size", "effective_size", + "fesetID", "PMIDS", "URLCPDB", "common name mapped"], + ["2.2", "1.2", "Pathways", "wiki", "URL", "HMDB00056450", "5", + "219", "172", "142906", "NA", "URLCPDB", "[choline, ornithine]"], + ["2.2", "1.2", "Pathways", "wiki", "URL", "HMDB00056450", "5", + "219", "172", "142906", "NA", "URLCPDB", "[choline, ornithine]"]] + + FOLD_V = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\main\\" + list_visu = [FOLD_V+"Recouvrement moyen voies métaboliques 0.png", + FOLD_V+"RAMPbar_plot_of_metabolites.png", + FOLD_V+"RAMPmetabolites_bo_of_frequency.png", + FOLD_V+"CPDBbar_plot_of_recovery.png", + FOLD_V+"CPDBbar_plot_of_metabolites.png", + FOLD_V+"CPDBmetabolites_bo_of_frequency.png", + FOLD_V+"CPDBup_down_path_plot.png"] + FP = "Modele_de_pdf_feneratio_en_anglais_rev_19-02-2024.docx" + + out_pdf_mapping(FP, dinput, Chebi_optimal, reca, + r_m_r, r_m_c, FOLD_V, list_visu) + """ +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/src/processing_mapping_results/Tests_unitaires/test_utils.py b/src/processing_mapping_results/Tests_unitaires/test_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..aa07b654fe0cf694d4c987c4791e7723e1fcb530 --- /dev/null +++ b/src/processing_mapping_results/Tests_unitaires/test_utils.py @@ -0,0 +1,61 @@ +import unittest +import pandas as pd +import os +import tempfile +import sys +sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') +from utils import * + +class Test_utils_without_file(unittest.TestCase): + + def test_comma_cleaning(self): + result_clean = comma_cleaning("ceci est un test, enfin, je crois") + self.assertIsInstance(result_clean, str) + self.assertEqual(result_clean, "ceci est un test_ enfin_ je crois") + + + def test_pre_cut(self): + result_cut = pre_cut(["NA", "hmdb:HMDB011011", "NA", "hmdb:HMDB0584"]) + self.assertListEqual(result_cut, ["NA", "HMDB011011", "NA", "HMDB0584"]) + + def test_cor_index(self): + result = cor_index(['d ', 'z', 'b'], ['z', 'b', 'd', 'f', 'h'],['a', 'c', 'e', 'g', 'i']) + self.assertListEqual(result, ['e', 'a', 'c']) + + +class Test_utils_with_file(unittest.TestCase): + + def test_excel_file_writer(self): + dataframe = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + name_out_file='test_default_sheetname.xlsx' + excel_file_writer(dataframe, name_out_file,sheetname="test_1") + self.assertTrue(os.path.exists(name_out_file)) + os.remove(name_out_file) + + + def test_column_recovery(self): + self.temp_file = tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') + self.temp_file.write("1;it;Doe\n") + self.temp_file.write("2;is;Smith\n") + self.temp_file.write("3;good;Johnson\n") + self.temp_file.close() + result = column_recovery(self.temp_file.name, 1) + expected_result = ['it', 'is', 'good'] + os.remove(self.temp_file.name) + self.assertEqual(result, expected_result) + + + def test_excel_m_file_writer(self): + list_of_dataframe = [pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'X': ['a', 'b', 'c'], 'Y': 
['d', 'e', 'f']})] + name_outfile = 'test_output.xlsx' + excel_m_file_writer(list_of_dataframe, name_outfile, ['Sheet1', 'Sheet2']) + self.assertTrue(os.path.exists(name_outfile)) + excel_file = pd.ExcelFile(name_outfile) + sheet_names = excel_file.sheet_names + self.assertEqual(len(sheet_names), 2) + excel_file.close() + os.remove(name_outfile) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/src/processing_mapping_results/additional_ressources/DejaVuSansCondensed.ttf b/src/processing_mapping_results/additional_ressources/DejaVuSansCondensed.ttf new file mode 100644 index 0000000000000000000000000000000000000000..2f96594d6bd5260841d825f10b79fac9eaa78063 Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/DejaVuSansCondensed.ttf differ diff --git a/src/processing_mapping_results/additional_ressources/Ensemble_des_logos_pdp.PNG b/src/processing_mapping_results/additional_ressources/Ensemble_des_logos_pdp.PNG new file mode 100644 index 0000000000000000000000000000000000000000..083a736a7f12cadc65ef30e97fd5052ae1099b58 Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/Ensemble_des_logos_pdp.PNG differ diff --git a/src/processing_mapping_results/additional_ressources/Modele_de_pdf_feneratio_en_anglais_rev_19-02-2024.docx b/src/processing_mapping_results/additional_ressources/Modele_de_pdf_feneratio_en_anglais_rev_19-02-2024.docx new file mode 100644 index 0000000000000000000000000000000000000000..22d294246e0b3d48ff08bd7563e7b3b87e7c838d Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/Modele_de_pdf_feneratio_en_anglais_rev_19-02-2024.docx differ diff --git a/src/processing_mapping_results/additional_ressources/bandeau_fin_doc.PNG b/src/processing_mapping_results/additional_ressources/bandeau_fin_doc.PNG new file mode 100644 index 0000000000000000000000000000000000000000..c5087d08c2e4ab10f079ea02278a72971cb2c133 Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/bandeau_fin_doc.PNG differ diff --git a/src/processing_mapping_results/additional_ressources/carte_france_fin_doc.PNG b/src/processing_mapping_results/additional_ressources/carte_france_fin_doc.PNG new file mode 100644 index 0000000000000000000000000000000000000000..1ac768d549f989b5d251e34612b24200248b4645 Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/carte_france_fin_doc.PNG differ diff --git a/src/processing_mapping_results/additional_ressources/end_page_for_pdf.docx b/src/processing_mapping_results/additional_ressources/end_page_for_pdf.docx new file mode 100644 index 0000000000000000000000000000000000000000..f214c382dda4b44fe3f38c73a36f1d683c52b8e9 Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/end_page_for_pdf.docx differ diff --git a/src/processing_mapping_results/additional_ressources/logo_metabohub_metex.PNG b/src/processing_mapping_results/additional_ressources/logo_metabohub_metex.PNG new file mode 100644 index 0000000000000000000000000000000000000000..3799242a927d20c216abae0699c231354f4184b8 Binary files /dev/null and b/src/processing_mapping_results/additional_ressources/logo_metabohub_metex.PNG differ diff --git a/src/processing_mapping_results/chebi_id/recovery_of_associated_chebi_id.py b/src/processing_mapping_results/chebi_id/recovery_of_associated_chebi_id.py new file mode 100644 index 0000000000000000000000000000000000000000..73f759fa14aec366d70c4af20c28c18c7e0ba6fe --- /dev/null +++
b/src/processing_mapping_results/chebi_id/recovery_of_associated_chebi_id.py @@ -0,0 +1,472 @@ +""" +This module is designed to find the associated ChEBI IDs. +""" +import urllib.request +import time +import sys +import pandas as pd +from bs4 import BeautifulSoup +sys.path.append('C:\\Users\\mumec\\Desktop\\Dossier_gitlab_local\\traitement_des_données') +from utils import column_recovery + + +def chebi_horizontal(file, outfile_ful_name, n=0, sep=";", flow=None): + """ + Retrieve, for each ChEBI ID in column n of the file, the related ChEBI IDs (conjugate base, conjugate acid, tautomer, enantiomer) from the ChEBI website, and write them to an xlsx file. + + Arg: + file : a csv file to read (a list of ChEBI IDs if flow is not None) + outfile_ful_name : xlsx file to write + n : number of the column to read + sep : type of separator + flow : if not None, file is used directly as a list of IDs + + Returns: + Type of return: list of [base, acid, tautomer, enantiomer], one per input ID + """ + t1 = time.time() + if flow is None: + l_chebi_to_test = column_recovery(file, n, sep=sep) + else: + l_chebi_to_test = file + l_equ_to_search = ["is conjugate base of", "is conjugate acid of", + "is tautomer of", "is enantiomer of"] + chebi_conj_b = ["is conjugate base of"] + same_b = ["Same as input Chebi"] + chebi_conj_a = ["is conjugate acid of"] + same_a = ["Same as input Chebi"] + chebi_taut_of = ["is tautomer of"] + chebi_enant_of = ["is enantiomer of"] + chebi_n = ["Name in database"] + all_chebi_asso = [] + for ac_chebi in l_chebi_to_test: + ac_chebi = ac_chebi.strip() + ac_chebi = ac_chebi.upper() + if ac_chebi == "NA": + chebi_n.append("NA") + chebi_conj_b.append("NA") + chebi_conj_a.append("NA") + chebi_taut_of.append("NA") + chebi_enant_of.append("NA") + same_a.append("NA") + same_b.append("NA") + all_chebi_asso.append(["NA", "NA", "NA", "NA"]) + else: + rl = ac_chebi + "&treeView=True#visualisation" + url = "https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:" + rl + soup = BeautifulSoup(urllib.request.urlopen(url).read().decode("utf-8"), "html.parser") + text = soup.get_text() + i_beg_p_name = text.find("ChEBI Name") + inter_name = text[i_beg_p_name: i_beg_p_name+300] + i_end_p_name = inter_name.find("ChEBI ID") + name_chebi = text[i_beg_p_name + 10: i_beg_p_name + i_end_p_name] + name_chebi = name_chebi.strip() + imp_index = [] + for info in l_equ_to_search: + imp_index.append(text.find(info)) + chebi_asso = [] + for indice in imp_index: + i_betw_d = text[indice:indice+220] + i_cheb = i_betw_d.find("CHEBI") + if i_cheb == -1: + chebi_asso.append("NA") + else: + if ac_chebi == i_betw_d[i_cheb:i_cheb+len(ac_chebi)]: + chebi_asso.append(ac_chebi.replace(")", "")) + else: + trans = i_betw_d[i_cheb:i_cheb+13] + trans = trans.replace(")", "").strip() + chebi_asso.append(trans) + chebi_n.append(name_chebi) + chebi_conj_b.append(chebi_asso[0]) + chebi_conj_a.append(chebi_asso[1]) + chebi_taut_of.append(chebi_asso[2]) + chebi_enant_of.append(chebi_asso[3]) + if ac_chebi in chebi_asso[0]: + same_b.append("oui") + else: + same_b.append("non") + if ac_chebi in chebi_asso[1]: + same_a.append("oui") + else: + same_a.append("non") + all_chebi_asso.append(chebi_asso) + l_chebi_to_test.insert(0, "Chebi exacte") + resultats = [chebi_n, l_chebi_to_test, chebi_conj_b, same_b, chebi_conj_a, + same_a, chebi_taut_of, chebi_enant_of] + df_exp = pd.DataFrame(resultats) + excel_file = pd.ExcelWriter(outfile_ful_name) # pylint: disable=abstract-class-instantiated + df_exp.to_excel(excel_file, index=False, header=False) + excel_file.close() + t2 = time.time() + time_of_running = t2-t1 + print("Time to retrieve the horizontal ChEBI relations: ", time_of_running) + return all_chebi_asso + + +def chebi_in_outgouing(file, n, outfill_ful_name, sep=";"): + """ + Retrieve all Outgoing and Incoming ontology links for each ChEBI ID in column n of the file, and write them to an xlsx file. + + Arg: + file : A csv file to read
outfill_ful_name : xlsx file to write + sep : type of separator + + Returns: + Type of return: list + """ + alllinks = ["is a", "has part", "is conjugate base of", + "is conjugate acid of", "is tautomer of", "is enantiomer of", + "has functional parent", "has parent hydride", + "is substituent group from", "has role"] + t1 = time.time() + l_chebi_to_test = column_recovery(file, n, sep=sep) + l_equ_to_search = ["Outgoing", "Incoming"] + + outgoing_ia = ["Outgoing : is a"] + outgoing_hp = ["Outgoing : has part"] + outgoing_bo = ["Outgoing : is conjugate base of"] + outgoing_ao = ["Outgoing : is conjugate acid of"] + outgoing_to = ["Outgoing : is tautomer of"] + outgoing_eo = ["Outgoing : is enantiomer of"] + outgoing_fp = ["Outgoing : has functional parent"] + outgoing_ph = ["Outgoing : has parent hydride"] + outgoing_sgf = ["Outgoing : is substituent group from"] + outgoing_hr = ["Outgoing : has role"] + l_outgoing = [outgoing_ia, outgoing_hp, outgoing_bo, outgoing_ao, + outgoing_to, outgoing_eo, outgoing_fp, outgoing_ph, + outgoing_sgf, outgoing_hr] + + incoming_ia = ["Incoming : is a"] + incoming_hp = ["Incoming : has part"] + incoming_bo = ["Incoming : is conjugate base of"] + incoming_ao = ["Incoming : is conjugate acid of"] + incoming_to = ["Incoming : is tautomer of"] + incoming_eo = ["Incoming : is enantiomer of"] + incoming_fp = ["Incoming : has functional parent"] + incoming_ph = ["Incoming : has parent hydride"] + incoming_sgf = ["Incoming : is substituent group from"] + incoming_hr = ["Incoming : has role"] + l_incoming = [incoming_ia, incoming_hp, incoming_bo, incoming_ao, + incoming_to, incoming_eo, incoming_fp, incoming_ph, + incoming_sgf, incoming_hr] + + outgoing_ia_c = ["Outgoing : is a (commentaire)"] + outgoing_hp_c = ["Outgoing : has part (commentaire)"] + outgoing_bo_c = ["Outgoing : is conjugate base of (commentaire)"] + outgoing_ao_c = ["Outgoing : is conjugate acid of (commentaire)"] + outgoing_to_c = ["Outgoing : is tautomer of (commentaire)"] + outgoing_eo_c = ["Outgoing : is enantiomer of (commentaire)"] + outgoing_fp_c = ["Outgoing : has functional parent (commentaire)"] + outgoing_ph_c = ["Outgoing : has parent hydride (commentaire)"] + outgoing_sgf_c = ["Outgoing : is substituent group from (commentaire)"] + outgoing_hr_c = ["Outgoing : has role (commentaire)"] + l_outgoing_c = [outgoing_ia_c, outgoing_hp_c, outgoing_bo_c, outgoing_ao_c, + outgoing_to_c, outgoing_eo_c, outgoing_fp_c, outgoing_ph_c, + outgoing_sgf_c, outgoing_hr_c] + + incoming_ia_c = ["Incoming : is a (commentaire)"] + incoming_hp_c = ["Incoming : has part (commentaire)"] + incoming_bo_c = ["Incoming : is conjugate base of (commentaire)"] + incoming_ao_c = ["Incoming : is conjugate acid of (commentaire)"] + incoming_to_c = ["Incoming : is tautomer of (commentaire)"] + incoming_eo_c = ["Incoming : is enantiomer of (commentaire)"] + incoming_fp_c = ["Incoming : has functional parent (commentaire)"] + incoming_ph_c = ["Incoming : has parent hydride (commentaire)"] + incoming_sgf_c = ["Incoming : is substituent group from (commentaire)"] + incoming_hr_c = ["Incoming : has role (commentaire)"] + l_incoming_c = [incoming_ia_c, incoming_hp_c, incoming_bo_c, incoming_ao_c, + incoming_to_c, incoming_eo_c, incoming_fp_c, incoming_ph_c, + incoming_sgf_c, incoming_hr_c] + + outgoing = ["Outgoing"] + incoming = ["Incoming"] + chebi_n = ["Chebi exacte Name"] + l_oic = [outgoing, incoming, chebi_n] + + for ac_chebi in l_chebi_to_test: + ac_chebi = ac_chebi.strip() + ac_chebi = ac_chebi.upper() + if ac_chebi 
== "NA": + for out_now in l_outgoing: + out_now.append("NA") + for in_now in l_incoming: + in_now.append("NA") + for oic in l_oic: + oic.append("NA") + else: + url = "https://www.ebi.ac.uk/chebi/searchId.do?chebiId="+ac_chebi + soup = BeautifulSoup(urllib.request.urlopen(url).read().decode("utf-8"), "html.parser") + text = soup.get_text() + i_beg_p_name = text.find("ChEBI Name") + inter_name = text[i_beg_p_name: i_beg_p_name+300] + i_end_p_name = inter_name.find("ChEBI ID") + name_chebi = text[i_beg_p_name+10: i_beg_p_name + i_end_p_name] + name_chebi = name_chebi.strip() + imp_index = [] + for info in l_equ_to_search: + imp_index.append(text.find(info)) + imp_index.append(text.find("IUPAC Name")) + + all_cor_out_ia = [] + all_cor_out_hp = [] + all_cor_out_bo = [] + all_cor_out_ao = [] + all_cor_out_to = [] + all_cor_out_eo = [] + all_cor_out_fp = [] + all_cor_out_ph = [] + all_cor_out_sgf = [] + all_cor_out_hr = [] + + l_all_cor_out = [all_cor_out_ia, all_cor_out_hp, + all_cor_out_bo, all_cor_out_ao, + all_cor_out_to, all_cor_out_eo, + all_cor_out_fp, all_cor_out_ph, + all_cor_out_sgf, all_cor_out_hr] + + all_cor_in_ia = [] + all_cor_in_hp = [] + all_cor_in_bo = [] + all_cor_in_ao = [] + all_cor_in_to = [] + all_cor_in_eo = [] + all_cor_in_fp = [] + all_cor_in_ph = [] + all_cor_in_sgf = [] + all_cor_in_hr = [] + + l_all_c_i = [all_cor_in_ia, all_cor_in_hp, + all_cor_in_bo, all_cor_in_ao, + all_cor_in_to, all_cor_in_eo, + all_cor_in_fp, all_cor_in_ph, + all_cor_in_sgf, all_cor_in_hr] + + all_cor_out_ia_c = [] + all_cor_out_hp_c = [] + all_cor_out_bo_c = [] + all_cor_out_ao_c = [] + all_cor_out_to_c = [] + all_cor_out_eo_c = [] + all_cor_out_fp_c = [] + all_cor_out_ph_c = [] + all_cor_out_sgf_c = [] + all_cor_out_hr_c = [] + + l_all_cor_out_c = [all_cor_out_ia_c, all_cor_out_hp_c, + all_cor_out_bo_c, all_cor_out_ao_c, + all_cor_out_to_c, all_cor_out_eo_c, + all_cor_out_fp_c, all_cor_out_ph_c, + all_cor_out_sgf_c, all_cor_out_hr_c] + + all_cor_in_ia_c = [] + all_cor_in_hp_c = [] + all_cor_in_bo_c = [] + all_cor_in_ao_c = [] + all_cor_in_to_c = [] + all_cor_in_eo_c = [] + all_cor_in_fp_c = [] + all_cor_in_ph_c = [] + all_cor_in_sgf_c = [] + all_cor_in_hr_c = [] + all_cor_out = [] + all_cor_in = [] + + l_all_c_i_c = [all_cor_in_ia_c, all_cor_in_hp_c, + all_cor_in_bo_c, all_cor_in_ao_c, + all_cor_in_to_c, all_cor_in_eo_c, + all_cor_in_fp_c, all_cor_in_ph_c, + all_cor_in_sgf_c, all_cor_in_hr_c] + + for posi_index in range(2): + indice = imp_index[posi_index] + i_betw_d = text[indice:imp_index[posi_index+1]] + for link in alllinks: + all_posi_of_this_link = [] + len_link = len(link) + for posi in range(len(i_betw_d)-len_link): + if i_betw_d[posi: posi+len_link] == link: + all_posi_of_this_link.append(posi) + if len(all_posi_of_this_link) != 0: + if posi_index == 0: + for posi in all_posi_of_this_link: + for s_pos_fin in range(len(i_betw_d[posi:])-len(name_chebi)): + if i_betw_d[posi+s_pos_fin] == "(": + ind_beg = s_pos_fin + 1 + if i_betw_d[posi+s_pos_fin] == ")" and ind_beg+7 < s_pos_fin < ind_beg+15: + ind_end = s_pos_fin + break + results = i_betw_d[posi+ind_beg:posi+ind_end] + l_all_cor_out[alllinks.index(link)].append(results) + for po_c in all_posi_of_this_link: + for s_pf_c in range(len(i_betw_d[po_c:])-len(name_chebi)): + if i_betw_d[po_c+s_pf_c: po_c+s_pf_c+5] == "CHEBI": + ind_end_c = s_pf_c + break + rtcc = i_betw_d[po_c:po_c+ind_end_c+13] + results_nc = (rtcc.replace("\n", "")).replace(" ", "") + results_c = results_nc.replace(",", "_") + 
l_all_cor_out_c[alllinks.index(link)].append(results_c) + for posi2 in all_posi_of_this_link: + for s_pos_fin in range(len(i_betw_d[posi2:])-len(name_chebi)): + if i_betw_d[posi2+s_pos_fin: posi2+s_pos_fin+5] == "CHEBI": + ind_end = s_pos_fin + break + result_tc = i_betw_d[posi2:posi2+ind_end+13] + results = ((result_tc.replace("\n", "")).replace(" ", "")).replace(",", "_") + all_cor_out.append(results) + if posi_index == 1: + for posi in all_posi_of_this_link: + s_pos_beg = 0 + for iterator in range(len(i_betw_d[posi:])-len(name_chebi)): + if i_betw_d[posi - 1 - iterator] == ")": + ind_beg = - 1 - iterator + if i_betw_d[posi - 1 - iterator] == "(": + ind_end = - iterator + break + results = i_betw_d[posi+ind_end:posi+ind_beg] + l_all_c_i[alllinks.index(link)].append(results) + + for po_c1 in all_posi_of_this_link: + for s_pf_c1 in range(len(i_betw_d[po_c1:])-len(name_chebi)): + if i_betw_d[po_c1+s_pf_c1: po_c1+s_pf_c1+5] == "CHEBI": + ind_end_c1 = s_pf_c1 + break + for iterator_c1 in range(len(i_betw_d[po_c1:])-len(name_chebi)): + if i_betw_d[po_c1 - iterator_c1 - 1: po_c1 - iterator_c1 - 1 + len(ac_chebi)] == ac_chebi: + if po_c1 - iterator_c1 - 1 + len(ac_chebi) + 5 < 0: + ind_beg_c1 = -po_c1 + else: + ind_beg_c1 = 5 - 1 - iterator_c1 + len(ac_chebi) + break + result_tc_c1 = i_betw_d[po_c1+ind_beg_c1:po_c1+ind_end_c1+13] + results_c1 = ((result_tc_c1.replace("\n", "")).replace(" ", "")).replace(",", "_") + l_all_c_i_c[alllinks.index(link)].append(results_c1) + + for posi3 in all_posi_of_this_link: + for s_pos_fin in range(len(i_betw_d[posi3:])-len(name_chebi)): + if i_betw_d[posi3+s_pos_fin: posi3+s_pos_fin+5] == "CHEBI": + ind_end = s_pos_fin + break + s_pos_beg = 0 + for iterator in range(len(i_betw_d[posi3:])-len(name_chebi)): + s_pos_beg -= 1 + if i_betw_d[posi3+s_pos_beg: posi3+s_pos_beg+len(ac_chebi)] == ac_chebi: + if posi3+s_pos_beg+len(ac_chebi)+5 < 0: + ind_beg = -posi3 + else: + ind_beg = s_pos_beg+len(ac_chebi)+5 + break + # posi3 is the loop variable here; posi belongs to the loop above + result_tc = i_betw_d[posi3+ind_beg:posi3+ind_end+13] + results = ((result_tc.replace("\n", "")).replace(" ", "")).replace(",", "_") + all_cor_in.append(results) + + all_l_temp = [all_cor_out_ia, all_cor_out_hp, all_cor_out_bo, + all_cor_out_ao, all_cor_out_to, all_cor_out_eo, + all_cor_out_fp, all_cor_out_ph, all_cor_out_sgf, + all_cor_out_hr, all_cor_in_ia, all_cor_in_hp, + all_cor_in_bo, all_cor_in_ao, all_cor_in_to, + all_cor_in_eo, all_cor_in_fp, all_cor_in_ph, + all_cor_in_sgf, all_cor_in_hr, all_cor_out_ia_c, + all_cor_out_hp_c, all_cor_out_bo_c, all_cor_out_ao_c, + all_cor_out_to_c, all_cor_out_eo_c, all_cor_out_fp_c, + all_cor_out_ph_c, all_cor_out_sgf_c, + all_cor_out_hr_c, all_cor_in_ia_c, all_cor_in_hp_c, + all_cor_in_bo_c, all_cor_in_ao_c, all_cor_in_to_c, + all_cor_in_eo_c, all_cor_in_fp_c, all_cor_in_ph_c, + all_cor_in_sgf_c, all_cor_in_hr_c] + for i_verif, verif in enumerate(all_l_temp): + if verif == []: + all_l_temp[i_verif].append("NA") + outgoing.append(all_cor_out) + incoming.append(all_cor_in) + chebi_n.append(ac_chebi) + + tot_try = l_outgoing + l_incoming + l_outgoing_c + l_incoming_c + for i_alt, a_l_t in enumerate(all_l_temp): + tot_try[i_alt].append(a_l_t) + + all_l_temp.insert(0, all_cor_out) + all_l_temp.insert(0, all_cor_in) + + total = [chebi_n, outgoing, outgoing_ia, outgoing_hp, outgoing_bo, + outgoing_ao, outgoing_to, outgoing_eo, outgoing_fp, outgoing_ph, + outgoing_sgf, outgoing_hr, incoming, incoming_ia, incoming_hp, + incoming_bo, incoming_ao, incoming_to, incoming_eo, incoming_fp, + incoming_ph, incoming_sgf,
incoming_hr] + + r_out_ia = [chebi_n, outgoing_ia, outgoing_ia_c] + r_out_hp = [chebi_n, outgoing_hp, outgoing_hp_c] + r_out_bo = [chebi_n, outgoing_bo, outgoing_bo_c] + r_out_ao = [chebi_n, outgoing_ao, outgoing_ao_c] + r_out_to = [chebi_n, outgoing_to, outgoing_to_c] + r_out_eo = [chebi_n, outgoing_eo, outgoing_eo_c] + r_out_fp = [chebi_n, outgoing_fp, outgoing_fp_c] + r_out_ph = [chebi_n, outgoing_ph, outgoing_ph_c] + r_out_sgf = [chebi_n, outgoing_sgf, outgoing_sgf_c] + r_out_hr = [chebi_n, outgoing_hr, outgoing_hr_c] + + r_in_ia = [chebi_n, incoming_ia, incoming_ia_c] + r_in_hp = [chebi_n, incoming_hp, incoming_hp_c] + r_in_bo = [chebi_n, incoming_bo, incoming_bo_c] + r_in_ao = [chebi_n, incoming_ao, incoming_ao_c] + r_in_to = [chebi_n, incoming_to, incoming_to_c] + r_in_eo = [chebi_n, incoming_eo, incoming_eo_c] + r_in_fp = [chebi_n, incoming_fp, incoming_fp_c] + r_in_ph = [chebi_n, incoming_ph, incoming_ph_c] + r_in_sgf = [chebi_n, incoming_sgf, incoming_sgf_c] + r_in_hr = [chebi_n, incoming_hr, incoming_hr_c] + + df_out_ia = pd.DataFrame(r_out_ia) + df_out_hp = pd.DataFrame(r_out_hp) + df_out_bo = pd.DataFrame(r_out_bo) + df_out_ao = pd.DataFrame(r_out_ao) + df_out_to = pd.DataFrame(r_out_to) + df_out_eo = pd.DataFrame(r_out_eo) + df_out_fp = pd.DataFrame(r_out_fp) + df_out_ph = pd.DataFrame(r_out_ph) + df_out_substituent_group = pd.DataFrame(r_out_sgf) + df_out_hr = pd.DataFrame(r_out_hr) + + df_in_ia = pd.DataFrame(r_in_ia) + df_in_hp = pd.DataFrame(r_in_hp) + df_in_bo = pd.DataFrame(r_in_bo) + df_in_ao = pd.DataFrame(r_in_ao) + df_in_to = pd.DataFrame(r_in_to) + df_in_eo = pd.DataFrame(r_in_eo) + df_in_fp = pd.DataFrame(r_in_fp) + df_in_ph = pd.DataFrame(r_in_ph) + df_isg = pd.DataFrame(r_in_sgf) + df_ihr = pd.DataFrame(r_in_hr) + df_total = pd.DataFrame(total) + + all_df = [df_total, df_out_ia, df_out_hp, df_out_bo, df_out_ao, df_out_to, + df_out_eo, df_out_fp, df_out_ph, df_out_substituent_group, + df_out_hr, df_in_ia, df_in_hp, df_in_bo, df_in_ao, df_in_to, + df_in_eo, df_in_fp, df_in_ph, df_isg, df_ihr] + + sheets_name = ['Total', 'Out is a', 'Out has part', 'Out base of', + 'Out acid of', 'Out tautomer of', 'Out enantiomer of', + 'Out functional parent', 'Out parent hydride', + 'Out substituent group', 'Out has role', 'In is a', + 'In has part', 'In base of', 'In acid of', 'In tautomer of', + 'In enantiomer of', 'In functional parent', + 'In parent hydride', 'In substituent group', 'In has role'] + + excel_file = pd.ExcelWriter(outfill_ful_name) # pylint: disable=abstract-class-instantiated + for i_l_s, l_sheet in enumerate(all_df): + l_sheet.to_excel(excel_file, sheet_name=sheets_name[i_l_s], index=False, header=False) + excel_file.close() + t2 = time.time() + time_of_running = t2-t1 + print("Time to retrieve the outgoing and incoming links: ", time_of_running) + + +if __name__ == "__main__": + LOCAL = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\recovery_asso_chebi\\" + OUTF1 = LOCAL + "outgoing_test.xlsx" + OUTF2 = LOCAL + "horizontal_test.xlsx" + INF = LOCAL + "l_chebi_to_change_oeil_for_test.csv" + COL = [1, 3] + chebi_in_outgouing(INF, 0, OUTF1) + print(chebi_horizontal(INF, OUTF2)) diff --git a/src/processing_mapping_results/chebi_id/recovery_one_type_id.py b/src/processing_mapping_results/chebi_id/recovery_one_type_id.py new file mode 100644 index 0000000000000000000000000000000000000000..18212d725f20514cf5eb5a42fde72af1c56d9373 --- /dev/null +++ b/src/processing_mapping_results/chebi_id/recovery_one_type_id.py @@ -0,0 +1,38 @@ +""" +Find all IDs of one given type. +""" +import pandas as pd + + +def
recovery_same_id(input_file, outfill_ful_name, id_search, nb_col_d): + """ + Keep, for each line of the input file, the first nb_col_d columns and every ID matching id_search. + + Arg: + input_file : an excel file to read + id_search : shape of the ID to search (e.g. "hmdb:") + outfill_ful_name : file to write + nb_col_d : number of leading data columns to keep as-is + + Returns: + Type of return: xlsx file + """ + tot_file = pd.read_excel(input_file, header=None) + enter_tab = tot_file.to_numpy(dtype='str') + all_out_data = [] + for ite_lines in range(len(enter_tab)): + id_lines = [] + for ite_col in range(len(enter_tab[0])): + if ite_col < nb_col_d: + id_lines.append(enter_tab[ite_lines, ite_col]) + elif id_search in enter_tab[ite_lines, ite_col]: + if id_search == "hmdb:": + # keep only full-length HMDB IDs ("hmdb:" + 11 characters) + if len(enter_tab[ite_lines, ite_col].strip()) == 16: + id_lines.append(enter_tab[ite_lines, ite_col].strip()) + else: + id_lines.append(enter_tab[ite_lines, ite_col].strip()) + all_out_data.append(id_lines) + df_sortie = pd.DataFrame(all_out_data) + excel_file = pd.ExcelWriter(outfill_ful_name) # pylint: disable=abstract-class-instantiated + df_sortie.to_excel(excel_file) + excel_file.close() diff --git a/src/processing_mapping_results/chebi_id/shapping_id.py b/src/processing_mapping_results/chebi_id/shapping_id.py new file mode 100644 index 0000000000000000000000000000000000000000..9b91fe582da9994fe34946d632f099ba01bbf728 --- /dev/null +++ b/src/processing_mapping_results/chebi_id/shapping_id.py @@ -0,0 +1,119 @@ +""" +This module is designed to reshape IDs for use in other treatments. +""" +import csv +import pandas as pd + + +def recup_col(file, n, sep=";", enc=None): + """ + Put column n of the file in a list. + + Arg: + file : A csv file to read + n : the number of the column to read + sep : type of separator + enc : encoding of the file + + Returns: + Type of return: list + """ + with open(file, "r", encoding=enc) as f: + r = csv.reader(f, delimiter=sep) + lines = list(r) + res = [] + if abs(n) < len(lines[0]): + for line in lines: + if line[n].strip() != '': + res.append(line[n].strip()) + return res + + +def prefix_cut(file, fout, num_colonnes, prefix): + """ + Write the lists of IDs without their prefixes. + + Arg: + file = csv file of metabolites + fout = excel file to write + num_colonnes = list of the column numbers to change + prefix = list of the prefixes to remove, one for each column + + Returns: + Type of return: excel file + """ + total = [] + for i_pos, pos in enumerate(num_colonnes): + clean_list = [] + actu_list = recup_col(file, pos) + if prefix[i_pos] != '': + pref = prefix[i_pos] + for item in actu_list: + # remove the prefix itself; item.strip(pref) would drop any of its characters + if item.startswith(pref): + clean_list.append(item[len(pref):]) + else: + clean_list.append(item) + total.append(clean_list) + else: + total.append(actu_list) + l_out = pd.DataFrame(total, dtype=object) + e_f_o = pd.ExcelWriter(fout) # pylint: disable=abstract-class-instantiated + l_out.to_excel(e_f_o) + e_f_o.close() + + +def ajout_prefix(file, fout, numbers_col, prefix): + """ + Add a prefix to each ID of the selected columns. + + Arg: + file = csv file of metabolites + fout = excel file to write + numbers_col = list of the column numbers to change + prefix = list of the prefixes to add, one for each column + + Returns: + Type of return: excel file + """ + all_lists = [] + for i_numbers, numbers in enumerate(numbers_col): + l_out = [] + liste_entree = recup_col(file, numbers) + pref = prefix[i_numbers] + if pref == '': + all_lists.append(liste_entree) + else: + for l_inp in liste_entree: + if l_inp in ("", "NA"): + l_out.append("NA") + else: + l_out.append(pref + l_inp) + all_lists.append(l_out) + + liste_final = pd.DataFrame(all_lists, dtype=object) + e_f_o = pd.ExcelWriter(fout) # pylint:
disable=abstract-class-instantiated + liste_final.to_excel(e_f_o) + e_f_o.close() + + +def cut_hmdb(file, fout, n_col_tot, n_col_treat, n_numbers_keep): + """ + Shorten the HMDB IDs of one column, keeping the 4-character prefix and the last n_numbers_keep digits. + + Arg: + file : csv file of metabolites + fout : excel file to write + n_col_tot : total number of columns in the file + n_col_treat : number of the column to treat + n_numbers_keep : number of trailing digits to keep + + Returns: + Type of return: excel file + """ + total = [] + for pos in range(n_col_tot): + actu_list = recup_col(file, pos) + if pos == n_col_treat: + clean_list = [] + for item in actu_list: + clean_list.append(item[0:4] + item[len(item)-n_numbers_keep:]) + total.append(clean_list) + else: + total.append(actu_list) + l_out = pd.DataFrame(total, dtype=object) + e_f_o = pd.ExcelWriter(fout) # pylint: disable=abstract-class-instantiated + l_out.to_excel(e_f_o) + e_f_o.close() diff --git a/complete_processing_of_mapping_results.py b/src/processing_mapping_results/complete_processing_of_mapping_results.py similarity index 100% rename from complete_processing_of_mapping_results.py rename to src/processing_mapping_results/complete_processing_of_mapping_results.py diff --git a/src/processing_mapping_results/conf.py b/src/processing_mapping_results/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..a7348efaef4995d2a90f5258f754771172e04dac --- /dev/null +++ b/src/processing_mapping_results/conf.py @@ -0,0 +1,27 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Metabolite mapping' +copyright = '2024, UMEC Mathieu' +author = 'UMEC Mathieu' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] diff --git a/Id_matcher_correspondence_of_biosource_and_dataset.py b/src/processing_mapping_results/id_matcher_correspondence_of_biosource_and_dataset.py similarity index 100% rename from Id_matcher_correspondence_of_biosource_and_dataset.py rename to src/processing_mapping_results/id_matcher_correspondence_of_biosource_and_dataset.py diff --git a/Id_matcher_frequency_and_pathways_recovery.py b/src/processing_mapping_results/id_matcher_frequency_and_pathways_recovery.py similarity index 100% rename from Id_matcher_frequency_and_pathways_recovery.py rename to src/processing_mapping_results/id_matcher_frequency_and_pathways_recovery.py diff --git a/Mapping_using_the_API.py b/src/processing_mapping_results/mapping_using_api.py similarity index 100% rename from Mapping_using_the_API.py rename to src/processing_mapping_results/mapping_using_api.py diff --git a/network_visualization.py b/src/processing_mapping_results/network_visualization.py similarity index 100% rename from network_visualization.py rename to src/processing_mapping_results/network_visualization.py diff --git a/src/processing_mapping_results/pdf_generation_toolbox.py
b/src/processing_mapping_results/pdf_generation_toolbox.py new file mode 100644 index 0000000000000000000000000000000000000000..d18f30e330e2f9ba8dbedafc959644f2eddf314d --- /dev/null +++ b/src/processing_mapping_results/pdf_generation_toolbox.py @@ -0,0 +1,319 @@ +""" +This module is designed to create the PDF of mapping results. +""" +from fpdf import FPDF +from docx import Document + + +class PDF(FPDF): + """ + Tools for creating PDFs from scratch with the FPDF library. + """ + + def footer(self): + """ + Set footer. + """ + self.set_y(-15) + self.set_font('DejaVu', 'I', 14) + self.cell(0, 10, f"{self.page_no()}/{{nb}}", align="C") + + def chapter_title(self, num, label): + """ + Write the chapter title. + + Parameters: + num = chapter number. + label = chapter name. + """ + self.set_font('DejaVu', 'B', 18) + self.cell(0, 6, f"{num} - {label}", new_x="LMARGIN", new_y="NEXT") + self.ln(4) + + def chapter_body(self, txt): + """ + Write the chapter body. + + Parameters: + txt = the character string of the entire section body. + """ + with self.text_columns( + ncols=1, gutter=5, text_align="J", line_height=1.19 + ) as cols: + self.set_font('DejaVu', '', 12) + cols.write(txt) + cols.ln() + self.set_font(style="I") + + def summary(self, l_title_section): + """ + Write the summary. + + Parameters: + l_title_section = list of section titles. + """ + self.set_font('DejaVu', 'B', 18) + self.cell(0, 6, "Table of contents", new_x="LMARGIN", new_y="NEXT") + self.set_font('DejaVu', '', 12) + self.set_fill_color(255, 255, 255) + for title in l_title_section: + self.cell(0, 6, f"{title}", new_x="LMARGIN", new_y="NEXT") + self.ln(4) + + def tab(self, tabbleble): + """ + Write a basic table. + + Parameters: + tabbleble = table in list form. + """ + if len(tabbleble[0]) == 2: + self.set_font('DejaVu', 'I', 16) + else: + # shrink the font so that wide tables still fit on the page + self.set_font('DejaVu', 'I', 56/len(tabbleble[0])) + with self.table(line_height=8, text_align="CENTER") as table: + for data_row in tabbleble: + row = table.row() + for datum in data_row: + row.cell(datum) + + def chapter_s(self, num, title, l_paraph): + """ + Write a chapter with more than 1 paragraph. + + Parameters: + num = chapter number. + title = chapter name. + l_paraph = list of string, 1 for each paragraph. + """ + self.shaped_page() + self.chapter_title(num, title) + for str_l_paraph in l_paraph: + self.chapter_body(str_l_paraph) + + def chapter_s_t(self, num, title, l_str, l_tab): + """ + Write a chapter with more than 1 paragraph and at least 1 table. + + Parameters: + num = chapter number. + title = chapter name. + l_str = list of string, 1 for each paragraph. + l_tab = list of tab, 1 value for each paragraph. "NA" if no table. + """ + self.shaped_page() + self.chapter_title(num, title) + for i_paraf, paraf in enumerate(l_str): + self.chapter_body(paraf) + if i_paraf < len(l_tab) and l_tab[i_paraf] != "NA": + self.tab(l_tab[i_paraf]) + + def chapter_s_t_i(self, num, title, l_str, l_tab, l_image): + """ + Write a chapter with more than 1 paragraph and at least 1 table or image. + + Parameters: + num = chapter number. + title = chapter name. + l_str = list of string, 1 for each paragraph. + l_tab = list of table, 1 value for each paragraph. "NA" if no table. + l_image = list of image, 1 value for each paragraph. "NA" if no image.
+ """ + self.shaped_page() + self.chapter_title(num, title) + for i_paraf, paraf in enumerate(l_str): + if i_paraf < len(l_image) and l_image[i_paraf] != "NA": + taille = 200 + x = 105 - taille/2 + y = 70 + self.image(l_image[i_paraf], x, y, taille) + self.shaped_page() + with self.text_columns( + ncols=1, gutter=5, text_align="C", line_height=1.19, + ) as cols: + self.set_font('DejaVu', '', 12) + cols.write(paraf) + cols.ln() + if i_paraf < len(l_tab) and l_tab[i_paraf] != "NA": + self.tab(l_tab[i_paraf]) + else: + self.chapter_body(paraf) + self.set_font(style="I") + if i_paraf < len(l_tab) and l_tab[i_paraf] != "NA": + self.tab(l_tab[i_paraf]) + + def shaped_page(self): + """ + displays a page with the correct format. + """ + self.add_page() + self.image("logo_metabohub_metex.png", 175, 4, 33) # X, Y, taille + self.image("Ensemble_des_logos_pdp.png", 20, 275, 170) + self.ln(15) + + def guard_page(self, str_tw): + """ + displays the guard page. + + Parameters: + str_tw = list of string, 1 for each paragraph. + """ + self.shaped_page() + self.set_font('DejaVu', 'B', 15) + self.cell(30, 10, str_tw[0], align="C") + self.ln(20) + for n_str_tw in str_tw[1:]: + with self.text_columns( + ncols=1, gutter=5, text_align="C", line_height=1.19 + ) as cols: + self.set_font('DejaVu', '', 12) + cols.write(n_str_tw) + cols.ln() + + def end_doc(self, path_end_doc): + """ + displays the end of doc. + + Parameter: + path_end_doc = the path of the docx. file of end pages + """ + cont_end = read_docx(path_end_doc) + mh = "METABOHUB, the National infrastructure in metabolomics & fluxomics" + begin_2pages = cont_end.index(mh) + self.set_font('DejaVu', 'B', 18) + self.cell(0, 6, cont_end[0], new_x="LMARGIN", new_y="NEXT") + self.ln(4) + for str_end1 in cont_end[1:begin_2pages]: + self.chapter_body(str_end1) + self.shaped_page() + self.set_font('DejaVu', 'B', 16) + self.cell(0, 6, cont_end[begin_2pages], new_x="LMARGIN", new_y="NEXT") + self.ln(4) + for str_end1 in cont_end[begin_2pages+1:]: + self.chapter_body(str_end1) + pypdf_f = "C:\\Users\\mumec\\Desktop\\Mini_codes\\pdf_PyPDF2\\" + self.image(pypdf_f + "carte_france_fin_doc.png", 90, 100, 100) + self.image(pypdf_f + "bandeau_fin_doc.png", 15, 100, 60) + + +def read_docx(file_path): + """ + Recover text from a docx file. + + file_path = Path of the file to read. + """ + doc = Document(file_path) + full_text = [] + for para in doc.paragraphs: + full_text.append(para.text) + return full_text + + +def out_pdf_mapping(file_path, data_input, chebi_hori, recap, results_map_ramp, + results_map_cpdb, out_folder, l_visu): + """ + Recover text from a docx file. + + Parameters: + file_path = Path of the file of example. + data_input = tab of data + chebi_hori = tab of chebi aso to the id + recap = tab of recap id of mapping + results_map_ramp = tab of ramp results + results_map_cpdb = tab of cpdb results + out_folder = folder path where write pdf + l_visu = list of path of image to placed + + return: + one PDF of global mapping results. 
+ """ + result_pdf = PDF() + result_pdf.add_font('DejaVu', 'I', 'DejaVuSansCondensed.ttf') + result_pdf.add_font('DejaVu', 'B', 'DejaVuSansCondensed.ttf') + result_pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf') + result_pdf.set_font('DejaVu', '', 14) + content = read_docx(file_path) + index_toc = content.index('Table of contents') + l_title = [] + titles = [] + for i_strr, strr in enumerate(content[index_toc+1:]): + if strr != '': + if strr[1] == ' ': + l_title.append(strr) + titles.append(strr[2:]) + else: + index_end_toc = i_strr + break + begin_pages = [0, index_toc] + for i_sbp, search_begin_page in enumerate(content[index_end_toc:]): + if search_begin_page in titles: + begin_pages.append(index_end_toc+i_sbp) + result_pdf.guard_page(content[0:begin_pages[1]]) + result_pdf.shaped_page() + result_pdf.summary(l_title) + recap_cpdb = [] + recap_ramp = [] + for line in recap: + recap_ramp.append(line[:2]) + recap_cpdb.append([line[0]]+line[2:-1]) + l_l_tables = [["NA"], ["NA", data_input], ["NA", chebi_hori], + ["NA", results_map_ramp, "NA", "NA", "NA", "NA", recap_ramp], + ["NA"], ["NA", recap_cpdb, "NA", "NA", results_map_cpdb], + ["NA"], ["NA"], ["NA"]] + l_l_images = [["NA"], ["NA"], ["NA"], ["NA"], + ["NA", l_visu[0], "NA", "NA", l_visu[1], "NA", l_visu[2]], + ["NA"], ["NA", "NA", l_visu[3], l_visu[4], "NA", "NA", + l_visu[5], "NA", "NA", "NA", "NA", "NA", l_visu[6]], + ["NA"], ["NA"]] + for p_index, begin in enumerate(begin_pages[2:-1]): + result_pdf.chapter_s_t_i(p_index+1, titles[p_index], + content[begin+1: begin_pages[p_index+3]], + l_l_tables[p_index], l_l_images[p_index]) + result_pdf.chapter_s(len(l_title), titles[-1], content[begin_pages[-1]+1:]) + result_pdf.shaped_page() + result_pdf.end_doc("C:\\Users\\mumec\\Desktop\\Mini_codes\\pdf_PyPDF2\\end_page_for_pdf.docx") + result_pdf.output(out_folder+"test_tab_sortie_pdf_mapping_results.pdf") + + +if __name__ == "__main__": + dinput = [["Current_name", "ID", "Cas", "Temoin"], + ["choline", "chebi:15554, hmdb:HMDB0000267", "0.41", "1.59"], + ["choline", "chebi:15554, hmdb:HMDB0000267", "0.41", "1.59"], + ["choline", "chebi:15554, hmdb:HMDB0000267", "0.41", "1.59"]] + + Chebi_optimal = [["Chebi ex", "is conjugate b of", "is conjugate acide of", + "is tautomer of", "is enantiomer of"], + ["Choline", "15354", "NA", "NA", "NA"], + ["Choline", "15354", "NA", "NA", "NA"], + ["Choline", "15354", "NA", "NA", "NA"]] + + reca = [["Name", "RAMP", "CPDB chebi", "CPDB hmdb"], + ["Choline", "chebi:15554", "NA", "15554"], + ["Ornithine", "NA", "15354", "HMDB0000097"]] + + r_m_r = [["path_Name", "path_source", "pathwayid", "inputid", "commoname"], + ["Choline", "wiki", "WP5552", "chebi:15554", "cho,choline"], + ["Choline", "wiki", "WP5552", "chebi:15554", "cho,choline"], + ["Choline", "wiki", "WP5552", "chebi:15554", "cho,choline"]] + + r_m_c = [["p-value", "q-value", "Pathways", "source", "pathwats links", + "menber input", "input overlap", "size", "effective_size", + "fesetID", "PMIDS", "URLCPDB", "common name mapped"], + ["2.2", "1.2", "Pathways", "wiki", "URL", "HMDB00056450", "5", + "219", "172", "142906", "NA", "URLCPDB", "[choline, ornithine]"], + ["2.2", "1.2", "Pathways", "wiki", "URL", "HMDB00056450", "5", + "219", "172", "142906", "NA", "URLCPDB", "[choline, ornithine]"]] + + FOLD_V = "C:\\Users\\mumec\\Desktop\\fichier_mis_en_forme_programme_total\\main\\" + list_visu = [FOLD_V+"Recouvrement moyen voies métaboliques 0.png", + FOLD_V+"RAMPbar_plot_of_metabolites.png", + FOLD_V+"RAMPmetabolites_bo_of_frequency.png", + 
FOLD_V+"CPDBbar_plot_of_recovery.png", + FOLD_V+"CPDBbar_plot_of_metabolites.png", + FOLD_V+"CPDBmetabolites_bo_of_frequency.png", + FOLD_V+"CPDBup_down_path_plot.png"] + FP = "Modele_de_pdf_feneratio_en_anglais_rev_19-02-2024.docx" + + out_pdf_mapping(FP, dinput, Chebi_optimal, reca, + r_m_r, r_m_c, FOLD_V, list_visu) diff --git a/Tesselation_de_voronoi_modif.ipynb b/src/processing_mapping_results/tesselation_de_voronoi_modif.ipynb similarity index 100% rename from Tesselation_de_voronoi_modif.ipynb rename to src/processing_mapping_results/tesselation_de_voronoi_modif.ipynb diff --git a/utilisation_metaboanalystr.R b/src/processing_mapping_results/utilisation_metaboanalystr.R similarity index 100% rename from utilisation_metaboanalystr.R rename to src/processing_mapping_results/utilisation_metaboanalystr.R diff --git a/utils.py b/src/processing_mapping_results/utils.py similarity index 100% rename from utils.py rename to src/processing_mapping_results/utils.py diff --git a/Visualisation_des_donnes_de_mapping.py b/src/processing_mapping_results/visu_datas_mapping.py similarity index 100% rename from Visualisation_des_donnes_de_mapping.py rename to src/processing_mapping_results/visu_datas_mapping.py