From 02099c63144217844325beb099ebc9ed84590526 Mon Sep 17 00:00:00 2001
From: Olivier Langella <Olivier.Langella@moulon.inra.fr>
Date: Fri, 10 Mar 2017 16:47:27 +0100
Subject: [PATCH] parsing xpip WIP

---
 src/CMakeLists.txt           |   3 +-
 src/input/xpipsaxhandler.cpp | 125 ++++++++++++++++++++++++++++++++---
 src/input/xpipsaxhandler.h   |  28 +++++---
 src/utils/peptidestore.cpp   |  52 +++++++++++++++
 src/utils/peptidestore.h     |  50 ++++++++++++++
 5 files changed, 237 insertions(+), 21 deletions(-)
 create mode 100644 src/utils/peptidestore.cpp
 create mode 100644 src/utils/peptidestore.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1db1f3d7..63f81e5d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,13 +53,14 @@ configure_file (${CMAKE_SOURCE_DIR}/src/config.h.cmake ${CMAKE_SOURCE_DIR}/src/c
 
 # File list
 SET(CPP_FILES
-  utils/readspectrum.cpp
   core/project.cpp
   core/match.cpp
   core/identification_sources/identificationdatasource.cpp
   core/identification_sources/identificationxtandemfile.cpp
   files/xpipfile.cpp
   input/xpipsaxhandler.cpp
+  utils/peptidestore.cpp
+  utils/readspectrum.cpp
 )
 
 set(QTLIBS ${Qt5Xml_LIBRARIES} ${Qt5Gui_LIBRARIES}  ${Qt5Svg_LIBRARIES})
diff --git a/src/input/xpipsaxhandler.cpp b/src/input/xpipsaxhandler.cpp
index 520c02a7..5b797e3b 100644
--- a/src/input/xpipsaxhandler.cpp
+++ b/src/input/xpipsaxhandler.cpp
@@ -22,6 +22,7 @@
 
 #include "xpipsaxhandler.h"
 #include <pappsomspp/msrun/msrunid.h>
+#include <pappsomspp/exception/exceptionnotfound.h>
 
 XpipSaxHandler::XpipSaxHandler(Project * p_project):_p_project(p_project)
 {
@@ -49,10 +50,14 @@ bool XpipSaxHandler::startElement(const QString & namespaceURI, const QString &
         //<sample value="P6_08_10"/>
         else if (qName == "sample") {
             is_ok = startElement_sample(attributes);
+        } else if (qName == "peptide") {
+            is_ok = startElement_peptide(attributes);
+        } else if (qName == "modifs_mass") {
+            is_ok = startElement_modifs_mass(attributes);
         }
         _current_text.clear();
     }
-    catch (PappsoException exception_pappso) {
+    catch (pappso::PappsoException exception_pappso) {
         _errorStr = QObject::tr("ERROR in XpipSaxHandler::startElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat());
         return false;
     }
@@ -73,6 +78,9 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
         {
             is_ok = endElement_protein();
         }
+        else if (qName == "peptide") {
+            is_ok = endElement_peptide();
+        }
         else if (qName == "sequence") {
             is_ok = endElement_sequence();
         }
@@ -81,7 +89,7 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
         // else if ((_tag_stack.size() > 1) &&
         //         (_tag_stack[_tag_stack.size() - 2] == "detection_moulon"))
     }
-    catch (PappsoException exception_pappso) {
+    catch (pappso::PappsoException exception_pappso) {
         _errorStr = QObject::tr("ERROR in XpipSaxHandler::endElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat());
         return false;
     }
@@ -96,34 +104,78 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
     return is_ok;
 }
 
+bool XpipSaxHandler::startElement_modifs_mass(QXmlAttributes attributes) {
+    /* Handles one <modifs_mass/> element: resolves its modvalue to a modification and caches it under the raw text.
+    <modifs_list_mass><modifs_mass modvalue="-18.01056"/>
+    <modifs_mass modvalue="-17.02655"/>
+    <modifs_mass modvalue="15.99491"/>
+    <modifs_mass modvalue="42.01057"/>
+    <modifs_mass modvalue="42.01056"/>
+    <modifs_mass modvalue="57.02146"/>
+    </modifs_list_mass>
+    */
+    qDebug() << "startElement_modifs_mass ";
+    const QString mass_str(attributes.value("modvalue").simplified());
+    bool is_number = false;
+    const pappso::mz mass = mass_str.toDouble(&is_number);
+    if (!is_number) { // toDouble() returns 0.0 on failure: report the bad text instead of an unknown mass 0
+        throw pappso::PappsoException(QObject::tr("XpipSaxHandler::startElement_modifs_mass => modvalue \"%1\" is not a number").arg(mass_str));
+    }
+    _map_massstr_aamod[mass_str] = getAaModificationP(mass); // throws pappso::ExceptionNotFound for an unlisted mass
+    qDebug() << "startElement_modifs_mass end" ;
+    return true;
+}
+
 //<sample value="P6_21_23"/>
 bool XpipSaxHandler::startElement_sample(QXmlAttributes attributes) {
 
     qDebug() << "startElement_sample ";
-    MsRunId ms_run;
+    pappso::MsRunId ms_run; // the "value" attribute read below is used as both XML id and file name of this run
     ms_run.setXmlId(attributes.value("value").simplified());
     ms_run.setFilename(attributes.value("value").simplified());
-    
+
     _p_project->addMsRunIdSp(ms_run.makeMsRunIdSp());
     qDebug() << "startElement_sample end" ;
     return true;
 }
 
-/*
- * <protein peptide_number="268" evalue="-432.77353" URL="Genome_Z_mays_5a.fasta" description="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC 4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; parent_gene=GRMZM2G083841">
-            <protein_evalue evalue="-399.36093" sample="20120906_balliau_extract_1_A02_urzb-1"/>
-            <protein_evalue evalue="-384.54382" sample="20120906_balliau_extract_1_A01_urnb-1"/>
-            <sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKVSEDDKLIEYDALLVDRFLNILQDLHGPSLREFVQECYEVSADYEGKGDTTKLGELGAKLTGLAPADAILVASSILHMLNLANLAEEVQIAHRRRNSKLKKGGFADEGSATTESDIEETLKRLVSEVGKSPEEVFEALKNQTVDLVFTAHPTQSARRSLLQKNARIRNCLTQLNAKDITDDDKQELDEALQREIQAAFRTDEIRRAQPTPQDEMRYGMSYIHETVWKGVPKFLRRVDTALKNIGINERLPYNVSLIRFSSWMGGDRDGNPRVTPEVTRDVCLLARMMAANLYIDQIEELMFELSMWRCNDELRVRAEELHSSSGSKVTKYYIEFWKQIPPNEPYRVILGHVRDKLYNTRERARHLLASGVSEISAESSFTSIEEFLEPLELCYKSLCDCGDKAIADGSLLDLLRQVFTFGLSLVKLDIRQESERHTDVIDAITTHLGIGSYREWPEDKRQEWLLSELRGKRPLLPPDLPQTDEIADVIGAFHVLAELPPDSFGPYIISMATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFHGRGGTVGRGGGPTHLAILSQPPDTINGSIRVTVQGEVIEFCFGEEHLCFQTLQRFTAATLEHGMHPPVSPKPEWRKLMDEMAVVATEEYRSVVVKEARFVEYFRSATPETEYGRMNIGSRPAKRRPGGGITTLRAIPWIFSWTQTRFHLPVWLGVGAAFKFAIDKDVRNFQVLKEMYNEWPFFRVTLDLLEMVFAKGDPGIAGLYDELLVAEELKPFGKQLRDKYVETQQLLLQIAGHKDILEGDPFLKQGLVLRNPYITTLNVFQAYTLKRIRDPNFKVTPQPPLSKEFADENKPAGLVKLNPASEYPPGLEDTLILTMKGIAAGMQNTG</sequence>
-          </protein>
-          */
 bool XpipSaxHandler::startElement_protein(QXmlAttributes attributes) {
 
+    /* Sample element; only its "description" attribute is read here, and its first space-separated word becomes the accession.
+     * <protein peptide_number="268" evalue="-432.77353" URL="Genome_Z_mays_5a.fasta" description="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC 4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; parent_gene=GRMZM2G083841">
+                <protein_evalue evalue="-399.36093" sample="20120906_balliau_extract_1_A02_urzb-1"/>
+                <protein_evalue evalue="-384.54382" sample="20120906_balliau_extract_1_A01_urnb-1"/>
+                <sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKVSEDDKLIEYDALLVDRFLNILQDLHGPSLREFVQECYEVSADYEGKGDTTKLGELGAKLTGLAPADAILVASSILHMLNLANLAEEVQIAHRRRNSKLKKGGFADEGSATTESDIEETLKRLVSEVGKSPEEVFEALKNQTVDLVFTAHPTQSARRSLLQKNARIRNCLTQLNAKDITDDDKQELDEALQREIQAAFRTDEIRRAQPTPQDEMRYGMSYIHETVWKGVPKFLRRVDTALKNIGINERLPYNVSLIRFSSWMGGDRDGNPRVTPEVTRDVCLLARMMAANLYIDQIEELMFELSMWRCNDELRVRAEELHSSSGSKVTKYYIEFWKQIPPNEPYRVILGHVRDKLYNTRERARHLLASGVSEISAESSFTSIEEFLEPLELCYKSLCDCGDKAIADGSLLDLLRQVFTFGLSLVKLDIRQESERHTDVIDAITTHLGIGSYREWPEDKRQEWLLSELRGKRPLLPPDLPQTDEIADVIGAFHVLAELPPDSFGPYIISMATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFHGRGGTVGRGGGPTHLAILSQPPDTINGSIRVTVQGEVIEFCFGEEHLCFQTLQRFTAATLEHGMHPPVSPKPEWRKLMDEMAVVATEEYRSVVVKEARFVEYFRSATPETEYGRMNIGSRPAKRRPGGGITTLRAIPWIFSWTQTRFHLPVWLGVGAAFKFAIDKDVRNFQVLKEMYNEWPFFRVTLDLLEMVFAKGDPGIAGLYDELLVAEELKPFGKQLRDKYVETQQLLLQIAGHKDILEGDPFLKQGLVLRNPYITTLNVFQAYTLKRIRDPNFKVTPQPPLSKEFADENKPAGLVKLNPASEYPPGLEDTLILTMKGIAAGMQNTG</sequence>
+              </protein>
+              */
     qDebug() << "startElement_protein ";
     _current_protein.setDescription(attributes.value("description").simplified());
     _current_protein.setAccession(_current_protein.getDescription().split(" ").at(0));
     qDebug() << "startElement_protein end" ;
     return true;
 }
+bool XpipSaxHandler::startElement_peptide(QXmlAttributes attributes) {
+    /* Starts the current peptide from the "sequence" attribute only; a sample element looks like:
+     * <peptide sample="20120208_Blein_rep4_1_B03_DW21-4-26-328"
+     *   sample_file="/gorgone/pappso/moulon/users/Melisande/test-param-masschroq/20120208_Blein_rep4_1_B03_DW21-4-26-328.xml"
+     *   scan="2589" scan_in_xtandem="2589" RT="603" mhplus_obser="873.5401" mhplus_theo="873.5408" deltamass="-7.0E-4"
+     *   sequence="IATAIEKK" pre="NPAR" post="AADA" start="331" stop="338" charge="2" evalue="9.2E-4" hypercorr="35.2" validate="true">
+     * <modifs></modifs></peptide>
+     * or, with a modification: <modifs><modif aa="M" modvalue="15.99491" posi="17" posi_in_prot="49"/></modifs>
+     */
+    qDebug() << "startElement_peptide ";
+    const QString sequence(attributes.value("sequence").simplified());
+    _current_peptide_sp = pappso::Peptide(sequence).makePeptideSp();
+    qDebug() << "startElement_peptide end" ;
+    return true;
+}
+
+bool XpipSaxHandler::endElement_peptide() {
+    qDebug() << "endElement_peptide ";
+    pappso::PeptideSp & stored_sp = peptide_store.getInstance(_current_peptide_sp); // the store's instance for this peptide
+    _current_peptide_sp = stored_sp;
+    return true;
+}
 
 bool XpipSaxHandler::endElement_sequence() {
     if ((_tag_stack.size() > 1) && (_tag_stack[_tag_stack.size() - 1] == "protein")) {
@@ -175,3 +227,54 @@ bool XpipSaxHandler::characters(const QString &str) {
     return true;
 }
 
+
+pappso::AaModificationP XpipSaxHandler::getAaModificationP(pappso::mz mass) const {
+    // Tries a fixed, ordered list of modifications and returns the first whose mass range (0.01 dalton precision) holds `mass`.
+    pappso::PrecisionP precision = pappso::Precision::getDaltonInstance(0.01);
+    const auto holds_mass = [&](pappso::AaModificationP candidate) {
+        return pappso::MassRange(candidate->getMass(), precision).contains(mass);
+    };
+    pappso::AaModificationP oxidation = pappso::AaModification::getInstance("MOD:00719");
+    if (holds_mass(oxidation)) {
+        return oxidation;
+    }
+    pappso::AaModificationP iodoacetamide = pappso::AaModification::getInstance("MOD:00397");
+    if (holds_mass(iodoacetamide)) {
+        return iodoacetamide;
+    }
+    pappso::AaModificationP acetylated = pappso::AaModification::getInstance("MOD:00408");
+    if (holds_mass(acetylated)) {
+        return acetylated;
+    }
+    pappso::AaModificationP phosphorylated = pappso::AaModification::getInstance("MOD:00696");
+    if (holds_mass(phosphorylated)) {
+        return phosphorylated;
+    }
+    pappso::AaModificationP ammonia = pappso::AaModification::getInstance("MOD:01160");
+    if (holds_mass(ammonia)) {
+        return ammonia;
+    }
+    pappso::AaModificationP dehydrated = pappso::AaModification::getInstance("MOD:00704");
+    if (holds_mass(dehydrated)) {
+        return dehydrated;
+    }
+    pappso::AaModificationP dimethylated = pappso::AaModification::getInstance("MOD:00429");
+    if (holds_mass(dimethylated)) {
+        return dimethylated;
+    }
+    pappso::AaModificationP dimethylated_medium = pappso::AaModification::getInstance("MOD:00552");
+    if (holds_mass(dimethylated_medium)) {
+        return dimethylated_medium;
+    }
+    pappso::AaModificationP dimethylated_heavy = pappso::AaModification::getInstance("MOD:00638");
+    if (holds_mass(dimethylated_heavy)) {
+        return dimethylated_heavy;
+    }
+    pappso::AaModificationP dimethylpyrrole_adduct = pappso::AaModification::getInstance("MOD:00628");
+    if (holds_mass(dimethylpyrrole_adduct)) {
+        return dimethylpyrrole_adduct;
+    }
+    // every candidate above was rejected
+    throw pappso::ExceptionNotFound(QObject::tr("XpipSaxHandler::getAaModificationP => modification not found for mass %1").arg(mass));
+}
+
diff --git a/src/input/xpipsaxhandler.h b/src/input/xpipsaxhandler.h
index e8b2dc45..5eca4cad 100644
--- a/src/input/xpipsaxhandler.h
+++ b/src/input/xpipsaxhandler.h
@@ -26,9 +26,10 @@
 #include <QXmlDefaultHandler>
 #include <pappsomspp/pappsoexception.h>
 #include <pappsomspp/protein/protein.h>
+#include <pappsomspp/peptide/peptide.h>
+#include <pappsomspp/amino_acid/aamodification.h>
 #include "../core/project.h"
-
-using namespace pappso;
+#include "../utils/peptidestore.h"
 
 class XpipSaxHandler: public QXmlDefaultHandler
 {
@@ -53,20 +54,29 @@ public:
 
     QString errorString() const;
 
-    
+
 private:
-  bool startElement_protein(QXmlAttributes attributes);
-  bool startElement_sample(QXmlAttributes attributes);
-  bool endElement_sequence();
-  bool endElement_protein();
+    bool startElement_peptide(QXmlAttributes attributes);
+    bool startElement_protein(QXmlAttributes attributes);
+    bool startElement_sample(QXmlAttributes attributes);
+    bool startElement_modifs_mass(QXmlAttributes attributes);
+    bool endElement_sequence();
+    bool endElement_protein();
+    bool endElement_peptide();
+    
+    pappso::AaModificationP getAaModificationP(pappso::mz mass) const;
 
 private:
     std::vector<QString> _tag_stack;
     QString _errorStr;
     QString _current_text;
-    
+
     Project * _p_project;
-    Protein _current_protein;
+    pappso::Protein _current_protein;
+    pappso::PeptideSp _current_peptide_sp;
+    
+    PeptideStore peptide_store;
+    QMap<QString, pappso::AaModificationP> _map_massstr_aamod;
 };
 
 #endif // XTANDEMRESULTSHANDLER_H
diff --git a/src/utils/peptidestore.cpp b/src/utils/peptidestore.cpp
new file mode 100644
index 00000000..5da496c7
--- /dev/null
+++ b/src/utils/peptidestore.cpp
@@ -0,0 +1,52 @@
+/**
+ * \file utils/peptidestore.cpp
+ * \date 7/10/2016
+ * \author Olivier Langella
+ * \brief store unique version of peptides
+ */
+
+/*******************************************************************************
+ * Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
+ *
+ * This file is part of peptider.
+ *
+ *     peptider is free software: you can redistribute it and/or modify
+ *     it under the terms of the GNU General Public License as published by
+ *     the Free Software Foundation, either version 3 of the License, or
+ *     (at your option) any later version.
+ *
+ *     peptider is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with peptider.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Contributors:
+ *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
+ ******************************************************************************/
+
+#include "peptidestore.h"
+
+PeptideStore::PeptideStore()
+{
+    // nothing to do: the hasher and the map are default-constructed
+}
+
+PeptideStore::~PeptideStore()
+{
+    // nothing to release explicitly: the members clean up after themselves
+}
+
+// Returns the stored peptide whose getLiAbsoluteString() equals that of peptide_in, storing peptide_in on first sight.
+pappso::PeptideSp & PeptideStore::getInstance(pappso::PeptideSp & peptide_in) {
+    const QString sequence_li(peptide_in.get()->getLiAbsoluteString());
+    // The map is keyed by a hash, so two different strings can share a key: probe onward instead of aliasing them.
+    for (std::size_t sequence_li_crc = _hash_fn(sequence_li.toStdString()); ; ++sequence_li_crc) {
+        auto ret = _map_crc_peptide_list.insert(std::make_pair(sequence_li_crc, peptide_in));
+        if (ret.second || (ret.first->second.get()->getLiAbsoluteString() == sequence_li)) {
+            return ret.first->second;
+        }
+    }
+}
diff --git a/src/utils/peptidestore.h b/src/utils/peptidestore.h
new file mode 100644
index 00000000..d6f88ea1
--- /dev/null
+++ b/src/utils/peptidestore.h
@@ -0,0 +1,50 @@
+/**
+ * \file utils/peptidestore.h
+ * \date 7/10/2016
+ * \author Olivier Langella
+ * \brief store unique version of peptides
+ */
+
+/*******************************************************************************
+ * Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
+ *
+ * This file is part of peptider.
+ *
+ *     peptider is free software: you can redistribute it and/or modify
+ *     it under the terms of the GNU General Public License as published by
+ *     the Free Software Foundation, either version 3 of the License, or
+ *     (at your option) any later version.
+ *
+ *     peptider is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with peptider.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Contributors:
+ *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
+ ******************************************************************************/
+
+#ifndef PEPTIDESTORE_H
+#define PEPTIDESTORE_H
+
+#include <pappsomspp/peptide/peptide.h>
+#include <unordered_map>
+
+class PeptideStore // keeps shared peptide instances so that equal peptides can be handed out as one object
+{
+public:
+    PeptideStore();
+    ~PeptideStore();
+    // Returns a reference to the shared pointer the store holds for peptide_in (peptide_in is stored if nothing matched).
+    pappso::PeptideSp & getInstance(pappso::PeptideSp & peptide_in);
+
+private :
+    // string hasher used by getInstance(), and the stored peptides keyed by std::size_t values computed with it
+    std::hash<std::string> _hash_fn;
+    std::unordered_map<std::size_t, pappso::PeptideSp> _map_crc_peptide_list;
+};
+
+#endif // PEPTIDESTORE_H
-- 
GitLab