From a337dff2964199158ddec6cfb8511f5910c726f2 Mon Sep 17 00:00:00 2001
From: Olivier Langella <olivier.langella@u-psud.fr>
Date: Wed, 22 Nov 2017 22:27:49 +0100
Subject: [PATCH] WIP: new xpip parser

---
 .../identificationdatasource.cpp              |  3 +
 .../identificationdatasource.h                |  4 ++
 src/input/xtpxpipsaxhandler.cpp               | 68 +++++++++++++++++++
 src/input/xtpxpipsaxhandler.h                 |  9 ++-
 src/utils/identificationdatasourcestore.cpp   | 40 +++++++++--
 src/utils/identificationdatasourcestore.h     |  1 +
 6 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/src/core/identification_sources/identificationdatasource.cpp b/src/core/identification_sources/identificationdatasource.cpp
index 3736af228..7899da8e9 100644
--- a/src/core/identification_sources/identificationdatasource.cpp
+++ b/src/core/identification_sources/identificationdatasource.cpp
@@ -75,6 +75,9 @@ void IdentificationDataSource::setMsRunSp (MsRunSp ms_run_sp) {
 MsRunSp IdentificationDataSource::getMsRunSp () const {
     return (_ms_run_sp);
 }
+void IdentificationDataSource::setIdentificationEngine(IdentificationEngine engine) {
+    _engine = engine; // stored value is the one returned by getIdentificationEngine()
+}
 
 IdentificationEngine IdentificationDataSource::getIdentificationEngine() const {
     return _engine;
diff --git a/src/core/identification_sources/identificationdatasource.h b/src/core/identification_sources/identificationdatasource.h
index 30ca866e0..01a8d2202 100644
--- a/src/core/identification_sources/identificationdatasource.h
+++ b/src/core/identification_sources/identificationdatasource.h
@@ -70,6 +70,10 @@ public:
      */
     virtual void parseTo(Project* p_project)=0;
     
+    /** \brief set identification engine
+     */
+    virtual void setIdentificationEngine(IdentificationEngine engine);
+
     /** \brief identification engine
      */
     virtual IdentificationEngine getIdentificationEngine() const;
diff --git a/src/input/xtpxpipsaxhandler.cpp b/src/input/xtpxpipsaxhandler.cpp
index b45ceb18d..90520333e 100644
--- a/src/input/xtpxpipsaxhandler.cpp
+++ b/src/input/xtpxpipsaxhandler.cpp
@@ -32,6 +32,7 @@
 
 #include <pappsomspp/exception/exceptionnotfound.h>
 #include <cmath>
+#include "../core/peptideevidence.h"
 #include "../utils/peptidestore.h"
 #include "../utils/proteinstore.h"
 
@@ -67,14 +68,20 @@ bool XtpXpipSaxHandler::startElement(const QString & namespaceURI, const QString
             }
         } else if (qName == "protein_match") {
             is_ok = startElement_protein_match(attributes);
+        } else if (qName == "peptide_match") {
+            is_ok = startElement_peptide_match(attributes);
         } else if (qName == "protein") {
             is_ok = startElement_protein(attributes);
+        } else if (qName == "identification_source") {
+            is_ok = startElement_identification_source(attributes);
         }
         //<sample value="P6_08_10"/>
         else if (qName == "sample") {
             is_ok = startElement_msrun(attributes);
         } else if (qName == "peptide") {
             is_ok = startElement_peptide(attributes);
+        } else if (qName == "peptide_evidence") {
+            is_ok = startElement_peptide_evidence(attributes);
         } else if (qName == "modification") {
             is_ok = startElement_modification(attributes);
         } else if (qName == "mod") {
@@ -187,6 +194,22 @@ bool XtpXpipSaxHandler::startElement_modification(QXmlAttributes attributes) {
     return true;
 }
 
+bool XtpXpipSaxHandler::startElement_identification_source(QXmlAttributes attributes) {
+    //<identification_source id="identa0" msrun_id="sampa0" path="/gorgone/pappso/formation/Janvier2014/TD/xml_tandem/20120906_balliau_extract_1_A01_urnb-1.xml" engine="1" version=""/>
+    // Fetches the data source for "path" from the project store, indexes it by its xml id and binds its MS run.
+    qDebug() << "startElement_identification_source ";
+
+    const QString source_path = attributes.value("path").simplified();
+    const IdentificationEngine ident_engine = static_cast<IdentificationEngine>(attributes.value("engine").toUInt());
+    IdentificationDataSourceSp data_source_sp = _p_project->getIdentificationDataSourceStore().getInstance(source_path, ident_engine);
+    data_source_sp->setXmlId(attributes.value("id").simplified());
+    _map_ident_sources.insert(std::make_pair(data_source_sp->getXmlId(), data_source_sp));
+    // std::map::at throws std::out_of_range when "msrun_id" does not match an entry of _map_msruns
+    data_source_sp->setMsRunSp(_map_msruns.at(attributes.value("msrun_id").simplified()));
+    data_source_sp->setIdentificationEngineVersion(attributes.value("version"));
+    qDebug() << "startElement_identification_source end" ;
+    return true;
+}
 bool XtpXpipSaxHandler::startElement_msrun(QXmlAttributes attributes) {
 //<msrun id="sampa0" name="20120906_balliau_extract_1_A01_urnb-1" format="0" path="20120906_balliau_extract_1_A01_urnb-1"/>
 
@@ -196,11 +219,56 @@ bool XtpXpipSaxHandler::startElement_msrun(QXmlAttributes attributes) {
     ms_run.get()->setFilename(attributes.value("path").simplified());
     ms_run.get()->setSampleName(attributes.value("name").simplified());
     ms_run.get()->setXmlId(attributes.value("id").simplified());
+    _map_msruns.insert(std::pair<QString, MsRunSp>(ms_run.get()->getXmlId(), ms_run));
     //_current_identification_group_p->addMsRunSp(ms_run);
     qDebug() << "startElement_msrun end" ;
     return true;
 }
 
+bool XtpXpipSaxHandler::startElement_peptide_evidence(QXmlAttributes attributes) {
+    qDebug() << "startElement_peptide_evidence ";
+    /* Builds a PeptideEvidence from the attributes of an element such as:
+     * <peptide_evidence id="peb76" source_id="identa0" peptide_id="pd7688" scan="11743" rt="2170" evalue="8.1e-10" exp_mass="1655.755648" charge="2" checked="true">
+                    <param key="0" value="48.5"/>
+                </peptide_evidence>
+     * and registers the stored instance in _map_peptide_evidences under its "id" attribute.
+     */
+    // Resolve both references before allocating: std::map::at throws std::out_of_range on an unknown id,
+    // and failing here cannot leave a freshly allocated, half-initialised PeptideEvidence behind.
+    IdentificationDataSourceSp sp_ident_source = _map_ident_sources.at(attributes.value("source_id").simplified());
+    PeptideXtpSp sp_peptide = _map_peptides.at(attributes.value("peptide_id").simplified());
+    _p_peptide_evidence = new PeptideEvidence(sp_ident_source->getMsRunSp().get(), attributes.value("scan").toUInt());
+    _p_peptide_evidence->setIdentificationDataSource(sp_ident_source.get());
+    // unchecked unless the "checked" attribute, after simplified() and toLower(), equals "true"
+    _p_peptide_evidence->setChecked(false);
+    if (attributes.value("checked").simplified().toLower() == "true") {
+        _p_peptide_evidence->setChecked(true);
+    }
+    _p_peptide_evidence->setCharge(attributes.value("charge").toUInt());
+    _p_peptide_evidence->setRetentionTime(attributes.value("rt").toDouble());
+    _p_peptide_evidence->setEvalue(attributes.value("evalue").toDouble());
+    _p_peptide_evidence->setExperimentalMass(attributes.value("exp_mass").toDouble());
+    _p_peptide_evidence->setPeptideXtpSp(sp_peptide);
+    PeptideEvidenceSp sp_peptide_evidence = sp_ident_source->getPeptideEvidenceStore().getInstance(_p_peptide_evidence);
+    _map_peptide_evidences.insert(std::pair<QString, PeptideEvidenceSp>(attributes.value("id").simplified(), sp_peptide_evidence));
+    qDebug() << "startElement_peptide_evidence end" ;
+    return true;
+}
+
+bool XtpXpipSaxHandler::startElement_peptide_match(QXmlAttributes attributes) {
+    // Handler for <peptide_match> start tags; the attributes are not read here yet.
+    qDebug() << "startElement_peptide_match ";
+    /* Example of the element as it appears in the file:
+     * <protein_match acc="GRMZM2G138258_P01" checked="true">
+                <peptide_match source_id="identa0" peptide_id="pd3533" scan="1907" rt="646.406" evalue="0.00044" exp_mass="2190.816924" start="103" charge="3" checked="true">
+                    <param key="0" value="23.6"/>
+                </peptide_match>
+              */
+    _p_protein_match = new ProteinMatch(); // NOTE(review): overwrites _p_protein_match on every <peptide_match>, which the example above shows nested inside <protein_match>; looks like a placeholder - confirm intent and who owns the previous pointer
+    qDebug() << "startElement_peptide_match end" ;
+    return true;
+}
+
 bool XtpXpipSaxHandler::startElement_protein_match(QXmlAttributes attributes) {
 
     qDebug() << "startElement_protein_match ";
diff --git a/src/input/xtpxpipsaxhandler.h b/src/input/xtpxpipsaxhandler.h
index f8111f430..7aff217af 100644
--- a/src/input/xtpxpipsaxhandler.h
+++ b/src/input/xtpxpipsaxhandler.h
@@ -70,9 +70,12 @@ private:
     bool startElement_description(QXmlAttributes attributes);
     bool startElement_fasta_file(QXmlAttributes attributes);
     bool startElement_protein_match(QXmlAttributes attributes);
+    bool startElement_peptide_match(QXmlAttributes attributes);
     bool startElement_peptide(QXmlAttributes attributes);
+    bool startElement_peptide_evidence(QXmlAttributes attributes);
     bool startElement_protein(QXmlAttributes attributes);
     bool startElement_msrun(QXmlAttributes attributes);
+    bool startElement_identification_source(QXmlAttributes attributes);
     bool startElement_modification(QXmlAttributes attributes);
     bool startElement_mod(QXmlAttributes attributes);
     //bool endElement_identification();
@@ -90,12 +93,16 @@ private:
 
     Project * _p_project;
     ProteinMatch * _p_protein_match;
-    PeptideMatch * _p_peptide_match;
+    PeptideMatch _current_peptide_match;
+    PeptideEvidence * _p_peptide_evidence;
     IdentificationGroup * _current_identification_group_p;
     
     std::map<QString, pappso::AaModificationP> _map_modifs;
     std::map<QString, FastaFileSp> _map_fasta_files;
     std::map<QString, ProteinXtpSp> _map_proteins;
+    std::map<QString, MsRunSp> _map_msruns;
+    std::map<QString, IdentificationDataSourceSp> _map_ident_sources;
+    std::map<QString, PeptideEvidenceSp> _map_peptide_evidences;
     ProteinXtp _current_protein;
     PeptideXtpSp _current_peptide_sp;
     std::map<QString, PeptideXtpSp> _map_peptides;
diff --git a/src/utils/identificationdatasourcestore.cpp b/src/utils/identificationdatasourcestore.cpp
index c01d92ac1..46059e90c 100644
--- a/src/utils/identificationdatasourcestore.cpp
+++ b/src/utils/identificationdatasourcestore.cpp
@@ -44,6 +44,39 @@ IdentificationDataSourceStore::~IdentificationDataSourceStore()
 
 }
 
+/** \brief get or create the identification data source stored at \a location
+ * A source already registered under \a location is returned unchanged, so
+ * \a engine only applies to newly created sources.
+ * \param location path of the identification result file
+ * \param engine identification engine recorded on a newly created source
+ * \return shared pointer on the cached or newly created data source
+ */
+IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QString & location, IdentificationEngine engine) {
+    qDebug() << "IdentificationDataSourceStore::getInstance begin " << location;
+    qDebug() << " " << _map_identification_data_sources.size();
+    auto it = _map_identification_data_sources.find(location);
+    if (it != _map_identification_data_sources.end()) {
+        return it->second;
+    }
+
+    QFileInfo location_file(location);
+    IdentificationDataSourceSp p_identfile;
+    if (location_file.suffix().toLower() == "xml") {
+        //X!Tandem result file
+        p_identfile = std::make_shared<IdentificationXtandemFile>(location_file);
+    } else {
+        //pep xml ("pep") and every other extension are handled by IdentificationPwizFile
+        p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
+    }
+    //std::make_shared throws on failure and never yields a null pointer, so no null check is needed here
+    p_identfile->setXmlId(QString("ident%1").arg(pappso::Utils::getLexicalOrderedString(_map_identification_data_sources.size())));
+    p_identfile->setIdentificationEngine(engine);
+    //register under both the given and the absolute path so that a later lookup with either one finds this source
+    _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location, p_identfile));
+    _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location_file.absoluteFilePath(), p_identfile));
+    return p_identfile;
+}
+
 IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QString & location) {
     qDebug() << "IdentificationDataSourceStore::getInstance begin " << location;
     qDebug() << " " << _map_identification_data_sources.size();
@@ -59,18 +92,17 @@ IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QStr
         if (ext.toLower() == "xml") {
             //X!Tandem result file
             p_identfile = std::make_shared<IdentificationXtandemFile>(location_file);
-        } else 
-        if (ext.toLower() == "pep") {
+        } else if (ext.toLower() == "pep") {
             //pep xml file
             p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
-        }else {
+        } else {
             p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
         }
         if (p_identfile == nullptr) {
             throw pappso::PappsoException(QObject::tr("Identification resource %1 not recognized (null pointer)").arg(location));
         }
         p_identfile.get()->setXmlId(QString("ident%1").arg(pappso::Utils::getLexicalOrderedString(_map_identification_data_sources.size())));
-        
+
         _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location, p_identfile));
         _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location_file.absoluteFilePath(), p_identfile));
         return p_identfile;
diff --git a/src/utils/identificationdatasourcestore.h b/src/utils/identificationdatasourcestore.h
index a2e204f34..a84b06a83 100644
--- a/src/utils/identificationdatasourcestore.h
+++ b/src/utils/identificationdatasourcestore.h
@@ -42,6 +42,7 @@ public:
     IdentificationDataSourceStore();
     ~IdentificationDataSourceStore();
     IdentificationDataSourceSp getInstance(const QString & location);
+    IdentificationDataSourceSp getInstance(const QString & location, IdentificationEngine engine);
     
     std::vector<IdentificationDataSourceSp> getIdentificationDataSourceList() const;
 private :
-- 
GitLab