From a337dff2964199158ddec6cfb8511f5910c726f2 Mon Sep 17 00:00:00 2001 From: Olivier Langella <olivier.langella@u-psud.fr> Date: Wed, 22 Nov 2017 22:27:49 +0100 Subject: [PATCH] WIP: new xpip parser --- .../identificationdatasource.cpp | 3 + .../identificationdatasource.h | 4 ++ src/input/xtpxpipsaxhandler.cpp | 68 +++++++++++++++++++ src/input/xtpxpipsaxhandler.h | 9 ++- src/utils/identificationdatasourcestore.cpp | 40 +++++++++-- src/utils/identificationdatasourcestore.h | 1 + 6 files changed, 120 insertions(+), 5 deletions(-) diff --git a/src/core/identification_sources/identificationdatasource.cpp b/src/core/identification_sources/identificationdatasource.cpp index 3736af228..7899da8e9 100644 --- a/src/core/identification_sources/identificationdatasource.cpp +++ b/src/core/identification_sources/identificationdatasource.cpp @@ -75,6 +75,9 @@ void IdentificationDataSource::setMsRunSp (MsRunSp ms_run_sp) { MsRunSp IdentificationDataSource::getMsRunSp () const { return (_ms_run_sp); } +void IdentificationDataSource::setIdentificationEngine(IdentificationEngine engine) { /* replaces _engine, the value handed back by getIdentificationEngine(); no validation is done */ + _engine = engine; +} IdentificationEngine IdentificationDataSource::getIdentificationEngine() const { return _engine; diff --git a/src/core/identification_sources/identificationdatasource.h b/src/core/identification_sources/identificationdatasource.h index 30ca866e0..01a8d2202 100644 --- a/src/core/identification_sources/identificationdatasource.h +++ b/src/core/identification_sources/identificationdatasource.h @@ -70,6 +70,10 @@ public: */ virtual void parseTo(Project* p_project)=0; + /** \brief set the identification engine later reported by getIdentificationEngine() + */ + virtual void setIdentificationEngine(IdentificationEngine engine); + /** \brief identification engine */ virtual IdentificationEngine getIdentificationEngine() const; diff --git a/src/input/xtpxpipsaxhandler.cpp b/src/input/xtpxpipsaxhandler.cpp index b45ceb18d..90520333e 100644 --- a/src/input/xtpxpipsaxhandler.cpp +++ b/src/input/xtpxpipsaxhandler.cpp @@ -32,6 +32,7 @@ 
#include <pappsomspp/exception/exceptionnotfound.h> #include <cmath> +#include "../core/peptideevidence.h" #include "../utils/peptidestore.h" #include "../utils/proteinstore.h" @@ -67,14 +68,20 @@ bool XtpXpipSaxHandler::startElement(const QString & namespaceURI, const QString } } else if (qName == "protein_match") { is_ok = startElement_protein_match(attributes); + } else if (qName == "peptide_match") { + is_ok = startElement_peptide_match(attributes); } else if (qName == "protein") { is_ok = startElement_protein(attributes); + } else if (qName == "identification_source") { + is_ok = startElement_identification_source(attributes); } //<sample value="P6_08_10"/> else if (qName == "sample") { is_ok = startElement_msrun(attributes); } else if (qName == "peptide") { is_ok = startElement_peptide(attributes); + } else if (qName == "peptide_evidence") { + is_ok = startElement_peptide_evidence(attributes); } else if (qName == "modification") { is_ok = startElement_modification(attributes); } else if (qName == "mod") { @@ -187,6 +194,22 @@ bool XtpXpipSaxHandler::startElement_modification(QXmlAttributes attributes) { return true; } +bool XtpXpipSaxHandler::startElement_identification_source(QXmlAttributes attributes) { + //<identification_source id="identa0" msrun_id="sampa0" path="/gorgone/pappso/formation/Janvier2014/TD/xml_tandem/20120906_balliau_extract_1_A01_urnb-1.xml" engine="1" version=""/> + + qDebug() << "startElement_identification_source "; + + IdentificationEngine engine = static_cast<IdentificationEngine>(attributes.value("engine").toUInt()); /* NOTE(review): the toUInt() result is cast unchecked (no ok flag, no range test); confirm every engine attribute value maps to a valid IdentificationEngine */ + IdentificationDataSourceSp sp_ident_source = _p_project->getIdentificationDataSourceStore().getInstance(attributes.value("path").simplified(), engine); + sp_ident_source.get()->setXmlId(attributes.value("id").simplified()); + _map_ident_sources.insert(std::pair<QString, IdentificationDataSourceSp>(sp_ident_source.get()->getXmlId(), sp_ident_source)); + 
sp_ident_source->setMsRunSp(_map_msruns.at(attributes.value("msrun_id").simplified())); /* std::map::at throws std::out_of_range when this msrun_id was never inserted into _map_msruns */ + sp_ident_source->setIdentificationEngineVersion(attributes.value("version")); + //_current_identification_group_p->addMsRunSp(ms_run); + qDebug() << "startElement_identification_source end" ; + return true; +} bool XtpXpipSaxHandler::startElement_msrun(QXmlAttributes attributes) { //<msrun id="sampa0" name="20120906_balliau_extract_1_A01_urnb-1" format="0" path="20120906_balliau_extract_1_A01_urnb-1"/> @@ -196,11 +219,56 @@ bool XtpXpipSaxHandler::startElement_msrun(QXmlAttributes attributes) { ms_run.get()->setFilename(attributes.value("path").simplified()); ms_run.get()->setSampleName(attributes.value("name").simplified()); ms_run.get()->setXmlId(attributes.value("id").simplified()); + _map_msruns.insert(std::pair<QString, MsRunSp>(ms_run.get()->getXmlId(), ms_run)); //_current_identification_group_p->addMsRunSp(ms_run); qDebug() << "startElement_msrun end" ; return true; } +bool XtpXpipSaxHandler::startElement_peptide_evidence(QXmlAttributes attributes) { + + qDebug() << "startElement_peptide_evidence "; + /* + * <peptide_evidence id="peb76" source_id="identa0" peptide_id="pd7688" scan="11743" rt="2170" evalue="8.1e-10" exp_mass="1655.755648" charge="2" checked="true"> + <param key="0" value="48.5"/> + </peptide_evidence> + + */ + IdentificationDataSourceSp sp_ident_source = _map_ident_sources.at(attributes.value("source_id").simplified()); + unsigned int scan = attributes.value("scan").toUInt(); + _p_peptide_evidence = new PeptideEvidence(sp_ident_source.get()->getMsRunSp().get(), scan); /* NOTE(review): raw new; who frees this object once it has been handed to getPeptideEvidenceStore().getInstance() is not visible in this patch, confirm it is not leaked */ + _p_peptide_evidence->setIdentificationDataSource(sp_ident_source.get()); + + _p_peptide_evidence->setChecked(false); + if (attributes.value("checked").simplified().toLower() == "true") { + _p_peptide_evidence->setChecked(true); + } + _p_peptide_evidence->setCharge(attributes.value("charge").toUInt()); + _p_peptide_evidence->setRetentionTime(attributes.value("rt").toDouble()); + 
_p_peptide_evidence->setEvalue(attributes.value("evalue").toDouble()); + _p_peptide_evidence->setExperimentalMass(attributes.value("exp_mass").toDouble()); + _p_peptide_evidence->setPeptideXtpSp(_map_peptides.at( attributes.value("peptide_id").simplified())); + + PeptideEvidenceSp sp_peptide_evidence = sp_ident_source.get()->getPeptideEvidenceStore().getInstance(_p_peptide_evidence); /* the pointer returned by the store, not _p_peptide_evidence itself, is what gets indexed under the XML id below */ + _map_peptide_evidences.insert(std::pair<QString, PeptideEvidenceSp>(attributes.value("id").simplified(), sp_peptide_evidence)); + qDebug() << "startElement_peptide_evidence end" ; + return true; +} + +bool XtpXpipSaxHandler::startElement_peptide_match(QXmlAttributes attributes) { + + qDebug() << "startElement_peptide_match "; + /* + * <protein_match acc="GRMZM2G138258_P01" checked="true"> + <peptide_match source_id="identa0" peptide_id="pd3533" scan="1907" rt="646.406" evalue="0.00044" exp_mass="2190.816924" start="103" charge="3" checked="true"> + <param key="0" value="23.6"/> + </peptide_match> + */ + _p_protein_match = new ProteinMatch(); /* NOTE(review): attributes is never read here and _p_protein_match is overwritten with a fresh ProteinMatch; looks like a WIP placeholder, confirm intent before relying on it */ + qDebug() << "startElement_peptide_match end" ; + return true; +} + bool XtpXpipSaxHandler::startElement_protein_match(QXmlAttributes attributes) { qDebug() << "startElement_protein_match "; diff --git a/src/input/xtpxpipsaxhandler.h b/src/input/xtpxpipsaxhandler.h index f8111f430..7aff217af 100644 --- a/src/input/xtpxpipsaxhandler.h +++ b/src/input/xtpxpipsaxhandler.h @@ -70,9 +70,12 @@ private: bool startElement_description(QXmlAttributes attributes); bool startElement_fasta_file(QXmlAttributes attributes); bool startElement_protein_match(QXmlAttributes attributes); + bool startElement_peptide_match(QXmlAttributes attributes); bool startElement_peptide(QXmlAttributes attributes); + bool startElement_peptide_evidence(QXmlAttributes attributes); bool startElement_protein(QXmlAttributes attributes); bool startElement_msrun(QXmlAttributes attributes); + bool startElement_identification_source(QXmlAttributes attributes); bool 
startElement_modification(QXmlAttributes attributes); bool startElement_mod(QXmlAttributes attributes); //bool endElement_identification(); @@ -90,12 +93,16 @@ private: Project * _p_project; ProteinMatch * _p_protein_match; - PeptideMatch * _p_peptide_match; + PeptideMatch _current_peptide_match; + PeptideEvidence * _p_peptide_evidence; IdentificationGroup * _current_identification_group_p; std::map<QString, pappso::AaModificationP> _map_modifs; std::map<QString, FastaFileSp> _map_fasta_files; std::map<QString, ProteinXtpSp> _map_proteins; + std::map<QString, MsRunSp> _map_msruns; + std::map<QString, IdentificationDataSourceSp> _map_ident_sources; + std::map<QString, PeptideEvidenceSp> _map_peptide_evidences; ProteinXtp _current_protein; PeptideXtpSp _current_peptide_sp; std::map<QString, PeptideXtpSp> _map_peptides; diff --git a/src/utils/identificationdatasourcestore.cpp b/src/utils/identificationdatasourcestore.cpp index c01d92ac1..46059e90c 100644 --- a/src/utils/identificationdatasourcestore.cpp +++ b/src/utils/identificationdatasourcestore.cpp @@ -44,6 +44,39 @@ IdentificationDataSourceStore::~IdentificationDataSourceStore() } +IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QString & location, IdentificationEngine engine) { /* Returns the source already stored for location, or else builds one chosen by file suffix, tags it with engine, and registers it under both location and its absolute file path. */ + qDebug() << "IdentificationDataSourceStore::getInstance begin " << location; + qDebug() << " " << _map_identification_data_sources.size(); + std::map< QString, IdentificationDataSourceSp >::iterator it = _map_identification_data_sources.find(location); + if (it != _map_identification_data_sources.end()) { + return it->second; /* cache hit: the engine argument is not applied to the existing instance */ + } + else { + QFileInfo location_file(location); + QString ext = location_file.suffix(); + //QString sample_name = location_file.baseName(); + IdentificationDataSourceSp p_identfile = nullptr; + if (ext.toLower() == "xml") { + //X!Tandem result file + p_identfile = std::make_shared<IdentificationXtandemFile>(location_file); + } else if (ext.toLower() == "pep") { + //pep xml file + 
p_identfile = std::make_shared<IdentificationPwizFile>(location_file); + } else { + p_identfile = std::make_shared<IdentificationPwizFile>(location_file); /* every suffix other than xml ends up as IdentificationPwizFile */ + } + if (p_identfile == nullptr) { /* defensive only: each branch above assigns the result of std::make_shared */ + throw pappso::PappsoException(QObject::tr("Identification resource %1 not recognized (null pointer)").arg(location)); + } + p_identfile.get()->setXmlId(QString("ident%1").arg(pappso::Utils::getLexicalOrderedString(_map_identification_data_sources.size()))); /* id is derived from the current map size; each new source may add up to two keys (location and absolute path) */ + p_identfile.get()->setIdentificationEngine(engine); + _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location, p_identfile)); + _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location_file.absoluteFilePath(), p_identfile)); + return p_identfile; + } + throw pappso::PappsoException(QObject::tr("Identification resource %1 not recognized").arg(location)); /* not reached: both branches of the if/else above return */ +} + IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QString & location) { qDebug() << "IdentificationDataSourceStore::getInstance begin " << location; qDebug() << " " << _map_identification_data_sources.size(); @@ -59,18 +92,17 @@ IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QStr if (ext.toLower() == "xml") { //X!Tandem result file p_identfile = std::make_shared<IdentificationXtandemFile>(location_file); - } else - if (ext.toLower() == "pep") { + } else if (ext.toLower() == "pep") { //pep xml file p_identfile = std::make_shared<IdentificationPwizFile>(location_file); - }else { + } else { p_identfile = std::make_shared<IdentificationPwizFile>(location_file); } if (p_identfile == nullptr) { throw pappso::PappsoException(QObject::tr("Identification resource %1 not recognized (null pointer)").arg(location)); } p_identfile.get()->setXmlId(QString("ident%1").arg(pappso::Utils::getLexicalOrderedString(_map_identification_data_sources.size()))); - + _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp 
>(location, p_identfile)); _map_identification_data_sources.insert(std::pair< QString, IdentificationDataSourceSp >(location_file.absoluteFilePath(), p_identfile)); return p_identfile; diff --git a/src/utils/identificationdatasourcestore.h b/src/utils/identificationdatasourcestore.h index a2e204f34..a84b06a83 100644 --- a/src/utils/identificationdatasourcestore.h +++ b/src/utils/identificationdatasourcestore.h @@ -42,6 +42,7 @@ public: IdentificationDataSourceStore(); ~IdentificationDataSourceStore(); IdentificationDataSourceSp getInstance(const QString & location); + IdentificationDataSourceSp getInstance(const QString & location, IdentificationEngine engine); std::vector<IdentificationDataSourceSp> getIdentificationDataSourceList() const; private : -- GitLab