From 53a96c0a25d3172fd3bbb8933484584617d76a50 Mon Sep 17 00:00:00 2001 From: Olivier Langella <olivier.langella@u-psud.fr> Date: Tue, 19 Jun 2018 13:58:39 +0200 Subject: [PATCH] WIP: parsing pep xml files --- src/CMakeLists.txt | 2 + .../identificationpepxmlfile.cpp | 157 ++- src/core/peptideevidence.cpp | 385 +++--- src/core/peptideevidence.h | 10 + src/input/pepxmlsaxhandler.cpp | 521 ++++---- src/input/pepxmlsaxhandler.h | 70 +- src/input/xtandemsaxhandler.cpp | 1117 ++++++++++------- src/utils/identificationdatasourcestore.cpp | 8 +- src/utils/types.h | 152 ++- 9 files changed, 1403 insertions(+), 1019 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a11e04512..840bbd57b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -92,6 +92,7 @@ SET(CPP_FILES core/identificationgroup.cpp core/identification_sources/identificationdatasource.cpp core/identification_sources/identificationmascotdatfile.cpp + core/identification_sources/identificationpepxmlfile.cpp core/identification_sources/identificationpwizfile.cpp core/identification_sources/identificationxtandemfile.cpp core/labeling/label.cpp @@ -122,6 +123,7 @@ SET(CPP_FILES input/mascot/mimeparser.cpp input/condorqxmlsaxhandler.cpp input/identificationpwizreader.cpp + input/pepxmlsaxhandler.cpp input/xpipsaxhandler.cpp input/xtandemparamsaxhandler.cpp input/xtandemsaxhandler.cpp diff --git a/src/core/identification_sources/identificationpepxmlfile.cpp b/src/core/identification_sources/identificationpepxmlfile.cpp index cbf39e195..464a691b8 100644 --- a/src/core/identification_sources/identificationpepxmlfile.cpp +++ b/src/core/identification_sources/identificationpepxmlfile.cpp @@ -6,92 +6,137 @@ */ /******************************************************************************* -* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>. -* -* This file is part of XTPcpp. 
-* -* XTPcpp is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* XTPcpp is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. -* -******************************************************************************/ + * Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>. + * + * This file is part of XTPcpp. + * + * XTPcpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * XTPcpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ #include "identificationpepxmlfile.h" #include <pappsomspp/pappsoexception.h> #include "../project.h" -#include "../../input/mascot/mascotdatparser.h" +#include "../../input/pepxmlsaxhandler.h" -IdentificationPepXmlFile::IdentificationPepXmlFile(const QFileInfo & mascot_dat_file) : IdentificationDataSource(mascot_dat_file.absoluteFilePath()), _pep_xml_file(mascot_dat_file) +IdentificationPepXmlFile::IdentificationPepXmlFile( + const QFileInfo &mascot_dat_file) + : IdentificationDataSource(mascot_dat_file.absoluteFilePath()) + , _pep_xml_file(mascot_dat_file) { - _engine = IdentificationEngine::unknown; + _engine = IdentificationEngine::unknown; } -IdentificationPepXmlFile::IdentificationPepXmlFile(const IdentificationPepXmlFile& other) : IdentificationDataSource(other),_pep_xml_file (other._pep_xml_file) +IdentificationPepXmlFile::IdentificationPepXmlFile( + const IdentificationPepXmlFile &other) + : IdentificationDataSource(other), _pep_xml_file(other._pep_xml_file) { - _engine = IdentificationEngine::unknown; + _engine = IdentificationEngine::unknown; } IdentificationPepXmlFile::~IdentificationPepXmlFile() { - } -bool IdentificationPepXmlFile::operator==(const IdentificationPepXmlFile& other) const +bool +IdentificationPepXmlFile:: +operator==(const IdentificationPepXmlFile &other) const { - } -pappso::SpectrumSp IdentificationPepXmlFile::getSpectrumSp(unsigned int scan_number) const { - pappso::SpectrumSp spectrum_sp = IdentificationDataSource::getSpectrumSp(scan_number); - return spectrum_sp; +pappso::SpectrumSp +IdentificationPepXmlFile::getSpectrumSp(unsigned int scan_number) const +{ + pappso::SpectrumSp spectrum_sp = + IdentificationDataSource::getSpectrumSp(scan_number); + return spectrum_sp; } -void IdentificationPepXmlFile::parseTo(Project* p_project) { - qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; - - - MsRunSp msrun_sp = 
p_project->getMsRunStore().getInstance(QFileInfo(_pep_xml_file).baseName()); - setMsRunSp(msrun_sp); - std::vector<IdentificationGroup *> identification_list = p_project->getIdentificationGroupList(); - IdentificationGroup * identification_group_p = nullptr; - if (p_project->getProjectMode() == ProjectMode::combined) { - if (identification_list.size() == 0) { - identification_group_p = p_project->newIdentificationGroup(); +void +IdentificationPepXmlFile::parseTo(Project *p_project) +{ + qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; + + + MsRunSp msrun_sp = + p_project->getMsRunStore().getInstance(QFileInfo(_pep_xml_file).baseName()); + setMsRunSp(msrun_sp); + std::vector<IdentificationGroup *> identification_list = + p_project->getIdentificationGroupList(); + IdentificationGroup *identification_group_p = nullptr; + if(p_project->getProjectMode() == ProjectMode::combined) + { + if(identification_list.size() == 0) + { + identification_group_p = p_project->newIdentificationGroup(); } - else { - identification_group_p = identification_list[0]; + else + { + identification_group_p = identification_list[0]; } } - else { - for (IdentificationGroup * identification_p_flist : identification_list) { - if (identification_p_flist->containSample(msrun_sp.get()->getSampleName())) { - identification_group_p = identification_p_flist; - break; + else + { + for(IdentificationGroup *identification_p_flist : identification_list) + { + if(identification_p_flist->containSample( + msrun_sp.get()->getSampleName())) + { + identification_group_p = identification_p_flist; + break; } } - if (identification_group_p == nullptr) { - identification_group_p = p_project->newIdentificationGroup(); + if(identification_group_p == nullptr) + { + identification_group_p = p_project->newIdentificationGroup(); } } - identification_group_p->addIdentificationDataSourceP(this); - MascotDatParser mascot_parser(p_project, identification_group_p, this); + 
identification_group_p->addIdentificationDataSourceP(this); + + PepXmlSaxHandler *parser = + new PepXmlSaxHandler(p_project, identification_group_p, this); + + QXmlSimpleReader simplereader; + simplereader.setContentHandler(parser); + simplereader.setErrorHandler(parser); + + QFile qfile(_pep_xml_file.absoluteFilePath()); + QXmlInputSource xmlInputSource(&qfile); + + if(simplereader.parse(xmlInputSource)) + { + + qfile.close(); + } + else + { + qDebug() << parser->errorString(); + // throw PappsoException( + // QObject::tr("error reading tandem XML result file :\n").append( + // parser->errorString())); + + qfile.close(); + + throw pappso::PappsoException( + QObject::tr("Error reading %1 pep xml file :\n %2") + .arg(_pep_xml_file.absoluteFilePath()) + .arg(parser->errorString())); + } - QFile qfile(_pep_xml_file.absoluteFilePath()); - mascot_parser.parse(&qfile); - - qfile.close(); - qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; + qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; } diff --git a/src/core/peptideevidence.cpp b/src/core/peptideevidence.cpp index ee20bb878..5cfd69cf4 100644 --- a/src/core/peptideevidence.cpp +++ b/src/core/peptideevidence.cpp @@ -2,210 +2,315 @@ * \file utils/peptideevidence.cpp * \date 18/11/2017 * \author Olivier Langella - * \brief peptide evidence : a peptide sequence + spectrum + identification engine evaluation (psm) + * \brief peptide evidence : a peptide sequence + spectrum + identification + * engine evaluation (psm) */ /******************************************************************************* -* Copyright (c) 2017 Olivier Langella <Olivier.Langella@u-psud.fr>. -* -* This file is part of XTPcpp. -* -* XTPcpp is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. 
-* -* XTPcpp is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. -* -* Contributors: -* Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and implementation -******************************************************************************/ + * Copyright (c) 2017 Olivier Langella <Olivier.Langella@u-psud.fr>. + * + * This file is part of XTPcpp. + * + * XTPcpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * XTPcpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Contributors: + * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and + *implementation + ******************************************************************************/ #include "peptideevidence.h" #include <pappsomspp/pappsoexception.h> std::hash<std::string> PeptideEvidence::_hash_fn; -PeptideEvidence::PeptideEvidence(MsRun * msrunid_sp, unsigned int scan) { - _msrunid_sp = msrunid_sp; - _scan = scan; +PeptideEvidence::PeptideEvidence(MsRun *msrunid_sp, unsigned int scan) +{ + _msrunid_sp = msrunid_sp; + _scan = scan; - _hash_sample_scan = PeptideEvidence::_hash_fn (QString("%1 %2").arg(msrunid_sp->getXmlId()).arg(_scan).toStdString()); + _hash_sample_scan = PeptideEvidence::_hash_fn( + QString("%1 %2").arg(msrunid_sp->getXmlId()).arg(_scan).toStdString()); + _identification_engine = IdentificationEngine::unknown; } -PeptideEvidence::PeptideEvidence(const PeptideEvidence & other): PeptideEvidence(other._msrunid_sp, other._scan) { - _hash_sample_scan = other._hash_sample_scan; - _sp_grp_peptide = other._sp_grp_peptide; - _peptide_sp = other._peptide_sp; - _rt = other._rt; - _evalue = other._evalue; - _exp_mass = other._exp_mass; - _charge= other._charge; - _p_identification_source = other._p_identification_source; - _params = other._params; - _checked = other._checked; - _proxy_valid = other._proxy_valid; - +PeptideEvidence::PeptideEvidence(const PeptideEvidence &other) + : PeptideEvidence(other._msrunid_sp, other._scan) +{ + _hash_sample_scan = other._hash_sample_scan; + _sp_grp_peptide = other._sp_grp_peptide; + _peptide_sp = other._peptide_sp; + _rt = other._rt; + _evalue = other._evalue; + _exp_mass = other._exp_mass; + _charge = other._charge; + _p_identification_source = other._p_identification_source; + _params = other._params; + _checked = other._checked; + _proxy_valid = other._proxy_valid; + _identification_engine = other._identification_engine; } -pappso::mz PeptideEvidence::getTheoreticalMz() const { - return 
(getPeptideXtpSp().get()->getMz(getCharge())); +pappso::mz +PeptideEvidence::getTheoreticalMz() const +{ + return (getPeptideXtpSp().get()->getMz(getCharge())); } -PeptideEvidenceSp PeptideEvidence::makePeptideEvidenceSp() const { - return std::make_shared<PeptideEvidence>(*this); +PeptideEvidenceSp +PeptideEvidence::makePeptideEvidenceSp() const +{ + return std::make_shared<PeptideEvidence>(*this); } -void PeptideEvidence::updateAutomaticFilters(const AutomaticFilterParameters & automatic_filter_parameters) { - _proxy_valid = false; +void +PeptideEvidence::updateAutomaticFilters( + const AutomaticFilterParameters &automatic_filter_parameters) +{ + _proxy_valid = false; - _proxy_valid = this->_p_identification_source->isValid(this, automatic_filter_parameters); + _proxy_valid = + this->_p_identification_source->isValid(this, automatic_filter_parameters); +} +void +PeptideEvidence::setRetentionTime(pappso::pappso_double rt) +{ + _rt = rt; } -void PeptideEvidence::setRetentionTime(pappso::pappso_double rt) { - _rt = rt; +void +PeptideEvidence::setEvalue(pappso::pappso_double evalue) +{ + _evalue = evalue; } -void PeptideEvidence::setEvalue(pappso::pappso_double evalue) { - _evalue = evalue; + + +void +PeptideEvidence::setIdentificationEngine( + IdentificationEngine identification_engine) +{ + _identification_engine = identification_engine; } /** \brief set specific parameter value */ -void PeptideEvidence::setParam(PeptideEvidenceParam param, const QVariant& value) { - _params.insert(std::pair<PeptideEvidenceParam, QVariant>(param, value)); -} -const QVariant PeptideEvidence::getParam(PeptideEvidenceParam param) const { - try { - return _params.at(param); +void +PeptideEvidence::setParam(PeptideEvidenceParam param, const QVariant &value) +{ + _params.insert(std::pair<PeptideEvidenceParam, QVariant>(param, value)); +} +const QVariant +PeptideEvidence::getParam(PeptideEvidenceParam param) const +{ + try + { + return _params.at(param); } - catch (std::out_of_range) { - 
return QVariant(); + catch(std::out_of_range) + { + return QVariant(); } } -const std::map<PeptideEvidenceParam, QVariant> & PeptideEvidence::getParamList() const { - return _params; +const std::map<PeptideEvidenceParam, QVariant> & +PeptideEvidence::getParamList() const +{ + return _params; } -std::size_t PeptideEvidence::getHashPeptideMassSample() const { - return PeptideEvidence::_hash_fn (QString("%1 %2").arg(_peptide_sp.get()->toAbsoluteString()).arg(_msrunid_sp->getXmlId()).toStdString()); +std::size_t +PeptideEvidence::getHashPeptideMassSample() const +{ + return PeptideEvidence::_hash_fn(QString("%1 %2") + .arg(_peptide_sp.get()->toAbsoluteString()) + .arg(_msrunid_sp->getXmlId()) + .toStdString()); } -std::size_t PeptideEvidence::getHashSampleScan() const { - return _hash_sample_scan; -} -pappso::pappso_double PeptideEvidence::getEvalue() const { - return _evalue; -} -pappso::pappso_double PeptideEvidence::getExperimentalMass() const { - return _exp_mass; -} -pappso::pappso_double PeptideEvidence::getExperimentalMhplus() const { - return _exp_mass + pappso::MHPLUS; +std::size_t +PeptideEvidence::getHashSampleScan() const +{ + return _hash_sample_scan; +} +pappso::pappso_double +PeptideEvidence::getEvalue() const +{ + return _evalue; +} +pappso::pappso_double +PeptideEvidence::getExperimentalMass() const +{ + return _exp_mass; +} +pappso::pappso_double +PeptideEvidence::getExperimentalMhplus() const +{ + return _exp_mass + pappso::MHPLUS; } -pappso::pappso_double PeptideEvidence::getExperimentalMz() const { - pappso::mz mz = _exp_mass; - for (unsigned int i=0; i < _charge; i++) { - mz+=pappso::MHPLUS; +pappso::pappso_double +PeptideEvidence::getExperimentalMz() const +{ + pappso::mz mz = _exp_mass; + for(unsigned int i = 0; i < _charge; i++) + { + mz += pappso::MHPLUS; } - mz = mz/_charge; - return mz; -} -void PeptideEvidence::setExperimentalMass(pappso::pappso_double exp_mass) { - _exp_mass =exp_mass; -} -pappso::mz PeptideEvidence::getDeltaMass() const { 
- return ((_exp_mass+pappso::MHPLUS) - _peptide_sp.get()->getMz(1)); -} -pappso::mz PeptideEvidence::getPpmDeltaMass() const { - //return (_peptide_sp.get()->getMz(1) - (_exp_mass+pappso::MHPLUS)); - pappso::pappso_double diff = getDeltaMass(); - while (diff > 0.5) { - diff = diff - pappso::DIFFC12C13; + mz = mz / _charge; + return mz; +} +void +PeptideEvidence::setExperimentalMass(pappso::pappso_double exp_mass) +{ + _exp_mass = exp_mass; +} +pappso::mz +PeptideEvidence::getDeltaMass() const +{ + return ((_exp_mass + pappso::MHPLUS) - _peptide_sp.get()->getMz(1)); +} +pappso::mz +PeptideEvidence::getPpmDeltaMass() const +{ + // return (_peptide_sp.get()->getMz(1) - (_exp_mass+pappso::MHPLUS)); + pappso::pappso_double diff = getDeltaMass(); + while(diff > 0.5) + { + diff = diff - pappso::DIFFC12C13; } - diff = (diff / getPeptideXtpSp().get()->getMz(1)) * pappso::ONEMILLION; - return diff; + diff = (diff / getPeptideXtpSp().get()->getMz(1)) * pappso::ONEMILLION; + return diff; } -void PeptideEvidence::setCharge(unsigned int charge) { - _charge =charge; +void +PeptideEvidence::setCharge(unsigned int charge) +{ + _charge = charge; } -void PeptideEvidence::setPeptideXtpSp (PeptideXtpSp peptide) { - _peptide_sp = peptide; +void +PeptideEvidence::setPeptideXtpSp(PeptideXtpSp peptide) +{ + _peptide_sp = peptide; } -void PeptideEvidence::setChecked(bool arg1) { - _checked = arg1; +void +PeptideEvidence::setChecked(bool arg1) +{ + _checked = arg1; } -ValidationState PeptideEvidence::getValidationState() const { - if (isGrouped()) { - return ValidationState::grouped; - } else if (isValidAndChecked()) { - return ValidationState::validAndChecked; - } else if (isValid()) { - return ValidationState::valid; +ValidationState +PeptideEvidence::getValidationState() const +{ + if(isGrouped()) + { + return ValidationState::grouped; + } + else if(isValidAndChecked()) + { + return ValidationState::validAndChecked; } - return ValidationState::notValid; + else if(isValid()) + { + return 
ValidationState::valid; + } + return ValidationState::notValid; } -bool PeptideEvidence::isValid() const { - return _proxy_valid; +bool +PeptideEvidence::isValid() const +{ + return _proxy_valid; } -bool PeptideEvidence::isChecked() const { - return _checked; +bool +PeptideEvidence::isChecked() const +{ + return _checked; } -bool PeptideEvidence::isValidAndChecked() const { - return _proxy_valid && _checked; +bool +PeptideEvidence::isValidAndChecked() const +{ + return _proxy_valid && _checked; } -bool PeptideEvidence::isGrouped() const { - if (_sp_grp_peptide.get() == nullptr) { - return false; +bool +PeptideEvidence::isGrouped() const +{ + if(_sp_grp_peptide.get() == nullptr) + { + return false; } - if (_sp_grp_peptide.get()->getGroupNumber() ==0) { - return false; + if(_sp_grp_peptide.get()->getGroupNumber() == 0) + { + return false; } - return true; + return true; } -void PeptideEvidence::setIdentificationDataSource(IdentificationDataSource* identification_source) { - _p_identification_source = identification_source; +void +PeptideEvidence::setIdentificationDataSource( + IdentificationDataSource *identification_source) +{ + _p_identification_source = identification_source; + + if(_identification_engine == IdentificationEngine::unknown) + { + _identification_engine = identification_source->getIdentificationEngine(); + } } -IdentificationDataSource* PeptideEvidence::getIdentificationDataSource () const { - return _p_identification_source; +IdentificationDataSource * +PeptideEvidence::getIdentificationDataSource() const +{ + return _p_identification_source; } -unsigned int PeptideEvidence::getScan() const { - return _scan; +unsigned int +PeptideEvidence::getScan() const +{ + return _scan; } -pappso::pappso_double PeptideEvidence::getRetentionTime() const { - return _rt; +pappso::pappso_double +PeptideEvidence::getRetentionTime() const +{ + return _rt; } -unsigned int PeptideEvidence::getCharge() const { - return _charge; +unsigned int +PeptideEvidence::getCharge() 
const +{ + return _charge; } -const PeptideXtpSp & PeptideEvidence::getPeptideXtpSp() const { - //if (_sp_grp_peptide.get() == nullptr) { - // throw pappso::PappsoException(QObject::tr("Peptide is null in %1 %2").arg(_msrunid_sp->getXmlId()).arg(this->getScan())); - //} - return _peptide_sp; +const PeptideXtpSp & +PeptideEvidence::getPeptideXtpSp() const +{ + // if (_sp_grp_peptide.get() == nullptr) { + // throw pappso::PappsoException(QObject::tr("Peptide is null in %1 + // %2").arg(_msrunid_sp->getXmlId()).arg(this->getScan())); + //} + return _peptide_sp; } -const MsRun * PeptideEvidence::getMsRunP() const { - return _msrunid_sp; +const MsRun * +PeptideEvidence::getMsRunP() const +{ + return _msrunid_sp; } -void PeptideEvidence::setGrpPeptideSp(const pappso::GrpPeptideSp & sp_grp_peptide) { - _sp_grp_peptide =sp_grp_peptide; +void +PeptideEvidence::setGrpPeptideSp(const pappso::GrpPeptideSp &sp_grp_peptide) +{ + _sp_grp_peptide = sp_grp_peptide; } -const pappso::GrpPeptideSp & PeptideEvidence::getGrpPeptideSp() const { - return _sp_grp_peptide; +const pappso::GrpPeptideSp & +PeptideEvidence::getGrpPeptideSp() const +{ + return _sp_grp_peptide; } - diff --git a/src/core/peptideevidence.h b/src/core/peptideevidence.h index 199310e33..628aaf09e 100644 --- a/src/core/peptideevidence.h +++ b/src/core/peptideevidence.h @@ -93,6 +93,12 @@ public : void setCharge(unsigned int charge); void setPeptideXtpSp (PeptideXtpSp peptide); void setIdentificationDataSource(IdentificationDataSource* identification_source); + + + /** @brief sets the identification engine + * by default, this is the identification engine of the datasource + */ + void setIdentificationEngine(IdentificationEngine identification_engine); void setChecked(bool arg1); bool isChecked() const; bool isValid() const; @@ -157,5 +163,9 @@ private : /** @brief automatic filter result (false by default) */ bool _proxy_valid = false; + + /** @brief the search/identification engine that found this evidence + */ + 
IdentificationEngine _identification_engine; }; #endif // PEPTIDEEVIDENCE_H diff --git a/src/input/pepxmlsaxhandler.cpp b/src/input/pepxmlsaxhandler.cpp index 0879a049f..a031df10c 100644 --- a/src/input/pepxmlsaxhandler.cpp +++ b/src/input/pepxmlsaxhandler.cpp @@ -34,27 +34,31 @@ #include "../core/peptideevidence.h" #include "../utils/peptidestore.h" #include "../utils/proteinstore.h" +#include "../utils/utils.h" -PepXmlSaxHandler::PepXmlSaxHandler(WorkMonitorInterface *p_monitor, - Project *p_project) + +PepXmlSaxHandler::PepXmlSaxHandler( + Project *p_project, IdentificationGroup *p_identification_group, + IdentificationDataSource *p_identification_data_source) : _p_project(p_project) { qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; - _p_monitor = p_monitor; - qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; + _p_identification_group = p_identification_group; + + _p_identification_data_source = p_identification_data_source; + _sp_msrun = p_identification_data_source->getMsRunSp(); } PepXmlSaxHandler::~PepXmlSaxHandler() { } - bool PepXmlSaxHandler::startElement(const QString &namespaceURI, const QString &localName, const QString &qName, const QXmlAttributes &attributes) { - qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; + // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; _tag_stack.push_back(qName); bool is_ok = true; @@ -63,124 +67,58 @@ PepXmlSaxHandler::startElement(const QString &namespaceURI, // startElement_group if(_tag_stack.size() == 1) { - _is_xtpcpp_xpip = true; - if(qName != "xpip") + if(qName != "msms_pipeline_analysis") { - _is_xtpcpp_xpip = false; throw pappso::ExceptionNotFound( QObject::tr("ERROR this file is not a pep xml file %1") .arg(qName)); } } - else if(qName == "protein_match") - { - if(_count_total % 100 == 0) - { - _p_monitor->message(QString("reading protein matches %1 on %2") - .arg(_count_protein_matches) - .arg(_total_protein_matches), - _count_total); - } - is_ok = 
startElement_protein_match(attributes); - } - else if(qName == "peptide_match") + else if(qName == "msms_pipeline_analysis") { - is_ok = startElement_peptide_match(attributes); + is_ok = startElement_msms_pipeline_analysis(attributes); } - else if(qName == "protein") + else if(qName == "msms_run_summary") { - if(_count_total % 100 == 0) - { - _p_monitor->message(QString("reading proteins %1 on %2") - .arg(_count_proteins) - .arg(_total_proteins), - _count_total); - } - is_ok = startElement_protein(attributes); + is_ok = startElement_msms_run_summary(attributes); } - else if(qName == "identification_source") + else if(qName == "search_database") { - is_ok = startElement_identification_source(attributes); + is_ok = startElement_search_database(attributes); } - else if(qName == "param") + else if(qName == "search_summary") { - is_ok = startElement_param(attributes); + is_ok = startElement_search_summary(attributes); } - else if(qName == "stat") + else if(qName == "spectrum_query") { - is_ok = startElement_stat(attributes); + is_ok = startElement_spectrum_query(attributes); } - else if(qName == "identification_group") + else if(qName == "search_hit") { - is_ok = startElement_identification_group(attributes); + is_ok = startElement_search_hit(attributes); } - else if(qName == "peptide_evidence_list") + else if(qName == "alternative_protein") { - is_ok = startElement_peptide_evidence_list(attributes); + is_ok = startElement_alternative_protein(attributes); } - else if(qName == "counts") + + else if(qName == "peptideprophet_result") { - is_ok = startElement_counts(attributes); + is_ok = startElement_peptideprophet_result(attributes); } - else if(qName == "label_method") + else if(qName == "interprophet_result") { - is_ok = startElement_label_method(attributes); + is_ok = startElement_interprophet_result(attributes); } //<sample value="P6_08_10"/> - else if(qName == "msrun") - { - is_ok = startElement_msrun(attributes); - } - else if(qName == "peptide") - { - 
if(_count_total % 100 == 0) - { - _p_monitor->message(QObject::tr("reading peptide %1 on %2") - .arg(_count_peptides) - .arg(_total_peptides), - _count_total); - } - is_ok = startElement_peptide(attributes); - } - else if(qName == "peptide_evidence") - { - if(_count_total % 100 == 0) - { - _p_monitor->message( - QObject::tr("reading peptide evidence %1 on %2") - .arg(_count_peptide_evidences) - .arg(_total_peptide_evidences), - _count_total); - } - is_ok = startElement_peptide_evidence(attributes); - } - else if(qName == "modification") - { - is_ok = startElement_modification(attributes); - } - else if(qName == "mod") - { - is_ok = startElement_mod(attributes); - } - else if(qName == "filter_params") - { - is_ok = startElement_filter_params(attributes); - } - else if(qName == "description") - { - is_ok = startElement_description(attributes); - } - else if(qName == "fasta_file") - { - is_ok = startElement_fasta_file(attributes); - } - else if(qName == "contaminants") + else if(qName == "search_score") { - is_ok = startElement_contaminants(attributes); + is_ok = startElement_search_score(attributes); } - else if(qName == "decoys") + else if(qName == "mod_aminoacid_mass") { - is_ok = startElement_decoys(attributes); + is_ok = startElement_mod_aminoacid_mass(attributes); } _current_text.clear(); @@ -210,46 +148,18 @@ PepXmlSaxHandler::endElement(const QString &namespaceURI, const QString &localName, const QString &qName) { - qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; + // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__; bool is_ok = true; // endElement_peptide_list try { - if(qName == "protein") - { - is_ok = endElement_protein(); - } - else if(qName == "peptide") - { - is_ok = endElement_peptide(); - } - else if(qName == "msrun") - { - is_ok = endElement_msrun(); - } - else if(qName == "sequence") - { - is_ok = endElement_sequence(); - } - else if(qName == "protein_match") - { - is_ok = endElement_protein_match(); - } - else 
if(qName == "peptide_evidence") - { - is_ok = endElement_peptide_evidence(); - } - else if(qName == "peptide_evidence_list") - { - is_ok = endElement_peptide_evidence_list(); - } - else if(qName == "identification_group") + if(qName == "search_hit") { - is_ok = endElement_identification_group(); + is_ok = endElement_search_hit(); } - else if(qName == "identification_source") + else if(qName == "modification_info") { - is_ok = endElement_identification_source(); + is_ok = endElement_modification_info(); } // end of detection_moulon @@ -289,7 +199,9 @@ PepXmlSaxHandler::endElement(const QString &namespaceURI, bool PepXmlSaxHandler::startElement_msms_pipeline_analysis(QXmlAttributes attributes) { + bool is_ok = true; QString original_filename = attributes.value("summary_xml"); + return is_ok; } // <msms_run_summary @@ -299,17 +211,31 @@ PepXmlSaxHandler::startElement_msms_pipeline_analysis(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_msms_run_summary(QXmlAttributes attributes) { + bool is_ok = true; QString mz_datafile = QString("%1%2") .arg(attributes.value("base_name")) .arg(attributes.value("raw_data")); + return is_ok; } // <search_database // local_path="/gorgone/pappso/abrf_2015/fasta/iPRG2015.fasta" type="AA"/> +//<search_database +// local_path="/gorgone/pappso/abrf_2015/fasta/iPRG2015.TargDecoy.fasta" +// database_name="SearchDB_1" size_in_db_entries="13256" type="NA"/> + bool PepXmlSaxHandler::startElement_search_database(QXmlAttributes attributes) { - QString fastafile = attributes.value("local_path"); + bool is_ok = true; + FastaFile fasta_file(attributes.value("local_path")); + if(!attributes.value("database_name").isEmpty()) + { + fasta_file.setXmlId(attributes.value("database_name")); + } + _p_identification_data_source->addFastaFile( + _p_project->getFastaFileStore().getInstance(fasta_file)); + return is_ok; } // <search_summary @@ -319,7 +245,9 @@ PepXmlSaxHandler::startElement_search_database(QXmlAttributes attributes) bool 
PepXmlSaxHandler::startElement_search_summary(QXmlAttributes attributes) { + bool is_ok = true; _current_search_engine = attributes.value("search_engine"); + return is_ok; } // <spectrum_query spectrum="JD_06232014_sample1-A.00005.00005.2" @@ -331,36 +259,67 @@ PepXmlSaxHandler::startElement_search_summary(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes) { + bool is_ok = true; unsigned int start_scan = attributes.value("start_scan").toUInt(); unsigned int end_scan = attributes.value("end_scan").toUInt(); if(start_scan != end_scan) { - String message = "ERROR reading pepxml file :\n" + - "unable to read search results from '" + - this.currentSearchEngine + "' as start " + - currentStartScan + " and end " + currentEndScan + - " scans are different"; - logger.error(message); - throw new MSMSException(message); + throw pappso::PappsoException( + QObject::tr("ERROR reading pepxml file :\nunable to read search " + "results from '%1' as start %2 and end %3 scans are " + "different") + .arg(_current_search_engine) + .arg(start_scan) + .arg(end_scan)); } - currentZ = new Integer(attrs.getValue("assumed_charge")); - if(attrs.getValue("retention_time_sec") == null) + _current_charge = attributes.value("assumed_charge").toUInt(); + if(attributes.value("retention_time_sec").isEmpty()) { - String message = - "ERROR reading pepxml file :\n" + - "unable to read search results from '" + this.currentSearchEngine + - "' as retention time is not given in spectrum_query elements"; - logger.warn(message); + QString message = + QObject::tr("ERROR reading pepxml file :\n" + "unable to read search results from '%1' as retention time " + "is not given in spectrum_query elements") + .arg(_current_search_engine); + qDebug() << message; // throw new MSMSException(message); - currentRt = (double)0; + _current_retention_time = 0; } else { - currentRt = new Double(attrs.getValue("retention_time_sec")); + _current_retention_time = + 
attributes.value("retention_time_sec").toDouble(); } - currentPrecursorNeutralMass = - new Double(attrs.getValue("precursor_neutral_mass")) + Utils.mhplus; + _current_precursor_neutral_mass = + attributes.value("precursor_neutral_mass").toDouble(); + return is_ok; +} + +//<alternative_protein protein="sp|P46784|RS10B_YEAST" protein_descr="40S +// ribosomal protein S10-B OS=Saccharomyces cerevisiae (strain ATCC 204508 +// \ +//S288c) GN=RPS10B PE=1 SV=1" num_tol_term="2" peptide_prev_aa="K" +// peptide_next_aa="N"/> +bool +PepXmlSaxHandler::startElement_alternative_protein(QXmlAttributes attributes) +{ + bool is_ok = true; + ProteinXtpSp sp_xtp_protein = ProteinXtp().makeProteinXtpSp(); + sp_xtp_protein.get()->setAccession(attributes.value("protein")); + sp_xtp_protein.get()->setDescription(attributes.value("protein_descr")); + sp_xtp_protein.get()->setFastaFileP( + _p_identification_data_source->getFastaFileList()[0].get()); + + sp_xtp_protein = _p_project->getProteinStore().getInstance(sp_xtp_protein); + + _p_protein_match_list.push_back( + _p_identification_group->getProteinMatchInstance( + sp_xtp_protein.get()->getAccession())); + + _p_protein_match_list.back()->setProteinXtpSp(sp_xtp_protein); + + _p_protein_match_list.back()->setChecked(true); + return is_ok; } // <search_hit hit_rank="5" peptide="MKDFSTK" peptide_prev_aa="K" @@ -371,81 +330,115 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_search_hit(QXmlAttributes attributes) { - QString proteinName = attributes.value("protein"); + bool is_ok = true; + _p_protein_match_list.clear(); + //_current_protein.setAccession(attributes.value("protein")); + ProteinXtpSp sp_xtp_protein = ProteinXtp().makeProteinXtpSp(); + sp_xtp_protein.get()->setAccession(attributes.value("protein")); + sp_xtp_protein.get()->setDescription(attributes.value("protein_descr")); + sp_xtp_protein.get()->setFastaFileP( + 
_p_identification_data_source->getFastaFileList()[0].get()); + + sp_xtp_protein = _p_project->getProteinStore().getInstance(sp_xtp_protein); + + _p_protein_match_list.push_back( + _p_identification_group->getProteinMatchInstance( + sp_xtp_protein.get()->getAccession())); + + _p_protein_match_list[0]->setProteinXtpSp(sp_xtp_protein); + + _p_protein_match_list[0]->setChecked(true); + + _current_peptide_sp = + PeptideXtp(attributes.value("peptide").simplified()).makePeptideXtpSp(); + + + _p_peptide_evidence = new PeptideEvidence(_sp_msrun.get(), _scan); + + _p_peptide_evidence->setRetentionTime(_current_retention_time); + _p_peptide_evidence->setCharge(_current_charge); + + pappso::pappso_double xtandem_mhtheoretical = + attributes.value("mh").simplified().toDouble(); + pappso::pappso_double xtandem_delta = + attributes.value("delta").simplified().toDouble(); + + // delta – the spectrum mh minus the calculated mh + + // exp mass computed from X!Tandem mh : - Protein protein = new Protein(); - protein.set_description(proteinName); - if(identification.contain_protein(protein)) + pappso::pappso_double _mass_obser = + attributes.value("calc_neutral_pep_mass").toDouble() + + attributes.value("massdiff").toDouble(); + + + _p_peptide_evidence->setExperimentalMass(_mass_obser); + + _p_peptide_evidence->setIdentificationDataSource( + _p_identification_data_source); + + IdentificationEngine search_engine = IdentificationEngine::unknown; + if(_current_search_engine == "X! Tandem (k-score)") { - // new protein, new match - currentMatch = new Match(); - currentMatch.set_protein_match(protein); - identification.add_protein(protein, currentMatch); + // search_engine="X! 
Tandem (k-score)" + search_engine = IdentificationEngine::XTandem; } - - else + else if(_current_search_engine == "OMSSA") + { + search_engine = IdentificationEngine::OMSSA; + } + else if(_current_search_engine == "Comet") { - // get existing match for this protein - currentMatch = (Match)identification.get_match(protein); + search_engine = IdentificationEngine::Comet; + } + else if(_current_search_engine == "MS-GF+") + { + search_engine = IdentificationEngine::MSGFplus; } - protein = currentMatch.get_protein_match(); - protein.setDatabase( - identification.getDatabaseSet().getInstance(this.fastaFile.getName())); - this.currentPeptide = new PeptideProphet(); - currentPeptide.set_sample_id( - identification.getMsRunSet().getInstance(identificationDataSource), - identificationDataSource); + if(search_engine == IdentificationEngine::unknown) + { + throw pappso::PappsoException( + QObject::tr( + "ERROR reading pepxml file :\nsearch engine %1 is not known.") + .arg(_current_search_engine)); + } - _current_peptide_sp = - PeptideXtp(attributes.value("peptide").simplified()).makePeptideXtpSp(); + _p_peptide_evidence->setIdentificationEngine(search_engine); + _p_peptide_evidence->setChecked(true); - currentPeptide.set_pre(attrs.getValue("peptide_prev_aa")); - currentPeptide.set_post(attrs.getValue("peptide_next_aa")); - - currentPeptide.set_scan(this.currentEndScan.intValue()); - Double calc_neutral_pep_mass = - new Double(attrs.getValue("calc_neutral_pep_mass")) + Utils.mhplus; - Double massdiff = new Double(attrs.getValue("massdiff")); - - currentPeptide.set_mhplus_theo(calc_neutral_pep_mass); - currentPeptide.set_deltamass(massdiff); - currentPeptide.set_mhplus_obser(currentPrecursorNeutralMass); - currentPeptide.set_charge(this.currentZ); - currentPeptide.setRt(this.currentRt.floatValue()); - - /* - * if (Math.abs(massdiff) > 1) { String message = - * "massdiff is too high"; logger.error(message); throw new - * MSMSException(message); } - */ - // valeur specifique de la 
séquence matche - // peptide.set_start(Integer.parseInt(attrs.getValue("start"))); - // peptide.set_stop(Integer.parseInt(attrs.getValue("end"))); - // peptide.set_evalue(Float.valueOf(attrs.getValue("expect"))); - // peptide.set_hypercorr(Float.valueOf(attrs.getValue("hyperscore"))); + return is_ok; } bool -XtpXpipSaxHandler::endElement_search_hit() +PepXmlSaxHandler::endElement_search_hit() { - if(currentPeptideProphetProbability != null) + + bool is_ok = true; + _current_peptide_sp = + _p_project->getPeptideStore().getInstance(_current_peptide_sp); + + _p_peptide_evidence->setPeptideXtpSp(_current_peptide_sp); + + _current_peptide_match.setPeptideEvidenceSp( + _p_peptide_evidence->getIdentificationDataSource() + ->getPeptideEvidenceStore() + .getInstance(_p_peptide_evidence)); + if(_p_protein_match_list.size() == 0) { - currentPeptide.setProphetProbability(currentPeptideProphetProbability); + throw pappso::PappsoException( + QObject::tr("ERROR " + "PepXmlSaxHandler::endElement_search_hit:\n_p_protein_" + "match_list.size() == 0")); } - if(currentPeptideInterProphetProbability != null) + + for(auto &protein_match : _p_protein_match_list) { - currentPeptide.setInterProphetProbability( - currentPeptideInterProphetProbability); + protein_match->addPeptideMatch(_current_peptide_match); } - currentPeptide.setSearchEngine(this.currentSearchEngine); - identification.add_peptide(this.currentPeptide); - currentMatch.add_peptide_match(currentPeptide); - - currentPeptideProphetProbability = null; - currentPeptideInterProphetProbability = null; - currentExpectedValue = null; + delete _p_peptide_evidence; + return is_ok; } // <peptideprophet_result probability="0.0245" @@ -453,8 +446,11 @@ XtpXpipSaxHandler::endElement_search_hit() bool PepXmlSaxHandler::startElement_peptideprophet_result(QXmlAttributes attributes) { - this.currentPeptideProphetProbability = - Double.parseDouble(attrs.getValue("probability")); + bool is_ok = true; + _p_peptide_evidence->setParam( + 
PeptideEvidenceParam::peptide_prophet_probability, + attributes.value("probability").toDouble()); + return is_ok; } // <interprophet_result probability="0.00886064" @@ -462,8 +458,11 @@ PepXmlSaxHandler::startElement_peptideprophet_result(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_interprophet_result(QXmlAttributes attributes) { - this.currentPeptideInterProphetProbability = - Double.parseDouble(attrs.getValue("probability")); + bool is_ok = true; + _p_peptide_evidence->setParam( + PeptideEvidenceParam::peptide_inter_prophet_probability, + attributes.value("probability").toDouble()); + return is_ok; } /* @@ -475,21 +474,28 @@ PepXmlSaxHandler::startElement_interprophet_result(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_search_score(QXmlAttributes attributes) { - String name = attrs.getValue("name"); - String valueStr = attrs.getValue("value"); + bool is_ok = true; + QString name = attributes.value("name"); + QString valueStr = attributes.value("value"); if(!valueStr.isEmpty()) { - if(name.equals("expect")) + if(name == "expect") { - currentExpectedValue = new Double(valueStr); - currentPeptide.set_evalue(currentExpectedValue.floatValue()); + _p_peptide_evidence->setEvalue(valueStr.simplified().toDouble()); + } + if(name == "EValue") + { + _p_peptide_evidence->setEvalue(valueStr.simplified().toDouble()); } // <search_score name="hyperscore" value="232"/> - if(name.equals("hyperscore")) + if(name == "hyperscore") { - currentPeptide.set_hypercorr(Float.parseFloat(valueStr)); + _p_peptide_evidence->setParam( + PeptideEvidenceParam::tandem_hyperscore, + QVariant(attributes.value("hyperscore").toDouble())); } } + return is_ok; } // <modification_info modified_peptide="SQRDCR"> <mod_aminoacid_mass @@ -497,59 +503,74 @@ PepXmlSaxHandler::startElement_search_score(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_mod_aminoacid_mass(QXmlAttributes attributes) { - double massFloat = attributes.value("mass").toDouble(); - int 
position = attributes.value("position").toUInt(); - String aa = _current_peptide_sp.get()->getSequence().substring(position - 1, position); - Float massDiff = massFloat - Utils.getAaMass(aa.charAt(0)); - - this.currentPeptide.set_Modifs(aa, position - 1, massDiff); - // modifs liste for isotopic analysis - identification.add_modifs_to_liste(new Modifs(massFloat)); - - pappso::AaModificationP modif = Utils::guessAaModificationPbyMonoisotopicMassDelta(attributes.value("modified").simplified().toDouble()); - -pappso::AaModificationP modif = pappso::AaModification::getInstance(attributes.value("mass").simplified()); - unsigned int position = attributes.value("position").simplified().toUInt(); - _current_peptide_sp.get()->addAaModification(modif, position-1); + bool is_ok = true; + double mass = attributes.value("mass").toDouble(); + unsigned int position = attributes.value("position").toUInt() - 1; + const pappso::Aa &aa = _current_peptide_sp.get()->getConstAa(position); + double mass_modif = mass - aa.getMass(); + + pappso::AaModificationP modif = + Utils::guessAaModificationPbyMonoisotopicMassDelta(mass_modif); + + _current_peptide_sp.get()->addAaModification(modif, position); + return is_ok; } bool PepXmlSaxHandler::endElement_modification_info() { + bool is_ok = true; + return is_ok; } -bool PepXmlSaxHandler::error(const QXmlParseException &exception) { - _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" - "%3").arg(exception.lineNumber()).arg(exception.columnNumber()).arg( - exception.message()); +bool +PepXmlSaxHandler::error(const QXmlParseException &exception) +{ + _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" + "%3") + .arg(exception.lineNumber()) + .arg(exception.columnNumber()) + .arg(exception.message()); - return false; + return false; } -bool PepXmlSaxHandler::fatalError(const QXmlParseException &exception) { - _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" - 
"%3").arg(exception.lineNumber()).arg(exception.columnNumber()).arg( - exception.message()); - return false; +bool +PepXmlSaxHandler::fatalError(const QXmlParseException &exception) +{ + _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" + "%3") + .arg(exception.lineNumber()) + .arg(exception.columnNumber()) + .arg(exception.message()); + return false; } -QString PepXmlSaxHandler::errorString() const { - return _errorStr; +QString +PepXmlSaxHandler::errorString() const +{ + return _errorStr; } -bool PepXmlSaxHandler::endDocument() { - return true; +bool +PepXmlSaxHandler::endDocument() +{ + return true; } -bool PepXmlSaxHandler::startDocument() { - return true; +bool +PepXmlSaxHandler::startDocument() +{ + return true; } -bool PepXmlSaxHandler::characters(const QString &str) { - _current_text += str; - return true; +bool +PepXmlSaxHandler::characters(const QString &str) +{ + _current_text += str; + return true; } diff --git a/src/input/pepxmlsaxhandler.h b/src/input/pepxmlsaxhandler.h index 2892c9859..ed38bc289 100644 --- a/src/input/pepxmlsaxhandler.h +++ b/src/input/pepxmlsaxhandler.h @@ -7,24 +7,24 @@ /******************************************************************************* -* Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>. -* -* This file is part of XTPcpp. -* -* XTPcpp is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* XTPcpp is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. 
-* -******************************************************************************/ + * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>. + * + * This file is part of XTPcpp. + * + * XTPcpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * XTPcpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ #ifndef PEPXMLSAXHANDLER_H @@ -42,8 +42,8 @@ class PepXmlSaxHandler : public QXmlDefaultHandler { public: PepXmlSaxHandler(Project *p_project, - IdentificationGroup *p_identification_group, - IdentificationDataSource *p_identification_data_source); + IdentificationGroup *p_identification_group, + IdentificationDataSource *p_identification_data_source); ~PepXmlSaxHandler(); bool startElement(const QString &namespaceURI, const QString &localName, @@ -66,13 +66,18 @@ class PepXmlSaxHandler : public QXmlDefaultHandler private: bool startElement_msms_pipeline_analysis(QXmlAttributes attrs); - bool startElement_protein(QXmlAttributes attributes); - bool startElement_note(QXmlAttributes attributes); - bool startElement_file(QXmlAttributes attributes); - bool startElement_aa(QXmlAttributes attributes); - bool startElement_domain(QXmlAttributes attributes); - bool endElement_domain(); - bool endElement_note(); + bool startElement_msms_run_summary(QXmlAttributes attributes); + bool startElement_search_database(QXmlAttributes attributes); + bool 
startElement_search_summary(QXmlAttributes attributes); + bool startElement_spectrum_query(QXmlAttributes attributes); + bool startElement_search_hit(QXmlAttributes attributes); + bool startElement_alternative_protein(QXmlAttributes attributes); + bool startElement_peptideprophet_result(QXmlAttributes attributes); + bool startElement_interprophet_result(QXmlAttributes attributes); + bool startElement_search_score(QXmlAttributes attributes); + bool startElement_mod_aminoacid_mass(QXmlAttributes attributes); + bool endElement_search_hit(); + bool endElement_modification_info(); private: std::vector<QString> _tag_stack; @@ -84,22 +89,21 @@ class PepXmlSaxHandler : public QXmlDefaultHandler IdentificationDataSource *_p_identification_data_source; MsRunSp _sp_msrun; - ProteinMatch *_p_protein_match; + std::vector<ProteinMatch *> _p_protein_match_list; PeptideEvidence *_p_peptide_evidence; PeptideMatch _current_peptide_match; - ProteinXtp _current_protein; PeptideXtpSp _current_peptide_sp; QMap<QString, pappso::AaModificationP> _map_massstr_aamod; - QString _current_group_label; + QString _current_search_engine; QString _current_group_type; QString _current_note_label; QString _current_note_type; unsigned int _scan; - pappso::pappso_double _mhplus_obser; - unsigned int _charge; - pappso::pappso_double _retention_time; + unsigned int _current_charge; + pappso::pappso_double _current_retention_time; + pappso::pappso_double _current_precursor_neutral_mass; bool _is_protein_description = false; }; diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp index cd3bc25c0..a7fd35aea 100644 --- a/src/input/xtandemsaxhandler.cpp +++ b/src/input/xtandemsaxhandler.cpp @@ -7,26 +7,27 @@ /******************************************************************************* -* Copyright (c) 2017 Olivier Langella <Olivier.Langella@u-psud.fr>. -* -* This file is part of XTPcpp. 
-* -* XTPcpp is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* XTPcpp is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. -* -* Contributors: -* Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and implementation -******************************************************************************/ + * Copyright (c) 2017 Olivier Langella <Olivier.Langella@u-psud.fr>. + * + * This file is part of XTPcpp. + * + * XTPcpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * XTPcpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Contributors: + * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and + *implementation + ******************************************************************************/ #include "xtandemsaxhandler.h" @@ -38,555 +39,721 @@ #include "../utils/utils.h" -XtandemSaxHandler::XtandemSaxHandler(Project * p_project, IdentificationGroup * p_identification_group, - IdentificationDataSource * p_identification_data_source):_p_project(p_project) +XtandemSaxHandler::XtandemSaxHandler( + Project *p_project, IdentificationGroup *p_identification_group, + IdentificationDataSource *p_identification_data_source) + : _p_project(p_project) { - _p_identification_group = p_identification_group; + _p_identification_group = p_identification_group; - _p_identification_data_source =p_identification_data_source; - _sp_msrun = p_identification_data_source->getMsRunSp(); + _p_identification_data_source = p_identification_data_source; + _sp_msrun = p_identification_data_source->getMsRunSp(); } XtandemSaxHandler::~XtandemSaxHandler() { - } -bool XtandemSaxHandler::startElement(const QString & namespaceURI, const QString & localName, - const QString & qName, const QXmlAttributes & attributes) { - // qDebug() << namespaceURI << " " << localName << " " << qName ; - _tag_stack.push_back(qName); - bool is_ok = true; - - try { - //startElement_group - if (qName == "group") { - is_ok = startElement_group(attributes); - } else if (qName == "protein") { - is_ok = startElement_protein(attributes); - } else if (qName == "note") { - is_ok = startElement_note(attributes); +bool +XtandemSaxHandler::startElement(const QString &namespaceURI, + const QString &localName, const QString &qName, + const QXmlAttributes &attributes) +{ + // qDebug() << namespaceURI << " " << localName << " " << qName ; + _tag_stack.push_back(qName); + bool is_ok = true; + + try + { + // startElement_group + if(qName == "group") + { + is_ok = startElement_group(attributes); } - else if (qName == "file") { - is_ok = 
startElement_file(attributes); - - } else if (qName == "aa") { - is_ok = startElement_aa(attributes); - } else if (qName == "domain") { - is_ok = startElement_domain(attributes); + else if(qName == "protein") + { + is_ok = startElement_protein(attributes); + } + else if(qName == "note") + { + is_ok = startElement_note(attributes); + } + else if(qName == "file") + { + is_ok = startElement_file(attributes); + } + else if(qName == "aa") + { + is_ok = startElement_aa(attributes); + } + else if(qName == "domain") + { + is_ok = startElement_domain(attributes); } - _current_text.clear(); + _current_text.clear(); } - catch (pappso::PappsoException & exception_pappso) { - _errorStr = QObject::tr("ERROR in XtandemSaxHandler::startElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat()); - return false; + catch(pappso::PappsoException &exception_pappso) + { + _errorStr = QObject::tr("ERROR in XtandemSaxHandler::startElement tag " + "%1, PAPPSO exception:\n%2") + .arg(qName) + .arg(exception_pappso.qwhat()); + return false; } - catch (std::exception & exception_std) { - _errorStr = QObject::tr("ERROR in XtandemSaxHandler::startElement tag %1, std exception:\n%2").arg(qName).arg(exception_std.what()); - return false; + catch(std::exception &exception_std) + { + _errorStr = + QObject::tr( + "ERROR in XtandemSaxHandler::startElement tag %1, std exception:\n%2") + .arg(qName) + .arg(exception_std.what()); + return false; } - return is_ok; + return is_ok; } -bool XtandemSaxHandler::endElement(const QString & namespaceURI, const QString & localName, - const QString & qName) { +bool +XtandemSaxHandler::endElement(const QString &namespaceURI, + const QString &localName, const QString &qName) +{ - bool is_ok = true; - // endElement_peptide_list - try { - if (qName == "note") + bool is_ok = true; + // endElement_peptide_list + try + { + if(qName == "note") { - is_ok = endElement_note(); - } else if (qName == "domain") + is_ok = endElement_note(); + } + else 
if(qName == "domain") { - is_ok = endElement_domain(); + is_ok = endElement_domain(); } - // end of detection_moulon - // else if ((_tag_stack.size() > 1) && - // (_tag_stack[_tag_stack.size() - 2] == "detection_moulon")) + // end of detection_moulon + // else if ((_tag_stack.size() > 1) && + // (_tag_stack[_tag_stack.size() - 2] == "detection_moulon")) } - catch (pappso::PappsoException & exception_pappso) { - _errorStr = QObject::tr("ERROR in XtandemSaxHandler::endElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat()); - return false; + catch(pappso::PappsoException &exception_pappso) + { + _errorStr = QObject::tr("ERROR in XtandemSaxHandler::endElement tag %1, " + "PAPPSO exception:\n%2") + .arg(qName) + .arg(exception_pappso.qwhat()); + return false; } - catch (std::exception & exception_std) { - _errorStr = QObject::tr("ERROR in XtandemSaxHandler::endElement tag %1, std exception:\n%2").arg(qName).arg(exception_std.what()); - return false; + catch(std::exception &exception_std) + { + _errorStr = + QObject::tr( + "ERROR in XtandemSaxHandler::endElement tag %1, std exception:\n%2") + .arg(qName) + .arg(exception_std.what()); + return false; } - _current_text.clear(); - _tag_stack.pop_back(); + _current_text.clear(); + _tag_stack.pop_back(); - return is_ok; + return is_ok; } -bool XtandemSaxHandler::startElement_group(QXmlAttributes attrs) { - //<group id="1976" mh="1120.529471" z="2" rt="PT667.022S" expect="9.7e-04" label="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC..." 
type="model" sumI="5.34" maxI="35986.9" fI="359.869" act="0" > - bool is_ok = true; - // logger.debug("startElementgroup begin"); - // <group label="performance parameters" type="parameters"> - _current_group_label = attrs.value("label"); - _current_group_type = attrs.value("type"); - if (_current_group_type == "model") { - _scan = attrs.value("id").toUInt(); - _mhplus_obser = attrs.value("mh").toDouble(); - _charge = attrs.value("z").toUInt(); - _retention_time = attrs.value("rt").replace("PT","").replace("S","").toDouble(); +bool +XtandemSaxHandler::startElement_group(QXmlAttributes attrs) +{ + //<group id="1976" mh="1120.529471" z="2" rt="PT667.022S" expect="9.7e-04" + //label="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase + //1)(PEPC 1)(EC..." type="model" sumI="5.34" maxI="35986.9" fI="359.869" + //act="0" > + bool is_ok = true; + // logger.debug("startElementgroup begin"); + // <group label="performance parameters" type="parameters"> + _current_group_label = attrs.value("label"); + _current_group_type = attrs.value("type"); + if(_current_group_type == "model") + { + _scan = attrs.value("id").toUInt(); + _mhplus_obser = attrs.value("mh").toDouble(); + _charge = attrs.value("z").toUInt(); + _retention_time = + attrs.value("rt").replace("PT", "").replace("S", "").toDouble(); } - //label="input parameters" type="parameters" - return is_ok; + // label="input parameters" type="parameters" + return is_ok; } -bool XtandemSaxHandler::startElement_note(QXmlAttributes attributes) { -//<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; - ////parent_gene=GRMZM2G083841</note> - bool is_ok = true; - _current_note_label = attributes.value("label"); - _current_note_type = attributes.value("type"); - _is_protein_description = false; - if (attributes.value("label") == "description") { - if 
(_tag_stack[_tag_stack.size() - 2] == "protein") { - _is_protein_description = true; +bool +XtandemSaxHandler::startElement_note(QXmlAttributes attributes) +{ + //<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate + //carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; + //coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; + ////parent_gene=GRMZM2G083841</note> + bool is_ok = true; + _current_note_label = attributes.value("label"); + _current_note_type = attributes.value("type"); + _is_protein_description = false; + if(attributes.value("label") == "description") + { + if(_tag_stack[_tag_stack.size() - 2] == "protein") + { + _is_protein_description = true; } } - return is_ok; + return is_ok; } -bool XtandemSaxHandler::startElement_file(QXmlAttributes attributes) { - bool is_ok = true; - //<file type="peptide" URL="/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta"/> - if (attributes.value("type") == "peptide") { - //prot_.setDatabase(identification_.getDatabaseSet().getInstance( - // attrs.getValue("URL"))); - if (_p_protein_match == nullptr) { - throw pappso::PappsoException("ERROR in XtandemSaxHandler::startElement_file : _p_protein_match == nullptr"); +bool +XtandemSaxHandler::startElement_file(QXmlAttributes attributes) +{ + bool is_ok = true; + //<file type="peptide" + //URL="/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta"/> + if(attributes.value("type") == "peptide") + { + // prot_.setDatabase(identification_.getDatabaseSet().getInstance( + // attrs.getValue("URL"))); + if(_p_protein_match == nullptr) + { + throw pappso::PappsoException("ERROR in " + "XtandemSaxHandler::startElement_file " + ": _p_protein_match == nullptr"); } - _p_protein_match->getProteinXtpSp().get()->setFastaFileP(_p_project->getFastaFileStore().getInstance(FastaFile(attributes.value("URL"))).get()); + _p_protein_match->getProteinXtpSp().get()->setFastaFileP( + _p_project->getFastaFileStore() + 
.getInstance(FastaFile(attributes.value("URL"))) + .get()); } - return is_ok; + return is_ok; } -bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) { - //mh="1120.529471" -//<domain id="1976.1.1" start="620" end="629" expect="9.7e-04" mh="1120.5307" delta="-0.0012" hyperscore="29.9" - //nextscore="10.2" y_score="10.4" y_ions="7" b_score="11.2" b_ions="3" pre="QLYR" post="RYGV" - //seq="AQEEMAQVAK" missed_cleavages="0"> - //qDebug() << "startElement_domain "; - bool is_ok = true; - _current_text = _current_text.simplified().replace(" ", ""); - if (!_current_text.isEmpty()) { - //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop() - //_p_protein_match->getProteinXtpSp().get()->setSequence(_current_text.replace(QRegExp("\\*"), "")); - if (_p_protein_match == nullptr) { - throw pappso::PappsoException("ERROR in XtandemSaxHandler::startElement_domain : _p_protein_match == nullptr"); +bool +XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) +{ + // mh="1120.529471" + //<domain id="1976.1.1" start="620" end="629" expect="9.7e-04" mh="1120.5307" + //delta="-0.0012" hyperscore="29.9" nextscore="10.2" y_score="10.4" y_ions="7" + // b_score="11.2" b_ions="3" pre="QLYR" post="RYGV" seq="AQEEMAQVAK" + // missed_cleavages="0"> qDebug() << "startElement_domain "; + bool is_ok = true; + _current_text = _current_text.simplified().replace(" ", ""); + if(!_current_text.isEmpty()) + { + //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop() + //_p_protein_match->getProteinXtpSp().get()->setSequence(_current_text.replace(QRegExp("\\*"), + //"")); + if(_p_protein_match == nullptr) + { + throw pappso::PappsoException("ERROR in " + "XtandemSaxHandler::startElement_" + "domain : _p_protein_match == nullptr"); } - _p_protein_match->getProteinXtpSp().get()->setSequence(_current_text); + _p_protein_match->getProteinXtpSp().get()->setSequence(_current_text); } - // <domain id="4017.1.1" start="21" end="31" expect="2.0e-06" - // 
mh="1263.575" - // delta="0.998" hyperscore="32.9" nextscore="12.2" y_score="10.7" - // y_ions="9" b_score="0.0" b_ions="0" - // pre="VLGR" post="VEFM" seq="TGSQGQCTQVR" missed_cleavages="10"> - /* - * id - – the identifier for t - his particular identified dom - ain (s - pectrum - #).(i - d - #).(dom - ain#) - start - – the first residue - of t - he dom - ain - end - – the last residue - of t - he dom - ain - expect - – the expe - ctation va - lue for t - he peptide identification - mh - – the calculated pe - ptide mass + a prot - on - delta - – the spectrum - mh m - inus - the calculated m - h - hyperscore - – T - ande - m’s score for t - he identification - peak_count - – the num - ber of pe - aks that matched be - tween the theoretical - and t - he test mass spectrum - pre - – the four re - sidue - s pre - ceding t - he dom - ain - post - – the four re - sidue - s fol - lowing t - he dom - ain - seq - – the seque - nce of t - he dom - ain - missed_cleavages - – the num - ber of pot - ential cleavage sites in this - peptide seque - nce*/ - - // valeur généric du scan - _current_peptide_sp = PeptideXtp(attributes.value("seq").simplified()).makePeptideXtpSp(); - _p_peptide_evidence = new PeptideEvidence(_sp_msrun.get(), attributes.value("id").simplified().section(".",0,0).toUInt()); - - _p_peptide_evidence->setRetentionTime(_retention_time); - _p_peptide_evidence->setEvalue(attributes.value("expect").simplified().toDouble()); - //qDebug() << "XtandemSaxHandler::startElement_domain evalue " << _p_peptide_match->getEvalue() << " scan " << _p_peptide_match->getScan(); - - pappso::pappso_double xtandem_mhtheoretical = attributes.value("mh").simplified().toDouble(); - pappso::pappso_double xtandem_delta = attributes.value("delta").simplified().toDouble(); - - //delta – the spectrum mh minus the calculated mh - - //exp mass computed from X!Tandem mh : - pappso::pappso_double exp_mass = _mhplus_obser - pappso::MHPLUS; - - - 
_p_peptide_evidence->setExperimentalMass(exp_mass); - _current_peptide_match.setStart(attributes.value("start").simplified().toUInt()-1); - _p_peptide_evidence->setCharge(_charge); - - _p_peptide_evidence->setParam(PeptideEvidenceParam::tandem_hyperscore, QVariant( attributes.value("hyperscore").toDouble())); - - _p_peptide_evidence->setIdentificationDataSource( _p_identification_data_source); - _p_peptide_evidence->setChecked(true); - - - - // missing informations - //peptide.set_hypercorr(Float.valueOf(attrs.getValue("hyperscore"))); - //peptide.set_pre(attrs.getValue("pre")); - //peptide.set_post(attrs.getValue("post")); - //qDebug() << "startElement_domain end" ; - return is_ok; + // <domain id="4017.1.1" start="21" end="31" expect="2.0e-06" + // mh="1263.575" + // delta="0.998" hyperscore="32.9" nextscore="12.2" y_score="10.7" + // y_ions="9" b_score="0.0" b_ions="0" + // pre="VLGR" post="VEFM" seq="TGSQGQCTQVR" missed_cleavages="10"> + /* + * id + – the identifier for t + his particular identified dom + ain (s + pectrum + #).(i + d + #).(dom + ain#) + start + – the first residue + of t + he dom + ain + end + – the last residue + of t + he dom + ain + expect + – the expe + ctation va + lue for t + he peptide identification + mh + – the calculated pe + ptide mass + a prot + on + delta + – the spectrum + mh m + inus + the calculated m + h + hyperscore + – T + ande + m’s score for t + he identification + peak_count + – the num + ber of pe + aks that matched be + tween the theoretical + and t + he test mass spectrum + pre + – the four re + sidue + s pre + ceding t + he dom + ain + post + – the four re + sidue + s fol + lowing t + he dom + ain + seq + – the seque + nce of t + he dom + ain + missed_cleavages + – the num + ber of pot + ential cleavage sites in this + peptide seque + nce*/ + + // valeur généric du scan + _current_peptide_sp = + PeptideXtp(attributes.value("seq").simplified()).makePeptideXtpSp(); + _p_peptide_evidence = new PeptideEvidence( + 
_sp_msrun.get(), + attributes.value("id").simplified().section(".", 0, 0).toUInt()); + + _p_peptide_evidence->setRetentionTime(_retention_time); + _p_peptide_evidence->setEvalue( + attributes.value("expect").simplified().toDouble()); + // qDebug() << "XtandemSaxHandler::startElement_domain evalue " << + // _p_peptide_match->getEvalue() << " scan " << _p_peptide_match->getScan(); + + pappso::pappso_double xtandem_mhtheoretical = + attributes.value("mh").simplified().toDouble(); + pappso::pappso_double xtandem_delta = + attributes.value("delta").simplified().toDouble(); + + // delta – the spectrum mh minus the calculated mh + + // exp mass computed from X!Tandem mh : + pappso::pappso_double exp_mass = _mhplus_obser - pappso::MHPLUS; + + + _p_peptide_evidence->setExperimentalMass(exp_mass); + _current_peptide_match.setStart( + attributes.value("start").simplified().toUInt() - 1); + _p_peptide_evidence->setCharge(_charge); + + _p_peptide_evidence->setParam( + PeptideEvidenceParam::tandem_hyperscore, + QVariant(attributes.value("hyperscore").toDouble())); + + _p_peptide_evidence->setIdentificationDataSource( + _p_identification_data_source); + _p_peptide_evidence->setChecked(true); + + + // missing informations + // peptide.set_hypercorr(Float.valueOf(attrs.getValue("hyperscore"))); + // peptide.set_pre(attrs.getValue("pre")); + // peptide.set_post(attrs.getValue("post")); + // qDebug() << "startElement_domain end" ; + return is_ok; } -bool XtandemSaxHandler::startElement_aa(QXmlAttributes attributes) { -//<aa type="M" at="624" modified="15.99491" /> - bool is_ok = true; - //qDebug() << "startElement_aa "; - pappso::AaModificationP modif = Utils::guessAaModificationPbyMonoisotopicMassDelta(attributes.value("modified").simplified().toDouble()); - unsigned int position_in_prot = attributes.value("at").simplified().toUInt()-1; - _current_peptide_sp.get()->addAaModification(modif, position_in_prot-_current_peptide_match.getStart()); - //qDebug() << "startElement_aa end" ; - 
return is_ok; +bool +XtandemSaxHandler::startElement_aa(QXmlAttributes attributes) +{ + //<aa type="M" at="624" modified="15.99491" /> + bool is_ok = true; + // qDebug() << "startElement_aa "; + pappso::AaModificationP modif = + Utils::guessAaModificationPbyMonoisotopicMassDelta( + attributes.value("modified").simplified().toDouble()); + unsigned int position_in_prot = + attributes.value("at").simplified().toUInt() - 1; + _current_peptide_sp.get()->addAaModification( + modif, position_in_prot - _current_peptide_match.getStart()); + // qDebug() << "startElement_aa end" ; + return is_ok; } -bool XtandemSaxHandler::endElement_domain() { - bool is_ok = true; - _current_peptide_sp = _p_project->getPeptideStore().getInstance(_current_peptide_sp); - - _p_peptide_evidence->setPeptideXtpSp(_current_peptide_sp); - - _current_peptide_match.setPeptideEvidenceSp(_p_peptide_evidence->getIdentificationDataSource()->getPeptideEvidenceStore().getInstance(_p_peptide_evidence)); - if (_p_protein_match == nullptr) { - throw pappso::PappsoException("ERROR in XtandemSaxHandler::endElement_domain : _p_protein_match == nullptr"); +bool +XtandemSaxHandler::endElement_domain() +{ + bool is_ok = true; + _current_peptide_sp = + _p_project->getPeptideStore().getInstance(_current_peptide_sp); + + _p_peptide_evidence->setPeptideXtpSp(_current_peptide_sp); + + _current_peptide_match.setPeptideEvidenceSp( + _p_peptide_evidence->getIdentificationDataSource() + ->getPeptideEvidenceStore() + .getInstance(_p_peptide_evidence)); + if(_p_protein_match == nullptr) + { + throw pappso::PappsoException("ERROR in " + "XtandemSaxHandler::endElement_domain : " + "_p_protein_match == nullptr"); } - _p_protein_match->addPeptideMatch(_current_peptide_match); - - delete _p_peptide_evidence; - return is_ok; -} + _p_protein_match->addPeptideMatch(_current_peptide_match); + delete _p_peptide_evidence; + return is_ok; +} -bool XtandemSaxHandler::startElement_protein(QXmlAttributes attributes) { -//<protein 
expect="-704.6" id="1976.1" uid="195701" label="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC..." sumI="9.36" > - bool is_ok = true; - //the protein label could be truncated => we must wait for the <note label="description">sp|P11413|G6PD_HUMAN Glucose-6-phosphate 1-dehydrogenase OS=Homo sapiens GN=G6PD PE=1 SV=4</note> - //to get the real protein - _p_protein_match = nullptr; - //qDebug() << "startElement_protein end" ; - return is_ok; +bool +XtandemSaxHandler::startElement_protein(QXmlAttributes attributes) +{ + //<protein expect="-704.6" id="1976.1" uid="195701" label="GRMZM2G083841_P01 + //P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC..." + //sumI="9.36" > + bool is_ok = true; + + // the protein label could be truncated => we must wait for the <note + // label="description">sp|P11413|G6PD_HUMAN Glucose-6-phosphate 1-dehydrogenase + // OS=Homo sapiens GN=G6PD PE=1 SV=4</note> to get the real protein + _p_protein_match = nullptr; + // qDebug() << "startElement_protein end" ; + return is_ok; } -bool XtandemSaxHandler::endElement_note() { -//<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; - ////parent_gene=GRMZM2G083841</note> - bool is_ok = true; - if (_is_protein_description) { - //_p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1)); - _current_protein.setCompleteDescription(_current_text); - if (!_current_protein.getAccession().endsWith(":reversed") && _current_protein.getDescription().endsWith(":reversed")) { - //to fit most cases, just check that the :reversed chars added by X!Tandem are not in the description. 
if so, then add it too in the accession - _current_protein.setAccession(QString("%1%2").arg(_current_protein.getAccession()).arg(":reversed")); +bool +XtandemSaxHandler::endElement_note() +{ + //<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate + //carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; + //coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; + ////parent_gene=GRMZM2G083841</note> + bool is_ok = true; + if(_is_protein_description) + { + //_p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" + //",1)); + _current_protein.setCompleteDescription(_current_text); + if(!_current_protein.getAccession().endsWith(":reversed") && + _current_protein.getDescription().endsWith(":reversed")) + { + // to fit most cases, just check that the :reversed chars added by + // X!Tandem are not in the description. if so, then add it too in the + // accession + _current_protein.setAccession(QString("%1%2") + .arg(_current_protein.getAccession()) + .arg(":reversed")); } - //for older versions < 2013.09.01.1 - if (!_current_protein.getAccession().endsWith("|reversed") && _current_protein.getDescription().endsWith("|reversed")) { - //to fit most cases, just check that the :reversed chars added by X!Tandem are not in the description. if so, then add it too in the accession - _current_protein.setAccession(QString("%1%2").arg(_current_protein.getAccession()).arg("|reversed")); + // for older versions < 2013.09.01.1 + if(!_current_protein.getAccession().endsWith("|reversed") && + _current_protein.getDescription().endsWith("|reversed")) + { + // to fit most cases, just check that the :reversed chars added by + // X!Tandem are not in the description. 
if so, then add it too in the + // accession + _current_protein.setAccession(QString("%1%2") + .arg(_current_protein.getAccession()) + .arg("|reversed")); } - //qDebug() << "startElement_protein accession" << accession; - _p_protein_match = _p_identification_group->getProteinMatchInstance(_current_protein.getAccession()); - - _p_protein_match->setChecked(false); - //qDebug() << "startElement_protein p_protein_match 3 " << _p_protein_match; - ProteinXtpSp sp_xtp_protein = _current_protein.makeProteinXtpSp(); - _p_protein_match->setProteinXtpSp(_p_project->getProteinStore().getInstance(sp_xtp_protein)); - _p_protein_match->setChecked(true); - + // qDebug() << "startElement_protein accession" << accession; + _p_protein_match = _p_identification_group->getProteinMatchInstance( + _current_protein.getAccession()); + + _p_protein_match->setChecked(false); + // qDebug() << "startElement_protein p_protein_match 3 " << + // _p_protein_match; + ProteinXtpSp sp_xtp_protein = _current_protein.makeProteinXtpSp(); + _p_protein_match->setProteinXtpSp( + _p_project->getProteinStore().getInstance(sp_xtp_protein)); + _p_protein_match->setChecked(true); } - else { + else + { -//<group label="input parameters" type="parameters"> - //<note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note> - if (_current_note_label == "list path, default parameters") { - _p_identification_data_source->setIdentificationEngineParam(IdentificationEngineParam::tandem_param,_current_text); + //<group label="input parameters" type="parameters"> + //<note type="input" label="list path, default + //parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note> + if(_current_note_label == "list path, default parameters") + { + _p_identification_data_source->setIdentificationEngineParam( + IdentificationEngineParam::tandem_param, _current_text); 
} - /* - <note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note> - <note type="input" label="output, histogram column width">30</note> - <note type="input" label="output, histograms">yes</note> - <note type="input" label="output, maximum valid expectation value">0.05</note> - <note type="input" label="output, maximum valid protein expectation value">0.05</note> - <note type="input" label="output, one sequence copy">yes</note> - <note type="input" label="output, parameters">yes</note> - <note type="input" label="output, path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note> - <note type="input" label="output, path hashing">no</note> - <note type="input" label="output, performance">yes</note> - <note type="input" label="output, proteins">yes</note> - <note type="input" label="output, results">valid</note> - <note type="input" label="output, sequences">yes</note> - <note type="input" label="output, sort results by">spectrum</note> - <note type="input" label="output, spectra">yes</note> - <note type="input" label="output, xsl path">tandem-style.xsl</note> - <note type="input" label="protein, C-terminal residue modification mass">0.0</note> - <note type="input" label="protein, N-terminal residue modification mass">0.0</note> - <note type="input" label="protein, cleavage C-terminal mass change">+17.00305</note> - <note type="input" label="protein, cleavage N-terminal mass change">+1.00794</note> - <note type="input" label="protein, cleavage semi">no</note> - <note type="input" label="protein, cleavage site">[RK]|{P}</note> - <note type="input" label="protein, modified residue mass file"></note> - <note type="input" label="protein, quick acetyl">yes</note> - <note type="input" label="protein, quick pyrolidone">yes</note> - <note type="input" label="protein, stP bias">yes</note> - <note type="input" label="protein, taxon">usedefined</note> - 
<note type="input" label="refine">yes</note> - <note type="input" label="refine, cleavage semi">no</note> - <note type="input" label="refine, maximum valid expectation value">0.01</note> - <note type="input" label="refine, modification mass">57.02146@C</note> - <note type="input" label="refine, modification mass 1"></note> - <note type="input" label="refine, point mutations">no</note> - <note type="input" label="refine, potential C-terminus modifications"></note> - <note type="input" label="refine, potential N-terminus modifications">+42.01056@[</note> - <note type="input" label="refine, potential modification mass">15.99491@M</note> - <note type="input" label="refine, potential modification mass 1"></note> - <note type="input" label="refine, potential modification motif"></note> - <note type="input" label="refine, potential modification motif 1"></note> - <note type="input" label="refine, spectrum synthesis">yes</note> - <note type="input" label="refine, unanticipated cleavage">no</note> - <note type="input" label="refine, use potential modifications for full refinement">yes</note> - <note type="input" label="residue, modification mass">57.02146@C</note> - <note type="input" label="residue, modification mass 1"></note> - <note type="input" label="residue, potential modification mass">15.99491@M</note> - <note type="input" label="residue, potential modification motif"></note> - <note type="input" label="scoring, a ions">no</note> - <note type="input" label="scoring, b ions">yes</note> - <note type="input" label="scoring, c ions">no</note> - <note type="input" label="scoring, cyclic permutation">yes</note> - <note type="input" label="scoring, include reverse">yes</note> - <note type="input" label="scoring, maximum missed cleavage sites">1</note> - <note type="input" label="scoring, minimum ion count">4</note> - <note type="input" label="scoring, x ions">no</note> - <note type="input" label="scoring, y ions">yes</note> - <note type="input" label="scoring, z 
ions">no</note> - <note type="input" label="spectrum, dynamic range">100.0</note> - <note type="input" label="spectrum, fragment mass type">monoisotopic</note> - <note type="input" label="spectrum, fragment monoisotopic mass error">0.02</note> - <note type="input" label="spectrum, fragment monoisotopic mass error units">Daltons</note> - <note type="input" label="spectrum, maximum parent charge">4</note> - <note type="input" label="spectrum, minimum fragment mz">150.0</note> - <note type="input" label="spectrum, minimum parent m+h">500.0</note> - <note type="input" label="spectrum, minimum peaks">15</note> - <note type="input" label="spectrum, neutral loss mass">18.01057</note> - <note type="input" label="spectrum, neutral loss window">0.02</note> - <note type="input" label="spectrum, parent monoisotopic mass error minus">10</note> - <note type="input" label="spectrum, parent monoisotopic mass error plus">10</note> - <note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note> - <note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note> - */ - //<note type="input" label="spectrum, path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note> - - if (_current_note_label == "spectrum, path") { - _sp_msrun.get()->setFilename(_current_text); + /* + <note type="input" label="list path, taxonomy + information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note> + <note type="input" label="output, histogram column width">30</note> + <note type="input" label="output, histograms">yes</note> + <note type="input" label="output, maximum valid expectation + value">0.05</note> <note type="input" label="output, maximum valid protein + expectation value">0.05</note> <note type="input" label="output, one + sequence copy">yes</note> <note type="input" label="output, + parameters">yes</note> <note type="input" label="output, + 
path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note> + <note type="input" label="output, path hashing">no</note> + <note type="input" label="output, performance">yes</note> + <note type="input" label="output, proteins">yes</note> + <note type="input" label="output, results">valid</note> + <note type="input" label="output, sequences">yes</note> + <note type="input" label="output, sort results by">spectrum</note> + <note type="input" label="output, spectra">yes</note> + <note type="input" label="output, xsl path">tandem-style.xsl</note> + <note type="input" label="protein, C-terminal residue modification + mass">0.0</note> <note type="input" label="protein, N-terminal residue + modification mass">0.0</note> <note type="input" label="protein, cleavage + C-terminal mass change">+17.00305</note> <note type="input" + label="protein, cleavage N-terminal mass change">+1.00794</note> <note + type="input" label="protein, cleavage semi">no</note> <note type="input" + label="protein, cleavage site">[RK]|{P}</note> <note type="input" + label="protein, modified residue mass file"></note> <note type="input" + label="protein, quick acetyl">yes</note> <note type="input" + label="protein, quick pyrolidone">yes</note> <note type="input" + label="protein, stP bias">yes</note> <note type="input" label="protein, + taxon">usedefined</note> <note type="input" label="refine">yes</note> + <note type="input" label="refine, cleavage semi">no</note> + <note type="input" label="refine, maximum valid expectation + value">0.01</note> <note type="input" label="refine, modification + mass">57.02146@C</note> <note type="input" label="refine, modification + mass 1"></note> <note type="input" label="refine, point + mutations">no</note> <note type="input" label="refine, potential + C-terminus modifications"></note> <note type="input" label="refine, + potential N-terminus modifications">+42.01056@[</note> <note type="input" + label="refine, potential 
modification mass">15.99491@M</note> <note + type="input" label="refine, potential modification mass 1"></note> <note + type="input" label="refine, potential modification motif"></note> <note + type="input" label="refine, potential modification motif 1"></note> <note + type="input" label="refine, spectrum synthesis">yes</note> <note + type="input" label="refine, unanticipated cleavage">no</note> <note + type="input" label="refine, use potential modifications for full + refinement">yes</note> <note type="input" label="residue, modification + mass">57.02146@C</note> <note type="input" label="residue, modification + mass 1"></note> <note type="input" label="residue, potential modification + mass">15.99491@M</note> <note type="input" label="residue, potential + modification motif"></note> <note type="input" label="scoring, a + ions">no</note> <note type="input" label="scoring, b ions">yes</note> + <note type="input" label="scoring, c ions">no</note> + <note type="input" label="scoring, cyclic permutation">yes</note> + <note type="input" label="scoring, include reverse">yes</note> + <note type="input" label="scoring, maximum missed cleavage sites">1</note> + <note type="input" label="scoring, minimum ion count">4</note> + <note type="input" label="scoring, x ions">no</note> + <note type="input" label="scoring, y ions">yes</note> + <note type="input" label="scoring, z ions">no</note> + <note type="input" label="spectrum, dynamic range">100.0</note> + <note type="input" label="spectrum, fragment mass + type">monoisotopic</note> <note type="input" label="spectrum, fragment + monoisotopic mass error">0.02</note> <note type="input" label="spectrum, + fragment monoisotopic mass error units">Daltons</note> <note type="input" + label="spectrum, maximum parent charge">4</note> <note type="input" + label="spectrum, minimum fragment mz">150.0</note> <note type="input" + label="spectrum, minimum parent m+h">500.0</note> <note type="input" + label="spectrum, minimum peaks">15</note> 
<note type="input" + label="spectrum, neutral loss mass">18.01057</note> <note type="input" + label="spectrum, neutral loss window">0.02</note> <note type="input" + label="spectrum, parent monoisotopic mass error minus">10</note> <note + type="input" label="spectrum, parent monoisotopic mass error + plus">10</note> <note type="input" label="spectrum, parent monoisotopic + mass error units">ppm</note> <note type="input" label="spectrum, parent + monoisotopic mass isotope error">yes</note> + */ + //<note type="input" label="spectrum, + //path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note> + + if(_current_note_label == "spectrum, path") + { + _sp_msrun.get()->setFilename(_current_text); } - /* - <note type="input" label="spectrum, sequence batch size">1000</note> - <note type="input" label="spectrum, threads">1</note> - <note type="input" label="spectrum, total peaks">100</note> - <note type="input" label="spectrum, use contrast angle">no</note> - <note type="input" label="spectrum, use neutral loss window">yes</note> - <note type="input" label="spectrum, use noise suppression">yes</note> - </group> - - */ - -//<group label="unused input parameters" type="parameters"> - - /* - <note type="input" label="protein, use minimal annotations">yes</note> - <note type="input" label="refine, modification mass 2"></note> - <note type="input" label="refine, potential modification mass 2"></note> - <note type="input" label="refine, potential modification motif 2"></note> - <note type="input" label="residue, modification mass 2"></note> - <note type="input" label="residue, potential modification mass 1"></note> - <note type="input" label="residue, potential modification mass 2"></note> - <note type="input" label="residue, potential modification motif 1"></note> - <note type="input" label="residue, potential modification motif 2"></note> - <note type="input" label="scoring, pluggable scoring">no</note> - </group> - */ - -//<group 
label="performance parameters" type="parameters"> - - //<note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note> - //<note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note> - if (_current_note_label.startsWith("list path, sequence source #")) { - _p_identification_data_source->addFastaFile(_p_project->getFastaFileStore().getInstance( FastaFile(_current_text))); + /* + <note type="input" label="spectrum, sequence batch size">1000</note> + <note type="input" label="spectrum, threads">1</note> + <note type="input" label="spectrum, total peaks">100</note> + <note type="input" label="spectrum, use contrast angle">no</note> + <note type="input" label="spectrum, use neutral loss window">yes</note> + <note type="input" label="spectrum, use noise suppression">yes</note> + </group> + + */ + + //<group label="unused input parameters" type="parameters"> + + /* + <note type="input" label="protein, use minimal annotations">yes</note> + <note type="input" label="refine, modification mass 2"></note> + <note type="input" label="refine, potential modification mass 2"></note> + <note type="input" label="refine, potential modification motif + 2"></note> <note type="input" label="residue, modification mass 2"></note> + <note type="input" label="residue, potential modification mass + 1"></note> <note type="input" label="residue, potential modification mass + 2"></note> <note type="input" label="residue, potential modification motif + 1"></note> <note type="input" label="residue, potential modification motif + 2"></note> <note type="input" label="scoring, pluggable scoring">no</note> + </group> + */ + + //<group label="performance parameters" type="parameters"> + + //<note label="list path, sequence source + //#1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note> + //<note label="list path, sequence source + 
//#2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note> + if(_current_note_label.startsWith("list path, sequence source #")) + { + _p_identification_data_source->addFastaFile( + _p_project->getFastaFileStore().getInstance( + FastaFile(_current_text))); } - /* - <note label="list path, sequence source description #1">no description</note> - <note label="list path, sequence source description #2">no description</note> - <note label="modelling, duplicate peptide ids">6019</note> - <note label="modelling, duplicate proteins">19735</note> - <note label="modelling, estimated false positives">18</note> - <note label="modelling, reversed sequence false positives">20</note> - <note label="modelling, spectrum noise suppression ratio">0.00</note> - */ - //<note label="modelling, total peptides used">96618641</note> - if (_current_note_label == "modelling, total peptides used") { - _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_peptide_used, _current_text.toUInt()); + /* + <note label="list path, sequence source description #1">no + description</note> <note label="list path, sequence source description + #2">no description</note> <note label="modelling, duplicate peptide + ids">6019</note> <note label="modelling, duplicate proteins">19735</note> + <note label="modelling, estimated false positives">18</note> + <note label="modelling, reversed sequence false positives">20</note> + <note label="modelling, spectrum noise suppression ratio">0.00</note> + */ + //<note label="modelling, total peptides used">96618641</note> + if(_current_note_label == "modelling, total peptides used") + { + _p_identification_data_source->setIdentificationEngineStatistics( + IdentificationEngineStatistics::total_peptide_used, + _current_text.toUInt()); } - //<note label="modelling, total proteins used">273656</note> - if (_current_note_label == "modelling, total proteins used") { - 
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_proteins_used, _current_text.toUInt()); + //<note label="modelling, total proteins used">273656</note> + if(_current_note_label == "modelling, total proteins used") + { + _p_identification_data_source->setIdentificationEngineStatistics( + IdentificationEngineStatistics::total_proteins_used, + _current_text.toUInt()); } - //<note label="modelling, total spectra assigned">7464</note> - if (_current_note_label == "modelling, total spectra assigned") { - _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned, _current_text.toUInt()); + //<note label="modelling, total spectra assigned">7464</note> + if(_current_note_label == "modelling, total spectra assigned") + { + _p_identification_data_source->setIdentificationEngineStatistics( + IdentificationEngineStatistics::total_spectra_assigned, + _current_text.toUInt()); } - //<note label="modelling, total spectra used">12199</note> - if (_current_note_label == "modelling, total spectra used") { - _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used, _current_text.toUInt()); + //<note label="modelling, total spectra used">12199</note> + if(_current_note_label == "modelling, total spectra used") + { + _p_identification_data_source->setIdentificationEngineStatistics( + IdentificationEngineStatistics::total_spectra_used, + _current_text.toUInt()); } - //<note label="modelling, total unique assigned">6260</note> - if (_current_note_label == "modelling, total unique assigned") { - _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned, _current_text.toUInt()); + //<note label="modelling, total unique assigned">6260</note> + if(_current_note_label == "modelling, total unique assigned") + { + 
_p_identification_data_source->setIdentificationEngineStatistics( + IdentificationEngineStatistics::total_unique_assigned, + _current_text.toUInt()); } - //<note label="process, start time">2013:12:20:16:47:19</note> + //<note label="process, start time">2013:12:20:16:47:19</note> - //<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note> - if (_current_note_label == "process, version") { - QRegExp rx("\\((.*)\\)"); - if (rx.indexIn(_current_text, 0) != -1) { - _p_identification_data_source->setIdentificationEngineVersion(rx.cap(1)); + //<note label="process, version">X! Tandem Sledgehammer + //(2013.09.01.1)</note> + if(_current_note_label == "process, version") + { + QRegExp rx("\\((.*)\\)"); + if(rx.indexIn(_current_text, 0) != -1) + { + _p_identification_data_source->setIdentificationEngineVersion( + rx.cap(1)); } - qDebug() << "XtandemSaxHandler::endElement_note() " << _p_identification_data_source->getIdentificationEngineVersion(); + qDebug() + << "XtandemSaxHandler::endElement_note() " + << _p_identification_data_source->getIdentificationEngineVersion(); } - /* - <note label="quality values">243 476 437 382 384 417 399 416 346 387 390 382 321 355 311 283 253 272 251 228</note> - <note label="refining, # input models">4893</note> - <note label="refining, # input spectra">5520</note> - <note label="refining, # partial cleavage">326</note> - <note label="refining, # point mutations">0</note> - <note label="refining, # potential C-terminii">0</note> - <note label="refining, # potential N-terminii">392</note> - <note label="refining, # unanticipated cleavage">0</note> - <note label="timing, initial modelling total (sec)">170.96</note> - <note label="timing, initial modelling/spectrum (sec)">0.0140</note> - <note label="timing, load sequence models (sec)">0.33</note> - <note label="timing, refinement/spectrum (sec)">0.0141</note> - </group> - */ - + /* + <note label="quality values">243 476 437 382 384 417 399 416 346 387 390 + 382 321 355 311 
283 253 272 251 228</note> <note label="refining, # input + models">4893</note> <note label="refining, # input spectra">5520</note> + <note label="refining, # partial cleavage">326</note> + <note label="refining, # point mutations">0</note> + <note label="refining, # potential C-terminii">0</note> + <note label="refining, # potential N-terminii">392</note> + <note label="refining, # unanticipated cleavage">0</note> + <note label="timing, initial modelling total (sec)">170.96</note> + <note label="timing, initial modelling/spectrum (sec)">0.0140</note> + <note label="timing, load sequence models (sec)">0.33</note> + <note label="timing, refinement/spectrum (sec)">0.0141</note> + </group> + */ } - _current_text = ""; - return is_ok; + _current_text = ""; + return is_ok; } +bool +XtandemSaxHandler::error(const QXmlParseException &exception) +{ + _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" + "%3") + .arg(exception.lineNumber()) + .arg(exception.columnNumber()) + .arg(exception.message()); -bool XtandemSaxHandler::error(const QXmlParseException &exception) { - _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" - "%3").arg(exception.lineNumber()).arg(exception.columnNumber()).arg( - exception.message()); - - return false; + return false; } -bool XtandemSaxHandler::fatalError(const QXmlParseException &exception) { - _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" - "%3").arg(exception.lineNumber()).arg(exception.columnNumber()).arg( - exception.message()); - return false; +bool +XtandemSaxHandler::fatalError(const QXmlParseException &exception) +{ + _errorStr = QObject::tr("Parse error at line %1, column %2 :\n" + "%3") + .arg(exception.lineNumber()) + .arg(exception.columnNumber()) + .arg(exception.message()); + return false; } -QString XtandemSaxHandler::errorString() const { - return _errorStr; +QString +XtandemSaxHandler::errorString() const +{ + return _errorStr; } -bool XtandemSaxHandler::endDocument() { - return 
true; +bool +XtandemSaxHandler::endDocument() +{ + return true; } -bool XtandemSaxHandler::startDocument() { - return true; +bool +XtandemSaxHandler::startDocument() +{ + return true; } -bool XtandemSaxHandler::characters(const QString &str) { - _current_text += str; - return true; +bool +XtandemSaxHandler::characters(const QString &str) +{ + _current_text += str; + return true; } - diff --git a/src/utils/identificationdatasourcestore.cpp b/src/utils/identificationdatasourcestore.cpp index e4e16ba50..e15a027cc 100644 --- a/src/utils/identificationdatasourcestore.cpp +++ b/src/utils/identificationdatasourcestore.cpp @@ -32,6 +32,7 @@ #include "../core/identification_sources/identificationxtandemfile.h" #include "../core/identification_sources/identificationmascotdatfile.h" #include "../core/identification_sources/identificationpwizfile.h" +#include "../core/identification_sources/identificationpepxmlfile.h" #include <pappsomspp/pappsoexception.h> #include <pappsomspp/utils.h> @@ -90,7 +91,7 @@ IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QStr } else { QFileInfo location_file(location); - QString ext = location_file.suffix(); + QString ext = location_file.completeSuffix(); //QString sample_name = location_file.baseName(); IdentificationDataSourceSp p_identfile = nullptr; if (ext.toLower() == "xml") { @@ -98,7 +99,10 @@ IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QStr p_identfile = std::make_shared<IdentificationXtandemFile>(location_file); } else if (ext.toLower() == "pep") { //pep xml file - p_identfile = std::make_shared<IdentificationPwizFile>(location_file); + p_identfile = std::make_shared<IdentificationPepXmlFile>(location_file); + } else if (ext.toLower() == "pep.xml") { + //pep xml file + p_identfile = std::make_shared<IdentificationPepXmlFile>(location_file); } else if (ext.toLower() == "dat") { //MASCOT dat file p_identfile = std::make_shared<IdentificationMascotDatFile>(location_file); diff 
--git a/src/utils/types.h b/src/utils/types.h index 0809820b3..1a193327c 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -1,25 +1,26 @@ /******************************************************************************* -* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>. -* -* This file is part of XTPcpp. -* -* XTPcpp is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* XTPcpp is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. -* -* Contributors: -* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation -******************************************************************************/ + * Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>. + * + * This file is part of XTPcpp. + * + * XTPcpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * XTPcpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with XTPcpp. If not, see <http://www.gnu.org/licenses/>. 
+ * + * Contributors: + * Olivier Langella <olivier.langella@u-psud.fr> - initial API and + *implementation + ******************************************************************************/ #ifndef _TYPES_H_ @@ -31,82 +32,104 @@ /** \def ExternalDatabase external database references * */ -enum class ExternalDatabase: std::int8_t { - OboPsiMod =1, ///< OBO PSI MOD - SwissProt =2, ///< Swiss-Prot - TrEMBL=3, ///< TrEMBL - AGI_LocusCode=4, ///< AGI_LocusCode - NCBI_gi=5, ///< NCBI_gi - ref=6 ///< ref +enum class ExternalDatabase : std::int8_t +{ + OboPsiMod = 1, ///< OBO PSI MOD + SwissProt = 2, ///< Swiss-Prot + TrEMBL = 3, ///< TrEMBL + AGI_LocusCode = 4, ///< AGI_LocusCode + NCBI_gi = 5, ///< NCBI_gi + ref = 6 ///< ref }; /** \def IdentificationEngine identification engine * */ -enum class IdentificationEngine: std::int8_t { - unknown=0, ///< X!Tandem - XTandem=1, ///< X!Tandem - mascot=2, ///< Mascot - peptider=3 ///< peptider +enum class IdentificationEngine : std::int8_t +{ + unknown = 0, ///< unknown identification engine + XTandem = 1, ///< MS:1001476 X!Tandem was used to analyze the spectra. + mascot = 2, ///< MS:1001207 The name of the Mascot search engine. + peptider = 3, ///< peptider + OMSSA = 4, ///< MS:1001475 Open Mass Spectrometry Search Algorithm was used to + ///< analyze the spectra. + SEQUEST = 5, ///< MS:1001208 The name of the SEQUEST search engine. + Comet = 6, ///< MS:1002251 Comet open-source sequence search engine developed + ///< at the University of Washington. PMID:23148064 + Morpheus = 7, ///< MS:1002661 "Morpheus search engine." [PMID:23323968] + MSGFplus = + 8, ///< MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI] }; /** \def PeptideEvidenceParam peptide evidence specific parameters * is_a: MS:1001143 !
PSM-level search engine specific statistic */ -enum class PeptideEvidenceParam: std::int8_t { - tandem_hyperscore=0, ///< X!Tandem hyperscore MS:1001331 - tandem_expectation_value=1, ///< X!Tandem expectation value MS:1001330 - mascot_score=2, ///< PSI-MS MS:1001171 mascot:score 56.16 - mascot_expectation_value=3///< PSI-MS MS:1001172 mascot:expectation value 2.42102904673618e-006 +enum class PeptideEvidenceParam : std::int8_t +{ + tandem_hyperscore = 0, ///< X!Tandem hyperscore MS:1001331 + tandem_expectation_value = 1, ///< X!Tandem expectation value MS:1001330 + mascot_score = 2, ///< PSI-MS MS:1001171 mascot:score 56.16 + mascot_expectation_value = + 3, ///< PSI-MS MS:1001172 mascot:expectation value 2.42102904673618e-006 + peptide_prophet_probability = 4, ///< no PSI MS description + peptide_inter_prophet_probability = 5, ///< no PSI MS description }; /** \def IdentificationEngineParam identification engine parameters * */ -enum class IdentificationEngineParam: std::int8_t { - tandem_param=0 ///< X!Tandem xml parameters file +enum class IdentificationEngineParam : std::int8_t +{ + tandem_param = 0 ///< X!Tandem xml parameters file }; /** \def IdentificationEngineStatistics identification engine statistics * */ -enum class IdentificationEngineStatistics: std::int8_t { - total_spectra_assigned=1, ///< total_spectra_assigned in one identification file (one sample) - total_spectra_used=2,///< total_spectra_used in one identification file (one sample) - total_peptide_used=3,///< total number of peptides generated and used in identification - total_proteins_used=4,///< total number of proteins generated and used in identification - total_unique_assigned=5,///< total number unique peptide sequence assigned +enum class IdentificationEngineStatistics : std::int8_t +{ + total_spectra_assigned = + 1, ///< total_spectra_assigned in one identification file (one sample) + total_spectra_used = + 2, ///< total_spectra_used in one identification file (one sample) + 
total_peptide_used = + 3, ///< total number of peptides generated and used in identification + total_proteins_used = + 4, ///< total number of proteins generated and used in identification + total_unique_assigned = 5, ///< total number unique peptide sequence assigned }; /** \def MsRunStatistics MS run statistics * */ -enum class MsRunStatistics: std::int8_t { - total_spectra=1, ///< total number of spectra - total_spectra_ms1=2,///< total number of MS1 spectra - total_spectra_ms2=3,///< total number of MS2 spectra - total_spectra_ms3=4,///< total number of MS3 spectra +enum class MsRunStatistics : std::int8_t +{ + total_spectra = 1, ///< total number of spectra + total_spectra_ms1 = 2, ///< total number of MS1 spectra + total_spectra_ms2 = 3, ///< total number of MS2 spectra + total_spectra_ms3 = 4, ///< total number of MS3 spectra }; - /** \def ProjectMode separate each samples or combine all * */ -enum class ProjectMode { - individual, ///< separate each biological samples (2D spots for example) - combined ///< combine every MS runs to get only one protein list +enum class ProjectMode +{ + individual, ///< separate each biological samples (2D spots for example) + combined ///< combine every MS runs to get only one protein list }; /** \def GroupingType list of available grouping algoritms * */ -enum class GroupingType { - PeptideMass, ///< protein grouper algo - Phospho, ///< phospho peptides grouping - SampleScan ///< X!TandemPipeline algo +enum class GroupingType +{ + PeptideMass, ///< protein grouper algo + Phospho, ///< phospho peptides grouping + SampleScan ///< X!TandemPipeline algo }; @@ -114,11 +137,14 @@ enum class GroupingType { * */ -enum class ValidationState: std::int8_t { - notValid = 0,///< notValid : automatic filter validation failed - valid =1, ///< valid : automatic filter validation passed - validAndChecked=2, ///< validAndChecked : automatic filter validation passed + manual checking - grouped=3 ///< grouped : automatic filter validation 
passed + manual checking + grouped +enum class ValidationState : std::int8_t +{ + notValid = 0, ///< notValid : automatic filter validation failed + valid = 1, ///< valid : automatic filter validation passed + validAndChecked = 2, ///< validAndChecked : automatic filter validation passed + ///< + manual checking + grouped = 3 ///< grouped : automatic filter validation passed + manual + ///< checking + grouped }; -- GitLab