From 8341bea56a2a5bc263bcd5f98de0c850eed84783 Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Wed, 5 Apr 2017 22:26:52 +0200 Subject: [PATCH] WIP : xtandem sax parser --- src/CMakeLists.txt | 4 +- .../identificationxtandemfile.cpp | 32 ++- src/core/identificationgroup.cpp | 28 +++ src/core/identificationgroup.h | 3 + src/core/project.cpp | 5 +- src/core/project.h | 3 +- src/core/proteinmatch.cpp | 3 - src/core/proteinmatch.h | 10 +- src/core/proteinxtp.cpp | 8 +- src/core/proteinxtp.h | 2 + src/gui/mainwindow.cpp | 2 +- src/gui/peptide_list_view/peptide_view.ui | 13 +- .../peptide_list_view/peptidelistwindow.cpp | 3 +- .../automatic_filter_widget.ui | 4 +- src/gui/protein_view/protein_detail_view.ui | 12 +- src/gui/protein_view/proteinwindow.cpp | 4 +- src/input/xpipsaxhandler.cpp | 4 +- src/input/xtandemsaxhandler.cpp | 195 ++++++------------ src/input/xtandemsaxhandler.h | 33 ++- 19 files changed, 182 insertions(+), 186 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 42d4fb8f..9d83c283 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,8 +29,8 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Qt5Xml_EXECUTABLE_COMPILE_FLAGS} ${Qt5 #sudo apt-get install libpappsomspp-dev #FIND_PACKAGE( Pappsomspp REQUIRED ) -# SET (PAPPSOMSPP_DIR "/home/olivier/eclipse/git/pappsomspp") - SET (PAPPSOMSPP_DIR "/home/langella/developpement/git/pappsomspp") + SET (PAPPSOMSPP_DIR "/home/olivier/eclipse/git/pappsomspp") +# SET (PAPPSOMSPP_DIR "/home/langella/developpement/git/pappsomspp") SET (PAPPSOMSPP_INCLUDE_DIR "${PAPPSOMSPP_DIR}/src") SET (PAPPSOMSPP_QT4_LIBRARY "${PAPPSOMSPP_DIR}/cbuild/src/libpappsomspp-qt4.so") diff --git a/src/core/identification_sources/identificationxtandemfile.cpp b/src/core/identification_sources/identificationxtandemfile.cpp index e5fd5085..1f21279e 100644 --- a/src/core/identification_sources/identificationxtandemfile.cpp +++ b/src/core/identification_sources/identificationxtandemfile.cpp @@ -50,16 +50,40 @@ pappso::SpectrumSp IdentificationXtandemFile::getSpectrumSp(unsigned int scan_nu void IdentificationXtandemFile::parseTo(Project* p_project) { - qDebug() << "Project::readXpipFile begin"; + qDebug() << "Project::readXpipFile begin"; - XtandemSaxHandler * parser = new XtandemSaxHandler(p_project); + qDebug() << "Read X!Tandem XML result file '" << _xtandem_file.absoluteFilePath() << "'"; + + MsRunSp msrun_sp = p_project->getMsRunStore().getInstance(QFileInfo(_xtandem_file).baseName()); + setMsRunSp(msrun_sp); + std::vector<IdentificationGroup *> identification_list = p_project->getIdentificationGroupList(); + IdentificationGroup * identification_group_p = nullptr; + if (p_project->isCombineMode()) { + if (identification_list.size() == 0) { + identification_group_p = p_project->newIdentificationGroup(); + } + else { + identification_group_p = identification_list[0]; + } + } + else { + for (IdentificationGroup * identification_p_flist : identification_list) { + if (identification_p_flist->contains(msrun_sp.get())) { + identification_group_p = identification_p_flist; + break; + } + } + if (identification_group_p == nullptr) { + identification_group_p = p_project->newIdentificationGroup(); + } + } + + XtandemSaxHandler * parser = new XtandemSaxHandler(p_project, identification_group_p); QXmlSimpleReader simplereader; simplereader.setContentHandler(parser); simplereader.setErrorHandler(parser); - qDebug() << "Read X!Tandem XML result file '" << _xtandem_file.absoluteFilePath() << "'"; - QFile qfile(_xtandem_file.absoluteFilePath()); QXmlInputSource xmlInputSource(&qfile); diff --git a/src/core/identificationgroup.cpp b/src/core/identificationgroup.cpp index 75e8d1c6..f85a149b 100644 --- a/src/core/identificationgroup.cpp +++ b/src/core/identificationgroup.cpp @@ -24,6 +24,7 @@ #include "identificationgroup.h" #include "project.h" #include "../utils/groupstore.h" +#include <pappsomspp/pappsoexception.h> IdentificationGroup::IdentificationGroup(Project * project) @@ -49,9 +50,36 @@ void IdentificationGroup::updateAutomaticFilters(const AutomaticFilterParameters } } +ProteinMatch * IdentificationGroup::getProteinMatch(const QString accession) { + if (accession.isEmpty()) { + throw pappso::PappsoException(QObject::tr("Error protein match not found : accession is empty")); + } + for (ProteinMatch * p_protein_match : _protein_match_list) { + if (p_protein_match->getProteinXtpSp().get()->getAccession() == accession) { + return p_protein_match; + } + } + return nullptr; +} void IdentificationGroup::addProteinMatch(ProteinMatch * protein_match) { + QString accession = protein_match->getProteinXtpSp().get()->getAccession(); + if (accession.isEmpty()) { + throw pappso::PappsoException(QObject::tr("Error adding protein match : accession is empty")); + } + for (ProteinMatch * p_protein_match : _protein_match_list) { + if (p_protein_match->getProteinXtpSp().get()->getAccession() == accession) { + throw pappso::PappsoException(QObject::tr("Error adding protein match : accession %1 already registered").arg(accession)); + } + } _protein_match_list.push_back(protein_match); } + +bool IdentificationGroup::contains (const MsRun * p_msrun) const { + for (const MsRunSp & msrun: _ms_run_list) { + if (msrun.get() == p_msrun) return true; + } + return false; +} void IdentificationGroup::addMsRunSp(MsRunSp ms_run_sp) { _ms_run_list.push_back(ms_run_sp); } diff --git a/src/core/identificationgroup.h b/src/core/identificationgroup.h index 587bde92..e7179375 100644 --- a/src/core/identificationgroup.h +++ b/src/core/identificationgroup.h @@ -43,6 +43,7 @@ public: IdentificationGroup(Project * project); ~IdentificationGroup(); + ProteinMatch * getProteinMatch(const QString accession); void addProteinMatch(ProteinMatch * protein_match); std::vector<ProteinMatch *> & getProteinMatchList(); void addMsRunSp(MsRunSp ms_run_sp); @@ -79,6 +80,8 @@ public: /** @brief get tab name for qtabwidget * */ const QString getTabName() const; + + bool contains (const MsRun * p_msrun) const; private : GroupingExperiment * _p_grp_experiment= nullptr; diff --git a/src/core/project.cpp b/src/core/project.cpp index 1ae87c29..210b0e11 100644 --- a/src/core/project.cpp +++ b/src/core/project.cpp @@ -46,7 +46,10 @@ void Project::readResultFile(QString filename) { ident_source.get()->parseTo(this); } -void Project::setCombine(bool is_combine_mode) { + bool Project::isCombineMode() const { + return _is_combine_mode; + } +void Project::setCombineMode(bool is_combine_mode) { _is_combine_mode = is_combine_mode; } std::vector<IdentificationGroup *> Project::getIdentificationGroupList() { diff --git a/src/core/project.h b/src/core/project.h index 1f9e7190..3d04785b 100644 --- a/src/core/project.h +++ b/src/core/project.h @@ -62,7 +62,8 @@ public: const GroupingType getGroupingType() const; std::vector<IdentificationGroup *> getIdentificationGroupList(); - void setCombine(bool is_combine_mode); + void setCombineMode(bool is_combine_mode); + bool isCombineMode() const; void readResultFile(QString filename); private : diff --git a/src/core/proteinmatch.cpp b/src/core/proteinmatch.cpp index 1b49536a..0df923cc 100644 --- a/src/core/proteinmatch.cpp +++ b/src/core/proteinmatch.cpp @@ -99,9 +99,6 @@ void ProteinMatch::updateAutomaticFilters(const AutomaticFilterParameters & auto const ProteinXtpSp & ProteinMatch::getProteinXtpSp() const { return _protein_sp; } -void ProteinMatch::setEvalue(pappso::pappso_double evalue) { - _evalue = evalue; -} void ProteinMatch::setProteinXtpSp(ProteinXtpSp protein_sp) { _protein_sp = protein_sp; diff --git a/src/core/proteinmatch.h b/src/core/proteinmatch.h index dfce9d76..3044d8d7 100644 --- a/src/core/proteinmatch.h +++ b/src/core/proteinmatch.h @@ -44,7 +44,6 @@ public: ~ProteinMatch(); const ProteinXtpSp & getProteinXtpSp() const; - void setEvalue(pappso::pappso_double evalue); /** @brief compute protein Evalue within samples * */ @@ -109,16 +108,15 @@ private : unsigned int countValidAndCheckedPeptideMassCharge(const MsRun * sp_msrun_id) const; private: - static QColor _color_peptide_background; - static QColor _color_highlighted_peptide_background; - - + static QColor _color_peptide_background; + static QColor _color_highlighted_peptide_background; + + pappso::GrpProteinSp _sp_grp_protein; GroupingGroupSp _sp_group; std::vector<PeptideMatch *> _peptide_match_list; ProteinXtpSp _protein_sp = nullptr; - pappso::pappso_double _evalue=0; /** @brief manually checked by user (true by default) */ bool _checked = true; diff --git a/src/core/proteinxtp.cpp b/src/core/proteinxtp.cpp index f08e97e2..6a3bcf80 100644 --- a/src/core/proteinxtp.cpp +++ b/src/core/proteinxtp.cpp @@ -50,7 +50,13 @@ ProteinXtpSp ProteinXtp::makeProteinXtpSp() const { return std::make_shared<ProteinXtp>(*this); } - +void ProteinXtp::setCompleteDescription(const QString & full_description) { + setAccession (full_description.simplified().section(" ", 0,0)); + setDescription (full_description.simplified().section(" ", 1)); + + + +} void ProteinXtp::setIsContaminant(bool conta) { _is_contaminant = conta; } diff --git a/src/core/proteinxtp.h b/src/core/proteinxtp.h index 0dfa8ff7..b9831814 100644 --- a/src/core/proteinxtp.h +++ b/src/core/proteinxtp.h @@ -44,6 +44,8 @@ public: ProteinXtpSp makeProteinXtpSp() const; + void setCompleteDescription(const QString & full_description); + void setIsContaminant(bool conta); void setIsDecoy(bool conta); bool isContaminant() const; diff --git a/src/gui/mainwindow.cpp b/src/gui/mainwindow.cpp index bf5b9901..609e6995 100644 --- a/src/gui/mainwindow.cpp +++ b/src/gui/mainwindow.cpp @@ -58,7 +58,7 @@ void XtpLoaderThread::doLoadingResults(bool is_individual, AutomaticFilterParame qDebug() << "XtpLoaderThread::doLoadingResults begin "; try { ProjectSp project_sp = Project().makeProjectSp(); - project_sp.get()->setCombine(!is_individual); + project_sp.get()->setCombineMode(!is_individual); for (QString filename : file_list) { project_sp.get()->readResultFile(filename); diff --git a/src/gui/peptide_list_view/peptide_view.ui b/src/gui/peptide_list_view/peptide_view.ui index 726daae1..139cdcd4 100644 --- a/src/gui/peptide_list_view/peptide_view.ui +++ b/src/gui/peptide_list_view/peptide_view.ui @@ -16,7 +16,14 @@ <widget class="QWidget" name="centralwidget"> <layout class="QVBoxLayout" name="verticalLayout"> <item> - <widget class="QLabel" name="proteinLabel"> + <widget class="QLabel" name="accession_label"> + <property name="text"> + <string>TextLabel</string> + </property> + </widget> + </item> + <item> + <widget class="QLabel" name="description_label"> <property name="text"> <string>TextLabel</string> </property> @@ -25,7 +32,6 @@ </property> </widget> </item> - <item> <layout class="QHBoxLayout" name="horizontalLayoutcb"> <item> @@ -69,14 +75,13 @@ <x>0</x> <y>0</y> <width>826</width> - <height>31</height> + <height>25</height> </rect> </property> </widget> <widget class="QStatusBar" name="statusbar"/> </widget> <resources/> - <connections> <connection> <sender>hideNotValidCheckBox</sender> diff --git a/src/gui/peptide_list_view/peptidelistwindow.cpp b/src/gui/peptide_list_view/peptidelistwindow.cpp index 9cda7c0f..ceff83e9 100644 --- a/src/gui/peptide_list_view/peptidelistwindow.cpp +++ b/src/gui/peptide_list_view/peptidelistwindow.cpp @@ -108,7 +108,8 @@ void PeptideListWindow::setProteinMatch(IdentificationGroup * p_identification_g _p_protein_match = p_protein_match; _peptide_table_model_p->setProteinMatch( p_protein_match); _p_proxy_model->setSourceModel(_peptide_table_model_p); - ui->proteinLabel->setText(p_protein_match->getProteinXtpSp().get()->getDescription()); + ui->description_label->setText(p_protein_match->getProteinXtpSp().get()->getDescription()); + ui->accession_label->setText(p_protein_match->getProteinXtpSp().get()->getAccession()); } } diff --git a/src/gui/project_view/automatic_filter_widget/automatic_filter_widget.ui b/src/gui/project_view/automatic_filter_widget/automatic_filter_widget.ui index 7c929c0d..e297d67e 100644 --- a/src/gui/project_view/automatic_filter_widget/automatic_filter_widget.ui +++ b/src/gui/project_view/automatic_filter_widget/automatic_filter_widget.ui @@ -50,13 +50,13 @@ <item row="0" column="1"> <widget class="QDoubleSpinBox" name="peptide_evalue_spinbox"> <property name="decimals"> - <number>3</number> + <number>6</number> </property> <property name="maximum"> <double>1.000000000000000</double> </property> <property name="singleStep"> - <double>0.010000000000000</double> + <double>0.001000000000000</double> </property> <property name="value"> <double>0.050000000000000</double> diff --git a/src/gui/protein_view/protein_detail_view.ui b/src/gui/protein_view/protein_detail_view.ui index 1695aee6..9529e1d9 100644 --- a/src/gui/protein_view/protein_detail_view.ui +++ b/src/gui/protein_view/protein_detail_view.ui @@ -20,7 +20,7 @@ <item> <layout class="QHBoxLayout" name="horizontalLayout"> <item> - <widget class="QLabel" name="descriptionLabel"> + <widget class="QLabel" name="accession_label"> <property name="sizePolicy"> <sizepolicy hsizetype="Preferred" vsizetype="Minimum"> <horstretch>0</horstretch> @@ -72,6 +72,16 @@ </item> </layout> </item> + <item> + <widget class="QLabel" name="description_label"> + <property name="text"> + <string>TextLabel</string> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + </widget> + </item> <item> <widget class="QTextEdit" name="sequenceTextEdit"> <property name="lineWrapMode"> diff --git a/src/gui/protein_view/proteinwindow.cpp b/src/gui/protein_view/proteinwindow.cpp index b683f168..7f04c882 100644 --- a/src/gui/protein_view/proteinwindow.cpp +++ b/src/gui/protein_view/proteinwindow.cpp @@ -73,8 +73,8 @@ void ProteinWindow::updateDisplay() { try { ui->validCheckBox->setCheckState(Qt::Unchecked); if (_p_protein_match->isValid()) ui->validCheckBox->setCheckState(Qt::Checked); - ui->descriptionLabel->setText(_p_protein_match->getProteinXtpSp().get()->getAccession()); - //ui->sequenceLabel->setText(_p_protein_match->getProteinXtpSp().get()->getSequence()); + ui->accession_label->setText(_p_protein_match->getProteinXtpSp().get()->getAccession()); + ui->description_label->setText(_p_protein_match->getProteinXtpSp().get()->getDescription()); ui->sequenceTextEdit->setText(_p_protein_match->getHtmlSequence()); ui->coverage_label->setText(QString("%1 %").arg(_p_protein_match->getCoverage()*100)); pappso::Peptide peptide(_p_protein_match->getProteinXtpSp().get()->getSequence()); diff --git a/src/input/xpipsaxhandler.cpp b/src/input/xpipsaxhandler.cpp index fc405743..f494d5fd 100644 --- a/src/input/xpipsaxhandler.cpp +++ b/src/input/xpipsaxhandler.cpp @@ -215,9 +215,7 @@ bool XpipSaxHandler::startElement_protein(QXmlAttributes attributes) { <sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKVSEDDKLIEYDALLVDRFLNILQDLHGPSLREFVQECYEVSADYEGKGDTTKLGELGAKLTGLAPADAILVASSILHMLNLANLAEEVQIAHRRRNSKLKKGGFADEGSATTESDIEETLKRLVSEVGKSPEEVFEALKNQTVDLVFTAHPTQSARRSLLQKNARIRNCLTQLNAKDITDDDKQELDEALQREIQAAFRTDEIRRAQPTPQDEMRYGMSYIHETVWKGVPKFLRRVDTALKNIGINERLPYNVSLIRFSSWMGGDRDGNPRVTPEVTRDVCLLARMMAANLYIDQIEELMFELSMWRCNDELRVRAEELHSSSGSKVTKYYIEFWKQIPPNEPYRVILGHVRDKLYNTRERARHLLASGVSEISAESSFTSIEEFLEPLELCYKSLCDCGDKAIADGSLLDLLRQVFTFGLSLVKLDIRQESERHTDVIDAITTHLGIGSYREWPEDKRQEWLLSELRGKRPLLPPDLPQTDEIADVIGAFHVLAELPPDSFGPYIISMATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFHGRGGTVGRGGGPTHLAILSQPPDTINGSIRVTVQGEVIEFCFGEEHLCFQTLQRFTAATLEHGMHPPVSPKPEWRKLMDEMAVVATEEYRSVVVKEARFVEYFRSATPETEYGRMNIGSRPAKRRPGGGITTLRAIPWIFSWTQTRFHLPVWLGVGAAFKFAIDKDVRNFQVLKEMYNEWPFFRVTLDLLEMVFAKGDPGIAGLYDELLVAEELKPFGKQLRDKYVETQQLLLQIAGHKDILEGDPFLKQGLVLRNPYITTLNVFQAYTLKRIRDPNFKVTPQPPLSKEFADENKPAGLVKLNPASEYPPGLEDTLILTMKGIAAGMQNTG</sequence> </protein> */ - _p_protein_match->setEvalue(std::pow ((double) 10.0, attributes.value("evalue").toDouble())); - _current_protein.setDescription(attributes.value("description").simplified()); - _current_protein.setAccession(_current_protein.getDescription().split(" ").at(0)); + _current_protein.setCompleteDescription(attributes.value("description")); qDebug() << "startElement_protein end" ; return true; } diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp index e5dd55ac..a720f583 100644 --- a/src/input/xtandemsaxhandler.cpp +++ b/src/input/xtandemsaxhandler.cpp @@ -35,9 +35,9 @@ #include "../utils/peptidestore.h" #include "../utils/proteinstore.h" -XtandemSaxHandler::XtandemSaxHandler(Project * p_project):_p_project(p_project) +XtandemSaxHandler::XtandemSaxHandler(Project * p_project, IdentificationGroup * p_identification_group):_p_project(p_project) { - + _p_identification_group = p_identification_group; } XtandemSaxHandler::~XtandemSaxHandler() @@ -54,36 +54,31 @@ bool XtandemSaxHandler::startElement(const QString & namespaceURI, const QString try { //startElement_group - if (qName == "match") { - is_ok = startElement_match(attributes); + if (qName == "group") { + is_ok = startElement_group(attributes); } else if (qName == "protein") { is_ok = startElement_protein(attributes); - } else if (qName == "identification") { - is_ok = startElement_identification(attributes); + } else if (qName == "note") { + is_ok = startElement_note(attributes); } - //<sample value="P6_08_10"/> - else if (qName == "sample") { - is_ok = startElement_sample(attributes); + else if (qName == "file") { + is_ok = startElement_file(attributes); } else if (qName == "peptide") { is_ok = startElement_peptide(attributes); - } else if (qName == "modifs_mass") { - is_ok = startElement_modifs_mass(attributes); - } else if (qName == "modif") { - is_ok = startElement_modif(attributes); - } else if (qName == "filter_params") { - is_ok = startElement_filter_params(attributes); - } else if (qName == "information") { - is_ok = startElement_information(attributes); + } else if (qName == "aa") { + is_ok = startElement_aa(attributes); + } else if (qName == "domain") { + is_ok = startElement_domain(attributes); } _current_text.clear(); } catch (pappso::PappsoException exception_pappso) { - _errorStr = QObject::tr("ERROR in XpipSaxHandler::startElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat()); + _errorStr = QObject::tr("ERROR in XtandemSaxHandler::startElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat()); return false; } catch (std::exception exception_std) { - _errorStr = QObject::tr("ERROR in XpipSaxHandler::startElement tag %1, std exception:\n%2").arg(qName).arg(exception_std.what()); + _errorStr = QObject::tr("ERROR in XtandemSaxHandler::startElement tag %1, std exception:\n%2").arg(qName).arg(exception_std.what()); return false; } return is_ok; @@ -95,21 +90,9 @@ bool XtandemSaxHandler::endElement(const QString & namespaceURI, const QString & bool is_ok = true; // endElement_peptide_list try { - if (qName == "protein") + if (qName == "note") { - is_ok = endElement_protein(); - } - else if (qName == "identification") { - is_ok = endElement_identification(); - } - else if (qName == "peptide") { - is_ok = endElement_peptide(); - } - else if (qName == "sequence") { - is_ok = endElement_sequence(); - } - else if (qName == "match") { - is_ok = endElement_match(); + is_ok = endElement_note(); } // end of detection_moulon @@ -117,11 +100,11 @@ bool XtandemSaxHandler::endElement(const QString & namespaceURI, const QString & // (_tag_stack[_tag_stack.size() - 2] == "detection_moulon")) } catch (pappso::PappsoException exception_pappso) { - _errorStr = QObject::tr("ERROR in XpipSaxHandler::endElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat()); + _errorStr = QObject::tr("ERROR in XtandemSaxHandler::endElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat()); return false; } catch (std::exception exception_std) { - _errorStr = QObject::tr("ERROR in XpipSaxHandler::endElement tag %1, std exception:\n%2").arg(qName).arg(exception_std.what()); + _errorStr = QObject::tr("ERROR in XtandemSaxHandler::endElement tag %1, std exception:\n%2").arg(qName).arg(exception_std.what()); return false; } @@ -143,91 +126,61 @@ bool XtandemSaxHandler::startElement_group(QXmlAttributes attrs) { _scan = attrs.value("id").toUInt(); _mhplus_obser = attrs.value("mh").toDouble(); _charge = attrs.value("z").toUInt(); - _RT = attrs.value("rt"); + //_retention_time = attrs.value("rt"); } } -bool XtandemSaxHandler::startElement_match(QXmlAttributes attributes) { - - qDebug() << "startElement_match "; - /* - * <match_list><match validate="true"> - */ - _p_protein_match = new ProteinMatch(); - _p_protein_match->setChecked(false); - if (attributes.value("validate").simplified().toLower() == "true") { - _p_protein_match->setChecked(true); +bool XtandemSaxHandler::startElement_note(QXmlAttributes attributes) { +//<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; + ////parent_gene=GRMZM2G083841</note> + _is_protein_description = false; + if (attributes.value("label") == "description") { + if (_tag_stack[_tag_stack.size() - 2] == "protein") { + _is_protein_description = true; + } } - qDebug() << "startElement_match end" ; - return true; } bool XtandemSaxHandler::startElement_protein(QXmlAttributes attributes) { //<protein expect="-704.6" id="1976.1" uid="195701" label="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC..." sumI="9.36" > qDebug() << "startElement_protein "; - _p_protein_match->setEvalue(std::pow ((double) 10.0, attributes.value("expect").toDouble())); - _current_protein.setDescription(attributes.value("label").simplified()); - _current_protein.setAccession(_current_protein.getDescription().split(" ").at(0)); - qDebug() << "startElement_protein end" ; - return true; -} + QString accession = attributes.value("label").simplified().split(" ", QString::SkipEmptyParts).at(0); + ProteinMatch * p_protein_match = _p_identification_group->getProteinMatch(accession); -bool XtandemSaxHandler::startElement_file(QXmlAttributes attributes) { - if (attrs.getValue("type").equals("peptide")) - prot_.setDatabase(identification_.getDatabaseSet().getInstance( - attrs.getValue("URL"))); -} + _current_protein.setAccession(accession); + if (p_protein_match == nullptr) { + _p_protein_match = new ProteinMatch(); + _p_protein_match->setChecked(false); -bool XtandemSaxHandler::startElement_identification(QXmlAttributes attributes) { - - qDebug() << "startElement_identification "; - _map_massstr_aamod.clear(); - _current_identification_group_p = _p_project->newIdentificationGroup(); - qDebug() << "startElement_identification end" ; + } + ProteinXtpSp sp_xtp_protein = _current_protein.makeProteinXtpSp(); + p_protein_match->setProteinXtpSp(_p_project->getProteinStore().getInstance(sp_xtp_protein)); + qDebug() << "startElement_protein end" ; return true; } -bool XtandemSaxHandler::startElement_information(QXmlAttributes attributes) { - -//<information Data_Type="indiv" match_number="223"/> - qDebug() << "startElement_information "; - qDebug() << "startElement_information end" ; - return true; -} -bool XtandemSaxHandler::startElement_modifs_mass(QXmlAttributes attributes) { - - /* - <modifs_list_mass><modifs_mass modvalue="-18.01056"/> - <modifs_mass modvalue="-17.02655"/> - <modifs_mass modvalue="15.99491"/> - <modifs_mass modvalue="42.01057"/> - <modifs_mass modvalue="42.01056"/> - <modifs_mass modvalue="57.02146"/> - </modifs_list_mass> - */ - qDebug() << "startElement_modifs_mass "; - QString mass_str(attributes.value("modvalue").simplified()); - pappso::mz mass = mass_str.toDouble(); - - pappso::AaModificationP mod = getAaModificationP(mass); - - _map_massstr_aamod[mass_str] = mod; - qDebug() << "startElement_modifs_mass end" ; - return true; +bool XtandemSaxHandler::startElement_file(QXmlAttributes attributes) { + //<file type="peptide" URL="/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta"/> + if (attributes.value("type") == "peptide") { + //prot_.setDatabase(identification_.getDatabaseSet().getInstance( + // attrs.getValue("URL"))); + } } -//<sample value="P6_21_23"/> -bool XtandemSaxHandler::startElement_sample(QXmlAttributes attributes) { +bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) { +//<domain id="1976.1.1" start="620" end="629" expect="9.7e-04" mh="1120.5307" delta="-0.0012" hyperscore="29.9" + //nextscore="10.2" y_score="10.4" y_ions="7" b_score="11.2" b_ions="3" pre="QLYR" post="RYGV" + //seq="AQEEMAQVAK" missed_cleavages="0"> - qDebug() << "startElement_sample "; + qDebug() << "startElement_domain "; MsRunSp ms_run = _p_project->getMsRunStore().getInstance(attributes.value("value").simplified()); ms_run.get()->setXmlId(attributes.value("value").simplified()); ms_run.get()->setFilename(attributes.value("value").simplified()); - _current_identification_group_p->addMsRunSp(ms_run); - qDebug() << "startElement_sample end" ; + _p_identification_group->addMsRunSp(ms_run); + qDebug() << "startElement_domain end" ; return true; } bool XtandemSaxHandler::startElement_peptide(QXmlAttributes attributes) { @@ -270,52 +223,25 @@ bool XtandemSaxHandler::startElement_peptide(QXmlAttributes attributes) { return true; } -bool XtandemSaxHandler::startElement_modif(QXmlAttributes attributes) { - - //<modifs><modif aa="M" modvalue="15.99491" posi="17" posi_in_prot="49"/> - qDebug() << "startElement_modif "; +bool XtandemSaxHandler::startElement_aa(QXmlAttributes attributes) { +//<aa type="M" at="624" modified="15.99491" /> + qDebug() << "startElement_aa "; pappso::AaModificationP modif = _map_massstr_aamod[attributes.value("modvalue").simplified()]; unsigned int position = attributes.value("posi").simplified().toUInt(); _current_peptide_sp.get()->addAaModification(modif, position-1); - qDebug() << "startElement_modif end" ; - return true; -} -bool XtandemSaxHandler::endElement_peptide() { - qDebug() << "endElement_peptide "; - - PeptideXtpSp peptide_const = PeptideXtp(*(_current_peptide_sp.get())).makePeptideXtpSp(); - peptide_const = _p_project->getPeptideStore().getInstance(peptide_const); - _p_peptide_match->setPeptideXtpSp(peptide_const); - return true; -} - -bool XtandemSaxHandler::endElement_sequence() { - //if ((_tag_stack.size() > 1) && (_tag_stack[_tag_stack.size() - 1] == "protein")) { - _current_protein.setSequence(_current_text); - //} - //else { - // XtandemHyperscore hyperscore(_curent_spectrum, _current_peptide_sp, _precision, _ion_list, _max_charge,_refine_spectrum_synthesis); - //} - return true; -} -bool XtandemSaxHandler::endElement_protein() { - ProteinXtpSp sp_xtp_protein = _current_protein.makeProteinXtpSp(); - - _p_protein_match->setProteinXtpSp(_p_project->getProteinStore().getInstance(sp_xtp_protein)); - + qDebug() << "startElement_aa end" ; return true; } -bool XtandemSaxHandler::endElement_identification() { - +bool XtandemSaxHandler::endElement_note() { +//<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; + ////parent_gene=GRMZM2G083841</note> + if (_is_protein_description) { + _p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1)); + } return true; } -bool XtandemSaxHandler::endElement_match() { - _current_identification_group_p->addProteinMatch(_p_protein_match); - _p_protein_match = nullptr; - return true; -} bool XtandemSaxHandler::error(const QXmlParseException &exception) { @@ -340,7 +266,6 @@ QString XtandemSaxHandler::errorString() const { bool XtandemSaxHandler::endDocument() { - _p_project->updateAutomaticFilters(_automatic_filter_parameters); return true; } diff --git a/src/input/xtandemsaxhandler.h b/src/input/xtandemsaxhandler.h index f794a349..c1864f9d 100644 --- a/src/input/xtandemsaxhandler.h +++ b/src/input/xtandemsaxhandler.h @@ -43,7 +43,7 @@ class XtandemSaxHandler: public QXmlDefaultHandler { public: - XtandemSaxHandler(Project * p_project); + XtandemSaxHandler(Project * p_project, IdentificationGroup * p_identification_group); ~XtandemSaxHandler(); bool startElement(const QString & namespaceURI, const QString & localName, @@ -66,21 +66,13 @@ public: private: bool startElement_group(QXmlAttributes attrs); - - bool startElement_filter_params(QXmlAttributes attributes); - bool startElement_information(QXmlAttributes attributes); - bool startElement_identification(QXmlAttributes attributes); - bool startElement_match(QXmlAttributes attributes); bool startElement_peptide(QXmlAttributes attributes); bool startElement_protein(QXmlAttributes attributes); - bool startElement_sample(QXmlAttributes attributes); - bool startElement_modifs_mass(QXmlAttributes attributes); - bool startElement_modif(QXmlAttributes attributes); - bool endElement_identification(); - bool endElement_sequence(); - bool endElement_protein(); - bool endElement_peptide(); - bool endElement_match(); + bool startElement_note(QXmlAttributes attributes); + bool startElement_file(QXmlAttributes attributes); + bool startElement_aa(QXmlAttributes attributes); + bool startElement_domain(QXmlAttributes attributes); + bool endElement_note(); pappso::AaModificationP getAaModificationP(pappso::mz mass) const; @@ -90,19 +82,22 @@ private: QString _current_text; Project * _p_project; + IdentificationGroup * _p_identification_group; + ProteinMatch * _p_protein_match; PeptideMatch * _p_peptide_match; ProteinXtp _current_protein; PeptideXtpSp _current_peptide_sp; - IdentificationGroup * _current_identification_group_p; QMap<QString, pappso::AaModificationP> _map_massstr_aamod; + QString _current_group_label; QString _current_group_type; - uint _scan; - double _mhplus_obser; - uint _charge; - getValue _RT; + unsigned int _scan; + pappso::pappso_double _mhplus_obser; + unsigned int _charge; + pappso::pappso_double _retention_time; + bool _is_protein_description = false; }; #endif // XTANDEMSAXHANDLER_H -- GitLab