diff --git a/src/input/pepxmlsaxhandler.cpp b/src/input/pepxmlsaxhandler.cpp index 05090d158db014bdd42b9728e9f9eab8b25c7f4a..809b6857549411a3ca63d001c9b18290c6dfe1c6 100644 --- a/src/input/pepxmlsaxhandler.cpp +++ b/src/input/pepxmlsaxhandler.cpp @@ -120,7 +120,10 @@ PepXmlSaxHandler::startElement(const QString &namespaceURI, { is_ok = startElement_mod_aminoacid_mass(attributes); } - + else if(qName == "modification_info") + { + is_ok = startElement_modification_info(attributes); + } _current_text.clear(); } catch(pappso::PappsoException exception_pappso) @@ -211,10 +214,29 @@ PepXmlSaxHandler::startElement_msms_pipeline_analysis(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_msms_run_summary(QXmlAttributes attributes) { - bool is_ok = true; - QString mz_datafile = QString("%1%2") - .arg(attributes.value("base_name")) - .arg(attributes.value("raw_data")); + bool is_ok = true; + QString old_file; + if(!_current_complete_msrun_file_path.isEmpty()) + { + old_file = _current_complete_msrun_file_path; + } + _current_complete_msrun_file_path = QString("%1%2") + .arg(attributes.value("base_name")) + .arg(attributes.value("raw_data")); + if((!old_file.isEmpty()) && + (QFileInfo(_current_complete_msrun_file_path).baseName() != + QFileInfo(old_file).baseName())) + { + throw pappso::PappsoException( + QObject::tr("ERROR reading pepxml file :\nX!TandemPipeline does not " + "support identification source files containing results " + "from multiple MS runs (%1 != %2)") + .arg(old_file) + .arg(_current_complete_msrun_file_path)); + } + _sp_msrun.get()->setFilename(attributes.value("base_name")); + _sp_msrun.get()->setSampleName( + QFileInfo(_current_complete_msrun_file_path).baseName()); return is_ok; } @@ -259,7 +281,12 @@ PepXmlSaxHandler::startElement_search_summary(QXmlAttributes attributes) bool PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes) { - bool is_ok = true; + bool is_ok = true; + QString spectrum_ref = attributes.value("spectrum"); + if(_current_complete_msrun_file_path.isEmpty()) + { + _sp_msrun.get()->setFilename(QFileInfo(spectrum_ref).baseName()); + } unsigned int start_scan = attributes.value("start_scan").toUInt(); unsigned int end_scan = attributes.value("end_scan").toUInt(); if(start_scan != end_scan) @@ -272,6 +299,7 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes) .arg(start_scan) .arg(end_scan)); } + _scan = start_scan; _current_charge = attributes.value("assumed_charge").toUInt(); if(attributes.value("retention_time_sec").isEmpty()) { @@ -297,7 +325,7 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes) //<alternative_protein protein="sp|P46784|RS10B_YEAST" protein_descr="40S // ribosomal protein S10-B OS=Saccharomyces cerevisiae (strain ATCC 204508 -// \ +// \ //S288c) GN=RPS10B PE=1 SV=1" num_tol_term="2" peptide_prev_aa="K" // peptide_next_aa="N"/> bool @@ -482,37 +510,78 @@ PepXmlSaxHandler::startElement_search_score(QXmlAttributes attributes) if(name == "expect") { _p_peptide_evidence->setEvalue(valueStr.simplified().toDouble()); - if(_p_peptide_evidence->getIdentificationEngine() == - IdentificationEngine::OMSSA) - { - _p_peptide_evidence->setParam(PeptideEvidenceParam::omssa_evalue, - valueStr.simplified().toDouble()); - } } else if(name == "EValue") { _p_peptide_evidence->setEvalue(valueStr.simplified().toDouble()); } - // <search_score name="hyperscore" value="232"/> - else if(name == "hyperscore") - { - _p_peptide_evidence->setParam( - PeptideEvidenceParam::tandem_hyperscore, - QVariant(attributes.value("hyperscore").toDouble())); - } - else if(name == "pvalue") + + IdentificationEngine identification_engine = + _p_peptide_evidence->getIdentificationEngine(); + if(identification_engine == IdentificationEngine::OMSSA) { - if(_p_peptide_evidence->getIdentificationEngine() == - IdentificationEngine::OMSSA) + if(name == "pvalue") { _p_peptide_evidence->setParam(PeptideEvidenceParam::omssa_pvalue, valueStr.simplified().toDouble()); } + else if(name == "expect") + { + _p_peptide_evidence->setParam(PeptideEvidenceParam::omssa_evalue, + valueStr.simplified().toDouble()); + } + } + else if(identification_engine == IdentificationEngine::XTandem) + { + if(name == "hyperscore") + { + _p_peptide_evidence->setParam( + PeptideEvidenceParam::tandem_hyperscore, + QVariant(valueStr.simplified().toDouble())); + } + } + else if(identification_engine == IdentificationEngine::MSGFplus) + { + if(name == "raw") + { + _p_peptide_evidence->setParam( + PeptideEvidenceParam::msgfplus_raw, + QVariant(valueStr.simplified().toDouble())); + } + else if(name == "SpecEValue") + { + _p_peptide_evidence->setParam( + PeptideEvidenceParam::msgfplus_SpecEValue, + QVariant(valueStr.simplified().toDouble())); + } + else if(name == "EValue") + { + _p_peptide_evidence->setParam( + PeptideEvidenceParam::msgfplus_EValue, + QVariant(valueStr.simplified().toDouble())); + } } } return is_ok; } + +// <modification_info mod_nterm_mass="43.018389" modified_peptide="SQRDCR"> +bool +PepXmlSaxHandler::startElement_modification_info(QXmlAttributes attributes) +{ + bool is_ok = true; + + if(!attributes.value("mod_nterm_mass").isEmpty()) + { + pappso::AaModificationP modif = + Utils::guessAaModificationPbyMonoisotopicMassDelta( + attributes.value("mod_nterm_mass").toDouble()); + + _current_peptide_sp.get()->addAaModification(modif, 0); + } + return is_ok; +} // <modification_info modified_peptide="SQRDCR"> <mod_aminoacid_mass // position="5" mass="160.030649"/> </modification_info> bool @@ -521,7 +590,7 @@ PepXmlSaxHandler::startElement_mod_aminoacid_mass(QXmlAttributes attributes) bool is_ok = true; double mass = attributes.value("mass").toDouble(); unsigned int position = attributes.value("position").toUInt() - 1; - const pappso::Aa &aa = _current_peptide_sp.get()->getConstAa(position); + const pappso::Aa aa(_current_peptide_sp.get()->getSequence()[position].toLatin1()); double mass_modif = mass - aa.getMass(); pappso::AaModificationP modif = diff --git a/src/input/pepxmlsaxhandler.h b/src/input/pepxmlsaxhandler.h index ed38bc289f7e2e04f541908a7cc0350b861bd0ad..40a44ecc06c5013f25cfc2a7452e699118ab03c7 100644 --- a/src/input/pepxmlsaxhandler.h +++ b/src/input/pepxmlsaxhandler.h @@ -76,6 +76,8 @@ class PepXmlSaxHandler : public QXmlDefaultHandler bool startElement_interprophet_result(QXmlAttributes attributes); bool startElement_search_score(QXmlAttributes attributes); bool startElement_mod_aminoacid_mass(QXmlAttributes attributes); + bool startElement_modification_info(QXmlAttributes attributes); + bool endElement_search_hit(); bool endElement_modification_info(); @@ -100,6 +102,7 @@ class PepXmlSaxHandler : public QXmlDefaultHandler QString _current_group_type; QString _current_note_label; QString _current_note_type; + QString _current_complete_msrun_file_path; unsigned int _scan; unsigned int _current_charge; pappso::pappso_double _current_retention_time; diff --git a/src/utils/types.h b/src/utils/types.h index 9c7ed47c5246d57f57ab26f196c72bdc98903ca0..df9423e49c30412be224aff7c0b17ce0786bf60e 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -75,7 +75,11 @@ enum class PeptideEvidenceParam : std::int8_t peptide_inter_prophet_probability = 5, ///< no PSI MS description omssa_evalue = 6, ///< MS:1001328 "OMSSA E-value." [PSI:PI] omssa_pvalue = 7, ///< MS:1001329 "OMSSA p-value." [PSI:PI] - + msgfplus_raw = 8, ///< MS:1002049 "MS-GF raw score." [PSI:PI] + msgfplus_denovo = 9, ///< MS:1002050 "MS-GF de novo score." [PSI:PI] + msgfplus_energy = 10, ///< MS:1002051 "MS-GF energy score." [PSI:PI] + msgfplus_SpecEValue = 11, ///< MS:1002052 "MS-GF spectral E-value." [PSI:PI] + msgfplus_EValue = 12, ///< MS:1002053 "MS-GF E-value." [PSI:PI] };