From eccbbcd0e0181b42cbc6f1a90080366deb3fc6e1 Mon Sep 17 00:00:00 2001 From: Olivier Langella <olivier.langella@u-psud.fr> Date: Mon, 9 Jul 2018 10:53:15 +0200 Subject: [PATCH] pepxml : take into account buggy engines that do not use correctly protein accession and description --- src/core/proteinxtp.cpp | 2 +- src/core/proteinxtp.h | 2 ++ src/input/pepxmlsaxhandler.cpp | 27 ++++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/core/proteinxtp.cpp b/src/core/proteinxtp.cpp index c5f99aea..6a59f4e2 100644 --- a/src/core/proteinxtp.cpp +++ b/src/core/proteinxtp.cpp @@ -101,8 +101,8 @@ const FastaFile * ProteinXtp::getFastaFileP() const { void ProteinXtp::setCompleteDescription(const QString & full_description) { setAccession (full_description.simplified().section(" ", 0,0)); setDescription (full_description.simplified().section(" ", 1)); - } + void ProteinXtp::setIsContaminant(bool conta) { _is_contaminant = conta; } diff --git a/src/core/proteinxtp.h b/src/core/proteinxtp.h index adb23610..a49d39eb 100644 --- a/src/core/proteinxtp.h +++ b/src/core/proteinxtp.h @@ -58,6 +58,8 @@ public: ProteinXtpSp makeProteinXtpSp() const; + /** @brief separates accession from description based on first space separator + */ void setCompleteDescription(const QString & full_description); void setIsContaminant(bool conta); diff --git a/src/input/pepxmlsaxhandler.cpp b/src/input/pepxmlsaxhandler.cpp index a51e5f60..bb5a0e0f 100644 --- a/src/input/pepxmlsaxhandler.cpp +++ b/src/input/pepxmlsaxhandler.cpp @@ -326,7 +326,7 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes) //<alternative_protein protein="sp|P46784|RS10B_YEAST" protein_descr="40S // ribosomal protein S10-B OS=Saccharomyces cerevisiae (strain ATCC 204508 -// \ +// \ //S288c) GN=RPS10B PE=1 SV=1" num_tol_term="2" peptide_prev_aa="K" // peptide_next_aa="N"/> bool @@ -335,7 +335,14 @@ PepXmlSaxHandler::startElement_alternative_protein(QXmlAttributes attributes) bool is_ok = true; ProteinXtpSp sp_xtp_protein = ProteinXtp().makeProteinXtpSp(); sp_xtp_protein.get()->setAccession(attributes.value("protein")); - sp_xtp_protein.get()->setDescription(attributes.value("protein_descr")); + if(attributes.value("protein_descr").isEmpty()) + { + sp_xtp_protein.get()->setCompleteDescription(attributes.value("protein")); + } + else + { + sp_xtp_protein.get()->setDescription(attributes.value("protein_descr")); + } sp_xtp_protein.get()->setFastaFileP( _p_identification_data_source->getFastaFileList()[0].get()); @@ -364,7 +371,14 @@ PepXmlSaxHandler::startElement_search_hit(QXmlAttributes attributes) //_current_protein.setAccession(attributes.value("protein")); ProteinXtpSp sp_xtp_protein = ProteinXtp().makeProteinXtpSp(); sp_xtp_protein.get()->setAccession(attributes.value("protein")); - sp_xtp_protein.get()->setDescription(attributes.value("protein_descr")); + if(attributes.value("protein_descr").isEmpty()) + { + sp_xtp_protein.get()->setCompleteDescription(attributes.value("protein")); + } + else + { + sp_xtp_protein.get()->setDescription(attributes.value("protein_descr")); + } sp_xtp_protein.get()->setFastaFileP( _p_identification_data_source->getFastaFileList()[0].get()); @@ -413,6 +427,13 @@ PepXmlSaxHandler::startElement_search_hit(QXmlAttributes attributes) // search_engine="X! Tandem (k-score)" search_engine = IdentificationEngine::XTandem; } + + else if(_current_search_engine == "X! Tandem") + { + // files coming from msfragger + search_engine = IdentificationEngine::XTandem; + } + else if(_current_search_engine == "OMSSA") { search_engine = IdentificationEngine::OMSSA; -- GitLab