From eccbbcd0e0181b42cbc6f1a90080366deb3fc6e1 Mon Sep 17 00:00:00 2001
From: Olivier Langella <olivier.langella@u-psud.fr>
Date: Mon, 9 Jul 2018 10:53:15 +0200
Subject: [PATCH] pepxml: take into account buggy engines that do not correctly
 use protein accession and description

---
 src/core/proteinxtp.cpp        |  2 +-
 src/core/proteinxtp.h          |  2 ++
 src/input/pepxmlsaxhandler.cpp | 27 ++++++++++++++++++++++++---
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/core/proteinxtp.cpp b/src/core/proteinxtp.cpp
index c5f99aea..6a59f4e2 100644
--- a/src/core/proteinxtp.cpp
+++ b/src/core/proteinxtp.cpp
@@ -101,8 +101,8 @@ const FastaFile * ProteinXtp::getFastaFileP() const {
 void ProteinXtp::setCompleteDescription(const QString & full_description) {
     setAccession (full_description.simplified().section(" ", 0,0));
     setDescription (full_description.simplified().section(" ", 1));
-
 }
+
 void ProteinXtp::setIsContaminant(bool conta) {
     _is_contaminant = conta;
 }
diff --git a/src/core/proteinxtp.h b/src/core/proteinxtp.h
index adb23610..a49d39eb 100644
--- a/src/core/proteinxtp.h
+++ b/src/core/proteinxtp.h
@@ -58,6 +58,8 @@ public:
 
     ProteinXtpSp makeProteinXtpSp() const;
     
+    /** @brief splits a whitespace-simplified string at its first space: the first token is set as accession, the remainder as description
+     */
     void setCompleteDescription(const QString & full_description);
     
     void setIsContaminant(bool conta);
diff --git a/src/input/pepxmlsaxhandler.cpp b/src/input/pepxmlsaxhandler.cpp
index a51e5f60..bb5a0e0f 100644
--- a/src/input/pepxmlsaxhandler.cpp
+++ b/src/input/pepxmlsaxhandler.cpp
@@ -326,7 +326,7 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes)
 
 //<alternative_protein protein="sp|P46784|RS10B_YEAST" protein_descr="40S
 //       ribosomal protein S10-B OS=Saccharomyces cerevisiae (strain ATCC 204508
-//                                           \
+//                                              \
 //S288c) GN=RPS10B PE=1 SV=1" num_tol_term="2" peptide_prev_aa="K"
 // peptide_next_aa="N"/>
 bool
@@ -335,7 +335,14 @@ PepXmlSaxHandler::startElement_alternative_protein(QXmlAttributes attributes)
   bool is_ok                  = true;
   ProteinXtpSp sp_xtp_protein = ProteinXtp().makeProteinXtpSp();
   sp_xtp_protein.get()->setAccession(attributes.value("protein"));
-  sp_xtp_protein.get()->setDescription(attributes.value("protein_descr"));
+  if(attributes.value("protein_descr").isEmpty())
+    {
+      sp_xtp_protein.get()->setCompleteDescription(attributes.value("protein"));
+    }
+  else
+    {
+      sp_xtp_protein.get()->setDescription(attributes.value("protein_descr"));
+    }
   sp_xtp_protein.get()->setFastaFileP(
     _p_identification_data_source->getFastaFileList()[0].get());
 
@@ -364,7 +371,14 @@ PepXmlSaxHandler::startElement_search_hit(QXmlAttributes attributes)
   //_current_protein.setAccession(attributes.value("protein"));
   ProteinXtpSp sp_xtp_protein = ProteinXtp().makeProteinXtpSp();
   sp_xtp_protein.get()->setAccession(attributes.value("protein"));
-  sp_xtp_protein.get()->setDescription(attributes.value("protein_descr"));
+  if(attributes.value("protein_descr").isEmpty())
+    {
+      sp_xtp_protein.get()->setCompleteDescription(attributes.value("protein"));
+    }
+  else
+    {
+      sp_xtp_protein.get()->setDescription(attributes.value("protein_descr"));
+    }
   sp_xtp_protein.get()->setFastaFileP(
     _p_identification_data_source->getFastaFileList()[0].get());
 
@@ -413,6 +427,13 @@ PepXmlSaxHandler::startElement_search_hit(QXmlAttributes attributes)
       // search_engine="X! Tandem (k-score)"
       search_engine = IdentificationEngine::XTandem;
     }
+
+  else if(_current_search_engine == "X! Tandem")
+    {
+      // files coming from MSFragger (search engine declared as "X! Tandem")
+      search_engine = IdentificationEngine::XTandem;
+    }
+
   else if(_current_search_engine == "OMSSA")
     {
       search_engine = IdentificationEngine::OMSSA;
-- 
GitLab