From 0d7a761a852f4b6462918ea3b87f72de7e437b2b Mon Sep 17 00:00:00 2001
From: Olivier Langella <Olivier.Langella@moulon.inra.fr>
Date: Thu, 20 Apr 2017 11:07:28 +0200
Subject: [PATCH] make protein sequence resistant to unknown amino acids

---
 src/core/proteinxtp.cpp                | 16 ++++++++++++++--
 src/core/proteinxtp.h                  | 10 ++++++++++
 src/gui/protein_view/proteinwindow.cpp |  3 +--
 src/input/xtandemsaxhandler.cpp        |  3 ++-
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/core/proteinxtp.cpp b/src/core/proteinxtp.cpp
index a010608a..0b593577 100644
--- a/src/core/proteinxtp.cpp
+++ b/src/core/proteinxtp.cpp
@@ -53,7 +53,7 @@ ProteinXtpSp ProteinXtp::makeProteinXtpSp() const {
 void ProteinXtp::setCompleteDescription(const QString & full_description) {
     setAccession (full_description.simplified().section(" ", 0,0));
     setDescription (full_description.simplified().section(" ", 1));
-        
+
 }
 void ProteinXtp::setIsContaminant(bool conta) {
     _is_contaminant = conta;
@@ -68,6 +68,16 @@ bool ProteinXtp::isDecoy() const {
     return _is_decoy;
 }
 
+QString ProteinXtp::getOnlyAminoAcidSequence() const {
+    // Strip every '*' from a copy of the sequence; a plain character removal avoids building a QRegExp per call.
+    return QString(this->getSequence()).remove(QLatin1Char('*'));
+}
+
+pappso::pappso_double ProteinXtp::getMass() const {
+    // The peptide is built from the cleaned sequence ('*' removed), not from the raw stored sequence.
+    return pappso::Peptide(getOnlyAminoAcidSequence()).getMass();
+}
+
 class DigestionHandler: public pappso::EnzymeProductInterface {
 public:
     void setPeptide(std::int8_t sequence_database_id, const pappso::ProteinSp & protein_sp, bool is_decoy, const QString& peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) override {
@@ -82,7 +92,9 @@ unsigned int ProteinXtp::countTrypticPeptidesForPAI() const {
     pappso::Enzyme kinase;
     kinase.setMiscleavage(0);
     DigestionHandler digestion;
-    kinase.eat(0,this->makeProteinSp(),false,digestion);
+
+    pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence());
+    kinase.eat(0,protein,false,digestion);
 
     unsigned int count = 0;
     for (const QString & peptide_str: digestion._peptide_list) {
diff --git a/src/core/proteinxtp.h b/src/core/proteinxtp.h
index b9831814..9356fdc6 100644
--- a/src/core/proteinxtp.h
+++ b/src/core/proteinxtp.h
@@ -22,6 +22,7 @@
 ******************************************************************************/
 
 #include <pappsomspp/protein/protein.h>
+#include <pappsomspp/types.h>
 #include "sequencedatabase.h"
 
 #ifndef PROTEIN_XTP_H
@@ -51,6 +52,15 @@ public:
     bool isContaminant() const;
     bool isDecoy() const;
     unsigned int countTrypticPeptidesForPAI() const;
+    
+    /** @brief get the protein sequence with every '*' character removed
+     * @return a cleaned copy of the sequence; the stored sequence is not modified
+     */
+    QString getOnlyAminoAcidSequence() const;
+    
+    /** @brief get protein mass in dalton, computed on getOnlyAminoAcidSequence()
+     */
+    pappso::pappso_double getMass() const;
 
 private:
     SequenceDatabase * _p_sequence_database;
diff --git a/src/gui/protein_view/proteinwindow.cpp b/src/gui/protein_view/proteinwindow.cpp
index 7f04c882..9b9a2baf 100644
--- a/src/gui/protein_view/proteinwindow.cpp
+++ b/src/gui/protein_view/proteinwindow.cpp
@@ -77,8 +77,7 @@ void ProteinWindow::updateDisplay() {
         ui->description_label->setText(_p_protein_match->getProteinXtpSp().get()->getDescription());
         ui->sequenceTextEdit->setText(_p_protein_match->getHtmlSequence());
         ui->coverage_label->setText(QString("%1 %").arg(_p_protein_match->getCoverage()*100));
-        pappso::Peptide peptide(_p_protein_match->getProteinXtpSp().get()->getSequence());
-        ui->mw_label->setText(QString("%1 kDa").arg(peptide.getMass()/1000));
+        ui->mw_label->setText(QString("%1 kDa").arg(_p_protein_match->getProteinXtpSp().get()->getMass()/1000));
         ui->evalue_label->setText(QString("%1 (log10: %2)").arg(_p_protein_match->getEvalue()).arg(std::log10(_p_protein_match->getEvalue())));
     }
     catch (pappso::PappsoException exception_pappso) {
diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp
index 23b4862b..e1649cf2 100644
--- a/src/input/xtandemsaxhandler.cpp
+++ b/src/input/xtandemsaxhandler.cpp
@@ -195,7 +195,8 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) {
     _current_text = _current_text.simplified().replace(" ", "");
     if (!_current_text.isEmpty()) {
         //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop()
-        _p_protein_match->getProteinXtpSp().get()->setSequence(_current_text.replace(QRegExp("\\*"), ""));
+        //_p_protein_match->getProteinXtpSp().get()->setSequence(_current_text.replace(QRegExp("\\*"), ""));
+        _p_protein_match->getProteinXtpSp().get()->setSequence(_current_text);
     }
 
     // <domain id="4017.1.1" start="21" end="31" expect="2.0e-06"
-- 
GitLab