From 6f27113f2d42f79d48de5c47837c6523227e9311 Mon Sep 17 00:00:00 2001
From: Olivier Langella <olivier.langella@u-psud.fr>
Date: Thu, 8 Mar 2018 15:47:56 +0100
Subject: [PATCH] setting variable and fixed modifications

---
 src/input/mascot/mascotdatparser.cpp | 57 ++++++++++++++++++++++++++--
 src/input/mascot/mascotdatparser.h   |  8 +++-
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/src/input/mascot/mascotdatparser.cpp b/src/input/mascot/mascotdatparser.cpp
index 51d69a9e..bd1ee45a 100644
--- a/src/input/mascot/mascotdatparser.cpp
+++ b/src/input/mascot/mascotdatparser.cpp
@@ -42,7 +42,7 @@ MascotDatParser::MascotDatParser(Project * p_project, IdentificationGroup * p_id
     _p_identification_group = p_identification_group;
     _p_identification_data_source = p_identification_data_source;
 
-    _regexp_header_line.setPattern("^([a-z,0-9,_]+)=(.*)$");
+    _regexp_header_line.setPattern("^([A-Z,a-z,0-9,_]+)=(.*)$");
 
 }
 MascotDatParser::~MascotDatParser() {
@@ -116,26 +116,58 @@ void MascotDatParser::parseMassesLine(const QString & masses_line) {
     qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << masses_line;
     if (_regexp_header_line.exactMatch(masses_line)) {
         QStringList header_list = _regexp_header_line.capturedTexts();
 //           C_term=17.002740
 //N_term=1.007825
 //delta1=15.994915,Oxidation (M)
         if (header_list[1].startsWith("delta")) {
+            // slot N-1 of _delta_modification_list holds the modification described by the "deltaN" line
+            const unsigned index = header_list[1].mid(5).toUInt(); // 0 when the key carries no usable number
+            if (index == 0) return; // nothing to index by: slot index-1 would be far out of range
+            if (_delta_modification_list.size() < index) _delta_modification_list.resize(index); // tolerate gaps and out-of-order lines
             QStringList delta_mod_list =  header_list[2].split(",");
             pappso::pappso_double mass = delta_mod_list[0].toDouble();
+            if (delta_mod_list.value(1) == "Oxidation (M)") { // value(): empty string instead of an out-of-range read when the name is missing
+                _delta_modification_list[index-1].modification = pappso::AaModification::getInstance("MOD:00719");
+                _delta_modification_list[index-1].residue = 'M';
+            }
+            else {
+                _delta_modification_list[index-1].modification = pappso::AaModification::getInstanceCustomizedMod(mass);
+            }
             //_number_of_residues = header_list[5].toUInt();
         }
 
+//FixedModResidues1=C
+        else if (header_list[1].startsWith("FixedModResidues")) {
+            unsigned index = header_list[1].mid(16).toUInt();
+            qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " <<  index;
+            if ((index > 0) && (index <= _fixed_modification_list.size()) && !header_list[2].isEmpty()) _fixed_modification_list[index-1].residue = header_list[2].at(0); // only an entry already created by a "FixedModN" line receives its residue
+        }
+
 //NeutralLoss1=0.000000
 //NeutralLoss1_master=63.998285
 //FixedMod1=57.021464,Carbamidomethyl (C)
-        if (header_list[1].startsWith("FixedMod")) {
+        else if (header_list[1].startsWith("FixedMod")) {
+            const unsigned index = header_list[1].mid(8).toUInt(); // N of "FixedModN"; 0 when the key carries no usable number
+            if (index == 0) return; // nothing to index by: slot index-1 would be far out of range
+            if (_fixed_modification_list.size() < index) _fixed_modification_list.resize(index); // tolerate gaps and out-of-order lines
             QStringList fixed_mod_list =  header_list[2].split(",");
             pappso::pappso_double mass = fixed_mod_list[0].toDouble();
             //_number_of_residues = header_list[2].toUInt();
-        }
+            qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << header_list[2];
 
-//FixedModResidues1=C
+            if (fixed_mod_list.value(1) == "Carbamidomethyl (C)") { // value(): empty string instead of an out-of-range read when the name is missing
+                _fixed_modification_list[index-1].modification = pappso::AaModification::getInstance("MOD:00397");
+                _fixed_modification_list[index-1].residue = 'C';
+            }
+            else {
+                _fixed_modification_list[index-1].modification = pappso::AaModification::getInstanceCustomizedMod(mass);
+            }
+        }
+    }
+    else {
+        qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " QREGEXP does not work on " << masses_line;
     }
+    qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << _fixed_modification_list.size();
 }
 
 void MascotDatParser::parseProteinLine(const QString & protein_line) {
@@ -490,6 +522,15 @@ void MascotDatParser::savePeptideList(std::vector< PeptideLine> & peptide_list,
 
         //variable modifications :
         setVariableModifications(peptide_sp, peptide_line.peptide_string_list.at(6));
+        
+        //fixed modifications :
+        for (unsigned int i=0; i < peptide_str.size(); i++) {
+            for (MascotModification mascot_modif :_fixed_modification_list) {
+                if (peptide_str.at(i) == mascot_modif.residue) {
+                    peptide_sp.get()->addAaModification(mascot_modif.modification, i);
+                }
+            }
+        }
 
         peptide_evidence.setPeptideXtpSp(_p_project->getPeptideStore().getInstance(peptide_sp));
 
@@ -555,4 +596,12 @@ pappso::pappso_double MascotDatParser::getEvalueExperimental(pappso::pappso_doub
 }
 
 void MascotDatParser::setVariableModifications(PeptideXtpSp & peptide_sp, QString variable_modifications_string) {
+    qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
+    const int last_residue = variable_modifications_string.size() - 3; // mask is presumably N-term flag + one digit per residue + C-term flag -- TODO confirm against the Mascot result file format
+    for (int i = 0; (last_residue >= 0) && (i < variable_modifications_string.size()); i++) {
+        const unsigned int delta_number = variable_modifications_string.mid(i, 1).toUInt(); // 0 = unmodified (or not a digit), N = entry parsed from the "deltaN" masses line
+        if ((delta_number == 0) || (delta_number > _delta_modification_list.size())) continue; // nothing to apply, or no such "deltaN" entry was parsed
+        pappso::AaModificationP modification = _delta_modification_list[delta_number - 1].modification;
+        if (modification != nullptr) peptide_sp.get()->addAaModification(modification, qBound(0, i - 1, last_residue)); // mask position i maps to residue i-1; terminal flags are attached to the first/last residue instead of wrapping below 0 or running past the end
+    }
 }
diff --git a/src/input/mascot/mascotdatparser.h b/src/input/mascot/mascotdatparser.h
index 592727de..6c345406 100644
--- a/src/input/mascot/mascotdatparser.h
+++ b/src/input/mascot/mascotdatparser.h
@@ -52,7 +52,10 @@ private:
 
 
 
-
+    struct MascotModification { // one modification declared by a "deltaN" or "FixedModN" line of the masses section
+        pappso::AaModificationP modification = nullptr; // stays null until the parser assigns the pappso modification
+        QChar residue; // amino acid letter the modification targets; a default-constructed QChar is the null character
+    };
 
 
     struct PeptideLine {
@@ -103,6 +106,9 @@ private:
     std::vector<SummaryLine> _summary_list;
     std::vector<std::vector<PeptideLine>> _decoy_query_peptide_results;
     std::vector<SummaryLine> _decoy_summary_list;
+    
+    std::vector<MascotModification> _delta_modification_list;
+    std::vector<MascotModification> _fixed_modification_list;
 
 
 };
-- 
GitLab