From 58e8469d21f044d2313f9d44c8696290657dd524 Mon Sep 17 00:00:00 2001
From: Olivier Langella <Olivier.Langella@moulon.inra.fr>
Date: Wed, 19 Apr 2017 21:51:57 +0200
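Subject: [PATCH] Parse the X!Tandem engine version from the "process, version" note

Store only the parenthesised part of the "process, version" note (e.g.
"2013.09.01.1" from "X! Tandem Sledgehammer (2013.09.01.1)") as the
identification engine version instead of the whole note text. If the note
contains no parentheses, the version is left unset.

The remaining changes re-indent the reference comments in
xtandemsaxhandler.cpp; they do not alter behaviour.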

---
 src/input/xtandemsaxhandler.cpp | 426 ++++++++++++++++----------------
 1 file changed, 215 insertions(+), 211 deletions(-)

diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp
index b32ac882d..1ef6a3d36 100644
--- a/src/input/xtandemsaxhandler.cpp
+++ b/src/input/xtandemsaxhandler.cpp
@@ -151,7 +151,7 @@ bool XtandemSaxHandler::startElement_note(QXmlAttributes attributes) {
             _is_protein_description = true;
         }
     }
-     return is_ok;
+    return is_ok;
 }
 
 bool XtandemSaxHandler::startElement_protein(QXmlAttributes attributes) {
@@ -193,7 +193,7 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) {
     bool is_ok = true;
     _current_text = _current_text.simplified().replace(" ", "");
     if (!_current_text.isEmpty()) {
-      //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop()
+        //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop()
         _p_protein_match->getProteinXtpSp().get()->setSequence(_current_text.replace(QRegExp("\\*"), ""));
     }
 
@@ -204,76 +204,76 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) {
     // pre="VLGR" post="VEFM" seq="TGSQGQCTQVR" missed_cleavages="10">
     /*
      * id
- – the identifier for t
-his particular identified dom
-ain (s
-pectrum
- #).(i
-d
-#).(dom
-ain#) 
-start
- – the first residue
- of t
-he dom
-ain 
-end
- – the last residue
- of t
-he dom
-ain 
-expect
- – the expe
-ctation va
-lue for t
-he peptide identification 
-mh
- – the calculated pe
-ptide mass + a prot
-on 
-delta
- – the spectrum
- mh m
-inus
- the calculated m
-h 
-hyperscore
- – T
-ande
-m’s score for t
-he identification 
-peak_count
- – the num
-ber of pe
-aks that matched be
-tween the theoretical
-and t
-he test mass spectrum
-pre
- – the four re
-sidue
-s pre
-ceding t
-he dom
-ain 
-post
- – the four re
-sidue
-s fol
-lowing t
-he dom
-ain 
-seq
- – the seque
-nce of t
-he dom
-ain 
-missed_cleavages
- – the num
-ber of pot
-ential cleavage sites in this
-peptide seque
-nce*/
+    – the identifier for t
+    his particular identified dom
+    ain (s
+    pectrum
+    #).(i
+    d
+    #).(dom
+    ain#)
+    start
+    – the first residue
+    of t
+    he dom
+    ain
+    end
+    – the last residue
+    of t
+    he dom
+    ain
+    expect
+    – the expe
+    ctation va
+    lue for t
+    he peptide identification
+    mh
+    – the calculated pe
+    ptide mass + a prot
+    on
+    delta
+    – the spectrum
+    mh m
+    inus
+    the calculated m
+    h
+    hyperscore
+    – T
+    ande
+    m’s score for t
+    he identification
+    peak_count
+    – the num
+    ber of pe
+    aks that matched be
+    tween the theoretical
+    and t
+    he test mass spectrum
+    pre
+    – the four re
+    sidue
+    s pre
+    ceding t
+    he dom
+    ain
+    post
+    – the four re
+    sidue
+    s fol
+    lowing t
+    he dom
+    ain
+    seq
+    – the seque
+    nce of t
+    he dom
+    ain
+    missed_cleavages
+    – the num
+    ber of pot
+    ential cleavage sites in this
+    peptide seque
+    nce*/
 
     // valeur généric du scan
     _current_peptide_sp = PeptideXtp(attributes.value("seq").simplified()).makePeptideXtpSp();
@@ -332,151 +332,155 @@ bool XtandemSaxHandler::endElement_note() {
         _p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1));
     }
     else {
-           
+
 //<group label="input parameters" type="parameters">
-    /*
-     * 	<note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note>
-	<note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note>
-	<note type="input" label="output, histogram column width">30</note>
-	<note type="input" label="output, histograms">yes</note>
-	<note type="input" label="output, maximum valid expectation value">0.05</note>
-	<note type="input" label="output, maximum valid protein expectation value">0.05</note>
-	<note type="input" label="output, one sequence copy">yes</note>
-	<note type="input" label="output, parameters">yes</note>
-	<note type="input" label="output, path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note>
-	<note type="input" label="output, path hashing">no</note>
-	<note type="input" label="output, performance">yes</note>
-	<note type="input" label="output, proteins">yes</note>
-	<note type="input" label="output, results">valid</note>
-	<note type="input" label="output, sequences">yes</note>
-	<note type="input" label="output, sort results by">spectrum</note>
-	<note type="input" label="output, spectra">yes</note>
-	<note type="input" label="output, xsl path">tandem-style.xsl</note>
-	<note type="input" label="protein, C-terminal residue modification mass">0.0</note>
-	<note type="input" label="protein, N-terminal residue modification mass">0.0</note>
-	<note type="input" label="protein, cleavage C-terminal mass change">+17.00305</note>
-	<note type="input" label="protein, cleavage N-terminal mass change">+1.00794</note>
-	<note type="input" label="protein, cleavage semi">no</note>
-	<note type="input" label="protein, cleavage site">[RK]|{P}</note>
-	<note type="input" label="protein, modified residue mass file"></note>
-	<note type="input" label="protein, quick acetyl">yes</note>
-	<note type="input" label="protein, quick pyrolidone">yes</note>
-	<note type="input" label="protein, stP bias">yes</note>
-	<note type="input" label="protein, taxon">usedefined</note>
-	<note type="input" label="refine">yes</note>
-	<note type="input" label="refine, cleavage semi">no</note>
-	<note type="input" label="refine, maximum valid expectation value">0.01</note>
-	<note type="input" label="refine, modification mass">57.02146@C</note>
-	<note type="input" label="refine, modification mass 1"></note>
-	<note type="input" label="refine, point mutations">no</note>
-	<note type="input" label="refine, potential C-terminus modifications"></note>
-	<note type="input" label="refine, potential N-terminus modifications">+42.01056@[</note>
-	<note type="input" label="refine, potential modification mass">15.99491@M</note>
-	<note type="input" label="refine, potential modification mass 1"></note>
-	<note type="input" label="refine, potential modification motif"></note>
-	<note type="input" label="refine, potential modification motif 1"></note>
-	<note type="input" label="refine, spectrum synthesis">yes</note>
-	<note type="input" label="refine, unanticipated cleavage">no</note>
-	<note type="input" label="refine, use potential modifications for full refinement">yes</note>
-	<note type="input" label="residue, modification mass">57.02146@C</note>
-	<note type="input" label="residue, modification mass 1"></note>
-	<note type="input" label="residue, potential modification mass">15.99491@M</note>
-	<note type="input" label="residue, potential modification motif"></note>
-	<note type="input" label="scoring, a ions">no</note>
-	<note type="input" label="scoring, b ions">yes</note>
-	<note type="input" label="scoring, c ions">no</note>
-	<note type="input" label="scoring, cyclic permutation">yes</note>
-	<note type="input" label="scoring, include reverse">yes</note>
-	<note type="input" label="scoring, maximum missed cleavage sites">1</note>
-	<note type="input" label="scoring, minimum ion count">4</note>
-	<note type="input" label="scoring, x ions">no</note>
-	<note type="input" label="scoring, y ions">yes</note>
-	<note type="input" label="scoring, z ions">no</note>
-	<note type="input" label="spectrum, dynamic range">100.0</note>
-	<note type="input" label="spectrum, fragment mass type">monoisotopic</note>
-	<note type="input" label="spectrum, fragment monoisotopic mass error">0.02</note>
-	<note type="input" label="spectrum, fragment monoisotopic mass error units">Daltons</note>
-	<note type="input" label="spectrum, maximum parent charge">4</note>
-	<note type="input" label="spectrum, minimum fragment mz">150.0</note>
-	<note type="input" label="spectrum, minimum parent m+h">500.0</note>
-	<note type="input" label="spectrum, minimum peaks">15</note>
-	<note type="input" label="spectrum, neutral loss mass">18.01057</note>
-	<note type="input" label="spectrum, neutral loss window">0.02</note>
-	<note type="input" label="spectrum, parent monoisotopic mass error minus">10</note>
-	<note type="input" label="spectrum, parent monoisotopic mass error plus">10</note>
-	<note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note>
-	<note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note>
-	*/
-	//<note type="input" label="spectrum, path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note>
-	
-    if (_current_note_label == "spectrum, path") {
-        _sp_msrun.get()->setFilename(_current_text);
-    }
-	
-	/*
-	<note type="input" label="spectrum, sequence batch size">1000</note>
-	<note type="input" label="spectrum, threads">1</note>
-	<note type="input" label="spectrum, total peaks">100</note>
-	<note type="input" label="spectrum, use contrast angle">no</note>
-	<note type="input" label="spectrum, use neutral loss window">yes</note>
-	<note type="input" label="spectrum, use noise suppression">yes</note>
-	</group>
-
-	*/
+        /*
+         * 	<note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note>
+        <note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note>
+        <note type="input" label="output, histogram column width">30</note>
+        <note type="input" label="output, histograms">yes</note>
+        <note type="input" label="output, maximum valid expectation value">0.05</note>
+        <note type="input" label="output, maximum valid protein expectation value">0.05</note>
+        <note type="input" label="output, one sequence copy">yes</note>
+        <note type="input" label="output, parameters">yes</note>
+        <note type="input" label="output, path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note>
+        <note type="input" label="output, path hashing">no</note>
+        <note type="input" label="output, performance">yes</note>
+        <note type="input" label="output, proteins">yes</note>
+        <note type="input" label="output, results">valid</note>
+        <note type="input" label="output, sequences">yes</note>
+        <note type="input" label="output, sort results by">spectrum</note>
+        <note type="input" label="output, spectra">yes</note>
+        <note type="input" label="output, xsl path">tandem-style.xsl</note>
+        <note type="input" label="protein, C-terminal residue modification mass">0.0</note>
+        <note type="input" label="protein, N-terminal residue modification mass">0.0</note>
+        <note type="input" label="protein, cleavage C-terminal mass change">+17.00305</note>
+        <note type="input" label="protein, cleavage N-terminal mass change">+1.00794</note>
+        <note type="input" label="protein, cleavage semi">no</note>
+        <note type="input" label="protein, cleavage site">[RK]|{P}</note>
+        <note type="input" label="protein, modified residue mass file"></note>
+        <note type="input" label="protein, quick acetyl">yes</note>
+        <note type="input" label="protein, quick pyrolidone">yes</note>
+        <note type="input" label="protein, stP bias">yes</note>
+        <note type="input" label="protein, taxon">usedefined</note>
+        <note type="input" label="refine">yes</note>
+        <note type="input" label="refine, cleavage semi">no</note>
+        <note type="input" label="refine, maximum valid expectation value">0.01</note>
+        <note type="input" label="refine, modification mass">57.02146@C</note>
+        <note type="input" label="refine, modification mass 1"></note>
+        <note type="input" label="refine, point mutations">no</note>
+        <note type="input" label="refine, potential C-terminus modifications"></note>
+        <note type="input" label="refine, potential N-terminus modifications">+42.01056@[</note>
+        <note type="input" label="refine, potential modification mass">15.99491@M</note>
+        <note type="input" label="refine, potential modification mass 1"></note>
+        <note type="input" label="refine, potential modification motif"></note>
+        <note type="input" label="refine, potential modification motif 1"></note>
+        <note type="input" label="refine, spectrum synthesis">yes</note>
+        <note type="input" label="refine, unanticipated cleavage">no</note>
+        <note type="input" label="refine, use potential modifications for full refinement">yes</note>
+        <note type="input" label="residue, modification mass">57.02146@C</note>
+        <note type="input" label="residue, modification mass 1"></note>
+        <note type="input" label="residue, potential modification mass">15.99491@M</note>
+        <note type="input" label="residue, potential modification motif"></note>
+        <note type="input" label="scoring, a ions">no</note>
+        <note type="input" label="scoring, b ions">yes</note>
+        <note type="input" label="scoring, c ions">no</note>
+        <note type="input" label="scoring, cyclic permutation">yes</note>
+        <note type="input" label="scoring, include reverse">yes</note>
+        <note type="input" label="scoring, maximum missed cleavage sites">1</note>
+        <note type="input" label="scoring, minimum ion count">4</note>
+        <note type="input" label="scoring, x ions">no</note>
+        <note type="input" label="scoring, y ions">yes</note>
+        <note type="input" label="scoring, z ions">no</note>
+        <note type="input" label="spectrum, dynamic range">100.0</note>
+        <note type="input" label="spectrum, fragment mass type">monoisotopic</note>
+        <note type="input" label="spectrum, fragment monoisotopic mass error">0.02</note>
+        <note type="input" label="spectrum, fragment monoisotopic mass error units">Daltons</note>
+        <note type="input" label="spectrum, maximum parent charge">4</note>
+        <note type="input" label="spectrum, minimum fragment mz">150.0</note>
+        <note type="input" label="spectrum, minimum parent m+h">500.0</note>
+        <note type="input" label="spectrum, minimum peaks">15</note>
+        <note type="input" label="spectrum, neutral loss mass">18.01057</note>
+        <note type="input" label="spectrum, neutral loss window">0.02</note>
+        <note type="input" label="spectrum, parent monoisotopic mass error minus">10</note>
+        <note type="input" label="spectrum, parent monoisotopic mass error plus">10</note>
+        <note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note>
+        <note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note>
+        */
+        //<note type="input" label="spectrum, path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note>
+
+        if (_current_note_label == "spectrum, path") {
+            _sp_msrun.get()->setFilename(_current_text);
+        }
+
+        /*
+        <note type="input" label="spectrum, sequence batch size">1000</note>
+        <note type="input" label="spectrum, threads">1</note>
+        <note type="input" label="spectrum, total peaks">100</note>
+        <note type="input" label="spectrum, use contrast angle">no</note>
+        <note type="input" label="spectrum, use neutral loss window">yes</note>
+        <note type="input" label="spectrum, use noise suppression">yes</note>
+        </group>
+
+        */
 
 //<group label="unused input parameters"  type="parameters">
 
-/*
-	<note type="input" label="protein, use minimal annotations">yes</note>
-	<note type="input" label="refine, modification mass 2"></note>
-	<note type="input" label="refine, potential modification mass 2"></note>
-	<note type="input" label="refine, potential modification motif 2"></note>
-	<note type="input" label="residue, modification mass 2"></note>
-	<note type="input" label="residue, potential modification mass 1"></note>
-	<note type="input" label="residue, potential modification mass 2"></note>
-	<note type="input" label="residue, potential modification motif 1"></note>
-	<note type="input" label="residue, potential modification motif 2"></note>
-	<note type="input" label="scoring, pluggable scoring">no</note>
-</group>
-*/
+        /*
+        	<note type="input" label="protein, use minimal annotations">yes</note>
+        	<note type="input" label="refine, modification mass 2"></note>
+        	<note type="input" label="refine, potential modification mass 2"></note>
+        	<note type="input" label="refine, potential modification motif 2"></note>
+        	<note type="input" label="residue, modification mass 2"></note>
+        	<note type="input" label="residue, potential modification mass 1"></note>
+        	<note type="input" label="residue, potential modification mass 2"></note>
+        	<note type="input" label="residue, potential modification motif 1"></note>
+        	<note type="input" label="residue, potential modification motif 2"></note>
+        	<note type="input" label="scoring, pluggable scoring">no</note>
+        </group>
+        */
 
 //<group label="performance parameters" type="parameters">
-/*
-	<note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note>
-	<note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note>
-	<note label="list path, sequence source description #1">no description</note>
-	<note label="list path, sequence source description #2">no description</note>
-	<note label="modelling, duplicate peptide ids">6019</note>
-	<note label="modelling, duplicate proteins">19735</note>
-	<note label="modelling, estimated false positives">18</note>
-	<note label="modelling, reversed sequence false positives">20</note>
-	<note label="modelling, spectrum noise suppression ratio">0.00</note>
-	<note label="modelling, total peptides used">96618641</note>
-	<note label="modelling, total proteins used">273656</note>
-	<note label="modelling, total spectra assigned">7464</note>
-	<note label="modelling, total spectra used">12199</note>
-	<note label="modelling, total unique assigned">6260</note>
-	<note label="process, start time">2013:12:20:16:47:19</note>
-	*/
-	//<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note>
-	if (_current_note_label == "process, version") {
-        _p_identification_data_source->setIdentificationEngineVersion(_current_text);
-    }
-	/*
-	<note label="quality values">243 476 437 382 384 417 399 416 346 387 390 382 321 355 311 283 253 272 251 228</note>
-	<note label="refining, # input models">4893</note>
-	<note label="refining, # input spectra">5520</note>
-	<note label="refining, # partial cleavage">326</note>
-	<note label="refining, # point mutations">0</note>
-	<note label="refining, # potential C-terminii">0</note>
-	<note label="refining, # potential N-terminii">392</note>
-	<note label="refining, # unanticipated cleavage">0</note>
-	<note label="timing, initial modelling total (sec)">170.96</note>
-	<note label="timing, initial modelling/spectrum (sec)">0.0140</note>
-	<note label="timing, load sequence models (sec)">0.33</note>
-	<note label="timing, refinement/spectrum (sec)">0.0141</note>
-</group>
-*/
+        /*
+        	<note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note>
+        	<note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note>
+        	<note label="list path, sequence source description #1">no description</note>
+        	<note label="list path, sequence source description #2">no description</note>
+        	<note label="modelling, duplicate peptide ids">6019</note>
+        	<note label="modelling, duplicate proteins">19735</note>
+        	<note label="modelling, estimated false positives">18</note>
+        	<note label="modelling, reversed sequence false positives">20</note>
+        	<note label="modelling, spectrum noise suppression ratio">0.00</note>
+        	<note label="modelling, total peptides used">96618641</note>
+        	<note label="modelling, total proteins used">273656</note>
+        	<note label="modelling, total spectra assigned">7464</note>
+        	<note label="modelling, total spectra used">12199</note>
+        	<note label="modelling, total unique assigned">6260</note>
+        	<note label="process, start time">2013:12:20:16:47:19</note>
+        	*/
+        //<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note>
+        if (_current_note_label == "process, version") {
+            QRegExp rx("\\((.*)\\)");
+            if (rx.indexIn(_current_text, 0) != -1) {
+                _p_identification_data_source->setIdentificationEngineVersion(rx.cap(1));
+            }
+            qDebug() << "XtandemSaxHandler::endElement_note() " << _p_identification_data_source->getIdentificationEngineVersion();
+        }
+        /*
+        <note label="quality values">243 476 437 382 384 417 399 416 346 387 390 382 321 355 311 283 253 272 251 228</note>
+        <note label="refining, # input models">4893</note>
+        <note label="refining, # input spectra">5520</note>
+        <note label="refining, # partial cleavage">326</note>
+        <note label="refining, # point mutations">0</note>
+        <note label="refining, # potential C-terminii">0</note>
+        <note label="refining, # potential N-terminii">392</note>
+        <note label="refining, # unanticipated cleavage">0</note>
+        <note label="timing, initial modelling total (sec)">170.96</note>
+        <note label="timing, initial modelling/spectrum (sec)">0.0140</note>
+        <note label="timing, load sequence models (sec)">0.33</note>
+        <note label="timing, refinement/spectrum (sec)">0.0141</note>
+        </group>
+        */
 
     }
     _current_text = "";
-- 
GitLab