From 58e8469d21f044d2313f9d44c8696290657dd524 Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Wed, 19 Apr 2017 21:51:57 +0200 Subject: [PATCH] parsing tandem version OK --- src/input/xtandemsaxhandler.cpp | 426 ++++++++++++++++---------------- 1 file changed, 215 insertions(+), 211 deletions(-) diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp index b32ac882d..1ef6a3d36 100644 --- a/src/input/xtandemsaxhandler.cpp +++ b/src/input/xtandemsaxhandler.cpp @@ -151,7 +151,7 @@ bool XtandemSaxHandler::startElement_note(QXmlAttributes attributes) { _is_protein_description = true; } } - return is_ok; + return is_ok; } bool XtandemSaxHandler::startElement_protein(QXmlAttributes attributes) { @@ -193,7 +193,7 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) { bool is_ok = true; _current_text = _current_text.simplified().replace(" ", ""); if (!_current_text.isEmpty()) { - //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop() + //._sequence.replace(QRegExp("\\*"), "")).removeTranslationStop() _p_protein_match->getProteinXtpSp().get()->setSequence(_current_text.replace(QRegExp("\\*"), "")); } @@ -204,76 +204,76 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) { // pre="VLGR" post="VEFM" seq="TGSQGQCTQVR" missed_cleavages="10"> /* * id - – the identifier for t -his particular identified dom -ain (s -pectrum - #).(i -d -#).(dom -ain#) -start - – the first residue - of t -he dom -ain -end - – the last residue - of t -he dom -ain -expect - – the expe -ctation va -lue for t -he peptide identification -mh - – the calculated pe -ptide mass + a prot -on -delta - – the spectrum - mh m -inus - the calculated m -h -hyperscore - – T -ande -m’s score for t -he identification -peak_count - – the num -ber of pe -aks that matched be -tween the theoretical -and t -he test mass spectrum -pre - – the four re -sidue -s pre -ceding t -he dom -ain -post - – the four re -sidue -s fol -lowing t -he dom -ain -seq - – the seque -nce of t -he dom -ain -missed_cleavages - – the num -ber of pot -ential cleavage sites in this -peptide seque -nce*/ + – the identifier for t + his particular identified dom + ain (s + pectrum + #).(i + d + #).(dom + ain#) + start + – the first residue + of t + he dom + ain + end + – the last residue + of t + he dom + ain + expect + – the expe + ctation va + lue for t + he peptide identification + mh + – the calculated pe + ptide mass + a prot + on + delta + – the spectrum + mh m + inus + the calculated m + h + hyperscore + – T + ande + m’s score for t + he identification + peak_count + – the num + ber of pe + aks that matched be + tween the theoretical + and t + he test mass spectrum + pre + – the four re + sidue + s pre + ceding t + he dom + ain + post + – the four re + sidue + s fol + lowing t + he dom + ain + seq + – the seque + nce of t + he dom + ain + missed_cleavages + – the num + ber of pot + ential cleavage sites in this + peptide seque + nce*/ // valeur généric du scan _current_peptide_sp = PeptideXtp(attributes.value("seq").simplified()).makePeptideXtpSp(); @@ -332,151 +332,155 @@ bool XtandemSaxHandler::endElement_note() { _p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1)); } else { - + //<group label="input parameters" type="parameters"> - /* - * <note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note> - <note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note> - <note type="input" label="output, histogram column width">30</note> - <note type="input" label="output, histograms">yes</note> - <note type="input" label="output, maximum valid expectation value">0.05</note> - <note type="input" label="output, maximum valid protein expectation value">0.05</note> - <note type="input" label="output, one sequence copy">yes</note> - <note type="input" label="output, parameters">yes</note> - <note type="input" label="output, path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note> - <note type="input" label="output, path hashing">no</note> - <note type="input" label="output, performance">yes</note> - <note type="input" label="output, proteins">yes</note> - <note type="input" label="output, results">valid</note> - <note type="input" label="output, sequences">yes</note> - <note type="input" label="output, sort results by">spectrum</note> - <note type="input" label="output, spectra">yes</note> - <note type="input" label="output, xsl path">tandem-style.xsl</note> - <note type="input" label="protein, C-terminal residue modification mass">0.0</note> - <note type="input" label="protein, N-terminal residue modification mass">0.0</note> - <note type="input" label="protein, cleavage C-terminal mass change">+17.00305</note> - <note type="input" label="protein, cleavage N-terminal mass change">+1.00794</note> - <note type="input" label="protein, cleavage semi">no</note> - <note type="input" label="protein, cleavage site">[RK]|{P}</note> - <note type="input" label="protein, modified residue mass file"></note> - <note type="input" label="protein, quick acetyl">yes</note> - <note type="input" label="protein, quick pyrolidone">yes</note> - <note type="input" label="protein, stP bias">yes</note> - <note type="input" label="protein, taxon">usedefined</note> - <note type="input" label="refine">yes</note> - <note type="input" label="refine, cleavage semi">no</note> - <note type="input" label="refine, maximum valid expectation value">0.01</note> - <note type="input" label="refine, modification mass">57.02146@C</note> - <note type="input" label="refine, modification mass 1"></note> - <note type="input" label="refine, point mutations">no</note> - <note type="input" label="refine, potential C-terminus modifications"></note> - <note type="input" label="refine, potential N-terminus modifications">+42.01056@[</note> - <note type="input" label="refine, potential modification mass">15.99491@M</note> - <note type="input" label="refine, potential modification mass 1"></note> - <note type="input" label="refine, potential modification motif"></note> - <note type="input" label="refine, potential modification motif 1"></note> - <note type="input" label="refine, spectrum synthesis">yes</note> - <note type="input" label="refine, unanticipated cleavage">no</note> - <note type="input" label="refine, use potential modifications for full refinement">yes</note> - <note type="input" label="residue, modification mass">57.02146@C</note> - <note type="input" label="residue, modification mass 1"></note> - <note type="input" label="residue, potential modification mass">15.99491@M</note> - <note type="input" label="residue, potential modification motif"></note> - <note type="input" label="scoring, a ions">no</note> - <note type="input" label="scoring, b ions">yes</note> - <note type="input" label="scoring, c ions">no</note> - <note type="input" label="scoring, cyclic permutation">yes</note> - <note type="input" label="scoring, include reverse">yes</note> - <note type="input" label="scoring, maximum missed cleavage sites">1</note> - <note type="input" label="scoring, minimum ion count">4</note> - <note type="input" label="scoring, x ions">no</note> - <note type="input" label="scoring, y ions">yes</note> - <note type="input" label="scoring, z ions">no</note> - <note type="input" label="spectrum, dynamic range">100.0</note> - <note type="input" label="spectrum, fragment mass type">monoisotopic</note> - <note type="input" label="spectrum, fragment monoisotopic mass error">0.02</note> - <note type="input" label="spectrum, fragment monoisotopic mass error units">Daltons</note> - <note type="input" label="spectrum, maximum parent charge">4</note> - <note type="input" label="spectrum, minimum fragment mz">150.0</note> - <note type="input" label="spectrum, minimum parent m+h">500.0</note> - <note type="input" label="spectrum, minimum peaks">15</note> - <note type="input" label="spectrum, neutral loss mass">18.01057</note> - <note type="input" label="spectrum, neutral loss window">0.02</note> - <note type="input" label="spectrum, parent monoisotopic mass error minus">10</note> - <note type="input" label="spectrum, parent monoisotopic mass error plus">10</note> - <note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note> - <note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note> - */ - //<note type="input" label="spectrum, path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note> - - if (_current_note_label == "spectrum, path") { - _sp_msrun.get()->setFilename(_current_text); - } - - /* - <note type="input" label="spectrum, sequence batch size">1000</note> - <note type="input" label="spectrum, threads">1</note> - <note type="input" label="spectrum, total peaks">100</note> - <note type="input" label="spectrum, use contrast angle">no</note> - <note type="input" label="spectrum, use neutral loss window">yes</note> - <note type="input" label="spectrum, use noise suppression">yes</note> - </group> - - */ + /* + * <note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note> + <note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note> + <note type="input" label="output, histogram column width">30</note> + <note type="input" label="output, histograms">yes</note> + <note type="input" label="output, maximum valid expectation value">0.05</note> + <note type="input" label="output, maximum valid protein expectation value">0.05</note> + <note type="input" label="output, one sequence copy">yes</note> + <note type="input" label="output, parameters">yes</note> + <note type="input" label="output, path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note> + <note type="input" label="output, path hashing">no</note> + <note type="input" label="output, performance">yes</note> + <note type="input" label="output, proteins">yes</note> + <note type="input" label="output, results">valid</note> + <note type="input" label="output, sequences">yes</note> + <note type="input" label="output, sort results by">spectrum</note> + <note type="input" label="output, spectra">yes</note> + <note type="input" label="output, xsl path">tandem-style.xsl</note> + <note type="input" label="protein, C-terminal residue modification mass">0.0</note> + <note type="input" label="protein, N-terminal residue modification mass">0.0</note> + <note type="input" label="protein, cleavage C-terminal mass change">+17.00305</note> + <note type="input" label="protein, cleavage N-terminal mass change">+1.00794</note> + <note type="input" label="protein, cleavage semi">no</note> + <note type="input" label="protein, cleavage site">[RK]|{P}</note> + <note type="input" label="protein, modified residue mass file"></note> + <note type="input" label="protein, quick acetyl">yes</note> + <note type="input" label="protein, quick pyrolidone">yes</note> + <note type="input" label="protein, stP bias">yes</note> + <note type="input" label="protein, taxon">usedefined</note> + <note type="input" label="refine">yes</note> + <note type="input" label="refine, cleavage semi">no</note> + <note type="input" label="refine, maximum valid expectation value">0.01</note> + <note type="input" label="refine, modification mass">57.02146@C</note> + <note type="input" label="refine, modification mass 1"></note> + <note type="input" label="refine, point mutations">no</note> + <note type="input" label="refine, potential C-terminus modifications"></note> + <note type="input" label="refine, potential N-terminus modifications">+42.01056@[</note> + <note type="input" label="refine, potential modification mass">15.99491@M</note> + <note type="input" label="refine, potential modification mass 1"></note> + <note type="input" label="refine, potential modification motif"></note> + <note type="input" label="refine, potential modification motif 1"></note> + <note type="input" label="refine, spectrum synthesis">yes</note> + <note type="input" label="refine, unanticipated cleavage">no</note> + <note type="input" label="refine, use potential modifications for full refinement">yes</note> + <note type="input" label="residue, modification mass">57.02146@C</note> + <note type="input" label="residue, modification mass 1"></note> + <note type="input" label="residue, potential modification mass">15.99491@M</note> + <note type="input" label="residue, potential modification motif"></note> + <note type="input" label="scoring, a ions">no</note> + <note type="input" label="scoring, b ions">yes</note> + <note type="input" label="scoring, c ions">no</note> + <note type="input" label="scoring, cyclic permutation">yes</note> + <note type="input" label="scoring, include reverse">yes</note> + <note type="input" label="scoring, maximum missed cleavage sites">1</note> + <note type="input" label="scoring, minimum ion count">4</note> + <note type="input" label="scoring, x ions">no</note> + <note type="input" label="scoring, y ions">yes</note> + <note type="input" label="scoring, z ions">no</note> + <note type="input" label="spectrum, dynamic range">100.0</note> + <note type="input" label="spectrum, fragment mass type">monoisotopic</note> + <note type="input" label="spectrum, fragment monoisotopic mass error">0.02</note> + <note type="input" label="spectrum, fragment monoisotopic mass error units">Daltons</note> + <note type="input" label="spectrum, maximum parent charge">4</note> + <note type="input" label="spectrum, minimum fragment mz">150.0</note> + <note type="input" label="spectrum, minimum parent m+h">500.0</note> + <note type="input" label="spectrum, minimum peaks">15</note> + <note type="input" label="spectrum, neutral loss mass">18.01057</note> + <note type="input" label="spectrum, neutral loss window">0.02</note> + <note type="input" label="spectrum, parent monoisotopic mass error minus">10</note> + <note type="input" label="spectrum, parent monoisotopic mass error plus">10</note> + <note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note> + <note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note> + */ + //<note type="input" label="spectrum, path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note> + + if (_current_note_label == "spectrum, path") { + _sp_msrun.get()->setFilename(_current_text); + } + + /* + <note type="input" label="spectrum, sequence batch size">1000</note> + <note type="input" label="spectrum, threads">1</note> + <note type="input" label="spectrum, total peaks">100</note> + <note type="input" label="spectrum, use contrast angle">no</note> + <note type="input" label="spectrum, use neutral loss window">yes</note> + <note type="input" label="spectrum, use noise suppression">yes</note> + </group> + + */ //<group label="unused input parameters" type="parameters"> -/* - <note type="input" label="protein, use minimal annotations">yes</note> - <note type="input" label="refine, modification mass 2"></note> - <note type="input" label="refine, potential modification mass 2"></note> - <note type="input" label="refine, potential modification motif 2"></note> - <note type="input" label="residue, modification mass 2"></note> - <note type="input" label="residue, potential modification mass 1"></note> - <note type="input" label="residue, potential modification mass 2"></note> - <note type="input" label="residue, potential modification motif 1"></note> - <note type="input" label="residue, potential modification motif 2"></note> - <note type="input" label="scoring, pluggable scoring">no</note> -</group> -*/ + /* + <note type="input" label="protein, use minimal annotations">yes</note> + <note type="input" label="refine, modification mass 2"></note> + <note type="input" label="refine, potential modification mass 2"></note> + <note type="input" label="refine, potential modification motif 2"></note> + <note type="input" label="residue, modification mass 2"></note> + <note type="input" label="residue, potential modification mass 1"></note> + <note type="input" label="residue, potential modification mass 2"></note> + <note type="input" label="residue, potential modification motif 1"></note> + <note type="input" label="residue, potential modification motif 2"></note> + <note type="input" label="scoring, pluggable scoring">no</note> + </group> + */ //<group label="performance parameters" type="parameters"> -/* - <note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note> - <note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note> - <note label="list path, sequence source description #1">no description</note> - <note label="list path, sequence source description #2">no description</note> - <note label="modelling, duplicate peptide ids">6019</note> - <note label="modelling, duplicate proteins">19735</note> - <note label="modelling, estimated false positives">18</note> - <note label="modelling, reversed sequence false positives">20</note> - <note label="modelling, spectrum noise suppression ratio">0.00</note> - <note label="modelling, total peptides used">96618641</note> - <note label="modelling, total proteins used">273656</note> - <note label="modelling, total spectra assigned">7464</note> - <note label="modelling, total spectra used">12199</note> - <note label="modelling, total unique assigned">6260</note> - <note label="process, start time">2013:12:20:16:47:19</note> - */ - //<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note> - if (_current_note_label == "process, version") { - _p_identification_data_source->setIdentificationEngineVersion(_current_text); - } - /* - <note label="quality values">243 476 437 382 384 417 399 416 346 387 390 382 321 355 311 283 253 272 251 228</note> - <note label="refining, # input models">4893</note> - <note label="refining, # input spectra">5520</note> - <note label="refining, # partial cleavage">326</note> - <note label="refining, # point mutations">0</note> - <note label="refining, # potential C-terminii">0</note> - <note label="refining, # potential N-terminii">392</note> - <note label="refining, # unanticipated cleavage">0</note> - <note label="timing, initial modelling total (sec)">170.96</note> - <note label="timing, initial modelling/spectrum (sec)">0.0140</note> - <note label="timing, load sequence models (sec)">0.33</note> - <note label="timing, refinement/spectrum (sec)">0.0141</note> -</group> -*/ + /* + <note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note> + <note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note> + <note label="list path, sequence source description #1">no description</note> + <note label="list path, sequence source description #2">no description</note> + <note label="modelling, duplicate peptide ids">6019</note> + <note label="modelling, duplicate proteins">19735</note> + <note label="modelling, estimated false positives">18</note> + <note label="modelling, reversed sequence false positives">20</note> + <note label="modelling, spectrum noise suppression ratio">0.00</note> + <note label="modelling, total peptides used">96618641</note> + <note label="modelling, total proteins used">273656</note> + <note label="modelling, total spectra assigned">7464</note> + <note label="modelling, total spectra used">12199</note> + <note label="modelling, total unique assigned">6260</note> + <note label="process, start time">2013:12:20:16:47:19</note> + */ + //<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note> + if (_current_note_label == "process, version") { + QRegExp rx("\\((.*)\\)"); + if (rx.indexIn(_current_text, 0) != -1) { + _p_identification_data_source->setIdentificationEngineVersion(rx.cap(1)); + } + qDebug() << "XtandemSaxHandler::endElement_note() " << _p_identification_data_source->getIdentificationEngineVersion(); + } + /* + <note label="quality values">243 476 437 382 384 417 399 416 346 387 390 382 321 355 311 283 253 272 251 228</note> + <note label="refining, # input models">4893</note> + <note label="refining, # input spectra">5520</note> + <note label="refining, # partial cleavage">326</note> + <note label="refining, # point mutations">0</note> + <note label="refining, # potential C-terminii">0</note> + <note label="refining, # potential N-terminii">392</note> + <note label="refining, # unanticipated cleavage">0</note> + <note label="timing, initial modelling total (sec)">170.96</note> + <note label="timing, initial modelling/spectrum (sec)">0.0140</note> + <note label="timing, load sequence models (sec)">0.33</note> + <note label="timing, refinement/spectrum (sec)">0.0141</note> + </group> + */ } _current_text = ""; -- GitLab