From 77d10953cf7a97065f7334d0ea14f2431b5b9b6f Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Wed, 19 Apr 2017 20:55:54 +0200 Subject: [PATCH] store identification engine and version --- .../identificationdatasource.cpp | 9 + .../identificationdatasource.h | 13 ++ .../identificationxtandemfile.cpp | 2 + src/input/xtandemsaxhandler.cpp | 155 +++++++++++++++++- src/input/xtandemsaxhandler.h | 2 + src/utils/types.h | 9 + 6 files changed, 189 insertions(+), 1 deletion(-) diff --git a/src/core/identification_sources/identificationdatasource.cpp b/src/core/identification_sources/identificationdatasource.cpp index a90445286..f3cc31f80 100644 --- a/src/core/identification_sources/identificationdatasource.cpp +++ b/src/core/identification_sources/identificationdatasource.cpp @@ -63,6 +63,15 @@ MsRunSp IdentificationDataSource::getMsRunSp () const { return (_ms_run_sp); } +IdentificationEngine IdentificationDataSource::getIdentificationEngine() const { + return _engine; +} +const QString& IdentificationDataSource::getIdentificationEngineVersion() const { + return _version; +} +void IdentificationDataSource::setIdentificationEngineVersion(const QString& version) { + _version = version; +} pappso::SpectrumSp IdentificationDataSource::getSpectrumSp(unsigned int scan_number) const { pappso::SpectrumSp spectrum_sp = SpectrumStore::getSpectrumSpFromMsRunSp(_ms_run_sp, scan_number); diff --git a/src/core/identification_sources/identificationdatasource.h b/src/core/identification_sources/identificationdatasource.h index 1319b0b1e..02d1032a5 100644 --- a/src/core/identification_sources/identificationdatasource.h +++ b/src/core/identification_sources/identificationdatasource.h @@ -54,11 +54,24 @@ public: /** \brief read source content to store it in project */ virtual void parseTo(Project* p_project)=0; + + /** \brief identification engine + */ + virtual IdentificationEngine getIdentificationEngine() const; + + /** \brief identification engine 
version + */ + virtual const QString& getIdentificationEngineVersion() const; + /** \brief set identification engine version + */ + virtual void setIdentificationEngineVersion(const QString& version); protected : QString _resource_name; + IdentificationEngine _engine = IdentificationEngine::unknown; private : //static std::map<QString, pappso::MsRunIdSp> _map_msrunidsp; + QString _version; MsRunSp _ms_run_sp = nullptr; }; diff --git a/src/core/identification_sources/identificationxtandemfile.cpp b/src/core/identification_sources/identificationxtandemfile.cpp index 2fb6f1506..dd394f345 100644 --- a/src/core/identification_sources/identificationxtandemfile.cpp +++ b/src/core/identification_sources/identificationxtandemfile.cpp @@ -27,10 +27,12 @@ IdentificationXtandemFile::IdentificationXtandemFile(const QFileInfo & xtandem_file) : IdentificationDataSource(xtandem_file.absoluteFilePath()), _xtandem_file(xtandem_file) { + _engine = IdentificationEngine::XTandem; } IdentificationXtandemFile::IdentificationXtandemFile(const IdentificationXtandemFile& other) : IdentificationDataSource(other),_xtandem_file (other._xtandem_file) { + _engine = IdentificationEngine::XTandem; } IdentificationXtandemFile::~IdentificationXtandemFile() diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp index c616abcf6..b32ac882d 100644 --- a/src/input/xtandemsaxhandler.cpp +++ b/src/input/xtandemsaxhandler.cpp @@ -134,20 +134,24 @@ bool XtandemSaxHandler::startElement_group(QXmlAttributes attrs) { _charge = attrs.value("z").toUInt(); _retention_time = attrs.value("rt").replace("PT","").replace("S","").toDouble(); } + //label="input parameters" type="parameters" return is_ok; } + bool XtandemSaxHandler::startElement_note(QXmlAttributes attributes) { //<note label="description">GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC //4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; 
////parent_gene=GRMZM2G083841</note> bool is_ok = true; + _current_note_label = attributes.value("label"); + _current_note_type = attributes.value("type"); _is_protein_description = false; if (attributes.value("label") == "description") { if (_tag_stack[_tag_stack.size() - 2] == "protein") { _is_protein_description = true; } } - return is_ok; + return is_ok; } bool XtandemSaxHandler::startElement_protein(QXmlAttributes attributes) { @@ -327,6 +331,155 @@ bool XtandemSaxHandler::endElement_note() { if (_is_protein_description) { _p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1)); } + else { + +//<group label="input parameters" type="parameters"> + /* + * <note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note> + <note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note> + <note type="input" label="output, histogram column width">30</note> + <note type="input" label="output, histograms">yes</note> + <note type="input" label="output, maximum valid expectation value">0.05</note> + <note type="input" label="output, maximum valid protein expectation value">0.05</note> + <note type="input" label="output, one sequence copy">yes</note> + <note type="input" label="output, parameters">yes</note> + <note type="input" label="output, path">/gorgone/pappso/formation/TD/xml_tandem/20120906_balliau_extract_1_A02_urzb-1.xml</note> + <note type="input" label="output, path hashing">no</note> + <note type="input" label="output, performance">yes</note> + <note type="input" label="output, proteins">yes</note> + <note type="input" label="output, results">valid</note> + <note type="input" label="output, sequences">yes</note> + <note type="input" label="output, sort results by">spectrum</note> + <note type="input" label="output, spectra">yes</note> + <note 
type="input" label="output, xsl path">tandem-style.xsl</note> + <note type="input" label="protein, C-terminal residue modification mass">0.0</note> + <note type="input" label="protein, N-terminal residue modification mass">0.0</note> + <note type="input" label="protein, cleavage C-terminal mass change">+17.00305</note> + <note type="input" label="protein, cleavage N-terminal mass change">+1.00794</note> + <note type="input" label="protein, cleavage semi">no</note> + <note type="input" label="protein, cleavage site">[RK]|{P}</note> + <note type="input" label="protein, modified residue mass file"></note> + <note type="input" label="protein, quick acetyl">yes</note> + <note type="input" label="protein, quick pyrolidone">yes</note> + <note type="input" label="protein, stP bias">yes</note> + <note type="input" label="protein, taxon">usedefined</note> + <note type="input" label="refine">yes</note> + <note type="input" label="refine, cleavage semi">no</note> + <note type="input" label="refine, maximum valid expectation value">0.01</note> + <note type="input" label="refine, modification mass">57.02146@C</note> + <note type="input" label="refine, modification mass 1"></note> + <note type="input" label="refine, point mutations">no</note> + <note type="input" label="refine, potential C-terminus modifications"></note> + <note type="input" label="refine, potential N-terminus modifications">+42.01056@[</note> + <note type="input" label="refine, potential modification mass">15.99491@M</note> + <note type="input" label="refine, potential modification mass 1"></note> + <note type="input" label="refine, potential modification motif"></note> + <note type="input" label="refine, potential modification motif 1"></note> + <note type="input" label="refine, spectrum synthesis">yes</note> + <note type="input" label="refine, unanticipated cleavage">no</note> + <note type="input" label="refine, use potential modifications for full refinement">yes</note> + <note type="input" label="residue, 
modification mass">57.02146@C</note> + <note type="input" label="residue, modification mass 1"></note> + <note type="input" label="residue, potential modification mass">15.99491@M</note> + <note type="input" label="residue, potential modification motif"></note> + <note type="input" label="scoring, a ions">no</note> + <note type="input" label="scoring, b ions">yes</note> + <note type="input" label="scoring, c ions">no</note> + <note type="input" label="scoring, cyclic permutation">yes</note> + <note type="input" label="scoring, include reverse">yes</note> + <note type="input" label="scoring, maximum missed cleavage sites">1</note> + <note type="input" label="scoring, minimum ion count">4</note> + <note type="input" label="scoring, x ions">no</note> + <note type="input" label="scoring, y ions">yes</note> + <note type="input" label="scoring, z ions">no</note> + <note type="input" label="spectrum, dynamic range">100.0</note> + <note type="input" label="spectrum, fragment mass type">monoisotopic</note> + <note type="input" label="spectrum, fragment monoisotopic mass error">0.02</note> + <note type="input" label="spectrum, fragment monoisotopic mass error units">Daltons</note> + <note type="input" label="spectrum, maximum parent charge">4</note> + <note type="input" label="spectrum, minimum fragment mz">150.0</note> + <note type="input" label="spectrum, minimum parent m+h">500.0</note> + <note type="input" label="spectrum, minimum peaks">15</note> + <note type="input" label="spectrum, neutral loss mass">18.01057</note> + <note type="input" label="spectrum, neutral loss window">0.02</note> + <note type="input" label="spectrum, parent monoisotopic mass error minus">10</note> + <note type="input" label="spectrum, parent monoisotopic mass error plus">10</note> + <note type="input" label="spectrum, parent monoisotopic mass error units">ppm</note> + <note type="input" label="spectrum, parent monoisotopic mass isotope error">yes</note> + */ + //<note type="input" 
label="spectrum, path">/gorgone/pappso/formation/TD/mzXML/20120906_balliau_extract_1_A02_urzb-1.mzXML</note> + + if (_current_note_label == "spectrum, path") { + _sp_msrun.get()->setFilename(_current_text); + } + + /* + <note type="input" label="spectrum, sequence batch size">1000</note> + <note type="input" label="spectrum, threads">1</note> + <note type="input" label="spectrum, total peaks">100</note> + <note type="input" label="spectrum, use contrast angle">no</note> + <note type="input" label="spectrum, use neutral loss window">yes</note> + <note type="input" label="spectrum, use noise suppression">yes</note> + </group> + + */ + +//<group label="unused input parameters" type="parameters"> + +/* + <note type="input" label="protein, use minimal annotations">yes</note> + <note type="input" label="refine, modification mass 2"></note> + <note type="input" label="refine, potential modification mass 2"></note> + <note type="input" label="refine, potential modification motif 2"></note> + <note type="input" label="residue, modification mass 2"></note> + <note type="input" label="residue, potential modification mass 1"></note> + <note type="input" label="residue, potential modification mass 2"></note> + <note type="input" label="residue, potential modification motif 1"></note> + <note type="input" label="residue, potential modification motif 2"></note> + <note type="input" label="scoring, pluggable scoring">no</note> +</group> +*/ + +//<group label="performance parameters" type="parameters"> +/* + <note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note> + <note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note> + <note label="list path, sequence source description #1">no description</note> + <note label="list path, sequence source description #2">no description</note> + <note label="modelling, duplicate peptide ids">6019</note> + <note label="modelling, 
duplicate proteins">19735</note> + <note label="modelling, estimated false positives">18</note> + <note label="modelling, reversed sequence false positives">20</note> + <note label="modelling, spectrum noise suppression ratio">0.00</note> + <note label="modelling, total peptides used">96618641</note> + <note label="modelling, total proteins used">273656</note> + <note label="modelling, total spectra assigned">7464</note> + <note label="modelling, total spectra used">12199</note> + <note label="modelling, total unique assigned">6260</note> + <note label="process, start time">2013:12:20:16:47:19</note> + */ + //<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note> + if (_current_note_label == "process, version") { + _p_identification_data_source->setIdentificationEngineVersion(_current_text); + } + /* + <note label="quality values">243 476 437 382 384 417 399 416 346 387 390 382 321 355 311 283 253 272 251 228</note> + <note label="refining, # input models">4893</note> + <note label="refining, # input spectra">5520</note> + <note label="refining, # partial cleavage">326</note> + <note label="refining, # point mutations">0</note> + <note label="refining, # potential C-terminii">0</note> + <note label="refining, # potential N-terminii">392</note> + <note label="refining, # unanticipated cleavage">0</note> + <note label="timing, initial modelling total (sec)">170.96</note> + <note label="timing, initial modelling/spectrum (sec)">0.0140</note> + <note label="timing, load sequence models (sec)">0.33</note> + <note label="timing, refinement/spectrum (sec)">0.0141</note> +</group> +*/ + + } + _current_text = ""; return is_ok; } diff --git a/src/input/xtandemsaxhandler.h b/src/input/xtandemsaxhandler.h index 85babb2ec..be0448349 100644 --- a/src/input/xtandemsaxhandler.h +++ b/src/input/xtandemsaxhandler.h @@ -96,6 +96,8 @@ private: QString _current_group_label; QString _current_group_type; + QString _current_note_label; + QString _current_note_type; 
unsigned int _scan; pappso::pappso_double _mhplus_obser; unsigned int _charge; diff --git a/src/utils/types.h b/src/utils/types.h index 19244ecd2..6c98714a1 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -28,6 +28,15 @@ #include <cstdint> /*********** enumerations *********************************/ +/** \def IdentificationEngine identification engine + * + */ +enum class IdentificationEngine { + unknown, ///< unknown identification engine + XTandem, ///< X!Tandem + peptider ///< peptider +}; + /** \def GroupingType list of available grouping algoritms * */ -- GitLab