From 038d50da7cfa37b68d4789f0362a75b8fbb17f46 Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Mon, 25 Sep 2017 11:34:53 +0200 Subject: [PATCH] new identification engine statistics --- .../identificationdatasource.cpp | 13 ++++++ .../identificationdatasource.h | 12 ++++++ .../tandem_run_dialog/tandem_run_dialog.ui | 19 +-------- src/gui/tandem_run_dialog/tandemrundialog.cpp | 4 ++ src/gui/tandem_run_dialog/tandemrundialog.h | 1 + src/input/xtandemsaxhandler.cpp | 41 +++++++++++++------ src/output/ods/samplesheet.cpp | 32 +++++++++++++++ src/utils/types.h | 13 ++++++ 8 files changed, 106 insertions(+), 29 deletions(-) diff --git a/src/core/identification_sources/identificationdatasource.cpp b/src/core/identification_sources/identificationdatasource.cpp index dcb377db8..c49f027c5 100644 --- a/src/core/identification_sources/identificationdatasource.cpp +++ b/src/core/identification_sources/identificationdatasource.cpp @@ -97,6 +97,19 @@ const QVariant IdentificationDataSource::getIdentificationEngineParam(Identifica return QVariant(); } } + +/* stores one engine statistic; operator[] lets a later value replace an earlier one, whereas std::map::insert would silently keep the first */ +void IdentificationDataSource::setIdentificationEngineStatistics(IdentificationEngineStatistics param, const QVariant& value) { + _param_stats[param] = value; +} +/* returns the statistic stored for param, or a default-constructed (null) QVariant when none was set */ +const QVariant IdentificationDataSource::getIdentificationEngineStatistics(IdentificationEngineStatistics param) const { + /* a missing key is an expected case here, so look it up instead of throwing and catching std::out_of_range */ + const auto it = _param_stats.find(param); + if (it == _param_stats.end()) return QVariant(); + return it->second; +} + pappso::SpectrumSp IdentificationDataSource::getSpectrumSp(unsigned int scan_number) const { pappso::SpectrumSp spectrum_sp = SpectrumStore::getSpectrumSpFromMsRunSp(_ms_run_sp, scan_number); return spectrum_sp; diff --git a/src/core/identification_sources/identificationdatasource.h b/src/core/identification_sources/identificationdatasource.h index daf0ab5c1..f99f8720b 100644 --- a/src/core/identification_sources/identificationdatasource.h +++
b/src/core/identification_sources/identificationdatasource.h @@ -85,7 +85,18 @@ public: /** \brief get specific identification engine parameter value */ virtual const QVariant getIdentificationEngineParam(IdentificationEngineParam param) const; + + + /** \brief set identification engine statistics + * any statistics on this identification run that can be told by the identification engine + */ + virtual void setIdentificationEngineStatistics(IdentificationEngineStatistics param, const QVariant& value); + + /** \brief get specific identification engine statistics value + */ + virtual const QVariant getIdentificationEngineStatistics(IdentificationEngineStatistics param) const; + /** \brief add Fastafile used by the identification engine */ void addFastaFile (FastaFileSp file); @@ -100,6 +111,7 @@ private : QString _version; MsRunSp _ms_run_sp = nullptr; std::map<IdentificationEngineParam, QVariant> _params; + std::map<IdentificationEngineStatistics, QVariant> _param_stats; std::vector<FastaFileSp> _fastafile_list; }; diff --git a/src/gui/tandem_run_dialog/tandem_run_dialog.ui b/src/gui/tandem_run_dialog/tandem_run_dialog.ui index 6080f6325..79fa385b9 100644 --- a/src/gui/tandem_run_dialog/tandem_run_dialog.ui +++ b/src/gui/tandem_run_dialog/tandem_run_dialog.ui @@ -253,22 +253,6 @@ </widget> <resources/> <connections> - <connection> - <sender>add_files_button</sender> - <signal>clicked()</signal> - <receiver>TandemRunDialog</receiver> - <slot>chooseFiles()</slot> - <hints> - <hint type="sourcelabel"> - <x>550</x> - <y>310</y> - </hint> - <hint type="destinationlabel"> - <x>550</x> - <y>264</y> - </hint> - </hints> - </connection> <connection> <sender>pushButton_2</sender> <signal>clicked()</signal> @@ -305,7 +289,7 @@ <sender>clear_list_button</sender> <signal>clicked()</signal> <receiver>TandemRunDialog</receiver> - <slot>clearFileList()</slot> + <slot>clearFastaFiles()</slot> <hints> <hint type="sourcelabel"> <x>464</x> @@ -402,6 +386,7 @@ <slot>reject()</slot> 
<slot>accept()</slot> <slot>selectFastaFiles()</slot> + <slot>clearFastaFiles()</slot> <slot>selectMzFiles()</slot> <slot>selectOutputDirectory()</slot> <slot>setPresetName(QString)</slot> diff --git a/src/gui/tandem_run_dialog/tandemrundialog.cpp b/src/gui/tandem_run_dialog/tandemrundialog.cpp index 50302ebbd..2a6ff3ebc 100644 --- a/src/gui/tandem_run_dialog/tandemrundialog.cpp +++ b/src/gui/tandem_run_dialog/tandemrundialog.cpp @@ -159,6 +159,10 @@ void TandemRunDialog::selectOutputDirectory() { } } +void TandemRunDialog::clearFastaFiles() { /* slot: asks _p_fasta_file_list to remove rowCount() rows starting at row 0, i.e. every row it currently holds; NOTE(review): _p_fasta_file_list is presumably the Qt model behind the fasta file list - confirm */ + _p_fasta_file_list->removeRows( 0, _p_fasta_file_list->rowCount() ); +} + void TandemRunDialog::selectFastaFiles() { try { QSettings settings; diff --git a/src/gui/tandem_run_dialog/tandemrundialog.h b/src/gui/tandem_run_dialog/tandemrundialog.h index f2601144d..83944614e 100644 --- a/src/gui/tandem_run_dialog/tandemrundialog.h +++ b/src/gui/tandem_run_dialog/tandemrundialog.h @@ -51,6 +51,7 @@ public: void reset(); public slots: void selectFastaFiles(); + void clearFastaFiles(); void selectMzFiles(); void selectOutputDirectory(); void selectPresetDirectory(); diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp index 2226d76e4..165619288 100644 --- a/src/input/xtandemsaxhandler.cpp +++ b/src/input/xtandemsaxhandler.cpp @@ -295,7 +295,7 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) { _p_peptide_match->setExperimentalMass(exp_mass); _p_peptide_match->setStart(attributes.value("start").simplified().toUInt()-1); _p_peptide_match->setCharge(_charge); - + _p_peptide_match->setParam(PeptideMatchParam::tandem_hyperscore, QVariant( attributes.value("hyperscore").toDouble())); _p_peptide_match->setIdentificationDataSource( _p_identification_data_source); @@ -336,11 +336,11 @@ bool XtandemSaxHandler::endElement_note() { bool is_ok = true; if (_is_protein_description) { //_p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1)); -
_p_protein_match->getProteinXtpSp().get()->setCompleteDescription(_current_text); - if (!_p_protein_match->getProteinXtpSp().get()->getAccession().endsWith(":reversed") && _p_protein_match->getProteinXtpSp().get()->getDescription().endsWith(":reversed")) { - //to fit most cases, just check that the :reversed chars added by X!Tandem are not in the description. if so, then add it too in the accession - _p_protein_match->getProteinXtpSp().get()->setAccession(QString("%1%2").arg(_p_protein_match->getProteinXtpSp().get()->getAccession()).arg(":reversed")); - } + _p_protein_match->getProteinXtpSp().get()->setCompleteDescription(_current_text); + if (!_p_protein_match->getProteinXtpSp().get()->getAccession().endsWith(":reversed") && _p_protein_match->getProteinXtpSp().get()->getDescription().endsWith(":reversed")) { + //to fit most cases, just check that the :reversed chars added by X!Tandem are not in the description. if so, then add it too in the accession + _p_protein_match->getProteinXtpSp().get()->setAccession(QString("%1%2").arg(_p_protein_match->getProteinXtpSp().get()->getAccession()).arg(":reversed")); + } } else { @@ -470,13 +470,30 @@ bool XtandemSaxHandler::endElement_note() { <note label="modelling, estimated false positives">18</note> <note label="modelling, reversed sequence false positives">20</note> <note label="modelling, spectrum noise suppression ratio">0.00</note> - <note label="modelling, total peptides used">96618641</note> - <note label="modelling, total proteins used">273656</note> - <note label="modelling, total spectra assigned">7464</note> - <note label="modelling, total spectra used">12199</note> - <note label="modelling, total unique assigned">6260</note> - <note label="process, start time">2013:12:20:16:47:19</note> */ + //<note label="modelling, total peptides used">96618641</note> + if (_current_note_label == "modelling, total peptides used") { + 
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_peptide_used, _current_text.toUInt()); + } + + //<note label="modelling, total proteins used">273656</note> + if (_current_note_label == "modelling, total proteins used") { + _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_proteins_used, _current_text.toUInt()); + } + //<note label="modelling, total spectra assigned">7464</note> + if (_current_note_label == "modelling, total spectra assigned") { + _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned, _current_text.toUInt()); + } + //<note label="modelling, total spectra used">12199</note> + if (_current_note_label == "modelling, total spectra used") { + _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used, _current_text.toUInt()); + } + //<note label="modelling, total unique assigned">6260</note> + if (_current_note_label == "modelling, total unique assigned") { + _p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned, _current_text.toUInt()); + } + //<note label="process, start time">2013:12:20:16:47:19</note> + //<note label="process, version">X! 
Tandem Sledgehammer (2013.09.01.1)</note> if (_current_note_label == "process, version") { QRegExp rx("\\((.*)\\)"); diff --git a/src/output/ods/samplesheet.cpp b/src/output/ods/samplesheet.cpp index dbd76c165..2a406f717 100644 --- a/src/output/ods/samplesheet.cpp +++ b/src/output/ods/samplesheet.cpp @@ -60,6 +60,29 @@ void SampleSheet::writeHeaders() { _p_writer->writeCell("Identification fasta files"); _p_writer->writeCell("X!Tandem parameters"); + // total_spectra_assigned=1, ///< total_spectra_assigned in one identification file (one sample) + _p_writer->setCellAnnotation("total spectra assigned in one identification file (one mzdata sample) given by the identification engine"); + _p_writer->writeCell("Total spectra assigned"); + // + //total_spectra_used=2,///< total_spectra_used in one identification file (one sample) + _p_writer->setCellAnnotation("total spectra used in one identification file (one mzdata sample) given by the identification engine"); + _p_writer->writeCell("Total spectra used"); + + + _p_writer->setCellAnnotation("Percentage of spectra assignment (spectra assigned / spectra used)"); + _p_writer->writeCell("Assignment percentage"); + + //total_unique_assigned=5,///< total number unique peptide sequence assigned + _p_writer->setCellAnnotation("total unique peptide sequence assigned in one identification file (one mzdata sample) given by the identification engine"); + _p_writer->writeCell("Total unique assigned"); + + + + /* + total_peptide_used=3,///< total number of peptides generated and used in identification + total_proteins_used=4,///< total number of proteins generated and used in identification + + */ + } @@ -79,5 +102,14 @@ void SampleSheet::writeIdentificationDataSource(IdentificationDataSource * p_ide _p_writer->writeCell(fasta_files.join(" ")); _p_writer->writeCell(p_ident_data_source->getIdentificationEngineParam(IdentificationEngineParam::tandem_param).toString()); +
const QVariant spectra_assigned = p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned); + const QVariant spectra_used = p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used); + _p_writer->writeCell(spectra_assigned.toString()); + _p_writer->writeCell(spectra_used.toString()); + /* a cell is always written for the percentage column so that the next value stays under its own header; the ratio itself is only computed when both counts exist and the denominator is not zero */ + const bool has_ratio = !spectra_assigned.isNull() && !spectra_used.isNull() && spectra_used.toDouble() > 0; + if (has_ratio) _p_writer->writeCellPercentage(spectra_assigned.toDouble() / spectra_used.toDouble()); + else _p_writer->writeCell(QString()); + _p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned).toString()); } diff --git a/src/utils/types.h b/src/utils/types.h index b18293cd2..a264e3aaf 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -64,6 +64,19 @@ enum class IdentificationEngineParam { tandem_param ///< X!Tandem xml parameters file }; + +/** \def IdentificationEngineStatistics identification engine statistics + * keys of the counters an identification engine can report for one identification file (see the note on each value) + */ +enum class IdentificationEngineStatistics: std::int8_t { + total_spectra_assigned=1, ///< total_spectra_assigned in one identification file (one sample) + total_spectra_used=2,///< total_spectra_used in one identification file (one sample) + total_peptide_used=3,///< total number of peptides generated and used in identification + total_proteins_used=4,///< total number of proteins generated and used in identification + total_unique_assigned=5,///< total number unique peptide sequence assigned + +}; + /** \def GroupingType list of available grouping algoritms * */ -- GitLab