From 2c3eb033e78e5ff6a6a90f9d3d5ba8c1e7d328a7 Mon Sep 17 00:00:00 2001 From: Olivier Langella <olivier.langella@u-psud.fr> Date: Mon, 25 Sep 2017 21:52:43 +0200 Subject: [PATCH] introducing MsRunStatistics --- src/core/msrun.cpp | 12 +++++++++ src/core/msrun.h | 14 +++++++++++ src/output/ods/samplesheet.cpp | 45 ++++++++++++++++++++++++++++------ src/utils/readspectrum.cpp | 23 +++++++++++++++-- src/utils/readspectrum.h | 4 +++ src/utils/types.h | 13 +++++++++- 6 files changed, 100 insertions(+), 11 deletions(-) diff --git a/src/core/msrun.cpp b/src/core/msrun.cpp index cb6b940ec..390f27a0d 100644 --- a/src/core/msrun.cpp +++ b/src/core/msrun.cpp @@ -82,6 +82,18 @@ MzFormat MsRun::getMzFormat() const { return _mz_format; } +void MsRun::setMsRunStatistics(MsRunStatistics param, const QVariant& value) { + _param_stats.insert(std::pair<MsRunStatistics, QVariant>(param, value)); +} +const QVariant MsRun::getMsRunStatistics(MsRunStatistics param) const { + try { + return _param_stats.at(param); + } + catch (std::out_of_range) { + return QVariant(); + } +} + /* void MsRun::setMzFormat(MzFormat mz_format) { _mz_format = mz_format; diff --git a/src/core/msrun.h b/src/core/msrun.h index a10603c98..ce038fc8d 100644 --- a/src/core/msrun.h +++ b/src/core/msrun.h @@ -34,6 +34,7 @@ #include <memory> #include <QString> +#include <QVariant> #include "../utils/types.h" class MsRun; @@ -55,11 +56,24 @@ public: MzFormat getMzFormat() const; //void setMzFormat(MzFormat mz_format); + + + /** \brief set MS run statistics + * any statistics on this MS run file + */ + virtual void setMsRunStatistics(MsRunStatistics param, const QVariant& value); + + + /** \brief get MS run statistics + */ + virtual const QVariant getMsRunStatistics(MsRunStatistics param) const; + private : QString _xml_id; QString _location; QString _name; MzFormat _mz_format=MzFormat::mzXML; + std::map<MsRunStatistics, QVariant> _param_stats; }; #endif // MSRUN_H diff --git a/src/output/ods/samplesheet.cpp b/src/output/ods/samplesheet.cpp index 2a406f717..170cae5c8 100644 --- a/src/output/ods/samplesheet.cpp +++ b/src/output/ods/samplesheet.cpp @@ -28,6 +28,7 @@ ******************************************************************************/ #include "samplesheet.h" +#include "../../utils/readspectrum.h" SampleSheet::SampleSheet (OdsExport * p_ods_export, CalcWriterInterface * p_writer, const Project * p_project): _p_project(p_project) { _p_writer = p_writer; @@ -88,9 +89,12 @@ void SampleSheet::writeHeaders() { void SampleSheet::writeIdentificationDataSource(IdentificationDataSource * p_ident_data_source) { _p_writer->writeLine(); - _p_writer->writeCell(p_ident_data_source->getMsRunSp().get()->getXmlId()); - _p_writer->writeCell(p_ident_data_source->getMsRunSp().get()->getSampleName()); - _p_writer->writeCell(p_ident_data_source->getMsRunSp().get()->getFilename()); + MsRunSp msrun_sp = p_ident_data_source->getMsRunSp(); + SpectrumStore::checkMsRunStatistics(msrun_sp); + + _p_writer->writeCell(msrun_sp.get()->getXmlId()); + _p_writer->writeCell(msrun_sp.get()->getSampleName()); + _p_writer->writeCell(msrun_sp.get()->getFilename()); _p_writer->writeCell(p_ident_data_source->getIdentificationEngineName()); _p_writer->writeCell(p_ident_data_source->getIdentificationEngineVersion()); _p_writer->writeCell(p_ident_data_source->getResourceName()); @@ -101,15 +105,40 @@ void SampleSheet::writeIdentificationDataSource(IdentificationDataSource * p_ide } _p_writer->writeCell(fasta_files.join(" ")); - _p_writer->writeCell(p_ident_data_source->getIdentificationEngineParam(IdentificationEngineParam::tandem_param).toString()); - _p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned).toString()); - - _p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used).toString()); + QVariant var_sample = p_ident_data_source->getIdentificationEngineParam(IdentificationEngineParam::tandem_param); + if (var_sample.isNull()) { + _p_writer->writeEmptyCell(); + } else { + _p_writer->writeCell(var_sample.toString()); + } + var_sample = p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned); + if (var_sample.isNull()) { + _p_writer->writeEmptyCell(); + } else { + _p_writer->writeCell(var_sample.toInt()); + } + var_sample = p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used); + if (var_sample.isNull()) { + _p_writer->writeEmptyCell(); + } else { + _p_writer->writeCell(var_sample.toInt()); + } if (!p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned).isNull() && !p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used).isNull()) { _p_writer->writeCellPercentage(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned).toDouble() / p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used).toDouble()); } + else { + _p_writer->writeEmptyCell(); + } + - _p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned).toString()); + var_sample = p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned); + if (var_sample.isNull()) { + _p_writer->writeEmptyCell(); + } else { + _p_writer->writeCell(var_sample.toInt()); + } + + } diff --git a/src/utils/readspectrum.cpp b/src/utils/readspectrum.cpp index b84e14943..67c8579ce 100644 --- a/src/utils/readspectrum.cpp +++ b/src/utils/readspectrum.cpp @@ -258,8 +258,20 @@ const QString SpectrumStore::findMzFile(const QString &filename) { return QString(); } -pappso::SpectrumSp SpectrumStore::getSpectrumSpFromMsRunSp(MsRunSp msrun, unsigned int scan_num) { - pappso::QualifiedSpectrum spectrum; +void SpectrumStore::checkMsRunStatistics(MsRunSp msrun) { + QVariant msrun_var = msrun.get()->getMsRunStatistics(MsRunStatistics::total_spectra); + if (msrun_var.isNull()) { + try { + pwiz::msdata::MSDataFile * p_msdatafile = findPwizMSDataFile(msrun); + msrun.get()->setMsRunStatistics(MsRunStatistics::total_spectra, (unsigned int) p_msdatafile->run.spectrumListPtr.get()->size()); + } + catch (pappso::ExceptionNotFound& error) { + //no file found, no statistics + } + } +} + +pwiz::msdata::MSDataFile * SpectrumStore::findPwizMSDataFile(MsRunSp msrun) { pwiz::msdata::MSDataFile * p_msdatafile = nullptr; std::map<MsRunSp, pwiz::msdata::MSDataFile *>::iterator it_msdata = _map_msrun_msdatafile.find(msrun); if (it_msdata == _map_msrun_msdatafile.end()) { @@ -279,6 +291,13 @@ pappso::SpectrumSp SpectrumStore::getSpectrumSpFromMsRunSp(MsRunSp msrun, unsign else { p_msdatafile =it_msdata->second; } + return p_msdatafile; +} + +pappso::SpectrumSp SpectrumStore::getSpectrumSpFromMsRunSp(MsRunSp msrun, unsigned int scan_num) { + pappso::QualifiedSpectrum spectrum; + pwiz::msdata::MSDataFile * p_msdatafile = findPwizMSDataFile(msrun); + if (p_msdatafile == nullptr) { return spectrum.getOriginalSpectrumSp(); } diff --git a/src/utils/readspectrum.h b/src/utils/readspectrum.h index f8538bede..8051910c4 100644 --- a/src/utils/readspectrum.h +++ b/src/utils/readspectrum.h @@ -42,8 +42,12 @@ class MSDataFile; class SpectrumStore { public: static pappso::SpectrumSp getSpectrumSpFromMsRunSp(MsRunSp msrun, unsigned int scan_num); + /** @brief try to find MS run statistics + */ + static void checkMsRunStatistics(MsRunSp msrun); private: static const QString findMzFile(const QString &filename); + static pwiz::msdata::MSDataFile * findPwizMSDataFile(MsRunSp msrun) ; private: static std::map<MsRunSp, pwiz::msdata::MSDataFile *> _map_msrun_msdatafile; }; diff --git a/src/utils/types.h b/src/utils/types.h index a264e3aaf..619d3b7ce 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -74,9 +74,20 @@ enum class IdentificationEngineStatistics: std::int8_t { total_peptide_used=3,///< total number of peptides generated and used in identification total_proteins_used=4,///< total number of proteins generated and used in identification total_unique_assigned=5,///< total number unique peptide sequence assigned - }; +/** \def MsRunStatistics MS run statistics + * + */ +enum class MsRunStatistics: std::int8_t { + total_spectra=1, ///< total number of spectra + total_spectra_ms1=2,///< total number of MS1 spectra + total_spectra_ms2=3,///< total number of MS2 spectra + total_spectra_ms3=4,///< total number of MS3 spectra +}; + + + /** \def GroupingType list of available grouping algoritms * */ -- GitLab