From a38f40167de1525eb74f82f4e651164b5eb8acae Mon Sep 17 00:00:00 2001 From: Olivier Langella <olivier.langella@u-psud.fr> Date: Thu, 3 May 2018 22:03:14 +0200 Subject: [PATCH] normalized spectral abundance factor done --- src/core/proteinmatch.cpp | 6 +++ src/core/proteinmatch.h | 37 ++++++++--------- .../export_spreadsheet_dialog.ui | 40 ++++++++++++++++--- .../exportspreadsheetdialog.cpp | 9 +++++ .../exportspreadsheetdialog.h | 1 + src/input/xtpxpipsaxhandler.cpp | 14 ++++++- src/output/ods/comparbasesheet.cpp | 1 + src/output/ods/comparbasesheet.h | 6 ++- src/output/ods/comparspectrasheet.cpp | 21 ++++++++++ src/output/ods/comparspectrasheet.h | 12 ++++++ src/output/ods/odsexport.cpp | 4 ++ 11 files changed, 124 insertions(+), 27 deletions(-) diff --git a/src/core/proteinmatch.cpp b/src/core/proteinmatch.cpp index cf3ba339f..464294f97 100644 --- a/src/core/proteinmatch.cpp +++ b/src/core/proteinmatch.cpp @@ -326,11 +326,17 @@ pappso::pappso_double ProteinMatch::getLogEvalue(const MsRun * sp_msrun_id) cons } pappso::pappso_double ProteinMatch::getNsaf(pappso::pappso_double proto_nsaf_sum, const MsRun * p_msrun_id) const { + if (proto_nsaf_sum == 0) { + throw pappso::PappsoException(QObject::tr("Error computing NSAF for protein %1 :\nproto_nsaf_sum is null").arg(this->getProteinXtpSp().get()->getAccession())); + } return (getProtoNsaf(p_msrun_id)/proto_nsaf_sum); } pappso::pappso_double ProteinMatch::getProtoNsaf(const MsRun * sp_msrun_id) const { try { + if (_protein_sp.get()->size() < 1) { + throw pappso::PappsoException(QObject::tr("protein has no amino acid sequence")); + } pappso::pappso_double proto_nsaf = (pappso::pappso_double) countSampleScan(ValidationState::validAndChecked ,sp_msrun_id) / (pappso::pappso_double) _protein_sp.get()->size(); return proto_nsaf; diff --git a/src/core/proteinmatch.h b/src/core/proteinmatch.h index bb86106c8..d7e2db30d 100644 --- a/src/core/proteinmatch.h +++ b/src/core/proteinmatch.h @@ -45,7 +45,7 @@ public: ~ProteinMatch(); const ProteinXtpSp & getProteinXtpSp() const; - + /** @brief compute protein log10(Evalue) within samples * */ pappso::pappso_double getLogEvalue(const MsRun * sp_msrun_id = nullptr) const; @@ -64,17 +64,18 @@ public: **/ const QString getHtmlSequence(PeptideEvidence * peptide_evidence_to_locate = nullptr) const; - /** @brief compute proto NSAF within msrun - * Warning: this is not NSAF, just a part - * @param p_msrun_id pointer on the msrun to get NSAF. - * */ + /** @brief compute proto NSAF within msrun : spectral abundance factor (SAF) + * Warning: this is not NSAF, just a part + * @param p_msrun_id pointer on the msrun to get NSAF. + * */ pappso::pappso_double getProtoNsaf(const MsRun * p_msrun_id) const; - /** @brief compute NSAF within msrun - * Warning: this is not NSAF, just a part - * @param proto_nsaf_sum the sum of proto nsaf of all proteins within the msrun - * @param p_msrun_id pointer on the msrun to get NSAF - * */ + /** @brief compute NSAF within msrun : normalized spectral abundance factor (NSAF) + * Florens L., Carozza M. J. C., Swanson S. K., et al. Analyzing chromatin remodeling complexes using shotgun proteomics and normalized spectral abundance factors. Methods. 2006;40(4):303–311. doi: 10.1016/j.ymeth.2006.07.028. + * https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1815300/ + * @param proto_nsaf_sum the sum of SAF of all proteins within the msrun + * @param p_msrun_id pointer on the msrun to get NSAF + * */ pappso::pappso_double getNsaf(pappso::pappso_double proto_nsaf_sum, const MsRun * p_msrun_id) const; /** @brief compute Protein Abundance Index (PAI) within sample @@ -91,7 +92,7 @@ public: void addPeptideMatch(const PeptideMatch & peptide_match); std::vector<PeptideMatch> & getPeptideMatchList(); const std::vector<PeptideMatch> & getPeptideMatchList() const; - + /** @brief get peptide match sublist with required validation state * @param state the validation state to select * @param sp_msrun_id the msrun to look for, if nullptr, then get peptide match overall MSruns @@ -106,15 +107,15 @@ public: bool isGrouped() const; ValidationState getValidationState() const; - + /** @brief count peptide match (psm) listed in this protein match */ unsigned int countPeptideMatch(ValidationState state) const; - + /** @brief count distinct sample + scans implied in this protein identification */ unsigned int countSampleScan(ValidationState state, const MsRun * p_msrun_id=nullptr) const; - + /** @brief count peptide (peptide+mass) */ void countPeptideMass(std::vector<pappso::GrpPeptide *> & count_peptide_mass, ValidationState state) const; @@ -130,17 +131,17 @@ public: * */ void updateAutomaticFilters(const AutomaticFilterParameters & automatic_filter_parameters); - + /** @brief collect distinct peptide evidences */ void collectPeptideEvidences(std::set<const PeptideEvidence *> & peptide_evidence_set, ValidationState state) const; - + /** @brief count distinct sequence taking into account equivalence between Leucine and Isoleucine * @param state validation state of peptides to count * @param p_msrun_id count within the specified sample */ size_t countSequenceLi(ValidationState state,const MsRun * p_msrun_id=nullptr) const; - + /** @brief count distinct peptide+mass+charge * peptide is the peptide sequence LI (Leucine => Isoleucine) * mass is the peptide mass, not considering labelled residues (if any) @@ -148,7 +149,7 @@ public: * @param p_msrun_id count within the specified sample */ unsigned int countPeptideMassCharge(ValidationState state, const MsRun * sp_msrun_id=nullptr) const; - + /** @brief count distinct MS samples in which the protein is observed * @param state validation state of peptides to count */ diff --git a/src/gui/export_spreadsheet_dialog/export_spreadsheet_dialog.ui b/src/gui/export_spreadsheet_dialog/export_spreadsheet_dialog.ui index c2d5c0c1d..8aa973a7e 100644 --- a/src/gui/export_spreadsheet_dialog/export_spreadsheet_dialog.ui +++ b/src/gui/export_spreadsheet_dialog/export_spreadsheet_dialog.ui @@ -64,6 +64,9 @@ </item> <item> <widget class="QGroupBox" name="compar_groupbox"> + <property name="toolTip"> + <string/> + </property> <property name="title"> <string>Compar</string> </property> @@ -110,6 +113,16 @@ </property> </widget> </item> + <item row="0" column="3"> + <widget class="QCheckBox" name="comparnsaf_checkbox"> + <property name="toolTip"> + <string>normalized spectral abundance factor (NSAF)</string> + </property> + <property name="text"> + <string>NSAF</string> + </property> + </widget> + </item> </layout> </widget> </item> @@ -328,7 +341,7 @@ <slot>doComparUniqueSequence(bool)</slot> <hints> <hint type="sourcelabel"> - <x>437</x> + <x>359</x> <y>296</y> </hint> <hint type="destinationlabel"> @@ -344,7 +357,7 @@ <slot>doComparSpecificSpectra(bool)</slot> <hints> <hint type="sourcelabel"> - <x>231</x> + <x>167</x> <y>328</y> </hint> <hint type="destinationlabel"> @@ -360,7 +373,7 @@ <slot>doComparSpecificUniqueSequence(bool)</slot> <hints> <hint type="sourcelabel"> - <x>437</x> + <x>359</x> <y>328</y> </hint> <hint type="destinationlabel"> @@ -376,7 +389,7 @@ <slot>doComparPai(bool)</slot> <hints> <hint type="sourcelabel"> - <x>642</x> + <x>501</x> <y>296</y> </hint> <hint type="destinationlabel"> @@ -392,7 +405,7 @@ <slot>doComparEmpai(bool)</slot> <hints> <hint type="sourcelabel"> - <x>642</x> + <x>501</x> <y>328</y> </hint> <hint type="destinationlabel"> @@ -433,6 +446,22 @@ </hint> </hints> </connection> + <connection> + <sender>comparnsaf_checkbox</sender> + <signal>toggled(bool)</signal> + <receiver>ExportSpreadsheetDialog</receiver> + <slot>doComparNsaf(bool)</slot> + <hints> + <hint type="sourcelabel"> + <x>550</x> + <y>286</y> + </hint> + <hint type="destinationlabel"> + <x>806</x> + <y>294</y> + </hint> + </hints> + </connection> </connections> <slots> <slot>reject()</slot> @@ -452,5 +481,6 @@ <slot>doComparEmpai(bool)</slot> <slot>doPtmIslands(bool)</slot> <slot>doPtmSpectra(bool)</slot> + <slot>doComparNsaf(bool)</slot> </slots> </ui> diff --git a/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.cpp b/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.cpp index 45a1a878f..60ec8e679 100644 --- a/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.cpp +++ b/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.cpp @@ -88,6 +88,10 @@ ExportSpreadsheetDialog::ExportSpreadsheetDialog(QWidget * parent): if (settings.value("export_ods/comparempai", "true").toBool()) { ui->comparempai_checkbox->setCheckState(Qt::Checked); } + ui->comparnsaf_checkbox->setCheckState(Qt::Unchecked); + if (settings.value("export_ods/comparnsaf", "true").toBool()) { + ui->comparnsaf_checkbox->setCheckState(Qt::Checked); + } ui->samples_checkbox->setCheckState(Qt::Unchecked); if (settings.value("export_ods/samples", "true").toBool()) { ui->samples_checkbox->setCheckState(Qt::Checked); @@ -170,6 +174,11 @@ void ExportSpreadsheetDialog::doComparPai(bool simple) { QSettings settings; settings.setValue("export_ods/comparpai", QString("%1").arg(simple)); } +void ExportSpreadsheetDialog::doComparNsaf(bool simple) { + + QSettings settings; + settings.setValue("export_ods/comparnsaf", QString("%1").arg(simple)); +} void ExportSpreadsheetDialog::doPeptidePos(bool simple) { QSettings settings; diff --git a/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.h b/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.h index 33892c523..0214e871e 100644 --- a/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.h +++ b/src/gui/export_spreadsheet_dialog/exportspreadsheetdialog.h @@ -50,6 +50,7 @@ public slots: void doGroups(bool simple); void doPeptidePos(bool simple); void doComparEmpai(bool simple); + void doComparNsaf(bool simple); void doComparPai(bool simple); void doComparSpecificUniqueSequence(bool simple); void doComparUniqueSequence(bool simple); diff --git a/src/input/xtpxpipsaxhandler.cpp b/src/input/xtpxpipsaxhandler.cpp index ab4743b4b..3d9fa5c31 100644 --- a/src/input/xtpxpipsaxhandler.cpp +++ b/src/input/xtpxpipsaxhandler.cpp @@ -493,18 +493,28 @@ bool XtpXpipSaxHandler::startElement_protein(QXmlAttributes attributes) { </protein> */ _sp_current_protein = ProteinXtp().makeProteinXtpSp(); - - _sp_current_protein.get()->setFastaFileP(_map_fasta_files.at(attributes.value("fasta_id")).get()); + + auto fasta_it = _map_fasta_files.find(attributes.value("fasta_id")); + if (fasta_it == _map_fasta_files.end()) { + throw pappso::ExceptionNotFound(QObject::tr("ERROR fasta_id %1 not found").arg(attributes.value("fasta_id"))); + } + //qDebug() << "startElement_protein fastaid=" << attributes.value("fasta_id"); + _sp_current_protein.get()->setFastaFileP(fasta_it->second.get()); _sp_current_protein.get()->setAccession(attributes.value("acc")); + + //qDebug() << "startElement_protein acc=" << attributes.value("acc"); + //qDebug() << "startElement_protein description=" << attributes.value("description"); _sp_current_protein.get()->setDescription(attributes.value("description")); _sp_current_protein.get()->setIsContaminant(false); if (attributes.value("is_contaminant").simplified().toLower() == "true") { _sp_current_protein.get()->setIsContaminant(true); } + //qDebug() << "startElement_protein iscontaminant"; _sp_current_protein.get()->setIsDecoy(false); if (attributes.value("is_decoy").simplified().toLower() == "true") { _sp_current_protein.get()->setIsDecoy(true); } + //qDebug() << "startElement_protein isdecoy"; qDebug() << "startElement_protein end" ; return true; diff --git a/src/output/ods/comparbasesheet.cpp b/src/output/ods/comparbasesheet.cpp index 775d06a61..f228d5186 100644 --- a/src/output/ods/comparbasesheet.cpp +++ b/src/output/ods/comparbasesheet.cpp @@ -129,6 +129,7 @@ void ComparBaseSheet::writeProteinMatch(const ProteinMatch * p_protein_match) { void ComparBaseSheet::writeIdentificationGroup(IdentificationGroup * p_ident) { qDebug() << "ComparBaseSheet::writeIdentificationGroup begin"; + _p_current_identification_group = p_ident; writeHeaders(p_ident); diff --git a/src/output/ods/comparbasesheet.h b/src/output/ods/comparbasesheet.h index da230ef14..c0fc4f460 100644 --- a/src/output/ods/comparbasesheet.h +++ b/src/output/ods/comparbasesheet.h @@ -42,12 +42,12 @@ public : private : void writeIdentificationGroup(IdentificationGroup * p_ident); void writeHeaders(IdentificationGroup * p_ident); - void writeProteinMatch(const ProteinMatch * p_protein_match); - public : void writeSheet(); protected : virtual void writeComparValue(const ProteinMatch * p_protein_match, ValidationState state, const MsRun * p_msrun)=0; + void writeProteinMatch(const ProteinMatch * p_protein_match); + protected : OdsExport * _p_ods_export; @@ -58,6 +58,8 @@ protected : QString _first_cell_coordinate; + IdentificationGroup * _p_current_identification_group; + }; #endif // COMPARBASESHEET_H diff --git a/src/output/ods/comparspectrasheet.cpp b/src/output/ods/comparspectrasheet.cpp index cfc10cbc9..fb5c82395 100644 --- a/src/output/ods/comparspectrasheet.cpp +++ b/src/output/ods/comparspectrasheet.cpp @@ -74,3 +74,24 @@ void ComparEmpaiSheet::writeComparValue(const ProteinMatch * p_protein_match, Va _p_writer->writeCell(p_protein_match->getEmPAI(p_msrun)); qDebug() << "ComparEmpaiSheet::writeComparValue end"; } + +ComparNsafSheet::ComparNsafSheet(OdsExport* p_ods_export, CalcWriterInterface* p_writer, const Project* p_project): ComparBaseSheet(p_ods_export, p_writer, p_project) { + _title_sheet = "compar NSAF"; +} + +void ComparNsafSheet::writeComparValue(const ProteinMatch * p_protein_match, ValidationState state, const MsRun * p_msrun) { + qDebug() << "ComparNsafSheet::writeComparValue begin"; + pappso::pappso_double proto_nsaf_sum=0; + std::pair<std::map<const MsRun *,pappso::pappso_double>::iterator,bool> ret = _map_proto_nsaf_sum_by_msrun.insert(std::pair<const MsRun *,pappso::pappso_double>(p_msrun,0)); + if (ret.second==false) { + //"element 'z' already existed"; + proto_nsaf_sum = ret.first->second; + } + else { + proto_nsaf_sum = _p_current_identification_group->computeProtoNsafSum(p_msrun); + ret.first->second = proto_nsaf_sum; + } + + _p_writer->writeCell(p_protein_match->getNsaf(proto_nsaf_sum,p_msrun)); + qDebug() << "ComparNsafSheet::writeComparValue end"; +} diff --git a/src/output/ods/comparspectrasheet.h b/src/output/ods/comparspectrasheet.h index c3926823f..29653e279 100644 --- a/src/output/ods/comparspectrasheet.h +++ b/src/output/ods/comparspectrasheet.h @@ -69,4 +69,16 @@ protected : virtual void writeComparValue(const ProteinMatch * p_protein_match, ValidationState state, const MsRun * p_msrun) override; }; +class ComparNsafSheet:public ComparBaseSheet +{ +public : + ComparNsafSheet (OdsExport * p_ods_export, CalcWriterInterface * p_writer, const Project * p_project); + +protected : + virtual void writeComparValue(const ProteinMatch * p_protein_match, ValidationState state, const MsRun * p_msrun) override; +private: + + std::map<const MsRun *, pappso::pappso_double> _map_proto_nsaf_sum_by_msrun; +}; + #endif // COMPARSPECTRASHEET_H diff --git a/src/output/ods/odsexport.cpp b/src/output/ods/odsexport.cpp index ad1b661c6..6adb5c960 100644 --- a/src/output/ods/odsexport.cpp +++ b/src/output/ods/odsexport.cpp @@ -114,6 +114,10 @@ void OdsExport::write(CalcWriterInterface * p_writer, WorkMonitorInterface * p_m p_monitor->message(QObject::tr("writing emPAI comparisons")); ComparEmpaiSheet(this, p_writer, _p_project).writeSheet(); } + if (settings.value("export_ods/comparnsaf", "true").toBool()) { + p_monitor->message(QObject::tr("writing NSAF comparisons")); + ComparNsafSheet(this, p_writer, _p_project).writeSheet(); + } } if (settings.value("export_ods/samples", "true").toBool()) { p_monitor->message(QObject::tr("writing samples")); -- GitLab