/*******************************************************************************
 * Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
 *
 * This file is part of XTPcpp.
 *
 *     XTPcpp is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     XTPcpp is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <olivier.langella@u-psud.fr> - initial API and
 *implementation
 ******************************************************************************/

#include "identificationgroup.h"
#include "project.h"
#include "../utils/groupstore.h"
#include <pappsomspp/pappsoexception.h>
#include <algorithm>
#include <functional>
#include <iterator>
#include <set>
#include <vector>

namespace
{
/**
 * @brief Sorts @p values in place and returns the number of distinct values.
 *
 * std::less is used explicitly because the element type may be a raw pointer:
 * std::less guarantees a total order on pointers, which the built-in
 * operator< does not for pointers into unrelated objects.
 */
template <typename T>
unsigned int
countDistinct(std::vector<T> &values)
{
  std::sort(values.begin(), values.end(), std::less<T>());
  const auto last = std::unique(values.begin(), values.end());
  return static_cast<unsigned int>(std::distance(values.begin(), last));
}
} // namespace

/**
 * @brief Builds an identification group attached to @p project.
 *
 * The grouping experiment pointers are explicitly reset to nullptr so that
 * the destructor can delete them unconditionally.
 *
 * @param project project pointer stored in the group (not deleted by it)
 */
IdentificationGroup::IdentificationGroup(Project *project)
{
  _p_project            = project;
  _p_grp_experiment     = nullptr;
  _p_grp_ptm_experiment = nullptr;
}

/**
 * @brief Releases everything this group allocated.
 *
 * The grouping experiments are created (and replaced) by startGrouping() and
 * startPtmGrouping(), so they are owned here and must be freed too; the
 * previous implementation leaked them. They are destroyed before the protein
 * matches, i.e. while the protein matches are still alive, exactly as
 * startGrouping() does when it replaces them.
 */
IdentificationGroup::~IdentificationGroup()
{
  // deleting a null pointer is a no-op, no check needed
  delete _p_grp_ptm_experiment;
  _p_grp_ptm_experiment = nullptr;
  delete _p_grp_experiment;
  _p_grp_experiment = nullptr;

  for(ProteinMatch *p_protein_match : _protein_match_list)
    {
      delete p_protein_match;
    }
}

/**
 * @brief Gives read access to the current PTM grouping experiment.
 * @return the stored pointer; nullptr when no PTM grouping is available
 *         (startGrouping() resets it, startPtmGrouping() creates it)
 */
const PtmGroupingExperiment *
IdentificationGroup::getPtmGroupingExperiment() const
{
  return _p_grp_ptm_experiment;
}

/**
 * @brief Gives read access to the group store of this identification group.
 */
const GroupStore &
IdentificationGroup::getGroupStore() const
{
  return _group_store;
}

/**
 * @brief Sums ProteinMatch::getProtoNsaf() over every protein match whose
 *        validation state is at least ValidationState::grouped.
 *
 * @param p_msrun_id MS run forwarded to ProteinMatch::getProtoNsaf()
 * @param p_label label forwarded to ProteinMatch::getProtoNsaf()
 * @return the accumulated value (0 when no protein match qualifies)
 */
pappso::pappso_double
IdentificationGroup::computeProtoNsafSum(const MsRun *p_msrun_id,
                                         const Label *p_label) const
{
  pappso::pappso_double nsaf_sum = 0;
  for(ProteinMatch *p_protein_match : _protein_match_list)
    {
      if(p_protein_match->getValidationState() < ValidationState::grouped)
        {
          continue;
        }
      nsaf_sum += p_protein_match->getProtoNsaf(p_msrun_id, p_label);
    }
  return nsaf_sum;
}

/**
 * @brief Counts the distinct pappso::GrpPeptide pointers reported by the
 *        protein matches whose validation state is at least @p state.
 *
 * @param state minimum validation state, also forwarded to
 *        ProteinMatch::countPeptideMass()
 * @return number of distinct pointers collected
 */
unsigned int
IdentificationGroup::countPeptideMass(ValidationState state) const
{
  std::vector<pappso::GrpPeptide *> grp_peptide_list;
  for(ProteinMatch *p_protein_match : _protein_match_list)
    {
      if(p_protein_match->getValidationState() >= state)
        {
          p_protein_match->countPeptideMass(grp_peptide_list, state);
        }
    }
  return countDistinct(grp_peptide_list);
}

/**
 * @brief Counts the distinct values collected by
 *        ProteinMatch::countPeptideMassSample() over all protein matches.
 *
 * There is no protein-level validation-state filter here (that check is
 * disabled in the original code); @p state is only forwarded to
 * ProteinMatch::countPeptideMassSample().
 *
 * @param state validation state forwarded to each protein match
 * @return number of distinct values collected
 */
unsigned int
IdentificationGroup::countPeptideMassSample(ValidationState state) const
{
  std::vector<std::size_t> peptide_mass_sample_list;
  for(ProteinMatch *p_protein_match : _protein_match_list)
    {
      p_protein_match->countPeptideMassSample(peptide_mass_sample_list, state);
    }
  return countDistinct(peptide_mass_sample_list);
}

/**
 * @brief Same as countPeptideMassSample(), restricted to protein matches
 *        whose protein is flagged as decoy.
 *
 * As in countPeptideMassSample(), no protein-level validation-state filter
 * is applied; @p state is only forwarded.
 *
 * @param state validation state forwarded to each decoy protein match
 * @return number of distinct values collected from decoy protein matches
 */
unsigned int
IdentificationGroup::countDecoyPeptideMassSample(ValidationState state) const
{
  std::vector<std::size_t> peptide_mass_sample_list;
  for(ProteinMatch *p_protein_match : _protein_match_list)
    {
      if(!p_protein_match->getProteinXtpSp().get()->isDecoy())
        {
          continue;
        }
      p_protein_match->countPeptideMassSample(peptide_mass_sample_list, state);
    }
  return countDistinct(peptide_mass_sample_list);
}

/**
 * @brief Counts the protein matches that are decoy and whose validation
 *        state is at least @p state.
 *
 * @param state minimum validation state
 * @return number of matching protein matches
 */
unsigned int
IdentificationGroup::countDecoyProteinMatch(ValidationState state) const
{
  const auto decoy_count =
    std::count_if(_protein_match_list.begin(),
                  _protein_match_list.end(),
                  [state](const ProteinMatch *p_protein_match) {
                    return p_protein_match->getProteinXtpSp().get()->isDecoy() &&
                           (p_protein_match->getValidationState() >= state);
                  });
  return static_cast<unsigned int>(decoy_count);
}
unsigned int IdentificationGroup::countPeptideEvidence(ValidationState state) const { std::set<const PeptideEvidence *> peptide_evidence_set; for(auto &p_protein_match : _protein_match_list) { // if(p_protein_match->getValidationState() >= state) // { p_protein_match->collectPeptideEvidences(peptide_evidence_set, state); // } } return peptide_evidence_set.size(); } unsigned int IdentificationGroup::countDecoyPeptideEvidence(ValidationState state) const { std::set<const PeptideEvidence *> peptide_evidence_set; for(auto &p_protein_match : _protein_match_list) { // if(p_protein_match->getValidationState() >= state) // { if(p_protein_match->getProteinXtpSp().get()->isDecoy()) { p_protein_match->collectPeptideEvidences(peptide_evidence_set, state); } // } } return peptide_evidence_set.size(); } unsigned int IdentificationGroup::countPeptideMatch(ValidationState state) const { unsigned int i = 0; for(auto &p_protein_match : _protein_match_list) { if(p_protein_match->getValidationState() >= state) { i += p_protein_match->countPeptideMatch(state); } } return i; } unsigned int IdentificationGroup::countDecoyPeptideMatch(ValidationState state) const { unsigned int i = 0; for(auto &p_protein_match : _protein_match_list) { if(p_protein_match->getValidationState() >= state) { if(p_protein_match->getProteinXtpSp().get()->isDecoy()) { i += p_protein_match->countPeptideMatch(state); } } } return i; } unsigned int IdentificationGroup::countProteinMatch(ValidationState state) const { return std::count_if(_protein_match_list.begin(), _protein_match_list.end(), [state](const ProteinMatch *p_protein_match) { if(p_protein_match->getValidationState() >= state) { return true; } else { return false; } }); } void IdentificationGroup::updateAutomaticFilters( const AutomaticFilterParameters &automatic_filter_parameters) { qDebug() << "IdentificationGroup::updateAutomaticFilters begin"; for(IdentificationDataSource *p_identification_source_list : _id_source_list) { 
p_identification_source_list->getPeptideEvidenceStore() .updateAutomaticFilters(automatic_filter_parameters); } qDebug() << "IdentificationGroup::updateAutomaticFilters begin p_protein_match"; for(auto &p_protein_match : _protein_match_list) { p_protein_match->updateAutomaticFilters(automatic_filter_parameters); } if(_p_grp_experiment != nullptr) { } qDebug() << "IdentificationGroup::updateAutomaticFilters end"; } ProteinMatch * IdentificationGroup::getProteinMatchInstance(const QString accession) { if(accession.isEmpty()) { throw pappso::PappsoException( QObject::tr("Error protein match not found : accession is empty")); } auto it_cache = _cache_accession_protein_match.find(accession); if(it_cache == _cache_accession_protein_match.end()) { // accession not found in cache ProteinMatch *p_protein_match = new ProteinMatch(); _cache_accession_protein_match.insert( std::pair<QString, ProteinMatch *>(accession, p_protein_match)); _protein_match_list.push_back(p_protein_match); return p_protein_match; } else { return it_cache->second; } return nullptr; } void IdentificationGroup::addProteinMatch(ProteinMatch *protein_match) { _protein_match_list.push_back(protein_match); } bool IdentificationGroup::contains(const MsRun *p_msrun) const { for(const MsRunSp &msrun : _ms_run_list) { if(msrun.get() == p_msrun) return true; } return false; } bool IdentificationGroup::containSample(const QString &sample) const { for(const MsRunSp &msrun : _ms_run_list) { if(msrun.get()->getSampleName() == sample) return true; } return false; } void IdentificationGroup::addIdentificationDataSourceP( IdentificationDataSource *p_identification_source) { addMsRunSp(p_identification_source->getMsRunSp()); auto it = std::find( _id_source_list.begin(), _id_source_list.end(), p_identification_source); if(it == _id_source_list.end()) { _id_source_list.push_back(p_identification_source); } } void IdentificationGroup::addMsRunSp(MsRunSp ms_run_sp) { auto it = std::find(_ms_run_list.begin(), 
_ms_run_list.end(), ms_run_sp); if(it == _ms_run_list.end()) { _ms_run_list.push_back(ms_run_sp); } } const std::vector<MsRunSp> & IdentificationGroup::getMsRunSpList() const { return _ms_run_list; } const std::vector<IdentificationDataSource *> & IdentificationGroup::getIdentificationDataSourceList() const { return _id_source_list; } const std::vector<ProteinMatch *> & IdentificationGroup::getProteinMatchList() const { return _protein_match_list; } std::vector<ProteinMatch *> & IdentificationGroup::getProteinMatchList() { return _protein_match_list; } std::size_t IdentificationGroup::countGroup() const { return _group_store.countGroup(); } std::size_t IdentificationGroup::countSubGroup() const { return _group_store.countSubGroup(); } void IdentificationGroup::collectMhDelta( std::vector<pappso::pappso_double> &delta_list, pappso::PrecisionUnit unit, ValidationState state) const { std::set<const PeptideEvidence *> peptide_evidence_list; for(auto &p_protein_match : _protein_match_list) { if(p_protein_match->getValidationState() >= state) { if(!p_protein_match->getProteinXtpSp().get()->isDecoy()) { // p_protein_match->collectMhDelta(already_counted, delta_list, // unit, state); p_protein_match->collectPeptideEvidences(peptide_evidence_list, state); } } } for(const PeptideEvidence *p_peptide_evidence : peptide_evidence_list) { if(unit == pappso::PrecisionUnit::ppm) { delta_list.push_back(p_peptide_evidence->getPpmDeltaMass()); } else { delta_list.push_back(p_peptide_evidence->getDeltaMass()); } } } void IdentificationGroup::startGrouping( ContaminantRemovalMode contaminantRemovalMode, const GroupingType &grouping_type, WorkMonitorInterface *p_work_monitor) { qDebug() << "IdentificationGroup::startGrouping begin "; if(_p_grp_experiment != nullptr) { delete _p_grp_experiment; } _p_grp_experiment = GroupingExperiment::newInstance( contaminantRemovalMode, grouping_type, p_work_monitor); for(ProteinMatch *p_protein_match : _protein_match_list) { 
p_protein_match->setGroupingExperiment(_p_grp_experiment); } _p_grp_experiment->startGrouping(); _group_store.clear(); for(ProteinMatch *p_protein_match : _protein_match_list) { p_protein_match->setGroupInstance(_group_store); } if(_p_grp_ptm_experiment != nullptr) { delete _p_grp_ptm_experiment; _p_grp_ptm_experiment = nullptr; } qDebug() << "IdentificationGroup::startGrouping end "; } void IdentificationGroup::startPtmGrouping() { qDebug() << "IdentificationGroup::startPtmGrouping begin "; if(_p_grp_ptm_experiment != nullptr) { delete _p_grp_ptm_experiment; _p_grp_ptm_experiment = nullptr; } _p_grp_ptm_experiment = new PtmGroupingExperiment(); _p_grp_ptm_experiment->setValidationState(ValidationState::grouped); for(ProteinMatch *p_protein_match : _protein_match_list) { _p_grp_ptm_experiment->addProteinMatch(p_protein_match); } _p_grp_ptm_experiment->startGrouping(); qDebug() << "IdentificationGroup::startPtmGrouping end "; } const QString IdentificationGroup::getTabName() const { return _ms_run_list[0]->getSampleName(); } pappso::pappso_double IdentificationGroup::getPsmFdr(ValidationState state) const { pappso::pappso_double total_peptide = countPeptideEvidence(state); pappso::pappso_double false_peptide = countDecoyPeptideEvidence(state); return (false_peptide / total_peptide); } pappso::pappso_double IdentificationGroup::getPeptideMassFdr(ValidationState state) const { // ValidationState state = ValidationState::valid; pappso::pappso_double total_peptide = countPeptideMassSample(state); pappso::pappso_double false_peptide = countDecoyPeptideMassSample(state); return (false_peptide / total_peptide); } pappso::pappso_double IdentificationGroup::getProteinFdr(ValidationState state) const { // ValidationState state = ValidationState::valid; pappso::pappso_double total_prot = countProteinMatch(state); pappso::pappso_double false_prot = countDecoyProteinMatch(state); return (false_prot / total_prot); } void IdentificationGroup::getSameXicPeptideEvidenceList( 
std::vector<const PeptideEvidence *> &peptide_evidence_list, const MsRun *p_msrun, const PeptideXtp *p_peptide, unsigned int charge) const { if(!contains(p_msrun)) return; for(const IdentificationDataSource *p_identification_source : _id_source_list) { if(p_msrun == p_identification_source->getMsRunSp().get()) { p_identification_source->getPeptideEvidenceStore() .getSameXicPeptideEvidenceList( peptide_evidence_list, p_msrun, p_peptide, charge); } } }