/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
*     XTPcpp is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     XTPcpp is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/

#include "identificationgroup.h"
#include "project.h"
#include "../utils/groupstore.h"
#include <pappsomspp/pappsoexception.h>

#include <algorithm>
#include <iterator>
#include <set>
#include <utility>
#include <vector>

namespace {

/**
 * \brief Count the distinct elements of \p values.
 *
 * The vector is sorted and de-duplicated in place; only the number of unique
 * elements is returned.
 */
template <typename T>
unsigned int countDistinct(std::vector<T> & values)
{
    std::sort(values.begin(), values.end());
    auto last = std::unique(values.begin(), values.end());
    return static_cast<unsigned int>(std::distance(values.begin(), last));
}

} // namespace

/**
 * \brief Build an empty identification group attached to \p project.
 *
 * The project pointer is only stored, it is not owned by this object.
 */
IdentificationGroup::IdentificationGroup(Project * project)
{
    _p_project = project;
    // startGrouping(), startPtmGrouping() and the destructor all test these
    // pointers against nullptr, so make sure they start out null (harmless if
    // the header already provides default member initializers).
    _p_grp_experiment = nullptr;
    _p_grp_ptm_experiment = nullptr;
}

/**
 * \brief Release everything this group owns.
 *
 * The grouping experiments are created (and replaced) by startGrouping() and
 * startPtmGrouping(), so they are owned here and must be freed too. They are
 * released before the protein matches, which is the same relative order used
 * when startGrouping() replaces an experiment.
 */
IdentificationGroup::~IdentificationGroup()
{
    delete _p_grp_ptm_experiment;
    _p_grp_ptm_experiment = nullptr;

    delete _p_grp_experiment;
    _p_grp_experiment = nullptr;

    for (ProteinMatch * p_protein_match : _protein_match_list) {
        delete p_protein_match;
    }
}

/**
 * \brief Current PTM grouping experiment.
 * \return the stored pointer; it is null after startGrouping() and until
 *         startPtmGrouping() has been called.
 */
const PtmGroupingExperiment * IdentificationGroup::getPtmGroupingExperiment() const
{
    return _p_grp_ptm_experiment;
}

/** \brief Read-only access to the group store filled by startGrouping(). */
const GroupStore & IdentificationGroup::getGroupStore() const
{
    return _group_store;
}

/**
 * \brief Number of distinct GrpPeptide pointers collected from the protein
 *        matches whose validation state is at least \p state.
 */
unsigned int IdentificationGroup::countPeptideMass(ValidationState state) const
{
    std::vector<pappso::GrpPeptide *> count_peptide_mass;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if (p_protein_match->getValidationState() >= state) {
            p_protein_match->countPeptideMass(count_peptide_mass, state);
        }
    }
    return countDistinct(count_peptide_mass);
}

/**
 * \brief Number of distinct values collected by
 *        ProteinMatch::countPeptideMassSample() over the protein matches
 *        whose validation state is at least \p state.
 */
unsigned int IdentificationGroup::countPeptideMassSample(ValidationState state) const
{
    std::vector<std::size_t> count_peptide_mass_sample;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if (p_protein_match->getValidationState() >= state) {
            p_protein_match->countPeptideMassSample(count_peptide_mass_sample, state);
        }
    }
    return countDistinct(count_peptide_mass_sample);
}

/**
 * \brief Same as countPeptideMassSample(), restricted to protein matches
 *        whose protein is flagged as decoy.
 */
unsigned int IdentificationGroup::countDecoyPeptideMassSample(ValidationState state) const
{
    std::vector<std::size_t> count_peptide_mass_sample;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if (p_protein_match->getProteinXtpSp().get()->isDecoy()
                && (p_protein_match->getValidationState() >= state)) {
            p_protein_match->countPeptideMassSample(count_peptide_mass_sample, state);
        }
    }
    return countDistinct(count_peptide_mass_sample);
}

/**
 * \brief Number of decoy protein matches whose validation state is at least
 *        \p state.
 */
unsigned int IdentificationGroup::countDecoyProteinMatch(ValidationState state) const
{
    return static_cast<unsigned int>(
               std::count_if(_protein_match_list.begin(), _protein_match_list.end(),
    [state](const ProteinMatch * p_protein_match) {
        return p_protein_match->getProteinXtpSp().get()->isDecoy()
               && (p_protein_match->getValidationState() >= state);
    }));
}

/**
 * \brief Number of distinct PeptideEvidence objects collected from the
 *        protein matches whose validation state is at least \p state.
 */
unsigned int IdentificationGroup::countPeptideEvidence(ValidationState state) const
{
    std::set<const PeptideEvidence *> peptide_evidence_set;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if (p_protein_match->getValidationState() >= state) {
            p_protein_match->collectPeptideEvidences(peptide_evidence_set, state);
        }
    }
    return static_cast<unsigned int>(peptide_evidence_set.size());
}

/**
 * \brief Same as countPeptideEvidence(), restricted to protein matches whose
 *        protein is flagged as decoy.
 */
unsigned int IdentificationGroup::countDecoyPeptideEvidence(ValidationState state) const
{
    std::set<const PeptideEvidence *> peptide_evidence_set;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if ((p_protein_match->getValidationState() >= state)
                && p_protein_match->getProteinXtpSp().get()->isDecoy()) {
            p_protein_match->collectPeptideEvidences(peptide_evidence_set, state);
        }
    }
    return static_cast<unsigned int>(peptide_evidence_set.size());
}

/**
 * \brief Sum of ProteinMatch::countPeptideMatch() over the protein matches
 *        whose validation state is at least \p state.
 *
 * This is a plain sum: nothing is de-duplicated across protein matches.
 */
unsigned int IdentificationGroup::countPeptideMatch(ValidationState state) const
{
    unsigned int total = 0;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if (p_protein_match->getValidationState() >= state) {
            total += p_protein_match->countPeptideMatch(state);
        }
    }
    return total;
}

/**
 * \brief Same as countPeptideMatch(), restricted to protein matches whose
 *        protein is flagged as decoy.
 */
unsigned int IdentificationGroup::countDecoyPeptideMatch(ValidationState state) const
{
    unsigned int total = 0;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if ((p_protein_match->getValidationState() >= state)
                && p_protein_match->getProteinXtpSp().get()->isDecoy()) {
            total += p_protein_match->countPeptideMatch(state);
        }
    }
    return total;
}

/**
 * \brief Number of protein matches whose validation state is at least
 *        \p state.
 */
unsigned int IdentificationGroup::countProteinMatch(ValidationState state) const
{
    return static_cast<unsigned int>(
               std::count_if(_protein_match_list.begin(), _protein_match_list.end(),
    [state](const ProteinMatch * p_protein_match) {
        return p_protein_match->getValidationState() >= state;
    }));
}

/**
 * \brief Re-apply the automatic filters.
 *
 * The filters are first forwarded to the peptide evidence store of every
 * identification data source, then to every protein match.
 */
void IdentificationGroup::updateAutomaticFilters(const AutomaticFilterParameters & automatic_filter_parameters)
{
    qDebug() << "IdentificationGroup::updateAutomaticFilters begin" ;

    for (IdentificationDataSource * p_identification_source : _id_source_list) {
        p_identification_source->getPeptideEvidenceStore().updateAutomaticFilters(automatic_filter_parameters);
    }

    for (ProteinMatch * p_protein_match : _protein_match_list) {
        p_protein_match->updateAutomaticFilters(automatic_filter_parameters);
    }

    qDebug() << "IdentificationGroup::updateAutomaticFilters end" ;
}

/**
 * \brief Get the protein match registered for \p accession, creating it on
 *        first request.
 *
 * A newly created match is default-constructed, recorded in the accession
 * cache and appended to the protein match list; this group owns it.
 *
 * \throws pappso::PappsoException if \p accession is empty.
 */
ProteinMatch * IdentificationGroup::getProteinMatchInstance(const QString accession)
{
    if (accession.isEmpty()) {
        throw pappso::PappsoException(QObject::tr("Error protein match not found : accession is empty"));
    }

    auto it_cache = _cache_accession_protein_match.find(accession);
    if (it_cache != _cache_accession_protein_match.end()) {
        return it_cache->second;
    }

    // accession not found in cache
    ProteinMatch * p_protein_match = new ProteinMatch();
    _cache_accession_protein_match.insert(std::pair<QString, ProteinMatch *>(accession, p_protein_match));
    _protein_match_list.push_back(p_protein_match);
    return p_protein_match;
}

/**
 * \brief Append \p protein_match to the protein match list.
 *
 * The match is deleted by the destructor of this group. It is not recorded
 * in the accession cache used by getProteinMatchInstance().
 */
void IdentificationGroup::addProteinMatch(ProteinMatch * protein_match)
{
    _protein_match_list.push_back(protein_match);
}

/** \brief Tell whether \p p_msrun is one of the MS runs of this group. */
bool IdentificationGroup::contains (const MsRun * p_msrun) const
{
    return std::any_of(_ms_run_list.begin(), _ms_run_list.end(),
    [p_msrun](const MsRunSp & msrun) {
        return msrun.get() == p_msrun;
    });
}

/**
 * \brief Register an identification data source and its MS run.
 *
 * Both the MS run and the data source are added only if not already present.
 */
void IdentificationGroup::addIdentificationDataSourceP(IdentificationDataSource * p_identification_source)
{
    addMsRunSp(p_identification_source->getMsRunSp());

    auto it = std::find(_id_source_list.begin(), _id_source_list.end(), p_identification_source);
    if (it == _id_source_list.end()) {
        _id_source_list.push_back(p_identification_source);
    }
}

/** \brief Register \p ms_run_sp unless it is already in the MS run list. */
void IdentificationGroup::addMsRunSp(MsRunSp ms_run_sp)
{
    auto it = std::find(_ms_run_list.begin(), _ms_run_list.end(), ms_run_sp);
    if (it == _ms_run_list.end()) {
        _ms_run_list.push_back(ms_run_sp);
    }
}

/** \brief MS runs registered in this group, in insertion order. */
const std::vector<MsRunSp> & IdentificationGroup::getMsRunSpList() const
{
    return _ms_run_list;
}

/** \brief Identification data sources registered in this group. */
const std::vector<IdentificationDataSource *> & IdentificationGroup::getIdentificationDataSourceList() const
{
    return _id_source_list;
}

/** \brief Read-only access to the protein match list. */
const std::vector<ProteinMatch *> & IdentificationGroup::getProteinMatchList() const
{
    return _protein_match_list;
}

/** \brief Mutable access to the protein match list. */
std::vector<ProteinMatch *> & IdentificationGroup::getProteinMatchList()
{
    return _protein_match_list;
}

/** \brief Number of groups, as reported by the group store. */
std::size_t IdentificationGroup::countGroup()const
{
    return _group_store.countGroup();
}

/** \brief Number of subgroups, as reported by the group store. */
std::size_t IdentificationGroup::countSubGroup()const
{
    return _group_store.countSubGroup();
}

/**
 * \brief Append the mass deltas of the peptide evidences to \p delta_list.
 *
 * Peptide evidences are collected from non-decoy protein matches whose
 * validation state is at least \p state; each distinct evidence contributes
 * exactly one value.
 *
 * \param delta_list output vector, values are appended to it
 * \param unit when pappso::PrecisionUnit::ppm the ppm delta is used,
 *        otherwise the value of PeptideEvidence::getDeltaMass()
 * \param state minimum validation state of the protein matches considered
 */
void IdentificationGroup::collectMhDelta(std::vector< pappso::pappso_double> & delta_list, pappso::PrecisionUnit unit, ValidationState state) const
{
    std::set<const PeptideEvidence *> peptide_evidence_list;
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        if ((p_protein_match->getValidationState() >= state)
                && !p_protein_match->getProteinXtpSp().get()->isDecoy()) {
            p_protein_match->collectPeptideEvidences(peptide_evidence_list, state);
        }
    }

    for (const PeptideEvidence * p_peptide_evidence : peptide_evidence_list) {
        if (unit == pappso::PrecisionUnit::ppm) {
            delta_list.push_back(p_peptide_evidence->getPpmDeltaMass());
        }
        else {
            delta_list.push_back(p_peptide_evidence->getDeltaMass());
        }
    }
}

/**
 * \brief Run the protein grouping.
 *
 * Any previous grouping experiment is discarded and a new one is created for
 * \p grouping_type. Every protein match is handed to it, the grouping is
 * run, and the group store is rebuilt from the protein matches. Any existing
 * PTM grouping experiment is dropped as well.
 */
void IdentificationGroup::startGrouping (const GroupingType & grouping_type, WorkMonitorInterface * p_work_monitor)
{
    qDebug() << "IdentificationGroup::startGrouping begin ";

    // Reset the pointer right after the delete so that it never dangles if
    // the creation of the new experiment fails.
    delete _p_grp_experiment;
    _p_grp_experiment = nullptr;

    _p_grp_experiment = GroupingExperiment::newInstance(grouping_type, p_work_monitor);
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        p_protein_match->setGroupingExperiment(_p_grp_experiment);
    }
    _p_grp_experiment->startGrouping();

    _group_store.clear();
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        p_protein_match->setGroupInstance(_group_store);
    }

    delete _p_grp_ptm_experiment;
    _p_grp_ptm_experiment = nullptr;

    qDebug() << "IdentificationGroup::startGrouping end ";
}

/**
 * \brief Run the PTM grouping.
 *
 * Any previous PTM grouping experiment is replaced by a new one configured
 * with ValidationState::grouped, fed with every protein match and run.
 */
void IdentificationGroup::startPtmGrouping ()
{
    qDebug() << "IdentificationGroup::startPtmGrouping begin ";

    delete _p_grp_ptm_experiment;
    _p_grp_ptm_experiment = nullptr;

    _p_grp_ptm_experiment = new PtmGroupingExperiment();
    _p_grp_ptm_experiment->setValidationState(ValidationState::grouped);
    for (ProteinMatch * p_protein_match : _protein_match_list) {
        _p_grp_ptm_experiment->addProteinMatch(p_protein_match);
    }
    _p_grp_ptm_experiment->startGrouping();

    qDebug() << "IdentificationGroup::startPtmGrouping end ";
}

/**
 * \brief Name used for the tab of this group.
 * \return the sample name of the first registered MS run, or an empty string
 *         when no MS run has been registered yet.
 */
const QString IdentificationGroup::getTabName() const
{
    if (_ms_run_list.empty()) {
        // Indexing an empty vector would be undefined behaviour.
        return QString();
    }
    return _ms_run_list[0]->getSampleName();
}

/**
 * \brief Ratio of countDecoyPeptideMassSample() to countPeptideMassSample()
 *        for \p state.
 *
 * The division is not guarded: the result is whatever the floating point
 * division yields when the total count is zero.
 */
pappso::pappso_double IdentificationGroup::getPeptideMassFdr(ValidationState state) const
{
    pappso::pappso_double total_peptide = countPeptideMassSample(state);
    pappso::pappso_double false_peptide = countDecoyPeptideMassSample(state);

    return (false_peptide / total_peptide);
}

/**
 * \brief Ratio of countDecoyProteinMatch() to countProteinMatch() for
 *        \p state.
 *
 * The division is not guarded: the result is whatever the floating point
 * division yields when the total count is zero.
 */
pappso::pappso_double IdentificationGroup::getProteinFdr(ValidationState state) const
{
    pappso::pappso_double total_prot = countProteinMatch(state);
    pappso::pappso_double false_prot = countDecoyProteinMatch(state);

    return (false_prot / total_prot);
}