Skip to content
Snippets Groups Projects
computeqvalues.cpp 7.57 KiB
Newer Older
/**
 * \file core/qvalue/computeqvalues.cpp
 * \date 04/09/2019
 * \author Olivier Langella
 * \brief compute q-value for each peptide evidence (PSM) of a project
 */

/*******************************************************************************
 * Copyright (c) 2019 Olivier Langella <olivier.langella@u-psud.fr>.
 *
 * This file is part of XTPcpp.
 *
 *     XTPcpp is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     XTPcpp is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include "computeqvalues.h"
#include <pappsomspp/exception/exceptionnotimplemented.h>

ComputeQvalues::ComputeQvalues(ProjectSp project_sp)
{
  std::vector<IdentificationDataSourceSp> identification_source_list =
    project_sp.get()
      ->getIdentificationDataSourceStore()
      .getIdentificationDataSourceList();


  std::vector<PeptideEvidence *> tandem_peptide_evidence_list;
  std::vector<PeptideEvidence *> mascot_peptide_evidence_list;
  for(auto &identification_source_sp : identification_source_list)
    {
      if(identification_source_sp.get()->getIdentificationEngine() ==
         IdentificationEngine::XTandem)
        {
          tandem_peptide_evidence_list.reserve(
            tandem_peptide_evidence_list.size() +
            identification_source_sp.get()->getPeptideEvidenceStore().size());
          for(auto &pe_sp : identification_source_sp.get()
                              ->getPeptideEvidenceStore()
                              .getPeptideEvidenceList())
            {
              tandem_peptide_evidence_list.push_back(pe_sp.get());
            }
        }
      else if(identification_source_sp.get()->getIdentificationEngine() ==
              IdentificationEngine::mascot)
        {
          mascot_peptide_evidence_list.reserve(
            mascot_peptide_evidence_list.size() +
            identification_source_sp.get()->getPeptideEvidenceStore().size());
          for(auto &pe_sp : identification_source_sp.get()
                              ->getPeptideEvidenceStore()
                              .getPeptideEvidenceList())
            {
              mascot_peptide_evidence_list.push_back(pe_sp.get());
            }
        }
      else
        {

          throw pappso::ExceptionNotImplemented(
            QObject::tr("q-value computation for %1 identification engine is "
                        "not yet implemented")
              .arg(
                identification_source_sp.get()->getIdentificationEngineName()));
        }
  for(auto &identification_group :
      project_sp.get()->getIdentificationGroupList())
    {
      for(auto &p_protein_match : identification_group->getProteinMatchList())
        {
          if(p_protein_match->getProteinXtpSp().get()->isDecoy())
            {
              p_protein_match->collectPeptideEvidences(
                m_falsePeptideEvidenceList, ValidationState::notValid);
            }
        }
    }
  computeTandemPeptideEvidenceQvalues(tandem_peptide_evidence_list);

  computeMascotPeptideEvidenceQvalues(mascot_peptide_evidence_list);
}

ComputeQvalues::~ComputeQvalues()
{
}

void
ComputeQvalues::computeMascotPeptideEvidenceQvalues(
  std::vector<PeptideEvidence *> &mascot_peptide_evidence_list) const
{

  // mascot_expectation_value
  std::sort(mascot_peptide_evidence_list.begin(),
            mascot_peptide_evidence_list.end(),
            [](const PeptideEvidence *pepa, const PeptideEvidence *pepb) {
              QVariant evalue_a =
                pepa->getParam(PeptideEvidenceParam::mascot_expectation_value);
              QVariant evalue_b =
                pepb->getParam(PeptideEvidenceParam::mascot_expectation_value);
              if(evalue_a.isNull() || evalue_b.isNull())
                {
                  throw pappso::ExceptionNotImplemented(QObject::tr(
                    "Mascot Evalue is missing, unable to compute q-value"));
                }
              return (evalue_a.toDouble() < evalue_b.toDouble());
            });
  std::size_t count_decoy  = 0;
  std::size_t count_target = 0;
  for(PeptideEvidence *pep : mascot_peptide_evidence_list)
    {
      if(m_falsePeptideEvidenceList.find(pep) !=
         m_falsePeptideEvidenceList.end())
        {
          count_decoy++;
        }
      else
        {
          count_target++;
        }
      double qvalue = Utils::computeFdr(count_decoy, count_target);
      pep->setParam(PeptideEvidenceParam::pappso_qvalue, QVariant(qvalue));
    }

  cleanPeptideEvidenceList(mascot_peptide_evidence_list);
}

void
ComputeQvalues::computeTandemPeptideEvidenceQvalues(
Langella Olivier's avatar
Langella Olivier committed
  std::vector<PeptideEvidence *> &tandem_peptide_evidence_list)
  std::sort(tandem_peptide_evidence_list.begin(),
            tandem_peptide_evidence_list.end(),
            [](const PeptideEvidence *pepa, const PeptideEvidence *pepb) {
              return (pepa->getEvalue() < pepb->getEvalue());
            });
  std::size_t count_decoy  = 0;
  std::size_t count_target = 0;
  for(PeptideEvidence *pep : tandem_peptide_evidence_list)
    {
      if(m_falsePeptideEvidenceList.find(pep) !=
         m_falsePeptideEvidenceList.end())
        {
          count_decoy++;
        }
      else
        {
          count_target++;
        }
      double qvalue = Utils::computeFdr(count_decoy, count_target);
      pep->setParam(PeptideEvidenceParam::pappso_qvalue, QVariant(qvalue));
  cleanPeptideEvidenceList(tandem_peptide_evidence_list);
Langella Olivier's avatar
Langella Olivier committed
  m_tandemPeptideEvidenceList = tandem_peptide_evidence_list;
}

void
ComputeQvalues::cleanPeptideEvidenceList(
  std::vector<PeptideEvidence *> &peptide_evidence_list) const
{
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
  // check reverse list to clean q-values
  auto rit = peptide_evidence_list.rbegin();
  while(rit != peptide_evidence_list.rend())
      if((*rit)->getParam(PeptideEvidenceParam::pappso_qvalue).toDouble() >
          (*rit)->setParam(PeptideEvidenceParam::pappso_qvalue,
                           QVariant(qvalue_max));
      qvalue_max =
        (*rit)->getParam(PeptideEvidenceParam::pappso_qvalue).toDouble();
      rit++;
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;


void
ComputeQvalues::writeDistributionsByEngines(CalcWriterInterface *p_writer) const
{
Langella Olivier's avatar
Langella Olivier committed

  std::size_t count_decoy  = 0;
  std::size_t count_target = 0;
  for(PeptideEvidence *pep : m_tandemPeptideEvidenceList)
    {
      if(m_falsePeptideEvidenceList.find(pep) !=
         m_falsePeptideEvidenceList.end())
        {
          count_decoy++;
        }
      else
        {
          count_target++;
        }
      // double qvalue = Utils::computeFdr(count_decoy, count_target);
      // pep->setParam(PeptideEvidenceParam::pappso_qvalue, QVariant(qvalue));
      p_writer->writeCell((int) count_target);

      p_writer->writeCell((int) count_decoy);

      p_writer->writeCell(pep->getEvalue());

      p_writer->writeCell(pep->getParam(PeptideEvidenceParam::pappso_qvalue).toDouble());
      p_writer->writeLine();
    }