/*******************************************************************************
 * Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
 *
 * This file is part of XTPcpp.
 *
 *     XTPcpp is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     XTPcpp is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
 ******************************************************************************/

#include "proteinxtp.h"
#include <pappsomspp/protein/enzyme.h>
#include <pappsomspp/pappsoexception.h>

/** @brief Builds an empty protein on top of a default-constructed pappso::Protein. */
ProteinXtp::ProteinXtp():pappso::Protein()
{
}

/**
 * @brief Copies the pappso::Protein part plus the decoy flag, the contaminant
 *        flag and the sequence database pointer.
 *
 * NOTE(review): _dbxref_list is not copied here, so a copy starts with an empty
 * cross-reference list. Confirm whether this is intentional before changing it.
 */
ProteinXtp::ProteinXtp(const ProteinXtp& other):pappso::Protein(other)
{
    _is_decoy = other._is_decoy;
    _is_contaminant = other._is_contaminant;
    _p_sequence_database = other._p_sequence_database;
}

ProteinXtp::~ProteinXtp()
{
}

/** @brief Equality is delegated entirely to pappso::Protein::operator==. */
bool ProteinXtp::operator==(const ProteinXtp& other) const
{
    return pappso::Protein::operator==(other);
}

/** @brief Returns a shared pointer owning a copy of this protein. */
ProteinXtpSp ProteinXtp::makeProteinXtpSp() const
{
    return std::make_shared<ProteinXtp>(*this);
}

/**
 * @brief Splits a full description line into accession and description.
 *
 * The input is whitespace-simplified first; the first space-separated field
 * becomes the accession and everything after it becomes the description.
 *
 * @param full_description complete description line
 */
void ProteinXtp::setCompleteDescription(const QString & full_description)
{
    // Simplify once and reuse the result for both sections.
    const QString simplified_description = full_description.simplified();
    setAccession(simplified_description.section(" ", 0, 0));
    setDescription(simplified_description.section(" ", 1));
}

/** @brief Sets the contaminant flag. */
void ProteinXtp::setIsContaminant(bool conta)
{
    _is_contaminant = conta;
}

/** @brief Sets the decoy flag. */
void ProteinXtp::setIsDecoy(bool conta)
{
    _is_decoy = conta;
}

/** @brief Returns the contaminant flag. */
bool ProteinXtp::isContaminant() const
{
    return _is_contaminant;
}

/** @brief Returns the decoy flag. */
bool ProteinXtp::isDecoy() const
{
    return _is_decoy;
}

/** @brief Returns the sequence with every '*' character removed. */
QString ProteinXtp::getOnlyAminoAcidSequence() const
{
    QString sequence(this->getSequence());
    // Plain character removal: no regular expression is needed for a literal '*'.
    sequence.remove(QChar('*'));
    return sequence;
}

/**
 * @brief Computes the mass of the protein sequence handled as a single peptide.
 *
 * '*' and 'X' characters are stripped from the sequence before the peptide is
 * built.
 *
 * @return mass reported by pappso::Peptide::getMass()
 * @throws pappso::PappsoException wrapping any PappsoException raised during
 *         the computation, prefixed with the protein accession
 */
pappso::pappso_double ProteinXtp::getMass() const
{
    try {
        pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X",""));
        return peptide.getMass();
    }
    catch (const pappso::PappsoException & error) {
        throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}

/**
 * @brief Digestion sink that only records the sequence of each produced peptide.
 *
 * Every other argument supplied by the enzyme is ignored.
 */
class DigestionHandler: public pappso::EnzymeProductInterface
{
public:
    void setPeptide(std::int8_t /*sequence_database_id*/,
                    const pappso::ProteinSp & /*protein_sp*/,
                    bool /*is_decoy*/,
                    const QString& peptide,
                    unsigned int /*start*/,
                    bool /*is_nter*/,
                    unsigned int /*missed_cleavage_number*/,
                    bool /*semi_enzyme*/) override
    {
        _peptide_list.append(peptide);
    }

    /// Peptide sequences collected so far, in the order they were received.
    QStringList _peptide_list;
};

/**
 * @brief Counts digestion peptides whose mass lies strictly between 800 and 2500.
 *
 * The sequence (without '*' and 'X') is digested with a default-constructed
 * pappso::Enzyme (presumably trypsin, given the function name; confirm against
 * pappsomspp) with zero missed cleavages.
 *
 * @return number of peptides with 800 < mass < 2500
 * @throws pappso::PappsoException wrapping any PappsoException raised during
 *         digestion or mass computation, prefixed with the protein accession
 */
unsigned int ProteinXtp::countTrypticPeptidesForPAI() const
{
    qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin";
    try {
        pappso::Enzyme kinase;
        kinase.setMiscleavage(0);
        DigestionHandler digestion;
        pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence().replace("X",""));
        kinase.eat(0,protein,false,digestion);

        unsigned int count = 0;
        for (const QString & peptide_str: digestion._peptide_list) {
            pappso::Peptide peptide(peptide_str);
            const pappso::mz mass = peptide.getMass();
            if ((mass > 800) && (mass < 2500)) {
                ++count;
            }
        }

        qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
        return count;
    }
    catch (const pappso::PappsoException & error) {
        throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}

/**
 * @brief Extracts external database cross-references from the accession.
 *
 * The accession is split on '|' and each field is tested against a set of
 * anchored patterns; every match appends one entry to _dbxref_list. The list
 * is not cleared first, so calling this more than once appends again.
 */
void ProteinXtp::parseAccession2dbxref()
{
    // QString::split(const QString &) takes a literal separator, not a regular
    // expression: the separator must be "|" (an escaped "\\|" never matches).
    const QStringList access_list = getAccession().split("|");

    const QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$");
    const QRegExp ncbi_gi("^[0-9]{5,8}$");
    const QRegExp swiss_prot("^P[A-Z0-9]{5}$");
    const QRegExp trembl("^[QOA][A-Z0-9]{5}$");
    const QRegExp ref("^[NZ]P\\_[0-9]{5,8}$");

    for (const QString & accession : access_list) {
        if (atg.indexIn(accession, 0) != -1) {
            // Keep only the part before the first '.' (literal separator, as above).
            const QStringList temp = accession.split(".");
            _dbxref_list.push_back(std::make_pair(ExternalDatabase::AGI_LocusCode, temp.at(0)));
        }
        if (ncbi_gi.indexIn(accession, 0) != -1) {
            _dbxref_list.push_back(std::make_pair(ExternalDatabase::NCBI_gi, accession));
        }
        if (swiss_prot.indexIn(accession, 0) != -1) {
            _dbxref_list.push_back(std::make_pair(ExternalDatabase::SwissProt, accession));
        }
        if (trembl.indexIn(accession, 0) != -1) {
            _dbxref_list.push_back(std::make_pair(ExternalDatabase::TrEMBL, accession));
        }
        if (ref.indexIn(accession, 0) != -1) {
            _dbxref_list.push_back(std::make_pair(ExternalDatabase::ref, accession));
        }
    }
}

/** @brief Returns the cross-references collected by parseAccession2dbxref(). */
const std::list<std::pair<ExternalDatabase, QString>> & ProteinXtp::getDbxrefList() const
{
    return _dbxref_list;
}