/*******************************************************************************
 * Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
 *
 * This file is part of XTPcpp.
 *
 *     XTPcpp is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     XTPcpp is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
 ******************************************************************************/

#include "proteinxtp.h"

#include <cstdint>
#include <memory>
#include <tuple>

#include <pappsomspp/protein/enzyme.h>
#include <pappsomspp/pappsoexception.h>

/** @brief Builds an empty cross reference (members are left to their default state). */
DbXref::DbXref()
{
}

/**
 * @brief Builds a cross reference to an external database entry.
 * @param database_in  external database the accession belongs to
 * @param accession_in accession (identifier) inside that database
 */
DbXref::DbXref(ExternalDatabase database_in, const QString & accession_in)
    : database(database_in), accession(accession_in)
{
}

/** @brief Copies the database and accession of @p other. */
DbXref::DbXref(const DbXref & other)
    : database(other.database), accession(other.accession)
{
}

/**
 * @brief Builds the web URL describing this cross reference.
 * @return the URL for the referenced entry; an empty (default constructed)
 *         QUrl for ExternalDatabase::ref, for which no URL is set.
 */
const QUrl DbXref::getUrl() const
{
    QUrl url;
    qDebug() << "DbXref::getUrl " << accession << " "<< static_cast<std::uint8_t>(database);
    switch (database)
    {
    case ExternalDatabase::AGI_LocusCode :
        url.setUrl(QString("http://www.arabidopsis.org/servlets/TairObject?type=locus&name=%1").arg(accession));
        break;
    case ExternalDatabase::NCBI_gi :
        url.setUrl(QString("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&dopt=GenBank&list_uids=%1").arg(accession));
        break;
    // SwissProt and TrEMBL entries are both served by the same UniProt URL
    case ExternalDatabase::SwissProt :
    case ExternalDatabase::TrEMBL :
        url.setUrl(QString("http://www.uniprot.org/uniprot/%1").arg(accession));
        break;
    case ExternalDatabase::ref :
        // no URL is produced for this database: the returned QUrl stays empty
        break;
    case ExternalDatabase::OboPsiMod :
    {
        // braces give the local variable its own scope, so that adding a
        // case label after this one can never jump over its initialization
        const QString iri(QString("http://purl.obolibrary.org/obo/%1").arg(QString(accession).replace(":","_")));
        url.setUrl(QString("http://www.ebi.ac.uk/ols/ontologies/mod/terms?iri=%1").arg(iri));
        break;
    }
    }
    qDebug() << "DbXref::getUrl end " << url;
    return url;
}

/** @brief Builds an empty protein. */
ProteinXtp::ProteinXtp()
    : pappso::Protein()
{
}

/**
 * @brief Copies @p other: the pappso::Protein part, the decoy and contaminant
 *        flags and the sequence database pointer.
 */
ProteinXtp::ProteinXtp(const ProteinXtp & other)
    : pappso::Protein(other)
{
    _is_decoy = other._is_decoy;
    _is_contaminant = other._is_contaminant;
    _p_sequence_database = other._p_sequence_database;
}

ProteinXtp::~ProteinXtp()
{
}

/** @brief Equality is delegated to pappso::Protein::operator==. */
bool ProteinXtp::operator==(const ProteinXtp & other) const
{
    return pappso::Protein::operator==(other);
}

/** @brief Returns a shared pointer owning a copy of this protein. */
ProteinXtpSp ProteinXtp::makeProteinXtpSp() const
{
    return std::make_shared<ProteinXtp>(*this);
}

/**
 * @brief Sets accession and description from a single description line.
 *
 * Whitespace is normalized with QString::simplified(); the first
 * space-separated word becomes the accession, everything after it becomes
 * the description.
 *
 * @param full_description complete description line
 */
void ProteinXtp::setCompleteDescription(const QString & full_description)
{
    // normalize whitespace once, then split on the first blank
    const QString simplified_description = full_description.simplified();
    setAccession(simplified_description.section(" ", 0, 0));
    setDescription(simplified_description.section(" ", 1));
}

/** @brief Sets the contaminant flag. */
void ProteinXtp::setIsContaminant(bool conta)
{
    _is_contaminant = conta;
}

/** @brief Sets the decoy flag. */
void ProteinXtp::setIsDecoy(bool conta)
{
    _is_decoy = conta;
}

/** @brief Returns the contaminant flag. */
bool ProteinXtp::isContaminant() const
{
    return _is_contaminant;
}

/** @brief Returns the decoy flag. */
bool ProteinXtp::isDecoy() const
{
    return _is_decoy;
}

/** @brief Returns the protein sequence with every '*' character removed. */
QString ProteinXtp::getOnlyAminoAcidSequence() const
{
    QString sequence(this->getSequence());
    // plain character removal: no regular expression is needed for a literal '*'
    sequence.remove(QLatin1Char('*'));
    return sequence;
}

/**
 * @brief Computes the mass of the protein sequence.
 *
 * '*' and 'X' characters are removed from the sequence before the mass is
 * computed with pappso::Peptide.
 *
 * @return the mass reported by pappso::Peptide::getMass()
 * @throws pappso::PappsoException if the computation fails; the message
 *         contains the protein accession and the original error
 */
pappso::pappso_double ProteinXtp::getMass() const
{
    try
    {
        pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X",""));
        return peptide.getMass();
    }
    // catch by const reference: no copy and no slicing of derived exceptions
    catch (const pappso::PappsoException & error)
    {
        throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}

/**
 * @brief Digestion sink that only stores the sequence of each peptide it
 *        receives in @c _peptide_list.
 */
class DigestionHandler: public pappso::EnzymeProductInterface
{
public:
    /** @brief Appends @p peptide to the list; every other argument is ignored. */
    void setPeptide(std::int8_t /*sequence_database_id*/,
                    const pappso::ProteinSp & /*protein_sp*/,
                    bool /*is_decoy*/,
                    const QString & peptide,
                    unsigned int /*start*/,
                    bool /*is_nter*/,
                    unsigned int /*missed_cleavage_number*/,
                    bool /*semi_enzyme*/) override
    {
        _peptide_list.append(peptide);
    }

    /// peptide sequences collected so far, in reception order
    QStringList _peptide_list;
};

/**
 * @brief Counts the digestion products of this protein whose mass is strictly
 *        between 800 and 2500.
 *
 * The protein is digested with a default constructed pappso::Enzyme, without
 * missed cleavage (presumably trypsin, as the function name suggests -
 * TODO confirm against the pappso::Enzyme default).
 *
 * @return number of peptides in the mass window
 * @throws pappso::PappsoException if digestion or mass computation fails; the
 *         message contains the protein accession and the original error
 */
unsigned int ProteinXtp::countTrypticPeptidesForPAI() const
{
    qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin";
    try
    {
        pappso::Enzyme enzyme;
        enzyme.setMiscleavage(0);
        enzyme.setTakeOnlyFirstWildcard(true);

        DigestionHandler digestion;
        // the enzyme works on a shared pappso::Protein: give it a copy of this one
        pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(*this);
        enzyme.eat(0, protein, false, digestion);

        unsigned int count = 0;
        for (const QString & peptide_str : digestion._peptide_list)
        {
            pappso::Peptide peptide(peptide_str);
            const pappso::mz mass = peptide.getMass();
            if ((mass > 800) && (mass < 2500))
            {
                ++count;
            }
        }

        qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
        return count;
    }
    // catch by const reference: no copy and no slicing of derived exceptions
    catch (const pappso::PappsoException & error)
    {
        throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}

/**
 * @brief Parses the accession to find references to external databases.
 *
 * The accession is split on '|'. Each field is first tested against the
 * accession patterns (AGI locus code, SwissProt, TrEMBL, ref); when none
 * matches, the preceding field ("sp", "tr", "gb") is used as a hint.
 * Every reference found is appended to the cross reference list, which is
 * finally sorted by (database, accession) and stripped of duplicates.
 */
void ProteinXtp::parseAccession2dbxref()
{
    qDebug() << "ProteinXtp::parseAccession2dbxref begin" ;
    const QStringList access_list = getAccession().split("|");

    QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$");
    QRegExp ncbi_gi("^[0-9]{5,8}$");
    QRegExp swiss_prot("^P[A-Z0-9]{5}$");
    QRegExp trembl("^[QOA][A-Z0-9]{5}$");
    QRegExp ref ("^[NZ]P\\_[0-9]{5,8}$");

    for (int i = 0; i < access_list.size(); ++i)
    {
        const QString & accession = access_list.at(i);
        qDebug() << "ProteinXtp::parseAccession2dbxref accession " << accession;

        if (atg.indexIn(accession, 0) != -1)
        {
            // keep the locus code only: drop the optional ".N" suffix
            const QStringList temp = accession.split(".");
            _dbxref_list.push_back(DbXref(ExternalDatabase::AGI_LocusCode, temp.at(0)));
            continue;
        }
        if (swiss_prot.indexIn(accession, 0) != -1)
        {
            qDebug() << "ProteinXtp::parseAccession2dbxref accession SwissProt " << accession;
            _dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
            continue;
        }
        if (trembl.indexIn(accession, 0) != -1)
        {
            qDebug() << "ProteinXtp::parseAccession2dbxref accession TrEMBL " << accession;
            _dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
            continue;
        }
        if (ref.indexIn(accession, 0) != -1)
        {
            _dbxref_list.push_back(DbXref(ExternalDatabase::ref, accession));
            continue;
        }

        if (i > 0)
        {
            // no pattern matched: rely on the database tag found in the previous field
            const QString & database_tag = access_list.at(i-1);
            if ((database_tag == "sp") && (accession.size() == 6))
            {
                _dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
            }
            if ((database_tag == "tr") && (accession.size() == 6))
            {
                _dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
            }
            // NOTE(review): a numeric identifier is expected after a "gb" tag;
            // NCBI headers usually look like "gi|<number>|gb|<accession>", so
            // the intended tag may be "gi" - confirm before changing.
            if ((database_tag == "gb") && (ncbi_gi.indexIn(accession, 0) != -1))
            {
                _dbxref_list.push_back(DbXref(ExternalDatabase::NCBI_gi, accession));
            }
        }
    }

    // De-duplicate once, after every field has been parsed. Doing it inside
    // the loop missed the references added just before a "continue" and
    // sorted the list again on each iteration.
    if (_dbxref_list.size() > 1)
    {
        _dbxref_list.sort([] (const DbXref & first, const DbXref & second)
        {
            return std::tie(first.database, first.accession) < std::tie(second.database, second.accession);
        });
        // std::list::unique only removes adjacent duplicates, hence the sort above
        _dbxref_list.unique([] (const DbXref & first, const DbXref & second)
        {
            return (first.database == second.database) && (first.accession == second.accession);
        });
    }

    qDebug() << "ProteinXtp::parseAccession2dbxref end" ;
}

/** @brief Returns the external database references collected by parseAccession2dbxref(). */
const std::list<DbXref> & ProteinXtp::getDbxrefList() const
{
    return _dbxref_list;
}