/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "proteinxtp.h"
#include <pappsomspp/pappsoexception.h>
/// Default constructor: only invokes the pappso::Protein default constructor.
ProteinXtp::ProteinXtp()
    : pappso::Protein()
{
}
/**
 * Copy constructor.
 *
 * Copies the pappso::Protein base part, the decoy and contaminant flags and
 * the database cross-reference list, so that a copy (for instance the one
 * built by makeProteinXtpSp()) carries the same state as its source.
 *
 * @param other protein to copy from
 */
ProteinXtp::ProteinXtp(const ProteinXtp& other)
    : pappso::Protein(other)
{
    // Assigned in the body rather than in the initializer list so this code
    // does not depend on the member declaration order in the header.
    _is_decoy = other._is_decoy;
    _is_contaminant = other._is_contaminant;
    // The cross-references must follow the protein, otherwise every copy
    // comes out with an empty list.
    _dbxref_list = other._dbxref_list;
}
/// Destructor: nothing to release explicitly.
ProteinXtp::~ProteinXtp() = default;
/// Equality is delegated entirely to pappso::Protein::operator==; the decoy
/// and contaminant flags are not compared here.
bool ProteinXtp::operator==(const ProteinXtp& other) const
{
    const bool same_protein = pappso::Protein::operator==(other);
    return same_protein;
}
/// Returns a shared pointer owning a new ProteinXtp copy-constructed from
/// this object.
ProteinXtpSp ProteinXtp::makeProteinXtpSp() const
{
    ProteinXtpSp copy_sp = std::make_shared<ProteinXtp>(*this);
    return copy_sp;
}
void ProteinXtp::setCompleteDescription(const QString & full_description) {
setAccession (full_description.simplified().section(" ", 0,0));
setDescription (full_description.simplified().section(" ", 1));
void ProteinXtp::setIsContaminant(bool conta) {
_is_contaminant = conta;
}
void ProteinXtp::setIsDecoy(bool conta) {
_is_decoy = conta;
}
bool ProteinXtp::isContaminant() const {
return _is_contaminant;
}
bool ProteinXtp::isDecoy() const {
return _is_decoy;
}
/**
 * Returns the protein sequence with every '*' character removed.
 *
 * The stored sequence (getSequence()) is left untouched; the stripping is
 * done on a local copy.
 *
 * @return copy of the sequence without any '*'
 */
QString ProteinXtp::getOnlyAminoAcidSequence() const {
    QString sequence(this->getSequence());
    // A plain character removal is enough here: no regular expression is
    // needed to drop a literal '*'.
    sequence.remove(QLatin1Char('*'));
    return sequence;
}
/**
 * Computes the mass of this protein.
 *
 * Every "X" is removed from the sequence returned by
 * getOnlyAminoAcidSequence(), a pappso::Peptide is built from the result and
 * the mass of that peptide is returned.
 *
 * @return the value of pappso::Peptide::getMass() for the cleaned sequence
 * @throws pappso::PappsoException when a PappsoException is raised during the
 *         computation; the new message contains the protein accession
 *         followed by the original message
 */
pappso::pappso_double ProteinXtp::getMass() const {
    try {
        pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X", ""));
        return peptide.getMass();
    }
    // Caught by const reference to avoid copying (and slicing) the exception.
    catch (const pappso::PappsoException & error) {
        throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}
/**
 * Digestion sink that records the sequence of every peptide it receives.
 *
 * Only the peptide string passed to setPeptide() is kept; every other
 * argument of the callback is ignored.
 */
class DigestionHandler : public pappso::EnzymeProductInterface
{
public:
    /// Appends the received peptide sequence to _peptide_list.
    void setPeptide(std::int8_t sequence_database_id,
                    const pappso::ProteinSp & protein_sp,
                    bool is_decoy,
                    const QString& peptide,
                    unsigned int start,
                    bool is_nter,
                    unsigned int missed_cleavage_number,
                    bool semi_enzyme) override
    {
        _peptide_list << peptide;
    }

    /// Peptide sequences collected so far, in the order they were received.
    QStringList _peptide_list;
};
unsigned int ProteinXtp::countTrypticPeptidesForPAI() const {
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin";
Olivier Langella
committed
try {
pappso::Enzyme kinase;
kinase.setMiscleavage(0);
DigestionHandler digestion;
pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence().replace("X",""));
kinase.eat(0,protein,false,digestion);
unsigned int count = 0;
for (const QString & peptide_str: digestion._peptide_list) {
pappso::Peptide peptide(peptide_str);
pappso::mz mass= peptide.getMass();
if ((mass > 800) && (mass < 2500)) {
count ++;
}
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
return count;
Olivier Langella
committed
}
catch (pappso::PappsoException error) {
throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
}
// Scans a list of accession strings and appends a DbXref to _dbxref_list for
// each one that matches a known pattern (AGI locus code, NCBI gi, SwissProt,
// TrEMBL, RefSeq-style "NP_"/"ZP_"), then sorts the list and removes
// duplicates.
//
// NOTE(review): this listing looks truncated. `access_list` is used below but
// never declared in the visible code, several closing braces are absent and
// the first lambda has no opening brace. Restore the missing lines from
// version control before changing this function.
void ProteinXtp::parseAccession2dbxref() {
qDebug() << "ProteinXtp::parseAccession2dbxref begin" ;
// if (access.length == 1)
// One anchored pattern per supported external database.
QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$");
QRegExp ncbi_gi("^[0-9]{5,8}$");
QRegExp swiss_prot("^P[A-Z0-9]{5}$");
QRegExp trembl("^[QOA][A-Z0-9]{5}$");
QRegExp ref ("^[NZ]P\\_[0-9]{5,8}$");
for (QString & accession :access_list) {
qDebug() << "ProteinXtp::parseAccession2dbxref accession " << accession;
if (atg.indexIn(accession, 0) != -1) {
// NOTE(review): QString::split(const QString &) takes a literal separator,
// not a regular expression, so "\\." searches for a backslash followed by a
// dot and temp.at(0) is presumably the whole accession, version suffix
// included. Splitting on "." looks like the intent — confirm.
QStringList temp = accession.split("\\.");
_dbxref_list.push_back(DbXref(ExternalDatabase::AGI_LocusCode, temp.at(0)));
}
if (ncbi_gi.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(DbXref(ExternalDatabase::NCBI_gi, accession));
if (swiss_prot.indexIn(accession, 0) != -1) {
qDebug() << "ProteinXtp::parseAccession2dbxref accession SwissProt " << accession;
_dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
if (trembl.indexIn(accession, 0) != -1) {
qDebug() << "ProteinXtp::parseAccession2dbxref accession TrEMBL " << accession;
_dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
}
if (ref.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(DbXref(ExternalDatabase::ref, accession));
// Order by (database, accession) so that equal entries become adjacent.
_dbxref_list.sort([] (const DbXref & first, const DbXref & second)
return std::tie(first.database, first.accession) < std::tie(second.database, second.accession);
});
// Drop adjacent entries that share both database and accession.
_dbxref_list.unique([] (const DbXref & first, const DbXref & second)
{
return ( first.database == second.database ) && (first.accession == second.accession);
});
}
qDebug() << "ProteinXtp::parseAccession2dbxref end" ;
const std::list<DbXref> & ProteinXtp::getDbxrefList() const {