Newer
Older
/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "proteinxtp.h"
Olivier Langella
committed
#include <pappsomspp/pappsoexception.h>
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/**
 * Default constructor: no member is explicitly initialised here.
 *
 * NOTE(review): `database` is given no value by this constructor; callers
 * presumably assign one before using getUrl() - confirm against the header.
 */
DbXref::DbXref() = default;
/**
 * Builds a cross-reference from a database identifier and an accession.
 *
 * @param database_in  external database the accession refers to
 * @param accession_in accession string, stored unchanged
 */
DbXref::DbXref(ExternalDatabase database_in, const QString & accession_in)
    : database(database_in),
      accession(accession_in)
{
}
/**
 * Copy constructor: duplicates the database identifier and the accession of
 * @p other.
 */
DbXref::DbXref(const DbXref & other)
    : database(other.database),
      accession(other.accession)
{
}
/**
 * Builds the web address associated with this cross-reference.
 *
 * The accession is substituted into a URL template selected by `database`.
 * For ExternalDatabase::OboPsiMod every ":" of the accession is replaced by
 * "_" before it is embedded in the ontology IRI.
 *
 * @return the resulting URL; it is left default-constructed for
 *         ExternalDatabase::ref and for any value not handled by the switch
 */
const QUrl DbXref::getUrl() const {
    qDebug() << "DbXref::getUrl " << accession << " "<< static_cast<std::uint8_t>(database);

    // Address to load into the QUrl; stays empty when no template applies.
    QString address;
    switch (database) {
    case ExternalDatabase::AGI_LocusCode:
        address = QString("http://www.arabidopsis.org/servlets/TairObject?type=locus&name=%1").arg(accession);
        break;
    case ExternalDatabase::NCBI_gi:
        address = QString("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&dopt=GenBank&list_uids=%1").arg(accession);
        break;
    case ExternalDatabase::SwissProt:
    case ExternalDatabase::TrEMBL:
        // both databases are addressed through the same URL template
        address = QString("http://www.uniprot.org/uniprot/%1").arg(accession);
        break;
    case ExternalDatabase::ref:
        // no URL template for this database
        break;
    case ExternalDatabase::OboPsiMod: {
        const QString iri = QString("http://purl.obolibrary.org/obo/%1").arg(QString(accession).replace(":","_"));
        address = QString("http://www.ebi.ac.uk/ols/ontologies/mod/terms?iri=%1").arg(iri);
        break;
    }
    }

    QUrl url;
    if (!address.isEmpty()) {
        url.setUrl(address);
    }
    qDebug() << "DbXref::getUrl end " << url;
    return url;
}
/**
 * Default constructor: explicitly invokes the pappso::Protein default
 * constructor and does nothing else.
 *
 * NOTE(review): `_is_decoy` and `_is_contaminant` are not assigned here;
 * presumably they have in-class initialisers - confirm in the header.
 */
ProteinXtp::ProteinXtp()
    : pappso::Protein()
{
}
/**
 * Copy constructor: copies the pappso::Protein part of @p other together
 * with its contaminant and decoy flags.
 *
 * NOTE(review): `_dbxref_list` is not copied here, so the copy starts with
 * whatever the member's default state is - confirm this is intended.
 */
ProteinXtp::ProteinXtp(const ProteinXtp& other)
    : pappso::Protein(other)
{
    this->_is_contaminant = other._is_contaminant;
    this->_is_decoy = other._is_decoy;
}
/** Destructor: performs no work of its own. */
ProteinXtp::~ProteinXtp() = default;
/**
 * Equality test, delegated entirely to pappso::Protein::operator==.
 * The decoy and contaminant flags are not compared here.
 *
 * @param other protein to compare with
 * @return the result of the base-class comparison
 */
bool ProteinXtp::operator==(const ProteinXtp& other) const
{
    return this->pappso::Protein::operator==(other);
}
/**
 * Creates a shared pointer owning a copy of this protein (built with the
 * copy constructor).
 *
 * @return shared pointer to the newly allocated copy
 */
ProteinXtpSp ProteinXtp::makeProteinXtpSp() const
{
    ProteinXtpSp copy_sp = std::make_shared<ProteinXtp>(*this);
    return copy_sp;
}
void ProteinXtp::setCompleteDescription(const QString & full_description) {
setAccession (full_description.simplified().section(" ", 0,0));
setDescription (full_description.simplified().section(" ", 1));
void ProteinXtp::setIsContaminant(bool conta) {
_is_contaminant = conta;
}
void ProteinXtp::setIsDecoy(bool conta) {
_is_decoy = conta;
}
bool ProteinXtp::isContaminant() const {
return _is_contaminant;
}
bool ProteinXtp::isDecoy() const {
return _is_decoy;
}
/**
 * Returns the protein sequence with every '*' character removed.
 *
 * @return copy of getSequence() without any '*'
 */
QString ProteinXtp::getOnlyAminoAcidSequence() const {
    QString sequence(this->getSequence());
    // a literal character needs no regular expression: remove it directly
    sequence.remove(QChar('*'));
    return sequence;
}
/**
 * Computes the mass of the protein from its amino acid sequence.
 *
 * The sequence returned by getOnlyAminoAcidSequence() has every "X" removed
 * before a pappso::Peptide is built from it; the peptide's mass is returned.
 *
 * @return mass reported by pappso::Peptide::getMass()
 * @throws pappso::PappsoException when a PappsoException is raised while
 *         computing; the new message carries the protein accession and the
 *         original error text
 */
pappso::pappso_double ProteinXtp::getMass() const {
    try {
        pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X",""));
        return peptide.getMass();
    }
    // caught by const reference: no copy and no slicing of derived exceptions
    catch (const pappso::PappsoException & error) {
        throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}
class DigestionHandler: public pappso::EnzymeProductInterface {
public:
void setPeptide(std::int8_t sequence_database_id, const pappso::ProteinSp & protein_sp, bool is_decoy, const QString& peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) override {
_peptide_list.append(peptide);
};
QStringList _peptide_list;
};
unsigned int ProteinXtp::countTrypticPeptidesForPAI() const {
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin";
Olivier Langella
committed
try {
pappso::Enzyme kinase;
kinase.setMiscleavage(0);
DigestionHandler digestion;
pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(*this);
kinase.eat(0,protein,false,digestion);
unsigned int count = 0;
for (const QString & peptide_str: digestion._peptide_list) {
pappso::Peptide peptide(peptide_str);
pappso::mz mass= peptide.getMass();
if ((mass > 800) && (mass < 2500)) {
count ++;
}
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
return count;
Olivier Langella
committed
}
catch (pappso::PappsoException error) {
throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
}
/**
 * Appends to _dbxref_list the database cross-references recognised in the
 * entries of access_list.
 *
 * Each entry is matched against accession patterns (AGI locus code,
 * SwissProt, TrEMBL, "NP_"/"ZP_" references). For entries after the first,
 * the preceding entry ("sp", "tr" or "gb") is also used as a database hint.
 * When more than one cross-reference has been collected, the list is sorted
 * by (database, accession) and duplicates are removed.
 *
 * NOTE(review): in this view the declarations of `access_list`, `accession`
 * and `temp` are not visible and several closing braces appear to be
 * missing - check the complete file before changing this function.
 */
void ProteinXtp::parseAccession2dbxref() {
qDebug() << "ProteinXtp::parseAccession2dbxref begin" ;
// if (access.length == 1)
// "AT", one of M/C/1-5, "G", five digits, then an optional "." and an optional digit (letters in either case)
QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$");
// 5 to 8 digits; only consulted below when the preceding entry is "gb"
QRegExp ncbi_gi("^[0-9]{5,8}$");
// "P" followed by 5 uppercase letters or digits
QRegExp swiss_prot("^P[A-Z0-9]{5}$");
// "Q", "O" or "A" followed by 5 uppercase letters or digits
QRegExp trembl("^[QOA][A-Z0-9]{5}$");
// "NP_" or "ZP_" followed by 5 to 8 digits
QRegExp ref ("^[NZ]P\\_[0-9]{5,8}$");
// NOTE(review): `i` is unsigned while the type of access_list.size() is not visible here - confirm the comparison is safe
for (unsigned int i=0; i < access_list.size(); i++) {
accession = access_list.at(i);
qDebug() << "ProteinXtp::parseAccession2dbxref accession " << accession;
if (atg.indexIn(accession, 0) != -1) {
// NOTE(review): `temp` is not defined in the visible lines; presumably derived from `accession` - confirm
_dbxref_list.push_back(DbXref(ExternalDatabase::AGI_LocusCode, temp.at(0)));
if (swiss_prot.indexIn(accession, 0) != -1) {
qDebug() << "ProteinXtp::parseAccession2dbxref accession SwissProt " << accession;
_dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
if (trembl.indexIn(accession, 0) != -1) {
qDebug() << "ProteinXtp::parseAccession2dbxref accession TrEMBL " << accession;
_dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
}
if (ref.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(DbXref(ExternalDatabase::ref, accession));
continue;
}
// from the second entry on, the previous entry is used as a database prefix
if (i > 0) {
// "sp" / "tr" followed by a 6-character accession
if ((access_list.at(i-1) == "sp")&&(accession.size() == 6)) {
_dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
}
if ((access_list.at(i-1) == "tr")&&(accession.size() == 6)) {
_dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
}
// "gb" followed by an entry matching the ncbi_gi pattern
if ((access_list.at(i-1) == "gb")&&(ncbi_gi.indexIn(accession, 0) != -1)) {
_dbxref_list.push_back(DbXref(ExternalDatabase::NCBI_gi, accession));
}
}
if (_dbxref_list.size() > 1) {
// sort first: std::list::unique only removes consecutive equal elements
_dbxref_list.sort([] (const DbXref & first, const DbXref & second)
{
return std::tie(first.database, first.accession) < std::tie(second.database, second.accession);
});
_dbxref_list.unique([] (const DbXref & first, const DbXref & second)
{
return ( first.database == second.database ) && (first.accession == second.accession);
});
qDebug() << "ProteinXtp::parseAccession2dbxref end" ;
const std::list<DbXref> & ProteinXtp::getDbxrefList() const {