// proteinxtp.cpp

/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
*     XTPcpp is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     XTPcpp is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/

#include "proteinxtp.h"
#include <pappsomspp/protein/enzyme.h>

/**
 * Default constructor.
 *
 * Neither `database` nor `accession` is given an explicit initializer here.
 * NOTE(review): unless the class declaration provides a default member
 * initializer, `database` is not set by this constructor - confirm in
 * proteinxtp.h.
 */
DbXref::DbXref()
{
}
/**
 * Builds a cross reference from its two components.
 *
 * @param database_in  external database the accession belongs to
 * @param accession_in accession string inside that database
 */
DbXref::DbXref(ExternalDatabase database_in, const QString & accession_in)
    : database(database_in),
      accession(accession_in)
{
}

/**
 * Copy constructor: duplicates the database identifier and the accession.
 *
 * @param other cross reference to copy from
 */
DbXref::DbXref(const DbXref & other)
    : database(other.database),
      accession(other.accession)
{
}
/**
 * Builds the web URL that describes this cross reference.
 *
 * The URL template is selected by `database` and filled in with `accession`.
 * For ExternalDatabase::ref, and for any enumerator not listed in the switch,
 * the returned QUrl is left default constructed.
 *
 * @return the URL built for this cross reference
 */
const QUrl DbXref::getUrl() const {
    qDebug() << "DbXref::getUrl " << accession << " "<< static_cast<std::uint8_t>(database);

    QUrl url;
    switch (database) {

    case ExternalDatabase::AGI_LocusCode :
        url.setUrl(QString("http://www.arabidopsis.org/servlets/TairObject?type=locus&name=%1").arg(accession));
        break;

    case ExternalDatabase::NCBI_gi :
        url.setUrl(QString("http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=protein&dopt=GenBank&list_uids=%1").arg(accession));
        break;

    // SwissProt and TrEMBL entries use the same URL template
    case ExternalDatabase::SwissProt :
    case ExternalDatabase::TrEMBL :
        url.setUrl(QString("http://www.uniprot.org/uniprot/%1").arg(accession));
        break;

    case ExternalDatabase::ref :
        // no URL is built for this database
        break;

    case ExternalDatabase::OboPsiMod : {
        // the ontology IRI uses '_' where the accession uses ':'
        QString term_id(accession);
        term_id.replace(":","_");
        const QString iri = QString("http://purl.obolibrary.org/obo/%1").arg(term_id);
        url.setUrl(QString("http://www.ebi.ac.uk/ols/ontologies/mod/terms?iri=%1").arg(iri));
        break;
    }

    }
    qDebug() << "DbXref::getUrl end " << url;
    return url;
}

/**
 * Default constructor: only default constructs the pappso::Protein base.
 *
 * No ProteinXtp member is assigned here.
 */
ProteinXtp::ProteinXtp()
    : pappso::Protein()
{
}

/**
 * Copy constructor.
 *
 * Copies the pappso::Protein base part, the decoy and contaminant flags and
 * the sequence database pointer of `other`.
 * NOTE(review): `_dbxref_list` is not copied here, so the copy starts with a
 * default constructed list - confirm this is intended.
 *
 * @param other protein to copy from
 */
ProteinXtp::ProteinXtp(const ProteinXtp& other):pappso::Protein(other)
{
    _is_decoy=other._is_decoy;
    _is_contaminant=other._is_contaminant;
    _p_sequence_database = other._p_sequence_database;
}

/**
 * Destructor: nothing to release explicitly in this class.
 */
ProteinXtp::~ProteinXtp()
{
}

/**
 * Equality test, delegated entirely to pappso::Protein::operator==.
 *
 * The members declared by ProteinXtp itself (decoy and contaminant flags,
 * sequence database, cross references) do not take part in the comparison.
 *
 * @param other protein to compare with
 * @return result of the base class comparison
 */
bool ProteinXtp::operator==(const ProteinXtp& other) const
{
    const bool same_protein = pappso::Protein::operator==(other);
    return same_protein;
}

/**
 * Creates a shared pointer owning a copy of this protein.
 *
 * The copy is made with the ProteinXtp copy constructor.
 *
 * @return shared pointer to the newly allocated copy
 */
ProteinXtpSp ProteinXtp::makeProteinXtpSp() const {
    ProteinXtpSp copy_sp = std::make_shared<ProteinXtp>(*this);
    return copy_sp;
}

void ProteinXtp::setCompleteDescription(const QString & full_description) {
    setAccession (full_description.simplified().section(" ", 0,0));
    setDescription (full_description.simplified().section(" ", 1));
Olivier Langella's avatar
Olivier Langella committed
void ProteinXtp::setIsContaminant(bool conta) {
    _is_contaminant = conta;
}
void ProteinXtp::setIsDecoy(bool conta) {
    _is_decoy = conta;
}
bool ProteinXtp::isContaminant() const {
    return _is_contaminant;
}
bool ProteinXtp::isDecoy() const {
    return _is_decoy;
}
/**
 * Returns the protein sequence with every '*' character removed.
 *
 * The stored sequence itself is not modified: the work is done on a copy.
 *
 * @return copy of the sequence without any '*'
 */
QString ProteinXtp::getOnlyAminoAcidSequence() const {
    QString residues_only(this->getSequence());
    residues_only.remove(QChar('*'));
    return residues_only;
}

/**
 * Computes the mass of this protein.
 *
 * The sequence returned by getOnlyAminoAcidSequence() is stripped of every
 * "X" before it is handed to pappso::Peptide, whose getMass() gives the result.
 *
 * @return mass reported by pappso::Peptide for the cleaned sequence
 * @throws pappso::PappsoException rethrown with the protein accession
 *         prepended when the peptide construction or mass computation fails
 */
pappso::pappso_double ProteinXtp::getMass() const {
    try {
        pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X",""));
        return peptide.getMass();
    }
    catch (const pappso::PappsoException & error) {
        // add the accession so the failing protein can be identified
        throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
}
/**
 * Enzyme product sink that records the sequence of every peptide it is given.
 *
 * All other digestion details passed to setPeptide() are ignored.
 */
class DigestionHandler: public pappso::EnzymeProductInterface {
public:
    /**
     * Stores the peptide sequence; every other argument is unused.
     */
    void setPeptide(std::int8_t sequence_database_id,
                    const pappso::ProteinSp & protein_sp,
                    bool is_decoy,
                    const QString& peptide,
                    unsigned int start,
                    bool is_nter,
                    unsigned int missed_cleavage_number,
                    bool semi_enzyme) override
    {
        _peptide_list << peptide;
    }

    /// peptide sequences collected so far, in the order they were received
    QStringList _peptide_list;
};

unsigned int ProteinXtp::countTrypticPeptidesForPAI() const {
    qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin";
        pappso::Enzyme kinase;
        kinase.setMiscleavage(0);
        kinase.setTakeOnlyFirstWildcard(true);
        DigestionHandler digestion;

        pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(*this);
        kinase.eat(0,protein,false,digestion);

        unsigned int count = 0;
        for (const QString & peptide_str: digestion._peptide_list) {
            pappso::Peptide peptide(peptide_str);
            pappso::mz mass= peptide.getMass();
            if ((mass > 800) && (mass < 2500)) {
                count ++;
            }
        qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
        return count;

    }
    catch (pappso::PappsoException error) {
        throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
    }
Olivier Langella's avatar
Olivier Langella committed
}


void ProteinXtp::parseAccession2dbxref() {
Olivier Langella's avatar
Olivier Langella committed
    qDebug() << "ProteinXtp::parseAccession2dbxref begin" ;
Olivier Langella's avatar
Olivier Langella committed
    QStringList access_list = getAccession().split("|");
    // if (access.length == 1)
    QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$");
    QRegExp ncbi_gi("^[0-9]{5,8}$");
    QRegExp swiss_prot("^P[A-Z0-9]{5}$");
    QRegExp trembl("^[QOA][A-Z0-9]{5}$");
    QRegExp ref ("^[NZ]P\\_[0-9]{5,8}$");
Olivier Langella's avatar
Olivier Langella committed
    QString accession;
Olivier Langella's avatar
Olivier Langella committed
    for (unsigned int i=0; i < access_list.size(); i++) {
        accession = access_list.at(i);
Olivier Langella's avatar
Olivier Langella committed
        qDebug() << "ProteinXtp::parseAccession2dbxref accession " << accession;
        if (atg.indexIn(accession, 0) != -1) {
Olivier Langella's avatar
Olivier Langella committed
            QStringList temp = accession.split(".");
Olivier Langella's avatar
Olivier Langella committed
            _dbxref_list.push_back(DbXref(ExternalDatabase::AGI_LocusCode, temp.at(0)));
Olivier Langella's avatar
Olivier Langella committed
            continue;
        if (swiss_prot.indexIn(accession, 0) != -1) {
Olivier Langella's avatar
Olivier Langella committed
            qDebug() << "ProteinXtp::parseAccession2dbxref accession SwissProt " << accession;
Olivier Langella's avatar
Olivier Langella committed
            _dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
Olivier Langella's avatar
Olivier Langella committed
            continue;
        if (trembl.indexIn(accession, 0) != -1) {
Olivier Langella's avatar
Olivier Langella committed
            qDebug() << "ProteinXtp::parseAccession2dbxref accession TrEMBL " << accession;
Olivier Langella's avatar
Olivier Langella committed
            _dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
Olivier Langella's avatar
Olivier Langella committed
            continue;
        }
        if (ref.indexIn(accession, 0) != -1) {
Olivier Langella's avatar
Olivier Langella committed
            _dbxref_list.push_back(DbXref(ExternalDatabase::ref, accession));
Olivier Langella's avatar
Olivier Langella committed
            continue;
        }
        if (i > 0) {
            if ((access_list.at(i-1) == "sp")&&(accession.size() == 6)) {
                _dbxref_list.push_back(DbXref(ExternalDatabase::SwissProt, accession));
            }

            if ((access_list.at(i-1) == "tr")&&(accession.size() == 6)) {
                _dbxref_list.push_back(DbXref(ExternalDatabase::TrEMBL, accession));
            }
            if ((access_list.at(i-1) == "gb")&&(ncbi_gi.indexIn(accession, 0) != -1)) {
                _dbxref_list.push_back(DbXref(ExternalDatabase::NCBI_gi, accession));
            }
Olivier Langella's avatar
Olivier Langella committed

        }

        if (_dbxref_list.size() > 1) {
            _dbxref_list.sort([] (const DbXref & first, const DbXref & second)
            {
                return std::tie(first.database, first.accession) < std::tie(second.database, second.accession);
            });
            _dbxref_list.unique([] (const DbXref & first, const DbXref & second)
            {
                return ( first.database == second.database ) && (first.accession == second.accession);
            });
Olivier Langella's avatar
Olivier Langella committed

    qDebug() << "ProteinXtp::parseAccession2dbxref end" ;
Olivier Langella's avatar
Olivier Langella committed
const std::list<DbXref> & ProteinXtp::getDbxrefList() const {
    return _dbxref_list;
}