Skip to content
Snippets Groups Projects
Commit 4fe5dbdb authored by Olivier Langella
Browse files

parsing accession to find dbxref list

parent 5d349337
No related branches found
No related tags found
No related merge requests found
......@@ -76,9 +76,9 @@ QString ProteinXtp::getOnlyAminoAcidSequence() const {
pappso::pappso_double ProteinXtp::getMass() const {
try {
pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X",""));
return peptide.getMass();
pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X",""));
return peptide.getMass();
}
catch (pappso::PappsoException error) {
throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
......@@ -97,27 +97,64 @@ public:
unsigned int ProteinXtp::countTrypticPeptidesForPAI() const {
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin";
try {
pappso::Enzyme kinase;
kinase.setMiscleavage(0);
DigestionHandler digestion;
pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence().replace("X",""));
kinase.eat(0,protein,false,digestion);
unsigned int count = 0;
for (const QString & peptide_str: digestion._peptide_list) {
pappso::Peptide peptide(peptide_str);
pappso::mz mass= peptide.getMass();
if ((mass > 800) && (mass < 2500)) {
count ++;
pappso::Enzyme kinase;
kinase.setMiscleavage(0);
DigestionHandler digestion;
pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence().replace("X",""));
kinase.eat(0,protein,false,digestion);
unsigned int count = 0;
for (const QString & peptide_str: digestion._peptide_list) {
pappso::Peptide peptide(peptide_str);
pappso::mz mass= peptide.getMass();
if ((mass > 800) && (mass < 2500)) {
count ++;
}
}
}
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
return count;
qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end";
return count;
}
catch (pappso::PappsoException error) {
throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat()));
}
}
void ProteinXtp::parseAccession2dbxref() {
QStringList access_list = getAccession().split("\\|");
// if (access.length == 1)
QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$");
QRegExp ncbi_gi("^[0-9]{5,8}$");
QRegExp swiss_prot("^P[A-Z0-9]{5}$");
QRegExp trembl("^[QOA][A-Z0-9]{5}$");
QRegExp ref ("^[NZ]P\\_[0-9]{5,8}$");
for (QString & accession :access_list) {
if (atg.indexIn(accession, 0) != -1) {
QStringList temp = accession.split("\\.");
_dbxref_list.push_back(std::make_pair(ExternalDatabase::AGI_LocusCode, temp.at(0)));
}
if (ncbi_gi.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(std::make_pair(ExternalDatabase::NCBI_gi, accession));
}
if (swiss_prot.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(std::make_pair(ExternalDatabase::SwissProt, accession));
}
if (trembl.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(std::make_pair(ExternalDatabase::TrEMBL, accession));
}
if (ref.indexIn(accession, 0) != -1) {
_dbxref_list.push_back(std::make_pair(ExternalDatabase::ref, accession));
}
}
}
const std::list<std::pair<ExternalDatabase, QString>> & ProteinXtp::getDbxrefList() const {
return _dbxref_list;
}
......@@ -23,7 +23,9 @@
#include <pappsomspp/protein/protein.h>
#include <pappsomspp/types.h>
#include <list>
#include "sequencedatabase.h"
#include "../utils/types.h"
#ifndef PROTEIN_XTP_H
#define PROTEIN_XTP_H
......@@ -61,11 +63,18 @@ public:
/** @brief get protein mass in dalton
*/
pappso::pappso_double getMass() const;
/** @brief look for external database accessions in the accession text
*/
void parseAccession2dbxref();
/** @brief get the external database references collected by parseAccession2dbxref
* @return list of (external database, accession) pairs
*/
const std::list<std::pair<ExternalDatabase, QString>> & getDbxrefList() const;
private:
SequenceDatabase * _p_sequence_database;
bool _is_decoy=false;
bool _is_contaminant=false;
std::list<std::pair<ExternalDatabase, QString>> _dbxref_list;
};
#endif // PROTEIN_XTP_H
This diff is collapsed.
......@@ -51,6 +51,8 @@ private :
void writeOboModif(pappso::AaModificationP mod);
void writeCvParam(QString acc, QString value, QString description);
void writeSequence(ProteinMatch * p_protein_match);
void writeProject();
void writeIdentMethod();
private :
QFile * _output_file;
......
......@@ -88,6 +88,7 @@ void ProteinStore::setProteinInformations(ProteinXtpSp & peptide_in) {
peptide_in.get()->setIsContaminant(false);
peptide_in.get()->setIsDecoy(false);
QString accession = peptide_in.get()->getAccession();
peptide_in.get()->parseAccession2dbxref();
if ((!_regexp_contaminant.isEmpty()) && (_regexp_contaminant.indexIn(accession, 0)>-1)) {
//qDebug() << "ProteinStore::setProteinInformations is contaminant " << accession;
......
......@@ -28,6 +28,17 @@
#include <cstdint>
/*********** enumerations *********************************/
/** \brief external database references
*
* Identifies the external database an accession belongs to.
*/
enum class ExternalDatabase {
AGI_LocusCode, ///< AGI locus code
NCBI_gi, ///< NCBI gi number
SwissProt, ///< Swiss-Prot accession
TrEMBL, ///< TrEMBL accession
ref ///< "ref" accession
};
/** \def IdentificationEngine identification engine
*
*/
......
#include "utils.h"
/** @brief build the EBI OLS URL of a modification term
 * @param psimod_accession term accession; every ":" is replaced by "_" before
 *        it is inserted in the term IRI
 * @return URL of the term page in the "mod" ontology
 */
const QUrl Utils::getOlsUrl(QString psimod_accession) {
    // the parameter is a by-value copy, so it can be edited in place
    psimod_accession.replace(":","_");
    const QString term_iri = QString("http://purl.obolibrary.org/obo/%1").arg(psimod_accession);
    const QString ols_address = QString("http://www.ebi.ac.uk/ols/ontologies/mod/terms?iri=%1").arg(term_iri);
    return QUrl(ols_address);
}
/** @brief get the textual name of an external database
 * @param database external database identifier
 * @return database name, or an empty string if the value matches no enumerator
 */
const QString Utils::getDatabaseName(ExternalDatabase database) {
    switch (database) {
    case ExternalDatabase::AGI_LocusCode :
        return QString("AGI_LocusCode");
    case ExternalDatabase::NCBI_gi :
        return QString("NCBI_gi");
    case ExternalDatabase::SwissProt :
        return QString("Swiss-Prot");
    case ExternalDatabase::TrEMBL :
        return QString("TrEMBL");
    case ExternalDatabase::ref :
        return QString("ref");
    }
    // value outside the enumerators: no name available
    return QString();
}
......@@ -22,6 +22,7 @@
******************************************************************************/
#include<QUrl>
#include <QString>
#include "types.h"
#ifndef UTILS_H
#define UTILS_H
......@@ -29,6 +30,7 @@ class Utils
{
public:
/** @brief build the EBI OLS URL for a modification accession
*/
static const QUrl getOlsUrl(QString modification);
/** @brief get the textual name of an external database
*/
static const QString getDatabaseName(ExternalDatabase database);
};
#endif // UTILS_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment