Skip to content
Snippets Groups Projects
Commit 02099c63 authored by Olivier Langella's avatar Olivier Langella
Browse files

parsing xpip WIP

parent 319be7eb
No related branches found
No related tags found
No related merge requests found
......@@ -53,13 +53,14 @@ configure_file (${CMAKE_SOURCE_DIR}/src/config.h.cmake ${CMAKE_SOURCE_DIR}/src/c
# File list
# C++ sources of the project; every file must appear exactly once in this list.
SET(CPP_FILES
  core/project.cpp
  core/match.cpp
  core/identification_sources/identificationdatasource.cpp
  core/identification_sources/identificationxtandemfile.cpp
  files/xpipfile.cpp
  input/xpipsaxhandler.cpp
  utils/peptidestore.cpp
  utils/readspectrum.cpp
)
set(QTLIBS ${Qt5Xml_LIBRARIES} ${Qt5Gui_LIBRARIES} ${Qt5Svg_LIBRARIES})
......
......@@ -22,6 +22,7 @@
#include "xpipsaxhandler.h"
#include <pappsomspp/msrun/msrunid.h>
#include <pappsomspp/exception/exceptionnotfound.h>
XpipSaxHandler::XpipSaxHandler(Project * p_project):_p_project(p_project)
{
......@@ -49,10 +50,14 @@ bool XpipSaxHandler::startElement(const QString & namespaceURI, const QString &
//<sample value="P6_08_10"/>
else if (qName == "sample") {
is_ok = startElement_sample(attributes);
} else if (qName == "peptide") {
is_ok = startElement_peptide(attributes);
} else if (qName == "modifs_mass") {
is_ok = startElement_modifs_mass(attributes);
}
_current_text.clear();
}
catch (PappsoException exception_pappso) {
catch (pappso::PappsoException exception_pappso) {
_errorStr = QObject::tr("ERROR in XpipSaxHandler::startElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat());
return false;
}
......@@ -73,6 +78,9 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
{
is_ok = endElement_protein();
}
else if (qName == "peptide") {
is_ok = endElement_peptide();
}
else if (qName == "sequence") {
is_ok = endElement_sequence();
}
......@@ -81,7 +89,7 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
// else if ((_tag_stack.size() > 1) &&
// (_tag_stack[_tag_stack.size() - 2] == "detection_moulon"))
}
catch (PappsoException exception_pappso) {
catch (pappso::PappsoException exception_pappso) {
_errorStr = QObject::tr("ERROR in XpipSaxHandler::endElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat());
return false;
}
......@@ -96,34 +104,78 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
return is_ok;
}
/**
 * \brief handles the opening of a <modifs_mass> tag
 *
 * Example of the enclosing XML:
 *   <modifs_list_mass>
 *     <modifs_mass modvalue="-18.01056"/>
 *     <modifs_mass modvalue="15.99491"/>
 *     <modifs_mass modvalue="57.02146"/>
 *   </modifs_list_mass>
 *
 * The textual "modvalue" attribute is converted to a mass, resolved to a
 * modification with getAaModificationP() and remembered in
 * _map_massstr_aamod under the (whitespace simplified) attribute text.
 *
 * \param attributes attributes of the <modifs_mass> tag
 * \return always true; getAaModificationP() throws when no modification
 *         matches the mass, in which case nothing is stored
 */
bool XpipSaxHandler::startElement_modifs_mass(QXmlAttributes attributes) {
    qDebug() << "startElement_modifs_mass ";

    // the attribute text itself is the map key, so keep it as a string
    const QString modvalue_text = attributes.value("modvalue").simplified();
    const pappso::mz modvalue_mass = modvalue_text.toDouble();

    // resolve first: if this throws, the map is left untouched
    pappso::AaModificationP resolved_modification = getAaModificationP(modvalue_mass);
    _map_massstr_aamod.insert(modvalue_text, resolved_modification);

    qDebug() << "startElement_modifs_mass end" ;
    return true;
}
//<sample value="P6_21_23"/>
/**
 * \brief handles the opening of a <sample> tag, e.g. <sample value="P6_21_23"/>
 *
 * Builds an MS run identifier from the "value" attribute and registers it
 * in the project. The same (whitespace simplified) attribute text is used
 * both as XML id and as file name of the MS run.
 *
 * \param attributes attributes of the <sample> tag
 * \return always true
 */
bool XpipSaxHandler::startElement_sample(QXmlAttributes attributes) {
    qDebug() << "startElement_sample ";

    // read the attribute once: it feeds both the XML id and the file name
    const QString sample_name = attributes.value("value").simplified();

    // single, namespace-qualified declaration (the header no longer pulls
    // the whole pappso namespace into scope)
    pappso::MsRunId ms_run;
    ms_run.setXmlId(sample_name);
    ms_run.setFilename(sample_name);
    _p_project->addMsRunIdSp(ms_run.makeMsRunIdSp());

    qDebug() << "startElement_sample end" ;
    return true;
}
/*
* <protein peptide_number="268" evalue="-432.77353" URL="Genome_Z_mays_5a.fasta" description="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC 4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; parent_gene=GRMZM2G083841">
<protein_evalue evalue="-399.36093" sample="20120906_balliau_extract_1_A02_urzb-1"/>
<protein_evalue evalue="-384.54382" sample="20120906_balliau_extract_1_A01_urnb-1"/>
<sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKVSEDDKLIEYDALLVDRFLNILQDLHGPSLREFVQECYEVSADYEGKGDTTKLGELGAKLTGLAPADAILVASSILHMLNLANLAEEVQIAHRRRNSKLKKGGFADEGSATTESDIEETLKRLVSEVGKSPEEVFEALKNQTVDLVFTAHPTQSARRSLLQKNARIRNCLTQLNAKDITDDDKQELDEALQREIQAAFRTDEIRRAQPTPQDEMRYGMSYIHETVWKGVPKFLRRVDTALKNIGINERLPYNVSLIRFSSWMGGDRDGNPRVTPEVTRDVCLLARMMAANLYIDQIEELMFELSMWRCNDELRVRAEELHSSSGSKVTKYYIEFWKQIPPNEPYRVILGHVRDKLYNTRERARHLLASGVSEISAESSFTSIEEFLEPLELCYKSLCDCGDKAIADGSLLDLLRQVFTFGLSLVKLDIRQESERHTDVIDAITTHLGIGSYREWPEDKRQEWLLSELRGKRPLLPPDLPQTDEIADVIGAFHVLAELPPDSFGPYIISMATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFHGRGGTVGRGGGPTHLAILSQPPDTINGSIRVTVQGEVIEFCFGEEHLCFQTLQRFTAATLEHGMHPPVSPKPEWRKLMDEMAVVATEEYRSVVVKEARFVEYFRSATPETEYGRMNIGSRPAKRRPGGGITTLRAIPWIFSWTQTRFHLPVWLGVGAAFKFAIDKDVRNFQVLKEMYNEWPFFRVTLDLLEMVFAKGDPGIAGLYDELLVAEELKPFGKQLRDKYVETQQLLLQIAGHKDILEGDPFLKQGLVLRNPYITTLNVFQAYTLKRIRDPNFKVTPQPPLSKEFADENKPAGLVKLNPASEYPPGLEDTLILTMKGIAAGMQNTG</sequence>
</protein>
*/
/**
 * \brief handles the opening of a <protein> tag
 *
 * Example (sequence shortened):
 *   <protein peptide_number="268" evalue="-432.77353" URL="Genome_Z_mays_5a.fasta"
 *            description="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 ...">
 *     <protein_evalue evalue="-399.36093" sample="20120906_balliau_extract_1_A02_urzb-1"/>
 *     <sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKV...</sequence>
 *   </protein>
 *
 * Stores the "description" attribute in the current protein and uses the
 * first space separated word of that description as accession.
 *
 * \param attributes attributes of the <protein> tag
 * \return always true
 */
bool XpipSaxHandler::startElement_protein(QXmlAttributes attributes) {
    qDebug() << "startElement_protein ";

    const QString description_text = attributes.value("description").simplified();
    _current_protein.setDescription(description_text);

    // the accession is derived from the description as stored in the protein
    const QStringList description_words = _current_protein.getDescription().split(" ");
    _current_protein.setAccession(description_words.at(0));

    qDebug() << "startElement_protein end" ;
    return true;
}
/**
 * \brief handles the opening of a <peptide> tag
 *
 * Example:
 *   <peptide sample="20120208_Blein_rep4_1_B03_DW21-4-26-328"
 *            sample_file="/gorgone/pappso/moulon/users/Melisande/test-param-masschroq/20120208_Blein_rep4_1_B03_DW21-4-26-328.xml"
 *            scan="2589" scan_in_xtandem="2589" RT="603" mhplus_obser="873.5401"
 *            mhplus_theo="873.5408" deltamass="-7.0E-4" sequence="IATAIEKK"
 *            pre="NPAR" post="AADA" start="331" stop="338" charge="2"
 *            evalue="9.2E-4" hypercorr="35.2" validate="true">
 *     <modifs><modif aa="M" modvalue="15.99491" posi="17" posi_in_prot="49"/></modifs>
 *   </peptide>
 *
 * Only the "sequence" attribute is read here: a new peptide is built from
 * it and kept as the current peptide.
 *
 * \param attributes attributes of the <peptide> tag
 * \return always true
 */
bool XpipSaxHandler::startElement_peptide(QXmlAttributes attributes) {
    qDebug() << "startElement_peptide ";

    const QString sequence_text = attributes.value("sequence").simplified();
    pappso::Peptide parsed_peptide(sequence_text);
    _current_peptide_sp = parsed_peptide.makePeptideSp();

    qDebug() << "startElement_peptide end" ;
    return true;
}
bool XpipSaxHandler::endElement_peptide() {
qDebug() << "endElement_peptide ";
_current_peptide_sp = peptide_store.getInstance(_current_peptide_sp);
return true;
}
bool XpipSaxHandler::endElement_sequence() {
if ((_tag_stack.size() > 1) && (_tag_stack[_tag_stack.size() - 1] == "protein")) {
......@@ -175,3 +227,54 @@ bool XpipSaxHandler::characters(const QString &str) {
return true;
}
/**
 * \brief finds the known modification whose mass matches \p mass
 *
 * A fixed list of PSI-MOD accessions is tried in order; the first
 * modification whose mass range (0.01 dalton precision) contains \p mass
 * is returned. Each modification is only looked up when all the previous
 * ones failed to match.
 *
 * \param mass mass of the modification to look for
 * \return the first matching modification
 * \throws pappso::ExceptionNotFound when none of the candidates matches
 */
pappso::AaModificationP XpipSaxHandler::getAaModificationP(pappso::mz mass) const {
    pappso::PrecisionP precision = pappso::Precision::getDaltonInstance(0.01);

    // candidates, in the order in which they have to be tested
    const char * const candidate_accessions[] = {
        "MOD:00719", // oxidation
        "MOD:00397", // iodoacetamide
        "MOD:00408", // acetylated
        "MOD:00696", // phosphorylated
        "MOD:01160", // ammonia
        "MOD:00704", // dehydrated
        "MOD:00429", // dimethylated
        "MOD:00552", // dimethylated medium
        "MOD:00638", // dimethylated heavy
        "MOD:00628"  // dimethylpyrrole adduct
    };

    for (const char * accession : candidate_accessions) {
        pappso::AaModificationP candidate = pappso::AaModification::getInstance(accession);
        if (pappso::MassRange(candidate->getMass(),precision).contains(mass)) {
            return candidate;
        }
    }

    throw pappso::ExceptionNotFound(QObject::tr("XpipSaxHandler::getAaModificationP => modification not found for mass %1").arg(mass));
}
......@@ -26,9 +26,10 @@
#include <QXmlDefaultHandler>
#include <pappsomspp/pappsoexception.h>
#include <pappsomspp/protein/protein.h>
#include <pappsomspp/peptide/peptide.h>
#include <pappsomspp/amino_acid/aamodification.h>
#include "../core/project.h"
using namespace pappso;
#include "../utils/peptidestore.h"
class XpipSaxHandler: public QXmlDefaultHandler
{
......@@ -53,20 +54,29 @@ public:
QString errorString() const;
private:
bool startElement_protein(QXmlAttributes attributes);
bool startElement_sample(QXmlAttributes attributes);
bool endElement_sequence();
bool endElement_protein();
bool startElement_peptide(QXmlAttributes attributes);
bool startElement_protein(QXmlAttributes attributes);
bool startElement_sample(QXmlAttributes attributes);
bool startElement_modifs_mass(QXmlAttributes attributes);
bool endElement_sequence();
bool endElement_protein();
bool endElement_peptide();
pappso::AaModificationP getAaModificationP(pappso::mz mass) const;
private:
std::vector<QString> _tag_stack;
QString _errorStr;
QString _current_text;
Project * _p_project;
Protein _current_protein;
pappso::Protein _current_protein;
pappso::PeptideSp _current_peptide_sp;
PeptideStore peptide_store;
QMap<QString, pappso::AaModificationP> _map_massstr_aamod;
};
#endif // XTANDEMRESULTSHANDLER_H
/**
* \file utils/peptidestore.cpp
* \date 7/10/2016
* \author Olivier Langella
* \brief store unique version of peptides
*/
/*******************************************************************************
* Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
*
* This file is part of peptider.
*
* peptider is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* peptider is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with peptider. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
******************************************************************************/
#include "peptidestore.h"
/** \brief builds an empty store: there is nothing to set up beyond the members' own default construction */
PeptideStore::PeptideStore() = default;
/** \brief nothing to release by hand: the members clean up after themselves */
PeptideStore::~PeptideStore() = default;
/**
 * \brief returns the stored instance equivalent to \p peptide_in
 *
 * Peptides are identified by the string returned by
 * getLiAbsoluteString(). The first peptide seen for a given string is
 * stored; later calls with an equivalent peptide return the stored one.
 *
 * The internal map is keyed on the hash of that string only, so two
 * different strings may share a key. In that case the stored peptide is
 * NOT the one the caller asked for: \p peptide_in is handed back
 * unchanged instead of silently substituting another peptide.
 *
 * \param peptide_in peptide to look up (must not be null); it is stored
 *        when no equivalent peptide is known yet
 * \return reference to the stored shared pointer, or to \p peptide_in
 *         itself on a hash collision
 */
pappso::PeptideSp & PeptideStore::getInstance(pappso::PeptideSp & peptide_in) {
    const auto li_string = peptide_in->getLiAbsoluteString();
    const std::size_t sequence_li_crc = _hash_fn(li_string.toStdString());

    // insertion is a no-op when the key is already present
    auto ret = _map_crc_peptide_list.emplace(sequence_li_crc, peptide_in);

    if (!ret.second && (ret.first->second->getLiAbsoluteString() != li_string)) {
        // same hash, different peptide: never return the wrong instance
        return peptide_in;
    }
    return ret.first->second;
}
/**
* \file utils/peptidestore.h
* \date 7/10/2016
* \author Olivier Langella
* \brief store unique version of peptides
*/
/*******************************************************************************
* Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
*
* This file is part of peptider.
*
* peptider is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* peptider is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with peptider. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
******************************************************************************/
#ifndef PEPTIDESTORE_H
#define PEPTIDESTORE_H
#include <pappsomspp/peptide/peptide.h>
#include <unordered_map>
/**
 * \brief store keeping a single shared instance per peptide
 *
 * Peptides are indexed by the hash of their getLiAbsoluteString() text
 * (see getInstance() in peptidestore.cpp).
 */
class PeptideStore
{
public:
PeptideStore();
~PeptideStore();
/**
 * \brief inserts \p peptide_in if its key is not known yet and returns
 * the shared pointer held by the store for that key
 * \param peptide_in peptide to look up or to store
 * \return reference to the shared pointer kept inside the store
 */
pappso::PeptideSp & getInstance(pappso::PeptideSp & peptide_in);
private :
/// hash function applied to the peptide string to build the map key
std::hash<std::string> _hash_fn;
/// stored peptides, keyed by the hash value of their string
std::unordered_map<std::size_t, pappso::PeptideSp> _map_crc_peptide_list;
};
#endif // PEPTIDESTORE_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment