diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1db1f3d74db010bc9abd6627b533a628dc13b3d5..63f81e5d68cfcfce465878ffd09961b10cc2e6d6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,13 +53,14 @@ configure_file (${CMAKE_SOURCE_DIR}/src/config.h.cmake ${CMAKE_SOURCE_DIR}/src/c
 # File list
 SET(CPP_FILES
-    utils/readspectrum.cpp
     core/project.cpp
     core/match.cpp
     core/identification_sources/identificationdatasource.cpp
     core/identification_sources/identificationxtandemfile.cpp
     files/xpipfile.cpp
     input/xpipsaxhandler.cpp
+    utils/peptidestore.cpp
+    utils/readspectrum.cpp
 )
 set(QTLIBS ${Qt5Xml_LIBRARIES} ${Qt5Gui_LIBRARIES} ${Qt5Svg_LIBRARIES})
diff --git a/src/input/xpipsaxhandler.cpp b/src/input/xpipsaxhandler.cpp
index 520c02a79c032524ae23b08e222d2e72ba3e74d2..5b797e3b8fb58764d77d05a300b1c009a9decef3 100644
--- a/src/input/xpipsaxhandler.cpp
+++ b/src/input/xpipsaxhandler.cpp
@@ -22,6 +22,7 @@
 #include "xpipsaxhandler.h"
 #include <pappsomspp/msrun/msrunid.h>
+#include <pappsomspp/exception/exceptionnotfound.h>
 XpipSaxHandler::XpipSaxHandler(Project * p_project):_p_project(p_project)
 {
@@ -49,10 +50,14 @@ bool XpipSaxHandler::startElement(const QString & namespaceURI, const QString &
         //<sample value="P6_08_10"/>
         else if (qName == "sample") {
             is_ok = startElement_sample(attributes);
+        } else if (qName == "peptide") {
+            is_ok = startElement_peptide(attributes);
+        } else if (qName == "modifs_mass") {
+            is_ok = startElement_modifs_mass(attributes);
         }
         _current_text.clear();
     }
-    catch (PappsoException exception_pappso) {
+    catch (pappso::PappsoException & exception_pappso) { // by reference: no copy, no slicing of derived PAPPSO exceptions
         _errorStr = QObject::tr("ERROR in XpipSaxHandler::startElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat());
         return false;
     }
@@ -73,6 +78,9 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
         {
             is_ok = endElement_protein();
         }
+        else if (qName == "peptide") {
+            is_ok = endElement_peptide();
+        }
         else if (qName == "sequence") {
is_ok = endElement_sequence();
         }
@@ -81,7 +89,7 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
         // else if ((_tag_stack.size() > 1) &&
         // (_tag_stack[_tag_stack.size() - 2] == "detection_moulon"))
     }
-    catch (PappsoException exception_pappso) {
+    catch (pappso::PappsoException & exception_pappso) { // by reference: no copy, no slicing of derived PAPPSO exceptions
         _errorStr = QObject::tr("ERROR in XpipSaxHandler::endElement tag %1, PAPPSO exception:\n%2").arg(qName).arg(exception_pappso.qwhat());
         return false;
     }
@@ -96,34 +104,78 @@ bool XpipSaxHandler::endElement(const QString & namespaceURI, const QString & lo
     return is_ok;
 }
+/** \brief handle a modifs_mass start tag: resolve its modvalue mass with getAaModificationP
+ * and remember the result under the whitespace-simplified attribute text.
+ * getAaModificationP throws when the mass matches none of the modifications it knows. */
+bool XpipSaxHandler::startElement_modifs_mass(QXmlAttributes attributes) {
+    /*
+    <modifs_list_mass><modifs_mass modvalue="-18.01056"/>
+    <modifs_mass modvalue="-17.02655"/>
+    <modifs_mass modvalue="15.99491"/>
+    <modifs_mass modvalue="42.01057"/>
+    <modifs_mass modvalue="42.01056"/>
+    <modifs_mass modvalue="57.02146"/>
+    </modifs_list_mass>
+    */
+    qDebug() << "startElement_modifs_mass ";
+    QString mass_str(attributes.value("modvalue").simplified());
+    pappso::mz mass = mass_str.toDouble();
+    pappso::AaModificationP mod = getAaModificationP(mass);
+    _map_massstr_aamod[mass_str] = mod;
+    qDebug() << "startElement_modifs_mass end" ;
+    return true;
+}
+
 //<sample value="P6_21_23"/>
 bool XpipSaxHandler::startElement_sample(QXmlAttributes attributes) {
     qDebug() << "startElement_sample ";
-    MsRunId ms_run;
+    pappso::MsRunId ms_run;
     ms_run.setXmlId(attributes.value("value").simplified());
     ms_run.setFilename(attributes.value("value").simplified());
-
+
     _p_project->addMsRunIdSp(ms_run.makeMsRunIdSp());
     qDebug() << "startElement_sample end" ;
     return true;
 }
-/*
- * <protein peptide_number="268" evalue="-432.77353" URL="Genome_Z_mays_5a.fasta" description="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC 4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; parent_gene=GRMZM2G083841">
- <protein_evalue
evalue="-399.36093" sample="20120906_balliau_extract_1_A02_urzb-1"/> - <protein_evalue evalue="-384.54382" sample="20120906_balliau_extract_1_A01_urnb-1"/> - <sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKVSEDDKLIEYDALLVDRFLNILQDLHGPSLREFVQECYEVSADYEGKGDTTKLGELGAKLTGLAPADAILVASSILHMLNLANLAEEVQIAHRRRNSKLKKGGFADEGSATTESDIEETLKRLVSEVGKSPEEVFEALKNQTVDLVFTAHPTQSARRSLLQKNARIRNCLTQLNAKDITDDDKQELDEALQREIQAAFRTDEIRRAQPTPQDEMRYGMSYIHETVWKGVPKFLRRVDTALKNIGINERLPYNVSLIRFSSWMGGDRDGNPRVTPEVTRDVCLLARMMAANLYIDQIEELMFELSMWRCNDELRVRAEELHSSSGSKVTKYYIEFWKQIPPNEPYRVILGHVRDKLYNTRERARHLLASGVSEISAESSFTSIEEFLEPLELCYKSLCDCGDKAIADGSLLDLLRQVFTFGLSLVKLDIRQESERHTDVIDAITTHLGIGSYREWPEDKRQEWLLSELRGKRPLLPPDLPQTDEIADVIGAFHVLAELPPDSFGPYIISMATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFHGRGGTVGRGGGPTHLAILSQPPDTINGSIRVTVQGEVIEFCFGEEHLCFQTLQRFTAATLEHGMHPPVSPKPEWRKLMDEMAVVATEEYRSVVVKEARFVEYFRSATPETEYGRMNIGSRPAKRRPGGGITTLRAIPWIFSWTQTRFHLPVWLGVGAAFKFAIDKDVRNFQVLKEMYNEWPFFRVTLDLLEMVFAKGDPGIAGLYDELLVAEELKPFGKQLRDKYVETQQLLLQIAGHKDILEGDPFLKQGLVLRNPYITTLNVFQAYTLKRIRDPNFKVTPQPPLSKEFADENKPAGLVKLNPASEYPPGLEDTLILTMKGIAAGMQNTG</sequence> - </protein> - */ bool XpipSaxHandler::startElement_protein(QXmlAttributes attributes) { + /* + * <protein peptide_number="268" evalue="-432.77353" URL="Genome_Z_mays_5a.fasta" description="GRMZM2G083841_P01 P04711 Phosphoenolpyruvate carboxylase 1 (PEPCase 1)(PEPC 1)(EC 4.1.1.31) seq=translation; coord=9:61296279..61301686:1; parent_transcript=GRMZM2G083841_T01; parent_gene=GRMZM2G083841"> + <protein_evalue evalue="-399.36093" sample="20120906_balliau_extract_1_A02_urzb-1"/> + <protein_evalue evalue="-384.54382" sample="20120906_balliau_extract_1_A01_urnb-1"/> + 
<sequence>MASTKAPGPGEKHHSIDAQLRQLVPGKVSEDDKLIEYDALLVDRFLNILQDLHGPSLREFVQECYEVSADYEGKGDTTKLGELGAKLTGLAPADAILVASSILHMLNLANLAEEVQIAHRRRNSKLKKGGFADEGSATTESDIEETLKRLVSEVGKSPEEVFEALKNQTVDLVFTAHPTQSARRSLLQKNARIRNCLTQLNAKDITDDDKQELDEALQREIQAAFRTDEIRRAQPTPQDEMRYGMSYIHETVWKGVPKFLRRVDTALKNIGINERLPYNVSLIRFSSWMGGDRDGNPRVTPEVTRDVCLLARMMAANLYIDQIEELMFELSMWRCNDELRVRAEELHSSSGSKVTKYYIEFWKQIPPNEPYRVILGHVRDKLYNTRERARHLLASGVSEISAESSFTSIEEFLEPLELCYKSLCDCGDKAIADGSLLDLLRQVFTFGLSLVKLDIRQESERHTDVIDAITTHLGIGSYREWPEDKRQEWLLSELRGKRPLLPPDLPQTDEIADVIGAFHVLAELPPDSFGPYIISMATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQVAKRYGVKLTLFHGRGGTVGRGGGPTHLAILSQPPDTINGSIRVTVQGEVIEFCFGEEHLCFQTLQRFTAATLEHGMHPPVSPKPEWRKLMDEMAVVATEEYRSVVVKEARFVEYFRSATPETEYGRMNIGSRPAKRRPGGGITTLRAIPWIFSWTQTRFHLPVWLGVGAAFKFAIDKDVRNFQVLKEMYNEWPFFRVTLDLLEMVFAKGDPGIAGLYDELLVAEELKPFGKQLRDKYVETQQLLLQIAGHKDILEGDPFLKQGLVLRNPYITTLNVFQAYTLKRIRDPNFKVTPQPPLSKEFADENKPAGLVKLNPASEYPPGLEDTLILTMKGIAAGMQNTG</sequence> + </protein> + */ qDebug() << "startElement_protein "; _current_protein.setDescription(attributes.value("description").simplified()); _current_protein.setAccession(_current_protein.getDescription().split(" ").at(0)); qDebug() << "startElement_protein end" ; return true; } +bool XpipSaxHandler::startElement_peptide(QXmlAttributes attributes) { + +//<peptide sample="20120208_Blein_rep4_1_B03_DW21-4-26-328" + //sample_file="/gorgone/pappso/moulon/users/Melisande/test-param-masschroq/20120208_Blein_rep4_1_B03_DW21-4-26-328.xml" + //scan="2589" scan_in_xtandem="2589" RT="603" mhplus_obser="873.5401" mhplus_theo="873.5408" deltamass="-7.0E-4" + //sequence="IATAIEKK" pre="NPAR" post="AADA" start="331" stop="338" charge="2" evalue="9.2E-4" hypercorr="35.2" validate="true"> +//<modifs></modifs></peptide> + + //<modifs><modif aa="M" modvalue="15.99491" posi="17" posi_in_prot="49"/> +//</modifs> + qDebug() << "startElement_peptide "; + _current_peptide_sp = 
pappso::Peptide(attributes.value("sequence").simplified()).makePeptideSp();
+    qDebug() << "startElement_peptide end" ;
+    return true;
+}
+/** \brief swap the peptide parsed for this element for the instance handed out by peptide_store */
+bool XpipSaxHandler::endElement_peptide() {
+    qDebug() << "endElement_peptide ";
+
+    _current_peptide_sp = peptide_store.getInstance(_current_peptide_sp);
+    return true;
+}
 bool XpipSaxHandler::endElement_sequence() {
     if ((_tag_stack.size() > 1) && (_tag_stack[_tag_stack.size() - 1] == "protein")) {
@@ -175,3 +227,35 @@ bool XpipSaxHandler::characters(const QString &str) {
     return true;
 }
+
+/** \brief find the known modification whose mass matches the given mass
+ *
+ * Candidates are tried in the order listed below; the first one whose mass
+ * range (0.01 dalton precision) contains \p mass is returned.
+ * \throws pappso::ExceptionNotFound if no candidate matches
+ */
+pappso::AaModificationP XpipSaxHandler::getAaModificationP(pappso::mz mass) const {
+    static const QString accession_list[] = {
+        "MOD:00719", // oxidation
+        "MOD:00397", // iodoacetamide
+        "MOD:00408", // acetylated
+        "MOD:00696", // phosphorylated
+        "MOD:01160", // ammonia
+        "MOD:00704", // dehydrated
+        "MOD:00429", // dimethylated
+        "MOD:00552", // dimethylated medium
+        "MOD:00638", // dimethylated heavy
+        "MOD:00628"  // dimethylpyrrole adduct
+    };
+    pappso::PrecisionP precision = pappso::Precision::getDaltonInstance(0.01);
+
+    for (const QString & accession : accession_list) {
+        pappso::AaModificationP candidate = pappso::AaModification::getInstance(accession);
+        if (pappso::MassRange(candidate->getMass(),precision).contains(mass)) {
+            return candidate;
+        }
+    }
+
+    throw pappso::ExceptionNotFound(QObject::tr("XpipSaxHandler::getAaModificationP => modification not found for mass %1").arg(mass));
+}
+
diff --git a/src/input/xpipsaxhandler.h b/src/input/xpipsaxhandler.h
index e8b2dc4518702c412dd91133ff7cb8b45218dad4..5eca4cad0444df420d7c99c5f6e0adf25c436101 100644
--- a/src/input/xpipsaxhandler.h
+++ b/src/input/xpipsaxhandler.h
@@ -26,9 +26,10 @@
 #include <QXmlDefaultHandler>
 #include <pappsomspp/pappsoexception.h>
 #include <pappsomspp/protein/protein.h>
+#include <pappsomspp/peptide/peptide.h>
+#include <pappsomspp/amino_acid/aamodification.h>
 #include "../core/project.h"
-
-using namespace pappso;
+#include "../utils/peptidestore.h"
 class XpipSaxHandler: public QXmlDefaultHandler
 {
@@ -53,20 +54,29 @@ public:
     QString errorString() const;
-
+
 private:
-    bool startElement_protein(QXmlAttributes attributes);
-    bool startElement_sample(QXmlAttributes attributes);
-    bool endElement_sequence();
-    bool endElement_protein();
+    bool startElement_peptide(QXmlAttributes attributes);
+    bool startElement_protein(QXmlAttributes attributes);
+    bool startElement_sample(QXmlAttributes attributes);
+    bool startElement_modifs_mass(QXmlAttributes attributes);
+    bool endElement_sequence();
+    bool
endElement_protein(); + bool endElement_peptide(); + + pappso::AaModificationP getAaModificationP(pappso::mz mass) const; private: std::vector<QString> _tag_stack; QString _errorStr; QString _current_text; - + Project * _p_project; - Protein _current_protein; + pappso::Protein _current_protein; + pappso::PeptideSp _current_peptide_sp; + + PeptideStore peptide_store; + QMap<QString, pappso::AaModificationP> _map_massstr_aamod; }; #endif // XTANDEMRESULTSHANDLER_H diff --git a/src/utils/peptidestore.cpp b/src/utils/peptidestore.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5da496c72c7b6878a77ad63c89d1c5682bbdf641 --- /dev/null +++ b/src/utils/peptidestore.cpp @@ -0,0 +1,52 @@ +/** + * \file utils/peptidestore.h + * \date 7/10/2016 + * \author Olivier Langella + * \brief store unique version of peptides + */ + +/******************************************************************************* + * Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>. + * + * This file is part of peptider. + * + * peptider is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * peptider is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with peptider. If not, see <http://www.gnu.org/licenses/>. 
+ *
+ * Contributors:
+ *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
+ ******************************************************************************/
+
+#include "peptidestore.h"
+
+PeptideStore::PeptideStore()
+{
+}
+
+PeptideStore::~PeptideStore()
+{
+}
+
+/** \brief return the store's shared peptide for \p peptide_in, registering \p peptide_in when it is new.
+ * Keys are hashes of getLiAbsoluteString(); a key already taken by a peptide with a different
+ * string (hash collision) is skipped by probing the next key, never handed back as a match. */
+pappso::PeptideSp & PeptideStore::getInstance(pappso::PeptideSp & peptide_in) {
+    const auto li_absolute = peptide_in.get()->getLiAbsoluteString();
+    std::size_t key = _hash_fn(li_absolute.toStdString());
+    for (;; ++key) { // std::size_t is unsigned: wrapping past the maximum is well defined
+        auto ret = _map_crc_peptide_list.insert(std::pair<std::size_t, pappso::PeptideSp>(key, peptide_in));
+        if (ret.second || ret.first->second.get()->getLiAbsoluteString() == li_absolute) {
+            return ret.first->second; // freshly stored, or the equal peptide stored earlier
+        }
+    }
+}
diff --git a/src/utils/peptidestore.h b/src/utils/peptidestore.h
new file mode 100644
index 0000000000000000000000000000000000000000..d6f88ea1b877c5a1c93caa37806ee57175685101
--- /dev/null
+++ b/src/utils/peptidestore.h
@@ -0,0 +1,50 @@
+/**
+ * \file utils/peptidestore.h
+ * \date 7/10/2016
+ * \author Olivier Langella
+ * \brief store unique version of peptides
+ */
+
+/*******************************************************************************
+ * Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
+ *
+ * This file is part of peptider.
+ *
+ *     peptider is free software: you can redistribute it and/or modify
+ *     it under the terms of the GNU General Public License as published by
+ *     the Free Software Foundation, either version 3 of the License, or
+ *     (at your option) any later version.
+ *
+ *     peptider is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with peptider.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Contributors:
+ *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
+ ******************************************************************************/
+
+#ifndef PEPTIDESTORE_H
+#define PEPTIDESTORE_H
+
+#include <pappsomspp/peptide/peptide.h>
+#include <functional>
+#include <string>
+#include <unordered_map>
+
+/** \brief hands out one shared pappso::PeptideSp per stored peptide, looked up through a string hash */
+class PeptideStore
+{
+public:
+    PeptideStore();
+    ~PeptideStore();
+    /** \brief return the instance the store holds for \p peptide_in, storing \p peptide_in first when it has none */
+    pappso::PeptideSp & getInstance(pappso::PeptideSp & peptide_in);
+private :
+    std::hash<std::string> _hash_fn; ///< hash function producing the map keys (needs <functional> and <string>)
+    std::unordered_map<std::size_t, pappso::PeptideSp> _map_crc_peptide_list; ///< key -> stored peptide
+};
+#endif // PEPTIDESTORE_H