From 73cf85856273052fcbf4f90b4badd3dfcf39fa75 Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Fri, 7 Apr 2017 12:14:15 +0200 Subject: [PATCH] WIP : writing MassChroqML --- src/grouping/groupinggroup.cpp | 37 +++++++++------ src/grouping/groupinggroup.h | 4 ++ src/output/masschroqml.cpp | 84 +++++++++++++++++++++------------- src/output/masschroqml.h | 2 + 4 files changed, 79 insertions(+), 48 deletions(-) diff --git a/src/grouping/groupinggroup.cpp b/src/grouping/groupinggroup.cpp index b414e320..7dd05b0a 100644 --- a/src/grouping/groupinggroup.cpp +++ b/src/grouping/groupinggroup.cpp @@ -37,26 +37,33 @@ GroupingGroup::~GroupingGroup() { } + +const std::vector<std::pair<unsigned int, const PeptideMatch *>> & GroupingGroup::getPairSgNumberPeptideMatchList() const { + return _pair_sg_number_peptide_match_list; +} +unsigned int GroupingGroup::getGroupNumber() const { + return _group_number; +} unsigned int GroupingGroup::getNumberOfSubgroups() const { return _number_of_subgroup; } const QStringList GroupingGroup::getSubgroupIdList(const PeptideMatch * p_peptide_match) const { - std::set<unsigned int> subgroup_list; + std::set<unsigned int> subgroup_list; pappso::GrpPeptide * p_grp_peptide = p_peptide_match->getGrpPeptideSp().get(); if (p_grp_peptide != nullptr) { - for (auto && pair_peptide_match :_pair_sg_number_peptide_match_list) { - if(pair_peptide_match.second->getGrpPeptideSp().get() == p_grp_peptide) { - subgroup_list.insert(pair_peptide_match.first); + for (auto && pair_peptide_match :_pair_sg_number_peptide_match_list) { + if(pair_peptide_match.second->getGrpPeptideSp().get() == p_grp_peptide) { + subgroup_list.insert(pair_peptide_match.first); + } } } + + + QStringList sg_str_list; + for (unsigned int sgnum : subgroup_list) { + sg_str_list << QString("%1%2").arg(pappso::Utils::getLexicalOrderedString(_group_number)).arg(pappso::Utils::getLexicalOrderedString(sgnum)); } - - - QStringList sg_str_list; - for (unsigned int sgnum : subgroup_list) { - sg_str_list << QString("%1%2").arg(pappso::Utils::getLexicalOrderedString(_group_number)).arg(pappso::Utils::getLexicalOrderedString(sgnum)); - } return sg_str_list; } @@ -65,12 +72,12 @@ unsigned int GroupingGroup::countSubgroupPresence(const PeptideMatch * p_peptide std::set<unsigned int> subgroup_list; pappso::GrpPeptide * p_grp_peptide = p_peptide_match->getGrpPeptideSp().get(); if (p_grp_peptide != nullptr) { - for (auto && pair_peptide_match :_pair_sg_number_peptide_match_list) { - if(pair_peptide_match.second->getGrpPeptideSp().get() == p_grp_peptide) { - subgroup_list.insert(pair_peptide_match.first); + for (auto && pair_peptide_match :_pair_sg_number_peptide_match_list) { + if(pair_peptide_match.second->getGrpPeptideSp().get() == p_grp_peptide) { + subgroup_list.insert(pair_peptide_match.first); + } } } - } return subgroup_list.size(); } @@ -131,7 +138,7 @@ std::size_t GroupingGroup::countSpecificSequence(const ProteinMatch * p_protein_ } void GroupingGroup::add(const ProteinMatch * p_protein_match) { -_group_number = p_protein_match->getGrpProteinSp().get()->getGroupNumber(); + _group_number = p_protein_match->getGrpProteinSp().get()->getGroupNumber(); if (p_protein_match->getGrpProteinSp().get()->getRank() == 1) { _number_of_subgroup++; unsigned int sg_number = p_protein_match->getGrpProteinSp().get()->getSubGroupNumber(); diff --git a/src/grouping/groupinggroup.h b/src/grouping/groupinggroup.h index 9a48d76d..5d740da7 100644 --- a/src/grouping/groupinggroup.h +++ b/src/grouping/groupinggroup.h @@ -40,6 +40,8 @@ public: GroupingGroup(); ~GroupingGroup(); + + unsigned int getGroupNumber() const ; void add(const ProteinMatch * p_protein_match); std::size_t countSpecificSpectrum(const ProteinMatch * p_protein_match) const; @@ -55,6 +57,8 @@ public: * */ const QStringList getSubgroupIdList(const PeptideMatch * p_peptide_match) const; + + const std::vector<std::pair<unsigned int, const PeptideMatch *>> & getPairSgNumberPeptideMatchList() const; private : unsigned int _group_number=0; diff --git a/src/output/masschroqml.cpp b/src/output/masschroqml.cpp index b4492759..0e17ce3c 100644 --- a/src/output/masschroqml.cpp +++ b/src/output/masschroqml.cpp @@ -72,7 +72,7 @@ void MassChroQml::write(ProjectSp sp_project) { if (_sp_project.get() == nullptr) { throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n project is empty")); } - + //<masschroq> _output_stream->writeStartElement("masschroq"); _output_stream->writeAttribute("type","input"); @@ -116,27 +116,44 @@ void MassChroQml::write(ProjectSp sp_project) { void MassChroQml::writePeptideList() { //<peptide_list> _output_stream->writeStartElement("peptide_list"); - /* - <peptide id="pep0" mh="1463.626" mods="114.08" prot_ids="P1.1" - seq="TCVADESHAGCEK"> - <modifications><!-- this tag is optional but gives an exact mass computation --> - <psimod at="2" acc="MOD:00397"></psimod> - <psimod at="11" acc="MOD:00397"></psimod> - </modifications> - <observed_in data="samp0" scan="655" z="2" /> - <observed_in data="samp1" scan="798" z="2" /> - </peptide> - <peptide id="pep1" mh="1103.461" mods="57.04" prot_ids="P1.1" - seq="ADESHAGCEK"> - <modifications><!-- this tag is optional but gives an exact mass computation --> - <psimod at="8" acc="MOD:00397"></psimod> - </modifications> - <observed_in data="samp3" scan="663" z="2" /> - </peptide>*/ - //</peptide_list> + QStringList empty; + for (const GroupingGroupSp & group:_group_list) { + std::map<const PeptideMatch *, QStringList> peptide_sg_map; + unsigned int group_number = group.get()->getGroupNumber(); + for (auto && pair_peptide_match :group.get()->getPairSgNumberPeptideMatchList()) { + unsigned int sg_number = pair_peptide_match.first; + const PeptideMatch * p_peptide_match = pair_peptide_match.second; + if (!p_peptide_match->isGrouped()) continue; + + std::pair<std::map< const PeptideMatch *, QStringList>::iterator,bool> ret = peptide_sg_map.insert(std::pair<const PeptideMatch *, QStringList>(p_peptide_match, empty)); + + QString sg_str = QString("%1.%2.a1").arg(pappso::Utils::getLexicalOrderedString(group_number)).arg(pappso::Utils::getLexicalOrderedString(sg_number)); + if (!ret.first->second.contains(sg_str)) { + ret.first->second << sg_str; + } + } + + for (auto && pair_peptide_sgid : peptide_sg_map) { + //<peptide id="pep0" mh="1463.626" mods="114.08" prot_ids="P1.1" + // seq="TCVADESHAGCEK"> + _output_stream->writeStartElement("peptide"); + _output_stream->writeAttribute("id",pair_peptide_sgid.first->getGrpPeptideSp().get()->getGroupingId()); + _output_stream->writeAttribute("prot_ids",pair_peptide_sgid.second.join(" ")); + /* + <modifications><!-- this tag is optional but gives an exact mass computation --> + <psimod at="2" acc="MOD:00397"></psimod> + <psimod at="11" acc="MOD:00397"></psimod> + </modifications> + <observed_in data="samp0" scan="655" z="2" /> + <observed_in data="samp1" scan="798" z="2" />*/ + //</peptide> + _output_stream->writeEndElement(); + } + } + //</peptide_list> _output_stream->writeEndElement(); } - + void MassChroQml::writeGroups() { //<groups> _output_stream->writeStartElement("groups"); @@ -168,21 +185,22 @@ void MassChroQml::writeProteinList() { _p_identification_group = identification_list[0]; for (ProteinMatch * p_protein_match :_p_identification_group->getProteinMatchList()) { if (!p_protein_match->isGrouped()) continue; - //<protein desc="conta|P02769|ALBU_BOVIN SERUM ALBUMIN PRECURSOR." - // id="P1.1" /> + //<protein desc="conta|P02769|ALBU_BOVIN SERUM ALBUMIN PRECURSOR." + // id="P1.1" /> + _group_list.insert(p_protein_match->getGroupingGroupSp()); pappso::GrpProteinSp grp_protein = p_protein_match->getGrpProteinSp(); if (grp_protein.get()->getRank() == 1) { - _output_stream->writeStartElement("protein"); - _output_stream->writeAttribute("id",grp_protein.get()->getGroupingId()); - QStringList list; - list << p_protein_match->getProteinXtpSp().get()->getAccession(); - list << p_protein_match->getProteinXtpSp().get()->getDescription(); - _output_stream->writeAttribute("desc",list.join(" ")); - //<protein desc="conta|P02770|ALBU_RAT SERUM ALBUMIN PRECURSOR." - // id="P1.2" /> - _output_stream->writeEndElement(); + _output_stream->writeStartElement("protein"); + _output_stream->writeAttribute("id",grp_protein.get()->getGroupingId()); + QStringList list; + list << p_protein_match->getProteinXtpSp().get()->getAccession(); + list << p_protein_match->getProteinXtpSp().get()->getDescription(); + _output_stream->writeAttribute("desc",list.join(" ")); + //<protein desc="conta|P02770|ALBU_RAT SERUM ALBUMIN PRECURSOR." + // id="P1.2" /> + _output_stream->writeEndElement(); } } - //</protein_list> - _output_stream->writeEndElement(); + //</protein_list> + _output_stream->writeEndElement(); } diff --git a/src/output/masschroqml.h b/src/output/masschroqml.h index 47559be0..031695c3 100644 --- a/src/output/masschroqml.h +++ b/src/output/masschroqml.h @@ -34,6 +34,7 @@ #include <QFile> #include <QString> #include "../core/project.h" +#include "../grouping/groupinggroup.h" class MassChroQml { @@ -52,6 +53,7 @@ private : QXmlStreamWriter * _output_stream; ProjectSp _sp_project; IdentificationGroup * _p_identification_group; + std::set<GroupingGroupSp> _group_list; }; #endif // MASSCHROQML_H -- GitLab