diff --git a/src/output/proticdbml.cpp b/src/output/proticdbml.cpp index 371b400b55cc3f60d262b2bfa6449928e01ee553..22ebf1fb7ea356b0edfa51c7e1d64c9f7cd37391 100644 --- a/src/output/proticdbml.cpp +++ b/src/output/proticdbml.cpp @@ -392,10 +392,10 @@ void ProticdbMl::writeIdentificationRun(IdentificationGroup * p_identification) //} writeCvParam("PROTICdbO:0000325", QString("%1").arg(_sp_project.get()->getAutomaticFilterParameters().getFilterMinimumPeptidePerMatch()), - "X!TandemPipeline filter on minimal number of peptide per protein"); + "X!TandemPipeline filter on minimal number of peptide per protein"); writeCvParam("PROTICdbO:0000324", QString("%1").arg(_sp_project.get()->getAutomaticFilterParameters().getFilterPeptideEvalue()), - "X!TandemPipeline filter on peptide evalue"); + "X!TandemPipeline filter on peptide evalue"); // TODO write database filter // this.writeCvParam("PROTICdbO:0000324", "" @@ -410,173 +410,46 @@ void ProticdbMl::writeIdentificationRun(IdentificationGroup * p_identification) _output_stream->writeEndElement();// description } -private void writePeptideHit(Peptide pep, Group group, - PeptideMass peptideMass) throws XMLStreamException, MSMSException { - if (peptideMass == null) { - String message = "peptideMass == null"; - logger.error(message); - throw new MSMSException(message); - } - - String pep_hit_id = "pep" + pepSampleScanCle2xmlId.size(); - // param par default - writer.writeStartElement("peptideHit"); - writer.writeAttribute("calc_mr", "" + (pep.get_mhplus_theo() - 1.00794)); - double exp_mz = (pep.get_mhplus_obser() + (1.00794 * (pep.getCharge() - 1))) - / pep.getCharge(); - writer.writeAttribute("exp_mz", "" + exp_mz); - writer.writeAttribute("delta", "" + pep.get_deltamass()); - writer.writeAttribute("exp_mr", "" + (pep.get_mhplus_obser() - 1.00794)); - writer.writeAttribute("acq_number", "" + pep.get_scan()); - writer.writeAttribute("ms_run_id", - "" + msrun_to_id.get(pep.get_sample())); - writer.writeAttribute("id", pep_hit_id); - writer.writeAttribute("exp_z", "" + pep.getCharge()); - - this.writeCvParam("PROTICdbO:0000339", - Utils.getPappsoPeptideMassId(group, peptideMass), - "peptide mass id"); - - if (xtpExperimentType.equals("phospho")) { - this.writeCvParam("PROTICdbO:0000349", - Utils.getPappsoPhosphoPeptideMassId(group, peptideMass), - "phosphopeptide mass id"); +void ProticdbMl::writepeptideHits(IdentificationGroup * p_identification) { + _output_stream->writeStartElement("peptideHits"); + for (std::pair <unsigned int, GroupingGroupSp > group_pair : p_identification->getGroupStore().getGroupMap()) { + writepeptideHitsbyGroup(group_pair.second); } - - // pep.getMsRun(). - IdentificationDataSource idDataSource = pep - .getIdentificationDataSource(); - if (idDataSource.getClass().equals(IdentificationXtandemFile.class)) { - // cvparam specifique xtandem - - this.writeCvParam("PROTICdbO:0000287", "" + pep.get_evalue(), - "xtandem peptide evalue"); - - this.writeCvParam("PROTICdbO:0000288", "" + pep.get_hypercorr(), - "xtandem peptide hyperscore"); - } - - this.writeCvParam("PROTICdbO:0000289", - "" + pep.getRetentionTimeInMinutes(), - "xtandem peptide retention time"); - /* - * writer.writeStartElement("cvParam"); writer.writeAttribute("value", - * "" + pep.get_RT()); writer.writeAttribute("name", - * "xtandem peptide retention time"); writer.writeAttribute("cvLabel", - * "PROTICdbO"); writer.writeAttribute("accession", - * "PROTICdbO:0000289"); writer.writeEndElement();// cvParam - */ - - // Element cvParam4 = doc.createElement("cvParam"); - // cvParam4.setAttribute("value", "" + pep.get_post()); - // cvParam4.setAttribute("name", "xtandem peptide post"); - // cvParam4.setAttribute("cvLabel", "PROTICdbO"); - // cvParam4.setAttribute("accession", "PROTICdbO:00000XX"); - // peptideHit.appendChild(cvParam4); - // - // Element cvParam5 = doc.createElement("cvParam"); - // cvParam5.setAttribute("value", "" + pep.get_pre()); - // cvParam5.setAttribute("name", "xtandem peptide pre"); - // cvParam5.setAttribute("cvLabel", "PROTICdbO"); - // cvParam5.setAttribute("accession", "PROTICdbO:00000XX"); - // peptideHit.appendChild(cvParam5); - - // sequences avec les modifs - writer.writeStartElement("pepSeq"); - writer.writeStartElement("peptide"); - writer.writeCharacters("" + pep.getSequence()); - writer.writeEndElement();// peptide - if (pep.get_Modifs().size() != 0) { - this.writePtms(pep.get_Modifs()); - } - writer.writeEndElement();// pepSeq - - pepSampleScanCle2xmlId.put(pep.get_sample_scan_sequencepeptide_cle(), - pep_hit_id); - writer.writeEndElement();// peptideHit -} - -private void writePtms(ArrayList<Modifs> mods) throws XMLStreamException { - writer.writeStartElement("ptms"); - for (Modifs mod : mods) { - writer.writeStartElement("ptm"); - writer.writeAttribute("diff_mono", "" + mod.get_modvalue()); - writer.writeAttribute("position", "" + mod.get_posi()); - writer.writeAttribute("aa", "" + mod.get_AA()); - try { - this.writeOboModif(mod.getPsiMod()); - } catch (MSMSException e) { - String message = "error writing OBO modif : " + e.getMessage(); - logger.warn(message); - } - writer.writeEndElement();// ptm - - } - writer.writeEndElement();// ptms + _output_stream->writeEndElement();// "peptideHits"); } -void ProticdbMl::writeOboModif(pappso::AaModificationP mod) { - _output_stream->writeStartElement("cvParam"); - _output_stream->writeAttribute("name", mod->getName()); - _output_stream->writeAttribute("cvLabel", "MOD"); - _output_stream->writeAttribute("accession", mod->getAccession()); - _output_stream->writeEndElement();// cvParam +QString getProticPeptideHitKey(PeptideMatch * p_peptide_match) { + return (QString ("%1 %2 %3").arg(p_peptide_match->getHashSampleScan()).arg(p_peptide_match->getPeptideXtpSp().get()->toAbsoluteString()).arg(p_peptide_match->getIdentificationDataSource()->getResourceName())); } -private void writepeptideHits(Identification identification) -throws MSMSException { - try { - writer.writeStartElement("peptideHits"); - for (Group group : identification.getGrouping().getGroupList()) { - // generating peptide numbers - PeptideMassSet pepMassSet = new PeptideMassSet(); +struct ProticPeptideHit { + bool operator ==(const ProticPeptideHit &other) const { + return (key == other.key); + }; + /** key = getProticPeptideHitKey + * */ + QString key; + PeptideMatch * peptide_match; +}; - Hashtable<String, PeptideMass> pepSampleScanCle2peptideMass; - if (this.xtpExperimentType.equals("phospho")) { - pepSampleScanCle2peptideMass = pepMassSet - .numberingPhosphoGetSampleScanCle2peptideMass(group); +void ProticdbMl::writepeptideHitsbyGroup(GroupingGroup * p_group) { - } else { - pepSampleScanCle2peptideMass = pepMassSet - .numberingGetSampleScanCle2peptideMass(group); - } - - for (SubGroup sg : group.getSubGroupSet().getSubGroupList()) { - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtList()) { - - ArrayList<Peptide> peptideList = null; - if (this.xtpExperimentType.equals("phospho")) { - peptideList = ((Match) hashProt.getGrpProtMatch()) - .getValidPeptideInPhosphoIsland((HashSampleScanSetPhospho) hashProt); - } else { - peptideList = ((Match) hashProt.getGrpProtMatch()) - .get_valid_peptides(); - } - - for (Peptide pep : peptideList) { + std::vector <ProticPeptideHit> protic_peptide_hit_list; - if (pepSampleScanCle2xmlId.containsKey(pep - .get_sample_scan_sequencepeptide_cle()) == false) { - - writePeptideHit( - pep, - group, - pepSampleScanCle2peptideMass.get(pep - .get_sample_scan_sequencepeptide_cle())); - - } - } - } - } - } - writer.writeEndElement();// "peptideHits"); - } catch (Exception e) { - String message = "error writing peptideHits in ProticDbMl \n" - + e.getMessage(); - logger.error(message); - throw new MSMSException(message); + for (PeptideMatch* p_peptide_match : p_group->getPeptideMatchList()) { + protic_peptide_hit_list.push_back(ProticPeptideHit(getProticPeptideHitKey(p_peptide_match), p_peptide_match)); + } + std::sort(protic_peptide_hit_list.begin(),protic_peptide_hit_list.end(),[](const ProticPeptideHit & first, const ProticPeptideHit & second) { + return (first.key < second.key) ; + }); + auto last = std::unique(protic_peptide_hit_list.begin(), protic_peptide_hit_list.end()); + protic_peptide_hit_list.erase(last, protic_peptide_hit_list.end()); + + for (ProticPeptideHit & protic_peptide_hit :protic_peptide_hit_list) { + QString xml_id = QString("pep%1").arg(_peptidekey_to_id.size()+1); + writePeptideHit(xml_id, protic_peptide_hit); + _peptidekey_to_id.insert(std::pair<QString, QString>(protic_peptide_hit.key, xml_id)); } } @@ -748,6 +621,120 @@ throws MSMSException { } } +void ProticdbMl::writePeptideHit(Peptide pep, Group group, + PeptideMass peptideMass) throws XMLStreamException, MSMSException { + + if (peptideMass == null) { + String message = "peptideMass == null"; + logger.error(message); + throw new MSMSException(message); + } + + String pep_hit_id = "pep" + pepSampleScanCle2xmlId.size(); + // param par default + writer.writeStartElement("peptideHit"); + writer.writeAttribute("calc_mr", "" + (pep.get_mhplus_theo() - 1.00794)); + double exp_mz = (pep.get_mhplus_obser() + (1.00794 * (pep.getCharge() - 1))) + / pep.getCharge(); + writer.writeAttribute("exp_mz", "" + exp_mz); + writer.writeAttribute("delta", "" + pep.get_deltamass()); + writer.writeAttribute("exp_mr", "" + (pep.get_mhplus_obser() - 1.00794)); + writer.writeAttribute("acq_number", "" + pep.get_scan()); + writer.writeAttribute("ms_run_id", + "" + msrun_to_id.get(pep.get_sample())); + writer.writeAttribute("id", pep_hit_id); + writer.writeAttribute("exp_z", "" + pep.getCharge()); + + this.writeCvParam("PROTICdbO:0000339", + Utils.getPappsoPeptideMassId(group, peptideMass), + "peptide mass id"); + + if (xtpExperimentType.equals("phospho")) { + this.writeCvParam("PROTICdbO:0000349", + Utils.getPappsoPhosphoPeptideMassId(group, peptideMass), + "phosphopeptide mass id"); + } + + // pep.getMsRun(). + IdentificationDataSource idDataSource = pep + .getIdentificationDataSource(); + if (idDataSource.getClass().equals(IdentificationXtandemFile.class)) { + // cvparam specifique xtandem + + this.writeCvParam("PROTICdbO:0000287", "" + pep.get_evalue(), + "xtandem peptide evalue"); + + this.writeCvParam("PROTICdbO:0000288", "" + pep.get_hypercorr(), + "xtandem peptide hyperscore"); + } + + this.writeCvParam("PROTICdbO:0000289", + "" + pep.getRetentionTimeInMinutes(), + "xtandem peptide retention time"); + /* + * writer.writeStartElement("cvParam"); writer.writeAttribute("value", + * "" + pep.get_RT()); writer.writeAttribute("name", + * "xtandem peptide retention time"); writer.writeAttribute("cvLabel", + * "PROTICdbO"); writer.writeAttribute("accession", + * "PROTICdbO:0000289"); writer.writeEndElement();// cvParam + */ + + // Element cvParam4 = doc.createElement("cvParam"); + // cvParam4.setAttribute("value", "" + pep.get_post()); + // cvParam4.setAttribute("name", "xtandem peptide post"); + // cvParam4.setAttribute("cvLabel", "PROTICdbO"); + // cvParam4.setAttribute("accession", "PROTICdbO:00000XX"); + // peptideHit.appendChild(cvParam4); + // + // Element cvParam5 = doc.createElement("cvParam"); + // cvParam5.setAttribute("value", "" + pep.get_pre()); + // cvParam5.setAttribute("name", "xtandem peptide pre"); + // cvParam5.setAttribute("cvLabel", "PROTICdbO"); + // cvParam5.setAttribute("accession", "PROTICdbO:00000XX"); + // peptideHit.appendChild(cvParam5); + + // sequences avec les modifs + writer.writeStartElement("pepSeq"); + writer.writeStartElement("peptide"); + writer.writeCharacters("" + pep.getSequence()); + writer.writeEndElement();// peptide + if (pep.get_Modifs().size() != 0) { + this.writePtms(pep.get_Modifs()); + } + writer.writeEndElement();// pepSeq + + pepSampleScanCle2xmlId.put(pep.get_sample_scan_sequencepeptide_cle(), + pep_hit_id); + writer.writeEndElement();// peptideHit +} + +private void writePtms(ArrayList<Modifs> mods) throws XMLStreamException { + writer.writeStartElement("ptms"); + for (Modifs mod : mods) { + writer.writeStartElement("ptm"); + writer.writeAttribute("diff_mono", "" + mod.get_modvalue()); + writer.writeAttribute("position", "" + mod.get_posi()); + writer.writeAttribute("aa", "" + mod.get_AA()); + try { + this.writeOboModif(mod.getPsiMod()); + } catch (MSMSException e) { + String message = "error writing OBO modif : " + e.getMessage(); + logger.warn(message); + } + writer.writeEndElement();// ptm + + } + writer.writeEndElement();// ptms +} + +void ProticdbMl::writeOboModif(pappso::AaModificationP mod) { + _output_stream->writeStartElement("cvParam"); + _output_stream->writeAttribute("name", mod->getName()); + _output_stream->writeAttribute("cvLabel", "MOD"); + _output_stream->writeAttribute("accession", mod->getAccession()); + _output_stream->writeEndElement();// cvParam +} + void ProticdbMl::writeCvParam(QString acc, QString value, QString description) { _output_stream->writeStartElement("cvParam"); diff --git a/src/output/proticdbml.h b/src/output/proticdbml.h index d1c58b4293f9dc16fd6b817bc439eb89da8f6917..98f9ae9cefbdeb61feb787e14a0c6c006f3daf74 100644 --- a/src/output/proticdbml.h +++ b/src/output/proticdbml.h @@ -57,6 +57,8 @@ private : void writeMsRuns(); void writeIdentificationRun(IdentificationGroup * p_identification); void writeIdentificationDataSource(IdentificationDataSource * p_identification_data_source); + void writepeptideHits(IdentificationGroup * p_identification); + void writepeptideHitsbyGroup(GroupingGroup * p_group); private : QFile * _output_file; @@ -66,6 +68,7 @@ private : QTime _duracel; std::map<QString, QString> _map_accession2xmlid; std::map<QString, QString> _sample_to_id; + std::map<QString, QString> _peptidekey_to_id; }; #endif // PROTICDBML_H