From a71b121197035a5d1dda43453761b3dc9d97098c Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Wed, 17 May 2017 21:31:08 +0200 Subject: [PATCH] WIP: proticdbml output --- src/core/identificationgroup.cpp | 3 + src/core/identificationgroup.h | 1 + src/output/proticdbml.cpp | 361 ++++++++++++++++--------------- src/output/proticdbml.h | 1 + 4 files changed, 196 insertions(+), 170 deletions(-) diff --git a/src/core/identificationgroup.cpp b/src/core/identificationgroup.cpp index 78e09767a..1dbc4b558 100644 --- a/src/core/identificationgroup.cpp +++ b/src/core/identificationgroup.cpp @@ -166,6 +166,9 @@ void IdentificationGroup::addMsRunSp(MsRunSp ms_run_sp) { const std::vector<MsRunSp> & IdentificationGroup::getMsRunSpList() const { return _ms_run_list; } +const std::vector<IdentificationDataSource *> & IdentificationGroup::getIdentificationDataSourceList() const { + return _id_source_list; +} std::vector<ProteinMatch *> & IdentificationGroup::getProteinMatchList() { return _protein_match_list; } diff --git a/src/core/identificationgroup.h b/src/core/identificationgroup.h index 0d5de141e..1ed50884c 100644 --- a/src/core/identificationgroup.h +++ b/src/core/identificationgroup.h @@ -93,6 +93,7 @@ public: void startGrouping (const GroupingType & grouping_type); const std::vector<MsRunSp> & getMsRunSpList() const; + const std::vector<IdentificationDataSource *> & getIdentificationDataSourceList() const; /** @brief get tab name for qtabwidget * */ diff --git a/src/output/proticdbml.cpp b/src/output/proticdbml.cpp index 06ba2c163..371b400b5 100644 --- a/src/output/proticdbml.cpp +++ b/src/output/proticdbml.cpp @@ -33,6 +33,7 @@ #include <pappsomspp/grouping/grpprotein.h> #include <pappsomspp/amino_acid/Aa.h> #include "../core/msrun.h" +#include "../config.h" ProticdbMl::ProticdbMl(const QString & out_filename) { @@ -149,7 +150,7 @@ void ProticdbMl::writeSequence(ProteinMatch * p_protein_match) { } void ProticdbMl::writeProject() { -qDebug() << "ProticdbMl::writeProject begin"; + qDebug() << "ProticdbMl::writeProject begin"; _output_stream->writeStartElement("project"); _output_stream->writeAttribute("name", ""); _output_stream->writeAttribute("id", "p1"); @@ -178,101 +179,66 @@ qDebug() << "ProticdbMl::writeProject begin"; _output_stream->writeEndDocument(); -qDebug() << "ProticdbMl::writeProject end duration = " << duracel << "ms"; + qDebug() << "ProticdbMl::writeProject end duration = " << duracel << "ms"; } void ProticdbMl::writeIdentMethod() { - // Ajout des méthodes - _output_stream->writeStartElement("identMeth"); - _output_stream->writeAttribute("name", ""); - _output_stream->writeAttribute("id", "m1"); - _output_stream->writeEndElement(); + // Ajout des méthodes + _output_stream->writeStartElement("identMeth"); + _output_stream->writeAttribute("name", ""); + _output_stream->writeAttribute("id", "m1"); + _output_stream->writeEndElement(); - // Ajout des base de données - _output_stream->writeStartElement("customDb"); - _output_stream->writeAttribute("name", ""); - _output_stream->writeAttribute("id", "customdb0"); - _output_stream->writeEndElement(); + // Ajout des base de données + _output_stream->writeStartElement("customDb"); + _output_stream->writeAttribute("name", ""); + _output_stream->writeAttribute("id", "customdb0"); + _output_stream->writeEndElement(); } void ProticdbMl::writeSamples() { - _output_stream->writeStartElement("samples"); - for (std::vector<MsRunSp> msrun_sp : _sp_project.get()->getMsRunStore().getMsRunList()) { - _output_stream->writeStartElement("sample"); - QString id_samp = "samp" + pappso::Utils::getLexicalOrderedString(sample_to_id.size()); - QString id_msrun = msrun_sp.get()->getXmlId(); - QString name = msrun_sp.get()->getSampleName(); - // balise sample - _output_stream->writeAttribute("name", name); - _output_stream->writeAttribute("id", id_samp); - _output_stream->writeEmptyElement("description"); - _sample_to_id.insert(std::pair<QString, QString>( name, id_samp)); - - // Element spectrumList = doc.createElement("spectrumList"); - // msRun.appendChild(spectrumList); - - //msrun_to_id.put(samp.getSampleName(), id_msrun); - _output_stream->writeEndElement();// sample - - } - _output_stream->writeEndElement();// samples + _output_stream->writeStartElement("samples"); + for (std::vector<MsRunSp> msrun_sp : _sp_project.get()->getMsRunStore().getMsRunList()) { + _output_stream->writeStartElement("sample"); + QString id_samp = "samp" + pappso::Utils::getLexicalOrderedString(sample_to_id.size()); + QString id_msrun = msrun_sp.get()->getXmlId(); + QString name = msrun_sp.get()->getSampleName(); + // balise sample + _output_stream->writeAttribute("name", name); + _output_stream->writeAttribute("id", id_samp); + _output_stream->writeEmptyElement("description"); + _sample_to_id.insert(std::pair<QString, QString>( name, id_samp)); + + // Element spectrumList = doc.createElement("spectrumList"); + // msRun.appendChild(spectrumList); + + //msrun_to_id.put(samp.getSampleName(), id_msrun); + _output_stream->writeEndElement();// sample + + } + _output_stream->writeEndElement();// samples } void ProticdbMl::writeMsRuns() { - - _output_stream->writeStartElement("msRuns"); - for (std::vector<MsRunSp> msrun_sp : _sp_project.get()->getMsRunStore().getMsRunList()) { - - QString name = msrun_sp.get()->getSampleName(); - - QString id_samp = _sample_to_id.at(name); - - // Element spectrumList = doc.createElement("spectrumList"); - // msRun.appendChild(spectrumList); - - QString id_msrun = msrun_sp.get()->getXmlId(); - - // balise MsRun - _output_stream->writeStartElement("msRun"); - _output_stream->writeAttribute("sample_id", id_samp); - _output_stream->writeAttribute("id", id_msrun); - - _output_stream->writeStartElement("description"); - _output_stream->writeStartElement("admin"); - // writer.writeStartElement("contact"); - // writer.writeStartElement("email"); - // writer.writeCharacters("valot@moulon.inra.fr"); - // writer.writeEndElement();// email - - // writer.writeStartElement("name"); - // writer.writeCharacters("Valot Benoit"); - // writer.writeEndElement();// name - // writer.writeEndElement();// contact - - _output_stream->writeStartElement("sourceFile"); - _output_stream->writeStartElement("nameOfFile"); - _output_stream->writeCharacters(name + ".RAW"); - _output_stream->writeEndElement();// nameOfFile - _output_stream->writeEndElement();// sourceFile - - _output_stream->writeStartElement("sampleName"); - _output_stream->writeCharacters(name); - _output_stream->writeEndElement();// sampleName - _output_stream->writeEndElement();// admin - _output_stream->writeEndElement();// description - - _output_stream->writeEndElement();// msRun - - } - _output_stream->writeEndElement();// msRuns -} + _output_stream->writeStartElement("msRuns"); + for (std::vector<MsRunSp> msrun_sp : _sp_project.get()->getMsRunStore().getMsRunList()) { -void ProticdbMl::writeIdentificationRun(IdentificationGroup * p_identification) { - _output_stream->writeAttribute("ident_method_id", "m1"); - _output_stream->writeAttribute("customdb_id", "customdb0"); + QString name = msrun_sp.get()->getSampleName(); + + QString id_samp = _sample_to_id.at(name); + + // Element spectrumList = doc.createElement("spectrumList"); + // msRun.appendChild(spectrumList); + + QString id_msrun = msrun_sp.get()->getXmlId(); + + // balise MsRun + _output_stream->writeStartElement("msRun"); + _output_stream->writeAttribute("sample_id", id_samp); + _output_stream->writeAttribute("id", id_msrun); _output_stream->writeStartElement("description"); _output_stream->writeStartElement("admin"); @@ -285,108 +251,163 @@ void ProticdbMl::writeIdentificationRun(IdentificationGroup * p_identification) // writer.writeCharacters("Valot Benoit"); // writer.writeEndElement();// name // writer.writeEndElement();// contact - /* - this.xpipFile = XtandemPipelineSession.getInstance() - .getCurrentXpipFile(); _output_stream->writeStartElement("sourceFile"); _output_stream->writeStartElement("nameOfFile"); - // TODO - // get the loaded xpip file name - _output_stream->writeCharacters(this.xpipFile.getName()); + _output_stream->writeCharacters(name + ".RAW"); _output_stream->writeEndElement();// nameOfFile - _output_stream->writeStartElement("pathToFile"); - _output_stream->writeCharacters(this.xpipFile.getAbsolutePath()); - _output_stream->writeEndElement();// pathToFile - _output_stream->writeStartElement("fileType"); - _output_stream->writeCharacters("XPIP file"); - _output_stream->writeEndElement();// fileType _output_stream->writeEndElement();// sourceFile -*/ + + _output_stream->writeStartElement("sampleName"); + _output_stream->writeCharacters(name); + _output_stream->writeEndElement();// sampleName _output_stream->writeEndElement();// admin + _output_stream->writeEndElement();// description - // if we can retrieve original informations in xml xtandem results - HashSet<IdentificationXtandemFile> xtSourceFiles = Utils - .getXtandemFileList(identification); - if (xtSourceFiles != null) { - // - for (IdentificationXtandemFile xtFile : xtSourceFiles) { - xtFile.writeProticDbMlDataProcessing(this.writer); - } - } else { - writer.writeStartElement("dataProcessing"); - writer.writeStartElement("software"); - writer.writeStartElement("name"); - writer.writeAttribute("acc", "PROTICdbO:0000283"); - writer.writeCharacters("X!Tandem"); - writer.writeEndElement();// name - writer.writeStartElement("version"); - writer.writeCharacters(XtandemPipelineSession.getInstance() - .getConfig().getXtandemVersion()); - writer.writeEndElement();// version - writer.writeEndElement();// software - writer.writeStartElement("processingMethod"); - writer.writeEndElement();// processingMethod - writer.writeEndElement();// dataProcessing - } + _output_stream->writeEndElement();// msRun - // id: PROTICdbO:0000316 - // name: X!TandemPipeline - - writer.writeStartElement("dataProcessing"); - writer.writeStartElement("software"); - writer.writeStartElement("name"); - writer.writeAttribute("acc", "PROTICdbO:0000316"); - writer.writeCharacters("X!TandemPipeline"); - writer.writeEndElement();// name - writer.writeStartElement("version"); - writer.writeCharacters(XtandemPipelineMain.version); - writer.writeEndElement();// version - writer.writeEndElement();// software - writer.writeStartElement("processingMethod"); - // cvParams - - writeCvParam("PROTICdbO:0000323", "" - + XtandemPipelineSession.getInstance().getConfig() - .get_protein_evalue(), - "X!TandemPipeline filter on protein evalue (log)"); - - // indi - // combine - // phospho - xtpExperimentType = XtandemPipelineSession.getInstance() - .getDataTypeBase().getType(); - if (xtpExperimentType.equals("indiv")) { - this.writeCvParam("PROTICdbO:0000319", xtpExperimentType, ""); - } - if (xtpExperimentType.equals("combi")) { - this.writeCvParam("PROTICdbO:0000320", xtpExperimentType, ""); - } - if (xtpExperimentType.equals("phospho")) { - this.writeCvParam("PROTICdbO:0000321", xtpExperimentType, ""); - } + } + _output_stream->writeEndElement();// msRuns +} + +void ProticdbMl::writeIdentificationDataSource(IdentificationDataSource * p_identification_data_source) { + + + _output_stream->writeStartElement("dataProcessing"); + _output_stream->writeStartElement("software"); + _output_stream->writeStartElement("name"); + if (p_identification_data_source->getIdentificationEngine() == IdentificationEngine::XTandem) { + _output_stream->writeAttribute("acc", "PROTICdbO:0000283"); + } + _output_stream->writeCharacters(p_identification_data_source->getIdentificationEngineName()); + + _output_stream->writeEndElement();// name + _output_stream->writeStartElement("version"); + _output_stream->writeCharacters(p_identification_data_source->getIdentificationEngineVersion()); + _output_stream->writeEndElement();// version + _output_stream->writeEndElement();// software + _output_stream->writeStartElement("processingMethod"); + // mzXml source file name + writeCvParam("PROTICdbO:0000343",p_identification_data_source->getMsRunSp().get()->getFilename(), + "MS/MS data source file name"); + if (p_identification_data_source->getIdentificationEngine() == IdentificationEngine::XTandem) { + // model file name + writeCvParam("PROTICdbO:0000342", p_identification_data_source->getIdentificationEngineParam(IdentificationEngineParam::tandem_param).toString(), + "X!tandem xml model file name"); + // xtandem result file name + writeCvParam("PROTICdbO:0000341", p_identification_data_source->getResourceName(), + "X!tandem xml result file name"); + } + _output_stream->writeEndElement();// processingMethod + _output_stream->writeEndElement();// dataProcessing + /* + _output_stream->writeStartElement("dataProcessing"); + _output_stream->writeStartElement("software"); + _output_stream->writeStartElement("name"); + _output_stream->writeAttribute("acc", "PROTICdbO:0000283"); + _output_stream->writeCharacters("X!Tandem"); + _output_stream->writeEndElement();// name + _output_stream->writeStartElement("version"); + _output_stream->writeCharacters(XtandemPipelineSession.getInstance() + .getConfig().getXtandemVersion()); + _output_stream->writeEndElement();// version + _output_stream->writeEndElement();// software + _output_stream->writeStartElement("processingMethod"); + _output_stream->writeEndElement();// processingMethod + _output_stream->writeEndElement();// dataProcessing + */ +} + +void ProticdbMl::writeIdentificationRun(IdentificationGroup * p_identification) { + _output_stream->writeAttribute("ident_method_id", "m1"); + _output_stream->writeAttribute("customdb_id", "customdb0"); + + _output_stream->writeStartElement("description"); + _output_stream->writeStartElement("admin"); + // writer.writeStartElement("contact"); + // writer.writeStartElement("email"); + // writer.writeCharacters("valot@moulon.inra.fr"); + // writer.writeEndElement();// email + + // writer.writeStartElement("name"); + // writer.writeCharacters("Valot Benoit"); + // writer.writeEndElement();// name + // writer.writeEndElement();// contact + /* + this.xpipFile = XtandemPipelineSession.getInstance() + .getCurrentXpipFile(); + + _output_stream->writeStartElement("sourceFile"); + _output_stream->writeStartElement("nameOfFile"); + // TODO + // get the loaded xpip file name + _output_stream->writeCharacters(this.xpipFile.getName()); + _output_stream->writeEndElement();// nameOfFile + _output_stream->writeStartElement("pathToFile"); + _output_stream->writeCharacters(this.xpipFile.getAbsolutePath()); + _output_stream->writeEndElement();// pathToFile + _output_stream->writeStartElement("fileType"); + _output_stream->writeCharacters("XPIP file"); + _output_stream->writeEndElement();// fileType + _output_stream->writeEndElement();// sourceFile + */ + _output_stream->writeEndElement();// admin + + // if we can retrieve original informations in xml xtandem results + // + for (IdentificationDataSource * p_identification_data_source : p_identification->getIdentificationDataSourceList()) { + writeIdentificationDataSource(p_identification_data_source); + } + +// id: PROTICdbO:0000316 +// name: X!TandemPipeline + + _output_stream->writeStartElement("dataProcessing"); + _output_stream->writeStartElement("software"); + _output_stream->writeStartElement("name"); + _output_stream->writeAttribute("acc", "PROTICdbO:0000316"); + _output_stream->writeCharacters("X!TandemPipeline"); + _output_stream->writeEndElement();// name + _output_stream->writeStartElement("version"); + _output_stream->writeCharacters(XTPCPP_VERSION); + _output_stream->writeEndElement();// version + _output_stream->writeEndElement();// software + _output_stream->writeStartElement("processingMethod"); +// cvParams + + writeCvParam("PROTICdbO:0000323", QString("%1").arg(_sp_project.get()->getAutomaticFilterParameters().getFilterProteinEvalue()), + "X!TandemPipeline filter on protein evalue (log)"); + +// indi +// combine +// phospho + if (_sp_project.get()->isCombineMode()) { + writeCvParam("PROTICdbO:0000320", "combine", ""); + } + else { + writeCvParam("PROTICdbO:0000319", "indiv", ""); + } + //if (xtpExperimentType.equals("phospho")) { + // this.writeCvParam("PROTICdbO:0000321", xtpExperimentType, ""); + //} - this.writeCvParam("PROTICdbO:0000325", "" - + XtandemPipelineSession.getInstance().getConfig() - .get_peptide_number(), - "X!TandemPipeline filter on minimal number of peptide per protein"); + writeCvParam("PROTICdbO:0000325", QString("%1").arg(_sp_project.get()->getAutomaticFilterParameters().getFilterMinimumPeptidePerMatch()), + "X!TandemPipeline filter on minimal number of peptide per protein"); - this.writeCvParam("PROTICdbO:0000324", "" - + XtandemPipelineSession.getInstance().getConfig() - .get_peptide_evalue(), - "X!TandemPipeline filter on peptide evalue"); + writeCvParam("PROTICdbO:0000324", QString("%1").arg(_sp_project.get()->getAutomaticFilterParameters().getFilterPeptideEvalue()), + "X!TandemPipeline filter on peptide evalue"); - // TODO write database filter - // this.writeCvParam("PROTICdbO:0000324", "" - // + - // XtandemPipelineSession.getInstance().getConfig().get_database_filter(), - // "X!TandemPipeline filter on peptide evalue"); +// TODO write database filter +// this.writeCvParam("PROTICdbO:0000324", "" +// + +// XtandemPipelineSession.getInstance().getConfig().get_database_filter(), +// "X!TandemPipeline filter on peptide evalue"); - writer.writeEndElement();// processingMethod + _output_stream->writeEndElement();// processingMethod - writer.writeEndElement();// dataProcessing + _output_stream->writeEndElement();// dataProcessing - writer.writeEndElement();// description + _output_stream->writeEndElement();// description } private void writePeptideHit(Peptide pep, Group group, diff --git a/src/output/proticdbml.h b/src/output/proticdbml.h index b3364df62..d1c58b429 100644 --- a/src/output/proticdbml.h +++ b/src/output/proticdbml.h @@ -56,6 +56,7 @@ private : void writeSamples(); void writeMsRuns(); void writeIdentificationRun(IdentificationGroup * p_identification); + void writeIdentificationDataSource(IdentificationDataSource * p_identification_data_source); private : QFile * _output_file; -- GitLab