From 4fe5dbdbc725448b63a3eea7135a3d7d230cb3c5 Mon Sep 17 00:00:00 2001 From: Olivier Langella <Olivier.Langella@moulon.inra.fr> Date: Tue, 16 May 2017 16:27:01 +0200 Subject: [PATCH] parsing accession to find dbxref list --- src/core/proteinxtp.cpp | 77 +- src/core/proteinxtp.h | 9 + src/output/proticdbml.cpp | 1370 ++++++++++++++++++------------------ src/output/proticdbml.h | 2 + src/utils/proteinstore.cpp | 1 + src/utils/types.h | 11 + src/utils/utils.cpp | 26 +- src/utils/utils.h | 2 + 8 files changed, 787 insertions(+), 711 deletions(-) diff --git a/src/core/proteinxtp.cpp b/src/core/proteinxtp.cpp index 3f468ea4e..ef3bba3f8 100644 --- a/src/core/proteinxtp.cpp +++ b/src/core/proteinxtp.cpp @@ -76,9 +76,9 @@ QString ProteinXtp::getOnlyAminoAcidSequence() const { pappso::pappso_double ProteinXtp::getMass() const { try { - pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X","")); - return peptide.getMass(); - + pappso::Peptide peptide(getOnlyAminoAcidSequence().replace("X","")); + return peptide.getMass(); + } catch (pappso::PappsoException error) { throw pappso::PappsoException(QObject::tr("Error computing mass for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat())); @@ -97,27 +97,64 @@ public: unsigned int ProteinXtp::countTrypticPeptidesForPAI() const { qDebug() << "ProteinXtp::countTrypticPeptidesForPAI begin"; try { - pappso::Enzyme kinase; - kinase.setMiscleavage(0); - DigestionHandler digestion; - - pappso::ProteinSp protein = std::make_shared<const pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence().replace("X","")); - kinase.eat(0,protein,false,digestion); - - unsigned int count = 0; - for (const QString & peptide_str: digestion._peptide_list) { - pappso::Peptide peptide(peptide_str); - pappso::mz mass= peptide.getMass(); - if ((mass > 800) && (mass < 2500)) { - count ++; + pappso::Enzyme kinase; + kinase.setMiscleavage(0); + DigestionHandler digestion; + + pappso::ProteinSp protein = std::make_shared<const 
pappso::Protein>(this->getDescription(),this->getOnlyAminoAcidSequence().replace("X","")); + kinase.eat(0,protein,false,digestion); + + unsigned int count = 0; + for (const QString & peptide_str: digestion._peptide_list) { + pappso::Peptide peptide(peptide_str); + pappso::mz mass= peptide.getMass(); + if ((mass > 800) && (mass < 2500)) { + count ++; + } } - } - qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end"; - return count; - + qDebug() << "ProteinXtp::countTrypticPeptidesForPAI end"; + return count; + } catch (pappso::PappsoException error) { throw pappso::PappsoException(QObject::tr("Error in countTrypticPeptidesForPAI for protein %1 :\n%2").arg(getAccession()).arg(error.qwhat())); } } + + +void ProteinXtp::parseAccession2dbxref() { + QStringList access_list = getAccession().split("|"); /* QString::split(QString) matches its separator literally, not as a regex: split on a plain '|' */ + // if (access.length == 1) + QRegExp atg("^[Aa][Tt][MmCc1-5][Gg]\\d{5}\\.?\\d?$"); + QRegExp ncbi_gi("^[0-9]{5,8}$"); + QRegExp swiss_prot("^P[A-Z0-9]{5}$"); + QRegExp trembl("^[QOA][A-Z0-9]{5}$"); + QRegExp ref ("^[NZ]P\\_[0-9]{5,8}$"); + + for (QString & accession :access_list) { + if (atg.indexIn(accession, 0) != -1) { + QStringList temp = accession.split("."); /* literal '.' separator: keep only the locus part before the optional version suffix */ + _dbxref_list.push_back(std::make_pair(ExternalDatabase::AGI_LocusCode, temp.at(0))); + } + if (ncbi_gi.indexIn(accession, 0) != -1) { + _dbxref_list.push_back(std::make_pair(ExternalDatabase::NCBI_gi, accession)); + } + + if (swiss_prot.indexIn(accession, 0) != -1) { + _dbxref_list.push_back(std::make_pair(ExternalDatabase::SwissProt, accession)); + } + + if (trembl.indexIn(accession, 0) != -1) { + _dbxref_list.push_back(std::make_pair(ExternalDatabase::TrEMBL, accession)); + } + if (ref.indexIn(accession, 0) != -1) { + _dbxref_list.push_back(std::make_pair(ExternalDatabase::ref, accession)); + } + } + +} + +const std::list<std::pair<ExternalDatabase, QString>> & ProteinXtp::getDbxrefList() const { + return _dbxref_list; +} diff --git a/src/core/proteinxtp.h b/src/core/proteinxtp.h index 
9356fdc68..947ebb593 100644 --- a/src/core/proteinxtp.h +++ b/src/core/proteinxtp.h @@ -23,7 +23,9 @@ #include <pappsomspp/protein/protein.h> #include <pappsomspp/types.h> +#include <list> #include "sequencedatabase.h" +#include "../utils/types.h" #ifndef PROTEIN_XTP_H #define PROTEIN_XTP_H @@ -61,11 +63,18 @@ public: /** @brief get protein mass in dalton */ pappso::pappso_double getMass() const; + + /** @brief look for external database accessions in accesion text + */ + void parseAccession2dbxref(); + + const std::list<std::pair<ExternalDatabase, QString>> & getDbxrefList() const; private: SequenceDatabase * _p_sequence_database; bool _is_decoy=false; bool _is_contaminant=false; + std::list<std::pair<ExternalDatabase, QString>> _dbxref_list; }; #endif // PROTEIN_XTP_H diff --git a/src/output/proticdbml.cpp b/src/output/proticdbml.cpp index ff313956a..1ace5b09f 100644 --- a/src/output/proticdbml.cpp +++ b/src/output/proticdbml.cpp @@ -77,702 +77,692 @@ void ProticdbMl::write(ProjectSp sp_project) { if (_sp_project.get() == nullptr) { throw pappso::PappsoException(QObject::tr("Error writing PROTICdbML file :\n project is empty")); } - - + + _output_stream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance","xsi"); - // writer.setDefaultNamespace(namespaceURI); - _output_stream->writeStartElement("PROTICdb"); - _output_stream->writeAttribute("xmlns","http://pappso.inra.fr/xsd/masschroqml/2.2"); - _output_stream->writeAttribute("http://www.w3.org/2001/XMLSchema-instance","schemaLocation","http://pappso.inra.fr/xsd/masschroqml/2.2 http://pappso.inra.fr/xsd/masschroq-2.2.xsd"); - + // writer.setDefaultNamespace(namespaceURI); + _output_stream->writeStartElement("PROTICdb"); + //_output_stream->writeAttribute("xmlns","http://pappso.inra.fr/xsd/masschroqml/2.2"); + //_output_stream->writeAttribute("http://www.w3.org/2001/XMLSchema-instance","schemaLocation","http://pappso.inra.fr/xsd/masschroqml/2.2 http://pappso.inra.fr/xsd/masschroq-2.2.xsd"); + + + 
//writer.writeAttribute(xmlnsxsi, "noNamespaceSchemaLocation", xsischemaLocation); - //writer.writeAttribute(xmlnsxsi, "noNamespaceSchemaLocation", xsischemaLocation); + _output_stream->writeAttribute("version", "1.0"); - _output_stream->writeAttribute("version", "1.0"); + _output_stream->writeAttribute("type", "MSidentificationResults"); - _output_stream->writeAttribute("type", "MSidentificationResults"); + writeIdentMethod(); + _output_stream->writeStartElement("sequences"); + for (IdentificationGroup * p_ident_group : sp_project.get()->getIdentificationGroupList()) { - //writeIdentMethod(); - _output_stream->writeStartElement("sequences"); -for (IdentificationGroup * p_ident_group : sp_project.get()->getIdentificationGroupList()) { - - for (ProteinMatch * p_protein_match: p_ident_group->getProteinMatchList()) { - writeSequence(p_protein_match); + for (ProteinMatch * p_protein_match: p_ident_group->getProteinMatchList()) { + writeSequence(p_protein_match); + } } + //</sequences> + _output_stream->writeEndElement(); + + writeProject(); + } - //</sequences> + +void ProticdbMl::writeSequence(ProteinMatch * p_protein_match) { + qDebug() << "ProticdbMl::writeSequence begin"; + if (p_protein_match->getValidationState() != ValidationState::grouped) return; + pappso::GrpProtein * p_grp_protein = p_protein_match->getGrpProteinSp().get(); + + if (_map_accession2xmlid.find(p_grp_protein->getAccession()) == _map_accession2xmlid.end()) { + //not found + + QString id = QString("seq%1").arg(_map_accession2xmlid.size()); + _output_stream->writeStartElement("sequence"); + _output_stream->writeAttribute("id", id); + QString display_id = QString("%1 %2").arg(p_grp_protein->getAccession()).arg(p_protein_match->getProteinXtpSp().get()->getDescription()); + _output_stream->writeAttribute("display_id", display_id.left(60)); + // <dbxref key="AT5G16390" + // dbname="AGI_LocusCode"></dbxref> + //if (prot.get_dbxref_type().equals("no") == false) { + for (const std::pair<ExternalDatabase, 
QString> & dbxref :p_protein_match->getProteinXtpSp().get()->getDbxrefList()) { + //if (false) { + _output_stream->writeStartElement("dbxref"); + _output_stream->writeAttribute("dbname", + Utils::getDatabaseName (dbxref.first)); + _output_stream->writeAttribute("key", + dbxref.second); + _output_stream->writeEndElement();// dbxref + } + _output_stream->writeStartElement("description"); + _output_stream->writeCharacters(p_protein_match->getProteinXtpSp().get()->getDescription()); + _output_stream->writeEndElement();// description + _output_stream->writeStartElement("seq"); + _output_stream->writeCharacters(p_protein_match->getProteinXtpSp().get()->getSequence()); + _output_stream->writeEndElement();// seq + + _output_stream->writeEndElement();// sequence + + _map_accession2xmlid.insert(std::pair<QString, QString>(p_grp_protein->getAccession(), id)); + + } + qDebug() << "ProticdbMl::writeSequence end"; +} + +void ProticdbMl::writeProject() { +qDebug() << "ProticdbMl::writeProject begin"; + _output_stream->writeStartElement("project"); + _output_stream->writeAttribute("name", ""); + _output_stream->writeAttribute("id", "p1"); + // ajout des echantillons_msrun + writeSamples(); + writeMsRuns(); + + _output_stream->writeStartElement("identificationRuns"); + + for (IdentificationGroup * p_identification : _sp_project.get()->getIdentificationGroupList()) { + _output_stream->writeStartElement("identificationRun"); + // ajout d'une mnip d'identification + writeIdentificationRun(p_identification); + + // ajout des petides + writepeptideHits(p_identification); + + // ajout des mathces + writeMatchs(p_identification); + + _output_stream->writeEndElement();// identificationRun + } + _output_stream->writeEndElement();// identificationRuns + _output_stream->writeEndElement();// project + + _output_stream->writeEndDocument(); + + +qDebug() << "ProticdbMl::writeProject end duration = " << duracel << "ms"; +} + +void ProticdbMl::writeIdentMethod() { + // Ajout des méthodes + 
_output_stream->writeStartElement("identMeth"); + _output_stream->writeAttribute("name", ""); + _output_stream->writeAttribute("id", "m1"); + _output_stream->writeEndElement(); + + // Ajout des base de données + _output_stream->writeStartElement("customDb"); + _output_stream->writeAttribute("name", ""); + _output_stream->writeAttribute("id", "customdb0"); _output_stream->writeEndElement(); - } - - void ProticdbMl::writeSequence(ProteinMatch * p_protein_match) { - qDebug() << "ProticdbMl::writeSequence begin"; - if (p_protein_match->getValidationState() != ValidationState::grouped) return; - pappso::GrpProtein * p_grp_protein = p_protein_match->getGrpProteinSp().get(); - - if (_map_accession2xmlid.find(p_grp_protein->getAccession()) == _map_accession2xmlid.end()) { - //not found - - QString id = QString("seq%1").arg(_map_accession2xmlid.size()); - _output_stream->writeStartElement("sequence"); - _output_stream->writeAttribute("id", id); - QString display_id = QString("%1 %2").arg(p_grp_protein->getAccession()).arg(p_protein_match->getProteinXtpSp().get()->getDescription()); - if (display_id.size() > 60) { - _output_stream->writeAttribute("display_id", display_id.left(60)); - } else { - _output_stream->writeAttribute("display_id", display_id); - } - // <dbxref key="AT5G16390" - // dbname="AGI_LocusCode"></dbxref> - //if (prot.get_dbxref_type().equals("no") == false) { - if (false) { - _output_stream->writeStartElement("dbxref"); - _output_stream->writeAttribute("key", - prot.get_dbxref_accession()); - _output_stream->writeAttribute("dbname", - prot.get_dbxref_type()); - _output_stream->writeEndElement();// dbxref - } - _output_stream->writeStartElement("description"); - _output_stream->writeCharacters(p_protein_match->getProteinXtpSp().get()->getDescription()); - _output_stream->writeEndElement();// description - _output_stream->writeStartElement("seq"); - _output_stream->writeCharacters(p_protein_match->getProteinXtpSp().get()->getSequence()); - 
_output_stream->writeEndElement();// seq - - _output_stream->writeEndElement();// sequence - - _map_accession2xmlid.insert(std::pair<QString, QString>(p_grp_protein->getAccession(), id)); - + + +} + +private void writeSamples() throws Exception { + try { + writer.writeStartElement("samples"); + + for (Identification ident : arrayIdentifications) { + for (MsRun samp : ident.getMsRunSet()) { + writer.writeStartElement("sample"); + String id_samp = "samp" + sample_to_id.size(); + String id_msrun = "msr" + msrun_to_id.size(); + String name = samp.getSampleName(); + name = name.replaceAll(".xml", ""); + // balise sample + writer.writeAttribute("name", name); + writer.writeAttribute("id", id_samp); + writer.writeEmptyElement("description"); + sample_to_id.put(samp.getSampleName(), id_samp); + + // Element spectrumList = doc.createElement("spectrumList"); + // msRun.appendChild(spectrumList); + + msrun_to_id.put(samp.getSampleName(), id_msrun); + writer.writeEndElement();// sample + } + } + writer.writeEndElement();// samples + } catch (Exception e) { + String message = "error writing samples in ProticDbMl \n" + + e.getMessage(); + logger.error(message); + throw new Exception(message); + } +} + +private void writeMsRuns() throws Exception { + try { + writer.writeStartElement("msRuns"); + + for (Identification ident : arrayIdentifications) { + for (MsRun samp : ident.getMsRunSet()) { + + String name = samp.getSampleName(); + name = name.replaceAll(".xml", ""); + + String id_samp = sample_to_id.get(samp.getSampleName()); + + // Element spectrumList = doc.createElement("spectrumList"); + // msRun.appendChild(spectrumList); + + String id_msrun = msrun_to_id.get(samp.getSampleName()); + + // balise MsRun + writer.writeStartElement("msRun"); + writer.writeAttribute("sample_id", id_samp); + writer.writeAttribute("id", id_msrun); + + writer.writeStartElement("description"); + writer.writeStartElement("admin"); + // writer.writeStartElement("contact"); + // 
writer.writeStartElement("email"); + // writer.writeCharacters("valot@moulon.inra.fr"); + // writer.writeEndElement();// email + + // writer.writeStartElement("name"); + // writer.writeCharacters("Valot Benoit"); + // writer.writeEndElement();// name + // writer.writeEndElement();// contact + + writer.writeStartElement("sourceFile"); + writer.writeStartElement("nameOfFile"); + writer.writeCharacters(name + ".RAW"); + writer.writeEndElement();// nameOfFile + writer.writeEndElement();// sourceFile + + writer.writeStartElement("sampleName"); + writer.writeCharacters(name); + writer.writeEndElement();// sampleName + writer.writeEndElement();// admin + writer.writeEndElement();// description + + writer.writeEndElement();// msRun + } + } + writer.writeEndElement();// msRuns + } catch (Exception e) { + String message = "error writing MS runs in ProticDbMl \n" + + e.getMessage(); + logger.error(message); + throw new Exception(message); + } +} + + +private void writeIdentificationRun(Identification identification) +throws Exception { + try { + writer.writeAttribute("ident_method_id", "m1"); + writer.writeAttribute("customdb_id", "customdb0"); + + writer.writeStartElement("description"); + writer.writeStartElement("admin"); + // writer.writeStartElement("contact"); + // writer.writeStartElement("email"); + // writer.writeCharacters("valot@moulon.inra.fr"); + // writer.writeEndElement();// email + + // writer.writeStartElement("name"); + // writer.writeCharacters("Valot Benoit"); + // writer.writeEndElement();// name + // writer.writeEndElement();// contact + this.xpipFile = XtandemPipelineSession.getInstance() + .getCurrentXpipFile(); + + writer.writeStartElement("sourceFile"); + writer.writeStartElement("nameOfFile"); + // TODO + // get the loaded xpip file name + writer.writeCharacters(this.xpipFile.getName()); + writer.writeEndElement();// nameOfFile + writer.writeStartElement("pathToFile"); + writer.writeCharacters(this.xpipFile.getAbsolutePath()); + 
writer.writeEndElement();// pathToFile + writer.writeStartElement("fileType"); + writer.writeCharacters("XPIP file"); + writer.writeEndElement();// fileType + writer.writeEndElement();// sourceFile + + writer.writeEndElement();// admin + + // if we can retrieve original informations in xml xtandem results + HashSet<IdentificationXtandemFile> xtSourceFiles = Utils + .getXtandemFileList(identification); + if (xtSourceFiles != null) { + // + for (IdentificationXtandemFile xtFile : xtSourceFiles) { + xtFile.writeProticDbMlDataProcessing(this.writer); + } + } else { + writer.writeStartElement("dataProcessing"); + writer.writeStartElement("software"); + writer.writeStartElement("name"); + writer.writeAttribute("acc", "PROTICdbO:0000283"); + writer.writeCharacters("X!Tandem"); + writer.writeEndElement();// name + writer.writeStartElement("version"); + writer.writeCharacters(XtandemPipelineSession.getInstance() + .getConfig().getXtandemVersion()); + writer.writeEndElement();// version + writer.writeEndElement();// software + writer.writeStartElement("processingMethod"); + writer.writeEndElement();// processingMethod + writer.writeEndElement();// dataProcessing + } + + // id: PROTICdbO:0000316 + // name: X!TandemPipeline + + writer.writeStartElement("dataProcessing"); + writer.writeStartElement("software"); + writer.writeStartElement("name"); + writer.writeAttribute("acc", "PROTICdbO:0000316"); + writer.writeCharacters("X!TandemPipeline"); + writer.writeEndElement();// name + writer.writeStartElement("version"); + writer.writeCharacters(XtandemPipelineMain.version); + writer.writeEndElement();// version + writer.writeEndElement();// software + writer.writeStartElement("processingMethod"); + // cvParams + + this.writeCvParam("PROTICdbO:0000323", "" + + XtandemPipelineSession.getInstance().getConfig() + .get_protein_evalue(), + "X!TandemPipeline filter on protein evalue (log)"); + + // indi + // combine + // phospho + xtpExperimentType = XtandemPipelineSession.getInstance() + 
.getDataTypeBase().getType(); + if (xtpExperimentType.equals("indiv")) { + this.writeCvParam("PROTICdbO:0000319", xtpExperimentType, ""); + } + if (xtpExperimentType.equals("combi")) { + this.writeCvParam("PROTICdbO:0000320", xtpExperimentType, ""); } - qDebug() << "ProticdbMl::writeSequence end"; - } - - public void close() throws Exception { - writer.writeEndElement();// sequences - - writer.writeStartElement("project"); - writer.writeAttribute("name", ""); - writer.writeAttribute("id", "p1"); - // ajout des echantillons_msrun - this.writeSamples(); - this.writeMsRuns(); - - writer.writeStartElement("identificationRuns"); - - for (Identification identification : this.arrayIdentifications) { - writer.writeStartElement("identificationRun"); - // ajout d'une mnip d'identification - this.writeIdentificationRun(identification); - - // ajout des petides - this.writepeptideHits(identification); - - // ajout des mathces - this.writeMatchs(identification); - - writer.writeEndElement();// identificationRun - } - writer.writeEndElement();// identificationRuns - writer.writeEndElement();// project - - writer.writeEndDocument(); - - writer.flush(); - writer.close(); - - logger.info("Duration creating PROTICdbML xml document: " - + (System.currentTimeMillis() - duracel) + " ms"); - } - - private void writeIdentMethod() { - logger.debug("write begin"); - try { - // Ajout des méthodes - writer.writeStartElement("identMeth"); - writer.writeAttribute("name", ""); - writer.writeAttribute("id", "m1"); - writer.writeEndElement(); - - // Ajout des base de données - writer.writeStartElement("customDb"); - writer.writeAttribute("name", ""); - writer.writeAttribute("id", "customdb0"); - writer.writeEndElement(); - - } catch (Exception e) { - logger.error(e.getMessage()); - } - logger.debug("write end"); - - } - - private void writeSamples() throws Exception { - try { - writer.writeStartElement("samples"); - - for (Identification ident : arrayIdentifications) { - for (MsRun samp : 
ident.getMsRunSet()) { - writer.writeStartElement("sample"); - String id_samp = "samp" + sample_to_id.size(); - String id_msrun = "msr" + msrun_to_id.size(); - String name = samp.getSampleName(); - name = name.replaceAll(".xml", ""); - // balise sample - writer.writeAttribute("name", name); - writer.writeAttribute("id", id_samp); - writer.writeEmptyElement("description"); - sample_to_id.put(samp.getSampleName(), id_samp); - - // Element spectrumList = doc.createElement("spectrumList"); - // msRun.appendChild(spectrumList); - - msrun_to_id.put(samp.getSampleName(), id_msrun); - writer.writeEndElement();// sample - } - } - writer.writeEndElement();// samples - } catch (Exception e) { - String message = "error writing samples in ProticDbMl \n" - + e.getMessage(); - logger.error(message); - throw new Exception(message); - } - } - - private void writeMsRuns() throws Exception { - try { - writer.writeStartElement("msRuns"); - - for (Identification ident : arrayIdentifications) { - for (MsRun samp : ident.getMsRunSet()) { - - String name = samp.getSampleName(); - name = name.replaceAll(".xml", ""); - - String id_samp = sample_to_id.get(samp.getSampleName()); - - // Element spectrumList = doc.createElement("spectrumList"); - // msRun.appendChild(spectrumList); - - String id_msrun = msrun_to_id.get(samp.getSampleName()); - - // balise MsRun - writer.writeStartElement("msRun"); - writer.writeAttribute("sample_id", id_samp); - writer.writeAttribute("id", id_msrun); - - writer.writeStartElement("description"); - writer.writeStartElement("admin"); - // writer.writeStartElement("contact"); - // writer.writeStartElement("email"); - // writer.writeCharacters("valot@moulon.inra.fr"); - // writer.writeEndElement();// email - - // writer.writeStartElement("name"); - // writer.writeCharacters("Valot Benoit"); - // writer.writeEndElement();// name - // writer.writeEndElement();// contact - - writer.writeStartElement("sourceFile"); - writer.writeStartElement("nameOfFile"); - 
writer.writeCharacters(name + ".RAW"); - writer.writeEndElement();// nameOfFile - writer.writeEndElement();// sourceFile - - writer.writeStartElement("sampleName"); - writer.writeCharacters(name); - writer.writeEndElement();// sampleName - writer.writeEndElement();// admin - writer.writeEndElement();// description - - writer.writeEndElement();// msRun - } - } - writer.writeEndElement();// msRuns - } catch (Exception e) { - String message = "error writing MS runs in ProticDbMl \n" - + e.getMessage(); - logger.error(message); - throw new Exception(message); - } - } - - - private void writeIdentificationRun(Identification identification) - throws Exception { - try { - writer.writeAttribute("ident_method_id", "m1"); - writer.writeAttribute("customdb_id", "customdb0"); - - writer.writeStartElement("description"); - writer.writeStartElement("admin"); - // writer.writeStartElement("contact"); - // writer.writeStartElement("email"); - // writer.writeCharacters("valot@moulon.inra.fr"); - // writer.writeEndElement();// email - - // writer.writeStartElement("name"); - // writer.writeCharacters("Valot Benoit"); - // writer.writeEndElement();// name - // writer.writeEndElement();// contact - this.xpipFile = XtandemPipelineSession.getInstance() - .getCurrentXpipFile(); - - writer.writeStartElement("sourceFile"); - writer.writeStartElement("nameOfFile"); - // TODO - // get the loaded xpip file name - writer.writeCharacters(this.xpipFile.getName()); - writer.writeEndElement();// nameOfFile - writer.writeStartElement("pathToFile"); - writer.writeCharacters(this.xpipFile.getAbsolutePath()); - writer.writeEndElement();// pathToFile - writer.writeStartElement("fileType"); - writer.writeCharacters("XPIP file"); - writer.writeEndElement();// fileType - writer.writeEndElement();// sourceFile - - writer.writeEndElement();// admin - - // if we can retrieve original informations in xml xtandem results - HashSet<IdentificationXtandemFile> xtSourceFiles = Utils - 
.getXtandemFileList(identification); - if (xtSourceFiles != null) { - // - for (IdentificationXtandemFile xtFile : xtSourceFiles) { - xtFile.writeProticDbMlDataProcessing(this.writer); - } - } else { - writer.writeStartElement("dataProcessing"); - writer.writeStartElement("software"); - writer.writeStartElement("name"); - writer.writeAttribute("acc", "PROTICdbO:0000283"); - writer.writeCharacters("X!Tandem"); - writer.writeEndElement();// name - writer.writeStartElement("version"); - writer.writeCharacters(XtandemPipelineSession.getInstance() - .getConfig().getXtandemVersion()); - writer.writeEndElement();// version - writer.writeEndElement();// software - writer.writeStartElement("processingMethod"); - writer.writeEndElement();// processingMethod - writer.writeEndElement();// dataProcessing - } - - // id: PROTICdbO:0000316 - // name: X!TandemPipeline - - writer.writeStartElement("dataProcessing"); - writer.writeStartElement("software"); - writer.writeStartElement("name"); - writer.writeAttribute("acc", "PROTICdbO:0000316"); - writer.writeCharacters("X!TandemPipeline"); - writer.writeEndElement();// name - writer.writeStartElement("version"); - writer.writeCharacters(XtandemPipelineMain.version); - writer.writeEndElement();// version - writer.writeEndElement();// software - writer.writeStartElement("processingMethod"); - // cvParams - - this.writeCvParam("PROTICdbO:0000323", "" - + XtandemPipelineSession.getInstance().getConfig() - .get_protein_evalue(), - "X!TandemPipeline filter on protein evalue (log)"); - - // indi - // combine - // phospho - xtpExperimentType = XtandemPipelineSession.getInstance() - .getDataTypeBase().getType(); - if (xtpExperimentType.equals("indiv")) { - this.writeCvParam("PROTICdbO:0000319", xtpExperimentType, ""); - } - if (xtpExperimentType.equals("combi")) { - this.writeCvParam("PROTICdbO:0000320", xtpExperimentType, ""); - } - if (xtpExperimentType.equals("phospho")) { - this.writeCvParam("PROTICdbO:0000321", xtpExperimentType, ""); - } 
- - this.writeCvParam("PROTICdbO:0000325", "" - + XtandemPipelineSession.getInstance().getConfig() - .get_peptide_number(), - "X!TandemPipeline filter on minimal number of peptide per protein"); - - this.writeCvParam("PROTICdbO:0000324", "" - + XtandemPipelineSession.getInstance().getConfig() - .get_peptide_evalue(), - "X!TandemPipeline filter on peptide evalue"); - - // TODO write database filter - // this.writeCvParam("PROTICdbO:0000324", "" - // + - // XtandemPipelineSession.getInstance().getConfig().get_database_filter(), - // "X!TandemPipeline filter on peptide evalue"); - - writer.writeEndElement();// processingMethod - - writer.writeEndElement();// dataProcessing - - writer.writeEndElement();// description - } catch (Exception e) { - String message = "error writing identificationRun in ProticDbMl \n" - + e.getMessage(); - logger.error(message); - throw new Exception(message); - } - } - - private void writePeptideHit(Peptide pep, Group group, - PeptideMass peptideMass) throws XMLStreamException, MSMSException { - - if (peptideMass == null) { - String message = "peptideMass == null"; - logger.error(message); - throw new MSMSException(message); - } - - String pep_hit_id = "pep" + pepSampleScanCle2xmlId.size(); - // param par default - writer.writeStartElement("peptideHit"); - writer.writeAttribute("calc_mr", "" + (pep.get_mhplus_theo() - 1.00794)); - double exp_mz = (pep.get_mhplus_obser() + (1.00794 * (pep.getCharge() - 1))) - / pep.getCharge(); - writer.writeAttribute("exp_mz", "" + exp_mz); - writer.writeAttribute("delta", "" + pep.get_deltamass()); - writer.writeAttribute("exp_mr", "" + (pep.get_mhplus_obser() - 1.00794)); - writer.writeAttribute("acq_number", "" + pep.get_scan()); - writer.writeAttribute("ms_run_id", - "" + msrun_to_id.get(pep.get_sample())); - writer.writeAttribute("id", pep_hit_id); - writer.writeAttribute("exp_z", "" + pep.getCharge()); - - this.writeCvParam("PROTICdbO:0000339", - Utils.getPappsoPeptideMassId(group, peptideMass), - 
"peptide mass id"); - - if (xtpExperimentType.equals("phospho")) { - this.writeCvParam("PROTICdbO:0000349", - Utils.getPappsoPhosphoPeptideMassId(group, peptideMass), - "phosphopeptide mass id"); - } - - // pep.getMsRun(). - IdentificationDataSource idDataSource = pep - .getIdentificationDataSource(); - if (idDataSource.getClass().equals(IdentificationXtandemFile.class)) { - // cvparam specifique xtandem - - this.writeCvParam("PROTICdbO:0000287", "" + pep.get_evalue(), - "xtandem peptide evalue"); - - this.writeCvParam("PROTICdbO:0000288", "" + pep.get_hypercorr(), - "xtandem peptide hyperscore"); - } - - this.writeCvParam("PROTICdbO:0000289", - "" + pep.getRetentionTimeInMinutes(), - "xtandem peptide retention time"); - /* - * writer.writeStartElement("cvParam"); writer.writeAttribute("value", - * "" + pep.get_RT()); writer.writeAttribute("name", - * "xtandem peptide retention time"); writer.writeAttribute("cvLabel", - * "PROTICdbO"); writer.writeAttribute("accession", - * "PROTICdbO:0000289"); writer.writeEndElement();// cvParam - */ - - // Element cvParam4 = doc.createElement("cvParam"); - // cvParam4.setAttribute("value", "" + pep.get_post()); - // cvParam4.setAttribute("name", "xtandem peptide post"); - // cvParam4.setAttribute("cvLabel", "PROTICdbO"); - // cvParam4.setAttribute("accession", "PROTICdbO:00000XX"); - // peptideHit.appendChild(cvParam4); - // - // Element cvParam5 = doc.createElement("cvParam"); - // cvParam5.setAttribute("value", "" + pep.get_pre()); - // cvParam5.setAttribute("name", "xtandem peptide pre"); - // cvParam5.setAttribute("cvLabel", "PROTICdbO"); - // cvParam5.setAttribute("accession", "PROTICdbO:00000XX"); - // peptideHit.appendChild(cvParam5); - - // sequences avec les modifs - writer.writeStartElement("pepSeq"); - writer.writeStartElement("peptide"); - writer.writeCharacters("" + pep.getSequence()); - writer.writeEndElement();// peptide - if (pep.get_Modifs().size() != 0) { - this.writePtms(pep.get_Modifs()); - } - 
writer.writeEndElement();// pepSeq - - pepSampleScanCle2xmlId.put(pep.get_sample_scan_sequencepeptide_cle(), - pep_hit_id); - writer.writeEndElement();// peptideHit - } - - private void writePtms(ArrayList<Modifs> mods) throws XMLStreamException { - writer.writeStartElement("ptms"); - for (Modifs mod : mods) { - writer.writeStartElement("ptm"); - writer.writeAttribute("diff_mono", "" + mod.get_modvalue()); - writer.writeAttribute("position", "" + mod.get_posi()); - writer.writeAttribute("aa", "" + mod.get_AA()); - try { - this.writeOboModif(mod.getPsiMod()); - } catch (MSMSException e) { - String message = "error writing OBO modif : " + e.getMessage(); - logger.warn(message); - } - writer.writeEndElement();// ptm - - } - writer.writeEndElement();// ptms - } - - void ProticdbMl::writeOboModif(pappso::AaModificationP mod) { - _output_stream->writeStartElement("cvParam"); - _output_stream->writeAttribute("name", mod->getName()); - _output_stream->writeAttribute("cvLabel", "MOD"); - _output_stream->writeAttribute("accession", mod->getAccession()); - _output_stream->writeEndElement();// cvParam - } - - private void writepeptideHits(Identification identification) - throws MSMSException { - try { - writer.writeStartElement("peptideHits"); - for (Group group : identification.getGrouping().getGroupList()) { - // generating peptide numbers - PeptideMassSet pepMassSet = new PeptideMassSet(); - - Hashtable<String, PeptideMass> pepSampleScanCle2peptideMass; - if (this.xtpExperimentType.equals("phospho")) { - pepSampleScanCle2peptideMass = pepMassSet - .numberingPhosphoGetSampleScanCle2peptideMass(group); - - } else { - pepSampleScanCle2peptideMass = pepMassSet - .numberingGetSampleScanCle2peptideMass(group); - } - - for (SubGroup sg : group.getSubGroupSet().getSubGroupList()) { - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtList()) { - - ArrayList<Peptide> peptideList = null; - if (this.xtpExperimentType.equals("phospho")) { - peptideList = ((Match) 
hashProt.getGrpProtMatch()) - .getValidPeptideInPhosphoIsland((HashSampleScanSetPhospho) hashProt); - } else { - peptideList = ((Match) hashProt.getGrpProtMatch()) - .get_valid_peptides(); - } - - for (Peptide pep : peptideList) { - - if (pepSampleScanCle2xmlId.containsKey(pep - .get_sample_scan_sequencepeptide_cle()) == false) { - - writePeptideHit( - pep, - group, - pepSampleScanCle2peptideMass.get(pep - .get_sample_scan_sequencepeptide_cle())); - - } - } - } - } - } - writer.writeEndElement();// "peptideHits"); - } catch (Exception e) { - String message = "error writing peptideHits in ProticDbMl \n" - + e.getMessage(); - logger.error(message); - throw new MSMSException(message); - } - } - - private void writeMatchs(Identification identification) - throws MSMSException { - try { - writer.writeStartElement("matchs"); - for (Group group : identification.getGrouping().getGroupList()) { - for (SubGroup sg : group.getSubGroupSet().getSubGroupList()) { - writer.writeStartElement("match"); - - // echantillons - Hashtable<String, String> view = new Hashtable<String, String>(); - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtList()) { - Match match = (Match) hashProt.getGrpProtMatch(); - for (MsRun sample : match.get_sample_matchs()) { - if (!view.containsKey(sample.getSampleName())) { - writer.writeStartElement("matchSample"); - writer.writeAttribute( - "sample_id", - "" - + sample_to_id.get(sample - .getSampleName())); - String name = sample.getSampleName(); - name = name.replaceAll(".xml", ""); - writer.writeAttribute("name", "" + name); - view.put(sample.getSampleName(), "ok"); - writer.writeEndElement();// "matchSample"); - } - } - } - - // proteins - int k = 1; - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtList()) { - writer.writeStartElement("proteinHit"); - Protein prot = (Protein) hashProt.getGrpProtMatch() - .getGrpProtein(); - writer.writeAttribute("sequence_id", - "" + prot_to_id.get(prot.get_description())); - // 
proteinHit.setAttribute("score", ""); - writer.writeAttribute("rank", "" + k); - - // //cvparam - this.writeCvParam("PROTICdbO:0000284", - "" + prot.get_mw(), - "MW determination by Benuch"); - /* - * writer.writeStartElement("cvParam"); - * writer.writeAttribute("value", "" + prot.get_mw()); - * writer.writeAttribute("name", - * "MW determination by Benuch"); - * writer.writeAttribute("cvLabel", "PROTICdbO"); - * writer.writeAttribute("accession", - * "PROTICdbO:0000284"); writer.writeEndElement();// - * cvParam - */ - // evalue - MsRunSet samples = ((Match) hashProt.getGrpProtMatch()) - .get_sample_matchs(); - this.writeCvParam("PROTICdbO:0000291", - "" + prot.get_evalue(), "Xtandem log evalue"); - /* - * writer.writeStartElement("cvParam"); - * writer.writeAttribute("value", "" + - * prot.get_evalue()); - * - * writer.writeAttribute("name", "Xtandem log evalue"); - * writer.writeAttribute("cvLabel", "PROTICdbO"); - * writer.writeAttribute("accession", - * "PROTICdbO:0000291"); writer.writeEndElement();// - * cvParam - */ - - // coverage - if (samples.size() == 1) { - this.writeCvParam( - "PROTICdbO:0000285", - "" - + ((Match) hashProt - .getGrpProtMatch()) - .get_coverage(), - "protein coverage"); - } - - // [Term] - // id: PROTICdbO:0000335 - // name: X!TandemPipeline PAI - this.writeCvParam("PROTICdbO:0000335", - "" + hashProt.getPAI(), "PAI"); - - // [Term] - // id: PROTICdbO:0000337 - // name: protein group number - String pappsoGroupingNumber = Utils - .getPappsoGroupingNumber(group, sg, k); - if (pappsoGroupingNumber != null) { - this.writeCvParam("PROTICdbO:0000337", - pappsoGroupingNumber, "grouping number"); - } - - writer.writeEndElement();// "proteinHit"); - k++; - } - for (HashSampleScanSetProt hashProt : sg - .getHashSampleScanSetProtList()) { - Protein prot2 = (Protein) hashProt.getGrpProtMatch() - .getGrpProtein(); - // peptidesHitRef - - ArrayList<Peptide> peptideList = null; - if (this.xtpExperimentType.equals("phospho")) { - peptideList = 
((Match) hashProt.getGrpProtMatch()) - .getValidPeptideInPhosphoIsland((HashSampleScanSetPhospho) hashProt); - } else { - peptideList = ((Match) hashProt.getGrpProtMatch()) - .get_valid_peptides(); - } - - for (Peptide pep : peptideList) { - // for (Peptide pep : ((Match) - // hashProt.getGrpProtMatch()) - // .get_peptide_order_inclus_redon()) { - String xmlId = pepSampleScanCle2xmlId.get(pep - .get_sample_scan_sequencepeptide_cle()); - if (xmlId == null) { - String message = "the xml of peptide " - + pep.get_sample_scan_sequencepeptide_cle() - + " is not known in pepSampleScanCle2xmlId"; - logger.debug(message); - throw new Exception(message); - } - writer.writeStartElement("peptideHitRef"); - writer.writeAttribute("peptide_hit_id", "" + xmlId); - - writer.writeStartElement("fromSeq"); - writer.writeAttribute( - "seq_id", - "" - + prot_to_id.get(prot2 - .get_description())); - writer.writeAttribute("start", "" + pep.get_start()); - writer.writeAttribute("stop", "" + pep.get_stop()); - String res1 = pep.get_pre(); - res1 = res1.replaceAll("]", "-"); - writer.writeAttribute("residue_before_nter", "" - + res1.charAt((res1.length() - 1))); - String res2 = pep.get_post(); - res2 = res2.replaceAll("]", "-"); - writer.writeAttribute("residue_after_cter", "" - + res2.charAt(0)); - - writer.writeEndElement();// fromSeq - writer.writeEndElement();// peptideHitRef - } - - } - writer.writeEndElement();// "match"); - } - } - writer.writeEndElement();// "matchs"); - } catch (Exception e) { - String message = "error writing Matchs in ProticDbMl \n" - + e.getMessage(); - logger.error(message); - throw new MSMSException(message); - } - } - - void ProticdbMl::writeCvParam(QString acc, QString value, QString description) { - _output_stream->writeStartElement("cvParam"); - - _output_stream->writeAttribute("value",value); - if (description.isEmpty()) { - _output_stream->writeAttribute("name", "N.A."); - } else { - _output_stream->writeAttribute("name", description); - } - 
_output_stream->writeAttribute("cvLabel", "PROTICdbO"); - _output_stream->writeAttribute("accession", acc); - _output_stream->writeEndElement();// cvParam - - } + if (xtpExperimentType.equals("phospho")) { + this.writeCvParam("PROTICdbO:0000321", xtpExperimentType, ""); + } + + this.writeCvParam("PROTICdbO:0000325", "" + + XtandemPipelineSession.getInstance().getConfig() + .get_peptide_number(), + "X!TandemPipeline filter on minimal number of peptide per protein"); + + this.writeCvParam("PROTICdbO:0000324", "" + + XtandemPipelineSession.getInstance().getConfig() + .get_peptide_evalue(), + "X!TandemPipeline filter on peptide evalue"); + + // TODO write database filter + // this.writeCvParam("PROTICdbO:0000324", "" + // + + // XtandemPipelineSession.getInstance().getConfig().get_database_filter(), + // "X!TandemPipeline filter on peptide evalue"); + + writer.writeEndElement();// processingMethod + + writer.writeEndElement();// dataProcessing + + writer.writeEndElement();// description + } catch (Exception e) { + String message = "error writing identificationRun in ProticDbMl \n" + + e.getMessage(); + logger.error(message); + throw new Exception(message); + } +} + +private void writePeptideHit(Peptide pep, Group group, + PeptideMass peptideMass) throws XMLStreamException, MSMSException { + + if (peptideMass == null) { + String message = "peptideMass == null"; + logger.error(message); + throw new MSMSException(message); + } + + String pep_hit_id = "pep" + pepSampleScanCle2xmlId.size(); + // param par default + writer.writeStartElement("peptideHit"); + writer.writeAttribute("calc_mr", "" + (pep.get_mhplus_theo() - 1.00794)); + double exp_mz = (pep.get_mhplus_obser() + (1.00794 * (pep.getCharge() - 1))) + / pep.getCharge(); + writer.writeAttribute("exp_mz", "" + exp_mz); + writer.writeAttribute("delta", "" + pep.get_deltamass()); + writer.writeAttribute("exp_mr", "" + (pep.get_mhplus_obser() - 1.00794)); + writer.writeAttribute("acq_number", "" + pep.get_scan()); + 
writer.writeAttribute("ms_run_id", + "" + msrun_to_id.get(pep.get_sample())); + writer.writeAttribute("id", pep_hit_id); + writer.writeAttribute("exp_z", "" + pep.getCharge()); + + this.writeCvParam("PROTICdbO:0000339", + Utils.getPappsoPeptideMassId(group, peptideMass), + "peptide mass id"); + + if (xtpExperimentType.equals("phospho")) { + this.writeCvParam("PROTICdbO:0000349", + Utils.getPappsoPhosphoPeptideMassId(group, peptideMass), + "phosphopeptide mass id"); + } + + // pep.getMsRun(). + IdentificationDataSource idDataSource = pep + .getIdentificationDataSource(); + if (idDataSource.getClass().equals(IdentificationXtandemFile.class)) { + // cvparam specifique xtandem + + this.writeCvParam("PROTICdbO:0000287", "" + pep.get_evalue(), + "xtandem peptide evalue"); + + this.writeCvParam("PROTICdbO:0000288", "" + pep.get_hypercorr(), + "xtandem peptide hyperscore"); + } + + this.writeCvParam("PROTICdbO:0000289", + "" + pep.getRetentionTimeInMinutes(), + "xtandem peptide retention time"); + /* + * writer.writeStartElement("cvParam"); writer.writeAttribute("value", + * "" + pep.get_RT()); writer.writeAttribute("name", + * "xtandem peptide retention time"); writer.writeAttribute("cvLabel", + * "PROTICdbO"); writer.writeAttribute("accession", + * "PROTICdbO:0000289"); writer.writeEndElement();// cvParam + */ + + // Element cvParam4 = doc.createElement("cvParam"); + // cvParam4.setAttribute("value", "" + pep.get_post()); + // cvParam4.setAttribute("name", "xtandem peptide post"); + // cvParam4.setAttribute("cvLabel", "PROTICdbO"); + // cvParam4.setAttribute("accession", "PROTICdbO:00000XX"); + // peptideHit.appendChild(cvParam4); + // + // Element cvParam5 = doc.createElement("cvParam"); + // cvParam5.setAttribute("value", "" + pep.get_pre()); + // cvParam5.setAttribute("name", "xtandem peptide pre"); + // cvParam5.setAttribute("cvLabel", "PROTICdbO"); + // cvParam5.setAttribute("accession", "PROTICdbO:00000XX"); + // peptideHit.appendChild(cvParam5); + + // sequences 
avec les modifs + writer.writeStartElement("pepSeq"); + writer.writeStartElement("peptide"); + writer.writeCharacters("" + pep.getSequence()); + writer.writeEndElement();// peptide + if (pep.get_Modifs().size() != 0) { + this.writePtms(pep.get_Modifs()); + } + writer.writeEndElement();// pepSeq + + pepSampleScanCle2xmlId.put(pep.get_sample_scan_sequencepeptide_cle(), + pep_hit_id); + writer.writeEndElement();// peptideHit +} + +private void writePtms(ArrayList<Modifs> mods) throws XMLStreamException { + writer.writeStartElement("ptms"); + for (Modifs mod : mods) { + writer.writeStartElement("ptm"); + writer.writeAttribute("diff_mono", "" + mod.get_modvalue()); + writer.writeAttribute("position", "" + mod.get_posi()); + writer.writeAttribute("aa", "" + mod.get_AA()); + try { + this.writeOboModif(mod.getPsiMod()); + } catch (MSMSException e) { + String message = "error writing OBO modif : " + e.getMessage(); + logger.warn(message); + } + writer.writeEndElement();// ptm + + } + writer.writeEndElement();// ptms +} + +void ProticdbMl::writeOboModif(pappso::AaModificationP mod) { + _output_stream->writeStartElement("cvParam"); + _output_stream->writeAttribute("name", mod->getName()); + _output_stream->writeAttribute("cvLabel", "MOD"); + _output_stream->writeAttribute("accession", mod->getAccession()); + _output_stream->writeEndElement();// cvParam +} + +private void writepeptideHits(Identification identification) +throws MSMSException { + try { + writer.writeStartElement("peptideHits"); + for (Group group : identification.getGrouping().getGroupList()) { + // generating peptide numbers + PeptideMassSet pepMassSet = new PeptideMassSet(); + + Hashtable<String, PeptideMass> pepSampleScanCle2peptideMass; + if (this.xtpExperimentType.equals("phospho")) { + pepSampleScanCle2peptideMass = pepMassSet + .numberingPhosphoGetSampleScanCle2peptideMass(group); + + } else { + pepSampleScanCle2peptideMass = pepMassSet + .numberingGetSampleScanCle2peptideMass(group); + } + + for 
(SubGroup sg : group.getSubGroupSet().getSubGroupList()) { + for (HashSampleScanSetProt hashProt : sg + .getHashSampleScanSetProtList()) { + + ArrayList<Peptide> peptideList = null; + if (this.xtpExperimentType.equals("phospho")) { + peptideList = ((Match) hashProt.getGrpProtMatch()) + .getValidPeptideInPhosphoIsland((HashSampleScanSetPhospho) hashProt); + } else { + peptideList = ((Match) hashProt.getGrpProtMatch()) + .get_valid_peptides(); + } + + for (Peptide pep : peptideList) { + + if (pepSampleScanCle2xmlId.containsKey(pep + .get_sample_scan_sequencepeptide_cle()) == false) { + + writePeptideHit( + pep, + group, + pepSampleScanCle2peptideMass.get(pep + .get_sample_scan_sequencepeptide_cle())); + + } + } + } + } + } + writer.writeEndElement();// "peptideHits"); + } catch (Exception e) { + String message = "error writing peptideHits in ProticDbMl \n" + + e.getMessage(); + logger.error(message); + throw new MSMSException(message); + } +} + +private void writeMatchs(Identification identification) +throws MSMSException { + try { + writer.writeStartElement("matchs"); + for (Group group : identification.getGrouping().getGroupList()) { + for (SubGroup sg : group.getSubGroupSet().getSubGroupList()) { + writer.writeStartElement("match"); + + // echantillons + Hashtable<String, String> view = new Hashtable<String, String>(); + for (HashSampleScanSetProt hashProt : sg + .getHashSampleScanSetProtList()) { + Match match = (Match) hashProt.getGrpProtMatch(); + for (MsRun sample : match.get_sample_matchs()) { + if (!view.containsKey(sample.getSampleName())) { + writer.writeStartElement("matchSample"); + writer.writeAttribute( + "sample_id", + "" + + sample_to_id.get(sample + .getSampleName())); + String name = sample.getSampleName(); + name = name.replaceAll(".xml", ""); + writer.writeAttribute("name", "" + name); + view.put(sample.getSampleName(), "ok"); + writer.writeEndElement();// "matchSample"); + } + } + } + + // proteins + int k = 1; + for (HashSampleScanSetProt 
hashProt : sg + .getHashSampleScanSetProtList()) { + writer.writeStartElement("proteinHit"); + Protein prot = (Protein) hashProt.getGrpProtMatch() + .getGrpProtein(); + writer.writeAttribute("sequence_id", + "" + prot_to_id.get(prot.get_description())); + // proteinHit.setAttribute("score", ""); + writer.writeAttribute("rank", "" + k); + + // //cvparam + this.writeCvParam("PROTICdbO:0000284", + "" + prot.get_mw(), + "MW determination by Benuch"); + /* + * writer.writeStartElement("cvParam"); + * writer.writeAttribute("value", "" + prot.get_mw()); + * writer.writeAttribute("name", + * "MW determination by Benuch"); + * writer.writeAttribute("cvLabel", "PROTICdbO"); + * writer.writeAttribute("accession", + * "PROTICdbO:0000284"); writer.writeEndElement();// + * cvParam + */ + // evalue + MsRunSet samples = ((Match) hashProt.getGrpProtMatch()) + .get_sample_matchs(); + this.writeCvParam("PROTICdbO:0000291", + "" + prot.get_evalue(), "Xtandem log evalue"); + /* + * writer.writeStartElement("cvParam"); + * writer.writeAttribute("value", "" + + * prot.get_evalue()); + * + * writer.writeAttribute("name", "Xtandem log evalue"); + * writer.writeAttribute("cvLabel", "PROTICdbO"); + * writer.writeAttribute("accession", + * "PROTICdbO:0000291"); writer.writeEndElement();// + * cvParam + */ + + // coverage + if (samples.size() == 1) { + this.writeCvParam( + "PROTICdbO:0000285", + "" + + ((Match) hashProt + .getGrpProtMatch()) + .get_coverage(), + "protein coverage"); + } + + // [Term] + // id: PROTICdbO:0000335 + // name: X!TandemPipeline PAI + this.writeCvParam("PROTICdbO:0000335", + "" + hashProt.getPAI(), "PAI"); + + // [Term] + // id: PROTICdbO:0000337 + // name: protein group number + String pappsoGroupingNumber = Utils + .getPappsoGroupingNumber(group, sg, k); + if (pappsoGroupingNumber != null) { + this.writeCvParam("PROTICdbO:0000337", + pappsoGroupingNumber, "grouping number"); + } + + writer.writeEndElement();// "proteinHit"); + k++; + } + for (HashSampleScanSetProt 
hashProt : sg + .getHashSampleScanSetProtList()) { + Protein prot2 = (Protein) hashProt.getGrpProtMatch() + .getGrpProtein(); + // peptidesHitRef + + ArrayList<Peptide> peptideList = null; + if (this.xtpExperimentType.equals("phospho")) { + peptideList = ((Match) hashProt.getGrpProtMatch()) + .getValidPeptideInPhosphoIsland((HashSampleScanSetPhospho) hashProt); + } else { + peptideList = ((Match) hashProt.getGrpProtMatch()) + .get_valid_peptides(); + } + + for (Peptide pep : peptideList) { + // for (Peptide pep : ((Match) + // hashProt.getGrpProtMatch()) + // .get_peptide_order_inclus_redon()) { + String xmlId = pepSampleScanCle2xmlId.get(pep + .get_sample_scan_sequencepeptide_cle()); + if (xmlId == null) { + String message = "the xml of peptide " + + pep.get_sample_scan_sequencepeptide_cle() + + " is not known in pepSampleScanCle2xmlId"; + logger.debug(message); + throw new Exception(message); + } + writer.writeStartElement("peptideHitRef"); + writer.writeAttribute("peptide_hit_id", "" + xmlId); + + writer.writeStartElement("fromSeq"); + writer.writeAttribute( + "seq_id", + "" + + prot_to_id.get(prot2 + .get_description())); + writer.writeAttribute("start", "" + pep.get_start()); + writer.writeAttribute("stop", "" + pep.get_stop()); + String res1 = pep.get_pre(); + res1 = res1.replaceAll("]", "-"); + writer.writeAttribute("residue_before_nter", "" + + res1.charAt((res1.length() - 1))); + String res2 = pep.get_post(); + res2 = res2.replaceAll("]", "-"); + writer.writeAttribute("residue_after_cter", "" + + res2.charAt(0)); + + writer.writeEndElement();// fromSeq + writer.writeEndElement();// peptideHitRef + } + + } + writer.writeEndElement();// "match"); + } + } + writer.writeEndElement();// "matchs"); + } catch (Exception e) { + String message = "error writing Matchs in ProticDbMl \n" + + e.getMessage(); + logger.error(message); + throw new MSMSException(message); + } +} + +void ProticdbMl::writeCvParam(QString acc, QString value, QString description) { + 
_output_stream->writeStartElement("cvParam"); + + _output_stream->writeAttribute("value",value); + if (description.isEmpty()) { + _output_stream->writeAttribute("name", "N.A."); + } else { + _output_stream->writeAttribute("name", description); + } + _output_stream->writeAttribute("cvLabel", "PROTICdbO"); + _output_stream->writeAttribute("accession", acc); + _output_stream->writeEndElement();// cvParam + +} diff --git a/src/output/proticdbml.h b/src/output/proticdbml.h index e90b252f2..8b226fd88 100644 --- a/src/output/proticdbml.h +++ b/src/output/proticdbml.h @@ -51,6 +51,8 @@ private : void writeOboModif(pappso::AaModificationP mod); void writeCvParam(QString acc, QString value, QString description); void writeSequence(ProteinMatch * p_protein_match); + void writeProject(); + void writeIdentMethod(); private : QFile * _output_file; diff --git a/src/utils/proteinstore.cpp b/src/utils/proteinstore.cpp index 763693cc9..6cf29b59a 100644 --- a/src/utils/proteinstore.cpp +++ b/src/utils/proteinstore.cpp @@ -88,6 +88,7 @@ void ProteinStore::setProteinInformations(ProteinXtpSp & peptide_in) { peptide_in.get()->setIsContaminant(false); peptide_in.get()->setIsDecoy(false); QString accession = peptide_in.get()->getAccession(); + peptide_in.get()->parseAccession2dbxref(); if ((!_regexp_contaminant.isEmpty()) && (_regexp_contaminant.indexIn(accession, 0)>-1)) { //qDebug() << "ProteinStore::setProteinInformations is contaminant " << accession; diff --git a/src/utils/types.h b/src/utils/types.h index 432f5e157..9bf617565 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -28,6 +28,17 @@ #include <cstdint> /*********** enumerations *********************************/ +/** \def ExternalDatabase external database references + * + */ +enum class ExternalDatabase { + AGI_LocusCode, ///< AGI_LocusCode + NCBI_gi, ///< NCBI_gi + SwissProt, ///< Swiss-Prot + TrEMBL, ///< TrEMBL + ref ///< ref +}; + /** \def IdentificationEngine identification engine * */ diff --git 
a/src/utils/utils.cpp b/src/utils/utils.cpp index b70d4398f..9f4abcb4d 100644 --- a/src/utils/utils.cpp +++ b/src/utils/utils.cpp @@ -1,8 +1,32 @@ #include "utils.h" const QUrl Utils::getOlsUrl(QString psimod_accession) { - + QString iri(QString("http://purl.obolibrary.org/obo/%1").arg(psimod_accession.replace(":","_"))); QUrl url(QString("http://www.ebi.ac.uk/ols/ontologies/mod/terms?iri=%1").arg(iri)); return url; } + + +const QString Utils::getDatabaseName(ExternalDatabase database) { + QString database_name; + switch (database) { + case ExternalDatabase::AGI_LocusCode : + database_name = "AGI_LocusCode"; + break; + case ExternalDatabase::NCBI_gi : + database_name = "NCBI_gi"; + break; + + case ExternalDatabase::SwissProt : + database_name = "Swiss-Prot"; + break; + case ExternalDatabase::TrEMBL : + database_name = "TrEMBL"; + break; + case ExternalDatabase::ref : + database_name = "ref"; + break; + } + return database_name; +} diff --git a/src/utils/utils.h b/src/utils/utils.h index 3480d89df..47e451c01 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -22,6 +22,7 @@ ******************************************************************************/ #include<QUrl> #include <QString> +#include "types.h" #ifndef UTILS_H #define UTILS_H @@ -29,6 +30,7 @@ class Utils { public: static const QUrl getOlsUrl(QString modification); + static const QString getDatabaseName(ExternalDatabase database); }; #endif // UTILS_H -- GitLab