From 8ac534fc067752fde862464f7c0c25224e4321c1 Mon Sep 17 00:00:00 2001
From: Olivier Langella <Olivier.Langella@moulon.inra.fr>
Date: Wed, 19 Apr 2017 22:29:07 +0200
Subject: [PATCH] new fasta file object and identification engine parameters

Add a FastaFile class (src/files/fastafile.{h,cpp}) that stores the
location of a FASTA file as a QUrl built from a QString, a QUrl or a
QFileInfo.

IdentificationDataSource now keeps:
  * a map of identification engine parameters, keyed by the new
    IdentificationEngineParam enum (setIdentificationEngineParam);
  * the list of FastaFile objects used by the engine (addFastaFile).

XtandemSaxHandler::endElement_note fills both while parsing:
  * the "list path, default parameters" note is stored as
    IdentificationEngineParam::tandem_param;
  * each "list path, sequence source #N" note is added as a FastaFile.
    The "list path, sequence source description #N" notes share the same
    prefix but do not contain a path, so the match includes the '#'.

---
Note: only added ('+') lines were edited one-for-one, so hunk sizes and the
diffstat are unchanged; the "index" blob ids are those of the original commit.

 src/CMakeLists.txt                            |  1 +
 .../identificationdatasource.cpp              |  7 +++
 .../identificationdatasource.h                | 11 +++++
 src/files/fastafile.cpp                       | 45 +++++++++++++++++++
 src/files/fastafile.h                         | 43 ++++++++++++++++++
 src/input/xtandemsaxhandler.cpp               | 43 +++++++++++-------
 src/utils/types.h                             |  7 +++
 7 files changed, 140 insertions(+), 17 deletions(-)
 create mode 100644 src/files/fastafile.cpp
 create mode 100644 src/files/fastafile.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d9c1965b4..cd161d715 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -61,6 +61,7 @@ SET(CPP_FILES
   core/proteinmatch.cpp
   core/proteinxtp.cpp
   core/sequencedatabase.cpp
+  files/fastafile.cpp
   files/xpipfile.cpp
   grouping/groupingexperiment.cpp
   grouping/groupinggroup.cpp
diff --git a/src/core/identification_sources/identificationdatasource.cpp b/src/core/identification_sources/identificationdatasource.cpp
index f3cc31f80..dd06ce03a 100644
--- a/src/core/identification_sources/identificationdatasource.cpp
+++ b/src/core/identification_sources/identificationdatasource.cpp
@@ -72,8 +72,15 @@ const QString& IdentificationDataSource::getIdentificationEngineVersion() const
 void IdentificationDataSource::setIdentificationEngineVersion(const QString& version) {
     _version = version;
 }
+void IdentificationDataSource::setIdentificationEngineParam(IdentificationEngineParam param, const QString& value) {
+    _params[param] = value; // assignment, not insert(): a setter must replace any earlier value
+}
 
 pappso::SpectrumSp IdentificationDataSource::getSpectrumSp(unsigned int scan_number) const {
     pappso::SpectrumSp spectrum_sp = SpectrumStore::getSpectrumSpFromMsRunSp(_ms_run_sp, scan_number);
     return spectrum_sp;
 }
+
+void IdentificationDataSource::addFastaFile (const FastaFile& file) {
+    _fastafile_list.push_back(file); // the list keeps its own copy of file
+}
diff --git a/src/core/identification_sources/identificationdatasource.h b/src/core/identification_sources/identificationdatasource.h
index 02d1032a5..3d4b51f2b 100644
--- a/src/core/identification_sources/identificationdatasource.h
+++ b/src/core/identification_sources/identificationdatasource.h
@@ -27,6 +27,7 @@
 #include <pappsomspp/spectrum/spectrum.h>
 #include <memory>
 #include "../msrun.h"
+#include "../../files/fastafile.h"
 
 class Project;
 
@@ -65,6 +66,14 @@ public:
     /** \brief set identification engine version
      */
     virtual void setIdentificationEngineVersion(const QString& version);
+
+    /** \brief set identification engine parameter value (replaces any previous value of \a param)
+     */
+    virtual void setIdentificationEngineParam(IdentificationEngineParam param, const QString& value);
+
+    /** \brief add a copy of the Fastafile used by the identification engine
+     */
+    void addFastaFile (const FastaFile& file);
 
 protected :
     QString _resource_name;
@@ -73,6 +82,8 @@ private :
     //static std::map<QString, pappso::MsRunIdSp> _map_msrunidsp;
     QString _version;
     MsRunSp _ms_run_sp = nullptr;
+    std::map<IdentificationEngineParam, QString> _params; ///< identification engine parameter values
+    std::vector<FastaFile> _fastafile_list; ///< fasta files used by the identification engine
 };
 
 #endif // IDENTIFICATIONDATASOURCE_H
diff --git a/src/files/fastafile.cpp b/src/files/fastafile.cpp
new file mode 100644
index 000000000..26d4c8358
--- /dev/null
+++ b/src/files/fastafile.cpp
@@ -0,0 +1,45 @@
+
+/*******************************************************************************
+* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
+*
+* This file is part of XTPcpp.
+*
+*     XTPcpp is free software: you can redistribute it and/or modify
+*     it under the terms of the GNU General Public License as published by
+*     the Free Software Foundation, either version 3 of the License, or
+*     (at your option) any later version.
+*
+*     XTPcpp is distributed in the hope that it will be useful,
+*     but WITHOUT ANY WARRANTY; without even the implied warranty of
+*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*     GNU General Public License for more details.
+*
+*     You should have received a copy of the GNU General Public License
+*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
+*
+* Contributors:
+*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
+******************************************************************************/
+
+#include "fastafile.h"
+
+FastaFile::FastaFile(const QString & fasta_source) : _fasta_source(fasta_source)
+{
+
+}
+
+FastaFile::FastaFile(const QUrl & fasta_source) : _fasta_source(fasta_source)
+{
+
+}
+FastaFile::FastaFile(const QFileInfo & fasta_source): _fasta_source(fasta_source.absoluteFilePath())
+{
+}
+FastaFile::FastaFile(const FastaFile & other) : _fasta_source(other._fasta_source)
+{
+
+}
+FastaFile::~FastaFile()
+{
+
+}
diff --git a/src/files/fastafile.h b/src/files/fastafile.h
new file mode 100644
index 000000000..3e3c90a39
--- /dev/null
+++ b/src/files/fastafile.h
@@ -0,0 +1,43 @@
+
+/*******************************************************************************
+* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
+*
+* This file is part of XTPcpp.
+*
+*     XTPcpp is free software: you can redistribute it and/or modify
+*     it under the terms of the GNU General Public License as published by
+*     the Free Software Foundation, either version 3 of the License, or
+*     (at your option) any later version.
+*
+*     XTPcpp is distributed in the hope that it will be useful,
+*     but WITHOUT ANY WARRANTY; without even the implied warranty of
+*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*     GNU General Public License for more details.
+*
+*     You should have received a copy of the GNU General Public License
+*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
+*
+* Contributors:
+*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
+******************************************************************************/
+
+#ifndef FASTAFILE_H
+#define FASTAFILE_H
+
+#include <QUrl>
+#include <QFileInfo>
+class FastaFile
+{
+public:
+    FastaFile(const QString & fasta_source);
+    FastaFile(const QUrl & fasta_source);
+    FastaFile(const QFileInfo & fasta_source);
+    FastaFile(const FastaFile & other);
+    ~FastaFile();
+
+private :
+    QUrl _fasta_source; ///< fasta file location; non-const so FastaFile stays copy-assignable in containers
+
+};
+
+#endif // FASTAFILE_H
diff --git a/src/input/xtandemsaxhandler.cpp b/src/input/xtandemsaxhandler.cpp
index 1ef6a3d36..23b4862b6 100644
--- a/src/input/xtandemsaxhandler.cpp
+++ b/src/input/xtandemsaxhandler.cpp
@@ -34,6 +34,7 @@
 #include <cmath>
 #include "../utils/peptidestore.h"
 #include "../utils/proteinstore.h"
+#include "../files/fastafile.h"
 
 
 XtandemSaxHandler::XtandemSaxHandler(Project * p_project, IdentificationGroup * p_identification_group, IdentificationDataSource * p_identification_data_source):_p_project(p_project)
@@ -334,8 +335,11 @@ bool XtandemSaxHandler::endElement_note() {
     else {
 
         //<group label="input parameters" type="parameters">
+        //<note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note>
+        if (_current_note_label == "list path, default parameters") {
+            _p_identification_data_source->setIdentificationEngineParam(IdentificationEngineParam::tandem_param,_current_text);
+        }
         /*
-        * <note type="input" label="list path, default parameters">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/QExactive_analysis_FDR_nosemi.xml</note>
         <note type="input" label="list path, taxonomy information">/gorgone/pappso/tmp/temp_condor_job8533994640337729751189420695540169/database.xml</note>
         <note type="input" label="output, histogram column width">30</note>
         <note type="input" label="output, histograms">yes</note>
@@ -441,23 +445,28 @@ bool XtandemSaxHandler::endElement_note() {
         */
 
         //<group label="performance parameters" type="parameters">
+
+        //<note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note>
+        //<note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note>
+        if (_current_note_label.startsWith("list path, sequence source #")) { // '#' keeps "... sequence source description #N" notes out
+            _p_identification_data_source->addFastaFile(FastaFile(_current_text));
+        }
+
         /*
-    <note label="list path, sequence source #1">/gorgone/pappso/formation/TD/Database/Genome_Z_mays_5a.fasta</note>
-    <note label="list path, sequence source #2">/gorgone/pappso/formation/TD/Database/contaminants_standarts.fasta</note>
-    <note label="list path, sequence source description #1">no description</note>
-    <note label="list path, sequence source description #2">no description</note>
-    <note label="modelling, duplicate peptide ids">6019</note>
-    <note label="modelling, duplicate proteins">19735</note>
-    <note label="modelling, estimated false positives">18</note>
-    <note label="modelling, reversed sequence false positives">20</note>
-    <note label="modelling, spectrum noise suppression ratio">0.00</note>
-    <note label="modelling, total peptides used">96618641</note>
-    <note label="modelling, total proteins used">273656</note>
-    <note label="modelling, total spectra assigned">7464</note>
-    <note label="modelling, total spectra used">12199</note>
-    <note label="modelling, total unique assigned">6260</note>
-    <note label="process, start time">2013:12:20:16:47:19</note>
-    */
+        <note label="list path, sequence source description #1">no description</note>
+        <note label="list path, sequence source description #2">no description</note>
+        <note label="modelling, duplicate peptide ids">6019</note>
+        <note label="modelling, duplicate proteins">19735</note>
+        <note label="modelling, estimated false positives">18</note>
+        <note label="modelling, reversed sequence false positives">20</note>
+        <note label="modelling, spectrum noise suppression ratio">0.00</note>
+        <note label="modelling, total peptides used">96618641</note>
+        <note label="modelling, total proteins used">273656</note>
+        <note label="modelling, total spectra assigned">7464</note>
+        <note label="modelling, total spectra used">12199</note>
+        <note label="modelling, total unique assigned">6260</note>
+        <note label="process, start time">2013:12:20:16:47:19</note>
+        */
         //<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note>
         if (_current_note_label == "process, version") {
             QRegExp rx("\\((.*)\\)");
diff --git a/src/utils/types.h b/src/utils/types.h
index 6c98714a1..aa354e8cd 100644
--- a/src/utils/types.h
+++ b/src/utils/types.h
@@ -37,6 +37,13 @@ enum class IdentificationEngine {
     peptider ///< peptider
 };
 
+/** \def IdentificationEngineParam identification engine parameters
+ *
+ */
+enum class IdentificationEngineParam {
+    tandem_param ///< X!Tandem xml parameters file
+};
+
 /** \def GroupingType list of available grouping algoritms
  *
  */
-- 
GitLab