diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ee6256af1b489084e5e97e5aa98be71980f8e029..acfe1d7f3cfb83d55c4c0af33597371fc450f1d9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -66,6 +66,7 @@ SET(CPP_FILES
   core/automaticfilterparameters.cpp
   core/identificationgroup.cpp
   core/identification_sources/identificationdatasource.cpp
+  core/identification_sources/identificationmascotdatfile.cpp
   core/identification_sources/identificationpwizfile.cpp
   core/identification_sources/identificationxtandemfile.cpp
   core/labeling/label.cpp
@@ -92,6 +93,8 @@ SET(CPP_FILES
   grouping/ptm/ptmislandgroup.cpp
   grouping/ptm/ptmislandsubgroup.cpp
   grouping/ptm/ptmsamplescan.cpp
+  input/mascot/mascotdatparser.cpp
+  input/mascot/mimeparser.cpp
   input/condorqxmlsaxhandler.cpp
   input/identificationpwizreader.cpp
   input/xpipsaxhandler.cpp
diff --git a/src/core/identification_sources/identificationmascotdatfile.cpp b/src/core/identification_sources/identificationmascotdatfile.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4df4124220e15490717abfdaaa503ed9d38903f6
--- /dev/null
+++ b/src/core/identification_sources/identificationmascotdatfile.cpp
@@ -0,0 +1,118 @@
+/**
+ * \file /core/identification_sources/identificationmascotdatfile.cpp
+ * \date 17/2/2018
+ * \author Olivier Langella
+ * \brief mascot dat identification file handler
+ */
+
+/*******************************************************************************
+* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
+*
+* This file is part of XTPcpp.
+*
+* XTPcpp is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* XTPcpp is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
+*
+* Contributors:
+* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
+******************************************************************************/
+
+#include "identificationmascotdatfile.h"
+
+#include <QDebug>
+#include <QFile>
+#include <pappsomspp/pappsoexception.h>
+#include "../project.h"
+#include "../../input/mascot/mascotdatparser.h"
+
+/// builds a data source for the given Mascot dat file and tags it with the mascot engine
+IdentificationMascotDatFile::IdentificationMascotDatFile(const QFileInfo & mascot_dat_file) : IdentificationDataSource(mascot_dat_file.absoluteFilePath()), _mascot_dat_file(mascot_dat_file)
+{
+    _engine = IdentificationEngine::mascot;
+}
+
+/// copy constructor: copies the base data source and the dat file location
+IdentificationMascotDatFile::IdentificationMascotDatFile(const IdentificationMascotDatFile& other) : IdentificationDataSource(other),_mascot_dat_file (other._mascot_dat_file)
+{
+    _engine = IdentificationEngine::mascot;
+}
+
+IdentificationMascotDatFile::~IdentificationMascotDatFile()
+{
+
+}
+
+/// two Mascot data sources are equal when they refer to the same dat file
+bool IdentificationMascotDatFile::operator==(const IdentificationMascotDatFile& other) const
+{
+    // QFileInfo::operator== compares the file locations
+    return _mascot_dat_file == other._mascot_dat_file;
+}
+
+/// delegates spectrum retrieval to the IdentificationDataSource implementation
+pappso::SpectrumSp IdentificationMascotDatFile::getSpectrumSp(unsigned int scan_number) const {
+    pappso::SpectrumSp spectrum_sp = IdentificationDataSource::getSpectrumSp(scan_number);
+    return spectrum_sp;
+}
+
+
+/**
+ * \brief reads the Mascot dat file into \a p_project
+ *
+ * Obtains the MsRun for the file base name from the project MsRun store, then
+ * picks the identification group: the first (or a new) group in combined
+ * mode, otherwise the group containing the MsRun sample name, created when
+ * none matches. The file content is then handed to MascotDatParser.
+ *
+ * \param p_project project receiving the parsed data
+ */
+void IdentificationMascotDatFile::parseTo(Project* p_project) {
+    qDebug() << "IdentificationMascotDatFile::parseTo begin";
+
+    qDebug() << "Read Mascot dat result file '" << _mascot_dat_file.absoluteFilePath() << "'";
+
+    MsRunSp msrun_sp = p_project->getMsRunStore().getInstance(_mascot_dat_file.baseName());
+    setMsRunSp(msrun_sp);
+    std::vector<IdentificationGroup *> identification_list = p_project->getIdentificationGroupList();
+    IdentificationGroup * identification_group_p = nullptr;
+    if (p_project->getProjectMode() == ProjectMode::combined) {
+        if (identification_list.size() == 0) {
+            identification_group_p = p_project->newIdentificationGroup();
+        }
+        else {
+            identification_group_p = identification_list[0];
+        }
+    }
+    else {
+        for (IdentificationGroup * identification_p_flist : identification_list) {
+            if (identification_p_flist->containSample(msrun_sp.get()->getSampleName())) {
+                identification_group_p = identification_p_flist;
+                break;
+            }
+        }
+        if (identification_group_p == nullptr) {
+            identification_group_p = p_project->newIdentificationGroup();
+        }
+    }
+
+    identification_group_p->addIdentificationDataSourceP(this);
+    MascotDatParser mascot_parser(p_project, identification_group_p, this);
+
+    // the QFile lives on the stack: the parser only borrows its address
+    QFile qfile(_mascot_dat_file.absoluteFilePath());
+    mascot_parser.parse(&qfile);
+
+    qfile.close();
+
+    qDebug() << "IdentificationMascotDatFile::parseTo end";
+}
diff --git a/src/core/identification_sources/identificationmascotdatfile.h b/src/core/identification_sources/identificationmascotdatfile.h
new file mode 100644
index 0000000000000000000000000000000000000000..94ed456ad128cf8ab6fa9680a1dd66c4436d7f3e
--- /dev/null
+++ b/src/core/identification_sources/identificationmascotdatfile.h
@@ -0,0 +1,55 @@
+/**
+ * \file /core/identification_sources/identificationmascotdatfile.h
+ * \date 17/2/2018
+ * \author Olivier Langella
+ * \brief mascot dat identification file handler
+ */
+
+/*******************************************************************************
+* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
+*
+* This file is part of XTPcpp.
+*
+* XTPcpp is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* XTPcpp is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
+*
+* Contributors:
+* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
+******************************************************************************/
+
+#ifndef IDENTIFICATIONMASCOTDATFILE_H
+#define IDENTIFICATIONMASCOTDATFILE_H
+
+#include "identificationdatasource.h"
+#include <QFileInfo>
+
+/**
+ * \brief identification data source backed by a MASCOT dat result file
+ */
+class IdentificationMascotDatFile: public IdentificationDataSource
+{
+public:
+    IdentificationMascotDatFile(const QFileInfo & mascot_dat_file);
+    IdentificationMascotDatFile(const IdentificationMascotDatFile& other);
+    ~IdentificationMascotDatFile();
+    /// true when both sources refer to the same dat file
+    bool operator==(const IdentificationMascotDatFile& other) const;
+
+    virtual pappso::SpectrumSp getSpectrumSp(unsigned int scan_number) const override;
+    virtual void parseTo(Project* p_project) override;
+
+private:
+    const QFileInfo _mascot_dat_file;
+};
+
+#endif // IDENTIFICATIONMASCOTDATFILE_H
diff --git a/src/input/mascot/mascotdatparser.cpp b/src/input/mascot/mascotdatparser.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..724d913403a8ee86d03729c662a5dcef68c7531c
--- /dev/null
+++ b/src/input/mascot/mascotdatparser.cpp
@@ -0,0 +1,96 @@
+/**
+ * \file /input/mascot/mascotdatparser.cpp
+ * \date 17/2/2018
+ * \author Olivier Langella
+ * \brief MASCOT dat file parser
+ */
+
+/*******************************************************************************
+* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
+*
+* This file is part of XTPcpp.
+*
+* XTPcpp is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* XTPcpp is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
+*
+* Contributors:
+* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
+******************************************************************************/
+
+#include "mascotdatparser.h"
+#include "mimeparser.h"
+#include <QDebug>
+
+/**
+ * \brief stores the parsing targets; the destructor releases none of them
+ */
+MascotDatParser::MascotDatParser(Project * p_project, IdentificationGroup * p_identification_group,
+                                 IdentificationDataSource * p_identification_data_source) {
+    _p_project = p_project;
+    _p_identification_group = p_identification_group;
+    _p_identification_data_source = p_identification_data_source;
+
+}
+MascotDatParser::~MascotDatParser() {
+}
+
+/**
+ * \brief walks the embedded files of a Mascot dat (MIME) stream
+ *
+ * Only the file named "proteins" is handled so far: each of its lines is
+ * passed to parseProteinLine().
+ * \param in_stream device holding the dat content, opened through MimeParser
+ */
+void MascotDatParser::parse(QIODevice * in_stream) {
+    qDebug() << "MascotDatParser::parse begin";
+    MimeParser mime_parser(in_stream);
+    mime_parser.open();
+
+    for(bool more=mime_parser.goToFirstFile(); more; more=mime_parser.goToNextFile()) {
+        qDebug() << "MascotDatParser::parse mimetype=" << mime_parser.getCurrentMimeType() << " filename=" << mime_parser.getCurrentFileName();
+
+        if (mime_parser.getCurrentFileName() == "proteins") {
+            // fetch the stream once instead of once per atEnd()/readLine() call
+            QTextStream & protein_stream = mime_parser.getCurrentTextStream();
+            while(!protein_stream.atEnd()) {
+                parseProteinLine(protein_stream.readLine());
+            }
+        }
+    }
+
+    mime_parser.close();
+    qDebug() << "MascotDatParser::parse end";
+}
+
+/**
+ * \brief matches one line of the "proteins" file against the known layouts
+ *
+ * Two layouts are recognised: with or without a leading "<prefix>::".
+ * NOTE(review): the captured fields are not stored anywhere yet.
+ */
+void MascotDatParser::parseProteinLine(const QString & protein_line) {
+    //02::"tr|A0A0D9SF80|A0A0D9SF80_HUMAN"=55120.88,"General transcription factor II-I repeat domain-containing protein 2A OS=Homo sapiens GN=GTF2IRD2B PE=4 SV=1"
+    QRegExp regexp_protein("^(.*)::\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
+    if (regexp_protein.exactMatch(protein_line)) {
+        QStringList protein_list = regexp_protein.capturedTexts();
+    }
+    else {
+        QRegExp regexp_proteinb("^\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
+        if (regexp_proteinb.exactMatch(protein_line)) {
+            QStringList protein_list = regexp_proteinb.capturedTexts();
+        }
+        else {
+            qDebug() << "MascotDatParser::parseProteinLine error " << protein_line;
+        }
+    }
+}
diff --git a/src/input/mascot/mascotdatparser.h b/src/input/mascot/mascotdatparser.h
new file mode 100644
index 0000000000000000000000000000000000000000..880ba7ba0b92b2f2f7a8a706a27c48d4e62fee0d
--- /dev/null
+++ b/src/input/mascot/mascotdatparser.h
@@ -0,0 +1,56 @@
+/**
+ * \file /input/mascot/mascotdatparser.h
+ * \date 17/2/2018
+ * \author Olivier Langella
+ * \brief MASCOT dat file parser
+ */
+
+/*******************************************************************************
+* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
+*
+* This file is part of XTPcpp.
+*
+* XTPcpp is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* XTPcpp is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
+*
+* Contributors:
+* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
+******************************************************************************/
+
+#ifndef MASCOTDATPARSER_H
+#define MASCOTDATPARSER_H
+
+#include "../../core/project.h"
+
+/**
+ * \brief parser for MASCOT dat result files
+ *
+ * The project, identification group and data source pointers are not released here.
+ */
+class MascotDatParser
+{
+public:
+    MascotDatParser(Project * p_project, IdentificationGroup * p_identification_group,
+                    IdentificationDataSource * p_identification_data_source);
+    virtual ~MascotDatParser();
+    /// parses the dat content read from \a in_stream
+    void parse(QIODevice * in_stream);
+private:
+    void parseProteinLine(const QString & protein_line);
+private:
+    Project * _p_project;
+    IdentificationGroup * _p_identification_group;
+    IdentificationDataSource * _p_identification_data_source;
+};
+
+#endif // MASCOTDATPARSER_H
diff --git a/src/input/mascot/mimeparser.cpp b/src/input/mascot/mimeparser.cpp
index 634959866e41d4115c4c0348fa806f07684f5e46..d922f347112dba565830a58d04fc8524d5ce964e 100644
--- a/src/input/mascot/mimeparser.cpp
+++ b/src/input/mascot/mimeparser.cpp
@@ -29,16 +29,52 @@
 
 #include "mimeparser.h"
 #include <QRegExp>
+#include <QDebug>
+#include <pappsomspp/pappsoexception.h>
 
 
 MimeParser::MimeParser(QIODevice * p_inputstream) {
     _p_inputstream = p_inputstream;
 }
+
+/// releases the text streams through close(), which tolerates repeated calls
+MimeParser::~MimeParser() {
+    close();
+}
+
+/**
+ * \brief opens the input device read-only and wraps it in a text stream
+ * \return true on success
+ * \throws pappso::PappsoException if the device cannot be opened
+ */
 bool MimeParser::open() {
-    _p_inputstream->open(QIODevice::ReadOnly);
-    _real_in = new QTextStream(_p_inputstream);
+    qDebug() << "MimeParser::open begin";
+    if (_p_inputstream->open(QIODevice::ReadOnly)) {
+        _real_in = new QTextStream(_p_inputstream);
+    }
+    else {
+        throw pappso::PappsoException(QObject::tr("Error opening mime QIODevice"));
+    }
+    qDebug() << "MimeParser::open end";
+    // the failure path throws, so reaching this point means success
+    return true;
 }
+
+/**
+ * \brief releases the text streams and detaches from the input device
+ *
+ * Safe to call more than once: every released pointer is reset to nullptr.
+ * \return always true
+ */
 bool MimeParser::close() {
+    qDebug() << "MimeParser::close begin";
+
+    _priv_file_string = "";
+    if (_p_current_file_content != nullptr) {
+        delete _p_current_file_content;
+        // reset the pointer: close() runs again from the destructor
+        _p_current_file_content = nullptr;
+    }
     if (_real_in !=nullptr) {
         delete _real_in;
         _real_in =nullptr;
@@ -49,8 +85,17 @@ bool MimeParser::close() {
-        delete _p_inputstream;
+        // the device is borrowed from the caller (it may live on the stack):
+        // it must never be deleted here
         _p_inputstream =nullptr;
     }
+    qDebug() << "MimeParser::close end";
+    return true;
 }
+
+/**
+ * \brief reads the boundary declaration and loads the first embedded file
+ * \return false if no boundary is found or if no first file can be read
+ */
 bool MimeParser::goToFirstFile() {
+    qDebug() << "MimeParser::goToFirstFile begin";
     /*
 
 MIME-Version: 1.0 (Generated by Mascot version 1.0)
@@ -63,18 +108,94 @@ bool MimeParser::goToFirstFile() {
 
     while(!_real_in->atEnd()) {
         QString line = _real_in->readLine();
+        qDebug() << "MimeParser::goToFirstFile line=" << line;
         if (regexp_boundary.exactMatch(line)) {
             QStringList boundary_list = regexp_boundary.capturedTexts();
-            _boundary = boundary_list[1];
+            // delimiter lines are the declared boundary prefixed with "--"
+            _boundary = QString("--%1").arg(boundary_list[1]);
             break;
         }
     }
 
+    if (_boundary.isEmpty()) {
+        qDebug() << "MimeParser::goToFirstFile _boundary.isEmpty";
+        return false;
+    }
+
+    // skip everything up to the first delimiter line
+    while(!_real_in->atEnd()) {
+        QString line = _real_in->readLine();
+        if (line == _boundary) break;
+    }
+    qDebug() << "MimeParser::goToFirstFile end";
+    return readFile();
+}
+
+/**
+ * \brief reads one embedded file: its Content-Type header, then its content
+ *
+ * The content is buffered and served by getCurrentTextStream().
+ * \return false when the next line is empty or is not a named Content-Type header
+ */
+bool MimeParser::readFile() {
+    qDebug() << "MimeParser::readFile begin";
+    _current_mime_type = "";
+    _current_file_name = "";
+    _priv_file_string = "";
+
+    if (_p_current_file_content != nullptr) {
+        delete _p_current_file_content;
+    }
+    _p_current_file_content = new QTextStream(&_priv_file_string);
+
+    QString line = _real_in->readLine();
+    if (line.isEmpty()) return false;
+    //Content-Type: application/x-Mascot; name="masses"
+    QRegExp regexp_mimetype("^Content-Type:\\s(.*);\\sname=\"(.*)\"$");
+    if (regexp_mimetype.exactMatch(line)) {
+        QStringList mimetype_list = regexp_mimetype.capturedTexts();
+        _current_mime_type = mimetype_list[1];
+        _current_file_name = mimetype_list[2];
+    }
+    else {
+        return false;
+    }
+    // presumably the blank line separating the header from the content - TODO confirm
+    _real_in->readLine();
+    const QString closing_delimiter = _boundary + "--";
+    while(!_real_in->atEnd()) {
+        QString content_line = _real_in->readLine();
+        if (content_line == _boundary || content_line == closing_delimiter) break;
+        *_p_current_file_content << content_line << '\n';
+    }
+    // make sure readers start at the beginning of the buffered content
+    _p_current_file_content->seek(0);
+    qDebug() << "MimeParser::readFile end";
+    return true;
 }
+
+/// loads the next embedded file; \return false when readFile() finds none
 bool MimeParser::goToNextFile() {
+    return readFile();
 }
-const QString & MimeParser::getCurrentFileName() {
+
+/// \return the MIME type captured from the current Content-Type header
+const QString & MimeParser::getCurrentMimeType() const {
+    return _current_mime_type;
+}
+
+/// \return the name captured from the current Content-Type header
+const QString & MimeParser::getCurrentFileName() const {
     return _current_file_name;
 }
+
+/**
+ * \brief text stream over the buffered content of the current embedded file
+ * \throws pappso::PappsoException if no content buffer exists
+ */
 QTextStream & MimeParser::getCurrentTextStream() {
+    if (_p_current_file_content == nullptr) {
+        throw pappso::PappsoException(QObject::tr("Error current file content empty"));
+    }
+    return *_p_current_file_content;
 }
diff --git a/src/input/mascot/mimeparser.h b/src/input/mascot/mimeparser.h
index 641d418bd04f92c4fbb757645264a6312672dc76..2c792abf47f40178a3f4548c40bcebc7483c0d15 100644
--- a/src/input/mascot/mimeparser.h
+++ b/src/input/mascot/mimeparser.h
@@ -37,18 +37,30 @@ class MimeParser
 {
 public:
     MimeParser(QIODevice * p_inputstream);
+    virtual ~MimeParser();
+    // raw QTextStream pointers are deleted in close(): a copy would delete them twice
+    MimeParser(const MimeParser &) = delete;
+    MimeParser & operator=(const MimeParser &) = delete;
     bool open();
     bool close();
     bool goToFirstFile();
     bool goToNextFile();
-    const QString & getCurrentFileName();
+    const QString & getCurrentMimeType() const;
+    const QString & getCurrentFileName() const;
     QTextStream & getCurrentTextStream();
 
+private:
+    bool readFile();
+
 private:
     QString _current_file_name;
+    QString _current_mime_type;
+    // borrowed from the caller, never deleted by MimeParser
     QIODevice * _p_inputstream = nullptr;
     QTextStream * _real_in = nullptr;
     QString _boundary;
+    QTextStream * _p_current_file_content = nullptr;
+    QString _priv_file_string;
 };
 
 #endif // MIMEPARSER_H
diff --git a/src/utils/identificationdatasourcestore.cpp b/src/utils/identificationdatasourcestore.cpp
index c0e100dd0cc71d1702fc39c1d7618925430548bd..e4e16ba50294ae4afea4265bdef3b080692dccb9 100644
--- a/src/utils/identificationdatasourcestore.cpp
+++ b/src/utils/identificationdatasourcestore.cpp
@@ -30,6 +30,7 @@
 
 #include "identificationdatasourcestore.h"
 #include "../core/identification_sources/identificationxtandemfile.h"
+#include "../core/identification_sources/identificationmascotdatfile.h"
 #include "../core/identification_sources/identificationpwizfile.h"
 #include <pappsomspp/pappsoexception.h>
 #include <pappsomspp/utils.h>
@@ -98,6 +99,9 @@ IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QStr
     } else if (ext.toLower() == "pep") {
         //pep xml file
         p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
+    } else if (ext.toLower() == "dat") {
+        //MASCOT dat file
+        p_identfile = std::make_shared<IdentificationMascotDatFile>(location_file);
     } else {
         p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
     }