Skip to content
Snippets Groups Projects
Commit ab76fa40 authored by Langella Olivier's avatar Langella Olivier
Browse files

mascot dat parser

parent 97edd298
No related branches found
No related tags found
No related merge requests found
......@@ -66,6 +66,7 @@ SET(CPP_FILES
core/automaticfilterparameters.cpp
core/identificationgroup.cpp
core/identification_sources/identificationdatasource.cpp
core/identification_sources/identificationmascotdatfile.cpp
core/identification_sources/identificationpwizfile.cpp
core/identification_sources/identificationxtandemfile.cpp
core/labeling/label.cpp
......@@ -92,6 +93,8 @@ SET(CPP_FILES
grouping/ptm/ptmislandgroup.cpp
grouping/ptm/ptmislandsubgroup.cpp
grouping/ptm/ptmsamplescan.cpp
input/mascot/mascotdatparser.cpp
input/mascot/mimeparser.cpp
input/condorqxmlsaxhandler.cpp
input/identificationpwizreader.cpp
input/xpipsaxhandler.cpp
......
/**
* \file /core/identification_sources/identificationmascotdatfile.cpp
* \date 17/2/2018
* \author Olivier Langella
* \brief mascot dat identification file handler
*/
/*******************************************************************************
* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "identificationmascotdatfile.h"
#include <pappsomspp/pappsoexception.h>
#include "../project.h"
#include "../../input/mascot/mascotdatparser.h"
/**
 * \brief Build a Mascot dat identification data source from a file description.
 *
 * The absolute path of the file is forwarded to the IdentificationDataSource
 * base class, a copy of the file description is kept in this object and the
 * identification engine is set to Mascot.
 *
 * \param mascot_dat_file description of the Mascot dat result file
 */
IdentificationMascotDatFile::IdentificationMascotDatFile(const QFileInfo & mascot_dat_file)
    : IdentificationDataSource(mascot_dat_file.absoluteFilePath()),
      _mascot_dat_file(mascot_dat_file)
{
    _engine = IdentificationEngine::mascot;
}
/**
 * \brief Copy constructor.
 *
 * Copies the base class part and the file description of \p other, then sets
 * the identification engine to Mascot.
 *
 * \param other data source to copy
 */
IdentificationMascotDatFile::IdentificationMascotDatFile(const IdentificationMascotDatFile& other)
    : IdentificationDataSource(other),
      _mascot_dat_file(other._mascot_dat_file)
{
    _engine = IdentificationEngine::mascot;
}
/**
 * \brief Destructor: this class has nothing of its own to release.
 */
IdentificationMascotDatFile::~IdentificationMascotDatFile() = default;
/**
 * \brief Tell whether two Mascot dat data sources describe the same file.
 *
 * The comparison is delegated to QFileInfo::operator== on the stored file
 * descriptions. The previous implementation had an empty body, which is
 * undefined behaviour for a function returning bool.
 *
 * \param other data source to compare with
 * \return true if both objects hold equal file descriptions
 */
bool IdentificationMascotDatFile::operator==(const IdentificationMascotDatFile& other) const
{
    return _mascot_dat_file == other._mascot_dat_file;
}
/**
 * \brief Get the spectrum of a scan.
 *
 * Simply forwards the request to IdentificationDataSource::getSpectrumSp.
 *
 * \param scan_number scan number of the requested spectrum
 * \return the spectrum returned by the base class implementation
 */
pappso::SpectrumSp IdentificationMascotDatFile::getSpectrumSp(unsigned int scan_number) const {
    return IdentificationDataSource::getSpectrumSp(scan_number);
}
/**
 * \brief Parse the Mascot dat file and load its content into a project.
 *
 * An MS run named after the base name of the dat file is obtained from the
 * project MS run store and attached to this data source. The identification
 * group receiving the results is then chosen:
 *  - in combined project mode: the first existing group, or a new group when
 *    the project has none;
 *  - otherwise: the first group that contains the sample name of the MS run,
 *    or a new group when no group contains it.
 * This data source is registered in the chosen group and the file is handed
 * to a MascotDatParser.
 *
 * \param p_project project receiving the identification results
 */
void IdentificationMascotDatFile::parseTo(Project* p_project) {
    qDebug() << "IdentificationMascotDatFile::parseTo begin";
    qDebug() << "Read Mascot dat result file '" << _mascot_dat_file.absoluteFilePath() << "'";

    MsRunSp msrun_sp = p_project->getMsRunStore().getInstance(_mascot_dat_file.baseName());
    setMsRunSp(msrun_sp);

    std::vector<IdentificationGroup *> identification_list = p_project->getIdentificationGroupList();
    IdentificationGroup * identification_group_p = nullptr;
    if (p_project->getProjectMode() == ProjectMode::combined) {
        // combined mode: everything goes into one single identification group
        if (identification_list.empty()) {
            identification_group_p = p_project->newIdentificationGroup();
        }
        else {
            identification_group_p = identification_list[0];
        }
    }
    else {
        // otherwise reuse the group that already contains this sample, if any
        for (IdentificationGroup * identification_p_flist : identification_list) {
            if (identification_p_flist->containSample(msrun_sp->getSampleName())) {
                identification_group_p = identification_p_flist;
                break;
            }
        }
        if (identification_group_p == nullptr) {
            identification_group_p = p_project->newIdentificationGroup();
        }
    }
    identification_group_p->addIdentificationDataSourceP(this);

    MascotDatParser mascot_parser(p_project, identification_group_p, this);
    // MascotDatParser::parse wraps the device in a MimeParser, and
    // MimeParser::close() (also called by its destructor) deletes the device
    // it was given. The QFile must therefore live on the heap and must not be
    // touched after parse(): a stack-allocated QFile would be deleted by the
    // parser and then closed/destroyed a second time here.
    QFile * p_qfile = new QFile(_mascot_dat_file.absoluteFilePath());
    mascot_parser.parse(p_qfile);

    qDebug() << "IdentificationMascotDatFile::parseTo end";
}
/**
* \file /core/identification_sources/identificationmascotdatfile.h
* \date 17/2/2018
* \author Olivier Langella
* \brief mascot dat identification file handler
*/
/*******************************************************************************
* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#ifndef IDENTIFICATIONMASCOTDATFILE_H
#define IDENTIFICATIONMASCOTDATFILE_H
#include "identificationdatasource.h"
#include <QFileInfo>
class IdentificationMascotDatFile: public IdentificationDataSource
{
public:
IdentificationMascotDatFile(const QFileInfo & mascot_dat_file);
IdentificationMascotDatFile(const IdentificationMascotDatFile& other);
~IdentificationMascotDatFile();
bool operator==(const IdentificationMascotDatFile& other) const;
virtual pappso::SpectrumSp getSpectrumSp(unsigned int scan_number) const override;
virtual void parseTo(Project* p_project) override;
private:
const QFileInfo _mascot_dat_file;
};
#endif // IDENTIFICATIONMASCOTDATFILE_H
/**
* \file /input/mascot/mascotdatparser.cpp
* \date 17/2/2018
* \author Olivier Langella
* \brief MASCOT dat file parser
*/
/*******************************************************************************
* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "mascotdatparser.h"
#include "mimeparser.h"
#include <QDebug>
/**
 * \brief Build a parser bound to a project, an identification group and a
 * data source.
 *
 * The three pointers are only stored; nothing is parsed at construction time.
 *
 * \param p_project project pointer to store
 * \param p_identification_group identification group pointer to store
 * \param p_identification_data_source identification data source pointer to store
 */
MascotDatParser::MascotDatParser(Project * p_project, IdentificationGroup * p_identification_group,
                                 IdentificationDataSource * p_identification_data_source)
    : _p_project(p_project),
      _p_identification_group(p_identification_group),
      _p_identification_data_source(p_identification_data_source)
{
}
/**
 * \brief Destructor: the stored pointers are not released here.
 */
MascotDatParser::~MascotDatParser() = default;
/**
 * \brief Parse a Mascot dat stream.
 *
 * The stream is read through a MimeParser. Each embedded file is visited in
 * turn; only the file named "proteins" is processed, each of its lines being
 * passed to parseProteinLine(). The MimeParser is closed at the end.
 *
 * \param in_stream device containing the Mascot dat content
 */
void MascotDatParser::parse(QIODevice * in_stream) {
    qDebug() << "MascotDatParser::parse begin";
    MimeParser mime_parser(in_stream);
    mime_parser.open();

    bool has_file = mime_parser.goToFirstFile();
    while (has_file) {
        qDebug() << "MascotDatParser::parse mimetype=" << mime_parser.getCurrentMimeType() << " filename=" << mime_parser.getCurrentFileName();

        const bool is_protein_section = (mime_parser.getCurrentFileName() == "proteins");
        if (is_protein_section) {
            // one protein description per line
            while (!mime_parser.getCurrentTextStream().atEnd()) {
                parseProteinLine(mime_parser.getCurrentTextStream().readLine());
            }
        }

        has_file = mime_parser.goToNextFile();
    }

    mime_parser.close();
    qDebug() << "MascotDatParser::parse end";
}
void MascotDatParser::parseProteinLine(const QString & protein_line) {
//02::"tr|A0A0D9SF80|A0A0D9SF80_HUMAN"=55120.88,"General transcription factor II-I repeat domain-containing protein 2A OS=Homo sapiens GN=GTF2IRD2B PE=4 SV=1"
QRegExp regexp_protein("^(.*)::\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
if (regexp_protein.exactMatch(protein_line)) {
QStringList protein_list = regexp_protein.capturedTexts();
}
else {
QRegExp regexp_proteinb("^\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
if (regexp_proteinb.exactMatch(protein_line)) {
QStringList protein_list = regexp_proteinb.capturedTexts();
}
else {
qDebug() << "MascotDatParser::parseProteinLine error " << protein_line;
}
}
}
/**
* \file /input/mascot/mascotdatparser.h
* \date 17/2/2018
* \author Olivier Langella
* \brief MASCOT dat file parser
*/
/*******************************************************************************
* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#ifndef MASCOTDATPARSER_H
#define MASCOTDATPARSER_H
#include "../../core/project.h"
/**
 * \brief Parser for Mascot dat result files.
 */
class MascotDatParser
{
public:
    /** \brief build a parser bound to a project, a group and a data source
     * \param p_project project pointer kept by the parser
     * \param p_identification_group identification group pointer kept by the parser
     * \param p_identification_data_source data source pointer kept by the parser
     */
    MascotDatParser(Project * p_project, IdentificationGroup * p_identification_group,
                    IdentificationDataSource * p_identification_data_source);
    virtual ~MascotDatParser();

    /** \brief parse the content of a Mascot dat stream
     * \param in_stream device containing the Mascot dat content
     */
    void parse(QIODevice * in_stream);

private:
    /** \brief handle one line of the "proteins" section
     * \param protein_line the line to handle
     */
    void parseProteinLine(const QString & protein_line);

    Project * _p_project;
    IdentificationGroup * _p_identification_group;
    IdentificationDataSource * _p_identification_data_source;
};
#endif // MASCOTDATPARSER_H
......@@ -29,16 +29,34 @@
#include "mimeparser.h"
#include <QRegExp>
#include <QDebug>
#include <pappsomspp/pappsoexception.h>
/**
 * \brief Build a MIME parser reading from the given device.
 *
 * The device pointer is only stored; it is opened later by open().
 *
 * \param p_inputstream device containing the MIME content
 */
MimeParser::MimeParser(QIODevice * p_inputstream)
    : _p_inputstream(p_inputstream)
{
}
/**
 * \brief Destructor: releases everything through close().
 */
MimeParser::~MimeParser() {
    this->close();
}
/**
 * \brief Open the input device read-only and attach a text stream to it.
 *
 * The device is opened exactly once and exactly one QTextStream is created
 * on it. Opening it a second time would fail on an already open device and
 * the first text stream would be leaked.
 *
 * \return true when the device has been opened; failure is reported by an
 * exception, never by a false return value
 * \throws pappso::PappsoException if there is no device or if it cannot be
 * opened for reading
 */
bool MimeParser::open() {
    qDebug() << "MimeParser::open begin";
    if ((_p_inputstream == nullptr) || !_p_inputstream->open(QIODevice::ReadOnly)) {
        throw pappso::PappsoException(QObject::tr("Error opening mime QIODevice"));
    }
    _real_in = new QTextStream(_p_inputstream);
    qDebug() << "MimeParser::open end";
    return true;
}
bool MimeParser::close() {
qDebug() << "MimeParser::close begin";
_priv_file_string = "";
if (_p_current_file_content != nullptr) {
delete _p_current_file_content;
}
if (_real_in !=nullptr) {
delete _real_in;
_real_in =nullptr;
......@@ -49,8 +67,10 @@ bool MimeParser::close() {
delete _p_inputstream;
_p_inputstream =nullptr;
}
qDebug() << "MimeParser::close end";
}
bool MimeParser::goToFirstFile() {
qDebug() << "MimeParser::goToFirstFile begin";
/*
MIME-Version: 1.0 (Generated by Mascot version 1.0)
......@@ -63,18 +83,71 @@ bool MimeParser::goToFirstFile() {
while(!_real_in->atEnd()) {
QString line = _real_in->readLine();
qDebug() << "MimeParser::goToFirstFile line=" << line;
if (regexp_boundary.exactMatch(line)) {
QStringList boundary_list = regexp_boundary.capturedTexts();
_boundary = boundary_list[1];
_boundary = QString("--%1").arg(boundary_list[1]);
break;
}
}
if (_boundary.isEmpty()) {
qDebug() << "MimeParser::goToFirstFile _boundary.isEmpty";
return false;
}
while(!_real_in->atEnd()) {
QString line = _real_in->readLine();
if (line == _boundary) break;
}
qDebug() << "MimeParser::goToFirstFile end";
return readFile();
}
bool MimeParser::readFile() {
qDebug() << "MimeParser::readFile begin";
_current_mime_type = "";
_current_file_name = "";
_priv_file_string = "";
if (_p_current_file_content != nullptr) {
delete _p_current_file_content;
}
_p_current_file_content = new QTextStream(&_priv_file_string);
QString line = _real_in->readLine();
if (line.isEmpty()) return false;
//Content-Type: application/x-Mascot; name="masses"
QRegExp regexp_mimetype("^Content-Type:\\s(.*);\\sname=\"(.*)\"$");
if (regexp_mimetype.exactMatch(line)) {
QStringList mimetype_list = regexp_mimetype.capturedTexts();
_current_mime_type = mimetype_list[1];
_current_file_name = mimetype_list[2];
}
else {
return false;
}
_real_in->readLine();
while(!_real_in->atEnd()) {
QString line = _real_in->readLine();
if (line == _boundary) break;
*_p_current_file_content << line << endl;
}
qDebug() << "MimeParser::readFile end";
return true;
}
/**
 * \brief Move to the next embedded file.
 *
 * \return the result of readFile()
 */
bool MimeParser::goToNextFile() {
    const bool file_was_read = readFile();
    return file_was_read;
}
const QString & MimeParser::getCurrentFileName() {
/**
 * \brief Get the mime type stored for the current embedded file.
 *
 * \return reference to the current mime type string
 */
const QString & MimeParser::getCurrentMimeType() const {
    return this->_current_mime_type;
}
/**
 * \brief Get the name stored for the current embedded file.
 *
 * \return reference to the current file name string
 */
const QString & MimeParser::getCurrentFileName() const {
    return this->_current_file_name;
}
/**
 * \brief Get the text stream holding the content of the current embedded file.
 *
 * QTextStream::reset() is called on the stream before it is returned.
 *
 * \return reference to the current content stream
 * \throws pappso::PappsoException if there is no current content stream
 */
QTextStream & MimeParser::getCurrentTextStream() {
    if (_p_current_file_content != nullptr) {
        _p_current_file_content->reset();
        return *_p_current_file_content;
    }
    throw pappso::PappsoException(QObject::tr("Error current file content empty"));
}
......@@ -37,18 +37,26 @@ class MimeParser
{
public:
MimeParser(QIODevice * p_inputstream);
virtual ~MimeParser();
bool open();
bool close();
bool goToFirstFile();
bool goToNextFile();
const QString & getCurrentFileName();
const QString & getCurrentMimeType() const;
const QString & getCurrentFileName() const;
QTextStream & getCurrentTextStream();
private:
bool readFile();
private:
QString _current_file_name;
QString _current_mime_type;
QIODevice * _p_inputstream = nullptr;
QTextStream * _real_in = nullptr;
QString _boundary;
QTextStream * _p_current_file_content = nullptr;
QString _priv_file_string;
};
#endif // MIMEPARSER_H
......@@ -30,6 +30,7 @@
#include "identificationdatasourcestore.h"
#include "../core/identification_sources/identificationxtandemfile.h"
#include "../core/identification_sources/identificationmascotdatfile.h"
#include "../core/identification_sources/identificationpwizfile.h"
#include <pappsomspp/pappsoexception.h>
#include <pappsomspp/utils.h>
......@@ -98,6 +99,9 @@ IdentificationDataSourceSp IdentificationDataSourceStore::getInstance(const QStr
} else if (ext.toLower() == "pep") {
//pep xml file
p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
} else if (ext.toLower() == "dat") {
//MASCOT dat file
p_identfile = std::make_shared<IdentificationMascotDatFile>(location_file);
} else {
p_identfile = std::make_shared<IdentificationPwizFile>(location_file);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment