// mascotdatparser.h
/**
 * \file /input/mascot/mascotdatparser.h
 * \date 17/2/2018
 * \author Olivier Langella
 * \brief MASCOT dat file parser
 */

/*******************************************************************************
* Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
*     XTPcpp is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     XTPcpp is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/

#ifndef MASCOTDATPARSER_H
#define MASCOTDATPARSER_H

#include "../../core/project.h"

class MascotDatParser
{
public:
    MascotDatParser(Project * p_project, IdentificationGroup * p_identification_group,
                    IdentificationDataSource * p_identification_data_source);
    virtual ~MascotDatParser();
    void parse(QIODevice * in_stream);
private:
    void parseProteinLine(const QString & protein_line);
Langella Olivier's avatar
Langella Olivier committed
    void parseHeaderLine(const QString & protein_line);
Langella Olivier's avatar
Langella Olivier committed
    void parsePeptidesLine(const QString & peptide_line);
Langella Olivier's avatar
Langella Olivier committed
    void parseQueryLine(const QString & query_line);
    void parseSummaryLine(const QString & summary_line);
    void parseMassesLine(const QString & masses_line);
Langella Olivier's avatar
Langella Olivier committed
    void saveQuery();
Langella Olivier's avatar
Langella Olivier committed
    void saveAndClearPeptide();
    void setVariableModifications(PeptideXtpSp & peptide_sp, QString variable_modifications_string);
Langella Olivier's avatar
Langella Olivier committed
    struct PeptideLine {
        unsigned int query_index=0;
        unsigned int peptide_index=0;
        QString subst;
        QStringList peptide_string_list;
        QStringList protein_string_list;
        std::vector<FastaFileSp> fasta_file_list;
    };
Langella Olivier's avatar
Langella Olivier committed
    struct QueryLine {
        unsigned int query_index=0;
        unsigned int charge=0;
        pappso::pappso_double rt=0;
        QString title;
    };
    struct SummaryLine {
Langella Olivier's avatar
Langella Olivier committed
        pappso::pappso_double match=0; //qmatch, identity threshold
Langella Olivier's avatar
Langella Olivier committed
        pappso::pappso_double exp_mass=0;
Langella Olivier's avatar
Langella Olivier committed
        pappso::pappso_double plug_hole=0; //qplughole, homology threshold
Langella Olivier's avatar
Langella Olivier committed
    };
    void savePeptideList(std::vector< PeptideLine> & peptide_list, bool is_decoy);
    pappso::pappso_double getEvalue(pappso::pappso_double ion_score, SummaryLine & summary_line, pappso::pappso_double confidence_interval = 0.05) const;
    pappso::pappso_double getEvalueExperimental(pappso::pappso_double ion_score, SummaryLine & summary_line, pappso::pappso_double confidence_interval = 0.05) const;

Langella Olivier's avatar
Langella Olivier committed
private:
    Project * _p_project;
    IdentificationGroup * _p_identification_group;
    IdentificationDataSource * _p_identification_data_source;
Langella Olivier's avatar
Langella Olivier committed
    ProteinXtp _current_protein;
    std::vector<FastaFileSp> _fasta_file_list;
Langella Olivier's avatar
Langella Olivier committed

    QRegExp   _regexp_header_line;
    unsigned int _number_of_queries=0;
    unsigned int _number_of_residues=0;
Langella Olivier's avatar
Langella Olivier committed
    unsigned int _current_query_index=0;
Langella Olivier's avatar
Langella Olivier committed
    QString _error_str;
    bool _is_decoy_section = false;
Langella Olivier's avatar
Langella Olivier committed
    PeptideLine _current_peptide;
Langella Olivier's avatar
Langella Olivier committed
    QueryLine _current_query;
Langella Olivier's avatar
Langella Olivier committed
    std::vector<std::vector<PeptideLine>> _query_peptide_results;
Langella Olivier's avatar
Langella Olivier committed
    std::vector<SummaryLine> _summary_list;
    std::vector<std::vector<PeptideLine>> _decoy_query_peptide_results;
    std::vector<SummaryLine> _decoy_summary_list;
Langella Olivier's avatar
Langella Olivier committed
};

#endif // MASCOTDATPARSER_H