/**
 * \file /input/mascot/mascotdatparser.h
 * \date 17/2/2018
 * \author Olivier Langella
 * \brief MASCOT dat file parser
 */

/*******************************************************************************
 * Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
 *
 * This file is part of XTPcpp.
 *
 *     XTPcpp is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     XTPcpp is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <olivier.langella@u-psud.fr> - initial API and
 *       implementation
 ******************************************************************************/

#ifndef MASCOTDATPARSER_H
#define MASCOTDATPARSER_H

#include "../../core/project.h"

/**
 * \brief MASCOT dat file parser.
 *
 * The parser is bound at construction to a project, an identification group
 * and an identification data source, then fed a MASCOT dat stream through
 * parse(). Only the declarations are visible in this header; the notes below
 * describe what the signatures show and flag everything else as an assumption.
 */
class MascotDatParser
{
  public:
  /**
   * \brief Builds a parser bound to the given project objects.
   * \param p_project project pointer kept by the parser
   * \param p_identification_group identification group pointer kept by the
   * parser
   * \param p_identification_data_source identification data source pointer
   * kept by the parser
   *
   * NOTE(review): the pointers are stored as raw pointers; presumably they are
   * non-owning and must outlive the parser -- confirm in the implementation.
   */
  MascotDatParser(Project *p_project,
                  IdentificationGroup *p_identification_group,
                  IdentificationDataSource *p_identification_data_source);
  virtual ~MascotDatParser();

  /**
   * \brief Parses a MASCOT dat file read from \p in_stream.
   * \param in_stream device to read the dat content from
   *
   * NOTE(review): whether the device must already be open, and how errors are
   * reported, is defined in the implementation -- confirm there.
   */
  void parse(QIODevice *in_stream);

  private:
  // Per-line handlers. Presumably each one receives a single line belonging
  // to the dat file section named in the method -- confirm against parse().
  void parseParametersLine(const QString &parameters_line);
  void parseProteinLine(const QString &protein_line);
  void parseHeaderLine(const QString &header_line);
  void parsePeptidesLine(const QString &peptide_line);
  void parseQueryLine(const QString &query_line);
  void parseSummaryLine(const QString &summary_line);
  void parseMassesLine(const QString &masses_line);

  // Presumably flush the query / peptide currently being accumulated
  // (_current_query, _current_peptide) -- confirm in the implementation.
  void saveQuery();
  void saveAndClearPeptide();

  /**
   * \brief Applies variable modifications to a peptide.
   * \param peptide_sp peptide to update (taken by non-const reference)
   * \param variable_modifications_string textual description of the
   * modifications; the expected encoding is defined in the implementation
   */
  void setVariableModifications(PeptideXtpSp &peptide_sp,
                                QString variable_modifications_string);


  /** \brief A modification paired with a residue character. */
  struct MascotModification
  {
    pappso::AaModificationP modification;
    QChar residue;
  };


  /** \brief Data collected for one peptide entry of the dat file. */
  struct PeptideLine
  {
    unsigned int query_index   = 0;
    unsigned int peptide_index = 0;
    QString subst;
    QStringList peptide_string_list;
    QStringList protein_string_list;
    std::vector<FastaFileSp> fasta_file_list;
  };

  /** \brief Data collected for one query entry of the dat file. */
  struct QueryLine
  {
    std::size_t parsed_scan_number = 0;
    std::size_t query_index        = 0;
    unsigned int index             = 0;
    unsigned int charge            = 0;
    pappso::pappso_double rt       = 0;
    QString title;
  };

  /** \brief Per-query values taken from the summary data. */
  struct SummaryLine
  {
    pappso::pappso_double match     = 0; // qmatch, identity threshold
    pappso::pappso_double exp_mass  = 0;
    pappso::pappso_double plug_hole = 0; // qplughole, homology threshold
  };

  /**
   * \brief Stores a list of peptide lines.
   * \param peptide_list peptide lines to store
   * \param is_decoy flag telling whether the list is decoy data
   */
  void savePeptideList(std::vector<PeptideLine> &peptide_list, bool is_decoy);

  /**
   * \brief Returns an e-value for an ion score.
   * \param ion_score ion score to convert
   * \param summary_line summary values associated with the query
   * \param confidence_interval defaults to 0.05
   * \return the e-value; the formula lives in the implementation
   */
  pappso::pappso_double
  getEvalue(pappso::pappso_double ion_score,
            SummaryLine &summary_line,
            pappso::pappso_double confidence_interval = 0.05) const;

  /**
   * \brief Alternative e-value computation with the same parameters as
   * getEvalue().
   *
   * NOTE(review): how it differs from getEvalue() is not visible in this
   * header -- see the implementation.
   */
  pappso::pappso_double
  getEvalueExperimental(pappso::pappso_double ion_score,
                        SummaryLine &summary_line,
                        pappso::pappso_double confidence_interval = 0.05) const;

  // Objects handed to the constructor.
  Project *_p_project;
  IdentificationGroup *_p_identification_group;
  IdentificationDataSource *_p_identification_data_source;


  ProteinXtp _current_protein;
  std::vector<FastaFileSp> _fasta_file_list;


  QRegExp _regexp_header_line;
  QRegExp _regexp_parse_scan;
  unsigned int _number_of_queries   = 0;
  unsigned int _number_of_residues  = 0;
  QString _error_str;
  bool _is_decoy_section = false;

  // Entries currently being accumulated.
  PeptideLine _current_peptide;
  QueryLine _current_query;

  // Collected results, kept separately for regular and decoy data.
  std::vector<std::vector<PeptideLine>> _query_peptide_results;
  std::vector<SummaryLine> _summary_list;
  std::vector<std::vector<PeptideLine>> _decoy_query_peptide_results;
  std::vector<SummaryLine> _decoy_summary_list;

  std::vector<MascotModification> _delta_modification_list;
  std::vector<MascotModification> _fixed_modification_list;
};

#endif // MASCOTDATPARSER_H