/**
 * \file /input/mascot/mascotdatparser.cpp
 * \date 17/2/2018
 * \author Olivier Langella
 * \brief MASCOT dat file parser
 */

/*******************************************************************************
 * Copyright (c) 2018 Olivier Langella <olivier.langella@u-psud.fr>.
 *
 * This file is part of XTPcpp.
 *
 *     XTPcpp is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     XTPcpp is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <olivier.langella@u-psud.fr> - initial API and
 *implementation
 ******************************************************************************/

#include "mascotdatparser.h"
#include "mimeparser.h"

#include <cmath>
#include <exception>
#include <vector>

#include <QDebug>
#include <QFileInfo>
#include <QRegExp>
#include <QStringList>
#include <QUrl>
#include <QVariant>

#include <pappsomspp/pappsoexception.h>

#include "../../core/peptidextp.h"
#include "../../core/proteinmatch.h"
#include "../../core/peptideevidence.h"

/**
 * \brief Builds a parser that writes what it reads into the given objects.
 *
 * The three pointers are stored as received and dereferenced later by the
 * parsing and saving methods of this class.
 *
 * \param p_project project whose protein, peptide and FASTA file stores are
 * used while parsing
 * \param p_identification_group group asked for ProteinMatch instances
 * \param p_identification_data_source data source that receives the engine
 * version, the FASTA files, the MS run file name and the peptide evidences
 */
MascotDatParser::MascotDatParser(
  Project *p_project,
  IdentificationGroup *p_identification_group,
  IdentificationDataSource *p_identification_data_source)
{
  _p_project                    = p_project;
  _p_identification_group       = p_identification_group;
  _p_identification_data_source = p_identification_data_source;

  // parse() only sets this flag around the decoy sections, yet the summary
  // and peptide handlers read it for every section: give it a defined value
  // here (any in-class initializer is not visible from this file).
  _is_decoy_section = false;

  // generic "key=value" line of a MASCOT section.
  // NOTE(review): the commas inside the character class are literal, so ','
  // is accepted as a key character too - confirm this is intended.
  _regexp_header_line.setPattern("^([A-Z,a-z,0-9,_]+)=(.*)$");

  // scan number embedded in a decoded query title ("... scan=43355")
  _regexp_parse_scan.setPattern(".*scan=([0-9]+).*");
}
/**
 * \brief Destructor: nothing is released here, the project, identification
 * group and data source pointers received by the constructor are left
 * untouched.
 */
MascotDatParser::~MascotDatParser()
{
}
/**
 * \brief Reads a whole MASCOT dat stream and dispatches every MIME part to
 * the matching line parser.
 *
 * Parts are recognised by their file name: "proteins", "parameters",
 * "header", "masses", "summary", "peptides", "decoy_summary",
 * "decoy_peptides" and every name starting with "query". Other parts are
 * skipped. The pending peptide is flushed at the end of each peptide section
 * and each query is saved once its part has been read.
 *
 * \param in_stream device handed to MimeParser
 */
void
MascotDatParser::parse(QIODevice *in_stream)
{
  qDebug() << "MascotDatParser::parse begin";
  MimeParser mime_parser(in_stream);
  mime_parser.open();

  // feeds every remaining line of the current MIME part to one line handler
  auto read_section =
    [this,
     &mime_parser](void (MascotDatParser::*line_handler)(const QString &)) {
      while(!mime_parser.getCurrentTextStream().atEnd())
        {
          (this->*line_handler)(
            mime_parser.getCurrentTextStream().readLine());
        }
    };

  for(bool more = mime_parser.goToFirstFile(); more;
      more      = mime_parser.goToNextFile())
    {
      const auto file_name = mime_parser.getCurrentFileName();
      qDebug() << "MascotDatParser::parse mimetype="
               << mime_parser.getCurrentMimeType()
               << " filename=" << file_name;

      if(file_name == "proteins")
        {
          read_section(&MascotDatParser::parseProteinLine);
        }
      else if(file_name == "parameters")
        {
          read_section(&MascotDatParser::parseParametersLine);
        }
      else if(file_name == "header")
        {
          read_section(&MascotDatParser::parseHeaderLine);
        }
      else if(file_name == "masses")
        {
          read_section(&MascotDatParser::parseMassesLine);
        }
      else if(file_name == "summary")
        {
          read_section(&MascotDatParser::parseSummaryLine);
        }
      else if(file_name == "peptides")
        {
          read_section(&MascotDatParser::parsePeptidesLine);
          // the last peptide of the section has no following line to flush it
          saveAndClearPeptide();
        }
      else if(file_name == "decoy_summary")
        {
          _is_decoy_section = true;
          read_section(&MascotDatParser::parseSummaryLine);
          _is_decoy_section = false;
        }
      else if(file_name == "decoy_peptides")
        {
          _is_decoy_section = true;
          read_section(&MascotDatParser::parsePeptidesLine);
          saveAndClearPeptide();
          _is_decoy_section = false;
        }
      else if(file_name.startsWith("query"))
        {
          // "query123" => query index 123
          _current_query.query_index = file_name.mid(5).toULong();
          read_section(&MascotDatParser::parseQueryLine);
          saveQuery();
        }
    }
  mime_parser.close();
  qDebug() << "MascotDatParser::parse end";
}
void
MascotDatParser::parseMassesLine(const QString &masses_line)
{
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
           << masses_line;
  if(_regexp_header_line.exactMatch(masses_line))
    {
      QStringList header_list = _regexp_header_line.capturedTexts();

      //           C_term=17.002740
      // N_term=1.007825
      // delta1=15.994915,Oxidation (M)
      if(header_list[1].startsWith("delta"))
        {
          _delta_modification_list.push_back(MascotModification());
          unsigned index             = header_list[1].mid(5).toUInt();
          QStringList delta_mod_list = header_list[2].split(",");
          pappso::pappso_double mass = delta_mod_list[0].toDouble();
          if(delta_mod_list[1] == "Oxidation (M)")
            {
              _delta_modification_list[index - 1].modification =
                pappso::AaModification::getInstance("MOD:00719");
              _delta_modification_list[index - 1].residue = 'M';
          else
            {
              _delta_modification_list[index - 1].modification =
                pappso::AaModification::getInstanceCustomizedMod(mass);
          //_number_of_residues = header_list[5].toUInt();
      // FixedModResidues1=C
      else if(header_list[1].startsWith("FixedModResidues"))
        {
          unsigned index = header_list[1].mid(16).toUInt();
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
                   << index;
          _fixed_modification_list[index - 1].residue = header_list[2].at(0);
      // NeutralLoss1=0.000000
      // NeutralLoss1_master=63.998285
      // FixedMod1=57.021464,Carbamidomethyl (C)
      else if(header_list[1].startsWith("FixedMod"))
        {
          unsigned index = header_list[1].mid(8).toUInt();
          _fixed_modification_list.push_back(MascotModification());
          QStringList fixed_mod_list = header_list[2].split(",");
          pappso::pappso_double mass = fixed_mod_list[0].toDouble();
          //_number_of_residues = header_list[2].toUInt();
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
                   << header_list[2];

          if(fixed_mod_list[1] == "Carbamidomethyl (C)")
            {
              _fixed_modification_list[index - 1].modification =
                pappso::AaModification::getInstance("MOD:00397");
              _fixed_modification_list[index - 1].residue = 'C';
          else
            {
              _fixed_modification_list[index - 1].modification =
                pappso::AaModification::getInstanceCustomizedMod(mass);
  else
    {
      qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
               << " QREGEXP does not work on " << masses_line;
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
           << _fixed_modification_list.size();
}

void
MascotDatParser::parseProteinLine(const QString &protein_line)
{
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
           << protein_line;
  ProteinXtpSp sp_xtp_protein;
  // 02::"tr|A0A0D9SF80|A0A0D9SF80_HUMAN"=55120.88,"General transcription factor
  // II-I repeat domain-containing protein 2A OS=Homo sapiens GN=GTF2IRD2B PE=4
  // SV=1"
  QRegExp regexp_protein("^(.*)::\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
  if(regexp_protein.exactMatch(protein_line))
    {
      QStringList protein_list = regexp_protein.capturedTexts();
      FastaFileSp fasta_file_sp =
        _fasta_file_list[protein_list[1].toUInt() - 1];
      _current_protein.setAccession(protein_list[2]);
      _current_protein.setDescription(protein_list[4]);
      _current_protein.setFastaFileP(fasta_file_sp.get());

      sp_xtp_protein = _current_protein.makeProteinXtpSp();
      sp_xtp_protein =
        _p_project->getProteinStore().getInstance(sp_xtp_protein);
Langella Olivier's avatar
Langella Olivier committed
    }
  else
    {
      QRegExp regexp_proteinb("^\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
      if(regexp_proteinb.exactMatch(protein_line))
        {
          QStringList protein_list  = regexp_proteinb.capturedTexts();
          FastaFileSp fasta_file_sp = _fasta_file_list[0];

          _current_protein.setAccession(protein_list[1]);
          _current_protein.setDescription(protein_list[3]);
          _current_protein.setFastaFileP(fasta_file_sp.get());

          sp_xtp_protein = _current_protein.makeProteinXtpSp();
          sp_xtp_protein =
            _p_project->getProteinStore().getInstance(sp_xtp_protein);
Langella Olivier's avatar
Langella Olivier committed
        }
      else
        {
          qDebug() << "MascotDatParser::parseProteinLine error "
                   << protein_line;
void
MascotDatParser::parseHeaderLine(const QString &header_line)
{
  if(_regexp_header_line.exactMatch(header_line))
    {
      QStringList header_list = _regexp_header_line.capturedTexts();
      // sequences=73998
      // sequences_after_tax=73998
      // residues=24900901
      if(header_list[1].startsWith("residues"))
        {
          qDebug() << "queries=" << header_list[2];
          _number_of_residues = header_list[2].toUInt();
      // distribution=64847,0,1,576,2254,1934,1417,984,683,419,238,192,113,89,74,46,23,32,28,21,11,3,2,3,3,2,1,1,0,0,0,0,0,0,1
      // decoy_type=1
      // distribution_decoy=65162,0,1,552,2259,1891,1422,957,601,376,192,159,126,73,56,64,26,27,22,14,6,4,1,1,3,2,1
      // exec_time=484
      // date=1517587671
      // time=17:07:51
      // queries=54084
      else if(header_list[1].startsWith("queries"))
        {
          qDebug() << "queries=" << header_list[2];
          _number_of_queries = header_list[2].toUInt();
          _query_peptide_results.resize(_number_of_queries);
          _summary_list.resize(_number_of_queries);
          _decoy_query_peptide_results.resize(_number_of_queries);
          _decoy_summary_list.resize(_number_of_queries);
      // min_peaks_for_homology=6
      // max_hits=50
      // version=2.5.0
      else if(header_list[1] == "version")
        {
          _p_identification_data_source->setIdentificationEngineVersion(
            header_list[2]);
      // fastafile=C:/inetpub/mascot/sequence/ECOLI_INRA/current/ECOLI_INRA_1.fasta
      else if(header_list[1].startsWith("fastafile"))
        {
          qDebug() << "fastafile=" << header_list[2];
          _fasta_file_list.push_back(
            _p_project->getFastaFileStore().getInstance(
              FastaFile(header_list[2])));

          _p_identification_data_source->addFastaFile(_fasta_file_list.back());
      // release=ECOLI_INRA_1.fasta
      // sequences1=4305
      // sequences_after_tax1=4305
      // residues1=1356026
      // fastafile2=C:/inetpub/mascot/sequence/HUMAN_INRA/current/HUMAN_INRA_1.fasta
      // release2=HUMAN_INRA_1.fasta
      // sequences2=69693
      // sequences_after_tax2=69693
      // residues2=23544875
      // taskid=151758718701
void
MascotDatParser::parseParametersLine(const QString &header_line)
{
  if(_regexp_header_line.exactMatch(header_line))
    {
      QStringList header_list = _regexp_header_line.capturedTexts();
      /*
       * LICENSE=licence du logiciel
MP=
NM=
COM=WP4 Batch4 inj2 paramSTR QEKAC160601_02.raw.mgf
IATOL=
IA2TOL=
IASTOL=
IBTOL=
IB2TOL=
IBSTOL=
IYTOL=
IY2TOL=
IYSTOL=
SEG=
SEGT=
SEGTU=
LTOL=
TOL=5
TOLU=ppm
ITH=
ITOL=70
ITOLU=mmu
PFA=1
DB=Contaminants_WP4_D
DB2=S_cerevisiae_D
DB3=UPS1UPS2_D
MODS=Carbamidomethyl (C)
MASS=Monoisotopic
CLE=Trypsin/P
FILE=F:\MSData\Batch4 Qex+TOUL Injection2\QEKAC160601_02.raw.mgf
PEAK=
QUE=
TWO=
SEARCH=MIS
USERNAME=AMH
USEREMAIL=
CHARGE=2+ and 3+
INTERMEDIATE=
REPORT= AUTO
OVERVIEW=
FORMAT=Mascot generic
FORMVER=1.01
FRAG=
IT_MODS=Acetyl (Protein N-term),Oxidation (M)
USER00=
USER01=
USER02=
USER03=
USER04=
USER05=
USER06=
USER07=
USER08=
USER09=
USER10=
USER11=
USER12=
PRECURSOR=
TAXONOMY=All entries
ACCESSION=
REPTYPE=
SUBCLUSTER=
ICAT=
INSTRUMENT=ESI FTMS HCD
ERRORTOLERANT=
FRAMES=
CUTOUT=
USERID=0
QUANTITATION=
DECOY=
PEP_ISOTOPE_ERROR=
MULTI_SITE_MODS=
DATAURL=
RULES=1,2,4,5,6,7,8,9,10,13,14,15,17,18
INTERNALS=0.0,700.0
*/
      if(header_list[1] == "FILE")
        {
          // FILE=F:\MSData\Batch4 Qex+TOUL Injection2\QEKAC160601_02.raw.mgf
          _p_identification_data_source->getMsRunSp().get()->setFilename(
            header_list[2]);

          QFileInfo fileinfo(header_list[2]);
          if(fileinfo.fileName() == header_list[2])
            {
              fileinfo.setFile(header_list[2].replace("\\", "/"));
              _p_identification_data_source->getMsRunSp().get()->setSampleName(
                fileinfo.baseName());
            }
          else
            {
              _p_identification_data_source->getMsRunSp().get()->setSampleName(
                fileinfo.baseName());
            }
void
MascotDatParser::parsePeptidesLine(const QString &peptide_line)
{
  try
    {
      if(_regexp_header_line.exactMatch(peptide_line))
        {
          QStringList header_list = _regexp_header_line.capturedTexts();
          QString index           = header_list[1];
          QString value           = header_list[2];
          QStringList index_list  = index.split("_");
          QString query_index_str           = index_list[0];
          unsigned int query_index_number   = query_index_str.mid(1).toUInt();
          QString peptide_index             = index_list[1];
          std::size_t current_peptide_index = peptide_index.mid(1).toUInt();

          if((value != "-1") &&
             ((query_index_number != _current_peptide.query_index) ||
              (current_peptide_index != _current_peptide.peptide_index)))
            {
              saveAndClearPeptide();
            }
          _current_peptide.query_index   = query_index_number;
          _current_peptide.peptide_index = current_peptide_index;
          if(index_list.size() == 4)
            {
              if((index_list[2] == "primary") && (index_list[3] == "nl"))
                {
                  // throw pappso::PappsoException(
                  // QObject::tr("primary_nl is not taken into account"));
                }
            }
          else if(index_list.size() == 3)
            {
              if(index_list[2] == "db")
                {
                  // q1_p1_db=02
                  while(value.size() > 0)
                    {
                      QString fasta_str = value.mid(0, 2);
                      _current_peptide.fasta_file_list.push_back(
                        _fasta_file_list.at(fasta_str.toInt() - 1));
                      value = value.mid(2);
Langella Olivier's avatar
Langella Olivier committed
                }
              // q856_p9_subst=1,X,W
              else if(index_list[2] == "subst")
                {
                  _current_peptide.subst = value;
Langella Olivier's avatar
Langella Olivier committed
            }
          else if(index_list.size() == 2)
            {
              if(value == "-1")
                {
                  // no result for this query
              else
                {

                  // q1_p1=0,597.302322,0.997884,2,GAWHK,9,0000000,7.97,0000012000000000000,0,0;"sp|O95006|OR2F2_HUMAN":0:299:303:1
                  int position           = value.indexOf(";\"", 0);
                  QString peptide_string = value.mid(0, position);
                  // qDebug() << "peptide_string=" <<  peptide_string;

                  _current_peptide.peptide_string_list =
                    peptide_string.split(",");


                  QString protein_string = value.mid(position + 2);
                  // qDebug() << "protein_string=" <<  protein_string;
                  //"sp|Q9Y2I7|FYV1_HUMAN":0:670:675:2,"tr|E9PDH4|E9PDH4_HUMAN":0:614:619:2
                  _current_peptide.protein_string_list =
                    protein_string.split(",\"");
          /*
          q1_p1_db=02
          q1_p1=0,597.302322,0.997884,2,GAWHK,9,0000000,7.97,0000012000000000000,0,0;"sp|O95006|OR2F2_HUMAN":0:299:303:1
          q1_p1_terms=K,L
          q1_p2_db=02
          q1_p2=1,598.296219,0.003987,2,KEEPP,11,0000000,1.32,0002000000000000000,0,0;"tr|E9PNM8|E9PNM8_HUMAN":0:134:138:1
          q1_p2_terms=R,-
          q2_p1_db=02
          q2_p1=1,598.380234,-0.000316,3,KAGVPK,13,00000000,17.13,0002011000000000000,0,0;"tr|H7C1P9|H7C1P9_HUMAN":0:945:950:2
          q2_p1_terms=K,K
          q2_p2_db=0202
          q2_p2=1,598.380234,-0.000316,2,KQPVK,9,0000000,9.54,0002011000000000000,0,0;"sp|P33527|MRP1_HUMAN":0:270:274:1,"tr|I3L4X2|I3L4X2_HUMAN":0:169:173:1
          q2_p2_terms=R,V:R,V
          q2_p3_db=02
          q2_p3=1,598.380234,-0.000316,2,KAVPGK,13,00000000,7.03,0002001000000000000,0,0;"sp|Q13061|TRDN_HUMAN":0:440:445:2
          q2_p3_terms=K,K
          q2_p4_db=0202
          q2_p4=1,598.380234,-0.000316,2,IPGGKK,14,00000000,1.26,0002001000000000000,0,0;"sp|Q9Y2I7|FYV1_HUMAN":0:670:675:2,"tr|E9PDH4|E9PDH4_HUMAN":0:614:619:2
          q2_p4_terms=K,F:K,F
          */
          // q24878_p3_primary_nl=000000020000000

          /*
          q856_p9_db=0202
          q856_p9=0,685.427521,-0.000117,3,XLLVR,12,0000000,13.68,0000002000000000000,0,0;"tr|V9GY00|V9GY00_HUMAN":0:1:5:1,"tr|H7C3C3|H7C3C3_HUMAN":0:1:5:1
          q856_p9_terms=-,L:-,V
          q856_p9_subst=1,X,W
          */
Langella Olivier's avatar
Langella Olivier committed
        }
  catch(pappso::PappsoException exception_pappso)
    {
      _error_str = QObject::tr(
                     "ERROR in MascotDatParser::parsePeptidesLine "
                     "%1, PAPPSO exception:\n%2 near q%3_p%4")
                     .arg(peptide_line)
                     .arg(exception_pappso.qwhat())
                     .arg(_current_peptide.query_index)
                     .arg(_current_peptide.peptide_index);
      qDebug() << _error_str;
      throw pappso::PappsoException(_error_str);
  catch(std::exception exception_std)
    {
      _error_str = QObject::tr(
                     "ERROR in MascotDatParser::parsePeptidesLine %1, std "
                     "exception:\n%2  near q%3_p%4")
                     .arg(peptide_line)
                     .arg(exception_std.what())
                     .arg(_current_peptide.query_index)
                     .arg(_current_peptide.peptide_index);
      qDebug() << _error_str;
      throw pappso::PappsoException(_error_str);
void
MascotDatParser::parseQueryLine(const QString &query_line)
{
  try
    {
      if(_regexp_header_line.exactMatch(query_line))
        {
          QStringList header_list = _regexp_header_line.capturedTexts();
          QString index           = header_list[1];
          QString value           = header_list[2];
          // title=FULL%20ISSLGSVGAGIVAVKK%20N22213%20%20%20QEP1_SpikeIn_230914_1_3ng_270914%2e35282%2e35282%2e2
          // title=controllerType%3d0%20controllerNumber%3d1%20scan%3d43355
          if(index == "title")
            {
              _current_query.parsed_scan_number = 0;
              _current_query.title =
                QUrl::fromPercentEncoding(value.toLatin1());
              qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
                       << " " << _current_query.title;
              if(_regexp_parse_scan.exactMatch(_current_query.title))
                {
                  _current_query.parsed_scan_number =
                    _regexp_parse_scan.capturedTexts()[1].toULong();
                }
              qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
                       << " " << _current_query.parsed_scan_number;
          // rtinseconds=5703.84
          else if(index == "rtinseconds")
            {
              _current_query.rt = value.toDouble();
          // index=44035
          else if(index == "index") // it is not the query index
              _current_query.index = value.toUInt();
              if(_current_query.parsed_scan_number == 0)
                {
                  _current_query.parsed_scan_number = _current_query.index;
                }
          // charge=2+
          else if(index == "charge")
            {
              _current_query.charge = value.mid(0, value.size() - 1).toUInt();
      /*
      mass_min=129.102051
      mass_max=1198.751099
      int_min=2327
      int_max=6.845e+005
      num_vals=44
      num_used1=-1
      Ions1=129.102051:1.111e+005,275.207306:9.008e+004,374.275299:9.717e+004,514.825623:1.929e+004,599.878906:1.472e+005,714.431519:1.356e+004,815.466003:9643,924.570435:4428,1028.645630:1.257e+005,1085.665527:6.845e+005,1198.751099:1.858e+005,147.112656:7.954e+004,261.159271:4.327e+004,357.248169:2.192e+004,506.312775:3319,543.336548:1.083e+005,655.376465:9786,1011.625977:1.747e+004,1068.641602:9.804e+004,1181.725342:2.712e+004,130.086014:3.602e+004,257.197388:1.455e+004,357.212097:3960,591.370056:2.454e+004,972.547058:3819,1069.647583:4.457e+004,201.123077:1.566e+004,299.171783:1.421e+004,534.825562:1.036e+004,1087.671631:2.374e+004,173.128418:1.512e+004,244.129089:1.052e+004,590.873291:8065,1067.656006:2.235e+004,228.133957:1.35e+004,258.180115:3194,534.333374:3635,1096.635376:1.549e+004,200.139084:1.026e+004,535.328552:3006,1070.637085:4813,183.112442:9925,131.080994:4806,211.108139:2327

       */
  catch(pappso::PappsoException exception_pappso)
    {
      _error_str = QObject::tr(
                     "ERROR in MascotDatParser::parseQueryLine %1, PAPPSO "
                     "exception:\n%2")
                     .arg(query_line)
                     .arg(exception_pappso.qwhat());
      qDebug() << _error_str;
      throw pappso::PappsoException(_error_str);
  catch(std::exception exception_std)
    {
      _error_str =
        QObject::tr(
          "ERROR in MascotDatParser::parseQueryLine %1, std exception:\n%2")
          .arg(query_line)
          .arg(exception_std.what());
      qDebug() << _error_str;
      throw pappso::PappsoException(_error_str);
void
MascotDatParser::parseSummaryLine(const QString &summary_line)
{
  std::vector<SummaryLine> *p_summary_list = &_summary_list;
  if(_is_decoy_section)
    {
      p_summary_list = &_decoy_summary_list;
  if(_regexp_header_line.exactMatch(summary_line))
    {
      QStringList header_list = _regexp_header_line.capturedTexts();
      QString index           = header_list[1];
      QString value           = header_list[2];


      // qmass1=598.300206
      if(index.startsWith("qmass"))
        {
          unsigned int query_index = index.mid(5).toUInt();
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
                   << query_index << " " << index;
          //_current_query.title = value;
          (*p_summary_list)[query_index - 1].exp_mass = value.toDouble();
      // qexp1=300.157379,2+
      // qintensity1=2054822.6250
      // qmatch1=73
      else if(index.startsWith("qmatch"))
        {
          unsigned int query_index = index.mid(6).toUInt();
          //_current_query.title = value;
          (*p_summary_list)[query_index - 1].match = value.toDouble();
      // qplughole1=14.820890
      else if(index.startsWith("qplughole"))
        {
          unsigned int query_index = index.mid(9).toUInt();
          //_current_query.title = value;
          (*p_summary_list)[query_index - 1].plug_hole = value.toDouble();
void
MascotDatParser::saveAndClearPeptide()
{
  // qDebug() << "MascotDatParser::saveAndClearPeptide begin";
  if(_current_peptide.query_index > 0)
    {
      // save
      if(_is_decoy_section)
        {
          _decoy_query_peptide_results[_current_peptide.query_index - 1]
            .push_back(_current_peptide);
      else
        {
          _query_peptide_results[_current_peptide.query_index - 1].push_back(
            _current_peptide);
  // new peptide query clear
  _current_peptide.peptide_string_list.clear();
  _current_peptide.fasta_file_list.clear();
  _current_peptide.query_index = 0;
  _current_peptide.subst       = "";
  // qDebug() << "MascotDatParser::saveAndClearPeptide end";
void
MascotDatParser::saveQuery()
{
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
  if(_current_query.query_index > 0)
    {
      try
        {
          std::vector<PeptideLine> peptide_list =
            _query_peptide_results[_current_query.query_index - 1];
          savePeptideList(peptide_list, false);
          peptide_list =
            _decoy_query_peptide_results[_current_query.query_index - 1];
          savePeptideList(peptide_list, true);
        }
      catch(pappso::PappsoException &error)
        {
          throw pappso::PappsoException(
            QObject::tr("ERROR saving MASCOT query%1 :\n %2")
              .arg(_current_query.query_index)
              .arg(error.qwhat()));
        }
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
void
MascotDatParser::savePeptideList(std::vector<PeptideLine> &peptide_list,
                                 bool is_decoy)
{
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__ << " "
           << _current_query.query_index;
      PeptideEvidence peptide_evidence_generic(
        _p_identification_data_source->getMsRunSp().get(),
        _current_query.parsed_scan_number);
      peptide_evidence_generic.setCharge(_current_query.charge);
      peptide_evidence_generic.setChecked(true);
      peptide_evidence_generic.setExperimentalMass(
        _summary_list[_current_query.query_index - 1].exp_mass);
      peptide_evidence_generic.setRetentionTime(_current_query.rt);

      peptide_evidence_generic.setIdentificationDataSource(
        _p_identification_data_source);
      // parse and save
      for(PeptideLine &peptide_line : peptide_list)
        {

          if(peptide_line.peptide_string_list.size() == 0)
            {
              // no results
              continue;
            }
          PeptideEvidence peptide_evidence(peptide_evidence_generic);
          /*
           *
          0 setMissedCleavages(Integer.parseInt(st.nextToken()));
          1 setPeptideMr(Double.parseDouble(st.nextToken()));
          2 setDeltaMass(Double.parseDouble(st.nextToken()));
          3 setNumberOfIonsMatched(Integer.parseInt(st.nextToken()));
          4 setSequence(st.nextToken());
          5 setPeaksUsedFromIons1(Integer.parseInt(st.nextToken()));
          6 setVariableModificationsArray(st.nextToken());
          7 setIonsScore(Double.parseDouble(st.nextToken()));
          8 parseIonSeries(st.nextToken());
          9 setPeaksUsedFromIons2(Integer.parseInt(st.nextToken()));
          10 setPeaksUsedFromIons3(Integer.parseInt(st.nextToken()));
          */
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
          if(peptide_line.peptide_string_list.size() < 8)
            {
              throw pappso::PappsoException(
                QObject::tr("unable to parse ion score in peptide line q%1_p%2")
                  .arg(peptide_line.query_index)
                  .arg(peptide_line.peptide_index));
            }
          pappso::pappso_double ion_score =
            peptide_line.peptide_string_list.at(7).toDouble();
          peptide_evidence.setParam(PeptideEvidenceParam::mascot_score,
                                    QVariant(ion_score));
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
          if(is_decoy)
              peptide_evidence.setEvalue(getEvalue(
                ion_score,
                _decoy_summary_list.at(_current_query.query_index - 1),
                0.05));
            }
          else
            {
              peptide_evidence.setEvalue(
                getEvalue(ion_score,
                          _summary_list.at(_current_query.query_index - 1),
                          0.05));
          peptide_evidence.setParam(
            PeptideEvidenceParam::mascot_expectation_value,
            QVariant(peptide_evidence.getEvalue()));
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
          QString peptide_str = peptide_line.peptide_string_list.at(4);
          qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
          if(!peptide_line.subst.isEmpty())
              // q856_p9_subst=1,X,W
              // q24379_p2_subst=1,B,D,8,B,D
              QStringList subst_list = peptide_line.subst.split(",");
              for(unsigned int i = 0; i < subst_list.size(); i += 3)
                  peptide_str = peptide_str.replace(
                    subst_list.at(0 + i).toInt() - 1, 1, subst_list.at(2 + i));
          PeptideXtpSp peptide_sp;
          peptide_sp = PeptideXtp(peptide_str).makePeptideXtpSp();
          // variable modifications :
          setVariableModifications(peptide_sp,
                                   peptide_line.peptide_string_list.at(6));
          // fixed modifications :
          for(unsigned int i = 0; i < peptide_str.size(); i++)
            {
              for(MascotModification mascot_modif : _fixed_modification_list)
                {
                  if(peptide_str.at(i) == mascot_modif.residue)
                    {
                      peptide_sp.get()->addAaModification(
                        mascot_modif.modification, i);
                    }
                }
            }
          peptide_evidence.setPeptideXtpSp(
            _p_project->getPeptideStore().getInstance(peptide_sp));
          // qDebug() << __FILE__ << " " << __FUNCTION__<< " peptide=" <<
          // peptide_str << " evalue="  << peptide_evidence.getEvalue() << "
          // ionscore=" << ion_score;


          unsigned int i = 0;
          foreach(const QString &str, peptide_line.protein_string_list)
              // sp|O95006|OR2F2_HUMAN":0:299:303:1
              int position      = str.indexOf("\"", 0);
              QString accession = str.mid(0, position);
              if(is_decoy)
                  accession = QString("%1|reversed").arg(accession);
              // qDebug() << "accession=" <<  accession;
              QStringList position_list = str.mid(position + 2).split(":");
              if(position_list.size() != 4)
                {
                  throw pappso::PappsoException(
                    QObject::tr("ERROR position_list.size() != 4 %1").arg(str));
              unsigned int start = position_list.at(1).toUInt() - 1;
              unsigned int stop  = position_list.at(2).toUInt() - 1;
              // qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
              ProteinXtp protein;
              protein.setAccession(accession);
              if(peptide_line.fasta_file_list.size() == 0)
                  if(_fasta_file_list.size() == 1)
                    {
                      protein.setFastaFileP(_fasta_file_list[0].get());
                    }
                  else
                    {
                      throw pappso::PappsoException(
                        QObject::tr("ERROR no FASTA file"));
                    }
                  if(i < peptide_line.fasta_file_list.size())
                    {

                      protein.setFastaFileP(
                        peptide_line.fasta_file_list[i].get());
                    }
                  else
                    {
                      throw pappso::PappsoException(
                        QObject::tr("ERROR no corresponding FASTA file %1")
                          .arg(i));
                    }
                }
              if(is_decoy)
                {
                  protein.setIsDecoy(true);
              ProteinMatch *p_protein_match =
                _p_identification_group->getProteinMatchInstance(accession);
              if(p_protein_match == nullptr)
                {
                  throw pappso::PappsoException(
                    QObject::tr("ERROR (p_protein_match == nullptr) %1")
                      .arg(str));
                }
              // qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
              ProteinXtpSp sp_xtp_protein = protein.makeProteinXtpSp();
              p_protein_match->setProteinXtpSp(
                _p_project->getProteinStore().getInstance(sp_xtp_protein));
              p_protein_match->setChecked(true);

              PeptideMatch peptide_match;
              peptide_match.setStart(start);
              peptide_match.setPeptideEvidenceSp(
                _p_identification_data_source->getPeptideEvidenceStore()
                  .getInstance(&peptide_evidence));

              p_protein_match->addPeptideMatch(peptide_match);
              i++;

  catch(pappso::PappsoException exception_pappso)
    {
      _error_str = QObject::tr(
                     "ERROR in MascotDatParser::savePeptideList "
                     " PAPPSO exception:\n%2")
                     .arg(exception_pappso.qwhat());
      qDebug() << _error_str;
      throw pappso::PappsoException(_error_str);
    }
  catch(std::exception exception_std)
    {
      _error_str = QObject::tr(
                     "ERROR in MascotDatParser::savePeptideList std "
                     "exception:\n%2")
                     .arg(exception_std.what());
      qDebug() << _error_str;
      throw pappso::PappsoException(_error_str);
    }
  qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
/**
 * \brief Computes the expectation value of an ion score.
 *
 * identity threshold = 10 * log10(match / (confidence_interval * 20))
 * E-value = confidence_interval * 10^((identity threshold - ion_score) / 10)
 *
 * \param ion_score ion score of the peptide
 * \param summary_line summary line of the query, only "match" is read
 * (the value of the qmatchN key)
 * \param confidence_interval significance level, 0.05 at the call sites of
 * this file
 * \return the expectation value
 */
pappso::pappso_double
MascotDatParser::getEvalue(pappso::pappso_double ion_score,
                           SummaryLine &summary_line,
                           pappso::pappso_double confidence_interval) const
{
  const pappso::pappso_double ten    = 10.0;
  const pappso::pappso_double twenty = 20.0;

  // log10 written as ln(x) / ln(10), in the same evaluation order as before
  const pappso::pappso_double identity_threshold =
    ten * std::log(summary_line.match / (confidence_interval * twenty)) /
    std::log(ten);

  return (confidence_interval *
          std::pow(ten, ((identity_threshold - ion_score) / ten)));
}