diff --git a/src/input/mascot/mascotdatparser.cpp b/src/input/mascot/mascotdatparser.cpp index f49bb8ac45d908cb0adc387adf52651aeb93d1c7..99254b55925b3e6a640476c0d6a8f9a4074099b3 100644 --- a/src/input/mascot/mascotdatparser.cpp +++ b/src/input/mascot/mascotdatparser.cpp @@ -147,6 +147,39 @@ void MascotDatParser::parseHeaderLine(const QString & header_line) { } } +void MascotDatParser::saveAndClearPeptide() { + qDebug() << "MascotDatParser::saveAndClearPeptide begin"; + if (_current_peptide.query_index > 0) { + //parse and save + + pappso::Peptide peptide(_current_peptide.peptide_string_list.at(4)); + + if (_current_peptide.protein_string_list.size() != _current_peptide.fasta_file_list.size()) { + throw pappso::PappsoException(QObject::tr("ERROR (_current_peptide.protein_string_list.size() != _current_peptide.fasta_file_list.size()) %1").arg(_current_peptide.protein_string_list.join(",\""))); + } + + foreach (const QString &str, _current_peptide.protein_string_list) { + //sp|O95006|OR2F2_HUMAN":0:299:303:1 + int position = str.indexOf("\"", 0); + QString accession = str.mid(0, position); + qDebug() << "accession=" << accession; + QStringList position_list = str.mid(position+2).split(":"); + if (position_list.size() != 4) { + throw pappso::PappsoException(QObject::tr("ERROR position_list.size() != 4 %1").arg(str)); + } + unsigned int start = position_list.at(1).toUInt(); + unsigned int stop = position_list.at(2).toUInt(); + } + } + + //new peptide query clear + _current_peptide.peptide_string_list.clear(); + _current_peptide.fasta_file_list.clear(); + _current_peptide.query_index = 0; + _current_peptide.subst = ""; + qDebug() << "MascotDatParser::saveAndClearPeptide end"; +} + void MascotDatParser::parsePeptidesLine(const QString & peptide_line) { try { if (_regexp_header_line.exactMatch(peptide_line)) { @@ -156,14 +189,18 @@ void MascotDatParser::parsePeptidesLine(const QString & peptide_line) { QStringList index_list = index.split("_"); if (index_list.size() == 3) { if (index_list[2] == "db") { + saveAndClearPeptide(); //q1_p1_db=02 - _peptides_fasta_file_list.clear(); while (value.size() > 0) { QString fasta_str = value.mid(0,2); - _peptides_fasta_file_list.push_back(_fasta_file_list.at(fasta_str.toInt()-1)); + _current_peptide.fasta_file_list.push_back(_fasta_file_list.at(fasta_str.toInt()-1)); value = value.mid(2); } } + //q856_p9_subst=1,X,W + else if (index_list[2] == "subst") { + _current_peptide.subst = value; + } } else if (index_list.size() == 2) { if (value == "-1") { @@ -172,37 +209,22 @@ void MascotDatParser::parsePeptidesLine(const QString & peptide_line) { else { QString query_index = index_list[0]; + _current_peptide.query_index = query_index.mid(1).toUInt(); QString peptide_index = index_list[1]; + _current_peptide.peptide_index = peptide_index.mid(1).toUInt(); //q1_p1=0,597.302322,0.997884,2,GAWHK,9,0000000,7.97,0000012000000000000,0,0;"sp|O95006|OR2F2_HUMAN":0:299:303:1 int position = value.indexOf(";\"", 0); QString peptide_string = value.mid(0, position); qDebug() << "peptide_string=" << peptide_string; - QStringList peptide_string_list = peptide_string.split(","); - pappso::Peptide peptide(peptide_string_list.at(4)); + _current_peptide.peptide_string_list = peptide_string.split(","); QString protein_string = value.mid(position+2); qDebug() << "protein_string=" << protein_string; //"sp|Q9Y2I7|FYV1_HUMAN":0:670:675:2,"tr|E9PDH4|E9PDH4_HUMAN":0:614:619:2 - QStringList protein_string_list = protein_string.split(",\""); - if (protein_string_list.size() != _peptides_fasta_file_list.size()) { - throw pappso::PappsoException(QObject::tr("ERROR (protein_string_list.size() != _peptides_fasta_file_list.size()) %1").arg(value)); - } - - foreach (const QString &str, protein_string_list) { - //sp|O95006|OR2F2_HUMAN":0:299:303:1 - int position = str.indexOf("\"", 0); - QString accession = str.mid(0, position); - qDebug() << "accession=" << accession; - QStringList position_list = str.mid(position+2).split(":"); - if (position_list.size() != 4) { - throw pappso::PappsoException(QObject::tr("ERROR position_list.size() != 4 %1").arg(value)); - } - unsigned int start = position_list.at(1).toUInt(); - unsigned int stop = position_list.at(2).toUInt(); - } + _current_peptide.protein_string_list = protein_string.split(",\""); } diff --git a/src/input/mascot/mascotdatparser.h b/src/input/mascot/mascotdatparser.h index f15aa1ff5382385c11d44f57ae716cc4510d6f6d..c349ff68c94b8fdfd27f7da9065709cc18bc2772 100644 --- a/src/input/mascot/mascotdatparser.h +++ b/src/input/mascot/mascotdatparser.h @@ -43,6 +43,16 @@ private: void parseProteinLine(const QString & protein_line); void parseHeaderLine(const QString & protein_line); void parsePeptidesLine(const QString & peptide_line); + void saveAndClearPeptide(); + + struct PeptideLine { + unsigned int query_index=0; + unsigned int peptide_index=0; + QString subst; + QStringList peptide_string_list; + QStringList protein_string_list; + std::vector<FastaFileSp> fasta_file_list; + }; private: Project * _p_project; IdentificationGroup * _p_identification_group; @@ -52,13 +62,16 @@ private: ProteinXtp _current_protein; std::vector<FastaFileSp> _fasta_file_list; - std::vector<FastaFileSp> _peptides_fasta_file_list; + QRegExp _regexp_header_line; unsigned int _number_of_queries=0; unsigned int _number_of_residues=0; QString _error_str; + PeptideLine _current_peptide; + + }; #endif // MASCOTDATPARSER_H