diff --git a/src/input/mascot/mascotdatparser.cpp b/src/input/mascot/mascotdatparser.cpp index cc2be6ef5b7f4619c2e7afb58f7ab6f14ec144fa..319a093196ebd242609107ffd8c14a0402e6d161 100644 --- a/src/input/mascot/mascotdatparser.cpp +++ b/src/input/mascot/mascotdatparser.cpp @@ -77,6 +77,21 @@ void MascotDatParser::parse(QIODevice * in_stream) { saveAndClearPeptide(); } + else if (mime_parser.getCurrentFileName() == "decoy_summary") { + _is_decoy_section = true; + while(!mime_parser.getCurrentTextStream().atEnd()) { + parseSummaryLine( mime_parser.getCurrentTextStream().readLine()); + } + _is_decoy_section = false; + } + else if (mime_parser.getCurrentFileName() == "decoy_peptides") { + _is_decoy_section = true; + while(!mime_parser.getCurrentTextStream().atEnd()) { + parsePeptidesLine( mime_parser.getCurrentTextStream().readLine()); + } + saveAndClearPeptide(); + _is_decoy_section = false; + } else if (mime_parser.getCurrentFileName().startsWith("query")) { _current_query_index = mime_parser.getCurrentFileName().mid(5).toUInt(); while(!mime_parser.getCurrentTextStream().atEnd()) { @@ -90,24 +105,30 @@ void MascotDatParser::parse(QIODevice * in_stream) { qDebug() << "MascotDatParser::parse end"; } void MascotDatParser::parseProteinLine(const QString & protein_line) { + qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << protein_line; ProteinXtpSp sp_xtp_protein; //02::"tr|A0A0D9SF80|A0A0D9SF80_HUMAN"=55120.88,"General transcription factor II-I repeat domain-containing protein 2A OS=Homo sapiens GN=GTF2IRD2B PE=4 SV=1" QRegExp regexp_protein("^(.*)::\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$"); if (regexp_protein.exactMatch(protein_line)) { QStringList protein_list = regexp_protein.capturedTexts(); + FastaFileSp fasta_file_sp = _fasta_file_list[protein_list[1].toUInt()-1]; _current_protein.setAccession(protein_list[2]); _current_protein.setDescription(protein_list[4]); + _current_protein.setFastaFileP(fasta_file_sp.get()); sp_xtp_protein = _current_protein.makeProteinXtpSp(); sp_xtp_protein = _p_project->getProteinStore().getInstance(sp_xtp_protein); + } else { QRegExp regexp_proteinb("^\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$"); if (regexp_proteinb.exactMatch(protein_line)) { QStringList protein_list = regexp_proteinb.capturedTexts(); + FastaFileSp fasta_file_sp = _fasta_file_list[0]; _current_protein.setAccession(protein_list[1]); _current_protein.setDescription(protein_list[3]); + _current_protein.setFastaFileP(fasta_file_sp.get()); sp_xtp_protein = _current_protein.makeProteinXtpSp(); sp_xtp_protein = _p_project->getProteinStore().getInstance(sp_xtp_protein); @@ -139,6 +160,8 @@ void MascotDatParser::parseHeaderLine(const QString & header_line) { _number_of_queries = header_list[2].toUInt(); _query_peptide_results.resize(_number_of_queries); _summary_list.resize(_number_of_queries); + _decoy_query_peptide_results.resize(_number_of_queries); + _decoy_summary_list.resize(_number_of_queries); } //min_peaks_for_homology=6 //max_hits=50 @@ -310,6 +333,10 @@ void MascotDatParser::parseQueryLine(const QString & query_line) { void MascotDatParser::parseSummaryLine(const QString & summary_line) { + std::vector<SummaryLine> * p_summary_list = & _summary_list; + if (_is_decoy_section) { + p_summary_list = & _decoy_summary_list; + } if (_regexp_header_line.exactMatch(summary_line)) { QStringList header_list = _regexp_header_line.capturedTexts(); @@ -322,7 +349,7 @@ void MascotDatParser::parseSummaryLine(const QString & summary_line) { unsigned int query_index = index.mid(5).toUInt(); qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << query_index << " " << index; //_current_query.title = value; - _summary_list[query_index-1].exp_mass=value.toDouble(); + (*p_summary_list)[query_index-1].exp_mass=value.toDouble(); } //qexp1=300.157379,2+ //qintensity1=2054822.6250 @@ -330,13 +357,13 @@ void MascotDatParser::parseSummaryLine(const QString & summary_line) { else if (index.startsWith("qmatch")) { unsigned int query_index = index.mid(6).toUInt(); //_current_query.title = value; - _summary_list[query_index-1].match=value.toDouble(); + (*p_summary_list)[query_index-1].match=value.toDouble(); } //qplughole1=14.820890 else if (index.startsWith("qplughole")) { unsigned int query_index = index.mid(9).toUInt(); //_current_query.title = value; - _summary_list[query_index-1].plug_hole=value.toDouble(); + (*p_summary_list)[query_index-1].plug_hole=value.toDouble(); } } @@ -347,7 +374,12 @@ void MascotDatParser::saveAndClearPeptide() { qDebug() << "MascotDatParser::saveAndClearPeptide begin"; if (_current_peptide.query_index > 0) { // save - _query_peptide_results[_current_peptide.query_index-1].push_back(_current_peptide); + if (_is_decoy_section) { + _decoy_query_peptide_results[_current_peptide.query_index-1].push_back(_current_peptide); + } + else { + _query_peptide_results[_current_peptide.query_index-1].push_back(_current_peptide); + } } //new peptide query clear @@ -396,12 +428,13 @@ void MascotDatParser::saveQuery() { peptide_evidence.setPeptideXtpSp(_p_project->getPeptideStore().getInstance(peptide_sp)); - qDebug() << __FILE__ << " " << __FUNCTION__<< " peptide=" << peptide_str << " evalue=" << peptide_evidence.getEvalue() << " ionscore=" << ion_score; + //qDebug() << __FILE__ << " " << __FUNCTION__<< " peptide=" << peptide_str << " evalue=" << peptide_evidence.getEvalue() << " ionscore=" << ion_score; if (peptide_line.protein_string_list.size() != peptide_line.fasta_file_list.size()) { throw pappso::PappsoException(QObject::tr("ERROR (peptide_line.protein_string_list.size() != peptide_line.fasta_file_list.size()) %1").arg(peptide_line.protein_string_list.join(",\""))); } + unsigned int i=0; foreach (const QString &str, peptide_line.protein_string_list) { //sp|O95006|OR2F2_HUMAN":0:299:303:1 int position = str.indexOf("\"", 0); @@ -411,17 +444,18 @@ void MascotDatParser::saveQuery() { if (position_list.size() != 4) { throw pappso::PappsoException(QObject::tr("ERROR position_list.size() != 4 %1").arg(str)); } - unsigned int start = position_list.at(1).toUInt(); - unsigned int stop = position_list.at(2).toUInt(); - qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__; + unsigned int start = position_list.at(1).toUInt()-1; + unsigned int stop = position_list.at(2).toUInt()-1; + //qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__; ProteinXtp protein; protein.setAccession(accession); + protein.setFastaFileP(peptide_line.fasta_file_list[i].get()); - ProteinMatch * p_protein_match = _p_identification_group->getProteinMatchInstance(protein.getAccession()); + ProteinMatch * p_protein_match = _p_identification_group->getProteinMatchInstance(accession); if (p_protein_match == nullptr) { throw pappso::PappsoException(QObject::tr("ERROR (p_protein_match == nullptr) %1").arg(str)); } - qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__; + //qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__; ProteinXtpSp sp_xtp_protein = protein.makeProteinXtpSp(); p_protein_match->setProteinXtpSp(_p_project->getProteinStore().getInstance(sp_xtp_protein)); p_protein_match->setChecked(true); @@ -431,6 +465,7 @@ void MascotDatParser::saveQuery() { peptide_match.setPeptideEvidenceSp(_p_identification_data_source->getPeptideEvidenceStore().getInstance(&peptide_evidence)); p_protein_match->addPeptideMatch(peptide_match); + i++; } } } diff --git a/src/input/mascot/mascotdatparser.h b/src/input/mascot/mascotdatparser.h index babf865de62db0c988669ad2d5d8284d9384b0c2..b1233770fc879434e3a75df997434586cc37cb50 100644 --- a/src/input/mascot/mascotdatparser.h +++ b/src/input/mascot/mascotdatparser.h @@ -90,12 +90,15 @@ private: unsigned int _number_of_residues=0; unsigned int _current_query_index=0; QString _error_str; + bool _is_decoy_section = false; PeptideLine _current_peptide; QueryLine _current_query; std::vector<std::vector<PeptideLine>> _query_peptide_results; std::vector<SummaryLine> _summary_list; + std::vector<std::vector<PeptideLine>> _decoy_query_peptide_results; + std::vector<SummaryLine> _decoy_summary_list; }; diff --git a/src/output/xpip.cpp b/src/output/xpip.cpp index 6dff9b3c196ac21f698a77f19a8c6b93a91ddfa2..ce0ecf20b02747f16cca1cc472988b3b0d40748f 100644 --- a/src/output/xpip.cpp +++ b/src/output/xpip.cpp @@ -287,7 +287,7 @@ void Xpip::writeProteinList() { const ProteinXtp * p_protein = protein_pair.second.get(); _output_stream->writeStartElement("protein"); if (p_protein->getFastaFileP() == nullptr) { - throw pappso::PappsoException(QObject::tr("Error writing XPIP file :\n FastaFile pointer is null")); + throw pappso::PappsoException(QObject::tr("Error writing XPIP file :\n FastaFile pointer is null for protein accession %1").arg(p_protein->getAccession())); } _output_stream->writeAttribute("fasta_id",p_protein->getFastaFileP()->getXmlId()); _output_stream->writeAttribute("acc",p_protein->getAccession());