Skip to content
Snippets Groups Projects
Commit 710474f2 authored by Langella Olivier
Browse files

WIP: mascot dat parser

parent d0769d1c
No related branches found
No related tags found
No related merge requests found
......@@ -77,6 +77,21 @@ void MascotDatParser::parse(QIODevice * in_stream) {
saveAndClearPeptide();
}
else if (mime_parser.getCurrentFileName() == "decoy_summary") {
_is_decoy_section = true;
while(!mime_parser.getCurrentTextStream().atEnd()) {
parseSummaryLine( mime_parser.getCurrentTextStream().readLine());
}
_is_decoy_section = false;
}
else if (mime_parser.getCurrentFileName() == "decoy_peptides") {
_is_decoy_section = true;
while(!mime_parser.getCurrentTextStream().atEnd()) {
parsePeptidesLine( mime_parser.getCurrentTextStream().readLine());
}
saveAndClearPeptide();
_is_decoy_section = false;
}
else if (mime_parser.getCurrentFileName().startsWith("query")) {
_current_query_index = mime_parser.getCurrentFileName().mid(5).toUInt();
while(!mime_parser.getCurrentTextStream().atEnd()) {
......@@ -90,24 +105,30 @@ void MascotDatParser::parse(QIODevice * in_stream) {
qDebug() << "MascotDatParser::parse end";
}
void MascotDatParser::parseProteinLine(const QString & protein_line) {
qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << protein_line;
ProteinXtpSp sp_xtp_protein;
//02::"tr|A0A0D9SF80|A0A0D9SF80_HUMAN"=55120.88,"General transcription factor II-I repeat domain-containing protein 2A OS=Homo sapiens GN=GTF2IRD2B PE=4 SV=1"
QRegExp regexp_protein("^(.*)::\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
if (regexp_protein.exactMatch(protein_line)) {
QStringList protein_list = regexp_protein.capturedTexts();
FastaFileSp fasta_file_sp = _fasta_file_list[protein_list[1].toUInt()-1];
_current_protein.setAccession(protein_list[2]);
_current_protein.setDescription(protein_list[4]);
_current_protein.setFastaFileP(fasta_file_sp.get());
sp_xtp_protein = _current_protein.makeProteinXtpSp();
sp_xtp_protein = _p_project->getProteinStore().getInstance(sp_xtp_protein);
}
else {
QRegExp regexp_proteinb("^\"(.*)\"=([0-9]+\\.[0-9]+),\"(.*)\"$");
if (regexp_proteinb.exactMatch(protein_line)) {
QStringList protein_list = regexp_proteinb.capturedTexts();
FastaFileSp fasta_file_sp = _fasta_file_list[0];
_current_protein.setAccession(protein_list[1]);
_current_protein.setDescription(protein_list[3]);
_current_protein.setFastaFileP(fasta_file_sp.get());
sp_xtp_protein = _current_protein.makeProteinXtpSp();
sp_xtp_protein = _p_project->getProteinStore().getInstance(sp_xtp_protein);
......@@ -139,6 +160,8 @@ void MascotDatParser::parseHeaderLine(const QString & header_line) {
_number_of_queries = header_list[2].toUInt();
_query_peptide_results.resize(_number_of_queries);
_summary_list.resize(_number_of_queries);
_decoy_query_peptide_results.resize(_number_of_queries);
_decoy_summary_list.resize(_number_of_queries);
}
//min_peaks_for_homology=6
//max_hits=50
......@@ -310,6 +333,10 @@ void MascotDatParser::parseQueryLine(const QString & query_line) {
void MascotDatParser::parseSummaryLine(const QString & summary_line) {
std::vector<SummaryLine> * p_summary_list = & _summary_list;
if (_is_decoy_section) {
p_summary_list = & _decoy_summary_list;
}
if (_regexp_header_line.exactMatch(summary_line)) {
QStringList header_list = _regexp_header_line.capturedTexts();
......@@ -322,7 +349,7 @@ void MascotDatParser::parseSummaryLine(const QString & summary_line) {
unsigned int query_index = index.mid(5).toUInt();
qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__ << " " << query_index << " " << index;
//_current_query.title = value;
_summary_list[query_index-1].exp_mass=value.toDouble();
(*p_summary_list)[query_index-1].exp_mass=value.toDouble();
}
//qexp1=300.157379,2+
//qintensity1=2054822.6250
......@@ -330,13 +357,13 @@ void MascotDatParser::parseSummaryLine(const QString & summary_line) {
else if (index.startsWith("qmatch")) {
unsigned int query_index = index.mid(6).toUInt();
//_current_query.title = value;
_summary_list[query_index-1].match=value.toDouble();
(*p_summary_list)[query_index-1].match=value.toDouble();
}
//qplughole1=14.820890
else if (index.startsWith("qplughole")) {
unsigned int query_index = index.mid(9).toUInt();
//_current_query.title = value;
_summary_list[query_index-1].plug_hole=value.toDouble();
(*p_summary_list)[query_index-1].plug_hole=value.toDouble();
}
}
......@@ -347,7 +374,12 @@ void MascotDatParser::saveAndClearPeptide() {
qDebug() << "MascotDatParser::saveAndClearPeptide begin";
if (_current_peptide.query_index > 0) {
// save
_query_peptide_results[_current_peptide.query_index-1].push_back(_current_peptide);
if (_is_decoy_section) {
_decoy_query_peptide_results[_current_peptide.query_index-1].push_back(_current_peptide);
}
else {
_query_peptide_results[_current_peptide.query_index-1].push_back(_current_peptide);
}
}
//new peptide query clear
......@@ -396,12 +428,13 @@ void MascotDatParser::saveQuery() {
peptide_evidence.setPeptideXtpSp(_p_project->getPeptideStore().getInstance(peptide_sp));
qDebug() << __FILE__ << " " << __FUNCTION__<< " peptide=" << peptide_str << " evalue=" << peptide_evidence.getEvalue() << " ionscore=" << ion_score;
//qDebug() << __FILE__ << " " << __FUNCTION__<< " peptide=" << peptide_str << " evalue=" << peptide_evidence.getEvalue() << " ionscore=" << ion_score;
if (peptide_line.protein_string_list.size() != peptide_line.fasta_file_list.size()) {
throw pappso::PappsoException(QObject::tr("ERROR (peptide_line.protein_string_list.size() != peptide_line.fasta_file_list.size()) %1").arg(peptide_line.protein_string_list.join(",\"")));
}
unsigned int i=0;
foreach (const QString &str, peptide_line.protein_string_list) {
//sp|O95006|OR2F2_HUMAN":0:299:303:1
int position = str.indexOf("\"", 0);
......@@ -411,17 +444,18 @@ void MascotDatParser::saveQuery() {
if (position_list.size() != 4) {
throw pappso::PappsoException(QObject::tr("ERROR position_list.size() != 4 %1").arg(str));
}
unsigned int start = position_list.at(1).toUInt();
unsigned int stop = position_list.at(2).toUInt();
qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
unsigned int start = position_list.at(1).toUInt()-1;
unsigned int stop = position_list.at(2).toUInt()-1;
//qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
ProteinXtp protein;
protein.setAccession(accession);
protein.setFastaFileP(peptide_line.fasta_file_list[i].get());
ProteinMatch * p_protein_match = _p_identification_group->getProteinMatchInstance(protein.getAccession());
ProteinMatch * p_protein_match = _p_identification_group->getProteinMatchInstance(accession);
if (p_protein_match == nullptr) {
throw pappso::PappsoException(QObject::tr("ERROR (p_protein_match == nullptr) %1").arg(str));
}
qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
//qDebug() << __FILE__ << " " << __FUNCTION__<< " " << __LINE__;
ProteinXtpSp sp_xtp_protein = protein.makeProteinXtpSp();
p_protein_match->setProteinXtpSp(_p_project->getProteinStore().getInstance(sp_xtp_protein));
p_protein_match->setChecked(true);
......@@ -431,6 +465,7 @@ void MascotDatParser::saveQuery() {
peptide_match.setPeptideEvidenceSp(_p_identification_data_source->getPeptideEvidenceStore().getInstance(&peptide_evidence));
p_protein_match->addPeptideMatch(peptide_match);
i++;
}
}
}
......
......@@ -90,12 +90,15 @@ private:
unsigned int _number_of_residues=0;
unsigned int _current_query_index=0;
QString _error_str;
bool _is_decoy_section = false;
PeptideLine _current_peptide;
QueryLine _current_query;
std::vector<std::vector<PeptideLine>> _query_peptide_results;
std::vector<SummaryLine> _summary_list;
std::vector<std::vector<PeptideLine>> _decoy_query_peptide_results;
std::vector<SummaryLine> _decoy_summary_list;
};
......
......@@ -287,7 +287,7 @@ void Xpip::writeProteinList() {
const ProteinXtp * p_protein = protein_pair.second.get();
_output_stream->writeStartElement("protein");
if (p_protein->getFastaFileP() == nullptr) {
throw pappso::PappsoException(QObject::tr("Error writing XPIP file :\n FastaFile pointer is null"));
throw pappso::PappsoException(QObject::tr("Error writing XPIP file :\n FastaFile pointer is null for protein accession %1").arg(p_protein->getAccession()));
}
_output_stream->writeAttribute("fasta_id",p_protein->getFastaFileP()->getXmlId());
_output_stream->writeAttribute("acc",p_protein->getAccession());
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment