Skip to content
Snippets Groups Projects
Commit d49d4a75 authored by Langella Olivier's avatar Langella Olivier
Browse files

WIP: Mascot parser

parent 3581ca04
No related branches found
No related tags found
No related merge requests found
......@@ -147,6 +147,39 @@ void MascotDatParser::parseHeaderLine(const QString & header_line) {
}
}
void MascotDatParser::saveAndClearPeptide() {
qDebug() << "MascotDatParser::saveAndClearPeptide begin";
if (_current_peptide.query_index > 0) {
//parse and save
pappso::Peptide peptide(_current_peptide.peptide_string_list.at(4));
if (_current_peptide.protein_string_list.size() != _current_peptide.fasta_file_list.size()) {
throw pappso::PappsoException(QObject::tr("ERROR (_current_peptide.protein_string_list.size() != _current_peptide.fasta_file_list.size()) %1").arg(_current_peptide.protein_string_list.join(",\"")));
}
foreach (const QString &str, _current_peptide.protein_string_list) {
//sp|O95006|OR2F2_HUMAN":0:299:303:1
int position = str.indexOf("\"", 0);
QString accession = str.mid(0, position);
qDebug() << "accession=" << accession;
QStringList position_list = str.mid(position+2).split(":");
if (position_list.size() != 4) {
throw pappso::PappsoException(QObject::tr("ERROR position_list.size() != 4 %1").arg(str));
}
unsigned int start = position_list.at(1).toUInt();
unsigned int stop = position_list.at(2).toUInt();
}
}
//new peptide query clear
_current_peptide.peptide_string_list.clear();
_current_peptide.fasta_file_list.clear();
_current_peptide.query_index = 0;
_current_peptide.subst = "";
qDebug() << "MascotDatParser::saveAndClearPeptide end";
}
void MascotDatParser::parsePeptidesLine(const QString & peptide_line) {
try {
if (_regexp_header_line.exactMatch(peptide_line)) {
......@@ -156,14 +189,18 @@ void MascotDatParser::parsePeptidesLine(const QString & peptide_line) {
QStringList index_list = index.split("_");
if (index_list.size() == 3) {
if (index_list[2] == "db") {
saveAndClearPeptide();
//q1_p1_db=02
_peptides_fasta_file_list.clear();
while (value.size() > 0) {
QString fasta_str = value.mid(0,2);
_peptides_fasta_file_list.push_back(_fasta_file_list.at(fasta_str.toInt()-1));
_current_peptide.fasta_file_list.push_back(_fasta_file_list.at(fasta_str.toInt()-1));
value = value.mid(2);
}
}
//q856_p9_subst=1,X,W
else if (index_list[2] == "subst") {
_current_peptide.subst = value;
}
}
else if (index_list.size() == 2) {
if (value == "-1") {
......@@ -172,37 +209,22 @@ void MascotDatParser::parsePeptidesLine(const QString & peptide_line) {
else {
QString query_index = index_list[0];
_current_peptide.query_index = query_index.mid(1).toUInt();
QString peptide_index = index_list[1];
_current_peptide.peptide_index = peptide_index.mid(1).toUInt();
//q1_p1=0,597.302322,0.997884,2,GAWHK,9,0000000,7.97,0000012000000000000,0,0;"sp|O95006|OR2F2_HUMAN":0:299:303:1
int position = value.indexOf(";\"", 0);
QString peptide_string = value.mid(0, position);
qDebug() << "peptide_string=" << peptide_string;
QStringList peptide_string_list = peptide_string.split(",");
pappso::Peptide peptide(peptide_string_list.at(4));
_current_peptide.peptide_string_list = peptide_string.split(",");
QString protein_string = value.mid(position+2);
qDebug() << "protein_string=" << protein_string;
//"sp|Q9Y2I7|FYV1_HUMAN":0:670:675:2,"tr|E9PDH4|E9PDH4_HUMAN":0:614:619:2
QStringList protein_string_list = protein_string.split(",\"");
if (protein_string_list.size() != _peptides_fasta_file_list.size()) {
throw pappso::PappsoException(QObject::tr("ERROR (protein_string_list.size() != _peptides_fasta_file_list.size()) %1").arg(value));
}
foreach (const QString &str, protein_string_list) {
//sp|O95006|OR2F2_HUMAN":0:299:303:1
int position = str.indexOf("\"", 0);
QString accession = str.mid(0, position);
qDebug() << "accession=" << accession;
QStringList position_list = str.mid(position+2).split(":");
if (position_list.size() != 4) {
throw pappso::PappsoException(QObject::tr("ERROR position_list.size() != 4 %1").arg(value));
}
unsigned int start = position_list.at(1).toUInt();
unsigned int stop = position_list.at(2).toUInt();
}
_current_peptide.protein_string_list = protein_string.split(",\"");
}
......
......@@ -43,6 +43,16 @@ private:
void parseProteinLine(const QString & protein_line);
void parseHeaderLine(const QString & protein_line);
void parsePeptidesLine(const QString & peptide_line);
void saveAndClearPeptide();
/**
 * Buffer holding the pieces of one peptide hit while the lines describing it
 * (qN_pM=..., qN_pM_db=..., qN_pM_subst=...) are being read.
 */
struct PeptideLine {
    /// number following 'q' in the line key; 0 means no hit is buffered
    unsigned int query_index{0};
    /// number following 'p' in the line key
    unsigned int peptide_index{0};
    /// raw value of the "_subst" line, e.g. 1,X,W
    QString subst;
    /// comma separated fields found before the protein part of the hit line
    QStringList peptide_string_list;
    /// protein entries of the hit line, e.g. sp|O95006|OR2F2_HUMAN":0:299:303:1
    QStringList protein_string_list;
    /// fasta files resolved from the two character codes of the "_db" line
    std::vector<FastaFileSp> fasta_file_list;
};
private:
Project * _p_project;
IdentificationGroup * _p_identification_group;
......@@ -52,13 +62,16 @@ private:
ProteinXtp _current_protein;
std::vector<FastaFileSp> _fasta_file_list;
std::vector<FastaFileSp> _peptides_fasta_file_list;
QRegExp _regexp_header_line;
unsigned int _number_of_queries=0;
unsigned int _number_of_residues=0;
QString _error_str;
PeptideLine _current_peptide;
};
#endif // MASCOTDATPARSER_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment