Skip to content
Snippets Groups Projects
Commit a7057bfd authored by Langella Olivier's avatar Langella Olivier
Browse files

WIP: better pep xml support

parent 86b2c9df
No related branches found
No related tags found
No related merge requests found
......@@ -120,7 +120,10 @@ PepXmlSaxHandler::startElement(const QString &namespaceURI,
{
is_ok = startElement_mod_aminoacid_mass(attributes);
}
else if(qName == "modification_info")
{
is_ok = startElement_modification_info(attributes);
}
_current_text.clear();
}
catch(pappso::PappsoException exception_pappso)
......@@ -211,10 +214,29 @@ PepXmlSaxHandler::startElement_msms_pipeline_analysis(QXmlAttributes attributes)
/**
 * @brief Handle the opening of a pepXML <msms_run_summary> element.
 *
 * The complete MS run file path is the "base_name" attribute immediately
 * followed by the "raw_data" attribute; it is stored in
 * _current_complete_msrun_file_path. The MS run object (_sp_msrun) receives
 * "base_name" as its file name and the QFileInfo base name of the complete
 * path as its sample name.
 *
 * @param attributes attributes of the element; "base_name" and "raw_data"
 * are read.
 * @return always true: failures are reported by exception.
 * @throws pappso::PappsoException if a previous <msms_run_summary> was
 * already handled and the base name of its path differs from the base name
 * of the new path (results from several MS runs in one file are rejected).
 */
bool
PepXmlSaxHandler::startElement_msms_run_summary(QXmlAttributes attributes)
{
  bool is_ok = true;

  const QString base_name = attributes.value("base_name");

  // Path recorded by a previous <msms_run_summary>; empty on the first one.
  const QString old_file = _current_complete_msrun_file_path;

  // Plain concatenation instead of QString("%1%2").arg(a).arg(b): chained
  // arg() calls re-scan the already substituted text, so a path containing
  // "%1" or "%2" would be corrupted.
  _current_complete_msrun_file_path = base_name + attributes.value("raw_data");

  const QString sample_name =
    QFileInfo(_current_complete_msrun_file_path).baseName();

  if(!old_file.isEmpty() && (sample_name != QFileInfo(old_file).baseName()))
    {
      // Multi-argument arg() substitutes both markers in a single pass, for
      // the same reason as above.
      throw pappso::PappsoException(
        QObject::tr("ERROR reading pepxml file :\nX!TandemPipeline does not "
                    "support identification source files containing results "
                    "from multiple MS runs (%1 != %2)")
          .arg(old_file, _current_complete_msrun_file_path));
    }

  _sp_msrun->setFilename(base_name);
  _sp_msrun->setSampleName(sample_name);
  return is_ok;
}
......@@ -259,7 +281,12 @@ PepXmlSaxHandler::startElement_search_summary(QXmlAttributes attributes)
bool
PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes)
{
bool is_ok = true;
bool is_ok = true;
QString spectrum_ref = attributes.value("spectrum");
if(_current_complete_msrun_file_path.isEmpty())
{
_sp_msrun.get()->setFilename(QFileInfo(spectrum_ref).baseName());
}
unsigned int start_scan = attributes.value("start_scan").toUInt();
unsigned int end_scan = attributes.value("end_scan").toUInt();
if(start_scan != end_scan)
......@@ -272,6 +299,7 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes)
.arg(start_scan)
.arg(end_scan));
}
_scan = start_scan;
_current_charge = attributes.value("assumed_charge").toUInt();
if(attributes.value("retention_time_sec").isEmpty())
{
......@@ -297,7 +325,7 @@ PepXmlSaxHandler::startElement_spectrum_query(QXmlAttributes attributes)
//<alternative_protein protein="sp|P46784|RS10B_YEAST" protein_descr="40S
// ribosomal protein S10-B OS=Saccharomyces cerevisiae (strain ATCC 204508
// \
// \
//S288c) GN=RPS10B PE=1 SV=1" num_tol_term="2" peptide_prev_aa="K"
// peptide_next_aa="N"/>
bool
......@@ -482,37 +510,78 @@ PepXmlSaxHandler::startElement_search_score(QXmlAttributes attributes)
if(name == "expect")
{
_p_peptide_evidence->setEvalue(valueStr.simplified().toDouble());
if(_p_peptide_evidence->getIdentificationEngine() ==
IdentificationEngine::OMSSA)
{
_p_peptide_evidence->setParam(PeptideEvidenceParam::omssa_evalue,
valueStr.simplified().toDouble());
}
}
else if(name == "EValue")
{
_p_peptide_evidence->setEvalue(valueStr.simplified().toDouble());
}
// <search_score name="hyperscore" value="232"/>
else if(name == "hyperscore")
{
_p_peptide_evidence->setParam(
PeptideEvidenceParam::tandem_hyperscore,
QVariant(attributes.value("hyperscore").toDouble()));
}
else if(name == "pvalue")
IdentificationEngine identification_engine =
_p_peptide_evidence->getIdentificationEngine();
if(identification_engine == IdentificationEngine::OMSSA)
{
if(_p_peptide_evidence->getIdentificationEngine() ==
IdentificationEngine::OMSSA)
if(name == "pvalue")
{
_p_peptide_evidence->setParam(PeptideEvidenceParam::omssa_pvalue,
valueStr.simplified().toDouble());
}
else if(name == "expect")
{
_p_peptide_evidence->setParam(PeptideEvidenceParam::omssa_evalue,
valueStr.simplified().toDouble());
}
}
else if(identification_engine == IdentificationEngine::XTandem)
{
if(name == "hyperscore")
{
_p_peptide_evidence->setParam(
PeptideEvidenceParam::tandem_hyperscore,
QVariant(valueStr.simplified().toDouble()));
}
}
else if(identification_engine == IdentificationEngine::MSGFplus)
{
if(name == "raw")
{
_p_peptide_evidence->setParam(
PeptideEvidenceParam::msgfplus_raw,
QVariant(valueStr.simplified().toDouble()));
}
else if(name == "SpecEValue")
{
_p_peptide_evidence->setParam(
PeptideEvidenceParam::msgfplus_SpecEValue,
QVariant(valueStr.simplified().toDouble()));
}
else if(name == "EValue")
{
_p_peptide_evidence->setParam(
PeptideEvidenceParam::msgfplus_EValue,
QVariant(valueStr.simplified().toDouble()));
}
}
}
return is_ok;
}
// <modification_info mod_nterm_mass="43.018389" modified_peptide="SQRDCR">
/**
 * @brief Handle the opening of a pepXML <modification_info> element.
 *
 * When the "mod_nterm_mass" attribute is present and not empty, its value is
 * converted to a double and given to
 * Utils::guessAaModificationPbyMonoisotopicMassDelta; the modification
 * obtained is added to the current peptide (_current_peptide_sp) at
 * position 0. Without that attribute the element changes nothing.
 *
 * @param attributes attributes of the element; only "mod_nterm_mass" is read.
 * @return always true.
 */
bool
PepXmlSaxHandler::startElement_modification_info(QXmlAttributes attributes)
{
  const QString nterm_mass_text = attributes.value("mod_nterm_mass");
  if(nterm_mass_text.isEmpty())
    {
      // Nothing declared on the element itself.
      return true;
    }

  const double nterm_mass_delta = nterm_mass_text.toDouble();
  _current_peptide_sp.get()->addAaModification(
    Utils::guessAaModificationPbyMonoisotopicMassDelta(nterm_mass_delta), 0);
  return true;
}
// <modification_info modified_peptide="SQRDCR"> <mod_aminoacid_mass
// position="5" mass="160.030649"/> </modification_info>
bool
......@@ -521,7 +590,7 @@ PepXmlSaxHandler::startElement_mod_aminoacid_mass(QXmlAttributes attributes)
bool is_ok = true;
double mass = attributes.value("mass").toDouble();
unsigned int position = attributes.value("position").toUInt() - 1;
const pappso::Aa &aa = _current_peptide_sp.get()->getConstAa(position);
const pappso::Aa aa(_current_peptide_sp.get()->getSequence()[position].toLatin1());
double mass_modif = mass - aa.getMass();
pappso::AaModificationP modif =
......
......@@ -76,6 +76,8 @@ class PepXmlSaxHandler : public QXmlDefaultHandler
bool startElement_interprophet_result(QXmlAttributes attributes);
bool startElement_search_score(QXmlAttributes attributes);
bool startElement_mod_aminoacid_mass(QXmlAttributes attributes);
bool startElement_modification_info(QXmlAttributes attributes);
bool endElement_search_hit();
bool endElement_modification_info();
......@@ -100,6 +102,7 @@ class PepXmlSaxHandler : public QXmlDefaultHandler
QString _current_group_type;
QString _current_note_label;
QString _current_note_type;
QString _current_complete_msrun_file_path;
unsigned int _scan;
unsigned int _current_charge;
pappso::pappso_double _current_retention_time;
......
......@@ -75,7 +75,11 @@ enum class PeptideEvidenceParam : std::int8_t
peptide_inter_prophet_probability = 5, ///< no PSI MS description
omssa_evalue = 6, ///< MS:1001328 "OMSSA E-value." [PSI:PI]
omssa_pvalue = 7, ///< MS:1001329 "OMSSA p-value." [PSI:PI]
msgfplus_raw = 8, ///< MS:1002049 "MS-GF raw score." [PSI:PI]
msgfplus_denovo = 9, ///< MS:1002050 "MS-GF de novo score." [PSI:PI]
msgfplus_energy = 10, ///< MS:1002051 "MS-GF energy score." [PSI:PI]
msgfplus_SpecEValue = 11, ///< MS:1002052 "MS-GF spectral E-value." [PSI:PI]
msgfplus_EValue = 12, ///< MS:1002053 "MS-GF E-value." [PSI:PI]
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment