Skip to content
Snippets Groups Projects
Commit 349d8838 authored by Langella Olivier's avatar Langella Olivier
Browse files

WIP: reading X!Tandem files the XML-stream way seems to work fine

parent fdd9cadb
No related branches found
No related tags found
No related merge requests found
......@@ -209,11 +209,12 @@ TandemReader::readGroup()
while(m_qxmlStreamReader.readNext() ==
QXmlStreamReader::TokenType::Characters)
{
protein_sequence +=
m_qxmlStreamReader.Characters;
qDebug() << m_qxmlStreamReader.text();
protein_sequence += m_qxmlStreamReader.text();
}
protein_sequence =
protein_sequence.simplified().replace(" ", "");
qDebug() << protein_sequence;
if(!protein_sequence.isEmpty())
{
//._sequence.replace(QRegExp("\\*"),
......@@ -254,10 +255,17 @@ TandemReader::readGroup()
}
}
//m_qxmlStreamReader.skipCurrentElement();
// m_qxmlStreamReader.skipCurrentElement();
}
else if(tandem_group.type == "parameters")
{
readGroupTypeParameters(tandem_group);
}
else
{
m_qxmlStreamReader.raiseError(
QObject::tr("Not an X!Tandem input file (no bioml)"));
m_qxmlStreamReader.skipCurrentElement();
}
}
......@@ -520,3 +528,298 @@ TandemReader::readDomain(TandemReader::TandemGroup &tandem_group)
delete p_peptide_evidence;
}
}
/**
 * @brief Reads the <note> children of a <group type="parameters"> element.
 *
 * X!Tandem writes several parameter groups ("input parameters",
 * "unused input parameters", "performance parameters"). Each one is a flat
 * list of notes such as:
 *
 *   <note type="input" label="list path, default parameters">/path/p.xml</note>
 *   <note label="list path, sequence source #1">/path/db.fasta</note>
 *   <note label="modelling, total spectra used">12199</note>
 *   <note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note>
 *
 * Only the labels handled below are stored in the identification data
 * source; every other note is read (so the stream moves past it) and then
 * ignored.
 *
 * Any child element that is not a <note> raises an error on the XML stream
 * reader, which ends the loop.
 *
 * @param tandem_group the group being read; only its label is used here, for
 * debug output.
 */
void
TandemReader::readGroupTypeParameters(TandemGroup &tandem_group)
{
  qDebug() << tandem_group.label;

  // In a TimsTOF tandem result file the scan numbers are in fact spectrum
  // indexes: the peptide evidence store has to be told about it. The MS run
  // file name extension is what identifies a TimsTOF run.
  auto ensureSpectrumIndexForTimsTof = [this]() {
    if(msp_msrun.get() == nullptr)
      {
        // no MS run to inspect: nothing can be deduced from its file name
        return;
      }
    const auto &msrun_file_name = msp_msrun.get()->getFileName();
    if(msrun_file_name.endsWith(".tdf") || msrun_file_name.endsWith(".d"))
      {
        mp_identificationDataSource->getPeptideEvidenceStore()
          .ensureSpectrumIndexRef();
      }
  };

  // Stores one of the "modelling, total ..." counters.
  auto setStatistic = [this](IdentificationEngineStatistics statistic,
                             const QString &value) {
    mp_identificationDataSource->setIdentificationEngineStatistics(
      statistic, value.toUInt());
  };

  while(m_qxmlStreamReader.readNextStartElement())
    {
      qDebug() << m_qxmlStreamReader.name();
      if(m_qxmlStreamReader.name() == "note")
        {
          // the attributes must be read before readElementText(), which
          // moves the reader to the end of the element
          const QString label =
            m_qxmlStreamReader.attributes().value("label").toString();
          const QString text = m_qxmlStreamReader.readElementText();
          qDebug() << label;

          // the handled labels are mutually exclusive: at most one branch
          // applies to a given note
          if(label == "list path, default parameters")
            {
              //<note type="input" label="list path, default
              // parameters">/path/QExactive_analysis_FDR_nosemi.xml</note>
              mp_identificationDataSource->setIdentificationEngineParam(
                IdentificationEngineParam::tandem_param, text);
            }
          else if(label == "spectrum, path")
            {
              //<note type="input" label="spectrum,
              // path">/path/20120906_balliau_extract_1_A02_urzb-1.mzXML</note>
              //_sp_msrun.get()->setFileName(_current_text);
              // already set by tandem info parser
            }
          else if(label.startsWith("list path, sequence source #"))
            {
              //<note label="list path, sequence source
              //#1">/path/Genome_Z_mays_5a.fasta</note>
              //<note label="list path, sequence source
              //#2">/path/contaminants_standarts.fasta</note>
              mp_identificationDataSource->addFastaFile(
                mp_project->getFastaFileStore().getInstance(FastaFile(text)));
            }
          else if(label == "modelling, total peptides used")
            {
              //<note label="modelling, total peptides used">96618641</note>
              setStatistic(IdentificationEngineStatistics::total_peptide_used,
                           text);
            }
          else if(label == "modelling, total proteins used")
            {
              //<note label="modelling, total proteins used">273656</note>
              setStatistic(IdentificationEngineStatistics::total_proteins_used,
                           text);
            }
          else if(label == "modelling, total spectra assigned")
            {
              //<note label="modelling, total spectra assigned">7464</note>
              setStatistic(
                IdentificationEngineStatistics::total_spectra_assigned, text);
            }
          else if(label == "modelling, total spectra used")
            {
              //<note label="modelling, total spectra used">12199</note>
              setStatistic(IdentificationEngineStatistics::total_spectra_used,
                           text);
            }
          else if(label == "modelling, total unique assigned")
            {
              //<note label="modelling, total unique assigned">6260</note>
              setStatistic(
                IdentificationEngineStatistics::total_unique_assigned, text);
            }
          else if(label == "spectrum, timstof MS2 centroid parameters")
            {
              mp_identificationDataSource->setTimstofMs2CentroidParameters(
                text);
              ensureSpectrumIndexForTimsTof();
            }
          else if(label == "output, spectrum index")
            {
              // TODO in pappsomspp : change tandem output to explicitly tell
              // if we are dealing with spectrum index
              if(text == "true")
                { // this MUST be spectrum index instead of scan numbers
                  mp_identificationDataSource->getPeptideEvidenceStore()
                    .ensureSpectrumIndexRef();
                }
            }
          else if(label == "spectrum, timstof MS2 filters")
            {
              ensureSpectrumIndexForTimsTof();
            }
          else if(label == "process, version")
            {
              //<note label="process, start time">2013:12:20:16:47:19</note>
              //<note label="process, version">X! Tandem Sledgehammer
              //(2013.09.01.1)</note>
              // the version is the text between parentheses
              QRegExp rx("\\((.*)\\)");
              if(rx.indexIn(text, 0) != -1)
                {
                  mp_identificationDataSource->setIdentificationEngineVersion(
                    rx.cap(1));
                }
              qDebug()
                << mp_identificationDataSource->getIdentificationEngineVersion();
            }
          // any other note (output, protein, refine, residue, scoring,
          // spectrum, quality values, refining, timing...) is not stored
        }
      else
        {
          // a parameters group is only expected to contain <note> elements:
          // report what was actually found instead of a "no bioml" message
          m_qxmlStreamReader.raiseError(
            QObject::tr("Unexpected element <%1> in an X!Tandem parameters "
                        "group (only <note> is expected)")
              .arg(m_qxmlStreamReader.name().toString()));
          m_qxmlStreamReader.skipCurrentElement();
        }
    }
}
......@@ -74,6 +74,7 @@ class TandemReader : public pappso::XmlStreamReaderInterface
PeptideMatch peptideMatch;
};
void readDomain(TandemGroup &tandem_group);
void readGroupTypeParameters(TandemGroup &tandem_group);
private:
pappso::UiMonitorInterface *mp_monitor;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment