Skip to content
Snippets Groups Projects
Commit 038d50da authored by Olivier Langella's avatar Olivier Langella
Browse files

new identification engine statistics

parent 7d9fd326
No related branches found
No related tags found
No related merge requests found
......@@ -97,6 +97,19 @@ const QVariant IdentificationDataSource::getIdentificationEngineParam(Identifica
return QVariant();
}
}
/** \brief record one statistic reported by the identification engine
 *
 * A later call for the same statistic replaces the value stored earlier:
 * std::map::insert leaves an existing entry untouched, which would make a
 * second "set" a silent no-op, so the value is assigned through operator[].
 *
 * \param param the statistic being recorded
 * \param value the value to associate with \p param
 */
void IdentificationDataSource::setIdentificationEngineStatistics(IdentificationEngineStatistics param, const QVariant& value) {
    _param_stats[param] = value;
}
/** \brief look up one statistic previously stored for this data source
 *
 * A statistic that was never stored is an ordinary case, not an error, so
 * the lookup uses find() instead of at() with an exception handler.
 *
 * \param param the statistic to look up
 * \return the stored value, or a default-constructed QVariant when no value
 *         is stored for \p param
 */
const QVariant IdentificationDataSource::getIdentificationEngineStatistics(IdentificationEngineStatistics param) const {
    const auto it = _param_stats.find(param);
    if (it == _param_stats.end()) {
        return QVariant();
    }
    return it->second;
}
pappso::SpectrumSp IdentificationDataSource::getSpectrumSp(unsigned int scan_number) const {
pappso::SpectrumSp spectrum_sp = SpectrumStore::getSpectrumSpFromMsRunSp(_ms_run_sp, scan_number);
return spectrum_sp;
......
......@@ -85,7 +85,18 @@ public:
/** \brief get specific identification engine parameter value
*/
virtual const QVariant getIdentificationEngineParam(IdentificationEngineParam param) const;
/** \brief set identification engine statistics
 * any statistics on this identification run that can be told by the identification engine
 * \param param the statistic being recorded
 * \param value the value to associate with \p param
 */
virtual void setIdentificationEngineStatistics(IdentificationEngineStatistics param, const QVariant& value);
/** \brief get specific identification engine statistics value
 * \param param the statistic to look up
 * \return the value stored for \p param; the IdentificationDataSource
 *         implementation returns a default-constructed QVariant when no
 *         value is stored for it
 */
virtual const QVariant getIdentificationEngineStatistics(IdentificationEngineStatistics param) const;
/** \brief add Fastafile used by the identification engine
*/
void addFastaFile (FastaFileSp file);
......@@ -100,6 +111,7 @@ private :
QString _version;
MsRunSp _ms_run_sp = nullptr;
std::map<IdentificationEngineParam, QVariant> _params;
std::map<IdentificationEngineStatistics, QVariant> _param_stats;
std::vector<FastaFileSp> _fastafile_list;
};
......
......@@ -253,22 +253,6 @@
</widget>
<resources/>
<connections>
<connection>
<sender>add_files_button</sender>
<signal>clicked()</signal>
<receiver>TandemRunDialog</receiver>
<slot>chooseFiles()</slot>
<hints>
<hint type="sourcelabel">
<x>550</x>
<y>310</y>
</hint>
<hint type="destinationlabel">
<x>550</x>
<y>264</y>
</hint>
</hints>
</connection>
<connection>
<sender>pushButton_2</sender>
<signal>clicked()</signal>
......@@ -305,7 +289,7 @@
<sender>clear_list_button</sender>
<signal>clicked()</signal>
<receiver>TandemRunDialog</receiver>
<slot>clearFileList()</slot>
<slot>clearFastaFiles()</slot>
<hints>
<hint type="sourcelabel">
<x>464</x>
......@@ -402,6 +386,7 @@
<slot>reject()</slot>
<slot>accept()</slot>
<slot>selectFastaFiles()</slot>
<slot>clearFastaFiles()</slot>
<slot>selectMzFiles()</slot>
<slot>selectOutputDirectory()</slot>
<slot>setPresetName(QString)</slot>
......
......@@ -159,6 +159,10 @@ void TandemRunDialog::selectOutputDirectory() {
}
}
/** \brief slot: empty the FASTA file list
 * removes every row of _p_fasta_file_list, starting at row 0 and spanning
 * its current row count
 */
void TandemRunDialog::clearFastaFiles() {
    const auto row_count = _p_fasta_file_list->rowCount();
    _p_fasta_file_list->removeRows(0, row_count);
}
void TandemRunDialog::selectFastaFiles() {
try {
QSettings settings;
......
......@@ -51,6 +51,7 @@ public:
void reset();
public slots:
void selectFastaFiles();
void clearFastaFiles();
void selectMzFiles();
void selectOutputDirectory();
void selectPresetDirectory();
......
......@@ -295,7 +295,7 @@ bool XtandemSaxHandler::startElement_domain(QXmlAttributes attributes) {
_p_peptide_match->setExperimentalMass(exp_mass);
_p_peptide_match->setStart(attributes.value("start").simplified().toUInt()-1);
_p_peptide_match->setCharge(_charge);
_p_peptide_match->setParam(PeptideMatchParam::tandem_hyperscore, QVariant( attributes.value("hyperscore").toDouble()));
_p_peptide_match->setIdentificationDataSource( _p_identification_data_source);
......@@ -336,11 +336,11 @@ bool XtandemSaxHandler::endElement_note() {
bool is_ok = true;
if (_is_protein_description) {
//_p_protein_match->getProteinXtpSp().get()->setDescription(_current_text.section(" ",1));
_p_protein_match->getProteinXtpSp().get()->setCompleteDescription(_current_text);
if (!_p_protein_match->getProteinXtpSp().get()->getAccession().endsWith(":reversed") && _p_protein_match->getProteinXtpSp().get()->getDescription().endsWith(":reversed")) {
//to fit most cases, just check that the :reversed chars added by X!Tandem are not in the description. if so, then add it too in the accession
_p_protein_match->getProteinXtpSp().get()->setAccession(QString("%1%2").arg(_p_protein_match->getProteinXtpSp().get()->getAccession()).arg(":reversed"));
}
_p_protein_match->getProteinXtpSp().get()->setCompleteDescription(_current_text);
if (!_p_protein_match->getProteinXtpSp().get()->getAccession().endsWith(":reversed") && _p_protein_match->getProteinXtpSp().get()->getDescription().endsWith(":reversed")) {
//to fit most cases, just check that the :reversed chars added by X!Tandem are not in the description. if so, then add it too in the accession
_p_protein_match->getProteinXtpSp().get()->setAccession(QString("%1%2").arg(_p_protein_match->getProteinXtpSp().get()->getAccession()).arg(":reversed"));
}
}
else {
......@@ -470,13 +470,30 @@ bool XtandemSaxHandler::endElement_note() {
<note label="modelling, estimated false positives">18</note>
<note label="modelling, reversed sequence false positives">20</note>
<note label="modelling, spectrum noise suppression ratio">0.00</note>
<note label="modelling, total peptides used">96618641</note>
<note label="modelling, total proteins used">273656</note>
<note label="modelling, total spectra assigned">7464</note>
<note label="modelling, total spectra used">12199</note>
<note label="modelling, total unique assigned">6260</note>
<note label="process, start time">2013:12:20:16:47:19</note>
*/
//<note label="modelling, total peptides used">96618641</note>
if (_current_note_label == "modelling, total peptides used") {
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_peptide_used, _current_text.toUInt());
}
//<note label="modelling, total proteins used">273656</note>
if (_current_note_label == "modelling, total proteins used") {
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_proteins_used, _current_text.toUInt());
}
//<note label="modelling, total spectra assigned">7464</note>
if (_current_note_label == "modelling, total spectra assigned") {
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned, _current_text.toUInt());
}
//<note label="modelling, total spectra used">12199</note>
if (_current_note_label == "modelling, total spectra used") {
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used, _current_text.toUInt());
}
//<note label="modelling, total unique assigned">6260</note>
if (_current_note_label == "modelling, total unique assigned") {
_p_identification_data_source->setIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned, _current_text.toUInt());
}
//<note label="process, start time">2013:12:20:16:47:19</note>
//<note label="process, version">X! Tandem Sledgehammer (2013.09.01.1)</note>
if (_current_note_label == "process, version") {
QRegExp rx("\\((.*)\\)");
......
......@@ -60,6 +60,29 @@ void SampleSheet::writeHeaders() {
_p_writer->writeCell("Identification fasta files");
_p_writer->writeCell("X!Tandem parameters");
// total_spectra_assigned=1, ///< total_spectra_assigned in one identification file (one sample)
_p_writer->setCellAnnotation("total spectra assigned in one identification file (one mzdata sample) given by the identification engine");
_p_writer->writeCell("Total spectra assigned");
//
//total_spectra_used=2,///< total_spectra_used in one identification file (one sample)
_p_writer->setCellAnnotation("total spectra used in one identification file (one mzdata sample) given by the identification engine");
_p_writer->writeCell("Total spectra used");
_p_writer->setCellAnnotation("Percentage of spectra assignment (spectra assigned / spectra used");
_p_writer->writeCell("Assignment percentage");
//total_unique_assigned=5,///< total number unique peptide sequence assigned
_p_writer->setCellAnnotation("total unique peptide sequence assigned in one identification file (one mzdata sample) given by the identification engine");
_p_writer->writeCell("Total unique assigned");
/*
total_peptide_used=3,///< total number of peptides generated and used in identification
total_proteins_used=4,///< total number of proteins generated and used in identification
*/
}
......@@ -79,5 +102,14 @@ void SampleSheet::writeIdentificationDataSource(IdentificationDataSource * p_ide
_p_writer->writeCell(fasta_files.join(" "));
_p_writer->writeCell(p_ident_data_source->getIdentificationEngineParam(IdentificationEngineParam::tandem_param).toString());
_p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned).toString());
_p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used).toString());
if (!p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned).isNull() && !p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used).isNull()) {
_p_writer->writeCellPercentage(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_assigned).toDouble() / p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_spectra_used).toDouble());
}
_p_writer->writeCell(p_ident_data_source->getIdentificationEngineStatistics(IdentificationEngineStatistics::total_unique_assigned).toString());
}
......@@ -64,6 +64,19 @@ enum class IdentificationEngineParam {
tandem_param ///< X!Tandem xml parameters file
};
/** \enum IdentificationEngineStatistics
 * \brief identification engine statistics
 *
 * Keys for the statistics values stored per identification data source.
 */
enum class IdentificationEngineStatistics: std::int8_t {
    /// total_spectra_assigned in one identification file (one sample)
    total_spectra_assigned = 1,
    /// total_spectra_used in one identification file (one sample)
    total_spectra_used = 2,
    /// total number of peptides generated and used in identification
    total_peptide_used = 3,
    /// total number of proteins generated and used in identification
    total_proteins_used = 4,
    /// total number unique peptide sequence assigned
    total_unique_assigned = 5,
};
/** \def GroupingType list of available grouping algoritms
*
*/
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment