-
Olivier Langella authoredOlivier Langella authored
masschroqml.cpp 21.21 KiB
/**
* \file output/masschroqml.cpp
* \date 7/4/2017
* \author Olivier Langella
* \brief MassChroQML writer
*/
/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "masschroqml.h"
#include <pappsomspp/pappsoexception.h>
#include <pappsomspp/utils.h>
#include <pappsomspp/grouping/grpprotein.h>
#include <pappsomspp/amino_acid/Aa.h>
struct McqPeptide {
QString id;
QStringList mods;
QStringList prot_ids;
QString seq;
const pappso::Peptide * native_peptide;
std::vector<QString> data;
std::vector<unsigned int> scan;
std::vector<unsigned int> charge;
};
struct McqPsimod {
unsigned int at;
QString accession;
};
MassChroQml::MassChroQml(const QString & out_filename)
{
//_p_digestion_pipeline = p_digestion_pipeline;
//_mzidentml = "http://psidev.info/psi/pi/mzIdentML/1.1";
QString complete_out_filename = out_filename;
_output_file = new QFile(complete_out_filename);
if (_output_file->open(QIODevice::WriteOnly))
{
_output_stream = new QXmlStreamWriter();
_output_stream->setDevice(_output_file);
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open the MassChroqML output file : %1\n").arg(out_filename));
}
_output_stream->setAutoFormatting(true);
_output_stream->writeStartDocument("1.0");
}
MassChroQml::~MassChroQml()
{
delete _output_file;
delete _output_stream;
}
void MassChroQml::close() {
_output_stream->writeEndDocument();
_output_file->close();
}
void MassChroQml::write(ProjectSp sp_project) {
_sp_project = sp_project;
if (_sp_project.get() == nullptr) {
throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n project is empty"));
}
//<masschroq>
_output_stream->writeStartElement("masschroq");
_output_stream->writeAttribute("type","input");
_output_stream->writeAttribute("version","2.2");
//_output_stream->writeAttribute("creationDate", QDateTime::currentDateTime().toString( Qt::ISODate));
_output_stream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance","xsi");
//_output_stream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance","xsi");
_output_stream->writeAttribute("xmlns","http://pappso.inra.fr/xsd/masschroqml/2.2");
//xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd"
_output_stream->writeAttribute("http://www.w3.org/2001/XMLSchema-instance","schemaLocation","http://pappso.inra.fr/xsd/masschroqml/2.2 http://pappso.inra.fr/xsd/masschroq-2.2.xsd");
// <rawdata><!-- time_values_dir="directory" to read retention time corrections-->
_output_stream->writeStartElement("rawdata");
_output_stream->writeComment("time_values_dir=\"directory\" to read retention time corrections");
std::vector<MsRunSp> msrun_list = _sp_project.get()->getMsRunStore().getMsRunList();
for (MsRunSp & msrun : msrun_list) {
//<data_file id="samp0" format="mzxml" path="bsa1.mzXML" type="centroid" />
_output_stream->writeStartElement("data_file");
_output_stream->writeAttribute("id",msrun.get()->getXmlId());
if(msrun.get()->getMzFormat() == MzFormat::mzXML) {
_output_stream->writeAttribute("format","mzxml");
}
_output_stream->writeAttribute("path",msrun.get()->getFilename());
_output_stream->writeAttribute("type","centroid");
// <data_file id="samp1" format="mzxml" path="bsa2.mzXML" type="profile" />
_output_stream->writeEndElement();
}
// <data_file id="samp2" format="mzml" path="/home/user/bsa3.mzml" type="profile" />
//<data_file id="samp3" format="mzml" path="/home/user/bsa4.mzml" type="profile" />
// _output_stream.writeEndElement();
// </rawdata>
_output_stream->writeEndElement();
writeGroups();
writeProteinList();
writePeptideList();
writeIsotopeLabelList();
writeAlignments();
writeQuantificationMethods();
_output_stream->writeStartElement("quantification");
writeQuantificationResults();
writeQuantificationTraces();
writeQuantify();
_output_stream->writeEndElement();
}
void MassChroQml::writeQuantificationResults() {
//<quantification_results>
_output_stream->writeStartElement("quantification_results");
//<quantification_result output_file="result1"
// format="tsv" />
_output_stream->writeStartElement("quantification_result");
_output_stream->writeAttribute("output_file","result1");
_output_stream->writeAttribute("format","tsv");
_output_stream->writeEndElement();
//<quantification_result output_file="result2"
// format="ods" />
_output_stream->writeComment("<quantification_result output_file=\"result2\" format=\"ods\" />");
//<compar_result output_file="compar"
// format="ods" />
_output_stream->writeStartElement("compar_result");
_output_stream->writeAttribute("output_file","result1");
_output_stream->writeAttribute("format","ods");
_output_stream->writeEndElement();
//</quantification_results>
_output_stream->writeEndElement();
}
void MassChroQml::writeQuantificationTraces() {
//<quantification_traces>
_output_stream->writeStartElement("quantification_traces");
//<peptide_traces peptide_ids="pep0 pep1" output_dir="pep_traces"
// format="tsv" />
_output_stream->writeComment("<peptide_traces peptide_ids=\"pep0 pep1\" output_dir=\"pep_traces\" format=\"tsv\" />");
/*
<all_xics_traces output_dir="all_xics_traces" format="tsv" />
<mz_traces mz_values="634.635 449.754 552.234" output_dir="mz_traces"
format="tsv" />
<mzrt_traces output_dir="mzrt_traces" format="tsv">
<mzrt_values>
<mzrt_value mz="732.317" rt="230.712" />
<mzrt_value mz="575.256" rt="254.788" />
</mzrt_values>
</mzrt_traces>*/
//</quantification_traces>
_output_stream->writeEndElement();
}
void MassChroQml::writeQuantify() {
//<quantify id="q1" withingroup="G1" quantification_method_id="my_qzivy">
_output_stream->writeStartElement("quantify");
_output_stream->writeAttribute("id","q1");
_output_stream->writeAttribute("withingroup","fractiona1");
_output_stream->writeAttribute("quantification_method_id","quant1");
//<peptides_in_peptide_list mode="real_or_mean" />
//</quantify>
//<quantify id="q2" withingroup="G2" quantification_method_id="my_moulon">
//<peptides_in_peptide_list mode="post_matching"
// isotope_label_refs="iso1 iso2" />
_output_stream->writeStartElement("peptides_in_peptide_list");
_output_stream->writeAttribute("mode","post_matching");
_output_stream->writeAttribute("ni_min_abundance","0.8");
_output_stream->writeEndElement();
_output_stream->writeComment("<mz_list>732.317 449.754 552.234 464.251 381.577 569.771 575.256</mz_list>");
_output_stream->writeComment("<mzrt_list>\n <mzrt mz=\"732.317\" rt=\"230.712\" />\n <mzrt mz=\"575.256\" rt=\"254.788\" />\n </mzrt_list>");
//</quantify>
_output_stream->writeEndElement();
}
void MassChroQml::writeQuantificationMethods() {
//<quantification_methods>
_output_stream->writeStartElement("quantification_methods");
//<quantification_method id="my_qzivy">
_output_stream->writeStartElement("quantification_method");
_output_stream->writeAttribute("id","quant1");
//<xic_extraction xic_type="sum">
_output_stream->writeStartElement("xic_extraction");
_output_stream->writeAttribute("xic_type","max");
_output_stream->writeComment("max : XIC on BasePeak; sum : XIC on TIC");
//<ppm_range min="10" max="10"/><!--For XIC extraction on Da use: mz_range-->
_output_stream->writeStartElement("ppm_range");
_output_stream->writeAttribute("min","10");
_output_stream->writeAttribute("max","10");
_output_stream->writeComment("For XIC extraction on Da use: mz_range");
_output_stream->writeEndElement();
//</xic_extraction>
_output_stream->writeEndElement();
//<xic_filters>
_output_stream->writeStartElement("xic_filters");
//<anti_spike half="5"/>
_output_stream->writeStartElement("anti_spike");
_output_stream->writeAttribute("half","5");
_output_stream->writeEndElement();
//<background half_mediane="5" half_min_max="20"/>
_output_stream->writeComment("<background half_mediane=\"5\" half_min_max=\"20\"/>");
//</xic_filters>
_output_stream->writeEndElement();
//<peak_detection>
_output_stream->writeStartElement("peak_detection");
//<detection_zivy>
_output_stream->writeStartElement("detection_zivy");
//<mean_filter_half_edge>1</mean_filter_half_edge>
_output_stream->writeStartElement("mean_filter_half_edge");
_output_stream->writeCharacters("1");
_output_stream->writeEndElement();
//<minmax_half_edge>3</minmax_half_edge>
_output_stream->writeStartElement("minmax_half_edge");
_output_stream->writeCharacters("3");
_output_stream->writeEndElement();
//<maxmin_half_edge>2</maxmin_half_edge>
_output_stream->writeStartElement("maxmin_half_edge");
_output_stream->writeCharacters("2");
_output_stream->writeEndElement();
//<detection_threshold_on_max>5000</detection_threshold_on_max>
_output_stream->writeStartElement("detection_threshold_on_max");
_output_stream->writeCharacters("5000");
_output_stream->writeEndElement();
//<detection_threshold_on_min>3000</detection_threshold_on_min>
_output_stream->writeStartElement("detection_threshold_on_min");
_output_stream->writeCharacters("3000");
_output_stream->writeEndElement();
//</detection_zivy>
_output_stream->writeEndElement();
//</peak_detection>
_output_stream->writeEndElement();
//</quantification_method>
_output_stream->writeEndElement();
//</quantification_methods>
_output_stream->writeEndElement();
}
void MassChroQml::writeAlignments() {
//<alignments>
_output_stream->writeStartElement("alignments");
//<alignment_methods>
_output_stream->writeStartElement("alignment_methods");
//<alignment_method id="my_ms2">
_output_stream->writeStartElement("alignment_method");
_output_stream->writeAttribute("id","my_ms2");
//<ms2><!-- write_time_values_output_dir="directory" to write retention time corrections -->
_output_stream->writeStartElement("ms2");
_output_stream->writeComment("write_time_values_output_dir=\"directory\" to write retention time corrections");
//<ms2_tendency_halfwindow>10</ms2_tendency_halfwindow>
_output_stream->writeStartElement("ms2_tendency_halfwindow");
_output_stream->writeCharacters("10");
_output_stream->writeEndElement();
//<ms2_smoothing_halfwindow>5</ms2_smoothing_halfwindow>
_output_stream->writeStartElement("ms2_smoothing_halfwindow");
_output_stream->writeCharacters("15");
_output_stream->writeEndElement();
//<ms1_smoothing_halfwindow>3</ms1_smoothing_halfwindow>
_output_stream->writeStartElement("ms1_smoothing_halfwindow");
_output_stream->writeCharacters("0");
_output_stream->writeEndElement();
//</ms2>
_output_stream->writeEndElement();
//</alignment_method>
_output_stream->writeEndElement();
_output_stream->writeComment("<alignment_method id=\"my_obiwarp\"> \n<obiwarp>\n <lmat_precision>1</lmat_precision>\n <mz_start>500</mz_start>\n <mz_stop>1200</mz_stop>\n </obiwarp>\n </alignment_method>");
//</alignment_methods>
_output_stream->writeEndElement();
//<align group_id="G1" method_id="my_ms2" reference_data_id="samp0" />
//<align group_id="G2" method_id="my_obiwarp" reference_data_id="samp2" />
_output_stream->writeStartElement("align");
_output_stream->writeAttribute("group_id","fractiona1");
_output_stream->writeAttribute("method_id","my_ms2");
_output_stream->writeAttribute("reference_data_id","sampa1");
_output_stream->writeEndElement();
//</alignments>
_output_stream->writeEndElement();
}
void MassChroQml::writeIsotopeLabelList() {
//<isotope_label_list>
_output_stream->writeStartElement("isotope_label_list");
/*
<isotope_label id="iso1">
<mod at="Nter" value="28.0" acc="MOD:00429"/>
<mod at="K" value="28.0" acc="MOD:00429"/>
</isotope_label>
<isotope_label id="iso2">
<mod at="Nter" value="32.0" />
<mod at="K" value="32.0" />
</isotope_label>*/
//</isotope_label_list>
_output_stream->writeEndElement();
}
void MassChroQml::writePeptideList() {
//<peptide_list>
_output_stream->writeStartElement("peptide_list");
const std::map<unsigned int, GroupingGroupSp> & group_store = _p_identification_group->getGroupStore().getGroupMap();
for (auto & group_pair :group_store) {
writePeptideListInGroup(group_pair.second.get());
}
}
void MassChroQml::writePeptideListInGroup(const GroupingGroup * p_group) {
const std::vector<std::pair<unsigned int, const PeptideMatch *>> & sg_peptide_match_list = p_group->getPairSgNumberPeptideMatchList();
std::vector<McqPeptide> mcq_peptide_list;
for (auto & sg_peptide_pair :sg_peptide_match_list) {
unsigned int sg_number = sg_peptide_pair.first;
const PeptideMatch * peptide_match = sg_peptide_pair.second;
McqPeptide mcq_peptide;
mcq_peptide.id = peptide_match->getGrpPeptideSp().get()->getGroupingId();
mcq_peptide.mods << peptide_match->getPeptideXtpSp().get()->getModifString();
mcq_peptide.prot_ids << p_group->getProteinGroupingIdOfSubgroup(sg_number);
mcq_peptide.seq = peptide_match->getPeptideXtpSp().get()->getSequence();
mcq_peptide.native_peptide = peptide_match->getPeptideXtpSp().get()->getNativePeptideP();
mcq_peptide.data.push_back(peptide_match->getMsRunP()->getXmlId());
mcq_peptide.scan.push_back(peptide_match->getScan());
mcq_peptide.charge.push_back(peptide_match->getCharge());
mcq_peptide_list.push_back(mcq_peptide);
}
//sort list
std::sort(mcq_peptide_list.begin(),mcq_peptide_list.end(),[](const McqPeptide & first, const McqPeptide & second) {
return (first.id < second.id) ;
});
std::vector<McqPeptide> cumul_mcq_peptide_list;
if (mcq_peptide_list.size() > 0) {
McqPeptide cumul = mcq_peptide_list[0];
for (McqPeptide & mcq_peptide : mcq_peptide_list) {
if (cumul.id == mcq_peptide.id) {
unsigned int charge = mcq_peptide.charge[0];
QString data = mcq_peptide.data[0];
unsigned int scan = mcq_peptide.scan[0];
bool not_found= true;
for (unsigned int i=0; i < cumul.scan.size(); i++) {
if ((cumul.charge[i] == charge)&&(cumul.data[i] == data)&&(cumul.scan[i] == scan)) {
not_found= false;
break;
}
}
if (not_found) {
cumul.charge.push_back(mcq_peptide.charge[0]);
cumul.data.push_back(mcq_peptide.data[0]);
cumul.scan.push_back(mcq_peptide.scan[0]);
}
if (!cumul.prot_ids.contains(mcq_peptide.prot_ids[0])) {
cumul.prot_ids << mcq_peptide.prot_ids[0];
}
if (!cumul.mods.contains(mcq_peptide.mods[0])) {
cumul.mods << mcq_peptide.mods[0];
}
}
else {
cumul_mcq_peptide_list.push_back(cumul);
cumul = mcq_peptide;
}
}
cumul_mcq_peptide_list.push_back(cumul);
}
for (McqPeptide mcq_peptide : cumul_mcq_peptide_list) {
//<peptide id="pep0" mh="1463.626" mods="114.08" prot_ids="P1.1"
// seq="TCVADESHAGCEK">
_output_stream->writeStartElement("peptide");
_output_stream->writeAttribute("id",mcq_peptide.id);
_output_stream->writeAttribute("mods",mcq_peptide.mods.join("|"));
_output_stream->writeAttribute("prot_ids",mcq_peptide.prot_ids.join(" "));
_output_stream->writeAttribute("mh",QString::number(mcq_peptide.native_peptide->getMz(1), 'f', 10));
_output_stream->writeAttribute("seq",mcq_peptide.seq);
//<modifications><!-- this tag is optional but gives an exact mass computation -->
std::vector<McqPsimod> mod_list;
unsigned int pos=1;
for (const pappso::Aa & aa: *(mcq_peptide.native_peptide)) {
const std::list<pappso::AaModificationP> aa_mod_list = aa.getModificationList();
for (pappso::AaModificationP mod : aa_mod_list) {
if (!mod->isInternal()) {
McqPsimod psimod;
psimod.accession = mod->getAccession();
psimod.at = pos;
mod_list.push_back(psimod);
}
}
pos++;
}
if (mod_list.size() > 0) {
_output_stream->writeStartElement("modifications");
for (McqPsimod & psimod :mod_list) {
//<psimod at="2" acc="MOD:00397"></psimod>
_output_stream->writeStartElement("psimod");
_output_stream->writeAttribute("at",QString("%1").arg(psimod.at));
_output_stream->writeAttribute("acc",psimod.accession);
//<psimod at="11" acc="MOD:00397"></psimod>
_output_stream->writeEndElement();
}
//</modifications>
_output_stream->writeEndElement();
}
//<observed_in data="samp0" scan="655" z="2" />
for (unsigned int i=0; i < mcq_peptide.scan.size(); i++) {
_output_stream->writeStartElement("observed_in");
_output_stream->writeAttribute("data",mcq_peptide.data[i]);
_output_stream->writeAttribute("scan",QString("%1").arg(mcq_peptide.scan[i]));
_output_stream->writeAttribute("z",QString("%1").arg(mcq_peptide.charge[i]));
_output_stream->writeEndElement();
}
//<observed_in data="samp1" scan="798" z="2" />*/
//</peptide>
_output_stream->writeEndElement();
}
//</peptide_list>
_output_stream->writeEndElement();
}
void MassChroQml::writeGroups() {
//<groups>
_output_stream->writeStartElement("groups");
//<group data_ids="samp0 samp1" id="G1" />
_output_stream->writeStartElement("group");
_output_stream->writeAttribute("id",QString("fractiona1"));
std::vector<MsRunSp> msrun_list = _sp_project.get()->getMsRunStore().getMsRunList();
QStringList list;
for (MsRunSp & msrun : msrun_list) {
list << msrun.get()->getXmlId();
}
_output_stream->writeAttribute("data_ids",list.join(" "));
_output_stream->writeEndElement();
//<group data_ids="samp2 samp3" id="G2" />
// </groups>
_output_stream->writeEndElement();
}
void MassChroQml::writeProteinList() {
//<protein_list>
_output_stream->writeStartElement("protein_list");
std::vector<IdentificationGroup *> identification_list = _sp_project.get()->getIdentificationGroupList();
if (identification_list.size() == 0) {
throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n no protein list"));
}
if (identification_list.size() > 1) {
throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n only available in combine mode"));
}
_p_identification_group = identification_list[0];
for (ProteinMatch * p_protein_match :_p_identification_group->getProteinMatchList()) {
if (!p_protein_match->isGrouped()) continue;
//<protein desc="conta|P02769|ALBU_BOVIN SERUM ALBUMIN PRECURSOR."
// id="P1.1" />
pappso::GrpProteinSp grp_protein = p_protein_match->getGrpProteinSp();
if (grp_protein.get()->getRank() == 1) {
_output_stream->writeStartElement("protein");
_output_stream->writeAttribute("id",grp_protein.get()->getGroupingId());
QStringList list;
list << p_protein_match->getProteinXtpSp().get()->getAccession();
list << p_protein_match->getProteinXtpSp().get()->getDescription();
_output_stream->writeAttribute("desc",list.join(" "));
//<protein desc="conta|P02770|ALBU_RAT SERUM ALBUMIN PRECURSOR."
// id="P1.2" />
_output_stream->writeEndElement();
}
}
//</protein_list>
_output_stream->writeEndElement();
}