Skip to content
Snippets Groups Projects
masschroqml.cpp 21.21 KiB
/**
 * \file output/masschroqml.cpp
 * \date 7/4/2017
 * \author Olivier Langella
 * \brief MassChroQML writer
 */

/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
*     XTPcpp is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     XTPcpp is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/

#include "masschroqml.h"
#include <pappsomspp/pappsoexception.h>
#include <pappsomspp/utils.h>
#include <pappsomspp/grouping/grpprotein.h>
#include <pappsomspp/amino_acid/Aa.h>


struct McqPeptide {
    QString id;
    QStringList mods;
    QStringList prot_ids;
    QString seq;
    const pappso::Peptide * native_peptide;
    std::vector<QString> data;
    std::vector<unsigned int> scan;
    std::vector<unsigned int> charge;
};

struct McqPsimod {
    unsigned int at;
    QString accession;
};

MassChroQml::MassChroQml(const QString & out_filename)
{
    //_p_digestion_pipeline = p_digestion_pipeline;

    //_mzidentml = "http://psidev.info/psi/pi/mzIdentML/1.1";
    QString complete_out_filename = out_filename;
    _output_file = new QFile(complete_out_filename);

    if (_output_file->open(QIODevice::WriteOnly))
    {
        _output_stream = new QXmlStreamWriter();
        _output_stream->setDevice(_output_file);
    } else
    {
        throw pappso::PappsoException(QObject::tr("error : cannot open the MassChroqML output file : %1\n").arg(out_filename));
    }

    _output_stream->setAutoFormatting(true);
    _output_stream->writeStartDocument("1.0");


}

MassChroQml::~MassChroQml()
{
    delete _output_file;
    delete _output_stream;
}

void MassChroQml::close() {
    _output_stream->writeEndDocument();
    _output_file->close();
}

void MassChroQml::write(ProjectSp sp_project) {

    _sp_project = sp_project;
    if (_sp_project.get() == nullptr) {
        throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n project is empty"));
    }

    //<masschroq>
    _output_stream->writeStartElement("masschroq");
    _output_stream->writeAttribute("type","input");
    _output_stream->writeAttribute("version","2.2");
    //_output_stream->writeAttribute("creationDate", QDateTime::currentDateTime().toString( Qt::ISODate));
    _output_stream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance","xsi");
    //_output_stream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance","xsi");
    _output_stream->writeAttribute("xmlns","http://pappso.inra.fr/xsd/masschroqml/2.2");
    //xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd"
    _output_stream->writeAttribute("http://www.w3.org/2001/XMLSchema-instance","schemaLocation","http://pappso.inra.fr/xsd/masschroqml/2.2 http://pappso.inra.fr/xsd/masschroq-2.2.xsd");

    // <rawdata><!-- time_values_dir="directory" to read retention time corrections-->
    _output_stream->writeStartElement("rawdata");
    _output_stream->writeComment("time_values_dir=\"directory\" to read retention time corrections");

    std::vector<MsRunSp> msrun_list = _sp_project.get()->getMsRunStore().getMsRunList();
    for (MsRunSp & msrun : msrun_list) {

        //<data_file id="samp0" format="mzxml" path="bsa1.mzXML" type="centroid" />
        _output_stream->writeStartElement("data_file");
        _output_stream->writeAttribute("id",msrun.get()->getXmlId());
        if(msrun.get()->getMzFormat() == MzFormat::mzXML) {
            _output_stream->writeAttribute("format","mzxml");
        }
        _output_stream->writeAttribute("path",msrun.get()->getFilename());
        _output_stream->writeAttribute("type","centroid");
        // <data_file id="samp1" format="mzxml" path="bsa2.mzXML" type="profile" />
        _output_stream->writeEndElement();
    }
    // <data_file id="samp2" format="mzml" path="/home/user/bsa3.mzml"      type="profile" />
//<data_file id="samp3" format="mzml" path="/home/user/bsa4.mzml"             type="profile" />
    // _output_stream.writeEndElement();
    //             </rawdata>
    _output_stream->writeEndElement();


    writeGroups();
    writeProteinList();
    writePeptideList();
    writeIsotopeLabelList();
    writeAlignments();
    writeQuantificationMethods();
    _output_stream->writeStartElement("quantification");
    writeQuantificationResults();
    writeQuantificationTraces();
    writeQuantify();
    _output_stream->writeEndElement();
}



void MassChroQml::writeQuantificationResults() {
    //<quantification_results>
    _output_stream->writeStartElement("quantification_results");
    //<quantification_result output_file="result1"
    //	format="tsv" />
    _output_stream->writeStartElement("quantification_result");
    _output_stream->writeAttribute("output_file","result1");
    _output_stream->writeAttribute("format","tsv");
    _output_stream->writeEndElement();
    //<quantification_result output_file="result2"
    //	format="ods" />
    _output_stream->writeComment("<quantification_result output_file=\"result2\" format=\"ods\" />");
    //<compar_result output_file="compar"
    //	format="ods" />

    _output_stream->writeStartElement("compar_result");
    _output_stream->writeAttribute("output_file","result1");
    _output_stream->writeAttribute("format","ods");
    _output_stream->writeEndElement();
    //</quantification_results>
    _output_stream->writeEndElement();
}

void MassChroQml::writeQuantificationTraces() {
    //<quantification_traces>

    _output_stream->writeStartElement("quantification_traces");
    //<peptide_traces peptide_ids="pep0 pep1" output_dir="pep_traces"
    //	format="tsv" />
    _output_stream->writeComment("<peptide_traces peptide_ids=\"pep0 pep1\" output_dir=\"pep_traces\" format=\"tsv\" />");
    /*
    		<all_xics_traces output_dir="all_xics_traces" format="tsv" />
    		<mz_traces mz_values="634.635 449.754 552.234" output_dir="mz_traces"
    			format="tsv" />
    		<mzrt_traces output_dir="mzrt_traces" format="tsv">
    			<mzrt_values>
    				<mzrt_value mz="732.317" rt="230.712" />
    				<mzrt_value mz="575.256" rt="254.788" />
    			</mzrt_values>
    		</mzrt_traces>*/
    //</quantification_traces>

    _output_stream->writeEndElement();
}
void MassChroQml::writeQuantify() {
    //<quantify id="q1" withingroup="G1" quantification_method_id="my_qzivy">
    _output_stream->writeStartElement("quantify");
    _output_stream->writeAttribute("id","q1");
    _output_stream->writeAttribute("withingroup","fractiona1");
    _output_stream->writeAttribute("quantification_method_id","quant1");
    //<peptides_in_peptide_list mode="real_or_mean" />
    //</quantify>
    //<quantify id="q2" withingroup="G2" quantification_method_id="my_moulon">
    //<peptides_in_peptide_list mode="post_matching"
    //	isotope_label_refs="iso1 iso2" />

    _output_stream->writeStartElement("peptides_in_peptide_list");
    _output_stream->writeAttribute("mode","post_matching");
    _output_stream->writeAttribute("ni_min_abundance","0.8");
    _output_stream->writeEndElement();

    _output_stream->writeComment("<mz_list>732.317 449.754 552.234 464.251 381.577 569.771 575.256</mz_list>");
    _output_stream->writeComment("<mzrt_list>\n				<mzrt mz=\"732.317\" rt=\"230.712\" />\n			<mzrt mz=\"575.256\" rt=\"254.788\" />\n			</mzrt_list>");

    //</quantify>
    _output_stream->writeEndElement();
}

void MassChroQml::writeQuantificationMethods() {
    //<quantification_methods>
    _output_stream->writeStartElement("quantification_methods");
    //<quantification_method id="my_qzivy">
    _output_stream->writeStartElement("quantification_method");
    _output_stream->writeAttribute("id","quant1");
    //<xic_extraction xic_type="sum">
    _output_stream->writeStartElement("xic_extraction");
    _output_stream->writeAttribute("xic_type","max");
    _output_stream->writeComment("max : XIC on BasePeak; sum : XIC on TIC");
    //<ppm_range min="10" max="10"/><!--For XIC extraction on Da use: mz_range-->

    _output_stream->writeStartElement("ppm_range");
    _output_stream->writeAttribute("min","10");
    _output_stream->writeAttribute("max","10");
    _output_stream->writeComment("For XIC extraction on Da use: mz_range");
    _output_stream->writeEndElement();
    //</xic_extraction>
    _output_stream->writeEndElement();
    //<xic_filters>
    _output_stream->writeStartElement("xic_filters");
    //<anti_spike half="5"/>
    _output_stream->writeStartElement("anti_spike");
    _output_stream->writeAttribute("half","5");
    _output_stream->writeEndElement();
    //<background half_mediane="5" half_min_max="20"/>
    _output_stream->writeComment("<background half_mediane=\"5\" half_min_max=\"20\"/>");
    //</xic_filters>
    _output_stream->writeEndElement();
    //<peak_detection>

    _output_stream->writeStartElement("peak_detection");
    //<detection_zivy>
    _output_stream->writeStartElement("detection_zivy");
    //<mean_filter_half_edge>1</mean_filter_half_edge>
    _output_stream->writeStartElement("mean_filter_half_edge");
    _output_stream->writeCharacters("1");
    _output_stream->writeEndElement();
    //<minmax_half_edge>3</minmax_half_edge>
    _output_stream->writeStartElement("minmax_half_edge");
    _output_stream->writeCharacters("3");
    _output_stream->writeEndElement();
    //<maxmin_half_edge>2</maxmin_half_edge>
    _output_stream->writeStartElement("maxmin_half_edge");
    _output_stream->writeCharacters("2");
    _output_stream->writeEndElement();
    //<detection_threshold_on_max>5000</detection_threshold_on_max>
    _output_stream->writeStartElement("detection_threshold_on_max");
    _output_stream->writeCharacters("5000");
    _output_stream->writeEndElement();
    //<detection_threshold_on_min>3000</detection_threshold_on_min>
    _output_stream->writeStartElement("detection_threshold_on_min");
    _output_stream->writeCharacters("3000");
    _output_stream->writeEndElement();
    //</detection_zivy>
    _output_stream->writeEndElement();
    //</peak_detection>

    _output_stream->writeEndElement();
    //</quantification_method>
    _output_stream->writeEndElement();
    //</quantification_methods>
    _output_stream->writeEndElement();
}
void MassChroQml::writeAlignments() {
    //<alignments>
    _output_stream->writeStartElement("alignments");
    //<alignment_methods>
    _output_stream->writeStartElement("alignment_methods");
    //<alignment_method id="my_ms2">
    _output_stream->writeStartElement("alignment_method");
    _output_stream->writeAttribute("id","my_ms2");

    //<ms2><!-- write_time_values_output_dir="directory" to write retention time corrections -->

    _output_stream->writeStartElement("ms2");
    _output_stream->writeComment("write_time_values_output_dir=\"directory\" to write retention time corrections");
    //<ms2_tendency_halfwindow>10</ms2_tendency_halfwindow>
    _output_stream->writeStartElement("ms2_tendency_halfwindow");
    _output_stream->writeCharacters("10");
    _output_stream->writeEndElement();
    //<ms2_smoothing_halfwindow>5</ms2_smoothing_halfwindow>

    _output_stream->writeStartElement("ms2_smoothing_halfwindow");
    _output_stream->writeCharacters("15");
    _output_stream->writeEndElement();
    //<ms1_smoothing_halfwindow>3</ms1_smoothing_halfwindow>

    _output_stream->writeStartElement("ms1_smoothing_halfwindow");
    _output_stream->writeCharacters("0");
    _output_stream->writeEndElement();
    //</ms2>
    _output_stream->writeEndElement();
    //</alignment_method>
    _output_stream->writeEndElement();
    _output_stream->writeComment("<alignment_method id=\"my_obiwarp\">          \n<obiwarp>\n					<lmat_precision>1</lmat_precision>\n					<mz_start>500</mz_start>\n					<mz_stop>1200</mz_stop>\n				</obiwarp>\n			</alignment_method>");
    //</alignment_methods>
    _output_stream->writeEndElement();
    //<align group_id="G1" method_id="my_ms2" reference_data_id="samp0" />
    //<align group_id="G2" method_id="my_obiwarp" reference_data_id="samp2" />
    _output_stream->writeStartElement("align");
    _output_stream->writeAttribute("group_id","fractiona1");
    _output_stream->writeAttribute("method_id","my_ms2");
    _output_stream->writeAttribute("reference_data_id","sampa1");
    _output_stream->writeEndElement();
    //</alignments>
    _output_stream->writeEndElement();
}

void MassChroQml::writeIsotopeLabelList() {
    //<isotope_label_list>
    _output_stream->writeStartElement("isotope_label_list");
    /*
    	<isotope_label id="iso1">
    		<mod at="Nter" value="28.0" acc="MOD:00429"/>
    		<mod at="K" value="28.0" acc="MOD:00429"/>
    	</isotope_label>
    	<isotope_label id="iso2">
    		<mod at="Nter" value="32.0" />
    		<mod at="K" value="32.0" />
    	</isotope_label>*/
    //</isotope_label_list>
    _output_stream->writeEndElement();
}

void MassChroQml::writePeptideList() {
//<peptide_list>
    _output_stream->writeStartElement("peptide_list");
    const std::map<unsigned int, GroupingGroupSp> & group_store = _p_identification_group->getGroupStore().getGroupMap();
    
    for (auto & group_pair :group_store) {
        writePeptideListInGroup(group_pair.second.get());
    }
} 
    
void MassChroQml::writePeptideListInGroup(const GroupingGroup * p_group) {
    
    const std::vector<std::pair<unsigned int, const PeptideMatch *>> & sg_peptide_match_list =  p_group->getPairSgNumberPeptideMatchList();
    std::vector<McqPeptide> mcq_peptide_list;
    
    for (auto & sg_peptide_pair :sg_peptide_match_list) {
        unsigned int sg_number = sg_peptide_pair.first;
        const PeptideMatch * peptide_match = sg_peptide_pair.second;

            McqPeptide mcq_peptide;
            mcq_peptide.id = peptide_match->getGrpPeptideSp().get()->getGroupingId();
            mcq_peptide.mods << peptide_match->getPeptideXtpSp().get()->getModifString();
            mcq_peptide.prot_ids << p_group->getProteinGroupingIdOfSubgroup(sg_number);
            mcq_peptide.seq = peptide_match->getPeptideXtpSp().get()->getSequence();
            mcq_peptide.native_peptide = peptide_match->getPeptideXtpSp().get()->getNativePeptideP();
            mcq_peptide.data.push_back(peptide_match->getMsRunP()->getXmlId());
            mcq_peptide.scan.push_back(peptide_match->getScan());
            mcq_peptide.charge.push_back(peptide_match->getCharge());
            mcq_peptide_list.push_back(mcq_peptide);
        

    }

    //sort list
    std::sort(mcq_peptide_list.begin(),mcq_peptide_list.end(),[](const McqPeptide & first, const McqPeptide & second) {
        return (first.id < second.id) ;
    });

    std::vector<McqPeptide> cumul_mcq_peptide_list;
    if (mcq_peptide_list.size() > 0) {
        McqPeptide cumul = mcq_peptide_list[0];
        for (McqPeptide & mcq_peptide : mcq_peptide_list) {
            if (cumul.id == mcq_peptide.id) {
                unsigned int charge = mcq_peptide.charge[0];
                QString data = mcq_peptide.data[0];
                unsigned int scan = mcq_peptide.scan[0];

                bool not_found= true;
                for (unsigned int i=0; i < cumul.scan.size(); i++) {
                    if ((cumul.charge[i] == charge)&&(cumul.data[i] == data)&&(cumul.scan[i] == scan)) {
                        not_found= false;
                        break;
                    }
                }

                if (not_found) {
                    cumul.charge.push_back(mcq_peptide.charge[0]);
                    cumul.data.push_back(mcq_peptide.data[0]);
                    cumul.scan.push_back(mcq_peptide.scan[0]);
                }
                if (!cumul.prot_ids.contains(mcq_peptide.prot_ids[0])) {
                    cumul.prot_ids << mcq_peptide.prot_ids[0];
                }
                if (!cumul.mods.contains(mcq_peptide.mods[0])) {
                    cumul.mods << mcq_peptide.mods[0];
                }
            }
            else {
                cumul_mcq_peptide_list.push_back(cumul);
                cumul = mcq_peptide;
            }
        }
        cumul_mcq_peptide_list.push_back(cumul);
    }


    for (McqPeptide mcq_peptide : cumul_mcq_peptide_list) {
        //<peptide id="pep0" mh="1463.626" mods="114.08" prot_ids="P1.1"
        //	seq="TCVADESHAGCEK">
        _output_stream->writeStartElement("peptide");
        _output_stream->writeAttribute("id",mcq_peptide.id);
        _output_stream->writeAttribute("mods",mcq_peptide.mods.join("|"));
        _output_stream->writeAttribute("prot_ids",mcq_peptide.prot_ids.join(" "));
        _output_stream->writeAttribute("mh",QString::number(mcq_peptide.native_peptide->getMz(1), 'f', 10));
        _output_stream->writeAttribute("seq",mcq_peptide.seq);

        //<modifications><!-- this tag is optional but gives an exact mass computation -->
        std::vector<McqPsimod> mod_list;
        unsigned int pos=1;
        for (const pappso::Aa & aa: *(mcq_peptide.native_peptide)) {
            const std::list<pappso::AaModificationP> aa_mod_list = aa.getModificationList();
            for (pappso::AaModificationP mod : aa_mod_list) {
                if (!mod->isInternal()) {
                    McqPsimod psimod;
                    psimod.accession = mod->getAccession();
                    psimod.at = pos;
                    mod_list.push_back(psimod);
                }
            }
            pos++;
        }
        if (mod_list.size() > 0) {
            _output_stream->writeStartElement("modifications");
            for (McqPsimod & psimod :mod_list) {
                //<psimod at="2" acc="MOD:00397"></psimod>
                _output_stream->writeStartElement("psimod");
                _output_stream->writeAttribute("at",QString("%1").arg(psimod.at));
                _output_stream->writeAttribute("acc",psimod.accession);
                //<psimod at="11" acc="MOD:00397"></psimod>
                _output_stream->writeEndElement();
            }
            //</modifications>
            _output_stream->writeEndElement();
        }

        //<observed_in data="samp0" scan="655" z="2" />
        for (unsigned int i=0; i < mcq_peptide.scan.size(); i++) {
            _output_stream->writeStartElement("observed_in");
            _output_stream->writeAttribute("data",mcq_peptide.data[i]);
            _output_stream->writeAttribute("scan",QString("%1").arg(mcq_peptide.scan[i]));
            _output_stream->writeAttribute("z",QString("%1").arg(mcq_peptide.charge[i]));
            _output_stream->writeEndElement();
        }
        //<observed_in data="samp1" scan="798" z="2" />*/
        //</peptide>
        _output_stream->writeEndElement();
    }
    //</peptide_list>
    _output_stream->writeEndElement();
}

void MassChroQml::writeGroups() {
//<groups>
    _output_stream->writeStartElement("groups");
    //<group data_ids="samp0 samp1" id="G1" />
    _output_stream->writeStartElement("group");
    _output_stream->writeAttribute("id",QString("fractiona1"));
    std::vector<MsRunSp> msrun_list = _sp_project.get()->getMsRunStore().getMsRunList();
    QStringList list;
    for (MsRunSp & msrun : msrun_list) {
        list << msrun.get()->getXmlId();
    }
    _output_stream->writeAttribute("data_ids",list.join(" "));
    _output_stream->writeEndElement();
    //<group data_ids="samp2 samp3" id="G2" />
//	</groups>
    _output_stream->writeEndElement();
}

void MassChroQml::writeProteinList() {
//<protein_list>
    _output_stream->writeStartElement("protein_list");
    std::vector<IdentificationGroup *> identification_list = _sp_project.get()->getIdentificationGroupList();
    if (identification_list.size() == 0) {
        throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n no protein list"));
    }
    if (identification_list.size() > 1) {
        throw pappso::PappsoException(QObject::tr("Error writing MassChroqML file :\n only available in combine mode"));
    }
    _p_identification_group = identification_list[0];
    for (ProteinMatch * p_protein_match :_p_identification_group->getProteinMatchList()) {
        if (!p_protein_match->isGrouped()) continue;
        //<protein desc="conta|P02769|ALBU_BOVIN SERUM ALBUMIN PRECURSOR."
        //	id="P1.1" />
        pappso::GrpProteinSp grp_protein = p_protein_match->getGrpProteinSp();
        if (grp_protein.get()->getRank() == 1) {
            _output_stream->writeStartElement("protein");
            _output_stream->writeAttribute("id",grp_protein.get()->getGroupingId());
            QStringList list;
            list << p_protein_match->getProteinXtpSp().get()->getAccession();
            list << p_protein_match->getProteinXtpSp().get()->getDescription();
            _output_stream->writeAttribute("desc",list.join(" "));
            //<protein desc="conta|P02770|ALBU_RAT SERUM ALBUMIN PRECURSOR."
            //	id="P1.2" />
            _output_stream->writeEndElement();
        }
    }
    //</protein_list>
    _output_stream->writeEndElement();
}