Skip to content
Snippets Groups Projects
xpip.cpp 22.9 KiB
Newer Older
Olivier Langella's avatar
Olivier Langella committed
/**
 * \file output/xpip.cpp
 * \date 13/4/2017
 * \author Olivier Langella
 * \brief XPIP writer
 */

/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
*     XTPcpp is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     XTPcpp is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/

#include "xpip.h"
#include "../config.h"

#include <stdexcept>

#include <QDateTime>
#include <QDebug>
#include <pappsomspp/pappsoexception.h>
#include <pappsomspp/utils.h>

/**
 * \brief Open the XPIP output file and start the XML document.
 *
 * The file is opened write-only, an auto-formatting XML stream writer is
 * attached to it and the XML declaration (version "1.0") is written.
 *
 * \param out_filename path of the output file
 * \throws pappso::PappsoException if the file cannot be opened for writing
 */
Xpip::Xpip(const QString & out_filename)
{
    _output_file = new QFile(out_filename);

    if (!_output_file->open(QIODevice::WriteOnly))
    {
        // The destructor does not run when a constructor throws, so the file
        // object has to be released here or it would leak.
        delete _output_file;
        _output_file = nullptr;
        throw pappso::PappsoException(QObject::tr("error : cannot open the XPIP output file : %1\n").arg(out_filename));
    }

    _output_stream = new QXmlStreamWriter();
    _output_stream->setDevice(_output_file);
    _output_stream->setAutoFormatting(true);
    _output_stream->writeStartDocument("1.0");
}

/**
 * \brief Release the XML stream writer and the output file object.
 */
Xpip::~Xpip()
{
    // The writer was attached to the file with setDevice() in the constructor,
    // so it is destroyed first and never outlives the device it points to.
    delete _output_stream;
    delete _output_file;
}

/**
 * \brief Close the underlying output file.
 */
void Xpip::close()
{
    _output_file->close();
}

/**
 * \brief Serialize a whole project as an XPIP document.
 *
 * Stores the project in _sp_project, opens the root "xpip" element with its
 * namespace attributes, writes every section in turn (description, counts,
 * filter parameters, FASTA files, contaminants, decoys, MS runs,
 * identification sources, proteins, peptides, identification groups) and
 * ends the document.
 *
 * \param sp_project shared pointer to the project to write
 * \throws pappso::PappsoException if sp_project holds a null pointer
 */
void Xpip::write(ProjectSp sp_project) {

    _sp_project = sp_project;
    if (_sp_project.get() == nullptr) {
        throw pappso::PappsoException(QObject::tr("Error writing XPIP :\n project is empty"));
    }

    // root element and schema declarations
    _output_stream->writeStartElement("xpip");
    _output_stream->writeAttribute("version","4.0");
    _output_stream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance","xsi");
    _output_stream->writeAttribute("xmlns","http://pappso.inra.fr/xsd/xpip/4.0");
    _output_stream->writeAttribute("http://www.w3.org/2001/XMLSchema-instance","schemaLocation","http://pappso.inra.fr/xsd/xpip/4.0 http://pappso.inra.fr/xsd/xpip-4.0.xsd");

    writeDescription();
    writeCounts();

    writeFilterParameters(_sp_project.get()->getAutomaticFilterParameters());
    writeFastaFileList(_sp_project.get()->getFastaFileStore());

    // Shared by the <contaminants> and <decoys> elements: space separated
    // XML ids of a list of FASTA files.
    const auto join_xml_ids = [](const std::vector<const FastaFile *> & fasta_file_list) -> QString {
        QStringList xml_id_list;
        for (const FastaFile * p_fasta_file : fasta_file_list) {
            xml_id_list << p_fasta_file->getXmlId();
        }
        return xml_id_list.join(" ");
    };

    const ProteinStore & protein_store = _sp_project.get()->getProteinStore();

    _output_stream->writeStartElement("contaminants");
    _output_stream->writeAttribute("regexp",protein_store.getRegexpContaminant().pattern());
    _output_stream->writeAttribute("fasta_id",join_xml_ids(protein_store.getContaminantFastaFileList()));
    _output_stream->writeEndElement();

    _output_stream->writeStartElement("decoys");
    _output_stream->writeAttribute("regexp",protein_store.getRegexpDecoy().pattern());
    // The decoy element must list the decoy FASTA files (it previously
    // repeated the contaminant list by mistake).
    _output_stream->writeAttribute("fasta_id",join_xml_ids(protein_store.getDecoyFastaFileList()));
    _output_stream->writeEndElement();

    writeMsrunList(_sp_project.get()->getMsRunStore());
    writeIdentificationDataSourceList(_sp_project.get()->getIdentificationDataSourceStore());
    writeProteinList();
    writePeptideList();
    writeIdentificationGroupList();
    _output_stream->writeEndDocument();
}
void Xpip::writeDoubleAttribute(const QString & attribute, pappso::pappso_double value) {
    _output_stream->writeAttribute(attribute,QString::number(value, 'g', 10));

}
/**
 * \brief Write a boolean as an XML attribute, spelled "true" or "false".
 *
 * \param attribute attribute name
 * \param value boolean to write
 */
void Xpip::writeBooleanAttribute(const QString & attribute, bool value) {
    if (!value) {
        _output_stream->writeAttribute(attribute,"false");
        return;
    }
    _output_stream->writeAttribute(attribute,"true");
}
void Xpip::writeFilterParameters(const AutomaticFilterParameters & filters) {
    qDebug() << "Xpip::writeFilterParameters begin";
    _output_stream->writeStartElement("filter_params");
    writeDoubleAttribute("pep_evalue",filters.getFilterPeptideEvalue());
    writeDoubleAttribute("prot_evalue",filters.getFilterProteinEvalue());
    _output_stream->writeAttribute("pep_number",QString("%1").arg(filters.getFilterMinimumPeptidePerMatch()));
    writeBooleanAttribute("cross_sample",filters.getFilterCrossSamplePeptideNumber());
    _output_stream->writeEndElement();
    qDebug() << "Xpip::writeFilterParameters end";
}

/**
 * \brief Write the "description" element.
 *
 * Attributes written: the XTPCPP_VERSION string, the grouping method
 * ("peptidemass"), the project's combine mode and the current date and time
 * in ISO format.
 */
void Xpip::writeDescription() {

    _output_stream->writeStartElement("description");
    _output_stream->writeAttribute("version",XTPCPP_VERSION);
    _output_stream->writeAttribute("grouping","peptidemass");
    writeBooleanAttribute("combine",_sp_project.get()->isCombineMode());
    _output_stream->writeAttribute("date",QDateTime::currentDateTime().toString( Qt::ISODate));
    _output_stream->writeEndElement(); // description
}


Langella Olivier's avatar
Langella Olivier committed
/**
 * \brief Write the "counts" element.
 *
 * Attributes written: the sizes of the protein and peptide stores, then the
 * peptide evidence and protein match totals summed over every
 * identification group of the project.
 */
void Xpip::writeCounts() {

    _output_stream->writeStartElement("counts");
    _output_stream->writeAttribute("proteins",QString("%1").arg(_sp_project.get()->getProteinStore().size()));
    _output_stream->writeAttribute("peptides",QString("%1").arg(_sp_project.get()->getPeptideStore().size()));

    std::size_t total_peptide_evidences = 0;
    std::size_t total_protein_matches = 0;
    for (const IdentificationGroup * p_identification_group : _sp_project.get()->getIdentificationGroupList()) {
        total_protein_matches += p_identification_group->getProteinMatchList().size();

        for (const IdentificationDataSource * p_data_source : p_identification_group->getIdentificationDataSourceList()) {
            total_peptide_evidences += p_data_source->getPeptideEvidenceStore().size();
        }
    }

    _output_stream->writeAttribute("peptide_evidences",QString("%1").arg(total_peptide_evidences));
    _output_stream->writeAttribute("protein_matches",QString("%1").arg(total_protein_matches));
    _output_stream->writeEndElement(); // counts
}

void Xpip::writeIdentificationDataSourceList(const IdentificationDataSourceStore & ident_store) {

    _output_stream->writeStartElement("identification_source_list");
    for (const IdentificationDataSourceSp ident_source_sp : ident_store.getIdentificationDataSourceList()) {
        _output_stream->writeStartElement("identification_source");
        _output_stream->writeAttribute("id",ident_source_sp.get()->getXmlId());
        _output_stream->writeAttribute("msrun_id",ident_source_sp.get()->getMsRunSp().get()->getXmlId());
        //_output_stream->writeAttribute("format",QString("%1").arg(static_cast<std::int8_t>(ident_source_sp.get()->getIdentFormat())));
        _output_stream->writeAttribute("path",ident_source_sp.get()->getResourceName());

        _output_stream->writeAttribute("engine",QString("%1").arg(static_cast<std::int8_t>(ident_source_sp.get()->getIdentificationEngine())));
        _output_stream->writeAttribute("version",ident_source_sp.get()->getIdentificationEngineVersion());

        const std::map<IdentificationEngineParam, QVariant> params = ident_source_sp.get()->getIdentificationEngineParamMap();
        if (params.size() > 0) {
            _output_stream->writeStartElement("params");
            for (const std::pair<IdentificationEngineParam, QVariant> stat_pair: params) {
                _output_stream->writeStartElement("param");
                _output_stream->writeAttribute("key",QString("%1").arg(static_cast<std::int8_t>(stat_pair.first)));
                _output_stream->writeAttribute("value",stat_pair.second.toString());
                _output_stream->writeEndElement();
            }
            _output_stream->writeEndElement();
        }
        
        const std::map<IdentificationEngineStatistics, QVariant> stats = ident_source_sp.get()->getIdentificationEngineStatisticsMap();
        if (stats.size() > 0) {
            _output_stream->writeStartElement("stats");
            for (const std::pair<IdentificationEngineStatistics, QVariant> stat_pair: stats) {
                _output_stream->writeStartElement("stat");
                _output_stream->writeAttribute("key",QString("%1").arg(static_cast<std::int8_t>(stat_pair.first)));
                _output_stream->writeAttribute("value",stat_pair.second.toString());
                _output_stream->writeEndElement();
            }
            _output_stream->writeEndElement();
        }
        _output_stream->writeEndElement();
    }
Olivier Langella's avatar
Olivier Langella committed
    _output_stream->writeEndElement();
/**
 * \brief Write the "msrun_list" element.
 *
 * Each MS run gets one "msrun" element with its id, sample name and mz
 * format (numeric enum value). The "path" attribute is the stored file name
 * when that file exists, otherwise only its complete base name. A non-empty
 * statistics map is written as a nested "stats" element.
 *
 * \param msrun_store store holding the MS runs
 */
void Xpip::writeMsrunList(const MsRunStore & msrun_store) {

    _output_stream->writeStartElement("msrun_list");

    for (const MsRunSp & sp_msrun : msrun_store.getMsRunList()) {
        _output_stream->writeStartElement("msrun");
        _output_stream->writeAttribute("id",sp_msrun.get()->getXmlId());
        _output_stream->writeAttribute("name",sp_msrun.get()->getSampleName());
        _output_stream->writeAttribute("format",QString("%1").arg(static_cast<std::int8_t>(sp_msrun.get()->getMzFormat())));

        const QFileInfo mz_file_info(sp_msrun.get()->getFilename());
        if (!mz_file_info.exists()) {
            // file not reachable from here: keep only its base name
            _output_stream->writeAttribute("path",mz_file_info.completeBaseName());
        }
        else {
            _output_stream->writeAttribute("path",sp_msrun.get()->getFilename());
        }

        const std::map<MsRunStatistics, QVariant> & statistics = sp_msrun.get()->getMsRunStatisticsMap();
        if (!statistics.empty()) {
            _output_stream->writeStartElement("stats");
            for (const auto & statistic : statistics) {
                _output_stream->writeStartElement("stat");
                _output_stream->writeAttribute("key",QString("%1").arg(static_cast<std::int8_t>(statistic.first)));
                _output_stream->writeAttribute("value",statistic.second.toString());
                _output_stream->writeEndElement(); // stat
            }
            _output_stream->writeEndElement(); // stats
        }

        _output_stream->writeEndElement(); // msrun
    }

    _output_stream->writeEndElement(); // msrun_list
}
void Xpip::writeFastaFileList(const FastaFileStore & fasta_store) {
    qDebug() << "Xpip::writeFastaFileList begin";
    _output_stream->writeStartElement("fasta_file_list");
Olivier Langella's avatar
Olivier Langella committed
    for (FastaFileSp fasta_file_sp : fasta_store.getFastaFileList()) {
        _output_stream->writeStartElement("fasta_file");
        _output_stream->writeAttribute("id",fasta_file_sp.get()->getXmlId());
Olivier Langella's avatar
Olivier Langella committed
        _output_stream->writeAttribute("path",fasta_file_sp.get()->getAbsoluteFilePath());

        _output_stream->writeEndElement();
    }
    _output_stream->writeEndElement();
    qDebug() << "Xpip::writeFastaFileList end";
}

void Xpip::writeProteinList() {
    qDebug() << "Xpip::writeProteinList begin";
    _output_stream->writeStartElement("protein_list");
    const ProteinStore & protein_store = _sp_project.get()->getProteinStore();
    for (std::pair<QString, ProteinXtpSp> protein_pair : protein_store.getProteinMap()) {
        const ProteinXtp * p_protein = protein_pair.second.get();
        _output_stream->writeStartElement("protein");
        if (p_protein->getFastaFileP() == nullptr) {
            throw pappso::PappsoException(QObject::tr("Error writing XPIP file :\n FastaFile pointer is null"));
        }
        _output_stream->writeAttribute("fasta_id",p_protein->getFastaFileP()->getXmlId());
        _output_stream->writeAttribute("acc",p_protein->getAccession());
        _output_stream->writeAttribute("description",p_protein->getDescription());
        if (p_protein->isDecoy()) {
            _output_stream->writeAttribute("is_decoy","true");
        }
        else {
            _output_stream->writeAttribute("is_decoy","false");
        }

        if (p_protein->isContaminant()) {
            _output_stream->writeAttribute("is_contaminant","true");
        }
        else {
            _output_stream->writeAttribute("is_contaminant","false");
        }

        _output_stream->writeStartElement("sequence");
        _output_stream->writeCharacters(p_protein->getSequence());
        _output_stream->writeEndElement();

        for (DbXref db_xref : p_protein->getDbxrefList()) {
            _output_stream->writeStartElement("dbxref");
            _output_stream->writeAttribute("acc",db_xref.accession);
            _output_stream->writeAttribute("database",QString("%1").arg(static_cast<std::int8_t>(db_xref.database)));
            _output_stream->writeEndElement();
        }
        _output_stream->writeEndElement();
    }
    _output_stream->writeEndElement();
    qDebug() << "Xpip::writeProteinList end";
}

/**
 * \brief Build a peptide identifier string from a numeric value.
 *
 * \param crc_peptide number passed to pappso::Utils::getLexicalOrderedString
 * \return "p" followed by the string returned for crc_peptide
 */
QString Xpip::getPeptideId(std::size_t crc_peptide) const {
    const QString ordered_suffix = pappso::Utils::getLexicalOrderedString(crc_peptide);
    return QString("p%1").arg(ordered_suffix);
}

void Xpip::writeLabelingMethod() {
    qDebug() << "Xpip::writeLabelingMethod begin";

    LabelingMethod * p_labeling_method = _sp_project.get()->getLabelingMethodSp().get();
    if (p_labeling_method == nullptr) return;
    _output_stream->writeStartElement("label_method");
    _output_stream->writeAttribute("id",p_labeling_method->getXmlId());

    _output_stream->writeStartElement("label_list");
    for (const Label * p_label : p_labeling_method->getLabelList()) {
        _output_stream->writeStartElement("label");
        _output_stream->writeAttribute("id", p_label->getXmlId());
        for (const LabelModification label_modification : p_label->getLabelModifictionList()) {
            _output_stream->writeStartElement("label_modification");
            _output_stream->writeAttribute("at", label_modification.at);
            _output_stream->writeAttribute("mod", label_modification.modification->getAccession());
            _output_stream->writeEndElement();// label_modification
        }
        _output_stream->writeEndElement();// label
    }
    _output_stream->writeEndElement();// label_list

    _output_stream->writeEndElement();// label_method
    qDebug() << "Xpip::writeLabelingMethod end";
}

void Xpip::writePeptideList() {
    qDebug() << "Xpip::writePeptideList begin";
    _output_stream->writeStartElement("peptide_list");

    writeLabelingMethod();

    const PeptideStore & peptide_store = _sp_project.get()->getPeptideStore();
    _output_stream->writeStartElement("modification_list");
    for (pappso::AaModificationP mod_p : peptide_store.getModificationCollection()) {
        QString id = QString("mod%1").arg(pappso::Utils::getLexicalOrderedString(_map_modifications.size()+1));
        _map_modifications.insert(std::pair<pappso::AaModificationP, QString>(mod_p, id));
        _output_stream->writeStartElement("modification");
        _output_stream->writeAttribute("id", id);
        _output_stream->writeAttribute("mod", mod_p->getAccession());
        _output_stream->writeEndElement();// modification
    }
    _output_stream->writeEndElement();// modification_list

    for (std::pair<std::size_t, PeptideXtpSp> peptide_pair : peptide_store.getPeptideMap()) {
        const PeptideXtp * p_peptide = peptide_pair.second.get();
        _output_stream->writeStartElement("peptide");
        QString idp = QString("p%1").arg(pappso::Utils::getLexicalOrderedString(_map_peptides.size()+1));
        _map_peptides.insert(std::pair<const PeptideXtp *, QString>(p_peptide, idp));
        _output_stream->writeAttribute("id",idp);
        _output_stream->writeAttribute("seq",p_peptide->getSequence());

        const Label * p_label = p_peptide->getLabel();
        if (p_label != nullptr) {
            _output_stream->writeAttribute("label_id",p_label->getXmlId());
        }


        unsigned int i=0;
        for (const pappso::Aa & amino_acid: *p_peptide) {

            std::list<pappso::AaModificationP> aa_modif_list = amino_acid.getModificationList();


            for (auto && aa_modif : aa_modif_list) {
                if (!aa_modif->isInternal()) {
                    _output_stream->writeStartElement("mod");
                    _output_stream->writeAttribute("ref", _map_modifications.at(aa_modif));
                    _output_stream->writeAttribute("position", QString ("%1").arg(i));
                    _output_stream->writeAttribute("aa", QString(amino_acid.getLetter()));
                    _output_stream->writeEndElement();// mod
                }
            }
            i++;
        }

        _output_stream->writeEndElement();// peptide
    }
    _output_stream->writeEndElement(); //peptide_list
    qDebug() << "Xpip::writePeptideList end";
}

/**
 * \brief Write the "identification_group_list" element.
 *
 * Delegates to writeIdentificationGroup for every identification group of
 * the project.
 */
void Xpip::writeIdentificationGroupList() {
    qDebug() << "Xpip::writeIdentificationGroupList begin";

    _output_stream->writeStartElement("identification_group_list");
    for (const IdentificationGroup * p_group : _sp_project.get()->getIdentificationGroupList()) {
        writeIdentificationGroup(p_group);
    }
    _output_stream->writeEndElement(); // identification_group_list

    qDebug() << "Xpip::writeIdentificationGroupList end";
}

void Xpip::writePeptideEvidence(const PeptideEvidence * p_peptide_evidence) {
    qDebug() << "Xpip::writePeptideEvidence begin";

    _output_stream->writeStartElement("peptide_evidence");
    QString idp = QString("pe%1").arg(pappso::Utils::getLexicalOrderedString(_map_peptide_evidences.size()+1));
    _map_peptide_evidences.insert(std::pair<const PeptideEvidence *, QString>(p_peptide_evidence, idp));
    _output_stream->writeAttribute("id",idp);

    //_output_stream->writeAttribute("ms_id",p_peptide_match->getMsRunP()->getXmlId());
    try {
        _output_stream->writeAttribute("peptide_id",_map_peptides.at( p_peptide_evidence->getPeptideXtpSp().get()));
    }
    catch (out_of_range exception_std) {
        throw pappso::PappsoException(QObject::tr("ERROR in Xpip::writePeptideEvidence peptide %1 not found in map :\n%2").arg(p_peptide_evidence->getPeptideXtpSp().get()->toAbsoluteString()).arg(exception_std.what()));
    }
    qDebug() << "Xpip::writePeptideEvidence scan";
    _output_stream->writeAttribute("scan",QString("%1").arg(p_peptide_evidence->getScan()));
    writeDoubleAttribute("rt",p_peptide_evidence->getRetentionTime());
    writeDoubleAttribute("evalue",p_peptide_evidence->getEvalue());
    writeDoubleAttribute("exp_mass",p_peptide_evidence->getExperimentalMass());
    _output_stream->writeAttribute("charge",QString("%1").arg(p_peptide_evidence->getCharge()));

    if (p_peptide_evidence->isChecked()) {
        _output_stream->writeAttribute("checked","true");
    }
    else {
        _output_stream->writeAttribute("checked","false");
    }

    qDebug() << "Xpip::writePeptideEvidence getParamList";
    const std::map<PeptideEvidenceParam, QVariant> & params = p_peptide_evidence->getParamList();

    //if (params.size() > 0) {
    //   _output_stream->writeStartElement("stats");
    for (const std::pair<PeptideEvidenceParam, QVariant> param_pair: params) {
        _output_stream->writeStartElement("param");
        _output_stream->writeAttribute("key",QString("%1").arg(static_cast<std::int8_t>(param_pair.first)));
        _output_stream->writeAttribute("value",param_pair.second.toString());
        _output_stream->writeEndElement();
    }
    //   _output_stream->writeEndElement();
    _output_stream->writeEndElement(); //peptide_evidence
    qDebug() << "Xpip::writePeptideEvidence end";
Langella Olivier's avatar
Langella Olivier committed
void Xpip::writePeptideEvidenceList(const IdentificationDataSource * p_ident_data_source) {
    qDebug() << "Xpip::writePeptideEvidenceList begin";

    _output_stream->writeStartElement("peptide_evidence_list");
Langella Olivier's avatar
Langella Olivier committed
    _output_stream->writeAttribute("ident_source_id",p_ident_data_source->getXmlId());

    for (const PeptideEvidenceSp & sp_peptide_evidence:p_ident_data_source->getPeptideEvidenceStore().getPeptideEvidenceList()) {
        writePeptideEvidence(sp_peptide_evidence.get());
    _output_stream->writeEndElement(); //peptide_evidence_list
    qDebug() << "Xpip::writePeptideEvidenceList end";
void Xpip::writeIdentificationGroup(const IdentificationGroup * p_identification_group) {
    qDebug() << "Xpip::writeIdentificationGroup begin";
    _output_stream->writeStartElement("identification_group");
Langella Olivier's avatar
Langella Olivier committed
    //QStringList id_list;
    for ( const IdentificationDataSource * ident_data_source : p_identification_group->getIdentificationDataSourceList()) {
        //id_list << ident_data_source->getXmlId();
        writePeptideEvidenceList(ident_data_source);
    }


    _output_stream->writeStartElement("protein_match_list");
    for (const ProteinMatch * p_protein_match : p_identification_group->getProteinMatchList()) {
        //std::vector<ProteinMatch *> & getProteinMatchList()
        _output_stream->writeStartElement("protein_match");
        _output_stream->writeAttribute("acc", p_protein_match->getProteinXtpSp().get()->getAccession());
        if (p_protein_match->isChecked()) {
            _output_stream->writeAttribute("checked","true");
        }
        else {
            _output_stream->writeAttribute("checked","false");
        }
        for (const PeptideMatch & peptide_match : p_protein_match->getPeptideMatchList()) {
            writePeptideMatch(peptide_match);
        _output_stream->writeEndElement();// protein_match
    }
    _output_stream->writeEndElement(); //protein_match_list
    _output_stream->writeEndElement(); //identification_group
    qDebug() << "Xpip::writeIdentificationGroup end";
void Xpip::writePeptideMatch(const PeptideMatch & peptide_match) {
    qDebug() << "Xpip::writePeptideMatch begin";
    _output_stream->writeStartElement("peptide_match");
    _output_stream->writeAttribute("peptide_evidence_id",_map_peptide_evidences.at( peptide_match.getPeptideEvidence()));
    _output_stream->writeAttribute("start",QString("%1").arg(peptide_match.getStart()));

    _output_stream->writeEndElement(); //protein_match_list
    qDebug() << "Xpip::writePeptideMatch end";
}