Newer
Older
/**
* \file /core/tandem_run/tandemcondorprocess.cpp
* \date 5/9/2017
* \author Olivier Langella
* \brief handles execution of a bunch of X!Tandem processes through condor jobs
*/
/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "tandemcondorprocess.h"
#include <QDebug>
#include <pappsomspp/pappsoexception.h>
#include <QSettings>
#include <QProcess>
#include <QXmlSimpleReader>
#include <QThread>
#include "../../input/condorqxmlsaxhandler.h"
/**
 * \brief Prepares a batch of X!Tandem runs to be submitted through HTCondor.
 *
 * Reads the "condor/tmp_dir", "condor/submit", "condor/condor_q" and
 * "condor/request_memory" settings and creates a temporary working directory
 * from the template "<condor/tmp_dir>/xtpcpp".
 *
 * \param p_main_window forwarded to the TandemBatchProcess base class
 * \param p_monitor forwarded to the TandemBatchProcess base class
 * \param tandem_run_batch forwarded to the TandemBatchProcess base class
 * \throws pappso::PappsoException if the temporary directory is not valid
 */
TandemCondorProcess::TandemCondorProcess(MainWindow * p_main_window, WorkMonitorInterface * p_monitor, const TandemRunBatch & tandem_run_batch) : TandemBatchProcess(p_main_window,p_monitor, tandem_run_batch) {
    /*
    Example of an HTCondor submit description file :

    Universe = vanilla
    notification = Error
    Rank = Mips
    request_memory= 50000
    request_cpus = 1
    Executable = /usr/bin/tandem
    Log = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/submit_condor.log
    Output = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).out
    Error = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).error
    Arguments = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/xtandem_param2054956555872858570.xml
    Queue
    */
    // NOTE(review): `settings` is not declared in the visible part of this constructor;
    // presumably a QSettings object declared elsewhere - confirm.
    QString condor_tmp_dir = QString("%1/xtpcpp").arg(settings.value("condor/tmp_dir", "/tmp").toString());
    _p_tmp_dir = new QTemporaryDir(condor_tmp_dir);
    _condor_submit_command = settings.value("condor/submit", "/usr/bin/condor_submit").toString();
    _condor_q_command = settings.value("condor/condor_q", "/usr/bin/condor_q").toString();
    _condor_request_memory = settings.value("condor/request_memory", "10000").toUInt();
    if (!_p_tmp_dir->isValid()) {
        // The destructor of a class is not run when its constructor throws, so the
        // directory object has to be released here to avoid leaking it.
        delete _p_tmp_dir;
        _p_tmp_dir = nullptr;
        throw pappso::PappsoException(QObject::tr("problem creating condor temporary directory in %1\n").arg(condor_tmp_dir));
    }
}
/**
 * \brief Releases the temporary directory object created by the constructor.
 */
TandemCondorProcess::~TandemCondorProcess()
{
    // deleting a null pointer is a no-op, so no guard is needed
    delete _p_tmp_dir;
}
/**
 * \brief Writes the XML database description file into the temporary directory.
 *
 * Opens "<temporary directory>/database.xml" for writing, stores its absolute
 * path in _xml_database_file and passes an XML stream writer bound to that
 * file to writeXmlDatabaseFile().
 *
 * \throws pappso::PappsoException if the file cannot be opened for writing
 */
// NOTE(review): the closing brace of this function is not visible in this listing - confirm
// against the original file.
void TandemCondorProcess::prepareXmlDatabaseFile() {
QFile xml_database_file(QString("%1/database.xml").arg(_p_tmp_dir->path()));
if (xml_database_file.open(QIODevice::WriteOnly))
{
// remember the absolute path of the file that is being written
_xml_database_file = QFileInfo( xml_database_file.fileName()).absoluteFilePath();
// NOTE(review): the writer is heap allocated and would not be released if
// writeXmlDatabaseFile() were to throw - confirm whether it can.
QXmlStreamWriter * p_out = new QXmlStreamWriter();
p_out->setDevice(&xml_database_file);
writeXmlDatabaseFile(p_out);
xml_database_file.close();
delete p_out;
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open the XML database file : %1\n").arg(xml_database_file.fileName()));
}
// NOTE(review): the statements below read as the body of the method that submits the batch to
// HTCondor (the debug messages name it "TandemCondorProcess::run"), but neither a function
// header nor a matching closing brace is visible in this listing - confirm against the
// original file.

// Copy the preset file into the temporary directory.
// NOTE(review): the return value of QFile::copy is not checked.
QFileInfo preset_info(_tandem_run_batch._preset_file);
_preset_file = QString("%1/%2").arg(_p_tmp_dir->path()).arg(preset_info.fileName());
QFile::copy(_tandem_run_batch._preset_file, _preset_file);
// NOTE(review): this assignment replaces the temporary-directory path set above with the
// original preset path, so _preset_file no longer refers to the copy - confirm intent.
_preset_file = _tandem_run_batch._preset_file;
prepareXmlDatabaseFile();
//condor submit file :
QFile submit_file(QString("%1/submit.txt").arg(_p_tmp_dir->path()));
QTextStream * p_out = nullptr;
if (submit_file.open(QIODevice::WriteOnly))
{
// write the fixed header of the submit description file
p_out = new QTextStream();
p_out->setDevice(&submit_file);
*p_out << "Universe = vanilla" << endl;
*p_out << "notification = Error" << endl;
*p_out << "Rank = Mips" << endl;
*p_out << "request_memory= " << _condor_request_memory << endl;
*p_out << "request_cpus = 1" << endl;
*p_out << "Executable = " << _tandem_run_batch._tandem_bin_path << endl;
*p_out << "Log = " << _p_tmp_dir->path() << "/condor.log" << endl;
*p_out << "Output = " << _p_tmp_dir->path() << "/tandem.$(Process).out" << endl;
*p_out << "Error = " << _p_tmp_dir->path() << "/tandem.$(Process).error" << endl;
/*
Log = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/submit_condor.log
Output = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).out
Error = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).error
Arguments = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/xtandem_param2054956555872858570.xml
Queue
*/
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open condor submit file : %1\n").arg(submit_file.fileName()));
}
// One temporary X!Tandem input file is created per mz file; the objects are kept in this
// list so that they can be deleted once the submit file is complete.
std::vector<QTemporaryFile *> input_file_list;
int i=0;
_p_monitor->setProgressMaximumValue(_tandem_run_batch._mz_file_list.size());
for (QString mz_file : _tandem_run_batch._mz_file_list) {
QTemporaryFile * p_xml_input_file = new QTemporaryFile(QString("%1/tandem").arg(_p_tmp_dir->path()));
input_file_list.push_back(p_xml_input_file);
// auto-remove is switched off before the QTemporaryFile objects are deleted below
p_xml_input_file->setAutoRemove(false);
if (p_xml_input_file->open())
{
// NOTE(review): in the visible code p_xml_out is never written to nor deleted, mz_file is
// not used and no "Queue" line follows the "Arguments" line - statements may be missing
// from this listing; confirm against the original file.
QXmlStreamWriter * p_xml_out = new QXmlStreamWriter();
p_xml_out->setDevice(p_xml_input_file);
*p_out << "Arguments = " << QFileInfo( p_xml_input_file->fileName()).absoluteFilePath() << endl;
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open the XML X!Tandem input file : %1\n").arg(p_xml_input_file->fileName()));
}
// NOTE(review): i is incremented but not read anywhere in the visible code
i++;
}
for (QTemporaryFile * p_xml_input_file: input_file_list) {
delete p_xml_input_file;
}
if (p_out != nullptr) {
submit_file.close();
delete p_out;
}
// Run the condor submit command with the absolute path of the submit file as its only argument.
QStringList arguments;
arguments << QFileInfo( submit_file.fileName()).absoluteFilePath();
// NOTE(review): condor_process is only deleted at the end of this sequence, so it is not
// released on any of the exceptions thrown below.
QProcess * condor_process = new QProcess();
//hk_process->setWorkingDirectory(QFileInfo(_hardklor_exe).absolutePath());
qDebug() << "TandemCondorProcess::run command " << _condor_submit_command << " " << arguments.join(" ");
condor_process->start(_condor_submit_command, arguments);
if (!condor_process->waitForStarted()) {
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed to start"));
}
// _max_xt_time_ms is used here as the timeout of the submit command
if (!condor_process->waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed to finish"));
}
// any text on standard error is treated as a failure
QString perr = condor_process->readAllStandardError();
if (perr.length()) {
qDebug() << "TandemCondorProcess::run readAllStandardError " << perr;
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed :\n%1").arg(perr));
}
else {
qDebug() << "TandemCondorProcess::run readAllStandardError OK " << perr;
}
// an empty standard output is treated as a failure
QString pjob = condor_process->readAllStandardOutput();
if (pjob.length()) {
qDebug() << "TandemCondorProcess::run readAllStandardOutput OK " << pjob;
}
else {
qDebug() << "TandemCondorProcess::run readAllStandardOutput " << pjob;
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed :\n%1").arg(pjob));
// NOTE(review): no closing brace for this else branch is visible, which would leave
// everything below after the throw and therefore unreachable - a line is probably missing
// from this listing; confirm against the original file.
//Submitting job(s).\n1 job(s) submitted to cluster 29.\n
parseCondorJobNumber(pjob);
_p_monitor->setProgressMaximumValue(_condor_job_size);
qDebug() << "TandemCondorProcess::run job=" << _condor_cluster_number << " size=" << _condor_job_size;
/*
if (!xt_process->waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("can't wait for X!Tandem process to finish : timeout at %1").arg(_max_xt_time_ms));
}
*/
// NOTE(review): standard output and standard error were already read above
QByteArray result = condor_process->readAll();
QProcess::ExitStatus Status = condor_process->exitStatus();
qDebug() << "TandemCondorProcess::run ExitStatus " << Status << result.data();
if (Status != 0)
{
// != QProcess::NormalExit
// NOTE(review): the last .arg() is applied to the joined argument string and not to the
// translated message, so the %3 placeholder of the message is never replaced.
throw pappso::PappsoException(QObject::tr("error executing HTCondor Status != 0 : %1 %2\n%3").arg(_tandem_run_batch._tandem_bin_path).arg(arguments.join(" ").arg(result.data())));
}
delete condor_process;
// poll the condor queue for the submitted cluster
surveyCondorJob();
qDebug() << "TandemCondorProcess::run end" ;
/**
 * \brief Polls the state of the submitted condor job at a fixed interval.
 *
 * Sleeps _condor_status_timer_millisecond milliseconds, then calls
 * getCondorJobState(), and repeats.
 */
// NOTE(review): the visible loop has no exit condition, and the closing brace of this function
// is not visible in this listing - lines may be missing; confirm against the original file.
void TandemCondorProcess::surveyCondorJob() {
//condor is running job : we have to survey condor job using "condor_q -xml _condor_cluster_number"
while(true) {
QThread::msleep(_condor_status_timer_millisecond);
getCondorJobState();
}
/**
 * \brief Runs the condor_q command for the submitted cluster and parses its output.
 *
 * Starts _condor_q_command with the arguments "-xml <_condor_cluster_number>",
 * waits for it to finish and passes its standard output to parseCondorQueue().
 *
 * \throws pappso::PappsoException if the command fails to start or to finish,
 * writes on standard error, or writes nothing on standard output
 */
// NOTE(review): several closing braces (after the throw statements and at the end of the
// function) are not visible in this listing - confirm against the original file.
void TandemCondorProcess::getCondorJobState() {
// NOTE(review): `arguments` is not declared in the visible part of this function;
// presumably a QStringList - confirm.
arguments << "-xml" << QString("%1").arg(_condor_cluster_number);
QProcess condor_q_process;
//hk_process->setWorkingDirectory(QFileInfo(_hardklor_exe).absolutePath());
qDebug() << "TandemCondorProcess::getCondorJobState command " << _condor_q_command << " " << arguments.join(" ");
condor_q_process.start(_condor_q_command, arguments);
if (!condor_q_process.waitForStarted()) {
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed to start"));
// _max_xt_time_ms is used here as the timeout of the condor_q command
if (!condor_q_process.waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed to finish"));
// any text on standard error is treated as a failure
QString perr = condor_q_process.readAllStandardError();
if (perr.length()) {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardError " << perr;
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed :\n%1").arg(perr));
else {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardError OK " << perr;
// an empty standard output is treated as a failure
QString pjob = condor_q_process.readAllStandardOutput();
if (pjob.length()) {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardOutput OK " << pjob;
}
else {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardOutput " << pjob;
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed :\n%1").arg(pjob));
// hand the text printed by condor_q over to the XML parser
parseCondorQueue(pjob);
}
/**
 * \brief Parses the XML text produced by condor_q with a SAX handler.
 *
 * A CondorQxmlSaxHandler built on this object is installed as both content
 * handler and error handler of a QXmlSimpleReader, which then parses the
 * given string.
 *
 * \param condor_q_xml XML text to parse
 * \throws pappso::PappsoException if the reader reports a parse failure
 */
// NOTE(review): the closing brace of this function is not visible in this listing - confirm
// against the original file.
void TandemCondorProcess::parseCondorQueue(QString & condor_q_xml) {
// NOTE(review): no delete of parser is visible in this function
CondorQxmlSaxHandler * parser = new CondorQxmlSaxHandler(this);
QXmlSimpleReader simplereader;
simplereader.setContentHandler(parser);
simplereader.setErrorHandler(parser);
qDebug() << "TandemCondorProcess::parseCondorQueue Read condor_q_xml";
QXmlInputSource xml_input_source;
xml_input_source.setData(condor_q_xml);
// nothing is done here on success; only a failed parse is reported
if (simplereader.parse(xml_input_source)) {
} else {
qDebug() << parser->errorString();
throw pappso::PappsoException(QObject::tr("Error reading condor_q xml string :\n %1").arg(condor_q_xml));
}
/**
 * \brief Extracts the job count and the cluster number from the text printed
 * by the condor submit command.
 *
 * Expected text :
 *   Submitting job(s)...
 *   3 job(s) submitted to cluster 3.
 *
 * The job count is stored in _condor_job_size and the cluster number in
 * _condor_cluster_number.
 *
 * \param condor_job text printed by the condor submit command
 * \throws pappso::PappsoException if the expected sentence is not found
 */
void TandemCondorProcess::parseCondorJobNumber(QString condor_job) {
    // Both numbers require at least one digit ("+"): with "*" an empty capture was accepted
    // and silently converted to 0. No wildcard is required after the cluster number, because
    // a trailing "." forces the greedy capture to give up its last digit when the number is
    // the last thing in the text (e.g. "cluster 29" was read as 2).
    QRegExp txt_submit("([0-9]+) job\\(s\\) submitted to cluster ([0-9]+)");
    if (txt_submit.indexIn(condor_job, 0) == -1) {
        throw pappso::PappsoException(QObject::tr("unable to find HTCondor job numbers in %1").arg(condor_job));
    }
    _condor_job_size = txt_submit.cap(1).toUInt();
    _condor_cluster_number = txt_submit.cap(2).toUInt();
}