/**
* \file /core/tandem_run/tandemcondorprocess.cpp
* \date 5/9/2017
* \author Olivier Langella
* \brief handles execution of a bunch of X!Tandem processes through condor jobs
*/
/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
* XTPcpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* XTPcpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XTPcpp. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/
#include "tandemcondorprocess.h"
#include <QDebug>
#include <pappsomspp/pappsoexception.h>
#include <QSettings>
#include <QProcess>
#include <QXmlSimpleReader>
#include <QThread>
#include "../../input/condorqxmlsaxhandler.h"
/**
 * \brief prepares an HTCondor batch run of X!Tandem
 *
 * Reads the "condor/..." entries of the application settings (temporary
 * directory root, condor_submit / condor_q / condor_rm commands, requested
 * memory) and creates the temporary working directory of this batch.
 *
 * \param p_main_window forwarded to TandemBatchProcess
 * \param p_monitor forwarded to TandemBatchProcess
 * \param tandem_run_batch forwarded to TandemBatchProcess
 * \throw pappso::PappsoException if the temporary directory cannot be created
 */
TandemCondorProcess::TandemCondorProcess(MainWindow * p_main_window, WorkMonitorInterface * p_monitor, const TandemRunBatch & tandem_run_batch) : TandemBatchProcess(p_main_window,p_monitor, tandem_run_batch) {
    /*
    Example of the submit file this class targets:

    Universe = vanilla
    notification = Error
    Rank = Mips
    request_memory= 50000
    request_cpus = 1
    Executable = /usr/bin/tandem
    Log = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/submit_condor.log
    Output = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).out
    Error = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).error
    Arguments = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/xtandem_param2054956555872858570.xml
    Queue
    */

    // NOTE(review): no declaration of `settings` was visible in this file, and
    // members here are all prefixed with '_', so it is declared locally as the
    // default application QSettings. Drop this line if one is inherited.
    QSettings settings;

    QString condor_tmp_dir = QString("%1/xtpcpp").arg(settings.value("condor/tmp_dir", "/tmp").toString());
    _p_tmp_dir = new QTemporaryDir(condor_tmp_dir);

    _condor_submit_command = settings.value("condor/submit", "/usr/bin/condor_submit").toString();
    _condor_q_command = settings.value("condor/condor_q", "/usr/bin/condor_q").toString();
    _condor_rm_command = settings.value("condor/condor_rm", "/usr/bin/condor_rm").toString();
    _condor_request_memory = settings.value("condor/request_memory", "10000").toUInt();

    if (!_p_tmp_dir->isValid()) {
        // The destructor does not run when a constructor throws: release the
        // directory object here so it is not leaked.
        delete _p_tmp_dir;
        _p_tmp_dir = nullptr;
        throw pappso::PappsoException(QObject::tr("problem creating condor temporary directory in %1\n").arg(condor_tmp_dir));
    }
}
/**
 * \brief destroys the QTemporaryDir object allocated by the constructor
 */
TandemCondorProcess::~TandemCondorProcess()
{
    delete _p_tmp_dir;
}
/**
 * \brief number of jobs in the submitted condor cluster
 * \return the job count stored by parseCondorJobNumber()
 */
unsigned int TandemCondorProcess::getCondorJobSize() const
{
    return _condor_job_size;
}
// Creates "database.xml" in the temporary directory, stores its absolute path
// in _xml_database_file and lets writeXmlDatabaseFile() fill it through a
// QXmlStreamWriter. Throws pappso::PappsoException if the file cannot be
// opened for writing.
void TandemCondorProcess::prepareXmlDatabaseFile() {
QFile xml_database_file(QString("%1/database.xml").arg(_p_tmp_dir->path()));
if (xml_database_file.open(QIODevice::WriteOnly))
{
_xml_database_file = QFileInfo( xml_database_file.fileName()).absoluteFilePath();
QXmlStreamWriter * p_out = new QXmlStreamWriter();
p_out->setDevice(&xml_database_file);
writeXmlDatabaseFile(p_out);
xml_database_file.close();
delete p_out;
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open the XML database file : %1\n").arg(xml_database_file.fileName()));
}
// NOTE(review): the closing brace of prepareXmlDatabaseFile() and the signature
// of the next function appear to be missing at this point. The debug messages
// below are all prefixed "TandemCondorProcess::run" and the code calls
// prepareXmlDatabaseFile() itself, so what follows is presumably the body of
// TandemCondorProcess::run() — confirm against the repository.
//
// Copy the preset file next to the job files in the temporary directory.
// The result of QFile::copy is not checked.
QFileInfo preset_info(_tandem_run_batch._preset_file);
_preset_file = QString("%1/%2").arg(_p_tmp_dir->path()).arg(preset_info.fileName());
QFile::copy(_tandem_run_batch._preset_file, _preset_file);
// NOTE(review): this overwrites the path of the copy assigned two lines above
// with the original preset path — confirm which one is intended.
_preset_file = _tandem_run_batch._preset_file;
prepareXmlDatabaseFile();
//condor submit file :
QFile submit_file(QString("%1/submit.txt").arg(_p_tmp_dir->path()));
QTextStream * p_out = nullptr;
if (submit_file.open(QIODevice::WriteOnly))
{
// Fixed header of the HTCondor submit description.
p_out = new QTextStream();
p_out->setDevice(&submit_file);
*p_out << "Universe = vanilla" << endl;
*p_out << "notification = Error" << endl;
*p_out << "Rank = Mips" << endl;
*p_out << "request_memory= " << _condor_request_memory << endl;
*p_out << "request_cpus = 1" << endl;
*p_out << "Executable = " << _tandem_run_batch._tandem_bin_path << endl;
*p_out << "Log = " << _p_tmp_dir->path() << "/condor.log" << endl;
*p_out << "Output = " << _p_tmp_dir->path() << "/tandem.$(Process).out" << endl;
*p_out << "Error = " << _p_tmp_dir->path() << "/tandem.$(Process).error" << endl;
/*
Log = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/submit_condor.log
Output = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).out
Error = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/tandem.$(Process).error
Arguments = /gorgone/pappso/tmp/temp_condor_job93294001891239208719639434471283743/xtandem_param2054956555872858570.xml
Queue
*/
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open condor submit file : %1\n").arg(submit_file.fileName()));
}
// One temporary X!Tandem input file is created per mz file; its absolute path
// is appended to the submit file as an "Arguments" line.
std::vector<QTemporaryFile *> input_file_list;
int i=0;
_p_monitor->setProgressMaximumValue(_tandem_run_batch._mz_file_list.size());
for (QString mz_file : _tandem_run_batch._mz_file_list) {
QTemporaryFile * p_xml_input_file = new QTemporaryFile(QString("%1/tandem").arg(_p_tmp_dir->path()));
input_file_list.push_back(p_xml_input_file);
// auto-remove is disabled so the file outlives the QTemporaryFile object
// that is deleted after the loop
p_xml_input_file->setAutoRemove(false);
if (p_xml_input_file->open())
{
// NOTE(review): in the visible lines nothing is written through p_xml_out,
// it is never deleted, mz_file is unused and no "Queue" line follows the
// "Arguments" line (compare the sample in the comment above); statements
// may be missing here — confirm against the repository.
QXmlStreamWriter * p_xml_out = new QXmlStreamWriter();
p_xml_out->setDevice(p_xml_input_file);
*p_out << "Arguments = " << QFileInfo( p_xml_input_file->fileName()).absoluteFilePath() << endl;
} else
{
throw pappso::PappsoException(QObject::tr("error : cannot open the XML X!Tandem input file : %1\n").arg(p_xml_input_file->fileName()));
}
// i is incremented but not read in the visible lines
i++;
}
for (QTemporaryFile * p_xml_input_file: input_file_list) {
delete p_xml_input_file;
}
if (p_out != nullptr) {
submit_file.close();
delete p_out;
}
// Run "<condor_submit> <absolute path of submit.txt>".
QStringList arguments;
arguments << QFileInfo( submit_file.fileName()).absoluteFilePath();
// NOTE(review): condor_process is only deleted at the end of this sequence;
// every throw below leaves it allocated.
QProcess * condor_process = new QProcess();
//hk_process->setWorkingDirectory(QFileInfo(_hardklor_exe).absolutePath());
qDebug() << "TandemCondorProcess::run command " << _condor_submit_command << " " << arguments.join(" ");
condor_process->start(_condor_submit_command, arguments);
if (!condor_process->waitForStarted()) {
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed to start"));
}
if (!condor_process->waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed to finish"));
}
// Any text on standard error is treated as a failure.
QString perr = condor_process->readAllStandardError();
if (perr.length()) {
qDebug() << "TandemCondorProcess::run readAllStandardError " << perr;
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed :\n%1").arg(perr));
}
else {
qDebug() << "TandemCondorProcess::run readAllStandardError OK " << perr;
}
// An empty standard output is treated as a failure.
QString pjob = condor_process->readAllStandardOutput();
if (pjob.length()) {
qDebug() << "TandemCondorProcess::run readAllStandardOutput OK " << pjob;
}
else {
qDebug() << "TandemCondorProcess::run readAllStandardOutput " << pjob;
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem process failed :\n%1").arg(pjob));
// NOTE(review): the closing brace of this else branch is not visible here.
// expected output looks like:
//Submitting job(s).\n1 job(s) submitted to cluster 29.\n
parseCondorJobNumber(pjob);
_p_monitor->setProgressMaximumValue(_condor_job_size);
qDebug() << "TandemCondorProcess::run job=" << _condor_cluster_number << " size=" << _condor_job_size;
/*
if (!xt_process->waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("can't wait for X!Tandem process to finish : timeout at %1").arg(_max_xt_time_ms));
}
*/
QByteArray result = condor_process->readAll();
QProcess::ExitStatus Status = condor_process->exitStatus();
qDebug() << "TandemCondorProcess::run ExitStatus " << Status << result.data();
if (Status != 0)
{
// != QProcess::NormalExit
// NOTE(review): the last .arg(result.data()) is applied to
// arguments.join(" ") because of the parenthesis placement, so %3 of the
// message is never substituted.
throw pappso::PappsoException(QObject::tr("error executing HTCondor Status != 0 : %1 %2\n%3").arg(_tandem_run_batch._tandem_bin_path).arg(arguments.join(" ").arg(result.data())));
}
delete condor_process;
// Poll condor until every job of the cluster is completed.
surveyCondorJob();
qDebug() << "TandemCondorProcess::run end" ;
// NOTE(review): the closing brace of this function is not visible here.
// Polls the condor queue until all jobs of the submitted cluster are
// completed: sleeps _condor_status_timer_millisecond, refreshes the counters
// with getCondorJobState() and reports progress to the monitor.
// The last branch of the loop removes the jobs with condorRemoveJob() and
// throws pappso::PappsoException ("stopped by the user").
void TandemCondorProcess::surveyCondorJob() {
// condor is running the jobs: they are surveyed with "condor_q -xml _condor_cluster_number"
bool keep_running = true;
while(keep_running) {
QThread::msleep(_condor_status_timer_millisecond);
getCondorJobState();
_p_monitor->message(QObject::tr("X!Tandem is running in condor cluster %1\n%2 on %3 jobs completed").arg(_condor_cluster_number).arg(_condor_completed_jobs).arg(_condor_job_size), _condor_completed_jobs);
// stop polling once every job of the cluster is completed
if (_condor_completed_jobs == _condor_job_size) keep_running = false;
// NOTE(review): the bare numbers below (268 to 306) are not C++ statements and
// look like line-number residue from a web export. The opening of the
// conditional that guards the condor_rm branch further down (presumably a
// "user asked to stop" check on the monitor) is not visible — restore both
// from the repository.
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
//condor_rm
condorRemoveJob();
throw pappso::PappsoException(QObject::tr("HTCondor X!Tandem jobs stopped by the user"));
}
}
}
void TandemCondorProcess::condorRemoveJob() {
QStringList arguments;
arguments << QString("%1").arg(_condor_cluster_number);
QProcess condor_q_process;
//hk_process->setWorkingDirectory(QFileInfo(_hardklor_exe).absolutePath());
qDebug() << "TandemCondorProcess::condorRemoveJob command " << _condor_rm_command << " " << arguments.join(" ");
condor_q_process.start(_condor_rm_command, arguments);
if (!condor_q_process.waitForStarted()) {
throw pappso::PappsoException(QObject::tr("HTCondor condor_rm process failed to start"));
}
if (!condor_q_process.waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("HTCondor condor_rm process failed to finish"));
}
QString perr = condor_q_process.readAllStandardError();
if (perr.length()) {
qDebug() << "TandemCondorProcess::condorRemoveJob readAllStandardError " << perr;
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed :\n%1").arg(perr));
}
else {
qDebug() << "TandemCondorProcess::condorRemoveJob readAllStandardError OK " << perr;
}
QString pjob = condor_q_process.readAllStandardOutput();
if (pjob.length()) {
qDebug() << "TandemCondorProcess::condorRemoveJob readAllStandardOutput OK " << pjob;
else {
qDebug() << "TandemCondorProcess::condorRemoveJob readAllStandardOutput " << pjob;
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed :\n%1").arg(pjob));
}
}
void TandemCondorProcess::getCondorJobState() {
arguments << "-xml" << QString("%1").arg(_condor_cluster_number);
QProcess condor_q_process;
//hk_process->setWorkingDirectory(QFileInfo(_hardklor_exe).absolutePath());
qDebug() << "TandemCondorProcess::getCondorJobState command " << _condor_q_command << " " << arguments.join(" ");
condor_q_process.start(_condor_q_command, arguments);
if (!condor_q_process.waitForStarted()) {
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed to start"));
if (!condor_q_process.waitForFinished(_max_xt_time_ms)) {
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed to finish"));
QString perr = condor_q_process.readAllStandardError();
if (perr.length()) {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardError " << perr;
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed :\n%1").arg(perr));
else {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardError OK " << perr;
QString pjob = condor_q_process.readAllStandardOutput();
if (pjob.length()) {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardOutput OK " << pjob;
}
else {
qDebug() << "TandemCondorProcess::getCondorJobState readAllStandardOutput " << pjob;
throw pappso::PappsoException(QObject::tr("HTCondor condor_q process failed :\n%1").arg(pjob));
//Submitting job(s).\n1 job(s) submitted to cluster 29.\n
parseCondorQueue(pjob);
}
/**
 * \brief parses the XML text produced by "condor_q -xml"
 *
 * The text is read by a QXmlSimpleReader whose content and error handler is a
 * CondorQxmlSaxHandler built on this process.
 * NOTE(review): the handler presumably reports job counts back through
 * setCondorJobStatus() — confirm in condorqxmlsaxhandler.h.
 *
 * \param condor_q_xml XML text to parse
 * \throw pappso::PappsoException if the reader reports a parse failure
 */
void TandemCondorProcess::parseCondorQueue(QString & condor_q_xml) {
    // Automatic storage: the reader does not own its handlers, so the handler
    // must be released on both the normal and the throwing path. It is
    // declared before the reader and therefore outlives it.
    CondorQxmlSaxHandler parser(this);

    QXmlSimpleReader simplereader;
    simplereader.setContentHandler(&parser);
    simplereader.setErrorHandler(&parser);

    qDebug() << "TandemCondorProcess::parseCondorQueue Read condor_q_xml";

    QXmlInputSource xml_input_source;
    xml_input_source.setData(condor_q_xml);

    if (!simplereader.parse(xml_input_source)) {
        qDebug() << parser.errorString();
        throw pappso::PappsoException(QObject::tr("Error reading condor_q xml string :\n %1\n%2").arg(parser.errorString()).arg(condor_q_xml));
    }
}
/**
 * \brief extracts the job count and cluster number from condor_submit output
 *
 * Expected text:
 *   Submitting job(s)...
 *   3 job(s) submitted to cluster 3.
 *
 * On success _condor_job_size and _condor_cluster_number are updated.
 *
 * \param condor_job standard output of condor_submit
 * \throw pappso::PappsoException if the expected sentence is not found
 */
void TandemCondorProcess::parseCondorJobNumber(QString condor_job) {
    // Both numbers need at least one digit. No trailing wildcard is used: with
    // "([0-9]*)." the '.' could take the last digit of the cluster number when
    // nothing follows it, silently truncating the id.
    QRegExp txt_submit("([0-9]+) job\\(s\\) submitted to cluster ([0-9]+)");

    if (txt_submit.indexIn(condor_job, 0) == -1) {
        throw pappso::PappsoException(QObject::tr("unable to find HTCondor job numbers in %1").arg(condor_job));
    }

    _condor_job_size = txt_submit.cap(1).toUInt();
    _condor_cluster_number = txt_submit.cap(2).toUInt();
}
void TandemCondorProcess::setCondorJobStatus(std::int8_t count_status[10]) {
QString status_message = QString("%1 unexpanded jobs\n%2 idle jobs\n%3 running jobs\n%4 removed jobs\n%5 completed jobs\n%6 held jobs\n%7 submission errors").arg(count_status[0]).arg(count_status[1]).arg(count_status[2]).arg(count_status[3]).arg(count_status[4]).arg(count_status[5]).arg(count_status[6]);
_p_monitor->setText(status_message);
_condor_completed_jobs = count_status[(std::int8_t) CondorJobStatus::Completed];