/* blob: 7a3fbf17d7cb0da9ca9cb6571386c4ebf5f15e6b */
/*****************************************************************************\
* preempt.c - Job preemption plugin function setup.
*****************************************************************************
* Copyright (C) 2009-2010 Lawrence Livermore National Security.
* Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov>
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include <pthread.h>
#include <signal.h>
#include "src/common/log.h"
#include "src/common/plugrack.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/job_scheduler.h"
/* ************************************************************************ */
/* TAG( slurm_preempt_ops_t ) */
/* ************************************************************************ */
/*
 * Table of function pointers for the preempt plugin operations.
 * A single instance (ops) is handed to plugin_context_create() together
 * with syms[]; the members must stay in the same order as the symbol
 * names listed in syms[].
 */
typedef struct slurm_preempt_ops {
/* syms[0]: "find_preemptable_jobs" */
List (*find_jobs) (struct job_record *job_ptr);
/* syms[1]: "job_preempt_mode" */
uint16_t (*job_preempt_mode) (struct job_record *job_ptr);
/* syms[2]: "preemption_enabled" */
bool (*preemption_enabled) (void);
/* syms[3]: "job_preempt_check" */
bool (*job_preempt_check) (job_queue_rec_t *preemptor,
job_queue_rec_t *preemptee);
} slurm_preempt_ops_t;
/*
 * Names of the symbols looked up in the preempt plugin, one per member of
 * slurm_preempt_ops_t and in the same order.
 * Must be synchronized with slurm_preempt_ops_t above.
 */
static const char *syms[] = {
"find_preemptable_jobs",
"job_preempt_mode",
"preemption_enabled",
"job_preempt_check",
};
/* Plugin operation table, passed to plugin_context_create() by
 * slurm_preempt_init() */
static slurm_preempt_ops_t ops;
/* Plugin context; NULL until slurm_preempt_init() creates it and again
 * after slurm_preempt_fini() destroys it */
static plugin_context_t *g_context = NULL;
/* Held by slurm_preempt_init() while the context is being created */
static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;
/* Set once the context has been created, cleared by slurm_preempt_fini() */
static bool init_run = false;
/* *********************************************************************** */
/* TAG( _preempt_signal ) */
/* *********************************************************************** */
/*
 * Mark a job as being preempted and cap its end time at the end of the
 * grace period.
 *
 * job_ptr    - job being preempted; preempt_time and end_time are updated
 * grace_time - length of the grace period, in seconds
 *
 * Does nothing if the job already has a preempt_time recorded.
 */
static void _preempt_signal(struct job_record *job_ptr, uint32_t grace_time)
{
	time_t grace_deadline;

	if (!job_ptr->preempt_time) {
		job_ptr->preempt_time = time(NULL);

		/* Never push the end time out, only pull it in */
		grace_deadline = job_ptr->preempt_time + (time_t) grace_time;
		if (grace_deadline < job_ptr->end_time)
			job_ptr->end_time = grace_deadline;

		/* Signal the job at the beginning of preemption GraceTime.
		 * NOTE(review): signal delivery is currently disabled. */
		// job_signal(job_ptr->job_id, SIGCONT, 0, 0, 0);
		// job_signal(job_ptr->job_id, SIGTERM, 0, 0, 0);
	}
}
/* *********************************************************************** */
/* TAG( slurm_job_check_grace ) */
/* *********************************************************************** */
/*
 * Check, and if necessary start, the preemption grace period of a job.
 *
 * job_ptr - job selected for preemption
 *
 * Returns SLURM_SUCCESS while the job is inside its grace period (either
 * one already running or one started by this call). Returns SLURM_ERROR
 * when the grace period has expired or when no grace time applies to the
 * job (grace time of zero, unrecognized preempt plugin type, or the job
 * lacks the partition/QOS record the grace time would be read from).
 */
extern int slurm_job_check_grace(struct job_record *job_ptr)
{
	/* Preempt modes: 0 (none/other), 1 (partition), 2 (QOS) */
	static int preempt_mode = 0;
	static time_t last_update_time = (time_t) 0;
	int rc = SLURM_SUCCESS;
	uint32_t grace_time = 0;

	/* Grace period already started: only test whether it has run out */
	if (job_ptr->preempt_time) {
		if (time(NULL) >= job_ptr->end_time)
			rc = SLURM_ERROR;
		return rc;
	}

	/* Re-read the plugin type only when the configuration has changed */
	if (last_update_time != slurmctld_conf.last_update) {
		char *preempt_type = slurm_get_preempt_type();

		/* A missing preempt type is treated like an unknown one */
		if (preempt_type &&
		    (strcmp(preempt_type, "preempt/partition_prio") == 0))
			preempt_mode = 1;
		else if (preempt_type &&
			 (strcmp(preempt_type, "preempt/qos") == 0))
			preempt_mode = 2;
		else
			preempt_mode = 0;
		xfree(preempt_type);
		last_update_time = slurmctld_conf.last_update;
	}

	/* A job without the relevant partition or QOS record simply gets no
	 * grace time instead of dereferencing a NULL pointer */
	if (preempt_mode == 1) {
		if (job_ptr->part_ptr)
			grace_time = job_ptr->part_ptr->grace_time;
	} else if (preempt_mode == 2) {
		slurmdb_qos_rec_t *qos_ptr = (slurmdb_qos_rec_t *)
					     job_ptr->qos_ptr;
		if (qos_ptr)
			grace_time = qos_ptr->grace_time;
	}

	if (grace_time) {
		debug("setting %u sec preemption grace time for job %u",
		      grace_time, job_ptr->job_id);
		_preempt_signal(job_ptr, grace_time);
	} else
		rc = SLURM_ERROR;

	return rc;
}
/* *********************************************************************** */
/* TAG( slurm_preempt_init ) */
/* *********************************************************************** */
extern int slurm_preempt_init(void)
{
int retval = SLURM_SUCCESS;
char *plugin_type = "preempt";
char *type = NULL;
/* This function is called frequently, so it should be as fast as
* possible. The test below will be TRUE almost all of the time and
* is as fast as possible. */
if (init_run && g_context)
return retval;
slurm_mutex_lock(&g_context_lock);
if (g_context)
goto done;
type = slurm_get_preempt_type();
g_context = plugin_context_create(
plugin_type, type, (void **)&ops, syms, sizeof(syms));
if (!g_context) {
error("cannot create %s context for %s", plugin_type, type);
retval = SLURM_ERROR;
goto done;
}
init_run = true;
done:
slurm_mutex_unlock(&g_context_lock);
xfree(type);
return retval;
}
/* *********************************************************************** */
/* TAG( slurm_preempt_fini ) */
/* *********************************************************************** */
/*
 * Destroy the preempt plugin context, if one exists.
 *
 * Returns SLURM_SUCCESS when there is no context, otherwise the return
 * code of plugin_context_destroy().
 *
 * NOTE(review): unlike slurm_preempt_init(), this does not take
 * g_context_lock - confirm callers cannot race with initialization.
 */
extern int slurm_preempt_fini(void)
{
	int status = SLURM_SUCCESS;

	if (g_context) {
		/* Clear the flag first so the init fast path stops matching */
		init_run = false;
		status = plugin_context_destroy(g_context);
		g_context = NULL;
	}

	return status;
}
/* *********************************************************************** */
/* TAG( slurm_find_preemptable_jobs ) */
/* *********************************************************************** */
/*
 * Invoke the preempt plugin's find_preemptable_jobs operation for job_ptr.
 *
 * Returns NULL if the plugin could not be initialized, otherwise the value
 * returned by the plugin.
 */
extern List slurm_find_preemptable_jobs(struct job_record *job_ptr)
{
	List preemptees = NULL;

	if (slurm_preempt_init() >= 0)
		preemptees = ops.find_jobs(job_ptr);

	return preemptees;
}
/*
 * Return the PreemptMode which should apply to stop this job, as reported
 * by the preempt plugin. PREEMPT_MODE_OFF is returned if the plugin could
 * not be initialized.
 */
extern uint16_t slurm_job_preempt_mode(struct job_record *job_ptr)
{
	uint16_t mode = (uint16_t) PREEMPT_MODE_OFF;

	if (slurm_preempt_init() >= 0)
		mode = ops.job_preempt_mode(job_ptr);

	return mode;
}
/*
 * Return true if any jobs can be preempted, otherwise false.
 * Also returns false if the preempt plugin could not be initialized.
 */
extern bool slurm_preemption_enabled(void)
{
	bool enabled = false;

	if (slurm_preempt_init() >= 0)
		enabled = ops.preemption_enabled();

	return enabled;
}
/*
 * Return true if the preemptor can preempt the preemptee, otherwise false.
 * Also returns false if the preempt plugin could not be initialized.
 */
extern bool slurm_job_preempt_check(job_queue_rec_t *preemptor,
				    job_queue_rec_t *preemptee)
{
	bool can_preempt = false;

	if (slurm_preempt_init() >= 0)
		can_preempt = ops.job_preempt_check(preemptor, preemptee);

	return can_preempt;
}