| /*****************************************************************************\ |
| * step_terminate_monitor.c - Run an external program if there are |
| * unkillable processes at step termination. |
| ***************************************************************************** |
| * Copyright (C) 2007 The Regents of the University of California. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Christopher J. Morrone <morrone2@llnl.gov> |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include <pthread.h> |
| #include <signal.h> |
| #include <stdlib.h> |
| #include <sys/errno.h> |
| #include <sys/wait.h> |
| #include <time.h> |
| |
| #include "src/common/macros.h" |
| #include "src/common/parse_time.h" |
| #include "src/common/xmalloc.h" |
| #include "src/common/xstring.h" |
| #include "src/common/read_config.h" |
| #include "src/interfaces/job_container.h" |
| #include "src/slurmd/slurmstepd/step_terminate_monitor.h" |
| #include "src/slurmd/slurmstepd/slurmstepd.h" |
| |
| static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; |
| static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; |
| static bool signaled = false; |
| static pthread_t tid = 0; |
| static uint16_t timeout; |
| static char *program_name; |
| static uint32_t recorded_jobid = NO_VAL; |
| static uint32_t recorded_stepid = NO_VAL; |
| |
| static void *_monitor(void *); |
| static int _call_external_program(stepd_step_rec_t *step); |
| |
| void step_terminate_monitor_start(stepd_step_rec_t *step) |
| { |
| slurm_conf_t *conf; |
| |
| slurm_mutex_lock(&lock); |
| |
| if (tid) { |
| slurm_mutex_unlock(&lock); |
| return; |
| } |
| |
| conf = slurm_conf_lock(); |
| timeout = conf->unkillable_timeout; |
| program_name = xstrdup(conf->unkillable_program); |
| slurm_conf_unlock(); |
| |
| slurm_thread_create(&tid, _monitor, step); |
| |
| recorded_jobid = step->step_id.job_id; |
| recorded_stepid = step->step_id.step_id; |
| |
| slurm_mutex_unlock(&lock); |
| } |
| |
| void step_terminate_monitor_stop(void) |
| { |
| slurm_mutex_lock(&lock); |
| |
| if (!tid) { |
| error("%s: already stopped", __func__); |
| slurm_mutex_unlock(&lock); |
| return; |
| } |
| |
| debug("signaling condition"); |
| slurm_cond_signal(&cond); |
| signaled = true; |
| slurm_mutex_unlock(&lock); |
| slurm_thread_join(tid); |
| |
| xfree(program_name); |
| } |
| |
| |
| static void *_monitor(void *arg) |
| { |
| stepd_step_rec_t *step = (stepd_step_rec_t *)arg; |
| struct timespec ts = {0, 0}; |
| int rc = 0; |
| |
| debug2("step_terminate_monitor will run for %d secs", timeout); |
| |
| ts.tv_sec = time(NULL) + 1 + timeout; |
| |
| slurm_mutex_lock(&lock); |
| if (!signaled) |
| rc = pthread_cond_timedwait(&cond, &lock, &ts); |
| if (rc == ETIMEDOUT) { |
| char entity[45], time_str[256]; |
| char *drain_reason = NULL; |
| char stepid_str[33]; |
| time_t now = time(NULL); |
| |
| _call_external_program(step); |
| |
| if (step->step_id.step_id == SLURM_BATCH_SCRIPT) { |
| snprintf(entity, sizeof(entity), |
| "JOB %u", step->step_id.job_id); |
| } else if (step->step_id.step_id == SLURM_EXTERN_CONT) { |
| snprintf(entity, sizeof(entity), |
| "EXTERN STEP FOR %u", step->step_id.job_id); |
| } else if (step->step_id.step_id == SLURM_INTERACTIVE_STEP) { |
| snprintf(entity, sizeof(entity), |
| "INTERACTIVE STEP FOR %u", |
| step->step_id.job_id); |
| } else { |
| char tmp_char[33]; |
| log_build_step_id_str(&step->step_id, tmp_char, |
| sizeof(tmp_char), |
| STEP_ID_FLAG_NO_PREFIX); |
| snprintf(entity, sizeof(entity), "STEP %s", tmp_char); |
| } |
| slurm_make_time_str(&now, time_str, sizeof(time_str)); |
| |
| if (step->state < SLURMSTEPD_STEP_RUNNING) { |
| error("*** %s STEPD TERMINATED ON %s AT %s DUE TO JOB NOT RUNNING ***", |
| entity, step->node_name, time_str); |
| rc = ESLURMD_STEP_NOTRUNNING; |
| } else { |
| error("*** %s STEPD TERMINATED ON %s AT %s DUE TO JOB NOT ENDING WITH SIGNALS ***", |
| entity, step->node_name, time_str); |
| rc = ESLURMD_KILL_TASK_FAILED; |
| } |
| |
| log_build_step_id_str(&step->step_id, |
| stepid_str, |
| sizeof(stepid_str), |
| STEP_ID_FLAG_NO_JOB); |
| xstrfmtcat(drain_reason, "%s (JobId=%u %s)", |
| slurm_strerror(rc), |
| step->step_id.job_id, |
| stepid_str); |
| stepd_drain_node(drain_reason); |
| xfree(drain_reason); |
| |
| if (!step->batch) { |
| /* Notify waiting sruns */ |
| if (step->step_id.step_id != SLURM_EXTERN_CONT) |
| while (stepd_send_pending_exit_msgs(step)) {;} |
| |
| if ((step_complete.rank > -1)) { |
| if (step->aborted) |
| info("unkillable stepd exiting with aborted job"); |
| else |
| stepd_wait_for_children_slurmstepd( |
| step); |
| } |
| /* Notify parent stepd or ctld directly */ |
| stepd_send_step_complete_msgs(step); |
| } |
| |
| stepd_cleanup(NULL, step, NULL, rc, false); |
| } else if (rc != 0) { |
| error("Error waiting on condition in _monitor: %m"); |
| } |
| |
| debug2("step_terminate_monitor is stopping"); |
| slurm_mutex_unlock(&lock); |
| return NULL; |
| } |
| |
| |
| static int _call_external_program(stepd_step_rec_t *step) |
| { |
| int status, rc, opt; |
| pid_t cpid; |
| int max_wait = 300; /* seconds */ |
| int time_remaining; |
| |
| if (program_name == NULL || program_name[0] == '\0') |
| return 0; |
| |
| debug("step_terminate_monitor: unkillable after %d sec, calling: %s", |
| timeout, program_name); |
| |
| if (access(program_name, R_OK | X_OK) < 0) { |
| debug("step_terminate_monitor not running %s: %m", |
| program_name); |
| return 0; |
| } |
| |
| if ((cpid = fork()) < 0) { |
| error("step_terminate_monitor executing %s: fork: %m", |
| program_name); |
| return -1; |
| } |
| if (cpid == 0) { |
| /* child */ |
| char *argv[2]; |
| char **env = NULL; |
| |
| /* container_g_join needs to be called in the |
| forked process part of the fork to avoid a race |
| condition where if this process makes a file or |
| detacts itself from a child before we add the pid |
| to the container in the parent of the fork. |
| */ |
| if (container_g_join(&step->step_id, getuid(), false) != |
| SLURM_SUCCESS) |
| error("container_g_join(%u): %m", recorded_jobid); |
| env = env_array_create(); |
| env_array_append_fmt(&env, "SLURM_JOBID", "%u", recorded_jobid); |
| env_array_append_fmt(&env, "SLURM_JOB_ID", "%u", recorded_jobid); |
| env_array_append_fmt(&env, "SLURM_STEPID", "%u", recorded_stepid); |
| env_array_append_fmt(&env, "SLURM_STEP_ID", "%u", recorded_stepid); |
| |
| argv[0] = program_name; |
| argv[1] = NULL; |
| |
| setpgid(0, 0); |
| execve(program_name, argv, env); |
| error("step_terminate_monitor execv(): %m"); |
| _exit(127); |
| } |
| |
| opt = WNOHANG; |
| time_remaining = max_wait; |
| while (1) { |
| rc = waitpid(cpid, &status, opt); |
| if (rc < 0) { |
| if (errno == EINTR) |
| continue; |
| /* waitpid may very well fail under normal conditions |
| because the wait3() in mgr.c:_wait_for_any_task() |
| may have reaped the return code. */ |
| return 0; |
| } else if (rc == 0) { |
| sleep(1); |
| if ((--time_remaining) == 0) { |
| error("step_terminate_monitor: %s still running" |
| " after %d seconds. Killing.", |
| program_name, max_wait); |
| killpg(cpid, SIGKILL); |
| opt = 0; |
| } |
| } else { |
| return status; |
| } |
| } |
| |
| /* NOTREACHED */ |
| } |