|  | /*****************************************************************************\ | 
|  | *  step_terminate_monitor.c - Run an external program if there are | 
|  | *    unkillable processes at step termination. | 
|  | ***************************************************************************** | 
|  | *  Copyright (C) 2007 The Regents of the University of California. | 
|  | *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | 
|  | *  Written by Christopher J. Morrone <morrone2@llnl.gov> | 
|  | *  CODE-OCEC-09-009. All rights reserved. | 
|  | * | 
|  | *  This file is part of Slurm, a resource management program. | 
|  | *  For details, see <https://slurm.schedmd.com/>. | 
|  | *  Please also read the included file: DISCLAIMER. | 
|  | * | 
|  | *  Slurm is free software; you can redistribute it and/or modify it under | 
|  | *  the terms of the GNU General Public License as published by the Free | 
|  | *  Software Foundation; either version 2 of the License, or (at your option) | 
|  | *  any later version. | 
|  | * | 
|  | *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
|  | *  details. | 
|  | * | 
|  | *  You should have received a copy of the GNU General Public License along | 
|  | *  with Slurm; if not, write to the Free Software Foundation, Inc., | 
|  | *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
|  | \*****************************************************************************/ | 
|  |  | 
|  | #include <pthread.h> | 
|  | #include <signal.h> | 
|  | #include <stdlib.h> | 
|  | #include <sys/errno.h> | 
|  | #include <sys/wait.h> | 
|  | #include <time.h> | 
|  |  | 
|  | #include "src/common/macros.h" | 
|  | #include "src/common/parse_time.h" | 
|  | #include "src/common/xmalloc.h" | 
|  | #include "src/common/xstring.h" | 
|  | #include "src/common/read_config.h" | 
|  | #include "src/interfaces/job_container.h" | 
|  | #include "src/slurmd/slurmstepd/step_terminate_monitor.h" | 
|  | #include "src/slurmd/slurmstepd/slurmstepd.h" | 
|  |  | 
|  | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; | 
|  | static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; | 
|  | static bool signaled = false; | 
|  | static pthread_t tid = 0; | 
|  | static uint16_t timeout; | 
|  | static char *program_name; | 
|  | static uint32_t recorded_jobid = NO_VAL; | 
|  | static uint32_t recorded_stepid = NO_VAL; | 
|  |  | 
|  | static void *_monitor(void *); | 
|  | static int _call_external_program(stepd_step_rec_t *step); | 
|  |  | 
|  | void step_terminate_monitor_start(stepd_step_rec_t *step) | 
|  | { | 
|  | slurm_conf_t *conf; | 
|  |  | 
|  | slurm_mutex_lock(&lock); | 
|  |  | 
|  | if (tid) { | 
|  | slurm_mutex_unlock(&lock); | 
|  | return; | 
|  | } | 
|  |  | 
|  | conf = slurm_conf_lock(); | 
|  | timeout = conf->unkillable_timeout; | 
|  | program_name = xstrdup(conf->unkillable_program); | 
|  | slurm_conf_unlock(); | 
|  |  | 
|  | slurm_thread_create(&tid, _monitor, step); | 
|  |  | 
|  | recorded_jobid = step->step_id.job_id; | 
|  | recorded_stepid = step->step_id.step_id; | 
|  |  | 
|  | slurm_mutex_unlock(&lock); | 
|  | } | 
|  |  | 
|  | void step_terminate_monitor_stop(void) | 
|  | { | 
|  | slurm_mutex_lock(&lock); | 
|  |  | 
|  | if (!tid) { | 
|  | error("%s: already stopped", __func__); | 
|  | slurm_mutex_unlock(&lock); | 
|  | return; | 
|  | } | 
|  |  | 
|  | debug("signaling condition"); | 
|  | slurm_cond_signal(&cond); | 
|  | signaled = true; | 
|  | slurm_mutex_unlock(&lock); | 
|  | slurm_thread_join(tid); | 
|  |  | 
|  | xfree(program_name); | 
|  | } | 
|  |  | 
|  |  | 
|  | static void *_monitor(void *arg) | 
|  | { | 
|  | stepd_step_rec_t *step = (stepd_step_rec_t *)arg; | 
|  | struct timespec ts = {0, 0}; | 
|  | int rc = 0; | 
|  |  | 
|  | debug2("step_terminate_monitor will run for %d secs", timeout); | 
|  |  | 
|  | ts.tv_sec = time(NULL) + 1 + timeout; | 
|  |  | 
|  | slurm_mutex_lock(&lock); | 
|  | if (!signaled) | 
|  | rc = pthread_cond_timedwait(&cond, &lock, &ts); | 
|  | if (rc == ETIMEDOUT) { | 
|  | char entity[45], time_str[256]; | 
|  | char *drain_reason = NULL; | 
|  | char stepid_str[33]; | 
|  | time_t now = time(NULL); | 
|  |  | 
|  | _call_external_program(step); | 
|  |  | 
|  | if (step->step_id.step_id == SLURM_BATCH_SCRIPT) { | 
|  | snprintf(entity, sizeof(entity), | 
|  | "JOB %u", step->step_id.job_id); | 
|  | } else if (step->step_id.step_id == SLURM_EXTERN_CONT) { | 
|  | snprintf(entity, sizeof(entity), | 
|  | "EXTERN STEP FOR %u", step->step_id.job_id); | 
|  | } else if (step->step_id.step_id == SLURM_INTERACTIVE_STEP) { | 
|  | snprintf(entity, sizeof(entity), | 
|  | "INTERACTIVE STEP FOR %u", | 
|  | step->step_id.job_id); | 
|  | } else { | 
|  | char tmp_char[33]; | 
|  | log_build_step_id_str(&step->step_id, tmp_char, | 
|  | sizeof(tmp_char), | 
|  | STEP_ID_FLAG_NO_PREFIX); | 
|  | snprintf(entity, sizeof(entity), "STEP %s", tmp_char); | 
|  | } | 
|  | slurm_make_time_str(&now, time_str, sizeof(time_str)); | 
|  |  | 
|  | if (step->state < SLURMSTEPD_STEP_RUNNING) { | 
|  | error("*** %s STEPD TERMINATED ON %s AT %s DUE TO JOB NOT RUNNING ***", | 
|  | entity, step->node_name, time_str); | 
|  | rc = ESLURMD_STEP_NOTRUNNING; | 
|  | } else { | 
|  | error("*** %s STEPD TERMINATED ON %s AT %s DUE TO JOB NOT ENDING WITH SIGNALS ***", | 
|  | entity, step->node_name, time_str); | 
|  | rc = ESLURMD_KILL_TASK_FAILED; | 
|  | } | 
|  |  | 
|  | log_build_step_id_str(&step->step_id, | 
|  | stepid_str, | 
|  | sizeof(stepid_str), | 
|  | STEP_ID_FLAG_NO_JOB); | 
|  | xstrfmtcat(drain_reason, "%s (JobId=%u %s)", | 
|  | slurm_strerror(rc), | 
|  | step->step_id.job_id, | 
|  | stepid_str); | 
|  | stepd_drain_node(drain_reason); | 
|  | xfree(drain_reason); | 
|  |  | 
|  | if (!step->batch) { | 
|  | /* Notify waiting sruns */ | 
|  | if (step->step_id.step_id != SLURM_EXTERN_CONT) | 
|  | while (stepd_send_pending_exit_msgs(step)) {;} | 
|  |  | 
|  | if ((step_complete.rank > -1)) { | 
|  | if (step->aborted) | 
|  | info("unkillable stepd exiting with aborted job"); | 
|  | else | 
|  | stepd_wait_for_children_slurmstepd( | 
|  | step); | 
|  | } | 
|  | /* Notify parent stepd or ctld directly */ | 
|  | stepd_send_step_complete_msgs(step); | 
|  | } | 
|  |  | 
|  | stepd_cleanup(NULL, step, NULL, rc, false); | 
|  | } else if (rc != 0) { | 
|  | error("Error waiting on condition in _monitor: %m"); | 
|  | } | 
|  |  | 
|  | debug2("step_terminate_monitor is stopping"); | 
|  | slurm_mutex_unlock(&lock); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  |  | 
|  | static int _call_external_program(stepd_step_rec_t *step) | 
|  | { | 
|  | int status, rc, opt; | 
|  | pid_t cpid; | 
|  | int max_wait = 300; /* seconds */ | 
|  | int time_remaining; | 
|  |  | 
|  | if (program_name == NULL || program_name[0] == '\0') | 
|  | return 0; | 
|  |  | 
|  | debug("step_terminate_monitor: unkillable after %d sec, calling: %s", | 
|  | timeout, program_name); | 
|  |  | 
|  | if (access(program_name, R_OK | X_OK) < 0) { | 
|  | debug("step_terminate_monitor not running %s: %m", | 
|  | program_name); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if ((cpid = fork()) < 0) { | 
|  | error("step_terminate_monitor executing %s: fork: %m", | 
|  | program_name); | 
|  | return -1; | 
|  | } | 
|  | if (cpid == 0) { | 
|  | /* child */ | 
|  | char *argv[2]; | 
|  | char **env = NULL; | 
|  |  | 
|  | /* container_g_join needs to be called in the | 
|  | forked process part of the fork to avoid a race | 
|  | condition where if this process makes a file or | 
|  | detacts itself from a child before we add the pid | 
|  | to the container in the parent of the fork. | 
|  | */ | 
|  | if (container_g_join(&step->step_id, getuid(), false) != | 
|  | SLURM_SUCCESS) | 
|  | error("container_g_join(%u): %m", recorded_jobid); | 
|  | env = env_array_create(); | 
|  | env_array_append_fmt(&env, "SLURM_JOBID", "%u", recorded_jobid); | 
|  | env_array_append_fmt(&env, "SLURM_JOB_ID", "%u", recorded_jobid); | 
|  | env_array_append_fmt(&env, "SLURM_STEPID", "%u", recorded_stepid); | 
|  | env_array_append_fmt(&env, "SLURM_STEP_ID", "%u", recorded_stepid); | 
|  |  | 
|  | argv[0] = program_name; | 
|  | argv[1] = NULL; | 
|  |  | 
|  | setpgid(0, 0); | 
|  | execve(program_name, argv, env); | 
|  | error("step_terminate_monitor execv(): %m"); | 
|  | _exit(127); | 
|  | } | 
|  |  | 
|  | opt = WNOHANG; | 
|  | time_remaining = max_wait; | 
|  | while (1) { | 
|  | rc = waitpid(cpid, &status, opt); | 
|  | if (rc < 0) { | 
|  | if (errno == EINTR) | 
|  | continue; | 
|  | /* waitpid may very well fail under normal conditions | 
|  | because the wait3() in mgr.c:_wait_for_any_task() | 
|  | may have reaped the return code. */ | 
|  | return 0; | 
|  | } else if (rc == 0) { | 
|  | sleep(1); | 
|  | if ((--time_remaining) == 0) { | 
|  | error("step_terminate_monitor: %s still running" | 
|  | " after %d seconds.  Killing.", | 
|  | program_name, max_wait); | 
|  | killpg(cpid, SIGKILL); | 
|  | opt = 0; | 
|  | } | 
|  | } else  { | 
|  | return status; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* NOTREACHED */ | 
|  | } |