blob: f7e649d22958d4a72734017621820c4476e1bf21 [file] [log] [blame]
/*****************************************************************************\
* run_command.c - run a command asynchronously and return output
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "config.h"
#define _GNU_SOURCE /* For POLLRDHUP */
#include <fcntl.h>
#include <inttypes.h> /* for uint16_t, uint32_t definitions */
#include <limits.h>
#include <poll.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#ifndef POLLRDHUP
#define POLLRDHUP POLLHUP
#endif
#include "src/common/fd.h"
#include "src/common/list.h"
#include "src/common/macros.h"
#include "src/common/read_config.h"
#include "src/common/run_command.h"
#include "src/common/slurm_time.h"
#include "src/common/timers.h"
#include "src/common/xassert.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
/* Define slurm-specific aliases for use by plugins, see slurm_xlator.h. */
strong_alias(run_command, slurm_run_command);
static char *script_launcher = NULL;
static int script_launcher_fd = -1;
static int command_shutdown = 0;
static int child_proc_count = 0;
static pthread_mutex_t proc_count_mutex = PTHREAD_MUTEX_INITIALIZER;
#define MAX_POLL_WAIT 500
/* Function prototypes */
static void _run_command_child_exec(int fd, const char *path, char **argv,
char **env);
static void _run_command_child_pre_exec(void);
extern void run_command_add_to_script(char **script_body, char *new_str)
{
char *orig_script = *script_body;
char *new_script, *sep, save_char;
char *tmp_str = NULL;
int i;
if (!new_str || (new_str[0] == '\0'))
return; /* Nothing to prepend */
if (!orig_script) {
*script_body = xstrdup(new_str);
return;
}
tmp_str = xstrdup(new_str);
i = strlen(tmp_str) - 1;
if (tmp_str[i] != '\n') /* Append new line as needed */
xstrcat(tmp_str, "\n");
if (orig_script[0] != '#') {
/* Prepend new lines */
new_script = xstrdup(tmp_str);
xstrcat(new_script, orig_script);
xfree(*script_body);
*script_body = new_script;
xfree(tmp_str);
return;
}
sep = strchr(orig_script, '\n');
if (sep) {
save_char = sep[1];
sep[1] = '\0';
new_script = xstrdup(orig_script);
xstrcat(new_script, tmp_str);
sep[1] = save_char;
xstrcat(new_script, sep + 1);
xfree(*script_body);
*script_body = new_script;
xfree(tmp_str);
return;
} else {
new_script = xstrdup(orig_script);
xstrcat(new_script, "\n");
xstrcat(new_script, tmp_str);
xfree(*script_body);
*script_body = new_script;
xfree(tmp_str);
return;
}
}
/* used to initialize run_command module */
extern int run_command_init(int argc, char **argv, char *binary)
{
command_shutdown = 0;
#if defined(__linux__)
if (!binary && !script_launcher)
binary = "/proc/self/exe";
#endif /* !__linux__ */
/* Use argv[0] as fallback with absolute path */
if (!binary && (argc > 0) && (argv[0][0] == '/'))
binary = argv[0];
if (!binary)
return SLURM_ERROR;
fd_close(&script_launcher_fd);
xfree(script_launcher);
#if defined(__linux__)
if ((script_launcher_fd = open(binary, (O_PATH|O_CLOEXEC))) >= 0) {
char buf[PATH_MAX];
ssize_t bytes = readlink(binary, buf, sizeof(buf));
/*
* Because we are using script_launcher_fd to exec,
* script_launcher is just used for logging and thus we do not
* need script_launcher to be the full path. So, it is okay
* if readlink truncates the result; in that case, just use
* the truncated string.
*/
if (bytes > 0) {
if (bytes >= sizeof(buf))
bytes = sizeof(buf) - 1;
buf[bytes] = '\0';
script_launcher = xstrdup(buf);
} else {
script_launcher = xstrdup(binary);
}
return SLURM_SUCCESS;
}
#endif /* !__linux__ */
if (access(binary, R_OK | X_OK)) {
error("%s: %s cannot be executed as an intermediate launcher, doing direct launch.",
__func__, binary);
return SLURM_ERROR;
} else {
script_launcher = xstrdup(binary);
return SLURM_SUCCESS;
}
}
/* used to terminate any outstanding commands */
extern void run_command_shutdown(void)
{
command_shutdown = 1;
}
extern bool run_command_is_launcher(int argc, char **argv)
{
return (argc >= RUN_COMMAND_LAUNCHER_ARGC &&
!xstrcmp(argv[1], RUN_COMMAND_LAUNCHER_MODE));
}
/* Return count of child processes */
extern int run_command_count(void)
{
int cnt;
slurm_mutex_lock(&proc_count_mutex);
cnt = child_proc_count;
slurm_mutex_unlock(&proc_count_mutex);
return cnt;
}
static void _kill_pg(pid_t pid)
{
killpg(pid, SIGTERM);
usleep(10000);
killpg(pid, SIGKILL);
}
static void _run_command_child(run_command_args_t *args, int write_fd,
int read_fd, char **launcher_argv)
{
int stdin_fd;
if (read_fd > 0)
stdin_fd = read_fd;
else if ((stdin_fd = open("/dev/null", O_RDWR)) < 0) {
/*
* We must avoid calling non-async-signal-safe functions at
* this point (like error() or similar), so we won't log
* anything now. If we want to log we could use write().
*/
_exit(127);
}
dup2(stdin_fd, STDIN_FILENO);
dup2(write_fd, STDERR_FILENO);
dup2(write_fd, STDOUT_FILENO);
if (launcher_argv && !args->direct_exec)
_run_command_child_exec(script_launcher_fd, script_launcher,
launcher_argv, args->env);
_run_command_child_pre_exec();
_run_command_child_exec(-1, args->script_path, args->script_argv,
args->env);
}
static void _log_str_array(char *prefix, char **array)
{
if (!(slurm_conf.debug_flags & DEBUG_FLAG_SCRIPT))
return;
if (!array)
return;
log_flag(SCRIPT, "%s: START", prefix);
for (int i = 0; array[i]; i++)
log_flag(SCRIPT, "%s[%d]=%s", prefix, i, array[i]);
log_flag(SCRIPT, "%s: END", prefix);
}
static char **_setup_launcher_argv(run_command_args_t *args)
{
char **launcher_argv = NULL;
int extra = RUN_COMMAND_LAUNCHER_ARGC;
int count = 0;
xassert(script_launcher);
_log_str_array("script_argv", args->script_argv);
while (args->script_argv && args->script_argv[count])
count++;
count = count + extra + 1; /* Add one to NULL terminate the array. */
launcher_argv = xcalloc(count, sizeof(launcher_argv[0]));
/*
* args->script_argv[0] (launcher_argv[3]) is usually set to
* script_path, but that is not guaranteed (e.g. if args->script_argv
* == NULL). We want to guarantee that script_path is set, so we set
* it to launcher_argv[2].
*/
launcher_argv[0] = script_launcher;
launcher_argv[1] = RUN_COMMAND_LAUNCHER_MODE;
launcher_argv[2] = (char *) args->script_path;
if (args->script_argv) {
for (int i = 0; args->script_argv[i]; i++)
launcher_argv[i + extra] = args->script_argv[i];
}
launcher_argv[count - 1] = NULL;
_log_str_array("launcher_argv", launcher_argv);
return launcher_argv;
}
/*
* Wrapper for execv/execve. This should never return.
*/
static void _run_command_child_exec(int fd, const char *path, char **argv,
char **env)
{
extern char **environ;
if (!env || !env[0])
env = environ;
if (fd >= 0)
fexecve(fd, argv, env);
else
execve(path, argv, env);
error("%s: execv(%s): %m", __func__, path);
_exit(127);
}
/*
* Called in the child before exec. Do setup like closing unneeded files and
* setting uid/gid.
*/
static void _run_command_child_pre_exec(void)
{
closeall(3);
/* coverity[leaked_handle] */
setpgid(0, 0);
/*
* sync euid -> ruid, egid -> rgid to avoid issues with fork'd
* processes using access() or similar calls.
*/
if (setresgid(getegid(), getegid(), -1)) {
error("%s: Unable to setresgid()", __func__);
_exit(127);
}
if (setresuid(geteuid(), geteuid(), -1)) {
error("%s: Unable to setresuid()", __func__);
_exit(127);
}
}
extern void run_command_launcher(int argc, char **argv)
{
char *script_path = argv[RUN_COMMAND_LAUNCHER_ARGC - 1];
char **script_argv = &argv[RUN_COMMAND_LAUNCHER_ARGC];
xassert(script_path);
_run_command_child_pre_exec();
_run_command_child_exec(-1, script_path, script_argv, NULL);
_exit(127);
}
extern char *run_command(run_command_args_t *args)
{
pid_t cpid;
char *resp = NULL;
char **launcher_argv = NULL;
int pfd_to_child[2] = { -1, -1 };
int pfd[2] = { -1, -1 };
bool free_argv = false;
if ((args->script_path == NULL) || (args->script_path[0] == '\0')) {
error("%s: no script specified", __func__);
*(args->status) = 127;
resp = xstrdup("Run command failed - configuration error");
return resp;
}
if (!args->ignore_path_exec_check) {
if (args->script_path[0] != '/') {
error("%s: %s is not a fully qualified pathname (%s)",
__func__, args->script_type, args->script_path);
*(args->status) = 127;
resp = xstrdup("Run command failed - configuration error");
return resp;
}
if (access(args->script_path, R_OK | X_OK) < 0) {
error("%s: %s can not be executed (%s) %m",
__func__, args->script_type, args->script_path);
*(args->status) = 127;
resp = xstrdup("Run command failed - configuration error");
return resp;
}
}
if ((pipe(pfd) != 0) ||
(args->write_to_child && (pipe(pfd_to_child) != 0))) {
error("%s: pipe(): %m", __func__);
fd_close(&pfd[0]);
fd_close(&pfd[1]);
fd_close(&pfd_to_child[0]);
fd_close(&pfd_to_child[1]);
*(args->status) = 127;
resp = xstrdup("System error");
return resp;
}
if (!(args->script_argv)) {
args->script_argv = xcalloc(2, sizeof(char *));
args->script_argv[0] = xstrdup(args->script_path);
free_argv = true;
}
slurm_mutex_lock(&proc_count_mutex);
child_proc_count++;
slurm_mutex_unlock(&proc_count_mutex);
if (script_launcher && !args->direct_exec)
launcher_argv = _setup_launcher_argv(args);
if ((cpid = fork()) == 0) {
/* Child writes to pfd[1] and reads from pfd_to_child[0] */
fd_close(&pfd_to_child[1]);
fd_close(&pfd[0]);
_run_command_child(args, pfd[1], pfd_to_child[0],
launcher_argv);
/* We should never get here. */
} else if (cpid < 0) {
close(pfd[0]);
close(pfd[1]);
fd_close(&pfd_to_child[0]);
fd_close(&pfd_to_child[1]);
error("%s: fork(): %m", __func__);
slurm_mutex_lock(&proc_count_mutex);
child_proc_count--;
slurm_mutex_unlock(&proc_count_mutex);
} else {
/* Parent writes to pfd_to_child[1] and reads from pfd[0] */
close(pfd[1]);
fd_close(&pfd_to_child[0]);
if (args->tid)
track_script_reset_cpid(args->tid, cpid);
if (args->cb)
args->cb(pfd_to_child[1], args->cb_arg);
/*
* Close the write pipe to the child immediately after it is
* used, before calling run_command_poll_child(). This means
* that the pipe will be closed before waiting for the child
* to finish. If an error happened during the write, when the
* child tries to read the required data from the pipe, the
* pipe will be closed and the child can exit.
*/
fd_close(&pfd_to_child[1]);
resp = run_command_poll_child(cpid,
args->max_wait,
args->orphan_on_shutdown,
pfd[0],
args->script_path,
args->script_type,
args->tid,
args->status,
args->timed_out);
close(pfd[0]);
slurm_mutex_lock(&proc_count_mutex);
child_proc_count--;
slurm_mutex_unlock(&proc_count_mutex);
}
if (free_argv) {
xfree(args->script_argv[0]);
xfree(args->script_argv);
}
log_flag(SCRIPT, "%s:script=%s, resp:\n%s",
__func__, args->script_path, resp);
/* Array contents were not malloc'd, do not free */
xfree(launcher_argv);
return resp;
}
extern char *run_command_poll_child(int cpid,
int max_wait,
bool orphan_on_shutdown,
int read_fd,
const char *script_path,
const char *script_type,
pthread_t tid,
int *status,
bool *timed_out)
{
bool send_terminate = true;
struct pollfd fds;
struct timeval tstart;
int resp_size = 1024, resp_offset = 0;
int new_wait;
int i;
char *resp;
resp = xmalloc(resp_size);
gettimeofday(&tstart, NULL);
while (1) {
if (command_shutdown) {
error("%s: %s %s operation on shutdown",
__func__,
orphan_on_shutdown ?
"orphaning" : "killing",
script_type);
break;
}
/*
* Pass zero as the status to just see if this script
* exists in track_script - if not, then we need to bail
* since this script was killed.
*/
if (tid &&
track_script_killed(tid, 0, false))
break;
fds.fd = read_fd;
fds.events = POLLIN | POLLHUP | POLLRDHUP;
fds.revents = 0;
if (max_wait <= 0) {
new_wait = MAX_POLL_WAIT;
} else {
new_wait = max_wait - timeval_tot_wait(&tstart);
if (new_wait <= 0) {
error("%s: %s poll timeout @ %d msec",
__func__, script_type,
max_wait);
if (timed_out)
*(timed_out) = true;
break;
}
new_wait = MIN(new_wait, MAX_POLL_WAIT);
}
i = poll(&fds, 1, new_wait);
if (i == 0) {
continue;
} else if (i < 0) {
if ((errno == EAGAIN) || (errno == EINTR))
continue;
error("%s: %s poll:%m",
__func__, script_type);
break;
}
if ((fds.revents & POLLIN) == 0) {
send_terminate = false;
break;
}
i = read(read_fd, resp + resp_offset,
resp_size - resp_offset);
if (i == 0) {
send_terminate = false;
break;
} else if (i < 0) {
if (errno == EAGAIN)
continue;
send_terminate = false;
error("%s: read(%s): %m",
__func__,
script_path);
break;
} else {
resp_offset += i;
if (resp_offset + 1024 >= resp_size) {
resp_size *= 2;
resp = xrealloc(resp, resp_size);
}
}
}
if (command_shutdown && orphan_on_shutdown) {
/* Don't kill the script on shutdown */
*status = 0;
} else if (send_terminate) {
/*
* Kill immediately if the script isn't exiting
* normally.
*/
_kill_pg(cpid);
waitpid(cpid, status, 0);
} else {
/*
* If the STDOUT is closed from the script we may reach
* this point without any input in read_fd, so just wait
* for the process here until max_wait.
*/
run_command_waitpid_timeout(script_type,
cpid, status,
max_wait,
timeval_tot_wait(&tstart),
tid, timed_out);
}
return resp;
}
/*
* run_command_waitpid_timeout()
*
* Same as waitpid(2) but kill process group for pid after timeout millisecs.
*/
extern int run_command_waitpid_timeout(
const char *name, pid_t pid, int *pstatus, int timeout_ms,
int elapsed_ms, pthread_t tid, bool *timed_out)
{
int max_delay = 1000; /* max delay between waitpid calls */
int delay = 10; /* initial delay */
int rc;
int options = WNOHANG;
int save_timeout_ms = timeout_ms;
bool killed_pg = false;
if (timeout_ms <= 0 || timeout_ms == NO_VAL16)
options = 0;
timeout_ms -= elapsed_ms;
while ((rc = waitpid (pid, pstatus, options)) <= 0) {
if (rc < 0) {
if (errno == EINTR)
continue;
error("%s: waitpid(%d): %m", __func__, pid);
return -1;
} else if (command_shutdown) {
error("%s: killing %s on shutdown",
__func__, name);
_kill_pg(pid);
killed_pg = true;
options = 0;
} else if (tid && track_script_killed(tid, 0, false)) {
/*
* Pass zero as the status to track_script_killed() to
* know if this script exists in track_script and bail
* if it does not.
*/
_kill_pg(pid);
killed_pg = true;
options = 0;
} else if (timeout_ms <= 0) {
error("%s%stimeout after %d ms: killing pgid %d",
name != NULL ? name : "",
name != NULL ? ": " : "",
save_timeout_ms, pid);
_kill_pg(pid);
killed_pg = true;
options = 0;
if (timed_out)
*timed_out = true;
} else {
(void) poll(NULL, 0, delay);
timeout_ms -= delay;
delay = MIN (timeout_ms, MIN(max_delay, delay*2));
}
}
if (!killed_pg)
_kill_pg(pid); /* kill children too */
return rc;
}