| /*****************************************************************************\ |
| * run_command.c - run a command asynchronously and return output |
| ***************************************************************************** |
| * Copyright (C) 2014-2017 SchedMD LLC. |
| * Written by Morris Jette <jette@schedmd.com> |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include "config.h" |
| |
| #define _GNU_SOURCE /* For POLLRDHUP */ |
| #include <fcntl.h> |
| #include <poll.h> |
| #include <signal.h> |
| #include <stdlib.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <sys/wait.h> |
| #include <unistd.h> |
| #include <inttypes.h> /* for uint16_t, uint32_t definitions */ |
| |
| #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) |
| #define POLLRDHUP POLLHUP |
| #endif |
| |
| #include "src/common/macros.h" |
| #include "src/common/timers.h" |
| #include "src/common/xmalloc.h" |
| #include "src/common/xstring.h" |
| #include "src/common/list.h" |
| #include "src/common/run_command.h" |
| |
/* Upper bound, in milliseconds, on any single poll() wait performed while
 * collecting a command's output. */
#define MAX_POLL_WAIT 500

/* Non-zero once run_command_shutdown() has been called; reset to zero by
 * run_command_init(). */
static int shutdown = 0;

/* proc_count_mutex serializes every access to child_proc_count, the value
 * reported by run_command_count(). */
static pthread_mutex_t proc_count_mutex = PTHREAD_MUTEX_INITIALIZER;
static int child_proc_count = 0;
| |
| /* used to initialize run_command module */ |
| extern void run_command_init(void) |
| { |
| shutdown = 0; |
| } |
| |
| /* used to terminate any outstanding commands */ |
| extern void run_command_shutdown(void) |
| { |
| shutdown = 1; |
| } |
| |
| /* Return count of child processes */ |
| extern int run_command_count(void) |
| { |
| int cnt; |
| |
| slurm_mutex_lock(&proc_count_mutex); |
| cnt = child_proc_count; |
| slurm_mutex_unlock(&proc_count_mutex); |
| |
| return cnt; |
| } |
| |
| |
| static int _tot_wait (struct timeval *start_time) |
| { |
| struct timeval end_time; |
| int msec_delay; |
| |
| gettimeofday(&end_time, NULL); |
| msec_delay = (end_time.tv_sec - start_time->tv_sec ) * 1000; |
| msec_delay += ((end_time.tv_usec - start_time->tv_usec + 500) / 1000); |
| return msec_delay; |
| } |
| |
| /* Execute a script, wait for termination and return its stdout. |
| * script_type IN - Type of program being run (e.g. "StartStageIn") |
| * script_path IN - Fully qualified pathname of the program to execute |
| * script_args IN - Arguments to the script |
| * max_wait IN - Maximum time to wait in milliseconds, |
| * -1 for no limit (asynchronous) |
| * tid IN - thread we are called from |
| * status OUT - Job exit code |
| * Return stdout+stderr of spawned program, value must be xfreed. */ |
| extern char *run_command(char *script_type, char *script_path, |
| char **script_argv, int max_wait, |
| pthread_t tid, int *status) |
| { |
| int i, new_wait, resp_size = 0, resp_offset = 0; |
| pid_t cpid; |
| char *resp = NULL; |
| int pfd[2] = { -1, -1 }; |
| |
| if ((script_path == NULL) || (script_path[0] == '\0')) { |
| error("%s: no script specified", __func__); |
| *status = 127; |
| resp = xstrdup("Run command failed - configuration error"); |
| return resp; |
| } |
| if (script_path[0] != '/') { |
| error("%s: %s is not fully qualified pathname (%s)", |
| __func__, script_type, script_path); |
| *status = 127; |
| resp = xstrdup("Run command failed - configuration error"); |
| return resp; |
| } |
| if (access(script_path, R_OK | X_OK) < 0) { |
| error("%s: %s can not be executed (%s) %m", |
| __func__, script_type, script_path); |
| *status = 127; |
| resp = xstrdup("Run command failed - configuration error"); |
| return resp; |
| } |
| if (max_wait != -1) { |
| if (pipe(pfd) != 0) { |
| error("%s: pipe(): %m", __func__); |
| *status = 127; |
| resp = xstrdup("System error"); |
| return resp; |
| } |
| } |
| slurm_mutex_lock(&proc_count_mutex); |
| child_proc_count++; |
| slurm_mutex_unlock(&proc_count_mutex); |
| if ((cpid = fork()) == 0) { |
| int cc; |
| |
| cc = sysconf(_SC_OPEN_MAX); |
| if (max_wait != -1) { |
| dup2(pfd[1], STDERR_FILENO); |
| dup2(pfd[1], STDOUT_FILENO); |
| for (i = 0; i < cc; i++) { |
| if ((i != STDERR_FILENO) && |
| (i != STDOUT_FILENO)) |
| close(i); |
| } |
| } else { |
| for (i = 0; i < cc; i++) |
| close(i); |
| if ((cpid = fork()) < 0) |
| _exit(127); |
| else if (cpid > 0) |
| _exit(0); |
| } |
| setpgid(0, 0); |
| execv(script_path, script_argv); |
| error("%s: execv(%s): %m", __func__, script_path); |
| _exit(127); |
| } else if (cpid < 0) { |
| if (max_wait != -1) { |
| close(pfd[0]); |
| close(pfd[1]); |
| } |
| error("%s: fork(): %m", __func__); |
| slurm_mutex_lock(&proc_count_mutex); |
| child_proc_count--; |
| slurm_mutex_unlock(&proc_count_mutex); |
| } else if (max_wait != -1) { |
| struct pollfd fds; |
| struct timeval tstart; |
| resp_size = 1024; |
| resp = xmalloc(resp_size); |
| close(pfd[1]); |
| gettimeofday(&tstart, NULL); |
| if (tid) |
| track_script_reset_cpid(tid, cpid); |
| while (1) { |
| if (shutdown) { |
| error("%s: killing %s operation on shutdown", |
| __func__, script_type); |
| break; |
| } |
| fds.fd = pfd[0]; |
| fds.events = POLLIN | POLLHUP | POLLRDHUP; |
| fds.revents = 0; |
| if (max_wait <= 0) { |
| new_wait = MAX_POLL_WAIT; |
| } else { |
| new_wait = max_wait - _tot_wait(&tstart); |
| if (new_wait <= 0) { |
| error("%s: %s poll timeout @ %d msec", |
| __func__, script_type, max_wait); |
| break; |
| } |
| new_wait = MIN(new_wait, MAX_POLL_WAIT); |
| } |
| i = poll(&fds, 1, new_wait); |
| if (i == 0) { |
| continue; |
| } else if (i < 0) { |
| error("%s: %s poll:%m", __func__, script_type); |
| break; |
| } |
| if ((fds.revents & POLLIN) == 0) |
| break; |
| i = read(pfd[0], resp + resp_offset, |
| resp_size - resp_offset); |
| if (i == 0) { |
| break; |
| } else if (i < 0) { |
| if (errno == EAGAIN) |
| continue; |
| error("%s: read(%s): %m", __func__, |
| script_path); |
| break; |
| } else { |
| resp_offset += i; |
| if (resp_offset + 1024 >= resp_size) { |
| resp_size *= 2; |
| resp = xrealloc(resp, resp_size); |
| } |
| } |
| } |
| killpg(cpid, SIGTERM); |
| usleep(10000); |
| killpg(cpid, SIGKILL); |
| waitpid(cpid, status, 0); |
| close(pfd[0]); |
| slurm_mutex_lock(&proc_count_mutex); |
| child_proc_count--; |
| slurm_mutex_unlock(&proc_count_mutex); |
| } else { |
| if (tid) |
| track_script_reset_cpid(tid, cpid); |
| waitpid(cpid, status, 0); |
| } |
| |
| return resp; |
| } |
| |
/*
 * Release an argument vector: every element up to (not including) the
 * terminating NULL entry is xfreed, then the array itself.
 * script_argv IN - NULL-terminated argument array; a NULL pointer is
 *		    accepted and ignored.
 */
extern void free_command_argv(char **script_argv)
{
	int i;

	if (!script_argv)
		return;

	for (i = 0; script_argv[i]; i++)
		xfree(script_argv[i]);
	xfree(script_argv);
}