| /*****************************************************************************\ |
| * src/common/env.c - add an environment variable to environment vector |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2009 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Mark Grondona <mgrondona@llnl.gov>, Danny Auble <da@llnl.gov>. |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include "config.h" |
| |
| #define _GNU_SOURCE /* For clone */ |
| #include <fcntl.h> |
| #include <limits.h> |
| #include <poll.h> |
| #include <signal.h> |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/mman.h> |
| #include <sys/mount.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include "slurm/slurm.h" |
| #include "src/common/cpu_frequency.h" |
| #include "src/common/log.h" |
| #include "src/common/env.h" |
| #include "src/common/fd.h" |
| #include "src/common/macros.h" |
| #include "src/common/proc_args.h" |
| #include "src/common/read_config.h" |
| #include "src/interfaces/select.h" |
| #include "src/common/slurm_opt.h" |
| #include "src/common/slurm_protocol_api.h" |
| #include "src/common/slurm_protocol_defs.h" |
| #include "src/common/slurm_step_layout.h" |
| #include "src/common/slurmdb_defs.h" |
| #include "src/common/spank.h" |
| #include "src/common/strlcpy.h" |
| #include "src/common/xassert.h" |
| #include "src/common/xmalloc.h" |
| #include "src/common/xstring.h" |
| |
| /* |
| * Define slurm-specific aliases for use by plugins, see slurm_xlator.h |
| * for details. |
| */ |
| strong_alias(setenvf, slurm_setenvpf); |
| strong_alias(unsetenvp, slurm_unsetenvp); |
| strong_alias(getenvp, slurm_getenvp); |
| strong_alias(env_array_create, slurm_env_array_create); |
| strong_alias(env_array_merge, slurm_env_array_merge); |
| strong_alias(env_array_copy, slurm_env_array_copy); |
| strong_alias(env_array_free, slurm_env_array_free); |
| strong_alias(env_array_append, slurm_env_array_append); |
| strong_alias(env_array_append_fmt, slurm_env_array_append_fmt); |
| strong_alias(env_array_overwrite, slurm_env_array_overwrite); |
| strong_alias(env_array_overwrite_fmt, slurm_env_array_overwrite_fmt); |
| strong_alias(env_array_overwrite_het_fmt, slurm_env_array_overwrite_het_fmt); |
| strong_alias(env_unset_environment, slurm_env_unset_environment); |
| |
| #define ENV_BUFSIZE (256 * 1024) |
| #define MAX_ENV_STRLEN (32 * 4096) /* Needed for CPU_BIND and MEM_BIND on |
| * SGI systems with huge CPU counts */ |
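
/*
 * Arguments handed to the child process used to capture a user's login
 * environment (the --get-user-env support later in this file). This
 * description is inferred from the field names; see the code that
 * populates the structure for the details.
 */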
| typedef struct { |
| char *cmdstr; |
| int *fildes; |
| bool perform_mount; |
| int rlimit; |
| char **tmp_env; |
| const char *username; |
| } child_args_t; |
| |
| /* |
| * Return pointer to `name' entry in environment if found, or |
| * pointer to the last entry (i.e. NULL) if `name' is not |
| * currently set in `env' |
| * |
| */ |
| static char ** |
| _find_name_in_env(char **env, const char *name) |
| { |
| char **ep; |
| |
| ep = env; |
| while (*ep != NULL) { |
| size_t cnt = 0; |
| |
| while ( ((*ep)[cnt] == name[cnt]) |
| && ( name[cnt] != '\0') |
| && ((*ep)[cnt] != '\0') ) |
| ++cnt; |
| |
| if (name[cnt] == '\0' && (*ep)[cnt] == '=') { |
| break; |
| } else |
| ++ep; |
| } |
| |
| return (ep); |
| } |
| |
| /* |
| * Extend memory allocation for env by 1 entry. Make last entry == NULL. |
| * return pointer to last env entry; |
| */ |
| static char ** |
| _extend_env(char ***envp) |
| { |
| char **ep; |
| size_t newcnt = PTR_ARRAY_SIZE(*envp) + 1; |
| |
| *envp = xrealloc (*envp, newcnt * sizeof (char *)); |
| |
| (*envp)[newcnt - 1] = NULL; |
| ep = &((*envp)[newcnt - 2]); |
| |
| /* |
| * Find last non-NULL entry |
| */ |
| while (*ep == NULL) |
| --ep; |
| |
| return (++ep); |
| } |
| |
/* Return true if this environment variable should not be propagated
 * when loading the user's environment for srun's --get-user-env option */
| static bool _discard_env(char *name, char *value) |
| { |
| if ((xstrcmp(name, "DISPLAY") == 0) || |
| (xstrcmp(name, "ENVIRONMENT") == 0) || |
| (xstrcmp(name, "HOSTNAME") == 0)) |
| return true; |
| |
| return false; |
| } |
| |
| /* |
| * Return the number of elements in the environment `env' |
| */ |
| int |
| envcount (char **env) |
| { |
| int envc = 0; |
| while (env && env[envc]) |
| envc++; |
| return (envc); |
| } |
| |
| /* |
 * setenvfs() (stolen from pdsh)
 *
 * Set a variable in the caller's environment. Args are printf style.
| * XXX Space is allocated on the heap and will never be reclaimed. |
| * Example: setenvfs("RMS_RANK=%d", rank); |
| */ |
| int |
| setenvfs(const char *fmt, ...) |
| { |
| va_list ap; |
| char *buf, *bufcpy, *loc; |
| int rc, size; |
| |
| buf = xmalloc(ENV_BUFSIZE); |
| va_start(ap, fmt); |
| vsnprintf(buf, ENV_BUFSIZE, fmt, ap); |
| va_end(ap); |
| |
| size = strlen(buf); |
| bufcpy = xstrdup(buf); |
| xfree(buf); |
| |
| if (size >= MAX_ENV_STRLEN) { |
| if ((loc = strchr(bufcpy, '='))) |
| loc[0] = '\0'; |
| error("environment variable %s is too long", bufcpy); |
| xfree(bufcpy); |
| rc = ENOMEM; |
| } else { |
| rc = putenv(bufcpy); |
| } |
| |
| return rc; |
| } |
| |
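/*
 * setenvf() - set an environment variable using a printf-style format.
 *
 * If "envp" points to a non-NULL environment array, the variable is set
 * (or overwritten) in that array via env_array_overwrite(); otherwise it
 * is set in the process environment with setenv(). Returns 0 on success
 * and a non-zero error code otherwise.
 *
 * Example (illustrative; "job_env" and "ntasks" are placeholders):
 *	setenvf(&job_env, "SLURM_NTASKS", "%d", ntasks);
 */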
| int setenvf(char ***envp, const char *name, const char *fmt, ...) |
| { |
| char *value; |
| va_list ap; |
| int size, rc; |
| |
| if (!name || name[0] == '\0') |
| return EINVAL; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| va_start(ap, fmt); |
| vsnprintf(value, ENV_BUFSIZE, fmt, ap); |
| va_end(ap); |
| |
| size = strlen(name) + strlen(value) + 2; |
	if (size >= MAX_ENV_STRLEN) {
		error("environment variable %s is too long", name);
		xfree(value);
		return ENOMEM;
	}
| |
| if (envp && *envp) { |
| if (env_array_overwrite(envp, name, value) == 1) |
| rc = 0; |
| else |
| rc = 1; |
| } else { |
| rc = setenv(name, value, 1); |
| } |
| |
| xfree(value); |
| return rc; |
| } |
| |
| /* |
| * Remove environment variable `name' from "environment" |
| * contained in `env' |
| * |
| * [ This was taken almost verbatim from glibc's |
| * unsetenv() code. ] |
| */ |
| void unsetenvp(char **env, const char *name) |
| { |
| char **ep; |
| |
| if (env == NULL) |
| return; |
| |
| ep = env; |
| while ((ep = _find_name_in_env (ep, name)) && (*ep != NULL)) { |
| char **dp = ep; |
| xfree (*ep); |
| do |
| dp[0] = dp[1]; |
| while (*dp++); |
| |
| /* Continue loop in case `name' appears again. */ |
| ++ep; |
| } |
| return; |
| } |
| |
| char *getenvp(char **env, const char *name) |
| { |
| size_t len; |
| char **ep; |
| |
| if (!name || !env || !env[0]) |
| return (NULL); |
| |
| len = strlen(name); |
| ep = _find_name_in_env (env, name); |
| |
| if (*ep != NULL) |
| return (&(*ep)[len+1]); |
| |
| return NULL; |
| } |
| |
| int setup_env(env_t *env, bool preserve_env) |
| { |
| int rc = SLURM_SUCCESS; |
| char *dist = NULL; |
| char addrbuf[INET6_ADDRSTRLEN]; |
| |
| if (env == NULL) |
| return SLURM_ERROR; |
| |
| /* |
| * Always force SLURM_CONF into the environment. This ensures the |
| * "configless" operation is working, and prevents the client commands |
| * from falling back to separate RPC requests in case the cache dir |
| * is unresponsive. |
| */ |
| if (setenvf(&env->env, "SLURM_CONF", "%s", getenv("SLURM_CONF"))) { |
| error("Unable to set SLURM_CONF environment variable"); |
| rc = SLURM_ERROR; |
| } |
| /* |
| * Similarly, prevent this option from leaking in. SLURM_CONF would |
	 * always take precedence, but tidy it up anyway.
| */ |
| unsetenvp(env->env, "SLURM_CONF_SERVER"); |
| |
| if (!preserve_env && env->ntasks) { |
| if (setenvf(&env->env, "SLURM_NTASKS", "%d", env->ntasks)) { |
| error("Unable to set SLURM_NTASKS environment variable"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SLURM_NPROCS", "%d", env->ntasks)) { |
| error("Unable to set SLURM_NPROCS environment variable"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->cpus_per_task && |
| setenvf(&env->env, "SLURM_CPUS_PER_TASK", "%d", |
| env->cpus_per_task) ) { |
| error("Unable to set SLURM_CPUS_PER_TASK"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->ntasks_per_gpu && |
| setenvf(&env->env, "SLURM_NTASKS_PER_GPU", "%d", |
| env->ntasks_per_gpu)) { |
| error("Unable to set SLURM_NTASKS_PER_GPU"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->ntasks_per_node |
| && setenvf(&env->env, "SLURM_NTASKS_PER_NODE", "%d", |
| env->ntasks_per_node) ) { |
| error("Unable to set SLURM_NTASKS_PER_NODE"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->ntasks_per_socket |
| && setenvf(&env->env, "SLURM_NTASKS_PER_SOCKET", "%d", |
| env->ntasks_per_socket) ) { |
| error("Unable to set SLURM_NTASKS_PER_SOCKET"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->ntasks_per_core |
| && setenvf(&env->env, "SLURM_NTASKS_PER_CORE", "%d", |
| env->ntasks_per_core) ) { |
| error("Unable to set SLURM_NTASKS_PER_CORE"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->ntasks_per_tres |
| && setenvf(&env->env, "SLURM_NTASKS_PER_TRES", "%d", |
| env->ntasks_per_tres) ) { |
| error("Unable to set SLURM_NTASKS_PER_TRES"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->cpus_on_node |
| && setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d", |
| env->cpus_on_node) ) { |
| error("Unable to set SLURM_CPUS_ON_NODE"); |
| rc = SLURM_ERROR; |
| } |
| |
| set_distribution(env->distribution, &dist); |
| if (dist) { |
| if (setenvf(&env->env, "SLURM_DISTRIBUTION", "%s", dist)) { |
| error("Can't set SLURM_DISTRIBUTION env variable"); |
| rc = SLURM_ERROR; |
| } |
| xfree(dist); |
| } |
| |
| if ((env->distribution & SLURM_DIST_STATE_BASE) == SLURM_DIST_PLANE) |
| if (setenvf(&env->env, "SLURM_DIST_PLANESIZE", "%u", |
| env->plane_size)) { |
| error("Can't set SLURM_DIST_PLANESIZE env variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->cpu_bind_type && !env->batch_flag && |
| (env->stepid != SLURM_INTERACTIVE_STEP)) { |
| char *str_verbose, *str_bind1 = NULL, *str_bind2 = NULL; |
| char *str_bind_list, *str_bind_type = NULL, *str_bind = NULL; |
| bool append_cpu_bind = false; |
| |
| unsetenvp(env->env, "SLURM_CPU_BIND"); |
| unsetenvp(env->env, "SLURM_CPU_BIND_LIST"); |
| unsetenvp(env->env, "SLURM_CPU_BIND_TYPE"); |
| unsetenvp(env->env, "SLURM_CPU_BIND_VERBOSE"); |
| |
| if (env->cpu_bind_type & CPU_BIND_VERBOSE) |
| str_verbose = "verbose"; |
| else |
| str_verbose = "quiet"; |
| |
| if (env->cpu_bind_type & CPU_BIND_TO_THREADS) { |
| str_bind1 = "threads"; |
| } else if (env->cpu_bind_type & CPU_BIND_TO_CORES) { |
| str_bind1 = "cores"; |
| } else if (env->cpu_bind_type & CPU_BIND_TO_SOCKETS) { |
| str_bind1 = "sockets"; |
| } else if (env->cpu_bind_type & CPU_BIND_TO_LDOMS) { |
| str_bind1 = "ldoms"; |
| } |
| |
| if (env->cpu_bind_type & CPU_BIND_NONE) { |
| str_bind2 = "none"; |
| } else if (env->cpu_bind_type & CPU_BIND_MAP) { |
| str_bind2 = "map_cpu:"; |
| append_cpu_bind = true; |
| } else if (env->cpu_bind_type & CPU_BIND_MASK) { |
| str_bind2 = "mask_cpu:"; |
| append_cpu_bind = true; |
| } else if (env->cpu_bind_type & CPU_BIND_LDRANK) { |
| str_bind2 = "rank_ldom"; |
| } else if (env->cpu_bind_type & CPU_BIND_LDMAP) { |
| str_bind2 = "map_ldom:"; |
| append_cpu_bind = true; |
| } else if (env->cpu_bind_type & CPU_BIND_LDMASK) { |
| str_bind2 = "mask_ldom:"; |
| append_cpu_bind = true; |
| } |
| |
| if (env->cpu_bind && append_cpu_bind) |
| str_bind_list = env->cpu_bind; |
| else |
| str_bind_list = ""; |
| |
| /* combine first and second part with a comma if needed */ |
| if (str_bind1) |
| xstrcat(str_bind_type, str_bind1); |
| if (str_bind1 && str_bind2) |
| xstrcatchar(str_bind_type, ','); |
| if (str_bind2) |
| xstrcat(str_bind_type, str_bind2); |
| |
| xstrcat(str_bind, str_verbose); |
| if (str_bind_type) { |
| xstrcatchar(str_bind, ','); |
| xstrcat(str_bind, str_bind_type); |
| xstrcat(str_bind, str_bind_list); |
| } else |
| str_bind_type = xstrdup(""); |
| |
| /* |
| * Don't set SLURM_CPU_BIND or SLURM_CPU_BIND_LIST in the |
| * environment if they are too long. These are informational |
| * for the user and don't merit an error in the log if they |
| * can't be set, so avoid calling setenvf(). |
| */ |
| if (strlen(str_bind) >= MAX_ENV_STRLEN) |
| debug("Not setting SLURM_CPU_BIND: value too long"); |
| else if (setenvf(&env->env, "SLURM_CPU_BIND", "%s", str_bind)) { |
| error("Unable to set SLURM_CPU_BIND"); |
| rc = SLURM_ERROR; |
| } |
| if (strlen(str_bind_list) >= MAX_ENV_STRLEN) |
| debug("Not setting SLURM_CPU_BIND_LIST: value too long"); |
| else if (setenvf(&env->env, "SLURM_CPU_BIND_LIST", "%s", |
| str_bind_list)) { |
| error("Unable to set SLURM_CPU_BIND_LIST"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SLURM_CPU_BIND_TYPE", "%s", |
| str_bind_type)) { |
| error("Unable to set SLURM_CPU_BIND_TYPE"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SLURM_CPU_BIND_VERBOSE", "%s", |
| str_verbose)) { |
| error("Unable to set SLURM_CPU_BIND_VERBOSE"); |
| rc = SLURM_ERROR; |
| } |
| |
| xfree(str_bind); |
| xfree(str_bind_type); |
| } |
| |
| if (env->mem_bind_type && (env->stepid != SLURM_INTERACTIVE_STEP)) { |
| char *str_verbose, *str_bind_type = NULL, *str_bind_list; |
| char *str_prefer = NULL, *str_bind = NULL; |
| char *str_bind_sort = NULL; |
| |
| if (env->batch_flag) { |
| unsetenvp(env->env, "SBATCH_MEM_BIND"); |
| unsetenvp(env->env, "SBATCH_MEM_BIND_LIST"); |
| unsetenvp(env->env, "SBATCH_MEM_BIND_PREFER"); |
| unsetenvp(env->env, "SBATCH_MEM_BIND_TYPE"); |
| unsetenvp(env->env, "SBATCH_MEM_BIND_VERBOSE"); |
| } else { |
| unsetenvp(env->env, "SLURM_MEM_BIND"); |
| unsetenvp(env->env, "SLURM_MEM_BIND_LIST"); |
| unsetenvp(env->env, "SLURM_MEM_BIND_PREFER"); |
| unsetenvp(env->env, "SLURM_MEM_BIND_SORT"); |
| unsetenvp(env->env, "SLURM_MEM_BIND_TYPE"); |
| unsetenvp(env->env, "SLURM_MEM_BIND_VERBOSE"); |
| } |
| |
| if (env->mem_bind_type & MEM_BIND_VERBOSE) |
| str_verbose = "verbose"; |
| else |
| str_verbose = "quiet"; |
| if (env->mem_bind_type & MEM_BIND_PREFER) |
| str_prefer = "prefer"; |
| if (env->mem_bind_type & MEM_BIND_NONE) { |
| str_bind_type = "none"; |
| } else if (env->mem_bind_type & MEM_BIND_RANK) { |
| str_bind_type = "rank"; |
| } else if (env->mem_bind_type & MEM_BIND_MAP) { |
| str_bind_type = "map_mem:"; |
| } else if (env->mem_bind_type & MEM_BIND_MASK) { |
| str_bind_type = "mask_mem:"; |
| } else if (env->mem_bind_type & MEM_BIND_LOCAL) { |
| str_bind_type = "local"; |
| } |
| |
| if (env->mem_bind_type & MEM_BIND_SORT) |
| str_bind_sort = "sort"; |
| |
| if (env->mem_bind) |
| str_bind_list = env->mem_bind; |
| else |
| str_bind_list = ""; |
| |
| xstrcat(str_bind, str_verbose); |
| if (str_prefer) { |
| xstrcatchar(str_bind, ','); |
| xstrcat(str_bind, str_prefer); |
| } |
| if (str_bind_type) { |
| xstrcatchar(str_bind, ','); |
| xstrcat(str_bind, str_bind_type); |
| xstrcat(str_bind, str_bind_list); |
| } else |
| str_bind_type = ""; |
| |
| if (env->batch_flag) { |
| if (setenvf(&env->env, "SBATCH_MEM_BIND", "%s", str_bind)) { |
| error("Unable to set SBATCH_MEM_BIND"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SBATCH_MEM_BIND_LIST", "%s", |
| str_bind_list)) { |
| error("Unable to set SBATCH_MEM_BIND_LIST"); |
| rc = SLURM_ERROR; |
| } |
| if (str_prefer && |
| setenvf(&env->env, "SBATCH_MEM_BIND_PREFER", "%s", |
| str_prefer)) { |
| error("Unable to set SBATCH_MEM_BIND_PREFER"); |
| rc = SLURM_ERROR; |
| } |
| if (str_bind_sort && |
| setenvf(&env->env, "SBATCH_MEM_BIND_SORT", "%s", |
| str_bind_sort)) { |
| error("Unable to set SBATCH_MEM_BIND_SORT"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SBATCH_MEM_BIND_TYPE", "%s", |
| str_bind_type)) { |
| error("Unable to set SBATCH_MEM_BIND_TYPE"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SBATCH_MEM_BIND_VERBOSE", "%s", |
| str_verbose)) { |
| error("Unable to set SBATCH_MEM_BIND_VERBOSE"); |
| rc = SLURM_ERROR; |
| } |
| } else { |
| if (setenvf(&env->env, "SLURM_MEM_BIND", "%s", str_bind)) { |
| error("Unable to set SLURM_MEM_BIND"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SLURM_MEM_BIND_LIST", "%s", |
| str_bind_list)) { |
| error("Unable to set SLURM_MEM_BIND_LIST"); |
| rc = SLURM_ERROR; |
| } |
| if (str_prefer && |
| setenvf(&env->env, "SLURM_MEM_BIND_PREFER", "%s", |
| str_prefer)) { |
| error("Unable to set SLURM_MEM_BIND_PREFER"); |
| rc = SLURM_ERROR; |
| } |
| if (str_bind_sort && |
| setenvf(&env->env, "SLURM_MEM_BIND_SORT", "%s", |
| str_bind_sort)) { |
| error("Unable to set SLURM_MEM_BIND_SORT"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SLURM_MEM_BIND_TYPE", "%s", |
| str_bind_type)) { |
| error("Unable to set SLURM_MEM_BIND_TYPE"); |
| rc = SLURM_ERROR; |
| } |
| if (setenvf(&env->env, "SLURM_MEM_BIND_VERBOSE", "%s", |
| str_verbose)) { |
| error("Unable to set SLURM_MEM_BIND_VERBOSE"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| xfree(str_bind); |
| } |
| |
| if (cpu_freq_set_env("SLURM_CPU_FREQ_REQ", env->cpu_freq_min, |
| env->cpu_freq_max, env->cpu_freq_gov) != SLURM_SUCCESS) |
| rc = SLURM_ERROR; |
| |
| if (env->overcommit |
| && (setenvf(&env->env, "SLURM_OVERCOMMIT", "%s", "1"))) { |
| error("Unable to set SLURM_OVERCOMMIT environment variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->oom_kill_step != NO_VAL16 && |
| setenvf(&env->env, "SLURM_OOM_KILL_STEP", "%u", env->oom_kill_step)) { |
| error("Unable to set SLURM_OOM_KILL_STEP environment"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->slurmd_debug |
| && setenvf(&env->env, "SLURMD_DEBUG", "%d", env->slurmd_debug)) { |
| error("Can't set SLURMD_DEBUG environment variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->labelio |
| && setenvf(&env->env, "SLURM_LABELIO", "1")) { |
| error("Unable to set SLURM_LABELIO environment variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->job_end_time) { |
| if (setenvf(&env->env, "SLURM_JOB_END_TIME", "%lu", |
| env->job_end_time)) { |
| error("Unable to set SLURM_JOB_END_TIME environment variable"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->jobid >= 0) { |
| if (setenvf(&env->env, "SLURM_JOB_ID", "%d", env->jobid)) { |
| error("Unable to set SLURM_JOB_ID environment"); |
| rc = SLURM_ERROR; |
| } |
| /* and for backwards compatibility... */ |
| if (setenvf(&env->env, "SLURM_JOBID", "%d", env->jobid)) { |
| error("Unable to set SLURM_JOBID environment"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->job_licenses) { |
| if (setenvf(&env->env, "SLURM_JOB_LICENSES", "%s", |
| env->job_licenses)) { |
| error("Unable to set SLURM_JOB_LICENSES environment"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->job_name) { |
| if (setenvf(&env->env, "SLURM_JOB_NAME", "%s", env->job_name)) { |
| error("Unable to set SLURM_JOB_NAME environment"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->job_start_time) { |
| if (setenvf(&env->env, "SLURM_JOB_START_TIME", "%lu", |
| env->job_start_time)) { |
| error("Unable to set SLURM_JOB_START_TIME environment"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| /* |
	 * These aren't relevant to a system that is not using Slurm as the
	 * launcher. Since there isn't a flag for that, we check for the
	 * flags we do have.
| */ |
| if (env->task_pid && |
| setenvf(&env->env, "SLURM_TASK_PID", "%d", |
| (int)env->task_pid)) { |
| error("Unable to set SLURM_TASK_PID environment " |
| "variable"); |
| rc = SLURM_ERROR; |
| } |
| if ((env->nodeid >= 0) && |
| setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) { |
| error("Unable to set SLURM_NODEID environment"); |
| rc = SLURM_ERROR; |
| } |
| |
| if ((env->procid >= 0) && |
| setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) { |
| error("Unable to set SLURM_PROCID environment"); |
| rc = SLURM_ERROR; |
| } |
| |
| if ((env->localid >= 0) && |
| setenvf(&env->env, "SLURM_LOCALID", "%d", env->localid)) { |
| error("Unable to set SLURM_LOCALID environment"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->stepid >= 0) { |
| if (setenvf(&env->env, "SLURM_STEP_ID", "%d", env->stepid)) { |
| error("Unable to set SLURM_STEP_ID environment"); |
| rc = SLURM_ERROR; |
| } |
| /* and for backwards compatibility... */ |
| if (setenvf(&env->env, "SLURM_STEPID", "%d", env->stepid)) { |
| error("Unable to set SLURM_STEPID environment"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (!preserve_env && env->nhosts |
| && setenvf(&env->env, "SLURM_NNODES", "%d", env->nhosts)) { |
| error("Unable to set SLURM_NNODES environment var"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->nhosts |
| && setenvf(&env->env, "SLURM_JOB_NUM_NODES", "%d", env->nhosts)) { |
| error("Unable to set SLURM_JOB_NUM_NODES environment var"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->nodelist && |
| setenvf(&env->env, "SLURM_NODELIST", "%s", env->nodelist)) { |
| error("Unable to set SLURM_NODELIST environment var."); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->partition |
| && setenvf(&env->env, "SLURM_JOB_PARTITION", "%s", env->partition)) { |
| error("Unable to set SLURM_JOB_PARTITION environment var."); |
| rc = SLURM_ERROR; |
| } |
| |
| if (!preserve_env && env->task_count |
| && setenvf (&env->env, |
| "SLURM_TASKS_PER_NODE", "%s", env->task_count)) { |
| error ("Can't set SLURM_TASKS_PER_NODE env variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (!preserve_env && env->threads_per_core && |
| setenvf(&env->env, "SLURM_THREADS_PER_CORE", "%d", |
| env->threads_per_core)) { |
| error("Can't set SLURM_THREADS_PER_CORE env variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->comm_port |
| && setenvf (&env->env, "SLURM_SRUN_COMM_PORT", "%u", |
| env->comm_port)) { |
| error ("Can't set SLURM_SRUN_COMM_PORT env variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->cli) { |
| slurm_get_ip_str(env->cli, addrbuf, INET6_ADDRSTRLEN); |
| setenvf(&env->env, "SLURM_LAUNCH_NODE_IPADDR", "%s", addrbuf); |
| } |
| |
| if (env->sgtids && |
| setenvf(&env->env, "SLURM_GTIDS", "%s", env->sgtids)) { |
| error("Unable to set SLURM_GTIDS environment variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->pty_port |
| && setenvf(&env->env, "SLURM_PTY_PORT", "%hu", env->pty_port)) { |
| error("Can't set SLURM_PTY_PORT env variable"); |
| rc = SLURM_ERROR; |
| } |
| if (env->ws_col |
| && setenvf(&env->env, "SLURM_PTY_WIN_COL", "%hu", env->ws_col)) { |
| error("Can't set SLURM_PTY_WIN_COL env variable"); |
| rc = SLURM_ERROR; |
| } |
| if (env->ws_row |
| && setenvf(&env->env, "SLURM_PTY_WIN_ROW", "%hu", env->ws_row)) { |
| error("Can't set SLURM_PTY_WIN_ROW env variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->restart_cnt && |
| setenvf(&env->env, "SLURM_RESTART_COUNT", "%u", env->restart_cnt)) { |
| error("Can't set SLURM_RESTART_COUNT env variable"); |
| rc = SLURM_ERROR; |
| } |
| |
| if (env->uid != SLURM_AUTH_NOBODY) { |
| if (setenvf(&env->env, "SLURM_JOB_UID", "%u", |
| (unsigned int) env->uid)) { |
| error("Can't set SLURM_JOB_UID env variable"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->user_name) { |
| if (setenvf(&env->env, "SLURM_JOB_USER", "%s", env->user_name)){ |
| error("Can't set SLURM_JOB_USER env variable"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->gid != SLURM_AUTH_NOBODY) { |
| if (setenvf(&env->env, "SLURM_JOB_GID", "%u", env->gid)) { |
| error("Can't set SLURM_JOB_GID env variable"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->group_name) { |
| if (setenvf(&env->env, "SLURM_JOB_GROUP", "%s", |
| env->group_name)) { |
| error("Can't set SLURM_JOB_GROUP env variable"); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| if (env->account) { |
| if (setenvf(&env->env, |
| "SLURM_JOB_ACCOUNT", |
| "%s", |
| env->account)) { |
| error("%s: can't set SLURM_JOB_ACCOUNT env variable", |
| __func__); |
| rc = SLURM_ERROR; |
| } |
| } |
| if (env->qos) { |
| if (setenvf(&env->env, |
| "SLURM_JOB_QOS", |
| "%s", |
| env->qos)) { |
| error("%s: can't set SLURM_JOB_QOS env variable", |
| __func__); |
| rc = SLURM_ERROR; |
| } |
| } |
| if (env->resv_name) { |
| if (setenvf(&env->env, |
| "SLURM_JOB_RESERVATION", |
| "%s", |
| env->resv_name)) { |
| error("%s: can't set SLURM_JOB_RESERVATION env variable", |
| __func__); |
| rc = SLURM_ERROR; |
| } |
| } |
| |
| return rc; |
| } |
| |
| /********************************************************************** |
| * From here on are the new environment variable management functions, |
| * used by the "new" commands: salloc, sbatch, and the step launch APIs. |
| **********************************************************************/ |
| |
| /* |
| * Return a string representation of an array of uint16_t elements. |
| * Each value in the array is printed in decimal notation and elements |
| * are separated by a comma. If sequential elements in the array |
| * contain the same value, the value is written out just once followed |
| * by "(xN)", where "N" is the number of times the value is repeated. |
| * |
| * Example: |
| * The array "1, 2, 1, 1, 1, 3, 2" becomes the string "1,2,1(x3),3,2" |
| * |
| * Returns an xmalloc'ed string. Free with xfree(). |
| */ |
| extern char *uint16_array_to_str(int array_len, const uint16_t *array) |
| { |
| int i; |
| int previous = 0; |
| char *sep = ","; /* separator */ |
| char *str = xstrdup(""); |
| |
| if (array == NULL) |
| return str; |
| |
| for (i = 0; i < array_len; i++) { |
| if ((i+1 < array_len) && (array[i] == array[i+1])) { |
| previous++; |
| continue; |
| } |
| |
| if (i == array_len-1) /* last time through loop */ |
| sep = ""; |
| if (previous > 0) { |
| xstrfmtcat(str, "%u(x%u)%s", |
| array[i], previous+1, sep); |
| } else { |
| xstrfmtcat(str, "%u%s", array[i], sep); |
| } |
| previous = 0; |
| } |
| |
| return str; |
| } |
| |
| |
| /* |
| * The cpus-per-node representation in Slurm (and perhaps tasks-per-node |
| * in the future) is stored in a compressed format comprised of two |
| * equal-length arrays, and an integer holding the array length. In one |
| * array an element represents a count (number of cpus, number of tasks, |
| * etc.), and the corresponding element in the other array contains the |
| * number of times the count is repeated sequentially in the uncompressed |
| * something-per-node array. |
| * |
| * This function returns the string representation of the compressed |
| * array. Free with xfree(). |
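 *
 * Example (illustrative): with array_len = 2, array = {16, 4} and
 * array_reps = {3, 1}, the returned string is "16(x3),4".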
| */ |
| extern char *uint32_compressed_to_str(uint32_t array_len, |
| const uint16_t *array, |
| const uint32_t *array_reps) |
| { |
| int i; |
| char *sep = ","; /* separator */ |
| char *str = xstrdup(""); |
| |
| if (!array || !array_reps) |
| return str; |
| |
| for (i = 0; i < array_len; i++) { |
| if (i == array_len-1) /* last time through loop */ |
| sep = ""; |
| if (array_reps[i] > 1) { |
| xstrfmtcat(str, "%u(x%u)%s", |
| array[i], array_reps[i], sep); |
| } else { |
| xstrfmtcat(str, "%u%s", array[i], sep); |
| } |
| } |
| |
| return str; |
| } |
| |
| /* |
| * Set in "dest" the environment variables relevant to a Slurm job |
| * allocation, overwriting any environment variables of the same name. |
| * If the address pointed to by "dest" is NULL, memory will automatically be |
| * xmalloc'ed. The array is terminated by a NULL pointer, and thus is |
| * suitable for use by execle() and other env_array_* functions. |
| * |
| * Sets the variables: |
| * SLURM_JOB_ID |
| * SLURM_JOB_NAME |
| * SLURM_JOB_NUM_NODES |
| * SLURM_JOB_NODELIST |
| * SLURM_JOB_CPUS_PER_NODE |
| * SLURM_NTASKS_PER_NODE |
| * |
| * dest OUT - array in which to the set environment variables |
| * alloc IN - resource allocation response |
| * desc IN - job allocation request |
| * het_job_offset IN - component offset into hetjob, -1 if not hetjob |
| * |
| * Sets OBSOLETE variables (needed for MPI, do not remove): |
| * SLURM_JOBID |
| * SLURM_NNODES |
| * SLURM_NODELIST |
| * SLURM_NPROCS |
| * SLURM_TASKS_PER_NODE |
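 *
 * Illustrative use (a sketch, not from the original source; "alloc" and
 * "desc" stand for the allocation response and job request the caller
 * already holds):
 *	char **job_env = env_array_create();
 *	env_array_for_job(&job_env, alloc, desc, -1);   (-1: not a hetjob)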
| */ |
| extern int env_array_for_job(char ***dest, |
| const resource_allocation_response_msg_t *alloc, |
| const job_desc_msg_t *desc, int het_job_offset) |
| { |
| char *tmp = NULL; |
| char *dist = NULL; |
| char *key, *value; |
| slurm_step_layout_t *step_layout = NULL; |
| int i, new_cpt, rc = SLURM_SUCCESS; |
| slurm_step_layout_req_t step_layout_req; |
| uint16_t cpus_per_task_array[1]; |
| uint32_t cpus_task_reps[1]; |
| |
| if (!alloc || !desc) |
| return SLURM_ERROR; |
| |
| memset(&step_layout_req, 0, sizeof(slurm_step_layout_req_t)); |
| step_layout_req.num_tasks = desc->num_tasks; |
| step_layout_req.num_hosts = alloc->node_cnt; |
| cpus_per_task_array[0] = desc->cpus_per_task; |
| cpus_task_reps[0] = alloc->node_cnt; |
| |
| if (het_job_offset < 1) { |
| env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", |
| alloc->job_id); |
| } |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_ID", het_job_offset, |
| "%u", alloc->job_id); |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_NAME", het_job_offset, |
| "%s", desc->name); |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_NUM_NODES", het_job_offset, |
| "%u", step_layout_req.num_hosts); |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_NODELIST", het_job_offset, |
| "%s", alloc->node_list); |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_PARTITION", het_job_offset, |
| "%s", alloc->partition); |
| |
| set_distribution(desc->task_dist, &dist); |
| if (dist) { |
| env_array_overwrite_het_fmt(dest, "SLURM_DISTRIBUTION", |
| het_job_offset, "%s", dist); |
| xfree(dist); |
| } |
| if ((desc->task_dist & SLURM_DIST_STATE_BASE) == SLURM_DIST_PLANE) { |
| env_array_overwrite_het_fmt(dest, "SLURM_DIST_PLANESIZE", |
| het_job_offset, "%u", |
| desc->plane_size); |
| } |
| tmp = uint32_compressed_to_str(alloc->num_cpu_groups, |
| alloc->cpus_per_node, |
| alloc->cpu_count_reps); |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", |
| het_job_offset, "%s", tmp); |
| xfree(tmp); |
| |
| if (desc->threads_per_core != NO_VAL16) |
| env_array_overwrite_het_fmt(dest, "SLURM_THREADS_PER_CORE", |
| het_job_offset, "%d", |
| desc->threads_per_core); |
| |
| if (alloc->pn_min_memory & MEM_PER_CPU) { |
| uint64_t tmp_mem = alloc->pn_min_memory & (~MEM_PER_CPU); |
| env_array_overwrite_het_fmt(dest, "SLURM_MEM_PER_CPU", |
| het_job_offset, "%"PRIu64"", |
| tmp_mem); |
| } else if (alloc->pn_min_memory) { |
| uint64_t tmp_mem = alloc->pn_min_memory; |
| env_array_overwrite_het_fmt(dest, "SLURM_MEM_PER_NODE", |
| het_job_offset, "%"PRIu64"", |
| tmp_mem); |
| } |
| |
| /* OBSOLETE, but needed by MPI, do not remove */ |
| env_array_overwrite_het_fmt(dest, "SLURM_JOBID", het_job_offset, "%u", |
| alloc->job_id); |
| env_array_overwrite_het_fmt(dest, "SLURM_NNODES", het_job_offset, "%u", |
| step_layout_req.num_hosts); |
| env_array_overwrite_het_fmt(dest, "SLURM_NODELIST", het_job_offset, "%s", |
| alloc->node_list); |
| |
| /* |
	 * --ntasks-per-node no longer sets num_tasks implicitly, so we need
	 * to calculate num_tasks here to make sure the environment variable
	 * is correct.
	 *
	 * --ntasks-per-tres still implicitly sets ntasks.
	 * --ntasks-per-socket requires --ntasks in order to work.
	 * So neither needs to be accounted for here.
| * |
| * SLURM_TASKS_PER_NODE is used by mpirun so it must be set correctly. |
| */ |
| if ((step_layout_req.num_tasks == NO_VAL) && |
| desc->ntasks_per_node && (desc->ntasks_per_node != NO_VAL16)) { |
| step_layout_req.num_tasks = |
| desc->ntasks_per_node * alloc->node_cnt; |
| } |
| |
| /* |
| * If we know how many tasks we are going to do then we set |
| * SLURM_TASKS_PER_NODE. If no tasks were given we can figure it out |
| * here by totalling up the number of tasks each node can hold (which is |
| * the cpus in a node divided by the number of cpus per task). |
| */ |
| if (step_layout_req.num_tasks == NO_VAL) { |
| step_layout_req.num_tasks = 0; |
| |
		/* Iterate over all kinds of cluster nodes. */
| for (int i = 0; i < alloc->num_cpu_groups; i++) { |
| /* Get the CPU count for this type of nodes. */ |
| uint32_t ntasks = alloc->cpus_per_node[i]; |
| |
| /* |
| * If CPUs/tasks is set, determine how many tasks a node |
| * of this type can hold. |
| */ |
| if ((desc->cpus_per_task != NO_VAL16) && |
| (desc->cpus_per_task > 1)) |
| ntasks /= desc->cpus_per_task; |
| |
			/* Accumulate the tasks this whole group can hold. */
| step_layout_req.num_tasks += ntasks * |
| alloc->cpu_count_reps[i]; |
| } |
| } |
| |
| if ((desc->task_dist & SLURM_DIST_STATE_BASE) == SLURM_DIST_ARBITRARY) { |
| step_layout_req.node_list = desc->req_nodes; |
| env_array_overwrite_het_fmt(dest, "SLURM_ARBITRARY_NODELIST", |
| het_job_offset, "%s", |
| step_layout_req.node_list); |
| } else |
| step_layout_req.node_list = alloc->node_list; |
| |
| step_layout_req.cpus_per_node = alloc->cpus_per_node; |
| step_layout_req.cpu_count_reps = alloc->cpu_count_reps; |
| step_layout_req.cpus_per_task = cpus_per_task_array; |
| step_layout_req.cpus_task_reps = cpus_task_reps; |
| step_layout_req.task_dist = desc->task_dist; |
| step_layout_req.plane_size = desc->plane_size; |
| |
| if (!(step_layout = slurm_step_layout_create(&step_layout_req))) |
| return SLURM_ERROR; |
| |
| tmp = uint16_array_to_str(step_layout->node_cnt, step_layout->tasks); |
| slurm_step_layout_destroy(step_layout); |
| env_array_overwrite_het_fmt(dest, "SLURM_TASKS_PER_NODE", |
| het_job_offset, |
| "%s", tmp); |
| xfree(tmp); |
| |
| if (alloc->account) { |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_ACCOUNT", |
| het_job_offset, "%s", |
| alloc->account); |
| } |
| if (alloc->qos) { |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_QOS", |
| het_job_offset, |
| "%s", alloc->qos); |
| } |
| if (alloc->resv_name) { |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_RESERVATION", |
| het_job_offset, "%s", |
| alloc->resv_name); |
| } |
| |
| if (alloc->env_size) { /* Used to set Burst Buffer environment */ |
| for (i = 0; i < alloc->env_size; i++) { |
| tmp = xstrdup(alloc->environment[i]); |
| key = tmp; |
| value = strchr(tmp, '='); |
| if (value) { |
| value[0] = '\0'; |
| value++; |
| env_array_overwrite_het_fmt(dest, key, |
| het_job_offset, |
| "%s", |
| value); |
| } |
| xfree(tmp); |
| } |
| } |
| |
| if (desc->acctg_freq) { |
| env_array_overwrite_het_fmt(dest, "SLURM_ACCTG_FREQ", |
| het_job_offset, "%s", |
| desc->acctg_freq); |
| }; |
| |
| if (desc->network) { |
| env_array_overwrite_het_fmt(dest, "SLURM_NETWORK", |
| het_job_offset, "%s", |
| desc->network); |
| } |
| |
| if (desc->overcommit != NO_VAL8) { |
| env_array_overwrite_het_fmt(dest, "SLURM_OVERCOMMIT", |
| het_job_offset, "%u", |
| desc->overcommit); |
| } |
| |
| /* Add default task counts for srun, if not already set */ |
| if (desc->bitflags & JOB_NTASKS_SET) { |
| env_array_overwrite_het_fmt(dest, "SLURM_NTASKS", |
| het_job_offset, |
| "%d", desc->num_tasks); |
| /* maintain for old scripts */ |
| env_array_overwrite_het_fmt(dest, "SLURM_NPROCS", |
| het_job_offset, |
| "%d", desc->num_tasks); |
| } |
| |
| new_cpt = slurm_opt_get_tres_per_task_cpu_cnt(alloc->tres_per_task); |
| if (new_cpt) { |
| env_array_overwrite_het_fmt(dest, "SLURM_CPUS_PER_TASK", |
| het_job_offset, "%d", new_cpt); |
| } else if (desc->bitflags & JOB_CPUS_SET) { |
| env_array_overwrite_het_fmt(dest, "SLURM_CPUS_PER_TASK", |
| het_job_offset, "%d", |
| desc->cpus_per_task); |
| } |
| |
| if (alloc->tres_per_task) { |
| env_array_overwrite_het_fmt(dest, "SLURM_TRES_PER_TASK", |
| het_job_offset, "%s", |
| alloc->tres_per_task); |
| } |
| |
| if (desc->ntasks_per_node && (desc->ntasks_per_node != NO_VAL16)) { |
| env_array_overwrite_het_fmt(dest, "SLURM_NTASKS_PER_NODE", |
| het_job_offset, "%d", |
| desc->ntasks_per_node); |
| } |
| |
| if (alloc->segment_size) { |
| env_array_overwrite_het_fmt(dest, "SLURM_JOB_SEGMENT_SIZE", |
| het_job_offset, "%u", |
| alloc->segment_size); |
| } |
| |
| return rc; |
| } |
| |
| /* |
| * Set in "dest" the environment variables strings relevant to a Slurm batch |
| * job allocation, overwriting any environment variables of the same name. |
| * If the address pointed to by "dest" is NULL, memory will automatically be |
| * xmalloc'ed. The array is terminated by a NULL pointer, and thus is |
| * suitable for use by execle() and other env_array_* functions. |
| * |
| * Sets the variables: |
| * SLURM_CLUSTER_NAME |
| * SLURM_JOB_ID |
| * SLURM_JOB_NUM_NODES |
| * SLURM_JOB_NODELIST |
| * SLURM_JOB_CPUS_PER_NODE |
| * ENVIRONMENT=BATCH |
| * HOSTNAME |
| * |
| * Sets OBSOLETE variables (needed for MPI, do not remove): |
| * SLURM_JOBID |
| * SLURM_NNODES |
| * SLURM_NODELIST |
| * SLURM_NTASKS |
| * SLURM_TASKS_PER_NODE |
| */ |
| extern int |
| env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, |
| const char *node_name) |
| { |
| char *tmp = NULL; |
| int i; |
| slurm_step_layout_t *step_layout = NULL; |
| uint16_t cpus_per_task; |
| uint32_t task_dist; |
| slurm_step_layout_req_t step_layout_req; |
| uint16_t cpus_per_task_array[1]; |
| uint32_t cpus_task_reps[1]; |
| |
| if (!batch) |
| return SLURM_ERROR; |
| |
| memset(&step_layout_req, 0, sizeof(slurm_step_layout_req_t)); |
| step_layout_req.num_tasks = batch->ntasks; |
| |
| /* |
| * There is no explicit node count in the batch structure, |
| * so we need to calculate the node count. |
| */ |
| for (i = 0; i < batch->num_cpu_groups; i++) { |
| step_layout_req.num_hosts += batch->cpu_count_reps[i]; |
| } |
| |
| /* |
	 * --ntasks-per-node no longer sets num_tasks implicitly, so we need
	 * to calculate num_tasks here to make sure the SLURM_TASKS_PER_NODE
	 * environment variable is correct. Also make sure that the
	 * SLURM_NTASKS environment variable is set.
	 *
	 * --ntasks-per-tres still implicitly sets ntasks.
	 * --ntasks-per-socket requires --ntasks in order to work.
	 * So neither needs to be accounted for here.
| * |
| * SLURM_TASKS_PER_NODE is used by mpirun so it must be set correctly. |
| */ |
| if (!step_layout_req.num_tasks) { |
| char *tmp_env_ntasks_per_node = |
| getenvp(batch->environment, "SLURM_NTASKS_PER_NODE"); |
| if (tmp_env_ntasks_per_node) { |
| step_layout_req.num_tasks = |
| atoi(tmp_env_ntasks_per_node) * |
| step_layout_req.num_hosts; |
| } |
| } |
| |
| env_array_overwrite_fmt(dest, "SLURM_CLUSTER_NAME", "%s", |
| slurm_conf.cluster_name); |
| |
| env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); |
| env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", |
| step_layout_req.num_hosts); |
| if (batch->array_task_id != NO_VAL) { |
| env_array_overwrite_fmt(dest, "SLURM_ARRAY_JOB_ID", "%u", |
| batch->array_job_id); |
| env_array_overwrite_fmt(dest, "SLURM_ARRAY_TASK_ID", "%u", |
| batch->array_task_id); |
| } |
| env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes); |
| env_array_overwrite_fmt(dest, "SLURM_JOB_PARTITION", "%s", |
| batch->partition); |
| |
| tmp = uint32_compressed_to_str(batch->num_cpu_groups, |
| batch->cpus_per_node, |
| batch->cpu_count_reps); |
| env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); |
| xfree(tmp); |
| |
| env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH"); |
| if (node_name) |
| env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name); |
| |
| /* OBSOLETE, but needed by MPI, do not remove */ |
| env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id); |
| env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", |
| step_layout_req.num_hosts); |
| env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes); |
| |
| if ((batch->cpus_per_task != 0) && |
| (batch->cpus_per_task != NO_VAL16)) |
| cpus_per_task = batch->cpus_per_task; |
| else |
| cpus_per_task = 1; /* default value */ |
| cpus_per_task_array[0] = cpus_per_task; |
| cpus_task_reps[0] = step_layout_req.num_hosts; |
| |
	/* Only overwrite this if it is already set; it is set directly in
	 * sbatch and could have changed. */
| if (getenvp(*dest, "SLURM_CPUS_PER_TASK")) |
| env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u", |
| cpus_per_task); |
| if (batch->tres_per_task) |
| env_array_overwrite_fmt(dest, "SLURM_TRES_PER_TASK", "%s", |
| batch->tres_per_task); |
| |
| if (step_layout_req.num_tasks) { |
| env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u", |
| step_layout_req.num_tasks); |
| /* keep around for old scripts */ |
| env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", |
| step_layout_req.num_tasks); |
| } else if (!step_layout_req.num_tasks) { |
| /* |
		 * Figure out num_tasks if it was not set by either
		 * batch->ntasks or SLURM_NTASKS_PER_NODE above.
		 * Iterate over all kinds of cluster nodes and accumulate the
		 * number of tasks all the groups can hold.
| */ |
| for (int i = 0; i < batch->num_cpu_groups; i++) |
| step_layout_req.num_tasks += (batch->cpus_per_node[i] / |
| cpus_per_task) * |
| batch->cpu_count_reps[i]; |
| } |
| |
| if ((step_layout_req.node_list = |
| getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) { |
| task_dist = SLURM_DIST_ARBITRARY; |
| } else { |
| step_layout_req.node_list = batch->nodes; |
| task_dist = SLURM_DIST_BLOCK; |
| } |
| |
| step_layout_req.cpus_per_node = batch->cpus_per_node; |
| step_layout_req.cpu_count_reps = batch->cpu_count_reps; |
| step_layout_req.cpus_per_task = cpus_per_task_array; |
| step_layout_req.cpus_task_reps = cpus_task_reps; |
| step_layout_req.task_dist = task_dist; |
| step_layout_req.plane_size = NO_VAL16; |
| |
| if (!(step_layout = slurm_step_layout_create(&step_layout_req))) |
| return SLURM_ERROR; |
| |
| tmp = uint16_array_to_str(step_layout->node_cnt, step_layout->tasks); |
| slurm_step_layout_destroy(step_layout); |
| env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); |
| xfree(tmp); |
| |
| if (batch->pn_min_memory & MEM_PER_CPU) { |
| uint64_t tmp_mem = batch->pn_min_memory & (~MEM_PER_CPU); |
| env_array_overwrite_fmt(dest, "SLURM_MEM_PER_CPU", "%"PRIu64"", |
| tmp_mem); |
| } else if (batch->pn_min_memory) { |
| uint64_t tmp_mem = batch->pn_min_memory; |
| env_array_overwrite_fmt(dest, "SLURM_MEM_PER_NODE", "%"PRIu64"", |
| tmp_mem); |
| } |
| |
| /* Set the SLURM_JOB_ACCOUNT, SLURM_JOB_QOS |
| * and SLURM_JOB_RESERVATION if set by |
| * the controller. |
| */ |
| if (batch->account) { |
| env_array_overwrite_fmt(dest, |
| "SLURM_JOB_ACCOUNT", |
| "%s", |
| batch->account); |
| } |
| |
| if (batch->qos) { |
| env_array_overwrite_fmt(dest, |
| "SLURM_JOB_QOS", |
| "%s", |
| batch->qos); |
| } |
| |
| if (batch->resv_name) { |
| env_array_overwrite_fmt(dest, |
| "SLURM_JOB_RESERVATION", |
| "%s", |
| batch->resv_name); |
| } |
| |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * Set in "dest" the environment variables relevant to a Slurm job step, |
| * overwriting any environment variables of the same name. If the address |
| * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed. |
| * The array is terminated by a NULL pointer, and thus is suitable for |
| * use by execle() and other env_array_* functions. If preserve_env is |
| * true, the variables SLURM_NNODES, SLURM_NTASKS and SLURM_TASKS_PER_NODE |
| * remain unchanged. |
| * |
| * Sets variables: |
| * SLURM_STEP_ID |
| * SLURM_STEP_NUM_NODES |
| * SLURM_STEP_NUM_TASKS |
| * SLURM_STEP_TASKS_PER_NODE |
| * SLURM_STEP_LAUNCHER_PORT |
| * SLURM_STEP_LAUNCHER_IPADDR |
| * SLURM_STEP_RESV_PORTS |
| * SLURM_STEP_SUB_MP |
| * |
| * Sets OBSOLETE variables: |
| * SLURM_STEPID |
| * SLURM_NNODES |
| * SLURM_NTASKS |
| * SLURM_NODELIST |
| * SLURM_TASKS_PER_NODE |
| * SLURM_SRUN_COMM_PORT |
| * SLURM_LAUNCH_NODE_IPADDR |
| * |
| */ |
| extern void |
| env_array_for_step(char ***dest, |
| const job_step_create_response_msg_t *step, |
| launch_tasks_request_msg_t *launch, |
| uint16_t launcher_port, |
| bool preserve_env) |
| { |
| char *tmp, *tpn; |
| uint32_t node_cnt, task_cnt; |
| |
| if (!step || !launch) |
| return; |
| |
| node_cnt = step->step_layout->node_cnt; |
| env_array_overwrite_fmt(dest, "SLURM_STEP_ID", "%u", step->job_step_id); |
| |
| if (launch->het_job_node_list) { |
| tmp = launch->het_job_node_list; |
| env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", tmp); |
| env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", tmp); |
| } else { |
| tmp = step->step_layout->node_list; |
| env_array_append_fmt(dest, "SLURM_JOB_NODELIST", "%s", tmp); |
| } |
| env_array_overwrite_fmt(dest, "SLURM_STEP_NODELIST", "%s", tmp); |
| |
| if (launch->het_job_nnodes && (launch->het_job_nnodes != NO_VAL)) |
| node_cnt = launch->het_job_nnodes; |
| env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_NODES", "%u", node_cnt); |
| |
| if (launch->het_job_ntasks && (launch->het_job_ntasks != NO_VAL)) |
| task_cnt = launch->het_job_ntasks; |
| else |
| task_cnt = step->step_layout->task_cnt; |
| env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_TASKS", "%u", task_cnt); |
| |
| if (launch->het_job_task_cnts) { |
| tpn = uint16_array_to_str(launch->het_job_nnodes, |
| launch->het_job_task_cnts); |
| env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", |
| tpn); |
| env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", |
| launch->het_job_nnodes); |
| } else { |
| tpn = uint16_array_to_str(step->step_layout->node_cnt, |
| step->step_layout->tasks); |
| if (!preserve_env) { |
| env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", |
| "%s", tpn); |
| } |
| } |
| env_array_overwrite_fmt(dest, "SLURM_STEP_TASKS_PER_NODE", "%s", tpn); |
| |
| env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_PORT", |
| "%hu", launcher_port); |
| if (step->resv_ports) { |
| env_array_overwrite_fmt(dest, "SLURM_STEP_RESV_PORTS", |
| "%s", step->resv_ports); |
| } |
| |
| /* OBSOLETE, but needed by some MPI implementations, do not remove */ |
| env_array_overwrite_fmt(dest, "SLURM_STEPID", "%u", step->job_step_id); |
| if (!preserve_env) { |
| env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt); |
| env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u", task_cnt); |
| /* keep around for old scripts */ |
| env_array_overwrite_fmt(dest, "SLURM_NPROCS", |
| "%u", step->step_layout->task_cnt); |
| } |
| env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT", |
| "%hu", launcher_port); |
| |
| xfree(tpn); |
| } |
| |
| /* |
| * Environment variables set elsewhere |
| * ---------------------------------- |
| * |
| * Set by slurmstepd: |
| * SLURM_STEP_NODEID |
| * SLURM_STEP_PROCID |
| * SLURM_STEP_LOCALID |
| * |
| * OBSOLETE set by slurmstepd: |
| * SLURM_NODEID |
| * SLURM_PROCID |
| * SLURM_LOCALID |
| */ |
| |
| /*********************************************************************** |
| * Environment variable array support functions |
| ***********************************************************************/ |
| |
| /* |
| * Return an empty environment variable array (contains a single |
| * pointer to NULL). |
| */ |
| char **env_array_create(void) |
| { |
| char **env_array; |
| |
| env_array = xmalloc(sizeof(char *)); |
| env_array[0] = NULL; |
| |
| return env_array; |
| } |
| |
| static int _env_array_update(char ***array_ptr, const char *name, |
| const char *value, bool over_write) |
| { |
| char **ep = NULL; |
| char *str = NULL; |
| |
| if (array_ptr == NULL) |
| return 0; |
| |
| if (*array_ptr == NULL) |
| *array_ptr = env_array_create(); |
| |
| ep = _find_name_in_env(*array_ptr, name); |
| if (*ep != NULL) { |
| if (!over_write) |
| return 0; |
| xfree (*ep); |
| } else { |
| ep = _extend_env(array_ptr); |
| } |
| |
| xstrfmtcat(str, "%s=%s", name, value); |
| *ep = str; |
| |
| return 1; |
| } |
| |
| /* |
| * Append a single environment variable to an environment variable array, |
| * if and only if a variable by that name does not already exist in the |
| * array. |
| * |
| * "value_fmt" supports printf-style formatting. |
| * |
| * Return 1 on success, and 0 on error. |
| */ |
| int env_array_append_fmt(char ***array_ptr, const char *name, |
| const char *value_fmt, ...) |
| { |
| int rc; |
| char *value; |
| va_list ap; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| va_start(ap, value_fmt); |
| vsnprintf (value, ENV_BUFSIZE, value_fmt, ap); |
| va_end(ap); |
| rc = env_array_append(array_ptr, name, value); |
| xfree(value); |
| |
| return rc; |
| } |
| |
| /* |
| * Append a single environment variable to an environment variable array, |
| * if and only if a variable by that name does not already exist in the |
| * array. |
| * |
| * Return 1 on success, and 0 on error. |
| */ |
| int env_array_append(char ***array_ptr, const char *name, |
| const char *value) |
| { |
| return _env_array_update(array_ptr, name, value, false); |
| } |
| |
| /* |
| * Append a single environment variable to an environment variable array |
| * if a variable by that name does not already exist. If a variable |
| * by the same name is found in the array, it is overwritten with the |
| * new value. |
| * |
| * "value_fmt" supports printf-style formatting. |
| * |
| * Return 1 on success, and 0 on error. |
| */ |
| int env_array_overwrite_fmt(char ***array_ptr, const char *name, |
| const char *value_fmt, ...) |
| { |
| int rc; |
| char *value; |
| va_list ap; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| va_start(ap, value_fmt); |
| vsnprintf (value, ENV_BUFSIZE, value_fmt, ap); |
| va_end(ap); |
| rc = env_array_overwrite(array_ptr, name, value); |
| xfree(value); |
| |
| return rc; |
| } |
| |
| /* |
 * Append a single environment variable to an environment variable array
 * if a variable by that name does not already exist. If a variable by
 * the same name is found in the array, it is overwritten with the new
 * value. When het_job_offset is not -1, the variable is set under both
 * "<name>_PACK_GROUP_<offset>" (kept for the old hetjob terminology) and
 * "<name>_HET_GROUP_<offset>"; otherwise "name" is set directly.
 *
 * "value_fmt" supports printf-style formatting.
 *
 * Return 1 on success, and 0 on error.
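 *
 * Example (illustrative): with name "SLURM_JOB_ID" and het_job_offset 1,
 * both SLURM_JOB_ID_PACK_GROUP_1 and SLURM_JOB_ID_HET_GROUP_1 are set.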
| */ |
| int env_array_overwrite_het_fmt(char ***array_ptr, const char *name, |
| int het_job_offset, |
| const char *value_fmt, ...) |
| { |
| int rc; |
| char *value; |
| va_list ap; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| va_start(ap, value_fmt); |
| vsnprintf (value, ENV_BUFSIZE, value_fmt, ap); |
| va_end(ap); |
| if (het_job_offset != -1) { |
| char *het_comp_name = NULL; |
| /* Continue support for old hetjob terminology. */ |
| xstrfmtcat(het_comp_name, "%s_PACK_GROUP_%d", name, |
| het_job_offset); |
| rc = env_array_overwrite(array_ptr, het_comp_name, value); |
| xfree(het_comp_name); |
| xstrfmtcat(het_comp_name, "%s_HET_GROUP_%d", name, |
| het_job_offset); |
| rc = env_array_overwrite(array_ptr, het_comp_name, value); |
| xfree(het_comp_name); |
| } else |
| rc = env_array_overwrite(array_ptr, name, value); |
| xfree(value); |
| |
| return rc; |
| } |
| |
| /* |
| * Append a single environment variable to an environment variable array |
| * if a variable by that name does not already exist. If a variable |
| * by the same name is found in the array, it is overwritten with the |
| * new value. |
| * |
| * Return 1 on success, and 0 on error. |
| */ |
| int env_array_overwrite(char ***array_ptr, const char *name, |
| const char *value) |
| { |
| return _env_array_update(array_ptr, name, value, true); |
| } |
| |
| /* |
| * Copy env_array must be freed by env_array_free |
| */ |
| char **env_array_copy(const char **array) |
| { |
| char **ptr = NULL; |
| |
| env_array_merge(&ptr, array); |
| |
| return ptr; |
| } |
| |
| /* |
| * Free the memory used by an environment variable array. |
| */ |
| void env_array_free(char **env_array) |
| { |
| char **ptr; |
| |
| if (env_array == NULL) |
| return; |
| |
| for (ptr = env_array; *ptr != NULL; ptr++) { |
| xfree(*ptr); |
| } |
| xfree(env_array); |
| } |
| |
| /* |
| * Given an environment variable "name=value" string, |
| * copy the name portion into the "name" buffer, and the |
| * value portion into the "value" buffer. |
| * |
| * Return 1 on success, 0 on failure. |
| */ |
| static int _env_array_entry_splitter(const char *entry, |
| char *name, int name_len, |
| char *value, int value_len) |
| { |
| char *ptr; |
| int len; |
| |
| ptr = xstrchr(entry, '='); |
| if (ptr == NULL) /* Bad parsing, no '=' found */ |
| return 0; |
| /* |
| * need to consider the byte pointed by ptr. |
| * example: entry = 0x0 = "a=b" |
| * ptr = 0x1 |
| * len = ptr - entry + 1 = 2 because we need |
| * 2 characters to store 'a\0' |
| */ |
| len = ptr - entry + 1; |
| if (len > name_len) |
| return 0; |
| strlcpy(name, entry, len); |
| |
| ptr++; |
| /* account for '\0' here */ |
| len = strlen(ptr) + 1; |
| if (len > value_len) |
| return 0; |
| strlcpy(value, ptr, len); |
| |
| return 1; |
| } |
| |
| /* |
 * Works similarly to putenv() (from the C stdlib), but uses setenv()
| * under the covers. This avoids having pointers from the global |
| * array "environ" into "string". |
| * |
| * Return 1 on success, 0 on failure. |
| */ |
| static int _env_array_putenv(const char *string) |
| { |
| int rc = 0; |
| char name[256], *value; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| if ((_env_array_entry_splitter(string, name, sizeof(name), |
| value, ENV_BUFSIZE)) && |
| (setenv(name, value, 1) != -1)) |
| rc = 1; |
| |
| xfree(value); |
| return rc; |
| } |
| |
| /* |
| * Set all of the environment variables in a supplied environment |
| * variable array. |
| */ |
| void env_array_set_environment(char **env_array) |
| { |
| char **ptr; |
| |
| if (env_array == NULL) |
| return; |
| |
| for (ptr = env_array; *ptr != NULL; ptr++) { |
| _env_array_putenv(*ptr); |
| } |
| } |
| |
| /* |
| * Unset all of the environment variables in a user's current |
| * environment. |
| * |
 * (Note: because the environ array shrinks with each successful
 * unsetenv(), ptr is only advanced when an unset fails.)
| */ |
| void env_unset_environment(void) |
| { |
| extern char **environ; |
| char **ptr; |
| char name[256], *value; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| for (ptr = (char **)environ; *ptr != NULL; ) { |
| if ((_env_array_entry_splitter(*ptr, name, sizeof(name), |
| value, ENV_BUFSIZE)) && |
| (unsetenv(name) != -1)) |
| ; |
| else |
| ptr++; |
| } |
| xfree(value); |
| } |
| |
| /* |
| * Merge all of the environment variables in src_array into the |
| * array dest_array. Any variables already found in dest_array |
| * will be overwritten with the value from src_array. |
| */ |
| void env_array_merge(char ***dest_array, const char **src_array) |
| { |
| char **ptr; |
| char name[256], *value; |
| |
| if (src_array == NULL) |
| return; |
| |
| value = xmalloc(ENV_BUFSIZE); |
| for (ptr = (char **)src_array; *ptr != NULL; ptr++) { |
| if (_env_array_entry_splitter(*ptr, name, sizeof(name), |
| value, ENV_BUFSIZE)) |
| env_array_overwrite(dest_array, name, value); |
| } |
| xfree(value); |
| } |
| |
| /* |
| * Merge the environment variables in src_array beginning with "SLURM" or |
| * SPANK_OPTION_ENV_PREFIX into the array dest_array. Any variables already |
| * found in dest_array will be overwritten with the value from src_array. |
| */ |
| void env_array_merge_slurm_spank(char ***dest_array, const char **src_array) |
| { |
| char **ptr; |
| char name[256], *value; |
| int spank_len; |
| |
| if (src_array == NULL) |
| return; |
| |
| spank_len = strlen(SPANK_OPTION_ENV_PREFIX); |
| value = xmalloc(ENV_BUFSIZE); |
| for (ptr = (char **)src_array; *ptr != NULL; ptr++) { |
| if (_env_array_entry_splitter(*ptr, name, sizeof(name), |
| value, ENV_BUFSIZE) && |
| ((xstrncmp(name, "SLURM", 5) == 0) || |
| (xstrncmp(name, SPANK_OPTION_ENV_PREFIX, spank_len) == 0))) |
| env_array_overwrite(dest_array, name, value); |
| } |
| xfree(value); |
| } |
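| |
| /* |
| * Example (illustrative only): copying just the SLURM_* and SPANK option |
| * variables from the current environment into a job descriptor, as |
| * env_merge_filter() below does after processing --export. "desc" is a |
| * placeholder job_desc_msg_t pointer. |
| * |
| * extern char **environ; |
| * |
| * env_array_merge_slurm_spank(&desc->environment, |
| * (const char **) environ); |
| */ |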
| |
| /* Return the net count of curly brackets in a string: |
| * '{' adds one and '}' subtracts one (zero means it is balanced). */ |
| static int _bracket_cnt(char *value) |
| { |
| int count = 0, i; |
| for (i=0; value[i]; i++) { |
| if (value[i] == '{') |
| count++; |
| else if (value[i] == '}') |
| count--; |
| } |
| return count; |
| } |
| |
| /* |
| * Load user environment from a specified file or file descriptor. |
| * |
| * Reads a user-specified file or fd, as given by the --export-file |
| * option of sbatch. The NAME=value entries must be NUL ('\0') |
| * separated to support special characters in the environment |
| * definitions. |
| */ |
| char **env_array_from_file(const char *fname) |
| { |
| char *buf = NULL, *ptr = NULL, *eptr = NULL; |
| char *value, *p; |
| char **env = NULL; |
| char name[256]; |
| int buf_size = BUFSIZ, buf_left; |
| int file_size = 0, tmp_size; |
| int separator = '\0'; |
| int fd; |
| |
| if (!fname) |
| return NULL; |
| |
| /* |
| * If file name is a numeric value, then it is assumed to be a |
| * file descriptor. |
| */ |
| fd = (int)strtol(fname, &p, 10); |
| if ((*p != '\0') || (fd < 3) || (fd > sysconf(_SC_OPEN_MAX)) || |
| (fcntl(fd, F_GETFL) < 0)) { |
| fd = open(fname, O_RDONLY); |
| if (fd == -1) { |
| error("Could not open user environment file %s", fname); |
| return NULL; |
| } |
| verbose("Getting environment variables from %s", fname); |
| } else |
| verbose("Getting environment variables from fd %d", fd); |
| |
| /* |
| * Read in the user's environment data. |
| */ |
| buf = xmalloc(buf_size); |
| buf_left = buf_size; |
| while ((tmp_size = read(fd, &buf[file_size], buf_left))) { |
| if (tmp_size < 0) { |
| if (errno == EINTR) |
| continue; |
| error("read(environment_file): %m"); |
| break; |
| } |
| |
| if (buf_left <= tmp_size) { |
| buf_size += BUFSIZ; |
| xrealloc(buf, buf_size); |
| } |
| |
| file_size += tmp_size; |
| buf_left = buf_size - file_size; |
| if (buf_left < 0) { |
| error("%s: We don't have a large enough buffer.", |
| __func__); |
| break; |
| } |
| } |
| close(fd); |
| |
| /* |
| * Parse the buffer into individual environment variable names |
| * and build the environment. |
| */ |
| env = env_array_create(); |
| value = xmalloc(ENV_BUFSIZE); |
| for (ptr = buf; ; ptr = eptr+1) { |
| eptr = strchr(ptr, separator); |
| if ((ptr == eptr) || (eptr == NULL)) |
| break; |
| if (_env_array_entry_splitter(ptr, name, sizeof(name), |
| value, ENV_BUFSIZE) && |
| (!_discard_env(name, value))) { |
| /* |
| * Unset the SLURM_SUBMIT_DIR if it is defined so |
| * that this new value does not get overwritten |
| * in the subsequent call to env_array_merge(). |
| */ |
| if (xstrcmp(name, "SLURM_SUBMIT_DIR") == 0) |
| unsetenv(name); |
| env_array_overwrite(&env, name, value); |
| } |
| } |
| xfree(buf); |
| xfree(value); |
| |
| return env; |
| } |
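| |
| /* |
| * Usage sketch (illustrative only): loading a NUL-separated environment |
| * file, e.g. one produced with "env -0 > my.env" and passed to sbatch |
| * via --export-file. The file name is a placeholder. |
| * |
| * char **file_env = env_array_from_file("/tmp/my.env"); |
| * |
| * if (file_env) { |
| * env_array_set_environment(file_env); |
| * env_array_free(file_env); |
| * } |
| */ |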
| |
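| /* |
| * Write env_array to filename, one entry per line when newline is true |
| * (skipping entries that themselves contain a newline) or NUL-separated |
| * otherwise. The file is created with mode 0600 and must not already |
| * exist. Returns SLURM_SUCCESS or an errno value on failure. |
| */ |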
| int env_array_to_file(const char *filename, const char **env_array, |
| bool newline) |
| { |
| int outfd = -1; |
| int rc = SLURM_SUCCESS; |
| const char *terminator = newline ? "\n" : "\0"; |
| |
| outfd = open(filename, (O_WRONLY | O_CREAT | O_EXCL), 0600); |
| if (outfd < 0) { |
| error("%s: unable to open %s: %m", |
| __func__, filename); |
| goto rwfail; |
| } |
| |
| for (const char **p = env_array; p && *p; p++) { |
| /* skip any env variables with a newline in newline mode */ |
| if (newline && xstrstr(*p, "\n")) { |
| log_flag_hex(STEPS, *p, strlen(*p), |
| "%s: skipping environment variable with newline", |
| __func__); |
| continue; |
| } |
| |
| safe_write(outfd, *p, strlen(*p)); |
| safe_write(outfd, terminator, 1); |
| } |
| |
| (void) close(outfd); |
| |
| return rc; |
| |
| rwfail: |
| rc = errno; |
| |
| if (outfd >= 0) |
| (void) close(outfd); |
| |
| return rc; |
| } |
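| |
| /* |
| * Usage sketch (illustrative only): writing an environment array to a |
| * NUL-separated file that env_array_from_file() can read back. The file |
| * name and "env" are placeholders. |
| * |
| * if (env_array_to_file("/tmp/job.env", (const char **) env, false)) |
| * error("failed to write environment file"); |
| */ |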
| |
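| /* |
| * Entry point for the forked/cloned child: optionally remount /proc for |
| * the new namespace, redirect stdin/stderr to /dev/null and stdout to |
| * the write end of child_args->fildes, close all other inherited fds, |
| * then exec SUCMD ("su [-] <username> -c <cmdstr>") with |
| * child_args->tmp_env as the environment. |
| */ |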
| static int _child_fn(void *arg) |
| { |
| char **tmp_env = NULL; |
| int devnull, fd = 3; |
| child_args_t *child_args = arg; |
| char *cmdstr; |
| const char *username; |
| |
| username = child_args->username; |
| cmdstr = child_args->cmdstr; |
| tmp_env = child_args->tmp_env; |
| |
| #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__) |
| /* |
| * Set mount propagation to private and mount our own /proc for this |
| * namespace. This ensures that this cloned process and its children |
| * see /proc contents coherent with their virtual PIDs. |
| * See _clone_env_child() for the namespace flags used in clone(). |
| */ |
| if (child_args->perform_mount) { |
| if (mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL)) |
| _exit(1); |
| if (mount("proc", "/proc", "proc", |
| MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL)) |
| _exit(1); |
| } |
| #endif |
| |
| if ((devnull = open("/dev/null", O_RDWR)) != -1) { |
| dup2(devnull, STDIN_FILENO); |
| dup2(devnull, STDERR_FILENO); |
| } |
| dup2(child_args->fildes[1], STDOUT_FILENO); |
| |
| /* close all remaining fds, one at a time */ |
| while (fd < child_args->rlimit) |
| close(fd++); |
| |
| #ifdef LOAD_ENV_NO_LOGIN |
| execle(SUCMD, "su", username, "-c", cmdstr, NULL, tmp_env); |
| #else |
| execle(SUCMD, "su", "-", username, "-c", cmdstr, NULL, tmp_env); |
| #endif |
| if (devnull >= 0) /* Avoid Coverity resource leak notification */ |
| (void) close(devnull); |
| |
| _exit(1); |
| } |
| |
| #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__) |
| static int _clone_env_child(child_args_t *child_args) |
| { |
| char *child_stack; |
| int rc = 0; |
| child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, |
| MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); |
| if (child_stack == MAP_FAILED) { |
| error("Cannot allocate stack for child: %m"); |
| return -1; |
| } |
| /* |
| * In Linux (since 2.6.24), use CLONE_NEWPID to clone the child into a |
| * new pid namespace. We are not inside a job cgroup, so we want to be |
| * able to terminate any possible background process, especially since |
| * we are running su here and executing user scripts (e.g. .bashrc). |
| * |
| * Killing the 'child' pid kills the whole namespace, since inside the |
| * namespace this 'child' is pid 1. |
| */ |
| rc = clone(_child_fn, child_stack + STACK_SIZE, |
| (SIGCHLD|CLONE_NEWPID|CLONE_NEWNS), child_args); |
| /* Memory deallocated only in parent address space, child unaffected */ |
| if (munmap(child_stack, STACK_SIZE)) |
| error("%s: failed to munmap child stack: %m", __func__); |
| return rc; |
| } |
| |
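| /* |
| * Return true if the sysctl file at ns_path exists and its first line |
| * parses as 0, meaning the corresponding namespace type is disabled. |
| * Missing or unreadable files are treated as imposing no limit. |
| */ |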
| static bool _ns_path_disabled(const char *ns_path) |
| { |
| FILE *fp = NULL; |
| size_t line_sz = 0; |
| ssize_t nbytes = 0; |
| int ns_value; |
| char *line = NULL; |
| bool ns_disabled = false; |
| |
| /* If these files do not exist, assume there are no namespace limits. */ |
| fp = fopen(ns_path, "r"); |
| if (!fp) { |
| debug2("%s: could not open %s, assuming no pid namespace limits. Reason: %m", |
| __func__, ns_path); |
| } else { |
| nbytes = getline(&line, &line_sz, fp); |
| if (nbytes < 0) { |
| debug2("%s: could not read contents of %s. Assuming no namespace limits. Reason: %m", |
| __func__, ns_path); |
| } else if (nbytes == 0) { |
| debug2("%s: read 0 bytes from %s. Assuming no namespace limits", |
| __func__, ns_path); |
| } else { |
| ns_value = xstrntol(line, NULL, nbytes, 10); |
| if (ns_value == 0) |
| ns_disabled = true; |
| } |
| fclose(fp); |
| free(line); |
| line = NULL; |
| } |
| |
| return ns_disabled; |
| } |
| |
| /* |
| * Return true if the namespaces required for the clone() calls are |
| * disabled. This is determined by checking the contents of |
| * "/proc/sys/user/max_{pid,mnt}_namespaces" and ensuring they are not 0. |
| */ |
| static bool _ns_disabled(void) |
| { |
| static int disabled = -1; |
| char *pid_ns_path = "/proc/sys/user/max_pid_namespaces"; |
| char *mnt_ns_path = "/proc/sys/user/max_mnt_namespaces"; |
| |
| if (disabled != -1) |
| return disabled; |
| |
| disabled = false; |
| |
| if (_ns_path_disabled(pid_ns_path) || |
| _ns_path_disabled(mnt_ns_path)) |
| disabled = true; |
| |
| return disabled; |
| } |
| #endif |
| |
| /* |
| * Return an array of strings representing the specified user's default |
| * environment variables: |
| * Execute (more or less): "/bin/su - <username> -c /usr/bin/env" |
| * Depending upon the user's login scripts, this may take a very |
| * long time to complete or possibly never return |
| * |
| * A fixed timeout of DEFAULT_GET_ENV_TIMEOUT seconds is applied while |
| * waiting for the command to produce its output. |
| * On error, returns NULL. |
| * |
| * NOTE: The calling process must have an effective uid of root for |
| * this function to succeed. |
| */ |
| char **env_array_user_default(const char *username) |
| { |
| char *line = NULL, *last = NULL, name[PATH_MAX], *value, *buffer; |
| char **env = NULL; |
| char *starttoken = "XXXXSLURMSTARTPARSINGHEREXXXX"; |
| char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX"; |
| char *cmdstr = NULL, *env_loc = NULL; |
| char *stepd_path = NULL; |
| int fildes[2], found, fval, len, rc, timeleft; |
| int buf_read, buf_rem, config_timeout; |
| int timeout = DEFAULT_GET_ENV_TIMEOUT; |
| pid_t child; |
| child_args_t child_args = {0}; |
| struct timeval begin, now; |
| struct pollfd ufds; |
| struct stat buf; |
| struct rlimit rlim; |
| |
| if (geteuid() != (uid_t)0) { |
| error("SlurmdUser must be root to use --get-user-env"); |
| return NULL; |
| } |
| |
| if (stat(SUCMD, &buf)) |
| fatal("Could not locate command: "SUCMD); |
| if (stat("/bin/echo", &buf)) |
| fatal("Could not locate command: /bin/echo"); |
| stepd_path = slurm_get_stepd_loc(); |
| if (stat(stepd_path, &buf) == 0) { |
| xstrcat(stepd_path, " getenv"); |
| env_loc = stepd_path; |
| } else if (stat("/bin/env", &buf) == 0) |
| env_loc = "/bin/env"; |
| else if (stat("/usr/bin/env", &buf) == 0) |
| env_loc = "/usr/bin/env"; |
| else |
| fatal("Could not locate command: env"); |
| |
| /* Construct the final command */ |
| cmdstr = xstrdup_printf("/bin/echo; /bin/echo; /bin/echo; " |
| "/bin/echo %s; %s; /bin/echo %s", |
| starttoken, env_loc, stoptoken); |
| xfree(stepd_path); |
| |
| if (pipe(fildes) < 0) { |
| fatal("pipe: %m"); |
| return NULL; |
| } |
| |
| child_args.fildes = fildes; |
| child_args.username = username; |
| child_args.cmdstr = cmdstr; |
| child_args.tmp_env = env_array_create(); |
| child_args.perform_mount = true; |
| env_array_overwrite(&child_args.tmp_env, "ENVIRONMENT", "BATCH"); |
| if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { |
| error("getrlimit(RLIMIT_NOFILE): %m"); |
| rlim.rlim_cur = 4096; |
| } |
| child_args.rlimit = rlim.rlim_cur; |
| |
| #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) |
| child = fork(); |
| if (child == -1) { |
| fatal("fork: %m"); |
| return NULL; |
| } |
| if (child == 0) |
| _child_fn(&child_args); |
| #else |
| /* |
| * Since we will be using namespaces in the clone calls (CLONE_NEWPID, |
| * CLONE_NEWNS), we need to know if they are disabled. If they are, |
| * we must fall back to fork() and warn the user about the risks. |
| */ |
| if (_ns_disabled()) { |
| warning("%s: pid or mnt namespaces are disabled, avoiding clone and falling back to fork. This can produce orphan/unconstrained processes!", |
| __func__); |
| child_args.perform_mount = false; |
| child = fork(); |
| if (child == -1) { |
| fatal("fork: %m"); |
| return NULL; |
| } |
| if (child == 0) |
| _child_fn(&child_args); |
| } else { |
| if ((child = _clone_env_child(&child_args)) == -1) { |
| fatal("clone: %m"); |
| return NULL; |
| } |
| } |
| #endif |
| xfree(cmdstr); |
| close(fildes[1]); |
| if ((fval = fcntl(fildes[0], F_GETFL, 0)) < 0) |
| error("fcntl(F_GETFL) failed: %m"); |
| else if (fcntl(fildes[0], F_SETFL, fval | O_NONBLOCK) < 0) |
| error("fcntl(F_SETFL) failed: %m"); |
| |
| gettimeofday(&begin, NULL); |
| ufds.fd = fildes[0]; |
| ufds.events = POLLIN; |
| |
| /* Read all of the output from /bin/su into buffer */ |
| found = 0; |
| buf_read = 0; |
| buffer = xmalloc(ENV_BUFSIZE); |
| while (1) { |
| gettimeofday(&now, NULL); |
| timeleft = timeout * 1000; |
| timeleft -= (now.tv_sec - begin.tv_sec) * 1000; |
| timeleft -= (now.tv_usec - begin.tv_usec) / 1000; |
| if (timeleft <= 0) { |
| verbose("timeout waiting for "SUCMD" to complete"); |
| kill(-child, 9); |
| break; |
| } |
| if ((rc = poll(&ufds, 1, timeleft)) <= 0) { |
| if (rc == 0) { |
| verbose("timeout waiting for "SUCMD" to complete"); |
| break; |
| } |
| if ((errno == EINTR) || (errno == EAGAIN)) |
| continue; |
| error("poll(): %m"); |
| break; |
| } |
| if (!(ufds.revents & POLLIN)) { |
| if (ufds.revents & POLLHUP) { /* EOF */ |
| found = 1; /* success */ |
| } else if (ufds.revents & POLLERR) { |
| error("POLLERR"); |
| } else { |
| error("poll() revents=%d", ufds.revents); |
| } |
| break; |
| } |
| buf_rem = ENV_BUFSIZE - buf_read; |
| if (buf_rem == 0) { |
| error("buffer overflow loading env vars"); |
| break; |
| } |
| rc = read(fildes[0], &buffer[buf_read], buf_rem); |
| if (rc > 0) |
| buf_read += rc; |
| else if (rc == 0) { /* EOF */ |
| found = 1; /* success */ |
| break; |
| } else { /* error */ |
| error("read(env pipe): %m"); |
| break; |
| } |
| } |
| close(fildes[0]); |
| env_array_free(child_args.tmp_env); |
| |
| for (config_timeout=0; ; config_timeout++) { |
| kill(-child, SIGKILL); /* Typically a no-op */ |
| if (config_timeout) |
| sleep(1); |
| if (waitpid(child, &rc, WNOHANG) > 0) |
| break; |
| if (config_timeout >= 2) { |
| /* |
| * Non-killable processes are indicative of file system |
| * problems. The process will remain as a zombie, but |
| * slurmd/salloc will not otherwise be affected. |
| */ |
| error("Failed to kill program loading user environment"); |
| break; |
| } |
| } |
| |
| if (!found) { |
| error("Failed to load current user environment variables"); |
| xfree(buffer); |
| return NULL; |
| } |
| |
| /* First look for the start token in the output */ |
| len = strlen(starttoken); |
| found = 0; |
| line = strtok_r(buffer, "\n", &last); |
| while (!found && line) { |
| if (!xstrncmp(line, starttoken, len)) { |
| found = 1; |
| break; |
| } |
| line = strtok_r(NULL, "\n", &last); |
| } |
| if (!found) { |
| error("Failed to get current user environment variables"); |
| xfree(buffer); |
| return NULL; |
| } |
| |
| /* Process environment variables until we find the stop token */ |
| len = strlen(stoptoken); |
| found = 0; |
| env = env_array_create(); |
| line = strtok_r(NULL, "\n", &last); |
| value = xmalloc(ENV_BUFSIZE); |
| while (!found && line) { |
| if (!xstrncmp(line, stoptoken, len)) { |
| found = 1; |
| break; |
| } |
| if (_env_array_entry_splitter(line, name, sizeof(name), |
| value, ENV_BUFSIZE) && |
| (!_discard_env(name, value))) { |
| if (value[0] == '(') { |
| /* This is a bash function. |
| * It may span multiple lines */ |
| while (_bracket_cnt(value) > 0) { |
| line = strtok_r(NULL, "\n", &last); |
| if (!line) |
| break; |
| if ((strlen(value) + strlen(line)) > |
| (ENV_BUFSIZE - 2)) |
| break; |
| strcat(value, "\n"); |
| strcat(value, line); |
| } |
| } |
| env_array_overwrite(&env, name, value); |
| } |
| line = strtok_r(NULL, "\n", &last); |
| } |
| xfree(value); |
| xfree(buffer); |
| if (!found) { |
| error("Failed to get all user environment variables"); |
| env_array_free(env); |
| return NULL; |
| } |
| |
| return env; |
| } |
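| |
| /* |
| * Usage sketch (illustrative only, must run with an effective uid of |
| * root): fetching a user's login environment and merging it into a job |
| * environment. The user name and "job_env" are placeholders. |
| * |
| * char **user_env = env_array_user_default("alice"); |
| * |
| * if (user_env) { |
| * env_array_merge(&job_env, (const char **) user_env); |
| * env_array_free(user_env); |
| * } |
| */ |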
| |
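| /* |
| * Force the Hydra process manager (used by Intel MPI and MPICH mpirun) |
| * to bootstrap through Slurm unless the user selected another bootstrap |
| * via the b_env variable, and pass --external-launcher through the |
| * matching "extra args" variable. Honors disable_slurm_hydra_bootstrap |
| * in MpiParams. |
| */ |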
| static void _set_ext_launcher_hydra(char ***dest, char *b_env, char *extra) |
| { |
| char *bootstrap = getenv(b_env); |
| bool disabled_slurm_hydra_bootstrap = false; |
| |
| if (slurm_conf.mpi_params && |
| xstrstr(slurm_conf.mpi_params, "disable_slurm_hydra_bootstrap")) |
| disabled_slurm_hydra_bootstrap = true; |
| |
| if ((!bootstrap && !disabled_slurm_hydra_bootstrap) || |
| !xstrcmp(bootstrap, "slurm")) { |
| env_array_append(dest, b_env, "slurm"); |
| env_array_append(dest, extra, "--external-launcher"); |
| } |
| } |
| |
| /* |
| * Set TRES-related env vars. Set here rather than in env_array_for_job() |
| * since we don't have an array of opt values and the raw values are not |
| * stored in the job_desc_msg_t structure (only the strings with possibly |
| * combined TRES). |
| * |
| * opt IN - options set by command parsing |
| * dest IN/OUT - location to write environment variables |
| * het_job_offset IN - component offset into hetjob, -1 if not hetjob |
| */ |
| extern void set_env_from_opts(slurm_opt_t *opt, char ***dest, |
| int het_job_offset) |
| { |
| if (opt->cpus_per_gpu) { |
| env_array_overwrite_het_fmt(dest, "SLURM_CPUS_PER_GPU", |
| het_job_offset, "%d", |
| opt->cpus_per_gpu); |
| } |
| if (opt->gpus) { |
| env_array_overwrite_het_fmt(dest, "SLURM_GPUS", |
| het_job_offset, "%s", |
| opt->gpus); |
| } |
| if (opt->gpu_freq) { |
| env_array_overwrite_het_fmt(dest, "SLURM_GPU_FREQ", |
| het_job_offset, "%s", |
| opt->gpu_freq); |
| } |
| if (opt->gpus_per_node) { |
| env_array_overwrite_het_fmt(dest, "SLURM_GPUS_PER_NODE", |
| het_job_offset, "%s", |
| opt->gpus_per_node); |
| } |
| if (opt->gpus_per_socket) { |
| env_array_overwrite_het_fmt(dest, "SLURM_GPUS_PER_SOCKET", |
| het_job_offset, "%s", |
| opt->gpus_per_socket); |
| } |
| if (opt->mem_per_gpu != NO_VAL64) { |
| env_array_overwrite_het_fmt(dest, "SLURM_MEM_PER_GPU", |
| het_job_offset, "%"PRIu64, |
| opt->mem_per_gpu); |
| } |
| if (opt->tres_per_task) { |
| env_array_overwrite_het_fmt(dest, "SLURM_TRES_PER_TASK", |
| het_job_offset, "%s", |
| opt->tres_per_task); |
| } |
| if (opt->tres_bind) { |
| env_array_overwrite_het_fmt(dest, "SLURM_TRES_BIND", |
| het_job_offset, "%s", |
| opt->tres_bind); |
| } |
| |
| /* |
| * If an external launcher (e.g. mpirun) is launching instead of srun, |
| * let the srun it launches treat the request differently. |
| */ |
| env_array_append(dest, "OMPI_MCA_plm_slurm_args", |
| "--external-launcher"); |
| env_array_append(dest, "PRTE_MCA_plm_slurm_args", |
| "--external-launcher"); |
| |
| /* |
| * Some mpirun implementations, like Intel MPI's, will pass the |
| * bootstrap exec extra args to any bootstrap method (e.g. ssh, |
| * rsh), so force 'slurm' bootstrap if no other one is set. |
| */ |
| _set_ext_launcher_hydra(dest, "HYDRA_BOOTSTRAP", |
| "HYDRA_LAUNCHER_EXTRA_ARGS"); |
| _set_ext_launcher_hydra(dest, "I_MPI_HYDRA_BOOTSTRAP", |
| "I_MPI_HYDRA_BOOTSTRAP_EXEC_EXTRA_ARGS"); |
| } |
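| |
| /* |
| * Usage sketch (illustrative only): exporting TRES-related options into |
| * a job environment before submission. "opt" and "job_env" are |
| * placeholders prepared by the caller; -1 means this is not a hetjob |
| * component. |
| * |
| * set_env_from_opts(&opt, &job_env, -1); |
| */ |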
| |
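| /* |
| * Tokenize string "tmp" on the single-character separator sep[0] while |
| * honoring single and double quotes, so a quoted value containing the |
| * separator stays in one token. Works like strtok_r(): pass tmp on the |
| * first call (with *last initialized to NULL) and NULL afterwards. |
| * Surrounding quotes are stripped from the returned token. Returns NULL |
| * once the string is exhausted. |
| */ |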
| extern char *find_quote_token(char *tmp, char *sep, char **last) |
| { |
| char *start; |
| int i, quote_single = 0, quote_double = 0; |
| |
| xassert(last); |
| if (*last) |
| start = *last; |
| else |
| start = tmp; |
| if (start[0] == '\0') |
| return NULL; |
| for (i = 0; ; i++) { |
| if (start[i] == '\'') { |
| if (quote_single) |
| quote_single--; |
| else |
| quote_single++; |
| } else if (start[i] == '\"') { |
| if (quote_double) |
| quote_double--; |
| else |
| quote_double++; |
| } else if (((start[i] == sep[0]) || (start[i] == '\0')) && |
| (quote_single == 0) && (quote_double == 0)) { |
| if (((start[0] == '\'') && (start[i-1] == '\'')) || |
| ((start[0] == '\"') && (start[i-1] == '\"'))) { |
| start++; |
| i -= 2; |
| } |
| if (start[i] == '\0') |
| *last = &start[i]; |
| else |
| *last = &start[i] + 1; |
| start[i] = '\0'; |
| return start; |
| } else if (start[i] == '\0') { |
| error("Improperly formed environment variable (%s)", |
| start); |
| *last = &start[i]; |
| return start; |
| } |
| |
| } |
| } |
| |
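| /* |
| * Merge selected environment variables into desc->environment according |
| * to opt->export_env, a comma-separated (and possibly quoted) list: |
| * "ALL" merges the entire current environment, "NAME=value" entries are |
| * merged as given, and bare "NAME" entries are copied from the current |
| * environment. SLURM_* and SPANK option variables are always merged |
| * afterwards. |
| */ |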
| extern void env_merge_filter(slurm_opt_t *opt, job_desc_msg_t *desc) |
| { |
| extern char **environ; |
| int i, len; |
| char *save_env[2] = { NULL, NULL }, *tmp, *tok, *last = NULL; |
| |
| tmp = xstrdup(opt->export_env); |
| tok = find_quote_token(tmp, ",", &last); |
| while (tok) { |
| |
| if (xstrcasecmp(tok, "ALL") == 0) { |
| env_array_merge(&desc->environment, |
| (const char **)environ); |
| tok = find_quote_token(NULL, ",", &last); |
| continue; |
| } |
| |
| if (strchr(tok, '=')) { |
| save_env[0] = tok; |
| env_array_merge(&desc->environment, |
| (const char **)save_env); |
| } else { |
| len = strlen(tok); |
| for (i = 0; environ[i]; i++) { |
| if (xstrncmp(tok, environ[i], len) || |
| (environ[i][len] != '=')) |
| continue; |
| save_env[0] = environ[i]; |
| env_array_merge(&desc->environment, |
| (const char **)save_env); |
| break; |
| } |
| } |
| tok = find_quote_token(NULL, ",", &last); |
| } |
| xfree(tmp); |
| |
| env_array_merge_slurm_spank(&desc->environment, (const char **)environ); |
| } |
| |
| extern char **env_array_exclude(const char **env, const regex_t *regex) |
| { |
| /* alloc with NULL termination */ |
| char **purged = xcalloc(1, sizeof(char *)); |
| |
| /* use regex to skip every matching variable */ |
| for (; *env; env++) { |
| if (!regex_quick_match(*env, regex)) { |
| char **e = _extend_env(&purged); |
| *e = xstrdup(*env); |
| } |
| } |
| |
| return purged; |
| } |
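| |
| /* |
| * Usage sketch (illustrative only): dropping every SLURM_* variable from |
| * an environment array. The pattern and "env" are placeholders. |
| * |
| * regex_t re; |
| * char **trimmed; |
| * |
| * if (!regcomp(&re, "^SLURM_", REG_EXTENDED)) { |
| * trimmed = env_array_exclude((const char **) env, &re); |
| * regfree(&re); |
| * } |
| */ |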
| |
| extern void set_prio_process_env(void) |
| { |
| int retval; |
| |
| errno = 0; /* needed to detect a real failure since prio can be -1 */ |
| |
| if ((retval = getpriority(PRIO_PROCESS, 0)) == -1) { |
| if (errno) { |
| error("getpriority(PRIO_PROCESS): %m"); |
| return; |
| } |
| } |
| |
| if (setenvf(NULL, "SLURM_PRIO_PROCESS", "%d", retval) < 0) { |
| error("unable to set SLURM_PRIO_PROCESS in environment"); |
| return; |
| } |
| |
| debug("propagating SLURM_PRIO_PROCESS=%d", retval); |
| } |