| /*****************************************************************************\ |
| * src/slurmd/slurmstepd/ulimits.c - set user limits for job |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Mark Grondona <mgrondona@llnl.gov>. |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #define _GNU_SOURCE /* Required for prlimit */ |
| |
| #include "config.h" |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/resource.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include "src/common/env.h" /* For unsetenvp() */ |
| #include "src/common/log.h" |
| #include "src/common/macros.h" |
| #include "src/common/read_config.h" |
| #include "src/common/slurm_rlimits_info.h" |
| #include "src/common/strlcpy.h" |
| #include "src/common/xmalloc.h" |
| #include "src/common/xstring.h" |
| |
| #include "src/slurmd/slurmstepd/slurmstepd_job.h" |
| |
| /* |
| * Prototypes: |
| * |
| */ |
| static int _get_env_val(char **env, const char *name, unsigned long *valp, |
| bool *u_req_propagate); |
| static int _set_limit(char **env, slurm_rlimits_info_t *rli); |
| |
/*
 * Portable front end for reading or writing a single resource limit.
 *
 * prlimit() only exists on Linux, so there _prlimit() simply expands to it.
 * Everywhere else _prlimit() is a small function built on getrlimit() and
 * setrlimit(); those calls only act on the calling process, so the fallback
 * expects pid == 0 and expects the caller to ask for either a "set"
 * (new_limit) or a "get" (old_limit), never both in one call.
 * The pid is currently only used when using pam_slurm_adopt, which is only
 * supported on Linux.
 */
#ifdef __linux__
#define _prlimit(pid, resource, new_limit, old_limit) \
	prlimit((pid), (resource), (new_limit), (old_limit))
#else
static int _prlimit(pid_t pid, int resource, const struct rlimit *new_limit,
		    struct rlimit *old_limit)
{
	xassert(pid == 0);
	xassert(!(new_limit && old_limit));

	if (!new_limit) {
		/* Not a "set" request, so it must be a "get" request */
		xassert(old_limit);
		return getrlimit(resource, old_limit);
	}

	return setrlimit(resource, new_limit);
}
#endif
| |
| /* |
| * Set user resource limits using the values of the environment variables |
| * of the name "SLURM_RLIMIT_*" that are found in step->env. |
| * |
| * The sys admin can control the propagation of user limits in the slurm |
| * conf file by setting values for the PropagateResourceRlimits and |
| * ResourceLimits keywords. |
| * |
| * NOTE: THIS FUNCTION SHOULD ONLY BE CALLED RIGHT BEFORE THE EXEC OF |
| * A SCRIPT AFTER THE FORK SO AS TO LIMIT THE ABOUT OF EFFECT THE |
| * LIMITS HAVE WHEN COMBINED WITH THE SLURMSTEPD. RLIMIT_FSIZE IS THE |
| * MAIN REASON SINCE IF THE USER SETS THIS TO BE LOWER THAN THE SIZE |
| * OF THE CURRENT SLURMD.LOG THE STEPD WILL CORE THE NEXT TIME |
| * ANYTHING IS WRITTEN TO IT. SO IF RUNNING +DEBUG2 AND THE USER IS |
| * GETTING CORES WITH FILE SYSTEM LIMIT ERRORS THIS IS THE REASON. |
| * |
| * NOTE: The slurmstepd will not normally write a core file due to setuid(). |
| * Run as normal user to disable setuid() and permit a core file to be written. |
| */ |
| |
| extern void set_user_limits(stepd_step_rec_t *step, pid_t pid) |
| { |
| #ifdef RLIMIT_AS |
| #define SLURM_RLIMIT_VSIZE RLIMIT_AS |
| #define SLURM_RLIMIT_VNAME "RLIMIT_AS" |
| #elif defined(RLIMIT_DATA) |
| /* RLIMIT_DATA is useless on many systems which provide anonymous |
| * mmap()'s in addition to brk(), use it here only as a fallback for |
| * oddball systems lacking RLIMIT_AS. */ |
| #define SLURM_RLIMIT_VSIZE RLIMIT_DATA |
| #define SLURM_RLIMIT_VNAME "RLIMIT_DATA" |
| #endif |
| slurm_rlimits_info_t *rli; |
| struct rlimit r; |
| rlim_t task_mem_bytes; |
| int rlimit_rc; |
| |
| if (_prlimit(pid, RLIMIT_CPU, NULL, &r) == 0) { |
| if (r.rlim_max != RLIM_INFINITY) { |
| error("Slurm process CPU time limit is %d seconds", |
| (int) r.rlim_max); |
| } |
| } |
| |
| for (rli = get_slurm_rlimits_info(); rli->name; rli++) |
| _set_limit(step->env, rli); |
| |
| /* Set soft and hard rss and vsize limit for this process, |
| * handle job limit (for all spawned processes) in slurmd */ |
| task_mem_bytes = step->step_mem; /* MB */ |
| task_mem_bytes *= (1024 * 1024); |
| |
| /* Many systems, Linux included, ignore RSS limits, but set it |
| * here anyway for consistency and to provide a way for |
| * applications to interrogate what the RSS limit is (with the |
| * caveat that the real RSS limit is over all job tasks on the |
| * node and not per process, but hopefully this is better than |
| * nothing). */ |
| #ifdef RLIMIT_RSS |
| rlimit_rc = _prlimit(pid, RLIMIT_RSS, NULL, &r); |
| if ((task_mem_bytes) && !rlimit_rc && (r.rlim_max > task_mem_bytes)) { |
| r.rlim_max = r.rlim_cur = task_mem_bytes; |
| if (_prlimit(pid, RLIMIT_RSS, &r, NULL)) { |
| /* Indicates that limit has already been exceeded */ |
| fatal("_prlimit(RLIMIT_RSS, %"PRIu64" MB): %m", |
| step->step_mem); |
| } else |
| debug2("Set task rss(%"PRIu64" MB)", step->step_mem); |
| if (get_log_level() >= LOG_LEVEL_DEBUG2) { |
| _prlimit(pid, RLIMIT_RSS, NULL, &r); |
| debug2("Task RSS limits from _prlimit: rlim_cur:%lu rlim_max:%lu", |
| r.rlim_cur, r.rlim_max); |
| } |
| } else if (rlimit_rc) { |
| error("_prlimit(RLIMIT_RSS,..) failed with %m"); |
| } else { |
| debug2("Not setting task rss rlimit, task bytes: %lu, rlimit_max: %lu", |
| task_mem_bytes, r.rlim_max); |
| } |
| #endif |
| |
| #ifdef SLURM_RLIMIT_VSIZE |
| rlimit_rc = _prlimit(pid, SLURM_RLIMIT_VSIZE, NULL, &r); |
| if ((task_mem_bytes) && slurm_conf.vsize_factor && !rlimit_rc && |
| (r.rlim_max > task_mem_bytes)) { |
| r.rlim_max = task_mem_bytes * (slurm_conf.vsize_factor / 100.0); |
| r.rlim_cur = r.rlim_max; |
| if (_prlimit(pid, SLURM_RLIMIT_VSIZE, &r, NULL)) { |
| /* Indicates that limit has already been exceeded */ |
| fatal("_prlimit(%s, %"PRIu64" MB): %m", |
| SLURM_RLIMIT_VNAME, step->step_mem); |
| } else |
| debug2("Set task vsize(%"PRIu64" MB)", step->step_mem); |
| if (get_log_level() >= LOG_LEVEL_DEBUG2) { |
| _prlimit(pid, SLURM_RLIMIT_VSIZE, NULL, &r); |
| debug2("task VSIZE limits: rlim_cur:%lu rlim_max:%lu", |
| r.rlim_cur, r.rlim_max); |
| } |
| } else if (rlimit_rc) { |
| error("_prlimit(SLURM_RLIMIT_VSIZE,,..) failed with %m"); |
| } else { |
| debug2("Not setting task vsize rlimit, task bytes: %lu, rlimit_max: %lu", |
| task_mem_bytes, r.rlim_max); |
| } |
| #endif |
| } |
| |
/*
 * Render an rlimit value as text suitable for log messages.
 *
 * IN rlim - limit value to render
 * OUT buf - receives "inf" when rlim equals RLIM_INFINITY (converted to
 *	     unsigned long), otherwise the value in decimal; the text is
 *	     truncated to fit and NUL terminated when n > 0
 * IN n    - size of buf in bytes
 * RET buf, so the call can be used directly as a printf-style argument
 */
static char *rlim_to_string(unsigned long rlim, char *buf, size_t n)
{
	if (rlim != (unsigned long) RLIM_INFINITY)
		snprintf(buf, n, "%lu", rlim);
	else
		snprintf(buf, n, "%s", "inf");

	return buf;
}
| |
| /* Set umask using value of env var SLURM_UMASK */ |
| extern int |
| set_umask(stepd_step_rec_t *step) |
| { |
| mode_t mask; |
| char *val; |
| |
| if (!(val = getenvp(step->env, "SLURM_UMASK"))) { |
| if (step->step_id.step_id != SLURM_EXTERN_CONT) |
| debug("Couldn't find SLURM_UMASK in environment"); |
| return SLURM_ERROR; |
| } |
| |
| mask = strtol(val, (char **)NULL, 8); |
| if ((step->step_id.step_id == SLURM_EXTERN_CONT) || |
| (step->step_id.step_id == SLURM_INTERACTIVE_STEP) || |
| (step->step_id.step_id == SLURM_BATCH_SCRIPT)) |
| unsetenvp(step->env, "SLURM_UMASK"); |
| umask(mask); |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * Set rlimit using value of env vars such as SLURM_RLIMIT_FSIZE if |
| * the slurm config file has PropagateResourceLimits set or the user |
| * requested it with srun/sbatch --propagate. |
| * |
| * NOTE: THIS FUNCTION SHOULD ONLY BE CALLED RIGHT BEFORE THE EXEC OF |
| * A SCRIPT AFTER THE FORK SO AS TO LIMIT THE ABOUT OF EFFECT THE |
| * LIMITS HAVE WHEN COMBINED WITH THE SLURMSTEPD. RLIMIT_FSIZE IS THE |
| * MAIN REASON SINCE IF THE USER SETS THIS TO BE LOWER THAN THE SIZE |
| * OF THE CURRENT SLURMD.LOG THE STEPD WILL CORE THE NEXT TIME |
| * ANYTHING IS WRITTEN TO IT. SO IF RUNNING +DEBUG2 AND THE USER IS |
| * GETTING CORES WITH FILE SYSTEM LIMIT ERRORS THIS IS THE REASON. |
| */ |
| static int |
| _set_limit(char **env, slurm_rlimits_info_t *rli) |
| { |
| unsigned long env_value; |
| char max[24], cur[24], req[24]; |
| struct rlimit r; |
| bool u_req_propagate; /* e.g. true if 'srun --propagate' */ |
| char *env_name = NULL, *rlimit_name; |
| int rc = SLURM_SUCCESS; |
| |
| xstrfmtcat(env_name, "SLURM_RLIMIT_%s", rli->name); |
| rlimit_name = xstrdup(env_name + 6); |
| if (_get_env_val(env, env_name, &env_value, &u_req_propagate)) { |
| debug("Couldn't find %s in environment", env_name); |
| xfree(env_name); |
| return SLURM_ERROR; |
| } |
| |
| /* |
| * Users shouldn't get the SLURM_RLIMIT_* env vars in their environ |
| */ |
| unsetenvp(env, env_name); |
| xfree(env_name); |
| |
| /* |
| * We'll only attempt to set the propagated soft rlimit when indicated |
| * by the slurm conf file settings, or the user requested it. |
| */ |
| if ( ! (rli->propagate_flag == PROPAGATE_RLIMITS || u_req_propagate)) |
| goto cleanup; |
| |
| if (getrlimit( rli->resource, &r ) < 0) { |
| error("getrlimit(%s): %m", rlimit_name); |
| rc = SLURM_ERROR; |
| goto cleanup; |
| } |
| |
| /* |
| * Nothing to do if the rlimit won't change |
| */ |
| if (r.rlim_cur == (rlim_t) env_value) { |
| debug2( "_set_limit: %s setrlimit %s no change in value: %lu", |
| u_req_propagate?"user":"conf", rlimit_name, |
| (unsigned long) r.rlim_cur); |
| goto cleanup; |
| } |
| |
| debug2("_set_limit: %-14s: max:%s cur:%s req:%s", rlimit_name, |
| rlim_to_string (r.rlim_max, max, sizeof (max)), |
| rlim_to_string (r.rlim_cur, cur, sizeof (cur)), |
| rlim_to_string (env_value, req, sizeof (req)) ); |
| |
| r.rlim_cur = (rlim_t) env_value; |
| if ((!u_req_propagate) && (r.rlim_cur > r.rlim_max)) { |
| verbose("%s: %-14s: reducing req:%s to max:%s", |
| __func__, |
| rlimit_name, |
| rlim_to_string(env_value, req, sizeof(req)), |
| rlim_to_string(r.rlim_max, max, sizeof(max))); |
| |
| r.rlim_cur = r.rlim_max; |
| } |
| |
| if (setrlimit( rli->resource, &r ) < 0) { |
| /* |
| * Report an error only if the user requested propagate |
| */ |
| if (u_req_propagate) { |
| error( "Can't propagate %s of %s from submit host: %m", |
| rlimit_name, |
| r.rlim_cur == RLIM_INFINITY ? "'unlimited'" : |
| rlim_to_string( r.rlim_cur, cur, sizeof(cur))); |
| } else { |
| verbose("Can't propagate %s of %s from submit host: %m", |
| rlimit_name, |
| r.rlim_cur == RLIM_INFINITY ? "'unlimited'" : |
| rlim_to_string( r.rlim_cur, cur, sizeof(cur))); |
| } |
| rc = SLURM_ERROR; |
| goto cleanup; |
| } |
| debug2( "_set_limit: %s setrlimit %s succeeded", |
| u_req_propagate?"user":"conf", rlimit_name ); |
| |
| cleanup: |
| xfree(rlimit_name); |
| return rc; |
| } |
| |
/*
 * Determine the value of the env var 'name' (if it exists) and whether
 * or not the user wants to use its value as the jobs soft rlimit.
 *
 * IN env   - environment to search
 * IN name  - variable name; its first six characters (the "SLURM_" prefix
 *	      of the names built by _set_limit()) are skipped when logging
 * OUT valp - parsed decimal value; only meaningful when 0 is returned
 * OUT u_req_propagate - true if the value carried the leading 'U' marker;
 *	      left untouched when the variable does not exist
 * RET 0 on success, -1 if the variable is not set or its value is not a
 *     complete decimal number
 */
static int _get_env_val(char **env, const char *name, unsigned long *valp,
			bool *u_req_propagate)
{
	char *val = NULL;
	char *end = NULL;

	xassert(env != NULL);
	xassert(name != NULL);

	if (!(val = getenvp(env, name)))
		return (-1);

	/*
	 * The letter 'U' would have been prepended to the string value if the
	 * user requested to have this rlimit propagated via 'srun --propagate'
	 */
	if (*val == 'U') {
		*u_req_propagate = true;
		debug2("_get_env_val: %s propagated by user option", &name[6]);
		val++;
	} else {
		*u_req_propagate = false;
	}

	*valp = strtoul(val, &end, 10);

	/*
	 * Reject trailing garbage and also values without any digit
	 * (end == val, e.g. "" or a bare "U"): strtoul() returns 0 for those,
	 * which would otherwise be taken as a request for a limit of zero.
	 */
	if ((end == val) || (*end != '\0')) {
		error("Invalid %s env var, value = `%s'", name, val);
		return (-1);
	}

	return (0);
}