| /*****************************************************************************\ |
| * cgroup.c - driver for cgroup plugin |
| ***************************************************************************** |
| * Copyright (C) SchedMD LLC. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include "src/interfaces/cgroup.h" |
| |
| /* Define slurm-specific aliases for use by plugins, see slurm_xlator.h. */ |
| strong_alias(cgroup_conf_init, slurm_cgroup_conf_init); |
| strong_alias(cgroup_conf_destroy, slurm_cgroup_conf_destroy); |
| strong_alias(autodetect_cgroup_version, slurm_autodetect_cgroup_version); |
| |
| #define DEFAULT_CGROUP_BASEDIR "/sys/fs/cgroup" |
| #define DEFAULT_CGROUP_PLUGIN "autodetect" |
| |
| /* Symbols provided by the plugin */ |
| typedef struct { |
| int (*initialize) (cgroup_ctl_type_t sub); |
| int (*system_create) (cgroup_ctl_type_t sub); |
| int (*system_addto) (cgroup_ctl_type_t sub, pid_t *pids, |
| int npids); |
| int (*system_destroy) (cgroup_ctl_type_t sub); |
| int (*step_create) (cgroup_ctl_type_t sub, |
| stepd_step_rec_t *step); |
| int (*step_addto) (cgroup_ctl_type_t sub, pid_t *pids, |
| int npids); |
| int (*step_get_pids) (pid_t **pids, int *npids); |
| int (*step_suspend) (void); |
| int (*step_resume) (void); |
| int (*step_destroy) (cgroup_ctl_type_t sub); |
| bool (*has_pid) (pid_t pid); |
| cgroup_limits_t *(*constrain_get) (cgroup_ctl_type_t sub, |
| cgroup_level_t level); |
| int (*constrain_set) (cgroup_ctl_type_t sub, |
| cgroup_level_t level, |
| cgroup_limits_t *limits); |
| int (*constrain_apply) (cgroup_ctl_type_t sub, |
| cgroup_level_t level, |
| uint32_t task_id); |
| int (*step_start_oom_mgr) (stepd_step_rec_t *step); |
| cgroup_oom_t *(*step_stop_oom_mgr) (stepd_step_rec_t *step); |
| int (*task_addto) (cgroup_ctl_type_t sub, |
| stepd_step_rec_t *step, pid_t pid, |
| uint32_t task_id); |
| cgroup_acct_t *(*task_get_acct_data) (uint32_t taskid); |
| long int (*get_acct_units) (void); |
| bool (*has_feature) (cgroup_ctl_feature_t f); |
| char *(*get_scope_path)(void); |
| int (*setup_scope)(char *scope_path); |
| int (*signal)(int signal); |
| char *(*get_task_empty_event_path)(uint32_t taskid, bool *on_modify); |
| int (*is_task_empty)(uint32_t taskid); |
| } slurm_ops_t; |
| |
| /* |
| * These strings must be kept in the same order as the fields |
| * declared for slurm_ops_t. |
| */ |
| static const char *syms[] = { |
| "cgroup_p_initialize", |
| "cgroup_p_system_create", |
| "cgroup_p_system_addto", |
| "cgroup_p_system_destroy", |
| "cgroup_p_step_create", |
| "cgroup_p_step_addto", |
| "cgroup_p_step_get_pids", |
| "cgroup_p_step_suspend", |
| "cgroup_p_step_resume", |
| "cgroup_p_step_destroy", |
| "cgroup_p_has_pid", |
| "cgroup_p_constrain_get", |
| "cgroup_p_constrain_set", |
| "cgroup_p_constrain_apply", |
| "cgroup_p_step_start_oom_mgr", |
| "cgroup_p_step_stop_oom_mgr", |
| "cgroup_p_task_addto", |
| "cgroup_p_task_get_acct_data", |
| "cgroup_p_get_acct_units", |
| "cgroup_p_has_feature", |
| "cgroup_p_get_scope_path", |
| "cgroup_p_setup_scope", |
| "cgroup_p_signal", |
| "cgroup_p_get_task_empty_event_path", |
| "cgroup_p_is_task_empty", |
| }; |
| |
| /* Local variables */ |
| static slurm_ops_t ops; |
| static plugin_context_t *g_context = NULL; |
| static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; |
| static plugin_init_t plugin_inited = PLUGIN_NOT_INITED; |
| |
| cgroup_conf_t slurm_cgroup_conf; |
| |
| static pthread_rwlock_t cg_conf_lock = PTHREAD_RWLOCK_INITIALIZER; |
| static buf_t *cg_conf_buf = NULL; |
| static bool cg_conf_inited = false; |
| static bool cg_conf_exist = true; |
| |
| static char scope_path[PATH_MAX] = ""; |
| |
| /* local functions */ |
| static void _cgroup_conf_fini(); |
| static void _clear_slurm_cgroup_conf(); |
| static void _pack_cgroup_conf(buf_t *buffer); |
| static int _unpack_cgroup_conf(buf_t *buffer); |
| static void _read_slurm_cgroup_conf(void); |
| |
| /* Local functions */ |
| static int _defunct_option(void **dest, slurm_parser_enum_t type, |
| const char *key, const char *value, |
| const char *line, char **leftover) |
| { |
| error_in_daemon("The option \"%s\" is defunct, please remove it from cgroup.conf.", |
| key); |
| return 0; |
| } |
| |
| static void _cgroup_conf_fini() |
| { |
| slurm_rwlock_wrlock(&cg_conf_lock); |
| |
| _clear_slurm_cgroup_conf(); |
| cg_conf_inited = false; |
| FREE_NULL_BUFFER(cg_conf_buf); |
| |
| slurm_rwlock_unlock(&cg_conf_lock); |
| } |
| |
| static void _clear_slurm_cgroup_conf(void) |
| { |
| xfree(slurm_cgroup_conf.cgroup_mountpoint); |
| xfree(slurm_cgroup_conf.cgroup_plugin); |
| xfree(slurm_cgroup_conf.cgroup_prepend); |
| xfree(slurm_cgroup_conf.enable_extra_controllers); |
| |
| memset(&slurm_cgroup_conf, 0, sizeof(slurm_cgroup_conf)); |
| } |
| |
| static void _init_slurm_cgroup_conf(void) |
| { |
| _clear_slurm_cgroup_conf(); |
| |
| slurm_cgroup_conf.allowed_ram_space = 100; |
| slurm_cgroup_conf.allowed_swap_space = 0; |
| slurm_cgroup_conf.cgroup_mountpoint = xstrdup(DEFAULT_CGROUP_BASEDIR); |
| slurm_cgroup_conf.cgroup_plugin = xstrdup(DEFAULT_CGROUP_PLUGIN); |
| #ifndef MULTIPLE_SLURMD |
| slurm_cgroup_conf.cgroup_prepend = xstrdup("/slurm"); |
| #else |
| slurm_cgroup_conf.cgroup_prepend = xstrdup("/slurm_%n"); |
| #endif |
| slurm_cgroup_conf.constrain_cores = false; |
| slurm_cgroup_conf.constrain_devices = false; |
| slurm_cgroup_conf.constrain_ram_space = false; |
| slurm_cgroup_conf.constrain_swap_space = false; |
| slurm_cgroup_conf.enable_controllers = false; |
| slurm_cgroup_conf.enable_extra_controllers = NULL; |
| slurm_cgroup_conf.ignore_systemd = false; |
| slurm_cgroup_conf.ignore_systemd_on_failure = false; |
| slurm_cgroup_conf.max_ram_percent = 100; |
| slurm_cgroup_conf.max_swap_percent = 100; |
| slurm_cgroup_conf.memory_swappiness = NO_VAL64; |
| slurm_cgroup_conf.min_ram_space = XCGROUP_DEFAULT_MIN_RAM; |
| slurm_cgroup_conf.signal_children_processes = false; |
| slurm_cgroup_conf.systemd_timeout = 1000; |
| } |
| |
| static void _pack_cgroup_conf(buf_t *buffer) |
| { |
| /* |
| * No protocol version needed, at the time of writing we are only |
| * sending at slurmstepd startup. |
| */ |
| |
| if (!cg_conf_exist) { |
| packbool(0, buffer); |
| return; |
| } |
| packbool(1, buffer); |
| packstr(slurm_cgroup_conf.cgroup_mountpoint, buffer); |
| |
| packstr(slurm_cgroup_conf.cgroup_prepend, buffer); |
| |
| packbool(slurm_cgroup_conf.constrain_cores, buffer); |
| |
| packbool(slurm_cgroup_conf.constrain_ram_space, buffer); |
| packfloat(slurm_cgroup_conf.allowed_ram_space, buffer); |
| packfloat(slurm_cgroup_conf.max_ram_percent, buffer); |
| |
| pack64(slurm_cgroup_conf.min_ram_space, buffer); |
| |
| packbool(slurm_cgroup_conf.constrain_swap_space, buffer); |
| packfloat(slurm_cgroup_conf.allowed_swap_space, buffer); |
| packfloat(slurm_cgroup_conf.max_swap_percent, buffer); |
| pack64(slurm_cgroup_conf.memory_swappiness, buffer); |
| |
| packbool(slurm_cgroup_conf.constrain_devices, buffer); |
| packstr(slurm_cgroup_conf.cgroup_plugin, buffer); |
| |
| packbool(slurm_cgroup_conf.ignore_systemd, buffer); |
| packbool(slurm_cgroup_conf.ignore_systemd_on_failure, buffer); |
| |
| packbool(slurm_cgroup_conf.enable_controllers, buffer); |
| packstr(slurm_cgroup_conf.enable_extra_controllers, buffer); |
| packbool(slurm_cgroup_conf.signal_children_processes, buffer); |
| pack64(slurm_cgroup_conf.systemd_timeout, buffer); |
| } |
| |
| static int _unpack_cgroup_conf(buf_t *buffer) |
| { |
| bool tmpbool = false; |
| /* |
| * No protocol version needed, at the time of writing we are only |
| * reading on slurmstepd startup. |
| */ |
| safe_unpackbool(&tmpbool, buffer); |
| if (!tmpbool) { |
| cg_conf_exist = false; |
| return SLURM_SUCCESS; |
| } |
| |
| _clear_slurm_cgroup_conf(); |
| |
| safe_unpackstr(&slurm_cgroup_conf.cgroup_mountpoint, buffer); |
| |
| safe_unpackstr(&slurm_cgroup_conf.cgroup_prepend, buffer); |
| |
| safe_unpackbool(&slurm_cgroup_conf.constrain_cores, buffer); |
| |
| safe_unpackbool(&slurm_cgroup_conf.constrain_ram_space, buffer); |
| safe_unpackfloat(&slurm_cgroup_conf.allowed_ram_space, buffer); |
| safe_unpackfloat(&slurm_cgroup_conf.max_ram_percent, buffer); |
| |
| safe_unpack64(&slurm_cgroup_conf.min_ram_space, buffer); |
| |
| safe_unpackbool(&slurm_cgroup_conf.constrain_swap_space, buffer); |
| safe_unpackfloat(&slurm_cgroup_conf.allowed_swap_space, buffer); |
| safe_unpackfloat(&slurm_cgroup_conf.max_swap_percent, buffer); |
| safe_unpack64(&slurm_cgroup_conf.memory_swappiness, buffer); |
| |
| safe_unpackbool(&slurm_cgroup_conf.constrain_devices, buffer); |
| safe_unpackstr(&slurm_cgroup_conf.cgroup_plugin, buffer); |
| |
| safe_unpackbool(&slurm_cgroup_conf.ignore_systemd, buffer); |
| safe_unpackbool(&slurm_cgroup_conf.ignore_systemd_on_failure, buffer); |
| |
| safe_unpackbool(&slurm_cgroup_conf.enable_controllers, buffer); |
| safe_unpackstr(&slurm_cgroup_conf.enable_extra_controllers, buffer); |
| safe_unpackbool(&slurm_cgroup_conf.signal_children_processes, buffer); |
| safe_unpack64(&slurm_cgroup_conf.systemd_timeout, buffer); |
| |
| return SLURM_SUCCESS; |
| |
| unpack_error: |
| _clear_slurm_cgroup_conf(); |
| |
| return SLURM_ERROR; |
| } |
| |
| /* |
| * read_slurm_cgroup_conf - load the Slurm cgroup configuration from the |
| * cgroup.conf file. |
| */ |
| static void _read_slurm_cgroup_conf(void) |
| { |
| s_p_options_t options[] = { |
| {"CgroupAutomount", S_P_BOOLEAN, _defunct_option}, |
| {"CgroupMountpoint", S_P_STRING}, |
| {"CgroupReleaseAgentDir", S_P_STRING}, |
| {"ConstrainCores", S_P_BOOLEAN}, |
| {"ConstrainRAMSpace", S_P_BOOLEAN}, |
| {"AllowedRAMSpace", S_P_FLOAT}, |
| {"MaxRAMPercent", S_P_FLOAT}, |
| {"MinRAMSpace", S_P_UINT64}, |
| {"ConstrainSwapSpace", S_P_BOOLEAN}, |
| {"AllowedSwapSpace", S_P_FLOAT}, |
| {"MaxSwapPercent", S_P_FLOAT}, |
| {"MemoryLimitEnforcement", S_P_BOOLEAN}, |
| {"MemoryLimitThreshold", S_P_FLOAT}, |
| {"ConstrainDevices", S_P_BOOLEAN}, |
| {"AllowedDevicesFile", S_P_STRING}, |
| {"MemorySwappiness", S_P_UINT64}, |
| {"CgroupPlugin", S_P_STRING}, |
| {"IgnoreSystemd", S_P_BOOLEAN}, |
| {"IgnoreSystemdOnFailure", S_P_BOOLEAN}, |
| {"EnableControllers", S_P_BOOLEAN}, |
| {"EnableExtraControllers", S_P_STRING}, |
| {"SignalChildrenProcesses", S_P_BOOLEAN}, |
| {"SystemdTimeout", S_P_UINT64}, |
| {NULL} }; |
| s_p_hashtbl_t *tbl = NULL; |
| char *conf_path = NULL, *tmp_str; |
| struct stat buf; |
| size_t sz; |
| |
| /* Get the cgroup.conf path and validate the file */ |
| conf_path = get_extra_conf_path("cgroup.conf"); |
| if ((conf_path == NULL) || (stat(conf_path, &buf) == -1)) { |
| info("%s: No cgroup.conf file (%s), using defaults", |
| __func__, conf_path); |
| cg_conf_exist = false; |
| } else { |
| debug("Reading cgroup.conf file %s", conf_path); |
| |
| tbl = s_p_hashtbl_create(options); |
| if (s_p_parse_file(tbl, NULL, conf_path, 0, NULL) == |
| SLURM_ERROR) { |
| fatal("Could not open/read/parse cgroup.conf file %s", |
| conf_path); |
| } |
| |
| /* cgroup initialization parameters */ |
| if (s_p_get_string(&tmp_str, "CgroupMountpoint", tbl)) { |
| /* Remove the trailing / if any. */ |
| sz = strlen(tmp_str); |
| if (*(tmp_str + sz - 1) == '/') |
| *(tmp_str + sz - 1) = '\0'; |
| xfree(slurm_cgroup_conf.cgroup_mountpoint); |
| slurm_cgroup_conf.cgroup_mountpoint = tmp_str; |
| tmp_str = NULL; |
| } |
| if (s_p_get_string(&tmp_str, "CgroupReleaseAgentDir", tbl)) { |
| xfree(tmp_str); |
| fatal("Support for CgroupReleaseAgentDir option has been removed."); |
| } |
| |
| /* Cores constraints related conf items */ |
| (void) s_p_get_boolean(&slurm_cgroup_conf.constrain_cores, |
| "ConstrainCores", tbl); |
| |
| /* RAM and Swap constraints related conf items */ |
| (void) s_p_get_boolean(&slurm_cgroup_conf.constrain_ram_space, |
| "ConstrainRAMSpace", tbl); |
| |
| (void) s_p_get_float(&slurm_cgroup_conf.allowed_ram_space, |
| "AllowedRAMSpace", tbl); |
| |
| (void) s_p_get_float(&slurm_cgroup_conf.max_ram_percent, |
| "MaxRAMPercent", tbl); |
| |
| (void) s_p_get_boolean(&slurm_cgroup_conf.constrain_swap_space, |
| "ConstrainSwapSpace", tbl); |
| |
| (void) s_p_get_float(&slurm_cgroup_conf.allowed_swap_space, |
| "AllowedSwapSpace", tbl); |
| |
| (void) s_p_get_float(&slurm_cgroup_conf.max_swap_percent, |
| "MaxSwapPercent", tbl); |
| |
| (void) s_p_get_uint64 (&slurm_cgroup_conf.min_ram_space, |
| "MinRAMSpace", tbl); |
| |
| if (s_p_get_uint64(&slurm_cgroup_conf.memory_swappiness, |
| "MemorySwappiness", tbl)) { |
| if (slurm_cgroup_conf.memory_swappiness > 100) { |
| error("Value for MemorySwappiness is too high, rounding down to 100."); |
| slurm_cgroup_conf.memory_swappiness = 100; |
| } |
| } |
| |
| /* Devices constraint related conf items */ |
| (void) s_p_get_boolean(&slurm_cgroup_conf.constrain_devices, |
| "ConstrainDevices", tbl); |
| |
| if (s_p_get_string(&tmp_str, "AllowedDevicesFile", tbl)) { |
| xfree(tmp_str); |
| warning("AllowedDevicesFile option is obsolete, please remove it from your configuration."); |
| } |
| |
| if (s_p_get_string(&tmp_str, "CgroupPlugin", tbl)) { |
| xfree(slurm_cgroup_conf.cgroup_plugin); |
| slurm_cgroup_conf.cgroup_plugin = tmp_str; |
| tmp_str = NULL; |
| } |
| |
| if (s_p_get_boolean(&slurm_cgroup_conf.ignore_systemd, |
| "IgnoreSystemd", tbl)) { |
| /* Implicitly set these other one. */ |
| slurm_cgroup_conf.ignore_systemd_on_failure = true; |
| } |
| |
| if (!slurm_cgroup_conf.ignore_systemd && |
| (!s_p_get_boolean( |
| &slurm_cgroup_conf.ignore_systemd_on_failure, |
| "IgnoreSystemdOnFailure", tbl))) |
| slurm_cgroup_conf.ignore_systemd_on_failure = false; |
| |
| (void) s_p_get_boolean(&slurm_cgroup_conf.enable_controllers, |
| "EnableControllers", tbl); |
| if (s_p_get_string(&tmp_str, "EnableExtraControllers", tbl)) { |
| xfree(slurm_cgroup_conf.enable_extra_controllers); |
| slurm_cgroup_conf.enable_extra_controllers = tmp_str; |
| tmp_str = NULL; |
| } |
| (void) s_p_get_boolean( |
| &slurm_cgroup_conf.signal_children_processes, |
| "SignalChildrenProcesses", tbl); |
| |
| (void) s_p_get_uint64(&slurm_cgroup_conf.systemd_timeout, |
| "SystemdTimeout", tbl); |
| |
| s_p_hashtbl_destroy(tbl); |
| } |
| |
| xfree(conf_path); |
| |
| return; |
| } |
| |
| /* Autodetect logic inspired from systemd source code */ |
| extern char *autodetect_cgroup_version(void) |
| { |
| #ifdef WITH_CGROUP |
| struct statfs fs; |
| int cgroup_ver = -1; |
| |
| if (statfs("/sys/fs/cgroup/", &fs) < 0) { |
| error("cgroup filesystem not mounted in /sys/fs/cgroup/"); |
| return NULL; |
| } |
| |
| if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) |
| cgroup_ver = 2; |
| else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { |
| if (statfs("/sys/fs/cgroup/systemd/", &fs) != 0) { |
| error("can't stat /sys/fs/cgroup/systemd/: %m"); |
| return NULL; |
| } |
| |
| if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) { |
| if (statfs("/sys/fs/cgroup/unified/", &fs) != 0) { |
| error("can't stat /sys/fs/cgroup/unified/: %m"); |
| return NULL; |
| } |
| cgroup_ver = 2; |
| } else if (F_TYPE_EQUAL(fs.f_type, CGROUP_SUPER_MAGIC)) { |
| cgroup_ver = 1; |
| } else { |
| error("Unexpected fs type on /sys/fs/cgroup/systemd"); |
| return NULL; |
| } |
| } else if (F_TYPE_EQUAL(fs.f_type, SYSFS_MAGIC)) { |
| error("No filesystem mounted on /sys/fs/cgroup"); |
| return NULL; |
| } else { |
| error("Unknown filesystem type mounted on /sys/fs/cgroup"); |
| return NULL; |
| } |
| |
| log_flag(CGROUP, "%s: using cgroup version %d", __func__, cgroup_ver); |
| |
| switch (cgroup_ver) { |
| case 1: |
| return "cgroup/v1"; |
| break; |
| case 2: |
| return "cgroup/v2"; |
| break; |
| default: |
| error("unsupported cgroup version %d", cgroup_ver); |
| break; |
| } |
| #endif |
| |
| return NULL; |
| } |
| |
| /* |
| * cgroup_conf_init - load the cgroup.conf configuration. |
| * |
| * RET SLURM_SUCCESS if conf file is initialized. If the cgroup conf was |
| * already initialized, return SLURM_ERROR. |
| */ |
| extern int cgroup_conf_init(void) |
| { |
| int rc = SLURM_SUCCESS; |
| |
| slurm_rwlock_wrlock(&cg_conf_lock); |
| |
| if (!cg_conf_inited) { |
| _init_slurm_cgroup_conf(); |
| _read_slurm_cgroup_conf(); |
| if (running_in_slurmd()) { |
| /* |
| * Initialize and pack cgroup.conf info into a buffer |
| * that can be used by slurmd to send to stepd every |
| * time, instead of re-packing every time we want to |
| * send to slurmstepd |
| */ |
| cg_conf_buf = init_buf(0); |
| _pack_cgroup_conf(cg_conf_buf); |
| } |
| cg_conf_inited = true; |
| } else |
| rc = SLURM_ERROR; |
| |
| slurm_rwlock_unlock(&cg_conf_lock); |
| return rc; |
| } |
| |
| extern void cgroup_conf_destroy(void) |
| { |
| xassert(cg_conf_inited); |
| _cgroup_conf_fini(); |
| } |
| |
| extern void cgroup_free_limits(cgroup_limits_t *limits) |
| { |
| if (!limits) |
| return; |
| |
| xfree(limits->allow_cores); |
| xfree(limits->allow_mems); |
| xfree(limits); |
| } |
| |
| extern void cgroup_init_limits(cgroup_limits_t *limits) |
| { |
| if (!limits) |
| return; |
| |
| memset(limits, 0, sizeof(*limits)); |
| |
| limits->taskid = NO_VAL; |
| limits->device.type = DEV_TYPE_NONE; |
| limits->device.major = NO_VAL; |
| limits->device.minor = NO_VAL; |
| limits->limit_in_bytes = NO_VAL64; |
| limits->soft_limit_in_bytes = NO_VAL64; |
| limits->memsw_limit_in_bytes = NO_VAL64; |
| limits->swappiness = NO_VAL64; |
| } |
| |
| /* |
| * get_slurm_cgroup_conf - load the Slurm cgroup configuration from the |
| * cgroup.conf file and return a key pair <name,value> ordered list. |
| * RET List with cgroup.conf <name,value> pairs if no error, NULL otherwise. |
| */ |
| extern list_t *cgroup_get_conf_list(void) |
| { |
| list_t *cgroup_conf_l; |
| cgroup_conf_t *cg_conf = &slurm_cgroup_conf; |
| |
| xassert(cg_conf_inited); |
| |
| cgroup_conf_l = list_create(destroy_config_key_pair); |
| |
| slurm_rwlock_rdlock(&cg_conf_lock); |
| |
| add_key_pair(cgroup_conf_l, "CgroupMountpoint", "%s", |
| cg_conf->cgroup_mountpoint); |
| add_key_pair_bool(cgroup_conf_l, "ConstrainCores", |
| cg_conf->constrain_cores); |
| add_key_pair_bool(cgroup_conf_l, "ConstrainRAMSpace", |
| cg_conf->constrain_ram_space); |
| add_key_pair(cgroup_conf_l, "AllowedRAMSpace", "%.1f%%", |
| cg_conf->allowed_ram_space); |
| add_key_pair(cgroup_conf_l, "MaxRAMPercent", "%.1f%%", |
| cg_conf->max_ram_percent); |
| add_key_pair(cgroup_conf_l, "MinRAMSpace", "%"PRIu64"MB", |
| cg_conf->min_ram_space); |
| add_key_pair_bool(cgroup_conf_l, "ConstrainSwapSpace", |
| cg_conf->constrain_swap_space); |
| add_key_pair(cgroup_conf_l, "AllowedSwapSpace", "%.1f%%", |
| cg_conf->allowed_swap_space); |
| add_key_pair(cgroup_conf_l, "MaxSwapPercent", "%.1f%%", |
| cg_conf->max_swap_percent); |
| add_key_pair_bool(cgroup_conf_l, "ConstrainDevices", |
| cg_conf->constrain_devices); |
| add_key_pair(cgroup_conf_l, "CgroupPlugin", "%s", |
| cg_conf->cgroup_plugin); |
| add_key_pair_bool(cgroup_conf_l, "IgnoreSystemd", |
| cg_conf->ignore_systemd); |
| add_key_pair_bool(cgroup_conf_l, "IgnoreSystemdOnFailure", |
| cg_conf->ignore_systemd_on_failure); |
| add_key_pair_bool(cgroup_conf_l, "EnableControllers", |
| cg_conf->enable_controllers); |
| add_key_pair(cgroup_conf_l, "EnableExtraControllers", "%s", |
| cg_conf->enable_extra_controllers); |
| |
| if (cg_conf->memory_swappiness != NO_VAL64) |
| add_key_pair(cgroup_conf_l, "MemorySwappiness", "%"PRIu64, |
| cg_conf->memory_swappiness); |
| else |
| add_key_pair(cgroup_conf_l, "MemorySwappiness", "(null)"); |
| |
| add_key_pair(cgroup_conf_l, "SystemdTimeout", "%"PRIu64" ms", |
| cg_conf->systemd_timeout); |
| |
| slurm_rwlock_unlock(&cg_conf_lock); |
| |
| list_sort(cgroup_conf_l, (ListCmpF) sort_key_pairs); |
| |
| return cgroup_conf_l; |
| } |
| |
| /* |
| * This function is called from slurmd to send the cgroup state (at present |
| * only the scope path in cgroup/v2) to the recently forked slurmstepd, since |
| * slurmstepd might not be able to infer the correct scope path when we are |
| * running into a container. |
| */ |
| extern int cgroup_write_state(int fd) |
| { |
| int len = 0; |
| char *step_path = NULL; |
| |
| if (plugin_inited == PLUGIN_INITED) { |
| step_path = (*(ops.get_scope_path))(); |
| if (step_path) |
| len = strlen(step_path) + 1; |
| } |
| |
| safe_write(fd, &len, sizeof(int)); |
| if (step_path) |
| safe_write(fd, step_path, len); |
| |
| return SLURM_SUCCESS; |
| rwfail: |
| return SLURM_ERROR; |
| } |
| |
| /* |
| * This function is called from slurmstepd before the cgroup plugin is |
| * initialized. It records the cgroup plugin state passed from slurmd |
| * (at present only the scope path in cgroup/v2) in this slurmstepd so it |
| * can be later used by the plugin when it is initialized. |
| */ |
| extern int cgroup_read_state(int fd) |
| { |
| int len; |
| |
| safe_read(fd, &len, sizeof(int)); |
| |
| if (len) |
| safe_read(fd, scope_path, len); |
| |
| return SLURM_SUCCESS; |
| rwfail: |
| return SLURM_ERROR; |
| } |
| |
| extern int cgroup_write_conf(int fd) |
| { |
| int len; |
| |
| xassert(cg_conf_inited); |
| |
| slurm_rwlock_rdlock(&cg_conf_lock); |
| len = get_buf_offset(cg_conf_buf); |
| safe_write(fd, &len, sizeof(int)); |
| safe_write(fd, get_buf_data(cg_conf_buf), len); |
| slurm_rwlock_unlock(&cg_conf_lock); |
| |
| return SLURM_SUCCESS; |
| rwfail: |
| slurm_rwlock_unlock(&cg_conf_lock); |
| return SLURM_ERROR; |
| } |
| |
| extern int cgroup_read_conf(int fd) |
| { |
| int len, rc; |
| buf_t *buffer = NULL; |
| |
| slurm_rwlock_wrlock(&cg_conf_lock); |
| |
| safe_read(fd, &len, sizeof(int)); |
| buffer = init_buf(len); |
| safe_read(fd, buffer->head, len); |
| |
| rc = _unpack_cgroup_conf(buffer); |
| |
| if (rc == SLURM_ERROR) |
| fatal("%s: problem with unpack of cgroup.conf", __func__); |
| |
| FREE_NULL_BUFFER(buffer); |
| |
| cg_conf_inited = true; |
| slurm_rwlock_unlock(&cg_conf_lock); |
| |
| return SLURM_SUCCESS; |
| rwfail: |
| slurm_rwlock_unlock(&cg_conf_lock); |
| FREE_NULL_BUFFER(buffer); |
| |
| return SLURM_ERROR; |
| } |
| |
| extern bool cgroup_memcg_job_confinement(void) |
| { |
| bool status = false; |
| |
| xassert(cg_conf_inited); |
| |
| /* read cgroup configuration */ |
| slurm_rwlock_rdlock(&cg_conf_lock); |
| |
| if (xstrcmp(slurm_cgroup_conf.cgroup_plugin, "disabled") && |
| ((slurm_cgroup_conf.constrain_ram_space || |
| slurm_cgroup_conf.constrain_swap_space) && |
| xstrstr(slurm_conf.task_plugin, "cgroup"))) |
| status = true; |
| |
| slurm_rwlock_unlock(&cg_conf_lock); |
| |
| return status; |
| } |
| |
| /* |
| * Initialize Cgroup plugins. |
| * |
| * Returns a Slurm errno. |
| */ |
| extern int cgroup_g_init(void) |
| { |
| int rc = SLURM_SUCCESS; |
| char *plugin_type = "cgroup"; |
| char *type = NULL; |
| |
| slurm_mutex_lock(&g_context_lock); |
| |
| if (plugin_inited) |
| goto done; |
| |
| if (cgroup_conf_init() != SLURM_SUCCESS) |
| log_flag(CGROUP, "cgroup conf was already initialized."); |
| |
| type = slurm_cgroup_conf.cgroup_plugin; |
| |
| if (!xstrcmp(type, "disabled")) { |
| plugin_inited = PLUGIN_NOOP; |
| goto done; |
| } |
| |
| if (!xstrcmp(type, "autodetect")) { |
| if (!(type = autodetect_cgroup_version())) { |
| rc = SLURM_ERROR; |
| goto done; |
| } |
| } |
| |
| if (running_in_slurmd()) |
| if (!xstrcmp(type, "cgroup/v1")) |
| warning("cgroup/v1 plugin is deprecated, please upgrade to cgroup/v2 at your earliest convenience"); |
| |
| g_context = plugin_context_create( |
| plugin_type, type, (void **)&ops, syms, sizeof(syms)); |
| |
| if (!g_context) { |
| error("cannot create %s context for %s", plugin_type, type); |
| rc = SLURM_ERROR; |
| plugin_inited = PLUGIN_NOT_INITED; |
| goto done; |
| } |
| |
| /* |
| * We have recorded the scope_path here previously, configure it now in |
| * the plugin. |
| */ |
| rc = (*(ops.setup_scope))(scope_path); |
| if (rc == SLURM_ERROR) { |
| error("cannot setup the scope for %s", plugin_type); |
| goto done; |
| } |
| |
| plugin_inited = PLUGIN_INITED; |
| done: |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern int cgroup_g_fini(void) |
| { |
| int rc = SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_context_lock); |
| if (g_context) { |
| rc = plugin_context_destroy(g_context); |
| g_context = NULL; |
| } |
| |
| cgroup_conf_destroy(); |
| plugin_inited = PLUGIN_NOT_INITED; |
| |
| slurm_mutex_unlock(&g_context_lock); |
| return rc; |
| } |
| |
| extern int cgroup_g_initialize(cgroup_ctl_type_t sub) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| fatal("%s: Trying to initialize cgroups but CgroupPlugin=disabled is set in cgroup.conf. Please, unset any configuration that is using cgroups.", |
| __func__); |
| |
| return (*(ops.initialize))(sub); |
| } |
| |
| extern int cgroup_g_system_create(cgroup_ctl_type_t sub) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.system_create))(sub); |
| } |
| |
| extern int cgroup_g_system_addto(cgroup_ctl_type_t sub, pid_t *pids, int npids) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.system_addto))(sub, pids, npids); |
| } |
| |
| extern int cgroup_g_system_destroy(cgroup_ctl_type_t sub) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.system_destroy))(sub); |
| } |
| |
| extern int cgroup_g_step_create(cgroup_ctl_type_t sub, stepd_step_rec_t *step) |
| { |
| int rc; |
| |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_context_lock); |
| rc = (*(ops.step_create))(sub, step); |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern int cgroup_g_step_addto(cgroup_ctl_type_t sub, pid_t *pids, int npids) |
| { |
| int rc; |
| |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_context_lock); |
| rc = (*(ops.step_addto))(sub, pids, npids); |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern int cgroup_g_step_get_pids(pid_t **pids, int *npids) |
| { |
| int rc; |
| |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) { |
| *npids = 0; |
| *pids = NULL; |
| return SLURM_SUCCESS; |
| } |
| |
| slurm_mutex_lock(&g_context_lock); |
| rc = (*(ops.step_get_pids))(pids, npids); |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern int cgroup_g_step_suspend(void) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.step_suspend))(); |
| } |
| |
| extern int cgroup_g_step_resume(void) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.step_resume))(); |
| } |
| |
| extern int cgroup_g_step_destroy(cgroup_ctl_type_t sub) |
| { |
| int rc; |
| |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_context_lock); |
| rc = (*(ops.step_destroy))(sub); |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern bool cgroup_g_has_pid(pid_t pid) |
| { |
| int rc; |
| |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return false; |
| |
| slurm_mutex_lock(&g_context_lock); |
| rc = (*(ops.has_pid))(pid); |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern cgroup_limits_t *cgroup_g_constrain_get(cgroup_ctl_type_t sub, |
| cgroup_level_t level) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return NULL; |
| |
| return (*(ops.constrain_get))(sub, level); |
| } |
| |
| extern int cgroup_g_constrain_set(cgroup_ctl_type_t sub, cgroup_level_t level, |
| cgroup_limits_t *limits) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.constrain_set))(sub, level, limits); |
| } |
| |
| extern int cgroup_g_constrain_apply(cgroup_ctl_type_t sub, cgroup_level_t level, |
| uint32_t task_id) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.constrain_apply))(sub, level, task_id); |
| } |
| |
| extern int cgroup_g_step_start_oom_mgr(stepd_step_rec_t *step) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.step_start_oom_mgr))(step); |
| } |
| |
| extern cgroup_oom_t *cgroup_g_step_stop_oom_mgr(stepd_step_rec_t *step) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) { |
| cgroup_oom_t *empty_oom = xmalloc(sizeof(*empty_oom)); |
| return empty_oom; |
| } |
| |
| return (*(ops.step_stop_oom_mgr))(step); |
| } |
| |
| extern int cgroup_g_task_addto(cgroup_ctl_type_t sub, stepd_step_rec_t *step, |
| pid_t pid, uint32_t task_id) |
| { |
| int rc; |
| |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_context_lock); |
| rc = (*(ops.task_addto))(sub, step, pid, task_id); |
| slurm_mutex_unlock(&g_context_lock); |
| |
| return rc; |
| } |
| |
| extern cgroup_acct_t *cgroup_g_task_get_acct_data(uint32_t taskid) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) { |
| cgroup_acct_t *empty_acct = xmalloc(sizeof(*empty_acct)); |
| return empty_acct; |
| } |
| |
| return (*(ops.task_get_acct_data))(taskid); |
| } |
| |
| extern long int cgroup_g_get_acct_units(void) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return (long int)USEC_IN_SEC; |
| |
| return (*(ops.get_acct_units))(); |
| } |
| |
| extern bool cgroup_g_has_feature(cgroup_ctl_feature_t f) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return false; |
| |
| return (*(ops.has_feature))(f); |
| } |
| |
| extern int cgroup_g_signal(int signal) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.signal))(signal); |
| } |
| |
| extern char *cgroup_g_get_task_empty_event_path(uint32_t taskid, |
| bool *on_modify) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.get_task_empty_event_path))(taskid, on_modify); |
| } |
| |
| extern int cgroup_g_is_task_empty(uint32_t taskid) |
| { |
| xassert(plugin_inited != PLUGIN_NOT_INITED); |
| |
| if (plugin_inited == PLUGIN_NOOP) |
| return SLURM_SUCCESS; |
| |
| return (*(ops.is_task_empty))(taskid); |
| } |