blob: 7ff37ee7a9ae63af78d288993e3a4e1512f34180 [file] [log] [blame] [edit]
/*****************************************************************************\
* cgroup_common.c - Cgroup plugin common functions
*****************************************************************************
* Copyright (C) 2021 SchedMD LLC
* Written by Felip Moll <felip.moll@schedmd.com>
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "cgroup_common.h"
/* These are defined here so when we link with something other than
* the slurmctld we will have these symbols defined. They will get
* overwritten when linking with the slurmctld.
*/
#if defined (__APPLE__)
extern slurm_conf_t slurm_conf __attribute__((weak_import));
#else
slurm_conf_t slurm_conf;
#endif
/*
* Returns the path to the cgroup.procs file over which we have permissions
* defined by check_mode. This path is where we'll be able to read or write
* pids. If there are no paths available with these permissions, return NULL,
* which means the cgroup doesn't exist or we do not have permissions to modify
* the cg.
*/
static char *_cgroup_procs_check(xcgroup_t *cg, int check_mode)
{
struct stat st;
char *path = xstrdup_printf("%s/%s", cg->path, "cgroup.procs");
if (!((stat(path, &st) >= 0) && (st.st_mode & check_mode))) {
error("%s: failed on path %s: %m", __func__, path);
xfree(path);
}
return path;
}
static char *_cgroup_procs_readable_path(xcgroup_t *cg)
{
return _cgroup_procs_check(cg, S_IRUSR);
}
static char *_cgroup_procs_writable_path(xcgroup_t *cg)
{
return _cgroup_procs_check(cg, S_IWUSR);
}
static int _set_uint32_param(xcgroup_t *cg, char *param, uint32_t value)
{
int fstatus = SLURM_ERROR;
char file_path[PATH_MAX];
char *cpath = cg->path;
if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
log_flag(CGROUP, "unable to build filepath for '%s' and parameter '%s' : %m",
cpath, param);
return fstatus;
}
fstatus = common_file_write_uint32s(file_path, &value, 1);
if (fstatus != SLURM_SUCCESS)
log_flag(CGROUP, "unable to set parameter '%s' to '%u' for '%s'",
param, value, cpath);
else
log_flag(CGROUP, "parameter '%s' set to '%u' for '%s'",
param, value, cpath);
return fstatus;
}
static bool _is_empty_dir(const char *dirpath)
{
DIR *d;
struct dirent *dir;
bool empty = true;
if (!(d = opendir(dirpath)))
return empty;
while ((dir = readdir(d))) {
if (dir->d_type == DT_DIR &&
(strcmp(dir->d_name, ".") && strcmp(dir->d_name, ".."))) {
empty = false;
log_flag(CGROUP, "Found at least one child directory: %s/%s",
dirpath, dir->d_name);
break;
}
}
closedir(d);
return empty;
}
extern size_t common_file_getsize(int fd)
{
int rc;
size_t fsize;
off_t offset;
char c;
/* store current position and rewind */
offset = lseek(fd, 0, SEEK_CUR);
if (offset < 0)
return -1;
if (lseek(fd, 0, SEEK_SET) < 0)
error("%s: lseek(0): %m", __func__);
/* get file size */
fsize = 0;
do {
rc = read(fd, (void*)&c, 1);
if (rc > 0)
fsize++;
} while ((rc < 0 && errno == EINTR) || rc > 0);
/* restore position */
if (lseek(fd, offset, SEEK_SET) < 0)
error("%s: lseek(): %m", __func__);
if (rc < 0)
return -1;
else
return fsize;
}
extern int common_file_write_uint64s(char *file_path, uint64_t *values, int nb)
{
int fstatus;
int rc;
int fd;
char tstr[256];
uint64_t value;
int i;
/* open file for writing */
fd = open(file_path, O_WRONLY, 0700);
if (fd < 0) {
log_flag(CGROUP, "unable to open '%s' for writing : %m",
file_path);
return SLURM_ERROR;
}
/* add one value per line */
fstatus = SLURM_SUCCESS;
for (i = 0; i < nb ; i++) {
value = values[i];
rc = snprintf(tstr, sizeof(tstr), "%"PRIu64"", value);
if (rc < 0) {
log_flag(CGROUP, "unable to build %"PRIu64" string value, skipping",
value);
fstatus = SLURM_ERROR;
continue;
}
do {
rc = write(fd, tstr, strlen(tstr)+1);
} while (rc < 0 && errno == EINTR);
if (rc < 1) {
log_flag(CGROUP, "unable to add value '%s' to file '%s' : %m",
tstr, file_path);
if (errno != ESRCH)
fstatus = SLURM_ERROR;
}
}
/* close file */
close(fd);
return fstatus;
}
extern int common_file_read_uint64s(char *file_path, uint64_t **pvalues,
int *pnb)
{
int rc;
int fd;
size_t fsize;
char *buf;
char *p;
uint64_t *pa=NULL;
int i;
/* check input pointers */
if (pvalues == NULL || pnb == NULL)
return SLURM_ERROR;
/* open file for reading */
fd = open(file_path, O_RDONLY, 0700);
if (fd < 0) {
log_flag(CGROUP, "unable to open '%s' for reading : %m", file_path);
return SLURM_ERROR;
}
/* get file size */
fsize = common_file_getsize(fd);
if (fsize == -1) {
close(fd);
return SLURM_ERROR;
}
/* read file contents */
buf = xmalloc(fsize + 1);
do {
rc = read(fd, buf, fsize);
} while (rc < 0 && errno == EINTR);
close(fd);
buf[fsize]='\0';
/* count values (splitted by \n) */
i=0;
if (rc > 0) {
p = buf;
while (xstrchr(p, '\n') != NULL) {
i++;
p = xstrchr(p, '\n') + 1;
}
}
/* build uint64_t list */
if (i > 0) {
pa = xcalloc(i, sizeof(uint64_t));
p = buf;
i = 0;
while (xstrchr(p, '\n') != NULL) {
long long unsigned int ll_tmp;
sscanf(p, "%llu", &ll_tmp);
pa[i++] = ll_tmp;
p = xstrchr(p, '\n') + 1;
}
}
/* free buffer */
xfree(buf);
/* set output values */
*pvalues = pa;
*pnb = i;
return SLURM_SUCCESS;
}
extern int common_file_write_uint32s(char *file_path, uint32_t *values, int nb)
{
int rc;
int fd;
char tstr[256];
/* open file for writing */
if ((fd = open(file_path, O_WRONLY, 0700)) < 0) {
error("%s: unable to open '%s' for writing: %m",
__func__, file_path);
return SLURM_ERROR;
}
/* add one value per line */
for (int i = 0; i < nb; i++) {
uint32_t value = values[i];
if (snprintf(tstr, sizeof(tstr), "%u", value) < 0)
fatal("%s: unable to build %u string value",
__func__, value);
/* write terminating NUL byte */
safe_write(fd, tstr, strlen(tstr) + 1);
}
/* close file */
close(fd);
return SLURM_SUCCESS;
rwfail:
rc = errno;
error("%s: write pid %s to %s failed: %m",
__func__, tstr, file_path);
close(fd);
return rc;;
}
extern int common_file_read_uint32s(char *file_path, uint32_t **pvalues,
int *pnb)
{
int rc;
int fd;
size_t fsize;
char *buf;
char *p;
uint32_t *pa=NULL;
int i;
/* check input pointers */
if (pvalues == NULL || pnb == NULL)
return SLURM_ERROR;
/* open file for reading */
fd = open(file_path, O_RDONLY, 0700);
if (fd < 0) {
log_flag(CGROUP, "unable to open '%s' for reading : %m",
file_path);
return SLURM_ERROR;
}
/* get file size */
fsize = common_file_getsize(fd);
if (fsize == -1) {
close(fd);
return SLURM_ERROR;
}
/* read file contents */
buf = xmalloc(fsize + 1);
do {
rc = read(fd, buf, fsize);
} while (rc < 0 && errno == EINTR);
close(fd);
buf[fsize]='\0';
/* count values (splitted by \n) */
i=0;
if (rc > 0) {
p = buf;
while (xstrchr(p, '\n') != NULL) {
i++;
p = xstrchr(p, '\n') + 1;
}
}
/* build uint32_t list */
if (i > 0) {
pa = xcalloc(i, sizeof(uint32_t));
p = buf;
i = 0;
while (xstrchr(p, '\n') != NULL) {
sscanf(p, "%u", pa+i);
p = xstrchr(p, '\n') + 1;
i++;
}
}
/* free buffer */
xfree(buf);
/* set output values */
*pvalues = pa;
*pnb = i;
return SLURM_SUCCESS;
}
extern int common_file_write_content(char *file_path, char *content,
size_t csize)
{
int fd;
/* open file for writing */
if ((fd = open(file_path, O_WRONLY, 0700)) < 0) {
error("%s: unable to open '%s' for writing: %m",
__func__, file_path);
return SLURM_ERROR;
}
safe_write(fd, content, csize);
/* close file */
close(fd);
return SLURM_SUCCESS;
rwfail:
error("%s: unable to write %zu bytes to cgroup %s: %m",
__func__, csize, file_path);
close(fd);
return SLURM_ERROR;
}
extern int common_file_read_content(char *file_path, char **content,
size_t *csize)
{
int fstatus;
int rc;
int fd;
size_t fsize;
char *buf;
fstatus = SLURM_ERROR;
/* check input pointers */
if (content == NULL || csize == NULL)
return fstatus;
/* open file for reading */
fd = open(file_path, O_RDONLY, 0700);
if (fd < 0) {
log_flag(CGROUP, "unable to open '%s' for reading : %m", file_path);
return fstatus;
}
/* get file size */
fsize=common_file_getsize(fd);
if (fsize == -1) {
close(fd);
return fstatus;
}
/* read file contents */
buf = xmalloc(fsize + 1);
buf[fsize]='\0';
do {
rc = read(fd, buf, fsize);
} while (rc < 0 && errno == EINTR);
/* set output values */
if (rc >= 0) {
*content = buf;
*csize = rc;
fstatus = SLURM_SUCCESS;
} else {
xfree(buf);
}
/* close file */
close(fd);
return fstatus;
}
extern int common_cgroup_instantiate(xcgroup_t *cg)
{
int fstatus = SLURM_ERROR;
mode_t cmask;
mode_t omask;
char *file_path;
uid_t uid;
gid_t gid;
/* init variables based on input cgroup */
file_path = cg->path;
uid = cg->uid;
gid = cg->gid;
/* save current mask and apply working one */
cmask = S_IWGRP | S_IWOTH;
omask = umask(cmask);
/* build cgroup */
if (mkdir(file_path, 0755)) {
if (errno != EEXIST) {
error("%s: unable to create cgroup '%s' : %m",
__func__, file_path);
umask(omask);
return fstatus;
}
}
umask(omask);
/* change cgroup ownership as requested */
if (chown(file_path, uid, gid)) {
error("%s: unable to chown %d:%d cgroup '%s' : %m",
__func__, uid, gid, file_path);
return fstatus;
}
/* following operations failure might not result in a general
* failure so set output status to success */
fstatus = SLURM_SUCCESS;
return fstatus;
}
extern int common_cgroup_create(xcgroup_ns_t *cgns, xcgroup_t *cg, char *uri,
uid_t uid, gid_t gid)
{
int fstatus = SLURM_ERROR;
char file_path[PATH_MAX];
/* build cgroup absolute path*/
if (snprintf(file_path, PATH_MAX, "%s%s", cgns->mnt_point,
uri) >= PATH_MAX) {
log_flag(CGROUP, "unable to build cgroup '%s' absolute path in ns '%s' : %m",
uri, cgns->subsystems);
return fstatus;
}
/* fill xcgroup structure */
cg->ns = cgns;
cg->name = xstrdup(uri);
cg->path = xstrdup(file_path);
cg->uid = uid;
cg->gid = gid;
return SLURM_SUCCESS;
}
extern int common_cgroup_move_process(xcgroup_t *cg, pid_t pid)
{
char *path = NULL;
/*
* First we check permissions to see if we will be able to move the pid.
* The path is a path to cgroup.procs and writting there will instruct
* the cgroup subsystem to move the process and all its threads there.
*/
path = _cgroup_procs_writable_path(cg);
if (!path) {
error("Cannot write to cgroup.procs for %s", cg->path);
return SLURM_ERROR;
}
xfree(path);
return _set_uint32_param(cg, "cgroup.procs", pid);
}
extern int common_cgroup_set_param(xcgroup_t *cg, char *param, char *content)
{
int fstatus = SLURM_ERROR;
char file_path[PATH_MAX];
char *cpath = cg->path;
if (!cpath || !param)
return fstatus;
if (!content) {
log_flag(CGROUP, "no content given, nothing to do");
return fstatus;
}
if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
log_flag(CGROUP, "unable to build filepath for '%s' and parameter '%s' : %m",
cpath, param);
return fstatus;
}
fstatus = common_file_write_content(file_path, content,
strlen(content));
if (fstatus != SLURM_SUCCESS)
log_flag(CGROUP, "unable to set parameter '%s' to '%s' for '%s'",
param, content, cpath);
else
debug3("%s: parameter '%s' set to '%s' for '%s'",
__func__, param, content, cpath);
return fstatus;
}
extern void common_cgroup_ns_destroy(xcgroup_ns_t *cgns)
{
xfree(cgns->mnt_point);
xfree(cgns->mnt_args);
xfree(cgns->subsystems);
}
extern void common_cgroup_destroy(xcgroup_t *cg)
{
cg->ns = NULL;
xfree(cg->name);
xfree(cg->path);
cg->uid = -1;
cg->gid = -1;
}
extern int common_cgroup_delete(xcgroup_t *cg)
{
int retries = 0, npids = -1;
pid_t *pids = NULL;
if (!cg || !cg->path) {
error("invalid control group");
return SLURM_SUCCESS;
}
/*
* Do 5 retries if we receive an EBUSY and there are no pids, because we
* may be trying to remove the directory when the kernel hasn't yet
* drained the cgroup internal references (css_online), even if
* cgroup.procs is already empty.
*/
while ((rmdir(cg->path) < 0) && (errno != ENOENT)) {
if (errno == EBUSY) {
/*
* Do not rely in ENOTEMPTY since in cgroupfs a
* non-empty dir. removal will return EBUSY.
*/
if (!_is_empty_dir(cg->path)) {
log_flag(CGROUP, "Cannot rmdir(%s), cgroup is not empty",
cg->path);
return SLURM_ERROR;
}
if (npids == -1) {
/* Do not retry on a 'really' busy cgroup */
if ((common_cgroup_get_pids(cg, &pids, &npids)
!= SLURM_SUCCESS))
return SLURM_ERROR;
if (npids > 0) {
xfree(pids);
debug3("Not removing %s, found %d pids",
cg->path, npids);
return SLURM_ERROR;
}
}
if (retries < 5) {
retries++;
continue;
}
log_flag(CGROUP, "Unable to rmdir(%s), did %d retries: %m",
cg->path, retries);
} else {
error("Unable to rmdir(%s), unexpected error: %m",
cg->path);
}
return SLURM_ERROR;
}
if (retries)
log_flag(CGROUP, "rmdir(%s): took %d retries, possible cgroup filesystem slowness",
cg->path, retries);
return SLURM_SUCCESS;
}
extern int common_cgroup_add_pids(xcgroup_t *cg, pid_t *pids, int npids)
{
int rc = SLURM_ERROR;
char *path = _cgroup_procs_writable_path(cg);
rc = common_file_write_uint32s(path, (uint32_t*)pids, npids);
if (rc != SLURM_SUCCESS)
error("unable to add pids to '%s'", cg->path);
xfree(path);
return rc;
}
extern int common_cgroup_get_pids(xcgroup_t *cg, pid_t **pids, int *npids)
{
int fstatus = SLURM_ERROR;
char *path = NULL;
if (pids == NULL || npids == NULL || !cg->path)
return SLURM_ERROR;
path = _cgroup_procs_readable_path(cg);
if (!path) {
error("unable to read '%s/cgroup.procs'", cg->path);
return SLURM_ERROR;
}
fstatus = common_file_read_uint32s(path, (uint32_t**)pids, npids);
if (fstatus != SLURM_SUCCESS)
log_flag(CGROUP, "unable to get pids of '%s', file disappeared?",
path);
xfree(path);
return fstatus;
}
extern int common_cgroup_get_param(xcgroup_t *cg, char *param, char **content,
size_t *csize)
{
int fstatus = SLURM_ERROR;
char file_path[PATH_MAX];
char *cpath = cg->path;
if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
log_flag(CGROUP, "unable to build filepath for '%s' and parameter '%s' : %m",
cpath, param);
} else {
fstatus = common_file_read_content(file_path, content, csize);
if (fstatus != SLURM_SUCCESS)
log_flag(CGROUP, "unable to get parameter '%s' for '%s'",
param, cpath);
}
return fstatus;
}
extern int common_cgroup_set_uint64_param(xcgroup_t *cg, char *param,
uint64_t value)
{
int fstatus = SLURM_ERROR;
char file_path[PATH_MAX];
char *cpath = cg->path;
if (snprintf(file_path, PATH_MAX, "%s/%s", cpath, param) >= PATH_MAX) {
log_flag(CGROUP, "unable to build filepath for '%s' and parameter '%s' : %m",
cpath, param);
return fstatus;
}
fstatus = common_file_write_uint64s(file_path, &value, 1);
if (fstatus != SLURM_SUCCESS)
log_flag(CGROUP, "unable to set parameter '%s' to '%"PRIu64"' for '%s'",
param, value, cpath);
else
debug3("%s: parameter '%s' set to '%"PRIu64"' for '%s'",
__func__, param, value, cpath);
return fstatus;
}