/*****************************************************************************\
* job_container_tmpfs.c - Define job container plugin for creating a
* temporary mount namespace for the job, to provide
* quota based access to node local memory.
*****************************************************************************
* Copyright (C) 2019-2021 Regents of the University of California
* Produced at Lawrence Berkeley National Laboratory
* Written by Aditi Gaur <agaur@lbl.gov>
* All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#define _GNU_SOURCE
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/mount.h>
#include <semaphore.h>
#include "src/common/slurm_xlator.h"
#include "src/common/env.h"
#include "src/common/fd.h"
#include "src/common/log.h"
#include "src/common/read_config.h"
#include "src/common/run_command.h"
#include "src/common/stepd_api.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/interfaces/switch.h"
#include "src/slurmd/slurmstepd/slurmstepd_job.h"
#include "read_jcconf.h"
static int _create_ns(uint32_t job_id, stepd_step_rec_t *step);
static int _delete_ns(uint32_t job_id);
#if defined (__APPLE__)
extern slurmd_conf_t *conf __attribute__((weak_import));
#else
slurmd_conf_t *conf = NULL;
#endif
const char plugin_name[] = "job_container tmpfs plugin";
const char plugin_type[] = "job_container/tmpfs";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
static slurm_jc_conf_t *jc_conf = NULL;
static int step_ns_fd = -1;
static bool plugin_disabled = false;
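/*
* Build the per-job paths used throughout this plugin: the job mount
* directory under jc_conf->basepath, the .ns file holding the mount
* namespace, and the .<jobid> directory used as the bind mount source.
*/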
static void _create_paths(uint32_t job_id, char **job_mount, char **ns_holder,
char **src_bind)
{
xassert(job_mount);
xstrfmtcat(*job_mount, "%s/%u", jc_conf->basepath, job_id);
if (ns_holder)
xstrfmtcat(*ns_holder, "%s/.ns", *job_mount);
if (src_bind)
xstrfmtcat(*src_bind, "%s/.%u", *job_mount, job_id);
}
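/* list_find_first() callback: match a stepd record by job id */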
static int _find_step_in_list(step_loc_t *stepd, uint32_t *job_id)
{
return (stepd->step_id.job_id == *job_id);
}
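/* The plugin is disabled when basepath is unset or set to "none" */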
static bool _is_plugin_disabled(char *basepath)
{
return ((!basepath) || (!xstrncasecmp(basepath, "none", 4)));
}
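/*
* Restore a single basepath entry: if the directory name parses as a job
* id but no running stepd for that job can be found or contacted, delete
* the job's namespace; otherwise leave it in place.
*/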
static int _restore_ns(list_t *steps, const char *d_name)
{
char *endptr;
int fd;
unsigned long job_id;
step_loc_t *stepd;
errno = 0;
job_id = strtoul(d_name, &endptr, 10);
if ((errno != 0) || (job_id >= NO_VAL) || (*endptr != '\0')) {
debug3("ignoring %s, could not convert to jobid.", d_name);
return SLURM_SUCCESS;
}
/* at this point we assume this directory belongs to a job container */
log_flag(JOB_CONT, "determine if job %lu is still running", job_id);
stepd = list_find_first(steps, (ListFindF)_find_step_in_list, &job_id);
if (!stepd) {
debug("%s: Job %lu not found, deleting the namespace",
__func__, job_id);
return _delete_ns(job_id);
}
fd = stepd_connect(stepd->directory, stepd->nodename,
&stepd->step_id, &stepd->protocol_version);
if (fd == -1) {
error("%s: failed to connect to stepd for %lu.",
__func__, job_id);
return _delete_ns(job_id);
}
close(fd);
return SLURM_SUCCESS;
}
extern int init(void)
{
if (running_in_slurmd()) {
/*
* Only init the config here for the slurmd. It will be sent by
* the slurmd to the slurmstepd at launch time.
*/
if (!(jc_conf = init_slurm_jc_conf())) {
error("%s: Configuration not read correctly: Does '%s' not exist?",
plugin_type, tmpfs_conf_file);
return SLURM_ERROR;
}
plugin_disabled = _is_plugin_disabled(jc_conf->basepath);
debug("job_container.conf read successfully");
}
debug("%s loaded", plugin_name);
return SLURM_SUCCESS;
}
extern void fini(void)
{
debug("%s unloaded", plugin_name);
if (step_ns_fd != -1) {
close(step_ns_fd);
step_ns_fd = -1;
}
#ifdef MEMORY_LEAK_DEBUG
free_jc_conf();
#endif
}
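/*
* Create jc_conf->basepath if auto_basepath is set, then scan it and
* restore or clean up the namespace of every job directory found there.
*/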
extern int container_p_restore(char *dir_name, bool recover)
{
DIR *dp;
struct dirent *ep;
list_t *steps;
int rc = SLURM_SUCCESS;
if (plugin_disabled)
return SLURM_SUCCESS;
if (jc_conf->auto_basepath) {
int fstatus;
mode_t omask = umask(S_IWGRP | S_IWOTH);
if (jc_conf->basepath[0] != '/') {
debug("%s: unable to create ns directory '%s' : does not start with '/'",
__func__, jc_conf->basepath);
umask(omask);
return SLURM_ERROR;
}
if ((fstatus = mkdirpath(jc_conf->basepath, 0755, true))) {
debug("%s: unable to create ns directory '%s' : %s",
__func__, jc_conf->basepath,
slurm_strerror(fstatus));
umask(omask);
return SLURM_ERROR;
}
umask(omask);
}
steps = stepd_available(conf->spooldir, conf->node_name);
/*
* Iterate over basepath and restore only the directories that appear to
* be bound to real jobs (have a .ns file). NOTE: Restoring the state
* means either deleting the directory if the job has died and its
* resources are free, or mounting it otherwise.
*/
if (!(dp = opendir(jc_conf->basepath))) {
error("%s: Unable to open %s", __func__, jc_conf->basepath);
return SLURM_ERROR;
}
while ((ep = readdir(dp))) {
/* If possible, only check directories */
if ((ep->d_type == DT_DIR) || (ep->d_type == DT_UNKNOWN)) {
if (_restore_ns(steps, ep->d_name))
rc = SLURM_ERROR;
}
}
closedir(dp);
FREE_NULL_LIST(steps);
if (rc)
error("Encountered an error while restoring job containers.");
return rc;
}
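/*
* For each directory listed in jc_conf->dirs (except /dev/shm), create a
* job-owned directory under 'path' and bind mount it over the original
* location. Called from inside the job's new mount namespace.
*/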
static int _mount_private_dirs(char *path, uid_t uid)
{
char *buffer = NULL, *mount_path = NULL, *save_ptr = NULL, *token;
int rc = 0;
if (!path) {
error("%s: no path to private directories specified.",
__func__);
return -1;
}
buffer = xstrdup(jc_conf->dirs);
token = strtok_r(buffer, ",", &save_ptr);
while (token) {
/* skip /dev/shm, this is handled elsewhere */
if (!xstrcmp(token, "/dev/shm")) {
token = strtok_r(NULL, ",", &save_ptr);
continue;
}
xstrfmtcat(mount_path, "%s/%s", path, token);
for (char *t = mount_path + strlen(path) + 1; *t; t++) {
if (*t == '/')
*t = '_';
}
rc = mkdir(mount_path, 0700);
if (rc && errno != EEXIST) {
error("%s: Failed to create %s, %m",
__func__, mount_path);
goto private_mounts_exit;
}
rc = lchown(mount_path, uid, -1);
if (rc) {
error("%s: lchown failed for %s: %m",
__func__, mount_path);
goto private_mounts_exit;
}
if (mount(mount_path, token, NULL, MS_BIND, NULL)) {
error("%s: %s mount failed, %m", __func__, token);
rc = -1;
goto private_mounts_exit;
}
token = strtok_r(NULL, ",", &save_ptr);
xfree(mount_path);
}
private_mounts_exit:
xfree(buffer);
xfree(mount_path);
return rc;
}
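/*
* If /dev/shm is listed in jc_conf->dirs, mount a fresh tmpfs over it.
* The existing /dev/shm is only unmounted first when the namespace is
* not shared, so the root namespace is left untouched.
*/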
static int _mount_private_shm(void)
{
char *loc = NULL;
int rc = 0;
/* return early if "/dev/shm" is not in the mount list */
if (!(loc = xstrcasestr(jc_conf->dirs, "/dev/shm")))
return rc;
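/* make sure the match is the whole "/dev/shm" token, not a prefix of a longer path */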
if (!((loc[8] == ',') || (loc[8] == 0)))
return rc;
/* handle mounting a new /dev/shm */
if (!jc_conf->shared) {
/*
* only unmount old /dev/shm if private, otherwise this can
* impact the root namespace
*/
rc = umount("/dev/shm");
if (rc && errno != EINVAL) {
error("%s: umount /dev/shm failed: %m", __func__);
return rc;
}
}
rc = mount("tmpfs", "/dev/shm", "tmpfs", 0, NULL);
if (rc) {
error("%s: /dev/shm mount failed: %m", __func__);
return -1;
}
return rc;
}
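/*
* Detach every job directory mounted under jc_conf->basepath so other
* jobs' mounts are not visible inside this namespace.
*/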
static int _clean_job_basepath(uint32_t job_id)
{
DIR *dp;
struct dirent *ep;
char *path = NULL;
if (!(dp = opendir(jc_conf->basepath))) {
error("%s: Unable to open %s", __func__, jc_conf->basepath);
return SLURM_ERROR;
}
while ((ep = readdir(dp))) {
if (!xstrcmp(ep->d_name, ".") || !xstrcmp(ep->d_name, ".."))
continue;
/* If possible, only attempt with directories */
if ((ep->d_type == DT_DIR) || (ep->d_type == DT_UNKNOWN)) {
xstrfmtcat(path, "%s/%s",
jc_conf->basepath, ep->d_name);
/* it is not important if this fails */
if (umount2(path, MNT_DETACH))
log_flag(JOB_CONT, "failed to unmount %s for job %u",
path, job_id);
xfree(path);
}
}
closedir(dp);
return SLURM_SUCCESS;
}
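/*
* Build the environment passed to the InitScript, CloneNSScript and
* CloneNSEpilog scripts: job id, Slurm config and node name, plus the
* mount source, job credentials and namespace file when available.
*/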
static char **_setup_script_env(uint32_t job_id,
stepd_step_rec_t *step,
char *src_bind,
char *ns_holder)
{
char **env = env_array_create();
env_array_overwrite_fmt(&env, "SLURM_JOB_ID", "%u", job_id);
env_array_overwrite_fmt(&env, "SLURM_CONF", "%s", conf->conffile);
env_array_overwrite_fmt(&env, "SLURMD_NODENAME", "%s", conf->node_name);
if (src_bind)
env_array_overwrite_fmt(&env, "SLURM_JOB_MOUNTPOINT_SRC", "%s",
src_bind);
if (step) {
if (step->het_job_id && (step->het_job_id != NO_VAL))
env_array_overwrite_fmt(&env, "SLURM_HET_JOB_ID", "%u",
step->het_job_id);
env_array_overwrite_fmt(&env, "SLURM_JOB_GID", "%u",
step->gid);
env_array_overwrite_fmt(&env, "SLURM_JOB_UID", "%u",
step->uid);
env_array_overwrite_fmt(&env, "SLURM_JOB_USER", "%s",
step->user_name);
if (step->alias_list)
env_array_overwrite_fmt(&env, "SLURM_NODE_ALIASES",
"%s", step->alias_list);
if (step->cwd)
env_array_overwrite_fmt(&env, "SLURM_JOB_WORK_DIR",
"%s", step->cwd);
}
if (ns_holder)
env_array_overwrite_fmt(&env, "SLURM_NS", "%s", ns_holder);
return env;
}
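/*
* Create the job's private mount namespace. The parent and a forked
* child synchronize through two shared semaphores: the child unshares
* its mount namespace and waits; the parent bind mounts the child's
* /proc/<pid>/ns/mnt onto the .ns file to keep the namespace alive and
* then releases the child, which sets up the private mounts and exits.
*/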
static int _create_ns(uint32_t job_id, stepd_step_rec_t *step)
{
char *job_mount = NULL, *ns_holder = NULL, *src_bind = NULL;
char *result = NULL;
int fd;
int rc = 0;
sem_t *sem1 = NULL;
sem_t *sem2 = NULL;
pid_t cpid;
_create_paths(job_id, &job_mount, &ns_holder, &src_bind);
if (mkdir(job_mount, 0700)) {
error("%s: mkdir %s failed: %m", __func__, job_mount);
rc = SLURM_ERROR;
goto end_it;
}
/*
* The MS_BIND mount flag makes mount() ignore all other mount flags
* except MS_REC. We also need the MS_PRIVATE flag to make the mount
* (as well as all mounts inside it) private, which has to be done by
* calling mount() a second time with the MS_PRIVATE and MS_REC flags.
*/
if (mount(job_mount, job_mount, NULL, MS_BIND, NULL)) {
error("%s: Initial base mount failed: %m", __func__);
rc = SLURM_ERROR;
goto end_it;
}
if (mount(job_mount, job_mount, NULL, MS_PRIVATE | MS_REC, NULL)) {
error("%s: Initial base mount failed: %m", __func__);
rc = SLURM_ERROR;
goto end_it;
}
fd = open(ns_holder, O_CREAT|O_RDWR, S_IRWXU);
if (fd == -1) {
error("%s: open failed %s: %m", __func__, ns_holder);
rc = -1;
goto exit2;
}
close(fd);
/* run the initialization script, if any */
if (jc_conf->initscript) {
run_command_args_t run_command_args = {
.max_wait = 10 * MSEC_IN_SEC,
.script_path = jc_conf->initscript,
.script_type = "initscript",
.status = &rc,
};
run_command_args.env = _setup_script_env(job_id, step,
src_bind, NULL);
log_flag(JOB_CONT, "Running InitScript");
result = run_command(&run_command_args);
log_flag(JOB_CONT, "InitScript rc: %d, stdout: %s", rc, result);
env_array_free(run_command_args.env);
xfree(result);
if (rc) {
error("%s: InitScript: %s failed with rc: %d",
__func__, jc_conf->initscript, rc);
goto exit2;
}
}
rc = mkdir(src_bind, 0700);
if (rc && (errno != EEXIST)) {
error("%s: mkdir failed %s, %m", __func__, src_bind);
goto exit2;
}
sem1 = mmap(NULL, sizeof(*sem1), PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_ANONYMOUS, -1, 0);
if (sem1 == MAP_FAILED) {
error("%s: mmap failed: %m", __func__);
rc = -1;
goto exit2;
}
sem2 = mmap(NULL, sizeof(*sem2), PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_ANONYMOUS, -1, 0);
if (sem2 == MAP_FAILED) {
error("%s: mmap failed: %m", __func__);
sem_destroy(sem1);
munmap(sem1, sizeof(*sem1));
rc = -1;
goto exit2;
}
rc = sem_init(sem1, 1, 0);
if (rc) {
error("%s: sem_init: %m", __func__);
goto exit1;
}
rc = sem_init(sem2, 1, 0);
if (rc) {
error("%s: sem_init: %m", __func__);
goto exit1;
}
cpid = fork();
if (cpid == -1) {
error("%s: fork Failed: %m", __func__);
rc = -1;
goto exit1;
}
if (cpid == 0) {
rc = unshare(CLONE_NEWNS);
if (rc) {
error("%s: %m", __func__);
goto child_exit;
}
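/*
* Tell the parent the new namespace exists, then wait until it has
* bind mounted /proc/<pid>/ns/mnt onto the .ns file.
*/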
if (sem_post(sem1) < 0) {
error("%s: sem_post failed: %m", __func__);
rc = -1;
goto child_exit;
}
if (sem_wait(sem2) < 0) {
error("%s: sem_wait failed %m", __func__);
rc = -1;
goto child_exit;
}
if (!jc_conf->shared) {
/* Set root filesystem to private */
if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)) {
error("%s: Failed to make root private: %m",
__func__);
rc = -1;
goto child_exit;
}
} else {
/* Set root filesystem to shared */
if (mount(NULL, "/", NULL, MS_SHARED | MS_REC, NULL)) {
error("%s: Failed to make root shared: %m",
__func__);
rc = -1;
goto child_exit;
}
/* Set root filesystem to slave */
if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
error("%s: Failed to make root slave: %m",
__func__);
rc = -1;
goto child_exit;
}
}
/*
* Now we have a persistent mount namespace.
* Mount private directories inside the namespace.
*/
if (_mount_private_dirs(src_bind, step->uid) == -1) {
rc = -1;
goto child_exit;
}
/*
* This happens when restarting the slurmd; the ownership should
* already be correct here.
*/
rc = chown(src_bind, step->uid, -1);
if (rc) {
error("%s: chown failed for %s: %m",
__func__, src_bind);
rc = -1;
goto child_exit;
}
/*
* switch/nvidia_imex needs to create an ephemeral device
* node under /dev in this new namespace.
*/
if ((rc = switch_g_fs_init(step))) {
error("%s: switch_g_fs_init failed", __func__);
rc = -1;
goto child_exit;
}
/*
* This umount is to remove the basepath mount from being
* visible inside the namespace. So if a user looks up the
* mounts inside the job, they will only see their job mount
* but not the basepath mount.
*/
if (jc_conf->shared)
rc = _clean_job_basepath(job_id);
else
rc = umount2(job_mount, MNT_DETACH);
if (rc) {
error("%s: failed to clean job mount(s): %m", __func__);
goto child_exit;
}
child_exit:
sem_destroy(sem1);
munmap(sem1, sizeof(*sem1));
sem_destroy(sem2);
munmap(sem2, sizeof(*sem2));
if (!rc) {
rc = _mount_private_shm();
if (rc)
error("%s: could not mount private shm",
__func__);
}
exit(rc);
} else {
int wstatus;
char *proc_path = NULL;
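/* wait until the child has called unshare() before using its /proc entry */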
if (sem_wait(sem1) < 0) {
error("%s: sem_Wait failed: %m", __func__);
rc = -1;
goto exit1;
}
xstrfmtcat(proc_path, "/proc/%u/ns/mnt", cpid);
/*
* Bind mount /proc/<pid>/ns/mnt to keep the namespace alive
* without a process attached to it.
*/
rc = mount(proc_path, ns_holder, NULL, MS_BIND, NULL);
xfree(proc_path);
if (rc) {
error("%s: ns base mount failed: %m", __func__);
if (sem_post(sem2) < 0)
error("%s: Could not release semaphore: %m",
__func__);
goto exit1;
}
if (sem_post(sem2) < 0) {
error("%s: sem_post failed: %m", __func__);
goto exit1;
}
if ((waitpid(cpid, &wstatus, 0) != cpid) || WEXITSTATUS(wstatus)) {
error("%s: waitpid failed", __func__);
rc = SLURM_ERROR;
goto exit1;
}
rc = 0;
}
/* run any post clone initialization script */
if (jc_conf->clonensscript) {
run_command_args_t run_command_args = {
.max_wait = jc_conf->clonensscript_wait * MSEC_IN_SEC,
.script_path = jc_conf->clonensscript,
.script_type = "clonensscript",
.status = &rc,
};
run_command_args.env = _setup_script_env(job_id, step,
src_bind, ns_holder);
log_flag(JOB_CONT, "Running CloneNSScript");
result = run_command(&run_command_args);
log_flag(JOB_CONT, "CloneNSScript rc: %d, stdout: %s",
rc, result);
xfree(result);
env_array_free(run_command_args.env);
if (rc) {
error("%s: CloneNSScript %s failed with rc=%d",
__func__, jc_conf->clonensscript, rc);
goto exit2;
}
}
exit1:
sem_destroy(sem1);
munmap(sem1, sizeof(*sem1));
sem_destroy(sem2);
munmap(sem2, sizeof(*sem2));
exit2:
if (rc) {
int failures;
/* cleanup the job mount */
if ((failures = rmdir_recursive(job_mount, false))) {
error("%s: failed to remove %d files from %s",
__func__, failures, job_mount);
rc = SLURM_ERROR;
goto end_it;
}
if (umount2(job_mount, MNT_DETACH))
error("%s: umount2 %s failed: %m",
__func__, job_mount);
if (rmdir(job_mount))
error("rmdir %s failed: %m", job_mount);
}
end_it:
xfree(job_mount);
xfree(src_bind);
xfree(ns_holder);
return rc;
}
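/*
* Open (once) and return a file descriptor for the job's .ns file so
* the caller can use it to join the namespace. Returns -1 on failure.
*/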
extern int container_p_join_external(uint32_t job_id)
{
char *job_mount = NULL, *ns_holder = NULL;
if (plugin_disabled)
return SLURM_SUCCESS;
_create_paths(job_id, &job_mount, &ns_holder, NULL);
if (step_ns_fd == -1) {
step_ns_fd = open(ns_holder, O_RDONLY);
if (step_ns_fd == -1)
error("%s: %m", __func__);
}
xfree(job_mount);
xfree(ns_holder);
return step_ns_fd;
}
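/*
* Join the job's mount namespace with setns() on the .ns file. The
* entire_step_in_ns setting decides whether the join happens from the
* slurmd during step creation or later from the slurmstepd.
*/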
extern int container_p_join(slurm_step_id_t *step_id, uid_t uid,
bool step_create)
{
char *job_mount = NULL, *ns_holder = NULL;
int fd;
int rc = SLURM_SUCCESS;
if (plugin_disabled)
return SLURM_SUCCESS;
/*
* Handle EntireStepInNS setting. If set, the join needs to happen
* during the fork+exec chain that creates the slurmstepd process, and
* all successive calls within slurmstepd need to be skipped. If not
* set, do the opposite.
*/
if ((!jc_conf->entire_step_in_ns && running_in_slurmd() &&
step_create) ||
(jc_conf->entire_step_in_ns && running_in_slurmstepd() &&
step_id->step_id != SLURM_EXTERN_CONT))
return SLURM_SUCCESS;
/*
* Jobid 0 means we are not a real job but a script running instead, so
* we do not need to handle this request.
*/
if (step_id->job_id == 0)
return SLURM_SUCCESS;
_create_paths(step_id->job_id, &job_mount, &ns_holder, NULL);
/* This is called on the slurmd so we can't use step_ns_fd. */
fd = open(ns_holder, O_RDONLY);
if (fd == -1) {
error("%s: open failed for %s: %m", __func__, ns_holder);
xfree(job_mount);
xfree(ns_holder);
return SLURM_ERROR;
}
rc = setns(fd, CLONE_NEWNS);
if (rc) {
error("%s: setns failed for %s: %m", __func__, ns_holder);
/* closed after error() */
close(fd);
xfree(job_mount);
xfree(ns_holder);
return SLURM_ERROR;
} else {
log_flag(JOB_CONT, "job %u entered namespace", step_id->job_id);
}
close(fd);
xfree(job_mount);
xfree(ns_holder);
return SLURM_SUCCESS;
}
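/*
* Tear down the job's namespace: run the CloneNSEpilog script if
* configured, detach the .ns mount, remove the files under the job
* directory and finally detach and remove the job mount itself.
*/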
static int _delete_ns(uint32_t job_id)
{
char *job_mount = NULL, *ns_holder = NULL;
int rc = 0, failures = 0;
char *result = NULL;
_create_paths(job_id, &job_mount, &ns_holder, NULL);
/*
* Run the post clone epilog script, if any; its environment includes
* the job id and the namespace file.
*/
if (jc_conf->clonensepilog) {
run_command_args_t run_command_args = {
.max_wait = jc_conf->clonensepilog_wait * MSEC_IN_SEC,
.script_path = jc_conf->clonensepilog,
.script_type = "clonensepilog",
.status = &rc,
};
run_command_args.env = _setup_script_env(job_id, NULL,
NULL, ns_holder);
log_flag(JOB_CONT, "Running CloneNSEpilog");
result = run_command(&run_command_args);
env_array_free(run_command_args.env);
log_flag(JOB_CONT, "CloneNSEpilog rc: %d, stdout: %s",
rc, result);
xfree(result);
if (rc) {
error("%s: CloneNSEpilog script %s failed with rc=%d",
__func__, jc_conf->clonensepilog, rc);
}
}
errno = 0;
/*
* Close the step_ns_fd if it was opened. If close fails here, it
* should be safe to continue since ns_holder is lazy unmounted later
* and will get cleaned up when the slurmstepd process ends.
*/
if (step_ns_fd != -1) {
if (close(step_ns_fd))
log_flag(JOB_CONT, "job %u close step_ns_fd(%d) failed: %m",
job_id, step_ns_fd);
else
step_ns_fd = -1;
}
/*
* umount2() sets errno to EINVAL if the target is not a mount point
* but also if called with invalid flags. Consider this if changing the
* flags to umount2().
*/
rc = umount2(ns_holder, MNT_DETACH);
if (rc) {
if ((errno == EINVAL) || (errno == ENOENT)) {
log_flag(JOB_CONT, "%s: umount2 %s failed: %m",
__func__, ns_holder);
} else {
error("%s: umount2 %s failed: %m",
__func__, ns_holder);
xfree(job_mount);
xfree(ns_holder);
return SLURM_ERROR;
}
}
if ((failures = rmdir_recursive(job_mount, false)))
error("%s: failed to remove %d files from %s",
__func__, failures, job_mount);
if (umount2(job_mount, MNT_DETACH))
log_flag(JOB_CONT, "umount2: %s failed: %m", job_mount);
if (rmdir(job_mount))
error("rmdir %s failed: %m", job_mount);
xfree(job_mount);
xfree(ns_holder);
return SLURM_SUCCESS;
}
extern int container_p_stepd_create(uint32_t job_id, stepd_step_rec_t *step)
{
if (plugin_disabled)
return SLURM_SUCCESS;
return _create_ns(job_id, step);
}
extern int container_p_stepd_delete(uint32_t job_id)
{
if (plugin_disabled)
return SLURM_SUCCESS;
return _delete_ns(job_id);
}
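/*
* Send the packed job_container configuration from the slurmd to the
* slurmstepd over the given file descriptor.
*/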
extern int container_p_send_stepd(int fd)
{
int len;
buf_t *buf;
buf = get_slurm_jc_conf_buf();
/* The config should have been initialized by now */
xassert(buf);
len = get_buf_offset(buf);
safe_write(fd, &len, sizeof(len));
safe_write(fd, get_buf_data(buf), len);
return SLURM_SUCCESS;
rwfail:
error("%s: failed", __func__);
return SLURM_ERROR;
}
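/*
* Receive the configuration sent by container_p_send_stepd() and use it
* to initialize jc_conf in the slurmstepd.
*/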
extern int container_p_recv_stepd(int fd)
{
int len;
buf_t *buf;
safe_read(fd, &len, sizeof(len));
buf = init_buf(len);
safe_read(fd, buf->head, len);
if (!(jc_conf = set_slurm_jc_conf(buf)))
goto rwfail;
plugin_disabled = _is_plugin_disabled(jc_conf->basepath);
return SLURM_SUCCESS;
rwfail:
error("%s: failed", __func__);
return SLURM_ERROR;
}