blob: 0ed9824dedc4efba4983b30207ebc0f723aa1932 [file] [log] [blame]
/*****************************************************************************\
* imex_device.c
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#define _GNU_SOURCE
#include <stdint.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include "src/common/slurm_xlator.h"
#include "src/common/fd.h"
#include "src/common/log.h"
#include "src/common/read_config.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
/* Directory under which the per-channel IMEX device nodes are created. */
#define IMEX_DEV_DIR "/dev/nvidia-caps-imex-channels"
/* Device name searched for in /proc/devices to discover the major number. */
#define TARGET_DEV_LINE "nvidia-caps-imex-channels"
/* printf-style pattern for a channel node path; consumes the channel as %u. */
#define IMEX_CHANNEL_PATTERN IMEX_DEV_DIR "/channel%u"
/*
 * Major device number used for IMEX channel nodes. -1 means no major number
 * is known; slurmd_init() and setup_imex_channel() then skip their setup.
 */
static int device_major = -1;
static int _find_major(void)
{
char *line = NULL;
FILE *fp = NULL;
size_t len = 0;
if ((line = conf_get_opt_str(slurm_conf.switch_param,
"imex_dev_major="))) {
device_major = atoi(line);
info("using configured imex_dev_major: %d", device_major);
xfree(line);
return SLURM_SUCCESS;
}
if (!(fp = fopen("/proc/devices", "r"))) {
error("Could not open /proc/devices: %m");
return SLURM_ERROR;
}
while (getline(&line, &len, fp) != -1) {
int tmp = 0;
char tmp_char[41] = {0};
if ((sscanf(line, "%d %40s", &tmp, tmp_char) == 2) &&
!xstrcmp(tmp_char, TARGET_DEV_LINE)) {
device_major = tmp;
break;
}
}
free(line);
fclose(fp);
if (device_major == -1)
warning("%s: nvidia-caps-imex-channels major device not found, plugin disabled",
plugin_type);
else
info("nvidia-caps-imex-channels major: %d", device_major);
return SLURM_SUCCESS;
}
/*
 * Make sure IMEX_DEV_DIR exists (mode 0755) and then run
 * rmdir_recursive(IMEX_DEV_DIR, false) on it.
 *
 * The process umask is cleared around mkdir() so the requested mode is
 * applied as given, and restored afterwards on every path.
 *
 * NOTE(review): the rmdir_recursive() call presumably clears out stale channel
 * nodes while keeping the directory itself - confirm against its definition.
 * Its result is deliberately ignored.
 *
 * RET SLURM_SUCCESS, or SLURM_ERROR if the directory could not be created.
 */
static int _make_devdir(void)
{
	int rc = SLURM_SUCCESS;
	mode_t saved_mask = umask(0);

	if (mkdir(IMEX_DEV_DIR, 0755) < 0) {
		/* An already existing directory is fine. */
		if (errno != EEXIST) {
			error("could not create %s: %m", IMEX_DEV_DIR);
			rc = SLURM_ERROR;
		}
	}
	umask(saved_mask);

	if (rc == SLURM_SUCCESS)
		(void) rmdir_recursive(IMEX_DEV_DIR, false);

	return rc;
}
/*
 * slurmd-side initialization: look up the IMEX major device number and, when
 * one is known, prepare the IMEX device directory.
 *
 * RET SLURM_SUCCESS (also when no major number is known, in which case the
 *	directory setup is skipped), or SLURM_ERROR if a step failed.
 */
extern int slurmd_init(void)
{
	if (_find_major() != SLURM_SUCCESS)
		return SLURM_ERROR;

	/* No major number known: nothing to prepare. */
	if (device_major == -1)
		return SLURM_SUCCESS;

	return (_make_devdir() == SLURM_SUCCESS) ? SLURM_SUCCESS : SLURM_ERROR;
}
/*
 * stepd-side initialization: only the IMEX major device number is looked up.
 *
 * RET SLURM_SUCCESS or SLURM_ERROR, depending on the lookup result.
 */
extern int stepd_init(void)
{
	return (_find_major() == SLURM_SUCCESS) ? SLURM_SUCCESS : SLURM_ERROR;
}
/*
 * Expose a single IMEX channel device node to the calling process.
 *
 * Mounts a tmpfs over IMEX_DEV_DIR and creates the character device
 * IMEX_DEV_DIR/channel<channel> using device_major as the major number and
 * <channel> as the minor number. Does nothing when device_major is -1.
 *
 * IN channel - channel number; used as the device minor and in the node name
 * IN create_ns - if true, unshare(CLONE_NEWNS) is called first so that the
 *	mounts below are made in a new mount namespace
 * RET SLURM_SUCCESS, or SLURM_ERROR if unshare(), mount() or mknod() failed.
 */
extern int setup_imex_channel(uint32_t channel, bool create_ns)
{
	int rc = SLURM_SUCCESS;
	mode_t mask;
	dev_t dev;
	char *path = NULL;

	if (device_major == -1) {
		debug("skipping setup for channel %u", channel);
		return SLURM_SUCCESS;
	}

	/* Only build the device number once a valid major is known. */
	dev = makedev(device_major, channel);

	if (create_ns && unshare(CLONE_NEWNS) < 0) {
		error("%s: unshare() failed: %m", __func__);
		return SLURM_ERROR;
	}

	/*
	 * Recursively change the propagation type of everything under / to
	 * slave. This is done regardless of create_ns.
	 */
	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
		error("%s: mount() for / failed: %m", __func__);
		return SLURM_ERROR;
	}

	/* Cover the device directory with a tmpfs holding only this channel. */
	if (mount("tmpfs", IMEX_DEV_DIR, "tmpfs", MS_NOSUID | MS_NOEXEC,
		  "size=0,mode=0755") < 0) {
		error("%s: mount() for tmpfs failed: %m", __func__);
		return SLURM_ERROR;
	}

	xstrfmtcat(path, IMEX_CHANNEL_PATTERN, channel);

	/* Clear the umask so the node really gets mode 0666. */
	mask = umask(0);
	if (mknod(path, S_IFCHR | 0666, dev) < 0) {
		error("%s: failed to create %s: %m", __func__, path);
		rc = SLURM_ERROR;
	} else {
		/* channel is unsigned, so print it with %u. */
		log_flag(SWITCH, "Successfully setup IMEX channel ID %u at %s",
			 channel, path);
	}
	umask(mask);

	xfree(path);
	return rc;
}