blob: 48880699c3f902f811a7cd8c11430e91823911f5 [file] [log] [blame]
/*****************************************************************************\
* gres_common.c - common functions for gres plugins
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "config.h"
#include <ctype.h>
#include "gres_common.h"
#include "src/common/xstring.h"
/*
 * List search callback: does this device record carry the wanted index?
 * IN x - list element, treated as a gres_device_t
 * IN key - pointer to the int device index being looked for
 * RET 1 if the element's index equals *key, 0 otherwise
 */
static int _match_dev_inx(void *x, void *key)
{
	const gres_device_t *dev = x;
	const int *wanted_inx = key;

	return (dev->index == *wanted_inx) ? 1 : 0;
}
/*
 * Build the comma-separated device lists used for GRES environment variables.
 *
 * Walks gres_env->gres_devices and, for each device whose index is set in
 * gres_env->bit_alloc (and, when is_task is set, also passes the usable_gres
 * test), appends one entry to a global list and one to a local list. If at
 * least one device was selected, any previous gres_env->global_list and
 * gres_env->local_list are freed and replaced by the new lists, and
 * gres_env->global_id holds the dev_num of the first selected device.
 *
 * Returns without building anything when gres_devices or bit_alloc is NULL,
 * or when is_task is set but usable_gres is NULL.
 *
 * IN/OUT gres_env - inputs (devices, bitmaps, flags, prefix) and the
 *		     resulting global_list / local_list / global_id
 */
extern void common_gres_set_env(common_gres_env_t *gres_env)
{
bool use_local_dev_index = gres_use_local_device_index();
bool set_global_id = false;
gres_device_t *gres_device;
list_itr_t *itr;
/* Separators start empty and become "," after the first entry is added */
char *global_prefix = "", *local_prefix = "";
char *new_global_list = NULL, *new_local_list = NULL;
/* Highest gres_device->index seen so far among allocated devices */
int device_index = -1;
/* True once the device at device_index has been listed or rejected */
bool device_considered = false;
/* Next local index to hand out; only advanced if use_local_dev_index */
int local_inx = 0;
xassert(gres_env);
if (!gres_env->gres_devices)
return;
/* If we are setting task env but don't have usable_gres, just exit */
if (gres_env->is_task && !gres_env->usable_gres)
return;
/* is_task and is_job can't both be true */
xassert(!(gres_env->is_task && gres_env->is_job));
if (!gres_env->bit_alloc) {
/*
 * The gres.conf file must identify specific device files
 * in order to set the CUDA_VISIBLE_DEVICES env var
 */
return;
}
itr = list_iterator_create(gres_env->gres_devices);
while ((gres_device = list_next(itr))) {
/* Value written to the local list when the device has no unique_id */
int index;
/* Value written to the global list */
int global_env_index;
/* Skip devices that are not part of this allocation */
if (!bit_test(gres_env->bit_alloc, gres_device->index))
continue;
/* Track physical devices if MultipleFiles is used */
if (device_index < gres_device->index) {
device_index = gres_device->index;
device_considered = false;
} else if (device_index != gres_device->index)
error("gres_device->index was not monotonically increasing! Are gres_devices not sorted by index? device_index: %d, gres_device->index: %d",
device_index, gres_device->index);
/* Continue if we already bound this physical device */
if (device_considered)
continue;
/*
 * NICs want env to match the dev_num parsed from the
 * file name; GPUs, however, want it to match the order
 * they enumerate on the PCI bus, and this isn't always
 * the same order as the device file names
 */
if (gres_env->use_dev_num)
global_env_index = gres_device->dev_num;
else
global_env_index = gres_device->index;
/*
 * With local indexing, local_inx is consumed here, before the
 * usable_gres test below, so a device rejected for a task still
 * uses up a local index.
 */
index = use_local_dev_index ?
local_inx++ : global_env_index;
if (gres_env->is_task) {
/*
 * usable_gres is tested at the local index when local
 * indexing is in effect, otherwise at the device's own index
 */
if (!bit_test(gres_env->usable_gres,
use_local_dev_index ?
index : gres_device->index)) {
/*
 * Since this device is not in usable_gres, skip
 * over any other device files associated with
 * it by setting device_considered = true
 */
device_considered = true;
continue;
}
}
/* Record the dev_num of the first device that gets listed */
if (!set_global_id) {
gres_env->global_id = gres_device->dev_num;
set_global_id = true;
}
/*
 * If unique_id is set for the device, assume that we
 * want to use it for the env var
 */
if (gres_device->unique_id)
xstrfmtcat(new_local_list, "%s%s%s", local_prefix,
gres_env->prefix, gres_device->unique_id);
else
xstrfmtcat(new_local_list, "%s%s%d", local_prefix,
gres_env->prefix, index);
/* The global list always uses global_env_index, never unique_id */
xstrfmtcat(new_global_list, "%s%s%d", global_prefix,
gres_env->prefix, global_env_index);
local_prefix = ",";
global_prefix = ",";
device_considered = true;
}
list_iterator_destroy(itr);
/* Existing lists are only replaced when a new list was actually built */
if (new_global_list) {
xfree(gres_env->global_list);
gres_env->global_list = new_global_list;
}
if (new_local_list) {
xfree(gres_env->local_list);
gres_env->local_list = new_local_list;
}
/* Optional debugging output: dump the bitmaps and resulting lists to stderr */
if (gres_env->flags & GRES_INTERNAL_FLAG_VERBOSE) {
char *usable_str;
char *alloc_str;
if (gres_env->usable_gres)
usable_str = bit_fmt_hexmask_trim(
gres_env->usable_gres);
else
usable_str = xstrdup("NULL");
alloc_str = bit_fmt_hexmask_trim(gres_env->bit_alloc);
fprintf(stderr, "gpu-bind: usable_gres=%s; bit_alloc=%s; local_inx=%d; global_list=%s; local_list=%s\n",
usable_str, alloc_str, local_inx, gres_env->global_list,
gres_env->local_list);
/* Both strings are released here once printed */
xfree(alloc_str);
xfree(usable_str);
}
}
/*
 * Log one gres.conf record as a single line.
 * IN gres_slurmd_conf - record to report
 * IN log_lvl - log level handed to log_var()
 */
extern void print_gres_conf(gres_slurmd_conf_t *gres_slurmd_conf,
			    log_level_t log_lvl)
{
	gres_slurmd_conf_t *conf = gres_slurmd_conf;

	log_var(log_lvl,
		" GRES[%s] Type:%s Count:%"PRIu64" Cores(%d):%s Links:%s Flags:%s File:%s UniqueId:%s",
		conf->name, conf->type_name, conf->count, conf->cpu_cnt,
		conf->cpus, conf->links, gres_flags2str(conf->config_flags),
		conf->file, conf->unique_id);
}
/*
 * Log one gres.conf record in the pipe-delimited GRES_PARSABLE form.
 * The output layout is tied to test39.18: do NOT change one without the
 * other.
 * IN gres_slurmd_conf - record to report
 * IN log_lvl - log level handed to log_var()
 */
static void _print_gres_conf_parsable(gres_slurmd_conf_t *gres_slurmd_conf,
				      log_level_t log_lvl)
{
	gres_slurmd_conf_t *conf = gres_slurmd_conf;
	/* The unique_id field and its trailing '|' are emitted only if set */
	const char *uid = conf->unique_id ? conf->unique_id : "";
	const char *uid_sep = conf->unique_id ? "|" : "";

	log_var(log_lvl, "GRES_PARSABLE[%s](%"PRIu64"):%s|%d|%s|%s|%s|%s%s%s",
		conf->name, conf->count, conf->type_name, conf->cpu_cnt,
		conf->cpus, conf->links, conf->file, uid, uid_sep,
		gres_flags2str(conf->config_flags));
}
/*
 * Log every gres_slurmd_conf_t record held in a list.
 * IN gres_list - list of records; NULL is accepted and logs nothing
 * IN log_lvl - log level passed on to the per-record printer
 * IN parsable - true selects the parsable format, false the one-line summary
 */
static void _print_gres_list_helper(list_t *gres_list, log_level_t log_lvl,
				    bool parsable)
{
	list_itr_t *iter;
	gres_slurmd_conf_t *conf;

	if (!gres_list)
		return;

	iter = list_iterator_create(gres_list);
	for (conf = list_next(iter); conf; conf = list_next(iter)) {
		if (!parsable) {
			print_gres_conf(conf, log_lvl);
			continue;
		}
		_print_gres_conf_parsable(conf, log_lvl);
	}
	list_iterator_destroy(iter);
}
/*
 * Log each gres_slurmd_conf_t record in the list using the one-line summary
 * format.
 * IN gres_list - list of records to report
 * IN log_lvl - log level to report them at
 */
extern void print_gres_list(list_t *gres_list, log_level_t log_lvl)
{
	const bool parsable = false;

	_print_gres_list_helper(gres_list, log_lvl, parsable);
}
/*
 * Log each gres_slurmd_conf_t record in the list in the parsable format, at
 * LOG_LEVEL_INFO, for consumption by tests.
 * IN gres_list - list of records to report
 */
extern void print_gres_list_parsable(list_t *gres_list)
{
	const log_level_t lvl = LOG_LEVEL_INFO;
	const bool parsable = true;

	_print_gres_list_helper(gres_list, lvl, parsable);
}
/*
 * Export the GPU related environment variables for a job or step.
 *
 * Builds the device lists through common_gres_set_env() (with an empty
 * prefix) and then, into *gres_env->env_ptr:
 *  - sets SLURM_GPUS_ON_NODE to the number of bits set in bit_alloc;
 *  - sets SLURM_JOB_GPUS (is_job) or SLURM_STEP_GPUS to the global list;
 *  - sets the vendor variables enabled in gres_conf_flags to the local list.
 * A variable with no value to export is unset instead, unless
 * GRES_INTERNAL_FLAG_PROTECT_ENV is set, in which case it is left alone.
 * global_list and local_list are freed once they have been exported.
 *
 * IN/OUT gres_env - GRES environment description and target environment
 */
extern void gres_common_gpu_set_env(common_gres_env_t *gres_env)
{
	/* Vendor variables, each gated by its own gres.conf env flag */
	static const struct {
		uint32_t flag;
		char *name;
	} vendor_envs[] = {
		{ GRES_CONF_ENV_NVML, "CUDA_VISIBLE_DEVICES" },
		{ GRES_CONF_ENV_RSMI, "ROCR_VISIBLE_DEVICES" },
		{ GRES_CONF_ENV_ONEAPI, "ZE_AFFINITY_MASK" },
		{ GRES_CONF_ENV_OPENCL, "GPU_DEVICE_ORDINAL" },
	};
	const int vendor_cnt = sizeof(vendor_envs) / sizeof(vendor_envs[0]);
	char *list_var = gres_env->is_job ? "SLURM_JOB_GPUS" :
					    "SLURM_STEP_GPUS";
	uint64_t alloc_cnt = 0;
	bool may_unset;
	int i;

	gres_env->prefix = "";
	common_gres_set_env(gres_env);

	/* Variables may only be cleared when the env is not protected */
	may_unset = !(gres_env->flags & GRES_INTERNAL_FLAG_PROTECT_ENV);

	/*
	 * Count from bit_alloc rather than gres_env->gres_cnt so that
	 * SLURM_GPUS_ON_NODE is correct with shared gres.
	 */
	if (gres_env->bit_alloc)
		alloc_cnt = bit_set_count(gres_env->bit_alloc);

	if (alloc_cnt) {
		char *cnt_str = xstrdup_printf("%"PRIu64, alloc_cnt);

		env_array_overwrite(gres_env->env_ptr, "SLURM_GPUS_ON_NODE",
				    cnt_str);
		xfree(cnt_str);
	} else if (may_unset) {
		unsetenvp(*gres_env->env_ptr, "SLURM_GPUS_ON_NODE");
	}

	if (gres_env->global_list) {
		env_array_overwrite(gres_env->env_ptr, list_var,
				    gres_env->global_list);
		xfree(gres_env->global_list);
	} else if (may_unset) {
		unsetenvp(*gres_env->env_ptr, list_var);
	}

	if (gres_env->local_list) {
		for (i = 0; i < vendor_cnt; i++) {
			if (gres_env->gres_conf_flags & vendor_envs[i].flag)
				env_array_overwrite(gres_env->env_ptr,
						    vendor_envs[i].name,
						    gres_env->local_list);
		}
		xfree(gres_env->local_list);
	} else if (may_unset) {
		for (i = 0; i < vendor_cnt; i++) {
			if (gres_env->gres_conf_flags & vendor_envs[i].flag)
				unsetenvp(*gres_env->env_ptr,
					  vendor_envs[i].name);
		}
	}
}
/*
 * Set environment variables as appropriate for a job's prolog or epilog based
 * on the GRES allocated to the job on one node.
 *
 * For every device index set in gres_prep->gres_bit_alloc[node_inx] that has
 * a matching record in gres_devices, one entry is appended to two
 * comma-separated lists: a vendor list (the device's unique_id when it has
 * one, otherwise its index) and a Slurm list (always the index). The vendor
 * list is exported through whichever of CUDA_VISIBLE_DEVICES,
 * ROCR_VISIBLE_DEVICES, ZE_AFFINITY_MASK and GPU_DEVICE_ORDINAL is enabled in
 * gres_conf_flags; the Slurm list is exported as SLURM_JOB_GPUS. Nothing is
 * exported when no allocated device was found.
 *
 * IN/OUT prep_env_ptr - environment array to update
 * IN gres_prep - GRES allocation of the job; may be NULL
 * IN node_inx - zero-based index of the node within gres_prep
 * IN gres_conf_flags - GRES_CONF_ENV_* flags selecting the vendor variables
 * IN gres_devices - device records to match against; may be NULL
 *
 * RETURN: true (1) if nothing was done because of a missing argument, a
 *	   no_consume GRES or a bad node index; false (0) otherwise.
 */
extern bool gres_common_prep_set_env(char ***prep_env_ptr,
				     gres_prep_t *gres_prep,
				     int node_inx, uint32_t gres_conf_flags,
				     list_t *gres_devices)
{
	int dev_inx_first = -1, dev_inx_last, dev_inx;
	gres_device_t *gres_device;
	char *vendor_gpu_str = NULL;
	char *slurm_gpu_str = NULL;
	char *sep = "";

	xassert(prep_env_ptr);

	if (!gres_prep)
		return true;
	if (!gres_devices)
		return true;
	if (gres_prep->node_cnt == 0)	/* no_consume */
		return true;

	/*
	 * node_inx is used below as an index into gres_bit_alloc, so a value
	 * equal to node_cnt is already one past the last valid node.
	 * NOTE(review): assumes gres_bit_alloc holds node_cnt entries --
	 * confirm against the code that builds gres_prep_t.
	 */
	if (node_inx >= gres_prep->node_cnt) {
		error("bad node index (%d >= %u)",
		      node_inx, gres_prep->node_cnt);
		return true;
	}

	if (gres_prep->gres_bit_alloc &&
	    gres_prep->gres_bit_alloc[node_inx]) {
		dev_inx_first = bit_ffs(gres_prep->gres_bit_alloc[node_inx]);
	}
	/* With nothing allocated, last < first so the loop below is skipped */
	if (dev_inx_first >= 0)
		dev_inx_last = bit_fls(gres_prep->gres_bit_alloc[node_inx]);
	else
		dev_inx_last = -2;

	for (dev_inx = dev_inx_first; dev_inx <= dev_inx_last; dev_inx++) {
		if (!bit_test(gres_prep->gres_bit_alloc[node_inx],
			      dev_inx))
			continue;
		/* Allocated indexes without a device record are ignored */
		if ((gres_device =
		     list_find_first(gres_devices, _match_dev_inx, &dev_inx))) {
			if (gres_device->unique_id)
				xstrfmtcat(vendor_gpu_str, "%s%s", sep,
					   gres_device->unique_id);
			else
				xstrfmtcat(vendor_gpu_str, "%s%d", sep,
					   gres_device->index);
			xstrfmtcat(slurm_gpu_str, "%s%d", sep,
				   gres_device->index);
			sep = ",";
		}
	}

	if (vendor_gpu_str) {
		if (gres_conf_flags & GRES_CONF_ENV_NVML)
			env_array_overwrite(prep_env_ptr,
					    "CUDA_VISIBLE_DEVICES",
					    vendor_gpu_str);
		if (gres_conf_flags & GRES_CONF_ENV_RSMI)
			env_array_overwrite(prep_env_ptr,
					    "ROCR_VISIBLE_DEVICES",
					    vendor_gpu_str);
		if (gres_conf_flags & GRES_CONF_ENV_ONEAPI)
			env_array_overwrite(prep_env_ptr,
					    "ZE_AFFINITY_MASK",
					    vendor_gpu_str);
		if (gres_conf_flags & GRES_CONF_ENV_OPENCL)
			env_array_overwrite(prep_env_ptr,
					    "GPU_DEVICE_ORDINAL",
					    vendor_gpu_str);
		xfree(vendor_gpu_str);
	}
	if (slurm_gpu_str) {
		env_array_overwrite(prep_env_ptr, "SLURM_JOB_GPUS",
				    slurm_gpu_str);
		xfree(slurm_gpu_str);
	}

	return false;
}
/*
 * List iteration callback: accumulate the GRES_CONF_ENV_* flags of one
 * gres.conf record into a per-node flag word.
 * IN x - list element, treated as a gres_slurmd_conf_t
 * IN/OUT arg - pointer to the uint32_t flag word being accumulated
 * RET -1 once every bit of GRES_CONF_ENV_SET is present in *arg (no need to
 *     look at further records), 0 otherwise
 */
extern int gres_common_set_env_types_on_node_flags(void *x, void *arg)
{
	static const uint32_t env_flags[] = {
		GRES_CONF_ENV_NVML,
		GRES_CONF_ENV_RSMI,
		GRES_CONF_ENV_OPENCL,
		GRES_CONF_ENV_ONEAPI,
	};
	gres_slurmd_conf_t *conf = x;
	uint32_t *node_flags = arg;
	size_t i;

	for (i = 0; i < sizeof(env_flags) / sizeof(env_flags[0]); i++) {
		if (conf->config_flags & env_flags[i])
			*node_flags |= env_flags[i];
	}

	/* No need to continue if all are set */
	return ((*node_flags & GRES_CONF_ENV_SET) == GRES_CONF_ENV_SET) ?
		-1 : 0;
}