blob: d7130d6b8b16cd44dcc6393fd71548c520293691 [file] [log] [blame]
/*****************************************************************************\
* gres_c_s.c - common functions for shared gres plugins
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include <sys/stat.h>
#include "gres_c_s.h"
/*
 * File-scope list of shared_dev_info_t entries (one per shared GRES record,
 * holding its count and numeric device id). It is rebuilt by
 * _build_shared_dev_info(), repopulated by gres_c_s_recv_stepd(), packed by
 * gres_c_s_send_stepd() and released by gres_c_s_fini().
 */
list_t *shared_info = NULL;
/*
 * Build a new gres/shared record that mirrors a gres/sharing record.
 *
 * IN sharing_record - sharing record whose config flags, CPU binding (cpus,
 *	cpu_cnt, cpus_bitmap), File, Type and unique_id are copied
 * IN shared_name - GRES name stored in the new record; also passed to
 *	gres_build_id() to derive its plugin_id
 * IN shared_record_in - configured shared record, or NULL. If it is set and
 *	gres_id_shared() accepts its flags, those flags are OR'ed into the new
 *	record. Otherwise GRES_CONF_SHARED is set, plus GRES_CONF_ONE_SHARING
 *	when shared_name is "mps" (case insensitive).
 * RET newly allocated record. The count field is not assigned here; callers
 *	set it. Ownership passes to the caller.
 */
static gres_slurmd_conf_t *_create_shared_rec(
	gres_slurmd_conf_t *sharing_record, char *shared_name,
	gres_slurmd_conf_t *shared_record_in)
{
	gres_slurmd_conf_t *shared_record = xmalloc(sizeof(gres_slurmd_conf_t));

	shared_record->config_flags = sharing_record->config_flags;
	if (shared_record_in &&
	    gres_id_shared(shared_record_in->config_flags)) {
		shared_record->config_flags |= shared_record_in->config_flags;
	} else {
		shared_record->config_flags |= GRES_CONF_SHARED;
		/* The default for MPS is to have only one gpu sharing */
		if (!xstrcasecmp(shared_name, "mps"))
			shared_record->config_flags |= GRES_CONF_ONE_SHARING;
	}

	shared_record->cpu_cnt = sharing_record->cpu_cnt;
	shared_record->cpus = xstrdup(sharing_record->cpus);
	if (sharing_record->cpus_bitmap) {
		shared_record->cpus_bitmap =
			bit_copy(sharing_record->cpus_bitmap);
	}
	shared_record->file = xstrdup(sharing_record->file);
	shared_record->name = xstrdup(shared_name);
	shared_record->plugin_id = gres_build_id(shared_name);
	shared_record->type_name = xstrdup(sharing_record->type_name);
	/*
	 * Carry the device's unique_id over as well, so records created here
	 * agree with the ones _merge_lists() updates from a matching File.
	 */
	shared_record->unique_id = xstrdup(sharing_record->unique_id);

	return shared_record;
}
/*
 * Spread a File-less gres/shared Count over every gres/sharing record.
 *
 * Each sharing record is popped from sharing_conf_list and paired with a
 * freshly created shared record; both are appended to gres_conf_list, shared
 * record first. Every device gets (remaining count / remaining devices), so
 * any remainder from the integer division ends up on the later devices.
 *
 * IN/OUT gres_conf_list - list receiving the shared and sharing records
 * IN/OUT sharing_conf_list - sharing records; emptied by this call
 * IN count - total shared Count to distribute
 * IN shared_record_in - configured shared record supplying name and flags
 */
static void _distribute_count(list_t *gres_conf_list, list_t *sharing_conf_list,
			      uint64_t count,
			      gres_slurmd_conf_t *shared_record_in)
{
	gres_slurmd_conf_t *sharing_rec;
	int devices_left = list_count(sharing_conf_list);

	for (sharing_rec = list_pop(sharing_conf_list); sharing_rec;
	     sharing_rec = list_pop(sharing_conf_list)) {
		gres_slurmd_conf_t *new_shared = _create_shared_rec(
			sharing_rec, shared_record_in->name, shared_record_in);

		new_shared->count = count / devices_left;
		count -= new_shared->count;
		devices_left--;

		list_append(gres_conf_list, new_shared);
		list_append(gres_conf_list, sharing_rec);
	}
}
/*
 * List search callback: compare two GRES records by their File string.
 *
 * IN x - list entry being examined (gres_slurmd_conf_t *)
 * IN arg - record to compare against (gres_slurmd_conf_t *)
 * RET 1 if xstrcmp() reports both File values equal, otherwise 0
 */
static int _find_matching_file_gres(void *x, void *arg)
{
	gres_slurmd_conf_t *candidate = x;
	gres_slurmd_conf_t *wanted = arg;

	return xstrcmp(candidate->file, wanted->file) ? 0 : 1;
}
/*
 * List delete callback for shared records left without a matching sharing
 * record: logs an error naming the record's File and always returns 1, so
 * every entry it is called on is selected.
 *
 * IN x - list entry (gres_slurmd_conf_t *)
 * IN arg - unused
 */
static int _delete_leftovers(void *x, void *arg)
{
	gres_slurmd_conf_t *orphan = x;

	error("Discarding gres/'shared' configuration (File=%s) without matching gres/'sharing' record",
	      orphan->file);

	return 1;
}
/*
 * Merge SHARED records back into the original list, updating and reordering
 * them so each shared record immediately precedes its sharing record.
 *
 * IN/OUT gres_conf_list - list receiving the merged records
 * IN/OUT sharing_conf_list - per-device sharing records; drained on success
 * IN/OUT shared_conf_list - per-device shared records; emptied on success
 * IN shared_name - GRES name used for shared records created here
 * RET SLURM_SUCCESS, or SLURM_ERROR if shared records exist but there are no
 *	sharing records at all
 */
static int _merge_lists(list_t *gres_conf_list, list_t *sharing_conf_list,
			list_t *shared_conf_list, char *shared_name)
{
	gres_slurmd_conf_t *sharing_rec, *shared_rec;

	if (list_count(shared_conf_list) && !list_count(sharing_conf_list)) {
		error("SHARED specified without any SHARING found");
		return SLURM_ERROR;
	}

	/*
	 * A single gres/shared record with a Count but no File means the
	 * Count is spread evenly over all gres/sharing file records.
	 */
	shared_rec = (list_count(shared_conf_list) == 1) ?
		list_peek(shared_conf_list) : NULL;
	if (shared_rec && !shared_rec->file) {
		_distribute_count(gres_conf_list, sharing_conf_list,
				  shared_rec->count, shared_rec);
		list_flush(shared_conf_list);
		return SLURM_SUCCESS;
	}

	/*
	 * Walk the sharing records in order and emit the shared record with
	 * the same File ahead of each, so File ordering follows SHARING.
	 */
	while ((sharing_rec = list_pop(sharing_conf_list))) {
		shared_rec = list_remove_first(shared_conf_list,
					       _find_matching_file_gres,
					       sharing_rec);
		if (!shared_rec) {
			/* No configured match: add a zero-count shared record */
			shared_rec = _create_shared_rec(sharing_rec,
							shared_name, NULL);
			shared_rec->count = 0;
		} else {
			/* Type always follows the sharing record */
			if (sharing_rec->type_name)
				shared_rec->config_flags |= GRES_CONF_HAS_TYPE;
			xfree(shared_rec->type_name);
			shared_rec->type_name = xstrdup(sharing_rec->type_name);

			/* CPU info is replaced only if the sharing side has it */
			if (sharing_rec->cpus) {
				xfree(shared_rec->cpus);
				shared_rec->cpus = xstrdup(sharing_rec->cpus);
			}
			if (sharing_rec->cpus_bitmap) {
				shared_rec->cpu_cnt = sharing_rec->cpu_cnt;
				FREE_NULL_BITMAP(shared_rec->cpus_bitmap);
				shared_rec->cpus_bitmap =
					bit_copy(sharing_rec->cpus_bitmap);
			}

			xfree(shared_rec->unique_id);
			shared_rec->unique_id = xstrdup(sharing_rec->unique_id);
		}

		list_append(gres_conf_list, shared_rec);
		list_append(gres_conf_list, sharing_rec);
	}

	/* Whatever is left in shared_conf_list had no matching File */
	(void) list_delete_all(shared_conf_list, _delete_leftovers, NULL);

	return SLURM_SUCCESS;
}
/*
 * Return true if stat() succeeds on the "fake_gpus.conf" path returned by
 * get_extra_conf_path(). Used for testing.
 */
static bool _test_gpu_list_fake(void)
{
	struct stat st;
	char *path = get_extra_conf_path("fake_gpus.conf");
	bool found = (stat(path, &st) >= 0);

	xfree(path);

	return found;
}
/*
 * Translate a device file name to its numeric index by reading the run of
 * decimal digits at the end of the string, e.g. "/dev/nvidia2" -> 2.
 *
 * IN dev_file_name - device file name, may be NULL
 * RET the trailing number, or -1 if the name is NULL, empty or does not end
 *	in a digit
 */
static int _compute_local_id(char *dev_file_name)
{
	int pos, id = -1, place_value = 1;

	if (dev_file_name == NULL)
		return -1;

	/* Scan right to left, so the first digit seen is the units digit */
	pos = strlen(dev_file_name) - 1;
	while (pos >= 0) {
		char ch = dev_file_name[pos];

		if ((ch < '0') || (ch > '9'))
			break;
		if (id == -1)
			id = 0;
		id += (ch - '0') * place_value;
		place_value *= 10;
		pos--;
	}

	return id;
}
/*
 * Rebuild the file-scope shared_info list from gres_conf_list.
 *
 * Any existing shared_info list is freed first. One shared_dev_info_t is then
 * appended for every record whose config flags satisfy gres_id_shared(),
 * carrying that record's count and the numeric id derived from its File.
 *
 * IN gres_conf_list - GRES configuration records to scan
 * RET sum of the counts of all records that were added
 */
static uint64_t _build_shared_dev_info(list_t *gres_conf_list)
{
	uint64_t total = 0;
	gres_slurmd_conf_t *rec;
	list_itr_t *itr;

	FREE_NULL_LIST(shared_info);
	shared_info = list_create(xfree_ptr);

	itr = list_iterator_create(gres_conf_list);
	while ((rec = list_next(itr))) {
		shared_dev_info_t *dev;

		if (gres_id_shared(rec->config_flags)) {
			dev = xmalloc(sizeof(shared_dev_info_t));
			dev->count = rec->count;
			dev->id = _compute_local_id(rec->file);
			list_append(shared_info, dev);
			total += rec->count;
		}
	}
	list_iterator_destroy(itr);

	return total;
}
/*
 * List delete callback used when the total gres/shared count is zero, so the
 * shared records are dropped from the GRES list sent to the slurmctld daemon.
 *
 * IN x - list entry (gres_slurmd_conf_t *)
 * IN arg - shared GRES name (char *)
 * RET 1 if the entry's name equals arg according to xstrcmp(), otherwise 0
 */
static int _remove_shared_recs(void *x, void *arg)
{
	gres_slurmd_conf_t *rec = x;
	char *name_to_drop = arg;

	return (xstrcmp(rec->name, name_to_drop) == 0);
}
/*
 * Move every record named sharing_name out of gres_list into a new list in
 * which each File is a unique device (i.e. a record with "File=nvidia[0-3]"
 * becomes 4 separate records, each with a count of 1).
 *
 * Records without a File are left in gres_list; an error is logged for the
 * first one only. Records flagged GRES_CONF_HAS_MULT are transferred to the
 * new list as they are, without being split.
 *
 * IN/OUT gres_list - GRES configuration list; matching records are removed
 * IN sharing_name - GRES name of the sharing records
 * RET new list of sharing records, or NULL if gres_list is NULL
 */
static list_t *_build_sharing_list(list_t *gres_list, char *sharing_name)
{
	list_itr_t *iter;
	gres_slurmd_conf_t *conf, *per_file;
	list_t *result;
	hostlist_t *files;
	char *fname;
	bool warned = false;

	if (!gres_list)
		return NULL;

	result = list_create(destroy_gres_slurmd_conf);
	iter = list_iterator_create(gres_list);
	while ((conf = list_next(iter))) {
		if (xstrcmp(conf->name, sharing_name))
			continue;

		if (!conf->file) {
			if (!warned) {
				error("SHARING configuration lacks \"File\" specification");
				warned = true;
			}
			continue;
		}

		/*
		 * Do not split up gres records with MultipleFiles, e.g. MIGs.
		 * With MultipleFiles, all files correspond to the same gres
		 * device, as opposed to File=nvidia[0-3] which corresponds to
		 * four separate gres devices. The record itself is handed
		 * over to the new list.
		 */
		if (conf->config_flags & GRES_CONF_HAS_MULT) {
			list_append(result, conf);
			list_remove(iter);
			continue;
		}

		/* Expand the File expression into one record per file */
		files = hostlist_create(conf->file);
		while ((fname = hostlist_shift(files))) {
			per_file = xmalloc(sizeof(gres_slurmd_conf_t));

			per_file->config_flags = conf->config_flags;
			if (conf->type_name)
				per_file->config_flags |= GRES_CONF_HAS_TYPE;
			per_file->count = 1;

			per_file->cpu_cnt = conf->cpu_cnt;
			per_file->cpus = xstrdup(conf->cpus);
			if (conf->cpus_bitmap)
				per_file->cpus_bitmap =
					bit_copy(conf->cpus_bitmap);

			per_file->file = xstrdup(fname);
			per_file->links = xstrdup(conf->links);
			per_file->name = xstrdup(conf->name);
			per_file->plugin_id = conf->plugin_id;
			per_file->type_name = xstrdup(conf->type_name);
			per_file->unique_id = xstrdup(conf->unique_id);

			list_append(result, per_file);
			free(fname);
		}
		hostlist_destroy(files);

		/* The original multi-file record has been replaced */
		(void) list_delete_item(iter);
	}
	list_iterator_destroy(iter);

	return result;
}
/*
 * Move every record named shared_name out of gres_list into a new list in
 * which each File is a unique device (i.e. a record with "File=nvidia[0-3]"
 * becomes 4 separate records). The record's Count is divided by the number of
 * files (integer division) and that value is given to each new record.
 *
 * A record without File is copied as a single record keeping its full Count.
 * Only one such record is permitted, and records with and without File may
 * not be mixed; either violation is reported through fatal().
 *
 * IN/OUT gres_list - GRES configuration list; matching records are deleted
 * IN shared_name - GRES name of the shared records
 * RET new list of shared records, or NULL if gres_list is NULL
 */
static list_t *_build_shared_list(list_t *gres_list, char *shared_name)
{
	list_itr_t *iter;
	gres_slurmd_conf_t *conf, *new_rec;
	list_t *result;
	hostlist_t *files;
	char *fname;
	uint64_t per_file_cnt;
	int no_file_seen = 0, with_file_seen = 0;

	if (!gres_list)
		return NULL;

	result = list_create(destroy_gres_slurmd_conf);
	iter = list_iterator_create(gres_list);
	while ((conf = list_next(iter))) {
		if (xstrcmp(conf->name, shared_name))
			continue;

		if (conf->file) {
			with_file_seen++;
			if (no_file_seen)
				fatal("gres/shared: multiple configurations with and without \"File\"");

			files = hostlist_create(conf->file);
			per_file_cnt = conf->count / hostlist_count(files);
			while ((fname = hostlist_shift(files))) {
				new_rec = xmalloc(sizeof(gres_slurmd_conf_t));

				new_rec->config_flags = conf->config_flags;
				if (conf->type_name)
					new_rec->config_flags |=
						GRES_CONF_HAS_TYPE;
				new_rec->count = per_file_cnt;

				new_rec->cpu_cnt = conf->cpu_cnt;
				new_rec->cpus = xstrdup(conf->cpus);
				if (conf->cpus_bitmap)
					new_rec->cpus_bitmap =
						bit_copy(conf->cpus_bitmap);

				new_rec->file = xstrdup(fname);
				new_rec->name = xstrdup(conf->name);
				new_rec->plugin_id = conf->plugin_id;
				new_rec->type_name = xstrdup(conf->type_name);
				new_rec->unique_id = xstrdup(conf->unique_id);

				list_append(result, new_rec);
				free(fname);
			}
			hostlist_destroy(files);
		} else {
			if (no_file_seen)
				fatal("%s: bad configuration, multiple configurations without \"File\"",
				      __func__);
			if (with_file_seen)
				fatal("%s: multiple configurations with and without \"File\"",
				      __func__);
			no_file_seen++;

			new_rec = xmalloc(sizeof(gres_slurmd_conf_t));

			new_rec->config_flags = conf->config_flags;
			if (conf->type_name)
				new_rec->config_flags |= GRES_CONF_HAS_TYPE;
			new_rec->count = conf->count;

			new_rec->cpu_cnt = conf->cpu_cnt;
			new_rec->cpus = xstrdup(conf->cpus);
			if (conf->cpus_bitmap)
				new_rec->cpus_bitmap =
					bit_copy(conf->cpus_bitmap);

			new_rec->name = xstrdup(conf->name);
			new_rec->plugin_id = conf->plugin_id;
			new_rec->type_name = xstrdup(conf->type_name);
			new_rec->unique_id = xstrdup(conf->unique_id);

			list_append(result, new_rec);
		}

		/* The original record is replaced by the copies made above */
		(void) list_delete_item(iter);
	}
	list_iterator_destroy(iter);

	return result;
}
/*
 * Teardown for this module: releases the file-scope shared_info list through
 * FREE_NULL_LIST().
 */
extern void gres_c_s_fini(void)
{
	FREE_NULL_LIST(shared_info);
	return;
}
/*
 * Normalize the SHARING/SHARED records in gres_conf_list and load the node's
 * GRES configuration.
 *
 * We could load gres state or validate it using various mechanisms here.
 * This only validates that the configuration was specified in gres.conf.
 * In the general case, no code would need to be changed.
 *
 * IN/OUT gres_conf_list - gres.conf records; rewritten in place
 * IN/OUT share_devices - device list; an existing list is freed before it is
 *	handed to gres_node_config_load()
 * IN config - node config load parameters; config->gres_name names the shared
 *	GRES
 * IN sharing_name - GRES name of the sharing records
 * RET result of gres_node_config_load(); a failure there or while merging is
 *	reported through fatal()
 */
extern int gres_c_s_init_share_devices(list_t *gres_conf_list,
				       list_t **share_devices,
				       node_config_load_t *config,
				       char *sharing_name)
{
	int rc = SLURM_SUCCESS;
	list_t *sharing_recs, *shared_recs;
	uint64_t shared_total;
	log_level_t log_lvl = (slurm_conf.debug_flags & DEBUG_FLAG_GRES) ?
		LOG_LEVEL_VERBOSE : LOG_LEVEL_DEBUG;

	/* Assume this state is caused by an scontrol reconfigure */
	if (share_devices && *share_devices) {
		debug("Resetting share_devices");
		FREE_NULL_LIST(*share_devices);
	}

	log_flag(GRES, "Initialized gres.conf list");
	print_gres_list(gres_conf_list, log_lvl);

	/*
	 * Ensure that every SHARING device file is listed as a SHARED file.
	 * Any SHARED entry that we need to add will have a "Count" of zero.
	 * Every SHARED "Type" will be made to match the SHARING "Type". The
	 * order of SHARED records (by "File") must match the order in which
	 * SHARINGs are defined for the GRES bitmaps in slurmctld to line up.
	 *
	 * Step 1: move SHARING records to a new list, one record per device
	 * file (i.e. "File=nvidia[0-3]" becomes 4 separate records).
	 */
	sharing_recs = _build_sharing_list(gres_conf_list, sharing_name);

	/* Step 2: same for SHARED records, each with a unique device file */
	shared_recs = _build_shared_list(gres_conf_list, config->gres_name);

	/*
	 * Step 3: merge both lists back into the original list, updating and
	 * reordering as needed.
	 */
	rc = _merge_lists(gres_conf_list, sharing_recs, shared_recs,
			  config->gres_name);
	FREE_NULL_LIST(sharing_recs);
	FREE_NULL_LIST(shared_recs);
	if (rc != SLURM_SUCCESS)
		fatal("failed to merge SHARED and SHARING configuration");

	rc = gres_node_config_load(gres_conf_list, config, share_devices);
	if (rc != SLURM_SUCCESS)
		fatal("failed to load configuration");

	/* With a total shared count of zero, drop the shared records */
	shared_total = _build_shared_dev_info(gres_conf_list);
	if ((shared_total == 0) && gres_conf_list) {
		(void) list_delete_all(gres_conf_list, _remove_shared_recs,
				       config->gres_name);
	}

	log_var(log_lvl, "Final gres.conf list:");
	print_gres_list(gres_conf_list, log_lvl);

	/* Print in parsable format for tests if fake system is in use */
	if (_test_gpu_list_fake()) {
		info("Final normalized gres.conf list (parsable):");
		print_gres_list_parsable(gres_conf_list);
	}

	return rc;
}
/*
 * Pack the shared_info list into buffer: a 32-bit entry count followed by a
 * 64-bit count and a 64-bit id for every entry. A count of zero is packed
 * when shared_info is NULL.
 *
 * IN/OUT buffer - buffer the data is packed into
 */
extern void gres_c_s_send_stepd(buf_t *buffer)
{
	uint32_t dev_cnt = 0;
	shared_dev_info_t *dev;
	list_itr_t *iter;

	if (shared_info)
		dev_cnt = list_count(shared_info);
	pack32(dev_cnt, buffer);

	if (!shared_info)
		return;

	iter = list_iterator_create(shared_info);
	while ((dev = list_next(iter))) {
		pack64(dev->count, buffer);
		pack64(dev->id, buffer);
	}
	list_iterator_destroy(iter);
}
/*
 * Unpack shared GRES device information from buffer, in the layout written by
 * gres_c_s_send_stepd(): a 32-bit entry count, then a 64-bit count and a
 * 64-bit id per entry.
 *
 * If the entry count is zero nothing else is done and shared_info is left as
 * it was. Otherwise shared_info is replaced by a new list holding the
 * unpacked entries. On an unpack failure an error is logged, the entry being
 * filled in is freed, and entries already appended stay in shared_info.
 *
 * IN buffer - buffer to unpack from
 */
extern void gres_c_s_recv_stepd(buf_t *buffer)
{
	shared_dev_info_t *dev = NULL;
	uint64_t value;
	uint32_t dev_cnt;
	uint32_t idx = 0;

	safe_unpack32(&dev_cnt, buffer);
	if (dev_cnt == 0)
		return;

	FREE_NULL_LIST(shared_info);
	shared_info = list_create(xfree_ptr);

	while (idx < dev_cnt) {
		dev = xmalloc(sizeof(shared_dev_info_t));

		safe_unpack64(&value, buffer);
		dev->count = value;
		safe_unpack64(&value, buffer);
		dev->id = value;

		list_append(shared_info, dev);
		idx++;
	}
	return;

unpack_error:
	error("failed");
	xfree(dev);
	return;
}