blob: 13ec5c5c8f016d91168ad57b10bef700c0ca9f20 [file] [log] [blame]
/*****************************************************************************\
* config.c - Library for managing HPE Slingshot networks
*****************************************************************************
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Written by Jim Nordby <james.nordby@hpe.com>
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "config.h"
#include <sys/stat.h>
#include "switch_hpe_slingshot.h"
#include "rest.h"
#include "src/common/job_record.h"
#include "src/common/node_conf.h"
/* Set this to true if VNI table is re-sized and loses some bits */
static bool lost_vnis = false;
/* Number of free VNIs */
int free_vnis = 0;
/*
* Set up slingshot_config defaults
*/
static void _config_defaults(void)
{
memset(&slingshot_config, 0, sizeof(slingshot_config_t));
slingshot_config.single_node_vni = SLINGSHOT_SN_VNI_NONE;
slingshot_config.job_vni = SLINGSHOT_JOB_VNI_NONE;
slingshot_config.tcs = SLINGSHOT_TC_DEFAULT;
slingshot_config.flags = SLINGSHOT_FLAGS_DEFAULT;
slingshot_config.limits.txqs.max = SLINGSHOT_TXQ_MAX;
slingshot_config.limits.tgqs.max = SLINGSHOT_TGQ_MAX;
slingshot_config.limits.eqs.max = SLINGSHOT_EQ_MAX;
slingshot_config.limits.cts.max = SLINGSHOT_CT_MAX;
slingshot_config.limits.tles.max = SLINGSHOT_TLE_MAX;
slingshot_config.limits.ptes.max = SLINGSHOT_PTE_MAX;
slingshot_config.limits.les.max = SLINGSHOT_LE_MAX;
slingshot_config.limits.acs.max = SLINGSHOT_AC_MAX;
slingshot_config.limits.txqs.def = SLINGSHOT_TXQ_DEF;
slingshot_config.limits.tgqs.def = SLINGSHOT_TGQ_DEF;
slingshot_config.limits.eqs.def = SLINGSHOT_EQ_DEF;
slingshot_config.limits.cts.def = SLINGSHOT_CT_DEF;
slingshot_config.limits.tles.def = SLINGSHOT_TLE_DEF;
slingshot_config.limits.ptes.def = SLINGSHOT_PTE_DEF;
slingshot_config.limits.les.def = SLINGSHOT_LE_DEF;
slingshot_config.limits.acs.def = SLINGSHOT_AC_DEF;
slingshot_config.destroy_retries = SLINGSHOT_CXI_DESTROY_RETRIES;
}
/*
* Parse the VNI min/max token, with format "vni=<min>-<max>";
* put results in *minp, *maxp
*/
static bool _config_vnis(const char *token, char *arg, uint16_t *min_ptr,
uint16_t *max_ptr)
{
char *end_ptr;
long min, max;
if (!arg)
goto err;
end_ptr = NULL;
min = strtol(arg, &end_ptr, 10);
if (!end_ptr || (end_ptr == arg) || (*end_ptr != '-'))
goto err;
if ((min < SLINGSHOT_VNI_MIN) || (min > SLINGSHOT_VNI_MAX))
goto err;
arg = end_ptr + 1;
end_ptr = NULL;
max = strtol(arg, &end_ptr, 10);
if (!end_ptr || (end_ptr == arg) || (*end_ptr != '\0'))
goto err;
if ((max <= min) || (max > SLINGSHOT_VNI_MAX))
goto err;
*min_ptr = min;
*max_ptr = max;
log_flag(SWITCH, "[token=%s]: min/max %ld %ld", token, min, max);
return true;
err:
error("Invalid vni token '%s' (example: 'vnis=10-100', valid range %d-%d)",
token, SLINGSHOT_VNI_MIN, SLINGSHOT_VNI_MAX);
return false;
}
/*
* Compare old slingshot_state.vni_{min,max} with passed-in min/max;
* if old table is incompatible with new min/max, return false;
* otherwise set up slingshot_state with new vni_table values
*/
static bool _setup_vni_table(uint16_t min, uint16_t max)
{
int32_t oldbits, newbits;
size_t oldsize = slingshot_state.vni_max - slingshot_state.vni_min + 1;
size_t newsize = max - min + 1;
uint16_t oldmin = slingshot_state.vni_min;
uint16_t oldmax = slingshot_state.vni_max;
bitstr_t *table = slingshot_state.vni_table;
log_flag(SWITCH, "oldmin/max/size %hu %hu %zu min/max/size %hu %hu %zu",
oldmin, oldmax, oldsize, min, max, newsize);
/* If no recovery of vni_table, just set up new one */
if (!slingshot_state.vni_table) {
table = bit_alloc(newsize);
newbits = 0; /* no VNIs used */
goto done;
}
xassert(oldmin);
xassert(oldmax);
xassert(oldsize > 0);
xassert(newsize > 0);
xassert(table);
xassert(bit_size(table) == oldsize);
if ((oldmin == min) && (oldmax == max)) {
newbits = bit_set_count(table);
goto done;
}
/* Re-size bitstring if needed */
oldbits = bit_set_count(table);
if (oldsize != newsize)
table = bit_realloc(table, newsize);
/* Shift bits if vni_min is changing */
if (oldmin != min)
bit_rotate(table, min - oldmin);
newbits = bit_set_count(table);
/* Go on even if we're losing VNIs */
if (newbits != oldbits) {
warning("changing vni_min/max %hu %hu -> %hu %hu; %d VNIs will be lost!",
oldmin, oldmax, min, max, oldbits - newbits);
lost_vnis = true;
}
done:
free_vnis = newsize - newbits;
slingshot_state.vni_min = min;
slingshot_state.vni_max = max;
if ((slingshot_state.vni_last < min) ||
(slingshot_state.vni_last >= max))
slingshot_state.vni_last = min - 1;
slingshot_state.vni_table = table;
log_flag(SWITCH, "version=%d min/max/last=%hu %hu %hu num_vnis=%zu used=%d free_vnis=%d",
slingshot_state.version, slingshot_state.vni_min,
slingshot_state.vni_max, slingshot_state.vni_last, newsize,
newbits, free_vnis);
return true;
}
/*
* Update vni table from config
*/
extern int slingshot_update_vni_table(void)
{
uint16_t min = slingshot_state.vni_min;
uint16_t max = slingshot_state.vni_max;
char *arg = NULL;
if ((arg = conf_get_opt_str(slurm_conf.switch_param, "vnis="))) {
if (!_config_vnis("vnis", arg, &min, &max)) {
xfree(arg);
return SLURM_ERROR;
}
}
xfree(arg);
if (!_setup_vni_table(min, max))
return SLURM_ERROR;
return SLURM_SUCCESS;
}
/* Mapping between Slingshot traffic class labels and their bitmasks */
static struct {
const char *label;
uint32_t bit;
} classes[] = {
{ "DEDICATED_ACCESS", SLINGSHOT_TC_DEDICATED_ACCESS },
{ "LOW_LATENCY", SLINGSHOT_TC_LOW_LATENCY },
{ "BULK_DATA", SLINGSHOT_TC_BULK_DATA },
{ "BEST_EFFORT", SLINGSHOT_TC_BEST_EFFORT },
};
const int num_classes = sizeof(classes) / sizeof(classes[0]);
/*
* Parse the Slingshot traffic classes token, with format
* "tcs=<class1>:<class2>[:...]
*/
static bool _config_tcs(const char *token, char *arg, uint32_t *tcs_out)
{
char *save_ptr = NULL, *tcs = NULL, *tc;
uint32_t tcbits = 0;
int i;
const char prefix[] = "TC_";
const size_t prefix_siz = sizeof(prefix) - 1;
if (!arg)
goto err;
tcs = xstrdup(arg);
for (tc = strtok_r(tcs, ":", &save_ptr); tc;
tc = strtok_r(NULL, ":", &save_ptr)) {
/* Skip optional TC_ prefix */
if (!xstrncasecmp(tc, prefix, prefix_siz))
tc += prefix_siz;
for (i = 0; i < num_classes; i++) {
if (!xstrcasecmp(tc, classes[i].label)) {
tcbits |= classes[i].bit;
break;
}
}
if (i == num_classes)
goto err;
}
if (tcbits == 0)
goto err;
*tcs_out = tcbits;
log_flag(SWITCH, "[token=%s]: tcs %#x", token, tcbits);
xfree(tcs);
return true;
err:
xfree(tcs);
error("Invalid traffic class token '%s' (example 'tcs=DEDICATED_ACCESS:LOW_LATENCY:BULK_DATA:BEST_EFFORT')",
token);
return false;
}
/*
* Parse the Slingshot job VNI token, with format "job_vni={all,user,none}"
*/
static bool _config_job_vni(const char *token, char *arg)
{
/* Backwards compatibility: no argument = SLINGSHOT_JOB_VNI_ALL */
if (!arg) {
slingshot_config.job_vni = SLINGSHOT_JOB_VNI_ALL;
goto out;
}
if (!xstrcasecmp(arg, "all"))
slingshot_config.job_vni = SLINGSHOT_JOB_VNI_ALL;
else if (!xstrcasecmp(arg, "user"))
slingshot_config.job_vni = SLINGSHOT_JOB_VNI_USER;
else if (!xstrcasecmp(arg, "none"))
slingshot_config.job_vni = SLINGSHOT_JOB_VNI_NONE;
else {
error("Invalid job_vni token '%s' (example 'job_vni={all,user,none}')",
token);
return false;
}
out:
log_flag(SWITCH, "[token=%s]: job_vni %d",
token, slingshot_config.job_vni);
return true;
}
/*
* Parse the Slingshot single-node VNI token, with format
* "single_node_vni={all,user,none}"
*/
static bool _config_single_node_vni(const char *token, char *arg)
{
/* Backwards compatibility: no argument = SLINGSHOT_SN_VNI_ALL */
if (!arg) {
slingshot_config.single_node_vni = SLINGSHOT_SN_VNI_ALL;
goto out;
}
if (!xstrcasecmp(arg, "all"))
slingshot_config.single_node_vni = SLINGSHOT_SN_VNI_ALL;
else if (!xstrcasecmp(arg, "user"))
slingshot_config.single_node_vni = SLINGSHOT_SN_VNI_USER;
else if (!xstrcasecmp(arg, "none"))
slingshot_config.single_node_vni = SLINGSHOT_SN_VNI_NONE;
else {
error("Invalid single_node_vni token '%s' (example 'single_node_vni={all,user,none}')",
token);
return false;
}
out:
log_flag(SWITCH, "[token=%s]: single_node_vni %d",
token, slingshot_config.single_node_vni);
return true;
}
/*
* Parse the "hwcoll_addrs_per_job" token, with format
* "hwcoll_addrs_per_job=<number>"
*/
static bool _config_hwcoll_addrs_per_job(const char *token, char *arg)
{
char *end_ptr = NULL;
uint32_t num;
if (!arg)
goto err;
num = strtol(arg, &end_ptr, 10);
if (!end_ptr || (end_ptr == arg))
goto err;
if (num <= 0)
goto err;
slingshot_config.hwcoll_addrs_per_job = num;
log_flag(SWITCH, "[token=%s]: hwcoll_addrs_per_job %u",
token, slingshot_config.hwcoll_addrs_per_job);
return true;
err:
error("Invalid hwcoll_addrs_per_job token '%s' (example 'hwcoll_addrs_per_job=26')",
token);
return false;
}
/*
* Parse the "hwcoll_num_nodes" token, with format "hwcoll_num_nodes=<number>"
*/
static bool _config_hwcoll_num_nodes(const char *token, char *arg)
{
char *end_ptr = NULL;
uint32_t num;
if (!arg)
goto err;
num = strtol(arg, &end_ptr, 10);
if (!end_ptr || (end_ptr == arg))
goto err;
if (num <= 0)
goto err;
slingshot_config.hwcoll_num_nodes = num;
log_flag(SWITCH, "[token=%s]: hwcoll_num_nodes %u",
token, slingshot_config.hwcoll_num_nodes);
return true;
err:
error("Invalid hwcoll_num_nodes token '%s' (example 'hwcoll_num_nodes=64')",
token);
return false;
}
/*
* Parse the "fm_url" token, with format "fm_url=<url>"
*/
static bool _config_fm_url(const char *token, char *arg)
{
if (!arg)
goto err;
slingshot_config.fm_url = xstrdup(arg);
log_flag(SWITCH, "[token=%s]: fm_url %s",
token, slingshot_config.fm_url);
return true;
err:
error("Invalid fm_url token '%s' (example 'fm_url=https://api-gw-service-nmn.local/apis/fm')",
token);
return false;
}
/*
* Parse the "fm_auth" token, with format "fm_auth={BASIC,OAUTH}"
*/
static bool _config_fm_auth(const char *token, char *arg)
{
if (!arg)
goto err;
if (!xstrcasecmp(arg, SLINGSHOT_AUTH_BASIC_STR))
slingshot_config.fm_auth = SLINGSHOT_AUTH_BASIC;
else if (!xstrcasecmp(arg, SLINGSHOT_AUTH_OAUTH_STR))
slingshot_config.fm_auth = SLINGSHOT_AUTH_OAUTH;
else
goto err;
log_flag(SWITCH, "[token=%s]: fm_auth %d",
token, slingshot_config.fm_auth);
return true;
err:
error("Invalid fm_auth token '%s' (example 'fm_auth={BASIC,OAUTH}')",
token);
return false;
}
/*
* Parse the "fm_authdir" token, with format "fm_authdir=<dirpath>"
*/
static bool _config_fm_authdir(const char *token, char *arg)
{
struct stat statbuf;
if (!arg)
goto err;
if (stat(arg, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) {
error("fm_authdir directory '%s' is not a directory", arg);
return false;
}
slingshot_config.fm_authdir = xstrdup(arg);
log_flag(SWITCH, "[token=%s]: fm_authdir %s",
token, slingshot_config.fm_authdir);
return true;
err:
error("Invalid fm_authdir token '%s' (example 'fm_authdir=/etc/wlm-client-auth')",
token);
return false;
}
/* Verify that the path is a fully qualified and readable */
static bool _is_valid_file(const char *token, char *path, char *option,
char *example)
{
struct stat statbuf;
if (!path) {
error("Invalid %s token '%s' (example '%s=%s')",
option, token, option, example);
} else if (path[0] != '/') {
error("%s's path (%s) is required to be a fully qualified pathname",
option, path);
} else if ((stat(path, &statbuf) != 0)) {
error("%s's path (%s) can not be accessed or it doesn't exist",
option, path);
} else if (!S_ISREG(statbuf.st_mode)) {
error("%s's path (%s) is not a regular file", option, path);
} else if (access(path, R_OK)) {
error("%s's path (%s) does not have read permissions",
option, path);
} else {
return true;
}
return false;
}
/*
* Parse the "fm_mtls_ca" token"
*/
static bool _config_fm_mtls_ca(const char *token, char *arg)
{
if (!_is_valid_file(token, arg, "fm_mtls_ca",
"/etc/wlm-client-auth/ca.crt"))
return false;
slingshot_config.fm_mtls_ca = xstrdup(arg);
log_flag(SWITCH, "[token=%s]: fm_mtls_ca %s", token,
slingshot_config.fm_mtls_ca);
return true;
}
/*
* Parse the "fm_mtls_cert" token"
*/
static bool _config_fm_mtls_cert(const char *token, char *arg)
{
if (!_is_valid_file(token, arg, "fm_mtls_cert",
"/etc/wlm-client-auth/public.crt"))
return false;
slingshot_config.fm_mtls_cert = xstrdup(arg);
log_flag(SWITCH, "[token=%s]: fm_mtls_cert %s", token,
slingshot_config.fm_mtls_cert);
return true;
}
/*
* Parse the "fm_mtls_key" token"
*/
static bool _config_fm_mtls_key(const char *token, char *arg)
{
if (!_is_valid_file(token, arg, "fm_mtls_key",
"/etc/wlm-client-auth/private.key"))
return false;
slingshot_config.fm_mtls_key = xstrdup(arg);
log_flag(SWITCH, "[token=%s]: fm_mtls_key %s", token,
slingshot_config.fm_mtls_key);
return true;
}
/*
* Parse the "fm_mtls_url" token"
*/
static bool _config_fm_mtls_url(const char *token, char *arg)
{
if (!arg)
goto err;
slingshot_config.fm_mtls_url = xstrdup(arg);
log_flag(SWITCH, "[token=%s]: fm_mtls_url %s", token,
slingshot_config.fm_mtls_url);
return true;
err:
error("Invalid fm_mtls_url token '%s' (example 'fm_mtls_url=https://api-gw-service-nmn.local/apis/fm')",
token);
return false;
}
/*
* If fm_url is set, set up default values for fm_auth{dir}
* (if not already set)
*/
static void _config_fm_defaults(void)
{
if (!slingshot_config.fm_url)
return;
if (slingshot_config.fm_auth == SLINGSHOT_AUTH_NONE)
slingshot_config.fm_auth = SLINGSHOT_AUTH_OAUTH;
if (!slingshot_config.fm_authdir) {
if (slingshot_config.fm_auth == SLINGSHOT_AUTH_OAUTH)
slingshot_config.fm_authdir =
xstrdup(SLINGSHOT_FM_AUTH_OAUTH_DIR);
else if (slingshot_config.fm_auth == SLINGSHOT_AUTH_BASIC)
slingshot_config.fm_authdir =
xstrdup(SLINGSHOT_FM_AUTH_BASIC_DIR);
}
xassert(slingshot_config.fm_authdir);
}
static void _try_enabling_fm_mtls(void)
{
/* Only enable mTLS if the following were provided */
if (!(slingshot_config.fm_mtls_cert && slingshot_config.fm_mtls_key &&
(slingshot_config.fm_mtls_url || slingshot_config.fm_url))) {
/* If only partial mTLS config given warn that it is disabled */
if (slingshot_config.fm_mtls_ca ||
slingshot_config.fm_mtls_cert ||
slingshot_config.fm_mtls_key ||
slingshot_config.fm_mtls_url)
warning("Fabric Manager mTLS authentication is disabled due to fm_mtls_cert, fm_mtls_key, or a fabric manager url (i.e. fm_mtls_url or fm_url) not being configured.");
return;
}
/* If fm_mtls_url is not provided then default to fm_url */
if (!slingshot_config.fm_mtls_url)
slingshot_config.fm_mtls_url = xstrdup(slingshot_config.fm_url);
slingshot_config.flags |= SLINGSHOT_FLAGS_ENABLE_MTLS;
if (!slingshot_config.fm_mtls_ca)
warning("Fabric Manager mTLS authentication is enabled but a certification bundle was not provided. Server identity will not be verified.");
}
static int _config_destroy_retries(const char *token, char *arg)
{
uint32_t tmp;
char *end_ptr = NULL;
if (!arg)
return SLURM_ERROR;
errno = 0;
tmp = strtoul(arg, &end_ptr, 10);
if ((errno != 0) || *end_ptr) {
error("Invalid value for %s", token);
return SLURM_ERROR;
}
slingshot_config.destroy_retries = tmp;
log_flag(SWITCH, "[token=%s]: destroy_retries = %u",
token, slingshot_config.destroy_retries);
return SLURM_SUCCESS;
}
/*
* Mapping between Slingshot limit names, slingshot_limits_set_t offset, maximum
* values
*/
typedef struct limits_table {
const char *name;
size_t offset;
int max;
} limits_table_t;
static limits_table_t limits_table[] = {
{ "txqs", offsetof(slingshot_limits_set_t, txqs), SLINGSHOT_TXQ_MAX },
{ "tgqs", offsetof(slingshot_limits_set_t, tgqs), SLINGSHOT_TGQ_MAX },
{ "eqs", offsetof(slingshot_limits_set_t, eqs), SLINGSHOT_EQ_MAX },
{ "cts", offsetof(slingshot_limits_set_t, cts), SLINGSHOT_CT_MAX },
{ "tles", offsetof(slingshot_limits_set_t, tles), SLINGSHOT_TLE_MAX },
{ "ptes", offsetof(slingshot_limits_set_t, ptes), SLINGSHOT_PTE_MAX },
{ "les", offsetof(slingshot_limits_set_t, les), SLINGSHOT_LE_MAX },
{ "acs", offsetof(slingshot_limits_set_t, acs), SLINGSHOT_AC_MAX },
};
static const int num_limits = sizeof(limits_table) / sizeof(limits_table[0]);
static const char *all_limits = "txqs,tgqs,eqs,cts,tles,ptes,les,acs";
/*
* Check whether the token is a Slingshot resource limit token,
* with format "{def,res,max}_{name}=<limit>"; update slingshot_config
*/
static bool _config_limits(const char *token, slingshot_limits_set_t *limits)
{
char *tok, *arg, *end_ptr;
const char *name, *typestr;
enum { DEF = 1, RES, MAX } type;
int i, limit;
const char def_str[] = "def_";
const size_t def_siz = sizeof(def_str) - 1;
const char res_str[] = "res_";
const size_t res_siz = sizeof(res_str) - 1;
const char max_str[] = "max_";
const size_t max_siz = sizeof(max_str) - 1;
limits_table_t *entry;
slingshot_limits_t *limit_ptr;
tok = xstrdup(token);
if (!(arg = strchr(tok, '=')))
goto err;
*arg = '\0'; /* null-terminate limit name */
arg++;
/* Parse "{def,res,max}_" prefix */
if (!xstrncmp(tok, def_str, def_siz)) {
type = DEF;
typestr = "def";
name = tok + def_siz;
} else if (!xstrncmp(tok, res_str, res_siz)) {
type = RES;
typestr = "res";
name = tok + res_siz;
} else if (!xstrncmp(tok, max_str, max_siz)) {
type = MAX;
typestr = "max";
name = tok + max_siz;
} else {
goto err;
}
/* Now find the limit type and point entry at the limit_table slot */
entry = NULL;
for (i = 0; i < num_limits; i++) {
if (!xstrcmp(name, limits_table[i].name)) {
entry = &limits_table[i];
break;
}
}
if (!entry)
goto err;
end_ptr = NULL;
limit = strtol(arg, &end_ptr, 10);
if (!end_ptr || (end_ptr == arg) || (*end_ptr != '\0'))
goto err;
if ((limit < 0) || (limit > entry->max)) {
error("Invalid limit token '%s': invalid limit %d (valid range 0-%d)",
token, limit, entry->max);
goto out;
}
limit_ptr = (slingshot_limits_t *)(((void *) limits) + entry->offset);
if (type == DEF) {
limit_ptr->def = limit;
} else if (type == RES) {
limit_ptr->res = limit;
} else if (type == MAX) {
limit_ptr->max = limit;
}
log_flag(SWITCH, "[token=%s]: limits[%d].%s.%s %d",
token, i, entry->name, typestr, limit);
xfree(tok);
return true;
err:
error("Invalid limit token '%s' (example {max,res,def}_{%s})",
token, all_limits);
out:
xfree(tok);
return false;
}
static void _print_limits(slingshot_limits_set_t *limits)
{
#define DEBUG_LIMIT(SET, LIM) \
debug("%s: max/res/def %hu %hu %hu", \
#LIM, SET->LIM.max, SET->LIM.res, SET->LIM.def);
DEBUG_LIMIT(limits, txqs);
DEBUG_LIMIT(limits, tgqs);
DEBUG_LIMIT(limits, eqs);
DEBUG_LIMIT(limits, cts);
DEBUG_LIMIT(limits, tles);
DEBUG_LIMIT(limits, ptes);
DEBUG_LIMIT(limits, les);
DEBUG_LIMIT(limits, acs);
#undef DEBUG_LIMIT
}
/*
* Free any configuration memory
*/
extern void slingshot_free_config(void)
{
xfree(slingshot_config.fm_url);
xfree(slingshot_config.fm_authdir);
xfree(slingshot_config.fm_mtls_ca);
xfree(slingshot_config.fm_mtls_cert);
xfree(slingshot_config.fm_mtls_key);
xfree(slingshot_config.fm_mtls_url);
}
extern bool slingshot_stepd_init(const char *switch_params)
{
char *params = NULL, *token, *arg, *save_ptr = NULL;
const char destroy_retries[] = "destroy_retries";
const size_t size_destroy_retries = sizeof(destroy_retries) - 1;
slingshot_config.destroy_retries = SLINGSHOT_CXI_DESTROY_RETRIES;
params = xstrdup(switch_params);
for (token = strtok_r(params, ",", &save_ptr); token;
token = strtok_r(NULL, ",", &save_ptr)) {
if ((arg = strchr(token, '=')))
arg++; /* points to argument after = if any */
if (!xstrncasecmp(token, destroy_retries,
size_destroy_retries)) {
if (_config_destroy_retries(token, arg))
goto err;
}
}
xfree(params);
return true;
err:
xfree(params);
return false;
}
/*
* Set up passed-in slingshot_config_t based on values in 'SwitchParameters'
* slurm.conf setting. Return true on success, false on bad parameters
*/
extern bool slingshot_setup_config(const char *switch_params)
{
char *params = NULL, *token, *arg, *save_ptr = NULL;
const char destroy_retries[] = "destroy_retries";
const size_t size_destroy_retries = sizeof(destroy_retries) - 1;
const char vnis[] = "vnis";
const size_t size_vnis = sizeof(vnis) - 1;
const char tcs[] = "tcs";
const size_t size_tcs = sizeof(tcs) - 1;
const char job_vni[] = "job_vni";
const size_t size_job_vni = sizeof(job_vni) - 1;
const char single_node_vni[] = "single_node_vni";
const size_t size_single_node_vni = sizeof(single_node_vni) - 1;
const char adjust_limits[] = "adjust_limits";
const size_t size_adjust_limits = sizeof(adjust_limits) - 1;
const char no_adjust_limits[] = "no_adjust_limits";
const size_t size_no_adjust_limits = sizeof(no_adjust_limits) - 1;
const char hwcoll_addrs_per_job[] = "hwcoll_addrs_per_job";
const size_t size_hwcoll_addrs_per_job =
sizeof(hwcoll_addrs_per_job) - 1;
const char hwcoll_num_nodes[] = "hwcoll_num_nodes";
const size_t size_hwcoll_num_nodes = sizeof(hwcoll_num_nodes) - 1;
const char fm_url[] = "fm_url";
const size_t size_fm_url = sizeof(fm_url) - 1;
const char fm_auth[] = "fm_auth";
const size_t size_fm_auth = sizeof(fm_auth) - 1;
const char fm_authdir[] = "fm_authdir";
const size_t size_fm_authdir = sizeof(fm_authdir) - 1;
const char fm_mtls_ca[] = "fm_mtls_ca";
const size_t size_fm_mtls_ca = sizeof(fm_mtls_ca) - 1;
const char fm_mtls_cert[] = "fm_mtls_cert";
const size_t size_fm_mtls_cert = sizeof(fm_mtls_cert) - 1;
const char fm_mtls_key[] = "fm_mtls_key";
const size_t size_fm_mtls_key = sizeof(fm_mtls_key) - 1;
const char fm_mtls_url[] = "fm_mtls_url";
const size_t size_fm_mtls_url = sizeof(fm_mtls_url) - 1;
/* Will be default size when SwitchParameters is not set */
uint16_t vni_min = slingshot_state.vni_min;
uint16_t vni_max = slingshot_state.vni_max;
bool vni_range_set = false;
/*
* Handle SwitchParameters values (separated by commas):
*
* vnis=<start>-<end> (e.g. vnis=1-16000)
* tcs=<tc_list> (e.g. tcs=BULK_DATA:BEST_EFFORT)
* single_node_vni=<all,none,user>: allocate VNI for all single-node
* jobs, no single-node jobs, or only on user request
* (via srun --network=single_node_vni)
* job_vni=<all,none,user>: allocate additional VNI per-job for
* all jobs, no jobs, or only on user request
* (via srun --network=job_vni)
* {no_}adjust_limits: {don't} adjust resource reservations
* for each NIC by subtracting resources already
* used/reserved by system services
* (i.e. /etc/jackaloped for BASIC, /etc/wlm-client-auth for OAUTH)
* hwcoll_addrs_per_job=<number>: allocate <number> of Slingshot
* hardware collectives multicast addresses per job
* (that are larger than <hwcoll_min_nodes> nodes)
* hwcoll_num_nodes=<num_nodes>: minimum number of nodes for a
* job to be allocated Slingshot hardware collectives
* fm_url=<url>: use URL for fabric manager REST requests
* fm_auth="BASIC|OAUTH": fabric manager REST API authentication type
* fm_authdir=<dir>: fabric manager authentication info directory
* (i.e. /etc/fmsim for BASIC, /etc/wlm-client-auth for OAUTH)
* fm_mtls_ca=<path to FM certificate bundle>
* fm_mtls_cert=<path to client public certificate>
* fm_mtls_key=<path to client private key>
* fm_mtls_url=<url for mTLS authentication to FM>
*
* def_<NIC_resource>: default per-thread value for resource
* res_<NIC_resource>: reserved value for resource
* max_<NIC_resource>: maximum value for resource
*
* NIC resources are:
* txqs: transmit command queues
* tgqs: target command queues
* eqs: events queues
* cts: counters
* tles: trigger list entries
* ptes: portable table entries
* les: list entries
* acs: addressing contexts
*/
slingshot_free_config();
_config_defaults();
if (!switch_params)
goto out;
log_flag(SWITCH, "switch_params=%s", switch_params);
params = xstrdup(switch_params);
for (token = strtok_r(params, ",", &save_ptr); token;
token = strtok_r(NULL, ",", &save_ptr)) {
if ((arg = strchr(token, '=')))
arg++; /* points to argument after = if any */
if (!xstrncasecmp(token, vnis, size_vnis)) {
if (!_config_vnis(token, arg, &vni_min, &vni_max))
goto err;
/* See if any incompatible changes in VNI range */
if (!_setup_vni_table(vni_min, vni_max))
goto err;
vni_range_set = true;
} else if (!xstrncasecmp(token, tcs, size_tcs)) {
if (!_config_tcs(token, arg, &slingshot_config.tcs))
goto err;
} else if (!xstrncasecmp(token, job_vni, size_job_vni)) {
if (!_config_job_vni(token, arg))
goto err;
} else if (!xstrncasecmp(token, single_node_vni,
size_single_node_vni)) {
if (!_config_single_node_vni(token, arg))
goto err;
} else if (!xstrncasecmp(token, adjust_limits,
size_adjust_limits)) {
slingshot_config.flags |= SLINGSHOT_FLAGS_ADJUST_LIMITS;
} else if (!xstrncasecmp(token, no_adjust_limits,
size_no_adjust_limits)) {
slingshot_config.flags &=
~(SLINGSHOT_FLAGS_ADJUST_LIMITS);
} else if (!xstrncasecmp(token, hwcoll_addrs_per_job,
size_hwcoll_addrs_per_job)) {
if (!_config_hwcoll_addrs_per_job(token, arg))
goto err;
} else if (!xstrncasecmp(token, hwcoll_num_nodes,
size_hwcoll_num_nodes)) {
if (!_config_hwcoll_num_nodes(token, arg))
goto err;
} else if (!xstrncasecmp(token, fm_url, size_fm_url)) {
if (!_config_fm_url(token, arg))
goto err;
/*
* NOTE: fm_authdir needs to come before fm_auth
* since fm_auth is a prefix of fm_authdir
*/
} else if (!xstrncasecmp(token, fm_authdir, size_fm_authdir)) {
if (!_config_fm_authdir(token, arg))
goto err;
} else if (!xstrncasecmp(token, fm_auth, size_fm_auth)) {
if (!_config_fm_auth(token, arg))
goto err;
} else if (!xstrncasecmp(token, fm_mtls_ca, size_fm_mtls_ca)) {
if (!_config_fm_mtls_ca(token, arg))
goto err;
} else if (!xstrncasecmp(token, fm_mtls_cert,
size_fm_mtls_cert)) {
if (!_config_fm_mtls_cert(token, arg))
goto err;
} else if (!xstrncasecmp(token, fm_mtls_key,
size_fm_mtls_key)) {
if (!_config_fm_mtls_key(token, arg))
goto err;
} else if (!xstrncasecmp(token, fm_mtls_url,
size_fm_mtls_url)) {
if (!_config_fm_mtls_url(token, arg))
goto err;
} else if (!xstrncasecmp(token, destroy_retries,
size_destroy_retries)) {
if (_config_destroy_retries(token, arg))
goto err;
} else {
if (!_config_limits(token, &slingshot_config.limits))
goto err;
}
}
/* If fm_url is set, set up default values for fm_auth{dir} */
_config_fm_defaults();
_try_enabling_fm_mtls();
/* Set up connection to fabric manager */
if (!slingshot_init_collectives())
goto err;
out:
if (!vni_range_set && !_setup_vni_table(vni_min, vni_max))
goto err;
debug("single_node_vni=%d job_vni=%d tcs=%#x flags=%#x",
slingshot_config.single_node_vni, slingshot_config.job_vni,
slingshot_config.tcs, slingshot_config.flags);
debug("fm_url=%s fm_auth=%u fm_authdir=%s hwcoll_addrs_per_job=%d hwcoll_num_nodes=%d",
slingshot_config.fm_url, slingshot_config.fm_auth,
slingshot_config.fm_authdir,
slingshot_config.hwcoll_addrs_per_job,
slingshot_config.hwcoll_num_nodes);
_print_limits(&slingshot_config.limits);
xfree(params);
return true;
err:
error("SwitchParameters parsing encountered errors, exiting");
xfree(params);
return false;
}
/*
* Allocate a free VNI (range vni_min... vni_max, starting at vni_last + 1)
* Return true with *vnip filled in on success, false on failure
*/
static bool _alloc_vni(uint16_t *vnip)
{
bitoff_t start, end, bit;
uint16_t vni;
/* Search for clear bit from [vni_last + 1...vni_max] */
start = (slingshot_state.vni_last + 1) - slingshot_state.vni_min;
end = slingshot_state.vni_max - slingshot_state.vni_min;
xassert(start >= 0);
log_flag(SWITCH, "upper bits: start/end %zu %zu", start, end);
for (bit = start; bit <= end; bit++) {
if (!bit_test(slingshot_state.vni_table, bit))
goto gotvni;
}
/* Search for clear bit from [vni_min...vni_last] */
start = 0;
end = slingshot_state.vni_last - slingshot_state.vni_min;
log_flag(SWITCH, "lower bits: start/end %zu %zu", start, end);
for (bit = start; bit <= end; bit++) {
if (!bit_test(slingshot_state.vni_table, bit))
goto gotvni;
}
/* TODO: developer's mode: check for no bits set? */
error("Cannot allocate VNI (min/max/last %hu %hu %hu)",
slingshot_state.vni_min, slingshot_state.vni_max,
slingshot_state.vni_last);
return false;
gotvni:
bit_set(slingshot_state.vni_table, bit);
xassert(bit + slingshot_state.vni_min <= SLINGSHOT_VNI_MAX);
vni = bit + slingshot_state.vni_min;
slingshot_state.vni_last = vni;
free_vnis--;
xassert(free_vnis >= 0);
log_flag(SWITCH, "min/max/last %hu %hu %hu vni=%hu free_vnis=%d",
slingshot_state.vni_min, slingshot_state.vni_max,
slingshot_state.vni_last, vni, free_vnis);
*vnip = vni;
return true;
}
/*
* Allocate a per-job inter-job-step VNI.
* If this is the first allocation for this job ID,
* allocate a new VNI and add it to the job_vnis table;
* otherwise return the VNI from the table for this job ID
* Return true with *vnip filled in on success, false on failure
*/
static bool _alloc_job_vni(uint32_t job_id, uint16_t *vnip)
{
int i, freeslot = -1;
job_vni_t *jobvni;
/*
* Check if this jobID is in the table already:
* if so, fill in *vnip and return true;
* otherwise, save the first free index into the table.
*/
for (i = 0; i < slingshot_state.num_job_vnis; i++) {
jobvni = &slingshot_state.job_vnis[i];
if (jobvni->job_id == job_id) {
log_flag(SWITCH,
"[job_id=%u]: found job_vnis[%d] vni=%hu num_job_vnis=%d",
job_id, i, jobvni->vni,
slingshot_state.num_job_vnis);
*vnip = jobvni->vni;
return true;
} else if (jobvni->job_id == 0 && freeslot < 0) {
freeslot = i;
}
}
/* Allocate VNI from bitmap */
if (!_alloc_vni(vnip))
return false;
/* If no free slot, allocate a new slot in the job_vnis table */
if (freeslot < 0) {
freeslot = slingshot_state.num_job_vnis;
slingshot_state.num_job_vnis++;
xrecalloc(slingshot_state.job_vnis,
slingshot_state.num_job_vnis, sizeof(job_vni_t));
}
slingshot_state.job_vnis[freeslot].job_id = job_id;
slingshot_state.job_vnis[freeslot].vni = *vnip;
log_flag(SWITCH, "[job_id=%u]: new vni[%d] vni=%hu num_job_vnis=%d",
job_id, freeslot, *vnip, slingshot_state.num_job_vnis);
return true;
}
/*
* Free an allocated VNI
*/
static void _free_vni(uint16_t vni)
{
/*
* Range-check VNI, but only if table has been re-sized and VNIs
* were lost
*/
if (lost_vnis &&
((vni < slingshot_state.vni_min) ||
(vni > slingshot_state.vni_max))) {
info("vni %hu: not in current table min/max %hu-%hu",
vni, slingshot_state.vni_min, slingshot_state.vni_max);
return;
}
bitoff_t bit = vni - slingshot_state.vni_min;
if (!bit_test(slingshot_state.vni_table, bit)) {
log_flag(SWITCH, "vni %hu: bit %zu not set in vni_table!",
vni, bit);
return;
}
bit_clear(slingshot_state.vni_table, bit);
free_vnis++;
xassert(free_vnis <=
(slingshot_state.vni_max - slingshot_state.vni_min) + 1);
log_flag(SWITCH, "[vni=%hu]: bit %zu", vni, bit);
}
/*
* Free an allocated per-job "user" VNI
* Return the VNI, or 0 if not found
*/
static uint16_t _free_job_vni(uint32_t job_id)
{
int i;
job_vni_t *jobvni = NULL;
/*
* Find the jobID/vni in the job_vnis table;
* zero-out the slot in the table
*/
for (i = 0; i < slingshot_state.num_job_vnis; i++) {
jobvni = &slingshot_state.job_vnis[i];
if (jobvni->job_id == job_id) {
uint16_t vni = jobvni->vni;
_free_vni(vni);
log_flag(SWITCH,
"[job_id=%u]: free job_vnis[%d] vni=%hu num_job_vnis=%d free_vnis=%d",
jobvni->job_id, i, vni,
slingshot_state.num_job_vnis, free_vnis);
memset(jobvni, 0, sizeof(*jobvni));
return vni;
}
}
if (slingshot_state.num_job_vnis > 0)
log_flag(SWITCH, "job_id=%u: not found in job_vnis[%d]",
job_id, slingshot_state.num_job_vnis);
return 0;
}
/*
* Parse --network 'depth=<value>' token: return value, or 0 on error
*/
static uint32_t _setup_depth(const char *token)
{
uint32_t ret;
char *arg = strchr(token, '=');
if (!arg)
goto err;
arg++; /* point to argument */
char *end_ptr = NULL;
ret = strtol(arg, &end_ptr, 10);
if (*end_ptr || ret < 1 || ret > 1024)
goto err;
log_flag(SWITCH, "[token=%s]: depth %u", token, ret);
return ret;
err:
error("Invalid depth token '%s' (valid range %d-%d)", token, 1, 1024);
return 0;
}
/*
* Parse a single comma-separated part of the --network option
*
* depth: value to be used for threads-per-rank
* job_vni: allocate a job VNI for this job
* single_node_vni: allocate a VNI for this job even if single-node
* no_vni: _don't_ allocate a VNI for this job even if multi-node
* {no_}adjust_limits: {don't} adjust resource limit reservations
* by subtracting system service reserved/used values
* disable_rdzv_get: disable rendezvous gets
* tcs: set of traffic classes (job only)
* def_<NIC_resource>: default per-thread value for resource
* res_<NIC_resource>: reserved value for resource
* max_<NIC_resource>: maximum value for resource
*/
static bool _parse_network_token(const char *token, bool is_job,
slingshot_stepinfo_t *job,
bool *job_vni, bool *single_node_vni,
bool *no_vni)
{
char depth_str[] = "depth";
size_t depth_siz = sizeof(depth_str) - 1;
char job_vni_str[] = "job_vni";
size_t job_vni_siz = sizeof(job_vni_str) - 1;
char single_node_vni_str[] = "single_node_vni";
size_t single_node_vni_siz = sizeof(single_node_vni_str) - 1;
char no_vni_str[] = "no_vni";
size_t no_vni_siz = sizeof(no_vni_str) - 1;
char adjust_limits_str[] = "adjust_limits";
size_t adjust_limits_siz = sizeof(adjust_limits_str) - 1;
char no_adjust_limits_str[] = "no_adjust_limits";
size_t no_adjust_limits_siz = sizeof(no_adjust_limits_str) - 1;
char rdzv_get_str[] = "disable_rdzv_get";
size_t rdzv_get_siz = sizeof(rdzv_get_str) - 1;
char hwcoll_str[] = "hwcoll";
size_t hwcoll_siz = sizeof(hwcoll_siz) - 1;
char tcs_str[] = "tcs";
size_t tcs_siz = sizeof(tcs_str) - 1;
char *arg = xstrchr(token, '=');
if (arg != NULL)
arg++;
if (!xstrncmp(token, depth_str, depth_siz)) {
if ((job->depth = _setup_depth(token)) == 0)
return false;
} else if (!xstrncmp(token, job_vni_str, job_vni_siz)) {
if (token[job_vni_siz] != '\0') {
error("Invalid job_vni token '%s'", token);
return false;
} else if (slingshot_config.job_vni == SLINGSHOT_JOB_VNI_NONE) {
error("Job VNI requested by user, but 'job_vni=<all|user>' not set in SwitchParameters");
return false;
} else {
*job_vni = true;
}
} else if (!xstrncmp(token, single_node_vni_str, single_node_vni_siz)) {
if (token[single_node_vni_siz] != '\0') {
error("Invalid single_node_vni token '%s'", token);
return false;
} else if (slingshot_config.single_node_vni ==
SLINGSHOT_SN_VNI_NONE) {
error("Single-node VNI requested by user, but 'single_node_vni=<all|user>' not set in SwitchParameters");
return false;
} else {
*single_node_vni = true;
}
} else if (!xstrncmp(token, no_vni_str, no_vni_siz)) {
if (token[no_vni_siz] != '\0') {
error("Invalid no_vni token '%s'", token);
return false;
} else {
*no_vni = true;
}
} else if (!xstrncmp(token, adjust_limits_str, adjust_limits_siz)) {
job->flags |= SLINGSHOT_FLAGS_ADJUST_LIMITS;
} else if (!xstrncmp(token, no_adjust_limits_str,
no_adjust_limits_siz)) {
job->flags &= ~(SLINGSHOT_FLAGS_ADJUST_LIMITS);
} else if (!xstrncmp(token, rdzv_get_str, rdzv_get_siz)) {
job->flags |= SLINGSHOT_FLAGS_DISABLE_RDZV_GET;
} else if (!xstrncmp(token, hwcoll_str, hwcoll_siz)) {
;
} else if (!xstrncmp(token, tcs_str, tcs_siz)) {
if (is_job)
return _config_tcs(token, arg, &job->tcs);
} else if (!_config_limits(token, &job->limits)) {
return false;
}
return true;
}
/*
* Set up passed-in slingshot_stepinfo_t based on values in srun/sbatch/salloc
* --network parameters. Return true on successful parsing, false otherwise.
*/
static bool _setup_network_params(const char *network_params,
const char *job_network_params,
slingshot_stepinfo_t *job,
bool *job_vni,
bool *single_node_vni,
bool *no_vni)
{
char *params = NULL, *token, *save_ptr = NULL;
log_flag(SWITCH, "job_network_params=%s network_params=%s",
job_network_params, network_params);
/* First, copy limits, tcs, and flags from slingshot_config to job */
job->limits = slingshot_config.limits;
job->tcs = slingshot_config.tcs;
job->flags = slingshot_config.flags;
/* no_vni disabled by default */
*no_vni = false;
/* Then get configured job VNI setting */
if (slingshot_config.job_vni == SLINGSHOT_JOB_VNI_ALL)
*job_vni = true;
else
*job_vni = false;
/* Then get configured single-node VNI setting */
if (slingshot_config.single_node_vni == SLINGSHOT_SN_VNI_ALL)
*single_node_vni = true;
else
*single_node_vni = false;
/* Handle sbatch/salloc --network argument values */
if (job_network_params) {
params = xstrdup(job_network_params);
for (token = strtok_r(params, ",", &save_ptr); token;
token = strtok_r(NULL, ",", &save_ptr)) {
if (!_parse_network_token(token, true, job, job_vni,
single_node_vni, no_vni))
goto err;
}
xfree(params);
}
/* Handle srun --network argument values */
if (network_params) {
params = xstrdup(network_params);
for (token = strtok_r(params, ",", &save_ptr); token;
token = strtok_r(NULL, ",", &save_ptr)) {
if (!_parse_network_token(token, false, job, job_vni,
single_node_vni, no_vni))
goto err;
}
xfree(params);
}
if (slurm_conf.debug_flags & DEBUG_FLAG_SWITCH)
_print_limits(&job->limits);
return true;
err:
xfree(params);
return false;
}
extern bool slingshot_setup_job_vni_pool(job_record_t *job_ptr)
{
int alloc_vnis = 0;
bool alloc_job_vni, alloc_single_node_vni, no_vni;
slingshot_stepinfo_t stepinfo = {0};
slingshot_jobinfo_t *jobinfo = NULL;
if (!job_ptr->switch_jobinfo)
job_ptr->switch_jobinfo = xmalloc(sizeof(slingshot_jobinfo_t));
/*
* If --network specified, add any depth, limits,
* {job,single_node,no}_vni settings
* Copy configured Slingshot limits to job, add any --network settings
*/
if (!_setup_network_params(NULL, job_ptr->network,
&stepinfo, &alloc_job_vni,
&alloc_single_node_vni, &no_vni))
goto err;
jobinfo = (slingshot_jobinfo_t *) job_ptr->switch_jobinfo;
/*
* VNIs and traffic classes are not allocated if:
* --network=no_vni is set, or single-node jobs,
* unless 'single_node_vni=all' is set in the configuration,
* or 'single_node_vni=user' is set in the configuration and
* 'srun --network=single_node_vni' is used
*/
jobinfo->num_vnis = 0;
if (!no_vni &&
((job_ptr->node_cnt > 1) || alloc_single_node_vni)) {
alloc_vnis = job_ptr->node_cnt;
if (alloc_job_vni)
alloc_vnis++;
}
jobinfo->vnis = xcalloc(alloc_vnis, sizeof(uint16_t));
for (int i = 0; i < alloc_vnis; i++) {
if (!_alloc_vni(&jobinfo->vnis[i]))
goto err;
jobinfo->num_vnis++;
}
return true;
err:
if (jobinfo) {
for (int i = 0; i < jobinfo->num_vnis; i++)
_free_vni(jobinfo->vnis[i]);
slingshot_free_jobinfo(jobinfo);
}
job_ptr->switch_jobinfo = NULL;
return false;
}
/*
* Set up slingshot_stepinfo_t struct with VNIs, and CXI limits,
* based on configured limits as well as any specified with
* the --network option
* Return true on success, false if VNI cannot be allocated,
* or --network parameters have syntax errors
*/
extern bool slingshot_setup_job_step_vni(slingshot_stepinfo_t *job, int node_cnt,
uint32_t job_id,
const char *network_params,
const char *job_network_params)
{
int alloc_vnis = 0;
uint16_t vni = 0, job_vni = 0;
bool alloc_job_vni, alloc_single_node_vni, no_vni;
/*
* If --network specified, add any depth, limits,
* {job,single_node,no}_vni settings
* Copy configured Slingshot limits to job, add any --network settings
*/
if (!_setup_network_params(network_params, job_network_params, job,
&alloc_job_vni, &alloc_single_node_vni,
&no_vni))
goto err;
/*
* VNIs and traffic classes are not allocated if:
* --network=no_vni is set, or single-node jobs,
* unless 'single_node_vni=all' is set in the configuration,
* or 'single_node_vni=user' is set in the configuration and
* 'srun --network=single_node_vni' is used
*/
job->num_vnis = 0;
if (!no_vni && ((node_cnt > 1) || alloc_single_node_vni)) {
alloc_vnis++;
}
if (alloc_vnis > 0 && alloc_job_vni)
alloc_vnis++;
job->vnis = xcalloc(alloc_vnis, sizeof(uint16_t));
if (alloc_vnis >= 1) {
if (!_alloc_vni(&job->vnis[0]))
goto err;
vni = job->vnis[0];
job->num_vnis++;
}
/* Allocate (first step in job) or get job VNI */
if (alloc_vnis == 2) {
if (!_alloc_job_vni(job_id, &job->vnis[1]))
goto err;
job_vni = job->vnis[1];
job->num_vnis++;
}
debug("allocate vni=%hu job_vni=%hu free_vnis=%d",
vni, job_vni, free_vnis);
/* profiles are allocated in slurmstepd */
job->num_profiles = 0;
job->profiles = NULL;
return true;
err:
if (job->vnis) {
if (job->num_vnis > 0)
_free_vni(job->vnis[0]);
if (job->num_vnis > 1)
(void)_free_job_vni(job_id);
xfree(job->vnis);
}
return false;
}
/*
* Free job VNI pool (if any)
*/
extern void slingshot_free_job_vni_pool(slingshot_jobinfo_t *job)
{
/* slingshot_config is only initialized on the ctld and stepmgr */
if (!running_in_slurmctld() && !active_outside_ctld)
return;
if (!job)
return;
for (int i = 0; i < job->num_vnis; i++) {
_free_vni(job->vnis[i]);
debug("free vni=%hu free_vnis=%d", job->vnis[0], free_vnis);
}
}
/*
* Free job-step VNI (if any)
*/
extern void slingshot_free_job_step_vni(slingshot_stepinfo_t *job)
{
/* slingshot_config is only initialized on the ctld and stepmgr */
if (!running_in_slurmctld() && !active_outside_ctld)
return;
/* Second VNI is a job VNI - don't free until job is complete */
if (job->vnis && (job->num_vnis > 0)) {
_free_vni(job->vnis[0]);
debug("free vni=%hu free_vnis=%d", job->vnis[0], free_vnis);
}
}
/*
* Free this job's job-specific VNI; called at end of job
*/
extern void slingshot_free_job_vni(uint32_t job_id)
{
uint16_t vni = _free_job_vni(job_id);
debug("free job_vni=%hu free_vnis=%d", vni, free_vnis);
}
extern void slingshot_free_jobinfo(slingshot_jobinfo_t *jobinfo)
{
if (!jobinfo)
return;
xfree(jobinfo->vnis);
xfree(jobinfo);
}
extern int slingshot_update_config(slingshot_jobinfo_t *jobinfo)
{
free_vnis = 0;
bit_set_all(slingshot_state.vni_table);
for (int i = 0; i < jobinfo->num_vnis; i++) {
bit_clear(slingshot_state.vni_table,
(jobinfo->vnis[i] - slingshot_state.vni_min));
free_vnis++;
}
if (slurm_conf.debug_flags & DEBUG_FLAG_SWITCH) {
char *bit_str = bit_fmt_full(slingshot_state.vni_table);
log_flag(SWITCH, "%s: min/max: %hu/%hu free_vnis: %d bitstr: %s",
__func__, slingshot_state.vni_min,
slingshot_state.vni_max, free_vnis, bit_str);
xfree(bit_str);
}
return SLURM_SUCCESS;
}