blob: 1087c098452968fd7bad163ce997cd248aa6f470 [file] [log] [blame]
/*****************************************************************************\
* gres_shard.c - Support SHARD as a generic resource.
* Sharding is a mechanism to share GPUs generically.
*****************************************************************************
* Copyright (C) SchedMD LLC.
* Written by Danny Auble
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#define _GNU_SOURCE
#include <ctype.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"
#include "src/common/slurm_xlator.h"
#include "src/common/bitstring.h"
#include "src/common/env.h"
#include "src/interfaces/gpu.h"
#include "src/interfaces/gres.h"
#include "src/common/hostlist.h"
#include "src/common/list.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "../common/gres_common.h"
#include "../common/gres_c_s.h"
/*
* These variables are required by the generic plugin interface. If they
* are not found in the plugin, the plugin loader will ignore it.
*
* plugin_name - A string giving a human-readable description of the
* plugin. There is no maximum length, but the symbol must refer to
* a valid string.
*
* plugin_type - A string suggesting the type of the plugin or its
* applicability to a particular form of data or method of data handling.
* If the low-level plugin API is used, the contents of this string are
* unimportant and may be anything. Slurm uses the higher-level plugin
* interface which requires this string to be of the form
*
* <application>/<method>
*
* where <application> is a description of the intended application of
* the plugin (e.g., "auth" for Slurm authentication) and <method> is a
* description of how this plugin satisfies that application. Slurm will
* only load authentication plugins if the plugin_type string has a prefix
* of "auth/".
*
* plugin_version - an unsigned 32-bit integer containing the Slurm version
* (major.minor.micro combined into a single number).
*/
/* Human-readable description of this plugin. */
const char plugin_name[] = "Gres SHARD plugin";
/* Plugin type in "<application>/<method>" form. */
const char plugin_type[] = "gres/shard";
/* Slurm version number (SLURM_VERSION_NUMBER) exported by this plugin. */
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
/*
 * Device list for this node. Its address is handed to
 * gres_c_s_init_share_devices() and gres_recv_stepd(), which are expected to
 * populate it; fini() releases it with FREE_NULL_LIST().
 */
static list_t *gres_devices = NULL;
/*
 * Flag bits collected from the gres.conf records in
 * gres_p_node_config_load() (or unpacked in gres_p_recv_stepd()); passed on as
 * gres_conf_flags whenever environment variables are set.
 */
static uint32_t node_flags = 0;
/*
 * Pairs a count with a device id.
 * NOTE(review): nothing in the visible part of this file references this
 * type -- confirm whether it is still needed.
 */
typedef struct shard_dev_info {
uint64_t count; /* presumably the number of shards on the device -- confirm */
int id; /* presumably the id of the device the shards belong to -- confirm */
} shard_dev_info_t;
/*
 * Plugin load hook: emit a debug message and report success.
 *
 * RET SLURM_SUCCESS, always
 */
extern int init(void)
{
	int rc = SLURM_SUCCESS;

	debug("loaded");

	return rc;
}
/*
 * Plugin unload hook: emit a debug message, release the device list held in
 * gres_devices and then call gres_c_s_fini().
 */
extern void fini(void)
{
	debug("unloading");

	/* Drop this plugin's device list before the gres_c_s cleanup runs */
	FREE_NULL_LIST(gres_devices);
	gres_c_s_fini();
}
/*
 * Load this node's shard configuration from the gres.conf records.
 *
 * The device list is set up by gres_c_s_init_share_devices(), using "gpu" as
 * the name of the GRES being shared. Only when that succeeds is node_flags
 * rebuilt from the records.
 *
 * IN gres_conf_list - gres.conf records for this node
 * IN config - node configuration load arguments
 * RET SLURM_SUCCESS or the error returned by gres_c_s_init_share_devices()
 */
extern int gres_p_node_config_load(list_t *gres_conf_list,
				   node_config_load_t *config)
{
	int rc;

	rc = gres_c_s_init_share_devices(gres_conf_list, &gres_devices,
					 config, "gpu");

	if (rc == SLURM_SUCCESS) {
		/*
		 * Collect the env types requested by the gres_slurmd_conf
		 * records. If any one record asks for an env, every record on
		 * this node is assumed to want it. node_flags is consulted
		 * later, in the stepd, when the envs are actually set.
		 */
		node_flags = 0;
		(void) list_for_each(gres_conf_list,
				     gres_common_set_env_types_on_node_flags,
				     &node_flags);
	}

	return rc;
}
/*
 * Set or clear SLURM_SHARDS_ON_NODE in the environment referenced by
 * gres_env->env_ptr.
 *
 * A non-zero gres_cnt is written as a decimal string. A zero gres_cnt removes
 * the variable, unless GRES_INTERNAL_FLAG_PROTECT_ENV is set in
 * gres_env->flags, in which case the environment is left untouched.
 */
static void _set_shard_env(common_gres_env_t *gres_env)
{
	char *shard_cnt_str;

	if (!gres_env->gres_cnt) {
		/* Nothing allocated: drop the variable unless protected */
		if (!(gres_env->flags & GRES_INTERNAL_FLAG_PROTECT_ENV))
			unsetenvp(*(gres_env->env_ptr),
				  "SLURM_SHARDS_ON_NODE");
		return;
	}

	shard_cnt_str = xstrdup_printf("%"PRIu64, gres_env->gres_cnt);
	env_array_overwrite(gres_env->env_ptr, "SLURM_SHARDS_ON_NODE",
			    shard_cnt_str);
	xfree(shard_cnt_str);
}
/*
 * Set environment variables for a job (i.e. all of its tasks) from the job's
 * GRES state.
 *
 * IN/OUT job_env_ptr - environment to modify
 * IN gres_bit_alloc - bitmap of allocated GRES
 * IN gres_per_node - GRES count, exported as SLURM_SHARDS_ON_NODE
 * IN flags - internal flags (e.g. GRES_INTERNAL_FLAG_PROTECT_ENV)
 */
extern void gres_p_job_set_env(char ***job_env_ptr,
			       bitstr_t *gres_bit_alloc,
			       uint64_t gres_per_node,
			       gres_internal_flags_t flags)
{
	/* Every field not assigned below stays zeroed */
	common_gres_env_t gres_env = { 0 };

	gres_env.env_ptr = job_env_ptr;
	gres_env.bit_alloc = gres_bit_alloc;
	gres_env.gres_cnt = gres_per_node;
	gres_env.flags = flags;
	gres_env.gres_conf_flags = node_flags;
	gres_env.gres_devices = gres_devices;
	gres_env.is_job = true;

	/* Common GPU variables first, then the shard-specific one */
	gres_common_gpu_set_env(&gres_env);
	_set_shard_env(&gres_env);
}
/*
 * Set environment variables for a job step (i.e. all of its tasks) from the
 * step's GRES state.
 *
 * IN/OUT step_env_ptr - environment to modify
 * IN gres_bit_alloc - bitmap of allocated GRES
 * IN gres_per_node - GRES count, exported as SLURM_SHARDS_ON_NODE
 * IN flags - internal flags (e.g. GRES_INTERNAL_FLAG_PROTECT_ENV)
 */
extern void gres_p_step_set_env(char ***step_env_ptr,
				bitstr_t *gres_bit_alloc,
				uint64_t gres_per_node,
				gres_internal_flags_t flags)
{
	/* Every field not assigned below (is_job, is_task, ...) stays zeroed */
	common_gres_env_t gres_env = { 0 };

	gres_env.env_ptr = step_env_ptr;
	gres_env.bit_alloc = gres_bit_alloc;
	gres_env.gres_cnt = gres_per_node;
	gres_env.flags = flags;
	gres_env.gres_conf_flags = node_flags;
	gres_env.gres_devices = gres_devices;

	/* Common GPU variables first, then the shard-specific one */
	gres_common_gpu_set_env(&gres_env);
	_set_shard_env(&gres_env);
}
/*
 * Reset environment variables for a single task from the job step's GRES
 * state and the GRES usable by that task.
 *
 * IN/OUT task_env_ptr - environment to modify
 * IN gres_bit_alloc - bitmap of allocated GRES
 * IN gres_cnt - GRES count, exported as SLURM_SHARDS_ON_NODE
 * IN usable_gres - bitmap of GRES usable by this task
 * IN flags - internal flags (e.g. GRES_INTERNAL_FLAG_PROTECT_ENV)
 */
extern void gres_p_task_set_env(char ***task_env_ptr,
				bitstr_t *gres_bit_alloc,
				uint64_t gres_cnt,
				bitstr_t *usable_gres,
				gres_internal_flags_t flags)
{
	/* Every field not assigned below stays zeroed */
	common_gres_env_t gres_env = { 0 };

	gres_env.env_ptr = task_env_ptr;
	gres_env.bit_alloc = gres_bit_alloc;
	gres_env.usable_gres = usable_gres;
	gres_env.gres_cnt = gres_cnt;
	gres_env.flags = flags;
	gres_env.gres_conf_flags = node_flags;
	gres_env.gres_devices = gres_devices;
	gres_env.is_task = true;

	/* Common GPU variables first, then the shard-specific one */
	gres_common_gpu_set_env(&gres_env);
	_set_shard_env(&gres_env);
}
/*
 * Pack the GRES information destined for slurmstepd into buffer.
 *
 * Order matters and must match gres_p_recv_stepd(): the device list first,
 * then node_flags as a 32-bit value, then the gres_c_s state.
 */
extern void gres_p_send_stepd(buf_t *buffer)
{
	gres_send_stepd(buffer, gres_devices);
	pack32(node_flags, buffer);
	gres_c_s_send_stepd(buffer);
}
/*
 * Receive the GRES information sent by slurmd from buffer.
 *
 * Reads, in the same order gres_p_send_stepd() writes them: the device list
 * (into gres_devices), node_flags as a 32-bit value, then the gres_c_s state.
 */
extern void gres_p_recv_stepd(buf_t *buffer)
{
gres_recv_stepd(buffer, &gres_devices);
/* NOTE(review): safe_unpack32() presumably jumps to unpack_error on failure -- confirm */
safe_unpack32(&node_flags, buffer);
gres_c_s_recv_stepd(buffer);
return;
unpack_error:
/*
 * Failure is only logged. If reached from safe_unpack32() above,
 * gres_c_s_recv_stepd() has not been called.
 */
error("%s: failed", __func__);
}
/*
* Return a list of devices of this type. The list elements are of type
* "gres_device_t" and the list should be freed using FREE_NULL_LIST().
*/
extern list_t *gres_p_get_devices(void)
{
return gres_devices;
}
/*
 * Step hardware initialization hook; forwards both arguments unchanged to
 * gpu_g_step_hardware_init().
 *
 * IN usable_gres - bitmap passed through to the GPU plugin
 * IN settings - settings string passed through to the GPU plugin
 */
extern void gres_p_step_hardware_init(bitstr_t *usable_gres, char *settings)
{
	/* Shards have no hardware handling of their own; defer to the GPU layer */
	gpu_g_step_hardware_init(usable_gres, settings);
}
/*
 * Step hardware teardown hook; forwards to gpu_g_step_hardware_fini().
 */
extern void gres_p_step_hardware_fini(void)
{
	/* Counterpart of gres_p_step_hardware_init() */
	gpu_g_step_hardware_fini();
}
/*
 * Build the record used to set environment variables for a job's prolog or
 * epilog, based on the GRES allocated to the job.
 *
 * For every node that has an allocation bitmap in gres_js, the bitmap is
 * duplicated with bit_copy() and the per-node GRES count is copied alongside
 * it. Nodes without a bitmap keep the values their entries were allocated
 * with by xcalloc().
 *
 * IN gres_js - job GRES state; node_cnt, gres_bit_alloc and
 *		gres_cnt_node_alloc are read
 * RET newly allocated gres_prep_t holding node_cnt entries in each array
 */
extern gres_prep_t *gres_p_prep_build_env(gres_job_state_t *gres_js)
{
	int i;
	gres_prep_t *gres_prep = xmalloc(sizeof(*gres_prep));

	gres_prep->node_cnt = gres_js->node_cnt;
	gres_prep->gres_bit_alloc = xcalloc(gres_prep->node_cnt,
					    sizeof(bitstr_t *));
	gres_prep->gres_cnt_node_alloc = xcalloc(gres_prep->node_cnt,
						 sizeof(uint64_t));

	/* Without any allocation bitmaps there is nothing to copy */
	if (!gres_js->gres_bit_alloc)
		return gres_prep;

	for (i = 0; i < gres_prep->node_cnt; i++) {
		if (!gres_js->gres_bit_alloc[i])
			continue;

		gres_prep->gres_bit_alloc[i] =
			bit_copy(gres_js->gres_bit_alloc[i]);

		/*
		 * The count array is checked separately: the job state may
		 * carry bitmaps without per-node counts, and indexing a NULL
		 * array here would crash.
		 */
		if (gres_js->gres_cnt_node_alloc) {
			gres_prep->gres_cnt_node_alloc[i] =
				gres_js->gres_cnt_node_alloc[i];
		}
	}

	return gres_prep;
}
/*
 * Set environment variables for a job's prolog or epilog, based on the GRES
 * allocated to the job on one node.
 *
 * IN/OUT prep_env_ptr - environment to modify
 * IN gres_prep - record built by gres_p_prep_build_env()
 * IN node_inx - index of the node within gres_prep
 */
extern void gres_p_prep_set_env(char ***prep_env_ptr,
				gres_prep_t *gres_prep, int node_inx)
{
	/* The helper's return code is deliberately ignored */
	(void) gres_common_prep_set_env(prep_env_ptr, gres_prep, node_inx,
					node_flags, gres_devices);
}