/*****************************************************************************\
* node_select.c - node selection plugin wrapper.
*
* NOTE: The node selection plugin itself is intimately tied to slurmctld
* functions and data structures. Some related functions (e.g. data structure
* un/packing, environment variable setting) are required by most SLURM
* commands. Since some of these commands must be executed on the BlueGene
* front-end nodes, the functions they require are here rather than within
* the plugin. This is because functions required by the plugin cannot be
* resolved on the front-end nodes, so the plugins cannot be loaded there.
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2009 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov>.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <pthread.h>
#include <dirent.h>
#include "src/common/list.h"
#include "src/common/node_select.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/slurm_selecttype_info.h"
#include "src/common/xstring.h"
#include "src/slurmctld/slurmctld.h"
/*
* Must be synchronized with slurm_select_ops_t in node_select.h, and with
* other_select.c in the select/other lib. (Ideally this table would be
* defined only once, but attempts to share a single definition have not
* worked.) A sketch of the correspondence follows the table.
*/
const char *node_select_syms[] = {
"plugin_id",
"select_p_state_save",
"select_p_state_restore",
"select_p_job_init",
"select_p_node_ranking",
"select_p_node_init",
"select_p_block_init",
"select_p_job_test",
"select_p_job_begin",
"select_p_job_ready",
"select_p_job_expand_allow",
"select_p_job_expand",
"select_p_job_resized",
"select_p_job_signal",
"select_p_job_fini",
"select_p_job_suspend",
"select_p_job_resume",
"select_p_step_pick_nodes",
"select_p_step_start",
"select_p_step_finish",
"select_p_pack_select_info",
"select_p_select_nodeinfo_pack",
"select_p_select_nodeinfo_unpack",
"select_p_select_nodeinfo_alloc",
"select_p_select_nodeinfo_free",
"select_p_select_nodeinfo_set_all",
"select_p_select_nodeinfo_set",
"select_p_select_nodeinfo_get",
"select_p_select_jobinfo_alloc",
"select_p_select_jobinfo_free",
"select_p_select_jobinfo_set",
"select_p_select_jobinfo_get",
"select_p_select_jobinfo_copy",
"select_p_select_jobinfo_pack",
"select_p_select_jobinfo_unpack",
"select_p_select_jobinfo_sprint",
"select_p_select_jobinfo_xstrdup",
"select_p_update_block",
"select_p_update_sub_node",
"select_p_fail_cnode",
"select_p_get_info_from_plugin",
"select_p_update_node_config",
"select_p_update_node_state",
"select_p_alter_node_cnt",
"select_p_reconfigure",
"select_p_resv_test",
"select_p_ba_init",
"select_p_ba_fini",
"select_p_ba_get_dims",
};
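/*
* Each symbol name in the table above resolves to the function pointer
* at the same offset in slurm_select_ops_t. A minimal sketch of that
* correspondence (not authoritative; see node_select.h for the real
* definition):
*
*	typedef struct slurm_select_ops {
*		uint32_t	*plugin_id;
*		int		(*state_save)	 (char *dir_name);
*		int		(*state_restore) (char *dir_name);
*		... one function pointer per remaining symbol above ...
*	} slurm_select_ops_t;
*/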
strong_alias(destroy_select_ba_request, slurm_destroy_select_ba_request);
static int select_context_cnt = -1;
static int select_context_default = -1;
static slurm_select_ops_t *ops = NULL;
static plugin_context_t **select_context = NULL;
static pthread_mutex_t select_context_lock = PTHREAD_MUTEX_INITIALIZER;
static bool init_run = false;
/**
* Destroy a block request, freeing the structure and all memory
* allocated within it
*/
extern void destroy_select_ba_request(void *arg)
{
select_ba_request_t *ba_request = (select_ba_request_t *)arg;
if (ba_request) {
xfree(ba_request->save_name);
if (ba_request->elongate_geos)
list_destroy(ba_request->elongate_geos);
xfree(ba_request->blrtsimage);
xfree(ba_request->linuximage);
xfree(ba_request->mloaderimage);
xfree(ba_request->ramdiskimage);
xfree(ba_request);
}
}
/**
* Log the contents of a block request at debug level
*/
extern void print_select_ba_request(select_ba_request_t* ba_request)
{
int dim;
uint32_t cluster_flags = slurmdb_setup_cluster_flags();
uint16_t cluster_dims = slurmdb_setup_cluster_dims();
if (ba_request == NULL) {
error("print_select_ba_request: request is NULL");
return;
}
debug(" ba_request:");
debug(" geometry:\t");
for (dim=0; dim<cluster_dims; dim++) {
debug("%d", ba_request->geometry[dim]);
}
debug(" size:\t%d", ba_request->size);
if (cluster_flags & CLUSTER_FLAG_BGQ) {
for (dim=0; dim<cluster_dims; dim++)
debug(" conn_type:\t%d", ba_request->conn_type[dim]);
} else
debug(" conn_type:\t%d", ba_request->conn_type[0]);
debug(" rotate:\t%d", ba_request->rotate);
debug(" elongate:\t%d", ba_request->elongate);
}
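/*
* Convert a coordinate character, as used in multi-dimensional system
* node names, to its numeric equivalent: '0'-'9' map to 0-9 and
* 'A'-'Z' map to 10-35 (e.g. 'A' -> 10)
* RET the numeric value, or -1 for an invalid character
*/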
extern int select_char2coord(char coord)
{
if ((coord >= '0') && (coord <= '9'))
return (coord - '0');
if ((coord >= 'A') && (coord <= 'Z'))
return ((coord - 'A') + 10);
return -1;
}
/*
* Initialize context for node selection plugin
*/
extern int slurm_select_init(bool only_default)
{
int retval = SLURM_SUCCESS;
char *type = NULL;
int i, j, len;
DIR *dirp;
struct dirent *e;
char *dir_array = NULL, *head = NULL;
char *plugin_type = "select";
if ( init_run && select_context )
return retval;
slurm_mutex_lock( &select_context_lock );
if ( select_context )
goto done;
type = slurm_get_select_type();
if (working_cluster_rec) {
/* skip the SelectType checks below when operating on a remote cluster */
} else {
#ifdef HAVE_BG
if (strcasecmp(type, "select/bluegene")) {
error("%s is incompatible with BlueGene", type);
fatal("Use SelectType=select/bluegene");
}
#else
if (!strcasecmp(type, "select/bluegene")) {
fatal("Requested SelectType=select/bluegene "
"in slurm.conf, but not running on a BG[L|P|Q] "
"system. If looking to emulate a BG[L|P|Q] "
"system use --enable-bgl-emulation or "
"--enable-bgp-emulation respectively.");
}
#endif
#ifdef HAVE_ALPS_CRAY
if (strcasecmp(type, "select/alps")) {
error("%s is incompatible with Cray system "
"running alps", type);
fatal("Use SelectType=select/alps");
}
#else
if (!strcasecmp(type, "select/alps")) {
fatal("Requested SelectType=select/alps "
"in slurm.conf, but not running on a ALPS Cray "
"system. If looking to emulate a Alps Cray "
"system use --enable-alps-cray-emulation.");
}
#endif
#ifdef HAVE_NATIVE_CRAY
if (strcasecmp(type, "select/cray")) {
error("%s is incompatible with a native Cray system.",
type);
fatal("Use SelectType=select/cray");
}
#else
/* if (!strcasecmp(type, "select/cray")) { */
/* fatal("Requested SelectType=select/cray " */
/* "in slurm.conf, but not running on a native Cray " */
/* "system. If looking to run on a Cray " */
/* "system natively use --enable-native-cray."); */
/* } */
#endif
}
select_context_cnt = 0;
if (only_default) {
ops = xmalloc(sizeof(slurm_select_ops_t));
select_context = xmalloc(sizeof(plugin_context_t *));
if ((select_context[0] = plugin_context_create(
plugin_type, type, (void **)&ops[0],
node_select_syms, sizeof(node_select_syms)))) {
select_context_default = 0;
select_context_cnt++;
}
goto skip_load_all;
}
if (!(dir_array = slurm_get_plugin_dir())) {
error("plugin_load_and_link: No plugin dir given");
goto done;
}
head = dir_array;
for (i=0; ; i++) {
bool got_colon = 0;
if (dir_array[i] == ':') {
dir_array[i] = '\0';
got_colon = 1;
} else if (dir_array[i] != '\0')
continue;
/* Open the directory. */
if (!(dirp = opendir(head))) {
error("cannot open plugin directory %s", head);
goto done;
}
while (1) {
char full_name[128];
if (!(e = readdir( dirp )))
break;
/* Check only files with select_ in them. */
if (strncmp(e->d_name, "select_", 7))
continue;
len = strlen(e->d_name);
#if defined(__CYGWIN__)
len -= 4;
#else
len -= 3;
#endif
/* Check only shared object files */
if (strcmp(e->d_name+len,
#if defined(__CYGWIN__)
".dll"
#else
".so"
#endif
))
continue;
/* add one for the / */
len++;
xassert(len<sizeof(full_name));
snprintf(full_name, len, "select/%s", e->d_name+7);
for (j=0; j<select_context_cnt; j++) {
if (!strcmp(full_name,
select_context[j]->type))
break;
}
if (j >= select_context_cnt) {
xrealloc(ops,
(sizeof(slurm_select_ops_t) *
(select_context_cnt + 1)));
xrealloc(select_context,
(sizeof(plugin_context_t *) *
(select_context_cnt + 1)));
select_context[select_context_cnt] =
plugin_context_create(
plugin_type, full_name,
(void **)&ops[
select_context_cnt],
node_select_syms,
sizeof(node_select_syms));
if (select_context[select_context_cnt]) {
/* set the default */
if (!strcmp(full_name, type))
select_context_default =
select_context_cnt;
select_context_cnt++;
}
}
}
closedir(dirp);
if (got_colon) {
head = dir_array + i + 1;
} else
break;
}
skip_load_all:
if (select_context_default == -1)
fatal("Can't find plugin for %s", type);
/* Ensure that each plugin_id is valid and unique */
for (i=0; i<select_context_cnt; i++) {
for (j=i+1; j<select_context_cnt; j++) {
if (*(ops[i].plugin_id) !=
*(ops[j].plugin_id))
continue;
fatal("SelectPlugins: Duplicate plugin_id %u for "
"%s and %s",
*(ops[i].plugin_id),
select_context[i]->type,
select_context[j]->type);
}
if (*(ops[i].plugin_id) < 100) {
fatal("SelectPlugins: Invalid plugin_id %u (<100) %s",
*(ops[i].plugin_id),
select_context[i]->type);
}
}
init_run = true;
done:
slurm_mutex_unlock( &select_context_lock );
if (!working_cluster_rec) {
if (select_running_linear_based()) {
uint16_t cr_type = slurm_get_select_type_param();
if ((cr_type & CR_SOCKET) || (cr_type & CR_CORE) ||
(cr_type & CR_CPU)) {
fatal("Invalid SelectTypeParameters for "
"%s: %s (%u)",
type,
select_type_param_string(cr_type),
cr_type);
}
} else {
#ifdef HAVE_XCPU
error("%s is incompatible with XCPU use", type);
fatal("Use SelectType=select/linear");
#endif
}
}
xfree(type);
xfree(dir_array);
return retval;
}
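/*
* Terminate the node selection plugin(s), freeing all associated memory
* RET SLURM_SUCCESS or an error code from plugin_context_destroy()
*/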
extern int slurm_select_fini(void)
{
int rc = SLURM_SUCCESS, i, j;
slurm_mutex_lock(&select_context_lock);
if (!select_context)
goto fini;
init_run = false;
for (i=0; i<select_context_cnt; i++) {
j = plugin_context_destroy(select_context[i]);
if (j != SLURM_SUCCESS)
rc = j;
}
xfree(ops);
xfree(select_context);
select_context_cnt = -1;
fini: slurm_mutex_unlock(&select_context_lock);
return rc;
}
/* Get this plugin's sequence number in SLURM's internal tables */
extern int select_get_plugin_id_pos(uint32_t plugin_id)
{
int i;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
for (i=0; i<select_context_cnt; i++) {
if (*(ops[i].plugin_id) == plugin_id)
break;
}
if (i >= select_context_cnt)
return SLURM_ERROR;
return i;
}
/* Get the plugin ID number. Unique for each select plugin type */
extern int select_get_plugin_id(void)
{
if (slurm_select_init(0) < 0)
return 0;
return *(ops[select_context_default].plugin_id);
}
/* Return 1 if the slurmctld is running a linear-based select plugin,
* otherwise return 0. */
extern int select_running_linear_based(void)
{
int rc = 0;
if (slurm_select_init(0) < 0)
return 0;
switch (*(ops[select_context_default].plugin_id)) {
case 102: /* select/linear */
case 104: /* select/alps -> linear */
case 107: /* select/cray -> linear */
rc = 1;
break;
default:
rc = 0;
break;
}
return rc;
}
/*
* Save any global state information
* IN dir_name - directory into which the data can be stored
*/
extern int select_g_state_save(char *dir_name)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].state_save))
(dir_name);
}
/*
* Initialize context for node selection plugin and
* restore any global state information
* IN dir_name - directory from which the data can be restored
*/
extern int select_g_state_restore(char *dir_name)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].state_restore))
(dir_name);
}
/*
* Note the initialization of job records, issued upon restart of
* slurmctld and used to synchronize any job state.
*/
extern int select_g_job_init(List job_list)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_init))
(job_list);
}
/*
* Assign a 'node_rank' value to each of the node_ptr entries.
* IN node_ptr - current node data
* IN node_cnt - number of node entries
* Return true if node ranking was performed, false if not.
*/
extern bool select_g_node_ranking(struct node_record *node_ptr, int node_cnt)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].node_ranking))
(node_ptr, node_cnt);
}
/*
* Note re/initialization of node record data structure
* IN node_ptr - current node data
* IN node_cnt - number of node entries
*/
extern int select_g_node_init(struct node_record *node_ptr, int node_cnt)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].node_init))
(node_ptr, node_cnt);
}
/*
* Note re/initialization of block record data structure
* IN block_list - list of partition records
*/
extern int select_g_block_init(List block_list)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].block_init))
(block_list);
}
/*
* Select the "best" nodes for given job from those available
* IN/OUT job_ptr - pointer to job being considered for initiation,
* sets start_time to the time the job is expected to start
* IN/OUT bitmap - map of nodes being considered for allocation on input,
* map of nodes actually to be assigned on output
* IN min_nodes - minimum number of nodes to allocate to job
* IN max_nodes - maximum number of nodes to allocate to job
* IN req_nodes - requested (or desired) count of nodes
* IN mode - SELECT_MODE_RUN_NOW: try to schedule job now
* SELECT_MODE_TEST_ONLY: test if job can ever run
* SELECT_MODE_WILL_RUN: determine when and where job can run
* IN preemptee_candidates - List of pointers to jobs which can be preempted
* IN/OUT preemptee_job_list - Pointer to list of job pointers. These are the
* jobs to be preempted to initiate the pending job. Not set
* if mode=SELECT_MODE_TEST_ONLY or input pointer is NULL.
* Existing list is appended to.
* IN exc_core_bitmap - cores used in reservations and not usable
* RET zero on success, EINVAL otherwise
*/
extern int select_g_job_test(struct job_record *job_ptr, bitstr_t *bitmap,
uint32_t min_nodes, uint32_t max_nodes,
uint32_t req_nodes, uint16_t mode,
List preemptee_candidates,
List *preemptee_job_list,
bitstr_t *exc_core_bitmap)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_test))
(job_ptr, bitmap,
min_nodes, max_nodes,
req_nodes, mode,
preemptee_candidates, preemptee_job_list,
exc_core_bitmap);
}
/*
* Note initiation of job is about to begin. Called immediately
* after select_g_job_test(). Executed from slurmctld.
* IN job_ptr - pointer to job being initiated
*/
extern int select_g_job_begin(struct job_record *job_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_begin))
(job_ptr);
}
/*
* determine if job is ready to execute per the node select plugin
* IN job_ptr - pointer to job being tested
* RET: -2 fatal error, -1 try again, 1 if ready to execute,
* 0 not ready to execute
*/
extern int select_g_job_ready(struct job_record *job_ptr)
{
if (slurm_select_init(0) < 0)
return -1;
return (*(ops[select_context_default].job_ready))
(job_ptr);
}
/*
* Test if job expansion is supported
*/
extern bool select_g_job_expand_allow(void)
{
if (slurm_select_init(0) < 0)
return false;
return (*(ops[select_context_default].job_expand_allow))
();
}
/*
* Move the resource allocated to one job into that of another job.
* All resources are removed from "from_job_ptr" and moved into
* "to_job_ptr". Also see other_job_resized().
* RET: 0 or an error code
*/
extern int select_g_job_expand(struct job_record *from_job_ptr,
struct job_record *to_job_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_expand))
(from_job_ptr, to_job_ptr);
}
/*
* Modify internal data structures for a job that has changed size
* Only job shrinkage is currently supported.
* RET: 0 or an error code
*/
extern int select_g_job_resized(struct job_record *job_ptr,
struct node_record *node_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_resized))
(job_ptr, node_ptr);
}
/*
* Pass job-step signal to plugin before signalling any job steps, so that
* any signal-dependent actions can be taken.
* IN job_ptr - job to be signalled
* IN signal - signal(7) number
*/
extern int select_g_job_signal(struct job_record *job_ptr, int signal)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_signal))
(job_ptr, signal);
}
/*
* Note termination of job is starting. Executed from slurmctld.
* IN job_ptr - pointer to job being terminated
*/
extern int select_g_job_fini(struct job_record *job_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_fini))
(job_ptr);
}
/*
* Suspend a job. Executed from slurmctld.
* IN job_ptr - pointer to job being suspended
* IN indf_susp - set if job is being suspended indefinitely by user
* or admin, otherwise suspended for gang scheduling
* RET SLURM_SUCCESS or error code
*/
extern int select_g_job_suspend(struct job_record *job_ptr, bool indf_susp)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_suspend))
(job_ptr, indf_susp);
}
/*
* Resume a job. Executed from slurmctld.
* IN job_ptr - pointer to job being resumed
* IN indf_susp - set if job is being resumed from indefinite suspend by user
* or admin, otherwise resume from gang scheduling
* RET SLURM_SUCCESS or error code
*/
extern int select_g_job_resume(struct job_record *job_ptr, bool indf_susp)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].job_resume))
(job_ptr, indf_susp);
}
/*
* Select the "best" nodes for given job step from those available in
* a job allocation.
*
* IN/OUT job_ptr - pointer to job already allocated and running in a
* block where the step is to run.
* sets start_time to the time the job is expected to start
* OUT step_jobinfo - Fill in the resources to be used if not
* full size of job.
* IN node_count - How many nodes we are looking for.
* OUT avail_nodes - bitmap of available nodes according to the plugin
* (not always set).
* RET map of slurm nodes to be used for step, NULL on failure
*/
extern bitstr_t *select_g_step_pick_nodes(struct job_record *job_ptr,
dynamic_plugin_data_t *step_jobinfo,
uint32_t node_count,
bitstr_t **avail_nodes)
{
if (slurm_select_init(0) < 0)
return NULL;
xassert(step_jobinfo);
return (*(ops[select_context_default].step_pick_nodes))
(job_ptr, step_jobinfo->data, node_count, avail_nodes);
}
/*
* Post pick_nodes operations for the step.
* IN/OUT step_ptr - step pointer to operate on.
*/
extern int select_g_step_start(struct step_record *step_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].step_start))
(step_ptr);
}
/*
* Undo what was done in select_g_step_pick_nodes, flushing the step's
* resources from both the job and the step
* IN/OUT step_ptr - step to operate on
*/
*/
extern int select_g_step_finish(struct step_record *step_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].step_finish))
(step_ptr);
}
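/*
* Pack plugin-specific node select state info for transmission
* IN last_query_time - time of previous query
* IN show_flags - desired level of detail
* OUT buffer - location in which to store the packed data
* IN protocol_version - communication protocol version in use
* RET SLURM_SUCCESS or an error code
*/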
extern int select_g_pack_select_info(time_t last_query_time,
uint16_t show_flags, Buf *buffer,
uint16_t protocol_version)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].pack_select_info))
(last_query_time, show_flags, buffer, protocol_version);
}
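/*
* Pack a node's select data structure into a buffer in machine
* independent form, prefixed by the owning plugin's id
* IN nodeinfo - the node select data to be saved
* OUT buffer - buffer with select data appended
* IN protocol_version - communication protocol version in use
* RET SLURM_SUCCESS or an error code
*/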
extern int select_g_select_nodeinfo_pack(dynamic_plugin_data_t *nodeinfo,
Buf buffer,
uint16_t protocol_version)
{
void *data = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (nodeinfo) {
data = nodeinfo->data;
plugin_id = nodeinfo->plugin_id;
} else
plugin_id = select_context_default;
if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
pack32(*(ops[plugin_id].plugin_id),
buffer);
} else {
error("select_g_select_nodeinfo_pack: protocol_version "
"%hu not supported", protocol_version);
}
return (*(ops[plugin_id].
nodeinfo_pack))(data, buffer, protocol_version);
}
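/*
* Unpack a node's select data structure from a buffer
* OUT nodeinfo - the node select data read, or NULL on error
* IN buffer - buffer with select data read from current pointer location
* IN protocol_version - communication protocol version in use
* RET SLURM_SUCCESS or an error code
* NOTE: returned value must be freed using select_g_select_nodeinfo_free
*/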
extern int select_g_select_nodeinfo_unpack(dynamic_plugin_data_t **nodeinfo,
Buf buffer,
uint16_t protocol_version)
{
dynamic_plugin_data_t *nodeinfo_ptr = NULL;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
nodeinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t));
*nodeinfo = nodeinfo_ptr;
if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
int i;
uint32_t plugin_id;
safe_unpack32(&plugin_id, buffer);
for (i=0; i<select_context_cnt; i++)
if (*(ops[i].plugin_id) == plugin_id) {
nodeinfo_ptr->plugin_id = i;
break;
}
if (i >= select_context_cnt) {
error("we don't have select plugin type %u",plugin_id);
goto unpack_error;
}
} else {
nodeinfo_ptr->plugin_id = select_context_default;
error("select_g_select_nodeinfo_unpack: protocol_version"
" %hu not supported", protocol_version);
goto unpack_error;
}
if ((*(ops[nodeinfo_ptr->plugin_id].nodeinfo_unpack))
((select_nodeinfo_t **)&nodeinfo_ptr->data, buffer,
protocol_version) != SLURM_SUCCESS)
goto unpack_error;
return SLURM_SUCCESS;
unpack_error:
select_g_select_nodeinfo_free(nodeinfo_ptr);
*nodeinfo = NULL;
error("select_g_select_nodeinfo_unpack: unpack error");
return SLURM_ERROR;
}
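/*
* Allocate storage for a node's select data structure using the default
* plugin (or that of working_cluster_rec, if set)
* RET the allocated storage, or NULL on failure
* NOTE: returned value must be freed using select_g_select_nodeinfo_free
*/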
extern dynamic_plugin_data_t *select_g_select_nodeinfo_alloc(void)
{
dynamic_plugin_data_t *nodeinfo_ptr = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return NULL;
plugin_id = working_cluster_rec ?
working_cluster_rec->plugin_id_select : select_context_default;
nodeinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t));
nodeinfo_ptr->plugin_id = plugin_id;
nodeinfo_ptr->data = (*(ops[plugin_id].
nodeinfo_alloc))();
return nodeinfo_ptr;
}
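/*
* Free storage previously allocated for a node's select data structure
* IN nodeinfo - the node select data to be freed
* RET SLURM_SUCCESS or an error code
*/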
extern int select_g_select_nodeinfo_free(dynamic_plugin_data_t *nodeinfo)
{
int rc = SLURM_SUCCESS;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (nodeinfo) {
if (nodeinfo->data)
rc = (*(ops[nodeinfo->plugin_id].
nodeinfo_free))(nodeinfo->data);
xfree(nodeinfo);
}
return rc;
}
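/*
* Update the select data structures of all node records
* RET SLURM_SUCCESS or an error code
*/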
extern int select_g_select_nodeinfo_set_all(void)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].nodeinfo_set_all))
();
}
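/*
* Update the select data structures for the nodes allocated to a job
* IN job_ptr - job whose allocated nodes are to be updated
* RET SLURM_SUCCESS or an error code
*/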
extern int select_g_select_nodeinfo_set(struct job_record *job_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].nodeinfo_set))
(job_ptr);
}
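/*
* Get data from a node's select data structure
* IN nodeinfo - the node select data from which to get info
* IN dinfo - type of data to get (see enum select_nodedata_type)
* IN state - node state of interest
* IN/OUT data - location into which the data is written
* RET SLURM_SUCCESS or an error code
*/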
extern int select_g_select_nodeinfo_get(dynamic_plugin_data_t *nodeinfo,
enum select_nodedata_type dinfo,
enum node_states state,
void *data)
{
void *nodedata = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (nodeinfo) {
nodedata = nodeinfo->data;
plugin_id = nodeinfo->plugin_id;
} else
plugin_id = select_context_default;
return (*(ops[plugin_id].nodeinfo_get))
(nodedata, dinfo, state, data);
}
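/*
* Allocate storage for a select job credential using the default
* plugin (or that of working_cluster_rec, if set)
* RET the allocated storage, or NULL on failure
* NOTE: returned value must be freed using select_g_select_jobinfo_free
*/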
extern dynamic_plugin_data_t *select_g_select_jobinfo_alloc(void)
{
dynamic_plugin_data_t *jobinfo_ptr = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return NULL;
plugin_id = working_cluster_rec ?
working_cluster_rec->plugin_id_select : select_context_default;
jobinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t));
jobinfo_ptr->plugin_id = plugin_id;
jobinfo_ptr->data = (*(ops[plugin_id].
jobinfo_alloc))();
return jobinfo_ptr;
}
/* free storage previously allocated for a select job credential
* IN jobinfo - the select job credential to be freed
*/
extern int select_g_select_jobinfo_free(dynamic_plugin_data_t *jobinfo)
{
int rc = SLURM_SUCCESS;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (jobinfo) {
if (jobinfo->data) {
rc = (*(ops[jobinfo->plugin_id].
jobinfo_free))(jobinfo->data);
}
xfree(jobinfo);
}
return rc;
}
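/*
* Set data in a select job credential
* IN/OUT jobinfo - select job credential to be modified
* IN data_type - type of data to enter into the credential
* IN data - the data to enter
* RET SLURM_SUCCESS or an error code
*/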
extern int select_g_select_jobinfo_set(dynamic_plugin_data_t *jobinfo,
enum select_jobdata_type data_type,
void *data)
{
void *jobdata = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (jobinfo) {
jobdata = jobinfo->data;
plugin_id = jobinfo->plugin_id;
} else
plugin_id = select_context_default;
return (*(ops[plugin_id].jobinfo_set))
(jobdata, data_type, data);
}
/* get data from a select job credential
* IN jobinfo - select job credential to examine
* IN data_type - type of data to get from the credential
* IN/OUT data - location into which the data is written
*/
extern int select_g_select_jobinfo_get(dynamic_plugin_data_t *jobinfo,
enum select_jobdata_type data_type,
void *data)
{
void *jobdata = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (jobinfo) {
jobdata = jobinfo->data;
plugin_id = jobinfo->plugin_id;
} else
plugin_id = select_context_default;
return (*(ops[plugin_id].jobinfo_get))
(jobdata, data_type, data);
}
/* copy a select job credential
* IN jobinfo - the select job credential to be copied
* RET - the copy or NULL on failure
* NOTE: returned value must be freed using select_g_select_jobinfo_free
*/
extern dynamic_plugin_data_t *select_g_select_jobinfo_copy(
dynamic_plugin_data_t *jobinfo)
{
dynamic_plugin_data_t *jobinfo_ptr = NULL;
if (slurm_select_init(0) < 0)
return NULL;
jobinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t));
if (jobinfo) {
jobinfo_ptr->plugin_id = jobinfo->plugin_id;
jobinfo_ptr->data = (*(ops[jobinfo->plugin_id].
jobinfo_copy))(jobinfo->data);
} else
jobinfo_ptr->plugin_id = select_context_default;
return jobinfo_ptr;
}
/* pack a select job credential into a buffer in machine independent form
* IN jobinfo - the select job credential to be saved
* OUT buffer - buffer with select credential appended
* RET - slurm error code
*/
extern int select_g_select_jobinfo_pack(dynamic_plugin_data_t *jobinfo,
Buf buffer,
uint16_t protocol_version)
{
void *data = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (jobinfo) {
data = jobinfo->data;
plugin_id = jobinfo->plugin_id;
} else
plugin_id = select_context_default;
if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
pack32(*(ops[plugin_id].plugin_id), buffer);
} else {
error("select_g_select_jobinfo_pack: protocol_version "
"%hu not supported", protocol_version);
}
return (*(ops[plugin_id].jobinfo_pack))(data, buffer, protocol_version);
}
/* unpack a select job credential from a buffer
* OUT jobinfo - the select job credential read
* IN buffer - buffer with select credential read from current pointer loc
* RET - slurm error code
* NOTE: returned value must be freed using select_g_select_jobinfo_free
*/
extern int select_g_select_jobinfo_unpack(dynamic_plugin_data_t **jobinfo,
Buf buffer,
uint16_t protocol_version)
{
dynamic_plugin_data_t *jobinfo_ptr = NULL;
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
jobinfo_ptr = xmalloc(sizeof(dynamic_plugin_data_t));
*jobinfo = jobinfo_ptr;
if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
int i;
uint32_t plugin_id;
safe_unpack32(&plugin_id, buffer);
for (i=0; i<select_context_cnt; i++)
if (*(ops[i].plugin_id) == plugin_id) {
jobinfo_ptr->plugin_id = i;
break;
}
if (i >= select_context_cnt) {
error("we don't have select plugin type %u", plugin_id);
goto unpack_error;
}
} else {
jobinfo_ptr->plugin_id = select_context_default;
error("select_g_select_jobinfo_unpack: protocol_version "
"%hu not supported", protocol_version);
goto unpack_error;
}
if ((*(ops[jobinfo_ptr->plugin_id].jobinfo_unpack))
((select_jobinfo_t **)&jobinfo_ptr->data, buffer,
protocol_version) != SLURM_SUCCESS)
goto unpack_error;
return SLURM_SUCCESS;
unpack_error:
select_g_select_jobinfo_free(jobinfo_ptr);
*jobinfo = NULL;
error("select_g_select_jobinfo_unpack: unpack error");
return SLURM_ERROR;
}
/* write select job credential to a string
* IN jobinfo - a select job credential
* OUT buf - location to write job credential contents
* IN size - byte size of buf
* IN mode - print mode, see enum select_print_mode
* RET - the string, same as buf
*/
extern char *select_g_select_jobinfo_sprint(dynamic_plugin_data_t *jobinfo,
char *buf, size_t size, int mode)
{
void *data = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return NULL;
if (jobinfo) {
data = jobinfo->data;
plugin_id = jobinfo->plugin_id;
} else
plugin_id = select_context_default;
return (*(ops[plugin_id].
jobinfo_sprint))
(data, buf, size, mode);
}
/* write select job info to a string
* IN jobinfo - a select job credential
* IN mode - print mode, see enum select_print_mode
* RET - char * containing string of request
*/
extern char *select_g_select_jobinfo_xstrdup(
dynamic_plugin_data_t *jobinfo, int mode)
{
void *data = NULL;
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return NULL;
if (jobinfo) {
data = jobinfo->data;
plugin_id = jobinfo->plugin_id;
} else
plugin_id = select_context_default;
return (*(ops[plugin_id].
jobinfo_xstrdup))(data, mode);
}
/*
* Update specific block (usually something has gone wrong)
* IN block_desc_ptr - information about the block
*/
extern int select_g_update_block (update_block_msg_t *block_desc_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].
update_block))(block_desc_ptr);
}
/*
* Update specific sub nodes (usually something has gone wrong)
* IN block_desc_ptr - information about the block
*/
extern int select_g_update_sub_node (update_block_msg_t *block_desc_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].
update_sub_node))(block_desc_ptr);
}
/*
* Fail certain cnodes in a block's midplane (usually comes from the
* IBM runjob mux)
* IN step_ptr - step that has failed cnodes
*/
extern int select_g_fail_cnode (struct step_record *step_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].fail_cnode))(step_ptr);
}
/*
* Get select data from a plugin
* IN dinfo - type of data to get from the node record
* (see enum select_plugindata_info)
* IN/OUT data - the data to get from node record
*/
extern int select_g_get_info_from_plugin (enum select_plugindata_info dinfo,
struct job_record *job_ptr,
void *data)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].
get_info_from_plugin))(dinfo, job_ptr, data);
}
/*
* Update a node configuration. This happens when a node registers with
* more resources than originally configured (e.g. memory).
* IN index - index into the node record list
* RET SLURM_SUCCESS on success, SLURM_ERROR otherwise
*/
extern int select_g_update_node_config (int index)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].
update_node_config))(index);
}
/*
* Update a node's state in the plugin. This should happen when a node is
* drained or put into a DOWN state and then changed back.
* IN node_ptr - pointer to the node record being updated
* RET SLURM_SUCCESS on success, SLURM_ERROR otherwise
*/
extern int select_g_update_node_state (struct node_record *node_ptr)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].update_node_state))
(node_ptr);
}
/*
* Alter the node count for a job given the type of system we are on
* IN type - operation to perform (see enum select_node_cnt)
* IN/OUT data - data to alter, type dependent
*/
extern int select_g_alter_node_cnt (enum select_node_cnt type, void *data)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
if (type == SELECT_GET_NODE_SCALING) {
/* default to one, so most plugins don't have to */
uint32_t *nodes = (uint32_t *)data;
*nodes = 1;
}
return (*(ops[select_context_default].alter_node_cnt))(type, data);
}
/*
* Note reconfiguration or change in partition configuration
*/
extern int select_g_reconfigure (void)
{
if (slurm_select_init(0) < 0)
return SLURM_ERROR;
return (*(ops[select_context_default].reconfigure))();
}
/*
* select_g_resv_test - Identify the nodes which "best" satisfy a reservation
* request. "best" is defined as either a single set of consecutive nodes
* satisfying the request while leaving the minimum number of unused nodes,
* OR the fewest number of consecutive node sets
* IN/OUT avail_bitmap - nodes available for the reservation
* IN node_cnt - count of required nodes
* IN core_cnt - count of required cores per node
* IN/OUT core_bitmap - cores which cannot be used for this reservation
* IN flags - reservation request flags
* RET - nodes selected for use by the reservation
*/
extern bitstr_t * select_g_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
uint32_t *core_cnt, bitstr_t **core_bitmap,
uint32_t flags)
{
if (slurm_select_init(0) < 0)
return NULL;
return (*(ops[select_context_default].resv_test))
(avail_bitmap, node_cnt, core_cnt, core_bitmap, flags);
}
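/*
* Initialize the plugin's block allocator
* IN node_info_ptr - system node information used to build its state
* IN sanity_check - if set, perform additional validation during setup
*/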
extern void select_g_ba_init(node_info_msg_t *node_info_ptr, bool sanity_check)
{
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return;
if (working_cluster_rec)
plugin_id = working_cluster_rec->plugin_id_select;
else
plugin_id = select_context_default;
(*(ops[plugin_id].ba_init))(node_info_ptr, sanity_check);
}
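/*
* Tear down the plugin's block allocator, freeing its internal state
*/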
extern void select_g_ba_fini(void)
{
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return;
if (working_cluster_rec)
plugin_id = working_cluster_rec->plugin_id_select;
else
plugin_id = select_context_default;
(*(ops[plugin_id].ba_fini))();
}
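/*
* Get the dimension sizes used by the plugin's block allocator
* RET an array of dimension sizes, or NULL on failure
*/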
extern int *select_g_ba_get_dims(void)
{
uint32_t plugin_id;
if (slurm_select_init(0) < 0)
return NULL;
if (working_cluster_rec)
plugin_id = working_cluster_rec->plugin_id_select;
else
plugin_id = select_context_default;
return (*(ops[plugin_id].ba_get_dims))();
}