blob: 3a4ca276e7300dd9e6f73fec1177e474107ca1d5 [file] [log] [blame]
/*****************************************************************************\
* node_mgr.c - manage the node records of slurm
* Note: there is a global node table (node_record_table_ptr), its
* hash table (node_hash_table), time stamp (last_node_update) and
* configuration list (config_list)
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov>, et. al.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "config.h"
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include "src/common/bitstring.h"
#include "src/common/fd.h"
#include "src/common/fetch_config.h"
#include "src/common/hostlist.h"
#include "src/common/macros.h"
#include "src/common/node_features.h"
#include "src/common/pack.h"
#include "src/common/parse_time.h"
#include "src/common/parse_value.h"
#include "src/common/read_config.h"
#include "src/common/slurm_resource_info.h"
#include "src/common/state_save.h"
#include "src/common/timers.h"
#include "src/common/xassert.h"
#include "src/common/xstring.h"
#include "src/interfaces/accounting_storage.h"
#include "src/interfaces/acct_gather_energy.h"
#include "src/interfaces/auth.h"
#include "src/interfaces/conn.h"
#include "src/interfaces/gres.h"
#include "src/interfaces/mcs.h"
#include "src/interfaces/node_features.h"
#include "src/interfaces/select.h"
#include "src/interfaces/serializer.h"
#include "src/interfaces/topology.h"
#include "src/slurmctld/agent.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/ping_nodes.h"
#include "src/slurmctld/power_save.h"
#include "src/slurmctld/proc_req.h"
#include "src/slurmctld/read_config.h"
#include "src/slurmctld/reservation.h"
#include "src/slurmctld/sackd_mgr.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/state_save.h"
#include "src/slurmctld/trigger_mgr.h"
/*
 * Tag written at the start of the node_state file. It never needs to change
 * because the protocol version actually used (SLURM_PROTOCOL_VERSION) is
 * packed right after it.
 */
#define NODE_STATE_VERSION "PROTOCOL_VERSION"
/*
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably default percentages applied to registered node memory for
 * regular and cloud nodes -- confirm at the use sites.
 */
#define DEFAULT_NODE_REG_MEM_PERCENT 100.0
#define DEFAULT_CLOUD_REG_MEM_PERCENT 90.0
/*
 * Flag set by _queue_consolidate_config_list() and read by
 * _get_config_list_update(); both accessors take config_list_update_mutex
 * around the access.
 */
static bool config_list_update = false;
static pthread_mutex_t config_list_update_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Per-request context used when deciding whether a node is hidden from a user */
typedef struct {
/* uid of the requesting user, passed to mcs_g_check_mcs_label() */
uid_t uid;
/*
 * Result of build_visible_parts(); walked until a NULL entry is found and
 * released with xfree() by _free_pack_node_info_members()
 */
part_record_t **visible_parts;
} pack_node_info_t;
/* Global variables (not static, so visible to other translation units) */
bitstr_t *asap_node_bitmap = NULL; /* bitmap of rebooting asap nodes */
bitstr_t *avail_node_bitmap = NULL; /* bitmap of available nodes */
bitstr_t *bf_ignore_node_bitmap = NULL; /* bitmap of nodes to ignore during a
* backfill cycle */
bitstr_t *booting_node_bitmap = NULL; /* bitmap of booting nodes */
bitstr_t *cg_node_bitmap = NULL; /* bitmap of completing nodes */
bitstr_t *cloud_node_bitmap = NULL; /* bitmap of cloud nodes */
bitstr_t *external_node_bitmap = NULL; /* bitmap of external nodes */
bitstr_t *future_node_bitmap = NULL; /* bitmap of FUTURE nodes */
bitstr_t *idle_node_bitmap = NULL; /* bitmap of idle nodes */
bitstr_t *power_down_node_bitmap = NULL; /* bitmap of powered down nodes */
bitstr_t *rs_node_bitmap = NULL; /* bitmap of resuming nodes */
bitstr_t *share_node_bitmap = NULL; /* bitmap of sharable nodes */
bitstr_t *up_node_bitmap = NULL; /* bitmap of non-down nodes */
bitstr_t *power_up_node_bitmap = NULL; /* bitmap of power_up requested nodes */
/* Prototypes of file-local (static) helpers used before their definition */
static int _delete_node_ptr(node_record_t *node_ptr);
static void _drain_node(node_record_t *node_ptr, char *reason,
uint32_t reason_uid);
static void _make_node_unavail(node_record_t *node_ptr);
static void _make_node_down(node_record_t *node_ptr,
time_t event_time);
static bool _node_is_hidden(node_record_t *node_ptr,
pack_node_info_t *pack_info);
static void _pack_node(node_record_t *dump_node_ptr, buf_t *buffer,
uint16_t protocol_version, uint16_t show_flags);
static void _sync_bitmaps(node_record_t *node_ptr, int job_count);
static void _update_config_ptr(bitstr_t *bitmap,
config_record_t *config_ptr);
static int _update_node_gres(char *node_names, char *gres);
static int _update_node_weight(char *node_names, uint32_t weight);
static bool _valid_node_state_change(uint32_t old, uint32_t new);
/*
 * _get_msg_hostname - build an IP address string for the sender of a message
 * IN/OUT msg - message to inspect; when its address family is AF_UNSPEC,
 *	slurm_get_peer_addr() is called on the connection's file descriptor
 *	with msg->address as the destination
 * RET xmalloc()'d buffer of INET6_ADDRSTRLEN bytes filled in by
 *	slurm_get_ip_str(), or NULL if the address family is still AF_UNSPEC
 */
static char *_get_msg_hostname(slurm_msg_t *msg)
{
	slurm_addr_t *peer = &msg->address;
	char *ip_str;

	/* No address stored with the message: query the connection instead */
	if (peer->ss_family == AF_UNSPEC)
		(void) slurm_get_peer_addr(conn_g_get_fd(msg->tls_conn), peer);

	if (peer->ss_family == AF_UNSPEC)
		return NULL;

	ip_str = xmalloc(INET6_ADDRSTRLEN);
	slurm_get_ip_str(peer, ip_str, INET6_ADDRSTRLEN);

	return ip_str;
}
/*
 * _dump_cluster_settings - pack the cluster-wide suspend exclusion settings
 * IN/OUT buffer - buffer the strings are appended to
 * NOTE: the pack order (nodes, parts, states) must match the unpack order in
 *	_load_cluster_settings()
 */
static void _dump_cluster_settings(buf_t *buffer)
{
	char *settings[] = {
		slurm_conf.suspend_exc_nodes,
		slurm_conf.suspend_exc_parts,
		slurm_conf.suspend_exc_states,
	};

	for (size_t i = 0; i < (sizeof(settings) / sizeof(settings[0])); i++)
		packstr(settings[i], buffer);
}
/*
 * _load_cluster_settings - unpack the settings written by
 *	_dump_cluster_settings()
 * IN state_only - if true the unpacked strings are discarded, otherwise they
 *	replace the corresponding slurm_conf.suspend_exc_* values
 * IN/OUT buffer - buffer to read from
 * IN protocol_version - version the buffer was packed with; nothing is read
 *	when it is below SLURM_MIN_PROTOCOL_VERSION
 * RET SLURM_SUCCESS, or SLURM_ERROR if unpacking failed
 */
static int _load_cluster_settings(bool state_only,
				  buf_t *buffer,
				  uint16_t protocol_version)
{
	int rc = SLURM_ERROR;
	char *exc_nodes = NULL;
	char *exc_parts = NULL;
	char *exc_states = NULL;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpackstr(&exc_nodes, buffer);
		safe_unpackstr(&exc_parts, buffer);
		safe_unpackstr(&exc_states, buffer);

		if (!state_only) {
			/* Hand ownership of the new strings to slurm_conf */
			xfree(slurm_conf.suspend_exc_nodes);
			slurm_conf.suspend_exc_nodes = exc_nodes;
			exc_nodes = NULL;

			xfree(slurm_conf.suspend_exc_parts);
			slurm_conf.suspend_exc_parts = exc_parts;
			exc_parts = NULL;

			xfree(slurm_conf.suspend_exc_states);
			slurm_conf.suspend_exc_states = exc_states;
			exc_states = NULL;
		}
	}
	rc = SLURM_SUCCESS;

unpack_error:
	/* Whatever was not handed over (state_only or failure) is released */
	xfree(exc_nodes);
	xfree(exc_parts);
	xfree(exc_states);

	return rc;
}
/*
 * dump_all_node_state - save the state of all nodes to file
 * RET value returned by save_buf_to_state() for the "node_state" file
 */
int dump_all_node_state(void)
{
	/* Save high-water mark to avoid buffer growth with copies */
	static uint32_t high_buffer_size = (1024 * 1024);
	/* Locks: Read config and node */
	slurmctld_lock_t node_read_lock = {
		READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK
	};
	buf_t *buffer = init_buf(high_buffer_size);
	node_record_t *node_ptr;
	int rc = 0, node_inx = 0;
	DEF_TIMERS;

	START_TIMER;

	/* Header: version tag, protocol version, time stamp */
	packstr(NODE_STATE_VERSION, buffer);
	pack16(SLURM_PROTOCOL_VERSION, buffer);
	pack_time(time(NULL), buffer);

	/* Everything read from shared state is packed under the read locks */
	lock_slurmctld(node_read_lock);
	_dump_cluster_settings(buffer);
	sackd_mgr_dump_state(buffer, SLURM_PROTOCOL_VERSION);
	while ((node_ptr = next_node(&node_inx))) {
		xassert(node_ptr->magic == NODE_MAGIC);
		xassert(node_ptr->config_ptr->magic == CONFIG_MAGIC);
		node_record_pack_state(node_ptr, SLURM_PROTOCOL_VERSION,
				       buffer);
		node_inx++;
	}
	unlock_slurmctld(node_read_lock);

	/* The file is written after the locks have been released */
	rc = save_buf_to_state("node_state", buffer, &high_buffer_size);
	FREE_NULL_BUFFER(buffer);

	END_TIMER2(__func__);
	return rc;
}
/*
 * _queue_consolidate_config_list - raise the config_list_update flag while
 *	holding config_list_update_mutex
 */
static void _queue_consolidate_config_list(void)
{
	slurm_mutex_lock(&config_list_update_mutex);
	config_list_update = true;
	slurm_mutex_unlock(&config_list_update_mutex);
}
/*
 * _get_config_list_update - read the config_list_update flag while holding
 *	config_list_update_mutex
 * RET current value of the flag (the flag itself is left unchanged)
 */
static bool _get_config_list_update(void)
{
	bool pending;

	slurm_mutex_lock(&config_list_update_mutex);
	pending = config_list_update;
	slurm_mutex_unlock(&config_list_update_mutex);

	return pending;
}
/*
 * _validate_nodes_vs_nodeset - check that no configured NodeSet shares its
 *	name with one of the given nodes
 * IN nodes_str - host list expression of node names, may be NULL
 * RET SLURM_SUCCESS if nodes_str is NULL or nothing overlaps, otherwise
 *	ESLURM_INVALID_NODE_NAME (the first overlap found is logged)
 */
static int _validate_nodes_vs_nodeset(char *nodes_str)
{
	int rc = SLURM_SUCCESS;
	slurm_conf_nodeset_t **nodesets;
	int nodeset_cnt = slurm_conf_nodeset_array(&nodesets);
	hostlist_t *node_hl;

	if (!nodes_str)
		return rc;

	node_hl = hostlist_create(nodes_str);
	for (int i = 0; i < nodeset_cnt; i++) {
		if (hostlist_find(node_hl, nodesets[i]->name) == -1)
			continue;

		error("NodeSet with name %s overlaps with an existing NodeName",
		      nodesets[i]->name);
		rc = ESLURM_INVALID_NODE_NAME;
		break;
	}
	hostlist_destroy(node_hl);

	return rc;
}
/*
* load_all_node_state - Load the node state from file, recover on slurmctld
* restart. Execute this after loading the configuration file data.
* Data goes into common storage.
* IN state_only - if true, overwrite only node state and reason
* Use this to overwrite the "UNKNOWN state typically used in slurm.conf
* RET SUCCESS or error code
*/
extern int load_all_node_state ( bool state_only )
{
char *state_file;
int error_code = SLURM_SUCCESS, node_cnt = 0;
node_record_t *node_ptr;
time_t time_stamp;
buf_t *buffer;
char *ver_str = NULL;
hostset_t *hs = NULL;
hostlist_t *down_nodes = NULL;
bool power_save_mode = false;
uint16_t protocol_version = NO_VAL16;
xassert(verify_lock(CONF_LOCK, READ_LOCK));
if (slurm_conf.suspend_program && slurm_conf.resume_program)
power_save_mode = true;
/* read the file */
buffer = state_save_open("node_state", &state_file);
if (!buffer) {
if ((clustername_existed == 1) && (!ignore_state_errors))
fatal("No node state file (%s) to recover", state_file);
info("No node state file (%s) to recover", state_file);
xfree(state_file);
return ENOENT;
}
xfree(state_file);
safe_unpackstr(&ver_str, buffer);
debug3("Version string in node_state header is %s", ver_str);
if (ver_str && !xstrcmp(ver_str, NODE_STATE_VERSION))
safe_unpack16(&protocol_version, buffer);
if (!protocol_version || (protocol_version == NO_VAL16)) {
if (!ignore_state_errors)
fatal("Can not recover node state, data version incompatible, start with '-i' to ignore this. Warning: using -i will lose the data that can't be recovered.");
error("*****************************************************");
error("Can not recover node state, data version incompatible");
error("*****************************************************");
xfree(ver_str);
FREE_NULL_BUFFER(buffer);
return EFAULT;
}
xfree(ver_str);
safe_unpack_time (&time_stamp, buffer);
if (_load_cluster_settings(state_only, buffer, protocol_version))
goto unpack_error;
if (sackd_mgr_load_state(buffer, protocol_version))
goto unpack_error;
while (remaining_buf (buffer) > 0) {
node_record_t *node_state_rec = NULL;
uint32_t node_state, base_state;
if (node_record_unpack((void *) &node_state_rec,
protocol_version, buffer)) {
error("failed to unpack node state");
goto unpack_error;
}
node_state = node_state_rec->node_state;
base_state = node_state & NODE_STATE_BASE;
/* validity test as possible */
if ((node_state_rec->cpus == 0) ||
(node_state_rec->boards == 0) ||
(node_state_rec->tot_sockets == 0) ||
(node_state_rec->cores == 0) ||
(node_state_rec->threads == 0) ||
(base_state >= NODE_STATE_END)) {
error("Invalid data for node %s: procs=%u, boards=%u, "
"sockets=%u, cores=%u, threads=%u, state=%u",
node_state_rec->name, node_state_rec->cpus,
node_state_rec->boards,
node_state_rec->tot_sockets,
node_state_rec->cores, node_state_rec->threads,
node_state);
error("No more node data will be processed from the checkpoint file");
purge_node_rec(node_state_rec);
goto unpack_error;
}
/*
* When a NodeSet is defined with the same name than
* an existing node name found in the state file, and which
* might not be in the configuration, then fatal. This can
* happen for example when adding a dynamic node, then changing
* slurm.conf by adding a NodeSet with the same name than the
* dynamic node, and then restarting.
*/
if (_validate_nodes_vs_nodeset(node_state_rec->name) !=
SLURM_SUCCESS) {
fatal("This error might happen when names overlap with dynamic nodes. Please rename the NodeSet in slurm.conf.");
}
if (node_state & NODE_STATE_DYNAMIC_NORM) {
/*
* Create node record to restore node into.
*
* cpu_spec_list, core_spec_cnt, port are only restored
* for dynamic nodes, otherwise always trust slurm.conf
*/
config_record_t *config_ptr =
config_record_from_node_record(node_state_rec);
if ((error_code = add_node_record(node_state_rec->name,
config_ptr,
&node_ptr))) {
error("%s (%s)",
slurm_strerror(error_code),
node_state_rec->name);
error_code = SLURM_SUCCESS;
list_delete_ptr(config_list, config_ptr);
} else {
if (node_state_rec->port) {
node_ptr->port = node_state_rec->port;
/*
* Get node in conf hash tables with
* port. set_node_comm_name() doesn't
* add the node with the port.
*/
slurm_conf_add_node(node_ptr);
}
/*
* add_node_record() populates gres_list but we
* want to use the gres_list from state.
*/
FREE_NULL_LIST(node_ptr->gres_list);
_queue_consolidate_config_list();
}
}
/* find record and perform update */
node_ptr = find_node_record (node_state_rec->name);
if (node_ptr == NULL) {
error ("Node %s has vanished from configuration",
node_state_rec->name);
} else if (state_only &&
!(node_state & NODE_STATE_DYNAMIC_NORM)) {
uint32_t orig_flags;
if ((IS_NODE_CLOUD(node_ptr) ||
(node_state & NODE_STATE_DYNAMIC_FUTURE)) &&
node_state_rec->comm_name &&
node_state_rec->node_hostname) {
/* Recover NodeAddr and NodeHostName */
set_node_comm_name(
node_ptr,
node_state_rec->comm_name,
node_state_rec->node_hostname);
}
if (IS_NODE_FUTURE(node_ptr)) {
/* preserve state for conf FUTURE nodes */
node_ptr->node_state = node_state;
} else if (IS_NODE_CLOUD(node_ptr) || IS_NODE_EXTERNAL(node_ptr)) {
if ((!power_save_mode) &&
((node_state & NODE_STATE_POWERED_DOWN) ||
(node_state & NODE_STATE_POWERING_DOWN) ||
(node_state & NODE_STATE_POWERING_UP))) {
node_state &= (~NODE_STATE_POWERED_DOWN);
node_state &= (~NODE_STATE_POWERING_UP);
node_state &= (~NODE_STATE_POWERING_DOWN);
if (hs)
hostset_insert(
hs,
node_state_rec->name);
else
hs = hostset_create(
node_state_rec->name);
}
/*
* Replace FUTURE state with new state (idle),
* but preserve recovered state flags
* (e.g. POWER*).
*/
if ((node_state & NODE_STATE_BASE) ==
NODE_STATE_FUTURE) {
node_state =
((node_ptr->node_state &
NODE_STATE_BASE) |
(node_state &
NODE_STATE_FLAGS));
/*
* If node was FUTURE, then it wasn't up
* so mark it as powered down.
*/
if (power_save_mode)
node_state |=
NODE_STATE_POWERED_DOWN;
}
node_ptr->node_state =
node_state | NODE_STATE_CLOUD;
} else if (IS_NODE_UNKNOWN(node_ptr)) {
if (base_state == NODE_STATE_DOWN) {
orig_flags = node_ptr->node_state &
NODE_STATE_FLAGS;
node_ptr->node_state = NODE_STATE_DOWN
| orig_flags;
}
if (node_state & NODE_STATE_DRAIN)
node_ptr->node_state |=
NODE_STATE_DRAIN;
if (node_state & NODE_STATE_FAIL)
node_ptr->node_state |=
NODE_STATE_FAIL;
if ((node_state & NODE_STATE_POWERED_DOWN) ||
(node_state & NODE_STATE_POWERING_DOWN)) {
uint32_t power_flag =
node_state &
(NODE_STATE_POWERED_DOWN |
NODE_STATE_POWERING_DOWN);
if (power_save_mode &&
IS_NODE_UNKNOWN(node_ptr)) {
orig_flags = node_ptr->
node_state &
NODE_STATE_FLAGS;
node_ptr->node_state =
NODE_STATE_IDLE |
orig_flags |
power_flag;
} else if (power_save_mode) {
node_ptr->node_state |=
power_flag;
} else if (hs)
hostset_insert(
hs,
node_state_rec->name);
else
hs = hostset_create(
node_state_rec->name);
/* Recover hardware state for powered
* down nodes */
node_ptr->cpus = node_state_rec->cpus;
node_ptr->boards =
node_state_rec->boards;
node_ptr->tot_sockets =
node_state_rec->tot_sockets;
node_ptr->cores = node_state_rec->cores;
node_ptr->tot_cores =
node_state_rec->tot_cores;
node_ptr->threads =
node_state_rec->threads;
node_ptr->real_memory =
node_state_rec->real_memory;
node_ptr->res_cores_per_gpu =
node_state_rec->
res_cores_per_gpu;
node_ptr->tmp_disk =
node_state_rec->tmp_disk;
}
if (node_state & NODE_STATE_MAINT)
node_ptr->node_state |= NODE_STATE_MAINT;
if (node_state & NODE_STATE_REBOOT_REQUESTED)
node_ptr->node_state |=
NODE_STATE_REBOOT_REQUESTED;
if (node_state & NODE_STATE_REBOOT_ISSUED)
node_ptr->node_state |=
NODE_STATE_REBOOT_ISSUED;
if (node_state & NODE_STATE_POWERING_UP) {
if (power_save_mode) {
node_ptr->node_state |=
NODE_STATE_POWERING_UP;
} else if (hs)
hostset_insert(
hs,
node_state_rec->name);
else
hs = hostset_create(
node_state_rec->name);
}
}
if (!node_ptr->extra) {
node_ptr->extra = node_state_rec->extra;
node_state_rec->extra = NULL;
}
if (!node_ptr->cert_token) {
node_ptr->cert_token =
node_state_rec->cert_token;
node_state_rec->cert_token = NULL;
}
if (!node_ptr->comment) {
node_ptr->comment = node_state_rec->comment;
node_state_rec->comment = NULL;
}
if (!node_ptr->instance_id) {
node_ptr->instance_id =
node_state_rec->instance_id;
node_state_rec->instance_id = NULL;
}
if (!node_ptr->instance_type) {
node_ptr->instance_type =
node_state_rec->instance_type;
node_state_rec->instance_type = NULL;
}
if (node_ptr->reason == NULL) {
node_ptr->reason = node_state_rec->reason;
node_state_rec->reason = NULL;
node_ptr->reason_time =
node_state_rec->reason_time;
node_ptr->reason_uid =
node_state_rec->reason_uid;
}
xfree(node_ptr->features_act);
node_ptr->features_act = node_state_rec->features_act;
node_state_rec->features_act = NULL;
node_ptr->gres_list = node_state_rec->gres_list;
node_state_rec->gres_list = NULL;
node_ptr->gpu_spec_bitmap =
node_state_rec->gpu_spec_bitmap;
node_state_rec->gpu_spec_bitmap = NULL;
} else {
if ((!power_save_mode) &&
((node_state & NODE_STATE_POWERED_DOWN) ||
(node_state & NODE_STATE_POWERING_DOWN) ||
(node_state & NODE_STATE_POWERING_UP))) {
node_state &= (~NODE_STATE_POWERED_DOWN);
node_state &= (~NODE_STATE_POWERING_DOWN);
node_state &= (~NODE_STATE_POWERING_UP);
if (hs)
hostset_insert(hs,
node_state_rec->name);
else
hs = hostset_create(
node_state_rec->name);
}
if ((IS_NODE_CLOUD(node_ptr) ||
(node_state & NODE_STATE_DYNAMIC_FUTURE) ||
(node_state & NODE_STATE_DYNAMIC_NORM)) &&
node_state_rec->comm_name &&
node_state_rec->node_hostname) {
/* Recover NodeAddr and NodeHostName */
set_node_comm_name(
node_ptr,
node_state_rec->comm_name,
node_state_rec->node_hostname);
}
node_ptr->node_state = node_state;
xfree(node_ptr->extra);
node_ptr->extra = node_state_rec->extra;
node_state_rec->extra = NULL;
xfree(node_ptr->cert_token);
node_ptr->cert_token = node_state_rec->cert_token;
node_state_rec->cert_token = NULL;
xfree(node_ptr->comment);
node_ptr->comment = node_state_rec->comment;
node_state_rec->comment = NULL;
xfree(node_ptr->instance_id);
node_ptr->instance_id = node_state_rec->instance_id;
node_state_rec->instance_id = NULL;
xfree(node_ptr->instance_type);
node_ptr->instance_type = node_state_rec->instance_type;
node_state_rec->instance_type = NULL;
xfree(node_ptr->reason);
node_ptr->reason = node_state_rec->reason;
node_state_rec->reason = NULL;
node_ptr->reason_time = node_state_rec->reason_time;
node_ptr->reason_uid = node_state_rec->reason_uid;
xfree(node_ptr->features);
node_ptr->features = node_state_rec->features;
node_state_rec->features = NULL;
xfree(node_ptr->features_act);
node_ptr->features_act = node_state_rec->features_act;
node_state_rec->features_act = NULL;
xfree(node_ptr->gres);
node_ptr->gres = node_state_rec->gres;
node_state_rec->gres = NULL;
node_ptr->gres_list = node_state_rec->gres_list;
node_state_rec->gres_list = NULL;
node_ptr->part_cnt = 0;
xfree(node_ptr->part_pptr);
node_ptr->cpu_bind = node_state_rec->cpu_bind;
node_ptr->cpus = node_state_rec->cpus;
node_ptr->boards = node_state_rec->boards;
node_ptr->tot_sockets = node_state_rec->tot_sockets;
node_ptr->cores = node_state_rec->cores;
node_ptr->tot_cores = node_state_rec->tot_cores;
node_ptr->threads = node_state_rec->threads;
node_ptr->real_memory = node_state_rec->real_memory;
node_ptr->res_cores_per_gpu =
node_state_rec->res_cores_per_gpu;
node_ptr->gpu_spec_bitmap =
node_state_rec->gpu_spec_bitmap;
node_state_rec->gpu_spec_bitmap = NULL;
node_ptr->tmp_disk = node_state_rec->tmp_disk;
xfree(node_ptr->mcs_label);
node_ptr->mcs_label = node_state_rec->mcs_label;
node_state_rec->mcs_label = NULL;
xfree(node_ptr->topology_str);
node_ptr->topology_str = node_state_rec->topology_str;
node_state_rec->topology_str = NULL;
}
if (node_ptr) {
node_cnt++;
node_ptr->next_state = node_state_rec->next_state;
if (IS_NODE_DOWN(node_ptr)) {
if (down_nodes)
hostlist_push(down_nodes,
node_state_rec->name);
else
down_nodes = hostlist_create(
node_state_rec->name);
}
if (node_state_rec->resume_after &&
(IS_NODE_DOWN(node_ptr) ||
IS_NODE_DRAINED(node_ptr)))
node_ptr->resume_after =
node_state_rec->resume_after;
node_ptr->last_response = node_state_rec->last_response;
node_ptr->boot_req_time = node_state_rec->boot_req_time;
node_ptr->power_save_req_time =
node_state_rec->power_save_req_time;
if (node_state_rec->protocol_version &&
(node_state_rec->protocol_version != NO_VAL16))
node_ptr->protocol_version =
node_state_rec->protocol_version;
else
node_ptr->protocol_version = protocol_version;
/* Sanity check to make sure we can take a version we
* actually understand.
*/
if (node_ptr->protocol_version <
SLURM_MIN_PROTOCOL_VERSION)
node_ptr->protocol_version =
SLURM_MIN_PROTOCOL_VERSION;
if (!IS_NODE_POWERED_DOWN(node_ptr))
node_ptr->last_busy = node_state_rec->last_busy;
}
purge_node_rec(node_state_rec);
}
fini: info("Recovered state of %d nodes", node_cnt);
if (hs) {
char *node_names = hostset_ranged_string_xmalloc(hs);
info("Cleared POWER_SAVE flag from nodes %s", node_names);
hostset_destroy(hs);
xfree(node_names);
}
if (down_nodes) {
char *down_host_str = NULL;
down_host_str = hostlist_ranged_string_xmalloc(down_nodes);
info("Down nodes: %s", down_host_str);
xfree(down_host_str);
hostlist_destroy(down_nodes);
}
FREE_NULL_BUFFER(buffer);
return error_code;
unpack_error:
if (!ignore_state_errors)
fatal("Incomplete node data checkpoint file, start with '-i' to ignore this. Warning: using -i will lose the data that can't be recovered.");
error("Incomplete node data checkpoint file");
error_code = EFAULT;
goto fini;
}
/* list_compare_config - compare two entry from the config list based upon
* weight, see common/list.h for documentation */
int list_compare_config (void *config_entry1, void *config_entry2)
{
config_record_t *c1 = *(config_record_t **) config_entry1;
config_record_t *c2 = *(config_record_t **) config_entry2;
return slurm_sort_uint32_list_asc(&c1->weight, &c2->weight);
}
/*
 * _is_dup_config_record - test whether two distinct config records describe
 *	the same configuration
 * IN c1, c2 - records to compare
 * RET true if c1 and c2 are different records whose compared fields all
 *	match; false if any field differs or if c1 and c2 are the same pointer
 */
static bool _is_dup_config_record(config_record_t *c1, config_record_t *c2)
{
	/* This is the same pointer - ignore */
	if (c1 == c2)
		return false;

	/* Any differing numeric field rules out a duplicate */
	if ((c1->boards != c2->boards) ||
	    (c1->core_spec_cnt != c2->core_spec_cnt) ||
	    (c1->cores != c2->cores) ||
	    (c1->cpu_bind != c2->cpu_bind) ||
	    (c1->cpus != c2->cpus) ||
	    (c1->mem_spec_limit != c2->mem_spec_limit) ||
	    (c1->real_memory != c2->real_memory) ||
	    (c1->res_cores_per_gpu != c2->res_cores_per_gpu) ||
	    (c1->threads != c2->threads) ||
	    (c1->tmp_disk != c2->tmp_disk) ||
	    (c1->tot_sockets != c2->tot_sockets) ||
	    (c1->weight != c2->weight))
		return false;

	/* ... as does any differing string field */
	if (xstrcmp(c1->cpu_spec_list, c2->cpu_spec_list) ||
	    xstrcmp(c1->feature, c2->feature) ||
	    xstrcmp(c1->gres, c2->gres) ||
	    xstrcmp(c1->topology_str, c2->topology_str) ||
	    xstrcmp(c1->tres_weights_str, c2->tres_weights_str))
		return false;

	/* duplicate records */
	return true;
}
/*
 * _combine_dup_config_records - fold every record in config_list that
 *	duplicates curr_rec into curr_rec
 * IN/OUT curr_rec - record that absorbs the node bitmaps of its duplicates;
 *	when at least one duplicate was merged its nodes string is rebuilt and
 *	_update_config_ptr() is called for its node bitmap
 */
static void _combine_dup_config_records(config_record_t *curr_rec)
{
	int merged_cnt = 0;
	config_record_t *other;
	list_itr_t *itr = list_iterator_create(config_list);

	while ((other = list_next(itr))) {
		if (_is_dup_config_record(curr_rec, other)) {
			merged_cnt++;
			/* Take over the nodes before the record is removed */
			bit_or(curr_rec->node_bitmap, other->node_bitmap);
			list_delete_item(itr);
		}
	}
	list_iterator_destroy(itr);

	if (!merged_cnt)
		return;

	xfree(curr_rec->nodes);
	curr_rec->nodes = bitmap2node_name(curr_rec->node_bitmap);
	debug("Consolidated duplicate config records into %s", curr_rec->nodes);

	/* Update each node_ptr in node_bitmap */
	_update_config_ptr(curr_rec->node_bitmap, curr_rec);
}
/*
 * _node_is_hidden - determine whether a node must be hidden from a user
 * IN node_ptr - node being considered
 * IN pack_info - requesting uid and the partitions visible to that user;
 *	visible_parts is walked until a NULL entry, so it must not be NULL
 *	when the node belongs to at least one partition
 * RET true if the MCS label check rejects the user (only evaluated when
 *	PRIVATE_DATA_NODES is set and slurm_mcs_get_privatedata() returns 1),
 *	or if the node belongs to partitions none of which is visible;
 *	false otherwise, including for a node in no partition
 */
static bool _node_is_hidden(node_record_t *node_ptr,
			    pack_node_info_t *pack_info)
{
	bool mcs_private = (slurm_conf.private_data & PRIVATE_DATA_NODES) &&
			   (slurm_mcs_get_privatedata() == 1);

	if (mcs_private &&
	    (mcs_g_check_mcs_label(pack_info->uid, node_ptr->mcs_label,
				   false) != 0))
		return true;

	if (!node_ptr->part_cnt)
		return false;

	for (int i = 0; i < node_ptr->part_cnt; i++) {
		part_record_t **visible = pack_info->visible_parts;

		/* return false if the node belongs to any visible partition */
		for (; *visible; visible++) {
			if (*visible == node_ptr->part_pptr[i])
				return false;
		}
	}

	return true;
}
/*
 * _free_pack_node_info_members - release the memory referenced by a
 *	pack_node_info_t (the structure itself is not freed)
 * IN/OUT pack_info - structure whose visible_parts array is xfree()'d
 */
static void _free_pack_node_info_members(pack_node_info_t *pack_info)
{
	xfree(pack_info->visible_parts);
}
/*
 * _skip_node_in_pack_all - decide whether an existing node record must be
 *	withheld from a pack_all_nodes() response
 * IN node_ptr - node being considered (must not be NULL)
 * IN show_flags - node filtering options of the request
 * IN privileged - result of validate_operator() for the requesting user
 * IN pack_info - requesting uid and its visible partitions
 * RET true if the node is hidden from the user (not evaluated for privileged
 *	users or with SHOW_ALL), is FUTURE without SHOW_FUTURE, or has no name
 */
static bool _skip_node_in_pack_all(node_record_t *node_ptr,
				   uint16_t show_flags, bool privileged,
				   pack_node_info_t *pack_info)
{
	if (!(show_flags & SHOW_ALL) && !privileged &&
	    _node_is_hidden(node_ptr, pack_info))
		return true;

	if (IS_NODE_FUTURE(node_ptr) && !(show_flags & SHOW_FUTURE))
		return true;

	return (!node_ptr->name || (node_ptr->name[0] == '\0'));
}

/*
 * pack_all_nodes - dump all configuration and node information for all nodes
 *	in machine independent form (for network transmission)
 * IN show_flags - node filtering options
 * IN uid - uid of user making request (for partition filtering)
 * IN protocol_version - slurm protocol version of client
 * RET buffer holding the packed data
 * global: node_record_table_ptr - pointer to global node table
 * NOTE: change slurm_load_node() in api/node_info.c when data format changes
 */
extern buf_t *pack_all_nodes(uint16_t show_flags, uid_t uid,
			     uint16_t protocol_version)
{
	uint32_t nodes_packed = 0, end_offset;
	buf_t *buffer;
	time_t now = time(NULL);
	bool privileged = validate_operator(uid);
	static config_record_t blank_config = {0};
	static node_record_t blank_node = {
		.config_ptr = &blank_config,
	};
	pack_node_info_t pack_info = {
		.uid = uid,
		.visible_parts = build_visible_parts(uid, privileged)
	};

	xassert(verify_lock(CONF_LOCK, READ_LOCK));
	xassert(verify_lock(PART_LOCK, READ_LOCK));

	buffer = init_buf(BUF_SIZE * 16);

	if (protocol_version >= SLURM_24_11_PROTOCOL_VERSION) {
		bitstr_t *hidden_nodes = bit_alloc(node_record_count);
		uint32_t bitmap_offset;
		bool any_hidden = false;

		/*
		 * Header: count, time, hidden node bitmap. The count and the
		 * bitmap are placeholders which get rewritten once the final
		 * values are known.
		 */
		pack32(nodes_packed, buffer);
		pack_time(now, buffer);
		bitmap_offset = get_buf_offset(buffer);
		pack_bit_str_hex(hidden_nodes, buffer);

		/*
		 * Empty table slots and hidden nodes are not packed at all;
		 * their index is flagged in the bitmap instead, and they are
		 * still counted in nodes_packed.
		 */
		for (int inx = 0; inx < node_record_count; inx++) {
			node_record_t *node_ptr = node_record_table_ptr[inx];

			if (node_ptr) {
				xassert(node_ptr->magic == NODE_MAGIC);
				xassert(node_ptr->config_ptr->magic ==
					CONFIG_MAGIC);
			}

			if (!node_ptr ||
			    _skip_node_in_pack_all(node_ptr, show_flags,
						   privileged, &pack_info)) {
				bit_set(hidden_nodes, inx);
				any_hidden = true;
			} else {
				_pack_node(node_ptr, buffer, protocol_version,
					   show_flags);
			}
			nodes_packed++;
		}

		if (any_hidden) {
			/* Overwrite the placeholder bitmap in the header */
			end_offset = get_buf_offset(buffer);
			set_buf_offset(buffer, bitmap_offset);
			pack_bit_str_hex(hidden_nodes, buffer);
			set_buf_offset(buffer, end_offset);
		}

		FREE_NULL_BITMAP(hidden_nodes);
	} else if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		/* Header: count (placeholder) and time */
		pack32(nodes_packed, buffer);
		pack_time(now, buffer);

		/*
		 * We can't avoid packing node records without breaking the
		 * node index pointers. So pack a node with a name of NULL and
		 * let the caller deal with it.
		 */
		for (int inx = 0; inx < node_record_count; inx++) {
			node_record_t *node_ptr = node_record_table_ptr[inx];

			if (node_ptr) {
				xassert(node_ptr->magic == NODE_MAGIC);
				xassert(node_ptr->config_ptr->magic ==
					CONFIG_MAGIC);
			}

			if (!node_ptr ||
			    _skip_node_in_pack_all(node_ptr, show_flags,
						   privileged, &pack_info)) {
				_pack_node(&blank_node, buffer,
					   protocol_version, show_flags);
			} else {
				_pack_node(node_ptr, buffer, protocol_version,
					   show_flags);
			}
			nodes_packed++;
		}
	} else {
		error("%s: protocol_version %hu not supported",
		      __func__, protocol_version);
	}

	/* Rewrite the record count at the start of the buffer */
	end_offset = get_buf_offset(buffer);
	set_buf_offset(buffer, 0);
	pack32(nodes_packed, buffer);
	set_buf_offset(buffer, end_offset);

	_free_pack_node_info_members(&pack_info);

	return buffer;
}
/*
 * pack_one_node - dump all configuration and node information for one node
 *	in machine independent form (for network transmission)
 * IN show_flags - node filtering options
 * IN uid - uid of user making request (for partition filtering)
 * IN node_name - name of node for which information is desired,
 *	use first entry of the node table if name is NULL
 * IN protocol_version - slurm protocol version of client
 * RET buffer holding the packed data; it contains zero records if the node
 *	was not found or is filtered out for this request
 * global: node_record_table_ptr - pointer to global node table
 * NOTE: change slurm_load_node() in api/node_info.c when data format changes
 */
extern buf_t *pack_one_node(uint16_t show_flags, uid_t uid, char *node_name,
			    uint16_t protocol_version)
{
	uint32_t nodes_packed = 0, end_offset;
	buf_t *buffer;
	time_t now = time(NULL);
	node_record_t *node_ptr;
	bool privileged = validate_operator(uid);
	pack_node_info_t pack_info = {
		.uid = uid,
		.visible_parts = build_visible_parts(uid, privileged)
	};
	/* From 24.11 on the header also carries a hidden node bitmap */
	bool has_hidden_bitmap =
		(protocol_version >= SLURM_24_11_PROTOCOL_VERSION);

	xassert(verify_lock(CONF_LOCK, READ_LOCK));
	xassert(verify_lock(PART_LOCK, READ_LOCK));

	buffer = init_buf(BUF_SIZE);

	if (has_hidden_bitmap ||
	    (protocol_version >= SLURM_MIN_PROTOCOL_VERSION)) {
		/* Header: count (placeholder, rewritten below) and time */
		pack32(nodes_packed, buffer);
		pack_time(now, buffer);
		/* Mirror _unpack_node_info_msg */
		if (has_hidden_bitmap)
			pack_bit_str_hex(NULL, buffer);

		/* Locate the requested record */
		if (node_name)
			node_ptr = find_node_record(node_name);
		else
			node_ptr = node_record_table_ptr[0];

		if (node_ptr) {
			bool hidden =
				(!(show_flags & SHOW_ALL) && !privileged &&
				 _node_is_hidden(node_ptr, &pack_info)) ||
				(IS_NODE_FUTURE(node_ptr) &&
				 !(show_flags & SHOW_FUTURE)) ||
				!node_ptr->name ||
				(node_ptr->name[0] == '\0');

			if (!hidden) {
				_pack_node(node_ptr, buffer, protocol_version,
					   show_flags);
				nodes_packed++;
			}
		}
	} else {
		error("%s: protocol_version %hu not supported",
		      __func__, protocol_version);
	}

	/* Rewrite the record count at the start of the buffer */
	end_offset = get_buf_offset(buffer);
	set_buf_offset(buffer, 0);
	pack32(nodes_packed, buffer);
	set_buf_offset(buffer, end_offset);

	_free_pack_node_info_members(&pack_info);

	return buffer;
}
/*
 * _pack_node - serialize one node record into a buffer in machine
 *	independent form (for network transmission)
 * IN dump_node_ptr - node record to serialize
 * IN/OUT buffer - destination buffer, advanced by the pack calls
 * IN protocol_version - slurm protocol version of the client; it selects
 *	which of the two field layouts below is written
 * IN show_flags - SHOW_* flags; only SHOW_DETAIL is examined here and it
 *	enables the GRES drain/used strings (packed as NULL otherwise)
 * NOTE: the order of the pack calls is the wire format. If you make any
 *	changes here be sure to make the corresponding changes to
 *	_unpack_node_info_members() in common/slurm_protocol_pack.c
 */
static void _pack_node(node_record_t *dump_node_ptr, buf_t *buffer,
		       uint16_t protocol_version, uint16_t show_flags)
{
	char *drain_str = NULL, *used_str = NULL;

	xassert(verify_lock(CONF_LOCK, READ_LOCK));

	if (protocol_version >= SLURM_25_05_PROTOCOL_VERSION) {
		node_record_t *node = dump_node_ptr;
		config_record_t *cfg;
		uint16_t cert_flags = 0;
		char *gres_str;

		packstr(node->name, buffer);
		packstr(node->node_hostname, buffer);
		packstr(node->comm_name, buffer);
		packstr(node->bcast_address, buffer);

		/* the remote side reads this value as its cert_flags field */
		if (node->cert_token)
			cert_flags = NODE_CERT_TOKEN_SET;
		pack16(cert_flags, buffer);

		pack16(node->port, buffer);
		pack32(node->next_state, buffer);
		pack32(node->node_state, buffer);
		packstr(node->version, buffer);

		/* Only data from config_record used for scheduling */
		cfg = node->config_ptr;
		pack16(cfg->cpus, buffer);
		pack16(cfg->boards, buffer);
		pack16(cfg->tot_sockets, buffer);
		pack16(cfg->cores, buffer);
		pack16(cfg->threads, buffer);
		pack64(cfg->real_memory, buffer);
		pack32(cfg->tmp_disk, buffer);

		packstr(node->gpu_spec, buffer);
		packstr(node->mcs_label, buffer);
		pack32(node->owner, buffer);
		pack16(node->core_spec_cnt, buffer);
		pack32(node->cpu_bind, buffer);
		pack64(node->mem_spec_limit, buffer);
		packstr(node->cpu_spec_list, buffer);
		pack16(node->cpus_efctv, buffer);

		pack32(node->cpu_load, buffer);
		pack64(node->free_mem, buffer);
		pack32(cfg->weight, buffer);
		pack16(node->res_cores_per_gpu, buffer);
		pack32(node->reason_uid, buffer);

		pack_time(node->boot_time, buffer);
		pack_time(node->last_busy, buffer);
		pack_time(node->reason_time, buffer);
		pack_time(node->resume_after, buffer);
		pack_time(node->slurmd_start_time, buffer);
		pack_time(node->cert_last_renewal, buffer);

		pack16(node->alloc_cpus, buffer);
		pack64(node->alloc_memory, buffer);
		packstr(node->alloc_tres_fmt_str, buffer);

		packstr(node->arch, buffer);
		packstr(node->features, buffer);
		packstr(node->features_act, buffer);

		/* node level GRES wins, else fall back to the config record */
		gres_str = node->gres;
		if (!gres_str)
			gres_str = cfg->gres;
		packstr(gres_str, buffer);

		/* Gathering GRES details is slow, so don't by default */
		if (show_flags & SHOW_DETAIL) {
			drain_str = gres_get_node_drain(node->gres_list);
			used_str = gres_get_node_used(node->gres_list);
		}
		packstr(drain_str, buffer);
		packstr(used_str, buffer);
		xfree(drain_str);
		xfree(used_str);

		packstr(node->os, buffer);
		packstr(node->comment, buffer);
		packstr(node->extra, buffer);
		packstr(node->instance_id, buffer);
		packstr(node->instance_type, buffer);
		packstr(node->reason, buffer);
		acct_gather_energy_pack(node->energy, buffer,
					protocol_version);
		packstr(node->tres_fmt_str, buffer);
		packstr(node->resv_name, buffer);
		packstr(node->topology_str, buffer);
	} else if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		/*
		 * Older layout: no cert flags, cert_last_renewal or
		 * topology_str, but it still carries the select plugin id and
		 * a placeholder double.
		 */
		node_record_t *node = dump_node_ptr;
		config_record_t *cfg;
		char *gres_str;

		packstr(node->name, buffer);
		packstr(node->node_hostname, buffer);
		packstr(node->comm_name, buffer);
		packstr(node->bcast_address, buffer);

		pack16(node->port, buffer);
		pack32(node->next_state, buffer);
		pack32(node->node_state, buffer);
		packstr(node->version, buffer);

		/* Only data from config_record used for scheduling */
		cfg = node->config_ptr;
		pack16(cfg->cpus, buffer);
		pack16(cfg->boards, buffer);
		pack16(cfg->tot_sockets, buffer);
		pack16(cfg->cores, buffer);
		pack16(cfg->threads, buffer);
		pack64(cfg->real_memory, buffer);
		pack32(cfg->tmp_disk, buffer);

		packstr(node->gpu_spec, buffer);
		packstr(node->mcs_label, buffer);
		pack32(node->owner, buffer);
		pack16(node->core_spec_cnt, buffer);
		pack32(node->cpu_bind, buffer);
		pack64(node->mem_spec_limit, buffer);
		packstr(node->cpu_spec_list, buffer);
		pack16(node->cpus_efctv, buffer);

		pack32(node->cpu_load, buffer);
		pack64(node->free_mem, buffer);
		pack32(cfg->weight, buffer);
		pack16(node->res_cores_per_gpu, buffer);
		pack32(node->reason_uid, buffer);

		pack_time(node->boot_time, buffer);
		pack_time(node->last_busy, buffer);
		pack_time(node->reason_time, buffer);
		pack_time(node->resume_after, buffer);
		pack_time(node->slurmd_start_time, buffer);

		select_plugin_id_pack(buffer);

		pack16(node->alloc_cpus, buffer);
		pack64(node->alloc_memory, buffer);
		packstr(node->alloc_tres_fmt_str, buffer);
		packdouble(0, buffer); /* was alloc_tres_weighted */

		packstr(node->arch, buffer);
		packstr(node->features, buffer);
		packstr(node->features_act, buffer);

		/* node level GRES wins, else fall back to the config record */
		gres_str = node->gres;
		if (!gres_str)
			gres_str = cfg->gres;
		packstr(gres_str, buffer);

		/* Gathering GRES details is slow, so don't by default */
		if (show_flags & SHOW_DETAIL) {
			drain_str = gres_get_node_drain(node->gres_list);
			used_str = gres_get_node_used(node->gres_list);
		}
		packstr(drain_str, buffer);
		packstr(used_str, buffer);
		xfree(drain_str);
		xfree(used_str);

		packstr(node->os, buffer);
		packstr(node->comment, buffer);
		packstr(node->extra, buffer);
		packstr(node->instance_id, buffer);
		packstr(node->instance_type, buffer);
		packstr(node->reason, buffer);
		acct_gather_energy_pack(node->energy, buffer,
					protocol_version);
		packstr(node->tres_fmt_str, buffer);
		packstr(node->resv_name, buffer);
	} else {
		/* nothing is packed for a protocol version we do not know */
		error("_pack_node: protocol_version %hu not supported",
		      protocol_version);
	}
}
/*
 * Report whether a node's state is already "new_state".
 * Comparing the raw state values is not sufficient because of the state
 * flags (e.g. a node might be DOWN + NO_RESPOND or IDLE + DRAIN), so the
 * IS_NODE_*() tests are used.
 * IN node_ptr - node to examine
 * IN new_state - requested state; NO_VAL means "no change requested"
 * RET true if new_state is NO_VAL, or if new_state is exactly DOWN, DRAIN or
 *	FAIL and the node already satisfies the matching IS_NODE_*() test;
 *	false for everything else
 */
static bool _equivalent_node_state(node_record_t *node_ptr, uint32_t new_state)
{
	/* No change */
	if (new_state == NO_VAL)
		return true;

	/* Other states might be added here */
	return (((new_state == NODE_STATE_DOWN) && IS_NODE_DOWN(node_ptr)) ||
		((new_state == NODE_STATE_DRAIN) && IS_NODE_DRAIN(node_ptr)) ||
		((new_state == NODE_STATE_FAIL) && IS_NODE_FAIL(node_ptr)));
}
/*
 * Confirm that the selected ActiveFeatures are a subset of AvailableFeatures
 * IN features_act - comma separated active features, may be NULL or empty
 * IN features - comma separated available features, may be NULL or empty
 * RET true if features_act is NULL/empty or every one of its tokens equals
 *	some token of features; false otherwise (including the case where
 *	features is NULL/empty while features_act is not)
 */
static bool _valid_features_act(char *features_act, char *features)
{
	char *act_copy, *act_save = NULL, *act_tok;
	bool is_subset = true;

	/* no active features requested: trivially valid */
	if (!features_act || !features_act[0])
		return true;
	/* active features requested, but nothing is available */
	if (!features || !features[0])
		return false;

	act_copy = xstrdup(features_act);
	for (act_tok = strtok_r(act_copy, ",", &act_save); act_tok;
	     act_tok = strtok_r(NULL, ",", &act_save)) {
		char *avail_save = NULL, *avail_tok;
		char *avail_copy = xstrdup(features);
		bool found = false;

		/* strtok_r() modifies its input, so scan a fresh copy */
		for (avail_tok = strtok_r(avail_copy, ",", &avail_save);
		     avail_tok;
		     avail_tok = strtok_r(NULL, ",", &avail_save)) {
			if (!xstrcmp(act_tok, avail_tok)) {
				found = true;
				break;
			}
		}
		xfree(avail_copy);

		if (!found) {	/* No match found */
			is_subset = false;
			break;
		}
	}
	xfree(act_copy);

	return is_subset;
}
/*
 * Validate that reported active changeable features are a superset of the
 * current active changeable features in the controller.
 *
 * If the node doesn't report any features then don't do any validation.
 *
 * IN reg_active_features - comma separated active features from the
 *	registration message, may be NULL
 * IN node_active_features - comma separated active features currently
 *	recorded for the node, may be NULL
 * RET false only if a changeable feature of node_active_features could not
 *	be removed from the set of changeable features reported in
 *	reg_active_features; true in every other case
 */
static bool _valid_reported_active_features(const char *reg_active_features,
					    const char *node_active_features)
{
	char *reg_copy, *feature, *save_ptr = NULL;
	list_t *reported = NULL;
	bool is_valid = true;

	if (!reg_active_features || !node_active_features)
		return true;

	/*
	 * Gather the changeable features named by the registration. The list
	 * is created with a NULL delete function and stores pointers into
	 * reg_copy, so reg_copy is only released after the list.
	 */
	reg_copy = xstrdup(reg_active_features);
	feature = strtok_r(reg_copy, ",", &save_ptr);
	while (feature) {
		if (node_features_g_changeable_feature(feature)) {
			if (!reported)
				reported = list_create(NULL);
			list_append(reported, feature);
		}
		feature = strtok_r(NULL, ",", &save_ptr);
	}

	/*
	 * The node's current active changeable features should be a subset of
	 * the changeable features in the registration message.
	 */
	if (reported && list_count(reported)) {
		char *node_copy = xstrdup(node_active_features);

		feature = strtok_r(node_copy, ",", &save_ptr);
		while (feature) {
			/*
			 * A zero return from list_delete_all() is treated as
			 * the feature being absent from the registration.
			 */
			if (node_features_g_changeable_feature(feature) &&
			    !list_delete_all(reported,
					     slurm_find_char_in_list,
					     feature)) {
				is_valid = false;
				break;
			}
			feature = strtok_r(NULL, ",", &save_ptr);
		}
		xfree(node_copy);
	}

	FREE_NULL_LIST(reported);
	xfree(reg_copy);

	return is_valid;
}
/*
 * Return a new string containing only changeable features.
 * IN all_features - comma separated feature list, may be NULL
 * RET comma separated list (built with xstrfmtcat()) of the tokens for which
 *	node_features_g_changeable_feature() is true, in their original
 *	order; NULL if all_features is NULL or no token qualifies.
 *	NOTE(review): presumably the caller owns the result and must xfree()
 *	it - confirm against the callers.
 */
static char *_node_changeable_features(char *all_features)
{
	char *tmp_features;
	char *tok, *saveptr = NULL;
	char *changeable_features = NULL;

	/*
	 * Guard against a NULL list: without it strtok_r() would be called
	 * with a NULL string and a save pointer that was never set, which is
	 * undefined behavior.
	 */
	if (!all_features)
		return NULL;

	/* strtok_r() modifies its input, so tokenize a private copy */
	tmp_features = xstrdup(all_features);
	for (tok = strtok_r(tmp_features, ",", &saveptr);
	     tok;
	     tok = strtok_r(NULL, ",", &saveptr)) {
		if (!node_features_g_changeable_feature(tok))
			continue;
		/* prefix a comma on every token but the first */
		xstrfmtcat(changeable_features, "%s%s",
			   changeable_features ? "," : "",
			   tok);
	}
	xfree(tmp_features);

	return changeable_features;
}
/*
 * Back out the drain left on a node by a reboot request.
 * Used by update_node() when a reboot is cancelled and the node's next_state
 * carries NODE_STATE_UNDRAIN.
 * IN/OUT node_ptr - node to restore: it is put back in avail_node_bitmap if
 *	it is idle or allocated, its DRAIN flag is cleared and its reason
 *	string is released
 */
static void _undo_reboot_asap(node_record_t *node_ptr)
{
	bool usable = IS_NODE_IDLE(node_ptr) || IS_NODE_ALLOCATED(node_ptr);

	if (usable)
		bit_set(avail_node_bitmap, node_ptr->index);

	node_ptr->node_state &= ~NODE_STATE_DRAIN;
	xfree(node_ptr->reason);
}
/*
 * Mark a node as not responding and request an immediate ping of the nodes.
 * IN/OUT node_ptr - node to update: NODE_STATE_NO_RESPOND is set,
 *	last_response is set to the current time and boot_time is zeroed
 * Side effect: sets the global ping_nodes_now flag.
 * NOTE(review): presumably this forces the node to register again before it
 *	is used - confirm against the ping/registration logic.
 */
static void _require_node_reg(node_record_t *node_ptr)
{
	ping_nodes_now = true;

	node_ptr->boot_time = 0;
	node_ptr->last_response = time(NULL);
	node_ptr->node_state |= NODE_STATE_NO_RESPOND;
}
/*
 * update_node - apply an update request to every node named in the request
 * IN/OUT update_node_msg - requested changes; node_names selects the nodes.
 *	This function may modify the message: features and features_act are
 *	freed when feature validation fails, and features_act is filled in
 *	from features when no NodeFeatures plugin is counted and no active
 *	features were given.
 * IN auth_uid - stored as the node's reason_uid whenever the request carries
 *	a non-empty reason
 * RET SLURM_SUCCESS, or the most recently recorded error code. An unknown
 *	node name stops the per-node loop; other per-node errors are recorded
 *	and processing continues. The feature, gres and weight helpers that
 *	run after the loop are skipped once an error has been recorded.
 * Side effects: updates the node records and the global node bitmaps
 *	referenced below, and sets last_node_update.
 */
int update_node(update_node_msg_t *update_node_msg, uid_t auth_uid)
{
int error_code = 0, node_cnt;
node_record_t *node_ptr = NULL;
char *this_node_name = NULL, *tmp_feature, *orig_features_act = NULL;
hostlist_t *host_list, *hostaddr_list = NULL, *hostname_list = NULL;
uint32_t base_state = 0, node_flags, state_val, resume_after = NO_VAL;
time_t now = time(NULL);
bool uniq = true;
/* A node specification is mandatory */
if (update_node_msg->node_names == NULL ) {
info("%s: invalid node name", __func__);
return ESLURM_INVALID_NODE_NAME;
}
/*
 * NOTE(review): uniq is turned off when addresses or hostnames are given,
 * presumably so the node list keeps a 1:1 pairing with those lists - confirm
 * against nodespec_to_hostlist().
 */
if (update_node_msg->node_addr || update_node_msg->node_hostname)
uniq = false;
if (!(host_list = nodespec_to_hostlist(update_node_msg->node_names,
uniq, NULL)))
return ESLURM_INVALID_NODE_NAME;
if (!(node_cnt = hostlist_count(host_list))) {
info("%s: expansion of node specification '%s' resulted in zero nodes",
__func__, update_node_msg->node_names);
FREE_NULL_HOSTLIST(host_list);
return ESLURM_INVALID_NODE_NAME;
}
/* The address list, if given, must hold exactly one entry per node */
if (update_node_msg->node_addr) {
hostaddr_list = hostlist_create(update_node_msg->node_addr);
if (hostaddr_list == NULL) {
info("update_node: hostlist_create error on %s: %m",
update_node_msg->node_addr);
FREE_NULL_HOSTLIST(host_list);
return ESLURM_INVALID_NODE_NAME;
}
if (node_cnt != hostlist_count(hostaddr_list)) {
info("update_node: nodecount mismatch");
FREE_NULL_HOSTLIST(host_list);
FREE_NULL_HOSTLIST(hostaddr_list);
return ESLURM_INVALID_NODE_NAME;
}
}
/* The hostname list, if given, must hold exactly one entry per node */
if (update_node_msg->node_hostname) {
hostname_list = hostlist_create(update_node_msg->node_hostname);
if (hostname_list == NULL) {
info("update_node: hostlist_create error on %s: %m",
update_node_msg->node_hostname);
FREE_NULL_HOSTLIST(host_list);
FREE_NULL_HOSTLIST(hostaddr_list);
return ESLURM_INVALID_NODE_NAME;
}
if (node_cnt != hostlist_count(hostname_list)) {
info("update_node: nodecount mismatch");
FREE_NULL_HOSTLIST(host_list);
FREE_NULL_HOSTLIST(hostaddr_list);
FREE_NULL_HOSTLIST(hostname_list);
return ESLURM_INVALID_NODE_NAME;
}
}
/*
 * Power up limit: count the requested nodes together with the nodes already
 * set in power_up_node_bitmap and reject the whole request if that count
 * exceeds max_powered_nodes.
 */
if ((max_powered_nodes != NO_VAL) &&
(update_node_msg->node_state & NODE_STATE_POWER_UP)) {
bitstr_t *bmp = NULL;
int count;
if (hostlist2bitmap(host_list, false, &bmp)) {
info("update_node: hostlist2bitmap failed");
FREE_NULL_HOSTLIST(host_list);
FREE_NULL_HOSTLIST(hostaddr_list);
FREE_NULL_HOSTLIST(hostname_list);
FREE_NULL_BITMAP(bmp);
return ESLURM_INVALID_NODE_NAME;
}
bit_or(bmp, power_up_node_bitmap);
count = bit_set_count(bmp);
FREE_NULL_BITMAP(bmp);
if (count > max_powered_nodes) {
error("update_node: Cannot power up more nodes due to MaxPoweredUpNodes=%d",
max_powered_nodes);
FREE_NULL_HOSTLIST(host_list);
FREE_NULL_HOSTLIST(hostaddr_list);
FREE_NULL_HOSTLIST(hostname_list);
return ESLURM_MAX_POWERED_NODES;
}
log_flag(POWER, "powered nodes good %d", count);
}
/*
 * Process the nodes one at a time. The name returned by hostlist_shift() is
 * released with free() on every path that leaves an iteration (break,
 * continue, or the end of the loop body).
 */
while ( (this_node_name = hostlist_shift (host_list)) ) {
/* err_code != 0 prevents the new state from being stored for this node */
int err_code = 0;
/* set once an accounting "node up" record was sent for this node */
bool acct_updated = false;
/* set when a field that is pushed to accounting storage changed */
bool update_db = false;
node_ptr = find_node_record (this_node_name);
if (node_ptr == NULL) {
error ("update_node: node %s does not exist",
this_node_name);
error_code = ESLURM_INVALID_NODE_NAME;
free (this_node_name);
break;
}
/* Take the next address / hostname from the parallel lists */
if (hostaddr_list) {
char *this_addr = hostlist_shift(hostaddr_list);
xfree(node_ptr->comm_name);
node_ptr->comm_name = xstrdup(this_addr);
free(this_addr);
}
if (hostname_list) {
char *this_hostname = hostlist_shift(hostname_list);
xfree(node_ptr->node_hostname);
node_ptr->node_hostname = xstrdup(this_hostname);
free(this_hostname);
}
if (hostaddr_list || hostname_list) {
/* This updates the lookup table addresses */
slurm_reset_alias(node_ptr->name, node_ptr->comm_name,
node_ptr->node_hostname);
}
/* A non-NULL but empty cert_token clears the stored token */
if (update_node_msg->cert_token) {
xfree(node_ptr->cert_token);
if (update_node_msg->cert_token[0])
node_ptr->cert_token = xstrdup(
update_node_msg->cert_token);
}
if (update_node_msg->cpu_bind) {
char tmp_str[128];
slurm_sprint_cpu_bind_type(tmp_str,
update_node_msg->cpu_bind);
info("update_node: setting CpuBind to %s for node %s",
tmp_str, this_node_name);
/* CPU_BIND_OFF is stored as 0 in the node record */
if (update_node_msg->cpu_bind == CPU_BIND_OFF)
node_ptr->cpu_bind = 0;
else
node_ptr->cpu_bind = update_node_msg->cpu_bind;
}
/*
 * Validate the feature request. On failure both feature strings are freed
 * from the message, which also disables the feature updates for every node
 * processed after this one.
 */
if (update_node_msg->features || update_node_msg->features_act) {
char *features_act = NULL, *features_avail = NULL;
if (!node_features_g_node_update_valid(node_ptr,
update_node_msg)) {
error_code = ESLURM_INVALID_FEATURE;
xfree(update_node_msg->features);
xfree(update_node_msg->features_act);
}
/* Compare the requested values, falling back to the node's current ones */
if (update_node_msg->features_act)
features_act = update_node_msg->features_act;
else
features_act = node_ptr->features_act;
if (update_node_msg->features)
features_avail = update_node_msg->features;
else
features_avail = node_ptr->features;
if (!_valid_features_act(features_act, features_avail)){
info("%s: Invalid ActiveFeatures (\'%s\' not subset of \'%s\' on node %s)",
__func__, features_act, features_avail,
node_ptr->name);
error_code = ESLURM_ACTIVE_FEATURE_NOT_SUBSET;
xfree(update_node_msg->features);
xfree(update_node_msg->features_act);
}
}
/*
 * Remember the node's active features (or its available features if it has
 * no active ones) before they are replaced; used by the translation below.
 */
if (update_node_msg->features_act) {
if (node_ptr->features_act)
orig_features_act =
xstrdup(node_ptr->features_act);
else
orig_features_act = xstrdup(node_ptr->features);
}
if (update_node_msg->features) {
if (!update_node_msg->features_act &&
(node_features_g_count() == 0)) {
/*
* If no NodeFeatures plugin and no explicit
* active features, then make active and
* available feature values match
*/
update_node_msg->features_act =
xstrdup(update_node_msg->features);
}
/* An empty features string leaves the node with no available features */
xfree(node_ptr->features);
if (update_node_msg->features[0]) {
node_ptr->features =
node_features_g_node_xlate2(
update_node_msg->features);
}
/*
* update_node_avail_features() logs and updates
* avail_feature_list below
*/
}
if (update_node_msg->features_act) {
tmp_feature = node_features_g_node_xlate(
update_node_msg->features_act,
orig_features_act, node_ptr->features,
node_ptr->index);
xfree(node_ptr->features_act);
node_ptr->features_act = tmp_feature;
/*
 * NOTE(review): this assignment replaces any error code recorded earlier
 * with the result of update_node_active_features() - confirm this is
 * intended.
 */
error_code = update_node_active_features(
node_ptr->name,
node_ptr->features_act,
FEATURE_MODE_COMB);
xfree(orig_features_act);
}
/* A non-NULL but empty gres string clears the node level gres */
if (update_node_msg->gres) {
xfree(node_ptr->gres);
if (update_node_msg->gres[0])
node_ptr->gres = xstrdup(update_node_msg->gres);
/* _update_node_gres() logs and updates config */
}
/*
 * Extra: when extra constraints are enabled a non-empty value must parse as
 * JSON; on a parse failure the node's extra and extra_data are left as is.
 * Otherwise extra_data is replaced (with NULL if nothing was parsed) and
 * extra is replaced or cleared.
 */
if (update_node_msg->extra) {
data_t *data = NULL;
char *extra = update_node_msg->extra;
if (extra[0] && extra_constraints_enabled() &&
serialize_g_string_to_data(&data, extra,
strlen(extra),
MIME_TYPE_JSON)) {
error("Failed to decode extra \"%s\" for node %s",
update_node_msg->extra, node_ptr->name);
error_code = ESLURM_INVALID_EXTRA;
} else {
FREE_NULL_DATA(node_ptr->extra_data);
node_ptr->extra_data = data;
xfree(node_ptr->extra);
if (update_node_msg->extra[0]) {
node_ptr->extra =
xstrdup(update_node_msg->extra);
}
/*
* Skip db updates for extra field changes,
* otherwise we'll overwhelm it with event records
* if someone is updating these constantly.
*/
// update_db = true;
}
}
/* Comment, instance id and instance type: an empty string clears the field */
if (update_node_msg->comment) {
xfree(node_ptr->comment);
if (update_node_msg->comment[0])
node_ptr->comment =
xstrdup(update_node_msg->comment);
}
if (update_node_msg->instance_id) {
xfree(node_ptr->instance_id);
if (update_node_msg->instance_id[0])
node_ptr->instance_id = xstrdup(
update_node_msg->instance_id);
update_db = true;
}
if (update_node_msg->instance_type) {
xfree(node_ptr->instance_type);
if (update_node_msg->instance_type[0])
node_ptr->instance_type = xstrdup(
update_node_msg->instance_type);
update_db = true;
}
if (update_db) {
clusteracct_storage_g_node_update(acct_db_conn,
node_ptr);
}
/*
 * A resume time is only honored for DOWN or DRAIN requests. INFINITE maps to
 * 0, anything else is converted to an absolute time (now + offset).
 */
if ((update_node_msg->resume_after != NO_VAL) &&
((update_node_msg->node_state == NODE_STATE_DOWN) ||
(update_node_msg->node_state == NODE_STATE_DRAIN))) {
if (update_node_msg->resume_after == INFINITE)
resume_after = 0;
else
resume_after =
now + update_node_msg->resume_after;
}
/*
 * Topology: install the new string (empty means none) and ask the topology
 * plugin to accept it; on failure restore the old string, re-apply it and
 * record an error.
 */
if (update_node_msg->topology_str) {
char *topology_str_old = node_ptr->topology_str;
node_ptr->topology_str = NULL;
if (*update_node_msg->topology_str) {
node_ptr->topology_str =
xstrdup(update_node_msg->topology_str);
}
if (topology_g_add_rm_node(node_ptr)) {
info("Invalid node topology specified %s",
node_ptr->topology_str);
xfree(node_ptr->topology_str);
node_ptr->topology_str = topology_str_old;
topology_g_add_rm_node(node_ptr);
error_code =
ESLURM_REQUESTED_TOPO_CONFIG_UNAVAILABLE;
} else {
xfree(topology_str_old);
}
}
state_val = update_node_msg->node_state;
if (_equivalent_node_state(node_ptr, state_val)) {
/* Update resume time if another equivalent update */
if (resume_after != NO_VAL) {
node_ptr->resume_after = resume_after;
/* NOTE(review): %lu assumes resume_after is an unsigned long sized value */
info("update_node: node %s will be resumed on %lu",
this_node_name, node_ptr->resume_after);
}
/*
* No accounting update if node state and reason are
* unchanged
*/
if(!xstrcmp(node_ptr->reason,
update_node_msg->reason)) {
free(this_node_name);
continue;
}
} else if (resume_after != NO_VAL) {
/* Set resume time for the 1st time */
node_ptr->resume_after = resume_after;
info("update_node: node %s will be resumed on %lu",
this_node_name, node_ptr->resume_after);
} else if (node_ptr->resume_after) {
/*
* Reset resume time if the state updates to another
* different from down or drain
*/
node_ptr->resume_after = 0;
info("update_node: ResumeAfter reset for node %s after a state change",
this_node_name);
}
/* Store a non-empty reason together with its time and the requesting uid */
if ((update_node_msg -> reason) &&
(update_node_msg -> reason[0])) {
xfree(node_ptr->reason);
node_ptr->reason = xstrdup(update_node_msg->reason);
node_ptr->reason_time = now;
node_ptr->reason_uid = auth_uid;
info ("update_node: node %s reason set to: %s",
this_node_name, node_ptr->reason);
}
/*
 * Reject invalid transitions: the state request is dropped (NO_VAL) and an
 * error recorded, but the other changes made above are kept. base_state is
 * then reduced to the node's base state bits.
 */
if (state_val != NO_VAL) {
base_state = node_ptr->node_state;
if (!_valid_node_state_change(base_state, state_val)) {
info("Invalid node state transition requested "
"for node %s from=%s to=%s",
this_node_name,
node_state_string(base_state),
node_state_string(state_val));
state_val = NO_VAL;
error_code =
ESLURM_INVALID_NODE_STATE_TRANSITION;
}
base_state &= NODE_STATE_BASE;
}
/*
 * Apply the requested state. RESUME and UNDRAIN are first translated into a
 * base state, which the second if/else chain then applies. The POWER_DOWN and
 * POWER_UP branches only set flags and "continue", skipping the common tail.
 */
if (state_val != NO_VAL) {
/* flags as they were before this request changes them */
node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
if (state_val == NODE_RESUME) {
trigger_node_resume(node_ptr);
if (IS_NODE_IDLE(node_ptr) &&
(IS_NODE_DRAIN(node_ptr) ||
IS_NODE_FAIL(node_ptr))) {
clusteracct_storage_g_node_up(
acct_db_conn,
node_ptr,
now);
acct_updated = true;
}
node_ptr->node_state &= (~NODE_STATE_DRAIN);
node_ptr->node_state &= (~NODE_STATE_FAIL);
node_ptr->node_state &=
(~NODE_STATE_REBOOT_REQUESTED);
node_ptr->node_state &=
(~NODE_STATE_REBOOT_ISSUED);
/* A node that was powering down is recorded as powered down */
if (IS_NODE_POWERING_DOWN(node_ptr)) {
node_ptr->node_state &=
(~NODE_STATE_INVALID_REG);
node_ptr->node_state &=
(~NODE_STATE_POWERING_DOWN);
node_ptr->node_state |=
NODE_STATE_POWERED_DOWN;
if (IS_NODE_CLOUD(node_ptr))
set_node_comm_name(
node_ptr, NULL,
node_ptr->name);
node_ptr->power_save_req_time = 0;
reset_node_active_features(node_ptr);
reset_node_instance(node_ptr);
clusteracct_storage_g_node_down(
acct_db_conn,
node_ptr, now,
"Powered down after resume",
node_ptr->reason_uid);
}
/*
 * Pick the resulting base state: DOWN and FUTURE nodes become IDLE; nodes
 * that were DOWN, FUTURE or had the DRAIN flag before this request are also
 * passed to _require_node_reg().
 */
if (IS_NODE_DOWN(node_ptr)) {
state_val = NODE_STATE_IDLE;
_require_node_reg(node_ptr);
} else if (IS_NODE_FUTURE(node_ptr)) {
if (node_ptr->port == 0) {
node_ptr->port =
slurm_conf.slurmd_port;
}
state_val = NODE_STATE_IDLE;
bit_clear(future_node_bitmap,
node_ptr->index);
_require_node_reg(node_ptr);
} else if (node_flags & NODE_STATE_DRAIN) {
state_val = base_state;
_require_node_reg(node_ptr);
} else
state_val = base_state;
} else if (state_val == NODE_STATE_UNDRAIN) {
if (IS_NODE_IDLE(node_ptr) &&
IS_NODE_DRAIN(node_ptr)) {
clusteracct_storage_g_node_up(
acct_db_conn,
node_ptr,
now);
acct_updated = true;
}
node_ptr->node_state &= (~NODE_STATE_DRAIN);
_require_node_reg(node_ptr);
state_val = base_state;
}
if ((state_val == NODE_STATE_DOWN) ||
(state_val == NODE_STATE_FUTURE)) {
/* We must set node DOWN before killing
* its jobs */
_make_node_down(node_ptr, now);
kill_running_job_by_node_ptr(node_ptr);
if (state_val == NODE_STATE_FUTURE) {
bool dyn_norm_node = false;
if (IS_NODE_DYNAMIC_FUTURE(node_ptr)) {
/* Reset comm and hostname */
set_node_comm_name(
node_ptr, NULL,
node_ptr->name);
reset_node_instance(node_ptr);
}
/*
* Preserve dynamic norm state until
* node is deleted.
*/
if (IS_NODE_DYNAMIC_NORM(node_ptr))
dyn_norm_node = true;
/* All other state flags are dropped here */
node_ptr->node_state =
NODE_STATE_FUTURE;
if (dyn_norm_node)
node_ptr->node_state |=
NODE_STATE_DYNAMIC_NORM;
bit_set(future_node_bitmap,
node_ptr->index);
bit_clear(power_up_node_bitmap,
node_ptr->index);
clusteracct_storage_g_node_down(
acct_db_conn,
node_ptr, now,
"Set to State=FUTURE",
node_ptr->reason_uid);
}
} else if (state_val == NODE_STATE_IDLE) {
/* assume they want to clear DRAIN and
* FAIL flags too */
if (IS_NODE_DOWN(node_ptr)) {
trigger_node_up(node_ptr);
clusteracct_storage_g_node_up(
acct_db_conn,
node_ptr,
now);
acct_updated = true;
} else if (IS_NODE_IDLE(node_ptr) &&
(IS_NODE_DRAIN(node_ptr) ||
IS_NODE_FAIL(node_ptr))) {
clusteracct_storage_g_node_up(
acct_db_conn,
node_ptr,
now);
acct_updated = true;
} /* else already fully available */
node_ptr->node_state &= (~NODE_STATE_DRAIN);
node_ptr->node_state &= (~NODE_STATE_FAIL);
/* A non-responding node is only made available if it is powered down */
if (!IS_NODE_NO_RESPOND(node_ptr) ||
IS_NODE_POWERED_DOWN(node_ptr))
make_node_avail(node_ptr);
bit_set (idle_node_bitmap, node_ptr->index);
bit_set (up_node_bitmap, node_ptr->index);
if (IS_NODE_POWERED_DOWN(node_ptr))
node_ptr->last_busy = 0;
else
node_ptr->last_busy = now;
} else if (state_val == NODE_STATE_ALLOCATED) {
if (!IS_NODE_DRAIN(node_ptr) &&
!IS_NODE_FAIL(node_ptr) &&
!IS_NODE_NO_RESPOND(node_ptr))
make_node_avail(node_ptr);
bit_set (up_node_bitmap, node_ptr->index);
bit_clear (idle_node_bitmap, node_ptr->index);
} else if ((state_val == NODE_STATE_DRAIN) ||
(state_val == NODE_STATE_FAIL)) {
if (IS_NODE_ALLOCATED(node_ptr) &&
(IS_NODE_POWERED_DOWN(node_ptr) ||
IS_NODE_POWERING_UP(node_ptr))) {
info("%s: DRAIN/FAIL request for node %s which is allocated and being powered up. Requeuing jobs",
__func__, this_node_name);
kill_running_job_by_node_ptr(node_ptr);
}
trigger_node_draining(node_ptr);
bit_clear (avail_node_bitmap, node_ptr->index);
/*
 * Clear both flags, then set the requested one; state_val becomes the full
 * node state so the common tail below stores it unchanged.
 */
node_ptr->node_state &= (~NODE_STATE_DRAIN);
node_ptr->node_state &= (~NODE_STATE_FAIL);
state_val = node_ptr->node_state |= state_val;
/* With no running or completing jobs the node counts as drained already */
if ((node_ptr->run_job_cnt == 0) &&
(node_ptr->comp_job_cnt == 0)) {
trigger_node_drained(node_ptr);
clusteracct_storage_g_node_down(
acct_db_conn,
node_ptr, now, NULL,
node_ptr->reason_uid);
}
} else if (state_val & NODE_STATE_POWER_DOWN) {
/*
 * Power down request. Every path through this branch ends in "continue", so
 * the common tail after the state chain is not executed for it.
 */
if ((state_val & NODE_STATE_POWER_UP) &&
(IS_NODE_POWERING_UP(node_ptr))) {
/* Clear any reboot op in progress */
node_ptr->node_state &=
(~NODE_STATE_POWERING_UP);
node_ptr->last_response = now;
free(this_node_name);
continue;
}
/* Abort reboot request */
if (IS_NODE_REBOOT_REQUESTED(node_ptr) ||
IS_NODE_REBOOT_ISSUED(node_ptr)) {
/*
* A node is DOWN+REBOOT_ISSUED when the
* reboot has been issued. Set node
* state back to idle only if the reboot
* has been issued. Node will remain
* cleared in the avail_node_bitmap
* until the node is powered down.
*/
if (IS_NODE_REBOOT_ISSUED(node_ptr) &&
IS_NODE_DOWN(node_ptr)) {
node_ptr->node_state =
NODE_STATE_IDLE |
(node_ptr->node_state &
NODE_STATE_FLAGS);
}
node_ptr->node_state &=
(~NODE_STATE_REBOOT_REQUESTED);
node_ptr->node_state &=
(~NODE_STATE_REBOOT_ISSUED);
xfree(node_ptr->reason);
info("Canceling REBOOT on node %s",
this_node_name);
}
if (IS_NODE_POWERING_DOWN(node_ptr)) {
info("ignoring power down request for node %s, already powering down",
this_node_name);
node_ptr->next_state = NO_VAL;
free(this_node_name);
continue;
}
if (state_val & NODE_STATE_POWERED_DOWN) {
/* Force power down */
_make_node_unavail(node_ptr);
/*
* Kill any running jobs and requeue if
* possible.
*/
kill_running_job_by_node_ptr(node_ptr);
node_ptr->node_state &=
(~NODE_STATE_POWERING_UP);
} else if (state_val & NODE_STATE_POWER_DRAIN) {
/* power down asap -- drain */
_drain_node(node_ptr, "POWER_DOWN_ASAP",
node_ptr->reason_uid);
}
if (IS_NODE_DOWN(node_ptr)) {
/* Abort any power up request */
node_ptr->node_state &=
(~NODE_STATE_POWERING_UP);
}
if (IS_NODE_POWERED_DOWN(node_ptr))
info("power down request repeating for node %s",
this_node_name);
else
info("powering down node %s",
this_node_name);
node_ptr->node_state |=
NODE_STATE_POWER_DOWN;
if (IS_NODE_IDLE(node_ptr)) {
/*
* remove node from avail_node_bitmap so
* that it will power_down before jobs
* get on it.
*/
bit_clear(avail_node_bitmap,
node_ptr->index);
}
node_ptr->next_state = NO_VAL;
bit_clear(rs_node_bitmap, node_ptr->index);
free(this_node_name);
continue;
} else if (state_val == NODE_STATE_POWER_UP) {
/*
 * Power up request; like the power down branch it ends in "continue" and
 * bypasses the common tail.
 */
if (!IS_NODE_POWERED_DOWN(node_ptr)) {
if (IS_NODE_POWERING_UP(node_ptr)) {
node_ptr->node_state |=
NODE_STATE_POWERED_DOWN;
node_ptr->node_state |=
NODE_STATE_POWER_UP;
info("power up request "
"repeating for node %s",
this_node_name);
} else {
verbose("node %s is already "
"powered up",
this_node_name);
}
} else {
node_ptr->node_state |=
NODE_STATE_POWER_UP;
info("powering up node %s",
this_node_name);
}
bit_set(power_up_node_bitmap, node_ptr->index);
node_ptr->next_state = NO_VAL;
bit_clear(rs_node_bitmap, node_ptr->index);
free(this_node_name);
continue;
} else if (state_val == NODE_STATE_NO_RESPOND) {
/* Only the flag is added; the base state is kept */
node_ptr->node_state |= NODE_STATE_NO_RESPOND;
state_val = base_state;
bit_clear(avail_node_bitmap, node_ptr->index);
} else if (state_val == NODE_STATE_REBOOT_CANCEL) {
/* A reboot can only be cancelled while it has not been issued yet */
if (!IS_NODE_REBOOT_ISSUED(node_ptr)) {
node_ptr->node_state &=
(~NODE_STATE_REBOOT_REQUESTED);
state_val = base_state;
if ((node_ptr->next_state &
NODE_STATE_FLAGS) &
NODE_STATE_UNDRAIN)
_undo_reboot_asap(node_ptr);
} else {
info("REBOOT on node %s already in progress -- unable to cancel",
this_node_name);
err_code = error_code =
ESLURM_REBOOT_IN_PROGRESS;
}
} else {
info("Invalid node state specified %u",
state_val);
err_code = 1;
error_code = ESLURM_INVALID_NODE_STATE;
}
/*
 * Common tail: store the resulting state while keeping the node's current
 * flag bits. next_state is left alone while a reboot is pending.
 */
if (err_code == 0) {
node_ptr->node_state = state_val |
(node_ptr->node_state &
NODE_STATE_FLAGS);
if (!IS_NODE_REBOOT_REQUESTED(node_ptr) &&
!IS_NODE_REBOOT_ISSUED(node_ptr))
node_ptr->next_state = NO_VAL;
bit_clear(rs_node_bitmap, node_ptr->index);
info ("update_node: node %s state set to %s",
this_node_name,
node_state_string(state_val));
}
}
/*
* After all the possible state changes, check if we need to
* clear the asap_node_bitmap.
*/
if (!IS_NODE_REBOOT_ASAP(node_ptr))
bit_clear(asap_node_bitmap, node_ptr->index);
/*
 * Unless an accounting "up" record was already sent above, send one for any
 * node that is not DOWN, DRAIN or FAIL.
 */
if (!acct_updated && !IS_NODE_DOWN(node_ptr) &&
!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) {
/* reason information is handled in
clusteracct_storage_g_node_up()
*/
clusteracct_storage_g_node_up(
acct_db_conn, node_ptr, now);
}
free (this_node_name);
}
/* Write/clear log */
(void)update_node_active_features(NULL, NULL, FEATURE_MODE_PEND);
FREE_NULL_HOSTLIST(host_list);
FREE_NULL_HOSTLIST(hostaddr_list);
FREE_NULL_HOSTLIST(hostname_list);
last_node_update = now;
/*
 * The remaining updates operate on the whole node specification at once and
 * are only attempted while no error has been recorded.
 */
if ((error_code == SLURM_SUCCESS) && (update_node_msg->features)) {
error_code = update_node_avail_features(
update_node_msg->node_names,
update_node_msg->features,
FEATURE_MODE_IND);
}
if ((error_code == SLURM_SUCCESS) && (update_node_msg->gres)) {
error_code = _update_node_gres(update_node_msg->node_names,
update_node_msg->gres);
}
/*
* Update weight. Weight is part of config_ptr,
* hence split config records if required
*/
if ((error_code == SLURM_SUCCESS) &&
(update_node_msg->weight != NO_VAL)) {
error_code = _update_node_weight(update_node_msg->node_names,
update_node_msg->weight);
if (error_code == SLURM_SUCCESS) {
/* sort config_list by weight for scheduling */
list_sort(config_list, &list_compare_config);
}
}
return error_code;
}
/*
 * restore_node_features - Make node and config (from slurm.conf) fields
 *	consistent for Features, Gres and Weight
 * IN recover -
 *	0, 1 - use data from config record, built using slurm.conf
 *	2 = use data from node record, built from saved state
 *
 * For every node returned by next_node() this also rebuilds extra_data from
 * the node's extra string (when extra constraints are enabled) and
 * reconfigures and logs the node's GRES state.
 */
extern void restore_node_features(int recover)
{
	const bool no_feature_plugins = (node_features_g_count() == 0);
	const bool from_saved_state = (recover == 2);
	node_record_t *node_ptr;
	int inx = 0;

	while ((node_ptr = next_node(&inx))) {
		char *gres_src;

		/*
		 * Weight: either push the node's value into the config
		 * records or reset the node to the configured value.
		 */
		if (node_ptr->config_ptr->weight != node_ptr->weight) {
			error("Node %s Weight(%u) differ from slurm.conf",
			      node_ptr->name, node_ptr->weight);
			if (from_saved_state)
				_update_node_weight(node_ptr->name,
						    node_ptr->weight);
			else
				node_ptr->weight =
					node_ptr->config_ptr->weight;
		}

		/*
		 * Features: the difference is only reported when no
		 * NodeFeatures plugin is counted.
		 */
		if (xstrcmp(node_ptr->config_ptr->feature,
			    node_ptr->features)) {
			if (no_feature_plugins)
				error("Node %s Features(%s) differ from slurm.conf",
				      node_ptr->name, node_ptr->features);
			if (from_saved_state)
				update_node_avail_features(node_ptr->name,
							   node_ptr->features,
							   FEATURE_MODE_COMB);
		}

		/*
		 * Rebuild extra_data
		 */
		if (node_ptr->extra && node_ptr->extra[0] &&
		    extra_constraints_enabled()) {
			data_t *parsed = NULL;

			if (!serialize_g_string_to_data(
				    &parsed, node_ptr->extra,
				    strlen(node_ptr->extra),
				    MIME_TYPE_JSON))
				node_ptr->extra_data = parsed;
			else
				info("Failed to decode extra \"%s\" for node %s",
				     node_ptr->extra, node_ptr->name);
		}

		/* node_ptr->gres is set when recover == 2 */
		gres_src = node_ptr->gres ? node_ptr->gres :
					    node_ptr->config_ptr->gres;
		(void) gres_node_reconfig(node_ptr->name, gres_src,
					  &node_ptr->gres,
					  &node_ptr->gres_list,
					  slurm_conf.conf_flags & CONF_FLAG_OR,
					  node_ptr->cores,
					  node_ptr->tot_sockets);
		gres_node_state_log(node_ptr->gres_list, node_ptr->name);

		inx++;
	}

	/* flush whatever update_node_avail_features() left pending */
	update_node_avail_features(NULL, NULL, FEATURE_MODE_PEND);
}
/*
 * Duplicate a configuration record except for the node names & bitmap
 * IN config_ptr - record to copy
 * RET record obtained from create_config_record() carrying copies of the
 *	scalar fields and duplicated cpu_spec_list, feature and gres strings.
 *	The nodes and node_bitmap fields are not touched here; the caller is
 *	expected to fill them in.
 * Side effect: calls _queue_consolidate_config_list().
 */
config_record_t *_dup_config(config_record_t *config_ptr)
{
	config_record_t *dup = create_config_record();

	dup->magic = config_ptr->magic;

	/* processor layout */
	dup->cpus = config_ptr->cpus;
	dup->boards = config_ptr->boards;
	dup->tot_sockets = config_ptr->tot_sockets;
	dup->cores = config_ptr->cores;
	dup->threads = config_ptr->threads;

	/* specialized / reserved resources */
	dup->core_spec_cnt = config_ptr->core_spec_cnt;
	dup->mem_spec_limit = config_ptr->mem_spec_limit;
	dup->res_cores_per_gpu = config_ptr->res_cores_per_gpu;

	/* memory, disk and scheduling weight */
	dup->real_memory = config_ptr->real_memory;
	dup->tmp_disk = config_ptr->tmp_disk;
	dup->weight = config_ptr->weight;

	/* strings get their own copy */
	dup->cpu_spec_list = xstrdup(config_ptr->cpu_spec_list);
	dup->feature = xstrdup(config_ptr->feature);
	dup->gres = xstrdup(config_ptr->gres);

	_queue_consolidate_config_list();

	return dup;
}
/*
 * _update_node_weight - Update weight associated with nodes
 *	build new config list records as needed
 * IN node_names - list of nodes to update
 * IN weight - New weight value
 * RET: SLURM_SUCCESS or error code (the value returned by
 *	node_name2bitmap() when the node names cannot be resolved)
 */
static int _update_node_weight(char *node_names, uint32_t weight)
{
	bitstr_t *node_bitmap = NULL, *tmp_bitmap;
	list_itr_t *config_iterator;
	config_record_t *config_ptr, *new_config_ptr, *first_new = NULL;
	int rc, config_cnt, tmp_cnt;

	rc = node_name2bitmap(node_names, false, &node_bitmap, NULL);
	if (rc) {
		info("_update_node_weight: invalid node_name");
		/*
		 * Release whatever bitmap was handed back with the failure
		 * (update_node() does the same after a failed
		 * hostlist2bitmap()); this is a no-op if none was allocated.
		 */
		FREE_NULL_BITMAP(node_bitmap);
		return rc;
	}

	/* For each config_record with one of these nodes,
	 * update it (if all nodes updated) or split it into
	 * a new entry */
	config_iterator = list_iterator_create(config_list);
	while ((config_ptr = list_next(config_iterator))) {
		/*
		 * NOTE(review): this relies on records created by
		 * _dup_config() being reached only after all pre-existing
		 * records - confirm against create_config_record().
		 */
		if (config_ptr == first_new)
			break;	/* done with all original records */

		/* nodes of this config record that are being updated */
		tmp_bitmap = bit_copy(node_bitmap);
		bit_and(tmp_bitmap, config_ptr->node_bitmap);
		config_cnt = bit_set_count(config_ptr->node_bitmap);
		tmp_cnt = bit_set_count(tmp_bitmap);

		if (tmp_cnt == 0) {
			/* no overlap, leave alone */
		} else if (tmp_cnt == config_cnt) {
			/* all nodes changed, update in situ */
			config_ptr->weight = weight;
		} else {
			/* partial update, split config_record */
			new_config_ptr = _dup_config(config_ptr);
			if (first_new == NULL)
				first_new = new_config_ptr;

			/* Change weight for the given nodes */
			new_config_ptr->weight = weight;
			new_config_ptr->node_bitmap = bit_copy(tmp_bitmap);
			new_config_ptr->nodes = bitmap2node_name(tmp_bitmap);
			_update_config_ptr(tmp_bitmap, new_config_ptr);

			/* Update remaining records */
			bit_and_not(config_ptr->node_bitmap, tmp_bitmap);
			xfree(config_ptr->nodes);
			config_ptr->nodes = bitmap2node_name(
				config_ptr->node_bitmap);
		}
		FREE_NULL_BITMAP(tmp_bitmap);
	}
	list_iterator_destroy(config_iterator);
	FREE_NULL_BITMAP(node_bitmap);

	info("_update_node_weight: nodes %s weight set to: %u",
	     node_names, weight);
	return SLURM_SUCCESS;
}
/*
 * _update_node_features_post - log a node feature change, optionally folding
 *	consecutive identical changes into one log record
 * IN node_names - names of the nodes just updated (logged in IND mode)
 * IN/OUT last_features - feature string of the log record being accumulated
 * IN features - feature string just applied
 * IN/OUT last_node_bitmap - nodes covered by the accumulated log record
 * IN/OUT node_bitmap - nodes just updated; ownership moves to
 *	*last_node_bitmap (and *node_bitmap becomes NULL) when a new
 *	accumulated record is started
 * IN mode - FEATURE_MODE_IND: log this change immediately
 *	     FEATURE_MODE_COMB: accumulate, logging the previous record first
 *	     if its features differ
 *	     FEATURE_MODE_PEND: only flush whatever record is accumulated
 * IN type - word placed in the log message ahead of "features"
 */
static inline void _update_node_features_post(
	char *node_names,
	char **last_features, char *features,
	bitstr_t **last_node_bitmap, bitstr_t **node_bitmap,
	int mode, const char *type)
{
	bool flush_pending;

	xassert(last_features);
	xassert(last_node_bitmap);
	xassert(node_bitmap);

	if (mode == FEATURE_MODE_IND) {
		debug2("%s: nodes %s %s features set to: %s",
		       __func__, node_names, type, features);
	} else {
		/* A record is pending and either must be flushed or no longer
		 * matches the incoming feature string */
		flush_pending = *last_features && *last_node_bitmap &&
				((mode == FEATURE_MODE_PEND) ||
				 xstrcmp(features, *last_features));
		if (flush_pending) {
			char *pending_names =
				bitmap2node_name(*last_node_bitmap);

			debug2("%s: nodes %s %s features set to: %s",
			       __func__, pending_names, type, *last_features);
			xfree(pending_names);
			xfree(*last_features);
			FREE_NULL_BITMAP(*last_node_bitmap);
		}
	}

	if (mode != FEATURE_MODE_COMB)
		return;

	if (*last_features) {
		/* Same features as the pending record: just add the nodes */
		bit_or(*last_node_bitmap, *node_bitmap);
		return;
	}

	/* Nothing pending: begin a new record and take over the bitmap */
	*last_features = xstrdup(features);
	*last_node_bitmap = *node_bitmap;
	*node_bitmap = NULL;
}
/*
 * update_node_active_features - Update the active features of a set of nodes
 * IN node_names - list of nodes to update
 * IN active_features - new active features value
 * IN mode - logging mode handed to _update_node_features_post(); the update
 *	itself is only performed for modes below FEATURE_MODE_PEND
 * RET SLURM_SUCCESS or the error code from node_name2bitmap()
 */
extern int update_node_active_features(char *node_names, char *active_features,
				       int mode)
{
	static char *last_active_features = NULL;
	static bitstr_t *last_node_bitmap = NULL;
	bitstr_t *node_bitmap = NULL;
	int rc;

	if (mode < FEATURE_MODE_PEND) {
		/* Perform update of node active features */
		rc = node_name2bitmap(node_names, false, &node_bitmap, NULL);
		if (rc) {
			info("%s: invalid node_name (%s)", __func__,
			     node_names);
			/*
			 * A bitmap may have been handed back even though the
			 * lookup failed; release it rather than leak it
			 * (harmless when the pointer is still NULL).
			 */
			FREE_NULL_BITMAP(node_bitmap);
			return rc;
		}
		node_features_update_list(active_feature_list, active_features,
					  node_bitmap);
		(void) node_features_g_node_update(active_features,
						   node_bitmap);
	}

	_update_node_features_post(node_names,
				   &last_active_features, active_features,
				   &last_node_bitmap, &node_bitmap,
				   mode, "active");
	/* No-op if the helper above took ownership and cleared the pointer */
	FREE_NULL_BITMAP(node_bitmap);

	return SLURM_SUCCESS;
}
/*
 * update_node_avail_features - Update the available features of a set of
 *	nodes, building new config list records as needed
 * IN node_names - list of nodes to update
 * IN avail_features - new available features value; NULL or "" clears the
 *	feature string of the affected config records
 * IN mode - logging mode handed to _update_node_features_post(); the update
 *	itself is only performed for modes below FEATURE_MODE_PEND
 * RET SLURM_SUCCESS or the error code from node_name2bitmap()
 */
extern int update_node_avail_features(char *node_names, char *avail_features,
				      int mode)
{
	static char *last_avail_features = NULL;
	static bitstr_t *last_node_bitmap = NULL;
	bitstr_t *node_bitmap = NULL, *tmp_bitmap;
	list_itr_t *config_iterator;
	config_record_t *config_ptr, *new_config_ptr, *first_new = NULL;
	int rc, config_cnt, tmp_cnt;

	if (mode < FEATURE_MODE_PEND) {
		rc = node_name2bitmap(node_names, false, &node_bitmap, NULL);
		if (rc) {
			info("%s: invalid node_name (%s)",
			     __func__, node_names);
			/*
			 * A bitmap may have been handed back even though the
			 * lookup failed; release it rather than leak it
			 * (harmless when the pointer is still NULL).
			 */
			FREE_NULL_BITMAP(node_bitmap);
			return rc;
		}

		/*
		 * For each config_record with one of these nodes, update it
		 * (if all nodes updated) or split it into a new entry
		 */
		config_iterator = list_iterator_create(config_list);
		while ((config_ptr = list_next(config_iterator))) {
			/*
			 * Records created by the splits below show up later
			 * in the same list; reaching the first ends the pass.
			 */
			if (config_ptr == first_new)
				break;	/* done with all original records */

			tmp_bitmap = bit_copy(node_bitmap);
			bit_and(tmp_bitmap, config_ptr->node_bitmap);
			config_cnt = bit_set_count(config_ptr->node_bitmap);
			tmp_cnt = bit_set_count(tmp_bitmap);
			if (tmp_cnt == 0) {
				/* no overlap, leave alone */
			} else if (tmp_cnt == config_cnt) {
				/* all nodes changed, update in situ */
				xfree(config_ptr->feature);
				if (avail_features && avail_features[0]) {
					config_ptr->feature =
						xstrdup(avail_features);
				}
			} else {
				/* partial update, split config_record */
				new_config_ptr = _dup_config(config_ptr);
				if (first_new == NULL)
					first_new = new_config_ptr;
				xfree(new_config_ptr->feature);
				if (avail_features && avail_features[0]) {
					new_config_ptr->feature =
						xstrdup(avail_features);
				}
				new_config_ptr->node_bitmap =
					bit_copy(tmp_bitmap);
				new_config_ptr->nodes =
					bitmap2node_name(tmp_bitmap);
				_update_config_ptr(tmp_bitmap, new_config_ptr);

				/* Update remaining records */
				bit_and_not(config_ptr->node_bitmap, tmp_bitmap);
				xfree(config_ptr->nodes);
				config_ptr->nodes = bitmap2node_name(
					config_ptr->node_bitmap);
			}
			FREE_NULL_BITMAP(tmp_bitmap);
		}
		list_iterator_destroy(config_iterator);

		if (avail_feature_list) {	/* list not set at startup */
			node_features_update_list(avail_feature_list,
						  avail_features, node_bitmap);
		}
	}

	_update_node_features_post(node_names,
				   &last_avail_features, avail_features,
				   &last_node_bitmap, &node_bitmap,
				   mode, "available");
	/* No-op if the helper above took ownership and cleared the pointer */
	FREE_NULL_BITMAP(node_bitmap);

	return SLURM_SUCCESS;
}
/*
 * filter_out_changeable_features - build a copy of a comma separated feature
 *	list that omits every feature for which
 *	node_features_g_changeable_feature() returns true
 * IN features - comma separated feature list, may be NULL
 * RET comma separated list of the remaining features; NULL if features is
 *	NULL or nothing remains. The caller must xfree() the result.
 */
extern char *filter_out_changeable_features(const char *features)
{
	char *kept = NULL, *scratch, *feat, *state = NULL;

	if (!features)
		return NULL;

	/* strtok_r() writes into its input, so tokenize a private copy */
	scratch = xstrdup(features);
	feat = strtok_r(scratch, ",", &state);
	while (feat) {
		if (!node_features_g_changeable_feature(feat))
			xstrfmtcat(kept, "%s%s", kept ? "," : "", feat);
		feat = strtok_r(NULL, ",", &state);
	}
	xfree(scratch);

	return kept;
}
/*
 * reset_node_active_features - replace a node's active features with its
 *	feature list minus the changeable features, then push the result
 *	through update_node_active_features() in FEATURE_MODE_IND
 * IN node_ptr - node to reset
 */
extern void reset_node_active_features(node_record_t *node_ptr)
{
	char *base_features;

	xassert(node_ptr);

	xfree(node_ptr->features_act);
	base_features = filter_out_changeable_features(node_ptr->features);
	node_ptr->features_act = base_features;

	update_node_active_features(node_ptr->name, base_features,
				    FEATURE_MODE_IND);
}
/*
 * reset_node_instance - release a node's instance type and instance id
 *	strings
 * IN node_ptr - node to reset
 */
extern void reset_node_instance(node_record_t *node_ptr)
{
	xassert(node_ptr);

	xfree(node_ptr->instance_type);
	xfree(node_ptr->instance_id);
}
/*
 * _update_node_gres - Update generic resources associated with nodes
 *	build new config list records as needed
 * IN node_names - list of nodes to update
 * IN gres - New gres value
 * RET: SLURM_SUCCESS or error code (the first gres_node_reconfig() failure
 *	is reported; nodes that reconfigured successfully are still updated)
 */
static int _update_node_gres(char *node_names, char *gres)
{
	bitstr_t *changed_node_bitmap = NULL, *node_bitmap = NULL, *tmp_bitmap;
	list_itr_t *config_iterator;
	config_record_t *config_ptr, *new_config_ptr, *first_new = NULL;
	node_record_t *node_ptr;
	int rc, rc2, overlap1, overlap2;

	rc = node_name2bitmap(node_names, false, &node_bitmap, NULL);
	if (rc) {
		info("%s: invalid node_name: %s", __func__, node_names);
		/*
		 * A bitmap may have been handed back even though the lookup
		 * failed; release it rather than leak it (harmless when the
		 * pointer is still NULL).
		 */
		FREE_NULL_BITMAP(node_bitmap);
		return rc;
	}

	/*
	 * For each config_record with one of these nodes,
	 * update it (if all nodes updated) or split it into a new entry
	 */
	config_iterator = list_iterator_create(config_list);
	while ((config_ptr = list_next(config_iterator))) {
		/*
		 * Records created by the splits below show up later in the
		 * same list; reaching the first of them ends the pass.
		 */
		if (config_ptr == first_new)
			break;	/* done with all original records */

		overlap1 = bit_overlap(node_bitmap, config_ptr->node_bitmap);
		if (overlap1 == 0)
			continue;	/* No changes to this config_record */

		/* At least some nodes in this config need to change */
		tmp_bitmap = bit_copy(node_bitmap);
		bit_and(tmp_bitmap, config_ptr->node_bitmap);
		for (int i = 0; (node_ptr = next_node_bitmap(tmp_bitmap, &i));
		     i++) {
			rc2 = gres_node_reconfig(
				node_ptr->name,
				gres, &node_ptr->gres,
				&node_ptr->gres_list,
				slurm_conf.conf_flags & CONF_FLAG_OR,
				node_ptr->cores,
				node_ptr->tot_sockets);
			if (rc2 != SLURM_SUCCESS) {
				/* Drop the failed node from the changed set
				 * and remember only the first error */
				bit_clear(tmp_bitmap, i);
				overlap1--;
				if (rc == SLURM_SUCCESS)
					rc = rc2;
			}
			gres_node_state_log(node_ptr->gres_list,
					    node_ptr->name);
		}

		overlap2 = bit_set_count(config_ptr->node_bitmap);
		if (overlap1 == 0) {
			/* No nodes actually changed in this configuration */
			FREE_NULL_BITMAP(tmp_bitmap);
		} else if (overlap1 == overlap2) {
			/* All nodes changed in this configuration */
			xfree(config_ptr->gres);
			if (gres && gres[0])
				config_ptr->gres = xstrdup(gres);
			if (changed_node_bitmap) {
				bit_or(changed_node_bitmap, tmp_bitmap);
				FREE_NULL_BITMAP(tmp_bitmap);
			} else {
				changed_node_bitmap = tmp_bitmap;
				tmp_bitmap = NULL;
			}
		} else {
			/*
			 * Some nodes changed in this configuration.
			 * Split config_record in two.
			 */
			new_config_ptr = _dup_config(config_ptr);
			if (!first_new)
				first_new = new_config_ptr;
			xfree(new_config_ptr->gres);
			if (gres && gres[0])
				new_config_ptr->gres = xstrdup(gres);
			/* The new record takes ownership of tmp_bitmap */
			new_config_ptr->node_bitmap = tmp_bitmap;
			new_config_ptr->nodes = bitmap2node_name(tmp_bitmap);
			_update_config_ptr(tmp_bitmap, new_config_ptr);
			if (changed_node_bitmap) {
				bit_or(changed_node_bitmap, tmp_bitmap);
			} else {
				changed_node_bitmap = bit_copy(tmp_bitmap);
			}

			/* Update remaining config_record */
			bit_and_not(config_ptr->node_bitmap, tmp_bitmap);
			xfree(config_ptr->nodes);
			config_ptr->nodes = bitmap2node_name(
				config_ptr->node_bitmap);
			tmp_bitmap = NULL;	/* Nothing left to free */
		}
	}
	list_iterator_destroy(config_iterator);
	FREE_NULL_BITMAP(node_bitmap);

	/* Report changed nodes, may be a subset of the requested nodes */
	if (changed_node_bitmap) {
		char *change_node_str = bitmap2node_name(changed_node_bitmap);
		info("%s: nodes %s gres set to: %s", __func__,
		     change_node_str, gres);
		FREE_NULL_BITMAP(changed_node_bitmap);
		xfree(change_node_str);
	}

	return rc;
}
/* Point every node whose bit is set in bitmap at the given config record */
static void _update_config_ptr(bitstr_t *bitmap, config_record_t *config_ptr)
{
	node_record_t *node_ptr;
	int inx = 0;

	while ((node_ptr = next_node_bitmap(bitmap, &inx))) {
		node_ptr->config_ptr = config_ptr;
		inx++;
	}
}
/*
 * _drain_node - set the DRAIN flag on a single node
 * IN node_ptr - node to drain; nothing is done if it already has DRAIN set
 * IN reason - reason recorded on the node, but only when the node has no
 *	reason yet or its reason starts with "Not responding"
 * IN reason_uid - user id recorded alongside the reason
 */
static void _drain_node(node_record_t *node_ptr, char *reason,
			uint32_t reason_uid)
{
	time_t now = time(NULL);
	bool replace_reason;

	xassert(node_ptr);

	/* state already changed, nothing to do */
	if (IS_NODE_DRAIN(node_ptr))
		return;

	trigger_node_draining(node_ptr);
	node_ptr->node_state |= NODE_STATE_DRAIN;
	bit_clear(avail_node_bitmap, node_ptr->index);
	info("drain_nodes: node %s state set to DRAIN",
	     node_ptr->name);

	replace_reason = !node_ptr->reason ||
			 !xstrncmp(node_ptr->reason, "Not responding", 14);
	if (replace_reason) {
		xfree(node_ptr->reason);
		node_ptr->reason = xstrdup(reason);
		node_ptr->reason_time = now;
		node_ptr->reason_uid = reason_uid;
	}

	/* Jobs still running or completing: node is not drained yet */
	if (node_ptr->run_job_cnt || node_ptr->comp_job_cnt)
		return;

	/* no jobs, node is drained */
	trigger_node_drained(node_ptr);
	clusteracct_storage_g_node_down(acct_db_conn, node_ptr, now, NULL,
					reason_uid);
}
/*
* drain_nodes - drain one or more nodes,
* no-op for nodes already drained or draining
* IN nodes - nodes to drain
* IN reason - reason to drain the nodes
* RET SLURM_SUCCESS or error code
* global: node_record_table_ptr - pointer to global node table
*/
extern int drain_nodes(char *nodes, char *reason, uint32_t reason_uid)
{
int error_code = SLURM_SUCCESS;
node_record_t *node_ptr;
char *this_node_name ;
hostlist_t *host_list;
if ((nodes == NULL) || (nodes[0] == '\0')) {
error ("drain_nodes: invalid node name %s", nodes);
return ESLURM_INVALID_NODE_NAME;
}
if ( (host_list = hostlist_create (nodes)) == NULL) {
error ("hostlist_create error on %s: %m", nodes);
return ESLURM_INVALID_NODE_NAME;
}
while ( (this_node_name = hostlist_shift (host_list)) ) {
if (!(node_ptr = find_node_record(this_node_name))) {
error_code = ESLURM_INVALID_NODE_NAME;
error("drain_nodes: node %s does not exist",
this_node_name);
free(this_node_name);
break;
}
free (this_node_name);
_drain_node(node_ptr, reason, reason_uid);
}
last_node_update = time (NULL);
hostlist_destroy (host_list);
/*
* check all reservations since nodes may have been in a reservation with
* floating count of nodes that needs to be updated
*/
validate_all_reservations(false, false);
return error_code;
}
/*
 * _valid_node_state_change - test whether an admin request to move a node
 *	from one state to another is permitted
 * IN old - current node state (base state plus flags)
 * IN new - requested node state
 * RET true if the change is allowed, false otherwise
 *
 * The result of power_save_test() is cached and refreshed whenever
 * slurm_conf.last_update changes.
 */
static bool _valid_node_state_change(uint32_t old, uint32_t new)
{
	static bool power_save_on = false;
	static time_t sched_update = 0;
	uint32_t base_state, node_flags;

	if (old == new)
		return true;

	base_state = old & NODE_STATE_BASE;
	node_flags = old & NODE_STATE_FLAGS;

	if (sched_update != slurm_conf.last_update) {
		power_save_on = power_save_test();
		sched_update = slurm_conf.last_update;
	}

	switch (new) {
	case NODE_STATE_DOWN:
	case NODE_STATE_DRAIN:
	case NODE_STATE_FAIL:
	case NODE_STATE_NO_RESPOND:
		return true;

	case NODE_STATE_UNDRAIN:
		return (node_flags & NODE_STATE_INVALID_REG) == 0;

	case NODE_STATE_POWER_DOWN:
	case NODE_STATE_POWER_UP:
	case (NODE_STATE_POWER_DOWN | NODE_STATE_POWER_UP):
	case (NODE_STATE_POWER_DOWN | NODE_STATE_POWERED_DOWN):
	case (NODE_STATE_POWER_DOWN | NODE_STATE_POWER_DRAIN):
		if (!power_save_on)
			info("attempt to do power work on node but PowerSave is disabled");
		return power_save_on;

	case NODE_RESUME:
		/* A node that is powering down may always be resumed */
		if (node_flags & NODE_STATE_POWERING_DOWN)
			return true;
		if (node_flags & NODE_STATE_INVALID_REG)
			return false;
		return (base_state == NODE_STATE_DOWN) ||
		       (base_state == NODE_STATE_FUTURE) ||
		       ((node_flags & (NODE_STATE_DRAIN |
				       NODE_STATE_FAIL |
				       NODE_STATE_REBOOT_REQUESTED)) != 0);

	case NODE_STATE_REBOOT_CANCEL:
		return (node_flags & NODE_STATE_REBOOT_REQUESTED) != 0;

	case NODE_STATE_FUTURE:
		return (base_state == NODE_STATE_DOWN) ||
		       (base_state == NODE_STATE_IDLE);

	case NODE_STATE_IDLE:
		if (node_flags & NODE_STATE_INVALID_REG)
			return false;
		return (base_state == NODE_STATE_DOWN) ||
		       (base_state == NODE_STATE_IDLE);

	case NODE_STATE_ALLOCATED:
		return base_state == NODE_STATE_ALLOCATED;

	default:	/* All others invalid */
		break;
	}

	return false;
}
/*
 * update_node_record_acct_gather_data - copy the energy data carried by a
 *	message into the record of the node the message names
 * IN msg - message holding the node name and its energy data
 * RET SLURM_SUCCESS, or ENOENT if no node record has that name
 */
extern int update_node_record_acct_gather_data(
	acct_gather_node_resp_msg_t *msg)
{
	node_record_t *node_ptr = find_node_record(msg->node_name);

	if (!node_ptr)
		return ENOENT;

	memcpy(node_ptr->energy, msg->energy, sizeof(acct_gather_energy_t));

	return SLURM_SUCCESS;
}
/*
 * _split_node_config - record a node's newly reported socket/core layout in
 *	its config record (the change could be due to a KNL NUMA mode change
 *	and reboot)
 * IN node_ptr - node whose layout changed; no-op if NULL or if it has no
 *	config record
 * IN reg_msg - registration message supplying the new cores and sockets
 *
 * If the node shares its config record with other nodes, it is first moved
 * to a private copy of that record so the other nodes are left untouched.
 */
static void _split_node_config(node_record_t *node_ptr,
			       slurm_node_registration_status_msg_t *reg_msg)
{
	config_record_t *shared, *target;

	if (!node_ptr || !node_ptr->config_ptr)
		return;

	shared = node_ptr->config_ptr;
	target = shared;

	if ((bit_set_count(shared->node_bitmap) > 1) &&
	    bit_test(shared->node_bitmap, node_ptr->index)) {
		target = create_config_record();
		memcpy(target, shared, sizeof(*target));

		/*
		 * The byte copy left every pointer member aliasing the shared
		 * record; give the new record its own strings, bitmap and
		 * node name list.
		 */
		target->cpu_spec_list = xstrdup(shared->cpu_spec_list);
		target->feature = xstrdup(shared->feature);
		target->gres = xstrdup(shared->gres);
		target->topology_str = xstrdup(shared->topology_str);
		target->node_bitmap = bit_alloc(node_record_count);
		bit_set(target->node_bitmap, node_ptr->index);
		target->nodes = xstrdup(node_ptr->name);

		/* Take the node out of the shared record */
		bit_clear(shared->node_bitmap, node_ptr->index);
		xfree(shared->nodes);
		shared->nodes = bitmap2node_name(shared->node_bitmap);

		node_ptr->config_ptr = target;
	}

	target->cores = reg_msg->cores;
	target->tot_sockets = reg_msg->sockets;
}
/*
 * _set_gpu_spec - rebuild a node's gpu_spec string and gpu_spec_bitmap from
 *	its res_cores_per_gpu value and the core topology of its "gpu" gres
 * IN node_ptr - node to update; any previous gpu_spec and gpu_spec_bitmap
 *	are released first
 * IN/OUT reason_down - on failure a description is appended (", " separated
 *	from any text already present)
 * RET SLURM_SUCCESS (also when res_cores_per_gpu is zero), or
 *	ESLURM_RES_CORES_PER_GPU_NO if the node has no "gpu" gres,
 *	ESLURM_RES_CORES_PER_GPU_TOPO if the gres has no core topology,
 *	ESLURM_RES_CORES_PER_GPU_UNIQUE if some topology entry cannot be
 *	given the required number of not-yet-used cores
 */
static int _set_gpu_spec(node_record_t *node_ptr, char **reason_down)
{
/* Plugin id for "gpu", looked up once and cached across calls */
static uint32_t gpu_plugin_id = NO_VAL;
gres_state_t *gres_state_node;
gres_node_state_t *gres_ns;
uint32_t res_cnt = node_ptr->res_cores_per_gpu;
xassert(reason_down);
/* Always start from a clean slate */
xfree(node_ptr->gpu_spec);
FREE_NULL_BITMAP(node_ptr->gpu_spec_bitmap);
/* Nothing to restrict on this node */
if (!res_cnt)
return SLURM_SUCCESS;
if (gpu_plugin_id == NO_VAL)
gpu_plugin_id = gres_build_id("gpu");
if (!(gres_state_node = list_find_first(node_ptr->gres_list,
gres_find_id,
&gpu_plugin_id))) {
/* No GPUs but we thought there were */
xstrfmtcat(*reason_down, "%sRestrictedCoresPerGPU=%u but no gpus on node %s",
*reason_down ? ", " : "", res_cnt, node_ptr->name);
return ESLURM_RES_CORES_PER_GPU_NO;
}
gres_ns = gres_state_node->gres_data;
/* Without per-entry core bitmaps there is nothing to choose cores from */
if (!gres_ns->topo_cnt || !gres_ns->topo_core_bitmap) {
xstrfmtcat(*reason_down, "%sRestrictedCoresPerGPU=%u but the gpus given don't have any topology on node %s.",
*reason_down ? ", " : "", res_cnt, node_ptr->name);
return ESLURM_RES_CORES_PER_GPU_TOPO;
}
/* Allocate the per-entry array of restricted-core bitmaps on first use */
if (!gres_ns->topo_res_core_bitmap)
gres_ns->topo_res_core_bitmap = xcalloc(gres_ns->topo_cnt,
sizeof(bitstr_t *));
/* Until the final bit_not() below, set bits mark cores reserved for gpus */
node_ptr->gpu_spec_bitmap = bit_alloc(node_ptr->tot_cores);
for (int i = 0; i < gres_ns->topo_cnt; i++) {
int cnt = 0;
uint32_t this_gpu_res_cnt;
if (!gres_ns->topo_core_bitmap[i])
continue;
/* Rebuild this entry's restricted-core bitmap from scratch */
FREE_NULL_BITMAP(gres_ns->topo_res_core_bitmap[i]);
gres_ns->topo_res_core_bitmap[i] =
bit_alloc(node_ptr->tot_cores);
/* Cores wanted for this entry: per-gpu count times topo_gres_cnt_avail[i] */
this_gpu_res_cnt = res_cnt * gres_ns->topo_gres_cnt_avail[i];
/* info("%d has %s", i, */
/* bit_fmt_full(gres_ns->topo_core_bitmap[i])); */
/* Take the lowest-numbered cores that pass all three tests below */
for (int j = 0; j < node_ptr->tot_cores; j++) {
/* Skip general spec cores */
if (node_ptr->node_spec_bitmap &&
!bit_test(node_ptr->node_spec_bitmap, j))
continue;
/* Only look at the ones set */
if (!bit_test(gres_ns->topo_core_bitmap[i], j))
continue;
/* Skip any already set */
if (bit_test(node_ptr->gpu_spec_bitmap, j))
continue;
/* info("setting %d", j); */
bit_set(node_ptr->gpu_spec_bitmap, j);
bit_set(gres_ns->topo_res_core_bitmap[i], j);
if (++cnt >= this_gpu_res_cnt)
break;
}
/* Could not find enough unused cores for this entry: give up */
if (cnt != this_gpu_res_cnt) {
FREE_NULL_BITMAP(node_ptr->gpu_spec_bitmap);
xstrfmtcat(*reason_down, "%sRestrictedCoresPerGPU: We can't restrict %u core(s) per gpu. GPU %s(%d) doesn't have access to that many unique cores (%d).",
*reason_down ? ", " : "",
res_cnt, gres_ns->topo_type_name[i], i, cnt);
return ESLURM_RES_CORES_PER_GPU_UNIQUE;
}
}
/*
* We want the opposite of the possible cores so
* we can do a simple & on it when selecting.
*/
/* info("set %s", bit_fmt_full(node_ptr->gpu_spec_bitmap)); */
/* The string form is taken before inversion, so it lists the reserved cores */
node_ptr->gpu_spec = bit_fmt_full(node_ptr->gpu_spec_bitmap);
bit_not(node_ptr->gpu_spec_bitmap);
/* info("sending back %s", bit_fmt_full(node_ptr->gpu_spec_bitmap)); */
return SLURM_SUCCESS;
}
/*
* validate_node_specs - validate the node's specifications as valid,
* if not set state to down, in any case update last_response
* IN slurm_msg - message carrying the node's registration data
* OUT newly_up - set if node newly brought into service
* RET 0 if no error, ENOENT if no such node, EINVAL if values too low
*/
extern int validate_node_specs(slurm_msg_t *slurm_msg, bool *newly_up)
{
int error_code;
config_record_t *config_ptr;
node_record_t *node_ptr;
char *reason_down = NULL;
char *orig_features = NULL, *orig_features_act = NULL;
uint32_t node_flags;
time_t now = time(NULL);
bool orig_node_avail;
bool update_db = false;
bool was_invalid_reg, was_powering_up = false, was_powered_down = false;
static int node_features_cnt = -1;
int sockets1, sockets2; /* total sockets on node */
int cores1, cores2; /* total cores on node */
int threads1, threads2; /* total threads on node */
static time_t sched_update = 0;
static double conf_node_reg_mem_percent = -1;
xassert(verify_lock(CONF_LOCK, READ_LOCK));
slurm_node_registration_status_msg_t *reg_msg = slurm_msg->data;
node_ptr = find_node_record(reg_msg->node_name);
if (node_ptr == NULL)
return ENOENT;
debug3("%s: validating nodes %s in state: %s",
__func__, reg_msg->node_name,
node_state_string(node_ptr->node_state));
if (sched_update != slurm_conf.last_update) {
char *tmp_ptr;
if ((tmp_ptr = conf_get_opt_str(slurm_conf.slurmctld_params,
"node_reg_mem_percent="))) {
if (s_p_handle_double(&conf_node_reg_mem_percent,
"node_reg_mem_percent",
tmp_ptr))
conf_node_reg_mem_percent = -1;
sched_update = slurm_conf.last_update;
xfree(tmp_ptr);
}
}
orig_node_avail = bit_test(avail_node_bitmap, node_ptr->index);
config_ptr = node_ptr->config_ptr;
error_code = SLURM_SUCCESS;
node_ptr->protocol_version = slurm_msg->protocol_version;
xfree(node_ptr->version);
node_ptr->version = reg_msg->version;
reg_msg->version = NULL;
if (waiting_for_node_boot(node_ptr) ||
waiting_for_node_power_down(node_ptr))
return SLURM_SUCCESS;
bit_clear(booting_node_bitmap, node_ptr->index);
if (node_features_cnt == -1)
node_features_cnt = node_features_g_count();
if (reg_msg->features_avail || reg_msg->features_active) {
orig_features = xstrdup(node_ptr->features);
if (node_ptr->features_act)
orig_features_act = xstrdup(node_ptr->features_act);
else
orig_features_act = xstrdup(node_ptr->features);
}
if (reg_msg->features_avail) {
if (reg_msg->features_active && !node_ptr->features_act) {
node_ptr->features_act = node_ptr->features;
node_ptr->features = NULL;
} else {
xfree(node_ptr->features);
}
node_ptr->features = node_features_g_node_xlate(
reg_msg->features_avail,
orig_features, orig_features,
node_ptr->index);
/* Only update if there was a change */
if (xstrcmp(node_ptr->features, orig_features))
(void) update_node_avail_features(node_ptr->name,
node_ptr->features,
FEATURE_MODE_IND);
}
if (reg_msg->features_active) {
if (!_valid_reported_active_features(reg_msg->features_active,
node_ptr->features_act)) {
char *active_changeable_features =
_node_changeable_features(
node_ptr->features_act);
debug("Node %s reported active features (%s) are not a super set of node's active changeable features (%s)",
reg_msg->node_name,
reg_msg->features_active,
active_changeable_features);
error_code = EINVAL;
xstrfmtcat(reason_down, "%sReported active features (%s) are not a superset of currently active changeable features (%s)",
reason_down ? ", " : "",
reg_msg->features_active,
active_changeable_features);
xfree(active_changeable_features);
} else {
char *tmp_feature;
tmp_feature =
node_features_g_node_xlate(
reg_msg->features_active,
orig_features_act, orig_features,
node_ptr->index);
xfree(node_ptr->features_act);
node_ptr->features_act = tmp_feature;
(void) update_node_active_features(
node_ptr->name, node_ptr->features_act,
FEATURE_MODE_IND);
}
}
xfree(orig_features);
xfree(orig_features_act);
sockets1 = reg_msg->sockets;
cores1 = sockets1 * reg_msg->cores;
threads1 = cores1 * reg_msg->threads;
if (gres_node_config_unpack(reg_msg->gres_info,
node_ptr->name) != SLURM_SUCCESS) {
error_code = SLURM_ERROR;
xstrcat(reason_down, "Could not unpack gres data");
} else if (gres_node_config_validate(node_ptr, reg_msg->threads,
reg_msg->cores, reg_msg->sockets,
(slurm_conf.conf_flags &
CONF_FLAG_OR),
&reason_down) != SLURM_SUCCESS) {
error_code = EINVAL;
/* reason_down set in function above */
}
gres_node_state_log(node_ptr->gres_list, node_ptr->name);
if (node_ptr->res_cores_per_gpu) {
/*
* We need to make gpu_spec_bitmap now that we know the cores
* used per gres.
*/
if (_set_gpu_spec(node_ptr, &reason_down))
error_code = EINVAL;
/* reason_down set in function above */
} else {
FREE_NULL_BITMAP(node_ptr->gpu_spec_bitmap);
}
if (!(slurm_conf.conf_flags & CONF_FLAG_OR)) {
/* sockets1, cores1, and threads1 are set above */
sockets2 = config_ptr->tot_sockets;
cores2 = sockets2 * config_ptr->cores;
threads2 = cores2 * config_ptr->threads;
if (threads1 < threads2) {
debug("Node %s has low socket*core*thread count "
"(%d < %d)",
reg_msg->node_name, threads1, threads2);
error_code = EINVAL;
if (reason_down)
xstrcat(reason_down, ", ");
xstrcat(reason_down, "Low socket*core*thread count");
}
if (reg_msg->cpus < config_ptr->cpus) {
debug("Node %s has low cpu count (%u < %u)",
reg_msg->node_name, reg_msg->cpus,
config_ptr->cpus);
error_code = EINVAL;
if (reason_down)
xstrcat(reason_down, ", ");
xstrcat(reason_down, "Low CPUs");
}
if ((error_code == SLURM_SUCCESS) && running_cons_tres() &&
(node_features_cnt > 0) &&
(reg_msg->sockets != config_ptr->tot_sockets) &&
(reg_msg->cores != config_ptr->cores) &&
((reg_msg->sockets * reg_msg->cores) ==
(config_ptr->tot_sockets * config_ptr->cores))) {
_split_node_config(node_ptr, reg_msg);
}
}
if (reg_msg->boards > reg_msg->sockets) {
error("Node %s has more boards than sockets (%u > %u), setting board count to 1",
reg_msg->node_name, reg_msg->boards, reg_msg->sockets);
reg_msg->boards = 1;
}
if (!(slurm_conf.conf_flags & CONF_FLAG_OR)) {
double node_reg_mem_percent;
if (conf_node_reg_mem_percent == -1) {
if (IS_NODE_CLOUD(node_ptr))
node_reg_mem_percent =
DEFAULT_CLOUD_REG_MEM_PERCENT;
else
node_reg_mem_percent =
DEFAULT_NODE_REG_MEM_PERCENT;
} else
node_reg_mem_percent = conf_node_reg_mem_percent;
if (config_ptr->real_memory &&
((((double)reg_msg->real_memory /
config_ptr->real_memory) * 100) <
node_reg_mem_percent)) {
debug("Node %s has low real_memory size (%"PRIu64" / %"PRIu64") < %.2f%%",
reg_msg->node_name, reg_msg->real_memory,
config_ptr->real_memory, node_reg_mem_percent);
error_code = EINVAL;
if (reason_down)
xstrcat(reason_down, ", ");
xstrfmtcat(reason_down, "Low RealMemory (reported:%"PRIu64" < %.2f%% of configured:%"PRIu64")",
reg_msg->real_memory, node_reg_mem_percent,
config_ptr->real_memory);
}
if (reg_msg->tmp_disk < config_ptr->tmp_disk) {
debug("Node %s has low tmp_disk size (%u < %u)",
reg_msg->node_name, reg_msg->tmp_disk,
config_ptr->tmp_disk);
error_code = EINVAL;
if (reason_down)
xstrcat(reason_down, ", ");
xstrcat(reason_down, "Low TmpDisk");
}
}
if (reg_msg->cpu_spec_list) {
bitstr_t *node_spec_bitmap_old = node_ptr->node_spec_bitmap;
char *cpu_spec_list_old = node_ptr->cpu_spec_list;
node_ptr->node_spec_bitmap = NULL;
node_ptr->cpu_spec_list = reg_msg->cpu_spec_list;
reg_msg->cpu_spec_list = NULL; /* Nothing left to free */
if (build_node_spec_bitmap(node_ptr) != SLURM_SUCCESS)
error_code = EINVAL;
else if (!(slurm_conf.task_plugin_param &
SLURMD_SPEC_OVERRIDE) &&
(!node_spec_bitmap_old ||
!bit_equal(node_spec_bitmap_old,
node_ptr->node_spec_bitmap))) {
debug("Node %s has different spec CPUs than expected (%s, %s)",
reg_msg->node_name, cpu_spec_list_old,
node_ptr->cpu_spec_list);
error_code = EINVAL;
if (reason_down)
xstrcat(reason_down, ", ");
xstrcat(reason_down, "CoreSpec differ");
}
/* Regenerate core spec count and effective cpus */
if (node_ptr->cpu_spec_list &&
(slurm_conf.task_plugin_param & SLURMD_SPEC_OVERRIDE)) {
if (node_ptr->cpu_spec_list) {
build_node_spec_bitmap(node_ptr);
node_conf_convert_cpu_spec_list(node_ptr);
} else if (node_ptr->core_spec_cnt) {
node_conf_select_spec_cores(node_ptr);
}
node_ptr->cpus_efctv =
node_ptr->cpus -
(node_ptr->core_spec_cnt * node_ptr->tpc);
}
xfree(cpu_spec_list_old);
FREE_NULL_BITMAP(node_spec_bitmap_old);
}
if ((slurm_conf.task_plugin_param & SLURMD_SPEC_OVERRIDE) &&
reg_msg->mem_spec_limit) {
node_ptr->mem_spec_limit = reg_msg->mem_spec_limit;
}
xfree(node_ptr->arch);
node_ptr->arch = reg_msg->arch;
reg_msg->arch = NULL; /* Nothing left to free */
xfree(node_ptr->os);
node_ptr->os = reg_msg->os;
reg_msg->os = NULL; /* Nothing left to free */
if (node_ptr->cpu_load != reg_msg->cpu_load) {
node_ptr->cpu_load = reg_msg->cpu_load;
node_ptr->cpu_load_time = now;
last_node_update = now;
}
if (node_ptr->free_mem != reg_msg->free_mem) {
node_ptr->free_mem = reg_msg->free_mem;
node_ptr->free_mem_time = now;
last_node_update = now;
}
if (node_ptr->last_response &&
(node_ptr->boot_time > node_ptr->last_response) &&
!IS_NODE_UNKNOWN(node_ptr)) { /* Node just rebooted */
(void) node_features_g_get_node(node_ptr->name);
}
if (IS_NODE_NO_RESPOND(node_ptr) ||
IS_NODE_POWERING_UP(node_ptr) ||
IS_NODE_POWERING_DOWN(node_ptr) ||
IS_NODE_POWERED_DOWN(node_ptr)) {
was_powered_down = IS_NODE_POWERED_DOWN(node_ptr);
info("Node %s now responding", node_ptr->name);
/*
* Set last_busy in case that the node came up out of band or
* came up after ResumeTimeout so that it can be suspended at a
* later point.
*/
if (IS_NODE_POWERING_UP(node_ptr) ||
IS_NODE_POWERED_DOWN(node_ptr))
node_ptr->last_busy = now;
/*
* Set last_response if it's expected. Otherwise let it get
* marked at "unexpectedly rebooted". Not checked with
* IS_NODE_POWERED_DOWN() above to allow ReturnToService !=2
* catch nodes [re]booting unexpectedly.
*/
if (IS_NODE_POWERING_UP(node_ptr)) {
node_ptr->last_response = now;
was_powering_up = true;
}
node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
node_ptr->node_state &= (~NODE_STATE_POWERING_UP);
node_ptr->node_state &= (~NODE_STATE_POWERED_DOWN);
node_ptr->node_state &= (~NODE_STATE_POWERING_DOWN);
if (!is_node_in_maint_reservation(node_ptr->index))
node_ptr->node_state &= (~NODE_STATE_MAINT);
bit_clear(power_down_node_bitmap, node_ptr->index);
bit_set(power_up_node_bitmap, node_ptr->index);
last_node_update = now;
if (was_powered_down)
clusteracct_storage_g_node_up(acct_db_conn, node_ptr,
now);
}
if (reg_msg->extra) {
data_t *data = NULL;
if (extra_constraints_enabled() && reg_msg->extra[0] &&
serialize_g_string_to_data(&data, reg_msg->extra,
strlen(reg_msg->extra),
MIME_TYPE_JSON)) {
info("Failed to decode extra \"%s\" for node %s",
reg_msg->extra, node_ptr->name);
}
FREE_NULL_DATA(node_ptr->extra_data);
node_ptr->extra_data = data;
/*
* Always set the extra field from the registration message,
* even if decoding failed.
*/
xfree(node_ptr->extra);
if (reg_msg->extra[0]) {
node_ptr->extra = xstrdup(reg_msg->extra);
/*
* Skip db updates for extra field changes,
* otherwise we'll overwhelm it with event records
* if someone is updating these constantly.
*/
// update_db = true;
}
}
if (reg_msg->instance_id) {
xfree(node_ptr->instance_id);
if (reg_msg->instance_id[0]) {
node_ptr->instance_id = xstrdup(reg_msg->instance_id);
update_db = true;
}
}
if (reg_msg->instance_type) {
xfree(node_ptr->instance_type);
if (reg_msg->instance_type[0]) {
node_ptr->instance_type =
xstrdup(reg_msg->instance_type);
update_db = true;
}
}
if (update_db)
clusteracct_storage_g_node_update(acct_db_conn, node_ptr);
was_invalid_reg = IS_NODE_INVALID_REG(node_ptr);
node_ptr->node_state &= ~NODE_STATE_INVALID_REG;
node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
if (error_code) {
node_ptr->node_state |= NODE_STATE_INVALID_REG;
if (!was_invalid_reg) {
error("Setting node %s state to INVAL with reason:%s",
reg_msg->node_name, reason_down);
if (was_powering_up || was_powered_down)
kill_running_job_by_node_ptr(node_ptr);
}
if (!IS_NODE_DOWN(node_ptr)
&& !IS_NODE_DRAIN(node_ptr)
&& ! IS_NODE_FAIL(node_ptr)) {
drain_nodes(reg_msg->node_name, reason_down,
slurm_conf.slurm_user_id);
} else if (xstrcmp(node_ptr->reason, reason_down)) {
if (was_invalid_reg) {
error("Setting node %s state to INVAL with reason:%s",
reg_msg->node_name, reason_down);
}
xfree(node_ptr->reason);
set_node_reason(node_ptr, reason_down, now);
}
last_node_update = time (NULL);
} else if (reg_msg->status == ESLURMD_PROLOG_FAILED
|| reg_msg->status == ESLURMD_SETUP_ENVIRONMENT_ERROR) {
if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) {
char *reason;
error("%s: Prolog or job env setup failure on node %s, "
"draining the node",
__func__, reg_msg->node_name);
if (reg_msg->status == ESLURMD_PROLOG_FAILED)
reason = "Prolog error";
else
reason = "Job env setup error";
drain_nodes(reg_msg->node_name, reason,
slurm_conf.slurm_user_id);
last_node_update = time (NULL);
}
} else {
if (IS_NODE_UNKNOWN(node_ptr) || IS_NODE_FUTURE(node_ptr)) {
bool was_future = IS_NODE_FUTURE(node_ptr);
debug("validate_node_specs: node %s registered with "
"%u jobs",
reg_msg->node_name,reg_msg->job_count);
if (IS_NODE_FUTURE(node_ptr)) {
if (IS_NODE_MAINT(node_ptr) &&
!is_node_in_maint_reservation(
node_ptr->index))
node_flags &= (~NODE_STATE_MAINT);
node_flags &= (~NODE_STATE_REBOOT_REQUESTED);
node_flags &= (~NODE_STATE_REBOOT_ISSUED);
}
if (reg_msg->job_count) {
node_ptr->node_state = NODE_STATE_ALLOCATED |
node_flags;
} else {
node_ptr->node_state = NODE_STATE_IDLE |
node_flags;
node_ptr->last_busy = now;
}
last_node_update = now;
/* don't send this on a slurmctld unless needed */
if (was_future || /* always send FUTURE checkins */
(slurmctld_init_db &&
!IS_NODE_DRAIN(node_ptr) &&
!IS_NODE_FAIL(node_ptr))) {
/* reason information is handled in
clusteracct_storage_g_node_up()
*/
clusteracct_storage_g_node_up(
acct_db_conn, node_ptr, now);
}
} else if (IS_NODE_DOWN(node_ptr) &&
((slurm_conf.ret2service == 2) ||
IS_NODE_REBOOT_ISSUED(node_ptr) ||
((slurm_conf.ret2service == 1) &&
!xstrcmp(node_ptr->reason, "Not responding") &&
(node_ptr->boot_time <
node_ptr->last_response)))) {
node_flags &= (~NODE_STATE_REBOOT_ISSUED);
if (node_ptr->next_state != NO_VAL)
node_flags &= (~NODE_STATE_DRAIN);
if ((node_ptr->next_state & NODE_STATE_BASE) ==
NODE_STATE_DOWN) {
node_ptr->node_state = NODE_STATE_DOWN |
node_flags;
set_node_reason(node_ptr, "reboot complete",
now);
} else if (reg_msg->job_count) {
node_ptr->node_state = NODE_STATE_ALLOCATED |
node_flags;
} else {
node_ptr->node_state = NODE_STATE_IDLE |
node_flags;
node_ptr->last_busy = now;
}
node_ptr->next_state = NO_VAL;
node_ptr->resume_after = 0;
bit_clear(rs_node_bitmap, node_ptr->index);
bit_clear(asap_node_bitmap, node_ptr->index);
info("node %s returned to service",
reg_msg->node_name);
trigger_node_up(node_ptr);
last_node_update = now;
if (!IS_NODE_DRAIN(node_ptr)
&& !IS_NODE_DOWN(node_ptr)
&& !IS_NODE_FAIL(node_ptr)) {
/* reason information is handled in
* clusteracct_storage_g_node_up() */
clusteracct_storage_g_node_up(
acct_db_conn, node_ptr, now);
}
} else if (!IS_NODE_DRAINED(node_ptr) &&
!IS_NODE_MAINT(node_ptr) &&
node_ptr->last_response &&
(node_ptr->boot_time > node_ptr->last_response) &&
(slurm_conf.ret2service != 2)) {
if (!node_ptr->reason ||
(node_ptr->reason &&
(!xstrcmp(node_ptr->reason, "Not responding") ||
IS_NODE_REBOOT_REQUESTED(node_ptr)))) {
xfree(node_ptr->reason);
node_ptr->reason_time = now;
node_ptr->reason_uid = slurm_conf.slurm_user_id;
node_ptr->reason = xstrdup(
"Node unexpectedly rebooted");
}
/* If a reboot was requested, cancel it. */
if (IS_NODE_REBOOT_REQUESTED(node_ptr)) {
node_ptr->node_state &=
(~NODE_STATE_REBOOT_REQUESTED);
if ((node_ptr->next_state & NODE_STATE_FLAGS) &
NODE_STATE_UNDRAIN)
/*
* Not using _undo_reboot_asap() so that
* the reason is preserved.
*/
node_ptr->node_state &=
(~NODE_STATE_DRAIN);
bit_clear(rs_node_bitmap, node_ptr->index);
bit_clear(asap_node_bitmap, node_ptr->index);
}
info("%s: Node %s unexpectedly rebooted boot_time=%ld last response=%ld",
__func__, reg_msg->node_name, node_ptr->boot_time,
node_ptr->last_response);
_make_node_down(node_ptr, now);
kill_running_job_by_node_ptr(node_ptr);
last_node_update = now;
reg_msg->job_count = 0;
} else if (IS_NODE_ALLOCATED(node_ptr) &&
(reg_msg->job_count == 0)) { /* job vanished */
node_ptr->node_state = NODE_STATE_IDLE | node_flags;
node_ptr->last_busy = now;
last_node_update = now;
} else if (IS_NODE_COMPLETING(node_ptr) &&
(reg_msg->job_count == 0)) { /* job already done */
node_ptr->node_state &= (~NODE_STATE_COMPLETING);
last_node_update = now;
bit_clear(cg_node_bitmap, node_ptr->index);
} else if (IS_NODE_IDLE(node_ptr) &&
(reg_msg->job_count != 0)) {
if (node_ptr->run_job_cnt != 0) {
node_ptr->node_state = NODE_STATE_ALLOCATED |
node_flags;
error("Invalid state for node %s, was IDLE "
"with %u running jobs",
node_ptr->name, reg_msg->job_count);
}
/*
* there must be completing job(s) on this node since
* reg_msg->job_count was set (run_job_cnt +
* comp_job_cnt) in validate_jobs_on_node()
*/
if (node_ptr->comp_job_cnt != 0) {
node_ptr->node_state |= NODE_STATE_COMPLETING;
bit_set(cg_node_bitmap, node_ptr->index);
}
last_node_update = now;
}
if (IS_NODE_IDLE(node_ptr)) {
node_ptr->owner = NO_VAL;
xfree(node_ptr->mcs_label);
}
_sync_bitmaps(node_ptr, reg_msg->job_count);
}
xfree(reason_down);
if (reg_msg->energy)
memcpy(node_ptr->energy, reg_msg->energy,
sizeof(acct_gather_energy_t));
node_ptr->last_response = now;
node_ptr->boot_req_time = (time_t) 0;
node_ptr->power_save_req_time = (time_t) 0;
*newly_up = (!orig_node_avail &&
bit_test(avail_node_bitmap, node_ptr->index));
if (IS_NODE_CLOUD(node_ptr) ||
IS_NODE_DYNAMIC_FUTURE(node_ptr) ||
IS_NODE_DYNAMIC_NORM(node_ptr)) {
/* Get IP of slurmd */
char *comm_name = _get_msg_hostname(slurm_msg);
set_node_comm_name(node_ptr, comm_name, reg_msg->hostname);
xfree(comm_name);
}
if (was_powering_up || was_powered_down)
log_flag(POWER, "Node %s/%s/%s powered up with instance_id=%s, instance_type=%s",
node_ptr->name, node_ptr->node_hostname,
node_ptr->comm_name, reg_msg->instance_id,
reg_msg->instance_type);
return error_code;
}
/*
 * Bring the idle, share, avail and up node bitmaps in line with the
 * current state of one node.
 * IN node_ptr - node whose bitmap entries are refreshed
 * IN job_count - number of jobs on the node; zero marks the node as idle
 *	and sharable
 */
static void _sync_bitmaps(node_record_t *node_ptr, int job_count)
{
	bool is_down = IS_NODE_DOWN(node_ptr);
	bool unusable = is_down || IS_NODE_DRAIN(node_ptr) ||
			IS_NODE_FAIL(node_ptr) ||
			IS_NODE_NO_RESPOND(node_ptr);

	if (!job_count) {
		bit_set(idle_node_bitmap, node_ptr->index);
		bit_set(share_node_bitmap, node_ptr->index);
	}

	if (unusable)
		bit_clear(avail_node_bitmap, node_ptr->index);
	else
		make_node_avail(node_ptr);

	if (is_down)
		bit_clear(up_node_bitmap, node_ptr->index);
	else
		bit_set(up_node_bitmap, node_ptr->index);
}
/*
 * _node_did_resp - update a node record after the node has responded
 * IN node_ptr - node that responded
 *
 * Does nothing while the node is still expected to be booting or powering
 * down, or while it is in FUTURE state. Otherwise records the response
 * time, clears the NO_RESPOND/POWERING_UP flags, moves UNKNOWN and eligible
 * DOWN nodes back into service and refreshes the idle, share, avail and up
 * node bitmaps.
 */
static void _node_did_resp(node_record_t *node_ptr)
{
uint32_t node_flags;
time_t now = time(NULL);
/* Ignore the response while a boot or power down is pending, or if FUTURE */
if (waiting_for_node_boot(node_ptr) ||
waiting_for_node_power_down(node_ptr) ||
IS_NODE_FUTURE(node_ptr))
return;
node_ptr->last_response = now;
if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_POWERING_UP(node_ptr)) {
info("Node %s now responding", node_ptr->name);
node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
node_ptr->node_state &= (~NODE_STATE_POWERING_UP);
/* Keep MAINT only while a maintenance reservation covers the node */
if (!is_node_in_maint_reservation(node_ptr->index))
node_ptr->node_state &= (~NODE_STATE_MAINT);
last_node_update = now;
}
/*
 * Snapshot the flag bits after the clears above so that they are carried
 * over whenever the base state is rewritten below.
 */
node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
/* UNKNOWN base state: pick ALLOCATED or IDLE from the running job count */
if (IS_NODE_UNKNOWN(node_ptr)) {
node_ptr->last_busy = now;
if (node_ptr->run_job_cnt) {
node_ptr->node_state = NODE_STATE_ALLOCATED |
node_flags;
} else
node_ptr->node_state = NODE_STATE_IDLE | node_flags;
last_node_update = now;
if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) {
clusteracct_storage_g_node_up(acct_db_conn,
node_ptr, now);
}
}
/*
 * A DOWN node that is not flagged INVALID_REG goes back to IDLE when
 * slurm_conf.ret2service is 2, when boot_req_time is set, or when
 * slurm_conf.ret2service is 1 and the reason is "Not responding".
 */
if (IS_NODE_DOWN(node_ptr) &&
!IS_NODE_INVALID_REG(node_ptr) &&
((slurm_conf.ret2service == 2) ||
(node_ptr->boot_req_time != 0) ||
((slurm_conf.ret2service == 1) &&
!xstrcmp(node_ptr->reason, "Not responding")))) {
node_ptr->last_busy = now;
node_ptr->node_state = NODE_STATE_IDLE | node_flags;
node_ptr->resume_after = 0;
info("node_did_resp: node %s returned to service",
node_ptr->name);
trigger_node_up(node_ptr);
last_node_update = now;
if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) {
/* reason information is handled in
clusteracct_storage_g_node_up()
*/
clusteracct_storage_g_node_up(acct_db_conn,
node_ptr, now);
}
}
/* Refresh the bitmaps from the (possibly updated) node state */
if (IS_NODE_IDLE(node_ptr) && !IS_NODE_COMPLETING(node_ptr)) {
bit_set (idle_node_bitmap, node_ptr->index);
bit_set (share_node_bitmap, node_ptr->index);
}
/* A node flagged POWER_DOWN remains available only while ALLOCATED */
if (IS_NODE_DOWN(node_ptr) ||
IS_NODE_DRAIN(node_ptr) ||
IS_NODE_FAIL(node_ptr) ||
(IS_NODE_POWER_DOWN(node_ptr) && !IS_NODE_ALLOCATED(node_ptr))) {
bit_clear (avail_node_bitmap, node_ptr->index);
} else
bit_set (avail_node_bitmap, node_ptr->index);
if (IS_NODE_DOWN(node_ptr))
bit_clear (up_node_bitmap, node_ptr->index);
else
bit_set (up_node_bitmap, node_ptr->index);
}
/*
* node_did_resp - record that the specified node is responding
* IN name - name of the node
*/
void node_did_resp (char *name)
{
node_record_t *node_ptr;
node_ptr = find_node_record (name);
xassert(verify_lock(CONF_LOCK, READ_LOCK));
if (node_ptr == NULL) {
error ("node_did_resp unable to find node %s", name);
return;
}
_node_did_resp(node_ptr);
debug2("node_did_resp %s",name);
}
/*
 * node_not_resp - note that the named node failed to answer a message
 * IN name - name of the node
 * IN msg_time - time the unanswered message was sent
 * IN resp_type - type of the response received; RESPONSE_FORWARD_FAILED
 *	means the slurmd could not be contacted at all
 */
void node_not_resp(char *name, time_t msg_time, slurm_msg_type_t resp_type)
{
	node_record_t *node_ptr = find_node_record(name);

	if (!node_ptr) {
		error("node_not_resp unable to find node %s", name);
		return;
	}

	/*
	 * Any reply other than RESPONSE_FORWARD_FAILED shows that the slurmd
	 * is there even though communication failed (wrong protocol version,
	 * munge issue or whatever). Count it as a response so that the node
	 * is not marked down and its running jobs are not killed.
	 * last_response may already lie in the future while a boot is in
	 * progress, so it is only ever moved forward.
	 */
	if ((resp_type != RESPONSE_FORWARD_FAILED) &&
	    ((msg_time - 1) > node_ptr->last_response))
		node_ptr->last_response = msg_time - 1;

	/* Nodes flagged here are logged periodically by node_no_resp_msg() */
	if (!IS_NODE_DOWN(node_ptr))
		node_ptr->not_responding = true;

	/* Already known to be not responding */
	if (IS_NODE_NO_RESPOND(node_ptr) ||
	    IS_NODE_POWERING_DOWN(node_ptr) ||
	    IS_NODE_POWERED_DOWN(node_ptr))
		return;

	if (node_ptr->last_response >= msg_time) {
		debug("node_not_resp: node %s responded since msg sent",
		      node_ptr->name);
		return;
	}

	last_node_update = time(NULL);
	node_ptr->node_state |= NODE_STATE_NO_RESPOND;
	bit_clear(avail_node_bitmap, node_ptr->index);
}
/* For every node with the "not_responding" flag set, clear the flag
* and log that the node is not responding using a hostlist expression */
extern void node_no_resp_msg(void)
{
int i;
node_record_t *node_ptr;
char *host_str = NULL;
hostlist_t *no_resp_hostlist = NULL;
for (i = 0; (node_ptr = next_node(&i)); i++) {
if (!node_ptr->not_responding ||
IS_NODE_POWERED_DOWN(node_ptr) ||
IS_NODE_POWERING_DOWN(node_ptr) ||
IS_NODE_POWERING_UP(node_ptr))
continue;
if (no_resp_hostlist) {
(void) hostlist_push_host(no_resp_hostlist,
node_ptr->name);
} else
no_resp_hostlist = hostlist_create(node_ptr->name);
node_ptr->not_responding = false;
}
if (no_resp_hostlist) {
hostlist_uniq(no_resp_hostlist);
host_str = hostlist_ranged_string_xmalloc(no_resp_hostlist);
error("Nodes %s not responding", host_str);
xfree(host_str);
hostlist_destroy(no_resp_hostlist);
}
}
/*
* set_node_down - make the specified compute node's state DOWN and
* kill jobs as needed
* IN name - name of the node
* IN reason - why the node is DOWN
*/
void set_node_down (char *name, char *reason)
{
node_record_t *node_ptr;
node_ptr = find_node_record (name);
if (node_ptr == NULL) {
error ("set_node_down unable to find node %s", name);
return;
}
set_node_down_ptr (node_ptr, reason);
}
/*
 * set_node_down_ptr - make the given compute node's state DOWN and
 *	kill jobs as needed
 * IN node_ptr - node to take down
 * IN reason - why the node is DOWN
 *
 * Records the reason, marks the node down, kills the jobs running on it and
 * then resynchronizes the node bitmaps with a job count of zero.
 */
void set_node_down_ptr(node_record_t *node_ptr, char *reason)
{
	time_t now;

	xassert(node_ptr);

	now = time(NULL);
	set_node_reason(node_ptr, reason, now);
	_make_node_down(node_ptr, now);
	(void) kill_running_job_by_node_ptr(node_ptr);
	_sync_bitmaps(node_ptr, 0);
}
/*
* is_node_down - determine if the specified node's state is DOWN
* IN name - name of the node
* RET true if node exists and is down, otherwise false
*/
bool is_node_down (char *name)
{
node_record_t *node_ptr;
node_ptr = find_node_record (name);
if (node_ptr == NULL) {
error ("is_node_down unable to find node %s", name);
return false;
}
if (IS_NODE_DOWN(node_ptr))
return true;
return false;
}
/*
* is_node_resp - determine if the specified node's state is responding
* IN name - name of the node
* RET true if node exists and is responding, otherwise false
*/
bool is_node_resp (char *name)
{
node_record_t *node_ptr;
node_ptr = find_node_record (name);
if (node_ptr == NULL) {
error ("is_node_resp unable to find node %s", name);
return false;
}
if (IS_NODE_NO_RESPOND(node_ptr))
return false;
return true;
}
/*
 * msg_to_slurmd - send given msg_type (REQUEST_RECONFIGURE or REQUEST_SHUTDOWN)
 *	to every slurmd
 * IN msg_type - type of the message to send
 *
 * FUTURE nodes and CLOUD nodes that are powered down or powering down are
 * skipped. The message is sent with the lowest protocol version found among
 * the selected nodes. If no node is selected nothing is queued and all
 * memory allocated here is released.
 */
void msg_to_slurmd (slurm_msg_type_t msg_type)
{
	int i;
	agent_arg_t *kill_agent_args;
	node_record_t *node_ptr;

	kill_agent_args = xmalloc (sizeof (agent_arg_t));
	kill_agent_args->msg_type = msg_type;
	kill_agent_args->retry = 0;
	kill_agent_args->hostlist = hostlist_create(NULL);
	if (msg_type == REQUEST_SHUTDOWN) {
		shutdown_msg_t *shutdown_req;

		shutdown_req = xmalloc(sizeof(shutdown_msg_t));
		shutdown_req->options = 0;
		kill_agent_args->msg_args = shutdown_req;
	}
	kill_agent_args->protocol_version = SLURM_PROTOCOL_VERSION;

	for (i = 0; (node_ptr = next_node(&i)); i++) {
		if (IS_NODE_FUTURE(node_ptr))
			continue;
		if (IS_NODE_CLOUD(node_ptr) &&
		    (IS_NODE_POWERED_DOWN(node_ptr) ||
		     IS_NODE_POWERING_DOWN(node_ptr)))
			continue;
		/* Track the oldest protocol version among the recipients */
		if (kill_agent_args->protocol_version >
		    node_record_table_ptr[node_ptr->index]->protocol_version)
			kill_agent_args->protocol_version =
				node_record_table_ptr[node_ptr->index]->
				protocol_version;
		hostlist_push_host(kill_agent_args->hostlist, node_ptr->name);
		kill_agent_args->node_count++;
	}

	if (kill_agent_args->node_count == 0) {
		/*
		 * Nothing to send. The request is never handed to the agent,
		 * so the message body (only allocated above for
		 * REQUEST_SHUTDOWN) must be released here as well or it
		 * would be leaked.
		 */
		xfree(kill_agent_args->msg_args);
		hostlist_destroy(kill_agent_args->hostlist);
		xfree (kill_agent_args);
	} else {
		debug ("Spawning agent msg_type=%s", rpc_num2string(msg_type));
		set_agent_arg_r_uid(kill_agent_args, SLURM_AUTH_UID_ANY);
		agent_queue_request(kill_agent_args);
	}
}
/*
 * Specialized version of msg_to_slurmd that handles cross-version issues
 * when running configless.
 *
 * Since the REQUEST_RECONFIGURE message had no body, you could get away with
 * sending under the oldest format of any slurmd attached to the system.
 *
 * For configless, this would mean nothing gets sent to anyone, and those
 * older slurmds get REQUEST_RECONFIGURE_WITH_CONFIG and ignore it.
 *
 * So explicitly split the pool into one group per protocol version listed
 * below (RELEVANT_VER groups) and queue a separate request for each group
 * that has at least one node.
 */
#define RELEVANT_VER 4
extern void push_reconfig_to_slurmd(void)
{
	/* Newest first: each node joins the first group it is new enough for */
	const uint16_t versions[RELEVANT_VER] = {
		SLURM_PROTOCOL_VERSION,
		SLURM_ONE_BACK_PROTOCOL_VERSION,
		SLURM_TWO_BACK_PROTOCOL_VERSION,
		SLURM_MIN_PROTOCOL_VERSION,
	};
	agent_arg_t *ver_args[RELEVANT_VER] = { 0 };
	agent_arg_t *curr_args;
	node_record_t *node_ptr;
	int ver;

	for (ver = 0; ver < RELEVANT_VER; ver++) {
		ver_args[ver] = xmalloc(sizeof(agent_arg_t));
		ver_args[ver]->msg_type = REQUEST_RECONFIGURE_WITH_CONFIG;
		ver_args[ver]->protocol_version = versions[ver];
	}

	for (int i = 0; (node_ptr = next_node(&i)); i++) {
		if (IS_NODE_FUTURE(node_ptr))
			continue;
		if (IS_NODE_CLOUD(node_ptr) &&
		    (IS_NODE_POWERED_DOWN(node_ptr) ||
		     IS_NODE_POWERING_DOWN(node_ptr)))
			continue;

		/* Find the first group this node's version qualifies for */
		for (ver = 0; ver < RELEVANT_VER; ver++) {
			curr_args = ver_args[ver];
			if (node_ptr->protocol_version >=
			    curr_args->protocol_version)
				break;
		}
		if (ver == RELEVANT_VER)
			continue;	/* older than every group */

		/* Hostlist and message body are created on first use */
		if (!curr_args->hostlist) {
			curr_args->hostlist = hostlist_create(NULL);
			curr_args->msg_args = new_config_response(true);
		}
		hostlist_push_host(curr_args->hostlist, node_ptr->name);
		curr_args->node_count++;
	}

	for (ver = 0; ver < RELEVANT_VER; ver++) {
		/* This movement is needed to prevent a stack smash */
		curr_args = ver_args[ver];
		ver_args[ver] = NULL;

		if (curr_args->node_count) {
			debug("Spawning agent msg_type=%s version=%u",
			      rpc_num2string(curr_args->msg_type),
			      curr_args->protocol_version);
			set_agent_arg_r_uid(curr_args, SLURM_AUTH_UID_ANY);
			agent_queue_request(curr_args);
		} else {
			xfree(curr_args);
		}
	}
}
/*
 * make_node_alloc - mark a node as allocated to a job that is starting
 * IN node_ptr - pointer to node being allocated
 * IN job_ptr - pointer to job that is starting
 *
 * Updates the node's job counters, the idle/share bitmaps, the owner and
 * MCS label where applicable, sets the base state to ALLOCATED (keeping the
 * flag bits) and clears any recorded reason.
 */
extern void make_node_alloc(node_record_t *node_ptr, job_record_t *job_ptr)
{
	bool user_exclusive;

	node_ptr->run_job_cnt++;
	bit_clear(idle_node_bitmap, node_ptr->index);

	/* A job that does not share resources blocks sharing of the node */
	if (job_ptr->details && !job_ptr->details->share_res) {
		bit_clear(share_node_bitmap, node_ptr->index);
		node_ptr->no_share_job_cnt++;
	}

	/* Exclusive-user job or partition: remember who owns the node */
	user_exclusive =
		(job_ptr->details &&
		 (job_ptr->details->whole_node & WHOLE_NODE_USER)) ||
		(job_ptr->part_ptr &&
		 (job_ptr->part_ptr->flags & PART_FLAG_EXCLUSIVE_USER));
	if (user_exclusive) {
		node_ptr->owner_job_cnt++;
		node_ptr->owner = job_ptr->user_id;
	}

	if (slurm_mcs_get_select(job_ptr) == 1) {
		xfree(node_ptr->mcs_label);
		node_ptr->mcs_label = xstrdup(job_ptr->mcs_label);
	}

	/* Replace the base state, carrying the flag bits over unchanged */
	node_ptr->node_state = NODE_STATE_ALLOCATED |
			       (node_ptr->node_state & NODE_STATE_FLAGS);

	xfree(node_ptr->reason);
	node_ptr->reason_time = 0;
	node_ptr->reason_uid = NO_VAL;

	/* Partition carries PART_FLAG_PDOI: flag the node POWER_DOWN */
	if (job_ptr && job_ptr->part_ptr &&
	    (job_ptr->part_ptr->flags & PART_FLAG_PDOI))
		node_ptr->node_state |= NODE_STATE_POWER_DOWN;

	last_node_update = time(NULL);
}
/*
 * make_node_avail - flag specified node as available
 * IN node_ptr - node to make available
 *
 * Nodes flagged POWER_DOWN or POWERING_DOWN are left untouched.
 */
extern void make_node_avail(node_record_t *node_ptr)
{
	if (!IS_NODE_POWER_DOWN(node_ptr) &&
	    !IS_NODE_POWERING_DOWN(node_ptr)) {
		bit_set(avail_node_bitmap, node_ptr->index);

		/*
		 * If we are in the middle of a backfill cycle, this bitmap
		 * is used (when bf_continue is enabled) to avoid scheduling
		 * lower priority jobs on to newly available resources.
		 */
		bit_set(bf_ignore_node_bitmap, node_ptr->index);
	}
}
/*
 * node_mgr_make_node_blocked - set or clear NODE_STATE_BLOCKED on the nodes
 *	that topology_g_whole_topo() selects for a job beyond the job's own
 *	allocation
 * IN job_ptr - job whose allocation is the starting point; nothing is done
 *	unless IS_JOB_WHOLE_TOPO() holds and the job has a resource node bitmap
 * IN set - true to set the flag, false to clear it
 */
extern void node_mgr_make_node_blocked(job_record_t *job_ptr, bool set)
{
	bitstr_t *blocked_bitmap;
	node_record_t *node_ptr;

	if (!IS_JOB_WHOLE_TOPO(job_ptr) || !job_ptr->job_resrcs ||
	    !job_ptr->job_resrcs->node_bitmap)
		return;

	/*
	 * Work on a copy: let the topology plugin rewrite it, then strip the
	 * job's own nodes so that only the additional nodes remain.
	 */
	blocked_bitmap = bit_copy(job_ptr->job_resrcs->node_bitmap);
	topology_g_whole_topo(blocked_bitmap, job_ptr->part_ptr->topology_idx);
	bit_and_not(blocked_bitmap, job_ptr->job_resrcs->node_bitmap);

	for (int n = 0; (node_ptr = next_node_bitmap(blocked_bitmap, &n));
	     n++) {
		if (!set) {
			node_ptr->node_state &= (~NODE_STATE_BLOCKED);
			continue;
		}
		node_ptr->node_state |= NODE_STATE_BLOCKED;
	}

	FREE_NULL_BITMAP(blocked_bitmap);
}
/* make_node_comp - flag specified node as completing a job
 * IN node_ptr - pointer to node marked for completion of job
 * IN job_ptr - pointer to job that is completing
 * IN suspended - true if job was previously suspended
 *
 * Adjusts the node's suspended/running/no-share job counters, marks the node
 * COMPLETING (unless it is DOWN, POWERED_DOWN or POWERING_UP), fires the
 * draining/drained triggers where they apply and recomputes the node's base
 * state.
 */
extern void make_node_comp(node_record_t *node_ptr, job_record_t *job_ptr,
bool suspended)
{
uint32_t node_flags;
time_t now = time(NULL);
xassert(node_ptr);
/* Take the job out of the counter that matches its previous state */
if (suspended) {
if (node_ptr->sus_job_cnt) {
(node_ptr->sus_job_cnt)--;
} else {
error("%s: %pJ node %s sus_job_cnt underflow", __func__,
job_ptr, node_ptr->name);
}
} else {
if (node_ptr->run_job_cnt) {
(node_ptr->run_job_cnt)--;
} else {
error("%s: %pJ node %s run_job_cnt underflow", __func__,
job_ptr, node_ptr->name);
}
/* Job with share_res == 0: the node may become sharable again */
if (job_ptr->details && (job_ptr->details->share_res == 0)) {
if (node_ptr->no_share_job_cnt) {
(node_ptr->no_share_job_cnt)--;
} else {
error("%s: %pJ node %s no_share_job_cnt underflow",
__func__, job_ptr, node_ptr->name);
}
if (node_ptr->no_share_job_cnt == 0)
bit_set(share_node_bitmap, node_ptr->index);
}
}
/* Sync up conditionals with deallocate_nodes() */
if (!IS_NODE_DOWN(node_ptr) &&
!IS_NODE_POWERED_DOWN(node_ptr) &&
!IS_NODE_POWERING_UP(node_ptr)) {
/* Don't verify RPC if node in DOWN or POWER_UP state */
(node_ptr->comp_job_cnt)++;
node_ptr->node_state |= NODE_STATE_COMPLETING;
bit_set(cg_node_bitmap, node_ptr->index);
}
/*
 * Taken after the COMPLETING update above, so the flag (when set) is
 * carried into the base-state rewrite at the end of this function.
 */
node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
/* No running or completing jobs are left on the node */
if (!node_ptr->run_job_cnt && !node_ptr->comp_job_cnt) {
node_ptr->last_busy = now;
bit_set(idle_node_bitmap, node_ptr->index);
}
if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr)) {
trigger_node_draining(node_ptr);
/* Nothing left on the node: it is now drained, tell accounting */
if (!node_ptr->run_job_cnt && !node_ptr->comp_job_cnt) {
trigger_node_drained(node_ptr);
clusteracct_storage_g_node_down(
acct_db_conn,
node_ptr, now, NULL,
slurm_conf.slurm_user_id);
}
}
/* Recompute the base state; a DOWN node keeps its state */
if (IS_NODE_DOWN(node_ptr)) {
debug3("%s: Node %s being left DOWN", __func__, node_ptr->name);
} else if (node_ptr->run_job_cnt)
node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags;
else {
node_ptr->node_state = NODE_STATE_IDLE | node_flags;
node_ptr->last_busy = now;
}
last_node_update = now;
}
/*
 * node_mgr_reset_node_stats - zero a node's CPU load and energy data
 * IN node_ptr - node to reset; its energy record must be allocated
 *
 * Used for a node that is powered down or downed unexpectedly so that
 * client applications do not show erroneous values.
 */
extern void node_mgr_reset_node_stats(node_record_t *node_ptr)
{
	xassert(node_ptr);
	xassert(node_ptr->energy);

	memset(node_ptr->energy, 0, sizeof(acct_gather_energy_t));
	node_ptr->cpu_load = 0;
}
/*
 * _make_node_unavail - the part of _make_node_down() that only touches the
 *	COMPLETING flag and the node bitmaps; it does not mark the node down,
 *	fire a trigger or update accounting
 * IN node_ptr - node to make unavailable
 */
static void _make_node_unavail(node_record_t *node_ptr)
{
	xassert(node_ptr);

	/* The node is no longer completing anything */
	node_ptr->node_state &= (~NODE_STATE_COMPLETING);
	bit_clear(cg_node_bitmap, node_ptr->index);

	/* Neither available nor up ... */
	bit_clear(avail_node_bitmap, node_ptr->index);
	bit_clear(up_node_bitmap, node_ptr->index);

	/* ... but recorded as idle and sharable */
	bit_set(idle_node_bitmap, node_ptr->index);
	bit_set(share_node_bitmap, node_ptr->index);
}
/*
 * _make_node_down - flag specified node as down
 * IN node_ptr - node to mark DOWN
 * IN event_time - time of the event, passed on to accounting storage
 */
static void _make_node_down(node_record_t *node_ptr, time_t event_time)
{
	xassert(node_ptr);

	_make_node_unavail(node_ptr);
	node_mgr_reset_node_stats(node_ptr);

	/* Swap the base state for DOWN while keeping every flag bit */
	node_ptr->node_state = NODE_STATE_DOWN |
			       (node_ptr->node_state & NODE_STATE_FLAGS);

	/* A down node has no owner and no MCS label */
	xfree(node_ptr->mcs_label);
	node_ptr->owner = NO_VAL;

	trigger_node_down(node_ptr);
	last_node_update = time(NULL);
	clusteracct_storage_g_node_down(acct_db_conn, node_ptr, event_time,
					NULL, node_ptr->reason_uid);

	/*
	 * check all reservations since node may have been in a reservation
	 * with floating count of nodes that needs to be updated
	 */
	validate_all_reservations(false, false);
}
/*
 * make_node_idle - flag specified node as having finished with a job
 * IN node_ptr - pointer to node reporting job completion
 * IN job_ptr - pointer to job that just completed or NULL if not applicable
 *
 * When job_ptr is given and the node is still set in the job's node bitmap,
 * the node is removed from the job and the node's job counters are adjusted.
 * The node's base state and the avail/idle/up/cg bitmaps are then recomputed
 * unless more jobs are still completing on the node or the node is DOWN or
 * FUTURE.
 */
void make_node_idle(node_record_t *node_ptr, job_record_t *job_ptr)
{
uint32_t node_flags;
time_t now = time(NULL);
bitstr_t *node_bitmap = NULL;
/* Use the job's node_bitmap_cg when it exists, else its node_bitmap */
if (job_ptr) {
if (job_ptr->node_bitmap_cg)
node_bitmap = job_ptr->node_bitmap_cg;
else
node_bitmap = job_ptr->node_bitmap;
}
log_flag(TRACE_JOBS, "%s: enter %pJ", __func__, job_ptr);
xassert(node_ptr);
if (node_bitmap && (bit_test(node_bitmap, node_ptr->index))) {
/* Not a replay */
last_job_update = now;
bit_clear(node_bitmap, node_ptr->index);
if (!IS_JOB_FINISHED(job_ptr))
job_update_tres_cnt(job_ptr, node_ptr->index);
if (job_ptr->node_cnt) {
/*
* Clean up the JOB_COMPLETING flag
* only if there is not the slurmctld
* epilog running, otherwise wait
* when it terminates then this
* function will be invoked.
*/
job_ptr->node_cnt--;
cleanup_completing(job_ptr, false);
} else if ((job_ptr->total_cpus == 0) &&
(job_ptr->total_nodes == 0)) {
/* Job resized to zero nodes (expanded another job) */
} else {
error("%s: %pJ node_cnt underflow", __func__, job_ptr);
}
/* Decrement the node counter that matches the job's state */
if (IS_JOB_SUSPENDED(job_ptr)) {
/* Remove node from suspended job */
if (node_ptr->sus_job_cnt)
(node_ptr->sus_job_cnt)--;
else
error("%s: %pJ node %s sus_job_cnt underflow",
__func__, job_ptr, node_ptr->name);
} else if (IS_JOB_RUNNING(job_ptr)) {
/* Remove node from running job */
if (node_ptr->run_job_cnt)
(node_ptr->run_job_cnt)--;
else
error("%s: %pJ node %s run_job_cnt underflow",
__func__, job_ptr, node_ptr->name);
} else {
/* Neither suspended nor running: adjust the completing count */
if (node_ptr->comp_job_cnt) {
(node_ptr->comp_job_cnt)--;
} else if (IS_NODE_DOWN(node_ptr)) {
/* We were not expecting this response,
* ignore it */
} else {
error("%s: %pJ node %s comp_job_cnt underflow",
__func__, job_ptr, node_ptr->name);
}
if (node_ptr->comp_job_cnt > 0)
goto fini; /* More jobs completing */
}
}
/* Nothing is completing any more: drop the COMPLETING flag */
if (node_ptr->comp_job_cnt == 0) {
node_ptr->node_state &= (~NODE_STATE_COMPLETING);
bit_clear(cg_node_bitmap, node_ptr->index);
if (IS_NODE_IDLE(node_ptr)) {
node_ptr->owner = NO_VAL;
xfree(node_ptr->mcs_label);
}
}
/* Snapshot the flag bits so they survive the base-state rewrites below */
node_flags = node_ptr->node_state & NODE_STATE_FLAGS;
/* DOWN and FUTURE nodes keep their base state */
if (IS_NODE_DOWN(node_ptr) || IS_NODE_FUTURE(node_ptr)) {
debug3("%s: %pJ node %s being left %s",
__func__, job_ptr, node_ptr->name,
node_state_base_string(node_ptr->node_state));
goto fini;
}
bit_set(up_node_bitmap, node_ptr->index);
if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr) ||
IS_NODE_NO_RESPOND(node_ptr))
bit_clear(avail_node_bitmap, node_ptr->index);
else
make_node_avail(node_ptr);
if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr)) {
trigger_node_draining(node_ptr);
/* No running or completing jobs left: the node is drained */
if (!node_ptr->run_job_cnt && !node_ptr->comp_job_cnt) {
node_ptr->node_state = NODE_STATE_IDLE | node_flags;
bit_set(idle_node_bitmap, node_ptr->index);
debug3("%s: %pJ node %s is DRAINED",
__func__, job_ptr, node_ptr->name);
node_ptr->last_busy = now;
trigger_node_drained(node_ptr);
/* No accounting "down" record while a reboot is requested or issued */
if (!IS_NODE_REBOOT_REQUESTED(node_ptr) &&
!IS_NODE_REBOOT_ISSUED(node_ptr))
clusteracct_storage_g_node_down(
acct_db_conn, node_ptr, now,
NULL, slurm_conf.slurm_user_id);
}
} else if (node_ptr->run_job_cnt) {
/* Other jobs are still running on the node */
node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags;
if (!IS_NODE_NO_RESPOND(node_ptr) &&
!IS_NODE_FAIL(node_ptr) && !IS_NODE_DRAIN(node_ptr))
make_node_avail(node_ptr);
} else {
/* No running jobs remain: the node becomes IDLE */
node_ptr->node_state = NODE_STATE_IDLE | node_flags;
if (!IS_NODE_NO_RESPOND(node_ptr) &&
!IS_NODE_FAIL(node_ptr) && !IS_NODE_DRAIN(node_ptr))
make_node_avail(node_ptr);
if (!IS_NODE_NO_RESPOND(node_ptr) &&
!IS_NODE_COMPLETING(node_ptr))
bit_set(idle_node_bitmap, node_ptr->index);
node_ptr->last_busy = now;
}
if (IS_NODE_IDLE(node_ptr) && IS_NODE_POWER_DOWN(node_ptr)) {
/*
* Now that the node is idle and is to be powered off, remove
* from the avail_node_bitmap to prevent jobs being scheduled on
* the node before it powers off.
*/
bit_clear(avail_node_bitmap, node_ptr->index);
}
fini:
/*
 * Exclusive-user job or partition: release this job's hold on the node and
 * clear the owner and MCS label once the last such job is gone. This also
 * runs on the early exits above.
 */
if (job_ptr &&
((job_ptr->details &&
(job_ptr->details->whole_node & WHOLE_NODE_USER)) ||
(job_ptr->part_ptr &&
(job_ptr->part_ptr->flags & PART_FLAG_EXCLUSIVE_USER)))) {
if (node_ptr->owner_job_cnt == 0) {
error("%s: node_ptr->owner_job_cnt underflow",
__func__);
} else if (--node_ptr->owner_job_cnt == 0) {
node_ptr->owner = NO_VAL;
xfree(node_ptr->mcs_label);
}
}
last_node_update = now;
}
/*
 * send_nodes_to_accounting - report every node that is not in an 'up' state
 *	to accounting storage as down
 * IN event_time - time to record for the events
 * RET SLURM_SUCCESS, or the result of the last
 *	clusteracct_storage_g_node_down() call; the scan stops at the first
 *	call that returns SLURM_ERROR
 *
 * Takes the slurmctld node read lock for the duration of the scan.
 */
extern int send_nodes_to_accounting(time_t event_time)
{
	slurmctld_lock_t node_read_lock = {
		.node = READ_LOCK,
	};
	node_record_t *node_ptr = NULL;
	int rc = SLURM_SUCCESS;

	lock_slurmctld(node_read_lock);

	/* send nodes not in 'up' state */
	for (int i = 0; (node_ptr = next_node(&i)); i++) {
		char *reason;
		bool not_up;

		if (!node_ptr->name)
			continue;

		not_up = IS_NODE_DRAIN(node_ptr) ||
			 IS_NODE_FAIL(node_ptr) ||
			 IS_NODE_DOWN(node_ptr) ||
			 IS_NODE_FUTURE(node_ptr) ||
			 (IS_NODE_CLOUD(node_ptr) &&
			  IS_NODE_POWERED_DOWN(node_ptr));
		if (!not_up)
			continue;

		/* Nodes without a recorded reason get a default one */
		reason = node_ptr->reason ? node_ptr->reason :
					    "First Registration";
		rc = clusteracct_storage_g_node_down(acct_db_conn, node_ptr,
						     event_time, reason,
						     slurm_conf.slurm_user_id);
		if (rc == SLURM_ERROR)
			break;
	}

	unlock_slurmctld(node_read_lock);

	return rc;
}
/*
 * node_fini - free all memory associated with node records
 *
 * Releases the node feature lists and every global node bitmap, then calls
 * node_fini2().
 */
extern void node_fini(void)
{
	node_features_free_lists();

	/* Availability / scheduling bitmaps */
	FREE_NULL_BITMAP(avail_node_bitmap);
	FREE_NULL_BITMAP(bf_ignore_node_bitmap);
	FREE_NULL_BITMAP(idle_node_bitmap);
	FREE_NULL_BITMAP(share_node_bitmap);
	FREE_NULL_BITMAP(up_node_bitmap);
	FREE_NULL_BITMAP(cg_node_bitmap);

	/* Node class bitmaps */
	FREE_NULL_BITMAP(cloud_node_bitmap);
	FREE_NULL_BITMAP(external_node_bitmap);
	FREE_NULL_BITMAP(future_node_bitmap);

	/* Power and reboot bitmaps */
	FREE_NULL_BITMAP(asap_node_bitmap);
	FREE_NULL_BITMAP(booting_node_bitmap);
	FREE_NULL_BITMAP(power_down_node_bitmap);
	FREE_NULL_BITMAP(power_up_node_bitmap);
	FREE_NULL_BITMAP(rs_node_bitmap);

	node_fini2();
}
/*
 * reset_node_load - store a new CPU load value for the named node
 * IN node_name - name of the node
 * IN cpu_load - new CPU load value
 *
 * Also stamps the load time and last_node_update with the current time.
 * Logs an error if no node has that name.
 */
extern void reset_node_load(char *node_name, uint32_t cpu_load)
{
	node_record_t *node_ptr = find_node_record(node_name);
	time_t now;

	if (!node_ptr) {
		error("reset_node_load unable to find node %s", node_name);
		return;
	}

	now = time(NULL);
	node_ptr->cpu_load = cpu_load;
	node_ptr->cpu_load_time = now;
	last_node_update = now;
}
/*
 * reset_node_free_mem - store a new free memory value for the named node
 * IN node_name - name of the node
 * IN free_mem - new free memory value
 *
 * Also stamps the free memory time and last_node_update with the current
 * time. Logs an error if no node has that name.
 */
extern void reset_node_free_mem(char *node_name, uint64_t free_mem)
{
	node_record_t *node_ptr = find_node_record(node_name);
	time_t now;

	if (!node_ptr) {
		error("reset_node_free_mem unable to find node %s", node_name);
		return;
	}

	now = time(NULL);
	node_ptr->free_mem = free_mem;
	node_ptr->free_mem_time = now;
	last_node_update = now;
}
/*
 * Check for node timed events
 *
 * Such as:
 * reboots - If the node hasn't booted by ResumeTimeout, mark the node as down.
 * resume_after - Resume a down|drain node after resume_after time.
 *
 * The power save setting is cached and only re-read when
 * slurm_conf.last_update changes.
 */
extern void check_node_timers(void)
{
	static bool power_save_on = false;
	static time_t sched_update = 0;
	node_record_t *node_ptr;
	hostlist_t *resume_hostlist = NULL;
	update_node_msg_t *resume_msg = NULL;
	char *host_str;
	uint16_t resume_timeout = slurm_conf.resume_timeout;
	time_t now = time(NULL);

	if (sched_update != slurm_conf.last_update) {
		power_save_on = power_save_test();
		sched_update = slurm_conf.last_update;
	}

	for (int i = 0; (node_ptr = next_node(&i)); i++) {
		bool boot_pending =
			IS_NODE_REBOOT_ISSUED(node_ptr) ||
			(!power_save_on && IS_NODE_POWERING_UP(node_ptr));
		bool boot_expired =
			node_ptr->boot_req_time &&
			(node_ptr->boot_req_time + resume_timeout < now);

		if (boot_pending && boot_expired) {
			/*
			 * Remove states now so that event state shows as
			 * DOWN. Does not remove drain state in case it was
			 * set by scontrol update nodename.
			 */
			node_ptr->node_state &= (~NODE_STATE_POWERING_UP);
			node_ptr->node_state &= (~NODE_STATE_REBOOT_ISSUED);
			node_ptr->boot_req_time = 0;
			set_node_down_ptr(node_ptr, "reboot timed out");
			bit_clear(rs_node_bitmap, node_ptr->index);
			continue;
		}

		if (!node_ptr->resume_after ||
		    (now <= node_ptr->resume_after))
			continue;

		/* Fire resume, reset the time */
		node_ptr->resume_after = 0;
		if (!resume_hostlist)
			resume_hostlist = hostlist_create(NULL);
		hostlist_push_host(resume_hostlist, node_ptr->name);
	}

	if (!resume_hostlist)
		return;

	hostlist_uniq(resume_hostlist);
	host_str = hostlist_ranged_string_xmalloc(resume_hostlist);
	hostlist_destroy(resume_hostlist);
	debug("Issuing resume request for nodes %s", host_str);

	/* Resume all collected nodes with one update request */
	resume_msg = xmalloc(sizeof(*resume_msg));
	slurm_init_update_node_msg(resume_msg);
	resume_msg->node_state = NODE_RESUME;
	resume_msg->node_names = host_str;
	update_node(resume_msg, 0);
	slurm_free_update_node_msg(resume_msg);

	/* Back the node changes up */
	schedule_node_save();
}
/*
 * waiting_for_node_boot - report whether a requested boot is still pending
 * IN node_ptr - node to test
 * RET true if the node is POWERING_UP or REBOOT_ISSUED and its boot_time is
 *	earlier than its boot_req_time, otherwise false
 */
extern bool waiting_for_node_boot(node_record_t *node_ptr)
{
	xassert(node_ptr);

	if (!IS_NODE_POWERING_UP(node_ptr) &&
	    !IS_NODE_REBOOT_ISSUED(node_ptr))
		return false;

	if (node_ptr->boot_time >= node_ptr->boot_req_time)
		return false;

	debug("Still waiting for boot of node %s", node_ptr->name);
	return true;
}
/*
 * waiting_for_node_power_down - report whether a power down is still pending
 * IN node_ptr - node to test
 * RET true if the node is POWERING_DOWN, has a power_save_req_time set and
 *	its boot_time is earlier than that request time plus
 *	slurm_conf.suspend_timeout, otherwise false
 */
extern bool waiting_for_node_power_down(node_record_t *node_ptr)
{
	xassert(node_ptr);

	if (!IS_NODE_POWERING_DOWN(node_ptr) ||
	    !node_ptr->power_save_req_time)
		return false;

	if (node_ptr->boot_time >=
	    (node_ptr->power_save_req_time + slurm_conf.suspend_timeout))
		return false;

	debug("Still waiting for node '%s' to power off",
	      node_ptr->name);
	return true;
}
/*
 * set_node_comm_name - replace a node's communication name and hostname
 * IN node_ptr - node to update
 * IN comm_name - new communication name; hostname is used when NULL
 * IN hostname - new node hostname
 *
 * Both strings are copied, and slurm_reset_alias() is called with the node
 * name and the new values.
 */
extern void set_node_comm_name(node_record_t *node_ptr, char *comm_name,
			       char *hostname)
{
	char *new_comm_name = comm_name;

	if (!new_comm_name)
		new_comm_name = hostname;

	xfree(node_ptr->comm_name);
	node_ptr->comm_name = xstrdup(new_comm_name);

	xfree(node_ptr->node_hostname);
	node_ptr->node_hostname = xstrdup(hostname);

	slurm_reset_alias(node_ptr->name, node_ptr->comm_name,
			  node_ptr->node_hostname);
}
/*
 * list_for_each() callback: call build_part_bitmap() on one list entry.
 * IN x - list entry, passed straight to build_part_bitmap()
 * IN arg - unused
 * RET always SLURM_SUCCESS
 */
static int _foreach_build_part_bitmap(void *x, void *arg)
{
	int rc = SLURM_SUCCESS;

	build_part_bitmap(x);

	return rc;
}
/*
 * _update_parts - rebuild the bitmap of every partition in part_list and
 *	then call set_partition_tres()
 */
static void _update_parts(void)
{
	/* scan partition table and identify nodes in each */
	list_for_each(part_list, _foreach_build_part_bitmap, NULL);
	set_partition_tres(false);
}
/*
 * _build_node_callback - create and initialize the record of one node
 * IN alias - node name passed to add_node_record()
 * IN hostname - stored as the node's node_hostname
 * IN address - stored as the node's comm_name
 * IN bcast_address - stored as the node's bcast_address
 * IN port - stored as the node's port
 * IN state_val - initial node state; ignored when NO_VAL or NODE_STATE_UNKNOWN
 * IN conf_node - configuration entry supplying cpu_bind and reason
 * IN config_ptr - configuration record for the node, also supplies features
 * RET SLURM_SUCCESS or an error code; on error a node record created here is
 *	deleted again
 */
static int _build_node_callback(char *alias, char *hostname, char *address,
char *bcast_address, uint16_t port,
int state_val, slurm_conf_node_t *conf_node,
config_record_t *config_ptr)
{
int rc = SLURM_SUCCESS;
node_record_t *node_ptr = NULL;
if ((rc = add_node_record(alias, config_ptr, &node_ptr)))
goto fini;
/* Apply the requested state only when one was actually given */
if ((state_val != NO_VAL) &&
(state_val != NODE_STATE_UNKNOWN))
node_ptr->node_state = state_val;
node_ptr->last_response = (time_t) 0;
node_ptr->comm_name = xstrdup(address);
node_ptr->cpu_bind = conf_node->cpu_bind;
node_ptr->node_hostname = xstrdup(hostname);
node_ptr->bcast_address = xstrdup(bcast_address);
node_ptr->port = port;
node_ptr->reason = xstrdup(conf_node->reason);
/* Every node created through this callback is flagged DYNAMIC_NORM */
node_ptr->node_state |= NODE_STATE_DYNAMIC_NORM;
slurm_conf_add_node(node_ptr);
/* Available and active features both start from the configured list */
if (config_ptr->feature) {
node_ptr->features = xstrdup(config_ptr->feature);
node_ptr->features_act = xstrdup(config_ptr->feature);
}
/* A node with a topology string must be accepted by the topology plugin */
if (node_ptr->topology_str && topology_g_add_rm_node(node_ptr)) {
rc = ESLURM_REQUESTED_TOPO_CONFIG_UNAVAILABLE;
goto fini;
}
bit_clear(power_up_node_bitmap, node_ptr->index);
if (IS_NODE_FUTURE(node_ptr)) {
bit_set(future_node_bitmap, node_ptr->index);
} else if (IS_NODE_CLOUD(node_ptr) || IS_NODE_EXTERNAL(node_ptr)) {
make_node_idle(node_ptr, NULL);
if (IS_NODE_CLOUD(node_ptr)) {
/* CLOUD nodes are also recorded in power_down_node_bitmap */
bit_set(cloud_node_bitmap, node_ptr->index);
bit_set(power_down_node_bitmap, node_ptr->index);
} else {
bit_set(external_node_bitmap, node_ptr->index);
}
/* Load the node's GRES configuration, then validate it */
if ((rc = gres_g_node_config_load(node_ptr->config_ptr->cpus,
node_ptr->name,
node_ptr->gres_list, NULL,
NULL)))
goto fini;
rc = gres_node_config_validate(node_ptr,
node_ptr->config_ptr->threads,
node_ptr->config_ptr->cores,
node_ptr->config_ptr
->tot_sockets,
(slurm_conf.conf_flags &
CONF_FLAG_OR),
NULL);
}
fini:
/* Any failure after the record was created removes it again */
if (rc && node_ptr)
_delete_node_ptr(node_ptr);
return rc;
}
/*
 * consolidate_config_list - run _combine_dup_config_records() over every
 *	record of config_list
 * IN is_locked - true if the caller already holds the node write lock; when
 *	false the lock is taken and released here
 * IN force - do the work even if _get_config_list_update() reports that no
 *	update is pending
 *
 * Clears the config_list_update flag while holding config_list_update_mutex.
 */
extern void consolidate_config_list(bool is_locked, bool force)
{
	slurmctld_lock_t node_write_lock = { .node = WRITE_LOCK };
	config_record_t *config_rec;
	list_itr_t *itr;

	xassert(!is_locked || verify_lock(NODE_LOCK, WRITE_LOCK));

	/* Nothing to do unless forced or an update has been queued */
	if (!force && !_get_config_list_update())
		return;

	if (!is_locked)
		lock_slurmctld(node_write_lock);
	slurm_mutex_lock(&config_list_update_mutex);

	config_list_update = false;

	/* The list is modified while walking it, so use an explicit iterator */
	itr = list_iterator_create(config_list);
	for (config_rec = list_next(itr); config_rec;
	     config_rec = list_next(itr))
		_combine_dup_config_records(config_rec);
	list_iterator_destroy(itr);

	slurm_mutex_unlock(&config_list_update_mutex);
	if (!is_locked)
		unlock_slurmctld(node_write_lock);
}
/*
 * create_nodes - create dynamic nodes from a nodeline
 * IN/OUT msg - msg->extra holds the nodeline to parse; msg->node_names is
 *	replaced with the node names parsed from that line
 * OUT err_msg - set to an allocated description on the failures below that
 *	assign it
 * RET SLURM_SUCCESS or an error code
 *
 * Takes and releases the slurmctld locks itself (conf read; job, node and
 * partition write). Only State=FUTURE, CLOUD or EXTERNAL nodelines are
 * accepted, and only when select_type contains "cons_tres".
 */
extern int create_nodes(update_node_msg_t *msg, char **err_msg)
{
char *nodeline = msg->extra;
int state_val, rc = SLURM_SUCCESS;
slurm_conf_node_t *conf_node;
config_record_t *config_ptr;
s_p_hashtbl_t *node_hashtbl = NULL;
slurmctld_lock_t write_lock = {
.conf = READ_LOCK,
.job = WRITE_LOCK,
.node = WRITE_LOCK,
.part = WRITE_LOCK,
};
xassert(nodeline);
xassert(err_msg);
/* Checked before locking: this path returns without unlocking */
if (!xstrstr(slurm_conf.select_type, "cons_tres")) {
*err_msg = xstrdup("Node creation only compatible with select/cons_tres");
error("%s", *err_msg);
return ESLURM_ACCESS_DENIED;
}
lock_slurmctld(write_lock);
if (!(conf_node = slurm_conf_parse_nodeline(nodeline, &node_hashtbl))) {
*err_msg = xstrdup_printf("Failed to parse nodeline '%s'",
nodeline);
error("%s", *err_msg);
rc = SLURM_ERROR;
goto fini;
}
/* copy this so upstream logging messages are more detailed */
xfree(msg->node_names);
msg->node_names = xstrdup(conf_node->nodenames);
if ((rc = _validate_nodes_vs_nodeset(conf_node->nodenames))
!= SLURM_SUCCESS)
goto fini;
/*
 * Reject an unparsable state (NO_VAL) and any state that is not exactly
 * FUTURE and carries neither the CLOUD nor the EXTERNAL flag.
 */
state_val = state_str2int(conf_node->state, conf_node->nodenames);
if ((state_val == NO_VAL) ||
((state_val != NODE_STATE_FUTURE) &&
!(state_val & NODE_STATE_CLOUD) &&
!(state_val & NODE_STATE_EXTERNAL))) {
*err_msg = xstrdup("Only State=FUTURE, CLOUD, or EXTERNAL allowed for nodes created by scontrol");
error("%s", *err_msg);
rc = ESLURM_INVALID_NODE_STATE;
goto fini;
}
config_ptr = config_record_from_conf_node(conf_node,
slurmctld_tres_cnt);
config_ptr->node_bitmap = bit_alloc(node_record_count);
/*
 * _build_node_callback() does the per-node creation work.
 * NOTE(review): on failure the refresh steps below are skipped; if
 * expand_nodeline_info() can fail after some nodes were already created,
 * confirm that skipping them is intended.
 */
if ((rc = expand_nodeline_info(conf_node, config_ptr, err_msg,
_build_node_callback))) {
error("Failed to create a node in '%s': %s",
conf_node->nodenames, *err_msg);
goto fini;
}
/* Register the new nodes' features as both available and active */
if (config_ptr->feature) {
node_features_update_list(avail_feature_list,
config_ptr->feature,
config_ptr->node_bitmap);
node_features_update_list(active_feature_list,
config_ptr->feature,
config_ptr->node_bitmap);
}
/* Refresh the state derived from the node table now that it changed */
_queue_consolidate_config_list();
set_cluster_tres(false);
_update_parts();
power_save_set_timeouts(NULL);
power_save_exc_setup();
select_g_reconfigure();
fini:
/* Reached on success and on every failure after the locks were taken */
s_p_hashtbl_destroy(node_hashtbl);
unlock_slurmctld(write_lock);
if (rc == SLURM_SUCCESS) {
/* Must be called outside of locks */
clusteracct_storage_g_cluster_tres(
acct_db_conn, NULL, NULL, 0, SLURM_PROTOCOL_VERSION);
}
return rc;
}
/*
 * create_dynamic_reg_node - create a node record for a node that announces
 *	itself through a registration message
 * IN msg - registration RPC; msg->data is a
 *	slurm_node_registration_status_msg_t. When its dynamic_conf string is
 *	set it is parsed as a nodeline, otherwise the configuration record is
 *	built from the hardware counts in the message.
 * RET SLURM_SUCCESS, ESLURM_ACCESS_DENIED if select_type does not contain
 *	"cons_tres", ESLURM_INVALID_NODE_STATE if the nodeline carries a
 *	State= that state_str2int() cannot parse, or SLURM_ERROR on any other
 *	failure
 *
 * NOTE: the caller must hold the job, node and partition write locks.
 */
extern int create_dynamic_reg_node(slurm_msg_t *msg)
{
	config_record_t *config_ptr;
	node_record_t *node_ptr = NULL;
	int state_val = NODE_STATE_UNKNOWN, rc;
	s_p_hashtbl_t *node_hashtbl = NULL;
	slurm_conf_node_t *conf_node = NULL;
	slurm_node_registration_status_msg_t *reg_msg = msg->data;

	xassert(verify_lock(JOB_LOCK, WRITE_LOCK));
	xassert(verify_lock(NODE_LOCK, WRITE_LOCK));
	xassert(verify_lock(PART_LOCK, WRITE_LOCK));

	if (!xstrstr(slurm_conf.select_type, "cons_tres")) {
		error("Node creation only compatible with select/cons_tres");
		return ESLURM_ACCESS_DENIED;
	}

	if (reg_msg->dynamic_conf) {
		if (!(conf_node =
		      slurm_conf_parse_nodeline(reg_msg->dynamic_conf,
						&node_hashtbl))) {
			s_p_hashtbl_destroy(node_hashtbl);
			error("Failed to parse dynamic nodeline '%s'",
			      reg_msg->dynamic_conf);
			return SLURM_ERROR;
		}
		if (_validate_nodes_vs_nodeset(conf_node->nodenames) !=
		    SLURM_SUCCESS) {
			s_p_hashtbl_destroy(node_hashtbl);
			return SLURM_ERROR;
		}

		/*
		 * Resolve the requested state before any record is created so
		 * that an unparsable State= can be refused without cleanup.
		 * NO_VAL has the DRAIN bit set, so letting it through would
		 * take the DOWN/DRAIN branch below and store NO_VAL as the
		 * node's state.
		 */
		if (conf_node->state) {
			state_val = state_str2int(conf_node->state,
						  conf_node->nodenames);
			if (state_val == NO_VAL) {
				error("Invalid state '%s' in dynamic nodeline '%s'",
				      conf_node->state,
				      reg_msg->dynamic_conf);
				s_p_hashtbl_destroy(node_hashtbl);
				return ESLURM_INVALID_NODE_STATE;
			}
		}

		config_ptr = config_record_from_conf_node(conf_node,
							  slurmctld_tres_cnt);
	} else {
		/* No nodeline: describe the node from the registration data */
		config_ptr = create_config_record();
		config_ptr->boards = reg_msg->boards;
		config_ptr->cores = reg_msg->cores;
		config_ptr->cpus = reg_msg->cpus;
		config_ptr->nodes = xstrdup(reg_msg->node_name);
		config_ptr->real_memory = reg_msg->real_memory;
		config_ptr->threads = reg_msg->threads;
		config_ptr->tmp_disk = reg_msg->tmp_disk;
		config_ptr->tot_sockets = reg_msg->sockets;
	}
	config_ptr->node_bitmap = bit_alloc(node_record_count);

	if ((rc = add_node_record(reg_msg->node_name, config_ptr, &node_ptr))) {
		error("%s (%s)", slurm_strerror(rc), reg_msg->node_name);
		/* The config record has no node, drop it again */
		list_delete_ptr(config_list, config_ptr);
		s_p_hashtbl_destroy(node_hashtbl);
		return SLURM_ERROR;
	}

	if (conf_node && conf_node->port_str)
		node_ptr->port = strtol(conf_node->port_str, NULL, 10);

	/*
	 * Always resolve comm_name in slurmctld as hostname may resolve
	 * differently here than on the compute node that sent the RPC.
	 */
	xfree(node_ptr->comm_name);
	if (!(node_ptr->comm_name = _get_msg_hostname(msg)))
		node_ptr->comm_name = xstrdup(reg_msg->hostname);
	xfree(node_ptr->node_hostname);
	node_ptr->node_hostname = xstrdup(reg_msg->hostname);
	slurm_conf_add_node(node_ptr);

	bit_set(power_up_node_bitmap, node_ptr->index);

	/* Available and active features both start as the configured ones */
	node_ptr->features = xstrdup(node_ptr->config_ptr->feature);
	node_features_update_list(avail_feature_list, node_ptr->features,
				  config_ptr->node_bitmap);
	node_ptr->features_act = xstrdup(node_ptr->config_ptr->feature);
	node_features_update_list(active_feature_list, node_ptr->features_act,
				  config_ptr->node_bitmap);

	/*
	 * A topology string that is rejected is dropped rather than failing
	 * the registration; the topology call is repeated without it.
	 */
	if (node_ptr->topology_str && topology_g_add_rm_node(node_ptr)) {
		error("%s Invalid node topology specified %s ignored",
		      __func__, node_ptr->topology_str);
		xfree(node_ptr->topology_str);
		topology_g_add_rm_node(node_ptr);
	}

	_queue_consolidate_config_list();

	/* Handle DOWN and DRAIN, otherwise make the node idle */
	if ((state_val == NODE_STATE_DOWN) ||
	    (state_val & NODE_STATE_DRAIN)) {
		time_t now = time(NULL);

		if (conf_node && conf_node->reason)
			set_node_reason(node_ptr, conf_node->reason, now);
		_make_node_down(node_ptr, now);
		node_ptr->node_state = state_val;
	} else {
		make_node_idle(node_ptr, NULL);
	}
	node_ptr->node_state |= NODE_STATE_DYNAMIC_NORM;

	/* Refresh the state derived from the node table now that it changed */
	set_cluster_tres(false);
	_update_parts();
	power_save_set_timeouts(NULL);
	power_save_exc_setup();
	select_g_reconfigure();

	s_p_hashtbl_destroy(node_hashtbl);
	return SLURM_SUCCESS;
}
/*
 * Update the available and active feature lists with a NULL feature string
 * for a bitmap that contains only this node's index.
 * IN node_ptr - node whose index is used
 */
static void _remove_node_from_features(node_record_t *node_ptr)
{
	bitstr_t *only_this_node = bit_alloc(node_record_count);

	bit_set(only_this_node, node_ptr->index);

	node_features_update_list(avail_feature_list, NULL, only_this_node);
	node_features_update_list(active_feature_list, NULL, only_this_node);

	FREE_NULL_BITMAP(only_this_node);
}
/*
 * Clear the node's bit in every global node bitmap.
 * IN node_ptr - node whose index is cleared
 *
 * NOTE: keep this list in sync with the bitmaps in _init_bitmaps().
 */
static void _remove_node_from_all_bitmaps(node_record_t *node_ptr)
{
	bit_clear(asap_node_bitmap, node_ptr->index);
	bit_clear(avail_node_bitmap, node_ptr->index);
	bit_clear(bf_ignore_node_bitmap, node_ptr->index);
	bit_clear(booting_node_bitmap, node_ptr->index);
	bit_clear(cg_node_bitmap, node_ptr->index);
	bit_clear(cloud_node_bitmap, node_ptr->index);
	bit_clear(external_node_bitmap, node_ptr->index);
	bit_clear(future_node_bitmap, node_ptr->index);
	bit_clear(idle_node_bitmap, node_ptr->index);
	bit_clear(power_down_node_bitmap, node_ptr->index);
	bit_clear(power_up_node_bitmap, node_ptr->index);
	bit_clear(rs_node_bitmap, node_ptr->index);
	bit_clear(share_node_bitmap, node_ptr->index);
	bit_clear(up_node_bitmap, node_ptr->index);
}
/*
 * Delete a dynamic or external node record.
 * IN node_ptr - node to delete
 * RET SLURM_SUCCESS,
 *	ESLURM_INVALID_NODE_STATE if the node is neither dynamic nor external,
 *	ESLURM_NODES_BUSY if the node is allocated, completing or reserved
 *
 * On success the node is removed from the topology, the global bitmaps, the
 * feature lists, gres, the node hash table and the conf tables before the
 * record itself is deleted.
 *
 * NOTE: has to be in slurmctld code for locking.
 */
static int _delete_node_ptr(node_record_t *node_ptr)
{
	xassert(node_ptr);

	if (!(IS_NODE_DYNAMIC_NORM(node_ptr) || IS_NODE_EXTERNAL(node_ptr))) {
		error("Can't delete non-dynamic node '%s'.", node_ptr->name);
		return ESLURM_INVALID_NODE_STATE;
	}

	if (IS_NODE_ALLOCATED(node_ptr) || IS_NODE_COMPLETING(node_ptr)) {
		error("Node '%s' can't be delete because it's still in use.",
		      node_ptr->name);
		return ESLURM_NODES_BUSY;
	}

	if (node_ptr->node_state & NODE_STATE_RES) {
		error("Node '%s' can't be delete because it's in a reservation.",
		      node_ptr->name);
		return ESLURM_NODES_BUSY;
	}

	/* Clear the topology string first, then let the plugin update */
	xfree(node_ptr->topology_str);
	topology_g_add_rm_node(node_ptr);

	_remove_node_from_all_bitmaps(node_ptr);
	_remove_node_from_features(node_ptr);
	gres_node_remove(node_ptr);

	/* Name based lookups must go before the record (and name) is freed */
	xhash_pop_str(node_hash_table, node_ptr->name);
	slurm_conf_remove_node(node_ptr->name);

	delete_node_record(node_ptr);

	return SLURM_SUCCESS;
}
/*
 * Look a node up by name and delete it.
 * IN name - name of the node to delete
 * RET ESLURM_INVALID_NODE_NAME if no such node exists, otherwise the result
 *	of _delete_node_ptr()
 */
static int _delete_node(char *name)
{
	node_record_t *node_ptr = find_node_record(name);

	if (node_ptr)
		return _delete_node_ptr(node_ptr);

	error("Unable to find node %s to delete", name);
	return ESLURM_INVALID_NODE_NAME;
}
/*
 * delete_nodes - delete the dynamic nodes named by a node specification
 * IN names - node specification, expanded with nodespec_to_hostlist()
 * OUT err_msg - set to an allocated message naming the nodes that could not
 *	be deleted, or describing the select plugin restriction
 * RET SLURM_SUCCESS if every node was deleted, ESLURM_ACCESS_DENIED if
 *	select_type does not contain "cons_tres", ESLURM_INVALID_NODE_NAME if
 *	the specification is invalid or expands to no nodes, otherwise the
 *	error code of the FIRST node that failed to delete
 *
 * Takes and releases the slurmctld locks itself (conf read; job, node and
 * partition write). Nodes that can be deleted are deleted even when others
 * in the same request fail.
 */
extern int delete_nodes(char *names, char **err_msg)
{
	char *node_name;
	hostlist_t *to_delete = NULL;
	bool one_success = false;
	int ret_rc = SLURM_SUCCESS;
	hostlist_t *error_hostlist = NULL;
	slurmctld_lock_t write_lock = {
		.job = WRITE_LOCK,
		.node = WRITE_LOCK,
		.part = WRITE_LOCK,
		.conf = READ_LOCK,
	};

	xassert(err_msg);

	/* Checked before locking: this path returns without unlocking */
	if (!xstrstr(slurm_conf.select_type, "cons_tres")) {
		*err_msg = xstrdup("Node deletion only compatible with select/cons_tres");
		error("%s", *err_msg);
		return ESLURM_ACCESS_DENIED;
	}

	lock_slurmctld(write_lock);

	if (!(to_delete = nodespec_to_hostlist(names, true, NULL))) {
		ret_rc = ESLURM_INVALID_NODE_NAME;
		goto cleanup;
	}
	if (!hostlist_count(to_delete)) {
		info("%s: expansion of node specification '%s' resulted in zero nodes",
		     __func__, names);
		ret_rc = ESLURM_INVALID_NODE_NAME;
		goto cleanup;
	}

	while ((node_name = hostlist_shift(to_delete))) {
		int rc = _delete_node(node_name);

		if (rc) {
			error("failed to delete node '%s'", node_name);
			if (!error_hostlist)
				error_hostlist = hostlist_create(node_name);
			else
				hostlist_push_host(error_hostlist, node_name);
			/*
			 * Report the first failure. The error codes are plain
			 * numbers, not bit flags, so OR-ing several of them
			 * together would yield an unrelated code.
			 */
			if (ret_rc == SLURM_SUCCESS)
				ret_rc = rc;
		} else {
			one_success = true;
		}
		/* hostlist_shift() hands back a string the caller must free() */
		free(node_name);
	}

	/* Refresh derived state only if the node table actually changed */
	if (one_success) {
		set_cluster_tres(false);
		_update_parts();
		select_g_reconfigure();
		power_save_exc_setup();
	}

	if (error_hostlist) {
		char *nodes = hostlist_ranged_string_xmalloc(error_hostlist);

		*err_msg = xstrdup_printf("failed to delete nodes %s", nodes);
		xfree(nodes);
		FREE_NULL_HOSTLIST(error_hostlist);
	}

cleanup:
	unlock_slurmctld(write_lock);

	if (one_success) {
		/* Must be called outside of locks */
		clusteracct_storage_g_cluster_tres(
			acct_db_conn, NULL, NULL, 0, SLURM_PROTOCOL_VERSION);
	}

	FREE_NULL_HOSTLIST(to_delete);

	return ret_rc;
}
/*
 * set_node_reason - set, extend or clear a node's reason string
 * IN/OUT node_ptr - node to update
 * IN message - reason text. NULL or "" clears the reason together with its
 *	time and uid. Otherwise the text becomes the reason, or is appended as
 *	" : <message>" to an existing reason unless that exact suffix is
 *	already contained in it.
 * IN time - value stored as reason_time when a message is given
 *
 * NOTE: the caller must hold the node write lock.
 */
extern void set_node_reason(node_record_t *node_ptr,
			    char *message,
			    time_t time)
{
	xassert(verify_lock(NODE_LOCK, WRITE_LOCK));
	xassert(node_ptr);

	/* No message: reset everything related to the reason */
	if (!message || !message[0]) {
		xfree(node_ptr->reason);
		node_ptr->reason_time = 0;
		node_ptr->reason_uid = NO_VAL;
		return;
	}

	if (!node_ptr->reason) {
		node_ptr->reason = xstrdup(message);
	} else {
		char *suffix = xstrdup(" : ");

		xstrcat(suffix, message);
		/* Do not append the same message twice */
		if (!xstrstr(node_ptr->reason, suffix))
			xstrcat(node_ptr->reason, suffix);
		xfree(suffix);
	}

	node_ptr->reason_time = time;
	node_ptr->reason_uid = slurm_conf.slurm_user_id;
}