| /*****************************************************************************\ |
| * node_mgr.c - manage the node records of slurm |
| * Note: there is a global node table (node_record_table_ptr), its |
| * hash table (node_hash_table), time stamp (last_node_update) and |
| * configuration list (config_list) |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov>, et. al. |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #ifdef HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| |
| #include <ctype.h> |
| #include <errno.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <time.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <fcntl.h> |
| |
| #include "src/common/gres.h" |
| #include "src/common/hostlist.h" |
| #include "src/common/macros.h" |
| #include "src/common/pack.h" |
| #include "src/common/parse_time.h" |
| #include "src/common/xassert.h" |
| #include "src/common/xstring.h" |
| #include "src/common/node_select.h" |
| #include "src/common/read_config.h" |
| #include "src/common/slurm_accounting_storage.h" |
| #include "src/common/slurm_acct_gather_energy.h" |
| #include "src/common/slurm_ext_sensors.h" |
| #include "src/slurmctld/agent.h" |
| #include "src/slurmctld/front_end.h" |
| #include "src/slurmctld/locks.h" |
| #include "src/slurmctld/ping_nodes.h" |
| #include "src/slurmctld/proc_req.h" |
| #include "src/slurmctld/reservation.h" |
| #include "src/slurmctld/sched_plugin.h" |
| #include "src/slurmctld/slurmctld.h" |
| #include "src/slurmctld/slurmctld_plugstack.h" |
| #include "src/slurmctld/state_save.h" |
| #include "src/common/timers.h" |
| #include "src/slurmctld/trigger_mgr.h" |
| #include "src/plugins/select/bluegene/bg_enums.h" |
| |
| #define _DEBUG 0 |
| #define MAX_RETRIES 10 |
| |
| /* Change NODE_STATE_VERSION value when changing the state save format */ |
| #define NODE_STATE_VERSION "PROTOCOL_VERSION" |
| #define NODE_2_6_STATE_VERSION "VER006" /* SLURM version 2.6 */ |
| #define NODE_2_5_STATE_VERSION "VER006" /* SLURM version 2.5 */ |
| |
| /* Global variables */ |
| bitstr_t *avail_node_bitmap = NULL; /* bitmap of available nodes */ |
| bitstr_t *cg_node_bitmap = NULL; /* bitmap of completing nodes */ |
| bitstr_t *idle_node_bitmap = NULL; /* bitmap of idle nodes */ |
| bitstr_t *power_node_bitmap = NULL; /* bitmap of powered down nodes */ |
| bitstr_t *share_node_bitmap = NULL; /* bitmap of sharable nodes */ |
| bitstr_t *up_node_bitmap = NULL; /* bitmap of non-down nodes */ |
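/* NOTE: each bitmap above is indexed by the node's offset within
 * node_record_table_ptr and is kept in sync as node states change */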
| |
| static void _dump_node_state (struct node_record *dump_node_ptr, |
| Buf buffer); |
| static front_end_record_t * _front_end_reg( |
| slurm_node_registration_status_msg_t *reg_msg); |
| static void _make_node_down(struct node_record *node_ptr, |
| time_t event_time); |
| static bool _node_is_hidden(struct node_record *node_ptr); |
| static int _open_node_state_file(char **state_file); |
| static void _pack_node (struct node_record *dump_node_ptr, Buf buffer, |
| uint16_t protocol_version); |
| static void _sync_bitmaps(struct node_record *node_ptr, int job_count); |
| static void _update_config_ptr(bitstr_t *bitmap, |
| struct config_record *config_ptr); |
| static int _update_node_features(char *node_names, char *features); |
| static int _update_node_gres(char *node_names, char *gres); |
| static int _update_node_weight(char *node_names, uint32_t weight); |
| static bool _valid_node_state_change(uint16_t old, uint16_t new); |
| |
| |
| /* dump_all_node_state - save the state of all nodes to file */ |
| int dump_all_node_state ( void ) |
| { |
| /* Save high-water mark to avoid buffer growth with copies */ |
| static int high_buffer_size = (1024 * 1024); |
| int error_code = 0, inx, log_fd; |
| char *old_file, *new_file, *reg_file; |
| struct node_record *node_ptr; |
| /* Locks: Read config and node */ |
| slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, |
| NO_LOCK }; |
| Buf buffer = init_buf(high_buffer_size); |
| DEF_TIMERS; |
| |
| START_TIMER; |
| /* write header: version, time */ |
| packstr(NODE_STATE_VERSION, buffer); |
| pack16(SLURM_PROTOCOL_VERSION, buffer); |
| pack_time(time (NULL), buffer); |
| |
| /* write node records to buffer */ |
| lock_slurmctld (node_read_lock); |
| for (inx = 0, node_ptr = node_record_table_ptr; inx < node_record_count; |
| inx++, node_ptr++) { |
| xassert (node_ptr->magic == NODE_MAGIC); |
| xassert (node_ptr->config_ptr->magic == CONFIG_MAGIC); |
| _dump_node_state (node_ptr, buffer); |
| } |
| |
| old_file = xstrdup (slurmctld_conf.state_save_location); |
| xstrcat (old_file, "/node_state.old"); |
| reg_file = xstrdup (slurmctld_conf.state_save_location); |
| xstrcat (reg_file, "/node_state"); |
| new_file = xstrdup (slurmctld_conf.state_save_location); |
| xstrcat (new_file, "/node_state.new"); |
| unlock_slurmctld (node_read_lock); |
| |
| /* write the buffer to file */ |
| lock_state_files(); |
| log_fd = creat (new_file, 0600); |
| if (log_fd < 0) { |
| error ("Can't save state, error creating file %s %m", new_file); |
| error_code = errno; |
| } else { |
| int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; |
| char *data = (char *)get_buf_data(buffer); |
| high_buffer_size = MAX(nwrite, high_buffer_size); |
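		/* write() may produce short writes; loop until the whole
		 * buffer is on disk or a hard (non-EINTR) error occurs */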
| while (nwrite > 0) { |
| amount = write(log_fd, &data[pos], nwrite); |
			if (amount < 0) {
				if (errno == EINTR)
					continue;
				error("Error writing file %s, %m", new_file);
				error_code = errno;
				break;
			}
| nwrite -= amount; |
| pos += amount; |
| } |
| |
| rc = fsync_and_close(log_fd, "node"); |
| if (rc && !error_code) |
| error_code = rc; |
| } |
| if (error_code) |
| (void) unlink (new_file); |
| else { /* file shuffle */ |
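		/* Rotate the save files so an interruption never leaves us
		 * without a usable copy: node_state -> node_state.old, then
		 * node_state.new -> node_state */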
| (void) unlink (old_file); |
| if (link(reg_file, old_file)) |
| debug4("unable to create link for %s -> %s: %m", |
| reg_file, old_file); |
| (void) unlink (reg_file); |
| if (link(new_file, reg_file)) |
| debug4("unable to create link for %s -> %s: %m", |
| new_file, reg_file); |
| (void) unlink (new_file); |
| } |
| xfree (old_file); |
| xfree (reg_file); |
| xfree (new_file); |
| unlock_state_files (); |
| |
| free_buf (buffer); |
| END_TIMER2("dump_all_node_state"); |
| return error_code; |
| } |
| |
| /* |
| * _dump_node_state - dump the state of a specific node to a buffer |
| * IN dump_node_ptr - pointer to node for which information is requested |
| * IN/OUT buffer - location to store data, pointers automatically advanced |
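 * NOTE: keep the pack order here in sync with the matching unpack
 *	logic in load_all_node_state()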
| */ |
| static void |
| _dump_node_state (struct node_record *dump_node_ptr, Buf buffer) |
| { |
| packstr (dump_node_ptr->comm_name, buffer); |
| packstr (dump_node_ptr->name, buffer); |
| packstr (dump_node_ptr->node_hostname, buffer); |
| packstr (dump_node_ptr->reason, buffer); |
| packstr (dump_node_ptr->features, buffer); |
| packstr (dump_node_ptr->gres, buffer); |
| pack16 (dump_node_ptr->node_state, buffer); |
| pack16 (dump_node_ptr->cpus, buffer); |
| pack16 (dump_node_ptr->boards, buffer); |
| pack16 (dump_node_ptr->sockets, buffer); |
| pack16 (dump_node_ptr->cores, buffer); |
| pack16 (dump_node_ptr->threads, buffer); |
| pack32 (dump_node_ptr->real_memory, buffer); |
| pack32 (dump_node_ptr->tmp_disk, buffer); |
| pack32 (dump_node_ptr->reason_uid, buffer); |
| pack_time(dump_node_ptr->reason_time, buffer); |
| pack16 (dump_node_ptr->protocol_version, buffer); |
| (void) gres_plugin_node_state_pack(dump_node_ptr->gres_list, buffer, |
| dump_node_ptr->name); |
| } |
| |
| |
/* Open the node state save file, or the backup file if necessary.
 * OUT state_file - the name of the state save file actually used
 *	(xmalloc'd string, caller must xfree)
 * RET the file descriptor to read from or an error code
| */ |
| static int _open_node_state_file(char **state_file) |
| { |
| int state_fd; |
| struct stat stat_buf; |
| |
| *state_file = xstrdup(slurmctld_conf.state_save_location); |
| xstrcat(*state_file, "/node_state"); |
| state_fd = open(*state_file, O_RDONLY); |
| if (state_fd < 0) { |
| error("Could not open node state file %s: %m", *state_file); |
| } else if (fstat(state_fd, &stat_buf) < 0) { |
| error("Could not stat node state file %s: %m", *state_file); |
| (void) close(state_fd); |
| } else if (stat_buf.st_size < 10) { |
| error("Node state file %s too small", *state_file); |
| (void) close(state_fd); |
| } else /* Success */ |
| return state_fd; |
| |
| error("NOTE: Trying backup state save file. Information may be lost!"); |
| xstrcat(*state_file, ".old"); |
| state_fd = open(*state_file, O_RDONLY); |
| return state_fd; |
| } |
| |
| /* |
| * load_all_node_state - Load the node state from file, recover on slurmctld |
| * restart. Execute this after loading the configuration file data. |
| * Data goes into common storage. |
| * IN state_only - if true, overwrite only node state and reason |
| * Use this to overwrite the "UNKNOWN state typically used in slurm.conf |
| * RET 0 or error code |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern int load_all_node_state ( bool state_only ) |
| { |
| char *comm_name = NULL, *node_hostname = NULL; |
| char *node_name = NULL, *reason = NULL, *data = NULL, *state_file; |
| char *features = NULL, *gres = NULL; |
| int data_allocated, data_read = 0, error_code = 0, node_cnt = 0; |
| uint16_t node_state; |
| uint16_t cpus = 1, boards = 1, sockets = 1, cores = 1, threads = 1; |
| uint32_t real_memory, tmp_disk, data_size = 0, name_len; |
| uint32_t reason_uid = NO_VAL; |
| time_t reason_time = 0; |
| List gres_list = NULL; |
| struct node_record *node_ptr; |
| int state_fd; |
| time_t time_stamp, now = time(NULL); |
| Buf buffer; |
| char *ver_str = NULL; |
| hostset_t hs = NULL; |
| bool power_save_mode = false; |
| uint16_t protocol_version = (uint16_t)NO_VAL; |
| |
| if (slurmctld_conf.suspend_program && slurmctld_conf.resume_program) |
| power_save_mode = true; |
| |
| /* read the file */ |
| lock_state_files (); |
| state_fd = _open_node_state_file(&state_file); |
| if (state_fd < 0) { |
| info ("No node state file (%s) to recover", state_file); |
| error_code = ENOENT; |
| } |
| else { |
| data_allocated = BUF_SIZE; |
| data = xmalloc(data_allocated); |
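		/* Read the whole state file, growing the buffer so that at
		 * least BUF_SIZE bytes remain available for each read() */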
| while (1) { |
| data_read = read(state_fd, &data[data_size], BUF_SIZE); |
| if (data_read < 0) { |
| if (errno == EINTR) |
| continue; |
| else { |
| error ("Read error on %s: %m", |
| state_file); |
| break; |
| } |
| } else if (data_read == 0) /* eof */ |
| break; |
| data_size += data_read; |
| data_allocated += data_read; |
| xrealloc(data, data_allocated); |
| } |
| close (state_fd); |
| } |
| xfree (state_file); |
| unlock_state_files (); |
| |
| buffer = create_buf (data, data_size); |
| |
| safe_unpackstr_xmalloc( &ver_str, &name_len, buffer); |
| debug3("Version string in node_state header is %s", ver_str); |
| if (ver_str) { |
| /* 2.5 and 2.6 share the same NODE_2_6_STATE_VERSION |
| * so we send the lowest supported protocol version |
| * to slurmds. |
| */ |
| if (!strcmp(ver_str, NODE_STATE_VERSION)) |
| safe_unpack16(&protocol_version, buffer); |
| else if (!strcmp(ver_str, NODE_2_6_STATE_VERSION)) |
| protocol_version = SLURM_2_5_PROTOCOL_VERSION; |
| else if (!strcmp(ver_str, NODE_2_5_STATE_VERSION)) |
| protocol_version = SLURM_2_5_PROTOCOL_VERSION; |
| } |
| |
| if (protocol_version == (uint16_t)NO_VAL) { |
| error("*****************************************************"); |
| error("Can not recover node state, data version incompatible"); |
| error("*****************************************************"); |
| xfree(ver_str); |
| free_buf(buffer); |
| return EFAULT; |
| } |
| xfree(ver_str); |
| |
| safe_unpack_time (&time_stamp, buffer); |
| |
| while (remaining_buf (buffer) > 0) { |
| uint16_t base_state, obj_protocol_version = (uint16_t)NO_VAL; |
| if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { |
| safe_unpackstr_xmalloc (&comm_name, &name_len, buffer); |
| safe_unpackstr_xmalloc (&node_name, &name_len, buffer); |
| safe_unpackstr_xmalloc (&node_hostname, |
| &name_len, buffer); |
| safe_unpackstr_xmalloc (&reason, &name_len, buffer); |
| safe_unpackstr_xmalloc (&features, &name_len, buffer); |
| safe_unpackstr_xmalloc (&gres, &name_len, buffer); |
| safe_unpack16 (&node_state, buffer); |
| safe_unpack16 (&cpus, buffer); |
| safe_unpack16 (&boards, buffer); |
| safe_unpack16 (&sockets, buffer); |
| safe_unpack16 (&cores, buffer); |
| safe_unpack16 (&threads, buffer); |
| safe_unpack32 (&real_memory, buffer); |
| safe_unpack32 (&tmp_disk, buffer); |
| safe_unpack32 (&reason_uid, buffer); |
| safe_unpack_time (&reason_time, buffer); |
| safe_unpack16 (&obj_protocol_version, buffer); |
| if (gres_plugin_node_state_unpack( |
| &gres_list, buffer, node_name, |
| protocol_version) != SLURM_SUCCESS) |
| goto unpack_error; |
| base_state = node_state & NODE_STATE_BASE; |
| } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { |
| safe_unpackstr_xmalloc (&comm_name, &name_len, buffer); |
| safe_unpackstr_xmalloc (&node_name, &name_len, buffer); |
| safe_unpackstr_xmalloc (&node_hostname, |
| &name_len, buffer); |
| safe_unpackstr_xmalloc (&reason, &name_len, buffer); |
| safe_unpackstr_xmalloc (&features, &name_len, buffer); |
| safe_unpackstr_xmalloc (&gres, &name_len, buffer); |
| safe_unpack16 (&node_state, buffer); |
| safe_unpack16 (&cpus, buffer); |
| safe_unpack16 (&boards, buffer); |
| safe_unpack16 (&sockets, buffer); |
| safe_unpack16 (&cores, buffer); |
| safe_unpack16 (&threads, buffer); |
| safe_unpack32 (&real_memory, buffer); |
| safe_unpack32 (&tmp_disk, buffer); |
| safe_unpack32 (&reason_uid, buffer); |
| safe_unpack_time (&reason_time, buffer); |
| if (gres_plugin_node_state_unpack( |
| &gres_list, buffer, node_name, |
| protocol_version) != SLURM_SUCCESS) |
| goto unpack_error; |
| base_state = node_state & NODE_STATE_BASE; |
| } else { |
| error("load_all_node_state: protocol_version " |
| "%hu not supported", protocol_version); |
| goto unpack_error; |
| } |
| |
		/* perform what validity checks we can on the recovered data */
| if ((cpus == 0) || |
| (boards == 0) || |
| (sockets == 0) || |
| (cores == 0) || |
| (threads == 0) || |
| (base_state >= NODE_STATE_END)) { |
| error ("Invalid data for node %s: procs=%u, boards=%u," |
| " sockets=%u, cores=%u, threads=%u, state=%u", |
| node_name, cpus, boards, |
| sockets, cores, threads, node_state); |
| error ("No more node data will be processed from the " |
| "checkpoint file"); |
| goto unpack_error; |
| |
| } |
| |
| /* find record and perform update */ |
| node_ptr = find_node_record (node_name); |
| if (node_ptr == NULL) { |
| error ("Node %s has vanished from configuration", |
| node_name); |
| } else if (state_only) { |
| uint16_t orig_flags; |
| if (IS_NODE_CLOUD(node_ptr)) { |
| if ((!power_save_mode) && |
| ((node_state & NODE_STATE_POWER_SAVE) || |
| (node_state & NODE_STATE_POWER_UP))) { |
| node_state &= (~NODE_STATE_POWER_SAVE); |
| node_state &= (~NODE_STATE_POWER_UP); |
| if (hs) |
| hostset_insert(hs, node_name); |
| else |
| hs = hostset_create(node_name); |
| } |
| if (comm_name && node_hostname) { |
| /* Recover NodeAddr and NodeHostName */ |
| xfree(node_ptr->comm_name); |
| node_ptr->comm_name = comm_name; |
| comm_name = NULL; /* Nothing to free */ |
| xfree(node_ptr->node_hostname); |
| node_ptr->node_hostname = node_hostname; |
| node_hostname = NULL; /* Nothing to free */ |
| slurm_reset_alias(node_ptr->name, |
| node_ptr->comm_name, |
| node_ptr->node_hostname); |
| } |
| node_ptr->node_state = node_state; |
| } else if (IS_NODE_UNKNOWN(node_ptr)) { |
| if (base_state == NODE_STATE_DOWN) { |
| orig_flags = node_ptr->node_state & |
| NODE_STATE_FLAGS; |
| node_ptr->node_state = NODE_STATE_DOWN |
| | orig_flags; |
| } |
| if (node_state & NODE_STATE_DRAIN) |
| node_ptr->node_state |= |
| NODE_STATE_DRAIN; |
| if (node_state & NODE_STATE_FAIL) |
| node_ptr->node_state |= |
| NODE_STATE_FAIL; |
| if (node_state & NODE_STATE_POWER_SAVE) { |
| if (power_save_mode && |
| IS_NODE_UNKNOWN(node_ptr)) { |
| orig_flags = node_ptr-> |
| node_state & |
| NODE_STATE_FLAGS; |
| node_ptr->node_state = |
| NODE_STATE_IDLE | |
| orig_flags | |
| NODE_STATE_POWER_SAVE; |
| } else if (power_save_mode) { |
| node_ptr->node_state |= |
| NODE_STATE_POWER_SAVE; |
| } else if (hs) |
| hostset_insert(hs, node_name); |
| else |
| hs = hostset_create(node_name); |
| /* Recover hardware state for powered |
| * down nodes */ |
| node_ptr->cpus = cpus; |
| node_ptr->boards = boards; |
| node_ptr->sockets = sockets; |
| node_ptr->cores = cores; |
| node_ptr->threads = threads; |
| node_ptr->real_memory = real_memory; |
| node_ptr->tmp_disk = tmp_disk; |
| } |
| if (node_state & NODE_STATE_MAINT) |
| node_ptr->node_state |= NODE_STATE_MAINT; |
| if (node_state & NODE_STATE_POWER_UP) { |
| if (power_save_mode) { |
| node_ptr->node_state |= |
| NODE_STATE_POWER_UP; |
| } else if (hs) |
| hostset_insert(hs, node_name); |
| else |
| hs = hostset_create(node_name); |
| } |
| } |
| if (node_ptr->reason == NULL) { |
| node_ptr->reason = reason; |
| reason = NULL; /* Nothing to free */ |
| node_ptr->reason_time = reason_time; |
| node_ptr->reason_uid = reason_uid; |
| } |
| node_ptr->gres_list = gres_list; |
| gres_list = NULL; /* Nothing to free */ |
| } else { |
| if ((!power_save_mode) && |
| ((node_state & NODE_STATE_POWER_SAVE) || |
| (node_state & NODE_STATE_POWER_UP))) { |
| node_state &= (~NODE_STATE_POWER_SAVE); |
| node_state &= (~NODE_STATE_POWER_UP); |
| if (hs) |
| hostset_insert(hs, node_name); |
| else |
| hs = hostset_create(node_name); |
| } |
| if (IS_NODE_CLOUD(node_ptr) && |
| comm_name && node_hostname) { |
| /* Recover NodeAddr and NodeHostName */ |
| xfree(node_ptr->comm_name); |
| node_ptr->comm_name = comm_name; |
| comm_name = NULL; /* Nothing to free */ |
| xfree(node_ptr->node_hostname); |
| node_ptr->node_hostname = node_hostname; |
| node_hostname = NULL; /* Nothing to free */ |
| slurm_reset_alias(node_ptr->name, |
| node_ptr->comm_name, |
| node_ptr->node_hostname); |
| } |
| node_ptr->node_state = node_state; |
| xfree(node_ptr->reason); |
| node_ptr->reason = reason; |
| reason = NULL; /* Nothing to free */ |
| node_ptr->reason_time = reason_time; |
| node_ptr->reason_uid = reason_uid; |
| xfree(node_ptr->features); |
| node_ptr->features = features; |
| features = NULL; /* Nothing to free */ |
| xfree(node_ptr->gres); |
| node_ptr->gres = gres; |
| gres = NULL; /* Nothing to free */ |
| node_ptr->gres_list = gres_list; |
| gres_list = NULL; /* Nothing to free */ |
| node_ptr->part_cnt = 0; |
| xfree(node_ptr->part_pptr); |
			node_ptr->cpus = cpus;
			node_ptr->boards = boards;
			node_ptr->sockets = sockets;
| node_ptr->cores = cores; |
| node_ptr->threads = threads; |
| node_ptr->real_memory = real_memory; |
| node_ptr->tmp_disk = tmp_disk; |
| node_ptr->last_response = (time_t) 0; |
| } |
| |
| if (node_ptr) { |
| node_cnt++; |
| if (obj_protocol_version != (uint16_t)NO_VAL) |
| node_ptr->protocol_version = |
| obj_protocol_version; |
| else |
| node_ptr->protocol_version = protocol_version; |
| if (!IS_NODE_POWER_SAVE(node_ptr)) |
| node_ptr->last_idle = now; |
| select_g_update_node_state(node_ptr); |
| } |
| |
| xfree(features); |
| xfree(gres); |
| if (gres_list) { |
| list_destroy(gres_list); |
| gres_list = NULL; |
| } |
| xfree (comm_name); |
| xfree (node_hostname); |
| xfree (node_name); |
| xfree(reason); |
| } |
| |
| fini: info("Recovered state of %d nodes", node_cnt); |
| if (hs) { |
| char node_names[128]; |
| hostset_ranged_string(hs, sizeof(node_names), node_names); |
| info("Cleared POWER_SAVE flag from nodes %s", node_names); |
| hostset_destroy(hs); |
| } |
| free_buf (buffer); |
| return error_code; |
| |
| unpack_error: |
| error("Incomplete node data checkpoint file"); |
| error_code = EFAULT; |
| xfree(features); |
| xfree(gres); |
| if (gres_list) { |
| list_destroy(gres_list); |
| gres_list = NULL; |
| } |
| xfree(comm_name); |
| xfree(node_hostname); |
| xfree(node_name); |
| xfree(reason); |
| goto fini; |
| } |
| |
| |
/* list_compare_config - compare two entries from the config list based upon
 *	weight (ascending order), see common/list.h for documentation */
| int list_compare_config (void *config_entry1, void *config_entry2) |
| { |
| int weight1, weight2; |
| struct config_record *c1; |
| struct config_record *c2; |
| |
| c1 = *(struct config_record **)config_entry1; |
| c2 = *(struct config_record **)config_entry2; |
| |
| weight1 = c1->weight; |
| weight2 = c2->weight; |
| |
| return (weight1 - weight2); |
| } |
| |
| |
| static bool _node_is_hidden(struct node_record *node_ptr) |
| { |
| int i; |
| bool shown = false; |
| |
| for (i=0; i<node_ptr->part_cnt; i++) { |
| if (!(node_ptr->part_pptr[i]->flags & PART_FLAG_HIDDEN)) { |
| shown = true; |
| break; |
| } |
| } |
| |
| if (shown || (node_ptr->part_cnt == 0)) |
| return false; |
| return true; |
| } |
| |
| /* |
| * pack_all_node - dump all configuration and node information for all nodes |
| * in machine independent form (for network transmission) |
| * OUT buffer_ptr - pointer to the stored data |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN show_flags - node filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * IN protocol_version - slurm protocol version of client |
| * global: node_record_table_ptr - pointer to global node table |
| * NOTE: the caller must xfree the buffer at *buffer_ptr |
| * NOTE: change slurm_load_node() in api/node_info.c when data format changes |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern void pack_all_node (char **buffer_ptr, int *buffer_size, |
| uint16_t show_flags, uid_t uid, |
| uint16_t protocol_version) |
| { |
| int inx; |
| uint32_t nodes_packed, tmp_offset, node_scaling; |
| Buf buffer; |
| time_t now = time(NULL); |
| struct node_record *node_ptr = node_record_table_ptr; |
| bool hidden; |
| |
| buffer_ptr[0] = NULL; |
| *buffer_size = 0; |
| |
| buffer = init_buf (BUF_SIZE*16); |
| nodes_packed = 0; |
| |
| if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { |
| /* write header: count and time */ |
| pack32(nodes_packed, buffer); |
| select_g_alter_node_cnt(SELECT_GET_NODE_SCALING, |
| &node_scaling); |
| pack32(node_scaling, buffer); |
| |
| pack_time(now, buffer); |
| |
| /* write node records */ |
| part_filter_set(uid); |
| for (inx = 0; inx < node_record_count; inx++, node_ptr++) { |
| xassert (node_ptr->magic == NODE_MAGIC); |
| xassert (node_ptr->config_ptr->magic == |
| CONFIG_MAGIC); |
| |
| /* We can't avoid packing node records without breaking |
| * the node index pointers. So pack a node |
| * with a name of NULL and let the caller deal |
| * with it. */ |
| hidden = false; |
| if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && |
| (_node_is_hidden(node_ptr))) |
| hidden = true; |
| else if (IS_NODE_FUTURE(node_ptr)) |
| hidden = true; |
| else if (IS_NODE_CLOUD(node_ptr) && |
| IS_NODE_POWER_SAVE(node_ptr)) |
| hidden = true; |
| else if ((node_ptr->name == NULL) || |
| (node_ptr->name[0] == '\0')) |
| hidden = true; |
| |
| if (hidden) { |
| char *orig_name = node_ptr->name; |
| node_ptr->name = NULL; |
| _pack_node(node_ptr, buffer, protocol_version); |
| node_ptr->name = orig_name; |
| } else |
| _pack_node(node_ptr, buffer, protocol_version); |
| nodes_packed++; |
| } |
| part_filter_clear(); |
| } else { |
| error("select_g_select_jobinfo_pack: protocol_version " |
| "%hu not supported", protocol_version); |
| } |
| |
| tmp_offset = get_buf_offset (buffer); |
| set_buf_offset (buffer, 0); |
| pack32 (nodes_packed, buffer); |
| set_buf_offset (buffer, tmp_offset); |
| |
| *buffer_size = get_buf_offset (buffer); |
| buffer_ptr[0] = xfer_buf_data (buffer); |
| } |
| |
| /* |
| * pack_one_node - dump all configuration and node information for one node |
| * in machine independent form (for network transmission) |
| * OUT buffer_ptr - pointer to the stored data |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN show_flags - node filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * IN node_name - name of node for which information is desired, |
| * use first node if name is NULL |
| * IN protocol_version - slurm protocol version of client |
| * global: node_record_table_ptr - pointer to global node table |
| * NOTE: the caller must xfree the buffer at *buffer_ptr |
| * NOTE: change slurm_load_node() in api/node_info.c when data format changes |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern void pack_one_node (char **buffer_ptr, int *buffer_size, |
| uint16_t show_flags, uid_t uid, char *node_name, |
| uint16_t protocol_version) |
| { |
| uint32_t nodes_packed, tmp_offset, node_scaling; |
| Buf buffer; |
| time_t now = time(NULL); |
| struct node_record *node_ptr; |
| bool hidden; |
| |
| buffer_ptr[0] = NULL; |
| *buffer_size = 0; |
| |
| buffer = init_buf (BUF_SIZE); |
| nodes_packed = 0; |
| |
| if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { |
| /* write header: count and time */ |
| pack32(nodes_packed, buffer); |
| select_g_alter_node_cnt(SELECT_GET_NODE_SCALING, |
| &node_scaling); |
| pack32(node_scaling, buffer); |
| |
| pack_time(now, buffer); |
| |
| /* write node records */ |
| part_filter_set(uid); |
| if (node_name) |
| node_ptr = find_node_record(node_name); |
| else |
| node_ptr = node_record_table_ptr; |
| if (node_ptr) { |
| hidden = false; |
| if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && |
| (_node_is_hidden(node_ptr))) |
| hidden = true; |
| else if (IS_NODE_FUTURE(node_ptr)) |
| hidden = true; |
| else if (IS_NODE_CLOUD(node_ptr) && |
| IS_NODE_POWER_SAVE(node_ptr)) |
| hidden = true; |
| else if ((node_ptr->name == NULL) || |
| (node_ptr->name[0] == '\0')) |
| hidden = true; |
| |
| if (!hidden) { |
| _pack_node(node_ptr, buffer, protocol_version); |
| nodes_packed++; |
| } |
| } |
| part_filter_clear(); |
| } else { |
| error("select_g_select_jobinfo_pack: protocol_version " |
| "%hu not supported", protocol_version); |
| } |
| |
| tmp_offset = get_buf_offset (buffer); |
| set_buf_offset (buffer, 0); |
| pack32 (nodes_packed, buffer); |
| set_buf_offset (buffer, tmp_offset); |
| |
| *buffer_size = get_buf_offset (buffer); |
| buffer_ptr[0] = xfer_buf_data (buffer); |
| } |
| |
| /* |
| * _pack_node - dump all configuration information about a specific node in |
| * machine independent form (for network transmission) |
| * IN dump_node_ptr - pointer to node for which information is requested |
| * IN/OUT buffer - buffer where data is placed, pointers automatically updated |
| * IN protocol_version - slurm protocol version of client |
| * NOTE: if you make any changes here be sure to make the corresponding |
| * changes to load_node_config in api/node_info.c |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| static void _pack_node (struct node_record *dump_node_ptr, Buf buffer, |
| uint16_t protocol_version) |
| { |
| if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { |
| packstr (dump_node_ptr->name, buffer); |
| packstr (dump_node_ptr->node_hostname, buffer); |
| packstr (dump_node_ptr->comm_name, buffer); |
| pack16 (dump_node_ptr->node_state, buffer); |
| packstr (dump_node_ptr->version, buffer); |
| /* On a bluegene system always use the regular node |
		 * information, not what is in the config_ptr.
| */ |
| #ifndef HAVE_BG |
| if (slurmctld_conf.fast_schedule) { |
| /* Only data from config_record used for scheduling */ |
| pack16(dump_node_ptr->config_ptr->cpus, buffer); |
| pack16(dump_node_ptr->config_ptr->boards, buffer); |
| pack16(dump_node_ptr->config_ptr->sockets, buffer); |
| pack16(dump_node_ptr->config_ptr->cores, buffer); |
| pack16(dump_node_ptr->config_ptr->threads, buffer); |
| pack32(dump_node_ptr->config_ptr->real_memory, buffer); |
| pack32(dump_node_ptr->config_ptr->tmp_disk, buffer); |
| } else { |
| #endif |
| /* Individual node data used for scheduling */ |
| pack16(dump_node_ptr->cpus, buffer); |
| pack16(dump_node_ptr->boards, buffer); |
| pack16(dump_node_ptr->sockets, buffer); |
| pack16(dump_node_ptr->cores, buffer); |
| pack16(dump_node_ptr->threads, buffer); |
| pack32(dump_node_ptr->real_memory, buffer); |
| pack32(dump_node_ptr->tmp_disk, buffer); |
| #ifndef HAVE_BG |
| } |
| #endif |
| pack32(dump_node_ptr->cpu_load, buffer); |
| pack32(dump_node_ptr->config_ptr->weight, buffer); |
| pack32(dump_node_ptr->reason_uid, buffer); |
| |
| pack_time(dump_node_ptr->boot_time, buffer); |
| pack_time(dump_node_ptr->reason_time, buffer); |
| pack_time(dump_node_ptr->slurmd_start_time, buffer); |
| |
| select_g_select_nodeinfo_pack(dump_node_ptr->select_nodeinfo, |
| buffer, protocol_version); |
| |
| packstr(dump_node_ptr->arch, buffer); |
| packstr(dump_node_ptr->features, buffer); |
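		/* Report the node-specific gres string if set, otherwise
		 * fall back to the config_record default */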
| if (dump_node_ptr->gres) |
| packstr(dump_node_ptr->gres, buffer); |
| else |
| packstr(dump_node_ptr->config_ptr->gres, buffer); |
| packstr(dump_node_ptr->os, buffer); |
| packstr(dump_node_ptr->reason, buffer); |
| acct_gather_energy_pack(dump_node_ptr->energy, buffer, |
| protocol_version); |
| ext_sensors_data_pack(dump_node_ptr->ext_sensors, buffer, |
| protocol_version); |
| } else if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { |
| packstr (dump_node_ptr->name, buffer); |
| packstr (dump_node_ptr->node_hostname, buffer); |
| packstr (dump_node_ptr->comm_name, buffer); |
| pack16 (dump_node_ptr->node_state, buffer); |
| /* On a bluegene system always use the regular node |
		 * information, not what is in the config_ptr.
| */ |
| #ifndef HAVE_BG |
| if (slurmctld_conf.fast_schedule) { |
| /* Only data from config_record used for scheduling */ |
| pack16(dump_node_ptr->config_ptr->cpus, buffer); |
| pack16(dump_node_ptr->config_ptr->boards, buffer); |
| pack16(dump_node_ptr->config_ptr->sockets, buffer); |
| pack16(dump_node_ptr->config_ptr->cores, buffer); |
| pack16(dump_node_ptr->config_ptr->threads, buffer); |
| pack32(dump_node_ptr->config_ptr->real_memory, buffer); |
| pack32(dump_node_ptr->config_ptr->tmp_disk, buffer); |
| } else { |
| #endif |
| /* Individual node data used for scheduling */ |
| pack16(dump_node_ptr->cpus, buffer); |
| pack16(dump_node_ptr->boards, buffer); |
| pack16(dump_node_ptr->sockets, buffer); |
| pack16(dump_node_ptr->cores, buffer); |
| pack16(dump_node_ptr->threads, buffer); |
| pack32(dump_node_ptr->real_memory, buffer); |
| pack32(dump_node_ptr->tmp_disk, buffer); |
| #ifndef HAVE_BG |
| } |
| #endif |
| pack32(dump_node_ptr->cpu_load, buffer); |
| pack32(dump_node_ptr->config_ptr->weight, buffer); |
| pack32(dump_node_ptr->reason_uid, buffer); |
| |
| pack_time(dump_node_ptr->boot_time, buffer); |
| pack_time(dump_node_ptr->reason_time, buffer); |
| pack_time(dump_node_ptr->slurmd_start_time, buffer); |
| |
| select_g_select_nodeinfo_pack(dump_node_ptr->select_nodeinfo, |
| buffer, protocol_version); |
| |
| packstr(dump_node_ptr->arch, buffer); |
| packstr(dump_node_ptr->features, buffer); |
| if (dump_node_ptr->gres) |
| packstr(dump_node_ptr->gres, buffer); |
| else |
| packstr(dump_node_ptr->config_ptr->gres, buffer); |
| packstr(dump_node_ptr->os, buffer); |
| packstr(dump_node_ptr->reason, buffer); |
| acct_gather_energy_pack(dump_node_ptr->energy, buffer, |
| protocol_version); |
| ext_sensors_data_pack(dump_node_ptr->ext_sensors, buffer, |
| protocol_version); |
| } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { |
| packstr (dump_node_ptr->name, buffer); |
| packstr (dump_node_ptr->node_hostname, buffer); |
| packstr (dump_node_ptr->comm_name, buffer); |
| pack16 (dump_node_ptr->node_state, buffer); |
| /* On a bluegene system always use the regular node |
		 * information, not what is in the config_ptr.
| */ |
| #ifndef HAVE_BG |
| if (slurmctld_conf.fast_schedule) { |
| /* Only data from config_record used for scheduling */ |
| pack16(dump_node_ptr->config_ptr->cpus, buffer); |
| pack16(dump_node_ptr->config_ptr->boards, buffer); |
| pack16(dump_node_ptr->config_ptr->sockets, buffer); |
| pack16(dump_node_ptr->config_ptr->cores, buffer); |
| pack16(dump_node_ptr->config_ptr->threads, buffer); |
| pack32(dump_node_ptr->config_ptr->real_memory, buffer); |
| pack32(dump_node_ptr->config_ptr->tmp_disk, buffer); |
| } else { |
| #endif |
| /* Individual node data used for scheduling */ |
| pack16(dump_node_ptr->cpus, buffer); |
| pack16(dump_node_ptr->boards, buffer); |
| pack16(dump_node_ptr->sockets, buffer); |
| pack16(dump_node_ptr->cores, buffer); |
| pack16(dump_node_ptr->threads, buffer); |
| pack32(dump_node_ptr->real_memory, buffer); |
| pack32(dump_node_ptr->tmp_disk, buffer); |
| #ifndef HAVE_BG |
| } |
| #endif |
| pack32(dump_node_ptr->cpu_load, buffer); |
| pack32(dump_node_ptr->config_ptr->weight, buffer); |
| pack32(dump_node_ptr->reason_uid, buffer); |
| |
| pack_time(dump_node_ptr->boot_time, buffer); |
| pack_time(dump_node_ptr->reason_time, buffer); |
| pack_time(dump_node_ptr->slurmd_start_time, buffer); |
| |
| select_g_select_nodeinfo_pack(dump_node_ptr->select_nodeinfo, |
| buffer, protocol_version); |
| |
| packstr(dump_node_ptr->arch, buffer); |
| packstr(dump_node_ptr->features, buffer); |
| if (dump_node_ptr->gres) |
| packstr(dump_node_ptr->gres, buffer); |
| else |
| packstr(dump_node_ptr->config_ptr->gres, buffer); |
| packstr(dump_node_ptr->os, buffer); |
| packstr(dump_node_ptr->reason, buffer); |
| acct_gather_energy_pack(dump_node_ptr->energy, buffer, |
| protocol_version); |
| } else { |
| error("_pack_node: protocol_version " |
| "%hu not supported", protocol_version); |
| } |
| } |
| |
| |
| /* |
| * set_slurmd_addr - establish the slurm_addr_t for the slurmd on each node |
| * Uses common data structures. |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| void set_slurmd_addr (void) |
| { |
| #ifndef HAVE_FRONT_END |
| int i; |
| struct node_record *node_ptr = node_record_table_ptr; |
| DEF_TIMERS; |
| |
| START_TIMER; |
| for (i = 0; i < node_record_count; i++, node_ptr++) { |
| if ((node_ptr->name == NULL) || |
| (node_ptr->name[0] == '\0')) |
| continue; |
| if (IS_NODE_FUTURE(node_ptr)) |
| continue; |
| if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr)) |
| continue; |
| if (node_ptr->port == 0) |
| node_ptr->port = slurmctld_conf.slurmd_port; |
| slurm_set_addr(&node_ptr->slurm_addr, node_ptr->port, |
| node_ptr->comm_name); |
| if (node_ptr->slurm_addr.sin_port) |
| continue; |
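		/* Address lookup failed: take the node out of service as
		 * FUTURE until an address can be resolved */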
| error("slurm_set_addr failure on %s", node_ptr->comm_name); |
| node_ptr->node_state = NODE_STATE_FUTURE; |
| node_ptr->port = 0; |
| xfree(node_ptr->reason); |
| node_ptr->reason = xstrdup("NO NETWORK ADDRESS FOUND"); |
| node_ptr->reason_time = time(NULL); |
| node_ptr->reason_uid = getuid(); |
| } |
| |
| END_TIMER2("set_slurmd_addr"); |
| #endif |
| } |
| |
| /* |
| * update_node - update the configuration data for one or more nodes |
| * IN update_node_msg - update node request |
| * RET SLURM_SUCCESS or error code |
| * global: node_record_table_ptr - pointer to global node table |
| */ |
| int update_node ( update_node_msg_t * update_node_msg ) |
| { |
| int error_code = 0, node_cnt, node_inx; |
| struct node_record *node_ptr = NULL; |
| char *this_node_name = NULL; |
| hostlist_t host_list, hostaddr_list = NULL, hostname_list = NULL; |
| uint16_t base_state = 0, node_flags, state_val; |
| time_t now = time(NULL); |
| |
	if (update_node_msg->node_names == NULL) {
		info("update_node: no node names specified");
| return ESLURM_INVALID_NODE_NAME; |
| } |
| |
| host_list = hostlist_create(update_node_msg->node_names); |
| if (host_list == NULL) { |
| info("update_node: hostlist_create error on %s: %m", |
| update_node_msg->node_names); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| node_cnt = hostlist_count(host_list); |
| |
| if (update_node_msg->node_addr) { |
| hostaddr_list = hostlist_create(update_node_msg->node_addr); |
| if (hostaddr_list == NULL) { |
| info("update_node: hostlist_create error on %s: %m", |
| update_node_msg->node_addr); |
| FREE_NULL_HOSTLIST(host_list); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| if (node_cnt != hostlist_count(hostaddr_list)) { |
| info("update_node: nodecount mismatch"); |
| FREE_NULL_HOSTLIST(host_list); |
| FREE_NULL_HOSTLIST(hostaddr_list); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| } |
| |
| if (update_node_msg->node_hostname) { |
| hostname_list = hostlist_create(update_node_msg->node_hostname); |
| if (hostname_list == NULL) { |
| info("update_node: hostlist_create error on %s: %m", |
| update_node_msg->node_hostname); |
| FREE_NULL_HOSTLIST(host_list); |
| FREE_NULL_HOSTLIST(hostaddr_list); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| if (node_cnt != hostlist_count(hostname_list)) { |
| info("update_node: nodecount mismatch"); |
| FREE_NULL_HOSTLIST(host_list); |
| FREE_NULL_HOSTLIST(hostaddr_list); |
| FREE_NULL_HOSTLIST(hostname_list); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| } |
| |
| while ( (this_node_name = hostlist_shift (host_list)) ) { |
| int err_code = 0; |
| |
| state_val = update_node_msg->node_state; |
| node_ptr = find_node_record (this_node_name); |
		if (node_ptr == NULL) {
			error ("update_node: node %s does not exist",
				this_node_name);
			error_code = ESLURM_INVALID_NODE_NAME;
			free (this_node_name);
			break;
		}
		node_inx = node_ptr - node_record_table_ptr;
| |
| if (hostaddr_list) { |
| char *this_addr = hostlist_shift(hostaddr_list); |
| xfree(node_ptr->comm_name); |
| node_ptr->comm_name = xstrdup(this_addr); |
| free(this_addr); |
| } |
| if (hostname_list) { |
| char *this_hostname = hostlist_shift(hostname_list); |
| xfree(node_ptr->node_hostname); |
| node_ptr->node_hostname = xstrdup(this_hostname); |
| free(this_hostname); |
| } |
| if (hostaddr_list || hostname_list) { |
| /* This updates the lookup table addresses */ |
| slurm_reset_alias(node_ptr->name, node_ptr->comm_name, |
| node_ptr->node_hostname); |
| } |
| |
| if (update_node_msg->features) { |
| xfree(node_ptr->features); |
| if (update_node_msg->features[0]) |
| node_ptr->features = xstrdup(update_node_msg-> |
| features); |
| /* _update_node_features() logs and updates config */ |
| } |
| |
| if (update_node_msg->gres) { |
| xfree(node_ptr->gres); |
| if (update_node_msg->gres[0]) |
| node_ptr->gres = xstrdup(update_node_msg->gres); |
| /* _update_node_gres() logs and updates config */ |
| } |
| |
		if ((update_node_msg->reason) &&
		    (update_node_msg->reason[0])) {
| xfree(node_ptr->reason); |
| node_ptr->reason = xstrdup(update_node_msg->reason); |
| node_ptr->reason_time = now; |
| node_ptr->reason_uid = update_node_msg->reason_uid; |
| info ("update_node: node %s reason set to: %s", |
| this_node_name, node_ptr->reason); |
| } |
| |
| if (state_val != (uint16_t) NO_VAL) { |
| base_state = node_ptr->node_state; |
| if (!_valid_node_state_change(base_state, state_val)) { |
| info("Invalid node state transition requested " |
| "for node %s from=%s to=%s", |
| this_node_name, |
| node_state_string(base_state), |
| node_state_string(state_val)); |
| state_val = (uint16_t) NO_VAL; |
| error_code = ESLURM_INVALID_NODE_STATE; |
| } |
| base_state &= NODE_STATE_BASE; |
| } |
| |
| if (state_val != (uint16_t) NO_VAL) { |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| if (state_val == NODE_RESUME) { |
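				/* RESUME clears the DRAIN/FAIL flags; a DOWN
				 * or FUTURE node is returned to IDLE but left
				 * NO_RESPOND until it answers a ping */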
| if (IS_NODE_IDLE(node_ptr) && |
| (IS_NODE_DRAIN(node_ptr) || |
| IS_NODE_FAIL(node_ptr))) { |
| clusteracct_storage_g_node_up( |
| acct_db_conn, |
| node_ptr, |
| now); |
| } |
| node_ptr->node_state &= (~NODE_STATE_DRAIN); |
| node_ptr->node_state &= (~NODE_STATE_FAIL); |
| if (IS_NODE_DOWN(node_ptr)) { |
| state_val = NODE_STATE_IDLE; |
| #ifndef HAVE_FRONT_END |
| node_ptr->node_state |= |
| NODE_STATE_NO_RESPOND; |
| #endif |
| node_ptr->last_response = now; |
| ping_nodes_now = true; |
| } else if (IS_NODE_FUTURE(node_ptr)) { |
| if (node_ptr->port == 0) { |
					node_ptr->port = slurmctld_conf.
						slurmd_port;
| } |
| slurm_set_addr( &node_ptr->slurm_addr, |
| node_ptr->port, |
| node_ptr->comm_name); |
| if (node_ptr->slurm_addr.sin_port) { |
| state_val = NODE_STATE_IDLE; |
| #ifndef HAVE_FRONT_END |
| node_ptr->node_state |= |
| NODE_STATE_NO_RESPOND; |
| #endif |
| node_ptr->last_response = now; |
| ping_nodes_now = true; |
| } else { |
| error("slurm_set_addr failure " |
| "on %s", |
| node_ptr->comm_name); |
| state_val = base_state; |
| } |
| } else |
| state_val = base_state; |
| } else if (state_val == NODE_STATE_UNDRAIN) { |
| if (IS_NODE_IDLE(node_ptr) && |
| IS_NODE_DRAIN(node_ptr)) { |
| clusteracct_storage_g_node_up( |
| acct_db_conn, |
| node_ptr, |
| now); |
| } |
| node_ptr->node_state &= (~NODE_STATE_DRAIN); |
| state_val = base_state; |
| } |
| |
| if ((state_val == NODE_STATE_DOWN) || |
| (state_val == NODE_STATE_FUTURE)) { |
| /* We must set node DOWN before killing |
| * its jobs */ |
| _make_node_down(node_ptr, now); |
| kill_running_job_by_node_name (this_node_name); |
| if (state_val == NODE_STATE_FUTURE) { |
| node_ptr->node_state = NODE_STATE_FUTURE |
| | node_flags; |
| } |
| } else if (state_val == NODE_STATE_IDLE) { |
| /* assume they want to clear DRAIN and |
| * FAIL flags too */ |
| if (IS_NODE_DOWN(node_ptr)) { |
| trigger_node_up(node_ptr); |
| clusteracct_storage_g_node_up( |
| acct_db_conn, |
| node_ptr, |
| now); |
| } else if (IS_NODE_IDLE(node_ptr) && |
| (IS_NODE_DRAIN(node_ptr) || |
| IS_NODE_FAIL(node_ptr))) { |
| clusteracct_storage_g_node_up( |
| acct_db_conn, |
| node_ptr, |
| now); |
| } /* else already fully available */ |
| node_ptr->node_state &= (~NODE_STATE_DRAIN); |
| node_ptr->node_state &= (~NODE_STATE_FAIL); |
| if (!IS_NODE_NO_RESPOND(node_ptr)) |
| bit_set (avail_node_bitmap, node_inx); |
| bit_set (idle_node_bitmap, node_inx); |
| bit_set (up_node_bitmap, node_inx); |
| if (IS_NODE_POWER_SAVE(node_ptr)) |
| node_ptr->last_idle = 0; |
| else |
| node_ptr->last_idle = now; |
| } else if (state_val == NODE_STATE_ALLOCATED) { |
| if (!IS_NODE_DRAIN(node_ptr) && |
| !IS_NODE_FAIL(node_ptr) && |
| !IS_NODE_NO_RESPOND(node_ptr)) |
| bit_set(avail_node_bitmap, node_inx); |
| bit_set (up_node_bitmap, node_inx); |
| bit_clear (idle_node_bitmap, node_inx); |
| } else if ((state_val == NODE_STATE_DRAIN) || |
| (state_val == NODE_STATE_FAIL)) { |
| uint16_t new_state = state_val; |
| bit_clear (avail_node_bitmap, node_inx); |
| state_val = node_ptr->node_state |= state_val; |
| if ((node_ptr->run_job_cnt == 0) && |
| (node_ptr->comp_job_cnt == 0)) { |
| trigger_node_drained(node_ptr); |
| clusteracct_storage_g_node_down( |
| acct_db_conn, |
| node_ptr, now, NULL, |
| node_ptr->reason_uid); |
| } |
| if ((new_state == NODE_STATE_FAIL) && |
| (nonstop_ops.node_fail)) |
| (nonstop_ops.node_fail)(NULL, node_ptr); |
| } else if (state_val == NODE_STATE_POWER_SAVE) { |
| if (IS_NODE_POWER_SAVE(node_ptr)) { |
| node_ptr->last_idle = 0; |
| node_ptr->node_state &= |
| (~NODE_STATE_POWER_SAVE); |
| info("power down request repeating " |
| "for node %s", this_node_name); |
| } else { |
| if (IS_NODE_DOWN(node_ptr) && |
| IS_NODE_POWER_UP(node_ptr)) { |
| /* Abort power up request */ |
| node_ptr->node_state &= |
| (~NODE_STATE_POWER_UP); |
| #ifndef HAVE_FRONT_END |
| node_ptr->node_state |= |
| NODE_STATE_NO_RESPOND; |
| #endif |
| node_ptr->node_state = |
| NODE_STATE_IDLE | |
| (node_ptr->node_state & |
| NODE_STATE_FLAGS); |
| } |
| node_ptr->last_idle = 0; |
| info("powering down node %s", |
| this_node_name); |
| } |
| free(this_node_name); |
| continue; |
| } else if (state_val == NODE_STATE_POWER_UP) { |
| if (!IS_NODE_POWER_SAVE(node_ptr)) { |
| if (IS_NODE_POWER_UP(node_ptr)) { |
| node_ptr->last_idle = now; |
| node_ptr->node_state |= |
| NODE_STATE_POWER_SAVE; |
| info("power up request " |
| "repeating for node %s", |
| this_node_name); |
| } else { |
| verbose("node %s is already " |
| "powered up", |
| this_node_name); |
| } |
| } else { |
| node_ptr->last_idle = now; |
| info("powering up node %s", |
| this_node_name); |
| } |
| free(this_node_name); |
| continue; |
| } else if (state_val == NODE_STATE_NO_RESPOND) { |
| node_ptr->node_state |= NODE_STATE_NO_RESPOND; |
| state_val = base_state; |
| bit_clear(avail_node_bitmap, node_inx); |
| } else { |
| info ("Invalid node state specified %u", |
| state_val); |
| err_code = 1; |
| error_code = ESLURM_INVALID_NODE_STATE; |
| } |
| |
| if (err_code == 0) { |
| node_ptr->node_state = state_val | |
| (node_ptr->node_state & |
| NODE_STATE_FLAGS); |
| select_g_update_node_state(node_ptr); |
| |
| info ("update_node: node %s state set to %s", |
| this_node_name, |
| node_state_string(state_val)); |
| } |
| } |
| |
| if (!IS_NODE_DOWN(node_ptr) && |
| !IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { |
| /* reason information is handled in |
| clusteracct_storage_g_node_up() |
| */ |
| clusteracct_storage_g_node_up( |
| acct_db_conn, node_ptr, now); |
| } |
| |
| free (this_node_name); |
| } |
| FREE_NULL_HOSTLIST(host_list); |
| FREE_NULL_HOSTLIST(hostaddr_list); |
| FREE_NULL_HOSTLIST(hostname_list); |
| last_node_update = now; |
| |
| if ((error_code == 0) && (update_node_msg->features)) { |
| error_code = _update_node_features(update_node_msg->node_names, |
| update_node_msg->features); |
| } |
| if ((error_code == 0) && (update_node_msg->gres)) { |
| error_code = _update_node_gres(update_node_msg->node_names, |
| update_node_msg->gres); |
| } |
| |
| /* Update weight. Weight is part of config_ptr, |
| * hence split config records if required */ |
| if ((error_code == 0) && (update_node_msg->weight != NO_VAL)) { |
| error_code = _update_node_weight(update_node_msg->node_names, |
| update_node_msg->weight); |
| if (!error_code) |
| /* sort config_list by weight for scheduling */ |
| list_sort(config_list, &list_compare_config); |
| |
| } |
| |
| return error_code; |
| } |
| |
| /* variation of strcmp that accepts NULL pointers */ |
| static int _strcmp(char *str1, char *str2) |
| { |
| if (!str1 && !str2) |
| return 0; |
| if (str1 && !str2) |
| return 1; |
| if (!str1 && str2) |
| return -1; |
| return strcmp(str1, str2); |
| } |
| |
| /* |
| * restore_node_features - Make node and config (from slurm.conf) fields |
| * consistent for Features, Gres and Weight |
| * IN recover - |
| * 0, 1 - use data from config record, built using slurm.conf |
| * 2 = use data from node record, built from saved state |
| */ |
| extern void restore_node_features(int recover) |
| { |
| int i; |
| struct node_record *node_ptr; |
| |
| for (i=0, node_ptr=node_record_table_ptr; i<node_record_count; |
| i++, node_ptr++) { |
| |
| if (node_ptr->weight != node_ptr->config_ptr->weight) { |
| error("Node %s Weight(%u) differ from slurm.conf", |
| node_ptr->name, node_ptr->weight); |
| if (recover == 2) { |
| _update_node_weight(node_ptr->name, |
| node_ptr->weight); |
| } else { |
| node_ptr->weight = node_ptr->config_ptr-> |
| weight; |
| } |
| } |
| |
| if (_strcmp(node_ptr->config_ptr->feature, node_ptr->features)){ |
| error("Node %s Features(%s) differ from slurm.conf", |
| node_ptr->name, node_ptr->features); |
| if (recover == 2) { |
| _update_node_features(node_ptr->name, |
| node_ptr->features); |
| } else { |
| xfree(node_ptr->features); |
| node_ptr->features = xstrdup(node_ptr-> |
| config_ptr-> |
| feature); |
| } |
| } |
| |
| /* We lose the gres information updated manually and always |
| * use the information from slurm.conf */ |
| (void) gres_plugin_node_reconfig(node_ptr->name, |
| node_ptr->config_ptr->gres, |
| &node_ptr->gres, |
| &node_ptr->gres_list, |
| slurmctld_conf.fast_schedule); |
| gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name); |
| } |
| } |
| |
| /* Duplicate a configuration record except for the node names & bitmap */ |
| struct config_record * _dup_config(struct config_record *config_ptr) |
| { |
| struct config_record *new_config_ptr; |
| |
| new_config_ptr = create_config_record(); |
| new_config_ptr->magic = config_ptr->magic; |
| new_config_ptr->cpus = config_ptr->cpus; |
| new_config_ptr->boards = config_ptr->boards; |
| new_config_ptr->sockets = config_ptr->sockets; |
| new_config_ptr->cores = config_ptr->cores; |
| new_config_ptr->threads = config_ptr->threads; |
| new_config_ptr->real_memory = config_ptr->real_memory; |
| new_config_ptr->tmp_disk = config_ptr->tmp_disk; |
| new_config_ptr->weight = config_ptr->weight; |
| new_config_ptr->feature = xstrdup(config_ptr->feature); |
| new_config_ptr->gres = xstrdup(config_ptr->gres); |
| |
| return new_config_ptr; |
| } |
| |
| /* |
| * _update_node_weight - Update weight associated with nodes |
| * build new config list records as needed |
| * IN node_names - List of nodes to update |
| * IN weight - New weight value |
| * RET: SLURM_SUCCESS or error code |
| */ |
| static int _update_node_weight(char *node_names, uint32_t weight) |
| { |
| bitstr_t *node_bitmap = NULL, *tmp_bitmap; |
| ListIterator config_iterator; |
| struct config_record *config_ptr, *new_config_ptr; |
| struct config_record *first_new = NULL; |
| int rc, config_cnt, tmp_cnt; |
| |
| rc = node_name2bitmap(node_names, false, &node_bitmap); |
| if (rc) { |
| info("_update_node_weight: invalid node_name"); |
| return rc; |
| } |
| |
| /* For each config_record with one of these nodes, |
| * update it (if all nodes updated) or split it into |
| * a new entry */ |
| config_iterator = list_iterator_create(config_list); |
| while ((config_ptr = (struct config_record *) |
| list_next(config_iterator))) { |
| if (config_ptr == first_new) |
| break; /* done with all original records */ |
| |
| tmp_bitmap = bit_copy(node_bitmap); |
| bit_and(tmp_bitmap, config_ptr->node_bitmap); |
| config_cnt = bit_set_count(config_ptr->node_bitmap); |
| tmp_cnt = bit_set_count(tmp_bitmap); |
| if (tmp_cnt == 0) { |
| /* no overlap, leave alone */ |
| } else if (tmp_cnt == config_cnt) { |
| /* all nodes changed, update in situ */ |
| config_ptr->weight = weight; |
| } else { |
| /* partial update, split config_record */ |
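			/* The updated nodes move to a duplicated config
			 * record carrying the new weight; the original record
			 * keeps only the nodes not being changed */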
| new_config_ptr = _dup_config(config_ptr); |
			if (first_new == NULL)
				first_new = new_config_ptr;
| /* Change weight for the given node */ |
| new_config_ptr->weight = weight; |
| new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); |
| new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); |
| |
| build_config_feature_list(new_config_ptr); |
| _update_config_ptr(tmp_bitmap, new_config_ptr); |
| |
| /* Update remaining records */ |
| bit_not(tmp_bitmap); |
| bit_and(config_ptr->node_bitmap, tmp_bitmap); |
| xfree(config_ptr->nodes); |
| config_ptr->nodes = bitmap2node_name( |
| config_ptr->node_bitmap); |
| } |
| FREE_NULL_BITMAP(tmp_bitmap); |
| } |
| list_iterator_destroy(config_iterator); |
| FREE_NULL_BITMAP(node_bitmap); |
| |
| info("_update_node_weight: nodes %s weight set to: %u", |
| node_names, weight); |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * _update_node_features - Update features associated with nodes |
| * build new config list records as needed |
| * IN node_names - List of nodes to update |
| * IN features - New features value |
| * RET: SLURM_SUCCESS or error code |
| */ |
| static int _update_node_features(char *node_names, char *features) |
| { |
| bitstr_t *node_bitmap = NULL, *tmp_bitmap; |
| ListIterator config_iterator; |
| struct config_record *config_ptr, *new_config_ptr; |
| struct config_record *first_new = NULL; |
| int rc, config_cnt, tmp_cnt; |
| |
| rc = node_name2bitmap(node_names, false, &node_bitmap); |
| if (rc) { |
| info("_update_node_features: invalid node_name"); |
| return rc; |
| } |
| |
| /* For each config_record with one of these nodes, |
| * update it (if all nodes updated) or split it into |
| * a new entry */ |
| config_iterator = list_iterator_create(config_list); |
| while ((config_ptr = (struct config_record *) |
| list_next(config_iterator))) { |
| if (config_ptr == first_new) |
| break; /* done with all original records */ |
| |
| tmp_bitmap = bit_copy(node_bitmap); |
| bit_and(tmp_bitmap, config_ptr->node_bitmap); |
| config_cnt = bit_set_count(config_ptr->node_bitmap); |
| tmp_cnt = bit_set_count(tmp_bitmap); |
| if (tmp_cnt == 0) { |
| /* no overlap, leave alone */ |
| } else if (tmp_cnt == config_cnt) { |
| /* all nodes changed, update in situ */ |
| xfree(config_ptr->feature); |
| if (features && features[0]) |
| config_ptr->feature = xstrdup(features); |
| build_config_feature_list(config_ptr); |
| } else { |
| /* partial update, split config_record */ |
| new_config_ptr = _dup_config(config_ptr); |
			if (first_new == NULL)
				first_new = new_config_ptr;
| xfree(new_config_ptr->feature); |
| if (features && features[0]) |
| new_config_ptr->feature = xstrdup(features); |
| new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); |
| new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); |
| |
| build_config_feature_list(new_config_ptr); |
| _update_config_ptr(tmp_bitmap, new_config_ptr); |
| |
| /* Update remaining records */ |
| bit_not(tmp_bitmap); |
| bit_and(config_ptr->node_bitmap, tmp_bitmap); |
| xfree(config_ptr->nodes); |
| config_ptr->nodes = bitmap2node_name(config_ptr-> |
| node_bitmap); |
| } |
| FREE_NULL_BITMAP(tmp_bitmap); |
| } |
| list_iterator_destroy(config_iterator); |
| FREE_NULL_BITMAP(node_bitmap); |
| |
| info("_update_node_features: nodes %s features set to: %s", |
| node_names, features); |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * _update_node_gres - Update generic resources associated with nodes |
| * build new config list records as needed |
| * IN node_names - List of nodes to update |
| * IN gres - New gres value |
| * RET: SLURM_SUCCESS or error code |
| */ |
| static int _update_node_gres(char *node_names, char *gres) |
| { |
| bitstr_t *node_bitmap = NULL, *tmp_bitmap; |
| ListIterator config_iterator; |
| struct config_record *config_ptr, *new_config_ptr; |
| struct config_record *first_new = NULL; |
| struct node_record *node_ptr; |
| int rc, config_cnt, tmp_cnt; |
| int i, i_first, i_last; |
| |
| rc = node_name2bitmap(node_names, false, &node_bitmap); |
| if (rc) { |
| info("_update_node_gres: invalid node_name"); |
| return rc; |
| } |
| |
| /* For each config_record with one of these nodes, |
| * update it (if all nodes updated) or split it into |
| * a new entry */ |
| config_iterator = list_iterator_create(config_list); |
| while ((config_ptr = (struct config_record *) |
| list_next(config_iterator))) { |
| if (config_ptr == first_new) |
| break; /* done with all original records */ |
| |
| tmp_bitmap = bit_copy(node_bitmap); |
| bit_and(tmp_bitmap, config_ptr->node_bitmap); |
| config_cnt = bit_set_count(config_ptr->node_bitmap); |
| tmp_cnt = bit_set_count(tmp_bitmap); |
| if (tmp_cnt == 0) { |
| /* no overlap, leave alone */ |
| } else if (tmp_cnt == config_cnt) { |
| /* all nodes changed, update in situ */ |
| xfree(config_ptr->gres); |
| if (gres && gres[0]) |
| config_ptr->gres = xstrdup(gres); |
| } else { |
| /* partial update, split config_record */ |
| new_config_ptr = _dup_config(config_ptr); |
| if (first_new == NULL) |
| first_new = new_config_ptr; |
| xfree(new_config_ptr->gres); |
| if (gres && gres[0]) |
| new_config_ptr->gres = xstrdup(gres); |
| new_config_ptr->node_bitmap = bit_copy(tmp_bitmap); |
| new_config_ptr->nodes = bitmap2node_name(tmp_bitmap); |
| |
| _update_config_ptr(tmp_bitmap, new_config_ptr); |
| |
| /* Update remaining records */ |
| bit_not(tmp_bitmap); |
| bit_and(config_ptr->node_bitmap, tmp_bitmap); |
| xfree(config_ptr->nodes); |
| config_ptr->nodes = bitmap2node_name(config_ptr-> |
| node_bitmap); |
| } |
| FREE_NULL_BITMAP(tmp_bitmap); |
| } |
| list_iterator_destroy(config_iterator); |
| |
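| /* Re-read GRES details on each affected node so that node_ptr->gres |
| * and node_ptr->gres_list reflect the new configuration. */ |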
| i_first = bit_ffs(node_bitmap); |
| i_last = bit_fls(node_bitmap); |
| for (i=i_first; i<=i_last; i++) { |
| node_ptr = node_record_table_ptr + i; |
| (void) gres_plugin_node_reconfig(node_ptr->name, |
| node_ptr->config_ptr->gres, |
| &node_ptr->gres, |
| &node_ptr->gres_list, |
| slurmctld_conf.fast_schedule); |
| gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name); |
| } |
| FREE_NULL_BITMAP(node_bitmap); |
| |
| info("_update_node_gres: nodes %s gres set to: %s", node_names, gres); |
| return SLURM_SUCCESS; |
| } |
| |
| /* Reset the config pointer for updated nodes */ |
| static void _update_config_ptr(bitstr_t *bitmap, |
| struct config_record *config_ptr) |
| { |
| int i; |
| |
| for (i=0; i<node_record_count; i++) { |
| if (bit_test(bitmap, i) == 0) |
| continue; |
| node_record_table_ptr[i].config_ptr = config_ptr; |
| } |
| } |
| |
| /* |
| * drain_nodes - drain one or more nodes, |
| * no-op for nodes already drained or draining |
| * IN nodes - nodes to drain |
| * IN reason - reason to drain the nodes |
| * RET SLURM_SUCCESS or error code |
| * global: node_record_table_ptr - pointer to global node table |
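| * Example (illustrative): drain_nodes("tux[0-3]", "Bad DIMM", uid) sets the |
| * DRAIN flag on each named node and records the reason, time and uid |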
| */ |
| extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid ) |
| { |
| int error_code = 0, node_inx; |
| struct node_record *node_ptr; |
| char *this_node_name ; |
| hostlist_t host_list; |
| time_t now = time(NULL); |
| |
| if ((nodes == NULL) || (nodes[0] == '\0')) { |
| error ("drain_nodes: invalid node name %s", nodes); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| |
| #ifdef HAVE_ALPS_CRAY |
| error("We cannot drain nodes on a Cray/ALPS system, " |
| "use native Cray tools such as xtprocadmin(8)."); |
| return SLURM_SUCCESS; |
| #endif |
| |
| if ( (host_list = hostlist_create (nodes)) == NULL) { |
| error ("hostlist_create error on %s: %m", nodes); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| |
| while ( (this_node_name = hostlist_shift (host_list)) ) { |
| node_ptr = find_node_record (this_node_name); |
| if (node_ptr == NULL) { |
| error ("drain_nodes: node %s does not exist", |
| this_node_name); |
| error_code = ESLURM_INVALID_NODE_NAME; |
| free (this_node_name); |
| break; |
| } |
| node_inx = node_ptr - node_record_table_ptr; |
| |
| if (IS_NODE_DRAIN(node_ptr)) { |
| /* state already changed, nothing to do */ |
| free (this_node_name); |
| continue; |
| } |
| |
| node_ptr->node_state |= NODE_STATE_DRAIN; |
| bit_clear (avail_node_bitmap, node_inx); |
| info ("drain_nodes: node %s state set to DRAIN", |
| this_node_name); |
| if ((node_ptr->reason == NULL) || |
| (strncmp(node_ptr->reason, "Not responding", 14) == 0)) { |
| xfree(node_ptr->reason); |
| node_ptr->reason = xstrdup(reason); |
| node_ptr->reason_time = now; |
| node_ptr->reason_uid = reason_uid; |
| } |
| if ((node_ptr->run_job_cnt == 0) && |
| (node_ptr->comp_job_cnt == 0)) { |
| /* no jobs, node is drained */ |
| trigger_node_drained(node_ptr); |
| clusteracct_storage_g_node_down(acct_db_conn, |
| node_ptr, now, NULL, |
| reason_uid); |
| } |
| |
| select_g_update_node_state(node_ptr); |
| |
| free (this_node_name); |
| } |
| last_node_update = time (NULL); |
| |
| hostlist_destroy (host_list); |
| return error_code; |
| } |
| |
| /* Return true if admin request to change node state from old to new is valid */ |
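| /* Note: node_state packs a base state (e.g. NODE_STATE_IDLE) in its low |
| * bits and flag bits (e.g. NODE_STATE_DRAIN, NODE_STATE_FAIL) in its high |
| * bits; the switch below checks the requested state against both parts. */ |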
| static bool _valid_node_state_change(uint16_t old, uint16_t new) |
| { |
| uint16_t base_state, node_flags; |
| |
| if (old == new) |
| return true; |
| |
| base_state = old & NODE_STATE_BASE; |
| node_flags = old & NODE_STATE_FLAGS; |
| switch (new) { |
| case NODE_STATE_DOWN: |
| case NODE_STATE_DRAIN: |
| case NODE_STATE_FAIL: |
| case NODE_STATE_NO_RESPOND: |
| case NODE_STATE_POWER_SAVE: |
| case NODE_STATE_POWER_UP: |
| case NODE_STATE_UNDRAIN: |
| return true; |
| |
| case NODE_RESUME: |
| if ((base_state == NODE_STATE_DOWN) || |
| (base_state == NODE_STATE_FUTURE) || |
| (node_flags & NODE_STATE_DRAIN) || |
| (node_flags & NODE_STATE_FAIL)) |
| return true; |
| break; |
| |
| case NODE_STATE_FUTURE: |
| if ((base_state == NODE_STATE_DOWN) || |
| (base_state == NODE_STATE_IDLE)) |
| return true; |
| break; |
| |
| case NODE_STATE_IDLE: |
| if ((base_state == NODE_STATE_DOWN) || |
| (base_state == NODE_STATE_IDLE)) |
| return true; |
| break; |
| |
| case NODE_STATE_ALLOCATED: |
| if (base_state == NODE_STATE_ALLOCATED) |
| return true; |
| break; |
| |
| default: /* All others invalid */ |
| break; |
| } |
| |
| return false; |
| } |
| |
| extern int update_node_record_acct_gather_data( |
| acct_gather_node_resp_msg_t *msg) |
| { |
| struct node_record *node_ptr; |
| |
| node_ptr = find_node_record(msg->node_name); |
| if (node_ptr == NULL) |
| return ENOENT; |
| |
| memcpy(node_ptr->energy, msg->energy, sizeof(acct_gather_energy_t)); |
| |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * validate_node_specs - validate the node's specifications; |
| * if invalid, set its state to DOWN; in any case update last_response |
| * IN reg_msg - node registration message |
| * IN protocol_version - Version of Slurm on this node |
| * OUT newly_up - set if node newly brought into service |
| * RET 0 if no error, ENOENT if no such node, EINVAL if values too low |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, |
| uint16_t protocol_version, bool *newly_up) |
| { |
| int error_code, i, node_inx; |
| struct config_record *config_ptr; |
| struct node_record *node_ptr; |
| char *reason_down = NULL; |
| uint16_t node_flags; |
| time_t now = time(NULL); |
| bool gang_flag = false; |
| bool orig_node_avail; |
| static uint32_t cr_flag = NO_VAL; |
| |
| node_ptr = find_node_record (reg_msg->node_name); |
| if (node_ptr == NULL) |
| return ENOENT; |
| node_inx = node_ptr - node_record_table_ptr; |
| orig_node_avail = bit_test(avail_node_bitmap, node_inx); |
| |
| config_ptr = node_ptr->config_ptr; |
| error_code = SLURM_SUCCESS; |
| |
| node_ptr->protocol_version = protocol_version; |
| xfree(node_ptr->version); |
| node_ptr->version = reg_msg->version; |
| reg_msg->version = NULL; |
| |
| if (cr_flag == NO_VAL) { |
| cr_flag = 0; /* call is no-op for select/linear and bluegene */ |
| if (select_g_get_info_from_plugin(SELECT_CR_PLUGIN, |
| NULL, &cr_flag)) { |
| cr_flag = NO_VAL; /* error */ |
| } |
| } |
| if (slurm_get_preempt_mode() != PREEMPT_MODE_OFF) |
| gang_flag = true; |
| |
| if (gres_plugin_node_config_unpack(reg_msg->gres_info, |
| node_ptr->name) != SLURM_SUCCESS) { |
| error_code = SLURM_ERROR; |
| xstrcat(reason_down, "Could not unpack gres data"); |
| } else if (gres_plugin_node_config_validate( |
| node_ptr->name, config_ptr->gres, |
| &node_ptr->gres, &node_ptr->gres_list, |
| slurmctld_conf.fast_schedule, &reason_down) |
| != SLURM_SUCCESS) { |
| error_code = EINVAL; |
| /* reason_down set in function above */ |
| } |
| gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name); |
| |
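| /* Unless FastSchedule=2, cross-check the topology reported by slurmd |
| * against the slurm.conf configuration; a node reporting fewer |
| * sockets*cores*threads (or CPUs) than configured is flagged invalid |
| * below and later drained. */ |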
| if (slurmctld_conf.fast_schedule != 2) { |
| int sockets1, sockets2; /* total sockets on node */ |
| int cores1, cores2; /* total cores on node */ |
| int threads1, threads2; /* total threads on node */ |
| |
| sockets1 = reg_msg->sockets; |
| cores1 = sockets1 * reg_msg->cores; |
| threads1 = cores1 * reg_msg->threads; |
| sockets2 = config_ptr->sockets; |
| cores2 = sockets2 * config_ptr->cores; |
| threads2 = cores2 * config_ptr->threads; |
| |
| if (threads1 < threads2) { |
| error("Node %s has low socket*core*thread count " |
| "(%d < %d)", |
| reg_msg->node_name, threads1, threads2); |
| error_code = EINVAL; |
| if (reason_down) |
| xstrcat(reason_down, ", "); |
| xstrcat(reason_down, "Low socket*core*thread count"); |
| } else if ((slurmctld_conf.fast_schedule == 0) && |
| ((cr_flag == 1) || gang_flag) && (cores1 < cores2)) { |
| error("Node %s has low socket*core count (%d < %d)", |
| reg_msg->node_name, cores1, cores2); |
| error_code = EINVAL; |
| if (reason_down) |
| xstrcat(reason_down, ", "); |
| xstrcat(reason_down, "Low socket*core count"); |
| } else if ((slurmctld_conf.fast_schedule == 0) && |
| ((cr_flag == 1) || gang_flag) && |
| ((sockets1 > sockets2) || (cores1 > cores2) || |
| (threads1 > threads2))) { |
| error("Node %s has high socket,core,thread count " |
| "(%d,%d,%d > %d,%d,%d), extra resources ignored", |
| reg_msg->node_name, sockets1, cores1, threads1, |
| sockets2, cores2, threads2); |
| /* Preserve configured values */ |
| reg_msg->sockets = config_ptr->sockets; |
| reg_msg->cores = config_ptr->cores; |
| reg_msg->threads = config_ptr->threads; |
| } |
| |
| if (reg_msg->cpus < config_ptr->cpus) { |
| error("Node %s has low cpu count (%u < %u)", |
| reg_msg->node_name, reg_msg->cpus, |
| config_ptr->cpus); |
| error_code = EINVAL; |
| if (reason_down) |
| xstrcat(reason_down, ", "); |
| xstrcat(reason_down, "Low CPUs"); |
| } else if ((slurmctld_conf.fast_schedule == 0) && |
| ((cr_flag == 1) || gang_flag) && |
| (reg_msg->cpus > config_ptr->cpus)) { |
| error("Node %s has high CPU count (%u > %u), " |
| "extra resources ignored", |
| reg_msg->node_name, reg_msg->cpus, |
| config_ptr->cpus); |
| reg_msg->cpus = config_ptr->cpus; |
| } |
| } |
| |
| /* reset partition and node config (in that order) */ |
| if ((node_ptr->cpus != reg_msg->cpus) && |
| (slurmctld_conf.fast_schedule == 0)) { |
| for (i=0; i<node_ptr->part_cnt; i++) { |
| node_ptr->part_pptr[i]->total_cpus += |
| (reg_msg->cpus - node_ptr->cpus); |
| } |
| } |
| if (error_code == SLURM_SUCCESS) { |
| node_ptr->boards = reg_msg->boards; |
| node_ptr->sockets = reg_msg->sockets; |
| node_ptr->cores = reg_msg->cores; |
| node_ptr->threads = reg_msg->threads; |
| node_ptr->cpus = reg_msg->cpus; |
| } |
| |
| if (reg_msg->real_memory < config_ptr->real_memory) { |
| if (slurmctld_conf.fast_schedule == 0) { |
| debug("Node %s has low real_memory size (%u < %u)", |
| reg_msg->node_name, reg_msg->real_memory, |
| config_ptr->real_memory); |
| } else if (slurmctld_conf.fast_schedule == 1) { |
| error("Node %s has low real_memory size (%u < %u)", |
| reg_msg->node_name, reg_msg->real_memory, |
| config_ptr->real_memory); |
| error_code = EINVAL; |
| if (reason_down) |
| xstrcat(reason_down, ", "); |
| xstrcat(reason_down, "Low RealMemory"); |
| } |
| } |
| node_ptr->real_memory = reg_msg->real_memory; |
| |
| if (reg_msg->tmp_disk < config_ptr->tmp_disk) { |
| if (slurmctld_conf.fast_schedule == 0) { |
| debug("Node %s has low tmp_disk size (%u < %u)", |
| reg_msg->node_name, reg_msg->tmp_disk, |
| config_ptr->tmp_disk); |
| } else if (slurmctld_conf.fast_schedule == 1) { |
| error("Node %s has low tmp_disk size (%u < %u)", |
| reg_msg->node_name, reg_msg->tmp_disk, |
| config_ptr->tmp_disk); |
| error_code = EINVAL; |
| if (reason_down) |
| xstrcat(reason_down, ", "); |
| xstrcat(reason_down, "Low TmpDisk"); |
| } |
| } |
| node_ptr->tmp_disk = reg_msg->tmp_disk; |
| |
| xfree(node_ptr->arch); |
| node_ptr->arch = reg_msg->arch; |
| reg_msg->arch = NULL; /* Nothing left to free */ |
| |
| xfree(node_ptr->os); |
| node_ptr->os = reg_msg->os; |
| reg_msg->os = NULL; /* Nothing left to free */ |
| |
| if (node_ptr->cpu_load != reg_msg->cpu_load) { |
| node_ptr->cpu_load = reg_msg->cpu_load; |
| node_ptr->cpu_load_time = now; |
| last_node_update = now; |
| } |
| |
| if (IS_NODE_NO_RESPOND(node_ptr)) { |
| node_ptr->node_state &= (~NODE_STATE_NO_RESPOND); |
| node_ptr->node_state &= (~NODE_STATE_POWER_UP); |
| last_node_update = time (NULL); |
| } |
| |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| |
| if (error_code) { |
| if (!IS_NODE_DOWN(node_ptr) |
| && !IS_NODE_DRAIN(node_ptr) |
| && ! IS_NODE_FAIL(node_ptr)) { |
| error ("Setting node %s state to DRAIN", |
| reg_msg->node_name); |
| drain_nodes(reg_msg->node_name, |
| reason_down, |
| slurmctld_conf.slurm_user_id); |
| } |
| last_node_update = time (NULL); |
| } else if (reg_msg->status == ESLURMD_PROLOG_FAILED) { |
| if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { |
| error("Prolog failure on node %s, draining the node", |
| reg_msg->node_name); |
| drain_nodes(reg_msg->node_name, "Prolog error", |
| slurm_get_slurm_user_id()); |
| last_node_update = time (NULL); |
| } |
| } else { |
| if (IS_NODE_UNKNOWN(node_ptr) || IS_NODE_FUTURE(node_ptr)) { |
| bool unknown = false; |
| |
| if (IS_NODE_UNKNOWN(node_ptr)) |
| unknown = true; |
| |
| debug("validate_node_specs: node %s registered with " |
| "%u jobs", |
| reg_msg->node_name, reg_msg->job_count); |
| if (IS_NODE_FUTURE(node_ptr) && |
| IS_NODE_MAINT(node_ptr) && |
| !is_node_in_maint_reservation(node_inx)) |
| node_flags &= (~NODE_STATE_MAINT); |
| if (reg_msg->job_count) { |
| node_ptr->node_state = NODE_STATE_ALLOCATED | |
| node_flags; |
| } else { |
| node_ptr->node_state = NODE_STATE_IDLE | |
| node_flags; |
| node_ptr->last_idle = now; |
| } |
| last_node_update = now; |
| |
| /* don't send this on a slurmctld unless needed */ |
| if (unknown && slurmctld_init_db |
| && !IS_NODE_DRAIN(node_ptr) |
| && !IS_NODE_FAIL(node_ptr)) { |
| /* reason information is handled in |
| clusteracct_storage_g_node_up() |
| */ |
| clusteracct_storage_g_node_up( |
| acct_db_conn, node_ptr, now); |
| } |
| } else if (IS_NODE_DOWN(node_ptr) && |
| ((slurmctld_conf.ret2service == 2) || |
| !xstrcmp(node_ptr->reason, "Scheduled reboot") || |
| ((slurmctld_conf.ret2service == 1) && |
| !xstrcmp(node_ptr->reason, "Not responding")))) { |
| if (reg_msg->job_count) { |
| node_ptr->node_state = NODE_STATE_ALLOCATED | |
| node_flags; |
| } else { |
| node_ptr->node_state = NODE_STATE_IDLE | |
| node_flags; |
| node_ptr->last_idle = now; |
| } |
| info("node %s returned to service", |
| reg_msg->node_name); |
| trigger_node_up(node_ptr); |
| last_node_update = now; |
| if (!IS_NODE_DRAIN(node_ptr) |
| && !IS_NODE_FAIL(node_ptr)) { |
| /* reason information is handled in |
| clusteracct_storage_g_node_up() |
| */ |
| clusteracct_storage_g_node_up( |
| acct_db_conn, node_ptr, now); |
| } |
| } else if (node_ptr->last_response |
| && (node_ptr->boot_time > node_ptr->last_response) |
| && (slurmctld_conf.ret2service != 2)) { |
| if (!node_ptr->reason) { |
| node_ptr->reason_time = now; |
| node_ptr->reason_uid = |
| slurm_get_slurm_user_id(); |
| node_ptr->reason = xstrdup( |
| "Node unexpectedly rebooted"); |
| } |
| info("Node %s unexpectedly rebooted", |
| reg_msg->node_name); |
| _make_node_down(node_ptr, now); |
| kill_running_job_by_node_name(reg_msg->node_name); |
| last_node_update = now; |
| reg_msg->job_count = 0; |
| } else if (IS_NODE_ALLOCATED(node_ptr) && |
| (reg_msg->job_count == 0)) { /* job vanished */ |
| node_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| node_ptr->last_idle = now; |
| last_node_update = now; |
| } else if (IS_NODE_COMPLETING(node_ptr) && |
| (reg_msg->job_count == 0)) { /* job already done */ |
| node_ptr->node_state &= (~NODE_STATE_COMPLETING); |
| last_node_update = now; |
| bit_clear(cg_node_bitmap, node_inx); |
| } else if (IS_NODE_IDLE(node_ptr) && |
| (reg_msg->job_count != 0)) { |
| if (node_ptr->run_job_cnt != 0) { |
| node_ptr->node_state = NODE_STATE_ALLOCATED | |
| node_flags; |
| error("Invalid state for node %s, was IDLE " |
| "with %u running jobs", |
| node_ptr->name, reg_msg->job_count); |
| } |
| /* |
| * there must be completing job(s) on this node since |
| * reg_msg->job_count was set (run_job_cnt + |
| * comp_job_cnt) in validate_jobs_on_node() |
| */ |
| if (node_ptr->comp_job_cnt != 0) { |
| node_ptr->node_state |= NODE_STATE_COMPLETING; |
| bit_set(cg_node_bitmap, node_inx); |
| } |
| last_node_update = now; |
| } |
| |
| select_g_update_node_config(node_inx); |
| select_g_update_node_state(node_ptr); |
| _sync_bitmaps(node_ptr, reg_msg->job_count); |
| } |
| |
| xfree(reason_down); |
| if (reg_msg->energy) |
| memcpy(node_ptr->energy, reg_msg->energy, |
| sizeof(acct_gather_energy_t)); |
| |
| node_ptr->last_response = now; |
| |
| *newly_up = (!orig_node_avail && bit_test(avail_node_bitmap, node_inx)); |
| |
| return error_code; |
| } |
| |
| static front_end_record_t * _front_end_reg( |
| slurm_node_registration_status_msg_t *reg_msg) |
| { |
| front_end_record_t *front_end_ptr; |
| uint16_t state_base, state_flags; |
| time_t now = time(NULL); |
| |
| debug2("name:%s boot_time:%u up_time:%u", |
| reg_msg->node_name, (unsigned int) reg_msg->slurmd_start_time, |
| reg_msg->up_time); |
| |
| front_end_ptr = find_front_end_record(reg_msg->node_name); |
| if (front_end_ptr == NULL) { |
| error("Registration message from unknown node %s", |
| reg_msg->node_name); |
| return NULL; |
| } |
| |
| front_end_ptr->boot_time = now - reg_msg->up_time; |
| if (front_end_ptr->last_response && |
| (front_end_ptr->boot_time > front_end_ptr->last_response)) { |
| info("front end %s unexpectedly rebooted, " |
| "killing all previously running jobs running on it.", |
| reg_msg->node_name); |
| (void) kill_job_by_front_end_name(front_end_ptr->name); |
| reg_msg->job_count = 0; |
| } |
| |
| front_end_ptr->last_response = now; |
| front_end_ptr->slurmd_start_time = reg_msg->slurmd_start_time; |
| state_base = front_end_ptr->node_state & NODE_STATE_BASE; |
| state_flags = front_end_ptr->node_state & NODE_STATE_FLAGS; |
| if ((state_base == NODE_STATE_DOWN) && (front_end_ptr->reason) && |
| (!strncmp(front_end_ptr->reason, "Not responding", 14))) { |
| error("front end node %s returned to service", |
| reg_msg->node_name); |
| state_base = NODE_STATE_IDLE; |
| xfree(front_end_ptr->reason); |
| front_end_ptr->reason_time = (time_t) 0; |
| front_end_ptr->reason_uid = 0; |
| } |
| if (state_base == NODE_STATE_UNKNOWN) |
| state_base = NODE_STATE_IDLE; |
| |
| state_flags &= (~NODE_STATE_NO_RESPOND); |
| |
| front_end_ptr->node_state = state_base | state_flags; |
| last_front_end_update = now; |
| return front_end_ptr; |
| } |
| |
| /* |
| * validate_nodes_via_front_end - validate all nodes on a cluster as having |
| * a valid configuration as soon as the front-end registers; the individual |
| * compute nodes do not register themselves |
| * IN reg_msg - node registration message |
| * IN protocol_version - Version of Slurm on this node |
| * OUT newly_up - set if node newly brought into service |
| * RET 0 if no error, SLURM error code otherwise |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern int validate_nodes_via_front_end( |
| slurm_node_registration_status_msg_t *reg_msg, |
| uint16_t protocol_version, bool *newly_up) |
| { |
| int error_code = 0, i, j, rc; |
| bool update_node_state = false; |
| struct job_record *job_ptr; |
| struct config_record *config_ptr; |
| struct node_record *node_ptr; |
| time_t now = time(NULL); |
| ListIterator job_iterator; |
| hostlist_t reg_hostlist = NULL; |
| char *host_str = NULL, *reason_down = NULL; |
| uint16_t node_flags; |
| front_end_record_t *front_end_ptr; |
| |
| if (reg_msg->up_time > now) { |
| error("Node up_time on %s is invalid: %u>%u", |
| reg_msg->node_name, reg_msg->up_time, (uint32_t) now); |
| reg_msg->up_time = 0; |
| } |
| |
| front_end_ptr = _front_end_reg(reg_msg); |
| if (front_end_ptr == NULL) |
| return ESLURM_INVALID_NODE_NAME; |
| |
| front_end_ptr->protocol_version = protocol_version; |
| xfree(front_end_ptr->version); |
| front_end_ptr->version = reg_msg->version; |
| reg_msg->version = NULL; |
| *newly_up = false; |
| |
| if (reg_msg->status == ESLURMD_PROLOG_FAILED) { |
| error("Prolog failed on node %s", reg_msg->node_name); |
| /* Do NOT set the node DOWN here. Unlike non-front-end systems, |
| * this failure is likely due to some problem in the underlying |
| * infrastructure (e.g. the block failed to boot). */ |
| /* set_front_end_down(front_end_ptr, "Prolog failed"); */ |
| } |
| |
| /* First validate the job info */ |
| for (i = 0; i < reg_msg->job_count; i++) { |
| if ( (reg_msg->job_id[i] >= MIN_NOALLOC_JOBID) && |
| (reg_msg->job_id[i] <= MAX_NOALLOC_JOBID) ) { |
| info("NoAllocate job %u.%u reported", |
| reg_msg->job_id[i], reg_msg->step_id[i]); |
| continue; |
| } |
| |
| job_ptr = find_job_record(reg_msg->job_id[i]); |
| node_ptr = node_record_table_ptr; |
| if (job_ptr && job_ptr->node_bitmap && |
| ((j = bit_ffs(job_ptr->node_bitmap)) >= 0)) |
| node_ptr += j; |
| |
| if (job_ptr == NULL) { |
| error("Orphan job %u.%u reported on %s", |
| reg_msg->job_id[i], reg_msg->step_id[i], |
| front_end_ptr->name); |
| abort_job_on_node(reg_msg->job_id[i], |
| job_ptr, front_end_ptr->name); |
| continue; |
| } else if (job_ptr->batch_host == NULL) { |
| error("Resetting NULL batch_host of job %u to %s", |
| reg_msg->job_id[i], front_end_ptr->name); |
| job_ptr->batch_host = xstrdup(front_end_ptr->name); |
| } |
| |
| |
| if (IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr)) { |
| debug3("Registered job %u.%u on %s", |
| reg_msg->job_id[i], reg_msg->step_id[i], |
| front_end_ptr->name); |
| if (job_ptr->batch_flag) { |
| /* NOTE: Used for purging defunct batch jobs */ |
| job_ptr->time_last_active = now; |
| } |
| } |
| |
| else if (IS_JOB_COMPLETING(job_ptr)) { |
| /* Re-send kill request as needed, |
| * not necessarily an error */ |
| kill_job_on_node(reg_msg->job_id[i], job_ptr, |
| node_ptr); |
| } |
| |
| else if (IS_JOB_PENDING(job_ptr)) { |
| /* Typically indicates a job requeue and the hung |
| * slurmd that went DOWN is now responding */ |
| error("Registered PENDING job %u.%u on %s", |
| reg_msg->job_id[i], reg_msg->step_id[i], |
| front_end_ptr->name); |
| abort_job_on_node(reg_msg->job_id[i], job_ptr, |
| front_end_ptr->name); |
| } |
| |
| else if (difftime(now, job_ptr->end_time) < |
| slurm_get_msg_timeout()) { /* Race condition */ |
| debug("Registered newly completed job %u.%u on %s", |
| reg_msg->job_id[i], reg_msg->step_id[i], |
| front_end_ptr->name); |
| } |
| |
| else { /* else job is supposed to be done */ |
| error("Registered job %u.%u in state %s on %s", |
| reg_msg->job_id[i], reg_msg->step_id[i], |
| job_state_string(job_ptr->job_state), |
| front_end_ptr->name); |
| kill_job_on_node(reg_msg->job_id[i], job_ptr, |
| node_ptr); |
| } |
| } |
| |
| |
| /* purge orphan batch jobs */ |
| job_iterator = list_iterator_create(job_list); |
| while ((job_ptr = (struct job_record *) list_next(job_iterator))) { |
| if (!IS_JOB_RUNNING(job_ptr) || |
| IS_JOB_CONFIGURING(job_ptr) || |
| (job_ptr->batch_flag == 0)) |
| continue; |
| if (job_ptr->front_end_ptr != front_end_ptr) |
| continue; |
| #ifdef HAVE_BG |
| /* slurmd does not report job presence until after prolog |
| * completes which waits for bgblock boot to complete. |
| * This can take several minutes on BlueGene. */ |
| if (difftime(now, job_ptr->time_last_active) <= |
| (BG_FREE_PREVIOUS_BLOCK + BG_MIN_BLOCK_BOOT + |
| BG_INCR_BLOCK_BOOT * job_ptr->node_cnt)) |
| continue; |
| #else |
| if (difftime(now, job_ptr->time_last_active) <= 5) |
| continue; |
| #endif |
| info("Killing orphan batch job %u", job_ptr->job_id); |
| job_complete(job_ptr->job_id, 0, false, false, 0); |
| } |
| list_iterator_destroy(job_iterator); |
| |
| (void) gres_plugin_node_config_unpack(reg_msg->gres_info, |
| node_record_table_ptr->name); |
| for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; |
| i++, node_ptr++) { |
| config_ptr = node_ptr->config_ptr; |
| node_ptr->last_response = now; |
| |
| rc = gres_plugin_node_config_validate(node_ptr->name, |
| config_ptr->gres, |
| &node_ptr->gres, |
| &node_ptr->gres_list, |
| slurmctld_conf. |
| fast_schedule, |
| &reason_down); |
| if (rc) { |
| if (!IS_NODE_DOWN(node_ptr)) { |
| error("Setting node %s state to DOWN", |
| node_ptr->name); |
| } |
| set_node_down(node_ptr->name, reason_down); |
| last_node_update = now; |
| } |
| xfree(reason_down); |
| gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name); |
| |
| if (reg_msg->up_time) { |
| node_ptr->up_time = reg_msg->up_time; |
| node_ptr->boot_time = now - reg_msg->up_time; |
| } |
| node_ptr->slurmd_start_time = reg_msg->slurmd_start_time; |
| |
| if (IS_NODE_NO_RESPOND(node_ptr)) { |
| update_node_state = true; |
| #ifndef HAVE_ALPS_CRAY |
| /* This is handled by the select/cray plugin */ |
| node_ptr->node_state &= (~NODE_STATE_NO_RESPOND); |
| #endif |
| node_ptr->node_state &= (~NODE_STATE_POWER_UP); |
| } |
| |
| if (reg_msg->status != ESLURMD_PROLOG_FAILED) { |
| if (reg_hostlist) |
| (void) hostlist_push_host(reg_hostlist, |
| node_ptr->name); |
| else |
| reg_hostlist = hostlist_create(node_ptr->name); |
| |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| if (IS_NODE_UNKNOWN(node_ptr)) { |
| update_node_state = true; |
| *newly_up = true; |
| if (node_ptr->run_job_cnt) { |
| node_ptr->node_state = |
| NODE_STATE_ALLOCATED | |
| node_flags; |
| } else { |
| node_ptr->node_state = |
| NODE_STATE_IDLE | |
| node_flags; |
| node_ptr->last_idle = now; |
| } |
| if (!IS_NODE_DRAIN(node_ptr) && |
| !IS_NODE_FAIL(node_ptr)) { |
| /* reason information is handled in |
| clusteracct_storage_g_node_up() |
| */ |
| clusteracct_storage_g_node_up( |
| acct_db_conn, |
| node_ptr, now); |
| } |
| } else if (IS_NODE_DOWN(node_ptr) && |
| ((slurmctld_conf.ret2service == 2) || |
| !xstrcmp(node_ptr->reason, |
| "Scheduled reboot") || |
| ((slurmctld_conf.ret2service == 1) && |
| !xstrcmp(node_ptr->reason, |
| "Not responding")))) { |
| update_node_state = true; |
| *newly_up = true; |
| if (node_ptr->run_job_cnt) { |
| node_ptr->node_state = |
| NODE_STATE_ALLOCATED | |
| node_flags; |
| } else { |
| node_ptr->node_state = |
| NODE_STATE_IDLE | |
| node_flags; |
| node_ptr->last_idle = now; |
| } |
| trigger_node_up(node_ptr); |
| if (!IS_NODE_DRAIN(node_ptr) && |
| !IS_NODE_FAIL(node_ptr)) { |
| /* reason information is handled in |
| clusteracct_storage_g_node_up() |
| */ |
| clusteracct_storage_g_node_up( |
| acct_db_conn, |
| node_ptr, now); |
| } |
| } else if (IS_NODE_ALLOCATED(node_ptr) && |
| (node_ptr->run_job_cnt == 0)) { |
| /* job vanished */ |
| update_node_state = true; |
| node_ptr->node_state = NODE_STATE_IDLE | |
| node_flags; |
| node_ptr->last_idle = now; |
| } else if (IS_NODE_COMPLETING(node_ptr) && |
| (node_ptr->comp_job_cnt == 0)) { |
| /* job already done */ |
| update_node_state = true; |
| node_ptr->node_state &= |
| (~NODE_STATE_COMPLETING); |
| bit_clear(cg_node_bitmap, i); |
| } else if (IS_NODE_IDLE(node_ptr) && |
| (node_ptr->run_job_cnt != 0)) { |
| update_node_state = true; |
| node_ptr->node_state = NODE_STATE_ALLOCATED | |
| node_flags; |
| error("Invalid state for node %s, was IDLE " |
| "with %u running jobs", |
| node_ptr->name, reg_msg->job_count); |
| } |
| |
| select_g_update_node_config(i); |
| select_g_update_node_state(node_ptr); |
| _sync_bitmaps(node_ptr, |
| (node_ptr->run_job_cnt + |
| node_ptr->comp_job_cnt)); |
| } |
| if (reg_msg->energy) |
| memcpy(node_ptr->energy, reg_msg->energy, |
| sizeof(acct_gather_energy_t)); |
| } |
| |
| if (reg_hostlist) { |
| hostlist_uniq(reg_hostlist); |
| host_str = hostlist_ranged_string_xmalloc(reg_hostlist); |
| debug("Nodes %s have registered", host_str); |
| xfree(host_str); |
| hostlist_destroy(reg_hostlist); |
| } |
| |
| if (update_node_state) |
| last_node_update = time (NULL); |
| return error_code; |
| } |
| |
| /* Sync the idle, share, avail, and up node bitmaps for a given node */ |
| static void _sync_bitmaps(struct node_record *node_ptr, int job_count) |
| { |
| int node_inx = node_ptr - node_record_table_ptr; |
| |
| if (job_count == 0) { |
| bit_set (idle_node_bitmap, node_inx); |
| bit_set (share_node_bitmap, node_inx); |
| } |
| if (IS_NODE_DOWN(node_ptr) || IS_NODE_DRAIN(node_ptr) || |
| IS_NODE_FAIL(node_ptr) || IS_NODE_NO_RESPOND(node_ptr)) |
| bit_clear (avail_node_bitmap, node_inx); |
| else |
| bit_set (avail_node_bitmap, node_inx); |
| if (IS_NODE_DOWN(node_ptr)) |
| bit_clear (up_node_bitmap, node_inx); |
| else |
| bit_set (up_node_bitmap, node_inx); |
| } |
| |
| #ifdef HAVE_FRONT_END |
| static void _node_did_resp(front_end_record_t *fe_ptr) |
| { |
| uint16_t node_flags; |
| time_t now = time(NULL); |
| |
| fe_ptr->last_response = now; |
| #ifndef HAVE_ALPS_CRAY |
| /* This is handled by the select/cray plugin */ |
| if (IS_NODE_NO_RESPOND(fe_ptr)) { |
| info("Node %s now responding", fe_ptr->name); |
| last_front_end_update = now; |
| fe_ptr->node_state &= (~NODE_STATE_NO_RESPOND); |
| } |
| #endif |
| node_flags = fe_ptr->node_state & NODE_STATE_FLAGS; |
| if (IS_NODE_UNKNOWN(fe_ptr)) { |
| last_front_end_update = now; |
| fe_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| } |
| if (IS_NODE_DOWN(fe_ptr) && |
| ((slurmctld_conf.ret2service == 2) || |
| !xstrcmp(fe_ptr->reason, "Scheduled reboot") || |
| ((slurmctld_conf.ret2service == 1) && |
| !xstrcmp(fe_ptr->reason, "Not responding")))) { |
| last_front_end_update = now; |
| fe_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| info("node_did_resp: node %s returned to service", |
| fe_ptr->name); |
| trigger_front_end_up(fe_ptr); |
| if (!IS_NODE_DRAIN(fe_ptr) && !IS_NODE_FAIL(fe_ptr)) { |
| xfree(fe_ptr->reason); |
| fe_ptr->reason_time = 0; |
| fe_ptr->reason_uid = NO_VAL; |
| } |
| } |
| return; |
| } |
| #else |
| static void _node_did_resp(struct node_record *node_ptr) |
| { |
| int node_inx; |
| uint16_t node_flags; |
| time_t now = time(NULL); |
| |
| node_inx = node_ptr - node_record_table_ptr; |
| /* Do not overwrite a last_response value that was set in the future |
| * (for nodes being booted), so unexpected reboots can still be detected */ |
| if (node_ptr->last_response < now) |
| node_ptr->last_response = now; |
| if (IS_NODE_NO_RESPOND(node_ptr) || IS_NODE_POWER_UP(node_ptr)) { |
| info("Node %s now responding", node_ptr->name); |
| node_ptr->node_state &= (~NODE_STATE_NO_RESPOND); |
| node_ptr->node_state &= (~NODE_STATE_POWER_UP); |
| if (!is_node_in_maint_reservation(node_inx)) |
| node_ptr->node_state &= (~NODE_STATE_MAINT); |
| last_node_update = now; |
| } |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| if (IS_NODE_UNKNOWN(node_ptr)) { |
| node_ptr->last_idle = now; |
| if (node_ptr->run_job_cnt) { |
| node_ptr->node_state = NODE_STATE_ALLOCATED | |
| node_flags; |
| } else |
| node_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| last_node_update = now; |
| if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { |
| clusteracct_storage_g_node_up(acct_db_conn, |
| node_ptr, now); |
| } |
| } |
| if (IS_NODE_DOWN(node_ptr) && |
| ((slurmctld_conf.ret2service == 2) || |
| !xstrcmp(node_ptr->reason, "Scheduled reboot") || |
| ((slurmctld_conf.ret2service == 1) && |
| !xstrcmp(node_ptr->reason, "Not responding")))) { |
| node_ptr->last_idle = now; |
| node_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| info("node_did_resp: node %s returned to service", |
| node_ptr->name); |
| trigger_node_up(node_ptr); |
| last_node_update = now; |
| if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { |
| /* reason information is handled in |
| clusteracct_storage_g_node_up() |
| */ |
| clusteracct_storage_g_node_up(acct_db_conn, |
| node_ptr, now); |
| } |
| } |
| if (IS_NODE_IDLE(node_ptr) && !IS_NODE_COMPLETING(node_ptr)) { |
| bit_set (idle_node_bitmap, node_inx); |
| bit_set (share_node_bitmap, node_inx); |
| } |
| if (IS_NODE_DOWN(node_ptr) || IS_NODE_DRAIN(node_ptr) || |
| IS_NODE_FAIL(node_ptr)) { |
| bit_clear (avail_node_bitmap, node_inx); |
| } else |
| bit_set (avail_node_bitmap, node_inx); |
| if (IS_NODE_DOWN(node_ptr)) |
| bit_clear (up_node_bitmap, node_inx); |
| else |
| bit_set (up_node_bitmap, node_inx); |
| return; |
| } |
| #endif |
| |
| /* |
| * node_did_resp - record that the specified node is responding |
| * IN name - name of the node |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| void node_did_resp (char *name) |
| { |
| #ifdef HAVE_FRONT_END |
| front_end_record_t *node_ptr; |
| node_ptr = find_front_end_record (name); |
| #else |
| struct node_record *node_ptr; |
| node_ptr = find_node_record (name); |
| #endif |
| if (node_ptr == NULL) { |
| error ("node_did_resp unable to find node %s", name); |
| return; |
| } |
| _node_did_resp(node_ptr); |
| debug2("node_did_resp %s",name); |
| } |
| |
| /* |
| * node_not_resp - record that the specified node is not responding |
| * IN name - name of the node |
| * IN msg_time - time message was sent |
| */ |
| void node_not_resp (char *name, time_t msg_time, slurm_msg_type_t resp_type) |
| { |
| #ifdef HAVE_FRONT_END |
| front_end_record_t *node_ptr; |
| |
| node_ptr = find_front_end_record (name); |
| #else |
| struct node_record *node_ptr; |
| |
| node_ptr = find_node_record (name); |
| #endif |
| if (node_ptr == NULL) { |
| error ("node_not_resp unable to find node %s", name); |
| return; |
| } |
| |
| /* If the slurmd on the node responded with something, we never |
| * want to set the node down; mark that the node responded but |
| * that there was a communication error of some kind. This way |
| * we do not mark the node down when the slurmd really is there |
| * (wrong protocol version, munge issue, etc.) and we do not |
| * kill any running jobs. RESPONSE_FORWARD_FAILED means we |
| * could not contact the slurmd at all. |
| */ |
| if (resp_type != RESPONSE_FORWARD_FAILED) |
| node_ptr->last_response = msg_time - 1; |
| |
| if (!IS_NODE_DOWN(node_ptr)) { |
| /* Logged by node_no_resp_msg() on periodic basis */ |
| node_ptr->not_responding = true; |
| } |
| |
| if (IS_NODE_NO_RESPOND(node_ptr)) |
| return; /* Already known to be not responding */ |
| |
| if (node_ptr->last_response >= msg_time) { |
| debug("node_not_resp: node %s responded since msg sent", |
| node_ptr->name); |
| return; |
| } |
| node_ptr->node_state |= NODE_STATE_NO_RESPOND; |
| #ifdef HAVE_FRONT_END |
| last_front_end_update = time(NULL); |
| #else |
| last_node_update = time(NULL); |
| bit_clear (avail_node_bitmap, (node_ptr - node_record_table_ptr)); |
| #endif |
| return; |
| } |
| |
| /* For every node with the "not_responding" flag set, clear the flag |
| * and log that the node is not responding using a hostlist expression */ |
| extern void node_no_resp_msg(void) |
| { |
| int i; |
| struct node_record *node_ptr; |
| char *host_str = NULL; |
| hostlist_t no_resp_hostlist = NULL; |
| |
| for (i=0; i<node_record_count; i++) { |
| node_ptr = &node_record_table_ptr[i]; |
| if (!node_ptr->not_responding) |
| continue; |
| if (no_resp_hostlist) { |
| (void) hostlist_push_host(no_resp_hostlist, |
| node_ptr->name); |
| } else |
| no_resp_hostlist = hostlist_create(node_ptr->name); |
| node_ptr->not_responding = false; |
| } |
| if (no_resp_hostlist) { |
| hostlist_uniq(no_resp_hostlist); |
| host_str = hostlist_ranged_string_xmalloc(no_resp_hostlist); |
| error("Nodes %s not responding", host_str); |
| xfree(host_str); |
| hostlist_destroy(no_resp_hostlist); |
| } |
| } |
| |
| /* |
| * set_node_down - make the specified compute node's state DOWN and |
| * kill jobs as needed |
| * IN name - name of the node |
| * IN reason - why the node is DOWN |
| */ |
| void set_node_down (char *name, char *reason) |
| { |
| struct node_record *node_ptr; |
| |
| node_ptr = find_node_record (name); |
| if (node_ptr == NULL) { |
| error ("set_node_down unable to find node %s", name); |
| return; |
| } |
| set_node_down_ptr (node_ptr, reason); |
| |
| return; |
| } |
| |
| /* |
| * set_node_down_ptr - make the specified compute node's state DOWN and |
| * kill jobs as needed |
| * IN node_ptr - node_ptr to the node |
| * IN reason - why the node is DOWN |
| */ |
| void set_node_down_ptr (struct node_record *node_ptr, char *reason) |
| { |
| time_t now = time(NULL); |
| |
| if ((node_ptr->reason == NULL) || |
| (strncmp(node_ptr->reason, "Not responding", 14) == 0)) { |
| xfree(node_ptr->reason); |
| node_ptr->reason = xstrdup(reason); |
| node_ptr->reason_time = now; |
| node_ptr->reason_uid = slurm_get_slurm_user_id(); |
| } |
| _make_node_down(node_ptr, now); |
| (void) kill_running_job_by_node_name(node_ptr->name); |
| _sync_bitmaps(node_ptr, 0); |
| |
| return; |
| } |
| |
| /* |
| * is_node_down - determine if the specified node's state is DOWN |
| * IN name - name of the node |
| * RET true if node exists and is down, otherwise false |
| */ |
| bool is_node_down (char *name) |
| { |
| struct node_record *node_ptr; |
| |
| node_ptr = find_node_record (name); |
| if (node_ptr == NULL) { |
| error ("is_node_down unable to find node %s", name); |
| return false; |
| } |
| |
| if (IS_NODE_DOWN(node_ptr)) |
| return true; |
| return false; |
| } |
| |
| /* |
| * is_node_resp - determine whether the specified node is responding |
| * IN name - name of the node |
| * RET true if node exists and is responding, otherwise false |
| */ |
| bool is_node_resp (char *name) |
| { |
| #ifdef HAVE_FRONT_END |
| front_end_record_t *node_ptr; |
| |
| node_ptr = find_front_end_record (name); |
| #else |
| struct node_record *node_ptr; |
| |
| node_ptr = find_node_record (name); |
| #endif |
| if (node_ptr == NULL) { |
| error ("is_node_resp unable to find node %s", name); |
| return false; |
| } |
| |
| if (IS_NODE_NO_RESPOND(node_ptr)) |
| return false; |
| return true; |
| } |
| |
| /* |
| * find_first_node_record - find a record for first node in the bitmap |
| * IN node_bitmap |
| */ |
| struct node_record * |
| find_first_node_record (bitstr_t *node_bitmap) |
| { |
| int inx; |
| |
| if (node_bitmap == NULL) { |
| error ("find_first_node_record passed null bitstring"); |
| return NULL; |
| } |
| |
| inx = bit_ffs (node_bitmap); |
| if (inx < 0) |
| return NULL; |
| else |
| return &node_record_table_ptr[inx]; |
| } |
| |
| /* msg_to_slurmd - send given msg_type (REQUEST_RECONFIGURE or |
| * REQUEST_SHUTDOWN) to every slurmd, no args */ |
| void msg_to_slurmd (slurm_msg_type_t msg_type) |
| { |
| int i; |
| shutdown_msg_t *shutdown_req; |
| agent_arg_t *kill_agent_args; |
| #ifdef HAVE_FRONT_END |
| front_end_record_t *front_end_ptr; |
| #else |
| struct node_record *node_ptr; |
| #endif |
| |
| kill_agent_args = xmalloc (sizeof (agent_arg_t)); |
| kill_agent_args->msg_type = msg_type; |
| kill_agent_args->retry = 0; |
| kill_agent_args->hostlist = hostlist_create(NULL); |
| if (msg_type == REQUEST_SHUTDOWN) { |
| shutdown_req = xmalloc(sizeof(shutdown_msg_t)); |
| shutdown_req->options = 0; |
| kill_agent_args->msg_args = shutdown_req; |
| } |
| |
| kill_agent_args->protocol_version = SLURM_PROTOCOL_VERSION; |
| |
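| /* The message is sent at the lowest protocol version found among the |
| * targets so that every slurmd (or front end) can understand it. */ |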
| #ifdef HAVE_FRONT_END |
| for (i = 0, front_end_ptr = front_end_nodes; |
| i < front_end_node_cnt; i++, front_end_ptr++) { |
| if (kill_agent_args->protocol_version > |
| front_end_ptr->protocol_version) |
| kill_agent_args->protocol_version = |
| front_end_ptr->protocol_version; |
| |
| hostlist_push_host(kill_agent_args->hostlist, |
| front_end_ptr->name); |
| kill_agent_args->node_count++; |
| } |
| #else |
| node_ptr = node_record_table_ptr; |
| for (i = 0; i < node_record_count; i++, node_ptr++) { |
| if (IS_NODE_FUTURE(node_ptr)) |
| continue; |
| if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr)) |
| continue; |
| if (kill_agent_args->protocol_version > |
| node_record_table_ptr[i].protocol_version) |
| kill_agent_args->protocol_version = |
| node_record_table_ptr[i].protocol_version; |
| hostlist_push_host(kill_agent_args->hostlist, node_ptr->name); |
| kill_agent_args->node_count++; |
| } |
| #endif |
| |
| if (kill_agent_args->node_count == 0) { |
| hostlist_destroy(kill_agent_args->hostlist); |
| xfree (kill_agent_args); |
| } else { |
| debug ("Spawning agent msg_type=%d", msg_type); |
| agent_queue_request(kill_agent_args); |
| } |
| } |
| |
| |
| /* make_node_alloc - flag specified node as allocated to a job |
| * IN node_ptr - pointer to node being allocated |
| * IN job_ptr - pointer to job that is starting |
| */ |
| extern void make_node_alloc(struct node_record *node_ptr, |
| struct job_record *job_ptr) |
| { |
| int inx = node_ptr - node_record_table_ptr; |
| uint16_t node_flags; |
| |
| (node_ptr->run_job_cnt)++; |
| bit_clear(idle_node_bitmap, inx); |
| if (job_ptr->details && (job_ptr->details->share_res == 0)) { |
| bit_clear(share_node_bitmap, inx); |
| (node_ptr->no_share_job_cnt)++; |
| } |
| |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags; |
| xfree(node_ptr->reason); |
| node_ptr->reason_time = 0; |
| node_ptr->reason_uid = NO_VAL; |
| |
| last_node_update = time (NULL); |
| } |
| |
| /* make_node_comp - flag specified node as completing a job |
| * IN node_ptr - pointer to node marked for completion of job |
| * IN job_ptr - pointer to job that is completing |
| * IN suspended - true if job was previously suspended |
| */ |
| extern void make_node_comp(struct node_record *node_ptr, |
| struct job_record *job_ptr, bool suspended) |
| { |
| int inx = node_ptr - node_record_table_ptr; |
| uint16_t node_flags; |
| time_t now = time(NULL); |
| |
| xassert(node_ptr); |
| if (suspended) { |
| if (node_ptr->sus_job_cnt) |
| (node_ptr->sus_job_cnt)--; |
| else |
| error("Node %s sus_job_cnt underflow in " |
| "make_node_comp", node_ptr->name); |
| } else { |
| if (node_ptr->run_job_cnt) |
| (node_ptr->run_job_cnt)--; |
| else |
| error("Node %s run_job_cnt underflow in " |
| "make_node_comp", node_ptr->name); |
| |
| if (job_ptr->details && (job_ptr->details->share_res == 0)) { |
| if (node_ptr->no_share_job_cnt) |
| (node_ptr->no_share_job_cnt)--; |
| else |
| error("Node %s no_share_job_cnt underflow in " |
| "make_node_comp", node_ptr->name); |
| if (node_ptr->no_share_job_cnt == 0) |
| bit_set(share_node_bitmap, inx); |
| } |
| } |
| |
| if (!IS_NODE_DOWN(node_ptr)) { |
| /* Don't verify RPC if DOWN */ |
| (node_ptr->comp_job_cnt)++; |
| node_ptr->node_state |= NODE_STATE_COMPLETING; |
| bit_set(cg_node_bitmap, inx); |
| } |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| |
| if ((node_ptr->run_job_cnt == 0) && |
| (node_ptr->comp_job_cnt == 0)) { |
| bit_set(idle_node_bitmap, inx); |
| if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr)) { |
| trigger_node_drained(node_ptr); |
| clusteracct_storage_g_node_down( |
| acct_db_conn, |
| node_ptr, now, NULL, |
| slurm_get_slurm_user_id()); |
| } |
| } |
| |
| if (IS_NODE_DOWN(node_ptr)) { |
| debug3("make_node_comp: Node %s being left DOWN", |
| node_ptr->name); |
| } else if (node_ptr->run_job_cnt) |
| node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags; |
| else { |
| node_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| node_ptr->last_idle = now; |
| } |
| last_node_update = now; |
| } |
| |
| /* _make_node_down - flag specified node as down */ |
| static void _make_node_down(struct node_record *node_ptr, time_t event_time) |
| { |
| int inx = node_ptr - node_record_table_ptr; |
| uint16_t node_flags; |
| |
| xassert(node_ptr); |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| node_flags &= (~NODE_STATE_COMPLETING); |
| node_ptr->node_state = NODE_STATE_DOWN | node_flags; |
| bit_clear (avail_node_bitmap, inx); |
| bit_clear (cg_node_bitmap, inx); |
| bit_set (idle_node_bitmap, inx); |
| bit_set (share_node_bitmap, inx); |
| bit_clear (up_node_bitmap, inx); |
| select_g_update_node_state(node_ptr); |
| trigger_node_down(node_ptr); |
| last_node_update = time (NULL); |
| clusteracct_storage_g_node_down(acct_db_conn, |
| node_ptr, event_time, NULL, |
| node_ptr->reason_uid); |
| } |
| |
| /* |
| * make_node_idle - flag specified node as having finished with a job |
| * IN node_ptr - pointer to node reporting job completion |
| * IN job_ptr - pointer to job that just completed |
| */ |
| void make_node_idle(struct node_record *node_ptr, |
| struct job_record *job_ptr) |
| { |
| int inx = node_ptr - node_record_table_ptr; |
| uint16_t node_flags; |
| time_t now = time(NULL); |
| bitstr_t *node_bitmap = NULL; |
| |
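| /* Prefer the job's completing-node bitmap (node_bitmap_cg) when set; |
| * it tracks which nodes have not yet finished this job. */ |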
| if (job_ptr) { /* Specific job completed */ |
| if (job_ptr->node_bitmap_cg) |
| node_bitmap = job_ptr->node_bitmap_cg; |
| else |
| node_bitmap = job_ptr->node_bitmap; |
| } |
| |
| xassert(node_ptr); |
| if (node_bitmap && (bit_test(node_bitmap, inx))) { |
| /* Not a replay */ |
| last_job_update = now; |
| bit_clear(node_bitmap, inx); |
| |
| job_update_cpu_cnt(job_ptr, inx); |
| |
| if (job_ptr->node_cnt) { |
| /* Clean up the JOB_COMPLETING flag |
| * only if the slurmctld epilog is |
| * not still running; otherwise wait |
| * until it terminates, at which |
| * point this function is invoked |
| * again. |
| */ |
| job_ptr->node_cnt--; |
| if (job_ptr->node_cnt == 0 |
| && job_ptr->epilog_running == false) |
| cleanup_completing(job_ptr); |
| } else { |
| error("node_cnt underflow on job_id %u", |
| job_ptr->job_id); |
| } |
| |
| if (IS_JOB_SUSPENDED(job_ptr)) { |
| /* Remove node from suspended job */ |
| if (node_ptr->sus_job_cnt) |
| (node_ptr->sus_job_cnt)--; |
| else |
| error("Node %s sus_job_cnt underflow in " |
| "make_node_idle", node_ptr->name); |
| } else if (IS_JOB_RUNNING(job_ptr)) { |
| /* Remove node from running job */ |
| if (node_ptr->run_job_cnt) |
| (node_ptr->run_job_cnt)--; |
| else |
| error("Node %s run_job_cnt underflow in " |
| "make_node_idle", node_ptr->name); |
| } else { |
| if (node_ptr->comp_job_cnt) |
| (node_ptr->comp_job_cnt)--; |
| else |
| error("Node %s comp_job_cnt underflow in " |
| "make_node_idle, job_id %u", |
| node_ptr->name, job_ptr->job_id); |
| if (node_ptr->comp_job_cnt > 0) |
| return; /* More jobs completing */ |
| } |
| } |
| |
| if (node_ptr->comp_job_cnt == 0) { |
| node_ptr->node_state &= (~NODE_STATE_COMPLETING); |
| bit_clear(cg_node_bitmap, inx); |
| } |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| if (IS_NODE_DOWN(node_ptr)) { |
| debug3("make_node_idle: Node %s being left DOWN", |
| node_ptr->name); |
| return; |
| } |
| bit_set(up_node_bitmap, inx); |
| |
| if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr) || |
| IS_NODE_NO_RESPOND(node_ptr)) |
| bit_clear(avail_node_bitmap, inx); |
| else |
| bit_set(avail_node_bitmap, inx); |
| |
| if ((IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr)) && |
| (node_ptr->run_job_cnt == 0) && (node_ptr->comp_job_cnt == 0)) { |
| node_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| bit_set(idle_node_bitmap, inx); |
| debug3("make_node_idle: Node %s is DRAINED", |
| node_ptr->name); |
| node_ptr->last_idle = now; |
| trigger_node_drained(node_ptr); |
| clusteracct_storage_g_node_down(acct_db_conn, |
| node_ptr, now, NULL, |
| slurm_get_slurm_user_id()); |
| } else if (node_ptr->run_job_cnt) { |
| node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags; |
| if (!IS_NODE_NO_RESPOND(node_ptr) && |
| !IS_NODE_FAIL(node_ptr) && !IS_NODE_DRAIN(node_ptr)) |
| bit_set(avail_node_bitmap, inx); |
| } else { |
| node_ptr->node_state = NODE_STATE_IDLE | node_flags; |
| if (!IS_NODE_NO_RESPOND(node_ptr) && |
| !IS_NODE_FAIL(node_ptr) && !IS_NODE_DRAIN(node_ptr)) |
| bit_set(avail_node_bitmap, inx); |
| if (!IS_NODE_NO_RESPOND(node_ptr) && |
| !IS_NODE_COMPLETING(node_ptr)) |
| bit_set(idle_node_bitmap, inx); |
| node_ptr->last_idle = now; |
| } |
| last_node_update = now; |
| } |
| |
| extern int send_nodes_to_accounting(time_t event_time) |
| { |
| int rc = SLURM_SUCCESS, i = 0; |
| struct node_record *node_ptr = NULL; |
| uint32_t node_scaling = 0; |
| char *reason = NULL; |
| slurmctld_lock_t node_read_lock = { |
| READ_LOCK, NO_LOCK, READ_LOCK, WRITE_LOCK }; |
| |
| select_g_alter_node_cnt(SELECT_GET_NODE_SCALING, &node_scaling); |
| |
| lock_slurmctld(node_read_lock); |
| /* send nodes that are not in an 'up' state */ |
| node_ptr = node_record_table_ptr; |
| for (i = 0; i < node_record_count; i++, node_ptr++) { |
| if (node_ptr->reason) |
| reason = node_ptr->reason; |
| else |
| reason = "First Registration"; |
| if ((node_ptr->name == NULL) || (node_ptr->name[0] == '\0') || |
| (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr) && |
| !IS_NODE_DOWN(node_ptr))) { |
| /* At this point the node appears to be up, but on |
| some systems we must also check that no part of |
| the node is in an error state. */ |
| if (node_ptr->select_nodeinfo) { |
| uint16_t err_cpus = 0; |
| select_g_select_nodeinfo_get( |
| node_ptr->select_nodeinfo, |
| SELECT_NODEDATA_SUBCNT, |
| NODE_STATE_ERROR, |
| &err_cpus); |
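| /* err_cpus counts sub-node units in an error state (e.g. on |
| * BlueGene, where a node is scheduled in smaller units); it is |
| * scaled to a CPU count below before the error portion of the |
| * node is reported to accounting as down. */ |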
| if (err_cpus) { |
| struct node_record send_node; |
| struct config_record config_rec; |
| int cpus_per_node = 1; |
| |
| memset(&send_node, 0, |
| sizeof(struct node_record)); |
| memset(&config_rec, 0, |
| sizeof(struct config_record)); |
| send_node.name = node_ptr->name; |
| send_node.config_ptr = &config_rec; |
| select_g_alter_node_cnt( |
| SELECT_GET_NODE_SCALING, |
| &node_scaling); |
| |
| if (node_scaling) |
| cpus_per_node = node_ptr->cpus |
| / node_scaling; |
| err_cpus *= cpus_per_node; |
| |
| send_node.cpus = err_cpus; |
| send_node.node_state = NODE_STATE_ERROR; |
| config_rec.cpus = err_cpus; |
| |
| rc = clusteracct_storage_g_node_down( |
| acct_db_conn, |
| &send_node, event_time, |
| reason, |
| slurm_get_slurm_user_id()); |
| } |
| } |
| } else |
| rc = clusteracct_storage_g_node_down( |
| acct_db_conn, |
| node_ptr, event_time, |
| reason, |
| slurm_get_slurm_user_id()); |
| if (rc == SLURM_ERROR) |
| break; |
| } |
| unlock_slurmctld(node_read_lock); |
| return rc; |
| } |
| |
| /* node_fini - free all memory associated with node records */ |
| extern void node_fini (void) |
| { |
| FREE_NULL_BITMAP(avail_node_bitmap); |
| FREE_NULL_BITMAP(cg_node_bitmap); |
| FREE_NULL_BITMAP(idle_node_bitmap); |
| FREE_NULL_BITMAP(power_node_bitmap); |
| FREE_NULL_BITMAP(share_node_bitmap); |
| FREE_NULL_BITMAP(up_node_bitmap); |
| node_fini2(); |
| } |
| |
| /* Reset a node's CPU load value */ |
| extern void reset_node_load(char *node_name, uint32_t cpu_load) |
| { |
| #ifdef HAVE_FRONT_END |
| return; |
| #else |
| struct node_record *node_ptr; |
| |
| node_ptr = find_node_record(node_name); |
| if (node_ptr) { |
| time_t now = time(NULL); |
| node_ptr->cpu_load = cpu_load; |
| node_ptr->cpu_load_time = now; |
| last_node_update = now; |
| } else |
| error("is_node_resp unable to find node %s", node_name); |
| #endif |
| } |