| /*****************************************************************************\ |
| * get_nodes.c - Process Wiki get node info request |
| ***************************************************************************** |
| * Copyright (C) 2006-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov> |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <https://computing.llnl.gov/linux/slurm/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
#include "./msg.h"
#include "src/common/hostlist.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/slurmctld.h"

/* System headers follow the project headers so that any feature-test macros
 * the project headers establish are in effect first. */
#include <stdlib.h>	/* free() for strings returned by hostset_shift() */
| |
| static char * _dump_all_nodes(int *node_cnt, time_t update_time); |
| static char * _dump_node(struct node_record *node_ptr, hostlist_t hl, |
| time_t update_time); |
| static bool _hidden_node(struct node_record *node_ptr); |
| static char * _get_node_state(struct node_record *node_ptr); |
| static int _same_info(struct node_record *node1_ptr, |
| struct node_record *node2_ptr, time_t update_time); |
| static int _str_cmp(char *s1, char *s2); |
| |
| /* |
| * get_nodes - get information on specific node(s) changed since some time |
| * cmd_ptr IN - CMD=GETNODES ARG=[<UPDATETIME>:<NODEID>[:<NODEID>]...] |
| * [<UPDATETIME>:ALL] |
| * err_code OUT - 0 or an error code |
| * err_msg OUT - response message |
| * NOTE: xfree() err_msg if err_code is zero |
| * RET 0 on success, -1 on failure |
| * |
| * Response format |
| * ARG=<cnt>#<NODEID>: |
| * STATE=<state>; Moab equivalent node state |
| * [CAT=<reason>]; Reason for a node being down or drained |
| * colon separator |
| * CCLASS=<[part:cpus]>; SLURM partition with CPU count of node, |
| * make have more than one partition |
| * [ARCH=<architecture>;] Computer architecture |
| * [OS=<operating_system>;] Operating system |
| * CMEMORY=<MB>; MB of memory on node |
| * CDISK=<MB>; MB of disk space on node |
| * CPROC=<cpus>; CPU count on node |
| * [FEATURE=<feature>;] Features associated with node, if any |
| * [GRES=<name>[:<count>],...;] generic resources on the node |
| * [#<NODEID>:...]; |
| */ |
| extern int get_nodes(char *cmd_ptr, int *err_code, char **err_msg) |
| { |
| char *arg_ptr = NULL, *tmp_char = NULL, *tmp_buf = NULL, *buf = NULL; |
| time_t update_time; |
| /* Locks: read node, read partition */ |
| slurmctld_lock_t node_read_lock = { |
| NO_LOCK, NO_LOCK, READ_LOCK, READ_LOCK }; |
| int node_rec_cnt = 0, buf_size = 0; |
| |
| arg_ptr = strstr(cmd_ptr, "ARG="); |
| if (arg_ptr == NULL) { |
| *err_code = -300; |
| *err_msg = "GETNODES lacks ARG"; |
| error("wiki: GETNODES lacks ARG"); |
| return -1; |
| } |
| update_time = (time_t) strtoul(arg_ptr+4, &tmp_char, 10); |
| if (tmp_char[0] != ':') { |
| *err_code = -300; |
| *err_msg = "Invalid ARG value"; |
| error("wiki: GETNODES has invalid ARG value"); |
| return -1; |
| } |
| tmp_char++; |
| lock_slurmctld(node_read_lock); |
| if (strncmp(tmp_char, "ALL", 3) == 0) { |
| /* report all nodes */ |
| buf = _dump_all_nodes(&node_rec_cnt, update_time); |
| } else { |
| struct node_record *node_ptr = NULL; |
| char *node_name, *slurm_hosts; |
| int node_cnt; |
| hostset_t slurm_hostset; |
| |
| slurm_hosts = moab2slurm_task_list(tmp_char, &node_cnt); |
| if ((slurm_hostset = hostset_create(slurm_hosts))) { |
| while ((node_name = hostset_shift(slurm_hostset))) { |
| node_ptr = find_node_record(node_name); |
| if (node_ptr == NULL) { |
| error("sched/wiki2: bad hostname %s", |
| node_name); |
| continue; |
| } |
| if (_hidden_node(node_ptr)) |
| continue; |
| tmp_buf = _dump_node(node_ptr, NULL, |
| update_time); |
| if (node_rec_cnt > 0) |
| xstrcat(buf, "#"); |
| xstrcat(buf, tmp_buf); |
| xfree(tmp_buf); |
| node_rec_cnt++; |
| } |
| hostset_destroy(slurm_hostset); |
| } else { |
| error("hostset_create(%s): %m", slurm_hosts); |
| } |
| xfree(slurm_hosts); |
| } |
| unlock_slurmctld(node_read_lock); |
| |
| /* Prepend ("ARG=%d", node_rec_cnt) to reply message */ |
| if (buf) |
| buf_size = strlen(buf); |
| tmp_buf = xmalloc(buf_size + 32); |
| if (node_rec_cnt) |
| sprintf(tmp_buf, "SC=0 ARG=%d#%s", node_rec_cnt, buf); |
| else |
| sprintf(tmp_buf, "SC=0 ARG=0#"); |
| xfree(buf); |
| *err_code = 0; |
| *err_msg = tmp_buf; |
| return 0; |
| } |
| |
| static char * _dump_all_nodes(int *node_cnt, time_t update_time) |
| { |
| int i, cnt = 0, rc; |
| struct node_record *node_ptr = node_record_table_ptr; |
| char *tmp_buf = NULL, *buf = NULL; |
| struct node_record *uniq_node_ptr = NULL; |
| hostlist_t hl = NULL; |
| |
| for (i=0; i<node_record_count; i++, node_ptr++) { |
| if (node_ptr->name == NULL) |
| continue; |
| if (IS_NODE_FUTURE(node_ptr)) |
| continue; |
| if (_hidden_node(node_ptr)) |
| continue; |
| if (use_host_exp == 2) { |
| rc = _same_info(uniq_node_ptr, node_ptr, update_time); |
| if (rc == 0) { |
| uniq_node_ptr = node_ptr; |
| if (hl) { |
| hostlist_push(hl, node_ptr->name); |
| } else { |
| hl = hostlist_create(node_ptr->name); |
| if (hl == NULL) |
| fatal("malloc failure"); |
| } |
| continue; |
| } else { |
| tmp_buf = _dump_node(uniq_node_ptr, hl, |
| update_time); |
| hostlist_destroy(hl); |
| hl = hostlist_create(node_ptr->name); |
| if (hl == NULL) |
| fatal("malloc failure"); |
| uniq_node_ptr = node_ptr; |
| } |
| } else { |
| tmp_buf = _dump_node(node_ptr, hl, update_time); |
| } |
| if (cnt > 0) |
| xstrcat(buf, "#"); |
| xstrcat(buf, tmp_buf); |
| xfree(tmp_buf); |
| cnt++; |
| } |
| |
| if (hl) { |
| tmp_buf = _dump_node(uniq_node_ptr, hl, update_time); |
| hostlist_destroy(hl); |
| if (cnt > 0) |
| xstrcat(buf, "#"); |
| xstrcat(buf, tmp_buf); |
| xfree(tmp_buf); |
| cnt++; |
| } |
| |
| *node_cnt = cnt; |
| return buf; |
| } |
| |
| /* Determine if node1 and node2 have the same parameters that we report to Moab |
| * RET 0 of node1 is NULL or their parameters are the same |
| * >0 otherwise |
| */ |
| static int _same_info(struct node_record *node1_ptr, |
| struct node_record *node2_ptr, time_t update_time) |
| { |
| int i; |
| |
| if (node1_ptr == NULL) /* first record, treat as a match */ |
| return 0; |
| |
| if (node1_ptr->node_state != node2_ptr->node_state) |
| return 1; |
| if (_str_cmp(node1_ptr->reason, node2_ptr->reason)) |
| return 2; |
| if (update_time > last_node_update) |
| return 0; |
| |
| if (slurmctld_conf.fast_schedule) { |
| /* config from slurm.conf */ |
| if (node1_ptr->config_ptr->cpus != node2_ptr->config_ptr->cpus) |
| return 3; |
| } else { |
| /* config as reported by slurmd */ |
| if (node1_ptr->cpus != node2_ptr->cpus) |
| return 4; |
| } |
| if (node1_ptr->part_cnt != node2_ptr->part_cnt) |
| return 5; |
| for (i=0; i<node1_ptr->part_cnt; i++) { |
| if (node1_ptr->part_pptr[i] != node2_ptr->part_pptr[i]) |
| return 6; |
| } |
| if (_str_cmp(node1_ptr->arch, node2_ptr->arch)) |
| return 7; |
| if (_str_cmp(node1_ptr->os, node2_ptr->os)) |
| return 8; |
| if (update_time > 0) |
| return 0; |
| |
| if (slurmctld_conf.fast_schedule) { |
| /* config from slurm.conf */ |
| if ((node1_ptr->config_ptr->real_memory != |
| node2_ptr->config_ptr->real_memory) || |
| (node1_ptr->config_ptr->tmp_disk != |
| node2_ptr->config_ptr->tmp_disk) || |
| (node1_ptr->config_ptr->cpus != |
| node2_ptr->config_ptr->cpus)) |
| return 9; |
| } else { |
| if ((node1_ptr->real_memory != node2_ptr->real_memory) || |
| (node1_ptr->tmp_disk != node2_ptr->tmp_disk) || |
| (node1_ptr->cpus != node2_ptr->cpus)) |
| return 10; |
| } |
| if (_str_cmp(node1_ptr->config_ptr->feature, |
| node2_ptr->config_ptr->feature)) |
| return 11; |
| |
| if (_str_cmp(node1_ptr->config_ptr->gres, |
| node2_ptr->config_ptr->gres)) |
| return 12; |
| |
| return 0; |
| } |
| |
| static char * _dump_node(struct node_record *node_ptr, hostlist_t hl, |
| time_t update_time) |
| { |
| char tmp[16*1024], *buf = NULL; |
| int i; |
| uint32_t cpu_cnt; |
| |
| if (!node_ptr) |
| return NULL; |
| |
| if (hl) { |
| char *node_list; |
| hostlist_sort(hl); |
| hostlist_uniq(hl); |
| node_list = hostlist_ranged_string_xmalloc(hl); |
| xstrcat(buf, node_list); |
| xfree(node_list); |
| } else { |
| snprintf(tmp, sizeof(tmp), "%s", node_ptr->name); |
| xstrcat(buf, tmp); |
| } |
| |
| snprintf(tmp, sizeof(tmp), ":STATE=%s;", _get_node_state(node_ptr)); |
| xstrcat(buf, tmp); |
| if (node_ptr->reason) { |
| /* Strip out any quotes, they confuse Moab */ |
| char *reason, *bad_char; |
| reason = xstrdup(node_ptr->reason); |
| while ((bad_char = strchr(reason, '\''))) |
| bad_char[0] = ' '; |
| while ((bad_char = strchr(reason, '\"'))) |
| bad_char[0] = ' '; |
| snprintf(tmp, sizeof(tmp), "CAT=\"%s\";", reason); |
| xstrcat(buf, tmp); |
| xfree(reason); |
| } |
| |
| if (update_time > last_node_update) |
| return buf; |
| |
| if (slurmctld_conf.fast_schedule) { |
| /* config from slurm.conf */ |
| cpu_cnt = node_ptr->config_ptr->cpus; |
| } else { |
| /* config as reported by slurmd */ |
| cpu_cnt = node_ptr->cpus; |
| } |
| for (i=0; i<node_ptr->part_cnt; i++) { |
| if (i == 0) |
| xstrcat(buf, "CCLASS="); |
| snprintf(tmp, sizeof(tmp), "[%s:%u]", |
| node_ptr->part_pptr[i]->name, |
| cpu_cnt); |
| xstrcat(buf, tmp); |
| } |
| if (i > 0) |
| xstrcat(buf, ";"); |
| |
| if (node_ptr->arch) { |
| snprintf(tmp, sizeof(tmp), "ARCH=%s;", node_ptr->arch); |
| xstrcat(buf, tmp); |
| } |
| |
| if (node_ptr->os) { |
| snprintf(tmp, sizeof(tmp), "OS=%s;", node_ptr->os); |
| xstrcat(buf, tmp); |
| } |
| |
| if (node_ptr->config_ptr && node_ptr->config_ptr->feature) { |
| snprintf(tmp, sizeof(tmp), "FEATURE=%s;", |
| node_ptr->config_ptr->feature); |
| /* comma separator to colon */ |
| for (i=0; (tmp[i] != '\0'); i++) { |
| if (tmp[i] == ',') |
| tmp[i] = ':'; |
| } |
| xstrcat(buf, tmp); |
| } |
| |
| if (node_ptr->config_ptr && node_ptr->config_ptr->gres) { |
| snprintf(tmp, sizeof(tmp), "GRES=%s;", |
| node_ptr->config_ptr->gres); |
| xstrcat(buf, tmp); |
| } |
| |
| if (update_time > 0) |
| return buf; |
| |
| if (slurmctld_conf.fast_schedule) { |
| /* config from slurm.conf */ |
| snprintf(tmp, sizeof(tmp), |
| "CMEMORY=%u;CDISK=%u;CPROC=%u;", |
| node_ptr->config_ptr->real_memory, |
| node_ptr->config_ptr->tmp_disk, |
| node_ptr->config_ptr->cpus); |
| } else { |
| /* config as reported by slurmd */ |
| snprintf(tmp, sizeof(tmp), |
| "CMEMORY=%u;CDISK=%u;CPROC=%u;", |
| node_ptr->real_memory, |
| node_ptr->tmp_disk, |
| node_ptr->cpus); |
| } |
| xstrcat(buf, tmp); |
| |
| return buf; |
| } |
| |
/*
 * _get_node_state - translate a node's SLURM state into the state name
 *	placed in the STATE= field of the reply
 * node_ptr IN - node to report
 * RET pointer to a constant string: "Drained", "Busy", "Down", "Running",
 *	"Idle" or "Unknown"; do not free or modify it
 *
 * The tests are ordered, the first one that matches wins:
 *	DRAIN or FAIL	-> "Drained"
 *	COMPLETING	-> "Busy"
 *	DOWN		-> "Down"
 *	ALLOCATED	-> "Busy" with select/linear, otherwise "Running"
 *	IDLE		-> "Idle"
 */
static char *	_get_node_state(struct node_record *node_ptr)
{
	/* The select plugin type is looked up on the first call only and
	 * cached in these statics for all later calls */
	static bool got_select_type = false;
	static bool node_allocations;

	if (!got_select_type) {
		char *select_type = slurm_get_select_type();
		node_allocations = (select_type &&
				    (strcasecmp(select_type,
						"select/linear") == 0));
		xfree(select_type);
		got_select_type = true;
	}

	/* NOTE(review): a second, unreachable 'return "Draining";' used to
	 * follow this return and has been removed. Every DRAIN or FAIL node
	 * is (and already was) reported as "Drained", even while it still
	 * runs jobs. Confirm whether "Draining" should be reported for
	 * allocated or completing nodes before changing this. */
	if (IS_NODE_DRAIN(node_ptr) || IS_NODE_FAIL(node_ptr))
		return "Drained";

	if (IS_NODE_COMPLETING(node_ptr))
		return "Busy";

	if (IS_NODE_DOWN(node_ptr))
		return "Down";

	if (IS_NODE_ALLOCATED(node_ptr)) {
		/* presumably select/linear hands out whole nodes, so an
		 * allocated node has nothing left to offer - confirm */
		if (node_allocations)
			return "Busy";
		else
			return "Running";
	}

	if (IS_NODE_IDLE(node_ptr))
		return "Idle";

	return "Unknown";
}
| |
/*
 * _str_cmp - strcmp() that tolerates NULL arguments
 * s1, s2 IN - strings to compare, either may be NULL
 * RET strcmp(s1, s2) if both are non-NULL,
 *     0 if both are NULL,
 *     1 if exactly one of them is NULL
 */
static int	_str_cmp(char *s1, char *s2)
{
	if ((s1 == NULL) || (s2 == NULL)) {
		/* at least one is NULL, so the pointers are equal only when
		 * both are NULL */
		return (s1 == s2) ? 0 : 1;
	}

	return strcmp(s1, s2);
}
| |
| /* Return true if the node exists in a hidden partition and not in any |
| * non-hidden partitions. */ |
| static bool _hidden_node(struct node_record *node_ptr) |
| { |
| int i, n; |
| int hidden = -1; /* node is hidden for some partition */ |
| int shown = -1; /* node is *not* hidden for some partition */ |
| |
| for (n = 0; n < node_ptr->part_cnt; n++) { |
| bool hide_found = false; |
| for (i=0; i<HIDE_PART_CNT; i++) { |
| if (hide_part_nodes_ptr[i] == NULL) |
| break; |
| if (hide_part_nodes_ptr[i] == node_ptr->part_pptr[n]) { |
| hide_found = true; |
| break; |
| } |
| } |
| if (hide_found) |
| hidden = 1; |
| else |
| shown = 1; |
| } |
| |
| if ((hidden == 1) && (shown != 1)) |
| return true; |
| return false; |
| } |