blob: c11972b3f89877bba9af2bdcf5e7ec1f98b4c4cb [file] [log] [blame] [edit]
/*****************************************************************************\
* sinfo.c - Report overall state the system
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Joey Ekstrom <ekstrom1@llnl.gov>, Morris Jette <jette1@llnl.gov>
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <https://computing.llnl.gov/linux/slurm/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include "src/common/xstring.h"
#include "src/common/macros.h"
#include "src/common/node_select.h"
#include "src/sinfo/sinfo.h"
#include "src/sinfo/print.h"
#include "src/plugins/select/bluegene/wrap_rm_api.h"
#include "src/plugins/select/bluegene/plugin/bluegene.h"
/********************
 * Global Variables *
 ********************/
/* Command-line options and derived settings, filled in by
 * parse_command_line() and read throughout this file */
struct sinfo_parameters params;

/* Node scaling factor cached from the node info message; on BlueGene
 * systems one node record can represent multiple c-nodes. Kept in a
 * file-scope copy because a smaller per-subgroup scaling value may be
 * passed into _update_sinfo() while per-node cpu math must stay stable. */
static int g_node_scaling = 1;

/*************
 * Functions *
 *************/
static int _bg_report(block_info_msg_t *block_ptr);
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg);
static sinfo_data_t *_create_sinfo(partition_info_t* part_ptr,
				   uint16_t part_inx, node_info_t *node_ptr,
				   uint32_t node_scaling);
static bool _filter_out(node_info_t *node_ptr);
static int _get_info(bool clear_old);
static void _sinfo_list_delete(void *data);
static bool _match_node_data(sinfo_data_t *sinfo_ptr,
			     node_info_t *node_ptr);
static bool _match_part_data(sinfo_data_t *sinfo_ptr,
			     partition_info_t* part_ptr);
static int _multi_cluster(List clusters);
static int _query_server(partition_info_msg_t ** part_pptr,
			 node_info_msg_t ** node_pptr,
			 block_info_msg_t ** block_pptr, bool clear_old);
static void _sort_hostlist(List sinfo_list);
static int _strcmp(char *data1, char *data2);
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling);
static int _insert_node_ptr(List sinfo_list, uint16_t part_num,
			    partition_info_t *part_ptr,
			    node_info_t *node_ptr, uint32_t node_scaling);
static int _handle_subgrps(List sinfo_list, uint16_t part_num,
			   partition_info_t *part_ptr,
			   node_info_t *node_ptr, uint32_t node_scaling);
int main(int argc, char *argv[])
{
log_options_t opts = LOG_OPTS_STDERR_ONLY;
int rc = 0;
log_init(xbasename(argv[0]), opts, SYSLOG_FACILITY_USER, NULL);
parse_command_line(argc, argv);
if (params.verbose) {
opts.stderr_level += params.verbose;
log_alter(opts, SYSLOG_FACILITY_USER, NULL);
}
while (1) {
if ((!params.no_header) &&
(params.iterate || params.verbose || params.long_output))
print_date();
if (!params.clusters) {
if (_get_info(false))
rc = 1;
} else if (_multi_cluster(params.clusters) != 0)
rc = 1;
if (params.iterate) {
printf("\n");
sleep(params.iterate);
} else
break;
}
exit(rc);
}
/*
 * _multi_cluster - report state for every cluster in the given list,
 * printing a "CLUSTER: name" banner before each one.
 * clusters IN - list of cluster records to iterate over
 * RET the worst (highest) return code seen from _get_info()
 */
static int _multi_cluster(List clusters)
{
	ListIterator iter = list_iterator_create(clusters);
	bool printed_one = false;
	int worst_rc = 0;

	while ((working_cluster_rec = list_next(iter))) {
		int this_rc;

		/* blank line between consecutive cluster reports */
		if (printed_one)
			printf("\n");
		printed_one = true;

		printf("CLUSTER: %s\n", working_cluster_rec->name);
		this_rc = _get_info(true);
		worst_rc = MAX(worst_rc, this_rc);
	}
	list_iterator_destroy(iter);

	return worst_rc;
}
/*
 * _get_info - download current state and print one report.
 * clear_old IN - if set then don't preserve old info (it might be from
 *	another cluster)
 * RET 0 on success, 1 if the server query failed
 */
static int _get_info(bool clear_old)
{
	partition_info_msg_t *part_msg = NULL;
	node_info_msg_t *node_msg = NULL;
	block_info_msg_t *block_msg = NULL;

	if (_query_server(&part_msg, &node_msg, &block_msg, clear_old))
		return 1;

	if (params.bg_flag) {
		/* --bg: report BlueGene block state instead of nodes */
		(void) _bg_report(block_msg);
	} else {
		List sinfo_list = list_create(_sinfo_list_delete);

		_build_sinfo_data(sinfo_list, part_msg, node_msg);
		sort_sinfo_list(sinfo_list);
		print_sinfo_list(sinfo_list);
		FREE_NULL_LIST(sinfo_list);
	}

	return 0;
}
/*
 * _bg_report - print the current bgblock state information, one block
 * per line, with a fixed-width header unless --noheader was given.
 * block_ptr IN - block info message downloaded by _query_server()
 * RET SLURM_SUCCESS, or SLURM_ERROR when no block data was supplied
 */
static int _bg_report(block_info_msg_t *block_ptr)
{
	int i;

	if (block_ptr == NULL) {
		slurm_perror("No block_ptr given");
		return SLURM_ERROR;
	}

	if (!params.no_header)
		printf("BG_BLOCK NODES OWNER STATE CONNECTION USE\n");
	/* column widths below match the header, e.g.:
	 * RMP_22Apr1544018 bg[123x456] name READY TORUS COPROCESSOR */
	for (i = 0; i < block_ptr->record_count; i++) {
		printf("%-16.16s %-12.12s %-8.8s %-8.8s %-10.10s %s\n",
		       block_ptr->block_array[i].bg_block_id,
		       block_ptr->block_array[i].nodes,
		       block_ptr->block_array[i].owner_name,
		       bg_block_state_string(
			       block_ptr->block_array[i].state),
		       conn_type_string(
			       block_ptr->block_array[i].conn_type),
		       node_use_string(
			       block_ptr->block_array[i].node_use));
	}

	return SLURM_SUCCESS;
}
/*
 * _query_server - download the current server state (partitions, nodes
 *	and, when requested, BlueGene block data)
 * part_pptr IN/OUT - partition information message
 * node_pptr IN/OUT - node information message
 * block_pptr IN/OUT - BlueGene block data (only written when
 *	params.bg_flag is set)
 * clear_old IN - If set, then always replace old data, needed when going
 *	between clusters.
 * RET zero or error code
 *
 * NOTE: Results are cached in static pointers between calls so that on
 * iterative runs (-i) only changed data is re-downloaded; the controller
 * answers SLURM_NO_CHANGE_IN_DATA when nothing changed since last_update.
 */
static int
_query_server(partition_info_msg_t ** part_pptr,
	      node_info_msg_t ** node_pptr,
	      block_info_msg_t ** block_pptr, bool clear_old)
{
	/* previous iteration's messages; static zero-initialization means
	 * they start out NULL on the first call */
	static partition_info_msg_t *old_part_ptr = NULL, *new_part_ptr;
	static node_info_msg_t *old_node_ptr = NULL, *new_node_ptr;
	static block_info_msg_t *old_bg_ptr = NULL, *new_bg_ptr;
	int error_code;
	uint16_t show_flags = 0;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	/* ---- partition information ---- */
	if (old_part_ptr) {
		if (clear_old)
			old_part_ptr->last_update = 0;	/* force full reload */
		error_code = slurm_load_partitions(old_part_ptr->last_update,
						   &new_part_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_partition_info_msg(old_part_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* unchanged since last_update: keep the old copy */
			error_code = SLURM_SUCCESS;
			new_part_ptr = old_part_ptr;
		}
	} else {
		error_code = slurm_load_partitions((time_t) NULL, &new_part_ptr,
						   show_flags);
	}
	if (error_code) {
		slurm_perror("slurm_load_partitions");
		return error_code;
	}
	old_part_ptr = new_part_ptr;
	*part_pptr = new_part_ptr;

	/* ---- node information (same caching protocol) ---- */
	if (old_node_ptr) {
		if (clear_old)
			old_node_ptr->last_update = 0;
		error_code = slurm_load_node(old_node_ptr->last_update,
					     &new_node_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg(old_node_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_node_ptr = old_node_ptr;
		}
	} else {
		error_code = slurm_load_node((time_t) NULL, &new_node_ptr,
					     show_flags);
	}
	if (error_code) {
		slurm_perror("slurm_load_node");
		return error_code;
	}
	old_node_ptr = new_node_ptr;
	*node_pptr = new_node_ptr;

	/* block data is only needed for the --bg report */
	if (!params.bg_flag)
		return SLURM_SUCCESS;

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (old_bg_ptr) {
			if (clear_old)
				old_bg_ptr->last_update = 0;
			error_code = slurm_load_block_info(
				old_bg_ptr->last_update,
				&new_bg_ptr, show_flags);
			if (error_code == SLURM_SUCCESS)
				slurm_free_block_info_msg(old_bg_ptr);
			else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
				error_code = SLURM_SUCCESS;
				new_bg_ptr = old_bg_ptr;
			}
		} else {
			error_code = slurm_load_block_info((time_t) NULL,
							   &new_bg_ptr,
							   show_flags);
		}
	}
	/* NOTE(review): when bg_flag is set but CLUSTER_FLAG_BG is not,
	 * new_bg_ptr keeps its prior static value (NULL on first call) and
	 * error_code carries the node-load result — *block_pptr may then be
	 * NULL; _bg_report() handles that case. Confirm intended. */
	if (error_code) {
		slurm_perror("slurm_load_block");
		return error_code;
	}
	old_bg_ptr = new_bg_ptr;
	*block_pptr = new_bg_ptr;
	return SLURM_SUCCESS;
}
/*
 * _build_sinfo_data - make a sinfo_data entry for each unique node
 * configuration and add it to the sinfo_list for later printing.
 * sinfo_list IN/OUT - list of unique sinfo_data records to report
 * partition_msg IN - partition info message
 * node_msg IN - node info message
 * RET zero or error code
 */
static int _build_sinfo_data(List sinfo_list,
			     partition_info_msg_t *partition_msg,
			     node_info_msg_t *node_msg)
{
	node_info_t *node_ptr = NULL;
	partition_info_t *part_ptr = NULL;
	int j, j2;

	/* remember the cluster-wide scaling factor (see g_node_scaling) */
	g_node_scaling = node_msg->node_scaling;

	/* by default every partition is shown, even if no nodes */
	if ((!params.node_flag) && params.match_flags.partition_flag) {
		part_ptr = partition_msg->partition_array;
		for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
			if ((!params.partition) ||
			    (_strcmp(params.partition, part_ptr->name) == 0)) {
				/* seed a record with no node data so empty
				 * partitions still print */
				list_append(sinfo_list, _create_sinfo(
						    part_ptr, (uint16_t) j,
						    NULL,
						    node_msg->node_scaling));
			}
		}
	}

	/* make sinfo_list entries for every node in every partition */
	/* NOTE(review): the part_ptr++ in the increment clause is redundant;
	 * part_ptr is reassigned from partition_array[j] on every pass */
	for (j=0; j<partition_msg->record_count; j++, part_ptr++) {
		part_ptr = &(partition_msg->partition_array[j]);

		/* honor --partition when any filtering option is active */
		if (params.filtering && params.partition &&
		    _strcmp(part_ptr->name, params.partition))
			continue;

		/* node_inx holds [first,last] node index pairs, terminated
		 * by a value < 0; walk each inclusive range */
		j2 = 0;
		while(part_ptr->node_inx[j2] >= 0) {
			int i2 = 0;
			uint16_t subgrp_size = 0;
			for(i2 = part_ptr->node_inx[j2];
			    i2 <= part_ptr->node_inx[j2+1];
			    i2++) {
				node_ptr = &(node_msg->node_array[i2]);

				/* skip hidden nodes (NULL name) and nodes
				 * excluded by the user's filters */
				if (node_ptr->name == NULL ||
				    (params.filtering &&
				     _filter_out(node_ptr)))
					continue;

				/* nodes with sub-groups (BlueGene c-node
				 * subsets in distinct states) need per-state
				 * accounting; plain nodes insert directly */
				if(select_g_select_nodeinfo_get(
					   node_ptr->select_nodeinfo,
					   SELECT_NODEDATA_SUBGRP_SIZE,
					   0,
					   &subgrp_size) == SLURM_SUCCESS
				   && subgrp_size)
					_handle_subgrps(sinfo_list,
							(uint16_t) j,
							part_ptr,
							node_ptr,
							node_msg->
							node_scaling);
				else
					_insert_node_ptr(sinfo_list,
							 (uint16_t) j,
							 part_ptr,
							 node_ptr,
							 node_msg->
							 node_scaling);
			}
			j2 += 2;	/* advance to the next index pair */
		}
	}
	_sort_hostlist(sinfo_list);
	return SLURM_SUCCESS;
}
/*
 * _filter_out - Determine if the specified node should be filtered out or
 * reported.
 * node_ptr IN - node to consider filtering out
 * RET - true if node should not be reported, false otherwise
 */
static bool _filter_out(node_info_t *node_ptr)
{
	/* built once from the --nodes argument and cached across calls;
	 * never freed (lives for the life of the command) */
	static hostlist_t host_list = NULL;

	/* --nodes: drop anything not in the requested host list */
	if (params.nodes) {
		if (host_list == NULL)
			host_list = hostlist_create(params.nodes);
		if (hostlist_find (host_list, node_ptr->name) == -1)
			return true;
	}

	/* --dead: keep only non-responding nodes */
	if (params.dead_nodes && !IS_NODE_NO_RESPOND(node_ptr))
		return true;

	/* --responding: keep only responding nodes */
	if (params.responding_nodes && IS_NODE_NO_RESPOND(node_ptr))
		return true;

	/* --states: keep the node only if it matches at least one of the
	 * requested states */
	if (params.state_list) {
		int *node_state;
		bool match = false;
		uint16_t base_state;
		ListIterator iterator;
		uint16_t cpus = 0;
		/* scratch node record used only to classify the REQUESTED
		 * state via the IS_NODE_* macros */
		node_info_t tmp_node, *tmp_node_ptr = &tmp_node;

		iterator = list_iterator_create(params.state_list);
		while ((node_state = list_next(iterator))) {
			tmp_node_ptr->node_state = *node_state;
			if (*node_state == NODE_STATE_DRAIN) {
				/* We search for anything that has the
				 * drain flag set */
				if (IS_NODE_DRAIN(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINING(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINING in node_state_string */
				if (IS_NODE_DRAINING(node_ptr)) {
					match = true;
					break;
				}
			} else if (IS_NODE_DRAINED(tmp_node_ptr)) {
				/* We search for anything that gets mapped to
				 * DRAINED in node_state_string */
				if (IS_NODE_DRAINED(node_ptr)) {
					match = true;
					break;
				}
			} else if (*node_state & NODE_STATE_FLAGS) {
				/* flag-only request: any flag overlap counts */
				if (*node_state & node_ptr->node_state) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ERROR) {
				/* BlueGene: match if any cpus on this node
				 * are in an ERROR sub-group */
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ERROR,
					&cpus);
				if(cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_ALLOCATED) {
				/* BlueGene: match if any cpus allocated */
				slurm_get_select_nodeinfo(
					node_ptr->select_nodeinfo,
					SELECT_NODEDATA_SUBCNT,
					NODE_STATE_ALLOCATED,
					&cpus);
				if(cpus) {
					match = true;
					break;
				}
			} else if (*node_state == NODE_STATE_IDLE) {
				/* IDLE matches even when NO_RESPOND is set */
				base_state = node_ptr->node_state &
					(~NODE_STATE_NO_RESPOND);
				if (base_state == NODE_STATE_IDLE) {
					match = true;
					break;
				}
			} else {
				/* otherwise compare base state exactly */
				base_state =
					node_ptr->node_state & NODE_STATE_BASE;
				if (base_state == *node_state) {
					match = true;
					break;
				}
			}
		}
		list_iterator_destroy(iterator);
		if (!match)
			return true;
	}
	return false;
}
/* Sort the hostlist inside every sinfo record so node names print in
 * canonical order */
static void _sort_hostlist(List sinfo_list)
{
	ListIterator iter = list_iterator_create(sinfo_list);
	sinfo_data_t *rec;

	while ((rec = list_next(iter)))
		hostlist_sort(rec->nodes);

	list_iterator_destroy(iter);
}
/*
 * _match_node_data - test whether a node is compatible with an existing
 * sinfo record, i.e. whether it may be reported on the same output line.
 * Only the fields selected by params.match_flags are compared.
 * sinfo_ptr IN - accumulated record
 * node_ptr IN - candidate node
 * RET true if the node can be merged into this record
 */
static bool _match_node_data(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr)
{
	/* the sinfo_ptr->nodes guard skips string comparisons for records
	 * that do not yet contain any node data */
	if (sinfo_ptr->nodes &&
	    params.match_flags.features_flag &&
	    (_strcmp(node_ptr->features, sinfo_ptr->features)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.gres_flag &&
	    (_strcmp(node_ptr->gres, sinfo_ptr->gres)))
		return false;

	if (sinfo_ptr->nodes &&
	    params.match_flags.reason_flag &&
	    (_strcmp(node_ptr->reason, sinfo_ptr->reason)))
		return false;

	if (params.match_flags.state_flag) {
		char *state1, *state2;
		/* compare the rendered strings so flag combinations that
		 * print identically are treated as the same state */
		state1 = node_state_string(node_ptr->node_state);
		state2 = node_state_string(sinfo_ptr->node_state);
		if (strcmp(state1, state2))
			return false;
	}

	/* If no need to exactly match sizes, just return here
	 * otherwise check cpus, disk, memory and weight individually */
	if (!params.exact_match)
		return true;
	if (params.match_flags.cpus_flag &&
	    ((node_ptr->cpus / g_node_scaling) != sinfo_ptr->min_cpus))
		return false;

	if (params.match_flags.sockets_flag &&
	    (node_ptr->sockets != sinfo_ptr->min_sockets))
		return false;
	if (params.match_flags.cores_flag &&
	    (node_ptr->cores != sinfo_ptr->min_cores))
		return false;
	if (params.match_flags.threads_flag &&
	    (node_ptr->threads != sinfo_ptr->min_threads))
		return false;
	if (params.match_flags.sct_flag &&
	    ((node_ptr->sockets != sinfo_ptr->min_sockets) ||
	     (node_ptr->cores != sinfo_ptr->min_cores) ||
	     (node_ptr->threads != sinfo_ptr->min_threads)))
		return false;
	if (params.match_flags.disk_flag &&
	    (node_ptr->tmp_disk != sinfo_ptr->min_disk))
		return false;
	if (params.match_flags.memory_flag &&
	    (node_ptr->real_memory != sinfo_ptr->min_mem))
		return false;
	if (params.match_flags.weight_flag &&
	    (node_ptr->weight != sinfo_ptr->min_weight))
		return false;

	return true;
}
/*
 * _match_part_data - test whether a partition record is compatible with an
 * existing sinfo record (same output line). Only the fields selected by
 * params.match_flags are compared.
 * sinfo_ptr IN - accumulated record
 * part_ptr IN - candidate partition
 * RET true if the records may be merged
 */
static bool _match_part_data(sinfo_data_t *sinfo_ptr,
			     partition_info_t* part_ptr)
{
	if (part_ptr == sinfo_ptr->part_info) /* identical partition */
		return true;
	/* one side lacks partition data: cannot compare field by field */
	if ((part_ptr == NULL) || (sinfo_ptr->part_info == NULL))
		return false;

	if (params.match_flags.avail_flag &&
	    (part_ptr->state_up != sinfo_ptr->part_info->state_up))
		return false;

	if (params.match_flags.groups_flag &&
	    (_strcmp(part_ptr->allow_groups,
		     sinfo_ptr->part_info->allow_groups)))
		return false;

	/* job_size covers both the min and max node limits */
	if (params.match_flags.job_size_flag &&
	    (part_ptr->min_nodes != sinfo_ptr->part_info->min_nodes))
		return false;

	if (params.match_flags.job_size_flag &&
	    (part_ptr->max_nodes != sinfo_ptr->part_info->max_nodes))
		return false;

	if (params.match_flags.default_time_flag &&
	    (part_ptr->default_time != sinfo_ptr->part_info->default_time))
		return false;

	if (params.match_flags.max_time_flag &&
	    (part_ptr->max_time != sinfo_ptr->part_info->max_time))
		return false;

	if (params.match_flags.partition_flag &&
	    (_strcmp(part_ptr->name, sinfo_ptr->part_info->name)))
		return false;

	/* compare only the ROOT_ONLY bit of the flags word */
	if (params.match_flags.root_flag &&
	    ((part_ptr->flags & PART_FLAG_ROOT_ONLY) !=
	     (sinfo_ptr->part_info->flags & PART_FLAG_ROOT_ONLY)))
		return false;

	if (params.match_flags.share_flag &&
	    (part_ptr->max_share != sinfo_ptr->part_info->max_share))
		return false;

	if (params.match_flags.preempt_mode_flag &&
	    (part_ptr->preempt_mode != sinfo_ptr->part_info->preempt_mode))
		return false;

	if (params.match_flags.priority_flag &&
	    (part_ptr->priority != sinfo_ptr->part_info->priority))
		return false;

	return true;
}
/*
 * _update_sinfo - fold one node's data into an existing sinfo record:
 * add it to the record's hostlist, widen the min/max resource ranges, and
 * update the node/cpu state counters (alloc/idle/other/total).
 * sinfo_ptr IN/OUT - record being accumulated
 * node_ptr IN - node to fold in
 * node_scaling IN - node count this entry represents; on BlueGene this may
 *	be a sub-group size smaller than the global scaling factor
 */
static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr,
			  uint32_t node_scaling)
{
	uint16_t base_state;
	uint16_t used_cpus = 0, error_cpus = 0;
	int total_cpus = 0, total_nodes = 0;
	/* since node_scaling could be less here we need to use the
	   global node scaling which should never change. */
	int single_node_cpus = (node_ptr->cpus / g_node_scaling);

	base_state = node_ptr->node_state & NODE_STATE_BASE;

	if (sinfo_ptr->nodes_total == 0) {	/* first node added */
		/* seed all descriptive fields and ranges from this node */
		sinfo_ptr->node_state = node_ptr->node_state;
		sinfo_ptr->features = node_ptr->features;
		sinfo_ptr->gres = node_ptr->gres;
		sinfo_ptr->reason = node_ptr->reason;
		sinfo_ptr->reason_time= node_ptr->reason_time;
		sinfo_ptr->reason_uid = node_ptr->reason_uid;
		sinfo_ptr->min_cpus = single_node_cpus;
		sinfo_ptr->max_cpus = single_node_cpus;
		sinfo_ptr->min_sockets = node_ptr->sockets;
		sinfo_ptr->max_sockets = node_ptr->sockets;
		sinfo_ptr->min_cores = node_ptr->cores;
		sinfo_ptr->max_cores = node_ptr->cores;
		sinfo_ptr->min_threads = node_ptr->threads;
		sinfo_ptr->max_threads = node_ptr->threads;
		sinfo_ptr->min_disk = node_ptr->tmp_disk;
		sinfo_ptr->max_disk = node_ptr->tmp_disk;
		sinfo_ptr->min_mem = node_ptr->real_memory;
		sinfo_ptr->max_mem = node_ptr->real_memory;
		sinfo_ptr->min_weight = node_ptr->weight;
		sinfo_ptr->max_weight = node_ptr->weight;
	} else if (hostlist_find(sinfo_ptr->nodes, node_ptr->name) != -1) {
		/* we already have this node in this record,
		 * just return, don't duplicate */
		return;
	} else {
		/* widen every min/max range to cover this node */
		if (sinfo_ptr->min_cpus > single_node_cpus)
			sinfo_ptr->min_cpus = single_node_cpus;
		if (sinfo_ptr->max_cpus < single_node_cpus)
			sinfo_ptr->max_cpus = single_node_cpus;

		if (sinfo_ptr->min_sockets > node_ptr->sockets)
			sinfo_ptr->min_sockets = node_ptr->sockets;
		if (sinfo_ptr->max_sockets < node_ptr->sockets)
			sinfo_ptr->max_sockets = node_ptr->sockets;

		if (sinfo_ptr->min_cores > node_ptr->cores)
			sinfo_ptr->min_cores = node_ptr->cores;
		if (sinfo_ptr->max_cores < node_ptr->cores)
			sinfo_ptr->max_cores = node_ptr->cores;

		if (sinfo_ptr->min_threads > node_ptr->threads)
			sinfo_ptr->min_threads = node_ptr->threads;
		if (sinfo_ptr->max_threads < node_ptr->threads)
			sinfo_ptr->max_threads = node_ptr->threads;

		if (sinfo_ptr->min_disk > node_ptr->tmp_disk)
			sinfo_ptr->min_disk = node_ptr->tmp_disk;
		if (sinfo_ptr->max_disk < node_ptr->tmp_disk)
			sinfo_ptr->max_disk = node_ptr->tmp_disk;

		if (sinfo_ptr->min_mem > node_ptr->real_memory)
			sinfo_ptr->min_mem = node_ptr->real_memory;
		if (sinfo_ptr->max_mem < node_ptr->real_memory)
			sinfo_ptr->max_mem = node_ptr->real_memory;

		if (sinfo_ptr->min_weight> node_ptr->weight)
			sinfo_ptr->min_weight = node_ptr->weight;
		if (sinfo_ptr->max_weight < node_ptr->weight)
			sinfo_ptr->max_weight = node_ptr->weight;
	}

	hostlist_push(sinfo_ptr->nodes, node_ptr->name);

	total_cpus = node_ptr->cpus;
	total_nodes = node_scaling;

	/* per-state sub-counts (non-zero only on BlueGene systems) */
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ALLOCATED,
				     &used_cpus);
	select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
				     SELECT_NODEDATA_SUBCNT,
				     NODE_STATE_ERROR,
				     &error_cpus);

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		if (!params.match_flags.state_flag &&
		    (used_cpus || error_cpus)) {
			/* We only get one shot at this (because all states
			   are combined together), so we need to make
			   sure we get all the subgrps accounted. (So use
			   g_node_scaling for safe measure) */
			total_nodes = g_node_scaling;

			sinfo_ptr->nodes_alloc += used_cpus;
			sinfo_ptr->nodes_other += error_cpus;
			sinfo_ptr->nodes_idle +=
				(total_nodes - (used_cpus + error_cpus));
			/* convert sub-group node counts into cpu counts */
			used_cpus  *= single_node_cpus;
			error_cpus *= single_node_cpus;
		} else {
			/* process only for this subgrp and then return */
			total_cpus = total_nodes * single_node_cpus;

			if ((base_state == NODE_STATE_ALLOCATED) ||
			    (node_ptr->node_state & NODE_STATE_COMPLETING)) {
				sinfo_ptr->nodes_alloc += total_nodes;
				sinfo_ptr->cpus_alloc += total_cpus;
			} else if (IS_NODE_DRAIN(node_ptr) ||
				   (base_state == NODE_STATE_DOWN)) {
				sinfo_ptr->nodes_other += total_nodes;
				sinfo_ptr->cpus_other += total_cpus;
			} else {
				sinfo_ptr->nodes_idle += total_nodes;
				sinfo_ptr->cpus_idle += total_cpus;
			}
			sinfo_ptr->nodes_total += total_nodes;
			sinfo_ptr->cpus_total += total_cpus;

			return;
		}
	} else {
		/* non-BlueGene: classify the whole node by its state */
		if ((base_state == NODE_STATE_ALLOCATED) ||
		    IS_NODE_COMPLETING(node_ptr))
			sinfo_ptr->nodes_alloc += total_nodes;
		else if (IS_NODE_DRAIN(node_ptr)
			 || (base_state == NODE_STATE_DOWN))
			sinfo_ptr->nodes_other += total_nodes;
		else
			sinfo_ptr->nodes_idle += total_nodes;
	}

	sinfo_ptr->nodes_total += total_nodes;

	sinfo_ptr->cpus_alloc += used_cpus;
	sinfo_ptr->cpus_total += total_cpus;
	total_cpus -= used_cpus + error_cpus;	/* cpus left over */

	if (error_cpus) {
		sinfo_ptr->cpus_idle += total_cpus;
		sinfo_ptr->cpus_other += error_cpus;
	} else if (IS_NODE_DRAIN(node_ptr) ||
		   (base_state == NODE_STATE_DOWN)) {
		sinfo_ptr->cpus_other += total_cpus;
	} else
		sinfo_ptr->cpus_idle += total_cpus;
}
/*
 * _insert_node_ptr - merge the node into the first compatible sinfo record
 * on the list, or append a brand-new record when none matches.
 * sinfo_list IN/OUT - list of accumulated records
 * part_num IN - index of the partition record (0-origin)
 * part_ptr IN - partition this node belongs to
 * node_ptr IN - node to add
 * node_scaling IN - node count this entry represents
 * RET SLURM_SUCCESS (always)
 */
static int _insert_node_ptr(List sinfo_list, uint16_t part_num,
			    partition_info_t *part_ptr,
			    node_info_t *node_ptr, uint32_t node_scaling)
{
	int rc = SLURM_SUCCESS;
	sinfo_data_t *sinfo_ptr = NULL;
	ListIterator itr = NULL;

	if (params.cluster_flags & CLUSTER_FLAG_BG) {
		/* On BlueGene, synthesize a reason for nodes whose blocks
		 * are in ERROR state so the output explains them.
		 * NOTE(review): this xstrdup writes into the node info
		 * message; presumably freed with the message — confirm no
		 * leak when messages are cached across iterations. */
		uint16_t error_cpus = 0;
		select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ERROR,
					     &error_cpus);

		if (error_cpus && !node_ptr->reason)
			node_ptr->reason = xstrdup("Block(s) in error state");
	}

	/* scan for an existing record this node can be merged into */
	itr = list_iterator_create(sinfo_list);
	while ((sinfo_ptr = list_next(itr))) {
		if (!_match_part_data(sinfo_ptr, part_ptr))
			continue;
		if (sinfo_ptr->nodes_total &&
		    (!_match_node_data(sinfo_ptr, node_ptr)))
			continue;
		_update_sinfo(sinfo_ptr, node_ptr, node_scaling);
		break;
	}
	list_iterator_destroy(itr);

	/* if no match, create new sinfo_data entry
	 * (sinfo_ptr is NULL iff the iterator ran off the end above) */
	if (!sinfo_ptr)
		list_append(sinfo_list,
			    _create_sinfo(part_ptr, part_num,
					  node_ptr, node_scaling));
	return rc;
}
/*
 * _handle_subgrps - insert a BlueGene node whose c-nodes are split into
 * sub-groups (ALLOCATED / ERROR / remainder-IDLE), creating one sinfo
 * entry per sub-group state. node_ptr->node_state is rewritten to each
 * sub-group's state before insertion. When --states was given, a
 * sub-group is only inserted if its state matches the request.
 * RET SLURM_SUCCESS (always)
 */
static int _handle_subgrps(List sinfo_list, uint16_t part_num,
			   partition_info_t *part_ptr,
			   node_info_t *node_ptr, uint32_t node_scaling)
{
	uint16_t size;
	int *node_state;
	int i=0, state_cnt = 2;
	ListIterator iterator = NULL;
	/* sub-group states carried by the select plugin, checked in order;
	 * whatever is left over is treated as IDLE below */
	enum node_states state[] =
		{ NODE_STATE_ALLOCATED, NODE_STATE_ERROR };

	/* If we ever update the hostlist stuff to support this stuff
	 * then we can use this to tack on the end of the node name
	 * the subgrp stuff.  On bluegene systems this would be nice
	 * to see the ionodes in certain states.
	 */
	if (params.state_list)
		iterator = list_iterator_create(params.state_list);

	for(i=0; i<state_cnt; i++) {
		if(iterator) {
			/* only report this sub-group if its state was
			 * requested via --states */
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			while ((node_state = list_next(iterator))) {
				tmp_node_ptr->node_state = *node_state;
				if((((state[i] == NODE_STATE_ALLOCATED)
				     && IS_NODE_DRAINING(tmp_node_ptr))
				    || (*node_state == NODE_STATE_DRAIN))
				   || (*node_state == state[i]))
					break;
			}
			list_iterator_reset(iterator);
			if(!node_state)
				continue;	/* state not requested */
		}
		if(select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
						SELECT_NODEDATA_SUBCNT,
						state[i],
						&size) == SLURM_SUCCESS
		   && size) {
			/* remove this sub-group from the remainder and
			 * insert it under its own state */
			node_scaling -= size;
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state |= state[i];
			_insert_node_ptr(sinfo_list, part_num, part_ptr,
					 node_ptr, size);
		}
	}

	/* now handle the idle */
	if(iterator) {
		while ((node_state = list_next(iterator))) {
			node_info_t tmp_node, *tmp_node_ptr = &tmp_node;
			tmp_node_ptr->node_state = *node_state;
			if(((*node_state == NODE_STATE_DRAIN)
			    || IS_NODE_DRAINED(tmp_node_ptr))
			   || (*node_state == NODE_STATE_IDLE))
				break;
		}
		list_iterator_destroy(iterator);
		if(!node_state)
			return SLURM_SUCCESS;	/* IDLE not requested */
	}

	/* whatever was not allocated or in error is reported as IDLE */
	node_ptr->node_state &= NODE_STATE_FLAGS;
	node_ptr->node_state |= NODE_STATE_IDLE;
	if((int)node_scaling > 0)
		_insert_node_ptr(sinfo_list, part_num, part_ptr,
				 node_ptr, node_scaling);
	return SLURM_SUCCESS;
}
/*
 * _create_sinfo - allocate and initialize one sinfo record for the given
 * partition, optionally seeding it with a first node's data.
 * part_ptr IN - pointer to partition record to add
 * part_inx IN - index of partition record (0-origin)
 * node_ptr IN - pointer to node record to add (may be NULL for an
 *	empty-partition placeholder)
 * node_scaling IN - node count the node entry represents
 * RET newly allocated record; caller owns it (freed via _sinfo_list_delete)
 */
static sinfo_data_t *_create_sinfo(partition_info_t* part_ptr,
				   uint16_t part_inx, node_info_t *node_ptr,
				   uint32_t node_scaling)
{
	sinfo_data_t *rec = xmalloc(sizeof(sinfo_data_t));

	rec->part_info = part_ptr;
	rec->part_inx = part_inx;
	rec->nodes = hostlist_create("");

	if (node_ptr != NULL)
		_update_sinfo(rec, node_ptr, node_scaling);

	return rec;
}
/* List destructor callback: release one sinfo record and the hostlist it
 * owns (descriptive string fields point into the cached node message and
 * are not freed here) */
static void _sinfo_list_delete(void *data)
{
	sinfo_data_t *rec = data;

	hostlist_destroy(rec->nodes);
	xfree(rec);
}
/* strcmp() variant tolerant of NULL pointers: a NULL argument compares as
 * the literal string "(null)", matching how such fields are printed */
static int _strcmp(char *data1, char *data2)
{
	static char null_str[] = "(null)";

	if (!data1)
		data1 = null_str;
	if (!data2)
		data2 = null_str;

	return strcmp(data1, data2);
}