| /*****************************************************************************\ |
| * read_config.c - read the overall slurm configuration file |
| ***************************************************************************** |
| * Copyright (C) 2002 The Regents of the University of California. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov>. |
| * UCRL-CODE-226842. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://www.llnl.gov/linux/slurm/>. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #ifdef HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| #ifdef HAVE_SYS_SYSLOG_H |
| # include <sys/syslog.h> |
| #endif |
| |
| #include <ctype.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <syslog.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <time.h> |
| #include <unistd.h> |
| |
| #include "src/common/hostlist.h" |
| #include "src/common/list.h" |
| #include "src/common/macros.h" |
| #include "src/common/node_select.h" |
| #include "src/common/parse_spec.h" |
| #include "src/common/read_config.h" |
| #include "src/common/slurm_jobcomp.h" |
| #include "src/common/switch.h" |
| #include "src/common/xstring.h" |
| #include "src/common/node_select.h" |
| #include "src/common/slurm_jobacct.h" |
| #include "src/common/slurm_rlimits_info.h" |
| |
| #include "src/slurmctld/locks.h" |
| #include "src/slurmctld/node_scheduler.h" |
| #include "src/slurmctld/proc_req.h" |
| #include "src/slurmctld/read_config.h" |
| #include "src/slurmctld/sched_plugin.h" |
| #include "src/slurmctld/slurmctld.h" |
| #include "src/slurmctld/trigger_mgr.h" |
| |
| static int _build_bitmaps(void); |
| static int _init_all_slurm_conf(void); |
| static void _purge_old_node_state(struct node_record *old_node_table_ptr, |
| int old_node_record_count); |
| static void _restore_node_state(struct node_record *old_node_table_ptr, |
| int old_node_record_count); |
| static int _preserve_select_type_param(slurm_ctl_conf_t * ctl_conf_ptr, |
| select_type_plugin_info_t old_select_type_p); |
| static int _preserve_plugins(slurm_ctl_conf_t * ctl_conf_ptr, |
| char *old_auth_type, char *old_checkpoint_type, |
| char *old_sched_type, char *old_select_type, |
| char *old_switch_type); |
| static int _sync_nodes_to_comp_job(void); |
| static int _sync_nodes_to_jobs(void); |
| static int _sync_nodes_to_active_job(struct job_record *job_ptr); |
| #ifdef HAVE_ELAN |
| static void _validate_node_proc_count(void); |
| #endif |
| |
| static char highest_node_name[MAX_SLURM_NAME] = ""; |
| int node_record_count = 0; |
| |
| /* FIXME - declarations for temporarily moved functions */ |
| #define MULTIPLE_VALUE_MSG "Multiple values for %s, latest one used" |
| |
| |
| /* |
| * _build_bitmaps - build node bitmaps to define which nodes are in which |
| * 1) partition 2) configuration record 3) up state 4) idle state |
| * also sets values of total_nodes and total_cpus for every partition. |
| * RET 0 if no error, errno otherwise |
| * Note: Operates on common variables, no arguments |
| * node_record_count - number of nodes in the system |
| * node_record_table_ptr - pointer to global node table |
| * part_list - pointer to global partition list |
| */ |
| static int _build_bitmaps(void) |
| { |
| int i, j, error_code = SLURM_SUCCESS; |
| char *this_node_name; |
| ListIterator config_iterator; |
| ListIterator part_iterator; |
| struct config_record *config_ptr; |
| struct part_record *part_ptr; |
| struct node_record *node_ptr; |
| struct job_record *job_ptr; |
| ListIterator job_iterator; |
| hostlist_t host_list; |
| |
| last_node_update = time(NULL); |
| last_part_update = time(NULL); |
| |
| /* initialize the idle and up bitmaps */ |
| FREE_NULL_BITMAP(idle_node_bitmap); |
| FREE_NULL_BITMAP(avail_node_bitmap); |
| FREE_NULL_BITMAP(share_node_bitmap); |
| FREE_NULL_BITMAP(up_node_bitmap); |
| idle_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); |
| avail_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); |
| share_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); |
| up_node_bitmap = (bitstr_t *) bit_alloc(node_record_count); |
| if ((idle_node_bitmap == NULL) || |
| (avail_node_bitmap == NULL) || |
| (share_node_bitmap == NULL) || |
| (up_node_bitmap == NULL)) |
| fatal ("bit_alloc malloc failure"); |
| |
| /* initialize the configuration bitmaps */ |
| config_iterator = list_iterator_create(config_list); |
| if (config_iterator == NULL) |
| fatal ("memory allocation failure"); |
| |
| while ((config_ptr = (struct config_record *) |
| list_next(config_iterator))) { |
| FREE_NULL_BITMAP(config_ptr->node_bitmap); |
| config_ptr->node_bitmap = |
| (bitstr_t *) bit_alloc(node_record_count); |
| if (config_ptr->node_bitmap == NULL) |
| fatal ("bit_alloc malloc failure"); |
| } |
| list_iterator_destroy(config_iterator); |
| |
| /* Set all bits, all nodes initially available for sharing */ |
| bit_nset(share_node_bitmap, 0, (node_record_count-1)); |
| |
	/* clear share_node_bitmap bits for nodes allocated to
	 * jobs that do not permit node sharing */
| job_iterator = list_iterator_create(job_list); |
| while ((job_ptr = (struct job_record *) list_next(job_iterator))) { |
| bitstr_t *tmp_bits; |
| if ((job_ptr->job_state != JOB_RUNNING) || |
| (job_ptr->node_bitmap == NULL) || |
| (job_ptr->details == NULL) || |
| (job_ptr->details->shared != 0)) |
| continue; |
| tmp_bits = bit_copy(job_ptr->node_bitmap); |
| if (tmp_bits == NULL) |
| fatal ("bit_copy malloc failure"); |
| bit_not(tmp_bits); |
| bit_and(share_node_bitmap, tmp_bits); |
| bit_free(tmp_bits); |
| } |
| list_iterator_destroy(job_iterator); |
| |
| /* scan all nodes and identify which are up, idle and |
| * their configuration, resync DRAINED vs. DRAINING state */ |
| for (i = 0; i < node_record_count; i++) { |
| uint16_t base_state, drain_flag, no_resp_flag, job_cnt; |
| |
| if (node_record_table_ptr[i].name[0] == '\0') |
| continue; /* defunct */ |
| base_state = node_record_table_ptr[i].node_state & |
| NODE_STATE_BASE; |
| drain_flag = node_record_table_ptr[i].node_state & |
| NODE_STATE_DRAIN; |
| no_resp_flag = node_record_table_ptr[i].node_state & |
| NODE_STATE_NO_RESPOND; |
| job_cnt = node_record_table_ptr[i].run_job_cnt + |
| node_record_table_ptr[i].comp_job_cnt; |
| |
| if (((base_state == NODE_STATE_IDLE) && (job_cnt == 0)) |
| || (base_state == NODE_STATE_DOWN)) |
| bit_set(idle_node_bitmap, i); |
| if ((base_state == NODE_STATE_IDLE) |
| || (base_state == NODE_STATE_ALLOCATED)) { |
| if ((drain_flag == 0) && (no_resp_flag == 0)) |
| bit_set(avail_node_bitmap, i); |
| bit_set(up_node_bitmap, i); |
| } |
| if (node_record_table_ptr[i].config_ptr) |
| bit_set(node_record_table_ptr[i].config_ptr-> |
| node_bitmap, i); |
| } |
| |
| /* scan partition table and identify nodes in each */ |
| part_iterator = list_iterator_create(part_list); |
| if (part_iterator == NULL) |
| fatal ("memory allocation failure"); |
| |
| while ((part_ptr = (struct part_record *) list_next(part_iterator))) { |
| FREE_NULL_BITMAP(part_ptr->node_bitmap); |
| part_ptr->node_bitmap = |
| (bitstr_t *) bit_alloc(node_record_count); |
| if (part_ptr->node_bitmap == NULL) |
| fatal ("bit_alloc malloc failure"); |
| |
| /* check for each node in the partition */ |
| if ((part_ptr->nodes == NULL) || (part_ptr->nodes[0] == '\0')) |
| continue; |
| |
| if ((host_list = hostlist_create(part_ptr->nodes)) == NULL) { |
| fatal("hostlist_create error for %s, %m", |
| part_ptr->nodes); |
| continue; |
| } |
| |
| while ((this_node_name = hostlist_shift(host_list))) { |
| node_ptr = find_node_record(this_node_name); |
| if (node_ptr == NULL) { |
| fatal("_build_bitmaps: node %s is referenced " |
| "but not defined in slurm.conf " |
| "(no NodeName specification)", |
| this_node_name); |
| free(this_node_name); |
| continue; |
| } |
| j = node_ptr - node_record_table_ptr; |
| bit_set(part_ptr->node_bitmap, j); |
| part_ptr->total_nodes++; |
| if (slurmctld_conf.fast_schedule) |
| part_ptr->total_cpus += |
| node_ptr->config_ptr->cpus; |
| else |
| part_ptr->total_cpus += node_ptr->cpus; |
| node_ptr->part_cnt++; |
| xrealloc(node_ptr->part_pptr, (node_ptr->part_cnt * |
| sizeof(struct part_record *))); |
| node_ptr->part_pptr[node_ptr->part_cnt-1] = part_ptr; |
| free(this_node_name); |
| } |
| hostlist_destroy(host_list); |
| } |
| list_iterator_destroy(part_iterator); |
| return error_code; |
| } |
| |
| |
| /* |
| * _init_all_slurm_conf - initialize or re-initialize the slurm |
| * configuration values. |
| * RET 0 if no error, otherwise an error code. |
| * NOTE: We leave the job table intact |
| * NOTE: Operates on common variables, no arguments |
| */ |
| static int _init_all_slurm_conf(void) |
| { |
| int error_code; |
| char *conf_name = xstrdup(slurmctld_conf.slurm_conf); |
| |
| slurm_conf_reinit_nolock(conf_name); |
| xfree(conf_name); |
| |
| if ((error_code = init_node_conf())) |
| return error_code; |
| |
| if ((error_code = init_part_conf())) |
| return error_code; |
| |
| if ((error_code = init_job_conf())) |
| return error_code; |
| |
| strcpy(highest_node_name, ""); |
| return 0; |
| } |
| |
| static int _state_str2int(const char *state_str) |
| { |
| int state_val = NO_VAL; |
| int i; |
| |
| for (i = 0; i <= NODE_STATE_END; i++) { |
| if (strcasecmp(node_state_string(i), "END") == 0) |
| break; |
| if (strcasecmp(node_state_string(i), state_str) == 0) { |
| state_val = i; |
| break; |
| } |
| } |
| if ((i >= NODE_STATE_END) |
| && (strncasecmp("DRAIN", state_str, 5) == 0)) |
| state_val = NODE_STATE_UNKNOWN | NODE_STATE_DRAIN; |
| if (state_val == NO_VAL) { |
| error("invalid node state %s", state_str); |
| errno = EINVAL; |
| } |
| return state_val; |
| } |
| |
| #ifdef HAVE_BG |
/* Used to get the general prefix of the machine's node names, used
 * primarily for bluegene systems.  Not in general use because some
 * systems have multiple prefixes, such as foo[1-1000],bar[1-1000].
 */
| /* Caller must be holding slurm_conf_lock() */ |
| static void _set_node_prefix(const char *nodenames, slurm_ctl_conf_t *conf) |
| { |
| int i; |
| char *tmp; |
| |
| xassert(nodenames != NULL); |
| for (i = 1; nodenames[i] != '\0'; i++) { |
| if((nodenames[i-1] == '[') |
| || (nodenames[i-1] <= '9' |
| && nodenames[i-1] >= '0')) |
| break; |
| } |
| xfree(conf->node_prefix); |
| if(nodenames[i] == '\0') |
| conf->node_prefix = xstrdup(nodenames); |
| else { |
| tmp = xmalloc(sizeof(char)*i+1); |
| memset(tmp, 0, i+1); |
| snprintf(tmp, i, "%s", nodenames); |
| conf->node_prefix = tmp; |
| tmp = NULL; |
| } |
| debug3("Prefix is %s %s %d", conf->node_prefix, nodenames, i); |
| } |
| #endif /* HAVE_BG */ |
| /* |
| * _build_single_nodeline_info - From the slurm.conf reader, build table, |
| * and set values |
| * RET 0 if no error, error code otherwise |
| * Note: Operates on common variables |
| * default_node_record - default node configuration values |
| */ |
| static int _build_single_nodeline_info(slurm_conf_node_t *node_ptr, |
| struct config_record *config_ptr, |
| slurm_ctl_conf_t *conf) |
| { |
| int error_code = SLURM_SUCCESS; |
| struct node_record *node_rec = NULL; |
| hostlist_t alias_list = NULL; |
| hostlist_t hostname_list = NULL; |
| hostlist_t address_list = NULL; |
| char *alias = NULL; |
| char *hostname = NULL; |
| char *address = NULL; |
| int state_val = NODE_STATE_UNKNOWN; |
| |
| if (node_ptr->state != NULL) { |
| state_val = _state_str2int(node_ptr->state); |
| if (state_val == NO_VAL) |
| goto cleanup; |
| } |
| |
| if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) { |
| error("Unable to create NodeName list from %s", |
| node_ptr->nodenames); |
| error_code = errno; |
| goto cleanup; |
| } |
| if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) { |
| error("Unable to create NodeHostname list from %s", |
| node_ptr->hostnames); |
| error_code = errno; |
| goto cleanup; |
| } |
| if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) { |
| error("Unable to create NodeAddr list from %s", |
| node_ptr->addresses); |
| error_code = errno; |
| goto cleanup; |
| } |
| |
| #ifdef HAVE_BG |
| _set_node_prefix(node_ptr->nodenames, conf); |
| #endif |
| |
| /* some sanity checks */ |
| #ifdef HAVE_FRONT_END |
| if ((hostlist_count(hostname_list) != 1) || |
| (hostlist_count(address_list) != 1)) { |
| error("Only one hostname and address allowed " |
| "in FRONT_END mode"); |
| goto cleanup; |
| } |
| hostname = node_ptr->hostnames; |
| address = node_ptr->addresses; |
| #else |
| if (hostlist_count(hostname_list) < hostlist_count(alias_list)) { |
| error("At least as many NodeHostname are required " |
| "as NodeName"); |
| goto cleanup; |
| } |
| if (hostlist_count(address_list) < hostlist_count(alias_list)) { |
| error("At least as many NodeAddr are required as NodeName"); |
| goto cleanup; |
| } |
| #endif |
| |
| /* now build the individual node structures */ |
| while ((alias = hostlist_shift(alias_list))) { |
| #ifndef HAVE_FRONT_END |
| hostname = hostlist_shift(hostname_list); |
| address = hostlist_shift(address_list); |
| #endif |
| if (strcmp(alias, highest_node_name) <= 0) { |
			/* find_node_record() takes the slurm_conf lock
			 * itself to look up the alias, so release our
			 * lock around the call */
| slurm_conf_unlock(); |
| node_rec = find_node_record(alias); |
| slurm_conf_lock(); |
| } else { |
| strncpy(highest_node_name, alias, MAX_SLURM_NAME); |
| node_rec = NULL; |
| } |
| |
| if (node_rec == NULL) { |
| node_rec = create_node_record(config_ptr, alias); |
| if ((state_val != NO_VAL) && |
| (state_val != NODE_STATE_UNKNOWN)) |
| node_rec->node_state = state_val; |
| node_rec->last_response = (time_t) 0; |
| strncpy(node_rec->comm_name, address, MAX_SLURM_NAME); |
| |
| node_rec->port = node_ptr->port; |
| node_rec->reason = xstrdup(node_ptr->reason); |
| } else { |
| /* FIXME - maybe should be fatal? */ |
| error("reconfiguration for node %s, ignoring!", alias); |
| } |
| free(alias); |
| #ifndef HAVE_FRONT_END |
| free(hostname); |
| free(address); |
| #endif |
| } |
| |
| /* free allocated storage */ |
| cleanup: |
| if (alias_list) |
| hostlist_destroy(alias_list); |
| if (hostname_list) |
| hostlist_destroy(hostname_list); |
| if (address_list) |
| hostlist_destroy(address_list); |
| return error_code; |
| |
| } |
| |
| static int _handle_downnodes_line(slurm_conf_downnodes_t *down) |
| { |
| int error_code = 0; |
| struct node_record *node_rec = NULL; |
| hostlist_t alias_list = NULL; |
| char *alias = NULL; |
| int state_val = NODE_STATE_DOWN; |
| |
| if (down->state != NULL) { |
| state_val = _state_str2int(down->state); |
| if (state_val == NO_VAL) { |
| error("Invalid State \"%s\"", down->state); |
| goto cleanup; |
| } |
| } |
| |
| if ((alias_list = hostlist_create(down->nodenames)) == NULL) { |
| error("Unable to create NodeName list from %s", |
| down->nodenames); |
| error_code = errno; |
| goto cleanup; |
| } |
| |
| while ((alias = hostlist_shift(alias_list))) { |
| node_rec = find_node_record(alias); |
| if (node_rec == NULL) { |
| error("DownNode \"%s\" does not exist!", alias); |
| free(alias); |
| continue; |
| } |
| |
| if ((state_val != NO_VAL) && |
| (state_val != NODE_STATE_UNKNOWN)) |
| node_rec->node_state = state_val; |
| if (down->reason) { |
| xfree(node_rec->reason); |
| node_rec->reason = xstrdup(down->reason); |
| } |
| free(alias); |
| } |
| |
| cleanup: |
| if (alias_list) |
| hostlist_destroy(alias_list); |
| return error_code; |
| } |
| |
static void _handle_all_downnodes(void)
| { |
| slurm_conf_downnodes_t *ptr, **ptr_array; |
| int count; |
| int i; |
| |
| count = slurm_conf_downnodes_array(&ptr_array); |
| if (count == 0) { |
| debug("No DownNodes"); |
| return; |
| } |
| |
| for (i = 0; i < count; i++) { |
| ptr = ptr_array[i]; |
| |
| _handle_downnodes_line(ptr); |
| } |
| } |
| |
| /* |
| * _build_all_nodeline_info - get a array of slurm_conf_node_t structures |
| * from the slurm.conf reader, build table, and set values |
| * RET 0 if no error, error code otherwise |
| * Note: Operates on common variables |
| * default_node_record - default node configuration values |
| */ |
| static int _build_all_nodeline_info(slurm_ctl_conf_t *conf) |
| { |
| slurm_conf_node_t *node, **ptr_array; |
| struct config_record *config_ptr = NULL; |
| int count; |
| int i; |
| |
| count = slurm_conf_nodename_array(&ptr_array); |
| if (count == 0) |
| fatal("No NodeName information available!"); |
| |
| for (i = 0; i < count; i++) { |
| node = ptr_array[i]; |
| |
| config_ptr = create_config_record(); |
| config_ptr->nodes = xstrdup(node->nodenames); |
| config_ptr->cpus = node->cpus; |
| config_ptr->sockets = node->sockets; |
| config_ptr->cores = node->cores; |
| config_ptr->threads = node->threads; |
| config_ptr->real_memory = node->real_memory; |
| config_ptr->tmp_disk = node->tmp_disk; |
| config_ptr->weight = node->weight; |
| if (node->feature) |
| config_ptr->feature = xstrdup(node->feature); |
| |
| _build_single_nodeline_info(node, config_ptr, conf); |
| } |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * _build_single_partitionline_info - get a array of slurm_conf_partition_t |
| * structures from the slurm.conf reader, build table, and set values |
| * RET 0 if no error, error code otherwise |
| * Note: Operates on common variables |
| * global: part_list - global partition list pointer |
| * default_part - default parameters for a partition |
| */ |
| static int _build_single_partitionline_info(slurm_conf_partition_t *part) |
| { |
| struct part_record *part_ptr; |
| |
| if (strlen(part->name) >= MAX_SLURM_NAME) { |
| error("_parse_part_spec: partition name %s too long", |
| part->name); |
| return EINVAL; |
| } |
| |
| part_ptr = list_find_first(part_list, &list_find_part, part->name); |
| if (part_ptr == NULL) { |
| part_ptr = create_part_record(); |
| strcpy(part_ptr->name, part->name); |
| } else { |
| verbose("_parse_part_spec: duplicate entry for partition %s", |
| part->name); |
| } |
| |
| if (part->default_flag) { |
| if ((strlen(default_part_name) > 0) |
| && strcmp(default_part_name, part->name)) |
| info("_parse_part_spec: changing default partition " |
| "from %s to %s", |
| default_part_name, part->name); |
| strcpy(default_part_name, part->name); |
| default_part_loc = part_ptr; |
| } |
| if(part->disable_root_jobs == (uint16_t)NO_VAL) |
| part_ptr->disable_root_jobs = slurmctld_conf.disable_root_jobs; |
| else |
| part_ptr->disable_root_jobs = part->disable_root_jobs; |
| |
| if(part_ptr->disable_root_jobs) |
| debug2("partition %s does not allow root jobs", part_ptr->name); |
| |
| part_ptr->hidden = part->hidden_flag ? 1 : 0; |
| part_ptr->max_time = part->max_time; |
| part_ptr->max_nodes = part->max_nodes; |
| part_ptr->min_nodes = part->min_nodes; |
| part_ptr->root_only = part->root_only_flag ? 1 : 0; |
| part_ptr->state_up = part->state_up_flag ? 1 : 0; |
| part_ptr->shared = part->shared; |
| if (part->allow_groups) { |
| xfree(part_ptr->allow_groups); |
| part_ptr->allow_groups = xstrdup(part->allow_groups); |
| } |
| if (part->nodes) { |
| if (part_ptr->nodes) { |
| int cnt_tot, cnt_uniq, buf_size; |
| hostlist_t hl = hostlist_create(part_ptr->nodes); |
| |
| hostlist_push(hl, part->nodes); |
| cnt_tot = hostlist_count(hl); |
| hostlist_uniq(hl); |
| cnt_uniq = hostlist_count(hl); |
| if (cnt_tot != cnt_uniq) { |
| fatal("Duplicate Nodes for Partition %s", |
| part->name); |
| } |
| buf_size = strlen(part_ptr->nodes) + 1 + |
| strlen(part->nodes) + 1; |
| xfree(part_ptr->nodes); |
| part_ptr->nodes = xmalloc(buf_size); |
| hostlist_ranged_string(hl, buf_size, part_ptr->nodes); |
| hostlist_destroy(hl); |
| } else { |
| part_ptr->nodes = xstrdup(part->nodes); |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * _build_all_partitionline_info - get a array of slurm_conf_partition_t |
| * structures from the slurm.conf reader, build table, and set values |
| * RET 0 if no error, error code otherwise |
| * Note: Operates on common variables |
| * global: part_list - global partition list pointer |
| * default_part - default parameters for a partition |
| */ |
static int _build_all_partitionline_info(void)
| { |
| slurm_conf_partition_t *part, **ptr_array; |
| int count; |
| int i; |
| |
| count = slurm_conf_partition_array(&ptr_array); |
| if (count == 0) |
| fatal("No PartitionName information available!"); |
| |
| for (i = 0; i < count; i++) { |
| part = ptr_array[i]; |
| |
| _build_single_partitionline_info(part); |
| } |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * read_slurm_conf - load the slurm configuration from the configured file. |
| * read_slurm_conf can be called more than once if so desired. |
| * IN recover - replace job, node and/or partition data with last saved |
| * state information depending upon value |
| * 0 = use no saved state information |
| * 1 = recover saved job and trigger state, |
| * node DOWN/DRAIN state and reason information |
| * 2 = recover all state saved from last slurmctld shutdown |
| * RET 0 if no error, otherwise an error code |
| * Note: Operates on common variables only |
| */ |
| int read_slurm_conf(int recover) |
| { |
| DEF_TIMERS; |
| int error_code, i; |
| int old_node_record_count; |
| struct node_record *old_node_table_ptr; |
| char *old_auth_type = xstrdup(slurmctld_conf.authtype); |
| char *old_checkpoint_type = xstrdup(slurmctld_conf.checkpoint_type); |
| char *old_sched_type = xstrdup(slurmctld_conf.schedtype); |
| char *old_select_type = xstrdup(slurmctld_conf.select_type); |
| char *old_switch_type = xstrdup(slurmctld_conf.switch_type); |
| char *state_save_dir = xstrdup(slurmctld_conf.state_save_location); |
| slurm_ctl_conf_t *conf; |
| select_type_plugin_info_t old_select_type_p = |
| (select_type_plugin_info_t) slurmctld_conf.select_type_param; |
| |
| /* initialization */ |
| START_TIMER; |
| |
| if (recover == 0) { |
| /* in order to re-use job state information, |
| * update nodes_completing string (based on node_bitmap) */ |
| update_job_nodes_completing(); |
| } |
| |
| /* save node states for reconfig RPC */ |
| old_node_record_count = node_record_count; |
| old_node_table_ptr = node_record_table_ptr; |
| for (i=0; i<node_record_count; i++) { |
| xfree(old_node_table_ptr[i].features); |
| old_node_table_ptr[i].features = xstrdup( |
| old_node_table_ptr[i].config_ptr->feature); |
| } |
| node_record_table_ptr = NULL; |
| node_record_count = 0; |
| |
| if ((error_code = _init_all_slurm_conf())) { |
| node_record_table_ptr = old_node_table_ptr; |
| return error_code; |
| } |
| conf = slurm_conf_lock(); |
| _build_all_nodeline_info(conf); |
| slurm_conf_unlock(); |
| _handle_all_downnodes(); |
| _build_all_partitionline_info(); |
| |
| update_logging(); |
| jobacct_g_init_slurmctld(slurmctld_conf.job_acct_logfile); |
| g_slurm_jobcomp_init(slurmctld_conf.job_comp_loc); |
| slurm_sched_init(); |
| if (switch_init() < 0) |
| error("Failed to initialize switch plugin"); |
| |
| if (default_part_loc == NULL) |
| error("read_slurm_conf: default partition not set."); |
| |
| if (node_record_count < 1) { |
| error("read_slurm_conf: no nodes configured."); |
| _purge_old_node_state(old_node_table_ptr, old_node_record_count); |
| return EINVAL; |
| } |
| |
| rehash_node(); |
| rehash_jobs(); |
| set_slurmd_addr(); |
| |
| if (recover > 1) { /* Load node, part and job info */ |
| (void) load_all_node_state(false); |
| (void) load_all_part_state(); |
| (void) load_all_job_state(); |
| } else if (recover == 1) { /* Load job info only */ |
| (void) load_all_node_state(true); |
| (void) load_all_job_state(); |
| } else { /* Load no info, preserve all state */ |
| if (old_node_table_ptr) { |
| debug("restoring original state of nodes"); |
| _restore_node_state(old_node_table_ptr, |
| old_node_record_count); |
| } |
| reset_first_job_id(); |
| (void) slurm_sched_reconfig(); |
| xfree(state_save_dir); |
| } |
| |
| if ((select_g_node_init(node_record_table_ptr, node_record_count) |
| != SLURM_SUCCESS) |
| || (select_g_block_init(part_list) != SLURM_SUCCESS) |
| || (select_g_state_restore(state_save_dir) != SLURM_SUCCESS) |
| || (select_g_job_init(job_list) != SLURM_SUCCESS)) { |
| fatal("failed to initialize node selection plugin state, " |
| "Clean start required."); |
| } |
| xfree(state_save_dir); |
| reset_job_bitmaps(); /* must follow select_g_job_init() */ |
| |
| (void) _sync_nodes_to_jobs(); |
| (void) sync_job_files(); |
| _purge_old_node_state(old_node_table_ptr, old_node_record_count); |
| |
| if ((error_code = _build_bitmaps())) |
| return error_code; |
| restore_node_features(); |
| #ifdef HAVE_ELAN |
| _validate_node_proc_count(); |
| #endif |
| (void) _sync_nodes_to_comp_job();/* must follow select_g_node_init() */ |
| load_part_uid_allow_list(1); |
| |
| if (recover >= 1) |
| (void) trigger_state_restore(); |
| |
| /* sort config_list by weight for scheduling */ |
| list_sort(config_list, &list_compare_config); |
| |
| /* Update plugins as possible */ |
| error_code = _preserve_plugins(&slurmctld_conf, |
| old_auth_type, old_checkpoint_type, |
| old_sched_type, old_select_type, |
| old_switch_type); |
| |
| /* Update plugin parameters as possible */ |
| error_code = _preserve_select_type_param( |
| &slurmctld_conf, |
| old_select_type_p); |
| |
| slurmctld_conf.last_update = time(NULL); |
| END_TIMER2("read_slurm_conf"); |
| return error_code; |
| } |
| |
| |
| /* Restore node state and size information from saved records. |
| * If a node was re-configured to be down or drained, we set those states */ |
| static void _restore_node_state(struct node_record *old_node_table_ptr, |
| int old_node_record_count) |
| { |
| struct node_record *node_ptr; |
| int i; |
| |
| for (i = 0; i < old_node_record_count; i++) { |
| uint16_t drain_flag = false, down_flag = false; |
| node_ptr = find_node_record(old_node_table_ptr[i].name); |
| if (node_ptr == NULL) |
| continue; |
| |
| if ((node_ptr->node_state & NODE_STATE_BASE) == NODE_STATE_DOWN) |
| down_flag = true; |
| if (node_ptr->node_state & NODE_STATE_DRAIN) |
| drain_flag = true; |
| node_ptr->node_state = old_node_table_ptr[i].node_state; |
| if (down_flag) { |
| node_ptr->node_state &= NODE_STATE_FLAGS; |
| node_ptr->node_state |= NODE_STATE_DOWN; |
| } |
| if (drain_flag) |
| node_ptr->node_state |= NODE_STATE_DRAIN; |
| |
| node_ptr->last_response = old_node_table_ptr[i].last_response; |
| node_ptr->cpus = old_node_table_ptr[i].cpus; |
| node_ptr->sockets = old_node_table_ptr[i].sockets; |
| node_ptr->cores = old_node_table_ptr[i].cores; |
| node_ptr->threads = old_node_table_ptr[i].threads; |
| node_ptr->real_memory = old_node_table_ptr[i].real_memory; |
| node_ptr->tmp_disk = old_node_table_ptr[i].tmp_disk; |
| if (node_ptr->reason == NULL) { |
| /* Recover only if not explicitly set in slurm.conf */ |
| node_ptr->reason = old_node_table_ptr[i].reason; |
| old_node_table_ptr[i].reason = NULL; |
| } |
| if (old_node_table_ptr[i].features) { |
| xfree(node_ptr->features); |
| node_ptr->features = old_node_table_ptr[i].features; |
| old_node_table_ptr[i].features = NULL; |
| } |
| } |
| } |
| |
| /* Purge old node state information */ |
| static void _purge_old_node_state(struct node_record *old_node_table_ptr, |
| int old_node_record_count) |
| { |
| int i; |
| |
| for (i = 0; i < old_node_record_count; i++) { |
| xfree(old_node_table_ptr[i].part_pptr); |
| xfree(old_node_table_ptr[i].features); |
| xfree(old_node_table_ptr[i].reason); |
| } |
| xfree(old_node_table_ptr); |
| } |
| |
| |
| /* |
| * _preserve_select_type_param - preserve original plugin parameters. |
| * Daemons and/or commands must be restarted for some |
| * select plugin value changes to take effect. |
| * RET zero or error code |
| */ |
| static int _preserve_select_type_param(slurm_ctl_conf_t *ctl_conf_ptr, |
| select_type_plugin_info_t old_select_type_p) |
| { |
| int rc = SLURM_SUCCESS; |
| |
| /* SelectTypeParameters cannot change */ |
| if (old_select_type_p) { |
| if (old_select_type_p != ctl_conf_ptr->select_type_param) { |
| ctl_conf_ptr->select_type_param = (uint16_t) |
| old_select_type_p; |
| rc = ESLURM_INVALID_SELECTTYPE_CHANGE; |
| } |
| } |
| return rc; |
| } |
| |
| /* |
| * _preserve_plugins - preserve original plugin values over reconfiguration |
 *	as required. Daemons and/or commands must be restarted for some
| * plugin value changes to take effect. |
| * RET zero or error code |
| */ |
| static int _preserve_plugins(slurm_ctl_conf_t * ctl_conf_ptr, |
| char *old_auth_type, char *old_checkpoint_type, |
| char *old_sched_type, char *old_select_type, |
| char *old_switch_type) |
| { |
| int rc = SLURM_SUCCESS; |
| |
| if (old_auth_type) { |
| if (strcmp(old_auth_type, ctl_conf_ptr->authtype)) { |
| xfree(ctl_conf_ptr->authtype); |
| ctl_conf_ptr->authtype = old_auth_type; |
| rc = ESLURM_INVALID_AUTHTYPE_CHANGE; |
| } else /* free duplicate value */ |
| xfree(old_auth_type); |
| } |
| |
| if (old_checkpoint_type) { |
| if (strcmp(old_checkpoint_type, |
| ctl_conf_ptr->checkpoint_type)) { |
| xfree(ctl_conf_ptr->checkpoint_type); |
| ctl_conf_ptr->checkpoint_type = old_checkpoint_type; |
| rc = ESLURM_INVALID_CHECKPOINT_TYPE_CHANGE; |
| } else /* free duplicate value */ |
| xfree(old_checkpoint_type); |
| } |
| |
| if (old_sched_type) { |
| if (strcmp(old_sched_type, ctl_conf_ptr->schedtype)) { |
| xfree(ctl_conf_ptr->schedtype); |
| ctl_conf_ptr->schedtype = old_sched_type; |
| rc = ESLURM_INVALID_SCHEDTYPE_CHANGE; |
| } else /* free duplicate value */ |
| xfree(old_sched_type); |
| } |
| |
| |
| if (old_select_type) { |
| if (strcmp(old_select_type, ctl_conf_ptr->select_type)) { |
| xfree(ctl_conf_ptr->select_type); |
| ctl_conf_ptr->select_type = old_select_type; |
| rc = ESLURM_INVALID_SELECTTYPE_CHANGE; |
| } else /* free duplicate value */ |
| xfree(old_select_type); |
| } |
| |
| if (old_switch_type) { |
| if (strcmp(old_switch_type, ctl_conf_ptr->switch_type)) { |
| xfree(ctl_conf_ptr->switch_type); |
| ctl_conf_ptr->switch_type = old_switch_type; |
| rc = ESLURM_INVALID_SWITCHTYPE_CHANGE; |
| } else /* free duplicate value */ |
| xfree(old_switch_type); |
| } |
| |
| if (ctl_conf_ptr->backup_controller == NULL) |
| info("read_slurm_conf: backup_controller not specified."); |
| |
| return rc; |
| } |
| |
| |
| /* |
| * _sync_nodes_to_jobs - sync node state to job states on slurmctld restart. |
 *	This routine marks nodes allocated to a job as busy regardless of
 *	the node's last saved state
| * RET count of nodes having state changed |
| * Note: Operates on common variables, no arguments |
| */ |
| static int _sync_nodes_to_jobs(void) |
| { |
| struct job_record *job_ptr; |
| ListIterator job_iterator; |
| int update_cnt = 0; |
| |
| job_iterator = list_iterator_create(job_list); |
| while ((job_ptr = (struct job_record *) list_next(job_iterator))) { |
| if (job_ptr->node_bitmap == NULL) |
| continue; |
| |
| if ((job_ptr->job_state == JOB_RUNNING) || |
| (job_ptr->job_state & JOB_COMPLETING)) |
| update_cnt += _sync_nodes_to_active_job(job_ptr); |
| } |
| list_iterator_destroy(job_iterator); |
| |
| if (update_cnt) |
| info("_sync_nodes_to_jobs updated state of %d nodes", |
| update_cnt); |
| return update_cnt; |
| } |
| |
| /* For jobs which are in state COMPLETING, deallocate the nodes and |
| * issue the RPC to kill the job */ |
| static int _sync_nodes_to_comp_job(void) |
| { |
| struct job_record *job_ptr; |
| ListIterator job_iterator; |
| int update_cnt = 0; |
| |
| job_iterator = list_iterator_create(job_list); |
| while ((job_ptr = (struct job_record *) list_next(job_iterator))) { |
| if ((job_ptr->node_bitmap) && |
| (job_ptr->job_state & JOB_COMPLETING)) { |
| update_cnt++; |
| info("Killing job_id %u", job_ptr->job_id); |
| deallocate_nodes(job_ptr, false, false); |
| } |
| } |
| list_iterator_destroy(job_iterator); |
| if (update_cnt) |
| info("_sync_nodes_to_comp_job completing %d jobs", |
| update_cnt); |
| return update_cnt; |
| } |
| |
/* Synchronize the state of the nodes allocated to one active job (RUNNING
 * or COMPLETING state)
 * RET count of nodes whose state was changed */
| static int _sync_nodes_to_active_job(struct job_record *job_ptr) |
| { |
| int i, cnt = 0; |
| uint16_t base_state, node_flags; |
| struct node_record *node_ptr = node_record_table_ptr; |
| |
| job_ptr->node_cnt = 0; |
| for (i = 0; i < node_record_count; i++, node_ptr++) { |
| if (bit_test(job_ptr->node_bitmap, i) == 0) |
| continue; |
| job_ptr->node_cnt++; |
| |
| base_state = node_ptr->node_state & NODE_STATE_BASE; |
| node_flags = node_ptr->node_state & NODE_STATE_FLAGS; |
| |
| node_ptr->run_job_cnt++; /* NOTE: |
| * This counter moved to comp_job_cnt |
| * by _sync_nodes_to_comp_job() */ |
| if (((job_ptr->job_state == JOB_RUNNING) || |
| (job_ptr->job_state & JOB_COMPLETING)) && |
| (job_ptr->details) && (job_ptr->details->shared == 0)) |
| node_ptr->no_share_job_cnt++; |
| |
| if (base_state == NODE_STATE_DOWN) { |
| time_t now = time(NULL); |
| job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; |
| job_ptr->end_time = MIN(job_ptr->end_time, now); |
| job_ptr->exit_code = MAX(job_ptr->exit_code, 1); |
| job_ptr->state_reason = FAIL_DOWN_NODE; |
| job_completion_logger(job_ptr); |
| cnt++; |
| } else if ((base_state == NODE_STATE_UNKNOWN) || |
| (base_state == NODE_STATE_IDLE)) { |
| cnt++; |
| node_ptr->node_state = |
| NODE_STATE_ALLOCATED | node_flags; |
| } |
| } |
| return cnt; |
| } |
| |
| #ifdef HAVE_ELAN |
/* At present, every node in a given partition must have the same processor
 * count; this function ensures that it does */
| static void _validate_node_proc_count(void) |
| { |
| ListIterator part_iterator; |
| struct part_record *part_ptr; |
| struct node_record *node_ptr; |
| int first_bit, last_bit, i, node_size, part_size; |
| |
| part_iterator = list_iterator_create(part_list); |
| while ((part_ptr = (struct part_record *) list_next(part_iterator))) { |
| first_bit = bit_ffs(part_ptr->node_bitmap); |
| last_bit = bit_fls(part_ptr->node_bitmap); |
| part_size = -1; |
| for (i = first_bit; i <= last_bit; i++) { |
| if (bit_test(part_ptr->node_bitmap, i) == 0) |
| continue; |
| node_ptr = node_record_table_ptr + i; |
| |
| if (slurmctld_conf.fast_schedule) |
| node_size = node_ptr->config_ptr->cpus; |
| else if (node_ptr->cpus < node_ptr->config_ptr->cpus) |
| continue; /* node too small, will be DOWN */ |
| else if ((node_ptr->node_state & NODE_STATE_BASE) |
| == NODE_STATE_DOWN) |
| continue; |
| else |
| node_size = node_ptr->cpus; |
| |
| if (part_size == -1) |
| part_size = node_size; |
| else if (part_size != node_size) |
| fatal("Partition %s has inconsistent " |
| "processor count", part_ptr->name); |
| } |
| } |
| list_iterator_destroy(part_iterator); |
| } |
| #endif |
| |