| /*****************************************************************************\ |
| * front_end.c - Define front end node functions. |
| ***************************************************************************** |
| * Copyright (C) 2010 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov> |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include <fcntl.h> |
| #include <stdlib.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include "slurm/slurm.h" |
| #include "src/common/list.h" |
| #include "src/common/log.h" |
| #include "src/common/node_conf.h" |
| #include "src/common/read_config.h" |
| #include "src/common/slurm_protocol_defs.h" |
| #include "src/common/uid.h" |
| #include "src/common/xstring.h" |
| #include "src/slurmctld/front_end.h" |
| #include "src/slurmctld/locks.h" |
| #include "src/slurmctld/slurmctld.h" |
| #include "src/slurmctld/state_save.h" |
| #include "src/slurmctld/trigger_mgr.h" |
| |
| /* Change FRONT_END_STATE_VERSION value when changing the state save format */ |
| #define FRONT_END_STATE_VERSION "PROTOCOL_VERSION" /* header string written by dump_all_front_end_state() */ |
| #define FRONT_END_2_6_STATE_VERSION "VER001" /* SLURM version 2.6 */ |
| #define FRONT_END_2_5_STATE_VERSION "VER001" /* SLURM version 2.5 */ |
| |
| front_end_record_t *front_end_nodes = NULL; /* array of front end node records */ |
| uint16_t front_end_node_cnt = 0; /* count of records in front_end_nodes */ |
| time_t last_front_end_update = (time_t) 0; /* time of the most recent front end update */ |
| |
| #ifdef HAVE_FRONT_END |
| /* |
| * _dump_front_end_state - dump state of a specific front_end node to a buffer |
| * IN front_end_ptr - pointer to node for which information is requested |
| * IN/OUT buffer - location to store data, pointers automatically advanced |
| */ |
| static void _dump_front_end_state(front_end_record_t *front_end_ptr, |
| Buf buffer) |
| { |
| packstr (front_end_ptr->name, buffer); |
| pack16 (front_end_ptr->node_state, buffer); |
| packstr (front_end_ptr->reason, buffer); |
| pack_time(front_end_ptr->reason_time, buffer); |
| pack32 (front_end_ptr->reason_uid, buffer); |
| pack16 (front_end_ptr->protocol_version, buffer); |
| } |
| |
| |
| /* |
| * Open the front_end node state save file, or backup if necessary. |
| * state_file IN - the name of the state save file used |
| * RET the file description to read from or error code |
| */ |
| static int _open_front_end_state_file(char **state_file) |
| { |
| int state_fd; |
| struct stat stat_buf; |
| |
| *state_file = xstrdup(slurmctld_conf.state_save_location); |
| xstrcat(*state_file, "/front_end_state"); |
| state_fd = open(*state_file, O_RDONLY); |
| if (state_fd < 0) { |
| error("Could not open front_end state file %s: %m", |
| *state_file); |
| } else if (fstat(state_fd, &stat_buf) < 0) { |
| error("Could not stat front_end state file %s: %m", |
| *state_file); |
| (void) close(state_fd); |
| } else if (stat_buf.st_size < 10) { |
| error("Front_end state file %s too small", *state_file); |
| (void) close(state_fd); |
| } else /* Success */ |
| return state_fd; |
| |
| error("NOTE: Trying backup front_end_state save file. Information may " |
| "be lost!"); |
| xstrcat(*state_file, ".old"); |
| state_fd = open(*state_file, O_RDONLY); |
| return state_fd; |
| } |
| |
| /* |
| * _pack_front_end - dump all configuration information about a specific |
| * front_end node in machine independent form (for network transmission) |
| * IN dump_front_end_ptr - pointer to front_end node for which information is |
| * requested |
| * IN/OUT buffer - buffer where data is placed, pointers automatically updated |
| * IN protocol_version - slurm protocol version of client |
| * NOTE: if you make any changes here be sure to make the corresponding |
| * changes to load_front_end_config in api/node_info.c |
| */ |
| static void _pack_front_end(struct front_end_record *dump_front_end_ptr, |
| Buf buffer, uint16_t protocol_version) |
| { |
| if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) { |
| packstr(dump_front_end_ptr->allow_groups, buffer); |
| packstr(dump_front_end_ptr->allow_users, buffer); |
| pack_time(dump_front_end_ptr->boot_time, buffer); |
| packstr(dump_front_end_ptr->deny_groups, buffer); |
| packstr(dump_front_end_ptr->deny_users, buffer); |
| packstr(dump_front_end_ptr->name, buffer); |
| pack16(dump_front_end_ptr->node_state, buffer); |
| packstr(dump_front_end_ptr->version, buffer); |
| |
| packstr(dump_front_end_ptr->reason, buffer); |
| pack_time(dump_front_end_ptr->reason_time, buffer); |
| pack32(dump_front_end_ptr->reason_uid, buffer); |
| |
| pack_time(dump_front_end_ptr->slurmd_start_time, buffer); |
| } else if (protocol_version >= SLURM_2_6_PROTOCOL_VERSION) { |
| packstr(dump_front_end_ptr->allow_groups, buffer); |
| packstr(dump_front_end_ptr->allow_users, buffer); |
| pack_time(dump_front_end_ptr->boot_time, buffer); |
| packstr(dump_front_end_ptr->deny_groups, buffer); |
| packstr(dump_front_end_ptr->deny_users, buffer); |
| packstr(dump_front_end_ptr->name, buffer); |
| pack16(dump_front_end_ptr->node_state, buffer); |
| |
| packstr(dump_front_end_ptr->reason, buffer); |
| pack_time(dump_front_end_ptr->reason_time, buffer); |
| pack32(dump_front_end_ptr->reason_uid, buffer); |
| |
| pack_time(dump_front_end_ptr->slurmd_start_time, buffer); |
| } else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { |
| pack_time(dump_front_end_ptr->boot_time, buffer); |
| packstr(dump_front_end_ptr->name, buffer); |
| pack16(dump_front_end_ptr->node_state, buffer); |
| |
| packstr(dump_front_end_ptr->reason, buffer); |
| pack_time(dump_front_end_ptr->reason_time, buffer); |
| pack32(dump_front_end_ptr->reason_uid, buffer); |
| |
| pack_time(dump_front_end_ptr->slurmd_start_time, buffer); |
| } else { |
| error("_pack_front_end: Unsupported slurm version %u", |
| protocol_version); |
| } |
| } |
| #endif |
| |
| #ifdef HAVE_FRONT_END |
| /* Validate job's access to a specific front-end node */ |
| static bool _front_end_access(front_end_record_t *front_end_ptr, |
| struct job_record *job_ptr) |
| { |
| int i; |
| |
| if (!job_ptr) |
| return true; |
| |
| if (front_end_ptr->deny_gids) { |
| for (i = 0; front_end_ptr->deny_gids[i]; i++) { |
| if (job_ptr->group_id == front_end_ptr->deny_gids[i]) |
| return false; |
| } |
| } |
| if (front_end_ptr->deny_uids) { |
| for (i = 0; front_end_ptr->deny_uids[i]; i++) { |
| if (job_ptr->user_id == front_end_ptr->deny_uids[i]) |
| return false; |
| } |
| } |
| if (front_end_ptr->allow_gids || front_end_ptr->allow_uids) { |
| if (front_end_ptr->allow_gids) { |
| for (i = 0; front_end_ptr->allow_gids[i]; i++) { |
| if (job_ptr->group_id == |
| front_end_ptr->allow_gids[i]) |
| return true; |
| } |
| } |
| if (front_end_ptr->allow_uids) { |
| for (i = 0; front_end_ptr->allow_uids[i]; i++) { |
| if (job_ptr->user_id == |
| front_end_ptr->allow_uids[i]) |
| return true; |
| } |
| } |
| return false; |
| } |
| return true; |
| } |
| #endif |
| |
| /* |
| * assign_front_end - assign a front end node for starting a job |
| * job_ptr IN - job to assign a front end node (tests access control lists) |
| * RET pointer to the front end node to use or NULL if none found |
| */ |
| extern front_end_record_t *assign_front_end(struct job_record *job_ptr) |
| { |
| #ifdef HAVE_FRONT_END |
| static int last_assigned = -1; |
| front_end_record_t *front_end_ptr; |
| uint16_t state_flags; |
| int i; |
| |
| for (i = 0; i < front_end_node_cnt; i++) { |
| last_assigned = (last_assigned + 1) % front_end_node_cnt; |
| front_end_ptr = front_end_nodes + last_assigned; |
| if (job_ptr->batch_host) { /* Find specific front-end node */ |
| if (strcmp(job_ptr->batch_host, front_end_ptr->name)) |
| continue; |
| if (!_front_end_access(front_end_ptr, job_ptr)) |
| break; |
| } else { /* Find some usable front-end node */ |
| if (IS_NODE_DOWN(front_end_ptr) || |
| IS_NODE_DRAIN(front_end_ptr) || |
| IS_NODE_NO_RESPOND(front_end_ptr)) |
| continue; |
| if (!_front_end_access(front_end_ptr, job_ptr)) |
| continue; |
| } |
| state_flags = front_end_nodes[last_assigned].node_state & |
| NODE_STATE_FLAGS; |
| front_end_nodes[last_assigned].node_state = |
| NODE_STATE_ALLOCATED | state_flags; |
| front_end_nodes[last_assigned].job_cnt_run++; |
| return front_end_ptr; |
| } |
| if (job_ptr->batch_host) { /* Find specific front-end node */ |
| error("assign_front_end: front end node %s not found", |
| job_ptr->batch_host); |
| } else { /* Find some usable front-end node */ |
| error("assign_front_end: no available front end nodes found"); |
| } |
| #endif |
| return NULL; |
| } |
| |
/*
 * avail_front_end - test if any front end nodes are available for starting job
 * job_ptr IN - job to consider for starting (tests access control lists) or
 *	NULL to test if any job can start (no test of ACL)
 * RET true if at least one front end node is not DOWN, DRAIN or
 *	non-responding and accepts the job; always true when built without
 *	front end support
 */
extern bool avail_front_end(struct job_record *job_ptr)
{
#ifdef HAVE_FRONT_END
	int i;

	for (i = 0; i < front_end_node_cnt; i++) {
		front_end_record_t *fe = &front_end_nodes[i];
		bool out_of_service = IS_NODE_DOWN(fe) ||
				      IS_NODE_DRAIN(fe) ||
				      IS_NODE_NO_RESPOND(fe);

		if (!out_of_service && _front_end_access(fe, job_ptr))
			return true;
	}
	return false;
#else
	return true;
#endif
}
| |
| /* |
| * Update front end node state |
| * update_front_end_msg_ptr IN change specification |
| * RET SLURM_SUCCESS or error code |
| */ |
| extern int update_front_end(update_front_end_msg_t *msg_ptr) |
| { |
| #ifdef HAVE_FRONT_END |
| char *this_node_name = NULL; |
| hostlist_t host_list; |
| front_end_record_t *front_end_ptr; |
| int i, rc = SLURM_SUCCESS; |
| time_t now = time(NULL); |
| |
| if ((host_list = hostlist_create(msg_ptr->name)) == NULL) { |
| error("hostlist_create error on %s: %m", msg_ptr->name); |
| return ESLURM_INVALID_NODE_NAME; |
| } |
| |
| last_front_end_update = now; |
| while ((this_node_name = hostlist_shift(host_list))) { |
| for (i = 0, front_end_ptr = front_end_nodes; |
| i < front_end_node_cnt; i++, front_end_ptr++) { |
| xassert(front_end_ptr->magic == FRONT_END_MAGIC); |
| if (strcmp(this_node_name, front_end_ptr->name)) |
| continue; |
| if (msg_ptr->node_state == (uint16_t) NO_VAL) { |
| ; /* No change in node state */ |
| } else if (msg_ptr->node_state == NODE_RESUME) { |
| front_end_ptr->node_state = NODE_STATE_IDLE; |
| xfree(front_end_ptr->reason); |
| front_end_ptr->reason_time = 0; |
| front_end_ptr->reason_uid = 0; |
| } else if (msg_ptr->node_state == NODE_STATE_DRAIN) { |
| front_end_ptr->node_state |= NODE_STATE_DRAIN; |
| if (msg_ptr->reason) { |
| xfree(front_end_ptr->reason); |
| front_end_ptr->reason = |
| xstrdup(msg_ptr->reason); |
| front_end_ptr->reason_time = now; |
| front_end_ptr->reason_uid = |
| msg_ptr->reason_uid; |
| } |
| } else if (msg_ptr->node_state == NODE_STATE_DOWN) { |
| set_front_end_down(front_end_ptr, |
| msg_ptr->reason); |
| } |
| if (msg_ptr->node_state != (uint16_t) NO_VAL) { |
| info("update_front_end: set state of %s to %s", |
| this_node_name, |
| node_state_string(front_end_ptr-> |
| node_state)); |
| } |
| break; |
| } |
| if (i >= front_end_node_cnt) { |
| info("update_front_end: could not find front end: %s", |
| this_node_name); |
| rc = ESLURM_INVALID_NODE_NAME; |
| } |
| free(this_node_name); |
| } |
| hostlist_destroy(host_list); |
| |
| return rc; |
| #else |
| return ESLURM_INVALID_NODE_NAME; |
| #endif |
| } |
| |
| /* |
| * find_front_end_record - find a record for front_endnode with specified name |
| * input: name - name of the desired front_end node |
| * output: return pointer to front_end node record or NULL if not found |
| */ |
| extern front_end_record_t *find_front_end_record(char *name) |
| { |
| #ifdef HAVE_FRONT_END |
| front_end_record_t *front_end_ptr; |
| int i; |
| |
| for (i = 0, front_end_ptr = front_end_nodes; |
| i < front_end_node_cnt; i++, front_end_ptr++) { |
| xassert(front_end_ptr->magic == FRONT_END_MAGIC); |
| if (strcmp(front_end_ptr->name, name) == 0) |
| return front_end_ptr; |
| } |
| #endif |
| return NULL; |
| } |
| |
/*
 * log_front_end_state - log all front end node state
 *	Writes one info() line per front end node with its name, address,
 *	port, state, reason, job counts and access control lists.
 */
extern void log_front_end_state(void)
{
#ifdef HAVE_FRONT_END
	int i;

	for (i = 0; i < front_end_node_cnt; i++) {
		front_end_record_t *fe = &front_end_nodes[i];

		xassert(fe->magic == FRONT_END_MAGIC);
		info("FrontendName=%s FrontendAddr=%s Port=%u State=%s "
		     "Reason=%s JobCntRun=%u JobCntComp=%u "
		     "AllowGroups=%s AllowUsers=%s "
		     "DenyGroups=%s DenyUsers=%s ",
		     fe->name, fe->comm_name, fe->port,
		     node_state_string(fe->node_state),
		     fe->reason, fe->job_cnt_run, fe->job_cnt_comp,
		     fe->allow_groups, fe->allow_users,
		     fe->deny_groups, fe->deny_users);
	}
#endif
}
| |
/*
 * purge_front_end_state - purge all front end node state
 *	Releases every string and ID array owned by each front end record,
 *	then the record array itself, and resets the record count to zero.
 */
extern void purge_front_end_state(void)
{
#ifdef HAVE_FRONT_END
	front_end_record_t *front_end_ptr;
	int i;

	for (i = 0, front_end_ptr = front_end_nodes;
	     i < front_end_node_cnt; i++, front_end_ptr++) {
		xassert(front_end_ptr->magic == FRONT_END_MAGIC);
		xfree(front_end_ptr->allow_gids);
		xfree(front_end_ptr->allow_groups);
		xfree(front_end_ptr->allow_uids);
		xfree(front_end_ptr->allow_users);
		xfree(front_end_ptr->comm_name);
		xfree(front_end_ptr->deny_gids);
		xfree(front_end_ptr->deny_groups);
		/* deny_uids is allocated by restore_front_end_state() just
		 * like the other three ID arrays and must be released too */
		xfree(front_end_ptr->deny_uids);
		xfree(front_end_ptr->deny_users);
		xfree(front_end_ptr->name);
		xfree(front_end_ptr->reason);
		xfree(front_end_ptr->version);
	}
	xfree(front_end_nodes);
	front_end_node_cnt = 0;
#endif
}
| |
/*
 * _xlate_groups - translate comma delimited string of GIDs/group names into
 *	a zero terminated array of GIDs
 * IN group_str - comma delimited list of group names and/or numeric GIDs
 * IN key - configuration keyword, used only in error messages
 * RET array of GIDs, or NULL if group_str is NULL/empty or contains no
 *	valid entry. Entries that fail translation or resolve to GID 0 are
 *	logged and skipped (zero is reserved as the list terminator).
 * NOTE: one extra slot is always allocated past the last entry; it is
 *	presumably zero filled by xrealloc, which provides the terminator
 *	(confirm against xmalloc.h)
 */
gid_t *_xlate_groups(char *group_str, char *key)
{
	char *work_str, *tok, *last = NULL;
	gid_t *result = NULL;
	int used = 0;
	gid_t gid;

	if (!group_str || (group_str[0] == '\0'))
		return result;

	/* strtok_r modifies its input, so work on a private copy */
	work_str = xstrdup(group_str);
	for (tok = strtok_r(work_str, ",", &last); tok;
	     tok = strtok_r(NULL, ",", &last)) {
		if (gid_from_string(tok, &gid) || (gid == (gid_t) 0)) {
			error("Invalid %s value (%s), ignored", key, tok);
			continue;
		}
		xrealloc(result, sizeof(gid_t) * (used + 2));
		result[used++] = gid;
	}
	xfree(work_str);
	return result;
}
| |
/*
 * _xlate_users - translate comma delimited string of UIDs/user names into
 *	a zero terminated array of UIDs
 * IN user_str - comma delimited list of user names and/or numeric UIDs
 * IN key - configuration keyword, used only in error messages
 * RET array of UIDs, or NULL if user_str is NULL/empty or contains no
 *	valid entry. Entries that fail translation or resolve to UID 0 are
 *	logged and skipped (zero is reserved as the list terminator).
 * NOTE: one extra slot is always allocated past the last entry; it is
 *	presumably zero filled by xrealloc, which provides the terminator
 *	(confirm against xmalloc.h)
 */
uid_t *_xlate_users(char *user_str, char *key)
{
	char *work_str, *tok, *last = NULL;
	uid_t *result = NULL;
	int used = 0;
	uid_t uid;

	if (!user_str || (user_str[0] == '\0'))
		return result;

	/* strtok_r modifies its input, so work on a private copy */
	work_str = xstrdup(user_str);
	for (tok = strtok_r(work_str, ",", &last); tok;
	     tok = strtok_r(NULL, ",", &last)) {
		if (uid_from_string(tok, &uid) || (uid == (uid_t) 0)) {
			error("Invalid %s value (%s), ignored", key, tok);
			continue;
		}
		xrealloc(result, sizeof(uid_t) * (used + 2));
		result[used++] = uid;
	}
	xfree(work_str);
	return result;
}
| |
/*
 * restore_front_end_state - restore frontend node state
 *	Builds or refreshes the front_end_nodes table from the front end
 *	entries of slurm.conf. Records already in the table are matched by
 *	name and keep their state; unknown names are appended.
 * IN recover - replace job, node and/or partition data with latest
 *	available information depending upon value
 *	0 = use no saved state information, rebuild everything from
 *	    slurm.conf contents
 *	1 = recover saved job and trigger state,
 *	    node DOWN/DRAIN/FAIL state and reason information
 *	2 = recover all saved state
 * NOTE: only recover == 0 is acted upon here (the table is purged first)
 * NOTE: calls fatal() if an entry has no name, if no front end node ends up
 *	defined, or if the node count exceeds the configured tree width
 */
extern void restore_front_end_state(int recover)
{
#ifdef HAVE_FRONT_END
	slurm_conf_frontend_t *conf_fe;
	front_end_record_t *fe;
	ListIterator iter;
	uint16_t state_base, state_flags, tree_width;
	int i;

	last_front_end_update = time(NULL);
	if (recover == 0)
		purge_front_end_state();
	if (front_end_list == NULL)
		return;		/* No front ends in slurm.conf */

	iter = list_iterator_create(front_end_list);
	while ((conf_fe = (slurm_conf_frontend_t *) list_next(iter))) {
		if (conf_fe->frontends == NULL) {
			fatal("FrontendName is NULL");
			return;	/* Prevent CLANG false positive */
		}

		/* Look for an existing record of the same name */
		for (i = 0; i < front_end_node_cnt; i++) {
			if (strcmp(front_end_nodes[i].name,
				   conf_fe->frontends) == 0)
				break;
		}
		if (i >= front_end_node_cnt) {
			/* Not found, append a new record at index i */
			front_end_node_cnt++;
			xrealloc(front_end_nodes,
				 sizeof(front_end_record_t) *
				 front_end_node_cnt);
			front_end_nodes[i].name = xstrdup(conf_fe->frontends);
			front_end_nodes[i].magic = FRONT_END_MAGIC;
		}
		/* Take the address only now: xrealloc may move the table */
		fe = &front_end_nodes[i];

		/* Access control lists always follow the configuration */
		xfree(fe->allow_gids);
		xfree(fe->allow_groups);
		if (conf_fe->allow_groups) {
			fe->allow_groups = xstrdup(conf_fe->allow_groups);
			fe->allow_gids = _xlate_groups(conf_fe->allow_groups,
						       "AllowGroups");
		}
		xfree(fe->allow_uids);
		xfree(fe->allow_users);
		if (conf_fe->allow_users) {
			fe->allow_users = xstrdup(conf_fe->allow_users);
			fe->allow_uids = _xlate_users(conf_fe->allow_users,
						      "AllowUsers");
		}
		xfree(fe->deny_gids);
		xfree(fe->deny_groups);
		if (conf_fe->deny_groups) {
			fe->deny_groups = xstrdup(conf_fe->deny_groups);
			fe->deny_gids = _xlate_groups(conf_fe->deny_groups,
						      "DenyGroups");
		}
		xfree(fe->deny_uids);
		xfree(fe->deny_users);
		if (conf_fe->deny_users) {
			fe->deny_users = xstrdup(conf_fe->deny_users);
			fe->deny_uids = _xlate_users(conf_fe->deny_users,
						     "DenyUsers");
		}

		/* Communication address defaults to the node name */
		xfree(fe->comm_name);
		if (conf_fe->addresses)
			fe->comm_name = xstrdup(conf_fe->addresses);
		else
			fe->comm_name = xstrdup(fe->name);

		/* The configured state is used only while the base state of
		 * the record is still unset/UNKNOWN; flags are preserved */
		state_base = fe->node_state & NODE_STATE_BASE;
		state_flags = fe->node_state & NODE_STATE_FLAGS;
		if ((state_base == 0) || (state_base == NODE_STATE_UNKNOWN))
			fe->node_state = conf_fe->node_state | state_flags;

		/* An existing reason takes precedence over the configured */
		if ((fe->reason == NULL) && (conf_fe->reason != NULL))
			fe->reason = xstrdup(conf_fe->reason);

		/* Port defaults to the cluster wide slurmd port */
		if (conf_fe->port)
			fe->port = conf_fe->port;
		else
			fe->port = slurmctld_conf.slurmd_port;
		slurm_set_addr(&fe->slurm_addr, fe->port, fe->comm_name);
	}
	list_iterator_destroy(iter);

	if (front_end_node_cnt == 0)
		fatal("No front end nodes defined");
	tree_width = slurm_get_tree_width();
	if (front_end_node_cnt > tree_width) {
		fatal("front_end_node_cnt > tree_width (%u > %u)",
		      front_end_node_cnt, tree_width);
	}
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FRONT_END)
		log_front_end_state();
#endif
}
| |
| /* |
| * pack_all_front_end - dump all front_end node information for all nodes |
| * in machine independent form (for network transmission) |
| * OUT buffer_ptr - pointer to the stored data |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN protocol_version - slurm protocol version of client |
| * NOTE: the caller must xfree the buffer at *buffer_ptr |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern void pack_all_front_end(char **buffer_ptr, int *buffer_size, uid_t uid, |
| uint16_t protocol_version) |
| { |
| time_t now = time(NULL); |
| uint32_t nodes_packed = 0; |
| Buf buffer; |
| #ifdef HAVE_FRONT_END |
| uint32_t tmp_offset; |
| front_end_record_t *front_end_ptr; |
| int i; |
| |
| buffer_ptr[0] = NULL; |
| *buffer_size = 0; |
| |
| buffer = init_buf(BUF_SIZE * 2); |
| nodes_packed = 0; |
| |
| if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) { |
| /* write header: count and time */ |
| pack32(nodes_packed, buffer); |
| pack_time(now, buffer); |
| |
| /* write records */ |
| for (i = 0, front_end_ptr = front_end_nodes; |
| i < front_end_node_cnt; i++, front_end_ptr++) { |
| xassert(front_end_ptr->magic == FRONT_END_MAGIC); |
| _pack_front_end(front_end_ptr, buffer, |
| protocol_version); |
| nodes_packed++; |
| } |
| } else { |
| error("pack_all_front_end: Unsupported slurm version %u", |
| protocol_version); |
| } |
| |
| tmp_offset = get_buf_offset (buffer); |
| set_buf_offset(buffer, 0); |
| pack32(nodes_packed, buffer); |
| set_buf_offset(buffer, tmp_offset); |
| |
| *buffer_size = get_buf_offset(buffer); |
| buffer_ptr[0] = xfer_buf_data(buffer); |
| #else |
| buffer_ptr[0] = NULL; |
| *buffer_size = 0; |
| buffer = init_buf(64); |
| pack32(nodes_packed, buffer); |
| pack_time(now, buffer); |
| *buffer_size = get_buf_offset(buffer); |
| buffer_ptr[0] = xfer_buf_data(buffer); |
| #endif |
| } |
| |
| /* dump_all_front_end_state - save the state of all front_end nodes to file */ |
| extern int dump_all_front_end_state(void) |
| { |
| #ifdef HAVE_FRONT_END |
| /* Save high-water mark to avoid buffer growth with copies */ |
| static int high_buffer_size = (1024 * 1024); |
| int error_code = 0, i, log_fd; |
| char *old_file, *new_file, *reg_file; |
| front_end_record_t *front_end_ptr; |
| /* Locks: Read config and node */ |
| slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, |
| NO_LOCK }; |
| Buf buffer = init_buf(high_buffer_size); |
| DEF_TIMERS; |
| |
| START_TIMER; |
| /* write header: version, time */ |
| packstr(FRONT_END_STATE_VERSION, buffer); |
| pack16(SLURM_PROTOCOL_VERSION, buffer); |
| pack_time(time(NULL), buffer); |
| |
| /* write node records to buffer */ |
| lock_slurmctld (node_read_lock); |
| |
| for (i = 0, front_end_ptr = front_end_nodes; |
| i < front_end_node_cnt; i++, front_end_ptr++) { |
| xassert(front_end_ptr->magic == FRONT_END_MAGIC); |
| _dump_front_end_state(front_end_ptr, buffer); |
| } |
| |
| old_file = xstrdup (slurmctld_conf.state_save_location); |
| xstrcat (old_file, "/front_end_state.old"); |
| reg_file = xstrdup (slurmctld_conf.state_save_location); |
| xstrcat (reg_file, "/front_end_state"); |
| new_file = xstrdup (slurmctld_conf.state_save_location); |
| xstrcat (new_file, "/front_end_state.new"); |
| unlock_slurmctld (node_read_lock); |
| |
| /* write the buffer to file */ |
| lock_state_files(); |
| log_fd = creat (new_file, 0600); |
| if (log_fd < 0) { |
| error ("Can't save state, error creating file %s %m", new_file); |
| error_code = errno; |
| } else { |
| int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; |
| char *data = (char *)get_buf_data(buffer); |
| high_buffer_size = MAX(nwrite, high_buffer_size); |
| while (nwrite > 0) { |
| amount = write(log_fd, &data[pos], nwrite); |
| if ((amount < 0) && (errno != EINTR)) { |
| error("Error writing file %s, %m", new_file); |
| error_code = errno; |
| break; |
| } |
| nwrite -= amount; |
| pos += amount; |
| } |
| |
| rc = fsync_and_close(log_fd, "front_end"); |
| if (rc && !error_code) |
| error_code = rc; |
| } |
| if (error_code) |
| (void) unlink (new_file); |
| else { /* file shuffle */ |
| (void) unlink (old_file); |
| if (link(reg_file, old_file)) |
| debug4("unable to create link for %s -> %s: %m", |
| reg_file, old_file); |
| (void) unlink (reg_file); |
| if (link(new_file, reg_file)) |
| debug4("unable to create link for %s -> %s: %m", |
| new_file, reg_file); |
| (void) unlink (new_file); |
| } |
| xfree (old_file); |
| xfree (reg_file); |
| xfree (new_file); |
| unlock_state_files (); |
| |
| free_buf (buffer); |
| END_TIMER2("dump_all_front_end_state"); |
| return error_code; |
| #else |
| return SLURM_SUCCESS; |
| #endif |
| } |
| |
/*
 * load_all_front_end_state - Load the front_end node state from file, recover
 *	on slurmctld restart. Execute this after loading the configuration
 *	file data. Data goes into common storage.
 * IN state_only - if true, overwrite only front_end node state and reason
 *	Use this to overwrite the "UNKNOWN" state typically used in slurm.conf
 * RET 0 or error code (EFAULT if the file is missing, truncated or written
 *	with an unrecognized version string)
 * NOTE: READ lock_slurmctld config before entry
 * NOTE: the safe_unpack* macros jump to the unpack_error label on failure
 */
extern int load_all_front_end_state(bool state_only)
{
#ifdef HAVE_FRONT_END
	char *node_name = NULL, *reason = NULL, *data = NULL, *state_file;
	int data_allocated, data_read = 0, error_code = 0, node_cnt = 0;
	uint16_t node_state;
	uint32_t data_size = 0, name_len;
	uint32_t reason_uid = NO_VAL;
	time_t reason_time = 0;
	front_end_record_t *front_end_ptr;
	int state_fd;
	time_t time_stamp;
	Buf buffer;
	char *ver_str = NULL;
	uint16_t protocol_version = (uint16_t) NO_VAL;

	/* read the file */
	lock_state_files();
	state_fd = _open_front_end_state_file(&state_file);
	if (state_fd < 0) {
		info("No node state file (%s) to recover", state_file);
		error_code = ENOENT;
	} else {
		/* Keep BUF_SIZE bytes of head room past the data read so far */
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size], BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);
	unlock_state_files();

	buffer = create_buf(data, data_size);

	safe_unpackstr_xmalloc(&ver_str, &name_len, buffer);
	debug3("Version string in front_end_state header is %s", ver_str);
	if (ver_str) {
		if (!strcmp(ver_str, FRONT_END_STATE_VERSION)) {
			safe_unpack16(&protocol_version, buffer);
		} else if (!strcmp(ver_str, FRONT_END_2_6_STATE_VERSION)) {
			/* Versions 2.5 and 2.6 share this header string and
			 * the record layout parsed below. Any other string
			 * leaves protocol_version at NO_VAL so the file is
			 * rejected instead of being parsed as 2.6 data. */
			protocol_version = SLURM_2_6_PROTOCOL_VERSION;
		}
	}

	if (protocol_version == (uint16_t) NO_VAL) {
		error("*****************************************************");
		error("Can not recover front_end state, version incompatible");
		error("*****************************************************");
		xfree(ver_str);
		free_buf(buffer);
		return EFAULT;
	}
	xfree(ver_str);

	safe_unpack_time(&time_stamp, buffer);

	while (remaining_buf(buffer) > 0) {
		uint16_t base_state, obj_protocol_version = (uint16_t)NO_VAL;
		if (protocol_version >= SLURM_14_03_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&node_name, &name_len, buffer);
			safe_unpack16(&node_state, buffer);
			safe_unpackstr_xmalloc(&reason, &name_len, buffer);
			safe_unpack_time(&reason_time, buffer);
			safe_unpack32(&reason_uid, buffer);
			safe_unpack16(&obj_protocol_version, buffer);
			base_state = node_state & NODE_STATE_BASE;
		} else if (protocol_version >= SLURM_2_5_PROTOCOL_VERSION) {
			safe_unpackstr_xmalloc(&node_name, &name_len, buffer);
			safe_unpack16(&node_state, buffer);
			safe_unpackstr_xmalloc(&reason, &name_len, buffer);
			safe_unpack_time(&reason_time, buffer);
			safe_unpack32(&reason_uid, buffer);
			base_state = node_state & NODE_STATE_BASE;
		} else
			goto unpack_error;

		/* validity test as possible */

		/* find record and perform update */
		front_end_ptr = find_front_end_record(node_name);
		if (front_end_ptr == NULL) {
			error("Front_end node %s has vanished from "
			      "configuration", node_name);
		} else if (state_only) {
			/* Only carry over DOWN/DRAIN/FAIL onto a node whose
			 * configured state is still UNKNOWN */
			uint16_t orig_flags;
			orig_flags = front_end_ptr->node_state &
				     NODE_STATE_FLAGS;
			if (IS_NODE_UNKNOWN(front_end_ptr)) {
				if (base_state == NODE_STATE_DOWN) {
					orig_flags &= (~NODE_STATE_COMPLETING);
					front_end_ptr->node_state =
						NODE_STATE_DOWN | orig_flags;
				}
				if (node_state & NODE_STATE_DRAIN) {
					front_end_ptr->node_state |=
						NODE_STATE_DRAIN;
				}
				if (node_state & NODE_STATE_FAIL) {
					front_end_ptr->node_state |=
						NODE_STATE_FAIL;
				}
			}
			if (front_end_ptr->reason == NULL) {
				/* Record takes ownership of the string */
				front_end_ptr->reason = reason;
				reason = NULL;	/* Nothing to free */
				front_end_ptr->reason_time = reason_time;
				front_end_ptr->reason_uid = reason_uid;
			}
		} else {
			front_end_ptr->node_state = node_state;
			xfree(front_end_ptr->reason);
			/* Record takes ownership of the string */
			front_end_ptr->reason = reason;
			reason = NULL;	/* Nothing to free */
			front_end_ptr->reason_time = reason_time;
			front_end_ptr->reason_uid = reason_uid;
			front_end_ptr->last_response = (time_t) 0;
		}

		if (front_end_ptr) {
			node_cnt++;
			/* Prefer the per-record version when one was saved */
			if (obj_protocol_version != (uint16_t)NO_VAL)
				front_end_ptr->protocol_version =
					obj_protocol_version;
			else
				front_end_ptr->protocol_version =
					protocol_version;
		}

		xfree(node_name);
		xfree(reason);
	}

fini:	info("Recovered state of %d front_end nodes", node_cnt);
	free_buf(buffer);
	return error_code;

unpack_error:
	error("Incomplete front_end node data checkpoint file");
	error_code = EFAULT;
	/* ver_str is still held if the header unpack failed part way */
	xfree(ver_str);
	xfree(node_name);
	xfree(reason);
	goto fini;
#else
	return 0;
#endif
}
| |
| /* |
| * set_front_end_down - make the specified front end node's state DOWN and |
| * kill jobs as needed |
| * IN front_end_pt - pointer to the front end node |
| * IN reason - why the node is DOWN |
| */ |
| extern void set_front_end_down (front_end_record_t *front_end_ptr, |
| char *reason) |
| { |
| #ifdef HAVE_FRONT_END |
| time_t now = time(NULL); |
| uint16_t state_flags = front_end_ptr->node_state & NODE_STATE_FLAGS; |
| |
| state_flags &= (~NODE_STATE_COMPLETING); |
| front_end_ptr->node_state = NODE_STATE_DOWN | state_flags; |
| trigger_front_end_down(front_end_ptr); |
| (void) kill_job_by_front_end_name(front_end_ptr->name); |
| if ((front_end_ptr->reason == NULL) || |
| (strncmp(front_end_ptr->reason, "Not responding", 14) == 0)) { |
| xfree(front_end_ptr->reason); |
| front_end_ptr->reason = xstrdup(reason); |
| front_end_ptr->reason_time = now; |
| front_end_ptr->reason_uid = slurm_get_slurm_user_id(); |
| } |
| last_front_end_update = now; |
| #endif |
| } |
| |
/*
 * sync_front_end_state - synchronize job pointers and front-end node state
 *	1. Zero the running/completing job counts of every front end node
 *	2. For every job, re-resolve its front end pointer from batch_host
 *	   and count it against that node. A running job whose front end
 *	   node no longer exists is set to NODE_FAIL | COMPLETING.
 *	3. Make each node's ALLOCATED/IDLE base state and COMPLETING flag
 *	   agree with the counts just computed
 */
extern void sync_front_end_state(void)
{
#ifdef HAVE_FRONT_END
	ListIterator job_iterator;
	struct job_record *job_ptr;
	front_end_record_t *fe;
	uint16_t flags;
	int i;

	/* Step 1: reset the counters */
	for (i = 0; i < front_end_node_cnt; i++) {
		front_end_nodes[i].job_cnt_comp = 0;
		front_end_nodes[i].job_cnt_run = 0;
	}

	/* Step 2: relink jobs and count them */
	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
		if (job_ptr->batch_host == NULL) {
			job_ptr->front_end_ptr = NULL;
			continue;
		}

		job_ptr->front_end_ptr =
			find_front_end_record(job_ptr->batch_host);
		if (job_ptr->front_end_ptr == NULL) {
			if (IS_JOB_RUNNING(job_ptr)) {
				error("front end node %s has vanished, "
				      "killing job %u",
				      job_ptr->batch_host, job_ptr->job_id);
				job_ptr->job_state = JOB_NODE_FAIL |
						     JOB_COMPLETING;
			} else {
				info("front end node %s has vanished",
				     job_ptr->batch_host);
			}
		} else if (IS_JOB_COMPLETING(job_ptr)) {
			job_ptr->front_end_ptr->job_cnt_comp++;
		} else if (IS_JOB_RUNNING(job_ptr)) {
			job_ptr->front_end_ptr->job_cnt_run++;
		}
	}
	list_iterator_destroy(job_iterator);

	/* Step 3: reconcile node state with the counts. The tests are
	 * applied in sequence, each seeing the result of the previous one. */
	for (i = 0; i < front_end_node_cnt; i++) {
		fe = &front_end_nodes[i];

		if ((fe->job_cnt_run != 0) &&
		    (IS_NODE_IDLE(fe) || IS_NODE_UNKNOWN(fe))) {
			flags = fe->node_state & NODE_STATE_FLAGS;
			fe->node_state = NODE_STATE_ALLOCATED | flags;
		}
		if ((fe->job_cnt_run == 0) && IS_NODE_ALLOCATED(fe)) {
			flags = fe->node_state & NODE_STATE_FLAGS;
			fe->node_state = NODE_STATE_IDLE | flags;
		}
		if ((fe->job_cnt_comp == 0) && IS_NODE_COMPLETING(fe))
			fe->node_state &= (~NODE_STATE_COMPLETING);
		if ((fe->job_cnt_comp != 0) && !IS_NODE_COMPLETING(fe))
			fe->node_state |= NODE_STATE_COMPLETING;
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FRONT_END)
		log_front_end_state();
#endif
}