| /***************************************************************************** \ |
| * switch_nrt.c - Switch plugin interface. This calls functions in nrt.c |
| * which contains the interface to IBM's NRT (Network Routing Table) API |
| ***************************************************************************** |
| * Copyright (C) 2004-2007 The Regents of the University of California. |
| * Copyright (C) 2008 Lawrence Livermore National Security. |
| * Portions Copyright (C) 2011-2012 SchedMD LLC. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Jason King <jking@llnl.gov> |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://www.schedmd.com/slurmdocs/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #if HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| #include <stdlib.h> |
| #include <fcntl.h> |
| #include <signal.h> |
| #include <sys/types.h> |
| #include <regex.h> |
| #include <stdlib.h> |
| |
| #include "slurm/slurm_errno.h" |
| #include "src/common/slurm_xlator.h" |
| #include "src/common/macros.h" |
| #include "src/plugins/switch/nrt/slurm_nrt.h" |
| |
/* Number of bytes requested per read() when loading the saved state file */
#define NRT_BUF_SIZE 4096

/* Copy of the state save directory, filled in by the state save thread */
char local_dir_path[1024];
/* Set when the switch state has changed and should be written out again */
bool nrt_need_state_save = false;

/* Forward declarations of file-local helpers */
static int  _switch_p_libstate_save(char *dir_name, bool free_flag);
static void _spawn_state_save_thread(char *dir);
| |
| /* Type for error string table entries */ |
| typedef struct { |
| int xe_number; |
| char *xe_message; |
| } slurm_errtab_t; |
| |
| static slurm_errtab_t slurm_errtab[] = { |
| {0, "No error"}, |
| {-1, "Unspecified error"}, |
| |
| /* switch/nrt routine error codes */ |
| |
| { ESTATUS, |
| "Cannot get adapter status" }, |
| { EADAPTER, |
| "Open of adapter failed" }, |
| { ENOADAPTER, |
| "No adapters found" }, |
| { EBADMAGIC_NRT_NODEINFO, |
| "Bad magic in NRT nodeinfo" }, |
| { EBADMAGIC_NRT_JOBINFO, |
| "Bad magic in NRT jobinfo" }, |
| { EBADMAGIC_NRT_LIBSTATE, |
| "Bad magic in NRT libstate" }, |
| { EUNPACK, |
| "Error during unpack" }, |
| { EHOSTNAME, |
| "Cannot get hostname" }, |
| { ENOTSUPPORTED, |
| "This feature not currently supported" }, |
| { EVERSION, |
| "Header/library version mismatch" }, |
| { EWINDOW, |
| "Error allocating switch window" }, |
| { EUNLOAD, |
| "Error unloading switch window table" } |
| }; |
| |
| /* |
| * These variables are required by the generic plugin interface. If they |
| * are not found in the plugin, the plugin loader will ignore it. |
| * |
| * plugin_name - a string giving a human-readable description of the |
| * plugin. There is no maximum length, but the symbol must refer to |
| * a valid string. |
| * |
| * plugin_type - a string suggesting the type of the plugin or its |
| * applicability to a particular form of data or method of data handling. |
| * If the low-level plugin API is used, the contents of this string are |
| * unimportant and may be anything. SLURM uses the higher-level plugin |
| * interface which requires this string to be of the form |
| * |
| * <application>/<method> |
| * |
| * where <application> is a description of the intended application of |
| * the plugin (e.g., "switch" for SLURM switch) and <method> is a description |
| * of how this plugin satisfies that application. SLURM will only load |
| * a switch plugin if the plugin_type string has a prefix of "switch/". |
| * |
| * plugin_version - an unsigned 32-bit integer giving the version number |
| * of the plugin. If major and minor revisions are desired, the major |
| * version number may be multiplied by a suitable magnitude constant such |
| * as 100 or 1000. Various SLURM versions will likely require a certain |
| * minimum version for their plugins as this API matures. |
| */ |
/* Identification symbols read by the generic plugin loader */
const char     plugin_name[]  = "switch NRT plugin";
const char     plugin_type[]  = "switch/nrt";
const uint32_t plugin_version = 110;

/* Cached debug flags; refreshed by init() and switch_p_reconfig() */
uint32_t debug_flags = 0;
| |
| /* |
| * init() is called when the plugin is loaded, before any other functions |
| * are called. Put global initialization here. |
| */ |
| extern int init ( void ) |
| { |
| verbose("%s loaded", plugin_name); |
| debug_flags = slurm_get_debug_flags(); |
| |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * fini() - plugin unload hook.
 * RET whatever nrt_fini() returns
 */
extern int fini(void)
{
	int status = nrt_fini();

	return status;
}
| |
| extern int switch_p_reconfig ( void ) |
| { |
| debug_flags = slurm_get_debug_flags(); |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_slurmctld_init( void ) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_slurmctld_init() starting"); |
| } |
| rc = nrt_slurmctld_init(); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_slurmctld_init() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_slurmd_init( void ) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_slurmd_init() starting"); |
| } |
| rc = nrt_slurmd_init(); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_slurmd_init() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_slurmd_step_init( void ) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_slurmd_step_init() starting"); |
| } |
| rc = nrt_slurmd_step_init(); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_slurmd_step_init() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| /* |
| * Switch functions for global state save |
| * NOTE: Clears current switch state as needed for backup |
| * controller to repeatedly assume control primary server |
| */ |
| extern int switch_p_libstate_save ( char * dir_name ) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_libstate_save() starting"); |
| } |
| rc = _switch_p_libstate_save(dir_name, true); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_libstate_save() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| /* save and purge the libstate if free_flag is true */ |
| static int _switch_p_libstate_save ( char * dir_name, bool free_flag ) |
| { |
| Buf buffer; |
| char *file_name; |
| int ret = SLURM_SUCCESS; |
| int state_fd; |
| |
| buffer = init_buf(NRT_LIBSTATE_LEN); |
| (void) nrt_libstate_save(buffer, free_flag); |
| file_name = xstrdup(dir_name); |
| xstrcat(file_name, "/nrt_state"); |
| (void) unlink(file_name); |
| state_fd = creat(file_name, 0600); |
| if (state_fd < 0) { |
| error("Can't save state, error creating file %s %m", |
| file_name); |
| ret = SLURM_ERROR; |
| } else { |
| char *buf = get_buf_data(buffer); |
| size_t len = get_buf_offset(buffer); |
| while (1) { |
| int wrote = write (state_fd, buf, len); |
| if ((wrote < 0) && (errno == EINTR)) |
| continue; |
| if (wrote == 0) |
| break; |
| if (wrote < 0) { |
| error("Can't save switch state: %m"); |
| ret = SLURM_ERROR; |
| break; |
| } |
| buf += wrote; |
| len -= wrote; |
| } |
| close(state_fd); |
| } |
| xfree(file_name); |
| |
| if (buffer) |
| free_buf(buffer); |
| |
| return ret; |
| } |
| |
| |
| /* |
| * Restore global nodeinfo from a file. |
| * |
| * NOTE: switch_p_libstate_restore is only called by slurmctld, and only |
| * once at start-up. We exploit this fact to spawn a pthread to |
| * periodically call _switch_p_libstate_save(). |
| */ |
| extern int switch_p_libstate_restore ( char * dir_name, bool recover ) |
| { |
| char *data = NULL, *file_name; |
| Buf buffer = NULL; |
| int error_code = SLURM_SUCCESS; |
| int state_fd, data_allocated = 0, data_read = 0, data_size = 0; |
| DEF_TIMERS; |
| |
| xassert(dir_name != NULL); |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_libstate_restore() starting"); |
| } |
| _spawn_state_save_thread(xstrdup(dir_name)); |
| if (!recover) /* clean start, no recovery */ |
| return nrt_init(); |
| |
| file_name = xstrdup(dir_name); |
| xstrcat(file_name, "/nrt_state"); |
| state_fd = open (file_name, O_RDONLY); |
| if (state_fd >= 0) { |
| data_allocated = NRT_BUF_SIZE; |
| data = xmalloc(data_allocated); |
| while (1) { |
| data_read = read (state_fd, &data[data_size], |
| NRT_BUF_SIZE); |
| if ((data_read < 0) && (errno == EINTR)) |
| continue; |
| if (data_read < 0) { |
| error ("Read error on %s, %m", file_name); |
| error_code = SLURM_ERROR; |
| break; |
| } else if (data_read == 0) |
| break; |
| data_size += data_read; |
| data_allocated += data_read; |
| xrealloc(data, data_allocated); |
| } |
| close (state_fd); |
| xfree(file_name); |
| } else { |
| error("No %s file for switch/nrt state recovery", file_name); |
| error("Starting switch/nrt with clean state"); |
| xfree(file_name); |
| return nrt_init(); |
| } |
| |
| if (error_code == SLURM_SUCCESS) { |
| buffer = create_buf (data, data_size); |
| data = NULL; /* now in buffer, don't xfree() */ |
| if (nrt_libstate_restore(buffer) < 0) |
| error_code = SLURM_ERROR; |
| } |
| |
| if (buffer) |
| free_buf(buffer); |
| xfree(data); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_libstate_restore() ending %s", TIME_STR); |
| } |
| |
| return error_code; |
| } |
| |
| extern int switch_p_libstate_clear(void) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_libstate_clear()"); |
| |
| return nrt_libstate_clear(); |
| } |
| |
| /***************************************************************************** |
| * switch state monitoring functions |
| *****************************************************************************/ |
| /* NOTE: we assume that once the switch state is cleared, |
| * notification of this will be forwarded to slurmctld. We do not |
| * enforce that in this function. |
| */ |
| extern int switch_p_clear_node_state(void) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_clear_node_state() starting"); |
| } |
| rc = nrt_clear_node_state(); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_clear_node_state() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_alloc_node_info(switch_node_info_t **switch_node) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_alloc_node_info() starting"); |
| return nrt_alloc_nodeinfo((slurm_nrt_nodeinfo_t **)switch_node); |
| } |
| |
| extern int switch_p_build_node_info(switch_node_info_t *switch_node) |
| { |
| char hostname[256]; |
| char *tmp; |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_build_node_info() starting"); |
| } |
| if (gethostname(hostname, 256) < 0) |
| slurm_seterrno_ret(EHOSTNAME); |
| /* remove the domain portion, if necessary */ |
| tmp = strstr(hostname, "."); |
| if (tmp) |
| *tmp = '\0'; |
| rc = nrt_build_nodeinfo((slurm_nrt_nodeinfo_t *)switch_node, |
| hostname); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_build_node_info() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_pack_node_info(switch_node_info_t *switch_node, Buf buffer) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_pack_node_info() starting"); |
| return nrt_pack_nodeinfo((slurm_nrt_nodeinfo_t *)switch_node, buffer); |
| } |
| |
| extern int switch_p_unpack_node_info(switch_node_info_t *switch_node, |
| Buf buffer) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_unpack_node_info()"); |
| return nrt_unpack_nodeinfo((slurm_nrt_nodeinfo_t *)switch_node, |
| buffer); |
| } |
| |
| extern void switch_p_free_node_info(switch_node_info_t **switch_node) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_free_node_info()"); |
| |
| if (switch_node) |
| nrt_free_nodeinfo((slurm_nrt_nodeinfo_t *)*switch_node, false); |
| } |
| |
| extern char * switch_p_sprintf_node_info(switch_node_info_t *switch_node, |
| char *buf, size_t size) |
| { |
| return nrt_print_nodeinfo((slurm_nrt_nodeinfo_t *)switch_node, buf, |
| size); |
| } |
| |
| /* |
| * switch functions for job step specific credential |
| */ |
| extern int switch_p_alloc_jobinfo(switch_jobinfo_t **switch_job) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_alloc_jobinfo()"); |
| |
| return nrt_alloc_jobinfo((slurm_nrt_jobinfo_t **)switch_job); |
| } |
| |
| extern int switch_p_build_jobinfo(switch_jobinfo_t *switch_job, char *nodelist, |
| uint16_t *tasks_per_node, uint32_t **tids, |
| char *network) |
| { |
| hostlist_t list = NULL; |
| bool bulk_xfer = false, ip_v4 = true, user_space = false; |
| uint32_t bulk_xfer_resources = 0; |
| bool sn_all = true; /* default to sn_all */ |
| int cau = 0, immed = 0, instances = 1; |
| int dev_type = NRT_MAX_ADAPTER_TYPES; |
| int err = SLURM_SUCCESS; |
| char *adapter_name = NULL; |
| char *protocol = NULL; |
| char *network_str = NULL, *token = NULL, *save_ptr = NULL; |
| DEF_TIMERS; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_build_jobinfo(): nodelist:%s network:%s", |
| nodelist, network); |
| } else { |
| debug3("network = \"%s\"", network); |
| } |
| |
| list = hostlist_create(nodelist); |
| if (!list) |
| fatal("hostlist_create(%s): %m", nodelist); |
| |
| if (network) { |
| network_str = xstrdup(network); |
| token = strtok_r(network_str, ",", &save_ptr); |
| } |
| while (token) { |
| /* bulk_xfer options */ |
| if (!strncasecmp(token, "bulk_xfer=", 10)) { |
| long int resources; |
| char *end_ptr = NULL; |
| bulk_xfer = true; |
| resources = strtol(token+10, &end_ptr, 10); |
| if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) |
| resources *= 1024; |
| else if ((end_ptr[0] == 'm') || (end_ptr[0] == 'M')) |
| resources *= (1024 * 1024); |
| else if ((end_ptr[0] == 'g') || (end_ptr[0] == 'G')) |
| resources *= (1024 * 1024 * 1024); |
| if (resources >= 0) |
| bulk_xfer_resources = resources; |
| else { |
| info("switch/nrt: invalid option: %s", token); |
| err = SLURM_ERROR; |
| } |
| } else if (!strcasecmp(token, "bulk_xfer")) { |
| bulk_xfer = true; |
| |
| /* device name options */ |
| } else if (!strncasecmp(token, "devname=", 8)) { |
| char *name_ptr = token + 8; |
| if (nrt_adapter_name_check(name_ptr, list)) { |
| debug("switch/nrt: Found adapter %s in " |
| "network string", token); |
| adapter_name = xstrdup(name_ptr); |
| sn_all = false; |
| } else if (!strcasecmp(name_ptr, "sn_all")) { |
| sn_all = true; |
| } else if (!strcasecmp(name_ptr, "sn_single")) { |
| sn_all = false; |
| } else { |
| info("switch/nrt: invalid devname: %s", |
| name_ptr); |
| err = SLURM_ERROR; |
| } |
| |
| /* device type options */ |
| } else if (!strncasecmp(token, "devtype=", 8)) { |
| char *type_ptr = token + 8; |
| if (!strcasecmp(type_ptr, "ib")) { |
| dev_type = NRT_IB; |
| } else if (!strcasecmp(type_ptr, "hfi")) { |
| dev_type = NRT_HFI; |
| } else if (!strcasecmp(type_ptr, "iponly")) { |
| dev_type = NRT_IPONLY; |
| } else if (!strcasecmp(type_ptr, "hpce")) { |
| dev_type = NRT_HPCE; |
| } else if (!strcasecmp(type_ptr, "kmux")) { |
| dev_type = NRT_KMUX; |
| } else if (!strcasecmp(type_ptr, "sn_all")) { |
| sn_all = true; |
| } else if (!strcasecmp(type_ptr, "sn_single")) { |
| sn_all = false; |
| } else { |
| info("switch/nrt: invalid option: %s", token); |
| err = SLURM_ERROR; |
| } |
| |
| /* instances options */ |
| } else if (!strncasecmp(token, "instances=", 10)) { |
| long int count; |
| char *end_ptr = NULL; |
| count = strtol(token+10, &end_ptr, 10); |
| if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) |
| count *= 1024; |
| if (count >= 0) |
| instances = count; |
| else { |
| info("switch/nrt: invalid option: %s", token); |
| err = SLURM_ERROR; |
| } |
| |
| /* network options */ |
| } else if (!strcasecmp(token, "ip")) { |
| ip_v4 = true; |
| } else if (!strcasecmp(token, "ipv4")) { |
| ip_v4 = true; |
| } else if (!strcasecmp(token, "ipv6")) { |
| ip_v4 = false; |
| } else if (!strcasecmp(token, "us")) { |
| user_space = true; |
| |
| /* protocol options */ |
| } else if ((!strncasecmp(token, "lapi", 4)) || |
| (!strncasecmp(token, "mpi", 3)) || |
| (!strncasecmp(token, "pami", 4)) || |
| (!strncasecmp(token, "shmem", 5)) || |
| (!strncasecmp(token, "upc", 3))) { |
| if (protocol) |
| xstrcat(protocol, ","); |
| xstrcat(protocol, token); |
| |
| /* adapter options */ |
| } else if (!strcasecmp(token, "sn_all")) { |
| sn_all = true; |
| } else if (!strcasecmp(token, "sn_single")) { |
| sn_all = false; |
| |
| /* Collective Acceleration Units (CAU) */ |
| } else if (!strncasecmp(token, "cau=", 4)) { |
| long int count; |
| char *end_ptr = NULL; |
| count = strtol(token+4, &end_ptr, 10); |
| if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) |
| count *= 1024; |
| if (count >= 0) |
| cau = count; |
| else { |
| info("switch/nrt: invalid option: %s", token); |
| err = SLURM_ERROR; |
| } |
| |
| /* Immediate Send Slots Per Window */ |
| } else if (!strncasecmp(token, "immed=", 6)) { |
| long int count; |
| char *end_ptr = NULL; |
| count = strtol(token+6, &end_ptr, 10); |
| if ((end_ptr[0] == 'k') || (end_ptr[0] == 'K')) |
| count *= 1024; |
| if (count >= 0) |
| immed = count; |
| else { |
| info("switch/nrt: invalid option: %s", token); |
| err = SLURM_ERROR; |
| } |
| |
| /* other */ |
| } else { |
| info("switch/nrt: invalid option: %s", token); |
| err = SLURM_ERROR; |
| } |
| token = strtok_r(NULL, ",", &save_ptr); |
| } |
| |
| if (protocol == NULL) |
| xstrcat(protocol, "mpi"); |
| if (!user_space) { |
| /* Bulk transfer only supported with user space */ |
| bulk_xfer = false; |
| bulk_xfer_resources = 0; |
| } |
| |
| if (err == SLURM_SUCCESS) { |
| err = nrt_build_jobinfo((slurm_nrt_jobinfo_t *)switch_job, |
| list, tasks_per_node, tids, sn_all, |
| adapter_name, dev_type, |
| bulk_xfer, bulk_xfer_resources, |
| ip_v4, user_space, protocol, |
| instances, cau, immed); |
| } |
| |
| nrt_need_state_save = true; |
| xfree(adapter_name); |
| xfree(protocol); |
| hostlist_destroy(list); |
| xfree(network_str); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_build_jobinfo() ending %s", TIME_STR); |
| } |
| |
| return err; |
| } |
| |
| extern switch_jobinfo_t *switch_p_copy_jobinfo(switch_jobinfo_t *switch_job) |
| { |
| switch_jobinfo_t *j; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_copy_jobinfo()"); |
| |
| j = (switch_jobinfo_t *)nrt_copy_jobinfo((slurm_nrt_jobinfo_t *)switch_job); |
| if (!j) |
| error("nrt_copy_jobinfo failed"); |
| |
| return j; |
| } |
| |
| extern void switch_p_free_jobinfo(switch_jobinfo_t *switch_job) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_free_jobinfo()"); |
| |
| return nrt_free_jobinfo((slurm_nrt_jobinfo_t *)switch_job); |
| } |
| |
| extern int switch_p_pack_jobinfo(switch_jobinfo_t *switch_job, Buf buffer) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_pack_jobinfo()"); |
| |
| return nrt_pack_jobinfo((slurm_nrt_jobinfo_t *)switch_job, buffer); |
| } |
| |
| extern int switch_p_unpack_jobinfo(switch_jobinfo_t *switch_job, Buf buffer) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_unpack_jobinfo()"); |
| |
| return nrt_unpack_jobinfo((slurm_nrt_jobinfo_t *)switch_job, buffer); |
| } |
| |
| extern int switch_p_get_jobinfo(switch_jobinfo_t *switch_job, int key, |
| void *resulting_data) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_get_jobinfo()"); |
| |
| return nrt_get_jobinfo((slurm_nrt_jobinfo_t *)switch_job, key, |
| resulting_data); |
| } |
| |
| static inline int _make_step_comp(switch_jobinfo_t *jobinfo, char *nodelist) |
| { |
| hostlist_t list = NULL; |
| int rc; |
| |
| list = hostlist_create(nodelist); |
| rc = nrt_job_step_complete((slurm_nrt_jobinfo_t *)jobinfo, list); |
| hostlist_destroy(list); |
| |
| return rc; |
| } |
| |
| extern int switch_p_job_step_complete(switch_jobinfo_t *jobinfo, |
| char *nodelist) |
| { |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_job_step_complete()"); |
| |
| rc = _make_step_comp(jobinfo, nodelist); |
| nrt_need_state_save = true; |
| return rc; |
| } |
| |
| extern int switch_p_job_step_part_comp(switch_jobinfo_t *jobinfo, |
| char *nodelist) |
| { |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_job_step_part_comp()"); |
| |
| rc = _make_step_comp(jobinfo, nodelist); |
| nrt_need_state_save = true; |
| return rc; |
| } |
| |
| extern bool switch_p_part_comp(void) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_part_comp()"); |
| |
| return true; |
| } |
| |
| extern int switch_p_job_step_allocated(switch_jobinfo_t *jobinfo, |
| char *nodelist) |
| { |
| hostlist_t list = NULL; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_job_step_allocated()"); |
| |
| list = hostlist_create(nodelist); |
| rc = nrt_job_step_allocated((slurm_nrt_jobinfo_t *)jobinfo, list); |
| hostlist_destroy(list); |
| |
| return rc; |
| } |
| |
| extern void switch_p_print_jobinfo(FILE *fp, switch_jobinfo_t *jobinfo) |
| { |
| return; |
| } |
| |
| extern char *switch_p_sprint_jobinfo(switch_jobinfo_t *switch_jobinfo, |
| char *buf, size_t size) |
| { |
| return NULL; |
| } |
| |
| /* |
| * switch functions for job initiation |
| */ |
| static bool _nrt_version_ok(void) |
| { |
| if ((NRT_VERSION >= 1100) && (NRT_VERSION <= 1200)) |
| return true; |
| error("switch/nrt: Incompatable NRT version"); |
| return false; |
| } |
| |
| int switch_p_node_init(void) |
| { |
| /* check to make sure the version of the library we compiled with |
| * matches the one dynamically linked |
| */ |
| if (!_nrt_version_ok()) { |
| slurm_seterrno_ret(EVERSION); |
| } |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_node_fini(void) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_job_preinit(switch_jobinfo_t *jobinfo) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_job_init (switch_jobinfo_t *jobinfo, uid_t uid, |
| char *job_name) |
| { |
| pid_t pid; |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_job_init() starting"); |
| } |
| pid = getpid(); |
| rc = nrt_load_table((slurm_nrt_jobinfo_t *)jobinfo, uid, pid, |
| job_name); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_job_init() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_job_suspend_test(switch_jobinfo_t *jobinfo) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) |
| info("switch_p_job_suspend_test() starting"); |
| return nrt_preempt_job_test((slurm_nrt_jobinfo_t *)jobinfo); |
| } |
| |
| extern void switch_p_job_suspend_info_get(switch_jobinfo_t *jobinfo, |
| void **suspend_info) |
| { |
| DEF_TIMERS; |
| |
| if ( switch_init() < 0 ) |
| return; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_job_suspend_info_get() starting"); |
| } |
| nrt_suspend_job_info_get((slurm_nrt_jobinfo_t *)jobinfo, suspend_info); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_job_suspend_info_get() ending %s", TIME_STR); |
| } |
| |
| return; |
| } |
| |
| extern void switch_p_job_suspend_info_pack(void *suspend_info, Buf buffer) |
| { |
| if ( switch_init() < 0 ) |
| return; |
| |
| nrt_suspend_job_info_pack(suspend_info, buffer); |
| return; |
| } |
| |
| extern int switch_p_job_suspend_info_unpack(void **suspend_info, Buf buffer) |
| { |
| if ( switch_init() < 0 ) |
| return SLURM_ERROR; |
| |
| return nrt_suspend_job_info_unpack(suspend_info, buffer); |
| } |
| |
/*
 * switch_p_job_suspend_info_free() - release suspend information through
 * nrt_suspend_job_info_free(). Does nothing if switch_init() fails.
 */
extern void switch_p_job_suspend_info_free(void *suspend_info)
{
	if (switch_init() >= 0)
		nrt_suspend_job_info_free(suspend_info);
}
| |
| extern int switch_p_job_suspend(void *suspend_info, int max_wait) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_job_suspend() starting"); |
| } |
| rc = nrt_preempt_job(suspend_info, max_wait); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_job_suspend() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_job_resume(void *suspend_info, int max_wait) |
| { |
| DEF_TIMERS; |
| int rc; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_job_resume() starting"); |
| } |
| rc = nrt_resume_job(suspend_info, max_wait); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_job_resume() ending %s", TIME_STR); |
| } |
| |
| return rc; |
| } |
| |
| extern int switch_p_job_fini (switch_jobinfo_t *jobinfo) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_job_postfini(switch_jobinfo_t *jobinfo, uid_t pgid, |
| uint32_t job_id, uint32_t step_id) |
| { |
| DEF_TIMERS; |
| int err; |
| |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| START_TIMER; |
| info("switch_p_job_postfini() starting"); |
| } |
| /* |
| * Kill all processes in the job's session |
| */ |
| if (pgid) { |
| debug2("Sending SIGKILL to pgid %lu", |
| (unsigned long) pgid); |
| kill(-pgid, SIGKILL); |
| } else |
| debug("Job %u.%u: pgid value is zero", job_id, step_id); |
| |
| err = nrt_unload_table((slurm_nrt_jobinfo_t *)jobinfo); |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| END_TIMER; |
| info("switch_p_job_postfini() ending %s", TIME_STR); |
| } |
| |
| if (err != SLURM_SUCCESS) |
| return SLURM_ERROR; |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_job_attach(switch_jobinfo_t *jobinfo, char ***env, |
| uint32_t nodeid, uint32_t procid, |
| uint32_t nnodes, uint32_t nprocs, uint32_t rank) |
| { |
| if (debug_flags & DEBUG_FLAG_SWITCH) { |
| info("switch_p_job_attach()"); |
| info("nodeid = %u", nodeid); |
| info("procid = %u", procid); |
| info("nnodes = %u", nnodes); |
| info("nprocs = %u", nprocs); |
| info("rank = %u", rank); |
| } |
| |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * switch functions for other purposes |
| */ |
| |
| /* |
| * Linear search through table of errno values and strings, |
| * returns NULL on error, string on success. |
| */ |
| static char *_lookup_slurm_api_errtab(int errnum) |
| { |
| char *res = NULL; |
| int i; |
| |
| for (i = 0; i < sizeof(slurm_errtab) / sizeof(slurm_errtab_t); i++) { |
| if (slurm_errtab[i].xe_number == errnum) { |
| res = slurm_errtab[i].xe_message; |
| break; |
| } |
| } |
| return res; |
| } |
| |
| extern int switch_p_get_errno(void) |
| { |
| int err = slurm_get_errno(); |
| |
| if ((err >= ESLURM_SWITCH_MIN) && (err <= ESLURM_SWITCH_MAX)) |
| return err; |
| |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * switch_p_strerror() - translate errnum into a message, preferring this
 * plugin's own table and falling back to strerror().
 */
extern char *switch_p_strerror(int errnum)
{
	char *msg = _lookup_slurm_api_errtab(errnum);

	if (msg == NULL)
		msg = strerror(errnum);

	return msg;
}
| |
| |
| static void *_state_save_thread(void *arg) |
| { |
| char *dir_name = (char *)arg; |
| |
| strncpy(local_dir_path, dir_name, sizeof(local_dir_path)); |
| xfree(dir_name); |
| |
| while (1) { |
| sleep(10); |
| if (nrt_need_state_save) { |
| nrt_need_state_save = false; |
| _switch_p_libstate_save(local_dir_path, false); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| static void _spawn_state_save_thread(char *dir) |
| { |
| pthread_attr_t attr; |
| pthread_t id; |
| |
| slurm_attr_init(&attr); |
| |
| if (pthread_create(&id, &attr, &_state_save_thread, (void *)dir) != 0) |
| error("Could not start switch/nrt state saving pthread"); |
| |
| slurm_attr_destroy(&attr); |
| } |