| /*****************************************************************************\ |
| * switch_elan.c - Library routines for initiating jobs on QsNet. |
| * $Id$ |
| ***************************************************************************** |
| * Copyright (C) 2003-2006 The Regents of the University of California. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Kevin Tew <tew1@llnl.gov>, et. al. |
| * UCRL-CODE-226842. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://www.llnl.gov/linux/slurm/>. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #if HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| #ifdef WITH_PTHREADS |
| # include <pthread.h> |
| #endif /* WITH_PTHREADS */ |
| |
| #include <fcntl.h> |
| #include <signal.h> |
| #include <stdlib.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| #include <dlfcn.h> |
| |
| #include <slurm/slurm_errno.h> |
| |
| #include "src/common/slurm_xlator.h" |
| |
| #include "src/plugins/switch/elan/qsw.h" |
| |
| #define BUFFER_SIZE 1024 |
| #define QSW_STATE_VERSION "VER001" |
| |
| /* |
| * Static prototypes for network error resolver creation: |
| */ |
/* Helpers defined later in this file. */
static void *_neterr_thr(void *arg);
static int   _set_elan_ids(void);

/*
 * State shared between switch_p_node_init() and the network error
 * resolver thread (_neterr_thr):
 *   neterr_mutex / neterr_cond - used by the thread to signal that its
 *                                startup has finished
 *   neterr_tid                 - thread id filled in by pthread_create()
 *   neterr_retval              - startup status written by the thread and
 *                                returned by switch_p_node_init()
 */
static pthread_mutex_t neterr_mutex  = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  neterr_cond   = PTHREAD_COND_INITIALIZER;
static pthread_t       neterr_tid    = 0;
static int             neterr_retval = 0;
| |
/*
 * One row of the error string table: an error number and the message
 * reported for it.
 */
typedef struct {
	int   xe_number;	/* error code */
	char *xe_message;	/* human-readable text for xe_number */
} slurm_errtab_t;
| |
| static slurm_errtab_t slurm_errtab[] = { |
| {0, "No error"}, |
| {-1, "Unspecified error"}, |
| |
| /* Quadrics Elan routine error codes */ |
| |
| { ENOSLURM, /* oh no! */ |
| "Out of slurm" }, |
| { EBADMAGIC_QSWLIBSTATE, |
| "Bad magic in QSW libstate" }, |
| { EBADMAGIC_QSWJOBINFO, |
| "Bad magic in QSW jobinfo" }, |
| { EINVAL_PRGCREATE, |
| "Program identifier in use or CPU count invalid, try again" }, |
| { ECHILD_PRGDESTROY, |
| "Processes belonging to this program are still running" }, |
| { EEXIST_PRGDESTROY, |
| "Program identifier does not exist" }, |
| { EELAN3INIT, |
| "Too many processes using Elan or mapping failure" }, |
| { EELAN3CONTROL, |
| "Could not open elan3 control device" }, |
| { EELAN3CREATE, |
| "Could not create elan capability" }, |
| { ESRCH_PRGADDCAP, |
| "Program does not exist (addcap)" }, |
| { EFAULT_PRGADDCAP, |
| "Capability has invalid address (addcap)" }, |
| { EINVAL_SETCAP, |
| "Invalid context number (setcap)" }, |
| { EFAULT_SETCAP, |
| "Capability has invalid address (setcap)" }, |
| { EGETNODEID, |
| "Cannot determine local elan address" }, |
| { EGETNODEID_BYHOST, |
| "Cannot translate hostname to elan address" }, |
| { EGETHOST_BYNODEID, |
| "Cannot translate elan address to hostname" }, |
| { ESRCH_PRGSIGNAL, |
| "No such program identifier" }, |
| { EINVAL_PRGSIGNAL, |
| "Invalid signal number" } |
| }; |
| |
| /* |
| * These variables are required by the generic plugin interface. If they |
| * are not found in the plugin, the plugin loader will ignore it. |
| * |
| * plugin_name - a string giving a human-readable description of the |
| * plugin. There is no maximum length, but the symbol must refer to |
| * a valid string. |
| * |
| * plugin_type - a string suggesting the type of the plugin or its |
| * applicability to a particular form of data or method of data handling. |
| * If the low-level plugin API is used, the contents of this string are |
| * unimportant and may be anything. SLURM uses the higher-level plugin |
| * interface which requires this string to be of the form |
| * |
| * <application>/<method> |
| * |
| * where <application> is a description of the intended application of |
| * the plugin (e.g., "switch" for SLURM switch) and <method> is a description |
| * of how this plugin satisfies that application. SLURM will only load |
| * a switch plugin if the plugin_type string has a prefix of "switch/". |
| * |
| * plugin_version - an unsigned 32-bit integer giving the version number |
| * of the plugin. If major and minor revisions are desired, the major |
| * version number may be multiplied by a suitable magnitude constant such |
| * as 100 or 1000. Various SLURM versions will likely require a certain |
| * minimum version for their plugins as this API matures. |
| */ |
/* Human-readable description, "<application>/<method>" type, and version. */
const char     plugin_name[]  = "switch Quadrics Elan3 or Elan4 plugin";
const char     plugin_type[]  = "switch/elan";
const uint32_t plugin_version = 90;
| |
| /* |
| * init() is called when the plugin is loaded, before any other functions |
| * are called. Put global initialization here. |
| */ |
| int init ( void ) |
| { |
| #ifdef HAVE_FRONT_END |
| fatal("Plugin switch/elan is incompatable with front-end configuration"); |
| #endif |
| verbose("%s loaded", plugin_name); |
| return SLURM_SUCCESS; |
| } |
| |
| int fini ( void ) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * switch functions for global state save/restore |
| */ |
| int switch_p_libstate_save (char *dir_name) |
| { |
| int error_code = SLURM_SUCCESS; |
| qsw_libstate_t old_state = NULL; |
| Buf buffer = NULL; |
| int state_fd; |
| char *file_name; |
| |
| if (qsw_alloc_libstate(&old_state)) |
| return SLURM_ERROR; |
| qsw_fini(old_state); |
| buffer = init_buf(1024); |
| packstr(QSW_STATE_VERSION, buffer); |
| (void) qsw_pack_libstate(old_state, buffer); |
| file_name = xstrdup(dir_name); |
| xstrcat(file_name, "/qsw_state"); |
| (void) unlink(file_name); |
| state_fd = creat (file_name, 0600); |
| if (state_fd == 0) { |
| error ("Can't save state, error creating file %s %m", |
| file_name); |
| error_code = SLURM_ERROR; |
| } else { |
| char *buf = get_buf_data(buffer); |
| size_t len = get_buf_offset(buffer); |
| while(1) { |
| int wrote = write (state_fd, buf, len); |
| if ((wrote < 0) && (errno == EINTR)) |
| continue; |
| if (wrote == 0) |
| break; |
| if (wrote < 0) { |
| error ("Can't save switch state: %m"); |
| error_code = SLURM_ERROR; |
| break; |
| } |
| buf += wrote; |
| len -= wrote; |
| } |
| close (state_fd); |
| } |
| xfree(file_name); |
| |
| if (buffer) |
| free_buf(buffer); |
| if (old_state) |
| qsw_free_libstate(old_state); |
| |
| return error_code; |
| } |
| |
| int switch_p_libstate_restore (char *dir_name, bool recover) |
| { |
| char *data = NULL, *file_name; |
| qsw_libstate_t old_state = NULL; |
| Buf buffer = NULL; |
| int error_code = SLURM_SUCCESS; |
| int state_fd, data_allocated = 0, data_read = 0, data_size = 0; |
| char *ver_str = NULL; |
| uint16_t ver_str_len; |
| |
| if (!recover) /* clean start, no recovery */ |
| return qsw_init(NULL); |
| |
| file_name = xstrdup(dir_name); |
| xstrcat(file_name, "/qsw_state"); |
| state_fd = open (file_name, O_RDONLY); |
| if (state_fd >= 0) { |
| data_allocated = BUFFER_SIZE; |
| data = xmalloc(data_allocated); |
| while (1) { |
| data_read = read (state_fd, &data[data_size], |
| BUFFER_SIZE); |
| if ((data_read < 0) && (errno == EINTR)) |
| continue; |
| if (data_read < 0) { |
| error ("Read error on %s, %m", file_name); |
| error_code = SLURM_ERROR; |
| break; |
| } else if (data_read == 0) |
| break; |
| data_size += data_read; |
| data_allocated += data_read; |
| xrealloc(data, data_allocated); |
| } |
| close (state_fd); |
| xfree(file_name); |
| } else { |
| error("No %s file for QSW state recovery", file_name); |
| error("Starting QSW with clean state"); |
| xfree(file_name); |
| return qsw_init(NULL); |
| } |
| |
| if (error_code == SLURM_SUCCESS) { |
| buffer = create_buf (data, data_size); |
| data = NULL; /* now in buffer, don't xfree() */ |
| if (buffer && (size_buf(buffer) >= sizeof(uint16_t) + |
| strlen(QSW_STATE_VERSION))) { |
| char *ptr = get_buf_data(buffer); |
| |
| if (!memcmp(&ptr[sizeof(uint16_t)], |
| QSW_STATE_VERSION, 3)) { |
| unpackstr_xmalloc(&ver_str, &ver_str_len, |
| buffer); |
| debug3("qsw_state file version: %s", ver_str); |
| } |
| } |
| } |
| |
| if (ver_str && (strcmp(ver_str, QSW_STATE_VERSION) == 0)) { |
| if ((qsw_alloc_libstate(&old_state)) |
| || (qsw_unpack_libstate(old_state, buffer) < 0)) |
| error_code = SLURM_ERROR; |
| } else |
| error("qsw_state file is in an unsupported format, ignored"); |
| |
| if (buffer) |
| free_buf(buffer); |
| xfree(data); |
| xfree(ver_str); |
| |
| if (error_code == SLURM_SUCCESS) |
| error_code = qsw_init(old_state); |
| if (old_state) |
| qsw_free_libstate(old_state); |
| |
| return error_code; |
| } |
| |
/*
 * Clear the switch library state by delegating to qsw_clear().
 * RET the value returned by qsw_clear()
 */
int switch_p_libstate_clear ( void )
{
	int rc = qsw_clear();

	return rc;
}
| |
| |
/*
 * Report the plugin's "no fragmentation" flag.
 * RET always true for this plugin
 */
bool switch_p_no_frag ( void )
{
	const bool no_frag = true;

	return no_frag;
}
| |
| /* |
| * switch functions for job step specific credential |
| */ |
| int switch_p_alloc_jobinfo(switch_jobinfo_t *jp) |
| { |
| return qsw_alloc_jobinfo((qsw_jobinfo_t *)jp); |
| } |
| |
| int switch_p_build_jobinfo ( switch_jobinfo_t switch_job, char *nodelist, |
| uint16_t *tasks_per_node, int cyclic_alloc, char *network) |
| { |
| int node_set_size = QSW_MAX_TASKS; /* overkill but safe */ |
| hostlist_t host_list; |
| char *this_node_name; |
| bitstr_t *nodeset; |
| int node_id, error_code = SLURM_SUCCESS; |
| int i, nnodes, ntasks = 0; |
| |
| if (!tasks_per_node) { |
| slurm_seterrno(ENOMEM); |
| return SLURM_ERROR; |
| } |
| |
| if ((host_list = hostlist_create(nodelist)) == NULL) |
| fatal("hostlist_create(%s): %m", nodelist); |
| |
| nnodes = hostlist_count(host_list); |
| for (i = 0; i < nnodes; i++) |
| ntasks += tasks_per_node[i]; |
| |
| if (ntasks > node_set_size) { |
| slurm_seterrno(ESLURM_BAD_TASK_COUNT); |
| hostlist_destroy(host_list); |
| return SLURM_ERROR; |
| } |
| |
| if ((nodeset = bit_alloc (node_set_size)) == NULL) |
| fatal("bit_alloc: %m"); |
| |
| while ((this_node_name = hostlist_shift(host_list))) { |
| node_id = qsw_getnodeid_byhost(this_node_name); |
| if (node_id >= 0) |
| bit_set(nodeset, node_id); |
| else { |
| error("qsw_getnodeid_byhost(%s) failure", |
| this_node_name); |
| slurm_seterrno(ESLURM_INTERCONNECT_FAILURE); |
| error_code = SLURM_ERROR; |
| } |
| free(this_node_name); |
| } |
| hostlist_destroy(host_list); |
| |
| if (error_code == SLURM_SUCCESS) { |
| qsw_jobinfo_t j = (qsw_jobinfo_t) switch_job; |
| error_code = qsw_setup_jobinfo(j, ntasks, nodeset, |
| tasks_per_node, cyclic_alloc); |
| /* allocs hw context */ |
| } |
| |
| bit_free(nodeset); |
| return error_code; |
| } |
| |
| switch_jobinfo_t switch_p_copy_jobinfo(switch_jobinfo_t j) |
| { |
| return (switch_jobinfo_t) qsw_copy_jobinfo((qsw_jobinfo_t) j); |
| } |
| |
| void switch_p_free_jobinfo(switch_jobinfo_t k) |
| { |
| qsw_free_jobinfo((qsw_jobinfo_t) k); |
| } |
| |
| int switch_p_pack_jobinfo(switch_jobinfo_t k, Buf buffer) |
| { |
| return qsw_pack_jobinfo((qsw_jobinfo_t) k, buffer); |
| } |
| |
| int switch_p_unpack_jobinfo(switch_jobinfo_t k, Buf buffer) |
| { |
| return qsw_unpack_jobinfo((qsw_jobinfo_t) k, buffer); |
| } |
| |
| void switch_p_print_jobinfo(FILE *fp, switch_jobinfo_t jobinfo) |
| { |
| qsw_print_jobinfo(fp, (qsw_jobinfo_t) jobinfo); |
| } |
| |
| char *switch_p_sprint_jobinfo(switch_jobinfo_t switch_jobinfo, char *buf, |
| size_t size) |
| { |
| return qsw_capability_string((struct qsw_jobinfo *) switch_jobinfo, |
| buf, size); |
| } |
| |
| /* |
| * switch functions for job initiation |
| */ |
| |
/*
 * Report whether this node should be treated as an Elan3 system.
 *
 * When built with HAVE_LIBELAN3 the answer is always yes. Otherwise the
 * presence of /proc/qsnet/elan3/device0 is probed with stat().
 *
 * RET 1 if Elan3 is considered present, 0 otherwise
 */
static int _have_elan3 (void)
{
#if HAVE_LIBELAN3
	return (1);
#else
	struct stat st;

	if (stat ("/proc/qsnet/elan3/device0", &st) < 0)
		return (0);

	return (1);
#endif /* HAVE_LIBELAN3 */
}
| |
| /* Initialize node for use of the Elan interconnect by loading |
| * elanid/hostname pairs then spawning the Elan network error |
| * resolver thread. |
| * |
| * Main thread waits for neterr thread to successfully start before |
| * continuing. |
| */ |
| int switch_p_node_init ( void ) |
| { |
| pthread_attr_t attr; |
| |
| /* |
| * Only need to run neterr resolver thread on Elan3 systems. |
| */ |
| if (!_have_elan3 ()) return SLURM_SUCCESS; |
| |
| /* |
| * Load neterr elanid/hostname values into kernel |
| */ |
| if (_set_elan_ids() < 0) |
| return SLURM_ERROR; |
| |
| slurm_attr_init(&attr); |
| if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) |
| error("pthread_attr_setdetachstate: %m"); |
| |
| slurm_mutex_lock(&neterr_mutex); |
| |
| if (pthread_create(&neterr_tid, &attr, _neterr_thr, NULL)) { |
| error("pthread_create: %m"); |
| slurm_attr_destroy(&attr); |
| return SLURM_ERROR; |
| } |
| slurm_attr_destroy(&attr); |
| |
| /* |
| * Wait for successful startup of neterr thread before |
| * returning control to slurmd. |
| */ |
| pthread_cond_wait(&neterr_cond, &neterr_mutex); |
| pthread_mutex_unlock(&neterr_mutex); |
| |
| return neterr_retval; |
| |
| |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * Handle for libelan3.so, obtained with dlopen(3) when needed.
 * Loading the library at run time lets a single build of the elan
 * plugin serve both Elan3 and Elan4 on QsNetII systems.
 * Set in _neterr_thr(); NULL until then.
 */
static void *elan3h = NULL;
| |
| /* |
| * Wrapper functions for needed libelan3 functions |
| */ |
| static int _elan3_init_neterr_svc (int dbglvl) |
| { |
| static int (*init_svc) (int); |
| |
| if (!(init_svc = dlsym (elan3h, "elan3_init_neterr_svc"))) |
| return (0); |
| |
| return (init_svc (dbglvl)); |
| } |
| |
| |
| static int _elan3_register_neterr_svc (void) |
| { |
| static int (*reg_svc) (void); |
| |
| if (!(reg_svc = dlsym (elan3h, "elan3_register_neterr_svc"))) |
| return (0); |
| |
| return (reg_svc ()); |
| } |
| |
| static int _elan3_run_neterr_svc (void) |
| { |
| static int (*run_svc) (); |
| |
| if (!(run_svc = dlsym (elan3h, "elan3_run_neterr_svc"))) |
| return (0); |
| |
| return (run_svc ()); |
| } |
| |
| |
| static int _elan3_load_neterr_svc (int i, char *host) |
| { |
| static int (*load_svc) (int, char *); |
| |
| if (!(load_svc = dlsym (elan3h, "elan3_load_neterr_svc"))) |
| return (0); |
| |
| return (load_svc (i, host)); |
| } |
| |
| static void *_neterr_thr(void *arg) |
| { |
| debug3("Starting Elan network error resolver thread"); |
| |
| if (!(elan3h = dlopen ("libelan3.so", RTLD_LAZY))) { |
| error ("Unable to open libelan3.so: %s", dlerror ()); |
| goto fail; |
| } |
| |
| if (!_elan3_init_neterr_svc(0)) { |
| error("elan3_init_neterr_svc: %m"); |
| goto fail; |
| } |
| |
| /* |
| * Attempt to register the neterr svc thread. If the address |
| * cannot be bound, then there is already a thread running, and |
| * we should just exit with success. |
| */ |
| if (!_elan3_register_neterr_svc()) { |
| if (errno != EADDRINUSE) { |
| error("elan3_register_neterr_svc: %m"); |
| goto fail; |
| } |
| info("Warning: Elan error resolver thread already running"); |
| } |
| |
| /* |
| * Signal main thread that we've successfully initialized |
| */ |
| slurm_mutex_lock(&neterr_mutex); |
| neterr_retval = 0; |
| pthread_cond_signal(&neterr_cond); |
| slurm_mutex_unlock(&neterr_mutex); |
| |
| /* |
| * Run the network error resolver thread. This should |
| * never return. If it does, there's not much we can do |
| * about it. |
| */ |
| _elan3_run_neterr_svc(); |
| |
| return NULL; |
| |
| fail: |
| slurm_mutex_lock(&neterr_mutex); |
| neterr_retval = SLURM_FAILURE; |
| pthread_cond_signal(&neterr_cond); |
| slurm_mutex_unlock(&neterr_mutex); |
| |
| return NULL; |
| } |
| |
| /* |
| * Called from slurmd just before termination. |
| * We don't really need to do anything special for Elan, but |
| * we'll call pthread_cancel() on the neterr resolver thread anyhow. |
| */ |
| extern int switch_p_node_fini ( void ) |
| { |
| #if HAVE_LIBELAN3 |
| int i; |
| |
| if (!neterr_tid) |
| return SLURM_SUCCESS; |
| |
| for (i=0; i<4; i++) { |
| if (pthread_cancel(neterr_tid)) { |
| neterr_tid = 0; |
| return SLURM_SUCCESS; |
| } |
| usleep(1000); |
| } |
| error("Could not kill switch elan pthread"); |
| return SLURM_ERROR; |
| #else /* !HAVE_LIBELAN3 */ |
| |
| return SLURM_SUCCESS; |
| #endif /* HAVE_LIBELAN3 */ |
| } |
| |
| int switch_p_job_preinit ( switch_jobinfo_t jobinfo ) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * prepare node for interconnect use |
| */ |
| int switch_p_job_init ( switch_jobinfo_t jobinfo, uid_t uid ) |
| { |
| char buf[4096]; |
| |
| debug2("calling qsw_prog_init from process %lu", |
| (unsigned long) getpid()); |
| verbose("ELAN: %s", qsw_capability_string( |
| (qsw_jobinfo_t)jobinfo, buf, 4096)); |
| |
| if (qsw_prog_init((qsw_jobinfo_t)jobinfo, uid) < 0) { |
| /* |
| * Check for EBADF, which probably means the rms |
| * kernel module is not loaded. |
| */ |
| if (errno == EBADF) |
| error("Initializing interconnect: " |
| "is the rms kernel module loaded?"); |
| else |
| error ("qsw_prog_init: %m"); |
| |
| qsw_print_jobinfo(log_fp(), (qsw_jobinfo_t)jobinfo); |
| return SLURM_ERROR; |
| } |
| |
| return SLURM_SUCCESS; |
| } |
| |
| int switch_p_job_fini ( switch_jobinfo_t jobinfo ) |
| { |
| qsw_prog_fini((qsw_jobinfo_t)jobinfo); |
| return SLURM_SUCCESS; |
| } |
| |
| int switch_p_job_postfini ( switch_jobinfo_t jobinfo, uid_t pgid, |
| uint32_t job_id, uint32_t step_id ) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| int switch_p_job_attach ( switch_jobinfo_t jobinfo, char ***env, |
| uint32_t nodeid, uint32_t procid, uint32_t nnodes, |
| uint32_t nprocs, uint32_t rank ) |
| { |
| int id = -1; |
| debug3("nodeid=%lu nnodes=%lu procid=%lu nprocs=%lu rank=%lu", |
| (unsigned long) nodeid, (unsigned long) nnodes, |
| (unsigned long) procid, (unsigned long) nprocs, |
| (unsigned long) rank); |
| debug3("setting capability in process %lu", |
| (unsigned long) getpid()); |
| if (qsw_setcap((qsw_jobinfo_t) jobinfo, (int) procid) < 0) { |
| error("qsw_setcap: %m"); |
| return SLURM_ERROR; |
| } |
| |
| if (slurm_setenvpf(env, "RMS_RANK", "%lu", (unsigned long) rank ) |
| < 0) |
| return SLURM_ERROR; |
| if (slurm_setenvpf(env, "RMS_NODEID", "%lu", (unsigned long) nodeid) |
| < 0) |
| return SLURM_ERROR; |
| if (slurm_setenvpf(env, "RMS_PROCID", "%lu", (unsigned long) rank ) |
| < 0) |
| return SLURM_ERROR; |
| if (slurm_setenvpf(env, "RMS_NNODES", "%lu", (unsigned long) nnodes) |
| < 0) |
| return SLURM_ERROR; |
| if (slurm_setenvpf(env, "RMS_NPROCS", "%lu", (unsigned long) nprocs) |
| < 0) |
| return SLURM_ERROR; |
| |
| /* |
| * Tell libelan the key to use for Elan state shmem segment |
| */ |
| if (qsw_statkey ((qsw_jobinfo_t) jobinfo, &id) >= 0) |
| slurm_setenvpf (env, "ELAN_STATKEY", "%d", id); |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_get_jobinfo(switch_jobinfo_t switch_job, |
| int key, void *resulting_data) |
| { |
| slurm_seterrno(EINVAL); |
| return SLURM_ERROR; |
| } |
| |
| static int |
| _set_elan_ids(void) |
| { |
| int i; |
| |
| for (i = 0; i <= qsw_maxnodeid(); i++) { |
| char host[256]; |
| if (qsw_gethost_bynodeid(host, 256, i) < 0) |
| continue; |
| |
| if (_elan3_load_neterr_svc(i, host) < 0) |
| error("elan3_load_neterr_svc(%d, %s): %m", i, host); |
| } |
| |
| return SLURM_SUCCESS; |
| } |
| |
| |
| /* |
| * Linear search through table of errno values and strings, |
| * returns NULL on error, string on success. |
| */ |
| static char *_lookup_slurm_api_errtab(int errnum) |
| { |
| char *res = NULL; |
| int i; |
| |
| for (i = 0; i < sizeof(slurm_errtab) / sizeof(slurm_errtab_t); i++) { |
| if (slurm_errtab[i].xe_number == errnum) { |
| res = slurm_errtab[i].xe_message; |
| break; |
| } |
| } |
| return res; |
| } |
| |
| extern int switch_p_get_errno(void) |
| { |
| int err = slurm_get_errno(); |
| |
| if ((err >= ESLURM_SWITCH_MIN) && (err <= ESLURM_SWITCH_MAX)) |
| return err; |
| |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * Translate an error number into a message.
 * errnum IN - error number
 * RET the plugin's own message when errnum is in slurm_errtab, otherwise
 *     the result of strerror(errnum)
 */
extern char *switch_p_strerror(int errnum)
{
	char *msg = _lookup_slurm_api_errtab(errnum);

	if (msg == NULL)
		msg = strerror(errnum);

	return msg;
}
| |
| /* |
| * node switch state monitoring functions |
| * required for IBM Federation switch |
| */ |
| extern int switch_p_clear_node_state(void) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_alloc_node_info(switch_node_info_t *switch_node) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_build_node_info(switch_node_info_t switch_node) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_pack_node_info(switch_node_info_t switch_node, |
| Buf buffer) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_unpack_node_info(switch_node_info_t switch_node, |
| Buf buffer) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_free_node_info(switch_node_info_t *switch_node) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern char*switch_p_sprintf_node_info(switch_node_info_t switch_node, |
| char *buf, size_t size) |
| { |
| if ((buf != NULL) && size) { |
| buf[0] = '\0'; |
| return buf; |
| } |
| |
| return NULL; |
| } |
| |
| extern int switch_p_job_step_complete(switch_jobinfo_t jobinfo, |
| char *nodelist) |
| { |
| qsw_teardown_jobinfo((qsw_jobinfo_t) jobinfo); /* frees hw context */ |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_job_step_part_comp(switch_jobinfo_t jobinfo, |
| char *nodelist) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * Report the plugin's partial-completion flag.
 * RET always false for this plugin
 */
extern bool switch_p_part_comp(void)
{
	const bool part_comp = false;

	return part_comp;
}
| |
| extern int switch_p_job_step_allocated(switch_jobinfo_t jobinfo, char *nodelist) |
| { |
| return qsw_restore_jobinfo((qsw_jobinfo_t) jobinfo); |
| } |
| |
| extern int switch_p_slurmctld_init( void ) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_slurmd_init( void ) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int switch_p_slurmd_step_init( void ) |
| { |
| return SLURM_SUCCESS; |
| } |