| /*****************************************************************************\ |
| * mpi_cray_shasta.c - Cray Shasta MPI plugin |
| ***************************************************************************** |
| * Copyright 2019,2022-2023 Hewlett Packard Enterprise Development LP |
| * Written by David Gloe <dgloe@cray.com> |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include "config.h" |
| |
| #include <fcntl.h> |
| #include <inttypes.h> |
| #include <limits.h> |
| #include <signal.h> |
| #ifdef HAVE_GETRANDOM |
| #include <sys/random.h> |
| #endif |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #include "slurm/slurm_errno.h" |
| #include "src/common/slurm_xlator.h" |
| |
| #include "src/common/env.h" |
| #include "src/common/fd.h" |
| #include "src/common/parse_config.h" |
| #include "src/common/read_config.h" |
| #include "src/interfaces/mpi.h" |
| #include "src/common/xstring.h" |
| #include "src/common/xmalloc.h" |
| #include "src/slurmd/slurmstepd/slurmstepd_job.h" |
| |
| #include "apinfo.h" |
| |
| /* |
| * These variables are required by the generic plugin interface. If they |
| * are not found in the plugin, the plugin loader will ignore it. |
| * |
| * plugin_name - a string giving a human-readable description of the |
| * plugin. There is no maximum length, but the symbol must refer to |
| * a valid string. |
| * |
| * plugin_type - a string suggesting the type of the plugin or its |
| * applicability to a particular form of data or method of data handling. |
| * If the low-level plugin API is used, the contents of this string are |
| * unimportant and may be anything. Slurm uses the higher-level plugin |
| * interface which requires this string to be of the form |
| * |
| * <application>/<method> |
| * |
| * where <application> is a description of the intended application of |
| * the plugin (e.g., "switch" for Slurm switch) and <method> is a description |
| * of how this plugin satisfies that application. Slurm will only load |
| * a switch plugin if the plugin_type string has a prefix of "switch/". |
| * |
| * plugin_version - an unsigned 32-bit integer containing the Slurm version |
| * (major.minor.micro combined into a single number). |
| */ |
| const char plugin_name[] = "mpi Cray Shasta plugin"; |
| const char plugin_type[] = "mpi/cray_shasta"; |
| const uint32_t plugin_id = MPI_PLUGIN_CRAY_SHASTA; |
| const uint32_t plugin_version = SLURM_VERSION_NUMBER; |
| |
/*
 * Names of the environment variables made available to applications.
 */

/* PALS_* variables */
#define PALS_APID_ENV		"PALS_APID"
#define PALS_APINFO_ENV		"PALS_APINFO"
#define PALS_LOCAL_RANKID_ENV	"PALS_LOCAL_RANKID"
#define PALS_NODEID_ENV		"PALS_NODEID"
#define PALS_RANKID_ENV		"PALS_RANKID"
#define PALS_SPOOL_DIR_ENV	"PALS_SPOOL_DIR"

/* PMI_* variables */
#define PMI_JOBID_ENV		"PMI_JOBID"
#define PMI_LOCAL_RANK_ENV	"PMI_LOCAL_RANK"
#define PMI_LOCAL_SIZE_ENV	"PMI_LOCAL_SIZE"
#define PMI_RANK_ENV		"PMI_RANK"
#define PMI_SIZE_ENV		"PMI_SIZE"
#define PMI_UNIVERSE_SIZE_ENV	"PMI_UNIVERSE_SIZE"
#define PMI_SHARED_SECRET_ENV	"PMI_SHARED_SECRET"
| |
/*
 * Global state used by several of the plugin's functions.
 */

/* Application-specific spool directory */
char *appdir = NULL;

/* Application PMI file */
char *apinfo = NULL;
| |
| /* |
| * Create the Cray MPI directory under the slurmd spool directory |
| */ |
| static int _create_mpi_dir(const char *spool) |
| { |
| char *mpidir = NULL; |
| int rc = SLURM_SUCCESS; |
| |
| mpidir = xstrdup_printf("%s/%s", spool, MPI_CRAY_DIR); |
| if ((mkdir(mpidir, 0755) == -1) && (errno != EEXIST)) { |
| error("%s: Couldn't create Cray MPI directory %s: %m", |
| plugin_type, mpidir); |
| rc = SLURM_ERROR; |
| } |
| xfree(mpidir); |
| |
| return rc; |
| } |
| |
| /* |
| * Create the application-specific directory under the Cray MPI directory |
| */ |
| static int _create_app_dir(const stepd_step_rec_t *step, const char *spool) |
| { |
| xfree(appdir); |
| // Format the directory name |
| appdir = xstrdup_printf("%s/%s/%u.%u", |
| spool, MPI_CRAY_DIR, |
| step->step_id.job_id, step->step_id.step_id); |
| |
| // Create the directory |
| if ((mkdir(appdir, 0700) == -1) && (errno != EEXIST)) { |
| error("%s: Couldn't create directory %s: %m", |
| plugin_type, appdir); |
| goto error; |
| } |
| |
| // Change directory owner |
| if (chown(appdir, step->uid, step->gid) == -1) { |
| error("%s: Couldn't change directory %s owner: %m", |
| plugin_type, appdir); |
| goto error; |
| } |
| |
| debug("%s: Created application directory %s", plugin_type, appdir); |
| return SLURM_SUCCESS; |
| |
| error: |
| if (rmdir(appdir) < 0) |
| error("rmdir(%s): %m", appdir); |
| xfree(appdir); |
| return SLURM_ERROR; |
| } |
| |
| /* |
| * Set the PMI port to use in the application's environment |
| */ |
| static void _set_pmi_port(char ***env) |
| { |
| char *resv_ports = NULL; |
| char *endp = NULL; |
| unsigned long pmi_port = 0; |
| |
| if (!(resv_ports = getenvp(*env, "SLURM_STEP_RESV_PORTS"))) |
| return; |
| |
| // Get the first port from the range |
| errno = 0; |
| pmi_port = strtoul(resv_ports, &endp, 10); |
| if ((errno != 0) || (pmi_port > 65535) || |
| ((*endp != '-') && (*endp != ',') && (*endp != '\0'))) { |
| error("%s: Couldn't parse reserved ports %s", |
| plugin_type, resv_ports); |
| return; |
| } |
| |
| env_array_overwrite_fmt(env, "PMI_CONTROL_PORT", "%lu", pmi_port); |
| } |
| |
| extern int mpi_p_slurmstepd_prefork(const stepd_step_rec_t *step, char ***env) |
| { |
| /* do the node_name substitution once */ |
| char *spool = slurm_conf_expand_slurmd_path(slurm_conf.slurmd_spooldir, |
| step->node_name, |
| step->node_name); |
| |
| // Set up spool directory and apinfo |
| if (_create_mpi_dir(spool) == SLURM_ERROR || |
| _create_app_dir(step, spool) == SLURM_ERROR || |
| create_apinfo(step, spool) == SLURM_ERROR) { |
| xfree(spool); |
| return SLURM_ERROR; |
| } |
| |
| xfree(spool); |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern int mpi_p_slurmstepd_task(const mpi_task_info_t *mpi_task, char ***env) |
| { |
| // Set environment variables |
| env_array_overwrite_fmt(env, PALS_APID_ENV, "%u.%u", |
| mpi_task->step_id.job_id, |
| mpi_task->step_id.step_id); |
| env_array_overwrite_fmt(env, PALS_APINFO_ENV, "%s", apinfo); |
| env_array_overwrite_fmt(env, PALS_LOCAL_RANKID_ENV, "%u", |
| mpi_task->ltaskid); |
| env_array_overwrite_fmt(env, PALS_NODEID_ENV, "%u", mpi_task->nodeid); |
| env_array_overwrite_fmt(env, PALS_RANKID_ENV, "%u", mpi_task->gtaskid); |
| env_array_overwrite_fmt(env, PALS_SPOOL_DIR_ENV, "%s", appdir); |
| |
| env_array_overwrite_fmt(env, PMI_JOBID_ENV, "%u", |
| mpi_task->step_id.job_id); |
| env_array_overwrite_fmt(env, PMI_LOCAL_RANK_ENV, "%u", |
| mpi_task->ltaskid); |
| env_array_overwrite_fmt(env, PMI_LOCAL_SIZE_ENV, "%u", |
| mpi_task->ltasks); |
| env_array_overwrite_fmt(env, PMI_RANK_ENV, "%u", mpi_task->gtaskid); |
| env_array_overwrite_fmt(env, PMI_SIZE_ENV, "%u", mpi_task->ntasks); |
| env_array_overwrite_fmt(env, PMI_UNIVERSE_SIZE_ENV, "%u", |
| mpi_task->ntasks); |
| |
| _set_pmi_port(env); |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern mpi_plugin_client_state_t * |
| mpi_p_client_prelaunch(const mpi_step_info_t *mpi_step, char ***env) |
| { |
| #ifdef HAVE_GETRANDOM |
| static uint64_t shared_secret = 0; |
| static pthread_mutex_t shared_secret_mutex = PTHREAD_MUTEX_INITIALIZER; |
| |
| slurm_mutex_lock(&shared_secret_mutex); |
| |
| /* |
| * Get a non-zero pseudo-random value. getrandom() is guaranteed to |
| * return up to 256 bytes uninturrupted. The only error we might expect |
| * here is ENOSYS, indicating that the kernel does not implement the |
| * getrandom() system call. getrandom() should be present on all |
| * supported cray systems. |
| */ |
| |
| if (!shared_secret && |
| getrandom(&shared_secret, sizeof(shared_secret), 0) < 0) { |
| error("%s: getrandom() failed: %m", __func__); |
| slurm_mutex_unlock(&shared_secret_mutex); |
| return NULL; |
| } |
| |
| /* Set PMI_SHARED_SECRET for PMI authentication */ |
| env_array_overwrite_fmt(env, PMI_SHARED_SECRET_ENV, "%"PRIu64, |
| shared_secret); |
| |
| slurm_mutex_unlock(&shared_secret_mutex); |
| #endif |
| /* only return NULL on error */ |
| return (void *)0xdeadbeef; |
| } |
| |
| extern int mpi_p_client_fini(mpi_plugin_client_state_t *state) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| extern int init(void) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * Clean up the application |
| */ |
| extern void fini(void) |
| { |
| // Remove application spool directory |
| if (appdir) |
| rmdir_recursive(appdir, true); |
| |
| // Free allocated storage |
| xfree(appdir); |
| xfree(apinfo); |
| } |
| |
| extern void mpi_p_conf_options(s_p_options_t **full_options, int *full_opt_cnt) |
| { |
| } |
| |
| extern void mpi_p_conf_set(s_p_hashtbl_t *tbl) |
| { |
| } |
| |
| extern s_p_hashtbl_t *mpi_p_conf_get(void) |
| { |
| return NULL; |
| } |
| |
| extern list_t *mpi_p_conf_get_printable(void) |
| { |
| return NULL; |
| } |