| /*****************************************************************************\ |
| ** mpi_pmi2.c - Library routines for initiating MPI jobs using PMI2. |
| ***************************************************************************** |
| * Copyright (C) 2011-2012 National University of Defense Technology. |
| * Written by Hongjia Cao <hjcao@nudt.edu.cn>. |
| * All rights reserved. |
| * Portions copyright (C) 2015 Mellanox Technologies Inc. |
| * Written by Artem Polyakov <artemp@mellanox.com>. |
| * All rights reserved. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include <fcntl.h> |
| #include <signal.h> |
| #include <sys/types.h> |
| |
| #include <slurm/slurm_errno.h> |
| #include "src/common/slurm_xlator.h" |
| #include "src/interfaces/mpi.h" |
| |
| #include "setup.h" |
| #include "agent.h" |
| #include "spawn.h" |
| |
| /* |
| * These variables are required by the generic plugin interface. If they |
| * are not found in the plugin, the plugin loader will ignore it. |
| * |
| * plugin_name - a string giving a human-readable description of the |
| * plugin. There is no maximum length, but the symbol must refer to |
| * a valid string. |
| * |
| * plugin_type - a string suggesting the type of the plugin or its |
| * applicability to a particular form of data or method of data handling. |
| * If the low-level plugin API is used, the contents of this string are |
| * unimportant and may be anything. Slurm uses the higher-level plugin |
| * interface which requires this string to be of the form |
| * |
| * <application>/<method> |
| * |
| * where <application> is a description of the intended application of |
| * the plugin (e.g., "switch" for Slurm switch) and <method> is a description |
| * of how this plugin satisfies that application. Slurm will only load |
| * a switch plugin if the plugin_type string has a prefix of "switch/". |
| * |
| * plugin_version - an unsigned 32-bit integer containing the Slurm version |
| * (major.minor.micro combined into a single number). |
| */ |
| const char plugin_name[] = "mpi PMI2 plugin"; |
| const char plugin_type[] = "mpi/pmi2"; |
| const uint32_t plugin_id = MPI_PLUGIN_PMI2; |
| const uint32_t plugin_version = SLURM_VERSION_NUMBER; |
| |
| /* |
| * The following is executed in slurmstepd. |
| */ |
| |
| extern int mpi_p_slurmstepd_prefork(const stepd_step_rec_t *step, char ***env) |
| { |
| int rc; |
| |
| debug("using mpi/pmi2"); |
| |
| if (step->batch) |
| return SLURM_SUCCESS; |
| |
| rc = pmi2_setup_stepd(step, env); |
| if (rc != SLURM_SUCCESS) |
| return rc; |
| |
| if (pmi2_start_agent() < 0) { |
| error ("mpi/pmi2: failed to create pmi2 agent thread"); |
| return SLURM_ERROR; |
| } |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern int mpi_p_slurmstepd_task(const mpi_task_info_t *mpi_task, char ***env) |
| { |
| int i; |
| |
| env_array_overwrite_fmt(env, "PMI_FD", "%u", |
| TASK_PMI_SOCK(mpi_task->ltaskid)); |
| |
| env_array_overwrite_fmt(env, "PMI_JOBID", "%s", |
| job_info.pmi_jobid); |
| env_array_overwrite_fmt(env, "PMI_RANK", "%u", mpi_task->gtaskid); |
| env_array_overwrite_fmt(env, "PMI_SIZE", "%u", mpi_task->ntasks); |
| if (job_info.spawn_seq) { /* PMI1.1 needs this env-var */ |
| env_array_overwrite_fmt(env, "PMI_SPAWNED", "%u", 1); |
| } |
| /* close unused sockets in task */ |
| close(tree_sock); |
| tree_sock = 0; |
| for (i = 0; i < mpi_task->ltasks; i ++) { |
| close(STEPD_PMI_SOCK(i)); |
| STEPD_PMI_SOCK(i) = 0; |
| if (i != mpi_task->ltaskid) { |
| close(TASK_PMI_SOCK(i)); |
| TASK_PMI_SOCK(i) = 0; |
| } |
| } |
| return SLURM_SUCCESS; |
| } |
| |
| /* |
| * The following is executed in srun. |
| */ |
| |
| extern mpi_plugin_client_state_t * |
| mpi_p_client_prelaunch(mpi_step_info_t *mpi_step, char ***env) |
| { |
| int rc; |
| |
| debug("mpi/pmi2: client_prelaunch"); |
| |
| rc = pmi2_setup_srun(mpi_step, env); |
| if (rc != SLURM_SUCCESS) { |
| return NULL; |
| } |
| |
| if (pmi2_start_agent() < 0) { |
| error("failed to start PMI2 agent thread"); |
| return NULL; |
| } |
| |
| return (void *)0x12345678; |
| } |
| |
| extern int mpi_p_client_fini(mpi_plugin_client_state_t *state) |
| { |
| pmi2_stop_agent(); |
| |
| /* the job may be allocated by this srun. |
| * or exit of this srun may cause the job script to exit. |
| * wait for the spawned steps. */ |
| spawn_job_wait(); |
| |
| return SLURM_SUCCESS; |
| } |
| |
| extern int init(void) |
| { |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * Plugin finalization hook: stop the PMI2 agent and run the stepd cleanup.
 */
extern void fini(void)
{
	/* cleanup after ourselves */
	pmi2_stop_agent();
	pmi2_cleanup_stepd();
}
| |
| extern void mpi_p_conf_options(s_p_options_t **full_options, int *full_opt_cnt) |
| { |
| } |
| |
| extern void mpi_p_conf_set(s_p_hashtbl_t *tbl) |
| { |
| } |
| |
| extern s_p_hashtbl_t *mpi_p_conf_get(void) |
| { |
| return NULL; |
| } |
| |
| extern list_t *mpi_p_conf_get_printable(void) |
| { |
| return NULL; |
| } |