| /*****************************************************************************\ |
| * other_select.c - node selection plugin wrapper for other select plugins. |
| * |
| * NOTE: The node selection plugin itself is intimately tied to slurmctld |
| * functions and data structures. Some related functions (e.g. data structure |
| * un/packing, environment variable setting) are required by most Slurm |
| * commands. Since some of these commands must be executed on the BlueGene |
| * front-end nodes, the functions they require are here rather than within |
| * the plugin. This is because functions required by the plugin can not be |
| * resolved on the front-end nodes, so we can't load the plugins there. |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2009 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov>. |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include <dirent.h> |
| #include <pthread.h> |
| |
| #include "other_select.h" |
| #include "src/common/plugin.h" |
| #include "src/common/plugrack.h" |
| #include "src/interfaces/select.h" |
| #include "src/common/slurm_protocol_api.h" |
| #include "src/common/xstring.h" |
| |
/*
 * Select-type parameter flags used to pick the wrapped plugin.
 * While this is zero, other_select_init() fills it in from
 * slurm_conf.select_type_param before choosing a plugin.
 */
uint16_t other_select_type_param = 0;
| |
/*
 * Symbol names resolved from the wrapped select plugin.
 * The order and count must be synchronized with slurm_select_ops_t in
 * select.h.
 */
const char *node_select_syms[] = {
	"plugin_id",

	/* global state */
	"select_p_state_save",
	"select_p_state_restore",
	"select_p_job_init",
	"select_p_node_init",

	/* job operations */
	"select_p_job_test",
	"select_p_job_begin",
	"select_p_job_ready",
	"select_p_job_expand",
	"select_p_job_resized",
	"select_p_job_fini",
	"select_p_job_suspend",
	"select_p_job_resume",

	/* step operations */
	"select_p_step_pick_nodes",
	"select_p_step_start",
	"select_p_step_finish",

	/* nodeinfo operations */
	"select_p_select_nodeinfo_pack",
	"select_p_select_nodeinfo_unpack",
	"select_p_select_nodeinfo_alloc",
	"select_p_select_nodeinfo_free",
	"select_p_select_nodeinfo_set_all",
	"select_p_select_nodeinfo_set",
	"select_p_select_nodeinfo_get",

	/* jobinfo operations */
	"select_p_select_jobinfo_alloc",
	"select_p_select_jobinfo_free",
	"select_p_select_jobinfo_set",
	"select_p_select_jobinfo_get",
	"select_p_select_jobinfo_copy",
	"select_p_select_jobinfo_pack",
	"select_p_select_jobinfo_unpack",

	/* miscellaneous */
	"select_p_get_info_from_plugin",
	"select_p_reconfigure",
};
| |
| static slurm_select_ops_t ops; |
| static plugin_context_t *g_context = NULL; |
| static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER; |
| static bool init_run = false; |
| |
| /* |
| * Initialize context for node selection plugin |
| */ |
| extern int other_select_init(void) |
| { |
| int retval = SLURM_SUCCESS; |
| char *plugin_type = "select"; |
| char *type = NULL; |
| int n_syms; |
| |
| if (init_run && g_context) |
| return retval; |
| |
| slurm_mutex_lock(&g_context_lock); |
| |
| if (g_context) |
| goto done; |
| |
| if (!other_select_type_param) |
| other_select_type_param = slurm_conf.select_type_param; |
| |
| if (other_select_type_param & CR_OTHER_CONS_TRES) |
| type = "select/cons_tres"; |
| else |
| type = "select/linear"; |
| |
| n_syms = sizeof(node_select_syms); |
| if (n_syms != sizeof(ops)) |
| fatal("For some reason node_select_syms in " |
| "src/plugins/select/other/other_select.c differs from " |
| "slurm_select_ops_t found in src/interfaces/select.h. " |
| "node_select_syms should match what is in " |
| "src/common/node_select.c"); |
| |
| if (!(g_context = plugin_context_create( |
| plugin_type, type, (void **)&ops, |
| node_select_syms, n_syms))) { |
| error("cannot create %s context for %s", plugin_type, type); |
| retval = SLURM_ERROR; |
| goto done; |
| } |
| init_run = true; |
| |
| done: |
| slurm_mutex_unlock(&g_context_lock); |
| return retval; |
| } |
| |
| extern int other_select_fini(void) |
| { |
| int rc = SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_context_lock); |
| init_run = false; |
| if (!g_context) |
| goto fini; |
| |
| rc = plugin_context_destroy(g_context); |
| g_context = NULL; |
| fini: |
| slurm_mutex_unlock(&g_context_lock); |
| return rc; |
| } |
| |
| /* |
| * Save any global state information |
| * IN dir_name - directory into which the data can be stored |
| */ |
| extern int other_state_save(char *dir_name) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.state_save))(dir_name); |
| } |
| |
| /* |
| * Initialize context for node selection plugin and |
| * restore any global state information |
| * IN dir_name - directory from which the data can be restored |
| */ |
| extern int other_state_restore(char *dir_name) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.state_restore))(dir_name); |
| } |
| |
| /* |
| * Note the initialization of job records, issued upon restart of |
| * slurmctld and used to synchronize any job state. |
| */ |
| extern int other_job_init(List job_list) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.job_init))(job_list); |
| } |
| |
| /* |
| * Note re/initialization of node record data structure |
| * IN node_ptr - current node data |
| * IN node_count - number of node entries |
| */ |
| extern int other_node_init(void) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.node_init))(); |
| } |
| |
| /* |
| * Select the "best" nodes for given job from those available |
| * IN/OUT job_ptr - pointer to job being considered for initiation, |
| * set's start_time when job expected to start |
| * IN/OUT bitmap - map of nodes being considered for allocation on input, |
| * map of nodes actually to be assigned on output |
| * IN min_nodes - minimum number of nodes to allocate to job |
| * IN max_nodes - maximum number of nodes to allocate to job |
| * IN req_nodes - requested (or desired) count of nodes |
| * IN mode - SELECT_MODE_RUN_NOW: try to schedule job now |
| * SELECT_MODE_TEST_ONLY: test if job can ever run |
| * SELECT_MODE_WILL_RUN: determine when and where job can run |
| * IN preemptee_candidates - List of pointers to jobs which can bee preempted |
| * IN/OUT preemptee_job_list - Pointer to list of job pointers. These are the |
| * jobs to be preempted to initiate the pending job. Not set |
| * if mode=SELECT_MODE_TEST_ONLY or input pointer is NULL. |
| * Existing list is appended to. |
| * IN resv_exc_ptr - Various TRES which the job can NOT use. |
| * RET zero on success, EINVAL otherwise |
| */ |
| extern int other_job_test(job_record_t *job_ptr, bitstr_t *bitmap, |
| uint32_t min_nodes, uint32_t max_nodes, |
| uint32_t req_nodes, uint16_t mode, |
| List preemptee_candidates, List *preemptee_job_list, |
| resv_exc_t *resv_exc_ptr) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.job_test)) |
| (job_ptr, bitmap, |
| min_nodes, max_nodes, |
| req_nodes, mode, |
| preemptee_candidates, preemptee_job_list, |
| resv_exc_ptr); |
| } |
| |
| /* |
| * Note initiation of job is about to begin. Called immediately |
| * after other_job_test(). Executed from slurmctld. |
| * IN job_ptr - pointer to job being initiated |
| */ |
| extern int other_job_begin(job_record_t *job_ptr) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.job_begin))(job_ptr); |
| } |
| |
| /* |
| * determine if job is ready to execute per the node select plugin |
| * IN job_ptr - pointer to job being tested |
| * RET: -2 fatal error, -1 try again, 1 if ready to execute, |
| * 0 not ready to execute |
| */ |
| extern int other_job_ready(job_record_t *job_ptr) |
| { |
| if (other_select_init() < 0) |
| return -1; |
| |
| return (*(ops.job_ready))(job_ptr); |
| } |
| |
| /* |
| * Move the resource allocated to one job into that of another job. |
| * All resources are removed from "from_job_ptr" and moved into |
| * "to_job_ptr". Also see other_job_resized(). |
| * RET: 0 or an error code |
| */ |
| extern int other_job_expand(job_record_t *from_job_ptr, |
| job_record_t *to_job_ptr) |
| { |
| if (other_select_init() < 0) |
| return -1; |
| |
| return (*(ops.job_expand))(from_job_ptr, to_job_ptr); |
| } |
| |
| /* |
| * Modify internal data structures for a job that has decreased job size. |
| * Only support jobs shrinking. Also see other_job_expand(); |
| * RET: 0 or an error code |
| */ |
| extern int other_job_resized(job_record_t *job_ptr, node_record_t *node_ptr) |
| { |
| if (other_select_init() < 0) |
| return -1; |
| |
| return (*(ops.job_resized))(job_ptr, node_ptr); |
| } |
| |
| /* |
| * Note termination of job is starting. Executed from slurmctld. |
| * IN job_ptr - pointer to job being terminated |
| */ |
| extern int other_job_fini(job_record_t *job_ptr) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.job_fini))(job_ptr); |
| } |
| |
| /* |
| * Suspend a job. Executed from slurmctld. |
| * IN job_ptr - pointer to job being suspended |
| * indf_susp IN - set if job is being suspended indefinitely by user |
| * or admin, otherwise suspended for gang scheduling |
| * RET SLURM_SUCCESS or error code |
| */ |
| extern int other_job_suspend(job_record_t *job_ptr, bool indf_susp) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.job_suspend))(job_ptr, indf_susp); |
| } |
| |
| /* |
| * Resume a job. Executed from slurmctld. |
| * indf_susp IN - set if job is being resumed from indefinite suspend by user |
| * or admin, otherwise resume from gang scheduling |
| * IN job_ptr - pointer to job being resumed |
| * RET SLURM_SUCCESS or error code |
| */ |
| extern int other_job_resume(job_record_t *job_ptr, bool indf_susp) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.job_resume))(job_ptr, indf_susp); |
| } |
| |
| /* |
| * Select the "best" nodes for given job step from those available in |
| * a job allocation. |
| * |
| * IN/OUT job_ptr - pointer to job already allocated and running in a |
| * block where the step is to run. |
| * set's start_time when job expected to start |
| * OUT step_jobinfo - Fill in the resources to be used if not |
| * full size of job. |
| * IN node_count - How many nodes we are looking for. |
| * OUT avail_nodes - bitmap of available nodes according to the plugin |
| * (not always set). |
| * RET map of slurm nodes to be used for step, NULL on failure |
| */ |
| extern bitstr_t *other_step_pick_nodes(job_record_t *job_ptr, |
| select_jobinfo_t *jobinfo, |
| uint32_t node_count, |
| bitstr_t **avail_nodes) |
| { |
| if (other_select_init() < 0) |
| return NULL; |
| |
| return (*(ops.step_pick_nodes))(job_ptr, jobinfo, node_count, |
| avail_nodes); |
| } |
| |
| extern int other_step_start(step_record_t *step_ptr) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.step_start)) |
| (step_ptr); |
| } |
| |
| /* |
| * clear what happened in select_g_step_pick_nodes |
| * IN/OUT step_ptr - Flush the resources from the job and step. |
| * IN killing_step - if true then we are just starting to kill the step |
| * if false, the step is completely terminated |
| */ |
| extern int other_step_finish(step_record_t *step_ptr, bool killing_step) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.step_finish)) |
| (step_ptr, killing_step); |
| } |
| |
| extern int other_select_nodeinfo_pack(select_nodeinfo_t *nodeinfo, |
| buf_t *buffer, |
| uint16_t protocol_version) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.nodeinfo_pack))(nodeinfo, buffer, protocol_version); |
| } |
| |
| extern int other_select_nodeinfo_unpack(select_nodeinfo_t **nodeinfo, |
| buf_t *buffer, |
| uint16_t protocol_version) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.nodeinfo_unpack))(nodeinfo, buffer, protocol_version); |
| } |
| |
| extern select_nodeinfo_t *other_select_nodeinfo_alloc(void) |
| { |
| if (other_select_init() < 0) |
| return NULL; |
| |
| return (*(ops.nodeinfo_alloc))(); |
| } |
| |
| extern int other_select_nodeinfo_free(select_nodeinfo_t *nodeinfo) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.nodeinfo_free))(nodeinfo); |
| } |
| |
| extern int other_select_nodeinfo_set_all(void) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.nodeinfo_set_all))(); |
| } |
| |
| extern int other_select_nodeinfo_set(job_record_t *job_ptr) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.nodeinfo_set))(job_ptr); |
| } |
| |
| extern int other_select_nodeinfo_get(select_nodeinfo_t *nodeinfo, |
| enum select_nodedata_type dinfo, |
| enum node_states state, |
| void *data) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.nodeinfo_get))(nodeinfo, dinfo, state, data); |
| } |
| |
| extern select_jobinfo_t *other_select_jobinfo_alloc(void) |
| { |
| if (other_select_init() < 0) |
| return NULL; |
| |
| return (*(ops.jobinfo_alloc))();; |
| } |
| |
| /* free storage previously allocated for a select job credential |
| * IN jobinfo - the select job credential to be freed |
| */ |
| extern int other_select_jobinfo_free(select_jobinfo_t *jobinfo) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| return (*(ops.jobinfo_free))(jobinfo); |
| } |
| |
| extern int other_select_jobinfo_set(select_jobinfo_t *jobinfo, |
| enum select_jobdata_type data_type, |
| void *data) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.jobinfo_set))(jobinfo, data_type, data); |
| } |
| |
| /* get data from a select job credential |
| * IN jobinfo - updated select job credential |
| * IN data_type - type of data to enter into job credential |
| * IN/OUT data - the data to enter into job credential |
| */ |
| extern int other_select_jobinfo_get(select_jobinfo_t *jobinfo, |
| enum select_jobdata_type data_type, |
| void *data) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.jobinfo_get))(jobinfo, data_type, data); |
| } |
| |
| /* copy a select job credential |
| * IN jobinfo - the select job credential to be copied |
| * RET - the copy or NULL on failure |
| * NOTE: returned value must be freed using other_free_jobinfo |
| */ |
| extern select_jobinfo_t *other_select_jobinfo_copy( |
| select_jobinfo_t *jobinfo) |
| { |
| if (other_select_init() < 0) |
| return NULL; |
| |
| return (*(ops.jobinfo_copy))(jobinfo); |
| } |
| |
| /* pack a select job credential into a buffer in machine independent form |
| * IN jobinfo - the select job credential to be saved |
| * OUT buffer - buffer with select credential appended |
| * RET - slurm error code |
| */ |
| extern int other_select_jobinfo_pack(select_jobinfo_t *jobinfo, |
| buf_t *buffer, |
| uint16_t protocol_version) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.jobinfo_pack))(jobinfo, buffer, protocol_version); |
| } |
| |
| /* unpack a select job credential from a buffer |
| * OUT jobinfo - the select job credential read |
| * IN buffer - buffer with select credential read from current pointer loc |
| * RET - slurm error code |
| * NOTE: returned value must be freed using other_free_jobinfo |
| */ |
| extern int other_select_jobinfo_unpack(select_jobinfo_t **jobinfo, |
| buf_t *buffer, |
| uint16_t protocol_version) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.jobinfo_unpack))(jobinfo, buffer, protocol_version); |
| } |
| |
| /* |
| * Get select data from a plugin |
| * IN dinfo - type of data to get from the node record |
| * (see enum select_plugindata_info) |
| * IN/OUT data - the data to get from node record |
| */ |
| extern int other_get_info_from_plugin(enum select_plugindata_info dinfo, |
| job_record_t *job_ptr, void *data) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.get_info_from_plugin))(dinfo, job_ptr, data); |
| } |
| |
| /* |
| * Note reconfiguration or change in partition configuration |
| */ |
| extern int other_reconfigure (void) |
| { |
| if (other_select_init() < 0) |
| return SLURM_ERROR; |
| |
| return (*(ops.reconfigure))(); |
| } |