| /*****************************************************************************\ |
| * job_container_plugin.c - job container plugin stub. |
| ***************************************************************************** |
| * Copyright (C) 2013 SchedMD LLC |
| * Written by Morris Jette |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include <pthread.h> |
| |
| #include "src/common/plugin.h" |
| #include "src/common/plugrack.h" |
| #include "src/common/slurm_protocol_api.h" |
| #include "src/common/xmalloc.h" |
| #include "src/common/xstring.h" |
| |
| #include "src/slurmd/common/job_container_plugin.h" |
| |
| #include "src/slurmd/slurmstepd/slurmstepd_job.h" |
| |
/*
 * Entry points resolved from one job_container plugin.
 *
 * The members are filled in positionally from the symbol name table
 * (syms[]), so the order here must match the order of the names there.
 */
typedef struct job_container_ops {
	/* create a container for a job */
	int	(*container_p_create)	(uint32_t job_id);
	/* add a proctrack container to a job's container */
	int	(*container_p_add_cont)	(uint32_t job_id, uint64_t cont_id);
	/* add a process to a job's container */
	int	(*container_p_add_pid)	(uint32_t job_id, pid_t pid,
					 uid_t uid);
	/* delete a job's container */
	int	(*container_p_delete)	(uint32_t job_id);
	/* restore container information */
	int	(*container_p_restore)	(char *dir_name, bool recover);
	/* note a change in configuration */
	void	(*container_p_reconfig)	(void);
} job_container_ops_t;
| |
/*
 * Names of the symbols looked up in each job_container plugin.
 * Keep this list in the same order as the members of job_container_ops_t.
 */
static const char *syms[] = {
	"container_p_create",
	"container_p_add_cont",
	"container_p_add_pid",
	"container_p_delete",
	"container_p_restore",
	"container_p_reconfig",
};
| |
| static job_container_ops_t *ops = NULL; |
| static plugin_context_t **g_container_context = NULL; |
| static int g_container_context_num = -1; |
| static pthread_mutex_t g_container_context_lock = |
| PTHREAD_MUTEX_INITIALIZER; |
| static bool init_run = false; |
| |
| /* |
| * Initialize the job container plugin. |
| * |
| * RET - slurm error code |
| */ |
| extern int job_container_init(void) |
| { |
| int retval = SLURM_SUCCESS; |
| char *plugin_type = "job_container"; |
| char *container_plugin_type = NULL; |
| char *last = NULL, *job_container_plugin_list, *job_container = NULL; |
| |
| if (init_run && (g_container_context_num >= 0)) |
| return retval; |
| |
| slurm_mutex_lock(&g_container_context_lock); |
| |
| if (g_container_context_num >= 0) |
| goto done; |
| |
| container_plugin_type = slurm_get_job_container_plugin(); |
| g_container_context_num = 0; /* mark it before anything else */ |
| if ((container_plugin_type == NULL) || |
| (container_plugin_type[0] == '\0')) |
| goto done; |
| |
| job_container_plugin_list = container_plugin_type; |
| while ((job_container = |
| strtok_r(job_container_plugin_list, ",", &last))) { |
| xrealloc(ops, |
| sizeof(job_container_ops_t) * |
| (g_container_context_num + 1)); |
| xrealloc(g_container_context, (sizeof(plugin_context_t *) |
| * (g_container_context_num + 1))); |
| if (strncmp(job_container, "job_container/", 14) == 0) |
| job_container += 14; /* backward compatibility */ |
| job_container = xstrdup_printf("job_container/%s", |
| job_container); |
| g_container_context[g_container_context_num] = |
| plugin_context_create( |
| plugin_type, job_container, |
| (void **)&ops[g_container_context_num], |
| syms, sizeof(syms)); |
| if (!g_container_context[g_container_context_num]) { |
| error("cannot create %s context for %s", |
| plugin_type, job_container); |
| xfree(job_container); |
| retval = SLURM_ERROR; |
| break; |
| } |
| |
| xfree(job_container); |
| g_container_context_num++; |
| job_container_plugin_list = NULL; /* for next iteration */ |
| } |
| init_run = true; |
| |
| done: |
| slurm_mutex_unlock(&g_container_context_lock); |
| xfree(container_plugin_type); |
| |
| if (retval != SLURM_SUCCESS) |
| job_container_fini(); |
| |
| return retval; |
| } |
| |
| /* |
| * Terminate the job container plugin, free memory. |
| * |
| * RET - slurm error code |
| */ |
| extern int job_container_fini(void) |
| { |
| int i, rc = SLURM_SUCCESS; |
| |
| slurm_mutex_lock(&g_container_context_lock); |
| if (!g_container_context) |
| goto done; |
| |
| init_run = false; |
| for (i = 0; i < g_container_context_num; i++) { |
| if (g_container_context[i]) { |
| if (plugin_context_destroy(g_container_context[i]) |
| != SLURM_SUCCESS) { |
| rc = SLURM_ERROR; |
| } |
| } |
| } |
| |
| xfree(ops); |
| xfree(g_container_context); |
| g_container_context_num = -1; |
| |
| done: |
| slurm_mutex_unlock(&g_container_context_lock); |
| return rc; |
| } |
| |
| /* Create a container for the specified job */ |
| extern int container_g_create(uint32_t job_id) |
| { |
| int i, rc = SLURM_SUCCESS; |
| |
| if (job_container_init()) |
| return SLURM_ERROR; |
| |
| for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); |
| i++) { |
| rc = (*(ops[i].container_p_create))(job_id); |
| } |
| |
| return rc; |
| } |
| |
| /* Add a process to the specified job's container. |
| * A proctrack containter will be generated containing the process |
| * before container_g_add_cont() is called (see below). */ |
| extern int container_g_add_pid(uint32_t job_id, pid_t pid, uid_t uid) |
| { |
| int i, rc = SLURM_SUCCESS; |
| |
| if (job_container_init()) |
| return SLURM_ERROR; |
| |
| for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); |
| i++) { |
| rc = (*(ops[i].container_p_add_pid))(job_id, pid, uid); |
| } |
| |
| return rc; |
| } |
| |
| /* Add a proctrack container (PAGG) to the specified job's container |
| * The PAGG will be the job's cont_id returned by proctrack/sgi_job */ |
| extern int container_g_add_cont(uint32_t job_id, uint64_t cont_id) |
| { |
| int i, rc = SLURM_SUCCESS; |
| |
| if (job_container_init()) |
| return SLURM_ERROR; |
| |
| for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); |
| i++) { |
| rc = (*(ops[i].container_p_add_cont))(job_id, cont_id); |
| } |
| |
| return rc; |
| } |
| |
| /* Delete the container for the specified job */ |
| extern int container_g_delete(uint32_t job_id) |
| { |
| int i, rc = SLURM_SUCCESS; |
| |
| if (job_container_init()) |
| return SLURM_ERROR; |
| |
| for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); |
| i++) { |
| rc = (*(ops[i].container_p_delete))(job_id); |
| } |
| |
| return rc; |
| } |
| |
| /* Restore container information */ |
| extern int container_g_restore(char * dir_name, bool recover) |
| { |
| int i, rc = SLURM_SUCCESS; |
| |
| if (job_container_init()) |
| return SLURM_ERROR; |
| |
| for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); |
| i++) { |
| rc = (*(ops[i].container_p_restore))(dir_name, recover); |
| } |
| |
| return rc; |
| } |
| |
| /* Note change in configuration (e.g. "DebugFlag=JobContainer" set) */ |
| extern void container_g_reconfig(void) |
| { |
| int i; |
| |
| (void) job_container_init(); |
| |
| for (i = 0; i < g_container_context_num;i++) { |
| (*(ops[i].container_p_reconfig))(); |
| } |
| |
| return; |
| } |
| |