blob: 62eec856fc294cd7f3316288ed8ae954cd646105 [file] [log] [blame]
/*****************************************************************************\
* scrun.h - Slurm OCI runtime headers
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _SCRUN_H
#define _SCRUN_H
#include <pty.h>
#include <termios.h>
#include "src/conmgr/conmgr.h"
#include "src/common/data.h"
#include "src/common/oci_config.h"
/*
 * OCI version string. Only declared here when OCI_VERSION has not already
 * been defined as a preprocessor macro.
 */
#ifndef OCI_VERSION
extern const char *OCI_VERSION;
#endif
/*
 * Container state tracked by scrun. A single global instance (state) is
 * declared in this header; take the lock with read_lock_state() or
 * write_lock_state() and release it with unlock_state().
 */
typedef struct {
int magic; /* STATE_MAGIC */
/*
 * Only use read_lock_state(), write_lock_state() and unlock_state()
 * instead of directly locking this field.
 */
pthread_rwlock_t lock; /* lock must be used for access */
#ifndef NDEBUG
/* debug_lock must be held to access needs_lock and locked */
pthread_mutex_t debug_lock;
bool needs_lock; /* asserted true by the lock/unlock macros */
int locked; /* count of state locks currently held (debug builds only) */
#endif
/* every field required by OCI runtime-spec v1.0.2 state query. */
char *oci_version;
char *id; /* container-id */
container_state_msg_status_t status; /* current status */
pid_t pid; /* pid of anchor process */
char *bundle; /* path to OCI container bundle */
char *orig_bundle; /* original bundle path before lua override */
list_t *annotations; /* List of config_key_pair_t */
/* Internal state tracking */
char *console_socket;
bool requested_terminal; /* /process/terminal/ value */
int ptm; /* pseudoterminal master (PTM) */
int pts; /* pseudoterminal slave (PTS) */
struct winsize tty_size; /* size props of calling terminal */
char *pid_file; /* full path for pid file */
int pid_file_fd; /* file descriptor for pid file */
bool existing_allocation; /* running an existing job allocation */
uint32_t jobid; /* assigned jobID */
bool job_completed; /* has job been completed */
bool staged_out; /* stage out done */
bool staged_in; /* true if stage_in() called */
int srun_pid; /* pid of the srun running job */
int srun_rc; /* exit_code from srun exit (only valid if srun_exited) */
bool srun_exited; /* true if we reaped the srun pid already */
char *anchor_socket; /* path to anchor msg socket */
char *spool_dir; /* container specific work space */
char **job_env; /* env to hand to srun */
char **spank_job_env; /* spank set env */
char *config_file; /* path to config.json */
uint32_t user_id; /* user job is running as */
uint32_t group_id; /* group job is running as */
char *root_dir; /* path to root directory to hold working files */
char *root_path; /* contents of /root/path where container files are */
char *orig_root_path; /* original root path before lua override */
int requested_signal; /* signal to send or zero if not requested */
bool force; /* user requested force argument */
data_t *config; /* container's config.json */
list_t *start_requests; /* list of (blocking_req_t *) */
list_t *delete_requests; /* list of (blocking_req_t *) */
conmgr_fd_t *startup_con; /* conmgr connection for the SIGCHLD handler */
} state_t;
/*
 * Global container state. Take the lock with read_lock_state() or
 * write_lock_state() and release it with unlock_state().
 */
extern state_t state;
/* NOTE(review): presumably the loaded oci.conf configuration -- confirm */
extern oci_conf_t *oci_conf;
/* logging options */
extern log_options_t log_opt;
extern log_facility_t log_fac;
extern char *log_file;
extern char *log_format;
/* NOTE(review): presumably applies the logging options above -- confirm */
extern void update_logging(void);
/*
 * Called by the debug-build lock macros while state.debug_lock is held.
 * NOTE(review): presumably sanity checks the global state -- confirm
 */
extern void check_state(void);
/* Set up / tear down the global state (implemented outside this header) */
extern void init_state(void);
extern void destroy_state(void);
#ifndef NDEBUG
/*
 * Debug builds: wrap the state rwlock with bookkeeping.
 *
 * state.needs_lock and state.locked are only ever read or written while
 * state.debug_lock is held. That includes the debug3() logging, so every
 * macro logs before releasing debug_lock.
 */

/* Take the state lock for reading and bump the held-lock counter */
#define read_lock_state() \
	do { \
		slurm_rwlock_rdlock(&state.lock); \
		slurm_mutex_lock(&state.debug_lock); \
		xassert(state.needs_lock); \
		xassert(state.locked >= 0); \
		state.locked++; \
		check_state(); \
		debug3("%s: taking state read lock needs_lock=%c locked=%d", \
		       __func__, (state.needs_lock ? 'T' : 'F'), \
		       state.locked); \
		slurm_mutex_unlock(&state.debug_lock); \
	} while (false)

/* Take the state lock for writing and bump the held-lock counter */
#define write_lock_state() \
	do { \
		slurm_rwlock_wrlock(&state.lock); \
		slurm_mutex_lock(&state.debug_lock); \
		xassert(state.needs_lock); \
		xassert(state.locked >= 0); \
		state.locked++; \
		check_state(); \
		/* log while debug_lock is still held (matches read path) */ \
		debug3("%s: taking state write lock needs_lock=%c locked=%d", \
		       __func__, (state.needs_lock ? 'T' : 'F'), \
		       state.locked); \
		slurm_mutex_unlock(&state.debug_lock); \
	} while (false)

/*
 * Release a lock taken with read_lock_state() or write_lock_state().
 * The counter is decremented before the rwlock itself is released.
 */
#define unlock_state() \
	do { \
		slurm_mutex_lock(&state.debug_lock); \
		debug3("%s: unlock state needs_lock=%c locked=%d", \
		       __func__, (state.needs_lock ? 'T' : 'F'), \
		       state.locked); \
		check_state(); \
		xassert(state.needs_lock); \
		xassert(state.locked > 0); \
		state.locked--; \
		slurm_mutex_unlock(&state.debug_lock); \
		slurm_rwlock_unlock(&state.lock); \
	} while (false)
#else /* NDEBUG */
/* Release builds: plain rwlock operations without any bookkeeping */
#define read_lock_state() slurm_rwlock_rdlock(&state.lock)
#define write_lock_state() slurm_rwlock_wrlock(&state.lock)
#define unlock_state() slurm_rwlock_unlock(&state.lock)
#endif /* NDEBUG */
/*
 * Command entry points.
 * NOTE(review): presumably one per scrun subcommand (create, start, state,
 * kill, delete, version), each returning SLURM_SUCCESS or error -- confirm
 * against the implementations.
 */
extern int command_create(void);
extern int command_start(void);
extern int command_state(void);
extern int command_kill(void);
extern int command_delete(void);
extern int command_version(void);
/*
 * Fork and start running anchor process
 */
extern int spawn_anchor(void);
/*
 * Send RPC to anchor and get reply message
 * WARNING: will not free original msg
 * WARNING: caller must clean up returned msg with slurm_free_msg()
 * IN msg - RPC to send (not freed by this call)
 * IN/OUT ptr_resp - presumably populated with the reply message -- TODO
 *	confirm against the implementation
 * IN id - NOTE(review): presumably the container id -- confirm
 * IN/OUT conn_fd - use this if not -1 and set if not NULL
 */
extern int send_rpc(slurm_msg_t *msg, slurm_msg_t **ptr_resp, const char *id,
int *conn_fd);
/*
 * Attempt to connect to anchor and get current state
 * RET - SLURM_SUCCESS or error
 */
extern int get_anchor_state(void);
/*
 * Request allocation for job
 * IN conmgr_args - callback arguments supplied by conmgr
 * IN arg - ptr to conmgr
 */
extern void get_allocation(conmgr_callback_args_t conmgr_args, void *arg);
/*
 * Callback after allocation success
 * IN conmgr_args - callback arguments supplied by conmgr
 * IN arg - NOTE(review): meaning not documented here -- confirm
 */
extern void on_allocation(conmgr_callback_args_t conmgr_args, void *arg);
/*
 * Stop and (eventually) cleanup anchor
 * IN status - error code for why we are stopping or SLURM_SUCCESS
 */
extern void stop_anchor(int status);
/* Run srun container command against job */
extern void exec_srun_container(void);
/*
 * Convert data array of arguments into argv[] for execv()
 * IN args - data array of arguments
 * RET argv[] array
 */
extern char **create_argv(data_t *args);
/*
 * Initialize the staging, i.e. load the Lua script
 */
extern void init_lua(void);
/*
 * Release the Lua script
 */
extern void destroy_lua(void);
/*
 * Call the Lua stage-in script
 * RET SLURM_SUCCESS or error
 */
extern int stage_in(void);
/*
 * Call the Lua stage-out script
 * RET SLURM_SUCCESS or error
 */
extern int stage_out(void);
/*
 * Change the container status. Normally called through the wrapper macros
 * below, which pass the calling function's name as src.
 * IN status - new status
 * IN force - set by change_status_force() ("ignore reverse status change")
 * IN src - name of the calling function (__func__ in the wrappers)
 * IN locked - NOTE(review): presumably true when the caller already holds
 *	the state lock -- confirm against the implementation
 */
extern void change_status_funcname(container_state_msg_status_t status,
bool force, const char *src, bool locked);
/* change status with force=false and locked=false */
#define change_status(status) \
change_status_funcname(status, false, __func__, false)
/* change status with force=false and locked=true */
#define change_status_locked(status) \
change_status_funcname(status, false, __func__, true)
/* ignore reverse status change (force=true, locked=false) */
#define change_status_force(status) \
change_status_funcname(status, true, __func__, false)
#endif