blob: fa2999e37e6c36548634061d04470ec41a6ef1dd [file] [log] [blame]
/****************************************************************************\
* slurm_protocol_defs.h - definitions used for RPCs
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Copyright (C) SchedMD LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Kevin Tew <tew1@llnl.gov>.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _SLURM_PROTOCOL_DEFS_H
#define _SLURM_PROTOCOL_DEFS_H
#include <inttypes.h>
#include <sys/wait.h>
#ifdef HAVE_SYSCTLBYNAME
#if defined(__FreeBSD__)
#include <sys/types.h>
#else
#include <sys/param.h>
#endif
# include <sys/sysctl.h>
#endif
#include "slurm/slurm.h"
#include "slurm/slurmdb.h"
#include "src/common/bitstring.h"
#include "src/common/list.h"
#include "src/common/macros.h"
#include "src/common/msg_type.h"
#include "src/common/persist_conn.h"
#include "src/common/part_record.h"
#include "src/common/slurm_protocol_common.h"
#include "src/common/slurm_step_layout.h"
#include "src/common/slurmdb_defs.h"
#include "src/common/working_cluster.h"
#include "src/common/xassert.h"
#include "src/interfaces/cred.h"
/*
* This is what the UID and GID accessors return on error.
* The value is currently RedHat Linux's ID for the user "nobody".
*/
#define SLURM_AUTH_NOBODY 99
#define FORWARD_INIT 0xfffe
/* Defined job states */
#define IS_JOB_PENDING(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_PENDING)
#define IS_JOB_RUNNING(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_RUNNING)
#define IS_JOB_SUSPENDED(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_SUSPENDED)
#define IS_JOB_COMPLETE(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_COMPLETE)
#define IS_JOB_CANCELLED(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_CANCELLED)
#define IS_JOB_FAILED(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_FAILED)
#define IS_JOB_TIMEOUT(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_TIMEOUT)
#define IS_JOB_NODE_FAILED(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_NODE_FAIL)
#define IS_JOB_DEADLINE(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_DEADLINE)
#define IS_JOB_OOM(_X) \
((_X->job_state & JOB_STATE_BASE) == JOB_OOM)
#define IS_JOB_POWER_UP_NODE(_X) \
(_X->job_state & JOB_POWER_UP_NODE)
/* Derived job states */
#define IS_JOB_COMPLETING(_X) \
(_X->job_state & JOB_COMPLETING)
#define IS_JOB_CONFIGURING(_X) \
(_X->job_state & JOB_CONFIGURING)
#define IS_JOB_STARTED(_X) \
((_X->job_state & JOB_STATE_BASE) > JOB_PENDING)
#define IS_JOB_FINISHED(_X) \
((_X->job_state & JOB_STATE_BASE) > JOB_SUSPENDED)
#define IS_JOB_COMPLETED(_X) \
(IS_JOB_FINISHED(_X) && ((_X->job_state & JOB_COMPLETING) == 0))
#define IS_JOB_RESIZING(_X) \
(_X->job_state & JOB_RESIZING)
#define IS_JOB_REQUEUED(_X) \
(_X->job_state & JOB_REQUEUE)
#define IS_JOB_FED_REQUEUED(_X) \
(_X->job_state & JOB_REQUEUE_FED)
#define IS_JOB_REVOKED(_X) \
(_X->job_state & JOB_REVOKED)
#define IS_JOB_SIGNALING(_X) \
(_X->job_state & JOB_SIGNALING)
#define IS_JOB_STAGE_OUT(_X) \
(_X->job_state & JOB_STAGE_OUT)
/* DB FLAG state */
#define IS_JOB_IN_DB(_X) \
(_X->db_flags & SLURMDB_JOB_FLAG_START_R)
/* Defined node states */
#define IS_NODE_UNKNOWN(_X) \
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_UNKNOWN)
#define IS_NODE_DOWN(_X) \
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_DOWN)
#define IS_NODE_IDLE(_X) \
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_IDLE)
#define IS_NODE_ALLOCATED(_X) \
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_ALLOCATED)
#define IS_NODE_MIXED(_X) \
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_MIXED)
#define IS_NODE_FUTURE(_X) \
((_X->node_state & NODE_STATE_BASE) == NODE_STATE_FUTURE)
/* Derived node states */
#define IS_NODE_BLOCKED(_X) \
(_X->node_state & NODE_STATE_BLOCKED)
#define IS_NODE_CLOUD(_X) \
(_X->node_state & NODE_STATE_CLOUD)
#define IS_NODE_DRAIN(_X) \
(_X->node_state & NODE_STATE_DRAIN)
#define IS_NODE_DRAINING(_X) \
((_X->node_state & NODE_STATE_DRAIN) \
&& (IS_NODE_ALLOCATED(_X) || IS_NODE_MIXED(_X)))
#define IS_NODE_DRAINED(_X) \
(IS_NODE_DRAIN(_X) && !IS_NODE_DRAINING(_X))
#define IS_NODE_DYNAMIC_FUTURE(_X) \
(_X->node_state & NODE_STATE_DYNAMIC_FUTURE)
#define IS_NODE_DYNAMIC_NORM(_X) \
(_X->node_state & NODE_STATE_DYNAMIC_NORM)
#define IS_NODE_EXTERNAL(_X) \
(_X->node_state & NODE_STATE_EXTERNAL)
#define IS_NODE_COMPLETING(_X) \
(_X->node_state & NODE_STATE_COMPLETING)
#define IS_NODE_INVALID_REG(_X) \
(_X->node_state & NODE_STATE_INVALID_REG)
#define IS_NODE_PLANNED(_X) \
(_X->node_state & NODE_STATE_PLANNED)
#define IS_NODE_POWER_DOWN(_X) \
(_X->node_state & NODE_STATE_POWER_DOWN)
#define IS_NODE_POWER_UP(_X) \
(_X->node_state & NODE_STATE_POWER_UP)
#define IS_NODE_NO_RESPOND(_X) \
(_X->node_state & NODE_STATE_NO_RESPOND)
#define IS_NODE_POWERED_DOWN(_X) \
(_X->node_state & NODE_STATE_POWERED_DOWN)
#define IS_NODE_POWERING_DOWN(_X) \
(_X->node_state & NODE_STATE_POWERING_DOWN)
#define IS_NODE_FAIL(_X) \
(_X->node_state & NODE_STATE_FAIL)
#define IS_NODE_POWERING_UP(_X) \
(_X->node_state & NODE_STATE_POWERING_UP)
#define IS_NODE_MAINT(_X) \
(_X->node_state & NODE_STATE_MAINT)
#define IS_NODE_REBOOT_REQUESTED(_X) \
(_X->node_state & NODE_STATE_REBOOT_REQUESTED)
#define IS_NODE_REBOOT_ISSUED(_X) \
(_X->node_state & NODE_STATE_REBOOT_ISSUED)
#define IS_NODE_RUNNING_JOB(_X) \
(_X->comp_job_cnt || _X->run_job_cnt || _X->sus_job_cnt)
#define IS_NODE_RES(_X) \
(_X->node_state & NODE_STATE_RES)
#define IS_NODE_REBOOT_ASAP(_X) \
(IS_NODE_REBOOT_REQUESTED(_X) && IS_NODE_DRAIN(_X))
#define THIS_FILE ((strrchr(__FILE__, '/') ?: __FILE__ - 1) + 1)
#define MINUTE_SECONDS 60
#define HOUR_MINUTES 60
#define HOUR_SECONDS (HOUR_MINUTES * MINUTE_SECONDS)
#define DAY_HOURS 24
#define DAY_MINUTES (DAY_HOURS * HOUR_MINUTES)
#define YEAR_DAYS 365
#define YEAR_MINUTES (YEAR_DAYS * DAY_HOURS * HOUR_MINUTES)
#define YEAR_SECONDS (YEAR_DAYS * DAY_HOURS * HOUR_SECONDS)
/* Read as 'how many X are in a Y' */
#define MSEC_IN_SEC 1000
#define USEC_IN_SEC 1000000
#define NSEC_IN_SEC 1000000000
#define NSEC_IN_USEC 1000
#define NSEC_IN_MSEC 1000000
#define SLURMD_REG_FLAG_CONFIGLESS 0x0001
#define SLURMD_REG_FLAG_RESP 0x0002
#define RESV_FREE_STR_USER SLURM_BIT(0)
#define RESV_FREE_STR_ACCT SLURM_BIT(1)
#define RESV_FREE_STR_TRES_BB SLURM_BIT(2)
#define RESV_FREE_STR_QOS SLURM_BIT(3)
#define RESV_FREE_STR_TRES_LIC SLURM_BIT(4)
#define RESV_FREE_STR_ALLOWED_PARTS SLURM_BIT(5)
#define RESV_FREE_STR_GROUP SLURM_BIT(6)
#define RESV_FREE_STR_COMMENT SLURM_BIT(7)
#define RESV_FREE_STR_NODES SLURM_BIT(8)
#define RESV_FREE_STR_TRES SLURM_BIT(9)
#ifndef __job_record_t_defined
# define __job_record_t_defined
typedef struct job_record job_record_t;
#endif
/*
* Prototype for conmgr connection w/o including conmgr.h to avoid recursive
* includes.
*/
typedef struct conmgr_fd_s conmgr_fd_t;
typedef struct conmgr_fd_ref_s conmgr_fd_ref_t;
/*****************************************************************************\
* core api configuration struct
\*****************************************************************************/
typedef struct forward {
slurm_node_alias_addrs_t alias_addrs;
uint16_t cnt; /* number of nodes to forward to */
uint16_t init; /* tell me it has been set (FORWARD_INIT) */
char *nodelist; /* ranged string of who to forward the
* message to */
uint32_t timeout; /* original timeout increments */
uint16_t tree_width; /* what the treewidth should be */
uint16_t tree_depth; /* tree depth of this set of nodes */
} forward_t;
#define FORWARD_INITIALIZER \
{ \
.init = FORWARD_INIT, \
}
/*core api protocol message structures */
typedef struct slurm_protocol_header {
uint16_t version;
uint16_t flags;
uint16_t msg_type; /* really slurm_msg_type_t but needs to be
uint16_t for packing purposes. */
uint32_t body_length;
uint16_t ret_cnt;
forward_t forward;
slurm_addr_t orig_addr;
list_t *ret_list;
} header_t;
typedef struct forward_struct {
slurm_node_alias_addrs_t *alias_addrs;
char *buf;
int buf_len;
uint16_t fwd_cnt;
pthread_mutex_t forward_mutex;
pthread_cond_t notify;
list_t *ret_list;
uint32_t timeout;
} forward_struct_t;
typedef struct forward_message {
forward_struct_t *fwd_struct;
header_t header;
int timeout;
} forward_msg_t;
typedef struct slurm_msg {
slurm_addr_t address;
void *auth_cred;
int auth_index; /* DON'T PACK: zero for normal communication.
* index value copied from incoming connection,
* so that we'll respond with the same auth
* plugin used to connect to us originally.
*/
uid_t auth_uid; /* NEVER PACK. Authenticated uid from auth
* credential. Only valid if auth_ids_set is
* true. Set to SLURM_AUTH_NOBODY if not set
* yet.
*/
gid_t auth_gid; /* NEVER PACK. Authenticated uid from auth
* credential. Only valid if auth_ids_set is
* true. Set to SLURM_AUTH_NOBODY if not set
* yet.
*/
bool auth_ids_set; /* NEVER PACK. True when auth_uid and auth_gid
* have been set.
* This is a safety measure against handling
* a slurm_msg_t that has been xmalloc()'d but
* slurm_msg_t_init() was not called since
* auth_uid would be root.
*/
uid_t restrict_uid;
bool restrict_uid_set;
uint32_t body_offset; /* DON'T PACK: offset in buffer where body part of
buffer starts. */
buf_t *buffer; /* DON'T PACK! ptr to buffer that msg was
* unpacked from. */
persist_conn_t *conn; /* DON'T PACK OR FREE! this is here to
* distinguish a persistent connection from a
* normal connection. It should be filled in
* with the connection before sending the
* message so that it is handled correctly. */
conmgr_fd_ref_t *conmgr_con; /* msg originates from conmgr connection */
void *data;
uint16_t flags;
uint8_t hash_index; /* DON'T PACK: zero for normal communication.
* index value copied from incoming connection,
* so that we'll respond with the same hash
* plugin used to connect to us originally.
*/
char *tls_cert; /* TLS certificate for server. Only needed when server's
* cert is not already trusted (i.e. signed by a cert in
* our trust store) */
void *tls_conn; /* TLS connection associated with conn_fd used for
* sending this message and receiving a response */
uint16_t msg_type; /* really a slurm_msg_type_t but needs to be
* this way for packing purposes. message type */
uint16_t protocol_version; /* DON'T PACK! Only used if
* message coming from non-default
* slurm protocol. Initted to
* NO_VAL meaning use the default. */
/* The following were all added for the forward.c code */
forward_t forward;
forward_struct_t *forward_struct;
slurm_addr_t orig_addr;
list_t *ret_list;
} slurm_msg_t;
#define SLURM_MSG_INITIALIZER \
{ \
.auth_uid = SLURM_AUTH_NOBODY, \
.auth_gid = SLURM_AUTH_NOBODY, \
.msg_type = NO_VAL16, \
.protocol_version = NO_VAL16, \
.flags = SLURM_PROTOCOL_NO_FLAGS, \
.forward = FORWARD_INITIALIZER, \
}
typedef struct ret_data_info {
uint16_t type; /* really a slurm_msg_type_t but needs to be
* this way for packing purposes. message type */
uint32_t err;
char *node_name;
void *data; /* used to hold the return message data (i.e.
return_code_msg_t */
} ret_data_info_t;
/*****************************************************************************\
* Slurm Protocol Data Structures
\*****************************************************************************/
struct kvs_hosts {
uint32_t task_id; /* job step's task id */
uint16_t port; /* communication port */
char * hostname; /* communication host */
};
struct kvs_comm {
char * kvs_name;
uint32_t kvs_cnt; /* count of key-pairs */
char ** kvs_keys;
char ** kvs_values;
uint16_t * kvs_key_sent;
};
typedef struct kvs_comm_set {
uint16_t host_cnt; /* hosts getting this message */
struct kvs_hosts *kvs_host_ptr; /* host forwarding info */
uint16_t kvs_comm_recs; /* count of kvs_comm entries */
struct kvs_comm **kvs_comm_ptr; /* pointers to kvs_comm entries */
} kvs_comm_set_t;
typedef struct assoc_shares_object {
uint32_t assoc_id; /* association ID */
char *cluster; /* cluster name */
char *name; /* name */
char *parent; /* parent name */
char *partition; /* partition */
double shares_norm; /* normalized shares */
uint32_t shares_raw; /* number of shares allocated */
uint64_t *tres_run_secs; /* currently running tres-secs
* = grp_used_tres_run_secs */
uint64_t *tres_grp_mins; /* tres-minute limit */
double usage_efctv; /* effective, normalized usage */
double usage_norm; /* normalized usage */
uint64_t usage_raw; /* measure of TRESBillableUnits usage */
long double *usage_tres_raw; /* measure of each TRES usage */
double fs_factor; /* fairshare factor */
double level_fs; /* fairshare factor at this level. stored on an
* assoc as a long double, but that is not
* needed for display in sshare */
uint16_t user; /* 1 if user association 0 if account
* association */
} assoc_shares_object_t;
typedef struct shares_request_msg {
list_t *acct_list;
list_t *user_list;
} shares_request_msg_t;
typedef struct shares_response_msg {
list_t *assoc_shares_list; /* list of assoc_shares_object_t *'s */
uint64_t tot_shares;
uint32_t tres_cnt;
char **tres_names;
} shares_response_msg_t;
typedef struct job_notify_msg {
char * message;
slurm_step_id_t step_id;
} job_notify_msg_t;
typedef struct job_id_msg {
uint32_t job_id;
uint16_t show_flags;
} job_id_msg_t;
typedef struct job_user_id_msg {
uint32_t user_id;
uint16_t show_flags;
} job_user_id_msg_t;
typedef struct job_info_request_msg {
time_t last_update;
uint16_t show_flags;
list_t *job_ids; /* Optional list of job_ids, otherwise show all
* jobs. */
} job_info_request_msg_t;
typedef struct {
uint32_t count;
slurm_selected_step_t *job_ids;
} job_state_request_msg_t;
typedef struct {
uint16_t show_flags;
char *container_id;
uint32_t uid; /* optional filter by UID */
} container_id_request_msg_t;
typedef struct {
list_t *steps; /* list of slurm_step_id_t* */
} container_id_response_msg_t;
typedef struct job_step_info_request_msg {
time_t last_update;
slurm_step_id_t step_id;
uint16_t show_flags;
} job_step_info_request_msg_t;
typedef struct node_info_request_msg {
time_t last_update;
uint16_t show_flags;
} node_info_request_msg_t;
typedef struct node_info_single_msg {
char *node_name;
uint16_t show_flags;
} node_info_single_msg_t;
typedef struct part_info_request_msg {
time_t last_update;
uint16_t show_flags;
} part_info_request_msg_t;
typedef struct resv_info_request_msg {
time_t last_update;
} resv_info_request_msg_t;
typedef struct complete_job_allocation {
uint32_t job_id;
uint32_t job_rc;
} complete_job_allocation_msg_t;
typedef struct complete_batch_script {
jobacctinfo_t *jobacct;
uint32_t job_id;
uint32_t job_rc;
uint32_t slurm_rc;
char *node_name;
uint32_t user_id; /* user the job runs as */
} complete_batch_script_msg_t;
typedef struct complete_prolog {
uint32_t job_id;
char *node_name;
uint32_t prolog_rc;
} complete_prolog_msg_t;
typedef struct step_complete_msg {
uint32_t range_first; /* First node rank within job step's alloc */
uint32_t range_last; /* Last node rank within job step's alloc */
slurm_step_id_t step_id;
uint32_t step_rc; /* largest task return code */
jobacctinfo_t *jobacct;
bool send_to_stepmgr;
} step_complete_msg_t;
typedef struct signal_tasks_msg {
uint16_t flags;
uint16_t signal;
slurm_step_id_t step_id;
} signal_tasks_msg_t;
typedef struct epilog_complete_msg {
uint32_t job_id;
uint32_t return_code;
char *node_name;
} epilog_complete_msg_t;
#define REBOOT_FLAGS_ASAP 0x0001 /* Drain to reboot ASAP */
typedef struct reboot_msg {
char *features;
uint16_t flags;
uint32_t next_state; /* state after reboot */
char *node_list;
char *reason;
} reboot_msg_t;
typedef struct shutdown_msg {
uint16_t options;
} shutdown_msg_t;
typedef enum {
SLURMCTLD_SHUTDOWN_ALL = 0, /* all slurm daemons are shutdown */
/* 1 unused */
SLURMCTLD_SHUTDOWN_CTLD = 2, /* slurmctld only (no core file) */
} slurmctld_shutdown_type_t;
typedef struct last_update_msg {
time_t last_update;
} last_update_msg_t;
typedef struct set_debug_flags_msg {
uint64_t debug_flags_minus;
uint64_t debug_flags_plus;
} set_debug_flags_msg_t;
typedef struct set_debug_level_msg {
uint32_t debug_level;
} set_debug_level_msg_t;
typedef struct job_step_specs {
char *container; /* OCI container bundle path */
char *container_id; /* OCI container ID */
uint32_t cpu_count; /* count of required processors */
uint32_t cpu_freq_gov; /* cpu frequency governor */
uint32_t cpu_freq_max; /* Maximum cpu frequency */
uint32_t cpu_freq_min; /* Minimum cpu frequency */
char *cpus_per_tres; /* semicolon delimited list of TRES=# values */
uint16_t ntasks_per_core; /* number of tasks that can access each cpu */
uint16_t ntasks_per_tres;/* number of tasks that can access each gpu */
char *exc_nodes; /* comma separated list of nodes excluded
* from step's allocation, default NONE */
char *features; /* required node features, default NONE */
uint32_t flags; /* various flags from step_spec_flags_t */
char *host; /* host to contact initiating srun */
uint16_t immediate; /* 1 if allocate to run or fail immediately,
* 0 if to be queued awaiting resources */
uint64_t pn_min_memory; /* minimum real memory per node OR
* real memory per CPU | MEM_PER_CPU,
* default=0 (use job limit) */
char *name; /* name of the job step, default "" */
char *network; /* network use spec */
uint32_t min_nodes; /* minimum number of nodes required by job,
* default=0 */
uint32_t max_nodes; /* maximum number of nodes usable by job,
* default=0 */
char *mem_per_tres; /* semicolon delimited list of TRES=# values */
char *node_list; /* list of required nodes */
uint32_t num_tasks; /* number of tasks required */
uint16_t plane_size; /* plane size when task_dist =
SLURM_DIST_PLANE */
uint16_t port; /* port to contact initiating srun */
uint16_t relative; /* first node to use of job's allocation */
uint16_t resv_port_cnt; /* reserve ports for MPI if set */
uint32_t step_het_comp_cnt; /* How many het components in the step. Used
* for a het step inside a non-het job
* allocation. */
char *step_het_grps; /* what het groups are used by step */
slurm_step_id_t step_id;
uint32_t array_task_id; /* Array Task Id, or NO_VAL */
uint32_t srun_pid; /* PID of srun command, also see host */
char *cwd; /* path derived from cwd or --chdir */
char *std_err; /* pathname of step stderr */
char *std_in; /* pathname of step stdin */
char *std_out; /* pathname of step stdout */
char *submit_line; /* The command issued with all it's options in a
* string */
uint32_t task_dist; /* see enum task_dist_state in slurm.h */
uint32_t time_limit; /* maximum run time in minutes, default is
* partition limit */
uint16_t threads_per_core; /* step requested threads-per-core */
char *tres_bind; /* Task to TRES binding directives */
char *tres_freq; /* TRES frequency directives */
char *tres_per_step; /* semicolon delimited list of TRES=# values */
char *tres_per_node; /* semicolon delimited list of TRES=# values */
char *tres_per_socket; /* semicolon delimited list of TRES=# values */
char *tres_per_task; /* semicolon delimited list of TRES=# values */
uint32_t user_id; /* user the job runs as */
} job_step_create_request_msg_t;
typedef struct job_step_create_response_msg {
uint32_t def_cpu_bind_type; /* Default CPU bind type */
uint32_t job_id; /* assigned job id */
uint32_t job_step_id; /* assigned job step id */
char *resv_ports; /* reserved ports */
slurm_step_layout_t *step_layout; /* information about how the
* step is laid out */
char *stepmgr;
slurm_cred_t *cred; /* slurm job credential */
dynamic_plugin_data_t *switch_step; /* switch opaque data type
* Remove 3 versions after 24.11 */
uint16_t use_protocol_ver; /* Lowest protocol version running on
* the slurmd's in this step.
*/
} job_step_create_response_msg_t;
#define LAUNCH_PARALLEL_DEBUG SLURM_BIT(0)
#define LAUNCH_MULTI_PROG SLURM_BIT(1)
#define LAUNCH_PTY SLURM_BIT(2)
#define LAUNCH_BUFFERED_IO SLURM_BIT(3)
#define LAUNCH_LABEL_IO SLURM_BIT(4)
#define LAUNCH_EXT_LAUNCHER SLURM_BIT(5)
#define LAUNCH_NO_ALLOC SLURM_BIT(6)
#define LAUNCH_OVERCOMMIT SLURM_BIT(7)
#define LAUNCH_NO_SIG_FAIL SLURM_BIT(8)
#define LAUNCH_GRES_ALLOW_TASK_SHARING SLURM_BIT(9)
#define LAUNCH_WAIT_FOR_CHILDREN SLURM_BIT(10)
#define LAUNCH_KILL_ON_BAD_EXIT SLURM_BIT(11)
typedef struct launch_tasks_request_msg {
uint32_t het_job_node_offset; /* Hetjob node offset or NO_VAL */
uint32_t het_job_id; /* Hetjob ID or NO_VAL */
uint32_t het_job_nnodes; /* total node count for entire hetjob */
uint32_t het_job_ntasks; /* total task count for entire hetjob */
uint16_t *het_job_task_cnts; /* Tasks count on each node in hetjob */
uint32_t *het_job_step_task_cnts; /* ntasks on each comp of hetjob */
uint32_t **het_job_tids; /* Task IDs on each node of hetjob */
uint32_t *het_job_tid_offsets; /* map of tasks (by id) to originating
* hetjob */
uint32_t het_job_offset; /* Hetjob offset or NO_VAL */
uint32_t het_job_step_cnt; /* number of steps for entire hetjob */
uint32_t het_job_task_offset; /* Hetjob task ID offset or NO_VAL */
char *het_job_node_list; /* Hetjob step node list */
uint32_t mpi_plugin_id; /* numeric version of mpi_plugin */
uint32_t nnodes; /* number of nodes in this job step */
uint32_t ntasks; /* number of tasks in this job step */
uint16_t ntasks_per_board;/* number of tasks to invoke on each board */
uint16_t ntasks_per_core; /* number of tasks to invoke on each core */
uint16_t ntasks_per_tres; /* number of tasks that can access each gpu */
uint16_t ntasks_per_socket;/* number of tasks to invoke on
* each socket */
uint32_t ngids;
uint32_t *gids;
uint64_t job_mem_lim; /* MB of memory reserved by job per node OR
* real memory per CPU | MEM_PER_CPU,
* default=0 (no limit) */
slurm_step_id_t step_id;
uint64_t step_mem_lim; /* MB of memory reserved by step */
uint16_t *tasks_to_launch;
uint32_t envc;
uint32_t argc;
uint16_t node_cpus;
uint16_t cpus_per_task;
uint16_t *cpt_compact_array; /* Compressed per-node cpus_per_task.
* Index with slurm_get_rep_count_inx() */
uint32_t cpt_compact_cnt; /* number of elements in cpt_compact arrays */
uint32_t *cpt_compact_reps; /* number of consecutive nodes on which a
* value in cpt_compact_array is
* duplicated */
uint16_t threads_per_core;
char *tres_per_task; /* semicolon delimited list of TRES=# values */
char **env;
char **argv;
char *container; /* OCI Container Bundle Path */
char *cwd;
uint16_t cpu_bind_type; /* --cpu-bind= */
char *cpu_bind; /* binding map for map/mask_cpu */
uint16_t mem_bind_type; /* --mem-bind= */
char *mem_bind; /* binding map for tasks to memory */
uint16_t accel_bind_type; /* --accel-bind= */
char *tres_bind; /* task binding to TRES (e.g. GPUs) */
char *tres_freq; /* frequency/power for TRES (e.g. GPUs) */
uint16_t num_resp_port;
uint16_t *resp_port; /* array of available response ports */
/* Distribution at the lowest level of logical processor (lllp) */
uint32_t task_dist; /* --distribution=, -m dist */
uint32_t flags; /* See LAUNCH_* flags defined above */
uint32_t **global_task_ids;
slurm_addr_t orig_addr; /* where message really came from for io */
uint8_t open_mode; /* stdout/err append or truncate */
char *acctg_freq; /* accounting polling intervals */
uint32_t cpu_freq_min; /* Minimum cpu frequency */
uint32_t cpu_freq_max; /* Maximum cpu frequency */
uint32_t cpu_freq_gov; /* cpu frequency governor */
uint16_t job_core_spec; /* Count of specialized cores */
/********** START "normal" IO only options **********/
char *ofname; /* stdout filename pattern */
char *efname; /* stderr filename pattern */
char *ifname; /* stdin filename pattern */
uint16_t num_io_port;
uint16_t *io_port; /* array of available client IO listen ports */
/********** END "normal" IO only options **********/
char *alloc_tls_cert; /* TLS certificate for step IO */
uint32_t profile;
char *task_prolog;
char *task_epilog;
uint16_t slurmd_debug; /* remote slurmd debug level */
uint16_t cred_version; /* job credential protocol_version */
slurm_cred_t *cred; /* job credential */
dynamic_plugin_data_t *switch_step; /* switch credential for the job
* Remove 3 versions after 24.11 */
list_t *options; /* Arbitrary job options */
char *complete_nodelist;
char **spank_job_env;
uint32_t spank_job_env_size;
/* only filled out if step is SLURM_EXTERN_CONT */
uint16_t x11; /* X11 forwarding setup flags */
char *x11_alloc_host; /* host to proxy through */
uint16_t x11_alloc_port; /* port to proxy through */
char *x11_magic_cookie; /* X11 auth cookie to abuse */
char *x11_target; /* X11 target host, or unix socket */
uint16_t x11_target_port; /* X11 target port */
/* To send to stepmgr */
job_record_t *job_ptr;
list_t *job_node_array;
part_record_t *part_ptr;
char *stepmgr; /* Hostname of stepmgr */
bool oom_kill_step;
} launch_tasks_request_msg_t;
typedef struct partition_info partition_desc_msg_t;
typedef struct return_code_msg {
uint32_t return_code;
} return_code_msg_t;
typedef struct return_code2_msg {
uint32_t return_code;
char *err_msg;
} return_code2_msg_t;
typedef struct {
char *stepmgr;
slurmdb_cluster_rec_t *working_cluster_rec;
} reroute_msg_t;
/* defined in slurm.h
typedef struct network_callerid_msg {
unsigned char ip_src[16];
unsigned char ip_dst[16];
uint32_t port_src;
uint32_t port_dst;
int32_t af; // NOTE: un/packed as uint32_t
} network_callerid_msg_t; */
typedef struct network_callerid_resp {
uint32_t job_id;
uint32_t return_code;
char *node_name;
} network_callerid_resp_t;
typedef struct set_fs_dampening_factor_msg {
uint16_t dampening_factor;
} set_fs_dampening_factor_msg_t;
typedef struct control_status_msg {
uint16_t backup_inx; /* Our BackupController# index,
* between 0 and (MAX_CONTROLLERS-1) */
time_t control_time; /* Time we became primary slurmctld (or 0) */
} control_status_msg_t;
#define SIG_OOM 253 /* Dummy signal value for out of memory
* (OOM) notification. Exit status reported as
* 0:125 (0x80 is the signal flag and
* 253 - 128 = 125) */
#define SIG_TERM_KILL 991 /* Send SIGCONT + SIGTERM + SIGKILL */
#define SIG_UME 992 /* Dummy signal value for uncorrectable memory
* error (UME) notification */
#define SIG_REQUEUED 993 /* Dummy signal value to job requeue */
#define SIG_PREEMPTED 994 /* Dummy signal value for job preemption */
#define SIG_DEBUG_WAKE 995 /* Dummy signal value to wake procs stopped
* for debugger */
#define SIG_TIME_LIMIT 996 /* Dummy signal value for time limit reached */
#define SIG_ABORT 997 /* Dummy signal value to abort a job */
#define SIG_NODE_FAIL 998 /* Dummy signal value to signify node failure */
#define SIG_FAILURE 999 /* Dummy signal value to signify sys failure */
typedef struct kill_job_msg {
slurm_cred_t *cred;
char *details;
uint32_t derived_ec;
uint32_t exit_code;
uint32_t het_job_id;
list_t *job_gres_prep; /* Used to set Epilog environment variables */
uint32_t job_state;
uint32_t job_uid;
uint32_t job_gid;
char *nodes; /* Used for reliable cleanup on XCPU systems. */
char **spank_job_env;
uint32_t spank_job_env_size;
time_t start_time; /* time of job start, track job requeue */
slurm_step_id_t step_id;
time_t time; /* slurmctld's time of request */
char *work_dir;
} kill_job_msg_t;
typedef struct reattach_tasks_request_msg {
char *io_key;
uint16_t num_resp_port;
uint16_t *resp_port; /* array of available response ports */
uint16_t num_io_port;
uint16_t *io_port; /* array of available client IO ports */
slurm_step_id_t step_id;
char *tls_cert;
} reattach_tasks_request_msg_t;
typedef struct reattach_tasks_response_msg {
char *node_name;
uint32_t return_code;
uint32_t ntasks; /* number of tasks on this node */
uint32_t *gtids; /* Global task id assignments */
uint32_t *local_pids; /* list of process ids on this node */
char **executable_names; /* array of length "ntasks" */
} reattach_tasks_response_msg_t;
typedef struct prolog_launch_msg {
char *alloc_tls_cert; /* TLS certificate for client that is
* getting/has allocation
* (srun, salloc,etc.) */
slurm_cred_t *cred;
uint32_t gid;
uint32_t het_job_id; /* HetJob id or NO_VAL */
list_t *job_gres_prep; /* Used to set Prolog env vars */
uint32_t job_id; /* slurm job_id */
uint64_t job_mem_limit; /* job's memory limit, passed via cred */
uint32_t nnodes; /* count of nodes, passed via cred */
char *nodes; /* list of nodes allocated to job_step */
char **spank_job_env; /* SPANK job environment variables */
uint32_t spank_job_env_size; /* size of spank_job_env */
uint32_t uid;
char *work_dir; /* full pathname of working directory */
uint16_t x11; /* X11 forwarding setup flags */
char *x11_alloc_host; /* srun/salloc host to setup proxy */
uint16_t x11_alloc_port; /* srun/salloc port to setup proxy */
char *x11_magic_cookie; /* X11 auth cookie to abuse */
char *x11_target; /* X11 target host, or unix socket */
uint16_t x11_target_port; /* X11 target port */
/* To send to stepmgr */
job_record_t *job_ptr;
buf_t *job_ptr_buf;
list_t *job_node_array; /* node_record_t array of size
* job_ptr->node_cnt for stepmgr. */
buf_t *job_node_array_buf;
part_record_t *part_ptr;
buf_t *part_ptr_buf;
} prolog_launch_msg_t;
typedef struct batch_job_launch_msg {
char *account; /* account under which the job is running */
char *acctg_freq; /* accounting polling intervals */
uint32_t array_job_id; /* job array master job ID */
uint32_t array_task_id; /* job array ID or NO_VAL */
char *container; /* OCI Container Bundle path */
uint32_t cpu_freq_min; /* Minimum cpu frequency */
uint32_t cpu_freq_max; /* Maximum cpu frequency */
uint32_t cpu_freq_gov; /* cpu frequency governor */
uint32_t het_job_id;
uint32_t job_id;
uint32_t ngids;
uint32_t *gids;
uint32_t ntasks; /* number of tasks in this job */
uint32_t num_cpu_groups;/* elements in below cpu arrays */
uint16_t cpu_bind_type; /* This currently does not do anything
* but here in case we wanted to bind
* the batch step differently than
* using all the cpus in the
* allocation. */
char *cpu_bind; /* This currently does not do anything
* but here in case we wanted to bind
* the batch step differently than
* using all the cpus in the
* allocation. */
uint16_t *cpus_per_node;/* cpus per node */
uint32_t *cpu_count_reps;/* how many nodes have same cpu count */
uint16_t cpus_per_task; /* number of CPUs requested per task */
uint16_t job_core_spec; /* Count of specialized cores */
char *nodes; /* list of nodes allocated to job_step */
uint32_t profile; /* what to profile for the batch step */
char *script; /* the actual job script, default NONE */
buf_t *script_buf; /* the job script as a mmap buf */
char *std_err; /* pathname of stderr */
char *std_in; /* pathname of stdin */
char *qos; /* qos the job is running under */
char *std_out; /* pathname of stdout */
char *work_dir; /* full pathname of working directory */
uint32_t argc;
char **argv;
uint32_t envc; /* element count in environment */
char **environment; /* environment variables to set for job,
* name=value pairs, one per line */
uint16_t cred_version; /* job credential protocol_version */
slurm_cred_t *cred;
uint8_t open_mode; /* stdout/err append or truncate */
uint8_t overcommit; /* if resources being over subscribed */
char *partition; /* partition used to run job */
uint64_t pn_min_memory; /* minimum real memory per node OR
* real memory per CPU | MEM_PER_CPU,
* default=0 (no limit) */
uint64_t job_mem; /* memory limit for job */
uint16_t restart_cnt; /* batch job restart count */
char *resv_name; /* job's reservation */
char **spank_job_env; /* SPANK job environment variables */
uint32_t spank_job_env_size; /* size of spank_job_env */
char *tres_bind; /* task binding to TRES (e.g. GPUs),
* included for possible future use */
char *tres_freq; /* frequency/power for TRES (e.g. GPUs) */
char *tres_per_task; /* semicolon delimited list of TRES=# values */
bool oom_kill_step;
} batch_job_launch_msg_t;
typedef struct job_id_request_msg {
uint32_t job_pid; /* local process_id of a job */
} job_id_request_msg_t;
typedef struct job_id_response_msg {
uint32_t job_id; /* slurm job_id */
uint32_t return_code; /* slurm return code */
} job_id_response_msg_t;
typedef enum {
CONFIG_REQUEST_SLURM_CONF = 0,
CONFIG_REQUEST_SLURMD,
CONFIG_REQUEST_SACKD,
} config_request_flags_t;
typedef struct {
uint32_t flags; /* see config_request_flags_t */
uint16_t port; /* sackd port to push conf changes to */
} config_request_msg_t;
typedef struct {
bool exists;
bool execute;
char *file_name;
char *file_content;
/* Do not pack - used for local caching for configless clients */
int memfd_fd;
char *memfd_path;
} config_file_t;
typedef struct {
list_t *config_files;
char *slurmd_spooldir;
} config_response_msg_t;
typedef struct kvs_get_msg {
uint32_t task_id; /* job step's task id */
uint32_t size; /* count of tasks in job */
uint16_t port; /* port to be sent the kvs data */
char * hostname; /* hostname to be sent the kvs data */
} kvs_get_msg_t;
enum compress_type {
COMPRESS_OFF = 0, /* no compression */
/* = 1 was zlib */
COMPRESS_LZ4 = 2, /* lz4 compression */
};
typedef enum {
FILE_BCAST_NONE = 0, /* No flags set */
FILE_BCAST_FORCE = 1 << 0, /* replace existing file */
FILE_BCAST_LAST_BLOCK = 1 << 1, /* last file block */
FILE_BCAST_SO = 1 << 2, /* shared object */
FILE_BCAST_EXE = 1 << 3, /* executable ahead of shared object */
} file_bcast_flags_t;
typedef struct file_bcast_msg {
char *fname; /* name of the destination file */
char *exe_fname; /* name of the executable file */
uint32_t block_no; /* block number of this data */
uint16_t compress; /* compress file if set, use compress_type */
uint16_t flags; /* flags from file_bcast_flags_t */
uint16_t modes; /* access rights for destination file */
uint32_t uid; /* owner for destination file */
char *user_name;
uint32_t gid; /* group for destination file */
time_t atime; /* last access time for destination file */
time_t mtime; /* last modification time for dest file */
sbcast_cred_t *cred; /* credential for the RPC */
uint32_t block_len; /* length of this data block */
uint64_t block_offset; /* offset for this data block */
uint32_t uncomp_len; /* uncompressed length of this data block */
char *block; /* data for this block */
uint64_t file_size; /* file size */
} file_bcast_msg_t;
typedef struct multi_core_data {
uint16_t boards_per_node; /* boards per node required by job */
uint16_t sockets_per_board; /* sockets per board required by job */
uint16_t sockets_per_node; /* sockets per node required by job */
uint16_t cores_per_socket; /* cores per cpu required by job */
uint16_t threads_per_core; /* threads per core required by job */
uint16_t ntasks_per_board; /* number of tasks to invoke on each board */
uint16_t ntasks_per_socket; /* number of tasks to invoke on each socket */
uint16_t ntasks_per_core; /* number of tasks to invoke on each core */
uint16_t plane_size; /* plane size when task_dist = SLURM_DIST_PLANE */
} multi_core_data_t;
typedef struct pty_winsz {
uint16_t cols;
uint16_t rows;
} pty_winsz_t;
typedef struct forward_data_msg {
char *address;
uint32_t len;
char *data;
} forward_data_msg_t;
/* suspend_msg_t variant for internal slurm daemon communications */
typedef struct suspend_int_msg {
uint32_t job_id; /* slurm job_id */
uint16_t op; /* suspend operation, see enum suspend_opts */
} suspend_int_msg_t;
typedef struct ping_slurmd_resp_msg {
uint32_t cpu_load; /* CPU load * 100 */
uint64_t free_mem; /* Free memory in MiB */
} ping_slurmd_resp_msg_t;
typedef struct license_info_request_msg {
time_t last_update;
uint16_t show_flags;
} license_info_request_msg_t;
typedef struct bb_status_req_msg {
uint32_t argc;
char **argv;
} bb_status_req_msg_t;
typedef struct bb_status_resp_msg {
char *status_resp;
} bb_status_resp_msg_t;
typedef struct {
uint32_t uid;
} crontab_request_msg_t;
typedef struct {
char *crontab;
char *disabled_lines;
} crontab_response_msg_t;
typedef struct {
char *crontab;
list_t *jobs;
uint32_t uid;
uint32_t gid;
} crontab_update_request_msg_t;
typedef struct {
char *csr;
char *node_name;
char *token;
} tls_cert_request_msg_t;
typedef struct {
char *signed_cert;
} tls_cert_response_msg_t;
typedef enum {
DYN_NODE_NONE = 0,
DYN_NODE_FUTURE,
DYN_NODE_NORM,
} dynamic_node_type_t;
/*****************************************************************************\
* Slurm API Message Types
\*****************************************************************************/
typedef struct slurm_node_registration_status_msg {
char *arch;
uint16_t cores;
uint16_t cpus;
uint32_t cpu_load; /* CPU load * 100 */
uint8_t dynamic_type; /* dynamic registration type */
char *dynamic_conf; /* dynamic configuration */
char *dynamic_feature; /* dynamic registration feature */
uint16_t flags; /* Flags from the slurmd SLURMD_REG_FLAG_* */
uint64_t free_mem; /* Free memory in MiB */
char *cpu_spec_list; /* list of specialized CPUs */
acct_gather_energy_t *energy;
char *extra; /* arbitrary string */
char *features_active; /* Currently active node features */
char *features_avail; /* Available node features */
buf_t *gres_info; /* generic resource info */
uint32_t hash_val; /* hash value of slurm.conf and included files
* existing on node */
char *hostname; /* hostname of slurmd */
char *instance_id; /* cloud instance id */
char *instance_type; /* cloud instance type */
uint32_t job_count; /* number of associate job_id's */
uint64_t mem_spec_limit; /* memspec detected by the node */
char *node_name;
uint16_t boards;
char *os;
uint64_t real_memory;
time_t slurmd_start_time;
uint32_t status; /* node status code, same as return codes */
slurm_step_id_t *step_id; /* IDs of running job steps (if any) */
uint16_t sockets;
uint16_t threads;
time_t timestamp;
uint32_t tmp_disk;
uint32_t up_time; /* seconds since reboot */
char *version;
} slurm_node_registration_status_msg_t;
typedef struct slurm_node_reg_resp_msg {
char *node_name;
list_t *tres_list;
} slurm_node_reg_resp_msg_t;
typedef struct requeue_msg {
uint32_t job_id; /* slurm job ID (number) */
char * job_id_str; /* slurm job ID (string) */
uint32_t flags; /* JobExitRequeue | Hold | JobFailed | etc. */
} requeue_msg_t;
typedef struct {
uint32_t cluster_id; /* cluster id of cluster making request */
void *data; /* Unpacked buffer
* Only populated on the receiving side. */
buf_t *data_buffer; /* Buffer that holds an unpacked data type.
* Only populated on the sending side. */
uint32_t data_offset; /* DON'T PACK: offset where body part of buffer
* starts -- the part that gets sent. */
uint16_t data_type; /* date type to unpack */
uint16_t data_version; /* Version that data is packed with */
uint64_t fed_siblings; /* sibling bitmap of job */
uint32_t group_id; /* gid of submitted job */
uint32_t job_id; /* job_id of job - set in job_desc on receiving
* side */
uint32_t job_state; /* state of job */
uint32_t return_code; /* return code of job */
time_t start_time; /* time sibling job started */
char *resp_host; /* response host for interactive allocations */
uint32_t req_uid; /* uid of user making the request. e.g if a
cancel is happening from a user and being
passed to a remote then the uid will be the
user and not the SlurmUser. */
uint16_t sib_msg_type; /* fed_job_update_type */
char *submit_host; /* node job was submitted from */
uint16_t submit_proto_ver; /* protocol version of submission client */
uint32_t user_id; /* uid of submitted job */
} sib_msg_t;
typedef struct {
uint32_t array_job_id;
uint32_t array_task_id;
char *dependency;
bool is_array;
uint32_t job_id;
char *job_name;
uint32_t user_id;
} dep_msg_t;
typedef struct {
list_t *depend_list;
uint32_t job_id;
} dep_update_origin_msg_t;
typedef struct {
list_t *my_list; /* this list could be of any type as long as it
* is handled correctly on both ends */
} ctld_list_msg_t;
/*****************************************************************************\
* ACCOUNTING PUSHS
\*****************************************************************************/
typedef struct {
list_t *update_list; /* of type slurmdb_update_object_t *'s */
uint16_t rpc_version;
} accounting_update_msg_t;
typedef slurm_conf_t slurm_ctl_conf_info_msg_t;
/*****************************************************************************\
* Container RPCs
\*****************************************************************************/
typedef enum {
CONTAINER_ST_INVALID = 0,
/*
* UNKNOWN: initial state
*/
CONTAINER_ST_UNKNOWN = 0xae00,
/*
* CREATING:
* Official OCI state.
*
* waiting for pty allocation
* waiting for srun exec process to be ready
* waiting for job allocation
* waiting for staging plugin push to complete
*/
CONTAINER_ST_CREATING,
/*
* CREATED:
* Official OCI state.
*
* job allocated (no steps yet)
* staging plugin push done
*/
CONTAINER_ST_CREATED,
/*
* STARTING:
* waiting for srun to start step
*/
CONTAINER_ST_STARTING,
/*
* RUNNING:
* Official OCI state.
*
* job allocated and step started
*/
CONTAINER_ST_RUNNING,
/*
* STOPPING:
* waiting for step to end
* waiting for staging plugin pull to complete
* waiting for job to end
*/
CONTAINER_ST_STOPPING,
/*
* STOPPED:
* Official OCI state
*
* job and step complete
* staging plugin pull complete
* anchor exits here
*/
CONTAINER_ST_STOPPED,
CONTAINER_ST_MAX /* place holder */
} container_state_msg_status_t;
extern const char *slurm_container_status_to_str(
container_state_msg_status_t status);
typedef struct {
/* every field required by OCI runtime-spec v1.0.2 state query */
char *oci_version;
char *id; /* container-id */
container_state_msg_status_t status; /* current status */
uint32_t pid; /* pid of anchor process */
char *bundle; /* path to OCI container bundle */
list_t *annotations; /* List of config_key_pair_t */
} container_state_msg_t;
/*
* Create and init new container state message
* RET ptr to message (must free with slurm_destroy_container_state_msg())
*/
extern container_state_msg_t *slurm_create_container_state_msg(void);
extern void slurm_destroy_container_state_msg(container_state_msg_t *msg);
typedef struct {
uint32_t signal;
} container_signal_msg_t;
typedef struct {
bool force;
} container_delete_msg_t;
typedef struct {
uint32_t rc;
slurm_step_id_t step;
} container_started_msg_t;
typedef struct {
char *args;
char *env;
} container_exec_msg_t;
extern void slurm_destroy_container_exec_msg(container_exec_msg_t *msg);
/*****************************************************************************\
* SLURM MESSAGE INITIALIZATION
\*****************************************************************************/
/*
* slurm_msg_t_init - initialize a slurm message
* OUT msg - pointer to the slurm_msg_t structure which will be initialized
*/
extern void slurm_msg_t_init (slurm_msg_t *msg);
/*
* slurm_msg_t_copy - initialize a slurm_msg_t structure "dest" with
* values from the "src" slurm_msg_t structure.
* IN src - Pointer to the initialized message from which "dest" will
* be initialized.
* OUT dest - Pointer to the slurm_msg_t which will be initialized.
* NOTE: the "dest" structure will contain pointers into the contents of "src".
*/
extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src);
/* here to add \\ to all \" in a string this needs to be xfreed later */
extern char *slurm_add_slash_to_quotes(char *str);
extern list_t *slurm_copy_char_list(list_t *char_list);
extern int slurm_parse_char_list(
list_t *char_list, char *names, void *args,
int (*func_ptr)(list_t *char_list, char *name, void *args));
extern int slurm_addto_char_list(list_t *char_list, char *names);
extern int slurm_addto_char_list_with_case(list_t *char_list, char *names,
bool lower_case_normalization);
extern int slurm_addto_id_char_list(list_t *char_list, char *names, bool gid);
extern int slurm_addto_mode_char_list(list_t *char_list, char *names, int mode);
extern int slurm_addto_step_list(list_t *step_list, char *names);
extern int slurm_char_list_copy(list_t *dst, list_t *src);
extern char *slurm_char_list_to_xstr(list_t *char_list);
extern void slurm_copy_node_alias_addrs_members(slurm_node_alias_addrs_t *dest,
slurm_node_alias_addrs_t *src);
extern int slurm_find_char_exact_in_list(void *x, void *key);
extern int slurm_find_char_in_list(void *x, void *key);
extern int slurm_find_ptr_in_list(void *x, void *key);
extern int slurm_find_uint16_in_list(void *x, void *key);
extern int slurm_find_uint32_in_list(void *x, void *key);
extern int slurm_find_uint64_in_list(void *x, void *key);
extern int slurm_find_uint_in_list(void *x, void *key);
extern int slurm_find_int_in_list(void *x, void *key);
extern int slurm_find_int64_in_list(void *x, void *key);
extern void slurm_remove_char_list_from_char_list(list_t *haystack,
list_t *needles);
extern int slurm_sort_time_list_asc(const void *, const void *);
extern int slurm_sort_time_list_desc(const void *, const void *);
extern int slurm_sort_uint16_list_asc(const void *, const void *);
extern int slurm_sort_uint16_list_desc(const void *, const void *);
extern int slurm_sort_uint32_list_asc(const void *, const void *);
extern int slurm_sort_uint32_list_desc(const void *, const void *);
extern int slurm_sort_uint64_list_asc(const void *, const void *);
extern int slurm_sort_uint64_list_desc(const void *, const void *);
extern int slurm_sort_int_list_asc(const void *, const void *);
extern int slurm_sort_int_list_desc(const void *, const void *);
extern int slurm_sort_int64_list_asc(const void *, const void *);
extern int slurm_sort_int64_list_desc(const void *, const void *);
extern int slurm_sort_char_list_asc(void *, void *);
extern int slurm_sort_char_list_desc(void *, void *);
extern char **slurm_char_array_copy(int n, char **src);
/*
* Sort an unordered node_list string and remove duplicate node names.
*
* Returns an xmalloc'd node_list that is sorted.
* Caller must xfree() return value.
*/
extern char *slurm_sort_node_list_str(char *node_list);
/*
* For each token in a comma delimited job array expression set the matching
* bitmap entry.
*
* IN tok - An array expression. For example: "[1,3-5,8]"
* IN array_bitmap - Matching entries in tok are set in this bitmap.
* IN max - maximum size of the job array.
* RET true if tok is a valid array expression, v
*/
extern bool slurm_parse_array_tok(char *tok, bitstr_t *array_bitmap,
uint32_t max);
/*
* Take a string representation of an array range or comma separated values
* and translate that into a bitmap.
*
* IN str - An array expression. For example: "[1,3-5,8]"
* IN max_array_size - maximum size of the job array.
* OUT i_last_p - (optional) if not NULL, set the value pointed to by i_last_p
* to the last bit set in the array bitmap. This is only changed
* if the string was successfully parsed.
* RET array_bitmap if successful, NULL otherwise.
*/
extern bitstr_t *slurm_array_str2bitmap(char *str, uint32_t max_array_size,
int32_t *i_last_p);
/*
* Take a string identifying any part of a job and parses it into an id
*
* Formats parsed:
* 0000 - JobId
* 0000+0000 - HetJob
* 0000+0000.0000 - HetJob Step
* 0000_0000 - Array Job
* 0000_0000.0000 - Array Job Step
* 0000.0000 - Job Step
* 0000.0000+0000 - Job HetStep
*
* Rejected formats:
* 0000_0000+0000 Array HetJob (not permitted)
* 0000+0000.0000+0000 HetJob with HetStep (not permitted)
*
* IN src - identifier string
* IN/OUT id - ptr to id to be populated.
* All values are always set to NO_VAL and then populated as parsed.
* (Errors during parsing may result in partially populated ID.)
* IN max_array_size - Maximum size of a job array. May be 0 or NO_VAL if
* job array expressions are not expected.
* RET SLURM_SUCCESS or error
*/
extern int unfmt_job_id_string(const char *src, slurm_selected_step_t *id,
uint32_t max_array_size);
/*
* Dump id into string identifying a part of a job.
* Dumps same formats as unfmt_job_id_string() parsed.
* IN id - job identifier to dump
* IN/OUT dst - ptr to string to populate.
* *dst must always be NULL when called.
* Caller must xfree(*dst).
* RET SLURM_SUCCESS or error
*/
extern int fmt_job_id_string(slurm_selected_step_t *id, char **dst);
extern slurm_selected_step_t *slurm_parse_step_str(char *name);
extern resource_allocation_response_msg_t *
slurm_copy_resource_allocation_response_msg(
resource_allocation_response_msg_t *msg);
/* free message functions */
extern void slurm_free_dep_msg(dep_msg_t *msg);
extern void slurm_free_dep_update_origin_msg(dep_update_origin_msg_t *msg);
extern void slurm_free_last_update_msg(last_update_msg_t * msg);
extern void slurm_free_return_code_msg(return_code_msg_t * msg);
extern void slurm_free_return_code2_msg(return_code2_msg_t *msg);
extern void slurm_free_reroute_msg(reroute_msg_t *msg);
extern void slurm_free_job_alloc_info_msg(job_alloc_info_msg_t * msg);
extern void slurm_free_container_id_request_msg(
container_id_request_msg_t *msg);
extern void slurm_free_container_id_response_msg(
container_id_response_msg_t *msg);
extern void slurm_free_job_info_request_msg(job_info_request_msg_t *msg);
extern void slurm_free_job_state_request_msg(job_state_request_msg_t *msg);
extern void slurm_free_job_step_info_request_msg(
job_step_info_request_msg_t *msg);
extern void slurm_free_node_info_request_msg(node_info_request_msg_t *msg);
extern void slurm_free_node_info_single_msg(node_info_single_msg_t *msg);
extern void slurm_free_part_info_request_msg(part_info_request_msg_t *msg);
extern void slurm_free_sib_msg(sib_msg_t *msg);
extern void slurm_free_stats_info_request_msg(stats_info_request_msg_t *msg);
extern void slurm_free_stats_response_msg(stats_info_response_msg_t *msg);
extern void slurm_free_resv_info_request_msg(resv_info_request_msg_t *msg);
extern void slurm_free_set_debug_flags_msg(set_debug_flags_msg_t *msg);
extern void slurm_free_set_debug_level_msg(set_debug_level_msg_t *msg);
extern void slurm_destroy_assoc_shares_object(void *object);
extern void slurm_free_shares_request_msg(shares_request_msg_t *msg);
extern void slurm_free_shares_response_msg(shares_response_msg_t *msg);
extern void slurm_destroy_priority_factors(void *object);
extern void slurm_destroy_priority_factors_object(void *object);
extern void slurm_copy_priority_factors(priority_factors_t *dest,
priority_factors_t *src);
extern void slurm_free_forward_data_msg(forward_data_msg_t *msg);
extern void slurm_free_ping_slurmd_resp(ping_slurmd_resp_msg_t *msg);
#define slurm_free_timelimit_msg(msg) \
slurm_free_kill_job_msg(msg)
extern void slurm_init_reboot_msg(reboot_msg_t * msg, bool clear);
extern void slurm_free_reboot_msg(reboot_msg_t * msg);
extern void slurm_free_shutdown_msg(shutdown_msg_t * msg);
extern void slurm_free_job_desc_msg(job_desc_msg_t * msg);
extern void
slurm_free_node_registration_status_msg(slurm_node_registration_status_msg_t *
msg);
extern void slurm_free_node_reg_resp_msg(
slurm_node_reg_resp_msg_t *msg);
extern void slurm_free_job_info(job_info_t * job);
extern void slurm_free_job_info_members(job_info_t * job);
extern void slurm_free_batch_script_msg(char *msg);
extern void slurm_free_job_id_msg(job_id_msg_t * msg);
extern void slurm_free_job_user_id_msg(job_user_id_msg_t * msg);
extern void slurm_free_job_id_request_msg(job_id_request_msg_t * msg);
extern void slurm_free_job_id_response_msg(job_id_response_msg_t * msg);
extern void slurm_free_config_request_msg(config_request_msg_t *msg);
extern void slurm_free_config_response_msg(config_response_msg_t *msg);
extern void slurm_free_step_id(slurm_step_id_t *msg);
extern void slurm_free_job_launch_msg(batch_job_launch_msg_t * msg);
extern void slurm_free_update_node_msg(update_node_msg_t * msg);
extern void slurm_free_update_part_msg(update_part_msg_t * msg);
extern void slurm_free_delete_part_msg(delete_part_msg_t * msg);
extern void slurm_free_resv_desc_msg_part(resv_desc_msg_t *msg,
uint32_t res_free_flags);
extern void slurm_free_resv_desc_members(resv_desc_msg_t *msg);
extern void slurm_free_resv_desc_msg(resv_desc_msg_t * msg);
extern void slurm_free_resv_name_msg(reservation_name_msg_t * msg);
extern void slurm_free_resv_info_request_msg(resv_info_request_msg_t * msg);
extern void slurm_free_job_step_create_request_msg(
job_step_create_request_msg_t * msg);
extern void slurm_free_job_step_create_response_msg(
job_step_create_response_msg_t *msg);
extern void slurm_free_complete_job_allocation_msg(
complete_job_allocation_msg_t * msg);
extern void slurm_free_prolog_launch_msg(prolog_launch_msg_t * msg);
extern void slurm_free_complete_batch_script_msg(
complete_batch_script_msg_t * msg);
extern void slurm_free_complete_prolog_msg(
complete_prolog_msg_t * msg);
extern void slurm_free_launch_tasks_request_msg(
launch_tasks_request_msg_t * msg);
extern void slurm_free_launch_tasks_response_msg(
launch_tasks_response_msg_t * msg);
extern void slurm_free_task_exit_msg(task_exit_msg_t * msg);
extern void slurm_free_signal_tasks_msg(signal_tasks_msg_t * msg);
extern void slurm_free_reattach_tasks_request_msg(
reattach_tasks_request_msg_t * msg);
extern void slurm_free_reattach_tasks_response_msg(
reattach_tasks_response_msg_t * msg);
extern void slurm_free_kill_job_msg(kill_job_msg_t * msg);
extern void slurm_free_job_step_kill_msg(job_step_kill_msg_t * msg);
extern void slurm_free_kill_jobs_msg(kill_jobs_msg_t *msg);
extern void slurm_free_kill_jobs_resp_job_t(kill_jobs_resp_job_t *job_resp);
extern void slurm_free_kill_jobs_response_msg(kill_jobs_resp_msg_t *msg);
extern void slurm_free_epilog_complete_msg(epilog_complete_msg_t * msg);
extern void slurm_free_srun_job_complete_msg(srun_job_complete_msg_t * msg);
extern void slurm_free_srun_ping_msg(srun_ping_msg_t * msg);
extern void slurm_free_net_forward_msg(net_forward_msg_t *msg);
extern void slurm_free_srun_node_fail_msg(srun_node_fail_msg_t * msg);
extern void slurm_free_srun_step_missing_msg(srun_step_missing_msg_t * msg);
extern void slurm_free_srun_timeout_msg(srun_timeout_msg_t * msg);
extern void slurm_free_srun_user_msg(srun_user_msg_t * msg);
extern void slurm_free_suspend_msg(suspend_msg_t *msg);
extern void slurm_free_suspend_int_msg(suspend_int_msg_t *msg);
extern void slurm_free_top_job_msg(top_job_msg_t *msg);
extern void slurm_free_token_request_msg(token_request_msg_t *msg);
extern void slurm_free_token_response_msg(token_response_msg_t *msg);
extern void slurm_free_update_step_msg(step_update_request_msg_t * msg);
extern void slurm_free_resource_allocation_response_msg_members (
resource_allocation_response_msg_t * msg);
extern void slurm_free_resource_allocation_response_msg (
resource_allocation_response_msg_t * msg);
extern void slurm_free_job_step_create_response_msg(
job_step_create_response_msg_t * msg);
extern void slurm_free_submit_response_response_msg(
submit_response_msg_t * msg);
extern void slurm_free_ctl_conf(slurm_ctl_conf_info_msg_t * config_ptr);
extern void slurm_free_job_info_msg(job_info_msg_t * job_buffer_ptr);
extern void slurm_free_job_step_info_response_msg(
job_step_info_response_msg_t * msg);
extern void slurm_free_job_step_info_members (job_step_info_t * msg);
extern void slurm_free_node_info_msg(node_info_msg_t * msg);
extern void slurm_init_node_info_t(node_info_t * msg, bool clear);
extern void slurm_free_node_info_members(node_info_t * node);
extern void slurm_free_partition_info_msg(partition_info_msg_t * msg);
extern void slurm_free_partition_info_members(partition_info_t * part);
extern void slurm_free_reservation_info_msg(reserve_info_msg_t * msg);
extern void slurm_free_get_kvs_msg(kvs_get_msg_t *msg);
extern void slurm_free_kvs_comm_set(kvs_comm_set_t *msg);
extern void slurm_free_will_run_response_msg(void *data);
extern void slurm_free_reserve_info_members(reserve_info_t * resv);
extern void slurm_free_topo_info_msg(topo_info_response_msg_t *msg);
extern void slurm_free_topo_config_msg(topo_config_response_msg_t *msg);
extern void slurm_free_topo_request_msg(topo_info_request_msg_t *msg);
extern void slurm_free_file_bcast_msg(file_bcast_msg_t *msg);
extern void slurm_free_step_complete_msg(step_complete_msg_t *msg);
extern void slurm_free_job_step_stat(void *object);
extern void slurm_free_job_step_pids(void *object);
extern void slurm_free_acct_gather_node_resp_msg(
acct_gather_node_resp_msg_t *msg);
extern void slurm_free_acct_gather_energy_req_msg(
acct_gather_energy_req_msg_t *msg);
extern void slurm_free_job_notify_msg(job_notify_msg_t * msg);
extern void slurm_free_ctld_multi_msg(ctld_list_msg_t *msg);
extern void slurm_free_accounting_update_msg(accounting_update_msg_t *msg);
extern void slurm_free_requeue_msg(requeue_msg_t *);
extern int slurm_free_msg_data(slurm_msg_type_t type, void *data);
extern void slurm_free_license_info_request_msg(license_info_request_msg_t *msg);
extern uint32_t slurm_get_return_code(slurm_msg_type_t type, void *data);
extern void slurm_free_network_callerid_msg(network_callerid_msg_t *mesg);
extern void slurm_free_network_callerid_resp(network_callerid_resp_t *resp);
extern void slurm_free_node_alias_addrs_members(slurm_node_alias_addrs_t *msg);
extern void slurm_free_node_alias_addrs(slurm_node_alias_addrs_t *msg);
extern void slurm_free_set_fs_dampening_factor_msg(
set_fs_dampening_factor_msg_t *msg);
extern void slurm_free_control_status_msg(control_status_msg_t *msg);
extern void slurm_free_bb_status_req_msg(bb_status_req_msg_t *msg);
extern void slurm_free_bb_status_resp_msg(bb_status_resp_msg_t *msg);
extern void slurm_free_crontab_request_msg(crontab_request_msg_t *msg);
extern void slurm_free_crontab_response_msg(crontab_response_msg_t *msg);
extern void slurm_free_crontab_update_request_msg(
crontab_update_request_msg_t *msg);
extern void slurm_free_crontab_update_response_msg(
crontab_update_response_msg_t *msg);
extern void slurm_free_tls_cert_request_msg(tls_cert_request_msg_t *msg);
extern void slurm_free_tls_cert_response_msg(tls_cert_response_msg_t *msg);
extern void slurm_free_tls_cert_response_msg_members(tls_cert_response_msg_t
*msg);
extern void slurm_free_suspend_exc_update_msg(suspend_exc_update_msg_t *msg);
extern void slurm_free_sbcast_cred_req_msg(sbcast_cred_req_msg_t *msg);
extern const char *preempt_mode_string(uint16_t preempt_mode);
extern uint16_t preempt_mode_num(const char *preempt_mode);
extern char *log_num2string(uint16_t inx);
extern uint16_t log_string2num(const char *name);
/* Translate a burst buffer numeric value to its equivalent state string */
extern char *bb_state_string(uint16_t state);
/* Translate a burst buffer state string to its equivalent numeric value */
extern uint16_t bb_state_num(char *tok);
/* Convert HealthCheckNodeState numeric value to a string.
* Caller must xfree() the return value */
extern char *health_check_node_state_str(uint32_t node_state);
extern char *job_share_string(uint16_t shared);
extern char *job_state_string(uint32_t inx);
extern char *job_state_string_compact(uint32_t inx);
/* Caller must xfree() the return value */
extern char *job_state_string_complete(uint32_t state);
extern uint32_t job_state_num(const char *state_name);
/*
* Returns true is the node's base state is a known base state.
*/
extern bool valid_base_state(uint32_t state);
/*
* Return the string representing a given node base state.
*/
extern const char *node_state_base_string(uint32_t state);
/*
* Return the string representing a single node state flag.
*
* Clears the flag bit in the passed state variable.
*/
extern const char *node_state_flag_string_single(uint32_t *state);
/*
* Return + separated string of node state flags.
*
* Caller must xfree() the return value.
*/
extern char *node_state_flag_string(uint32_t state);
extern char *node_state_string(uint32_t inx);
extern char *node_state_string_compact(uint32_t inx);
/*
* Return node base state + flags strings.
*
* Caller must xfree() the return value.
*/
extern char *node_state_string_complete(uint32_t inx);
/*
* Return first node state flag that matches the string
*/
extern uint32_t parse_node_state_flag(char *flag_str);
extern void private_data_string(uint16_t private_data, char *str, int str_len);
extern void accounting_enforce_string(uint16_t enforce,
char *str, int str_len);
/* Validate SPANK specified job environment does not contain any invalid
* names. Log failures using info() */
extern bool valid_spank_job_env(char **spank_job_env,
uint32_t spank_job_env_size, uid_t uid);
extern char *trigger_res_type(uint16_t res_type);
extern char *trigger_type(uint32_t trig_type);
/* user needs to xfree return value */
extern char *priority_flags_string(uint16_t priority_flags);
/* user needs to xfree return value */
extern char *reservation_flags_string(reserve_info_t * resv_ptr);
/* Functions to convert burst buffer flags between strings and numbers */
extern char * slurm_bb_flags2str(uint32_t bb_flags);
extern uint32_t slurm_bb_str2flags(char *bb_str);
/* Function to convert enforce type flags between strings and numbers */
extern int parse_part_enforce_type(char *enforce_part_type, uint16_t *param);
extern char * parse_part_enforce_type_2str (uint16_t type);
/* Return true if this cluster_name is in a federation */
extern bool cluster_in_federation(void *ptr, char *cluster_name);
/* Find where cluster_name nodes start in the node_array */
extern int get_cluster_node_offset(char *cluster_name,
node_info_msg_t *node_info_ptr);
/*
* Print the char* given.
*
* Each \n will result in a new line.
* If inx is != -1 it is prepended to the string.
*/
extern void print_multi_line_string(char *user_msg, int inx,
log_level_t loglevel);
/*
* Given a numeric suffix, return the equivalent multiplier for the numeric
* portion. For example: "k" returns 1024, "KB" returns 1000, etc.
* The return value for an invalid suffix is NO_VAL64.
*/
extern uint64_t suffix_mult(char *suffix);
/*
* See if the step_id 'key' coming in matches enough of the step_id 'object'
*/
extern bool verify_step_id(slurm_step_id_t *object, slurm_step_id_t *key);
/* OUT: job_id_str - filled in with the id of the job/array
* RET: job_id_str */
extern char *slurm_get_selected_step_id(
char *job_id_str, int len,
slurm_selected_step_t *selected_step);
/*
* Translate bitmap representation of array from hex to decimal format,
* replacing array_task_str and store the bitmap in array_bitmap.
*
* IN/OUT array_task_str - job's array task string
* IN array_max_tasks - job's array_max_tasks
* OUT array_bitmap - job's array_bitmap
*/
extern void xlate_array_task_str(char **array_task_str,
uint32_t array_max_tasks,
bitstr_t **array_bitmap);
/*
* slurm_array<size>_to_value_reps - Compress array into an array that
* represents the number of repeated values
* compressed.
*
* IN array - Array of values.
* IN array_cnt - Count of elements in 'array'.
* OUT values - Array of values compressed.
* OUT values_reps - How many each corresponding element in 'values' there are.
* OUT values_cnt - Count of elements in 'values' and 'values_reps'.
*/
extern void slurm_array64_to_value_reps(uint64_t *array, uint32_t array_cnt,
uint64_t **values,
uint32_t **values_reps,
uint32_t *values_cnt);
extern void slurm_array16_to_value_reps(uint16_t *array, uint32_t array_cnt,
uint16_t **values,
uint32_t **values_reps,
uint32_t *values_cnt);
/*
* slurm_get_rep_count_inx - given a compressed array of counts and the actual
* index you are looking for if the array was
* uncompressed return the matching index for the
* compressed array.
* IN rep_count - compressed array
* IN rep_count_size - size of compressed_array
* IN inx - uncompressed index
*
* RET - compressed index or -1 on failure.
*/
extern int slurm_get_rep_count_inx(
uint32_t *rep_count, uint32_t rep_count_size, int inx);
/*
* slurm_format_tres_string - given a TRES type and a tres-per-* string,
* will modify the string from the original
* colon-separated tres request format to the new
* '/' separating the TRES type from the tres
* request. This will work even if the request name
* or grestype with the TRES type.
* IN s - tres-per-* tres request string. This will be modified to fit the new
* format.
* IN tres_type - the type of tres to replace in the tres request string
*
* If *s is non-NULL, it must be an xmalloc'd string.
*
* Input:
* license:testing_license:3
* Output:
* license/testing_license:3
*
* This function will not modify correctly formatted tres request strings
* Input:
* license/testing_license:3
* Output:
* license/testing_license:3
*
* Input:
* gres:gres1:type1:3,gres:gres2:type2:6
* Output:
* gres:gres1/type1:3,gres/gres2:type2:6
*
* Input
* gres:gres1_gres:type1_gres:3,gres:gres2_gres:type2_gres:6
* Output:
* gres/gres1_gres:type1_gres:3,gres/gres2_gres:type2_gres:6
*/
extern void slurm_format_tres_string(char **s, char *tres_type);
/*
* Reentrant TRES specification parse logic
* tres_type IN/OUT - type of tres we are looking for, If *tres_type is NULL we
* will fill it in
* in_val IN - initial input string
* name OUT - must be xfreed by caller
* type OUT - must be xfreed by caller
* cnt OUT - count of values
* save_ptr IN/OUT - NULL on initial call, otherwise value from previous call
* RET rc - error code
*/
extern int slurm_get_next_tres(
char **tres_type, char *in_val, char **name_ptr, char **type_ptr,
uint64_t *cnt, char **save_ptr);
/*
* Return a sub-string from full_tres_str for a specific TRES.
* full_tres_str IN - complete list of TRES.
* tres_type IN - type of tres we are looking for (NULL) for all
* num_tasks IN - number of tasks to multiply tres by (tres-per-task)
* include_tres_type IN - include the tres_type in the sub-string
* include_type IN - if a TRES has a name and type (GRES) include it in the
* sub-string.
* RET char * of tres_type we are looking for (xfreed by caller).
*/
extern char *slurm_get_tres_sub_string(
char *full_tres_str, char *tres_type, uint32_t num_tasks,
bool include_tres_type, bool include_type);
extern char *schedule_exit2string(uint16_t opcode);
extern char *bf_exit2string(uint16_t opcode);
/*
* Parse reservation request option Watts
* IN watts_str - value to parse
* IN/OUT resv_msg_ptr - msg where resv_watts member is modified
* OUT err_msg - set to an explanation of failure, if any. Don't set if NULL
*/
extern uint32_t slurm_watts_str_to_int(char *watts_str, char **err_msg);
typedef struct {
uint32_t node_count; /* number of nodes to communicate
* with */
uint16_t retry; /* if set, keep trying */
uid_t r_uid; /* receiver UID */
bool r_uid_set; /* true if receiver UID set */
slurm_addr_t *addr; /* if set will send to this
addr not hostlist */
hostlist_t *hostlist; /* hostlist containing the
* nodes we are sending to */
uint16_t protocol_version; /* protocol version to use */
slurm_msg_type_t msg_type; /* RPC to be issued */
void *msg_args; /* RPC data to be transmitted */
uint16_t msg_flags; /* Flags to be added to msg */
char *tls_cert;
} agent_arg_t;
/* Set r_uid of agent_arg */
extern void set_agent_arg_r_uid(agent_arg_t *agent_arg_ptr, uid_t r_uid);
extern void purge_agent_args(agent_arg_t *agent_arg_ptr);
/*
* validate_slurm_user - validate that the uid is authorized to see
* privileged data (either user root or SlurmUser)
* IN uid - user to validate
* RET true if permitted to run, false otherwise
*/
extern bool validate_slurm_user(uid_t uid);
/*
* validate_slurmd_user - validate that the uid is authorized to see
* privileged data (either user root or SlurmUser)
* IN uid - user to validate
* RET true if permitted to run, false otherwise
*/
extern bool validate_slurmd_user(uid_t uid);
/*
* Return the job's sharing value from job or partition value.
*/
extern uint16_t get_job_share_value(job_record_t *job_ptr);
/*
* Free stepmgr_job_info_t
*/
extern void slurm_free_stepmgr_job_info(stepmgr_job_info_t *object);
/* Resv creation msg client validation. On error err_msg is set */
extern int validate_resv_create_desc(resv_desc_msg_t *resv_msg, char **err_msg,
uint32_t *res_free_flags);
/*
* Get configured DefCpuPerGPU information from a list
* (either global or per partition list)
* Returns NO_VAL64 if configuration parameter not set
*/
extern uint64_t slurm_get_def_cpu_per_gpu(list_t *job_defaults_list);
/*
* Get configured DefMemPerGPU information from a list
* (either global or per partition list)
* Returns NO_VAL64 if configuration parameter not set
*/
extern uint64_t slurm_get_def_mem_per_gpu(list_t *job_defaults_list);
#define safe_read(fd, buf, size) do { \
size_t remaining = size; \
char *ptr = (char *) buf; \
int rc; \
while (remaining > 0) { \
rc = read(fd, ptr, remaining); \
if ((rc == 0) && (remaining == size)) { \
debug("%s:%d: %s: safe_read EOF", \
__FILE__, __LINE__, __func__); \
errno = EIO; \
goto rwfail; \
} else if (rc == 0) { \
debug("%s:%d: %s: safe_read (%zu of %d) EOF", \
__FILE__, __LINE__, __func__, \
remaining, (int)size); \
errno = EIO; \
goto rwfail; \
} else if (rc < 0) { \
if ((errno == EAGAIN) || \
(errno == EINTR) || \
(errno == EWOULDBLOCK)) \
continue; \
debug("%s:%d: %s: safe_read (%zu of %d) failed: %m", \
__FILE__, __LINE__, __func__, \
remaining, (int)size); \
goto rwfail; \
} else { \
ptr += rc; \
remaining -= rc; \
if (remaining > 0) \
debug3("%s:%d: %s: safe_read (%zu of %d) partial read", \
__FILE__, __LINE__, __func__, \
remaining, (int)size); \
} \
} \
} while (0)
#define safe_write(fd, buf, size) do { \
size_t remaining = size; \
char *ptr = (char *) buf; \
int rc; \
while(remaining > 0) { \
rc = write(fd, ptr, remaining); \
if (rc < 0) { \
if ((errno == EAGAIN) || (errno == EINTR))\
continue; \
debug("%s:%d: %s: safe_write (%zu of %d) failed: %m", \
__FILE__, __LINE__, __func__, \
remaining, (int)size); \
goto rwfail; \
} else { \
ptr += rc; \
remaining -= rc; \
if (remaining > 0) \
debug3("%s:%d: %s: safe_write (%zu of %d) partial write", \
__FILE__, __LINE__, __func__, \
remaining, (int)size); \
} \
} \
} while (0)
#endif