| /*****************************************************************************\ |
| * src/slurmd/slurmstepd/slurmstepd_job.h stepd_step_rec_t definition |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Copyright (C) 2013 Intel, Inc. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Mark Grondona <mgrondona@llnl.gov>. |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of Slurm, a resource management program. |
| * For details, see <https://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * Slurm is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with Slurm; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #ifndef _SLURMSTEPD_JOB_H |
| #define _SLURMSTEPD_JOB_H |
| |
| #define _GNU_SOURCE |
| |
| #include <pthread.h> |
| #include <pwd.h> |
| |
| #include "src/common/data.h" |
| #include "src/common/macros.h" |
| #include "src/common/slurm_protocol_api.h" |
| #include "src/common/slurm_protocol_defs.h" |
| #include "src/common/list.h" |
| #include "src/common/eio.h" |
| #include "src/common/env.h" |
| #include "src/common/stepd_api.h" |
| #include "src/common/xsched.h" |
| |
| /* required for rusage */ |
| #include <sys/resource.h> |
| |
/*
 * Magic number associated with step_container_t.
 * NOTE(review): presumably stored in step_container_t.magic to validate the
 * structure -- confirm against the code that allocates and frees it.
 */
#define STEP_CONTAINER_MAGIC 0xa0b9b2ba
| |
| typedef struct { |
| char *key; /* srun key for IO verification */ |
| char *tls_cert; /* srun public certificate if tls in use */ |
| slurm_addr_t resp_addr; /* response addr for task exit msg */ |
| slurm_addr_t ioaddr; /* Address to connect on for normal I/O. |
| Spawn IO uses messages to the normal |
| resp_addr. */ |
| uid_t uid; /* user id for job */ |
| uint16_t protocol_version; /* protocol_version of the srun */ |
| } srun_info_t; |
| |
/*
 * States a task can be in; stored in stepd_step_task_info_t.state.
 * Enumerators take the implicit values 0..3 in declaration order.
 */
typedef enum {
	STEPD_STEP_TASK_INIT,
	STEPD_STEP_TASK_STARTING,
	STEPD_STEP_TASK_RUNNING,
	STEPD_STEP_TASK_COMPLETE
} stepd_step_task_state_t;
| |
| typedef struct { |
| pthread_mutex_t mutex; /* mutex to protect task state */ |
| stepd_step_task_state_t state; /* task state */ |
| cpu_set_t *cpu_set; |
| |
| int id; /* local task id */ |
| uint32_t gtid; /* global task id */ |
| pid_t pid; /* task pid */ |
| |
| char *ifname; /* standard input file name */ |
| char *ofname; /* standard output file name */ |
| char *efname; /* standard error file name */ |
| int stdin_fd; /* standard input file descriptor */ |
| int stdout_fd; /* standard output file descriptor */ |
| int stderr_fd; /* standard error file descriptor */ |
| int to_stdin; /* write file descriptor for task stdin */ |
| int from_stdout;/* read file descriptor from task stdout*/ |
| int from_stderr;/* read file descriptor from task stderr*/ |
| eio_obj_t *in; /* standard input event IO object */ |
| eio_obj_t *out; /* standard output event IO object */ |
| eio_obj_t *err; /* standard error event IO object */ |
| |
| bool killed_by_cmd; /* true if task killed by our signal */ |
| bool aborted; /* true if task called abort */ |
| bool esent; /* true if exit status has been sent */ |
| bool exited; /* true if task has exited */ |
| int estatus; /* this task's exit status */ |
| struct rusage rusage; |
| |
| uint32_t argc; |
| char **argv; |
| } stepd_step_task_info_t; |
| |
| typedef struct { |
| int magic; |
| char *bundle; /* OCI Container Bundle path */ |
| data_t *config; /* OCI Container config.json contents */ |
| char *mount_spool_dir; /* target path to mount container spool dir */ |
| char *rootfs; /* path to container rootfs */ |
| char *spool_dir; /* path to slurmd's spool dir for container */ |
| char *task_spool_dir; /* path to slurmd's spool dir for container task */ |
| } step_container_t; |
| |
| typedef struct { |
| char *alias_list; /* node name to address aliases */ |
| step_container_t *container; /* populated if step is a container */ |
| slurmstepd_state_t state; /* Job state */ |
| pthread_cond_t state_cond; /* Job state conditional */ |
| pthread_mutex_t state_mutex; /* Job state mutex */ |
| slurm_step_id_t step_id; /* Current step id (or NO_VAL) */ |
| uint32_t array_job_id; /* job array master job ID */ |
| uint32_t array_task_id; /* job array ID */ |
| uint32_t nnodes; /* number of nodes in current job */ |
| uint32_t ntasks; /* total number of tasks in current job */ |
| uint32_t nodeid; /* relative position of this node in job */ |
| uint32_t node_tasks; /* number of tasks on *this* node */ |
| slurm_addr_t *node_addrs; /* allocated node addrs -- from cred */ |
| char *node_list; /* allocated nodes -- from cred */ |
| uint32_t het_job_id; /* Hetjob ID or NO_VAL */ |
| uint32_t het_job_nnodes; /* total node count for entire hetjob */ |
| char *het_job_node_list; /* Hetjob step node list */ |
| uint32_t het_job_node_offset;/* Hetjob node offset or NO_VAL */ |
| uint32_t het_job_ntasks; /* total task count for entire hetjob */ |
| uint32_t het_job_offset; /* Hetjob offset or NO_VAL */ |
| uint32_t het_job_step_cnt; /* number of steps for entire hetjob */ |
| uint32_t *het_job_step_task_cnts; /* ntasks on each comp of hetjob */ |
| uint32_t het_job_task_offset;/* Hetjob task offset or NO_VAL */ |
| uint16_t *het_job_task_cnts; /* Number of tasks on each node in hetjob */ |
| uint32_t **het_job_tids; /* Task IDs on each node of hetjob */ |
| uint32_t *het_job_tid_offsets;/* map of tasks (by id) to originating hetjob*/ |
| uint16_t *task_cnts; /* Number of tasks on each node in job */ |
| uint32_t cpus_per_task; /* number of cpus desired per task */ |
| uint32_t debug; /* debug level for job slurmd */ |
| uint64_t job_mem; /* MB of memory reserved for the job */ |
| uint64_t step_mem; /* MB of memory reserved for the step */ |
| uint16_t cpus; /* number of cpus to use for this job */ |
| uint32_t argc; /* number of commandline arguments */ |
| char **env; /* job environment */ |
| char **argv; /* job argument vector */ |
| char *cwd; /* path to current working directory */ |
| task_dist_states_t task_dist;/* -m distribution */ |
| char *node_name; /* node name of node running job |
| * needed for front-end systems */ |
| cpu_bind_type_t cpu_bind_type; /* --cpu-bind= */ |
| char *cpu_bind; /* binding map for map/mask_cpu */ |
| mem_bind_type_t mem_bind_type; /* --mem-bind= */ |
| char *mem_bind; /* binding map for tasks to memory */ |
| uint16_t accel_bind_type; /* --accel_bind= */ |
| uint32_t cpu_freq_min; /* Minimum cpu frequency */ |
| uint32_t cpu_freq_max; /* Maximum cpu frequency */ |
| uint32_t cpu_freq_gov; /* cpu frequency governor */ |
| dynamic_plugin_data_t *switch_step; /* switch-specific job information */ |
| uid_t uid; /* user id for job */ |
| char *user_name; |
| /* fields from the launch cred used to support nss_slurm */ |
| char *pw_gecos; |
| char *pw_dir; |
| char *pw_shell; |
| gid_t gid; /* group ID for job */ |
| int ngids; /* length of the following gids array */ |
| char **gr_names; |
| gid_t *gids; /* array of gids for user specified in uid */ |
| bool aborted; /* true if already aborted */ |
| bool batch; /* true if this is a batch job */ |
| bool run_prolog; /* true if need to run prolog */ |
| time_t timelimit; /* time at which job must stop */ |
| uint32_t profile; /* Level of acct_gather_profile */ |
| char *task_prolog; /* per-task prolog */ |
| char *task_epilog; /* per-task epilog */ |
| stepd_step_task_info_t **task; /* array of task information pointers*/ |
| eio_handle_t *eio; |
| list_t *sruns; /* list of srun_info_t pointers */ |
| list_t *clients; /* list of struct client_io_info pointers */ |
| list_t *stdout_eio_objs;/* list of objs that gather stdout from tasks */ |
| list_t *stderr_eio_objs;/* list of objs that gather stderr from tasks */ |
| list_t *free_incoming; /* list of free struct io_buf * for incoming |
| * traffic. "incoming" means traffic from srun |
| * to the tasks. |
| */ |
| list_t *free_outgoing; /* list of free struct io_buf * for outgoing |
| * traffic "outgoing" means traffic from the |
| * tasks to srun. |
| */ |
| int incoming_count; /* Count of total incoming message buffers |
| * including free_incoming buffers and |
| * buffers in use. |
| */ |
| int outgoing_count; /* Count of total outgoing message buffers |
| * including free_outgoing buffers and |
| * buffers in use. |
| */ |
| |
| list_t *outgoing_cache; /* cache of outgoing stdio messages |
| * used when a new client attaches |
| */ |
| |
| bool io_running; /* I/O thread running */ |
| pthread_cond_t io_cond; /* I/O thread state conditional */ |
| pthread_mutex_t io_mutex; /* I/O thread state mutex */ |
| pthread_t msgid; /* pthread id of message thread */ |
| eio_handle_t *msg_handle; /* eio handle for the message thread */ |
| |
| pid_t jmgr_pid; /* job manager pid */ |
| pid_t pgid; /* process group id for tasks */ |
| uint32_t flags; /* See LAUNCH_* flags defined in slurm_protocol_defs.h */ |
| env_t *envtp; |
| uint64_t cont_id; |
| |
| char *batchdir; |
| jobacctinfo_t *jobacct; |
| uint8_t open_mode; /* stdout/err append or truncate */ |
| list_t *options; |
| uint16_t restart_cnt; /* batch job restart count */ |
| char *job_alloc_cores; /* needed by the SPANK cpuset plugin */ |
| char *step_alloc_cores;/* needed by the SPANK cpuset plugin */ |
| list_t *job_gres_list; /* Needed by GRES plugin */ |
| list_t *step_gres_list; /* Needed by GRES plugin */ |
| char *tres_bind; /* TRES binding */ |
| char *tres_freq; /* TRES frequency */ |
| time_t job_end_time; /* job end time */ |
| char *job_licenses; /* Licenses allocated to job */ |
| time_t job_start_time; /* job start time */ |
| launch_tasks_request_msg_t *msg; /* When a non-batch step this |
| * is the message sent. DO |
| * NOT FREE, IT IS JUST A |
| * POINTER. */ |
| uint16_t job_core_spec; /* count of specialized cores */ |
| bool oom_error; /* step out of memory error */ |
| |
| uint16_t x11; /* only set for extern step */ |
| int x11_display; /* display number if x11 forwarding setup */ |
| char *x11_alloc_host; /* remote host to proxy through */ |
| uint16_t x11_alloc_port; /* remote port to proxy through */ |
| char *x11_magic_cookie; /* xauth magic cookie value */ |
| char *x11_target; /* remote target. unix socket if port == 0 */ |
| uint16_t x11_target_port; /* remote x11 port to connect back to */ |
| char *x11_xauthority; /* temporary XAUTHORITY location, or NULL */ |
| |
| char *selinux_context; |
| char *stepmgr; |
| bool oom_kill_step; |
| } stepd_step_rec_t; |
| |
| |
| stepd_step_rec_t * stepd_step_rec_create(launch_tasks_request_msg_t *msg, |
| uint16_t protocol_version); |
| stepd_step_rec_t * batch_stepd_step_rec_create(batch_job_launch_msg_t *msg); |
| |
| void stepd_step_rec_destroy(stepd_step_rec_t *step); |
| |
| srun_info_t *srun_info_create(slurm_cred_t *cred, char *alloc_tls_cert, |
| slurm_addr_t *respaddr, slurm_addr_t *ioaddr, |
| uid_t uid, uint16_t protocol_version); |
| |
| void srun_info_destroy(srun_info_t *srun); |
| |
| stepd_step_task_info_t * task_info_create(int taskid, int gtaskid, |
| char *ifname, char *ofname, |
| char *efname); |
| |
| /* |
| * Return a task info structure corresponding to pid. |
| * We inline it here so that it can be included from src/common/plugstack.c |
| * without undefined symbol warnings. |
| */ |
| static inline stepd_step_task_info_t * |
| job_task_info_by_pid (stepd_step_rec_t *step, pid_t pid) |
| { |
| uint32_t i; |
| |
| if (!step) |
| return NULL; |
| |
| for (i = 0; i < step->node_tasks; i++) { |
| if (step->task[i]->pid == pid) |
| return (step->task[i]); |
| } |
| return (NULL); |
| } |
| |
| #endif /* !_SLURMSTEPD_JOB_H */ |