| /*****************************************************************************\ |
| * slurmctld.h - definitions of functions and structures for slurmctld use |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Portions Copyright (C) 2010 SchedMD <http://www.schedmd.com>. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov> et al. |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #ifndef _HAVE_SLURMCTLD_H |
| #define _HAVE_SLURMCTLD_H |
| |
| |
| #if HAVE_CONFIG_H |
| # include "config.h" |
| # if HAVE_INTTYPES_H |
| # include <inttypes.h> |
| # else |
| # if HAVE_STDINT_H |
| # include <stdint.h> |
| # endif |
| # endif /* HAVE_INTTYPES_H */ |
| #endif |
| |
| #include <pthread.h> |
| /* #include <stdlib.h> */ |
| #include <time.h> |
| #include <strings.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| |
| #ifdef WITH_PTHREADS |
| # include <pthread.h> |
| #endif /* WITH_PTHREADS */ |
| |
| #include "slurm/slurm.h" |
| |
| #include "src/common/bitstring.h" |
| #include "src/common/checkpoint.h" |
| #include "src/common/list.h" |
| #include "src/common/log.h" |
| #include "src/common/macros.h" |
| #include "src/common/node_conf.h" |
| #include "src/common/pack.h" |
| #include "src/common/read_config.h" /* location of slurmctld_conf */ |
| #include "src/common/job_resources.h" |
| #include "src/common/slurm_cred.h" |
| #include "src/common/slurm_protocol_api.h" |
| #include "src/common/slurm_protocol_defs.h" |
| #include "src/common/switch.h" |
| #include "src/common/timers.h" |
| #include "src/common/xmalloc.h" |
| |
| /*****************************************************************************\ |
| * GENERAL CONFIGURATION parameters and data structures |
| \*****************************************************************************/ |
| /* Maximum index for a job array. The minimum index will always be 0. */ |
| #ifndef MAX_JOB_ARRAY_VALUE |
| #define MAX_JOB_ARRAY_VALUE 1000 |
| #endif |
| |
| /* Maximum number of parallel threads used to service incoming RPCs. |
| * Since some systems schedule pthreads on a first-in, last-out basis, |
| * increasing this value is strongly discouraged. */ |
| #ifndef MAX_SERVER_THREADS |
| #define MAX_SERVER_THREADS 256 |
| #endif |
| |
| /* Checkpoint the full slurmctld state every PERIODIC_CHECKPOINT seconds */ |
| #ifndef PERIODIC_CHECKPOINT |
| #define PERIODIC_CHECKPOINT 300 |
| #endif |
| |
| /* Retry an incomplete RPC agent request every RPC_RETRY_INTERVAL seconds */ |
| #ifndef RPC_RETRY_INTERVAL |
| #define RPC_RETRY_INTERVAL 60 |
| #endif |
| |
| /* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */ |
| #ifndef PERIODIC_TIMEOUT |
| #define PERIODIC_TIMEOUT 30 |
| #endif |
| |
| /* Attempt to purge defunct job records and resend job kill requests |
| * every PURGE_JOB_INTERVAL seconds */ |
| #ifndef PURGE_JOB_INTERVAL |
| #define PURGE_JOB_INTERVAL 60 |
| #endif |
| |
| /* Process pending trigger events every TRIGGER_INTERVAL seconds */ |
| #ifndef TRIGGER_INTERVAL |
| #define TRIGGER_INTERVAL 15 |
| #endif |
| |
| /* Report current node accounting state every PERIODIC_NODE_ACCT seconds */ |
| #ifndef PERIODIC_NODE_ACCT |
| #define PERIODIC_NODE_ACCT 300 |
| #endif |
| |
| /* Pathname of group file record for checking update times */ |
| #ifndef GROUP_FILE |
| #define GROUP_FILE "/etc/group" |
| #endif |
| |
| /* Seconds to wait for backup controller response to REQUEST_CONTROL RPC */ |
| #ifndef CONTROL_TIMEOUT |
| #define CONTROL_TIMEOUT 10 /* seconds */ |
| #endif |
| |
| /*****************************************************************************\ |
| * General configuration parameters and data structures |
| \*****************************************************************************/ |
| |
| typedef struct slurmctld_config { |
| int daemonize; |
| bool resume_backup; |
| time_t boot_time; |
| time_t shutdown_time; |
| int server_thread_count; |
| |
| slurm_cred_ctx_t cred_ctx; |
| #ifdef WITH_PTHREADS |
| pthread_mutex_t thread_count_lock; |
| pthread_t thread_id_main; |
| pthread_t thread_id_save; |
| pthread_t thread_id_sig; |
| pthread_t thread_id_power; |
| pthread_t thread_id_rpc; |
| #else |
| int thread_count_lock; |
| int thread_id_main; |
| int thread_id_save; |
| int thread_id_sig; |
| int thread_id_power; |
| int thread_id_rpc; |
| #endif |
| } slurmctld_config_t; |
| |
| /* Job scheduling statistics */ |
| typedef struct diag_stats { |
| int proc_req_threads; |
| int proc_req_raw; |
| |
| uint32_t schedule_cycle_max; |
| uint32_t schedule_cycle_last; |
| uint32_t schedule_cycle_sum; |
| uint32_t schedule_cycle_counter; |
| uint32_t schedule_cycle_depth; |
| uint32_t schedule_queue_len; |
| |
| uint32_t jobs_submitted; |
| uint32_t jobs_started; |
| uint32_t jobs_completed; |
| uint32_t jobs_canceled; |
| uint32_t jobs_failed; |
| |
| uint32_t backfilled_jobs; |
| uint32_t last_backfilled_jobs; |
| uint32_t bf_cycle_counter; |
| uint32_t bf_cycle_last; |
| uint32_t bf_cycle_max; |
| uint32_t bf_cycle_sum; |
| uint32_t bf_last_depth; |
| uint32_t bf_last_depth_try; |
| uint32_t bf_depth_sum; |
| uint32_t bf_depth_try_sum; |
| uint32_t bf_queue_len; |
| uint32_t bf_queue_len_sum; |
| time_t bf_when_last_cycle; |
| uint32_t bf_active; |
| } diag_stats_t; |
| |
| extern diag_stats_t slurmctld_diag_stats; |
| extern slurmctld_config_t slurmctld_config; |
| extern int bg_recover; /* state recovery mode */ |
| extern char *slurmctld_cluster_name; /* name of cluster */ |
| extern void *acct_db_conn; |
| extern int accounting_enforce; |
| extern int association_based_accounting; |
| extern uint32_t cluster_cpus; |
| extern int with_slurmdbd; |
| extern bool load_2_4_state; |
| extern int batch_sched_delay; |
| extern int sched_interval; |
| extern bool slurmctld_init_db; |
| extern int slurmctld_primary; |
| |
| /*****************************************************************************\ |
| * NODE parameters and data structures, mostly in src/common/node_conf.h |
| \*****************************************************************************/ |
| extern uint32_t total_cpus; /* count of CPUs in the entire cluster */ |
| extern bool ping_nodes_now; /* if set, ping nodes immediately */ |
| extern bool want_nodes_reboot; /* if set, check for idle nodes */ |
| |
| /*****************************************************************************\ |
| * NODE states and bitmaps |
| * |
| * avail_node_bitmap Set if node's state is not DOWN, DRAINING/DRAINED, |
| * FAILING or NO_RESPOND (i.e. available to run a job) |
| * cg_node_bitmap Set if node in completing state |
| * idle_node_bitmap Set if node has no jobs allocated to it |
| * power_node_bitmap Set for nodes which are powered down |
| * share_node_bitmap Set if no job has been allocated exclusive access |
| * to resources on that node (cleared if the --exclusive |
| * option is specified by a job or Shared=NO is configured |
| * for the job's partition) |
| * up_node_bitmap Set if the node's state is not DOWN |
| \*****************************************************************************/ |
| extern bitstr_t *avail_node_bitmap; /* bitmap of available nodes, |
| * state not DOWN, DRAIN or FAILING */ |
| extern bitstr_t *cg_node_bitmap; /* bitmap of completing nodes */ |
| extern bitstr_t *idle_node_bitmap; /* bitmap of idle nodes */ |
| extern bitstr_t *power_node_bitmap; /* Powered down nodes */ |
| extern bitstr_t *share_node_bitmap; /* bitmap of sharable nodes */ |
| extern bitstr_t *up_node_bitmap; /* bitmap of up nodes, not DOWN */ |
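| |
| /* Illustrative sketch (not part of this API): scheduling code typically |
| * combines these bitmaps using the primitives from src/common/bitstring.h, |
| * e.g. to check whether the node with index node_inx (its offset in |
| * node_record_table_ptr) is both available and idle: |
| * |
| * if (bit_test(avail_node_bitmap, node_inx) && |
| * bit_test(idle_node_bitmap, node_inx)) { |
| * // node is up, responding and has no jobs allocated |
| * } |
| */ |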
| |
| /*****************************************************************************\ |
| * FRONT_END parameters and data structures |
| \*****************************************************************************/ |
| #define FRONT_END_MAGIC 0xfe9b82fe |
| |
| typedef struct front_end_record { |
| gid_t *allow_gids; /* zero terminated list of allowed groups */ |
| char *allow_groups; /* allowed group string */ |
| uid_t *allow_uids; /* zero terminated list of allowed users */ |
| char *allow_users; /* allowed user string */ |
| time_t boot_time; /* Time of node boot, |
| * computed from up_time */ |
| char *comm_name; /* communications path name to node */ |
| gid_t *deny_gids; /* zero terminated list of denied groups */ |
| char *deny_groups; /* denied group string */ |
| uid_t *deny_uids; /* zero terminated list of denied users */ |
| char *deny_users; /* denied user string */ |
| uint32_t job_cnt_comp; /* count of completing jobs on node */ |
| uint16_t job_cnt_run; /* count of running jobs on node */ |
| time_t last_response; /* Time of last communication */ |
| uint32_t magic; /* magic cookie to test data integrity */ |
| char *name; /* frontend node name */ |
| uint16_t node_state; /* enum node_states, ORed with |
| * NODE_STATE_NO_RESPOND if not |
| * responding */ |
| bool not_responding; /* set if fails to respond, |
| * clear after logging this */ |
| slurm_addr_t slurm_addr; /* network address */ |
| uint16_t port; /* frontend specific port */ |
| uint16_t protocol_version; /* Slurm version number */ |
| char *reason; /* reason for down frontend node */ |
| time_t reason_time; /* Time stamp when reason was set, |
| * ignore if no reason is set. */ |
| uint32_t reason_uid; /* User that set the reason, ignore if |
| * no reason is set. */ |
| time_t slurmd_start_time; /* Time of slurmd startup */ |
| char *version; /* Slurm version */ |
| } front_end_record_t; |
| |
| extern front_end_record_t *front_end_nodes; |
| extern uint16_t front_end_node_cnt; |
| extern time_t last_front_end_update; /* time of last front_end update */ |
| |
| /*****************************************************************************\ |
| * PARTITION parameters and data structures |
| \*****************************************************************************/ |
| #define PART_MAGIC 0xaefe8495 |
| |
| struct part_record { |
| char *allow_accounts; /* comma delimited list of accounts, |
| * NULL indicates all */ |
| char **allow_account_array; /* NULL terminated list of allowed |
| * accounts */ |
| char *allow_alloc_nodes;/* comma delimited list of allowed |
| * allocating nodes |
| * NULL indicates all */ |
| char *allow_groups; /* comma delimited list of groups, |
| * NULL indicates all */ |
| uid_t *allow_uids; /* zero terminated list of allowed user IDs */ |
| char *allow_qos; /* comma delimited list of qos, |
| * NULL indicates all */ |
| bitstr_t *allow_qos_bitstr; /* (DON'T PACK) associated with |
| * char *allow_qos but used internally */ |
| char *alternate; /* name of alternate partition */ |
| uint32_t def_mem_per_cpu; /* default MB memory per allocated CPU */ |
| uint32_t default_time; /* minutes, NO_VAL or INFINITE */ |
| char *deny_accounts; /* comma delimited list of denied accounts */ |
| char **deny_account_array; /* NULL terminated list of denied accounts */ |
| char *deny_qos; /* comma delimited list of denied qos */ |
| bitstr_t *deny_qos_bitstr; /* (DON'T PACK) associated with |
| * char *deny_qos but used internally */ |
| uint16_t flags; /* see PART_FLAG_* in slurm.h */ |
| uint32_t grace_time; /* default preempt grace time in seconds */ |
| uint32_t magic; /* magic cookie to test data integrity */ |
| uint32_t max_cpus_per_node; /* maximum allocated CPUs per node */ |
| uint32_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ |
| uint32_t max_nodes; /* per job or INFINITE */ |
| uint32_t max_nodes_orig;/* unscaled value (c-nodes on BlueGene) */ |
| uint32_t max_offset; /* select plugin max offset */ |
| uint16_t max_share; /* number of jobs to gang schedule */ |
| uint32_t max_time; /* minutes or INFINITE */ |
| uint32_t min_nodes; /* per job */ |
| uint32_t min_offset; /* select plugin min offset */ |
| uint32_t min_nodes_orig;/* unscaled value (c-nodes on BlueGene) */ |
| char *name; /* name of the partition */ |
| bitstr_t *node_bitmap; /* bitmap of nodes in partition */ |
| char *nodes; /* comma delimited list names of nodes */ |
| double norm_priority; /* normalized scheduling priority for |
| * jobs (DON'T PACK) */ |
| uint16_t preempt_mode; /* See PREEMPT_MODE_* in slurm/slurm.h */ |
| uint16_t priority; /* scheduling priority for jobs */ |
| uint16_t state_up; /* See PARTITION_* states in slurm.h */ |
| uint32_t total_nodes; /* total number of nodes in the partition */ |
| uint32_t total_cpus; /* total number of cpus in the partition */ |
| uint16_t cr_type; /* Custom CR values for partition (if supported by select plugin) */ |
| }; |
| |
| extern List part_list; /* list of part_record entries */ |
| extern time_t last_part_update; /* time of last part_list update */ |
| extern struct part_record default_part; /* default configuration values */ |
| extern char *default_part_name; /* name of default partition */ |
| extern struct part_record *default_part_loc; /* default partition ptr */ |
| extern uint16_t part_max_priority; /* max priority in all partitions */ |
| |
| /*****************************************************************************\ |
| * RESERVATION parameters and data structures |
| \*****************************************************************************/ |
| |
| typedef struct slurmctld_resv { |
| char *accounts; /* names of accounts permitted to use */ |
| int account_cnt; /* count of accounts permitted to use */ |
| char **account_list; /* list of accounts permitted to use */ |
| bool account_not; /* accounts in account_list NOT permitted to use */ |
| char *assoc_list; /* list of associations */ |
| uint32_t cpu_cnt; /* number of reserved CPUs */ |
| bitstr_t *core_bitmap; /* bitmap of reserved cores */ |
| uint32_t duration; /* time in seconds for this |
| * reservation to last */ |
| time_t end_time; /* end time of reservation */ |
| char *features; /* required node features */ |
| uint32_t flags; /* see RESERVE_FLAG_* in slurm.h */ |
| bool full_nodes; /* set if reservation uses full nodes */ |
| uint32_t job_pend_cnt; /* number of pending jobs */ |
| uint32_t job_run_cnt; /* number of running jobs */ |
| List license_list; /* structure with license info */ |
| char *licenses; /* required system licenses */ |
| uint16_t magic; /* magic cookie, RESV_MAGIC */ |
| bool flags_set_node; /* flags (i.e. NODE_STATE_MAINT | |
| * NODE_STATE_RES) set for nodes */ |
| char *name; /* name of reservation */ |
| bitstr_t *node_bitmap; /* bitmap of reserved nodes */ |
| uint32_t node_cnt; /* count of nodes required */ |
| char *node_list; /* list of reserved nodes or ALL */ |
| char *partition; /* name of partition to be used */ |
| struct part_record *part_ptr; /* pointer to partition used */ |
| uint32_t resv_id; /* unique reservation ID, internal use */ |
| bool run_epilog; /* set if epilog has been executed */ |
| bool run_prolog; /* set if prolog has been executed */ |
| time_t start_time; /* start time of reservation */ |
| time_t start_time_first;/* when the reservation first started */ |
| time_t start_time_prev; /* If start time was changed this is |
| * the previous start time. Needed |
| * for accounting */ |
| char *users; /* names of users permitted to use */ |
| int user_cnt; /* count of users permitted to use */ |
| uid_t *user_list; /* array of users permitted to use */ |
| bool user_not; /* user_list users NOT permitted to use */ |
| } slurmctld_resv_t; |
| |
| /*****************************************************************************\ |
| * JOB parameters and data structures |
| \*****************************************************************************/ |
| extern time_t last_job_update; /* time of last update to job records */ |
| |
| #define DETAILS_MAGIC 0xdea84e7 |
| #define JOB_MAGIC 0xf0b7392c |
| #define STEP_MAGIC 0xce593bc1 |
| |
| #define FEATURE_OP_OR 0 |
| #define FEATURE_OP_AND 1 |
| #define FEATURE_OP_XOR 2 |
| #define FEATURE_OP_XAND 3 |
| #define FEATURE_OP_END 4 /* last entry lacks separator */ |
| struct feature_record { |
| char *name; /* name of feature */ |
| uint16_t count; /* count of nodes with this feature */ |
| uint8_t op_code; /* separator, see FEATURE_OP_ above */ |
| }; |
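| |
| /* Illustrative sketch: a job constraint string such as "rack1&gpu" (a |
| * hypothetical example) would typically be parsed into a feature_list |
| * holding two feature_record entries along the lines of |
| * |
| * { .name = "rack1", .count = 0, .op_code = FEATURE_OP_AND } |
| * { .name = "gpu", .count = 0, .op_code = FEATURE_OP_END } |
| * |
| * Each record's op_code is the separator that follows it in the original |
| * expression; the final record carries FEATURE_OP_END. */ |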
| |
| /* job_details - specification of a job's constraints, |
| * can be purged after initiation */ |
| struct job_details { |
| char *acctg_freq; /* accounting polling interval */ |
| uint32_t argc; /* count of argv elements */ |
| char **argv; /* arguments for a batch job script */ |
| time_t begin_time; /* start at this time (srun --begin), |
| * resets to time first eligible |
| * (all dependencies satisfied) */ |
| char *ckpt_dir; /* directory to store checkpoint |
| * images */ |
| uint16_t contiguous; /* set if requires contiguous nodes */ |
| uint16_t core_spec; /* specialized core count */ |
| char *cpu_bind; /* binding map for map/mask_cpu */ |
| uint16_t cpu_bind_type; /* see cpu_bind_type_t */ |
| uint16_t cpus_per_task; /* number of processors required for |
| * each task */ |
| List depend_list; /* list of job_ptr:state pairs */ |
| char *dependency; /* wait for other jobs */ |
| char *orig_dependency; /* original value (for archiving) */ |
| uint16_t env_cnt; /* size of env_sup (see below) */ |
| char **env_sup; /* supplemental environment variables |
| * as set by Moab */ |
| bitstr_t *exc_node_bitmap; /* bitmap of excluded nodes */ |
| char *exc_nodes; /* excluded nodes */ |
| uint32_t expanding_jobid; /* ID of job to be expanded */ |
| List feature_list; /* required features with |
| * node counts */ |
| char *features; /* required features */ |
| uint32_t magic; /* magic cookie for data integrity */ |
| uint32_t max_cpus; /* maximum number of cpus */ |
| uint32_t max_nodes; /* maximum number of nodes */ |
| multi_core_data_t *mc_ptr; /* multi-core specific data */ |
| char *mem_bind; /* binding map for map/mask_cpu */ |
| uint16_t mem_bind_type; /* see mem_bind_type_t */ |
| uint32_t min_cpus; /* minimum number of cpus */ |
| uint32_t min_nodes; /* minimum number of nodes */ |
| uint16_t nice; /* requested priority change, |
| * NICE_OFFSET == no change */ |
| uint16_t ntasks_per_node; /* number of tasks on each node */ |
| uint32_t num_tasks; /* number of tasks to start */ |
| uint8_t open_mode; /* stdout/err append or truncate */ |
| uint8_t overcommit; /* processors being over subscribed */ |
| uint16_t plane_size; /* plane size when task_dist = |
| * SLURM_DIST_PLANE */ |
| /* job constraints: */ |
| uint32_t pn_min_cpus; /* minimum processors per node */ |
| uint32_t pn_min_memory; /* minimum memory per node (MB) OR |
| * memory per allocated |
| * CPU | MEM_PER_CPU */ |
| uint32_t pn_min_tmp_disk; /* minimum tempdisk per node, MB */ |
| uint8_t prolog_running; /* set while prolog_slurmctld is |
| * running */ |
| uint32_t reserved_resources; /* CPU minutes of resources reserved |
| * for this job while it was pending */ |
| bitstr_t *req_node_bitmap; /* bitmap of required nodes */ |
| uint16_t *req_node_layout; /* task layout for required nodes */ |
| time_t preempt_start_time; /* time that preemption began to start |
| * this job */ |
| char *req_nodes; /* required nodes */ |
| uint16_t requeue; /* controls ability to requeue job */ |
| char *restart_dir; /* restart execution from ckpt images |
| * in this dir */ |
| uint8_t share_res; /* set if job can share resources with |
| * other jobs */ |
| char *std_err; /* pathname of job's stderr file */ |
| char *std_in; /* pathname of job's stdin file */ |
| char *std_out; /* pathname of job's stdout file */ |
| time_t submit_time; /* time of submission */ |
| uint16_t task_dist; /* task layout for this job. Only |
| * useful when Consumable Resources |
| * is enabled */ |
| uint32_t usable_nodes; /* node count needed by preemption */ |
| uint8_t whole_node; /* job requested exclusive node use */ |
| char *work_dir; /* pathname of working directory */ |
| }; |
| |
| struct job_record { |
| char *account; /* account number to charge */ |
| char *alias_list; /* node name to address aliases */ |
| char *alloc_node; /* local node making resource alloc */ |
| uint16_t alloc_resp_port; /* RESPONSE_RESOURCE_ALLOCATION port */ |
| uint32_t alloc_sid; /* local sid making resource alloc */ |
| uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ |
| uint32_t array_task_id; /* task_id of a job array */ |
| uint32_t assoc_id; /* used for accounting plugins */ |
| void *assoc_ptr; /* job's association record ptr, it is |
| * void* because of interdependencies |
| * in the header files, confirm the |
| * value before use */ |
| uint16_t batch_flag; /* 1 or 2 if batch job (with script), |
| * 2 indicates retry mode (one retry) */ |
| char *batch_host; /* host executing batch script */ |
| check_jobinfo_t check_job; /* checkpoint context, opaque */ |
| uint16_t ckpt_interval; /* checkpoint interval in minutes */ |
| time_t ckpt_time; /* last time job was periodically |
| * checkpointed */ |
| char *comment; /* arbitrary comment */ |
| uint32_t cpu_cnt; /* current count of CPUs held |
| * by the job, decremented while job is |
| * completing (N/A for bluegene |
| * systems) */ |
| uint16_t cr_enabled; /* specify if Consumable Resources |
| * is enabled. Needed since CR deals |
| * with a finer granularity in its |
| * node/cpu scheduling (available cpus |
| * instead of available nodes) than the |
| * bluegene and the linear plugins |
| * 0 if cr is NOT enabled, |
| * 1 if cr is enabled */ |
| uint32_t db_index; /* used only for database |
| * plugins */ |
| uint32_t derived_ec; /* highest exit code of all job steps */ |
| struct job_details *details; /* job details */ |
| uint16_t direct_set_prio; /* Priority was set directly; if set, |
| * the system will not change the |
| * priority any further. */ |
| time_t end_time; /* time of termination, |
| * actual or expected */ |
| bool epilog_running; /* true if EpilogSlurmctld is running */ |
| uint32_t exit_code; /* exit code for job (status from |
| * wait call) */ |
| front_end_record_t *front_end_ptr; /* Pointer to front-end node running |
| * this job */ |
| char *gres; /* generic resources requested by job */ |
| List gres_list; /* generic resource allocation detail */ |
| char *gres_alloc; /* Allocated GRES added over all nodes |
| * to be passed to slurmdbd */ |
| char *gres_req; /* Requested GRES added over all nodes |
| * to be passed to slurmdbd */ |
| char *gres_used; /* Actual GRES use added over all nodes |
| * to be passed to slurmdbd */ |
| uint32_t group_id; /* group submitted under */ |
| uint32_t job_id; /* job ID */ |
| struct job_record *job_next; /* next entry with same hash index */ |
| struct job_record *job_array_next_j; /* job array linked list by job_id */ |
| struct job_record *job_array_next_t; /* job array linked list by task_id */ |
| job_resources_t *job_resrcs; /* details of allocated cores */ |
| uint16_t job_state; /* state of the job */ |
| uint16_t kill_on_node_fail; /* 1 if job should be killed on |
| * node failure */ |
| char *licenses; /* licenses required by the job */ |
| List license_list; /* structure with license info */ |
| uint16_t limit_set_max_cpus; /* true if max_cpus was set from |
| * a limit, false if set by user */ |
| uint16_t limit_set_max_nodes; /* true if max_nodes was set from |
| * a limit, false if set by user */ |
| uint16_t limit_set_min_cpus; /* true if min_cpus was set from |
| * a limit, false if set by user */ |
| uint16_t limit_set_min_nodes; /* true if min_nodes was set from |
| * a limit, false if set by user */ |
| uint16_t limit_set_pn_min_memory; /* true if pn_min_memory was set from |
| * a limit, false if set by user */ |
| uint16_t limit_set_time; /* true if time_limit was set from |
| * a limit, false if set by user */ |
| uint16_t limit_set_qos; /* true if the qos was set from |
| * a limit, false if set by user */ |
| uint16_t mail_type; /* see MAIL_JOB_* in slurm.h */ |
| char *mail_user; /* user to get e-mail notification */ |
| uint32_t magic; /* magic cookie for data integrity */ |
| char *name; /* name of the job */ |
| char *network; /* network/switch requirement spec */ |
| uint32_t next_step_id; /* next step id to be used */ |
| char *nodes; /* list of nodes allocated to job */ |
| slurm_addr_t *node_addr; /* addresses of the nodes allocated to |
| * job */ |
| bitstr_t *node_bitmap; /* bitmap of nodes allocated to job */ |
| bitstr_t *node_bitmap_cg; /* bitmap of nodes completing job */ |
| uint32_t node_cnt; /* count of nodes currently |
| * allocated to job */ |
| uint32_t node_cnt_wag; /* count of nodes Slurm thinks |
| * will be allocated when the |
| * job is pending and node_cnt |
| * wasn't given by the user. |
| * This is packed in total_nodes |
| * when dumping state. When |
| * state is read in check for |
| * pending state and set this |
| * instead of total_nodes */ |
| char *nodes_completing; /* nodes still in completing state |
| * for this job, used to ensure |
| * epilog is not re-run for job */ |
| uint16_t other_port; /* port for client communications */ |
| char *partition; /* name of job partition(s) */ |
| List part_ptr_list; /* list of pointers to partition recs */ |
| bool part_nodes_missing; /* set if job's nodes removed from this |
| * partition */ |
| struct part_record *part_ptr; /* pointer to the partition record */ |
| time_t pre_sus_time; /* time job ran prior to last suspend */ |
| time_t preempt_time; /* job preemption signal time */ |
| bool preempt_in_progress; /* Preemption of other jobs in progress |
| * in order to start this job, |
| * (Internal use only, don't save) */ |
| uint32_t priority; /* relative priority of the job, |
| * zero == held (don't initiate) */ |
| uint32_t *priority_array; /* partition based priority */ |
| priority_factors_object_t *prio_factors; /* cached value used |
| * by sprio command */ |
| uint32_t profile; /* Acct_gather_profile option */ |
| uint32_t qos_id; /* quality of service id */ |
| void *qos_ptr; /* pointer to the quality of |
| * service record used for |
| * this job, it is |
| * void* because of interdependencies |
| * in the header files, confirm the |
| * value before use */ |
| uint16_t restart_cnt; /* count of restarts */ |
| time_t resize_time; /* time of latest size change */ |
| uint32_t resv_id; /* reservation ID */ |
| char *resv_name; /* reservation name */ |
| struct slurmctld_resv *resv_ptr;/* reservation structure pointer */ |
| uint32_t requid; /* requester user ID */ |
| char *resp_host; /* host for srun communications */ |
| dynamic_plugin_data_t *select_jobinfo;/* opaque data, BlueGene */ |
| char **spank_job_env; /* environment variables for job prolog |
| * and epilog scripts as set by SPANK |
| * plugins */ |
| uint32_t spank_job_env_size; /* element count in spank_job_env */ |
| time_t start_time; /* time execution begins, |
| * actual or expected */ |
| char *state_desc; /* optional details for state_reason */ |
| uint16_t state_reason; /* reason job still pending or failed |
| * see slurm.h:enum job_wait_reason */ |
| List step_list; /* list of job's steps */ |
| time_t suspend_time; /* time job last suspended or resumed */ |
| time_t time_last_active; /* time of last job activity */ |
| uint32_t time_limit; /* time_limit minutes or INFINITE, |
| * NO_VAL implies partition max_time */ |
| uint32_t time_min; /* minimum time_limit minutes or |
| * INFINITE, |
| * zero implies same as time_limit */ |
| time_t tot_sus_time; /* total time in suspend state */ |
| uint32_t total_cpus; /* number of allocated cpus, |
| * for accounting */ |
| uint32_t total_nodes; /* number of allocated nodes |
| * for accounting */ |
| uint32_t user_id; /* user the job runs as */ |
| uint16_t wait_all_nodes; /* if set, wait for all nodes to boot |
| * before starting the job */ |
| uint16_t warn_flags; /* flags for signal to send */ |
| uint16_t warn_signal; /* signal to send before end_time */ |
| uint16_t warn_time; /* when to send signal before |
| * end_time (secs) */ |
| char *wckey; /* optional wckey */ |
| |
| /* Request number of switches support */ |
| uint32_t req_switch; /* Minimum number of switches */ |
| uint32_t wait4switch; /* Maximum time to wait for minimum switches */ |
| bool best_switch; /* true=min number of switches met */ |
| time_t wait4switch_start; /* Time started waiting for switch */ |
| }; |
| |
| /* Job dependency specification, used in "depend_list" within job_record */ |
| #define SLURM_DEPEND_AFTER 1 /* After job begins */ |
| #define SLURM_DEPEND_AFTER_ANY 2 /* After job completes */ |
| #define SLURM_DEPEND_AFTER_NOT_OK 3 /* After job fails */ |
| #define SLURM_DEPEND_AFTER_OK 4 /* After job completes |
| * successfully */ |
| #define SLURM_DEPEND_SINGLETON 5 /* Only one job for this |
| * user/name at a time */ |
| #define SLURM_DEPEND_EXPAND 6 /* Expand running job */ |
| struct depend_spec { |
| uint32_t array_task_id; /* INFINITE for all array tasks */ |
| uint16_t depend_type; /* SLURM_DEPEND_* type */ |
| uint32_t job_id; /* SLURM job_id */ |
| struct job_record *job_ptr; /* pointer to this job */ |
| }; |
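| |
| /* Illustrative sketch: a job submitted with "--dependency=afterok:1234" |
| * (hypothetical job ID) would typically carry a depend_list containing a |
| * single depend_spec along the lines of |
| * |
| * { .depend_type = SLURM_DEPEND_AFTER_OK, |
| * .job_id = 1234, |
| * .job_ptr = find_job_record(1234) } |
| * |
| * The real list is built and validated by the job dependency logic, which |
| * may also fill in array_task_id for job array dependencies. */ |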
| |
| struct step_record { |
| uint16_t batch_step; /* 1 if batch job step, 0 otherwise */ |
| uint16_t ckpt_interval; /* checkpoint interval in minutes */ |
| check_jobinfo_t check_job; /* checkpoint context, opaque */ |
| char *ckpt_dir; /* path to checkpoint image files */ |
| time_t ckpt_time; /* time of last checkpoint */ |
| bitstr_t *core_bitmap_job; /* bitmap of cores allocated to this |
| * step relative to job's nodes, |
| * see src/common/job_resources.h */ |
| uint32_t cpu_count; /* count of step's CPUs */ |
| uint32_t cpu_freq; /* requested cpu frequency */ |
| uint16_t cpus_per_task; /* cpus per task initiated */ |
| uint16_t cyclic_alloc; /* set for cyclic task allocation |
| * across nodes */ |
| uint16_t exclusive; /* dedicated resources for the step */ |
| uint32_t exit_code; /* highest exit code from any task */ |
| bitstr_t *exit_node_bitmap; /* bitmap of exited nodes */ |
| ext_sensors_data_t *ext_sensors; /* external sensors plugin data */ |
| char *gres; /* generic resources required */ |
| List gres_list; /* generic resource allocation detail */ |
| char *host; /* host for srun communications */ |
| struct job_record* job_ptr; /* ptr to the job that owns the step */ |
| jobacctinfo_t *jobacct; /* keep track of process info in the |
| * step */ |
| uint32_t pn_min_memory; /* minimum real memory per node OR |
| * real memory per CPU | MEM_PER_CPU, |
| * default=0 (use job limit) */ |
| char *name; /* name of job step */ |
| char *network; /* step's network specification */ |
| uint8_t no_kill; /* 1 if no kill on node failure */ |
| uint16_t port; /* port for srun communications */ |
| time_t pre_sus_time; /* time step ran prior to last suspend */ |
| int *resv_port_array; /* reserved port indexes */ |
| uint16_t resv_port_cnt; /* count of ports reserved per node */ |
| char *resv_ports; /* ports reserved for job */ |
| uint32_t requid; /* requester user ID */ |
| time_t start_time; /* step allocation start time */ |
| uint32_t time_limit; /* step allocation time limit */ |
| dynamic_plugin_data_t *select_jobinfo;/* opaque data, BlueGene */ |
| uint16_t state; /* state of the step. See job_states */ |
| uint32_t step_id; /* step number */ |
| slurm_step_layout_t *step_layout;/* info about how tasks are laid out |
| * in the step */ |
| bitstr_t *step_node_bitmap; /* bitmap of nodes allocated to job |
| * step */ |
| /* time_t suspend_time; * time step last suspended or resumed |
| * implicitly the same as suspend_time |
| * in the job record */ |
| switch_jobinfo_t *switch_job; /* switch context, opaque */ |
| time_t time_last_active; /* time step was last found on node */ |
| time_t tot_sus_time; /* total time in suspended state */ |
| }; |
| |
| extern List job_list; /* list of job_record entries */ |
| |
| /*****************************************************************************\ |
| * Consumable Resources parameters and data structures |
| \*****************************************************************************/ |
| |
| /* |
| * Define the type of update and of data retrieval that can happen |
| * from the "select/cons_res" plugin. This information is needed to |
| * support processors as consumable resources. This structure will also |
| * be useful when updating other types of consumable resources. |
| */ |
| enum select_plugindata_info { |
| SELECT_CR_PLUGIN, /* data-> uint32 1 if CR plugin */ |
| SELECT_BITMAP, /* Unused since version 2.0 */ |
| SELECT_ALLOC_CPUS, /* data-> uint16 alloc cpus (CR support) */ |
| SELECT_ALLOC_LPS, /* data-> uint32 alloc lps (CR support) */ |
| SELECT_AVAIL_MEMORY, /* data-> uint32 avail mem (CR support) */ |
| SELECT_STATIC_PART, /* data-> uint16, 1 if static partitioning |
| * BlueGene support */ |
| SELECT_CONFIG_INFO /* data-> List get .conf info from select |
| * plugin */ |
| } ; |
| |
| /*****************************************************************************\ |
| * Global slurmctld functions |
| \*****************************************************************************/ |
| |
| /* |
| * abort_job_on_node - Kill the specific job_id on a specific node, |
| * the request is not processed immediately, but queued. |
| * This is to prevent a flood of pthreads if slurmctld restarts |
| * without saved state and slurmd daemons register with a |
| * multitude of running jobs. Slurmctld will not recognize |
| * these jobs and use this function to kill them - one |
| * agent request per node as they register. |
| * IN job_id - id of the job to be killed |
| * IN job_ptr - pointer to terminating job (NULL if unknown, e.g. orphaned) |
| * IN node_name - name of the node on which the job resides |
| */ |
| extern void abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, |
| char *node_name); |
| |
| /* Note that the backup slurmctld has assumed primary control. |
| * This function can be called multiple times. */ |
| extern void backup_slurmctld_restart(void); |
| |
| /* Complete a batch job requeue logic after all steps complete so that |
| * subsequent jobs appear in a separate accounting record. */ |
| void batch_requeue_fini(struct job_record *job_ptr); |
| |
| /* Build a bitmap of nodes completing this job */ |
| extern void build_cg_bitmap(struct job_record *job_ptr); |
| |
| /* Given a config_record with its bitmap already set, update feature_list */ |
| extern void build_config_feature_list(struct config_record *config_ptr); |
| |
| /* |
| * create_job_record - create an empty job_record, including job_details, |
| * and load its values with defaults (zeros, nulls, and magic cookie) |
| * IN/OUT error_code - set to zero if no error, errno otherwise |
| * RET pointer to the record or NULL if error |
| * global: job_list - global job list |
| * job_count - number of jobs in the system |
| * last_job_update - time of last job table update |
| * NOTE: allocates memory that should be xfreed with _list_delete_job |
| */ |
| extern struct job_record * create_job_record (int *error_code); |
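| |
| /* Minimal usage sketch for create_job_record() (illustrative only): |
| * |
| * int error_code = SLURM_SUCCESS; |
| * struct job_record *job_ptr = create_job_record(&error_code); |
| * if ((job_ptr == NULL) || (error_code != SLURM_SUCCESS)) { |
| * error("unable to create job record"); |
| * return error_code; |
| * } |
| * |
| * The new record is linked into the global job_list and is later released |
| * through the normal job purge path (see NOTE above). */ |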
| |
| /* |
| * create_part_record - create a partition record |
| * RET a pointer to the record or NULL if error |
| * global: default_part - default partition parameters |
| * part_list - global partition list |
| * NOTE: the record's values are initialized to those of default_part |
| * NOTE: allocates memory that should be xfreed with delete_part_record |
| */ |
| extern struct part_record *create_part_record (void); |
| |
| /* |
| * job_limits_check - check the limits specified for the job. |
| * IN job_pptr - pointer to a pointer to the job table entry. |
| * IN check_min_time - if true test job's minimum time limit, |
| * otherwise test maximum time limit |
| * RET WAIT_NO_REASON on success, fail status otherwise. |
| */ |
| extern int job_limits_check(struct job_record **job_pptr, bool check_min_time); |
| |
| /* |
| * delete_job_details - delete a job's detail record and clear its pointer |
| * this information can be deleted as soon as the job is allocated |
| * resources and running (could need to restart batch job) |
| * IN job_entry - pointer to job_record to clear the record of |
| */ |
| extern void delete_job_details (struct job_record *job_entry); |
| |
| /* |
| * delete_partition - delete the specified partition (actually leave |
| * the entry, just flag it as defunct) |
| * IN part_desc_ptr - partition deletion specification from RPC |
| * RET 0 on success, errno otherwise |
| */ |
| extern int delete_partition(delete_part_msg_t *part_desc_ptr); |
| |
| /* |
| * delete_step_record - delete record for job step for specified job_ptr |
| * and step_id |
| * IN job_ptr - pointer to job table entry to have step record removed |
| * IN step_id - id of the desired job step |
| * RET 0 on success, errno otherwise |
| */ |
| extern int delete_step_record (struct job_record *job_ptr, uint32_t step_id); |
| |
| /* |
| * delete_step_records - delete step record for specified job_ptr |
| * IN job_ptr - pointer to job table entry to have step records removed |
| */ |
| extern void delete_step_records (struct job_record *job_ptr); |
| |
| /* |
| * Copy a job's dependency list |
| * IN depend_list_src - a job's depend_list |
| * RET copy of depend_list_src, must be freed by caller |
| */ |
| extern List depended_list_copy(List depend_list_src); |
| |
| /* |
| * drain_nodes - drain one or more nodes, |
| * no-op for nodes already drained or draining |
| * IN nodes - nodes to drain |
| * IN reason - reason to drain the nodes |
| * IN reason_uid - who set the reason |
| * RET SLURM_SUCCESS or error code |
| * global: node_record_table_ptr - pointer to global node table |
| */ |
| extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid ); |
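| |
| /* Minimal usage sketch for drain_nodes() (hypothetical host list and |
| * reason string): |
| * |
| * if (drain_nodes("tux[001-004]", "memory errors", getuid()) != |
| * SLURM_SUCCESS) |
| * error("failed to drain nodes"); |
| */ |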
| |
| /* dump_all_job_state - save the state of all jobs to file |
| * RET 0 or error code */ |
| extern int dump_all_job_state ( void ); |
| |
| /* dump_all_node_state - save the state of all nodes to file */ |
| extern int dump_all_node_state ( void ); |
| |
| /* dump_all_part_state - save the state of all partitions to file */ |
| extern int dump_all_part_state ( void ); |
| |
| /* |
| * dump_job_desc - dump the incoming job submit request message |
| * IN job_specs - job specification from RPC |
| */ |
| extern void dump_job_desc(job_desc_msg_t * job_specs); |
| |
| /* |
| * dump_job_step_state - dump the state of a specific job step to a buffer, |
| * load with load_step_state |
| * IN job_ptr - pointer to job for which information is to be dumped |
| * IN step_ptr - pointer to job step for which information is to be dumped |
| * IN/OUT buffer - location to store data, pointers automatically advanced |
| */ |
| extern void dump_job_step_state(struct job_record *job_ptr, |
| struct step_record *step_ptr, Buf buffer); |
| |
| /* |
| * dump_step_desc - dump the incoming step initiate request message |
| * IN step_spec - job step request specification from RPC |
| */ |
| extern void dump_step_desc(job_step_create_request_msg_t *step_spec); |
| |
| /* Remove one node from a job's allocation */ |
| extern void excise_node_from_job(struct job_record *job_ptr, |
| struct node_record *node_ptr); |
| |
| /* |
| * Copy a job's feature list |
| * IN feature_list_src - a job's feature_list |
| * RET copy of feature_list_src, must be freed by caller |
| */ |
| extern List feature_list_copy(List feature_list_src); |
| |
| /* |
| * find_job_array_rec - return a pointer to the job record with the given |
| * array_job_id/array_task_id |
| * IN job_id - requested job's id |
| * IN array_task_id - requested job's task id (NO_VAL if none specified) |
| * RET pointer to the job's record, NULL on error |
| */ |
| extern struct job_record *find_job_array_rec(uint32_t array_job_id, |
| uint32_t array_task_id); |
| |
| /* |
| * find_job_record - return a pointer to the job record with the given job_id |
| * IN job_id - requested job's id |
| * RET pointer to the job's record, NULL on error |
| */ |
| struct job_record *find_job_record(uint32_t job_id); |
| |
| /* |
| * find_first_node_record - find a record for first node in the bitmap |
| * IN node_bitmap |
| */ |
| extern struct node_record *find_first_node_record (bitstr_t *node_bitmap); |
| |
| /* |
| * find_part_record - find a record for partition with specified name |
| * IN name - name of the desired partition |
| * RET pointer to partition or NULL if not found |
| */ |
| extern struct part_record *find_part_record(char *name); |
| |
| /* |
| * find_step_record - return a pointer to the step record with the given |
| * job_id and step_id |
| * IN job_ptr - pointer to the job table entry containing the desired step |
| * IN step_id - id of the desired job step |
| * RET pointer to the job step's record, NULL on error |
| */ |
| extern struct step_record * find_step_record(struct job_record *job_ptr, |
| uint32_t step_id); |
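| |
| /* Illustrative lookup sketch: a (job_id, step_id) pair is resolved through |
| * the job record first, e.g. |
| * |
| * struct job_record *job_ptr = find_job_record(job_id); |
| * struct step_record *step_ptr = job_ptr ? |
| * find_step_record(job_ptr, step_id) : NULL; |
| * if (step_ptr == NULL) |
| * error("step %u.%u not found", job_id, step_id); |
| */ |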
| |
| /* |
| * get_job_env - return the environment variables and their count for a |
| * given job |
| * IN job_ptr - pointer to job for which data is required |
| * OUT env_size - number of elements to read |
| * RET pointer to array of string pointers containing environment variables |
| */ |
| extern char **get_job_env (struct job_record *job_ptr, uint32_t *env_size); |
| |
| /* |
| * get_job_script - return the script for a given job |
| * IN job_ptr - pointer to job for which data is required |
| * RET pointer to string containing job script |
| */ |
| extern char *get_job_script (struct job_record *job_ptr); |
| |
| /* |
| * get_next_job_id - return the job_id to be used by default for |
| * the next job |
| */ |
| extern uint32_t get_next_job_id(void); |
| |
| /* |
| * get_part_list - find record for named partition(s) |
| * IN name - partition name(s) in a comma separated list |
| * RET List of pointers to the partitions or NULL if not found |
| * NOTE: Caller must free the returned list |
| */ |
| extern List get_part_list(char *name); |
| |
| /* |
| * init_job_conf - initialize the job configuration tables and values. |
| * this should be called after creating node information, but |
| * before creating any job entries. |
| * RET 0 if no error, otherwise an error code |
| * global: last_job_update - time of last job table update |
| * job_list - pointer to global job list |
| */ |
| extern int init_job_conf (void); |
| |
| /* |
| * init_node_conf - initialize the node configuration tables and values. |
| * this should be called before creating any node or configuration |
| * entries. |
| * RET 0 if no error, otherwise an error code |
| * global: node_record_table_ptr - pointer to global node table |
| * default_node_record - default values for node records |
| * default_config_record - default values for configuration records |
| * hash_table - table of hash indexes |
| * last_node_update - time of last node table update |
| */ |
| extern int init_node_conf (); |
| |
| /* |
| * init_part_conf - initialize the default partition configuration values |
| * and create a (global) partition list. |
| * this should be called before creating any partition entries. |
| * RET 0 if no error, otherwise an error code |
| * global: default_part - default partition values |
| * part_list - global partition list |
| */ |
| extern int init_part_conf (void); |
| |
| /* |
| * is_node_down - determine if the specified node's state is DOWN |
| * IN name - name of the node |
| * RET true if node exists and is down, otherwise false |
| */ |
| extern bool is_node_down (char *name); |
| |
| /* |
| * is_node_resp - determine if the specified node's state is responding |
| * IN name - name of the node |
| * RET true if node exists and is responding, otherwise false |
| */ |
| extern bool is_node_resp (char *name); |
| |
| /* |
| * allocated_session_in_use - check if an interactive session is already running |
| * IN new_alloc - allocation (alloc_node:alloc_sid) to test for |
| * Returns true if an interactive session of the same node:sid already exists. |
| */ |
| extern bool allocated_session_in_use(job_desc_msg_t *new_alloc); |
| |
| /* |
| * job_alloc_info - get details about an existing job allocation |
| * IN uid - UID of the user requesting the job information |
| * IN job_id - ID of job for which info is requested |
| * OUT job_pptr - set to pointer to job record |
| */ |
| extern int job_alloc_info(uint32_t uid, uint32_t job_id, |
| struct job_record **job_pptr); |
| /* |
| * job_allocate - create job_records for the supplied job specification and |
| * allocate nodes for it. |
| * IN job_specs - job specifications |
| * IN immediate - if set then either initiate the job immediately or fail |
| * IN will_run - don't initiate the job if set, just test if it could run |
| * now or later |
| * OUT resp - will run response (includes start location, time, etc.) |
| * IN allocate - resource allocation request only if set, batch job if zero |
| * IN submit_uid - uid of user issuing the request |
| * OUT job_pptr - set to pointer to job record |
| * OUT err_msg - Custom error message to the user, caller to xfree results |
| * RET 0 or an error code. If the job would only be able to execute with |
| * some change in partition configuration then |
| * ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned |
| * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts |
| * of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} |
| * and cpu_count_reps={4,2,2} |
| * globals: job_list - pointer to global job list |
| * list_part - global list of partition info |
| * default_part_loc - pointer to default partition |
| * NOTE: lock_slurmctld on entry: Read config Write job, Write node, Read part |
| */ |
| extern int job_allocate(job_desc_msg_t * job_specs, int immediate, |
| int will_run, will_run_response_msg_t **resp, |
| int allocate, uid_t submit_uid, struct job_record **job_pptr, |
| char **err_msg); |
| |
| /* Reset a job's end_time based upon its start_time and time_limit. |
| * NOTE: Do not reset the end_time if already being preempted */ |
| extern void job_end_time_reset(struct job_record *job_ptr); |
| /* |
| * job_hold_by_assoc_id - Hold all pending jobs with a given |
| * association ID. This happens when an association is deleted (e.g. when |
| * a user is removed from the association database). |
| * RET count of held jobs |
| */ |
| extern int job_hold_by_assoc_id(uint32_t assoc_id); |
| |
| /* |
| * job_hold_by_qos_id - Hold all pending jobs with a given |
| * QOS ID. This happens when a QOS is deleted (e.g. when |
| * a QOS is removed from the association database). |
| * RET count of held jobs |
| */ |
| extern int job_hold_by_qos_id(uint32_t qos_id); |
| |
| /* Perform checkpoint operation on a job */ |
| extern int job_checkpoint(checkpoint_msg_t *ckpt_ptr, uid_t uid, |
| slurm_fd_t conn_fd, uint16_t protocol_version); |
| |
| /* log the completion of the specified job */ |
| extern void job_completion_logger(struct job_record *job_ptr, bool requeue); |
| |
| /* |
| * job_epilog_complete - Note the completion of the epilog script for a |
| * given job |
| * IN job_id - id of the job for which the epilog was executed |
| * IN node_name - name of the node on which the epilog was executed |
| * IN return_code - return code from epilog script |
| * RET true if job is COMPLETED, otherwise false |
| */ |
| extern bool job_epilog_complete(uint32_t job_id, char *node_name, |
| uint32_t return_code); |
| |
| /* |
| * job_end_time - Process JOB_END_TIME |
| * IN time_req_msg - job end time request |
| * OUT timeout_msg - job timeout response to be sent |
| * RET SLURM_SUCCESS or an error code |
| */ |
| extern int job_end_time(job_alloc_info_msg_t *time_req_msg, |
| srun_timeout_msg_t *timeout_msg); |
| |
| /* job_fini - free all memory associated with job records */ |
| extern void job_fini (void); |
| |
| /* |
| * job_fail - terminate a job due to initiation failure |
| * IN job_id - id of the job to be killed |
| * IN job_state - desired job state (JOB_BOOT_FAIL, JOB_NODE_FAIL, etc.) |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_fail(uint32_t job_id, uint16_t job_state); |
| |
| |
| /* job_hold_requeue() |
| * |
| * Requeue the job based upon its current state. |
| * If JOB_SPECIAL_EXIT then requeue and hold with JOB_SPECIAL_EXIT state. |
| * If JOB_REQUEUE_HOLD then requeue and hold. |
| * If JOB_REQUEUE then requeue and let it run again. |
| * The requeue can happen directly from job_requeue() or from |
| * job_epilog_complete() after the last component has finished. |
| */ |
| extern void job_hold_requeue(struct job_record *job_ptr); |
| |
| /* |
| * determine if job is ready to execute per the node select plugin |
| * IN job_id - job to test |
| * OUT ready - 1 if job is ready to execute, 0 otherwise |
| * RET SLURM error code |
| */ |
| extern int job_node_ready(uint32_t job_id, int *ready); |
| |
| /* Record accounting information for a job immediately before changing size */ |
| extern void job_pre_resize_acctg(struct job_record *job_ptr); |
| |
| /* Record accounting information for a job immediately after changing size */ |
| extern void job_post_resize_acctg(struct job_record *job_ptr); |
| |
| /* |
| * job_restart - Restart a batch job from checkpointed state |
| * |
| * Restarting a job is similar to submitting a new job, except that |
| * the job requirements are loaded from the checkpoint file and |
| * the job id is restored. |
| * |
| * IN ckpt_ptr - checkpoint request message |
| * IN uid - user id of the user issuing the RPC |
| * IN conn_fd - file descriptor on which to send reply |
| * IN protocol_version - slurm protocol version of client |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_restart(checkpoint_msg_t *ckpt_ptr, uid_t uid, |
| slurm_fd_t conn_fd, uint16_t protocol_version); |
| |
| /* |
| * job_signal - signal the specified job |
| * IN job_id - id of the job to be signaled |
| * IN signal - signal to send, SIGKILL == cancel the job |
| * IN flags - see KILL_JOB_* flags in slurm.h |
| * IN uid - uid of requesting user |
| * IN preempt - true if job being preempted |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t flags, |
| uid_t uid, bool preempt); |
| |
| /* |
| * job_step_checkpoint - perform some checkpoint operation |
| * IN ckpt_ptr - checkpoint request message |
| * IN uid - user id of the user issuing the RPC |
| * IN conn_fd - file descriptor on which to send reply |
| * IN protocol_version - slurm protocol version of client |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_step_checkpoint(checkpoint_msg_t *ckpt_ptr, |
| uid_t uid, slurm_fd_t conn_fd, uint16_t protocol_version); |
| |
| /* |
| * job_step_checkpoint_comp - note job step checkpoint completion |
| * IN ckpt_ptr - checkpoint complete status message |
| * IN uid - user id of the user issuing the RPC |
| * IN conn_fd - file descriptor on which to send reply |
| * IN protocol_version - slurm protocol version of client |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_step_checkpoint_comp(checkpoint_comp_msg_t *ckpt_ptr, |
| uid_t uid, slurm_fd_t conn_fd, uint16_t protocol_version); |
| /* |
| * job_step_checkpoint_task_comp - note task checkpoint completion |
| * IN ckpt_ptr - checkpoint task complete status message |
| * IN uid - user id of the user issuing the RPC |
| * IN conn_fd - file descriptor on which to send reply |
| * IN protocol_version - slurm protocol version of client |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_step_checkpoint_task_comp(checkpoint_task_comp_msg_t *ckpt_ptr, |
| uid_t uid, slurm_fd_t conn_fd, uint16_t protocol_version); |
| |
| /* |
| * job_suspend - perform some suspend/resume operation |
| * IN sus_ptr - suspend/resume request message |
| * IN uid - user id of the user issuing the RPC |
| * IN conn_fd - file descriptor on which to send reply, |
| * -1 if none |
| * indf_susp IN - set if job is being suspended indefinitely by user or admin |
| * and we should clear its priority, otherwise suspended |
| * temporarily for gang scheduling |
| * IN protocol_version - slurm protocol version of client |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid, |
| slurm_fd_t conn_fd, bool indf_susp, |
| uint16_t protocol_version); |
| |
| /* |
| * job_complete - note the normal termination the specified job |
| * IN job_id - id of the job which completed |
| * IN uid - user id of user issuing the RPC |
| * IN requeue - job should be run again if possible |
| * IN node_fail - true if job terminated due to node failure |
| * IN job_return_code - job's return code, if set then set state to JOB_FAILED |
| * RET - 0 on success, otherwise ESLURM error code |
| * global: job_list - pointer global job list |
| * last_job_update - time of last job table update |
| */ |
| extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, |
| bool node_fail, uint32_t job_return_code); |
| |
| /* |
| * job_independent - determine if this job has a dependent job pending |
| * or if the job's scheduled begin time is in the future |
| * IN job_ptr - pointer to job being tested |
| * IN will_run - is this a test for will_run or not |
| * RET - true if job no longer must be deferred for another job |
| */ |
| extern bool job_independent(struct job_record *job_ptr, int will_run); |
| |
| /* |
| * job_req_node_filter - job request node filter. |
| * clear from a bitmap the nodes which cannot be used for a job |
| * test memory size, required features, processor count, etc. |
| * IN job_ptr - pointer to the job to be scheduled |
| * IN/OUT avail_bitmap - set of nodes being considered for use |
| * RET SLURM_SUCCESS or EINVAL if can't filter (exclusive OR of features) |
| */ |
| extern int job_req_node_filter(struct job_record *job_ptr, |
| bitstr_t *avail_bitmap); |
| |
| /* |
| * job_requeue - Requeue a running or pending batch job |
| * IN uid - user id of user issuing the RPC |
| * IN job_id - id of the job to be requeued |
| * IN conn_fd - file descriptor on which to send reply |
| * IN protocol_version - slurm protocol version of client |
| * IN preempt - true if job being preempted |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int job_requeue(uid_t uid, |
| uint32_t job_id, |
| slurm_fd_t conn_fd, |
| uint16_t protocol_version, |
| bool preempt); |
| /* |
| * job_step_complete - note normal completion the specified job step |
| * IN job_id - id of the job to be completed |
| * IN step_id - id of the job step to be completed |
| * IN uid - user id of user issuing the RPC |
| * IN requeue - job should be run again if possible |
| * IN job_return_code - job's return code, if set then set state to JOB_FAILED |
| * RET 0 on success, otherwise ESLURM error code |
| * global: job_list - pointer global job list |
| * last_job_update - time of last job table update |
| */ |
| extern int job_step_complete (uint32_t job_id, uint32_t job_step_id, |
| uid_t uid, bool requeue, uint32_t job_return_code); |
| |
| /* |
| * job_step_signal - signal the specified job step |
| * IN job_id - id of the job containing the step to be signaled |
| * IN step_id - id of the job step to be signaled |
| * IN signal - signal number to be sent |
| * IN uid - user id of user issuing the RPC |
| * RET 0 on success, otherwise ESLURM error code |
| * global: job_list - pointer to global job list |
| * last_job_update - time of last job table update |
| */ |
| extern int job_step_signal(uint32_t job_id, uint32_t step_id, |
| uint16_t signal, uid_t uid); |
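| |
| /* |
| * Example usage (hypothetical sketch; the ids and "rpc_uid" are purely |
| * illustrative). Send SIGKILL to step 0 of job 1234: |
| * |
| *    int rc = job_step_signal(1234, 0, SIGKILL, rpc_uid); |
| *    if (rc != SLURM_SUCCESS) |
| *        error("job_step_signal: %s", slurm_strerror(rc)); |
| */ |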
| |
| /* |
| * job_time_limit - terminate jobs which have exceeded their time limit |
| * global: job_list - pointer to global job list |
| * last_job_update - time of last job table update |
| */ |
| extern void job_time_limit (void); |
| |
| /* |
| * job_update_cpu_cnt - when job is completing remove allocated cpus |
| * from count. |
| * IN/OUT job_ptr - job structure to be updated |
| * IN node_inx - node bit that is finished with job. |
| * RET SLURM_SUCCESS on success, SLURM_ERROR on cpu_cnt underflow |
| */ |
| extern int job_update_cpu_cnt(struct job_record *job_ptr, int node_inx); |
| |
| /* |
| * check_job_step_time_limit - terminate job steps which have exceeded |
| * their time limit |
| * IN job_ptr - pointer to job containing steps to check |
| * IN now - current time to use for the limit check |
| */ |
| extern void check_job_step_time_limit (struct job_record *job_ptr, time_t now); |
| |
| /* |
| * kill_job_by_part_name - Given a partition name, deallocate resources for |
| * its jobs and kill them |
| * IN part_name - name of a partition |
| * RET number of killed jobs |
| */ |
| extern int kill_job_by_part_name(char *part_name); |
| |
| /* |
| * kill_job_on_node - Kill the specified job_id on a specific node; |
| * one agent request is sent per node as it registers. |
| * IN job_id - id of the job to be killed |
| * IN job_ptr - pointer to terminating job (NULL if unknown, e.g. orphaned) |
| * IN node_ptr - pointer to the node on which the job resides |
| */ |
| extern void kill_job_on_node(uint32_t job_id, struct job_record *job_ptr, |
| struct node_record *node_ptr); |
| |
| /* |
| * kill_job_by_front_end_name - Given a front end node name, deallocate |
| * resources for its jobs and kill them. |
| * IN node_name - name of a front end node |
| * RET number of jobs associated with this front end node |
| */ |
| extern int kill_job_by_front_end_name(char *node_name); |
| |
| /* |
| * kill_running_job_by_node_name - Given a node name, deallocate RUNNING |
| * or COMPLETING jobs from the node or kill them |
| * IN node_name - name of a node |
| * RET number of killed jobs |
| */ |
| extern int kill_running_job_by_node_name(char *node_name); |
| |
| /* |
| * kill_step_on_node - determine if the specified job has any job steps |
| * allocated to the specified node and kill them unless no_kill flag |
| * is set on the step |
| * IN job_ptr - pointer to an active job record |
| * IN node_ptr - pointer to a node record |
| * IN node_fail - true if removed node has failed |
| * RET count of killed job steps |
| */ |
| extern int kill_step_on_node(struct job_record *job_ptr, |
| struct node_record *node_ptr, bool node_fail); |
| |
| /* list_compare_config - compare two entries from the config list based upon |
| * weight, see common/list.h for documentation */ |
| extern int list_compare_config (void *config_entry1, void *config_entry2); |
| |
| /* |
| * list_find_feature - find an entry in the feature list, see list.h for |
| * documentation |
| * IN key - is feature name or NULL for all features |
| * RET 1 if found, 0 otherwise |
| */ |
| extern int list_find_feature(void *feature_entry, void *key); |
| |
| /* |
| * list_find_part - find an entry in the partition list, see common/list.h |
| * for documentation |
| * IN key - partition name or "universal_key" for all partitions |
| * RET 1 if matches key, 0 otherwise |
| * global- part_list - the global partition list |
| */ |
| extern int list_find_part (void *part_entry, void *key); |
| |
| /* |
| * load_all_job_state - load the job state from file, recover from last |
| * checkpoint. Execute this after loading the configuration file data. |
| * RET 0 or error code |
| */ |
| extern int load_all_job_state ( void ); |
| |
| /* |
| * load_all_node_state - Load the node state from file, recover on slurmctld |
| * restart. Execute this after loading the configuration file data. |
| * Data goes into common storage. |
| * IN state_only - if true, overwrite only node state, features, gres and reason |
| * RET 0 or error code |
| */ |
| extern int load_all_node_state ( bool state_only ); |
| |
| /* |
| * load_last_job_id - load only the last job ID from state save file. |
| * RET 0 or error code |
| */ |
| extern int load_last_job_id( void ); |
| |
| /* |
| * load_part_uid_allow_list - reload the allow_uid list of partitions |
| * if required (updated group file or force set) |
| * IN force - if set then always reload the allow_uid list |
| */ |
| extern void load_part_uid_allow_list ( int force ); |
| |
| /* |
| * load_all_part_state - load the partition state from file, recover from |
| * slurmctld restart. Execute this after loading the configuration |
| * file data. |
| * RET 0 or error code |
| */ |
| extern int load_all_part_state ( void ); |
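| |
| /* |
| * Example usage (hypothetical sketch of a recovery sequence; the exact |
| * ordering used by slurmctld may differ). Per the notes above, each of |
| * these runs after the configuration file data has been loaded: |
| * |
| *    (void) load_all_node_state(false); |
| *    (void) load_all_part_state(); |
| *    (void) load_all_job_state(); |
| */ |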
| |
| /* |
| * Create a new job step from data in a buffer (as created by |
| * dump_job_stepstate) |
| * IN/OUT job_ptr - pointer to the job for which the step is to be loaded. |
| * IN/OUT buffer - location from which to get data, pointers |
| * automatically advanced |
| */ |
| extern int load_step_state(struct job_record *job_ptr, Buf buffer, |
| uint16_t protocol_version); |
| |
| /* make_node_alloc - flag specified node as allocated to a job |
| * IN node_ptr - pointer to node being allocated |
| * IN job_ptr - pointer to job that is starting |
| */ |
| extern void make_node_alloc(struct node_record *node_ptr, |
| struct job_record *job_ptr); |
| |
| /* make_node_comp - flag specified node as completing a job |
| * IN node_ptr - pointer to node marked for completion of job |
| * IN job_ptr - pointer to job that is completing |
| * IN suspended - true if job was previously suspended |
| */ |
| extern void make_node_comp(struct node_record *node_ptr, |
| struct job_record *job_ptr, bool suspended); |
| |
| /* |
| * make_node_idle - flag specified node as having finished with a job |
| * IN node_ptr - pointer to node reporting job completion |
| * IN job_ptr - pointer to job that just completed |
| */ |
| extern void make_node_idle(struct node_record *node_ptr, |
| struct job_record *job_ptr); |
| |
| /* |
| * Determine if the specified job can execute right now or is currently |
| * blocked by a partition state or limit. These job states should match the |
| * reason values returned by job_limits_check(). |
| */ |
| extern bool misc_policy_job_runnable_state(struct job_record *job_ptr); |
| |
| /* msg_to_slurmd - send the given msg_type to every slurmd, with no arguments */ |
| extern void msg_to_slurmd (slurm_msg_type_t msg_type); |
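| |
| /* |
| * Example usage (hypothetical sketch; REQUEST_RECONFIGURE is one of the |
| * message types defined in slurm_protocol_defs.h). Ask every slurmd to |
| * re-read its configuration: |
| * |
| *    msg_to_slurmd(REQUEST_RECONFIGURE); |
| */ |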
| |
| /* node_fini - free all memory associated with node records */ |
| extern void node_fini (void); |
| |
| /* node_did_resp - record that the specified node is responding |
| * IN name - name of the node */ |
| extern void node_did_resp (char *name); |
| |
| /* |
| * node_not_resp - record that the specified node is not responding |
| * IN name - name of the node |
| * IN msg_time - time message was sent |
| * IN resp_type - what kind of response came back from the node |
| */ |
| extern void node_not_resp (char *name, time_t msg_time, |
| slurm_msg_type_t resp_type); |
| |
| /* For every node with the "not_responding" flag set, clear the flag |
| * and log that the node is not responding using a hostlist expression */ |
| extern void node_no_resp_msg(void); |
| |
| /* |
| * pack_all_jobs - dump all job information for all jobs in |
| * machine independent form (for network transmission) |
| * OUT buffer_ptr - the pointer is set to the allocated buffer. |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN show_flags - job filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * IN filter_uid - pack only jobs belonging to this user if not NO_VAL |
| * IN protocol_version - slurm protocol version of client |
| * global: job_list - global list of job records |
| * NOTE: the buffer at *buffer_ptr must be xfreed by the caller |
| * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c |
| * whenever the data format changes |
| */ |
| extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, |
| uint16_t show_flags, uid_t uid, uint32_t filter_uid, |
| uint16_t protocol_version); |
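| |
| /* |
| * Example usage (hypothetical sketch; "rpc_uid" is an illustrative local). |
| * Pack every job visible to the requesting user, without filtering on a |
| * specific job owner, then release the buffer as noted above: |
| * |
| *    char *buffer = NULL; |
| *    int buffer_size = 0; |
| *    pack_all_jobs(&buffer, &buffer_size, SHOW_ALL, rpc_uid, NO_VAL, |
| *                  SLURM_PROTOCOL_VERSION); |
| *    ... transmit buffer_size bytes from buffer ... |
| *    xfree(buffer); |
| */ |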
| |
| /* |
| * pack_all_node - dump all configuration and node information for all nodes |
| * in machine independent form (for network transmission) |
| * OUT buffer_ptr - pointer to the stored data |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN show_flags - node filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * IN protocol_version - slurm protocol version of client |
| * global: node_record_table_ptr - pointer to global node table |
| * NOTE: the caller must xfree the buffer at *buffer_ptr |
| * NOTE: change slurm_load_node() in api/node_info.c when data format changes |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern void pack_all_node (char **buffer_ptr, int *buffer_size, |
| uint16_t show_flags, uid_t uid, |
| uint16_t protocol_version); |
| |
| /* Pack all scheduling statistics */ |
| extern void pack_all_stat(int resp, char **buffer_ptr, int *buffer_size, |
| uint16_t protocol_version); |
| |
| /* |
| * pack_ctld_job_step_info_response_msg - packs job step info |
| * IN job_id - specific id or NO_VAL for all |
| * IN step_id - specific id or NO_VAL for all |
| * IN uid - user issuing request |
| * IN show_flags - job step filtering options |
| * OUT buffer - location to store data, pointers automatically advanced |
| * IN protocol_version - slurm protocol version of client |
| * RET - 0 or error code |
| * NOTE: the caller MUST release the buffer using free_buf() |
| */ |
| extern int pack_ctld_job_step_info_response_msg( |
| uint32_t job_id, uint32_t step_id, uid_t uid, |
| uint16_t show_flags, Buf buffer, uint16_t protocol_version); |
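| |
| /* |
| * Example usage (hypothetical sketch; BUF_SIZE, init_buf() and free_buf() |
| * come from src/common/pack.h and "rpc_uid" is illustrative). Pack info |
| * for all steps of all jobs, then release the buffer: |
| * |
| *    Buf buffer = init_buf(BUF_SIZE); |
| *    int rc = pack_ctld_job_step_info_response_msg(NO_VAL, NO_VAL, rpc_uid, |
| *                 SHOW_ALL, buffer, SLURM_PROTOCOL_VERSION); |
| *    if (rc != SLURM_SUCCESS) |
| *        error("step pack failed: %s", slurm_strerror(rc)); |
| *    free_buf(buffer); |
| */ |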
| |
| /* |
| * pack_all_part - dump all partition information for all partitions in |
| * machine independent form (for network transmission) |
| * OUT buffer_ptr - the pointer is set to the allocated buffer. |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN show_flags - partition filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * IN protocol_version - slurm protocol version of client |
| * global: part_list - global list of partition records |
| * NOTE: the buffer at *buffer_ptr must be xfreed by the caller |
| * NOTE: change slurm_load_part() in api/part_info.c if data format changes |
| */ |
| extern void pack_all_part(char **buffer_ptr, int *buffer_size, |
| uint16_t show_flags, uid_t uid, |
| uint16_t protocol_version); |
| |
| /* |
| * pack_job - dump all configuration information about a specific job in |
| * machine independent form (for network transmission) |
| * IN dump_job_ptr - pointer to job for which information is requested |
| * IN show_flags - job filtering options |
| * IN/OUT buffer - buffer in which data is placed, pointers automatically |
| * updated |
| * IN uid - user requesting the data |
| * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c |
| * whenever the data format changes |
| */ |
| extern void pack_job (struct job_record *dump_job_ptr, uint16_t show_flags, |
| Buf buffer, uint16_t protocol_version, uid_t uid); |
| |
| /* |
| * pack_part - dump all configuration information about a specific partition |
| * in machine independent form (for network transmission) |
| * IN part_ptr - pointer to partition for which information is requested |
| * IN/OUT buffer - buffer in which data is placed, pointers automatically |
| * updated |
| * global: default_part_loc - pointer to the default partition |
| * NOTE: if you make any changes here be sure to make the corresponding |
| * changes to load_part_config in api/partition_info.c |
| */ |
| extern void pack_part (struct part_record *part_ptr, Buf buffer, |
| uint16_t protocol_version); |
| |
| /* |
| * pack_one_job - dump information for one job in |
| * machine independent form (for network transmission) |
| * OUT buffer_ptr - the pointer is set to the allocated buffer. |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN job_id - ID of job that we want info for |
| * IN show_flags - job filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * NOTE: the buffer at *buffer_ptr must be xfreed by the caller |
| * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c |
| * whenever the data format changes |
| */ |
| extern int pack_one_job(char **buffer_ptr, int *buffer_size, |
| uint32_t job_id, uint16_t show_flags, uid_t uid, |
| uint16_t protocol_version); |
| |
| /* |
| * pack_one_node - dump all configuration and node information for one node |
| * in machine independent form (for network transmission) |
| * OUT buffer_ptr - pointer to the stored data |
| * OUT buffer_size - set to size of the buffer in bytes |
| * IN show_flags - node filtering options |
| * IN uid - uid of user making request (for partition filtering) |
| * IN node_name - name of node for which information is desired, |
| * use first node if name is NULL |
| * IN protocol_version - slurm protocol version of client |
| * global: node_record_table_ptr - pointer to global node table |
| * NOTE: the caller must xfree the buffer at *buffer_ptr |
| * NOTE: change slurm_load_node() in api/node_info.c when data format changes |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern void pack_one_node (char **buffer_ptr, int *buffer_size, |
| uint16_t show_flags, uid_t uid, char *node_name, |
| uint16_t protocol_version); |
| |
| /* part_filter_clear - Clear the partition's hidden flag based upon a user's |
| * group access. This must follow a call to part_filter_set() */ |
| extern void part_filter_clear(void); |
| |
| /* part_filter_set - Set the partition's hidden flag based upon a user's |
| * group access. This must be followed by a call to part_filter_clear() */ |
| extern void part_filter_set(uid_t uid); |
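| |
| /* |
| * Example usage (hypothetical sketch; "rpc_uid" is an illustrative local). |
| * Hide partitions the requesting user may not access while packing |
| * partition data, then restore the flags: |
| * |
| *    char *buffer = NULL; |
| *    int buffer_size = 0; |
| *    part_filter_set(rpc_uid); |
| *    pack_all_part(&buffer, &buffer_size, SHOW_ALL, rpc_uid, |
| *                  SLURM_PROTOCOL_VERSION); |
| *    part_filter_clear(); |
| *    ... transmit and xfree(buffer) ... |
| */ |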
| |
| /* part_fini - free all memory associated with partition records */ |
| extern void part_fini (void); |
| |
| /* |
| * Create a copy of a job's part_list (partition list) |
| * IN part_list_src - a job's part_list |
| * RET copy of part_list_src, must be freed by caller |
| */ |
| extern List part_list_copy(List part_list_src); |
| |
| /* |
| * Determine if the specified job can execute right now or is currently |
| * blocked by a partition state or limit. Execute job_limits_check() to |
| * re-validate job state. |
| */ |
| extern bool part_policy_job_runnable_state(struct job_record *job_ptr); |
| |
| /* Validate a job's account against the partition's AllowAccounts or |
| * DenyAccounts parameters. */ |
| extern int part_policy_valid_acct(struct part_record *part_ptr, char *acct); |
| |
| /* Validate a job's QOS against the partition's AllowQOS or |
| * DenyQOS parameters. */ |
| extern int part_policy_valid_qos( |
| struct part_record *part_ptr, slurmdb_qos_rec_t *qos_ptr); |
| |
| /* |
| * partition_in_use - determine whether a partition is in use by a RUNNING, |
| * PENDING, or SUSPENDED job |
| * IN part_name - name of a partition |
| * RET true if the partition is in use, else false |
| */ |
| extern bool partition_in_use(char *part_name); |
| |
| /* |
| * purge_old_job - purge old job records. |
| * The jobs must have completed at least MIN_JOB_AGE minutes ago. |
| * Test job dependencies, handle after_ok, after_not_ok before |
| * purging any jobs. |
| * NOTE: READ lock slurmctld config and WRITE lock jobs before entry |
| */ |
| void purge_old_job(void); |
| |
| /* Convert a comma delimited list of QOS names into a bitmap */ |
| extern void qos_list_build(char *qos, bitstr_t **qos_bits); |
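| |
| /* |
| * Example usage (hypothetical sketch; the QOS names are illustrative and |
| * FREE_NULL_BITMAP() comes from src/common/macros.h): |
| * |
| *    char qos_names[] = "normal,high"; |
| *    bitstr_t *qos_bits = NULL; |
| *    qos_list_build(qos_names, &qos_bits); |
| *    ... test bits as needed ... |
| *    FREE_NULL_BITMAP(qos_bits); |
| */ |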
| |
| /* Request that the job scheduler execute soon (typically within seconds) */ |
| extern void queue_job_scheduler(void); |
| |
| /* |
| * rehash_jobs - Create or rebuild the job hash table. |
| * NOTE: run lock_slurmctld before entry: Read config, write job |
| */ |
| extern void rehash_jobs(void); |
| |
| /* |
| * Rebuild a job step's core_bitmap_job after a job has just changed size |
| * IN job_ptr - job that was just re-sized |
| * IN orig_job_node_bitmap - the job's original node bitmap |
| */ |
| extern void rebuild_step_bitmaps(struct job_record *job_ptr, |
| bitstr_t *orig_job_node_bitmap); |
| |
| /* |
| * After a job step has fully completed, run this to release its resources |
| * and remove the step from the system. |
| */ |
| extern int post_job_step(struct step_record *step_ptr); |
| |
| /* update first assigned job id as needed on reconfigure */ |
| extern void reset_first_job_id(void); |
| |
| /* |
| * reset_job_bitmaps - reestablish bitmaps for existing jobs. |
| * this should be called after rebuilding node information, |
| * but before using any job entries. |
| * global: last_job_update - time of last job table update |
| * job_list - pointer to global job list |
| */ |
| extern void reset_job_bitmaps (void); |
| |
| /* Reset a node's CPU load value */ |
| extern void reset_node_load(char *node_name, uint32_t cpu_load); |
| |
| /* Reset all scheduling statistics |
| * IN level - clear backfilled_jobs count if set */ |
| extern void reset_stats(int level); |
| |
| /* |
| * restore_node_features - Make node and config (from slurm.conf) fields |
| * consistent for Features, Gres and Weight |
| * IN recover - |
| * 0, 1 = use data from config record, built using slurm.conf |
| * 2 = use data from node record, built from saved state |
| */ |
| extern void restore_node_features(int recover); |
| |
| /* Update time stamps for job step resume */ |
| extern void resume_job_step(struct job_record *job_ptr); |
| |
| /* run_backup - this is the backup controller; it runs in standby |
| * mode, assuming control when the primary controller stops responding */ |
| extern void run_backup(slurm_trigger_callbacks_t *callbacks); |
| |
| /* Spawn health check function for every node that is not DOWN */ |
| extern void run_health_check(void); |
| |
| /* save_all_state - save entire slurmctld state for later recovery */ |
| extern void save_all_state(void); |
| |
| /* make sure the assoc_mgr lists are up and running and state is |
| * restored */ |
| extern void ctld_assoc_mgr_init(slurm_trigger_callbacks_t *callbacks); |
| |
| /* send all info for the controller to accounting */ |
| extern void send_all_to_accounting(time_t event_time); |
| |
| /* The caller must hold a slurmctld lock that includes at least a node |
| * read lock before this is called */ |
| extern void set_cluster_cpus(void); |
| |
| /* sends all jobs in eligible state to accounting. Only needed at |
| * first registration |
| */ |
| extern int send_jobs_to_accounting(void); |
| |
| /* send all nodes in a DOWN-like state to accounting. Only needed at |
| * first registration |
| */ |
| extern int send_nodes_to_accounting(time_t event_time); |
| |
| /* Set a job's alias_list string */ |
| extern void set_job_alias_list(struct job_record *job_ptr); |
| |
| /* |
| * set_job_prio - set a default job priority |
| * IN job_ptr - pointer to the job_record |
| */ |
| extern void set_job_prio(struct job_record *job_ptr); |
| |
| /* |
| * set_node_down - make the specified node's state DOWN if possible |
| * (not in a DRAIN state), kill jobs as needed |
| * IN name - name of the node |
| * IN reason - why the node is DOWN |
| */ |
| extern void set_node_down (char *name, char *reason); |
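| |
| /* |
| * Example usage (hypothetical sketch; the node name and reason string are |
| * purely illustrative): |
| * |
| *    set_node_down("tux123", "Not responding"); |
| */ |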
| |
| /* |
| * set_node_down_ptr - make the specified compute node's state DOWN and |
| * kill jobs as needed |
| * IN node_ptr - node_ptr to the node |
| * IN reason - why the node is DOWN |
| */ |
| void set_node_down_ptr (struct node_record *node_ptr, char *reason); |
| |
| /* |
| * set_slurmctld_state_loc - create state directory as needed and "cd" to it |
| */ |
| extern void set_slurmctld_state_loc(void); |
| |
| /* set_slurmd_addr - establish the slurm_addr_t for the slurmd on each node |
| * Uses common data structures. */ |
| extern void set_slurmd_addr (void); |
| |
| /* |
| * signal_step_tasks - send specific signal to specific job step |
| * IN step_ptr - step record pointer |
| * IN signal - signal to send |
| * IN msg_type - message type to send |
| */ |
| void signal_step_tasks(struct step_record *step_ptr, uint16_t signal, |
| slurm_msg_type_t msg_type); |
| |
| /* |
| * signal_step_tasks_on_node - send specific signal to specific job step |
| * on a specific node. |
| * IN node_name - name of node on which to signal tasks |
| * IN step_ptr - step record pointer |
| * IN signal - signal to send |
| * IN msg_type - message type to send |
| */ |
| void signal_step_tasks_on_node(char* node_name, struct step_record *step_ptr, |
| uint16_t signal, slurm_msg_type_t msg_type); |
| |
| /* |
| * slurmctld_shutdown - wake up slurm_rpc_mgr thread via signal |
| * RET 0 or error code |
| */ |
| extern int slurmctld_shutdown(void); |
| |
| /* Perform periodic job step checkpoints (per user request) */ |
| extern void step_checkpoint(void); |
| |
| /* Update a job's record of allocated CPUs when a job step gets scheduled */ |
| extern void step_alloc_lps(struct step_record *step_ptr); |
| |
| /* |
| * step_create - creates a step_record for the job given by |
| * step_specs->job_id and sets it up according to step_specs. |
| * IN step_specs - job step specifications |
| * OUT new_step_record - pointer to the new step_record (NULL on error) |
| * IN batch_step - set if step is a batch script |
| * RET - 0 or error code |
| * NOTE: don't free the returned step_record because that is managed through |
| * the job. |
| */ |
| extern int step_create(job_step_create_request_msg_t *step_specs, |
| struct step_record** new_step_record, bool batch_step); |
| |
| /* |
| * step_layout_create - creates a step_layout according to the inputs. |
| * IN step_ptr - step having tasks laid out |
| * IN step_node_list - node list of hosts in step |
| * IN node_count - count of nodes in step allocation |
| * IN num_tasks - number of tasks in step |
| * IN cpus_per_task - number of cpus per task |
| * IN task_dist - type of task distribution |
| * IN plane_size - size of plane (only needed for the plane distribution) |
| * RET - NULL or slurm_step_layout_t * |
| * NOTE: the returned step_layout must be freed by the caller, usually |
| * when the step is freed. |
| */ |
| extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr, |
| char *step_node_list, |
| uint32_t node_count, |
| uint32_t num_tasks, |
| uint16_t cpus_per_task, |
| uint16_t task_dist, |
| uint16_t plane_size); |
| |
| /* start_power_mgr - Start power management thread as needed. The thread |
| * terminates automatically at slurmctld shutdown time. |
| * IN thread_id - pointer to thread ID of the started pthread. |
| */ |
| extern void start_power_mgr(pthread_t *thread_id); |
| |
| /* |
| * step_epilog_complete - note completion of epilog on some node and |
| * release its switch windows if appropriate. Can perform partition |
| * switch window releases. |
| * IN job_ptr - pointer to job which has completed epilog |
| * IN node_name - name of node which has completed epilog |
| */ |
| extern int step_epilog_complete(struct job_record *job_ptr, |
| char *node_name); |
| |
| /* |
| * step_partial_comp - Note the completion of a job step on at least |
| * some of its nodes |
| * IN req - step_completion_msg RPC from slurmstepd |
| * IN uid - UID issuing the request |
| * OUT rem - count of nodes for which responses are still pending |
| * OUT max_rc - highest return code for any step thus far |
| * RET 0 on success, otherwise ESLURM error code |
| */ |
| extern int step_partial_comp(step_complete_msg_t *req, uid_t uid, |
| int *rem, uint32_t *max_rc); |
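| |
| /* |
| * Example usage (hypothetical sketch; "comp_msg" and "rpc_uid" are |
| * illustrative locals in an RPC handler). Record a partial completion |
| * and detect when every node has reported: |
| * |
| *    int rem = 0; |
| *    uint32_t max_rc = 0; |
| *    int rc = step_partial_comp(comp_msg, rpc_uid, &rem, &max_rc); |
| *    if ((rc == SLURM_SUCCESS) && (rem == 0)) |
| *        debug("all nodes reported, highest step rc=%u", max_rc); |
| */ |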
| |
| /* Update time stamps for job step suspend */ |
| extern void suspend_job_step(struct job_record *job_ptr); |
| |
| /* |
| * Synchronize the batch jobs in the system with their files. |
| * All pending batch jobs must have script and environment files; |
| * no other jobs should have such files. |
| */ |
| extern int sync_job_files(void); |
| |
| /* After recovering job state, if using priority/basic then we increment the |
| * priorities of all jobs to avoid decrementing the base down to zero */ |
| extern void sync_job_priorities(void); |
| |
| /* |
| * update_job - update a job's parameters per the supplied specifications |
| * IN job_specs - a job's specification |
| * IN uid - uid of user issuing RPC |
| * RET 0 on success, otherwise an error code from slurm_errno.h |
| * global: job_list - global list of job entries |
| * last_job_update - time of last job table update |
| */ |
| extern int update_job (job_desc_msg_t * job_specs, uid_t uid); |
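| |
| /* |
| * Example usage (hypothetical sketch; the field names come from |
| * job_desc_msg_t in slurm.h and the values are illustrative). Raise a |
| * job's time limit to 120 minutes: |
| * |
| *    job_desc_msg_t job_specs; |
| *    slurm_init_job_desc_msg(&job_specs); |
| *    job_specs.job_id = 1234; |
| *    job_specs.time_limit = 120; |
| *    int rc = update_job(&job_specs, rpc_uid); |
| */ |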
| |
| /* |
| * Modify the account associated with a pending job |
| * IN module - where this is called from |
| * IN job_ptr - pointer to job which should be modified |
| * IN new_account - desired account name |
| * RET SLURM_SUCCESS or error code |
| */ |
| extern int update_job_account(char *module, struct job_record *job_ptr, |
| char *new_account); |
| |
| /* |
| * Modify the wckey associated with a pending job |
| * IN module - where this is called from |
| * IN job_ptr - pointer to job which should be modified |
| * IN new_wckey - desired wckey name |
| * RET SLURM_SUCCESS or error code |
| */ |
| extern int update_job_wckey(char *module, struct job_record *job_ptr, |
| char *new_wckey); |
| |
| /* Reset nodes_completing field for all jobs */ |
| extern void update_job_nodes_completing(void); |
| |
| /* Reset slurmctld logging based upon configuration parameters |
| * uses common slurmctld_conf data structure */ |
| extern void update_logging(void); |
| |
| /* |
| * update_node - update the configuration data for one or more nodes |
| * IN update_node_msg - update node request |
| * RET 0 or error code |
| * global: node_record_table_ptr - pointer to global node table |
| */ |
| extern int update_node ( update_node_msg_t * update_node_msg ) ; |
| |
| /* Update nodes accounting usage data */ |
| extern void update_nodes_acct_gather_data(void); |
| |
| /* |
| * update_node_record_acct_gather_data - update the energy data in the |
| * node_record |
| * IN msg - node energy data message |
| * RET 0 if no error, ENOENT if no such node |
| */ |
| extern int update_node_record_acct_gather_data( |
| acct_gather_node_resp_msg_t *msg); |
| |
| /* |
| * update_part - create or update a partition's configuration data |
| * IN part_desc - description of partition changes |
| * IN create_flag - create a new partition |
| * RET 0 or an error code |
| * global: part_list - list of partition entries |
| * last_part_update - update time of partition records |
| */ |
| extern int update_part (update_part_msg_t * part_desc, bool create_flag); |
| |
| /* Process job step update request from specified user, |
| * RET - 0 or error code */ |
| extern int update_step(step_update_request_msg_t *req, uid_t uid); |
| |
| /* |
| * validate_alloc_node - validate that the allocating node |
| * is allowed to use this partition |
| * IN part_ptr - pointer to a partition |
| * IN alloc_node - allocating node of the request |
| * RET 1 if permitted to run, 0 otherwise |
| */ |
| extern int validate_alloc_node(struct part_record *part_ptr, char* alloc_node); |
| |
| /* |
| * validate_group - validate that the submit uid is authorized to run in |
| * this partition |
| * IN part_ptr - pointer to a partition |
| * IN run_uid - user to run the job as |
| * RET 1 if permitted to run, 0 otherwise |
| */ |
| extern int validate_group (struct part_record *part_ptr, uid_t run_uid); |
| |
| /* Perform some size checks on strings we store to prevent |
| * a malicious user from filling slurmctld's memory |
| * RET 0 or error code */ |
| extern int validate_job_create_req(job_desc_msg_t * job_desc); |
| |
| /* |
| * validate_jobs_on_node - validate that any jobs that should be on the node |
| * are actually running; if not, clean up the job and/or node records. |
| * Call this function after validate_node_specs() sets the node state |
| * properly |
| * IN reg_msg - node registration message |
| */ |
| extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg); |
| |
| /* |
| * validate_node_specs - validate the node's specifications as valid, |
| * if not set state to down, in any case update last_response |
| * IN reg_msg - node registration message |
| * IN protocol_version - Version of Slurm on this node |
| * OUT newly_up - set if node newly brought into service |
| * RET 0 if no error, ENOENT if no such node, EINVAL if values too low |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg, |
| uint16_t protocol_version, bool *newly_up); |
| |
| /* |
| * validate_nodes_via_front_end - validate all nodes on a cluster as having |
| * a valid configuration as soon as the front-end registers. Individual |
| * nodes will not register with this configuration |
| * IN reg_msg - node registration message |
| * IN protocol_version - Version of Slurm on this node |
| * OUT newly_up - set if node newly brought into service |
| * RET 0 if no error, SLURM error code otherwise |
| * NOTE: READ lock_slurmctld config before entry |
| */ |
| extern int validate_nodes_via_front_end( |
| slurm_node_registration_status_msg_t *reg_msg, |
| uint16_t protocol_version, bool *newly_up); |
| |
| /* |
| * validate_slurm_user - validate that the uid is authorized to see |
| * privileged data (either user root or SlurmUser) |
| * IN uid - user to validate |
| * RET true if permitted to run, false otherwise |
| */ |
| extern bool validate_slurm_user(uid_t uid); |
| |
| /* |
| * validate_super_user - validate that the uid is authorized at the |
| * root, SlurmUser, or SLURMDB_ADMIN_SUPER_USER level |
| * IN uid - user to validate |
| * RET true if permitted to run, false otherwise |
| */ |
| extern bool validate_super_user(uid_t uid); |
| |
| /* |
| * validate_operator - validate that the uid is authorized at the |
| * root, SlurmUser, or SLURMDB_ADMIN_OPERATOR level |
| * IN uid - user to validate |
| * RET true if permitted to run, false otherwise |
| */ |
| extern bool validate_operator(uid_t uid); |
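| |
| /* |
| * Example usage (hypothetical sketch of a privilege check in an RPC |
| * handler; the choice of error code is illustrative): |
| * |
| *    if (!validate_operator(rpc_uid)) { |
| *        error("Security violation, uid=%u", (unsigned int) rpc_uid); |
| *        return ESLURM_ACCESS_DENIED; |
| *    } |
| */ |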
| |
| /* cleanup_completing() |
| * |
| * Clean up the JOB_COMPLETING flag and eventually |
| * requeue the job if there is a pending request |
| * for it. This function assumes the caller has the |
| * appropriate locks on the job_record. |
| * This function is called when a job |
| * completes, after either the slurmd epilog |
| * or the slurmctld epilog finishes, |
| * whichever comes last. |
| */ |
| extern void cleanup_completing(struct job_record *); |
| |
| |
| #endif /* !_HAVE_SLURMCTLD_H */ |