/*****************************************************************************\
 * job_resources.h - functions to manage data structure identifying specific
 *	CPUs allocated to a job, step or partition
 *****************************************************************************
 * Copyright (C) 2008 Lawrence Livermore National Security.
 * Written by Morris Jette <jette1@llnl.gov>.
 * CODE-OCEC-09-009. All rights reserved.
 *
 * This file is part of Slurm, a resource management program.
 * For details, see <https://slurm.schedmd.com/>.
 * Please also read the included file: DISCLAIMER.
 *
 * Slurm is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * In addition, as a special exception, the copyright holders give permission
 * to link the code of portions of this program with the OpenSSL library under
 * certain conditions as described in each individual source file, and
 * distribute linked combinations including the two. You must obey the GNU
 * General Public License in all respects for all of the code used other than
 * OpenSSL. If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not obligated to do
 * so. If you do not wish to do so, delete this exception statement from your
 * version. If you delete this exception statement from all source files in
 * the program, then also delete it here.
 *
 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Slurm; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/

#ifndef _JOB_RESOURCES_H
#define _JOB_RESOURCES_H

#include <inttypes.h>

#include "src/common/bitstring.h"
#include "src/common/pack.h"
#include "src/slurmctld/slurmctld.h"
/* struct job_resources defines exactly which resources are allocated
 * to a job, step, partition, etc.
 *
 * core_bitmap - Bitmap of allocated cores for all nodes and sockets.
 *		The bitmap reflects allocated resources only on the
 *		allocated nodes, not the full system resources.
 * core_bitmap_used - Bitmap of cores allocated to job steps (see above)
 * cores_per_socket - Count of cores per socket on this node, built by
 *		build_job_resources() to ensure consistent
 *		interpretation of core_bitmap
 * cpus - Count of desired/allocated CPUs per node for job/step
 * cpus_used - For a job, count of CPUs per node used by job steps
 * cpu_array_cnt - Count of elements in cpu_array_* below
 * cpu_array_value - Count of allocated CPUs per node for job
 * cpu_array_reps - Number of consecutive nodes on which cpu_array_value
 *		is duplicated. See NOTES below.
 * memory_allocated - MB per node reserved for the job or step
 * memory_used - MB per node of memory consumed by job steps
 * nhosts - Number of nodes in the allocation. On a
 *		bluegene machine this represents the number
 *		of midplanes used. This should always be
 *		the number of bits set in node_bitmap.
 * node_bitmap - Bitmap of nodes allocated to the job. Unlike the
 *		node_bitmap in slurmctld's job record, the bits
 *		here do NOT get cleared as the job completes on a
 *		node
 * node_req - NODE_CR_RESERVED|NODE_CR_ONE_ROW|NODE_CR_AVAILABLE
 * nodes - Names of nodes in original job allocation
 * ncpus - Number of processors in the allocation
 * sock_core_rep_count - Number of consecutive nodes to which sockets_per_node
 *		and cores_per_socket apply, built by
 *		build_job_resources() to ensure consistent
 *		interpretation of core_bitmap
 * sockets_per_node - Count of sockets on this node, built by
 *		build_job_resources() to ensure consistent
 *		interpretation of core_bitmap
 * tasks_per_node - Expected tasks to launch per node. Currently used only
 *		by cons_tres for tres_per_task support at resource
 *		allocation time. No need to save/restore or pack.
 * whole_node - Job allocated full node (used only by select/cons_tres)
 *
 * NOTES:
 * cpu_array_* contains the same information as "cpus", but in a more compact
 * format. For example if cpus = {4, 4, 2, 2, 2, 2, 2, 2} then cpu_array_cnt=2,
 * cpu_array_value = {4, 2} and cpu_array_reps = {2, 6}. We do not need to
 * save/restore these values, but generate them by calling
 * build_job_resources_cpu_array().
 *
 * Sample layout of core_bitmap:
 * |               Node_0              |               Node_1              |
 * |      Sock_0     |      Sock_1     |      Sock_0     |      Sock_1     |
 * | Core_0 | Core_1 | Core_0 | Core_1 | Core_0 | Core_1 | Core_0 | Core_1 |
 * | Bit_0  | Bit_1  | Bit_2  | Bit_3  | Bit_4  | Bit_5  | Bit_6  | Bit_7  |
 *
 * If a job changes size (relinquishes nodes), the node_bitmap will remain
 * unchanged, but cpus, cpus_used, cpu_array_*, and memory_used will be
 * updated (e.g. cpus and memory_used on that node cleared).
 */
struct job_resources {
	bitstr_t *core_bitmap;
	bitstr_t *core_bitmap_used;
	uint32_t cpu_array_cnt;
	uint16_t *cpu_array_value;
	uint32_t *cpu_array_reps;
	uint16_t *cpus;
	uint16_t *cpus_used;
	uint16_t *cores_per_socket;
	uint16_t cr_type;
	uint64_t *memory_allocated;
	uint64_t *memory_used;
	uint32_t next_step_node_inx;
	uint32_t nhosts;
	bitstr_t *node_bitmap;
	uint32_t node_req;
	char *nodes;
	uint32_t ncpus;
	uint32_t *sock_core_rep_count;
	uint16_t *sockets_per_node;
	uint16_t *tasks_per_node;
	uint16_t threads_per_core;
	uint8_t whole_node;
};
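
/* Example (a minimal sketch, not part of this API): expanding the compact
 * cpu_array_* representation back into the per-node CPU counts held in the
 * "cpus" array, per the NOTES above (assumes job_resrcs_ptr has been built):
 *
 * int node_inx = 0;
 * for (uint32_t i = 0; i < job_resrcs_ptr->cpu_array_cnt; i++) {
 * 	for (uint32_t j = 0; j < job_resrcs_ptr->cpu_array_reps[i];
 * 	     j++, node_inx++)
 * 		info("node %d: %u CPUs", node_inx,
 * 		     job_resrcs_ptr->cpu_array_value[i]);
 * }
 */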

/*
 * node_res_record.node_state tracks the allocation state of each node.
 * When a job is allocated, these flags protect nodes in an OverSubscribe=NO
 * or OverSubscribe=EXCLUSIVE partition from other jobs.
 *
 * NOTES:
 * - If a node is in use by an OverSubscribe=NO partition, some CPUs/memory
 *   may still be available.
 * - Use caution with NODE_CR_AVAILABLE: an oversubscribing partition could
 *   still be full.
 * - These values are staggered so that they can be incremented as multiple
 *   jobs are allocated to each node. This is needed to support preemption,
 *   which can override these protections.
 */
enum node_cr_state {
	NODE_CR_AVAILABLE = 0,	 /* The node may be IDLE or IN USE (shared) */
	NODE_CR_ONE_ROW = 1,	 /* in use by OverSubscribe=NO part */
	NODE_CR_RESERVED = 64000 /* in use by OverSubscribe=EXCLUSIVE part */
};
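
/* Example (a hypothetical sketch, not code from the select plugins, showing
 * how the staggered values above permit additive per-node accounting):
 *
 * node_state += NODE_CR_ONE_ROW;	// one more OverSubscribe=NO job
 * if (node_state >= NODE_CR_RESERVED)	// node is held exclusively
 * 	return;				// hypothetical rejection path
 */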

/* Create an empty job_resources data structure, just a call to xmalloc() */
extern job_resources_t *create_job_resources(void);

/* Set the socket and core counts associated with a set of selected
 * nodes of a job_resources data structure based upon slurmctld state.
 * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based
 * upon the value of node_bitmap, also creates core_bitmap based upon
 * the total number of cores in the allocation). Call this ONLY from
 * slurmctld. Example of use:
 *
 * job_resources_t *job_resrcs_ptr = create_job_resources();
 * node_name2bitmap("dummy[2,5,12,16]", true, &(job_resrcs_ptr->node_bitmap));
 * rc = build_job_resources(job_resrcs_ptr);
 */
extern int build_job_resources(job_resources_t *job_resrcs_ptr);

/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the
 * values of cpus in an existing data structure.
 * Return total CPU count or -1 on error */
extern int build_job_resources_cpu_array(job_resources_t *job_resrcs_ptr);

/* Validate that a job_resources data structure originally built using
 * build_job_resources() is still valid based upon slurmctld state.
 * NOTE: Reset the node_bitmap field before calling this function.
 * If the sockets_per_node or cores_per_socket for any node in the allocation
 * has changed, return SLURM_ERROR. Otherwise return SLURM_SUCCESS. Any
 * change in a node's socket or core count requires that any job running on
 * that node be killed. Example of use:
 *
 * rc = valid_job_resources(job_resrcs_ptr);
 */
extern int valid_job_resources(job_resources_t *job_resrcs_ptr);

/* Make a copy of a job_resources data structure,
 * free using free_job_resources() */
extern job_resources_t *copy_job_resources(job_resources_t *job_resrcs_ptr);

/* Free a job_resources data structure created using copy_job_resources() or
 * unpack_job_resources() */
extern void free_job_resources(job_resources_t **job_resrcs_pptr);
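
/* Example of use (a minimal sketch):
 *
 * job_resources_t *copy_ptr = copy_job_resources(job_resrcs_ptr);
 * ...
 * free_job_resources(&copy_ptr);	// also clears the caller's pointer
 */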

/* Log the contents of a job_resources data structure using info() */
extern void log_job_resources(void *job_ptr);

/* Un/pack full job_resources data structure */
extern void pack_job_resources(job_resources_t *job_resrcs_ptr, buf_t *buffer,
			       uint16_t protocol_version);
extern int unpack_job_resources(job_resources_t **job_resrcs_pptr,
				buf_t *buffer, uint16_t protocol_version);
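
/* Example of use (a minimal sketch; BUF_SIZE is an illustrative constant):
 *
 * job_resources_t *new_job_resrcs_ptr = NULL;
 * buf_t *buffer = init_buf(BUF_SIZE);
 * pack_job_resources(job_resrcs_ptr, buffer, SLURM_PROTOCOL_VERSION);
 * set_buf_offset(buffer, 0);	// rewind before unpacking
 * rc = unpack_job_resources(&new_job_resrcs_ptr, buffer,
 *			     SLURM_PROTOCOL_VERSION);
 * free_buf(buffer);
 */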

/* Reset the node_bitmap in a job_resources data structure.
 * This is needed after a restart/reconfiguration since nodes can
 * be added or removed from the system, resulting in changes to the
 * bitmap size or bit positions */
extern int reset_node_bitmap(void *job_ptr);

/* For a given node_id, socket_id and core_id, get its offset within
 * the core bitmap */
extern int get_job_resources_offset(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id, uint16_t socket_id,
				    uint16_t core_id);
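
/* Example of use (a minimal sketch): with the sample core_bitmap layout shown
 * above (2 nodes x 2 sockets x 2 cores), node 1 / socket 0 / core 1 resolves
 * to bit 5:
 *
 * bit_inx = get_job_resources_offset(job_resrcs_ptr, 1, 0, 1);
 */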

/* Get/set bit value at specified location.
 * node_id, socket_id and core_id are all zero origin */
extern int get_job_resources_bit(job_resources_t *job_resrcs_ptr,
				 uint32_t node_id, uint16_t socket_id,
				 uint16_t core_id);
extern int set_job_resources_bit(job_resources_t *job_resrcs_ptr,
				 uint32_t node_id, uint16_t socket_id,
				 uint16_t core_id);
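
/* Example of use (a minimal sketch): mark core 1 of socket 0 on the job's
 * third node (node_id 2) as allocated, then read the bit back:
 *
 * rc = set_job_resources_bit(job_resrcs_ptr, 2, 0, 1);
 * bit = get_job_resources_bit(job_resrcs_ptr, 2, 0, 1);
 */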

/* For every bit set in the core_bitmap of the "from" resources structure at
 * from_node_offset, set the corresponding core_bitmap bit in the "new"
 * resources structure at new_node_offset */
extern int job_resources_bits_copy(job_resources_t *new_job_resrcs_ptr,
				   uint16_t new_node_offset,
				   job_resources_t *from_job_resrcs_ptr,
				   uint16_t from_node_offset);

/*
 * AND two job_resources structures.
 * Every node/core set in both job_resrcs1_ptr and job_resrcs2_ptr is set in
 * the resulting job_resrcs1_ptr data structure
 * RET SLURM_SUCCESS or an error code
 */
extern int job_resources_and(job_resources_t *job_resrcs1_ptr,
			     job_resources_t *job_resrcs2_ptr);

/*
 * OR two job_resources structures.
 * Every node/core set in either job_resrcs1_ptr or job_resrcs2_ptr is set in
 * the resulting job_resrcs1_ptr data structure
 * RET SLURM_SUCCESS or an error code
 */
extern int job_resources_or(job_resources_t *job_resrcs1_ptr,
			    job_resources_t *job_resrcs2_ptr);

/* Get/clear/set bit value at specified location for whole node allocations:
 * get is for any socket/core on the specified node
 * set is for all sockets/cores on the specified node
 * fully compatible with set/get_job_resources_bit()
 * node_id is zero origin */
extern int get_job_resources_node(job_resources_t *job_resrcs_ptr,
				  uint32_t node_id);
extern int clear_job_resources_node(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id);
extern int set_job_resources_node(job_resources_t *job_resrcs_ptr,
				  uint32_t node_id);

/* Completely remove specified node from job resources structure */
extern int extract_job_resources_node(job_resources_t *job_resrcs_ptr,
				      uint32_t node_id);

/* Return the count of core_bitmap bits set for the specified node */
extern int count_job_resources_node(job_resources_t *job_resrcs_ptr,
				    uint32_t node_id);

/* Return a copy of core_bitmap for the specified node only */
extern bitstr_t *copy_job_resources_node(job_resources_t *job_resrcs_ptr,
					 uint32_t node_id);

/* Get socket and core count for a specific node_id (zero origin) */
extern int get_job_resources_cnt(job_resources_t *job_resrcs_ptr,
				 uint32_t node_id, uint16_t *socket_cnt,
				 uint16_t *cores_per_socket_cnt);

/* Get CPU count for a specific node_id (zero origin), return -1 on error */
extern int get_job_resources_cpus(job_resources_t *job_resrcs_ptr,
				  uint32_t node_id);

/*
 * Test if job can fit into the given full-length core_bitmap
 * IN job_resrcs_ptr - resources allocated to a job
 * IN full_bitmap - bitmap of available cores
 * RET 1 on success, 0 otherwise
 */
extern int job_fits_into_cores(job_resources_t *job_resrcs_ptr,
			       bitstr_t *full_bitmap);

/*
 * Add job to full-length core_bitmap
 * IN job_resrcs_ptr - resources allocated to a job
 * IN/OUT full_core_bitmap - bitmap of available cores, allocated as needed
 */
extern void add_job_to_cores(job_resources_t *job_resrcs_ptr,
			     bitstr_t **full_core_bitmap);
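
/* Example of use (a minimal sketch): test that a job still fits before
 * marking its cores as in use in the full-system bitmap:
 *
 * if (job_fits_into_cores(job_resrcs_ptr, full_core_bitmap))
 * 	add_job_to_cores(job_resrcs_ptr, &full_core_bitmap);
 */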

/* Given a job_resources pointer and a global node index, return the index of
 * that node in the job_resrcs_ptr->cpus array. Return -1 if invalid */
extern int job_resources_node_inx_to_cpu_inx(job_resources_t *job_resrcs_ptr,
					     int node_inx);
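
/* Example of use (a minimal sketch): look up the CPUs allocated to the job
 * on the system node with index node_inx:
 *
 * uint16_t cpus_here = 0;
 * int cpu_inx = job_resources_node_inx_to_cpu_inx(job_resrcs_ptr, node_inx);
 * if (cpu_inx >= 0)
 * 	cpus_here = job_resrcs_ptr->cpus[cpu_inx];
 */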

/* Return the CPU count allocated to the job on a specific node: job_node_inx
 * is the node's index within the job's allocation and sys_node_inx is the
 * node's global system index */
extern uint16_t job_resources_get_node_cpu_cnt(job_resources_t *job_resrcs_ptr,
					       int job_node_inx,
					       int sys_node_inx);

#endif /* !_JOB_RESOURCES_H */