blob: 5626fbe52650622417de479087694d55ce69cb63 [file]
/*****************************************************************************\
* gres_sched.c - Scheduling functions used by cons_tres
*****************************************************************************
* Copyright (C) 2020 SchedMD LLC.
* Derived in large part from code previously in common/gres.c
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "src/common/slurm_xlator.h"
#include "gres_sched.h"
#include "src/common/xstring.h"
/*
 * Test whether one GRES bit on one node may be used given the reservation
 * exclusion/inclusion state.
 * IN resv_exc_ptr - reservation GRES state, may be NULL (no restriction)
 * IN node_inx - index into the reservation's per-node gres_bit_alloc array
 * IN gres_bit - bit to test in that node's bitmap
 * RET true if the GRES may be used
 */
static bool _can_use_gres_exc_topo(resv_exc_t *resv_exc_ptr,
				   int node_inx, int gres_bit)
{
	gres_job_state_t *resv_js;
	bool bit_is_set;

	if (!resv_exc_ptr)
		return true;

	/* The exclusion record takes precedence over the inclusion record */
	resv_js = resv_exc_ptr->gres_js_exc;
	if (!resv_js)
		resv_js = resv_exc_ptr->gres_js_inc;
	if (!resv_js)
		return true;

	/*
	 * No bitmap for this node: nothing is excluded (usable) or
	 * nothing is included (not usable).
	 */
	if (!resv_js->gres_bit_alloc || !resv_js->gres_bit_alloc[node_inx]) {
		if (resv_exc_ptr->gres_js_exc)
			return true;
		return false;
	}

	bit_is_set = bit_test(resv_js->gres_bit_alloc[node_inx], gres_bit);

	if (bit_is_set && resv_exc_ptr->gres_js_exc) {
		log_flag(SELECT_TYPE, "can't include!, it is excluded %d %d",
			 node_inx, gres_bit);
		return false;
	}

	if (!bit_is_set && resv_exc_ptr->gres_js_inc) {
		log_flag(SELECT_TYPE,
			 "can't include!, it is not included %d %d",
			 node_inx, gres_bit);
		return false;
	}

	return true;
}
/*
 * Adjust the count of available typed GRES on a node for the reservation
 * exclusion/inclusion state.
 * IN resv_exc_ptr - reservation GRES state, may be NULL (no adjustment)
 * IN gres_js_in - job GRES request, supplies the type_id to compare against
 * IN node_inx - index into the reservation's gres_cnt_node_alloc array
 * IN/OUT avail_gres - available GRES count, updated in place
 */
static void _handle_gres_exc_by_type(resv_exc_t *resv_exc_ptr,
				     gres_job_state_t *gres_js_in,
				     int node_inx, uint64_t *avail_gres)
{
	gres_job_state_t *resv_js;
	uint64_t resv_cnt;

	if (!resv_exc_ptr)
		return;

	/* The exclusion record takes precedence over the inclusion record */
	resv_js = resv_exc_ptr->gres_js_exc;
	if (!resv_js)
		resv_js = resv_exc_ptr->gres_js_inc;
	if (!resv_js)
		return;

	if (resv_js->type_name && (resv_js->type_id != gres_js_in->type_id)) {
		/*
		 * Different type: an exclusion does not touch this type,
		 * an inclusion leaves nothing of this type available.
		 */
		if (!resv_exc_ptr->gres_js_exc)
			*avail_gres = 0;
		return;
	}

	resv_cnt = resv_js->gres_cnt_node_alloc[node_inx];
	if (!resv_exc_ptr->gres_js_exc) {
		/* Inclusion: only the reserved count is available */
		*avail_gres = resv_cnt;
	} else if (resv_cnt < *avail_gres) {
		/* Exclusion: remove the reserved count */
		*avail_gres -= resv_cnt;
	} else {
		*avail_gres = 0;
	}

	log_flag(SELECT_TYPE, "avail_gres for node %d is now %"PRIu64,
		 node_inx, *avail_gres);
}
/*
 * Adjust the count of available untyped GRES on a node for the reservation
 * exclusion/inclusion state.
 * IN resv_exc_ptr - reservation GRES state, may be NULL (no adjustment)
 * IN gres_js_in - job GRES request (not referenced here)
 * IN node_inx - index into the reservation's gres_cnt_node_alloc array
 * IN/OUT avail_gres - available GRES count, updated in place
 */
static void _handle_gres_exc_basic(resv_exc_t *resv_exc_ptr,
				   gres_job_state_t *gres_js_in,
				   int node_inx, uint64_t *avail_gres)
{
	gres_job_state_t *resv_js;
	uint64_t resv_cnt;

	if (!resv_exc_ptr)
		return;

	/* The exclusion record takes precedence over the inclusion record */
	resv_js = resv_exc_ptr->gres_js_exc;
	if (!resv_js)
		resv_js = resv_exc_ptr->gres_js_inc;
	if (!resv_js)
		return;

	resv_cnt = resv_js->gres_cnt_node_alloc[node_inx];
	if (!resv_exc_ptr->gres_js_exc) {
		/* Inclusion: only the reserved count is available */
		*avail_gres = resv_cnt;
	} else if (resv_cnt < *avail_gres) {
		/* Exclusion: remove the reserved count */
		*avail_gres -= resv_cnt;
	} else {
		*avail_gres = 0;
	}

	log_flag(SELECT_TYPE, "avail_gres for node %d is now %"PRIu64,
		 node_inx, *avail_gres);
}
/*
 * Determine how many GRES of a given type can be used by this job on a
 * given node and return a structure with the details. Note that multiple
 * GRES of a given type model can be distributed over multiple topo structures,
 * so we need to OR the core_bitmap over all of them.
 *
 * IN gres_state_job - job's GRES request (gres_data is a gres_job_state_t)
 * IN gres_state_node - node's GRES state (gres_data is a gres_node_state_t)
 * IN resv_exc_ptr - reservation GRES to exclude/include, may be NULL
 * IN use_total_gres - if set, ignore GRES already allocated on the node
 * IN/OUT core_bitmap - cores available to the job, may be NULL. Cores of
 *	sockets rejected here are cleared when enforce_binding is set
 * IN sockets - socket count on the node
 * IN cores_per_sock - core count per socket on the node
 * IN job_id - not referenced here
 * IN node_name - node name, used in error messages
 * IN enforce_binding - if set, only count GRES on sockets with usable cores
 * IN s_p_n - sockets-per-node limit
 * IN/OUT req_sock_map - allocated on demand and updated with the sockets
 *	needed to satisfy gres_per_node or gres_per_task
 * IN user_id - not referenced here
 * IN node_inx - node index used for the resv_exc_ptr lookup
 * RET sock_gres_t (release with gres_sock_delete()), or NULL if the node
 *	can not satisfy the request
 */
static sock_gres_t *_build_sock_gres_by_topo(
	gres_state_t *gres_state_job,
	gres_state_t *gres_state_node,
	resv_exc_t *resv_exc_ptr,
	bool use_total_gres, bitstr_t *core_bitmap,
	uint16_t sockets, uint16_t cores_per_sock,
	uint32_t job_id, char *node_name,
	bool enforce_binding, uint32_t s_p_n,
	bitstr_t **req_sock_map,
	uint32_t user_id, const uint32_t node_inx)
{
	gres_job_state_t *gres_js = gres_state_job->gres_data;
	gres_node_state_t *gres_ns = gres_state_node->gres_data;
	gres_node_state_t *alt_gres_ns = NULL;
	int i, j, s, c;
	uint32_t tot_cores;
	sock_gres_t *sock_gres;
	int64_t add_gres;
	uint64_t avail_gres, min_gres = 0;
	bool match = false;
	bool use_busy_dev = gres_use_busy_dev(gres_state_node, use_total_gres);

	if (gres_ns->gres_cnt_avail == 0)
		return NULL;

	if (!use_total_gres)
		alt_gres_ns = gres_ns->alt_gres ?
			gres_ns->alt_gres->gres_data : NULL;

	sock_gres = xmalloc(sizeof(sock_gres_t));
	sock_gres->sock_cnt = sockets;
	sock_gres->bits_by_sock = xcalloc(sockets, sizeof(bitstr_t *));
	sock_gres->cnt_by_sock = xcalloc(sockets, sizeof(uint64_t));
	tot_cores = sockets * cores_per_sock;

	for (i = 0; i < gres_ns->topo_cnt; i++) {
		bool use_all_sockets = false;

		if (gres_js->type_name &&
		    (gres_js->type_id != gres_ns->topo_type_id[i]))
			continue;	/* Wrong type_model */
		if (use_busy_dev &&
		    (gres_ns->topo_gres_cnt_alloc[i] == 0))
			continue;
		if (!use_total_gres && !gres_ns->no_consume &&
		    (gres_ns->topo_gres_cnt_alloc[i] >=
		     gres_ns->topo_gres_cnt_avail[i])) {
			continue;	/* No GRES remaining */
		}

		if (!_can_use_gres_exc_topo(resv_exc_ptr, node_inx, i))
			continue;	/* Not allowed in resv_exc_ptr */

		if (!use_total_gres && !gres_ns->no_consume) {
			avail_gres = gres_ns->topo_gres_cnt_avail[i] -
				     gres_ns->topo_gres_cnt_alloc[i];
		} else {
			avail_gres = gres_ns->topo_gres_cnt_avail[i];
		}
		if (avail_gres == 0)
			continue;

		/*
		 * Job requested SHARING or SHARED. Filter out resources already
		 * allocated to the other GRES type.
		 */
		if (alt_gres_ns && alt_gres_ns->gres_bit_alloc &&
		    gres_ns->topo_gres_bitmap[i]) {
			c = bit_overlap(gres_ns->topo_gres_bitmap[i],
					alt_gres_ns->gres_bit_alloc);
			if (c > 0) {
				/*
				 * Here we are using the main one to determine
				 * if the alt is shared or not. If the main one
				 * is shared then we just continue here,
				 * otherwise we know the alt is shared.
				 */
				if (gres_id_shared(
					    gres_state_node->config_flags))
					continue;
				/*
				 * avail_gres is unsigned: test before
				 * subtracting so an overlap larger than the
				 * remaining count can not wrap around.
				 */
				if ((uint64_t) c >= avail_gres)
					continue;
				avail_gres -= c;
			}
		}

		/*
		 * By default only allow one sharing gres per job.
		 * Test use_total_gres so we don't reject shared gres
		 * jobs as never runnable (see bug 15283)
		 */
		if (gres_id_shared(gres_state_node->config_flags) &&
		    !(slurm_conf.select_type_param &
		      MULTIPLE_SHARING_GRES_PJ) &&
		    (avail_gres > sock_gres->max_node_gres) &&
		    !use_total_gres) {
			sock_gres->max_node_gres = avail_gres;
		}

		/* topo_core_bitmap itself may be NULL, test before indexing */
		if ((core_bitmap && (tot_cores != bit_size(core_bitmap))) ||
		    (gres_ns->topo_core_bitmap &&
		     gres_ns->topo_core_bitmap[i] &&
		     (tot_cores != bit_size(gres_ns->topo_core_bitmap[i])))) {
			error("%s: Core bitmaps size mismatch on node %s",
			      __func__, node_name);
			match = false;
			break;
		}

		/*
		 * If some GRES is available on every socket,
		 * treat like no topo_core_bitmap is specified
		 */
		if (gres_ns->topo_core_bitmap &&
		    gres_ns->topo_core_bitmap[i]) {
			use_all_sockets = true;
			for (s = 0; s < sockets; s++) {
				bool use_this_socket = false;
				for (c = 0; c < cores_per_sock; c++) {
					j = (s * cores_per_sock) + c;
					if (bit_test(gres_ns->
						     topo_core_bitmap[i], j)) {
						use_this_socket = true;
						break;
					}
				}
				if (!use_this_socket) {
					use_all_sockets = false;
					break;
				}
			}
		}

		if (!gres_ns->topo_core_bitmap ||
		    !gres_ns->topo_core_bitmap[i] ||
		    use_all_sockets) {
			/*
			 * Not constrained by core, but only specific
			 * GRES may be available (save their bitmap)
			 */
			sock_gres->cnt_any_sock += avail_gres;
			sock_gres->total_cnt += avail_gres;
			if (!sock_gres->bits_any_sock) {
				sock_gres->bits_any_sock =
					bit_copy(gres_ns->
						 topo_gres_bitmap[i]);
			} else {
				bit_or(sock_gres->bits_any_sock,
				       gres_ns->topo_gres_bitmap[i]);
			}
			match = true;
			continue;
		}

		/* Constrained by core */
		for (s = 0; ((s < sockets) && avail_gres); s++) {
			if (enforce_binding && core_bitmap) {
				for (c = 0; c < cores_per_sock; c++) {
					j = (s * cores_per_sock) + c;
					if (bit_test(core_bitmap, j))
						break;
				}
				if (c >= cores_per_sock) {
					/* No available cores on this socket */
					continue;
				}
			}
			for (c = 0; c < cores_per_sock; c++) {
				j = (s * cores_per_sock) + c;
				if (gres_ns->topo_core_bitmap[i] &&
				    !bit_test(gres_ns->topo_core_bitmap[i],
					      j))
					continue;
				if (!gres_ns->topo_gres_bitmap[i]) {
					error("%s: topo_gres_bitmap NULL on node %s",
					      __func__, node_name);
					continue;
				}
				if (!sock_gres->bits_by_sock[s]) {
					sock_gres->bits_by_sock[s] =
						bit_copy(gres_ns->
							 topo_gres_bitmap[i]);
				} else {
					bit_or(sock_gres->bits_by_sock[s],
					       gres_ns->topo_gres_bitmap[i]);
				}
				/*
				 * The whole count is credited to the first
				 * matching socket; zeroing avail_gres ends
				 * the socket loop.
				 */
				sock_gres->cnt_by_sock[s] += avail_gres;
				sock_gres->total_cnt += avail_gres;
				avail_gres = 0;
				match = true;
				break;
			}
		}
	}

	/* Process per-GRES limits */
	if (match && gres_js->gres_per_socket) {
		/*
		 * Clear core bitmap on sockets with insufficient GRES
		 * and disable excess GRES per socket
		 */
		for (s = 0; s < sockets; s++) {
			if (sock_gres->cnt_by_sock[s] <
			    gres_js->gres_per_socket) {
				/* Insufficient GRES, clear count */
				sock_gres->total_cnt -=
					sock_gres->cnt_by_sock[s];
				sock_gres->cnt_by_sock[s] = 0;
				if (enforce_binding && core_bitmap) {
					i = s * cores_per_sock;
					bit_nclear(core_bitmap, i,
						   i + cores_per_sock - 1);
				}
			} else if (sock_gres->cnt_by_sock[s] >
				   gres_js->gres_per_socket) {
				/*
				 * Excess GRES, reduce count. Keep the
				 * difference in 64 bits, the counters are
				 * uint64_t.
				 */
				uint64_t excess = sock_gres->cnt_by_sock[s] -
						  gres_js->gres_per_socket;
				sock_gres->cnt_by_sock[s] =
					gres_js->gres_per_socket;
				sock_gres->total_cnt -= excess;
			}
		}
	}

	/*
	 * Satisfy sockets-per-node (s_p_n) limit by selecting the sockets with
	 * the most GRES. Sockets with low GRES counts have their core_bitmap
	 * cleared so that _allocate_sc() in cons_tres/job_test.c does not
	 * remove sockets needed to satisfy the job's GRES specification.
	 */
	if (match && enforce_binding && core_bitmap && (s_p_n < sockets)) {
		int avail_sock = 0;
		bool *avail_sock_flag = xcalloc(sockets, sizeof(bool));

		/* Flag sockets having both GRES and at least one usable core */
		for (s = 0; s < sockets; s++) {
			if (sock_gres->cnt_by_sock[s] == 0)
				continue;
			for (c = 0; c < cores_per_sock; c++) {
				i = (s * cores_per_sock) + c;
				if (!bit_test(core_bitmap, i))
					continue;
				avail_sock++;
				avail_sock_flag[s] = true;
				break;
			}
		}

		/* Drop the socket with the fewest GRES until within s_p_n */
		while (avail_sock > s_p_n) {
			int low_gres_sock_inx = -1;
			for (s = 0; s < sockets; s++) {
				if (!avail_sock_flag[s])
					continue;
				if ((low_gres_sock_inx == -1) ||
				    (sock_gres->cnt_by_sock[s] <
				     sock_gres->cnt_by_sock[low_gres_sock_inx]))
					low_gres_sock_inx = s;
			}
			if (low_gres_sock_inx == -1)
				break;
			s = low_gres_sock_inx;
			i = s * cores_per_sock;
			bit_nclear(core_bitmap, i, i + cores_per_sock - 1);
			sock_gres->total_cnt -= sock_gres->cnt_by_sock[s];
			sock_gres->cnt_by_sock[s] = 0;
			avail_sock--;
			avail_sock_flag[s] = false;
		}
		xfree(avail_sock_flag);
	}

	if (match) {
		if (gres_js->gres_per_node)
			min_gres = gres_js->gres_per_node;
		if (gres_js->gres_per_task)
			min_gres = MAX(min_gres, gres_js->gres_per_task);
		if (sock_gres->total_cnt < min_gres)
			match = false;
	}

	/*
	 * Identify sockets which are required to satisfy
	 * gres_per_node or task specification so that allocated tasks
	 * can be distributed over multiple sockets if necessary.
	 */
	add_gres = min_gres - sock_gres->cnt_any_sock;
	if (match && core_bitmap && (add_gres > 0)) {
		int best_sock_inx = -1;
		bool *avail_sock_flag = xcalloc(sockets, sizeof(bool));

		/* Flag usable sockets and find the one with the most GRES */
		for (s = 0; s < sockets; s++) {
			if (sock_gres->cnt_by_sock[s] == 0)
				continue;
			for (c = 0; c < cores_per_sock; c++) {
				i = (s * cores_per_sock) + c;
				if (!bit_test(core_bitmap, i))
					continue;
				avail_sock_flag[s] = true;
				if ((best_sock_inx == -1) ||
				    (sock_gres->cnt_by_sock[s] >
				     sock_gres->cnt_by_sock[best_sock_inx])) {
					best_sock_inx = s;
				}
				break;
			}
		}

		/* Require sockets, best first, until add_gres is covered */
		while ((best_sock_inx != -1) && (add_gres > 0)) {
			if (*req_sock_map == NULL)
				*req_sock_map = bit_alloc(sockets);
			bit_set(*req_sock_map, best_sock_inx);
			add_gres -= sock_gres->cnt_by_sock[best_sock_inx];
			avail_sock_flag[best_sock_inx] = false;
			if (add_gres <= 0)
				break;
			/* Find next best socket */
			best_sock_inx = -1;
			for (s = 0; s < sockets; s++) {
				if ((sock_gres->cnt_by_sock[s] == 0) ||
				    !avail_sock_flag[s])
					continue;
				if ((best_sock_inx == -1) ||
				    (sock_gres->cnt_by_sock[s] >
				     sock_gres->cnt_by_sock[best_sock_inx])) {
					best_sock_inx = s;
				}
			}
		}
		xfree(avail_sock_flag);
	}

	if (!match) {
		gres_sock_delete(sock_gres);
		sock_gres = NULL;
	}
	return sock_gres;
}
/*
 * Determine how many GRES of a given type can be used by this job on a
 * given node and return a structure with the details. Note that multiple
 * GRES of a given type model can be configured, so pick the right one.
 *
 * IN gres_js - job's GRES request
 * IN gres_ns - node's GRES state
 * IN resv_exc_ptr - reservation GRES to exclude/include, may be NULL
 * IN use_total_gres - if set, ignore GRES already allocated on the node
 * IN core_bitmap, sockets, cores_per_sock, job_id, node_name - not
 *	referenced here
 * IN node_inx - node index used for the resv_exc_ptr lookup
 * RET sock_gres_t with cnt_any_sock/total_cnt set, or NULL if no configured
 *	type can satisfy the request
 */
static sock_gres_t *_build_sock_gres_by_type(
	gres_job_state_t *gres_js,
	gres_node_state_t *gres_ns,
	resv_exc_t *resv_exc_ptr,
	bool use_total_gres, bitstr_t *core_bitmap,
	uint16_t sockets, uint16_t cores_per_sock,
	uint32_t job_id, char *node_name, uint32_t node_inx)
{
	int i;
	sock_gres_t *sock_gres;
	uint64_t avail_gres, min_gres = 1, node_avail;
	bool match = false;

	if (gres_js->gres_per_node)
		min_gres = gres_js->gres_per_node;
	if (gres_js->gres_per_socket)
		min_gres = MAX(min_gres, gres_js->gres_per_socket);
	if (gres_js->gres_per_task)
		min_gres = MAX(min_gres, gres_js->gres_per_task);

	/*
	 * Node-wide count still free, used to cap every per-type count.
	 * The counters are unsigned, so clamp at zero rather than letting
	 * "avail - alloc" wrap when alloc exceeds avail.
	 */
	node_avail = gres_ns->gres_cnt_avail;
	if (!use_total_gres) {
		if (gres_ns->gres_cnt_alloc >= node_avail)
			node_avail = 0;
		else
			node_avail -= gres_ns->gres_cnt_alloc;
	}

	sock_gres = xmalloc(sizeof(sock_gres_t));
	for (i = 0; i < gres_ns->type_cnt; i++) {
		if (gres_js->type_name &&
		    (gres_js->type_id != gres_ns->type_id[i]))
			continue;	/* Wrong type_model */
		if (!use_total_gres &&
		    (gres_ns->type_cnt_alloc[i] >=
		     gres_ns->type_cnt_avail[i])) {
			continue;	/* No GRES remaining */
		} else if (!use_total_gres) {
			avail_gres = gres_ns->type_cnt_avail[i] -
				     gres_ns->type_cnt_alloc[i];
		} else {
			avail_gres = gres_ns->type_cnt_avail[i];
		}

		_handle_gres_exc_by_type(resv_exc_ptr, gres_js,
					 node_inx, &avail_gres);

		avail_gres = MIN(avail_gres, node_avail);
		if (avail_gres < min_gres)
			continue;	/* Insufficient GRES remaining */
		sock_gres->cnt_any_sock += avail_gres;
		sock_gres->total_cnt += avail_gres;
		match = true;
	}

	/* xfree() also resets sock_gres to NULL, so NULL is returned */
	if (!match)
		xfree(sock_gres);

	return sock_gres;
}
/*
 * Count the untyped GRES this job can use on a node and return a structure
 * with the details. A job request naming a GRES type is never satisfied here.
 *
 * IN gres_js - job's GRES request
 * IN gres_ns - node's GRES state
 * IN resv_exc_ptr - reservation GRES to exclude/include, may be NULL
 * IN use_total_gres - if set, ignore GRES already allocated on the node
 * IN core_bitmap, sockets, cores_per_sock, job_id, node_name - not
 *	referenced here
 * IN node_inx - node index used for the resv_exc_ptr lookup
 * RET sock_gres_t with cnt_any_sock/total_cnt set, or NULL if the node can
 *	not satisfy the request
 */
static sock_gres_t *_build_sock_gres_basic(
	gres_job_state_t *gres_js,
	gres_node_state_t *gres_ns,
	resv_exc_t *resv_exc_ptr,
	bool use_total_gres, bitstr_t *core_bitmap,
	uint16_t sockets, uint16_t cores_per_sock,
	uint32_t job_id, char *node_name, uint32_t node_inx)
{
	sock_gres_t *result;
	uint64_t usable_cnt, needed_cnt;

	if (gres_js->type_name)
		return NULL;

	usable_cnt = gres_ns->gres_cnt_avail;
	if (!use_total_gres) {
		if (gres_ns->gres_cnt_alloc >= usable_cnt)
			return NULL;	/* No GRES remaining */
		usable_cnt -= gres_ns->gres_cnt_alloc;
	}

	/* Largest of the per-node/socket/task requests, at least one */
	needed_cnt = gres_js->gres_per_node ? gres_js->gres_per_node : 1;
	needed_cnt = MAX(needed_cnt, gres_js->gres_per_socket);
	needed_cnt = MAX(needed_cnt, gres_js->gres_per_task);

	_handle_gres_exc_basic(resv_exc_ptr, gres_js, node_inx, &usable_cnt);
	if (usable_cnt < needed_cnt)
		return NULL;	/* Insufficient GRES remaining */

	result = xmalloc(sizeof(sock_gres_t));
	result->cnt_any_sock += usable_cnt;
	result->total_cnt += usable_cnt;

	return result;
}
/*
 * Build a string listing the count of each GRES in a List of sock_gres_t
 * entries, e.g. "GRES:name:type:cnt,name:cnt"
 * IN sock_gres_list - count of GRES available in this group of nodes
 * RET string to be released with xfree(), NULL if there is nothing to report
 */
extern char *gres_sched_str(List sock_gres_list)
{
	ListIterator sock_iter;
	sock_gres_t *sock_gres;
	gres_job_state_t *job_state;
	char *gres_str = NULL, *prefix;

	if (!sock_gres_list)
		return NULL;

	sock_iter = list_iterator_create(sock_gres_list);
	while ((sock_gres = (sock_gres_t *) list_next(sock_iter))) {
		if (!sock_gres->gres_state_job) { /* Should never happen */
			error("%s: sock_data has no gres_state_job. This should never happen.",
			      __func__);
			continue;
		}
		job_state = sock_gres->gres_state_job->gres_data;

		/* First record gets the label, later ones a separator */
		prefix = gres_str ? "," : "GRES:";

		if (!job_state->type_name) {
			xstrfmtcat(gres_str, "%s%s:%"PRIu64, prefix,
				   sock_gres->gres_state_job->gres_name,
				   sock_gres->total_cnt);
		} else {
			xstrfmtcat(gres_str, "%s%s:%s:%"PRIu64, prefix,
				   sock_gres->gres_state_job->gres_name,
				   job_state->type_name,
				   sock_gres->total_cnt);
		}
	}
	list_iterator_destroy(sock_iter);

	return gres_str;
}
/*
 * Reset the total_gres counter of every job GRES record that carries a
 * gres_per_job request, at start of scheduling cycle
 * IN job_gres_list - List of job's GRES requirements, may be NULL
 * RET true if any gres_per_job constraints to satisfy
 */
extern bool gres_sched_init(List job_gres_list)
{
	ListIterator job_iter;
	gres_state_t *gres_state_job;
	gres_job_state_t *job_state;
	bool have_per_job = false;

	if (!job_gres_list)
		return false;

	job_iter = list_iterator_create(job_gres_list);
	while ((gres_state_job = list_next(job_iter))) {
		job_state = (gres_job_state_t *) gres_state_job->gres_data;
		if (job_state->gres_per_job) {
			job_state->total_gres = 0;
			have_per_job = true;
		}
	}
	list_iterator_destroy(job_iter);

	return have_per_job;
}
/*
 * Test whether every gres_per_job request is covered by the total_gres
 * accumulated so far
 * IN job_gres_list - List of job's GRES requirements, may be NULL
 * IN job_id - not referenced here
 * RET true if all gres_per_job specifications are satisfied
 */
extern bool gres_sched_test(List job_gres_list, uint32_t job_id)
{
	ListIterator job_iter;
	gres_state_t *gres_state_job;
	gres_job_state_t *job_state;
	bool satisfied = true;

	if (!job_gres_list)
		return true;

	job_iter = list_iterator_create(job_gres_list);
	/* Stop at the first record that falls short */
	while (satisfied && (gres_state_job = list_next(job_iter))) {
		job_state = (gres_job_state_t *) gres_state_job->gres_data;
		if (job_state->gres_per_job &&
		    (job_state->total_gres < job_state->gres_per_job))
			satisfied = false;
	}
	list_iterator_destroy(job_iter);

	return satisfied;
}
/*
 * Update a job's total_gres counter as we add a node to potential allocation
 * IN job_gres_list - List of job's GRES requirements (gres_state_t)
 * IN sock_gres_list - Per socket GRES availability on this node (sock_gres_t)
 * IN/OUT avail_cpus - CPUs currently available on this node. Lowered to the
 *	largest CPU count usable by any GRES with cpus_per_gres set.
 */
extern void gres_sched_add(List job_gres_list, List sock_gres_list,
			   uint16_t *avail_cpus)
{
	ListIterator iter;
	gres_state_t *gres_state_job;
	gres_job_state_t *gres_js;
	sock_gres_t *sock_data;
	uint64_t gres_limit;
	uint16_t gres_cpus = 0;

	/* Without a sock_gres_list the node has no GRES to contribute */
	if (!job_gres_list || !sock_gres_list || !(*avail_cpus))
		return;

	iter = list_iterator_create(job_gres_list);
	while ((gres_state_job = list_next(iter))) {
		gres_js = (gres_job_state_t *) gres_state_job->gres_data;
		if (!gres_js->gres_per_job)	/* Don't care about totals */
			continue;
		sock_data = list_find_first(sock_gres_list,
					    gres_find_sock_by_job_state,
					    gres_state_job);
		if (!sock_data)		/* None of this GRES available */
			continue;
		if (gres_js->cpus_per_gres) {
			/* GRES count is bounded by the CPUs available */
			gres_limit = *avail_cpus / gres_js->cpus_per_gres;
			gres_limit = MIN(gres_limit, sock_data->total_cnt);
			gres_cpus = MAX(gres_cpus,
					gres_limit * gres_js->cpus_per_gres);
		} else {
			gres_limit = sock_data->total_cnt;
		}
		gres_js->total_gres += gres_limit;
	}
	list_iterator_destroy(iter);

	if (gres_cpus)
		*avail_cpus = gres_cpus;
}
/*
 * Create/update List GRES that can be made available on the specified node
 * IN/OUT consec_gres - List of sock_gres_t that can be made available on
 *			a set of nodes, created on demand
 * IN job_gres_list - List of job's GRES requirements (gres_state_t)
 * IN sock_gres_list - Per socket GRES availability on this node (sock_gres_t)
 */
extern void gres_sched_consec(List *consec_gres, List job_gres_list,
			      List sock_gres_list)
{
	ListIterator iter;
	gres_state_t *gres_state_job;
	gres_job_state_t *gres_js;
	sock_gres_t *sock_data, *consec_data;

	/* Without a sock_gres_list the node has no GRES to contribute */
	if (!job_gres_list || !sock_gres_list)
		return;

	iter = list_iterator_create(job_gres_list);
	while ((gres_state_job = list_next(iter))) {
		gres_js = (gres_job_state_t *) gres_state_job->gres_data;
		if (!gres_js->gres_per_job)	/* Don't care about totals */
			continue;
		sock_data = list_find_first(sock_gres_list,
					    gres_find_sock_by_job_state,
					    gres_state_job);
		if (!sock_data)		/* None of this GRES available */
			continue;
		if (*consec_gres == NULL)
			*consec_gres = list_create(gres_sock_delete);
		consec_data = list_find_first(*consec_gres,
					      gres_find_sock_by_job_state,
					      gres_state_job);
		if (!consec_data) {
			/* First node offering this GRES, add a record */
			consec_data = xmalloc(sizeof(sock_gres_t));
			consec_data->gres_state_job = gres_state_job;
			list_append(*consec_gres, consec_data);
		}
		consec_data->total_cnt += sock_data->total_cnt;
	}
	list_iterator_destroy(iter);
}
/*
 * Test whether adding the resources in sock_gres_list to what the job has
 * already accumulated would satisfy every gres_per_job constraint
 * IN job_gres_list - job's GRES requirements, may be NULL
 * IN sock_gres_list - available GRES in a set of nodes, data structure built
 *		       by gres_sched_consec()
 * RET true if all gres_per_job constraints would be satisfied
 */
extern bool gres_sched_sufficient(List job_gres_list, List sock_gres_list)
{
	ListIterator job_iter;
	gres_state_t *gres_state_job;
	gres_job_state_t *job_state;
	sock_gres_t *sock_gres;
	bool sufficient = true;

	if (!job_gres_list)
		return true;
	if (!sock_gres_list)
		return false;

	job_iter = list_iterator_create(job_gres_list);
	/* Stop at the first GRES that would still fall short */
	while (sufficient && (gres_state_job = list_next(job_iter))) {
		job_state = (gres_job_state_t *) gres_state_job->gres_data;
		if (!job_state->gres_per_job)	/* Don't care about totals */
			continue;
		if (job_state->total_gres >= job_state->gres_per_job)
			continue;	/* Already satisfied */
		sock_gres = list_find_first(sock_gres_list,
					    gres_find_sock_by_job_state,
					    gres_state_job);
		if (!sock_gres ||
		    ((job_state->total_gres + sock_gres->total_cnt) <
		     job_state->gres_per_job))
			sufficient = false;
	}
	list_iterator_destroy(job_iter);

	return sufficient;
}
/*
 * Log the contents of a List of sock_gres_t entries
 * IN sock_gres_list - per socket GRES availability, may be NULL
 * IN node_name - node name printed in the heading line
 */
static void _sock_gres_log(List sock_gres_list, char *node_name)
{
	sock_gres_t *sock_gres;
	ListIterator iter;
	int i, len;
	char tmp[32];

	if (!sock_gres_list)
		return;

	info("Sock_gres state for %s", node_name);
	iter = list_iterator_create(sock_gres_list);
	while ((sock_gres = (sock_gres_t *) list_next(iter))) {
		gres_job_state_t *gres_js =
			sock_gres->gres_state_job->gres_data;
		info("Gres:%s Type:%s TotalCnt:%"PRIu64" MaxNodeGres:%"PRIu64,
		     sock_gres->gres_state_job->gres_name, gres_js->type_name,
		     sock_gres->total_cnt, sock_gres->max_node_gres);

		/*
		 * Reset for every record so a record without bits_any_sock
		 * does not report the bitmap text and size left over from
		 * the previous record.
		 */
		tmp[0] = '\0';
		len = -1;
		if (sock_gres->bits_any_sock) {
			bit_fmt(tmp, sizeof(tmp), sock_gres->bits_any_sock);
			len = bit_size(sock_gres->bits_any_sock);
		}
		info("  Sock[ANY]Cnt:%"PRIu64" Bits:%s of %d",
		     sock_gres->cnt_any_sock, tmp, len);

		for (i = 0; i < sock_gres->sock_cnt; i++) {
			if (sock_gres->cnt_by_sock[i] == 0)
				continue;
			tmp[0] = '\0';
			len = -1;
			if (sock_gres->bits_by_sock &&
			    sock_gres->bits_by_sock[i]) {
				bit_fmt(tmp, sizeof(tmp),
					sock_gres->bits_by_sock[i]);
				len = bit_size(sock_gres->bits_by_sock[i]);
			}
			info("  Sock[%d]Cnt:%"PRIu64" Bits:%s of %d", i,
			     sock_gres->cnt_by_sock[i], tmp, len);
		}
	}
	list_iterator_destroy(iter);
}
/*
 * Build the List of sock_gres_t describing the GRES one node can offer a job
 * IN job_gres_list - List of job's GRES requirements (gres_state_t)
 * IN node_gres_list - List of node's GRES state (gres_state_t)
 * IN/OUT resv_exc_ptr - reservation GRES to exclude/include, may be NULL.
 *	Its gres_js_exc or gres_js_inc field is re-pointed for each job GRES
 * IN use_total_gres - if set, ignore GRES already allocated on the node
 * IN/OUT core_bitmap - cores available to the job on this node, may be NULL.
 *	Cleared entirely if some requested GRES can not be satisfied
 * IN sockets - socket count on the node
 * IN cores_per_sock - core count per socket on the node
 * IN job_id - job ID, passed through to the builders
 * IN node_name - node name, used for logging
 * IN enforce_binding - if set, only count GRES on sockets with usable cores
 * IN s_p_n - sockets-per-node limit, only applied for gres_per_job requests
 *	without gres_per_socket
 * IN/OUT req_sock_map - sockets required to satisfy the GRES request
 * IN user_id - user ID, passed through to the builders
 * IN node_inx - node index used for the resv_exc_ptr lookups
 * RET List of sock_gres_t (one per job GRES), or NULL if there is no job
 *	GRES request, no node GRES, or some requested GRES is not available
 */
extern List gres_sched_create_sock_gres_list(
	List job_gres_list, List node_gres_list,
	resv_exc_t *resv_exc_ptr,
	bool use_total_gres, bitstr_t *core_bitmap,
	uint16_t sockets, uint16_t cores_per_sock,
	uint32_t job_id, char *node_name,
	bool enforce_binding, uint32_t s_p_n,
	bitstr_t **req_sock_map, uint32_t user_id,
	const uint32_t node_inx)
{
	List sock_gres_list = NULL;
	ListIterator job_gres_iter;
	gres_state_t *gres_state_job, *gres_state_node;
	gres_job_state_t *gres_js;
	gres_node_state_t *gres_ns;
	uint32_t local_s_p_n;
	list_t *gres_list_resv = NULL;
	gres_job_state_t **gres_js_resv = NULL;

	if (!job_gres_list || (list_count(job_gres_list) == 0))
		return sock_gres_list;
	if (!node_gres_list)	/* Node lacks GRES to match */
		return sock_gres_list;

	(void) gres_init();

	/* Pick the reservation list to search; exclusion takes precedence */
	if (resv_exc_ptr) {
		if (resv_exc_ptr->gres_list_exc) {
			gres_list_resv = resv_exc_ptr->gres_list_exc;
			gres_js_resv = (gres_job_state_t **)&(
				resv_exc_ptr->gres_js_exc);
		} else if (resv_exc_ptr->gres_list_inc) {
			gres_list_resv = resv_exc_ptr->gres_list_inc;
			gres_js_resv = (gres_job_state_t **)&(
				resv_exc_ptr->gres_js_inc);
		}
	}

	sock_gres_list = list_create(gres_sock_delete);
	job_gres_iter = list_iterator_create(job_gres_list);
	while ((gres_state_job = (gres_state_t *) list_next(job_gres_iter))) {
		sock_gres_t *sock_gres = NULL;

		gres_state_node = list_find_first(node_gres_list, gres_find_id,
						  &gres_state_job->plugin_id);
		if (gres_state_node == NULL) {
			/* node lack GRES of type required by the job */
			FREE_NULL_LIST(sock_gres_list);
			break;
		}
		gres_js = (gres_job_state_t *) gres_state_job->gres_data;
		gres_ns = (gres_node_state_t *) gres_state_node->gres_data;

		if (gres_list_resv) {
			/*
			 * Point resv_exc_ptr at the reservation record
			 * matching this job GRES (NULL if none) for the
			 * builders called below.
			 */
			gres_key_t job_search_key = {
				.config_flags = gres_state_job->config_flags,
				.plugin_id = gres_state_job->plugin_id,
				.type_id = gres_js->type_id,
			};
			gres_state_t *gres_state_job_tmp =
				list_find_first(
					gres_list_resv,
					gres_find_job_by_key_exact_type,
					&job_search_key);
			if (gres_state_job_tmp)
				*gres_js_resv =
					gres_state_job_tmp->gres_data;
			else
				*gres_js_resv = NULL;
		}

		if (gres_js->gres_per_job &&
		    !gres_js->gres_per_socket)
			local_s_p_n = s_p_n;	/* Maximize GRES per node */
		else
			local_s_p_n = NO_VAL;	/* No need to optimize socket */

		if (core_bitmap && (bit_ffs(core_bitmap) == -1)) {
			sock_gres = NULL;	/* No cores available */
		} else if (gres_ns->topo_cnt &&
			   gres_ns->gres_cnt_found != NO_VAL64) {
			sock_gres = _build_sock_gres_by_topo(
				gres_state_job, gres_state_node, resv_exc_ptr,
				use_total_gres,
				core_bitmap, sockets, cores_per_sock,
				job_id, node_name, enforce_binding,
				local_s_p_n, req_sock_map,
				user_id, node_inx);
		} else if (gres_ns->type_cnt) {
			sock_gres = _build_sock_gres_by_type(
				gres_js,
				gres_ns, resv_exc_ptr, use_total_gres,
				core_bitmap, sockets, cores_per_sock,
				job_id, node_name, node_inx);
		} else {
			sock_gres = _build_sock_gres_basic(
				gres_js,
				gres_ns, resv_exc_ptr, use_total_gres,
				core_bitmap, sockets, cores_per_sock,
				job_id, node_name, node_inx);
		}

		if (!sock_gres) {
			/*
			 * node lack available resources required by the job.
			 * core_bitmap is optional, only clear it if present.
			 */
			if (core_bitmap)
				bit_clear_all(core_bitmap);
			FREE_NULL_LIST(sock_gres_list);
			break;
		}
		sock_gres->use_total_gres = use_total_gres;
		sock_gres->gres_state_job = gres_state_job;
		sock_gres->gres_state_node = gres_state_node;
		list_append(sock_gres_list, sock_gres);
	}
	list_iterator_destroy(job_gres_iter);

	if (slurm_conf.debug_flags & DEBUG_FLAG_GRES)
		_sock_gres_log(sock_gres_list, node_name);

	return sock_gres_list;
}