blob: 868e8279e8d3e4588ee8e5c1cfb4dbb74b12ff7e [file] [log] [blame]
/*****************************************************************************\
* dist_tasks.c - Assign task count for each resource.
*****************************************************************************
* Copyright (C) SchedMD LLC.
* Derived in large part from select/cons_res plugin
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "gres_select_util.h"
#include "select_cons_tres.h"
#include "dist_tasks.h"
/* Max boards supported for best-fit across boards */
/* Larger board configurations may require new algorithm */
/* for acceptable performance */
#define MAX_BOARDS 8
/* Combination counts
* comb_counts[n-1][k-1] = number of combinations of
* k items from a set of n items
*
* Formula is n!/k!(n-k)!
*/
static uint32_t comb_counts[MAX_BOARDS][MAX_BOARDS] =
{{1,0,0,0,0,0,0,0},
{2,1,0,0,0,0,0,0},
{3,3,1,0,0,0,0,0},
{4,6,4,1,0,0,0,0},
{5,10,10,5,1,0,0,0},
{6,15,20,15,6,1,0,0},
{7,21,35,35,21,7,1,0},
{8,28,56,70,56,28,8,1}};
static int *sockets_core_cnt = NULL;
/*
* Generate all combinations of k integers from the
* set of integers 0 to n-1.
* Return combinations in comb_list.
*
* Example: For k = 2 and n = 4, there are six
* combinations:
* {0,1},{0,2},{0,3},{1,2},{1,3},{2,3}
*
*/
static void _gen_combs(int *comb_list, int n, int k)
{
int i, b;
int *comb = xmalloc(k * sizeof(int));
/* Setup comb for the initial combination */
for (i = 0; i < k; i++)
comb[i] = i;
b = 0;
/* Generate all the other combinations */
while (1) {
for (i = 0; i < k; i++) {
comb_list[b + i] = comb[i];
}
b += k;
i = k - 1;
++comb[i];
while ((i >= 0) && (comb[i] >= n - k + 1 + i)) {
--i;
++comb[i];
}
if (comb[0] > n - k)
break; /* No more combinations */
for (i = i + 1; i < k; ++i)
comb[i] = comb[i - 1] + 1;
}
xfree(comb);
}
/* qsort compare function for board combination socket list
* NOTE: sockets_core_cnt is a global symbol in this module */
static int _cmp_sock(const void *a, const void *b)
{
return slurm_sort_int_list_desc(&sockets_core_cnt[*((int *) a)],
&sockets_core_cnt[*((int *) b)]);
}
/* Enable detailed logging of cr_dist() node and core bitmaps */
static inline void _log_select_maps(char *loc, job_record_t *job_ptr)
{
job_resources_t *job_res = job_ptr->job_resrcs;
char tmp[100];
int i;
if (!(slurm_conf.debug_flags & DEBUG_FLAG_SELECT_TYPE))
return;
info("%s %pJ", loc, job_ptr);
if (job_res->node_bitmap) {
bit_fmt(tmp, sizeof(tmp), job_res->node_bitmap);
info(" node_bitmap:%s", tmp);
}
if (job_res->core_bitmap) {
bit_fmt(tmp, sizeof(tmp), job_res->core_bitmap);
info(" core_bitmap:%s", tmp);
}
if (job_res->cpus) {
for (i = 0; i < job_res->nhosts; i++) {
info(" avail_cpus[%d]:%u", i,
job_res->cpus[i]);
}
}
if (job_res->tasks_per_node) {
for (i = 0; i < job_res->nhosts; i++) {
info(" tasks_per_node[%d]:%u", i,
job_res->tasks_per_node[i]);
}
}
}
/* Remove any specialized cores from those allocated to the job */
static void _clear_spec_cores(job_record_t *job_ptr,
bitstr_t **core_array)
{
int first_core, last_core;
int alloc_node = -1, alloc_core = -1, c;
job_resources_t *job_res = job_ptr->job_resrcs;
multi_core_data_t *mc_ptr = NULL;
bitstr_t *use_core_array = NULL;
node_record_t *node_ptr;
if (job_ptr->details && job_ptr->details->mc_ptr)
mc_ptr = job_ptr->details->mc_ptr;
bit_set_all(job_res->core_bitmap);
for (int i = 0;
(node_ptr = next_node_bitmap(job_res->node_bitmap, &i)); i++) {
job_res->cpus[++alloc_node] = 0;
first_core = 0;
last_core = node_ptr->tot_cores;
use_core_array = core_array[i];
for (c = first_core; c < last_core; c++) {
alloc_core++;
if (bit_test(use_core_array, c)) {
uint16_t tpc = node_ptr->tpc;
if (mc_ptr &&
(mc_ptr->threads_per_core != NO_VAL16) &&
(mc_ptr->threads_per_core < tpc))
tpc = mc_ptr->threads_per_core;
job_res->cpus[alloc_node] += tpc;
} else {
bit_clear(job_res->core_bitmap, alloc_core);
}
}
}
}
static int _get_task_count(job_record_t *job_ptr)
{
uint32_t maxtasks;
/*
* Here we need to check to know if the num_tasks here were from the
* user or from us. As if the user requested a range of nodes we
* originally calculate off min_nodes if ntasks_per_node is given we
* will not have the right num_tasks, so recalculate.
*/
if (job_ptr->details->ntasks_per_node) {
maxtasks = job_ptr->details->ntasks_per_node *
job_ptr->job_resrcs->nhosts;
} else if (job_ptr->details->num_tasks &&
(job_ptr->bit_flags & JOB_NTASKS_SET)) {
maxtasks = job_ptr->details->num_tasks;
} else {
maxtasks = job_ptr->job_resrcs->ncpus;
if (job_ptr->details->cpus_per_task > 1)
maxtasks /= job_ptr->details->cpus_per_task;
}
return maxtasks;
}
/* CPUs already selected for jobs, just distribute the tasks */
static int _set_task_dist_internal(job_record_t *job_ptr)
{
uint32_t n, i, tid = 0, maxtasks;
uint16_t *avail_cpus;
job_resources_t *job_res = job_ptr->job_resrcs;
char *err_msg = NULL;
int rc = SLURM_SUCCESS, plane_size = 1;
if (!job_res)
err_msg = "job_res is NULL";
else if (!job_res->cpus)
err_msg = "job_res->cpus is NULL";
else if (!job_res->nhosts)
err_msg = "job_res->nhosts is zero";
if (err_msg) {
error("Invalid allocation for %pJ: %s",
job_ptr, err_msg);
return SLURM_ERROR;
}
if ((job_ptr->details->task_dist & SLURM_DIST_STATE_BASE) ==
SLURM_DIST_PLANE) {
if (job_ptr->details->mc_ptr)
plane_size = job_ptr->details->mc_ptr->plane_size;
if (plane_size <= 0) {
error("invalid plane_size");
return SLURM_ERROR;
}
}
i = job_res->nhosts * sizeof(uint16_t);
avail_cpus = xmalloc(i);
memcpy(avail_cpus, job_res->cpus, i);
job_res->tasks_per_node = xmalloc(i);
maxtasks = _get_task_count(job_ptr);
/*
* Safe guard if the user didn't specified a lower number of
* cpus than cpus_per_task or didn't specify the number.
*/
if (!maxtasks) {
error("changing task count from 0 to 1 for %pJ",
job_ptr);
maxtasks = 1;
}
if (job_ptr->details->cpus_per_task == 0)
job_ptr->details->cpus_per_task = 1;
/* First put one task on each node node */
for (n = 0; n < job_res->nhosts; n++) {
tid++;
job_res->tasks_per_node[n] = 1;
if (job_ptr->details->cpus_per_task > avail_cpus[n]) {
if (!job_ptr->details->overcommit) {
error("avail_cpus underflow on node %d for %pJ",
n, job_ptr);
}
avail_cpus[n] = 0;
} else {
avail_cpus[n] -= job_ptr->details->cpus_per_task;
}
}
/* Distribute remaining tasks per plane size */
while (maxtasks > tid) {
uint32_t last_tid = tid;
for (n = 0; n < job_res->nhosts; n++) {
if (job_ptr->details->cpus_per_task > avail_cpus[n])
continue;
i = MAX(job_res->tasks_per_node[n] % plane_size, 1);
i = MIN(i,
avail_cpus[n] /job_ptr->details->cpus_per_task);
i = MIN(i, maxtasks - tid);
job_res->tasks_per_node[n] += i;
tid += i;
avail_cpus[n] -= (i * job_ptr->details->cpus_per_task);
}
if (last_tid == tid)
break;
}
if (maxtasks > tid)
rc = ESLURM_BAD_TASK_COUNT;
xfree(avail_cpus);
return rc;
}
static int _set_task_dist(job_record_t *job_ptr, const uint16_t cr_type)
{
int error_code = _set_task_dist_internal(job_ptr);
if (error_code != SLURM_SUCCESS)
return error_code;
/*
* If we are asking for less threads per core than there are on the node
* we need to adjust for that for accounting.
* This will be reversed for getting the correct memory in
* cons_helpers.c _job_test() look for 'save_mem & MEM_PER_CPU'.
*/
if (job_ptr->job_resrcs &&
(job_ptr->details->mc_ptr->threads_per_core != NO_VAL16) &&
((cr_type & SELECT_CORE) || (cr_type & SELECT_SOCKET))) {
job_resources_t *job_res = job_ptr->job_resrcs;
node_record_t *node_ptr;
int i = 0;
if (!bit_set_count(job_res->node_bitmap))
return SLURM_ERROR;
for (int n = 0;
(node_ptr = next_node_bitmap(job_res->node_bitmap, &n));
n++) {
if (job_ptr->details->mc_ptr->threads_per_core ==
node_ptr->tpc)
continue;
job_res->cpus[i++] *= node_ptr->tpc;
}
}
return SLURM_SUCCESS;
}
/* distribute blocks (planes) of tasks cyclically */
static int _compute_plane_dist(job_record_t *job_ptr, uint32_t *gres_task_limit,
uint32_t *gres_min_cpus)
{
bool do_gres_min_cpus = false;
uint32_t n, i, p, tid, maxtasks, l;
uint16_t *avail_cpus, plane_size = 1;
job_resources_t *job_res = job_ptr->job_resrcs;
bool test_tres_tasks = true;
int rc = SLURM_SUCCESS;
if (!job_res || !job_res->cpus || !job_res->nhosts) {
error("invalid allocation for %pJ",
job_ptr);
return SLURM_ERROR;
}
maxtasks = _get_task_count(job_ptr);
avail_cpus = job_res->cpus;
if (job_ptr->details->mc_ptr)
plane_size = job_ptr->details->mc_ptr->plane_size;
if (plane_size <= 0) {
error("invalid plane_size");
return SLURM_ERROR;
}
job_res->cpus = xcalloc(job_res->nhosts, sizeof(uint16_t));
job_res->tasks_per_node = xcalloc(job_res->nhosts, sizeof(uint16_t));
for (tid = 0, i = 0; (tid < maxtasks); i++) { /* cycle counter */
bool space_remaining = false;
for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) {
bool more_tres_tasks = false;
for (p = 0; p < plane_size && (tid < maxtasks); p++) {
if (test_tres_tasks &&
!dist_tasks_tres_tasks_avail(
gres_task_limit, job_res, n))
continue;
more_tres_tasks = true;
if ((job_res->cpus[n] < avail_cpus[n])) {
if (gres_min_cpus[n])
do_gres_min_cpus = true;
tid++;
job_res->tasks_per_node[n]++;
for (l = 0;
l <job_ptr->details->cpus_per_task;
l++) {
if (job_res->cpus[n] <
avail_cpus[n])
job_res->cpus[n]++;
}
}
}
if (!more_tres_tasks)
test_tres_tasks = false;
if (job_res->cpus[n] < avail_cpus[n])
space_remaining = true;
}
if (!space_remaining && (tid < maxtasks)) {
/*
* If gres_task_limit is not associated with
* gres_per_task, it is a soft limit.
*/
if (gres_task_limit &&
!gres_select_util_job_tres_per_task(
job_ptr->gres_list_req)) {
/* Try again without limit */
gres_task_limit = NULL;
} else {
rc = ESLURM_BAD_TASK_COUNT;
break;
}
}
}
if (do_gres_min_cpus)
dist_tasks_gres_min_cpus(job_ptr, avail_cpus, gres_min_cpus);
xfree(avail_cpus);
return rc;
}
/*
* sync up core bitmap arrays with job_resources_t struct using a best-fit
* approach on the available resources on each node
*
* "Best-fit" means:
* 1st priority: Use smallest number of boards with sufficient
* available resources
* 2nd priority: Use smallest number of sockets with sufficient
* available resources
* 3rd priority: Use board combination with the smallest number
* of available resources
* 4th priority: Use higher-numbered boards/sockets/cores first
*
* The job_resources_t struct can include threads based upon configuration
*/
static void _block_sync_core_bitmap(job_record_t *job_ptr,
const uint16_t cr_type)
{
uint32_t c, s, i, j, b, z, csize, core_cnt;
int n, n_first, n_last;
uint16_t cpus, num_bits, vpus = 1;
uint16_t cpus_per_task = job_ptr->details->cpus_per_task;
job_resources_t *job_res = job_ptr->job_resrcs;
bool alloc_cores = false, alloc_sockets = false;
uint16_t ntasks_per_core = INFINITE16;
int tmp_cpt = 0;
int count, core_min, b_min, elig, s_min, comb_idx, sock_idx;
int elig_idx, comb_brd_idx, sock_list_idx, comb_min, board_num;
int sock_per_comb;
int *boards_core_cnt;
int *sort_brds_core_cnt;
int *board_combs;
int *socket_list;
int *elig_brd_combs;
int *elig_core_cnt;
bool *sockets_used;
uint16_t boards_nb;
uint16_t nboards_nb;
uint16_t sockets_nb;
uint16_t ncores_nb;
uint16_t nsockets_nb;
uint16_t sock_per_brd;
uint16_t req_cores,best_fit_cores = 0;
uint32_t best_fit_location = 0;
uint64_t ncomb_brd;
bool sufficient, best_fit_sufficient;
if (!job_res)
return;
if (!job_res->core_bitmap) {
error("core_bitmap for %pJ is NULL",
job_ptr);
return;
}
if (bit_ffs(job_res->core_bitmap) == -1) {
error("core_bitmap for %pJ has no bits set",
job_ptr);
return;
}
n_first = bit_ffs(job_res->node_bitmap);
if (n_first != -1) {
n_last = bit_fls(job_res->node_bitmap);
sockets_nb = node_record_table_ptr[n_first]->tot_sockets;
sockets_core_cnt = xcalloc(sockets_nb, sizeof(int));
sockets_used = xcalloc(sockets_nb, sizeof(bool));
boards_nb = node_record_table_ptr[n_first]->boards;
boards_core_cnt = xcalloc(boards_nb, sizeof(int));
sort_brds_core_cnt = xcalloc(boards_nb, sizeof(int));
} else
return;
if (cr_type & SELECT_SOCKET)
alloc_sockets = true;
else if (cr_type & SELECT_CORE)
alloc_cores = true;
if (job_ptr->details->mc_ptr) {
multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr;
if ((mc_ptr->ntasks_per_core != INFINITE16) &&
(mc_ptr->ntasks_per_core)) {
ntasks_per_core = mc_ptr->ntasks_per_core;
}
}
csize = bit_size(job_res->core_bitmap);
for (c = 0, i = 0, n = n_first; n <= n_last; n++) {
if (!bit_test(job_res->node_bitmap, n))
continue;
core_cnt = 0;
ncores_nb = node_record_table_ptr[n]->cores;
nsockets_nb = node_record_table_ptr[n]->tot_sockets;
nboards_nb = node_record_table_ptr[n]->boards;
num_bits = nsockets_nb * ncores_nb;
if ((c + num_bits) > csize) {
error("index error");
break;
}
cpus = job_res->cpus[i];
vpus = job_mgr_determine_cpus_per_core(job_ptr->details, n);
/* compute still required cores on the node */
req_cores = cpus / vpus;
if (cpus % vpus)
req_cores++;
/*
* figure out core cnt if task requires more than one core and
* tasks_per_core is 1
*/
if ((ntasks_per_core == 1) &&
(cpus_per_task > vpus)) {
/* how many cores a task will consume */
int cores_per_task = ROUNDUP(cpus_per_task, vpus);
int tasks = cpus / cpus_per_task;
req_cores = tasks * cores_per_task;
}
if (nboards_nb > MAX_BOARDS) {
info("node[%u]: exceeds max boards(%d); doing best-fit across sockets only",
n, MAX_BOARDS);
nboards_nb = 1;
}
if (nsockets_nb > sockets_nb) {
sockets_nb = nsockets_nb;
xrecalloc(sockets_core_cnt, sockets_nb, sizeof(int));
xrecalloc(sockets_used, sockets_nb, sizeof(bool));
}
if (nboards_nb > boards_nb) {
boards_nb = nboards_nb;
xrecalloc(boards_core_cnt, boards_nb, sizeof(int));
xrecalloc(sort_brds_core_cnt, boards_nb, sizeof(int));
}
/* Count available cores on each socket and board */
sock_per_brd = nsockets_nb / nboards_nb;
for (b = 0; b < nboards_nb; b++) {
boards_core_cnt[b] = 0;
sort_brds_core_cnt[b] = 0;
}
for (s = 0; s < nsockets_nb; s++) {
sockets_core_cnt[s] = 0;
sockets_used[s] = false;
b = s / sock_per_brd;
for (j = c + (s * ncores_nb);
j < c + ((s+1) * ncores_nb); j++) {
if (bit_test(job_res->core_bitmap, j)) {
sockets_core_cnt[s]++;
boards_core_cnt[b]++;
sort_brds_core_cnt[b]++;
}
}
}
/* Sort boards in descending order of available core count */
qsort(sort_brds_core_cnt, nboards_nb, sizeof(int),
slurm_sort_int_list_desc);
/*
* Determine minimum number of boards required for the
* allocation (b_min)
*/
count = 0;
for (b = 0; b < nboards_nb; b++) {
count += sort_brds_core_cnt[b];
if (count >= req_cores)
break;
}
b_min = b + 1;
if (b_min > nboards_nb) {
char core_str[64];
bit_fmt(core_str, 64, job_res->core_bitmap);
error("b_min > nboards_nb (%d > %u) node:%s core_bitmap:%s",
b_min, nboards_nb,
node_record_table_ptr[n]->name, core_str);
break;
}
sock_per_comb = b_min * sock_per_brd;
/* Allocate space for list of board combinations */
ncomb_brd = comb_counts[nboards_nb-1][b_min-1];
board_combs = xcalloc(ncomb_brd * b_min, sizeof(int));
/* Generate all combinations of b_min boards on the node */
_gen_combs(board_combs, nboards_nb, b_min);
/*
* Determine which combinations have enough available cores
* for the allocation (eligible board combinations)
*/
elig_brd_combs = xcalloc(ncomb_brd, sizeof(int));
elig_core_cnt = xcalloc(ncomb_brd, sizeof(int));
elig = 0;
for (comb_idx = 0; comb_idx < ncomb_brd; comb_idx++) {
count = 0;
for (comb_brd_idx = 0; comb_brd_idx < b_min;
comb_brd_idx++) {
board_num = board_combs[(comb_idx * b_min)
+ comb_brd_idx];
count += boards_core_cnt[board_num];
}
if (count >= req_cores) {
elig_brd_combs[elig] = comb_idx;
elig_core_cnt[elig] = count;
elig++;
}
}
/*
* Allocate space for list of sockets for each eligible board
* combination
*/
socket_list = xcalloc(elig * sock_per_comb, sizeof(int));
/*
* Generate sorted list of sockets for each eligible board
* combination, and find combination with minimum number
* of sockets and minimum number of CPUs required for the
* allocation
*/
s_min = sock_per_comb;
comb_min = 0;
core_min = sock_per_comb * ncores_nb;
for (elig_idx = 0; elig_idx < elig; elig_idx++) {
comb_idx = elig_brd_combs[elig_idx];
for (comb_brd_idx = 0; comb_brd_idx < b_min;
comb_brd_idx++) {
board_num = board_combs[(comb_idx * b_min)
+ comb_brd_idx];
sock_list_idx = (elig_idx * sock_per_comb) +
(comb_brd_idx * sock_per_brd);
for (sock_idx = 0; sock_idx < sock_per_brd;
sock_idx++) {
socket_list[sock_list_idx + sock_idx]
= (board_num * sock_per_brd)
+ sock_idx;
}
}
/*
* Sort this socket list in descending order of
* available core count
*/
qsort(&socket_list[elig_idx*sock_per_comb],
sock_per_comb, sizeof (int), _cmp_sock);
/*
* Determine minimum number of sockets required for
* the allocation from this socket list
*/
count = 0;
for (b = 0; b < sock_per_comb; b++) {
sock_idx =
socket_list[(int)((elig_idx *
sock_per_comb) + b)];
count += sockets_core_cnt[sock_idx];
if (count >= req_cores)
break;
}
b++;
/*
* Use board combination with minimum number
* of required sockets and minimum number of CPUs
*/
if ((b < s_min) ||
((b == s_min) &&
(elig_core_cnt[elig_idx] <= core_min))) {
s_min = b;
comb_min = elig_idx;
core_min = elig_core_cnt[elig_idx];
}
}
log_flag(SELECT_TYPE, "node[%u]: required CPUs:%u min req boards:%u,",
n, cpus, b_min);
log_flag(SELECT_TYPE, "node[%u]: min req sockets:%u min avail cores:%u",
n, s_min, core_min);
/*
* Re-sort socket list for best-fit board combination in
* ascending order of socket number
*/
qsort(&socket_list[comb_min * sock_per_comb], sock_per_comb,
sizeof (int), slurm_sort_int_list_asc);
xfree(board_combs);
xfree(elig_brd_combs);
xfree(elig_core_cnt);
/*
* select cores from the sockets of the best-fit board
* combination using a best-fit approach
*/
tmp_cpt = cpus_per_task;
while (cpus > 0) {
best_fit_cores = 0;
best_fit_sufficient = false;
/* search for the socket with best fit */
for (z = 0; z < sock_per_comb; z++) {
s = socket_list[(comb_min*sock_per_comb)+z];
sufficient = sockets_core_cnt[s] >= req_cores;
if ((best_fit_cores == 0) ||
(sufficient && !best_fit_sufficient ) ||
(sufficient &&
(sockets_core_cnt[s] < best_fit_cores)) ||
(!sufficient &&
(sockets_core_cnt[s] > best_fit_cores))) {
best_fit_cores = sockets_core_cnt[s];
best_fit_location = s;
best_fit_sufficient = sufficient;
}
}
/* check that we have found a usable socket */
if (best_fit_cores == 0)
break;
j = best_fit_location;
if (sock_per_brd)
j /= sock_per_brd;
log_flag(SELECT_TYPE, "using node[%u]: board[%u]: socket[%u]: %u cores available",
n, j,
best_fit_location,
sockets_core_cnt[best_fit_location]);
sockets_used[best_fit_location] = true;
for (j = (c + (best_fit_location * ncores_nb));
j < (c + ((best_fit_location + 1) * ncores_nb));
j++ ) {
/*
* if no more CPUs to select
* release remaining cores unless
* we are allocating whole sockets
*/
if (cpus == 0) {
if (alloc_sockets) {
bit_set(job_res->core_bitmap,
j);
core_cnt++;
} else {
bit_clear(job_res->core_bitmap,
j);
}
continue;
}
/*
* remove cores from socket count and
* cpus count using hyperthreading requirement
*/
if (bit_test(job_res->core_bitmap, j)) {
sockets_core_cnt[best_fit_location]--;
core_cnt++;
if (cpus < vpus)
cpus = 0;
else if ((ntasks_per_core == 1) &&
(cpus_per_task > vpus)) {
int used = MIN(tmp_cpt, vpus);
cpus -= used;
if (tmp_cpt <= used)
tmp_cpt = cpus_per_task;
else
tmp_cpt -= used;
} else {
cpus -= vpus;
}
} else if (alloc_sockets) {
/*
* If the core is not used, add it
* anyway if allocating whole sockets
*/
bit_set(job_res->core_bitmap, j);
core_cnt++;
}
}
/* loop again if more CPUs required */
if (cpus > 0)
continue;
/* release remaining cores of the unused sockets */
for (s = 0; s < nsockets_nb; s++) {
if (sockets_used[s])
continue;
bit_nclear(job_res->core_bitmap,
c + (s * ncores_nb),
c + ((s + 1) * ncores_nb) - 1);
}
}
xfree(socket_list);
if (cpus > 0) {
/*
* CPUs count should NEVER be greater than the number
* of set bits in the core bitmap for a given node
*/
error("CPUs computation error");
break;
}
/* adjust cpus count of the current node */
if ((alloc_cores || alloc_sockets) &&
(node_record_table_ptr[n]->tpc >= 1)) {
job_res->cpus[i] = core_cnt *
node_record_table_ptr[n]->tpc;
}
i++;
/* move c to the next node in core_bitmap */
c += num_bits;
}
xfree(boards_core_cnt);
xfree(sort_brds_core_cnt);
xfree(sockets_core_cnt);
xfree(sockets_used);
}
/*
* Sync up the core_bitmap with the CPU array using cyclic distribution
*
* The CPU array contains the distribution of CPUs, which can include
* virtual CPUs (hyperthreads)
*/
static int _cyclic_sync_core_bitmap(job_record_t *job_ptr,
const uint16_t cr_type, bool preempt_mode)
{
uint32_t c, i, j, k, s;
int n, n_first;
uint32_t *sock_start, *sock_end, csize, core_cnt;
uint16_t cps = 0, cpus, vpus, sockets, sock_size, orig_cpu_cnt;
job_resources_t *job_res = job_ptr->job_resrcs;
bitstr_t *core_map;
bool *sock_used, *sock_avoid;
bool alloc_cores = false, alloc_sockets = false;
uint16_t ntasks_per_socket = INFINITE16;
uint16_t ntasks_per_core = INFINITE16;
int error_code = SLURM_SUCCESS;
int tmp_cpt = 0; /* cpus_per_task */
node_record_t *node_ptr;
if ((job_res == NULL) || (job_res->core_bitmap == NULL) ||
(job_ptr->details == NULL))
return error_code;
n_first = bit_ffs(job_res->node_bitmap);
if (n_first == -1)
return error_code;
sock_size = node_record_table_ptr[n_first]->tot_sockets;
sock_avoid = xcalloc(sock_size, sizeof(bool));
sock_start = xcalloc(sock_size, sizeof(uint32_t));
sock_end = xcalloc(sock_size, sizeof(uint32_t));
sock_used = xcalloc(sock_size, sizeof(bool));
if (cr_type & SELECT_SOCKET)
alloc_sockets = true;
else if (cr_type & SELECT_CORE)
alloc_cores = true;
core_map = job_res->core_bitmap;
if (job_ptr->details->mc_ptr) {
multi_core_data_t *mc_ptr = job_ptr->details->mc_ptr;
if ((mc_ptr->ntasks_per_core != INFINITE16) &&
(mc_ptr->ntasks_per_core)) {
ntasks_per_core = mc_ptr->ntasks_per_core;
}
if (mc_ptr->ntasks_per_socket)
ntasks_per_socket = mc_ptr->ntasks_per_socket;
}
csize = bit_size(core_map);
for (c = 0, i = 0, n = 0;
(node_ptr = next_node_bitmap(job_res->node_bitmap, &n)); n++) {
sockets = node_ptr->tot_sockets;
cps = node_ptr->cores;
vpus = job_mgr_determine_cpus_per_core(job_ptr->details, n);
log_flag(SELECT_TYPE, "%pJ node %s vpus %u cpus %u",
job_ptr, node_ptr->name, vpus, job_res->cpus[i]);
if ((c + (sockets * cps)) > csize) {
error("index error");
break;
}
if (sockets > sock_size) {
sock_size = sockets;
xrecalloc(sock_avoid, sock_size, sizeof(bool));
xrecalloc(sock_start, sock_size, sizeof(uint32_t));
xrecalloc(sock_end, sock_size, sizeof(uint32_t));
xrecalloc(sock_used, sock_size, sizeof(bool));
}
for (s = 0; s < sockets; s++) {
sock_start[s] = c + (s * cps);
sock_end[s] = sock_start[s] + cps;
sock_avoid[s] = false;
sock_used[s] = false;
}
core_cnt = 0;
cpus = job_res->cpus[i];
if (ntasks_per_socket != INFINITE16) {
int x_cpus, cpus_per_socket;
uint32_t total_cpus = 0;
uint32_t *cpus_cnt;
cpus_per_socket = ntasks_per_socket *
job_ptr->details->cpus_per_task;
cpus_cnt = xmalloc(sizeof(uint32_t) * sockets);
for (s = 0; s < sockets; s++) {
for (j = sock_start[s]; j < sock_end[s]; j++) {
if (bit_test(core_map, j))
cpus_cnt[s] += vpus;
}
total_cpus += cpus_cnt[s];
}
for (s = 0; s < sockets && total_cpus > cpus; s++) {
if (cpus_cnt[s] > cpus_per_socket) {
x_cpus = cpus_cnt[s] - cpus_per_socket;
cpus_cnt[s] = cpus_per_socket;
total_cpus -= x_cpus;
}
}
for (s = 0; s < sockets && total_cpus > cpus; s++) {
if ((cpus_cnt[s] <= cpus_per_socket) &&
(total_cpus - cpus_cnt[s] >= cpus)) {
sock_avoid[s] = true;
total_cpus -= cpus_cnt[s];
}
}
xfree(cpus_cnt);
} else if (job_ptr->details->cpus_per_task > 1) {
/* Try to pack all CPUs of each tasks on one socket. */
uint32_t *cpus_cnt, cpus_per_task;
cpus_per_task = job_ptr->details->cpus_per_task;
cpus_cnt = xmalloc(sizeof(uint32_t) * sockets);
for (s = 0; s < sockets; s++) {
for (j = sock_start[s]; j < sock_end[s]; j++) {
if (bit_test(core_map, j))
cpus_cnt[s] += vpus;
}
cpus_cnt[s] -= (cpus_cnt[s] % cpus_per_task);
}
tmp_cpt = cpus_per_task;
for (s = 0; ((s < sockets) && (cpus > 0)); s++) {
while ((sock_start[s] < sock_end[s]) &&
(cpus_cnt[s] > 0) && (cpus > 0)) {
if (bit_test(core_map, sock_start[s])) {
int used;
sock_used[s] = true;
core_cnt++;
if ((ntasks_per_core == 1) &&
(cpus_per_task > vpus)) {
used = MIN(tmp_cpt,
vpus);
if (tmp_cpt <= used)
tmp_cpt = cpus_per_task;
else
tmp_cpt -= used;
} else
used = vpus;
if (cpus_cnt[s] < vpus)
cpus_cnt[s] = 0;
else
cpus_cnt[s] -= used;
if (cpus < vpus)
cpus = 0;
else
cpus -= used;
}
sock_start[s]++;
}
}
xfree(cpus_cnt);
}
orig_cpu_cnt = cpus;
while (cpus > 0) {
uint16_t prev_cpus = cpus;
for (s = 0; s < sockets && cpus > 0; s++) {
if (sock_avoid[s])
continue;
while (sock_start[s] < sock_end[s]) {
if (bit_test(core_map, sock_start[s])) {
sock_used[s] = true;
core_cnt++;
break;
} else
sock_start[s]++;
}
if (sock_start[s] == sock_end[s])
/* this socket is unusable */
continue;
if (cpus < vpus)
cpus = 0;
else
cpus -= vpus;
sock_start[s]++;
}
if (prev_cpus != cpus)
continue;
if (job_ptr->details->overcommit) {
/* We've got all the CPUs that we need */
break;
}
if (!preempt_mode) {
/* we're stuck! */
char *core_str = NULL, *sock_str = NULL, *sep;
for (j = 0, k = c; j < (cps * sockets);
j++, k++) {
if (!bit_test(core_map, k))
continue;
if (core_str)
sep = ",";
else
sep = "";
xstrfmtcat(core_str, "%s%d", sep, j);
}
if (!core_str)
core_str = xstrdup("NONE");
for (s = 0; s < sockets; s++) {
if (!sock_avoid[s])
continue;
if (sock_str)
sep = ",";
else
sep = "";
xstrfmtcat(sock_str, "%s%d", sep, s);
}
if (!sock_str)
sock_str = xstrdup("NONE");
job_ptr->priority = 0;
job_ptr->state_reason = WAIT_HELD;
error("sync loop not progressing, holding %pJ, "
"tried to use %u CPUs on node %s core_map:%s avoided_sockets:%s vpus:%u",
job_ptr, orig_cpu_cnt, node_ptr->name,
core_str, sock_str, vpus);
xfree(core_str);
xfree(sock_str);
}
error_code = SLURM_ERROR;
goto fini;
}
/*
* clear the rest of the cores in each socket
* FIXME: do we need min_core/min_socket checks here?
*/
for (s = 0; s < sockets; s++) {
if (sock_start[s] == sock_end[s])
continue;
if (!alloc_sockets || !sock_used[s]) {
bit_nclear(core_map, sock_start[s],
sock_end[s]-1);
}
if ((node_ptr->tpc >= 1) &&
(alloc_sockets || alloc_cores) && sock_used[s]) {
for (j = sock_start[s]; j < sock_end[s]; j++) {
/* Mark all cores as used */
if (alloc_sockets)
bit_set(core_map, j);
if (bit_test(core_map, j))
core_cnt++;
}
}
}
if ((alloc_cores || alloc_sockets) && (node_ptr->tpc >= 1)) {
job_res->cpus[i] = core_cnt * node_ptr->tpc;
}
i++;
/* advance 'c' to the beginning of the next node */
c += sockets * cps;
}
fini: xfree(sock_avoid);
xfree(sock_start);
xfree(sock_end);
xfree(sock_used);
return error_code;
}
/*
* Check if we're at job tasks_per_node limit for a given node when allocating
* tasks to a node.
*
* RETURNS rc
* rc > 0 if tpn limit or arbitrary tpn exceeded
* rc == 0 if exactly at tpn limit
* rc < 0 if not at limit yet
*/
static int _at_tpn_limit(const uint32_t n, const job_record_t *job_ptr,
const char *tag, bool log_error)
{
const job_resources_t *job_res = job_ptr->job_resrcs;
const log_level_t log_lvl = log_error ? LOG_LEVEL_ERROR :
LOG_LEVEL_INFO;
int limit_rc = -1;
int arbitrary_rc = -1;
if (job_ptr->details->arbitrary_tpn) {
arbitrary_rc = job_res->tasks_per_node[n] -
job_ptr->details->arbitrary_tpn[n];
}
/* Special case where no limit is imposed - no overcommit */
if (job_ptr->details->ntasks_per_node == 0)
return MAX(limit_rc, arbitrary_rc);
limit_rc = job_res->tasks_per_node[n] -
job_ptr->details->ntasks_per_node;
/* Limit exceeded */
if ((limit_rc > 0) && (log_error || (slurm_conf.debug_flags &
DEBUG_FLAG_SELECT_TYPE)))
log_var(log_lvl,
"%s over tasks_per_node for %pJ node:%u task_per_node:%d max:%u",
tag, job_ptr, n, job_res->tasks_per_node[n],
job_ptr->details->ntasks_per_node);
return MAX(limit_rc, arbitrary_rc);
}
/*
* dist_tasks_compute_c_b - compute the number of tasks on each
* of the node for the cyclic and block distribution. We need to do
* this in the case of consumable resources so that we have an exact
* count for the needed hardware resources which will be used later to
* update the different used resources per node structures.
*
* The most common case is when we have more resources than needed. In
* that case we just "take" what we need and "release" the remaining
* resources for other jobs. In the case where we oversubscribe the
* processing units (PUs) we keep the initial set of resources.
*
* IN/OUT job_ptr - pointer to job being scheduled. The per-node
* job_res->cpus array is recomputed here.
* IN gres_task_limit - array of task limits based upon job's GRES specification
* offset based upon bits set in
* job_ptr->job_resrcs->node_bitmap
*/
static int _dist_tasks_compute_c_b(job_record_t *job_ptr,
uint32_t *gres_task_limit,
uint32_t *gres_min_cpus)
{
bool do_gres_min_cpus = false;
uint32_t n, tid, t, maxtasks, l;
uint16_t *avail_cpus;
job_resources_t *job_res = job_ptr->job_resrcs;
char *err_msg = NULL;
uint16_t *vpus;
int rc = SLURM_SUCCESS, rem_cpus, rem_tasks;
uint16_t cpus_per_task;
node_record_t *node_ptr;
if (!job_res)
err_msg = "job_res is NULL";
else if (!job_res->cpus)
err_msg = "job_res->cpus is NULL";
else if (!job_res->nhosts)
err_msg = "job_res->nhosts is zero";
if (err_msg) {
error("Invalid allocation for %pJ: %s",
job_ptr, err_msg);
return SLURM_ERROR;
}
vpus = xmalloc(job_res->nhosts * sizeof(uint16_t));
if (job_ptr->details->cpus_per_task == 0)
job_ptr->details->cpus_per_task = 1;
cpus_per_task = job_ptr->details->cpus_per_task;
for (int i = 0, n = 0;
(node_ptr = next_node_bitmap(job_res->node_bitmap, &i)); i++) {
vpus[n++] = node_ptr->tpc;
}
maxtasks = _get_task_count(job_ptr);
avail_cpus = job_res->cpus;
job_res->cpus = xmalloc(job_res->nhosts * sizeof(uint16_t));
job_res->tasks_per_node = xmalloc(job_res->nhosts * sizeof(uint16_t));
/*
* Safe guard if the user didn't specified a lower number of
* CPUs than cpus_per_task or didn't specify the number.
*/
if (!maxtasks) {
error("changing task count from 0 to 1 for %pJ",
job_ptr);
maxtasks = 1;
}
/* Start by allocating one task per node */
tid = 0;
for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) {
if (avail_cpus[n]) {
if (gres_min_cpus[n])
do_gres_min_cpus = true;
/* Ignore gres_task_limit for first task per node */
tid++;
job_res->tasks_per_node[n]++;
for (l = 0; l < cpus_per_task; l++) {
if (job_res->cpus[n] < avail_cpus[n])
job_res->cpus[n]++;
}
}
}
/* Next fill out the CPUs on the cores already allocated to this job */
for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) {
rem_cpus = job_res->cpus[n] % vpus[n];
rem_tasks = rem_cpus / cpus_per_task;
if (rem_tasks == 0)
continue;
for (t = 0; ((t < rem_tasks) && (tid < maxtasks)); t++) {
if (((avail_cpus[n] - job_res->cpus[n]) <
cpus_per_task))
break;
if (!dist_tasks_tres_tasks_avail(
gres_task_limit, job_res, n))
break;
if (_at_tpn_limit(n, job_ptr, "fill allocated",
false) >= 0)
break;
tid++;
job_res->tasks_per_node[n]++;
for (l = 0; l < cpus_per_task; l++) {
if (job_res->cpus[n] < avail_cpus[n])
job_res->cpus[n]++;
}
}
}
/*
* Next distribute additional tasks, packing the cores or sockets as
* appropriate to avoid allocating more CPUs than needed. For example,
* with core allocations and 2 processors per core, we don't want to
* partially populate some cores on some nodes and allocate extra
* cores on other nodes. So "srun -n20 hostname" should not launch 7
* tasks on node 0, 7 tasks on node 1, and 6 tasks on node 2. It should
* launch 8 tasks on node, 8 tasks on node 1, and 4 tasks on node 2.
*/
if (job_ptr->details->overcommit && !job_ptr->tres_per_task)
maxtasks = 0; /* Allocate have one_task_per_node */
while (tid < maxtasks) {
bool space_remaining = false;
for (n = 0; ((n < job_res->nhosts) && (tid < maxtasks)); n++) {
rem_tasks = vpus[n] / cpus_per_task;
rem_tasks = MAX(rem_tasks, 1);
for (t = 0; ((t < rem_tasks) && (tid < maxtasks)); t++){
if ((avail_cpus[n] - job_res->cpus[n]) <
cpus_per_task)
break;
if (!dist_tasks_tres_tasks_avail(
gres_task_limit,
job_res, n))
break;
if (_at_tpn_limit(n, job_ptr, "fill allocated",
false) >= 0)
break;
tid++;
job_res->tasks_per_node[n]++;
for (l = 0; l < cpus_per_task;
l++) {
if (job_res->cpus[n] < avail_cpus[n])
job_res->cpus[n]++;
}
if ((avail_cpus[n] - job_res->cpus[n]) >=
cpus_per_task)
space_remaining = true;
}
}
if (!space_remaining && (tid < maxtasks)) {
/*
* If gres_task_limit is not associated with
* gres_per_task, it is a soft limit.
*/
if (gres_task_limit &&
!gres_select_util_job_tres_per_task(
job_ptr->gres_list_req)) {
/* Try again without limit */
gres_task_limit = NULL;
} else {
rc = ESLURM_BAD_TASK_COUNT;
break;
}
}
}
if (do_gres_min_cpus)
dist_tasks_gres_min_cpus(job_ptr, avail_cpus, gres_min_cpus);
xfree(avail_cpus);
xfree(vpus);
return rc;
}
/*
* To effectively deal with heterogeneous nodes, we fake a cyclic
* distribution to figure out how many cores are needed on each node.
*
* This routine is a slightly modified "version" of the routine
* _task_layout_block in src/common/dist_tasks.c. We do not need to
* assign tasks to job->hostid[] and job->tids[][] at this point so
* the core allocation is the same for cyclic and block.
*
* For the consumable resources support we need to determine what
* "node/Core/thread"-tuplets will be allocated for a given job.
* In the past we assumed that we only allocated one task per PU
* (processing unit, the lowest allocatable logical processor,
* core or thread depending upon configuration) and didn't allow
* the use of overcommit. We have changed this philosophy and are now
* allowing people to overcommit their resources and expect the system
* administrator to enable the task/affinity plug-in which will then
* bind all of a job's tasks to its allocated resources thereby
* avoiding interference between co-allocated running jobs.
*
* In the consumable resources environment we need to determine the
* layout schema within slurmctld.
*
* We have a core_bitmap of all available cores. All we're doing here
* is removing cores that are not needed based on the task count, and
* the choice of cores to remove is based on the distribution:
* - "cyclic" removes cores "evenly", starting from the last socket,
* - "block" removes cores from the "last" socket(s)
* - "plane" removes cores "in chunks"
*
* IN job_ptr - job to be allocated resources
* IN cr_type - allocation type (sockets, cores, etc.)
* IN preempt_mode - true if testing with simulated preempted jobs
* IN core_array - system-wide bitmap of cores originally available to
* the job, only used to identify specialized cores
* IN gres_task_limit - array of task limits based upon job GRES specification,
* offset based upon bits set in job_ptr->job_resrcs->node_bitmap
* IN gres_min_cpus - array of minimum required CPUs based upon job's GRES
* specification, offset based upon bits set in
* job_ptr->job_resrcs->node_bitmap
*/
extern int dist_tasks(job_record_t *job_ptr, const uint16_t cr_type,
bool preempt_mode, bitstr_t **core_array,
uint32_t *gres_task_limit, uint32_t *gres_min_cpus)
{
int error_code;
bool one_task_per_node = false;
/*
* Zero size jobs are supported for the creation and deletion of
* persistent burst buffers.
*/
if (job_ptr->details->min_nodes == 0)
return SLURM_SUCCESS;
if (job_ptr->details->core_spec != NO_VAL16) {
/*
* The job has been allocated all non-specialized cores.
* Just set the task distribution for tres_per_task support.
*/
error_code = _set_task_dist(job_ptr, cr_type);
if (error_code != SLURM_SUCCESS)
return error_code;
return SLURM_SUCCESS;
}
if ((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) ||
(job_ptr->details->whole_node & WHOLE_NODE_REQUIRED)) {
/*
* The job has been allocated an EXCLUSIVE set of nodes,
* so it gets all of the bits in the core_array except for
* specialized cores. Set the task distribution for
* tres_per_task support.
*/
_clear_spec_cores(job_ptr, core_array);
error_code = _set_task_dist(job_ptr, cr_type);
if (error_code != SLURM_SUCCESS)
return error_code;
return SLURM_SUCCESS;
}
if (job_ptr->details->overcommit && !job_ptr->tres_per_task)
one_task_per_node = true;
_log_select_maps("cr_dist/start", job_ptr);
if (((job_ptr->details->task_dist & SLURM_DIST_STATE_BASE) ==
SLURM_DIST_PLANE) && !one_task_per_node) {
/* Perform plane distribution on the job_resources_t struct */
error_code = _compute_plane_dist(job_ptr, gres_task_limit,
gres_min_cpus);
if (error_code != SLURM_SUCCESS)
return error_code;
} else {
/* Perform cyclic distribution on the job_resources_t struct */
error_code = _dist_tasks_compute_c_b(job_ptr, gres_task_limit,
gres_min_cpus);
if (error_code != SLURM_SUCCESS)
return error_code;
}
_log_select_maps("cr_dist/middle", job_ptr);
/*
* now sync up the core_bitmap with the job_resources_t struct
* based on the given distribution AND resource setting
*/
if (!(cr_type & SELECT_CORE) && !(cr_type & SELECT_SOCKET)) {
_block_sync_core_bitmap(job_ptr, cr_type);
return SLURM_SUCCESS;
}
/*
* If SelectTypeParameters mentions to use a block distribution for
* cores by default, use that kind of distribution if no particular
* cores distribution specified.
* Note : cyclic cores distribution, which is the default, is treated
* by the next code block
*/
if (slurm_conf.select_type_param & SELECT_CORE_DEFAULT_DIST_BLOCK) {
switch (job_ptr->details->task_dist & SLURM_DIST_NODESOCKMASK) {
case SLURM_DIST_ARBITRARY:
case SLURM_DIST_BLOCK:
case SLURM_DIST_CYCLIC:
case SLURM_DIST_UNKNOWN:
_block_sync_core_bitmap(job_ptr, cr_type);
return SLURM_SUCCESS;
}
}
/*
* Determine the number of logical processors per node needed
* for this job. Make sure below matches the layouts in
* lllp_distribution in plugins/task/affinity/dist_task.c (FIXME)
*/
switch (job_ptr->details->task_dist & SLURM_DIST_NODESOCKMASK) {
case SLURM_DIST_BLOCK_BLOCK:
case SLURM_DIST_CYCLIC_BLOCK:
case SLURM_DIST_PLANE:
_block_sync_core_bitmap(job_ptr, cr_type);
break;
case SLURM_DIST_ARBITRARY:
case SLURM_DIST_BLOCK:
case SLURM_DIST_CYCLIC:
case SLURM_DIST_BLOCK_CYCLIC:
case SLURM_DIST_CYCLIC_CYCLIC:
case SLURM_DIST_BLOCK_CFULL:
case SLURM_DIST_CYCLIC_CFULL:
case SLURM_DIST_UNKNOWN:
error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type,
preempt_mode);
break;
default:
error("invalid task_dist entry");
return SLURM_ERROR;
}
_log_select_maps("cr_dist/fini", job_ptr);
return error_code;
}
/* Return true if more tasks can be allocated for this job on this node */
extern bool dist_tasks_tres_tasks_avail(uint32_t *gres_task_limit,
job_resources_t *job_res,
uint32_t node_offset)
{
if (!gres_task_limit || !job_res)
return true;
if (gres_task_limit[node_offset] > job_res->tasks_per_node[node_offset])
return true;
return false;
}
extern void dist_tasks_gres_min_cpus(job_record_t *job_ptr,
uint16_t *avail_cpus,
uint32_t *gres_min_cpus)
{
job_resources_t *job_res = job_ptr->job_resrcs;
for (int n = 0; n < job_res->nhosts; n++) {
/*
* Make sure that enough cpus are available to meet the minimum
* number of required cores to satisfy a gres request. This
* can increase the number of cpus per task on a given node.
*/
if (job_res->cpus[n] < gres_min_cpus[n]) {
/*
* If avail_cpus is less then gres_min_cpus,
* something went wrong. Get as many cpus
* as we can.
*/
if (avail_cpus[n] < gres_min_cpus[n]) {
log_flag(
SELECT_TYPE,
"%pJ: gres_min_cpus=%u is greater than avail_cpus=%u for node %u",
job_ptr, gres_min_cpus[n],
avail_cpus[n], n);
job_res->cpus[n] = avail_cpus[n];
} else {
log_flag(
SELECT_TYPE,
"%pJ: Changing job_res->cpus from %u to gres_min_cpus %u for node %u",
job_ptr, job_res->cpus[n],
gres_min_cpus[n], n);
job_res->cpus[n] = gres_min_cpus[n];
}
}
}
}