/*****************************************************************************\
 *  acct_policy.c - Enforce accounting policy
 *****************************************************************************
 *  Copyright (C) 2008 Lawrence Livermore National Security.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Morris Jette <jette1@llnl.gov>
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/

#include "slurm/slurm_errno.h"

#include "src/common/assoc_mgr.h"

#include "src/interfaces/accounting_storage.h"
#include "src/interfaces/priority.h"
#include "src/interfaces/select.h"

#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/acct_policy.h"

#define _DEBUG 0

enum {
	ACCT_POLICY_ADD_SUBMIT,
	ACCT_POLICY_REM_SUBMIT,
	ACCT_POLICY_JOB_BEGIN,
	ACCT_POLICY_JOB_FINI
};

typedef enum {
	TRES_USAGE_OKAY,
	TRES_USAGE_CUR_EXCEEDS_LIMIT,
	TRES_USAGE_REQ_EXCEEDS_LIMIT,
	TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE
} acct_policy_tres_usage_t;

typedef struct het_job_limits {
	slurmdb_assoc_rec_t *assoc_ptr;
	job_record_t *job_ptr;
} het_job_limits_t;

typedef struct acct_policy_validate_args {
	acct_policy_limit_set_t *acct_policy_limit_set;
	slurmdb_assoc_rec_t *assoc_in;
	job_desc_msg_t *job_desc;
	slurmdb_qos_rec_t *job_qos_ptr;
	uint32_t *reason;
	bool update_call;
} acct_policy_validate_args_t;

typedef struct {
	char *acct;
	slurmdb_assoc_rec_t *assoc_ptr;
	int cnt;
	job_record_t *job_ptr;
	bool limits_filled;
	time_t now;
	slurmdb_qos_rec_t *qos_ptr;
	uid_t uid;
	slurmdb_used_limits_t *used_limits_acct;
	slurmdb_used_limits_t *used_limits_user;
} acct_policy_accrue_t;

typedef struct {
	uint32_t job_cnt;
	job_record_t *job_ptr;
	list_t *part_qos_list;
	int type;
	uint64_t *used_tres_run_secs;
} foreach_part_qos_limit_usage_t;

static void _apply_limit_factor(uint64_t *limit, double limit_factor)
{
	int64_t new_val;

	xassert(limit);

	if ((limit_factor <= 0.0) ||
	    (*limit == NO_VAL64) ||
	    (*limit == INFINITE64))
		return;

	new_val = (int64_t)(*limit) * limit_factor;
        if (new_val < 0) {
		/* We overflowed, setting to INFINITE */
		debug2("Factored limit overflowed setting to INFINITE");
		*limit = INFINITE64;
	} else {
		debug2("Limit adjusted from %"PRIu64" to %"PRIu64,
		       *limit, new_val);
		*limit = new_val;
	}
}

/*
 * Update a job's allocated node count to reflect only nodes that are not
 * already allocated to this association.  Needed to enforce GrpNode limit.
 */
static void _get_unique_job_node_cnt(job_record_t *job_ptr,
				     bitstr_t *grp_node_bitmap,
				     uint64_t *node_cnt)
{
	xassert(node_cnt);
#if _DEBUG
	char node_bitstr[64];
	if (job_ptr->job_resrcs && job_ptr->job_resrcs->node_bitmap) {
		bit_fmt(node_bitstr, sizeof(node_bitstr),
			job_ptr->job_resrcs->node_bitmap);
		info("%s: %pJ job_resrcs->node_bitmap:%s",  __func__, job_ptr,
		     node_bitstr);
	} else {
		info("%s: %pJ job_resrcs->node_bitmap:NULL",  __func__,
		     job_ptr);
	}

	if (grp_node_bitmap) {
		bit_fmt(node_bitstr, sizeof(node_bitstr), grp_node_bitmap);
		info("%s: object grp_node_bitmap:%s", __func__,
		     node_bitstr);
	} else {
		info("%s: object grp_node_bitmap:NULL", __func__);
	}
#endif

	if (job_ptr->job_resrcs && job_ptr->job_resrcs->node_bitmap &&
	    grp_node_bitmap) {
		uint64_t overlap_cnt = bit_overlap(
			job_ptr->job_resrcs->node_bitmap, grp_node_bitmap);
		if (overlap_cnt) {
			uint64_t init_cnt = bit_set_count(
				job_ptr->job_resrcs->node_bitmap);
			*node_cnt = init_cnt - overlap_cnt;
			debug2("%s: %pJ unique allocated node count changed from %"PRIu64" to %"PRIu64,
			       __func__, job_ptr, init_cnt, *node_cnt);
		}
	} else if (job_ptr->details && job_ptr->details->req_node_bitmap &&
		   grp_node_bitmap) {
		uint64_t overlap_cnt = bit_overlap(
			job_ptr->details->req_node_bitmap, grp_node_bitmap);
		if (overlap_cnt <= *node_cnt) {
			*node_cnt -=  overlap_cnt;
			debug2("%s: %pJ unique allocated node count changed from %"PRIu64" to %"PRIu64,
				 __func__, job_ptr, *node_cnt + overlap_cnt, *node_cnt);
		}
	} else if (job_ptr->node_bitmap_preempt && grp_node_bitmap) {
		uint64_t overlap_cnt = bit_overlap(job_ptr->node_bitmap_preempt,
						   grp_node_bitmap);
		if (overlap_cnt) {
			uint64_t init_cnt =
				bit_set_count(job_ptr->node_bitmap_preempt);
			*node_cnt = init_cnt - overlap_cnt;
			debug2("%s: %pJ unique allocated node count changed from %"PRIu64" to %"PRIu64,
			       __func__, job_ptr, init_cnt, *node_cnt);
		}
	}
}

/*
 * Update node allocation information for a job being started.
 * This includes grp_node_bitmap, grp_node_job_cnt and
 * grp_used_tres[TRES_ARRAY_NODE] of an object (qos, assoc, etc).
 */
static void _add_usage_node_bitmap(job_record_t *job_ptr,
				   bitstr_t **grp_node_bitmap,
				   uint16_t **grp_node_job_cnt,
				   uint64_t *grp_used_tres)
{
	xassert(grp_node_bitmap);
	xassert(grp_node_job_cnt);
	xassert(grp_used_tres);

	if (!job_ptr->job_resrcs || !job_ptr->job_resrcs->node_bitmap) {
		if (IS_JOB_PENDING(job_ptr) && job_ptr->het_job_id) {
			/*
			 * Hetjobs reach here as part of testing before any
			 * resource allocation. See _het_job_limit_check()
			 * in src/plugins/sched/backfill/backfill.c
			 */
		} else if (job_ptr->node_cnt == 0) {
			/* Zero size jobs OK to create/destroy burst buffers */
		} else {
			error("%s: %pJ lacks allocated node bitmap", __func__,
			      job_ptr);
		}
		return;
	}

	slurmdb_merge_grp_node_usage(grp_node_bitmap,
				     grp_node_job_cnt,
				     job_ptr->job_resrcs->node_bitmap,
				     NULL);

	*grp_used_tres = bit_set_count(*grp_node_bitmap);
}

/*
 * Update node allocation information for a job being completed.
 * This includes grp_node_bitmap, grp_node_job_cnt and
 * grp_used_tres[TRES_ARRAY_NODE] of an object (qos, assoc, etc).
 */
static void _rm_usage_node_bitmap(job_record_t *job_ptr,
				  bitstr_t *grp_node_bitmap,
				  uint16_t *grp_node_job_cnt,
				  uint64_t *grp_used_tres)
{
	xassert(grp_used_tres);

	if (!job_ptr->job_resrcs || !job_ptr->job_resrcs->node_bitmap) {
		if (IS_JOB_PENDING(job_ptr) && job_ptr->het_job_id) {
			/*
			 * Hetjobs reach here as part of testing before any
			 * resource allocation. See _het_job_limit_check()
			 * in src/plugins/sched/backfill/backfill.c
			 */
		} else if (job_ptr->node_cnt == 0) {
			/* Zero size jobs OK to create/destroy burst buffers */
		} else {
			error("%s: %pJ lacks allocated node bitmap", __func__,
			      job_ptr);
		}
		return;
	}
	if (!grp_node_bitmap) {
		error("%s: grp_node_bitmap is NULL", __func__);
		return;
	}
	if (!grp_node_job_cnt) {
		error("%s: grp_node_job_cnt is NULL", __func__);
		return;
	}

	for (int i = 0;
	     next_node_bitmap(job_ptr->job_resrcs->node_bitmap, &i); i++) {
		if (--grp_node_job_cnt[i] == 0)
			bit_clear(grp_node_bitmap, i);
	}
	*grp_used_tres = bit_set_count(grp_node_bitmap);
}

static int _get_tres_state_reason(int tres_pos, int unk_reason)
{
	switch (tres_pos) {
	case TRES_ARRAY_CPU:
		switch (unk_reason) {
		case WAIT_ASSOC_GRP_UNK:
			return WAIT_ASSOC_GRP_CPU;
		case WAIT_ASSOC_GRP_UNK_MIN:
			return WAIT_ASSOC_GRP_CPU_MIN;
		case WAIT_ASSOC_GRP_UNK_RUN_MIN:
			return WAIT_ASSOC_GRP_CPU_RUN_MIN;
		case WAIT_ASSOC_MAX_UNK_PER_JOB:
			return WAIT_ASSOC_MAX_CPU_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
			return WAIT_ASSOC_MAX_CPU_MINS_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_PER_NODE:
			return WAIT_ASSOC_MAX_CPU_PER_NODE;
		case WAIT_QOS_GRP_UNK:
			return WAIT_QOS_GRP_CPU;
		case WAIT_QOS_GRP_UNK_MIN:
			return WAIT_QOS_GRP_CPU_MIN;
		case WAIT_QOS_GRP_UNK_RUN_MIN:
			return WAIT_QOS_GRP_CPU_RUN_MIN;
		case WAIT_QOS_MAX_UNK_PER_JOB:
			return WAIT_QOS_MAX_CPU_PER_JOB;
		case WAIT_QOS_MAX_UNK_PER_NODE:
			return WAIT_QOS_MAX_CPU_PER_NODE;
		case WAIT_QOS_MAX_UNK_PER_ACCT:
			return WAIT_QOS_MAX_CPU_PER_ACCT;
		case WAIT_QOS_MAX_UNK_PER_USER:
			return WAIT_QOS_MAX_CPU_PER_USER;
		case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
			return WAIT_QOS_MAX_CPU_MINS_PER_JOB;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
			return WAIT_QOS_MAX_CPU_RUN_MINS_PER_ACCT;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
			return WAIT_QOS_MAX_CPU_RUN_MINS_PER_USER;
		case WAIT_QOS_MIN_UNK:
			return WAIT_QOS_MIN_CPU;
		default:
			return unk_reason;
			break;
		}
		break;
	case TRES_ARRAY_MEM:
		switch (unk_reason) {
		case WAIT_ASSOC_GRP_UNK:
			return WAIT_ASSOC_GRP_MEM;
		case WAIT_ASSOC_GRP_UNK_MIN:
			return WAIT_ASSOC_GRP_MEM_MIN;
		case WAIT_ASSOC_GRP_UNK_RUN_MIN:
			return WAIT_ASSOC_GRP_MEM_RUN_MIN;
		case WAIT_ASSOC_MAX_UNK_PER_JOB:
			return WAIT_ASSOC_MAX_MEM_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
			return WAIT_ASSOC_MAX_MEM_MINS_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_PER_NODE:
			return WAIT_ASSOC_MAX_MEM_PER_NODE;
		case WAIT_QOS_GRP_UNK:
			return WAIT_QOS_GRP_MEM;
		case WAIT_QOS_GRP_UNK_MIN:
			return WAIT_QOS_GRP_MEM_MIN;
		case WAIT_QOS_GRP_UNK_RUN_MIN:
			return WAIT_QOS_GRP_MEM_RUN_MIN;
		case WAIT_QOS_MAX_UNK_PER_JOB:
			return WAIT_QOS_MAX_MEM_PER_JOB;
		case WAIT_QOS_MAX_UNK_PER_NODE:
			return WAIT_QOS_MAX_MEM_PER_NODE;
		case WAIT_QOS_MAX_UNK_PER_ACCT:
			return WAIT_QOS_MAX_MEM_PER_ACCT;
		case WAIT_QOS_MAX_UNK_PER_USER:
			return WAIT_QOS_MAX_MEM_PER_USER;
		case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
			return WAIT_QOS_MAX_MEM_MINS_PER_JOB;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
			return WAIT_QOS_MAX_MEM_RUN_MINS_PER_ACCT;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
			return WAIT_QOS_MAX_MEM_RUN_MINS_PER_USER;
		case WAIT_QOS_MIN_UNK:
			return WAIT_QOS_MIN_MEM;
		default:
			return unk_reason;
			break;
		}
		break;
	case TRES_ARRAY_ENERGY:
		switch (unk_reason) {
		case WAIT_ASSOC_GRP_UNK:
			return WAIT_ASSOC_GRP_ENERGY;
		case WAIT_ASSOC_GRP_UNK_MIN:
			return WAIT_ASSOC_GRP_ENERGY_MIN;
		case WAIT_ASSOC_GRP_UNK_RUN_MIN:
			return WAIT_ASSOC_GRP_ENERGY_RUN_MIN;
		case WAIT_ASSOC_MAX_UNK_PER_JOB:
			return WAIT_ASSOC_MAX_ENERGY_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
			return WAIT_ASSOC_MAX_ENERGY_MINS_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_PER_NODE:
			return WAIT_ASSOC_MAX_ENERGY_PER_NODE;
		case WAIT_QOS_GRP_UNK:
			return WAIT_QOS_GRP_ENERGY;
		case WAIT_QOS_GRP_UNK_MIN:
			return WAIT_QOS_GRP_ENERGY_MIN;
		case WAIT_QOS_GRP_UNK_RUN_MIN:
			return WAIT_QOS_GRP_ENERGY_RUN_MIN;
		case WAIT_QOS_MAX_UNK_PER_JOB:
			return WAIT_QOS_MAX_ENERGY_PER_JOB;
		case WAIT_QOS_MAX_UNK_PER_NODE:
			return WAIT_QOS_MAX_ENERGY_PER_NODE;
		case WAIT_QOS_MAX_UNK_PER_ACCT:
			return WAIT_QOS_MAX_ENERGY_PER_ACCT;
		case WAIT_QOS_MAX_UNK_PER_USER:
			return WAIT_QOS_MAX_ENERGY_PER_USER;
		case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
			return WAIT_QOS_MAX_ENERGY_MINS_PER_JOB;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
			return WAIT_QOS_MAX_ENERGY_RUN_MINS_PER_ACCT;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
			return WAIT_QOS_MAX_ENERGY_RUN_MINS_PER_USER;
		case WAIT_QOS_MIN_UNK:
			return WAIT_QOS_MIN_ENERGY;
		default:
			return unk_reason;
			break;
		}
		break;
	case TRES_ARRAY_NODE:
		switch (unk_reason) {
		case WAIT_ASSOC_GRP_UNK:
			return WAIT_ASSOC_GRP_NODE;
		case WAIT_ASSOC_GRP_UNK_MIN:
			return WAIT_ASSOC_GRP_NODE_MIN;
		case WAIT_ASSOC_GRP_UNK_RUN_MIN:
			return WAIT_ASSOC_GRP_NODE_RUN_MIN;
		case WAIT_ASSOC_MAX_UNK_PER_JOB:
			return WAIT_ASSOC_MAX_NODE_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
			return WAIT_ASSOC_MAX_NODE_MINS_PER_JOB;
		case WAIT_QOS_GRP_UNK:
			return WAIT_QOS_GRP_NODE;
		case WAIT_QOS_GRP_UNK_MIN:
			return WAIT_QOS_GRP_NODE_MIN;
		case WAIT_QOS_GRP_UNK_RUN_MIN:
			return WAIT_QOS_GRP_NODE_RUN_MIN;
		case WAIT_QOS_MAX_UNK_PER_JOB:
			return WAIT_QOS_MAX_NODE_PER_JOB;
		case WAIT_QOS_MAX_UNK_PER_ACCT:
			return WAIT_QOS_MAX_NODE_PER_ACCT;
		case WAIT_QOS_MAX_UNK_PER_USER:
			return WAIT_QOS_MAX_NODE_PER_USER;
		case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
			return WAIT_QOS_MAX_NODE_MINS_PER_JOB;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
			return WAIT_QOS_MAX_NODE_RUN_MINS_PER_ACCT;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
			return WAIT_QOS_MAX_NODE_RUN_MINS_PER_USER;
		case WAIT_QOS_MIN_UNK:
			return WAIT_QOS_MIN_NODE;
		default:
			return unk_reason;
			break;
		}
		break;
	case TRES_ARRAY_BILLING:
		switch (unk_reason) {
		case WAIT_ASSOC_GRP_UNK:
			return WAIT_ASSOC_GRP_BILLING;
		case WAIT_ASSOC_GRP_UNK_MIN:
			return WAIT_ASSOC_GRP_BILLING_MIN;
		case WAIT_ASSOC_GRP_UNK_RUN_MIN:
			return WAIT_ASSOC_GRP_BILLING_RUN_MIN;
		case WAIT_ASSOC_MAX_UNK_PER_JOB:
			return WAIT_ASSOC_MAX_BILLING_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
			return WAIT_ASSOC_MAX_BILLING_MINS_PER_JOB;
		case WAIT_ASSOC_MAX_UNK_PER_NODE:
			return WAIT_ASSOC_MAX_BILLING_PER_NODE;
		case WAIT_QOS_GRP_UNK:
			return WAIT_QOS_GRP_BILLING;
		case WAIT_QOS_GRP_UNK_MIN:
			return WAIT_QOS_GRP_BILLING_MIN;
		case WAIT_QOS_GRP_UNK_RUN_MIN:
			return WAIT_QOS_GRP_BILLING_RUN_MIN;
		case WAIT_QOS_MAX_UNK_PER_JOB:
			return WAIT_QOS_MAX_BILLING_PER_JOB;
		case WAIT_QOS_MAX_UNK_PER_NODE:
			return WAIT_QOS_MAX_BILLING_PER_NODE;
		case WAIT_QOS_MAX_UNK_PER_ACCT:
			return WAIT_QOS_MAX_BILLING_PER_ACCT;
		case WAIT_QOS_MAX_UNK_PER_USER:
			return WAIT_QOS_MAX_BILLING_PER_USER;
		case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
			return WAIT_QOS_MAX_BILLING_MINS_PER_JOB;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
			return WAIT_QOS_MAX_BILLING_RUN_MINS_PER_ACCT;
		case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
			return WAIT_QOS_MAX_BILLING_RUN_MINS_PER_USER;
		case WAIT_QOS_MIN_UNK:
			return WAIT_QOS_MIN_BILLING;
		default:
			return unk_reason;
			break;
		}
		break;
	default:
		if (!xstrcmp("gres", assoc_mgr_tres_array[tres_pos]->type))
			switch (unk_reason) {
			case WAIT_ASSOC_GRP_UNK:
				return WAIT_ASSOC_GRP_GRES;
			case WAIT_ASSOC_GRP_UNK_MIN:
				return WAIT_ASSOC_GRP_GRES_MIN;
			case WAIT_ASSOC_GRP_UNK_RUN_MIN:
				return WAIT_ASSOC_GRP_GRES_RUN_MIN;
			case WAIT_ASSOC_MAX_UNK_PER_JOB:
				return WAIT_ASSOC_MAX_GRES_PER_JOB;
			case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
				return WAIT_ASSOC_MAX_GRES_MINS_PER_JOB;
			case WAIT_ASSOC_MAX_UNK_PER_NODE:
				return WAIT_ASSOC_MAX_GRES_PER_NODE;
			case WAIT_QOS_GRP_UNK:
				return WAIT_QOS_GRP_GRES;
			case WAIT_QOS_GRP_UNK_MIN:
				return WAIT_QOS_GRP_GRES_MIN;
			case WAIT_QOS_GRP_UNK_RUN_MIN:
				return WAIT_QOS_GRP_GRES_RUN_MIN;
			case WAIT_QOS_MAX_UNK_PER_JOB:
				return WAIT_QOS_MAX_GRES_PER_JOB;
			case WAIT_QOS_MAX_UNK_PER_NODE:
				return WAIT_QOS_MAX_GRES_PER_NODE;
			case WAIT_QOS_MAX_UNK_PER_ACCT:
				return WAIT_QOS_MAX_GRES_PER_ACCT;
			case WAIT_QOS_MAX_UNK_PER_USER:
				return WAIT_QOS_MAX_GRES_PER_USER;
			case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
				return WAIT_QOS_MAX_GRES_MINS_PER_JOB;
			case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
				return WAIT_QOS_MAX_GRES_RUN_MINS_PER_ACCT;
			case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
				return WAIT_QOS_MAX_GRES_RUN_MINS_PER_USER;
			case WAIT_QOS_MIN_UNK:
				return WAIT_QOS_MIN_GRES;
			default:
				return unk_reason;
				break;
			}
		else if (!xstrcmp("license",
				  assoc_mgr_tres_array[tres_pos]->type))
			switch (unk_reason) {
			case WAIT_ASSOC_GRP_UNK:
				return WAIT_ASSOC_GRP_LIC;
			case WAIT_ASSOC_GRP_UNK_MIN:
				return WAIT_ASSOC_GRP_LIC_MIN;
			case WAIT_ASSOC_GRP_UNK_RUN_MIN:
				return WAIT_ASSOC_GRP_LIC_RUN_MIN;
			case WAIT_ASSOC_MAX_UNK_PER_JOB:
				return WAIT_ASSOC_MAX_LIC_PER_JOB;
			case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
				return WAIT_ASSOC_MAX_LIC_MINS_PER_JOB;
			case WAIT_QOS_GRP_UNK:
				return WAIT_QOS_GRP_LIC;
			case WAIT_QOS_GRP_UNK_MIN:
				return WAIT_QOS_GRP_LIC_MIN;
			case WAIT_QOS_GRP_UNK_RUN_MIN:
				return WAIT_QOS_GRP_LIC_RUN_MIN;
			case WAIT_QOS_MAX_UNK_PER_JOB:
				return WAIT_QOS_MAX_LIC_PER_JOB;
			case WAIT_QOS_MAX_UNK_PER_ACCT:
				return WAIT_QOS_MAX_LIC_PER_ACCT;
			case WAIT_QOS_MAX_UNK_PER_USER:
				return WAIT_QOS_MAX_LIC_PER_USER;
			case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
				return WAIT_QOS_MAX_LIC_MINS_PER_JOB;
			case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
				return WAIT_QOS_MAX_LIC_RUN_MINS_PER_ACCT;
			case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
				return WAIT_QOS_MAX_LIC_RUN_MINS_PER_USER;
			case WAIT_QOS_MIN_UNK:
				return WAIT_QOS_MIN_LIC;
			default:
				return unk_reason;
				break;
			}
		else if (!xstrcmp("bb", assoc_mgr_tres_array[tres_pos]->type))
			switch (unk_reason) {
			case WAIT_ASSOC_GRP_UNK:
				return WAIT_ASSOC_GRP_BB;
			case WAIT_ASSOC_GRP_UNK_MIN:
				return WAIT_ASSOC_GRP_BB_MIN;
			case WAIT_ASSOC_GRP_UNK_RUN_MIN:
				return WAIT_ASSOC_GRP_BB_RUN_MIN;
			case WAIT_ASSOC_MAX_UNK_PER_JOB:
				return WAIT_ASSOC_MAX_BB_PER_JOB;
			case WAIT_ASSOC_MAX_UNK_MINS_PER_JOB:
				return WAIT_ASSOC_MAX_BB_MINS_PER_JOB;
			case WAIT_ASSOC_MAX_UNK_PER_NODE:
				return WAIT_ASSOC_MAX_BB_PER_NODE;
			case WAIT_QOS_GRP_UNK:
				return WAIT_QOS_GRP_BB;
			case WAIT_QOS_GRP_UNK_MIN:
				return WAIT_QOS_GRP_BB_MIN;
			case WAIT_QOS_GRP_UNK_RUN_MIN:
				return WAIT_QOS_GRP_BB_RUN_MIN;
			case WAIT_QOS_MAX_UNK_PER_JOB:
				return WAIT_QOS_MAX_BB_PER_JOB;
			case WAIT_QOS_MAX_UNK_PER_NODE:
				return WAIT_QOS_MAX_BB_PER_NODE;
			case WAIT_QOS_MAX_UNK_PER_ACCT:
				return WAIT_QOS_MAX_BB_PER_ACCT;
			case WAIT_QOS_MAX_UNK_PER_USER:
				return WAIT_QOS_MAX_BB_PER_USER;
			case WAIT_QOS_MAX_UNK_MINS_PER_JOB:
				return WAIT_QOS_MAX_BB_MINS_PER_JOB;
			case WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT:
				return WAIT_QOS_MAX_BB_RUN_MINS_PER_ACCT;
			case WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER:
				return WAIT_QOS_MAX_BB_RUN_MINS_PER_USER;
			case WAIT_QOS_MIN_UNK:
				return WAIT_QOS_MIN_BB;
			default:
				return unk_reason;
				break;
			}
		break;
	}

	return unk_reason;
}

static int _find_used_limits_for_acct(void *x, void *key)
{
	slurmdb_used_limits_t *used_limits = (slurmdb_used_limits_t *)x;
	char *account = (char *)key;

	if (!xstrcmp(account, used_limits->acct))
		return 1;

	return 0;
}

static int _find_used_limits_for_user(void *x, void *key)
{
	slurmdb_used_limits_t *used_limits = (slurmdb_used_limits_t *)x;
	uint32_t user_id = *(uint32_t *)key;

	if (used_limits->uid == user_id)
		return 1;

	return 0;
}

static bool _valid_job_assoc(job_record_t *job_ptr)
{
	slurmdb_assoc_rec_t assoc_rec;

	if ((job_ptr->assoc_ptr == NULL) ||
	    (job_ptr->assoc_ptr->id  != job_ptr->assoc_id) ||
	    (job_ptr->assoc_ptr->uid != job_ptr->user_id)) {
		error("Invalid assoc_ptr for %pJ", job_ptr);
		memset(&assoc_rec, 0, sizeof(slurmdb_assoc_rec_t));

		assoc_rec.acct      = job_ptr->account;
		if (job_ptr->part_ptr)
			assoc_rec.partition = job_ptr->part_ptr->name;
		assoc_rec.uid       = job_ptr->user_id;

		if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
					    accounting_enforce,
					    &job_ptr->assoc_ptr, false)) {
			info("%s: invalid account or partition for uid=%u %pJ",
			     __func__, job_ptr->user_id, job_ptr);
			return false;
		}
		job_ptr->assoc_id = assoc_rec.id;
	}
	return true;
}

/* Set the job_ptr->qos_ptr to the highest priority QOS */
static void _set_highest_prio_qos_ptr(job_record_t *job_ptr)
{
	xassert(verify_assoc_lock(QOS_LOCK, READ_LOCK));

	if (!job_ptr->qos_list || !list_count(job_ptr->qos_list))
		return;

	job_ptr->qos_ptr = list_peek(job_ptr->qos_list);
	job_ptr->qos_id = job_ptr->qos_ptr->id;
}

static void _qos_adjust_limit_usage(int type, job_record_t *job_ptr,
				    slurmdb_qos_rec_t *qos_ptr,
				    uint64_t *used_tres_run_secs,
				    uint32_t job_cnt)
{
	slurmdb_used_limits_t *used_limits = NULL, *used_limits_a = NULL;
	int i;

	if (!qos_ptr || !job_ptr->assoc_ptr)
		return;

	used_limits_a =	acct_policy_get_acct_used_limits(
		&qos_ptr->usage->acct_limit_list,
		job_ptr->assoc_ptr->acct);

	used_limits = acct_policy_get_user_used_limits(
		&qos_ptr->usage->user_limit_list,
		job_ptr->user_id);

	switch (type) {
	case ACCT_POLICY_ADD_SUBMIT:
		qos_ptr->usage->grp_used_submit_jobs += job_cnt;
		used_limits->submit_jobs += job_cnt;
		used_limits_a->submit_jobs += job_cnt;
		break;
	case ACCT_POLICY_REM_SUBMIT:
		if (qos_ptr->usage->grp_used_submit_jobs >= job_cnt)
			qos_ptr->usage->grp_used_submit_jobs -= job_cnt;
		else {
			qos_ptr->usage->grp_used_submit_jobs = 0;
			debug2("acct_policy_remove_job_submit: "
			       "grp_submit_jobs underflow for qos %s",
			       qos_ptr->name);
		}

		if (used_limits->submit_jobs >= job_cnt)
			used_limits->submit_jobs -= job_cnt;
		else {
			used_limits->submit_jobs = 0;
			debug2("acct_policy_remove_job_submit: "
			       "used_submit_jobs underflow for "
			       "qos %s user %d",
			       qos_ptr->name, used_limits->uid);
		}

		if (used_limits_a->submit_jobs >= job_cnt)
			used_limits_a->submit_jobs -= job_cnt;
		else {
			used_limits_a->submit_jobs = 0;
			debug2("acct_policy_remove_job_submit: "
			       "used_submit_jobs underflow for "
			       "qos %s account %s",
			       qos_ptr->name, used_limits_a->acct);
		}

		break;
	case ACCT_POLICY_JOB_BEGIN:
		/*
		 * Now that the job has started set the id correctly. This is
		 * needed when we have multiple QOS, the qos_ptr will be set
		 * correctly, but the qos_id is only set to the highest priority
		 * until now.
		 */
		if (job_ptr->qos_ptr == qos_ptr)
			job_ptr->qos_id = qos_ptr->id;

		qos_ptr->usage->grp_used_jobs++;
		for (i=0; i<slurmctld_tres_cnt; i++) {
			/* tres_alloc_cnt for ENERGY is currently after the
			 * fact, so don't add it here or you will get underflows
			 * when you remove it.  If this ever changes this will
			 * have to be moved to a new TRES ARRAY probably.
			 */
			if (i == TRES_ARRAY_ENERGY)
				continue;
			if (job_ptr->tres_alloc_cnt[i] == NO_CONSUME_VAL64)
				continue;

			used_limits->tres[i] += job_ptr->tres_alloc_cnt[i];
			used_limits->tres_run_secs[i] += used_tres_run_secs[i];
			used_limits_a->tres[i] += job_ptr->tres_alloc_cnt[i];
			used_limits_a->tres_run_secs[i] += used_tres_run_secs[i];

			qos_ptr->usage->grp_used_tres[i] +=
				job_ptr->tres_alloc_cnt[i];
			qos_ptr->usage->grp_used_tres_run_secs[i] +=
				used_tres_run_secs[i];
			debug2("acct_policy_job_begin: after adding %pJ, qos %s grp_used_tres_run_secs(%s) is %"PRIu64,
			       job_ptr, qos_ptr->name,
			       assoc_mgr_tres_name_array[i],
			       qos_ptr->usage->grp_used_tres_run_secs[i]);
		}

		used_limits->jobs++;
		used_limits_a->jobs++;

		_add_usage_node_bitmap(
			job_ptr,
			&qos_ptr->usage->grp_node_bitmap,
			&qos_ptr->usage->grp_node_job_cnt,
			&qos_ptr->usage->grp_used_tres[TRES_ARRAY_NODE]);

		_add_usage_node_bitmap(
			job_ptr,
			&used_limits->node_bitmap,
			&used_limits->node_job_cnt,
			&used_limits->tres[TRES_ARRAY_NODE]);

		_add_usage_node_bitmap(
			job_ptr,
			&used_limits_a->node_bitmap,
			&used_limits_a->node_job_cnt,
			&used_limits_a->tres[TRES_ARRAY_NODE]);
		break;
	case ACCT_POLICY_JOB_FINI:
		/*
		 * If tres_alloc_cnt doesn't exist means ACCT_POLICY_JOB_BEGIN
		 * was never called so no need to clean up that which was never
		 * set up.
		 */
		if (!job_ptr->tres_alloc_cnt)
			break;
		qos_ptr->usage->grp_used_jobs--;
		if ((int32_t)qos_ptr->usage->grp_used_jobs < 0) {
			qos_ptr->usage->grp_used_jobs = 0;
			debug2("acct_policy_job_fini: used_jobs "
			       "underflow for qos %s", qos_ptr->name);
		}

		for (i=0; i<slurmctld_tres_cnt; i++) {
			if (i == TRES_ARRAY_ENERGY)
				continue;

			if (job_ptr->tres_alloc_cnt[i] == NO_CONSUME_VAL64)
				continue;

			if (job_ptr->tres_alloc_cnt[i] >
			    qos_ptr->usage->grp_used_tres[i]) {
				qos_ptr->usage->grp_used_tres[i] = 0;
				debug2("acct_policy_job_fini: "
				       "grp_used_tres(%s) "
				       "underflow for QOS %s",
				       assoc_mgr_tres_name_array[i],
				       qos_ptr->name);
			} else
				qos_ptr->usage->grp_used_tres[i] -=
					job_ptr->tres_alloc_cnt[i];

			if (job_ptr->tres_alloc_cnt[i] > used_limits->tres[i]) {
				used_limits->tres[i] = 0;
				debug2("acct_policy_job_fini: "
				       "used_limits->tres(%s) "
				       "underflow for qos %s user %u",
				       assoc_mgr_tres_name_array[i],
				       qos_ptr->name, used_limits->uid);
			} else
				used_limits->tres[i] -=
					job_ptr->tres_alloc_cnt[i];

			if (job_ptr->tres_alloc_cnt[i] >
			    used_limits_a->tres[i]) {
				used_limits_a->tres[i] = 0;
				debug2("acct_policy_job_fini: "
				       "used_limits->tres(%s) "
				       "underflow for qos %s account %s",
				       assoc_mgr_tres_name_array[i],
				       qos_ptr->name, used_limits_a->acct);
			} else
				used_limits_a->tres[i] -=
					job_ptr->tres_alloc_cnt[i];
		}

		if (used_limits->jobs)
			used_limits->jobs--;
		else
			debug2("acct_policy_job_fini: used_jobs "
			       "underflow for qos %s user %d",
			       qos_ptr->name, used_limits->uid);

		if (used_limits_a->jobs)
			used_limits_a->jobs--;
		else
			debug2("acct_policy_job_fini: used_jobs "
			       "underflow for qos %s account %s",
			       qos_ptr->name, used_limits_a->acct);

		_rm_usage_node_bitmap(
			job_ptr,
			qos_ptr->usage->grp_node_bitmap,
			qos_ptr->usage->grp_node_job_cnt,
			&qos_ptr->usage->grp_used_tres[TRES_ARRAY_NODE]);

		_rm_usage_node_bitmap(
			job_ptr,
			used_limits->node_bitmap,
			used_limits->node_job_cnt,
			&used_limits->tres[TRES_ARRAY_NODE]);

		_rm_usage_node_bitmap(
			job_ptr,
			used_limits_a->node_bitmap,
			used_limits_a->node_job_cnt,
			&used_limits_a->tres[TRES_ARRAY_NODE]);
		break;
	default:
		error("acct_policy: qos unknown type %d", type);
		break;
	}

}

static int _find_qos_part(void *x, void *key)
{
	if ((slurmdb_qos_rec_t *) x == (slurmdb_qos_rec_t *) key)
		return 1;	/* match */

	return 0;
}

static int _foreach_part_qos_limit_usage(void *x, void *arg)
{
	part_record_t *part_ptr = x;
	foreach_part_qos_limit_usage_t *part_qos_limit_usage = arg;

	if (!part_ptr->qos_ptr)
		return 0;
	if (!part_qos_limit_usage->part_qos_list)
		part_qos_limit_usage->part_qos_list = list_create(NULL);
	/*
	 * Don't adjust usage to this partition's qos if
	 * it's the same as the qos of another partition
	 * that we already handled.
	 */
	if (list_find_first(part_qos_limit_usage->part_qos_list, _find_qos_part,
			    part_ptr->qos_ptr))
		return 0;
	list_push(part_qos_limit_usage->part_qos_list, part_ptr->qos_ptr);
	_qos_adjust_limit_usage(part_qos_limit_usage->type,
				part_qos_limit_usage->job_ptr,
				part_ptr->qos_ptr,
				part_qos_limit_usage->used_tres_run_secs,
				part_qos_limit_usage->job_cnt);
	return 0;
}

static void _adjust_limit_usage(int type, job_record_t *job_ptr,
				bool assoc_locked)
{
	slurmdb_assoc_rec_t *assoc_ptr = NULL;
	slurmdb_qos_rec_t *orig_qos_ptr = NULL;
	assoc_mgr_lock_t locks =
		{ .assoc = WRITE_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };
	uint64_t used_tres_run_secs[slurmctld_tres_cnt];
	int i;
	uint32_t job_cnt = 1;

	if (assoc_locked) {
		xassert(verify_assoc_lock(ASSOC_LOCK, WRITE_LOCK));
		xassert(verify_assoc_lock(QOS_LOCK, WRITE_LOCK));
		xassert(verify_assoc_lock(TRES_LOCK, READ_LOCK));
	} else {
		xassert(verify_assoc_unlock(ASSOC_LOCK));
		xassert(verify_assoc_unlock(QOS_LOCK));
		xassert(verify_assoc_unlock(TRES_LOCK));
	}

	memset(used_tres_run_secs, 0, sizeof(uint64_t) * slurmctld_tres_cnt);

	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)
	    || !_valid_job_assoc(job_ptr))
		return;

	if (type == ACCT_POLICY_JOB_FINI)
		priority_g_job_end(job_ptr);
	else if (type == ACCT_POLICY_JOB_BEGIN) {
		uint64_t time_limit_secs = (uint64_t)job_ptr->time_limit * 60;
		/*
		 * Take into account usage factor.
		 *
		 * qos_ptr is set correctly if we have a qos_list here, no need
		 * to do anything other than that.
		 */
		if (job_ptr->qos_ptr &&
		    (job_ptr->qos_ptr->usage_factor >= 0))
			time_limit_secs *= job_ptr->qos_ptr->usage_factor;
		for (i = 0; i < slurmctld_tres_cnt; i++) {
			if (i == TRES_ARRAY_ENERGY)
				continue;
			if (job_ptr->tres_alloc_cnt[i] == NO_CONSUME_VAL64)
				continue;

			used_tres_run_secs[i] =
				job_ptr->tres_alloc_cnt[i] * time_limit_secs;
		}
	} else if (((type == ACCT_POLICY_ADD_SUBMIT) ||
		    (type == ACCT_POLICY_REM_SUBMIT)) &&
		   job_ptr->array_recs && job_ptr->array_recs->task_cnt)
		job_cnt = job_ptr->array_recs->task_cnt;
	if (!assoc_locked)
		assoc_mgr_lock(&locks);

	/*
	 * This handles removal of the accrual_cnt pending on
	 * state.  We do not want to call this on add submit as it could push
	 * other jobs pending waiting in line for the limit.  The main call to
	 * this that handles the initial call happens in build_job_queue().
	 */
	if (type != ACCT_POLICY_ADD_SUBMIT)
		acct_policy_handle_accrue_time(job_ptr, true);

	if ((type == ACCT_POLICY_ADD_SUBMIT) ||
	    (type == ACCT_POLICY_REM_SUBMIT)) {
		orig_qos_ptr = job_ptr->qos_ptr;
		_set_highest_prio_qos_ptr(job_ptr);
	}

	/*
	 * If we have submitted to multiple partitions we need to handle all of
	 * them on submit and remove if the job was cancelled before it ran
	 * (!job_ptr->tres_alloc_str).
	 */
	if (((type == ACCT_POLICY_ADD_SUBMIT) ||
	     (type == ACCT_POLICY_REM_SUBMIT)) &&
	    job_ptr->part_ptr_list &&
	    (IS_JOB_PENDING(job_ptr) || !job_ptr->tres_alloc_str)) {
		bool job_first = false;
		foreach_part_qos_limit_usage_t part_qos_limit_usage = {
			.job_cnt = job_cnt,
			.job_ptr = job_ptr,
			.part_qos_list = NULL,
			.type = type,
			.used_tres_run_secs = used_tres_run_secs,
		};
		if (job_ptr->qos_ptr &&
		    (((slurmdb_qos_rec_t *)job_ptr->qos_ptr)->flags
		     & QOS_FLAG_OVER_PART_QOS))
			job_first = true;

		if (job_first) {
			_qos_adjust_limit_usage(type, job_ptr, job_ptr->qos_ptr,
						used_tres_run_secs, job_cnt);
			part_qos_limit_usage.part_qos_list = list_create(NULL);
			list_push(part_qos_limit_usage.part_qos_list,
				  job_ptr->qos_ptr);
		}

		(void) list_for_each(job_ptr->part_ptr_list,
				     _foreach_part_qos_limit_usage,
				     &part_qos_limit_usage);

		/*
		 * Don't adjust usage to this job's qos if
		 * it's the same as the qos of a partition
		 * that we already handled.
		 */
		if (!job_first && job_ptr->qos_ptr &&
		    (!part_qos_limit_usage.part_qos_list ||
		     !list_find_first(part_qos_limit_usage.part_qos_list,
				      _find_qos_part,
				      job_ptr->qos_ptr)))
			_qos_adjust_limit_usage(type, job_ptr, job_ptr->qos_ptr,
						used_tres_run_secs, job_cnt);

		FREE_NULL_LIST(part_qos_limit_usage.part_qos_list);
	} else {
		slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;

		/*
		 * Here if the job is starting and we had a part_ptr_list before
		 * hand we need to remove the submit from all partition qos
		 * outside of the one we actually are going to run on.
		 */
		if ((type == ACCT_POLICY_JOB_BEGIN) &&
		    job_ptr->part_ptr_list) {
			foreach_part_qos_limit_usage_t part_qos_limit_usage = {
				.job_cnt = job_cnt,
				.job_ptr = job_ptr,
				.part_qos_list = list_create(NULL),
				.type = ACCT_POLICY_REM_SUBMIT,
				.used_tres_run_secs = used_tres_run_secs,
			};

			if (job_ptr->qos_ptr)
				list_push(part_qos_limit_usage.part_qos_list,
					  job_ptr->qos_ptr);
			if (job_ptr->part_ptr && job_ptr->part_ptr->qos_ptr &&
			    job_ptr->qos_ptr != job_ptr->part_ptr->qos_ptr)
				list_push(part_qos_limit_usage.part_qos_list,
					  job_ptr->part_ptr->qos_ptr);

			(void) list_for_each(job_ptr->part_ptr_list,
					     _foreach_part_qos_limit_usage,
					     &part_qos_limit_usage);

			FREE_NULL_LIST(part_qos_limit_usage.part_qos_list);
		}

		acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

		_qos_adjust_limit_usage(type, job_ptr, qos_ptr_1,
					used_tres_run_secs, job_cnt);
		_qos_adjust_limit_usage(type, job_ptr, qos_ptr_2,
					used_tres_run_secs, job_cnt);
	}

	assoc_ptr = job_ptr->assoc_ptr;
	while (assoc_ptr) {
		switch (type) {
		case ACCT_POLICY_ADD_SUBMIT:
			assoc_ptr->usage->used_submit_jobs += job_cnt;
			break;
		case ACCT_POLICY_REM_SUBMIT:
			if (assoc_ptr->usage->used_submit_jobs)
				assoc_ptr->usage->used_submit_jobs -= job_cnt;
			else
				debug2("acct_policy_remove_job_submit: "
				       "used_submit_jobs underflow for "
				       "account %s",
				       assoc_ptr->acct);
			break;
		case ACCT_POLICY_JOB_BEGIN:
			assoc_ptr->usage->used_jobs++;
			_add_usage_node_bitmap(
				job_ptr,
				&assoc_ptr->usage->grp_node_bitmap,
				&assoc_ptr->usage->grp_node_job_cnt,
				&assoc_ptr->usage->
				grp_used_tres[TRES_ARRAY_NODE]);

			for (i = 0; i < slurmctld_tres_cnt; i++) {
				if (i == TRES_ARRAY_ENERGY)
					continue;
				if (job_ptr->tres_alloc_cnt[i] ==
				    NO_CONSUME_VAL64)
					continue;

				if (i != TRES_ARRAY_NODE) {
					assoc_ptr->usage->grp_used_tres[i] +=
						job_ptr->tres_alloc_cnt[i];
				}
				assoc_ptr->usage->grp_used_tres_run_secs[i] +=
					used_tres_run_secs[i];
				debug2("acct_policy_job_begin: after adding %pJ, assoc %u(%s/%s/%s) grp_used_tres_run_secs(%s) is %"PRIu64,
				       job_ptr, assoc_ptr->id, assoc_ptr->acct,
				       assoc_ptr->user, assoc_ptr->partition,
				       assoc_mgr_tres_name_array[i],
				       assoc_ptr->usage->
				       grp_used_tres_run_secs[i]);
			}
			break;
		case ACCT_POLICY_JOB_FINI:
			if (assoc_ptr->usage->used_jobs)
				assoc_ptr->usage->used_jobs--;
			else
				debug2("acct_policy_job_fini: used_jobs "
				       "underflow for account %s",
				       assoc_ptr->acct);
			_rm_usage_node_bitmap(
				job_ptr,
				assoc_ptr->usage->grp_node_bitmap,
				assoc_ptr->usage->grp_node_job_cnt,
				&assoc_ptr->usage->
				grp_used_tres[TRES_ARRAY_NODE]);
			for (i = 0; i < slurmctld_tres_cnt; i++) {
				if ((i == TRES_ARRAY_ENERGY) ||
				    (i == TRES_ARRAY_NODE))
					continue;
				if (job_ptr->tres_alloc_cnt[i] ==
				    NO_CONSUME_VAL64)
					continue;

				if (job_ptr->tres_alloc_cnt[i] >
				    assoc_ptr->usage->grp_used_tres[i]) {
					assoc_ptr->usage->grp_used_tres[i] = 0;
					debug2("acct_policy_job_fini: "
					       "grp_used_tres(%s) "
					       "underflow for assoc "
					       "%u(%s/%s/%s)",
					       assoc_mgr_tres_name_array[i],
					       assoc_ptr->id, assoc_ptr->acct,
					       assoc_ptr->user,
					       assoc_ptr->partition);
				} else {
					assoc_ptr->usage->grp_used_tres[i] -=
						job_ptr->tres_alloc_cnt[i];
				}
			}

			break;
		default:
			error("acct_policy: association unknown type %d", type);
			break;
		}
		/* now handle all the group limits of the parents */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}

	/*
	 * When we are removing submit we need to set the pointer back if it was
	 * changed.
	 */
	if ((type == ACCT_POLICY_REM_SUBMIT) &&
	    (orig_qos_ptr != job_ptr->qos_ptr)) {
		job_ptr->qos_ptr = orig_qos_ptr;
		job_ptr->qos_id = orig_qos_ptr->id;
	}

	if (!assoc_locked)
		assoc_mgr_unlock(&locks);
}

static void _set_time_limit(uint32_t *time_limit, uint32_t part_max_time,
			    uint32_t limit_max_time, uint16_t *limit_set_time)
{
	if ((*time_limit) == NO_VAL) {
		if (limit_max_time)
			(*time_limit) = limit_max_time;
		else if (part_max_time != INFINITE)
			(*time_limit) = part_max_time;
		else
			(*time_limit) = INFINITE;

		if (limit_set_time)
			(*limit_set_time) = 1;
	} else if (limit_set_time && (*limit_set_time) &&
		   ((*time_limit) > limit_max_time))
		(*time_limit) = limit_max_time;
}

static void _qos_alter_job(job_record_t *job_ptr,
			   slurmdb_qos_rec_t *qos_ptr,
			   uint64_t *used_tres_run_secs,
			   uint64_t *new_used_tres_run_secs)
{
	int i;
	slurmdb_used_limits_t *used_limits_a = NULL, *used_limits_u = NULL;

	if (!qos_ptr || !job_ptr)
		return;

	used_limits_a =	acct_policy_get_acct_used_limits(
		&qos_ptr->usage->acct_limit_list,
		job_ptr->assoc_ptr->acct);

	used_limits_u = acct_policy_get_user_used_limits(
		&qos_ptr->usage->user_limit_list,
		job_ptr->user_id);

	for (i=0; i<slurmctld_tres_cnt; i++) {
		if (used_tres_run_secs[i] == new_used_tres_run_secs[i])
			continue;
		/*
		 * Handle the case when remaining usage is less than
		 * the original job request.
		 */
		int64_t used_tres_run_sec_decr =
			used_tres_run_secs[i] -
			new_used_tres_run_secs[i];
		if ((used_tres_run_sec_decr < 0) ||
		    (used_tres_run_sec_decr <
		     qos_ptr->usage->grp_used_tres_run_secs[i]))
			qos_ptr->usage->grp_used_tres_run_secs[i] -=
				used_tres_run_sec_decr;
		else
			qos_ptr->usage->grp_used_tres_run_secs[i] = 0;

		if ((used_tres_run_sec_decr < 0) ||
		    (used_tres_run_sec_decr <
		     used_limits_a->tres_run_secs[i]))
			used_limits_a->tres_run_secs[i] -=
				used_tres_run_sec_decr;
		else
			used_limits_a->tres_run_secs[i] = 0;

		if ((used_tres_run_sec_decr < 0) ||
		    (used_tres_run_sec_decr <
		     used_limits_u->tres_run_secs[i]))
			used_limits_u->tres_run_secs[i] -=
				used_tres_run_sec_decr;
		else
			used_limits_u->tres_run_secs[i] = 0;

		debug2("altering %pJ QOS %s got %"PRIu64" just removed %"PRIu64" and added %"PRIu64,
		       job_ptr, qos_ptr->name,
		       qos_ptr->usage->grp_used_tres_run_secs[i],
		       used_tres_run_secs[i],
		       new_used_tres_run_secs[i]);
	}
}

/*
 * _validate_tres_limits_for_assoc - validate the tres requested against limits
 * of an association as well as qos skipping any limit an admin set
 *
 * OUT - tres_pos - if false is returned position in array of failed limit
 * IN - job_tres_array - count of various TRES requested by the job
 * IN - divisor - divide the job_tres_array TRES by this variable, 0 if none
 * IN - assoc_tres_array - TRES limits from an association (Grp, Max, Min)
 * IN - qos_tres_array - TRES limits QOS has imposed already
 * IN - acct_policy_limit_set_array - limits that have been overridden
 *                                    by an admin
 * IN strict_checking - If a limit needs to be enforced now or not.
 * IN update_call - If this is an update or a create call
 * IN max_limit - Limits are for MAX else, the limits are MIN.
 *
 * RET - True if no limit is violated, false otherwise with tres_pos
 * being set to the position of the failed limit.
 */
static bool _validate_tres_limits_for_assoc(
	int *tres_pos,
	uint64_t *job_tres_array,
	uint64_t divisor,
	uint64_t *assoc_tres_array,
	uint64_t *qos_tres_array,
	uint16_t *admin_set_limit_tres_array,
	bool strict_checking,
	bool update_call, bool max_limit)
{
	int i;
	uint64_t job_tres;

	if (!strict_checking)
		return true;

	for (i = 0; i < g_tres_count; i++) {
		(*tres_pos) = i;

		if ((admin_set_limit_tres_array[i] == ADMIN_SET_LIMIT)
		    || (qos_tres_array[i] != INFINITE64)
		    || (assoc_tres_array[i] == INFINITE64)
		    || (!job_tres_array[i] && !update_call))
			continue;

		job_tres = job_tres_array[i];

		if (divisor)
			job_tres /= divisor;

		if (max_limit) {
			if (job_tres > assoc_tres_array[i])
				return false;
		} else if (job_tres < assoc_tres_array[i])
				return false;
	}

	return true;
}


/*
 * _validate_tres_limits_for_qos - validate the tres requested against limits
 * of a QOS as well as qos skipping any limit an admin set
 *
 * OUT - tres_pos - if false is returned position in array of failed limit
 * IN - job_tres_array - count of various TRES requested by the job
 * IN - divisor - divide the job_tres_array TRES by this variable, 0 if none
 * IN - grp_tres_array - Grp TRES limits from QOS
 * IN - max_tres_array - Max/Min TRES limits from QOS
 * IN/OUT - out_grp_tres_array - Grp TRES limits QOS has imposed already,
 *                               if a new limit is found the limit is filled in.
 * IN/OUT - out_max_tres_array - Max/Min TRES limits QOS has imposed already,
 *                               if a new limit is found the limit is filled in.
 * IN - acct_policy_limit_set_array - limits that have been overridden
 *                                    by an admin
 * IN strict_checking - If a limit needs to be enforced now or not.
 * IN max_limit - Limits are for MAX else, the limits are MIN.
 *
 * RET - True if no limit is violated, false otherwise with tres_pos
 * being set to the position of the failed limit.
 */
static bool _validate_tres_limits_for_qos(
	int *tres_pos,
	uint64_t *job_tres_array,
	uint64_t divisor,
	uint64_t *grp_tres_array,
	uint64_t *max_tres_array,
	uint64_t *out_grp_tres_array,
	uint64_t *out_max_tres_array,
	uint16_t *admin_set_limit_tres_array,
	bool strict_checking, bool max_limit)
{
	uint64_t max_tres_limit, out_max_tres_limit;
	int i;
	uint64_t job_tres;

	if (!strict_checking)
		return true;

	for (i = 0; i < g_tres_count; i++) {
		(*tres_pos) = i;
		if (grp_tres_array) {
			max_tres_limit = MIN(grp_tres_array[i],
					     max_tres_array[i]);
			out_max_tres_limit = MIN(out_grp_tres_array[i],
						 out_max_tres_array[i]);
		} else {
			max_tres_limit = max_tres_array[i];
			out_max_tres_limit = out_max_tres_array[i];
		}

		/* we don't need to look at this limit */
		if ((admin_set_limit_tres_array[i] == ADMIN_SET_LIMIT)
		    || (out_max_tres_limit != INFINITE64)
		    || (max_tres_limit == INFINITE64)
		    || (job_tres_array[i] && (job_tres_array[i] == NO_VAL64)))
			continue;

		out_max_tres_array[i] = max_tres_array[i];

		job_tres = job_tres_array[i];

		if (divisor)
			job_tres /= divisor;

		if (out_grp_tres_array && grp_tres_array) {
			if (out_grp_tres_array[i] == INFINITE64)
				out_grp_tres_array[i] = grp_tres_array[i];

			if (max_limit) {
				if (job_tres > grp_tres_array[i])
					return false;
			}  else if (job_tres < grp_tres_array[i])
				return false;
		}

		if (max_limit) {
			if (job_tres > max_tres_array[i])
				return false;
		} else if (job_tres < max_tres_array[i])
			return false;
	}

	return true;
}

/* Only check the time_limits if the admin didn't set
 * the timelimit.
 * It is important we look at these even if strict_checking
 * isn't set so we get the correct time_limit from the job.
 */
static bool _validate_time_limit(uint32_t *time_limit_in,
				 uint32_t part_max_time,
				 uint64_t tres_req_cnt,
				 uint64_t max_limit,
				 void *out_max_limit,
				 uint16_t *limit_set_time,
				 bool strict_checking,
				 bool is64)
{
	uint32_t max_time_limit;
	uint64_t out_max_64 = *(uint64_t *)out_max_limit;
	uint32_t out_max_32 = *(uint32_t *)out_max_limit;

       if (!tres_req_cnt || (((*time_limit_in) != NO_VAL) &&
           (!strict_checking ||
            (*limit_set_time) == ADMIN_SET_LIMIT)))
		return true;

	if (is64) {
		if ((out_max_64 != INFINITE64) ||
		    (max_limit == INFINITE64) ||
		    (tres_req_cnt == NO_VAL64))
			return true;
	} else {
		if ((out_max_32 != INFINITE) ||
		    ((uint32_t)max_limit == INFINITE) ||
		    ((uint32_t)tres_req_cnt == NO_VAL))
			return true;
	}

	max_time_limit = (uint32_t)(max_limit / tres_req_cnt);

	_set_time_limit(time_limit_in, part_max_time, max_time_limit,
			limit_set_time);

	if (is64)
		(*(uint64_t *)out_max_limit) = max_limit;
	else
		(*(uint32_t *)out_max_limit) = (uint32_t)max_limit;

	if ((*time_limit_in) > max_time_limit)
		return false;

	return true;
}

/*
 * _validate_tres_time_limits - validate the tres requested
 * against limits of an association as well as qos skipping any limit
 * an admin set
 *
 * OUT - tres_pos - if false is returned position in array of failed limit
 * IN/OUT - time_limit_in - Job's time limit, set and returned based off limits
 *                          if none is given.
 * IN - part_max_time - Job's partition max time limit
 * IN - job_tres_array - count of various TRES requested by the job
 * IN - max_tres_array - Max TRES limits of association/QOS
 * OUT - out_max_tres_array - Max TRES limits as set by the various TRES
 * OUT - limit_set_time - set if the time_limit was set by a limit QOS/Assoc or
 *                        otherwise.
 * IN strict_checking - If a limit needs to be enforced now or not.
 *
 * RET - True if no limit is violated, false otherwise with tres_pos
 * being set to the position of the failed limit.
 */
static bool _validate_tres_time_limits(
	int *tres_pos,
	uint32_t *time_limit_in,
	uint32_t part_max_time,
	uint64_t *job_tres_array,
	uint64_t *max_tres_array,
	uint64_t *out_max_tres_array,
	uint16_t *limit_set_time,
	bool strict_checking)
{
	int i;
//	uint32_t max_time_limit;

	if (!strict_checking || (*limit_set_time) == ADMIN_SET_LIMIT)
		return true;

	for (i = 0; i < g_tres_count; i++) {
		(*tres_pos) = i;

		if (!_validate_time_limit(time_limit_in, part_max_time,
					  job_tres_array[i],
					  max_tres_array[i],
					  &out_max_tres_array[i],
					  limit_set_time,
					  strict_checking, true))
			return false;
		/* if ((out_max_tres_array[i] != INFINITE64) || */
		/*     (max_tres_array[i] == INFINITE64) || */
		/*     (job_tres_array[i] == NO_VAL64) || */
		/*     (job_tres_array[i] == 0)) */
		/* 	continue; */

		/* max_time_limit = (uint32_t)(max_tres_array[i] / */
		/* 			    job_tres_array[i]); */

		/* _set_time_limit(time_limit_in, */
		/* 		part_max_time, max_time_limit, */
		/* 		limit_set_time); */

		/* out_max_tres_array[i] = max_tres_array[i]; */

		/* if ((*time_limit_in) > max_time_limit) */
		/* 	return false; */
	}

	return true;
}

/*
 * _validate_tres_usage_limits - validate the TRES requested against
 * specified limits; when checking for safe limits, also take into
 * consideration already used and currently running TRES resources
 *
 * OUT - tres_pos - if function returns other than TRES_USAGE_OKAY,
 *                  position in TRES array of failed limit
 * IN - tres_limit_array - count of various TRES limits to check against
 * OUT - out_tres_limit_array - optional; assigned values from tres_limit_array
 *                              when out_tres_limit_set is true,
 *                              skipped when any of:
 *                              1) admin_limit_set is set and is an admin
 *                                 limit
 *                              2) out_tres_limit_array is set and its value
 *                                 has been changed since initially being set
 *                                 to INFINITE64
 *                              3) tres_limit_array is INFINITE64
 * IN - tres_req_cnt - must be set; the following is checked with tres_req_cnt:
 *                     1) safe_limits && tres_req_cnt > tres_limit_array,
 *                        return TRES_USAGE_REQ_EXCEEDS_LIMIT
 *                     2) when safe_limits and tres_usage are set:
 *                        (tres_req_cnt + tres_usage) >
 *                        (tres_limit_array - curr_usage),
 *                        return TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE
 *                        curr_usage will be 0 when not passed
 * IN - tres_usage - TRES (currently running if curr_usage is set, already used
 *                   otherwise) optional; This value is used only if
 *                   safe_limits is true.  It will be added to tres_req_cnt to
 *                   count as extra time to observe, see tres_req_cnt section
 *                   above for tres_usage interaction
 * IN - curr_usage - TRES (already used) optional; when set, check if:
 *                   1) curr_usage > tres_limit_array && tres_req_cnt
 *                      return TRES_USAGE_CUR_EXCEEDS_LIMIT
 *                   2) when safe_limits is true, see tres_req_cnt section
 *                      above for curr_usage interaction
 * IN - admin_limit_set - limits that have been overridden by an admin, see
 *                        out_tres_limit_array section above for interaction
 * IN - safe_limits - see tres_req_cnt section above for interaction
 * IN - out_tres_limit_set - out_tres_limit_array is set as described above
 *      when true; out_tres_limit_array is not modified when false
 * RET - TRES_USAGE_OKAY if no limit is violated, otherwise one of the other
 *       acct_policy_tres_usage_t enumerations with tres_pos being set to the
 *       position of the failed limit.
 */
static acct_policy_tres_usage_t _validate_tres_usage_limits(
	int *tres_pos,
	uint64_t *tres_limit_array,
	uint64_t *out_tres_limit_array,
	uint64_t *tres_req_cnt,
	uint64_t *tres_usage,
	uint64_t *curr_usage,
	uint16_t *admin_limit_set,
	bool safe_limits,
	bool out_tres_limit_set)
{
	int i;
	uint64_t usage = 0;

	xassert(tres_limit_array);
	xassert(tres_req_cnt);

	for (i = 0; i < g_tres_count; i++) {
		(*tres_pos) = i;

		if ((admin_limit_set &&
		     admin_limit_set[i] == ADMIN_SET_LIMIT) ||
		    (out_tres_limit_array &&
		     out_tres_limit_array[i] != INFINITE64) ||
		    (tres_limit_array[i] == INFINITE64))
			continue;

		if (out_tres_limit_set && out_tres_limit_array)
			out_tres_limit_array[i] = tres_limit_array[i];

		if (curr_usage && tres_req_cnt[i] &&
		    (curr_usage[i] >= tres_limit_array[i]))
			return TRES_USAGE_CUR_EXCEEDS_LIMIT;

		if (safe_limits) {
			if (tres_req_cnt[i] > tres_limit_array[i])
				return TRES_USAGE_REQ_EXCEEDS_LIMIT;

			if (curr_usage)
				usage = curr_usage[i];
			if (tres_usage && tres_req_cnt[i] &&
			    ((tres_req_cnt[i] + tres_usage[i]) >
			     (tres_limit_array[i] - usage)))
				return TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE;
		}
	}

	return TRES_USAGE_OKAY;
}

/*
 * _validate_tres_usage_limits_for_qos - validate the tres requested
 * against limits of an qos skipping any limit an admin set
 *
 * OUT - tres_pos - if false is returned position in array of failed limit
 * IN - tres_limit_array - TRES limits from an association
 * IN/OUT - out_tres_limit_array - TRES limits QOS has imposed already, if a new
 *                                 limit is found the limit is filled in.
 * IN - tres_req_cnt - TRES requested from the job
 * IN - tres_usage - TRES usage from the QOS (in minutes)
 * IN - curr_usage - TRES usage in use right now by the QOS (running jobs)
 * IN - admin_limit_set - TRES limits that have been overridden by an admin
 * IN - safe_limits - if the safe flag was set on AccountingStorageEnforce
 *
 * RET - True if no limit is violated, false otherwise with tres_pos
 * being set to the position of the failed limit.
 */
static acct_policy_tres_usage_t _validate_tres_usage_limits_for_qos(
	int *tres_pos,
	uint64_t *tres_limit_array,
	uint64_t *out_tres_limit_array,
	uint64_t *tres_req_cnt,
	uint64_t *tres_usage,
	uint64_t *curr_usage,
	uint16_t *admin_limit_set,
	bool safe_limits)
{
	return _validate_tres_usage_limits(tres_pos,
					   tres_limit_array,
					   out_tres_limit_array,
					   tres_req_cnt,
					   tres_usage,
					   curr_usage,
					   admin_limit_set,
					   safe_limits,
					   true);
}

/*
 * _validate_tres_usage_limits_for_assoc - validate the tres requested
 * against limits of an association as well as qos skipping any limit
 * an admin set
 *
 * OUT - tres_pos - if false is returned position in array of failed limit
 * IN - tres_limit_array - TRES limits from an association
 * IN - qos_tres_limit_array - TRES limits QOS has imposed already
 * IN - tres_req_cnt - TRES requested from the job
 * IN - tres_usage - TRES usage from the association (in minutes)
 * IN - curr_usage - TRES usage in use right now by the assoc (running jobs)
 * IN - admin_limit_set - TRES limits that have been overridden by an admin
 * IN - safe_limits - if the safe flag was set on AccountingStorageEnforce
 *
 * RET - True if no limit is violated, false otherwise with tres_pos
 * being set to the position of the failed limit.
 */
static acct_policy_tres_usage_t _validate_tres_usage_limits_for_assoc(
	int *tres_pos,
	uint64_t *tres_limit_array,
	uint64_t *qos_tres_limit_array,
	uint64_t *tres_req_cnt,
	uint64_t *tres_usage,
	uint64_t *curr_usage,
	uint16_t *admin_limit_set,
	bool safe_limits)
{
	return _validate_tres_usage_limits(tres_pos,
					   tres_limit_array,
					   qos_tres_limit_array,
					   tres_req_cnt,
					   tres_usage,
					   curr_usage,
					   admin_limit_set,
					   safe_limits,
					   false);
}

static int _qos_policy_validate(job_desc_msg_t *job_desc,
				slurmdb_assoc_rec_t *assoc_ptr,
				part_record_t *part_ptr,
				slurmdb_qos_rec_t *qos_ptr,
				slurmdb_qos_rec_t *qos_out_ptr,
				uint32_t *reason,
				acct_policy_limit_set_t *acct_policy_limit_set,
				bool update_call,
				char *user_name,
				int job_cnt,
				bool strict_checking)
{
	int rc = true;
	int tres_pos = 0;

	if (!qos_ptr || !qos_out_ptr)
		return rc;

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   job_desc->tres_req_cnt, 0,
					   NULL,
					   qos_ptr->max_tres_pa_ctld,
					   NULL,
					   qos_out_ptr->max_tres_pa_ctld,
					   acct_policy_limit_set->tres,
					   strict_checking, 1)) {
		if (job_desc->tres_req_cnt[tres_pos] >
		    qos_ptr->max_tres_pa_ctld[tres_pos]) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_QOS_MAX_UNK_PER_ACCT);

			debug2("job submit for user %s(%u): "
			       "min tres(%s) request %"PRIu64" exceeds "
			       "per-acct max tres limit %"PRIu64" for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_desc->tres_req_cnt[tres_pos],
			       qos_ptr->max_tres_pa_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   job_desc->tres_req_cnt, 0,
					   qos_ptr->grp_tres_ctld,
					   qos_ptr->max_tres_pu_ctld,
					   qos_out_ptr->grp_tres_ctld,
					   qos_out_ptr->max_tres_pu_ctld,
					   acct_policy_limit_set->tres,
					   strict_checking, 1)) {
		if (job_desc->tres_req_cnt[tres_pos] >
		    qos_ptr->max_tres_pu_ctld[tres_pos]) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_QOS_MAX_UNK_PER_USER);

			debug2("job submit for user %s(%u): "
			       "min tres(%s) request %"PRIu64" exceeds "
			       "per-user max tres limit %"PRIu64" for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_desc->tres_req_cnt[tres_pos],
			       qos_ptr->max_tres_pu_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		} else if (job_desc->tres_req_cnt[tres_pos] >
			   qos_ptr->grp_tres_ctld[tres_pos]) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_QOS_GRP_UNK);

			debug2("job submit for user %s(%u): "
			       "min tres(%s) request %"PRIu64" exceeds "
			       "group max tres limit %"PRIu64" for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_desc->tres_req_cnt[tres_pos],
			       qos_ptr->grp_tres_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	/* for validation we don't need to look at
	 * qos_ptr->grp_jobs.
	 */

	if ((qos_out_ptr->grp_submit_jobs == INFINITE) &&
	    (qos_ptr->grp_submit_jobs != INFINITE)) {

		qos_out_ptr->grp_submit_jobs = qos_ptr->grp_submit_jobs;

		if ((qos_ptr->usage->grp_used_submit_jobs + job_cnt)
		    > qos_ptr->grp_submit_jobs) {
			if (reason)
				*reason = WAIT_QOS_GRP_SUB_JOB;
			debug2("job submit for user %s(%u): group max submit job limit exceeded %u (used:%u + requested:%d) for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       qos_ptr->grp_submit_jobs,
			       qos_ptr->usage->grp_used_submit_jobs, job_cnt,
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	/* Only check the time_limits if the admin didn't set the timelimit.
	 * It is important we look at these even if strict_checking
	 * isn't set so we get the correct time_limit from the job.
	 */
	if (acct_policy_limit_set->time != ADMIN_SET_LIMIT) {
		if (!_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    qos_ptr->max_tres_mins_pj_ctld,
			    qos_out_ptr->max_tres_mins_pj_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_QOS_MAX_UNK_MINS_PER_JOB);
			debug2("job submit for user %s(%u): "
			       "tres(%s) time limit request %"PRIu64" "
			       "exceeds max per-job limit %"PRIu64" "
			       "for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       qos_ptr->max_tres_mins_pj_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if (!_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    qos_ptr->grp_tres_mins_ctld,
			    qos_out_ptr->grp_tres_mins_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_QOS_GRP_UNK_MIN);
			debug2("job submit for user %s(%u): "
			       "tres(%s) time limit request %"PRIu64" "
			       "exceeds group max limit %"PRIu64" "
			       "for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       qos_ptr->grp_tres_mins_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if (!_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    qos_ptr->grp_tres_run_mins_ctld,
			    qos_out_ptr->grp_tres_run_mins_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_QOS_GRP_UNK_RUN_MIN);
			debug2("job submit for user %s(%u): "
			       "tres(%s) time limit request %"PRIu64" "
			       "exceeds group max running limit %"PRIu64" "
			       "for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       qos_ptr->grp_tres_run_mins_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if (!_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    qos_ptr->max_tres_run_mins_pa_ctld,
			    qos_out_ptr->max_tres_run_mins_pa_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT);
			debug2("job submit for user %s(%u): tres(%s) time limit request %"PRIu64"exceeds account max running limit %"PRIu64"for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       qos_ptr->max_tres_run_mins_pa_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if (!_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    qos_ptr->max_tres_run_mins_pu_ctld,
			    qos_out_ptr->max_tres_run_mins_pu_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER);
			debug2("job submit for user %s(%u): tres(%s) time limit request %"PRIu64"exceeds user max running limit %"PRIu64"for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       qos_ptr->max_tres_run_mins_pu_ctld[tres_pos],
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if ((qos_out_ptr->max_wall_pj == INFINITE) &&
		    (qos_ptr->max_wall_pj != INFINITE) &&
		    (!update_call || (job_desc->time_limit != NO_VAL))) {
			_set_time_limit(&job_desc->time_limit,
					part_ptr->max_time,
					qos_ptr->max_wall_pj,
					&acct_policy_limit_set->time);
			qos_out_ptr->max_wall_pj = qos_ptr->max_wall_pj;

			if (strict_checking
			    && job_desc->time_limit > qos_ptr->max_wall_pj) {
				if (reason)
					*reason = WAIT_QOS_MAX_WALL_PER_JOB;
				debug2("job submit for user %s(%u): "
				       "time limit %u exceeds qos max %u",
				       user_name,
				       job_desc->user_id,
				       job_desc->time_limit,
				       qos_ptr->max_wall_pj);
				rc = false;
				goto end_it;
			}
		}

		if ((qos_out_ptr->grp_wall == INFINITE) &&
		    (qos_ptr->grp_wall != INFINITE) &&
		    (!update_call || (job_desc->time_limit != NO_VAL))) {
			_set_time_limit(&job_desc->time_limit,
					part_ptr->max_time,
					qos_ptr->grp_wall,
					&acct_policy_limit_set->time);

			qos_out_ptr->grp_wall = qos_ptr->grp_wall;

			if (strict_checking
			    && job_desc->time_limit > qos_ptr->grp_wall) {
				if (reason)
					*reason = WAIT_QOS_GRP_WALL;
				debug2("job submit for user %s(%u): "
				       "time limit %u exceeds qos grp max %u",
				       user_name,
				       job_desc->user_id,
				       job_desc->time_limit,
				       qos_ptr->grp_wall);
				rc = false;
				goto end_it;
			}
		}
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   job_desc->tres_req_cnt, 0,
					   NULL,
					   qos_ptr->max_tres_pj_ctld,
					   NULL,
					   qos_out_ptr->max_tres_pj_ctld,
					   acct_policy_limit_set->tres,
					   strict_checking, 1)) {
		if (reason)
			*reason = _get_tres_state_reason(
				tres_pos, WAIT_QOS_MAX_UNK_PER_JOB);

		debug2("job submit for user %s(%u): "
		       "min tres(%s) request %"PRIu64" exceeds "
		       "per-job max tres limit %"PRIu64" for qos '%s'",
		       user_name,
		       job_desc->user_id,
		       assoc_mgr_tres_name_array[tres_pos],
		       job_desc->tres_req_cnt[tres_pos],
		       qos_ptr->max_tres_pj_ctld[tres_pos],
		       qos_ptr->name);
		rc = false;
		goto end_it;
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   job_desc->tres_req_cnt,
					   job_desc->tres_req_cnt[
						   TRES_ARRAY_NODE],
					   NULL,
					   qos_ptr->max_tres_pn_ctld,
					   NULL,
					   qos_out_ptr->max_tres_pn_ctld,
					   acct_policy_limit_set->tres,
					   strict_checking, 1)) {
		if (reason)
			*reason = _get_tres_state_reason(
				tres_pos, WAIT_QOS_MAX_UNK_PER_NODE);

		debug2("job submit for user %s(%u): "
		       "min tres(%s) request %"PRIu64" exceeds "
		       "per-node max tres limit %"PRIu64" for qos '%s'",
		       user_name,
		       job_desc->user_id,
		       assoc_mgr_tres_name_array[tres_pos],
		       job_desc->tres_req_cnt[tres_pos] /
		       job_desc->tres_req_cnt[TRES_ARRAY_NODE],
		       qos_ptr->max_tres_pn_ctld[tres_pos],
		       qos_ptr->name);
		rc = false;
		goto end_it;
	}

	/* for validation we don't need to look at
	 * qos_ptr->max_jobs.
	 */

	/* we don't need to check min_tres_pj here */

	if ((qos_out_ptr->max_submit_jobs_pa == INFINITE) &&
	    (qos_ptr->max_submit_jobs_pa != INFINITE)) {
		slurmdb_used_limits_t *used_limits =
			acct_policy_get_acct_used_limits(
				&qos_ptr->usage->acct_limit_list,
				assoc_ptr->acct);

		qos_out_ptr->max_submit_jobs_pa = qos_ptr->max_submit_jobs_pa;

		if ((used_limits->submit_jobs + job_cnt) >
		    qos_ptr->max_submit_jobs_pa) {
			if (reason)
				*reason = WAIT_QOS_MAX_SUB_JOB_PER_ACCT;
			debug2("job submit for account %s: qos max submit job limit exceeded %u (used:%u + requested:%d) for qos '%s'",
			       assoc_ptr->acct,
			       qos_ptr->max_submit_jobs_pa,
			       used_limits->submit_jobs, job_cnt,
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	if ((qos_out_ptr->max_submit_jobs_pu == INFINITE) &&
	    (qos_ptr->max_submit_jobs_pu != INFINITE)) {
		slurmdb_used_limits_t *used_limits =
			acct_policy_get_user_used_limits(
				&qos_ptr->usage->user_limit_list,
				job_desc->user_id);

		qos_out_ptr->max_submit_jobs_pu = qos_ptr->max_submit_jobs_pu;

		if ((used_limits->submit_jobs + job_cnt) >
		     qos_ptr->max_submit_jobs_pu) {
			if (reason)
				*reason = WAIT_QOS_MAX_SUB_JOB;
			debug2("job submit for user %s(%u): qos max submit job limit exceeded %u (used:%u + requested:%d) for qos '%s'",
			       user_name,
			       job_desc->user_id,
			       qos_ptr->max_submit_jobs_pu,
			       used_limits->submit_jobs, job_cnt,
			       qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   job_desc->tres_req_cnt, 0,
					   NULL,
					   qos_ptr->min_tres_pj_ctld,
					   NULL,
					   qos_out_ptr->min_tres_pj_ctld,
					   acct_policy_limit_set->tres,
					   strict_checking, 0)) {
		if (reason)
			*reason = _get_tres_state_reason(
				tres_pos, WAIT_QOS_MIN_UNK);

		debug2("job submit for user %s(%u): "
		       "min tres(%s) request %"PRIu64" exceeds "
		       "per-job max tres limit %"PRIu64" for qos '%s'",
		       user_name,
		       job_desc->user_id,
		       assoc_mgr_tres_name_array[tres_pos],
		       job_desc->tres_req_cnt[tres_pos],
		       qos_ptr->min_tres_pj_ctld[tres_pos],
		       qos_ptr->name);
		rc = false;
		goto end_it;
	}

end_it:
	return rc;
}

static int _qos_job_runnable_pre_select(job_record_t *job_ptr,
					slurmdb_qos_rec_t *qos_ptr,
					slurmdb_qos_rec_t *qos_out_ptr)
{
	uint32_t wall_mins;
	uint32_t time_limit = NO_VAL;
	int rc = true;
	slurmdb_used_limits_t *used_limits = NULL, *used_limits_a = NULL;
	bool safe_limits = false;
	slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr;

	if (!qos_ptr || !qos_out_ptr || !assoc_ptr)
		return rc;

	/*
	 * check to see if we should be using safe limits, if so we
	 * will only start a job if there are sufficient remaining
	 * cpu-minutes for it to run to completion
	 */
	if (accounting_enforce & ACCOUNTING_ENFORCE_SAFE)
		safe_limits = true;

	wall_mins = qos_ptr->usage->grp_used_wall / 60;

	used_limits_a =	acct_policy_get_acct_used_limits(
		&qos_ptr->usage->acct_limit_list,
		assoc_ptr->acct);

	used_limits = acct_policy_get_user_used_limits(
		&qos_ptr->usage->user_limit_list,
		job_ptr->user_id);


	/* we don't need to check grp_tres_mins here */

	/* we don't need to check grp_tres here */

	/* we don't need to check grp_mem here */
	if ((qos_out_ptr->grp_jobs == INFINITE) &&
	    (qos_ptr->grp_jobs != INFINITE)) {

		qos_out_ptr->grp_jobs = qos_ptr->grp_jobs;

		if (qos_ptr->usage->grp_used_jobs >= qos_ptr->grp_jobs) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = WAIT_QOS_GRP_JOB;
			debug2("%pJ being held, the job is at or exceeds group max jobs limit %u with %u for QOS %s",
			       job_ptr, qos_ptr->grp_jobs,
			       qos_ptr->usage->grp_used_jobs, qos_ptr->name);

			rc = false;
			goto end_it;
		}
	}

	/* we don't need to check grp_submit_jobs here */

	/* we don't need to check grp_tres_run_mins here */

	/* we don't need to check grp_nodes here */

	/* we don't need to check submit_jobs here */

	if ((job_ptr->limit_set.time != ADMIN_SET_LIMIT)
	    && (qos_out_ptr->grp_wall == INFINITE)
	    && (qos_ptr->grp_wall != INFINITE)) {
		if (time_limit == NO_VAL) {
			time_limit = job_ptr->time_limit;
			_set_time_limit(&time_limit,
					job_ptr->part_ptr->max_time,
					MIN(qos_ptr->grp_wall,
					    qos_ptr->max_wall_pj),
					&job_ptr->limit_set.time);

			/* Account for usage factor, if necessary */
			if ((job_ptr->qos_ptr &&
			     (job_ptr->qos_ptr->flags &
			      QOS_FLAG_USAGE_FACTOR_SAFE) &&
			     (job_ptr->qos_ptr->usage_factor >= 0)) &&
			    ((time_limit != INFINITE) ||
			     (job_ptr->qos_ptr->usage_factor < 1.0))) {
				time_limit *= job_ptr->qos_ptr->usage_factor;
			}
		}

		qos_out_ptr->grp_wall = qos_ptr->grp_wall;

		if (wall_mins >= qos_ptr->grp_wall) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = WAIT_QOS_GRP_WALL;
			debug2("%pJ being held, the job is at or exceeds group wall limit %u with %u for QOS %s",
			       job_ptr, qos_ptr->grp_wall,
			       wall_mins, qos_ptr->name);
			rc = false;
			goto end_it;
		} else if (safe_limits &&
			   ((wall_mins + time_limit) > qos_ptr->grp_wall)) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = WAIT_QOS_GRP_WALL;
			debug2("%pJ being held, the job request will exceed group wall limit %u if ran with %u for QOS %s",
			       job_ptr, qos_ptr->grp_wall,
			       wall_mins + time_limit, qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	/* we don't need to check max_tres_mins_pj here */

	/* we don't need to check max_tres_pj here */

	/* we don't need to check max_tres_pn here */

	/* we don't need to check min_tres_pj here */

	/* we don't need to check max_tres_pa here */

	/* we don't need to check max_tres_pu here */

	/* we don't need to check max_tres_run_mins_pa here */

	/* we don't need to check max_tres_run_mins_pu here */

	if ((qos_out_ptr->max_jobs_pa == INFINITE)
	    && (qos_ptr->max_jobs_pa != INFINITE)) {

		qos_out_ptr->max_jobs_pa = qos_ptr->max_jobs_pa;

		if (used_limits_a->jobs >= qos_ptr->max_jobs_pa) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason =
				WAIT_QOS_MAX_JOB_PER_ACCT;
			debug2("%pJ being held, the job is at or exceeds max jobs per-acct (%s) limit %u with %u for QOS %s",
			       job_ptr, used_limits_a->acct,
			       qos_ptr->max_jobs_pa,
			       used_limits_a->jobs, qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	if ((qos_out_ptr->max_jobs_pu == INFINITE)
	    && (qos_ptr->max_jobs_pu != INFINITE)) {

		qos_out_ptr->max_jobs_pu = qos_ptr->max_jobs_pu;

		if (used_limits->jobs >= qos_ptr->max_jobs_pu) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason =
				WAIT_QOS_MAX_JOB_PER_USER;
			debug2("%pJ being held, the job is at or exceeds max jobs per-user limit %u with %u for QOS %s",
			       job_ptr, qos_ptr->max_jobs_pu,
			       used_limits->jobs, qos_ptr->name);
			rc = false;
			goto end_it;
		}
	}

	/* we don't need to check submit_jobs_pa here */

	/* we don't need to check submit_jobs_pu here */

	/*
	 * if the QOS limits have changed since job
	 * submission and job can not run, then kill it
	 */
	if ((job_ptr->limit_set.time != ADMIN_SET_LIMIT)
	    && (qos_out_ptr->max_wall_pj == INFINITE)
	    && (qos_ptr->max_wall_pj != INFINITE)) {
		if (time_limit == NO_VAL) {
			time_limit = job_ptr->time_limit;
			_set_time_limit(&time_limit,
					job_ptr->part_ptr->max_time,
					qos_ptr->max_wall_pj,
					&job_ptr->limit_set.time);
		}

		/* Account for usage factor, if necessary */
		if ((job_ptr->qos_ptr &&
		     (job_ptr->qos_ptr->flags &
		      QOS_FLAG_USAGE_FACTOR_SAFE) &&
		     (job_ptr->qos_ptr->usage_factor >= 0)) &&
		    ((time_limit != INFINITE) ||
		     (job_ptr->qos_ptr->usage_factor < 1.0))) {
			time_limit *= job_ptr->qos_ptr->usage_factor;
		}

		qos_out_ptr->max_wall_pj = qos_ptr->max_wall_pj;

		if (time_limit > qos_out_ptr->max_wall_pj) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason =
				WAIT_QOS_MAX_WALL_PER_JOB;
			debug2("%pJ being held, time limit %u exceeds QOS max wall pj %u",
			       job_ptr, time_limit, qos_out_ptr->max_wall_pj);
			rc = false;
			goto end_it;
		}
	}
end_it:

	return rc;
}

static int _qos_job_runnable_post_select(job_record_t *job_ptr,
					 slurmdb_qos_rec_t *qos_ptr,
					 slurmdb_qos_rec_t *qos_out_ptr,
					 uint64_t *tres_req_cnt,
					 uint64_t *job_tres_time_limit)
{
	uint64_t tres_usage_mins[slurmctld_tres_cnt];
	uint64_t tres_run_mins[slurmctld_tres_cnt];
	uint64_t tres_run_mins_pa[slurmctld_tres_cnt];
	uint64_t tres_run_mins_pu[slurmctld_tres_cnt];
	uint64_t orig_node_cnt;
	slurmdb_used_limits_t *used_limits = NULL, *used_limits_a = NULL;
	bool safe_limits = false;
	int rc = true, i, tres_pos = 0;
	acct_policy_tres_usage_t tres_usage;
	slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr;
	double usage_factor = 1.0;

	if (!qos_ptr || !qos_out_ptr || !assoc_ptr)
		return rc;

	/*
	 * check to see if we should be using safe limits, if so we will only
	 * will only start a job if there are sufficient remaining cpu-minutes
	 * for it to run to completion
	 */
	if (accounting_enforce & ACCOUNTING_ENFORCE_SAFE)
		safe_limits = true;

	used_limits_a =	acct_policy_get_acct_used_limits(
		&qos_ptr->usage->acct_limit_list,
		assoc_ptr->acct);

	used_limits = acct_policy_get_user_used_limits(
		&qos_ptr->usage->user_limit_list,
		job_ptr->user_id);


	/* clang needs this memset to avoid a warning */
	memset(tres_run_mins, 0, sizeof(tres_run_mins));
	memset(tres_run_mins_pa, 0, sizeof(tres_run_mins_pa));
	memset(tres_run_mins_pu, 0, sizeof(tres_run_mins_pu));
	memset(tres_usage_mins, 0, sizeof(tres_usage_mins));
	if (job_ptr->qos_ptr &&
	    (job_ptr->qos_ptr->usage_factor >= 0))
		usage_factor = job_ptr->qos_ptr->usage_factor;
	for (i=0; i<slurmctld_tres_cnt; i++) {
		tres_run_mins[i] =
			qos_ptr->usage->grp_used_tres_run_secs[i] / 60;
		tres_run_mins_pa[i] =
			used_limits_a->tres_run_secs[i] / 60;
		tres_run_mins_pu[i] =
			used_limits->tres_run_secs[i] / 60;
		tres_usage_mins[i] =
			(uint64_t)(qos_ptr->usage->usage_tres_raw[i] / 60.0);

		/*
		 * Clear usage if factor is 0 so that jobs can run. Otherwise
		 * multiplying can cause more jobs to be run than the limit
		 * allows (e.g. usagefactor=.5).
		 */
		if (usage_factor == 0.0) {
			tres_run_mins[i] *= usage_factor;
			tres_run_mins_pa[i] *= usage_factor;
			tres_usage_mins[i] *= usage_factor;
		}
	}

	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos, qos_ptr->grp_tres_mins_ctld,
		qos_out_ptr->grp_tres_mins_ctld, job_tres_time_limit,
		tres_run_mins, tres_usage_mins, job_ptr->limit_set.tres,
		safe_limits);
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK_MIN);
		debug2("%pJ being held, QOS %s group max tres(%s) minutes limit of %"PRIu64" is already at or exceeded with %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->grp_tres_mins_ctld[tres_pos],
		       tres_usage_mins[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK_MIN);
		debug2("%pJ being held, the job is requesting more than allowed with QOS %s's group max tres(%s) minutes of %"PRIu64" with %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->grp_tres_mins_ctld[tres_pos],
		       job_tres_time_limit[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		/*
		 * If we're using safe limits start
		 * the job only if there are
		 * sufficient cpu-mins left such that
		 * it will run to completion without
		 * being killed
		 */
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK_MIN);
		debug2("%pJ being held, the job is at or exceeds QOS %s's group max tres(%s) minutes of %"PRIu64" of which %"PRIu64" are still available but request is for %"PRIu64" (plus %"PRIu64" already in use) tres minutes (request tres count %"PRIu64")",
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->grp_tres_mins_ctld[tres_pos],
		       qos_ptr->grp_tres_mins_ctld[tres_pos] -
		       tres_usage_mins[tres_pos],
		       job_tres_time_limit[tres_pos],
		       tres_run_mins[tres_pos],
		       tres_req_cnt[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	/*
	 * If the job's CPU limit wasn't administratively set and the QOS
	 * has a GrpCPU limit, cancel the job if its minimum CPU requirement
	 * has exceeded the limit for all CPUs usable by the QOS
	 */
	orig_node_cnt = tres_req_cnt[TRES_ARRAY_NODE];
	_get_unique_job_node_cnt(job_ptr, qos_ptr->usage->grp_node_bitmap,
				 &tres_req_cnt[TRES_ARRAY_NODE]);
	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos,
		qos_ptr->grp_tres_ctld,	qos_out_ptr->grp_tres_ctld,
		tres_req_cnt, qos_ptr->usage->grp_used_tres,
		NULL, job_ptr->limit_set.tres, true);
	tres_req_cnt[TRES_ARRAY_NODE] = orig_node_cnt;
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible because the curr_usage sent in is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK);
		debug2("%pJ is being held, QOS %s min tres(%s) request %"PRIu64" exceeds group max tres limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       tres_req_cnt[tres_pos],
		       qos_ptr->grp_tres_ctld[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK);
		debug2("%pJ being held, if allowed the job request will exceed QOS %s group max tres(%s) limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->grp_tres_ctld[tres_pos],
		       qos_ptr->usage->grp_used_tres[tres_pos],
		       tres_req_cnt[tres_pos]);
		rc = false;
		goto end_it;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	/* we don't need to check grp_jobs here */

	/* we don't need to check grp_submit_jobs here */

	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos,
		qos_ptr->grp_tres_run_mins_ctld,
		qos_out_ptr->grp_tres_run_mins_ctld,
		job_tres_time_limit, tres_run_mins, NULL, NULL, true);
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible because the curr_usage sent in is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK_RUN_MIN);
		debug2("%pJ is being held, QOS %s group max running tres(%s) minutes request %"PRIu64" exceeds limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       job_tres_time_limit[tres_pos],
		       qos_ptr->grp_tres_run_mins_ctld[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_GRP_UNK_RUN_MIN);
		debug2("%pJ being held, if allowed the job request will exceed QOS %s group max running tres(%s) minutes limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->grp_tres_run_mins_ctld[tres_pos],
		       tres_run_mins[tres_pos],
		       job_tres_time_limit[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	/* we don't need to check submit_jobs here */

	/* we don't need to check grp_wall here */

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   job_tres_time_limit, 0,
					   NULL,
					   qos_ptr->max_tres_mins_pj_ctld,
					   NULL,
					   qos_out_ptr->max_tres_mins_pj_ctld,
					   job_ptr->limit_set.tres,
					   1, 1)) {
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_MINS_PER_JOB);
		debug2("%pJ being held, the job is requesting more than allowed with QOS %s's max tres(%s) minutes of %"PRIu64" with %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->max_tres_mins_pj_ctld[tres_pos],
		       job_tres_time_limit[tres_pos]);
		rc = false;
		goto end_it;
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   tres_req_cnt, 0,
					   NULL,
					   qos_ptr->max_tres_pj_ctld,
					   NULL,
					   qos_out_ptr->max_tres_pj_ctld,
					   job_ptr->limit_set.tres,
					   1, 1)) {
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_PER_JOB);
		debug2("%pJ is being held, QOS %s min tres(%s) per job request %"PRIu64" exceeds max tres limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       tres_req_cnt[tres_pos],
		       qos_ptr->max_tres_pj_ctld[tres_pos]);
		rc = false;
		goto end_it;
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   tres_req_cnt,
					   tres_req_cnt[TRES_ARRAY_NODE],
					   NULL,
					   qos_ptr->max_tres_pn_ctld,
					   NULL,
					   qos_out_ptr->max_tres_pn_ctld,
					   job_ptr->limit_set.tres,
					   1, 1)) {
		uint64_t req_per_node;
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_PER_NODE);
		req_per_node = tres_req_cnt[tres_pos];
		if (tres_req_cnt[TRES_ARRAY_NODE] > 1)
			req_per_node /= tres_req_cnt[TRES_ARRAY_NODE];
		debug2("%pJ is being held, QOS %s min tres(%s) per node request %"PRIu64" exceeds max tres limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       req_per_node,
		       qos_ptr->max_tres_pn_ctld[tres_pos]);
		rc = false;
		goto end_it;
	}

	if (!_validate_tres_limits_for_qos(&tres_pos,
					   tres_req_cnt, 0,
					   NULL,
					   qos_ptr->min_tres_pj_ctld,
					   NULL,
					   qos_out_ptr->min_tres_pj_ctld,
					   job_ptr->limit_set.tres,
					   1, 0)) {
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MIN_UNK);
		debug2("%pJ is being held, QOS %s min tres(%s) per job request %"PRIu64" exceeds min tres limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       tres_req_cnt[tres_pos],
		       qos_ptr->min_tres_pj_ctld[tres_pos]);
		rc = false;
		goto end_it;
	}

	orig_node_cnt = tres_req_cnt[TRES_ARRAY_NODE];
	_get_unique_job_node_cnt(job_ptr, used_limits_a->node_bitmap,
				 &tres_req_cnt[TRES_ARRAY_NODE]);
	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos,
		qos_ptr->max_tres_pa_ctld, qos_out_ptr->max_tres_pa_ctld,
		tres_req_cnt, used_limits_a->tres,
		NULL, job_ptr->limit_set.tres, true);
	tres_req_cnt[TRES_ARRAY_NODE] = orig_node_cnt;
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible because the curr_usage sent in is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		/*
		 * Hold the job if it exceeds the per-acct
		 * TRES limit for the given QOS
		 */
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_PER_ACCT);
		debug2("%pJ is being held, QOS %s min tres(%s) request %"PRIu64" exceeds max tres per account (%s) limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       tres_req_cnt[tres_pos],
		       used_limits_a->acct,
		       qos_ptr->max_tres_pa_ctld[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		/*
		 * Hold the job if the user has exceeded the QOS per-user
		 * TRES limit with their current usage
		 */
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_PER_ACCT);
		debug2("%pJ being held, if allowed the job request will exceed QOS %s max tres(%s) per account (%s) limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       used_limits_a->acct,
		       qos_ptr->max_tres_pa_ctld[tres_pos],
		       used_limits_a->tres[tres_pos],
		       tres_req_cnt[tres_pos]);
		rc = false;
		goto end_it;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	orig_node_cnt = tres_req_cnt[TRES_ARRAY_NODE];
	_get_unique_job_node_cnt(job_ptr, used_limits->node_bitmap,
				 &tres_req_cnt[TRES_ARRAY_NODE]);
	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos,
		qos_ptr->max_tres_pu_ctld, qos_out_ptr->max_tres_pu_ctld,
		tres_req_cnt, used_limits->tres,
		NULL, job_ptr->limit_set.tres, true);
	tres_req_cnt[TRES_ARRAY_NODE] = orig_node_cnt;
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible because the curr_usage sent in is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		/*
		 * Hold the job if it exceeds the per-user
		 * TRES limit for the given QOS
		 */
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_PER_USER);
		debug2("%pJ is being held, QOS %s min tres(%s) request %"PRIu64" exceeds max tres per user limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       tres_req_cnt[tres_pos],
		       qos_ptr->max_tres_pu_ctld[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		/*
		 * Hold the job if the user has exceeded the QOS
		 * per-user TRES limit with their current usage
		 */
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_PER_USER);
		debug2("%pJ being held, if allowed the job request will exceed QOS %s max tres(%s) per user limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->max_tres_pu_ctld[tres_pos],
		       used_limits->tres[tres_pos],
		       tres_req_cnt[tres_pos]);
		rc = false;
		goto end_it;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	/* We do not need to check max_jobs_pa here */

	/* We do not need to check max_jobs_pu here */

	/* we don't need to check submit_jobs_pa here */

	/* we don't need to check submit_jobs_pu here */

	/* we don't need to check max_wall_pj here */

	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos, qos_ptr->max_tres_run_mins_pa_ctld,
		qos_out_ptr->max_tres_run_mins_pa_ctld, job_tres_time_limit,
		tres_run_mins_pa, NULL, NULL, true);
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible because the curr_usage sent in is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT);
		debug2("%pJ is being held, QOS %s account max running tres(%s) minutes request %"PRIu64" exceeds limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       job_tres_time_limit[tres_pos],
		       qos_ptr->max_tres_run_mins_pa_ctld[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_RUN_MINS_PER_ACCT);
		debug2("%pJ being held, if allowed the job request will exceed QOS %s account max running tres(%s) minutes limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->max_tres_run_mins_pa_ctld[tres_pos],
		       tres_run_mins_pa[tres_pos],
		       job_tres_time_limit[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos, qos_ptr->max_tres_run_mins_pu_ctld,
		qos_out_ptr->max_tres_run_mins_pu_ctld, job_tres_time_limit,
		tres_run_mins_pu, NULL, NULL, true);
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible because the curr_usage sent in is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER);
		debug2("%pJ is being held, QOS %s user max running tres(%s) minutes request %"PRIu64" exceeds limit %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       job_tres_time_limit[tres_pos],
		       qos_ptr->max_tres_run_mins_pu_ctld[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = _get_tres_state_reason(
			tres_pos, WAIT_QOS_MAX_UNK_RUN_MINS_PER_USER);
		debug2("%pJ being held, if allowed the job request will exceed QOS %s user max running tres(%s) minutes limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
		       job_ptr, qos_ptr->name,
		       assoc_mgr_tres_name_array[tres_pos],
		       qos_ptr->max_tres_run_mins_pu_ctld[tres_pos],
		       tres_run_mins_pu[tres_pos],
		       job_tres_time_limit[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

end_it:
	if (!rc)
		job_ptr->qos_blocking_ptr = qos_ptr;

	return rc;
}

static int _qos_job_time_out(job_record_t *job_ptr,
			     slurmdb_qos_rec_t *qos_ptr,
			     slurmdb_qos_rec_t *qos_out_ptr,
			     uint64_t *job_tres_usage_mins)
{
	uint64_t tres_usage_mins[slurmctld_tres_cnt];
	uint32_t wall_mins;
	int rc = true, tres_pos = 0, i;
	acct_policy_tres_usage_t tres_usage;
	time_t now = time(NULL);

	if (!qos_ptr || !qos_out_ptr)
		return rc;

	/*
	 * The idea here is for QOS to trump what an association has set for
	 * a limit, so if an association set of wall 10 mins and the QOS has
	 * 20 mins set and the job has been running for 11 minutes it continues
	 * until 20.
	 */
	/* clang needs this memset to avoid a warning */
	memset(tres_usage_mins, 0, sizeof(tres_usage_mins));
	for (i = 0; i < slurmctld_tres_cnt; i++)
		tres_usage_mins[i] =
			(uint64_t)(qos_ptr->usage->usage_tres_raw[i] / 60.0);
	wall_mins = qos_ptr->usage->grp_used_wall / 60;

	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos, qos_ptr->grp_tres_mins_ctld,
		qos_out_ptr->grp_tres_mins_ctld, job_tres_usage_mins,
		NULL, tres_usage_mins, NULL, false);
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		last_job_update = now;
		info("%pJ timed out, the job is at or exceeds QOS %s's group max tres(%s) minutes of %"PRIu64" with %"PRIu64"",
		     job_ptr, qos_ptr->name,
		     assoc_mgr_tres_name_array[tres_pos],
		     qos_ptr->grp_tres_mins_ctld[tres_pos],
		     tres_usage_mins[tres_pos]);
		job_ptr->state_reason = FAIL_TIMEOUT;
		xfree(job_ptr->state_desc);
		xstrfmtcat(job_ptr->state_desc, "Job is at or exceeds QOS %s's group max TRES(%s) minutes of %"PRIu64" with %"PRIu64,
			   qos_ptr->name, assoc_mgr_tres_name_array[tres_pos],
			   qos_ptr->grp_tres_mins_ctld[tres_pos],
			   tres_usage_mins[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		/* not possible safe_limits is 0 */
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		/* not possible safe_limits is 0 */
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

	if ((qos_out_ptr->grp_wall == INFINITE)
	    && (qos_ptr->grp_wall != INFINITE)) {

		qos_out_ptr->grp_wall = qos_ptr->grp_wall;

		if (wall_mins >= qos_ptr->grp_wall) {
			last_job_update = now;
			info("%pJ timed out, the job is at or exceeds QOS %s's group wall limit of %u with %u",
			     job_ptr, qos_ptr->name,
			     qos_ptr->grp_wall, wall_mins);
			job_ptr->state_reason = FAIL_TIMEOUT;
			xfree(job_ptr->state_desc);
			xstrfmtcat(job_ptr->state_desc, "Job is at or exceeds QOS %s's group wall limit of %u with %u",
				   qos_ptr->name, qos_ptr->grp_wall, wall_mins);
			rc = false;
			goto end_it;
		}
	}

	tres_usage = _validate_tres_usage_limits_for_qos(
		&tres_pos, qos_ptr->max_tres_mins_pj_ctld,
		qos_out_ptr->max_tres_mins_pj_ctld, job_tres_usage_mins,
		NULL, NULL, NULL, true);
	switch (tres_usage) {
	case TRES_USAGE_CUR_EXCEEDS_LIMIT:
		/* not possible curr_usage is NULL */
		break;
	case TRES_USAGE_REQ_EXCEEDS_LIMIT:
		last_job_update = now;
		info("%pJ timed out, the job is at or exceeds QOS %s's max tres(%s) minutes of %"PRIu64" with %"PRIu64,
		     job_ptr, qos_ptr->name,
		     assoc_mgr_tres_name_array[tres_pos],
		     qos_ptr->max_tres_mins_pj_ctld[tres_pos],
		     job_tres_usage_mins[tres_pos]);
		job_ptr->state_reason = FAIL_TIMEOUT;
		xfree(job_ptr->state_desc);
		xstrfmtcat(job_ptr->state_desc, "Job is at or exceeds QOS %s's max TRES(%s) minutes of %"PRIu64" with %"PRIu64,
			   qos_ptr->name, assoc_mgr_tres_name_array[tres_pos],
			   qos_ptr->max_tres_mins_pj_ctld[tres_pos],
			   job_tres_usage_mins[tres_pos]);
		rc = false;
		goto end_it;
		break;
	case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
		/* not possible tres_usage is NULL */
	case TRES_USAGE_OKAY:
		/* all good */
		break;
	}

end_it:
	return rc;
}

/*
 * acct_policy_add_job_submit - Note that a job has been submitted for
 *	accounting policy purposes.
 */
extern void acct_policy_add_job_submit(job_record_t *job_ptr, bool assoc_locked)
{
	_adjust_limit_usage(ACCT_POLICY_ADD_SUBMIT, job_ptr, assoc_locked);
}

/*
 * acct_policy_remove_job_submit - Note that a job has finished (might
 *      not had started or been allocated resources) for accounting
 *      policy purposes.
 */
extern void acct_policy_remove_job_submit(job_record_t *job_ptr,
					  bool assoc_locked)
{
	_adjust_limit_usage(ACCT_POLICY_REM_SUBMIT, job_ptr, assoc_locked);
}

/*
 * acct_policy_job_begin - Note that a job is starting for accounting
 *	policy purposes.
 */
extern void acct_policy_job_begin(job_record_t *job_ptr, bool assoc_locked)
{
	_adjust_limit_usage(ACCT_POLICY_JOB_BEGIN, job_ptr, assoc_locked);
}

/*
 * acct_policy_job_fini - Note that a job is completing for accounting
 *	policy purposes.
 */
extern void acct_policy_job_fini(job_record_t *job_ptr, bool assoc_locked)
{
	/* if end_time_exp == NO_VAL this has already happened */
	if (job_ptr->end_time_exp != (time_t)NO_VAL)
		_adjust_limit_usage(ACCT_POLICY_JOB_FINI, job_ptr,
				    assoc_locked);
	else
		debug2("We have already ran the job_fini for %pJ", job_ptr);
}

extern void acct_policy_alter_job(job_record_t *job_ptr,
				  uint32_t new_time_limit)
{
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	slurmdb_assoc_rec_t *assoc_ptr = NULL;
	assoc_mgr_lock_t locks =
		{ .assoc = WRITE_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };
	uint64_t used_tres_run_secs[slurmctld_tres_cnt];
	uint64_t new_used_tres_run_secs[slurmctld_tres_cnt];
	uint64_t time_limit_secs, new_time_limit_secs;
	int i;

	if (!IS_JOB_RUNNING(job_ptr) || (job_ptr->time_limit == new_time_limit))
		return;

	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)
	    || !_valid_job_assoc(job_ptr))
		return;

	time_limit_secs = (uint64_t)job_ptr->time_limit * 60;
	new_time_limit_secs = (uint64_t)new_time_limit * 60;

	/* take into account usage factor */
	if (job_ptr->qos_ptr && (job_ptr->qos_ptr->usage_factor >= 0)) {
		time_limit_secs *= job_ptr->qos_ptr->usage_factor;
		new_time_limit_secs *= job_ptr->qos_ptr->usage_factor;
	}

	/* clang needs these memset to avoid a warning */
	memset(used_tres_run_secs, 0, sizeof(used_tres_run_secs));
	memset(new_used_tres_run_secs, 0, sizeof(new_used_tres_run_secs));
	for (i=0; i<slurmctld_tres_cnt; i++) {
		if (i == TRES_ARRAY_ENERGY)
			continue;
		if (job_ptr->tres_alloc_cnt[i] == NO_CONSUME_VAL64)
			continue;

		used_tres_run_secs[i] =
			job_ptr->tres_alloc_cnt[i] * time_limit_secs;
		new_used_tres_run_secs[i] =
			job_ptr->tres_alloc_cnt[i] * new_time_limit_secs;
	}

	assoc_mgr_lock(&locks);

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

	_qos_alter_job(job_ptr, qos_ptr_1,
		       used_tres_run_secs, new_used_tres_run_secs);
	_qos_alter_job(job_ptr, qos_ptr_2,
		       used_tres_run_secs, new_used_tres_run_secs);

	assoc_ptr = job_ptr->assoc_ptr;
	while (assoc_ptr) {
		for (i=0; i<slurmctld_tres_cnt; i++) {
			if (used_tres_run_secs[i] == new_used_tres_run_secs[i])
				continue;
			/*
			 * Handle the case when remaining usage is less than
			 * the original job request.
			 */
			int64_t used_tres_run_sec_decr =
				used_tres_run_secs[i] -
				new_used_tres_run_secs[i];
			if ((used_tres_run_sec_decr < 0) ||
			    (used_tres_run_sec_decr <
			     assoc_ptr->usage->grp_used_tres_run_secs[i]))
				assoc_ptr->usage->grp_used_tres_run_secs[i] -=
					used_tres_run_sec_decr;
			else
				assoc_ptr->usage->grp_used_tres_run_secs[i] = 0;

			debug2("altering %pJ assoc %u(%s/%s/%s) got %"PRIu64" just removed %"PRIu64" and added %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_ptr->usage->grp_used_tres_run_secs[i],
			       used_tres_run_secs[i],
			       new_used_tres_run_secs[i]);
		}

		/* now handle all the group limits of the parents */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}
	assoc_mgr_unlock(&locks);
}

static void _get_prio_thresh(uint32_t *prio_thresh, uint32_t in_thresh)
{
	/*
	 * If we already set prio_thresh then call it good.
	 * If in_thresh is INFINITE we don't have a limit
	 */
	if ((*prio_thresh) || (in_thresh == INFINITE))
		return;

	*prio_thresh = in_thresh;
}

static void _get_accrue_create_cnt(uint32_t *max_jobs_accrue, int *create_cnt,
				   uint32_t in_accrue, uint32_t in_used)
{
	/*
	 * If in_accrue is INFINITE we don't have a limit
	 * If we already set max_jobs_accrue and it is the most restrictive,
	 * then call it good.
	 */
	if ((in_accrue == INFINITE) ||
	    ((*max_jobs_accrue != INFINITE) && (*max_jobs_accrue <= in_accrue)))
		return;

	*max_jobs_accrue = in_accrue;
	if (*max_jobs_accrue > in_used)
		*create_cnt = *max_jobs_accrue - in_used;
	else
		*create_cnt = 0;
}

static void _add_accrue_time_internal(void *x, void *arg)
{
	slurmdb_qos_rec_t *qos_ptr = x;
	acct_policy_accrue_t *acct_policy_accrue = arg;
	slurmdb_assoc_rec_t *assoc_ptr = acct_policy_accrue->assoc_ptr;

	log_flag(ACCRUE, "%s: Adding %d to assoc_ptr %p (%p %p %p)",
		 __func__, acct_policy_accrue->cnt, assoc_ptr, qos_ptr,
		 acct_policy_accrue->used_limits_acct,
		 acct_policy_accrue->used_limits_user);

	if (qos_ptr)
		qos_ptr->usage->accrue_cnt += acct_policy_accrue->cnt;
	if (acct_policy_accrue->used_limits_acct)
		acct_policy_accrue->used_limits_acct->accrue_cnt +=
			acct_policy_accrue->cnt;
	if (acct_policy_accrue->used_limits_user)
		acct_policy_accrue->used_limits_user->accrue_cnt +=
			acct_policy_accrue->cnt;

	while (assoc_ptr) {
		log_flag(ACCRUE, "assoc_id %u(%s/%s/%s/%p) added %d count %d",
			 assoc_ptr->id, assoc_ptr->acct, assoc_ptr->user,
			 assoc_ptr->partition, assoc_ptr->usage,
			 acct_policy_accrue->cnt,
			 assoc_ptr->usage->accrue_cnt);

		assoc_ptr->usage->accrue_cnt += acct_policy_accrue->cnt;
		/* now go up the hierarchy */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}
}

static void _remove_accrue_time_internal(void *x, void *arg)
{
	slurmdb_qos_rec_t *qos_ptr = x;
	acct_policy_accrue_t *acct_policy_accrue = arg;
	slurmdb_assoc_rec_t *assoc_ptr = acct_policy_accrue->assoc_ptr;

	log_flag(ACCRUE, "%s: Removing %d from assoc_ptr %p (%p %p %p)",
		 __func__, acct_policy_accrue->cnt, assoc_ptr, qos_ptr,
		 acct_policy_accrue->used_limits_acct,
		 acct_policy_accrue->used_limits_user);

	if (qos_ptr) {
		if (qos_ptr->usage->accrue_cnt >= acct_policy_accrue->cnt)
			qos_ptr->usage->accrue_cnt -= acct_policy_accrue->cnt;
		else {
			error("%s: QOS %s accrue_cnt underflow",
			      __func__, qos_ptr->name);
			qos_ptr->usage->accrue_cnt = 0;
		}
	}

	if (acct_policy_accrue->used_limits_acct) {
		if (acct_policy_accrue->used_limits_acct->accrue_cnt >=
		    acct_policy_accrue->cnt)
			acct_policy_accrue->used_limits_acct->accrue_cnt -=
				acct_policy_accrue->cnt;
		else {
			if (qos_ptr) {
				error("%s: QOS %s acct %s accrue_cnt underflow",
				      __func__, qos_ptr->name,
				      acct_policy_accrue->used_limits_acct->
				      acct);
			}
			acct_policy_accrue->used_limits_acct->accrue_cnt = 0;
		}
	}

	if (acct_policy_accrue->used_limits_user) {
		if (acct_policy_accrue->used_limits_user->accrue_cnt >=
		    acct_policy_accrue->cnt)
			acct_policy_accrue->used_limits_user->accrue_cnt -=
				acct_policy_accrue->cnt;
		else {
			if (qos_ptr) {
				error("%s: QOS %s user %u accrue_cnt underflow",
				      __func__, qos_ptr->name,
				      acct_policy_accrue->used_limits_user->
				      uid);
			}
			acct_policy_accrue->used_limits_user->accrue_cnt = 0;
		}
	}

	while (assoc_ptr) {
		if (assoc_ptr->usage->accrue_cnt >= acct_policy_accrue->cnt) {
			log_flag(ACCRUE, "assoc_id %u(%s/%s/%s/%p) removed %d count %d",
				 assoc_ptr->id, assoc_ptr->acct,
				 assoc_ptr->user, assoc_ptr->partition,
				 assoc_ptr->usage, acct_policy_accrue->cnt,
				 assoc_ptr->usage->accrue_cnt);
			assoc_ptr->usage->accrue_cnt -= acct_policy_accrue->cnt;
		} else {
			error("%s: assoc_id %u(%s/%s/%s) accrue_cnt underflow",
			      __func__, assoc_ptr->id,
			      assoc_ptr->acct,
			      assoc_ptr->user,
			      assoc_ptr->partition);
			assoc_ptr->usage->accrue_cnt = 0;
		}
		/* now go up the hierarchy */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}
}

static void _fill_in_qos_used_limits(slurmdb_qos_rec_t *qos_ptr,
				     acct_policy_accrue_t *acct_policy_accrue)
{

	if (acct_policy_accrue->limits_filled)
		return;

	acct_policy_accrue->limits_filled = true;
	if (!qos_ptr) {
		acct_policy_accrue->used_limits_acct = NULL;
		acct_policy_accrue->used_limits_user = NULL;
		return;
	}

	xassert(acct_policy_accrue->acct);

	acct_policy_accrue->used_limits_acct =
		acct_policy_get_acct_used_limits(
			&qos_ptr->usage->acct_limit_list,
			acct_policy_accrue->acct);
	acct_policy_accrue->used_limits_user =
		acct_policy_get_user_used_limits(
			&qos_ptr->usage->user_limit_list,
			acct_policy_accrue->uid);
}

static int _for_each_qos_remove_accrue_time(void *x, void *arg)
{
	slurmdb_qos_rec_t *qos_ptr = x;
	acct_policy_accrue_t *acct_policy_accrue = arg;

	_fill_in_qos_used_limits(qos_ptr, acct_policy_accrue);

	_remove_accrue_time_internal(qos_ptr, acct_policy_accrue);

	/* Only do assoc_ptr stuff once */
	acct_policy_accrue->assoc_ptr = NULL;

	return 0;
}

static bool _acct_policy_validate(job_desc_msg_t *job_desc,
				  part_record_t *part_ptr,
				  slurmdb_assoc_rec_t *assoc_in,
				  slurmdb_qos_rec_t *qos_ptr_1,
				  slurmdb_qos_rec_t *qos_ptr_2,
				  uint32_t *reason,
				  acct_policy_limit_set_t *
					acct_policy_limit_set,
				  bool update_call, bool locked)
{
	slurmdb_qos_rec_t qos_rec;
	slurmdb_assoc_rec_t *assoc_ptr = assoc_in;
	int parent = 0, job_cnt = 1;
	char *user_name = NULL;
	bool rc = true;
	assoc_mgr_lock_t locks =
		{ .assoc = READ_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };
	bool strict_checking;
	double limit_factor = -1.0;
	uint64_t grp_tres_ctld[slurmctld_tres_cnt];
	uint64_t max_tres_ctld[slurmctld_tres_cnt];

	xassert(acct_policy_limit_set);

	if (!assoc_ptr) {
		error("acct_policy_validate: no assoc_ptr given for job.");
		return false;
	}
	user_name = assoc_ptr->user;

	if (job_desc->array_bitmap)
		job_cnt = bit_set_count(job_desc->array_bitmap);

	slurmdb_init_qos_rec(&qos_rec, 0, INFINITE);

	if (!locked)
		assoc_mgr_lock(&locks);

	xassert(verify_assoc_lock(ASSOC_LOCK, READ_LOCK));
	xassert(verify_assoc_lock(QOS_LOCK, WRITE_LOCK));
	xassert(verify_assoc_lock(TRES_LOCK, READ_LOCK));

	assoc_mgr_set_qos_tres_cnt(&qos_rec);

	if (qos_ptr_1) {
		strict_checking = (qos_ptr_1->flags & QOS_FLAG_DENY_LIMIT);
		if (qos_ptr_2 && !strict_checking)
			strict_checking =
				qos_ptr_2->flags & QOS_FLAG_DENY_LIMIT;

		if (!(rc = _qos_policy_validate(
			      job_desc, assoc_ptr, part_ptr,
			      qos_ptr_1, &qos_rec,
			      reason, acct_policy_limit_set, update_call,
			      user_name, job_cnt, strict_checking)))
			goto end_it;
		if (!(rc = _qos_policy_validate(
			      job_desc, assoc_ptr,
			      part_ptr, qos_ptr_2, &qos_rec,
			      reason, acct_policy_limit_set, update_call,
			      user_name, job_cnt, strict_checking)))
			goto end_it;

	} else /*
		* We don't have a QOS to determine if we should fail or not, so
		* we will go with strict_checking by default.
		*/
		strict_checking = true;

	if (qos_ptr_1 && !fuzzy_equal(qos_ptr_1->limit_factor, INFINITE))
		limit_factor = qos_ptr_1->limit_factor;
	else if (qos_ptr_2 && !fuzzy_equal(qos_ptr_2->limit_factor, INFINITE))
                limit_factor = qos_ptr_2->limit_factor;

	while (assoc_ptr) {
		int tres_pos = 0;
		for (int i = 0; i < slurmctld_tres_cnt; i++) {
			grp_tres_ctld[i] = assoc_ptr->grp_tres_ctld[i];
			max_tres_ctld[i] = assoc_ptr->max_tres_ctld[i];
			_apply_limit_factor(&grp_tres_ctld[i], limit_factor);
			_apply_limit_factor(&max_tres_ctld[i], limit_factor);
		}

		if (!_validate_tres_limits_for_assoc(
			    &tres_pos, job_desc->tres_req_cnt, 0,
			    grp_tres_ctld,
			    qos_rec.grp_tres_ctld,
			    acct_policy_limit_set->tres,
			    strict_checking, update_call, 1)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_ASSOC_GRP_UNK);

			debug2("job submit for user %s(%u): "
			       "min tres(%s) request %"PRIu64" exceeds "
			       "group max tres limit %"PRIu64" for account %s",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_desc->tres_req_cnt[tres_pos],
			       grp_tres_ctld[tres_pos],
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		/* for validation we don't need to look at
		 * assoc_ptr->grp_jobs.
		 */

		if ((qos_rec.grp_submit_jobs == INFINITE) &&
		    (assoc_ptr->grp_submit_jobs != INFINITE) &&
		    ((assoc_ptr->usage->used_submit_jobs + job_cnt)
		     > assoc_ptr->grp_submit_jobs)) {
			if (reason)
				*reason = WAIT_ASSOC_GRP_SUB_JOB;
			debug2("job submit for user %s(%u): group max submit job limit exceeded %u (used:%u + requested:%d) for account '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_ptr->grp_submit_jobs,
			       assoc_ptr->usage->used_submit_jobs, job_cnt,
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		tres_pos = 0;
		if (!update_call && !_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    assoc_ptr->grp_tres_mins_ctld,
			    qos_rec.grp_tres_mins_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_ASSOC_GRP_UNK_MIN);
			debug2("job submit for user %s(%u): "
			       "tres(%s) time limit request %"PRIu64" "
			       "exceeds group max limit %"PRIu64" "
			       "for account '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       assoc_ptr->
			       grp_tres_mins_ctld[tres_pos],
			       assoc_ptr->acct);
			rc = false;
			goto end_it;
		}

		tres_pos = 0;
		if (!update_call && !_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    assoc_ptr->grp_tres_run_mins_ctld,
			    qos_rec.grp_tres_run_mins_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_ASSOC_GRP_UNK_RUN_MIN);
			debug2("job submit for user %s(%u): "
			       "tres(%s) time limit request %"PRIu64" "
			       "exceeds group max running "
			       "limit %"PRIu64" for account '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       assoc_ptr->
			       grp_tres_run_mins_ctld[tres_pos],
			       assoc_ptr->acct);
			rc = false;
			goto end_it;
		}

		if (!update_call && !_validate_time_limit(
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    1,
			    assoc_ptr->grp_wall,
			    &qos_rec.grp_wall,
			    &acct_policy_limit_set->time,
			    strict_checking, false)) {
			if (reason)
				*reason = WAIT_ASSOC_GRP_WALL;
			debug2("job submit for user %s(%u): "
			       "time limit %u exceeds max group %u for "
			       "account '%s'",
			       user_name,
			       job_desc->user_id,
			       job_desc->time_limit,
			       assoc_ptr->grp_wall,
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		/* We don't need to look at the regular limits for
		 * parents since we have pre-propagated them, so just
		 * continue with the next parent
		 */
		if (parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		/* for validation we don't need to look at
		 * assoc_ptr->max_cpu_mins_pj.
		 */

		tres_pos = 0;
		if (!_validate_tres_limits_for_assoc(
			    &tres_pos, job_desc->tres_req_cnt, 0,
			    max_tres_ctld,
			    qos_rec.max_tres_pj_ctld,
			    acct_policy_limit_set->tres,
			    strict_checking, update_call, 1)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos, WAIT_ASSOC_MAX_UNK_PER_JOB);

			debug2("job submit for user %s(%u): "
			       "min tres(%s) request %"PRIu64" exceeds "
			       "max tres limit %"PRIu64" for account %s",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_desc->tres_req_cnt[tres_pos],
			       max_tres_ctld[tres_pos],
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		tres_pos = 0;
		if (!_validate_tres_limits_for_assoc(
			    &tres_pos, job_desc->tres_req_cnt,
			    job_desc->tres_req_cnt[TRES_ARRAY_NODE],
			    assoc_ptr->max_tres_pn_ctld,
			    qos_rec.max_tres_pn_ctld,
			    acct_policy_limit_set->tres,
			    strict_checking, update_call, 1)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_ASSOC_MAX_UNK_PER_NODE);

			debug2("job submit for user %s(%u): "
			       "min tres(%s) request %"PRIu64" exceeds "
			       "max tres limit %"PRIu64" per node "
			       "for account %s",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_desc->tres_req_cnt[tres_pos] /
			       job_desc->tres_req_cnt[TRES_ARRAY_NODE],
			       assoc_ptr->max_tres_pn_ctld[tres_pos],
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		/* for validation we don't need to look at
		 * assoc_ptr->max_jobs.
		 */

		if ((qos_rec.max_submit_jobs_pa == INFINITE) &&
		    (qos_rec.max_submit_jobs_pu == INFINITE) &&
		    (assoc_ptr->max_submit_jobs != INFINITE) &&
		    ((assoc_ptr->usage->used_submit_jobs + job_cnt)
		     > assoc_ptr->max_submit_jobs)) {
			if (reason)
				*reason = WAIT_ASSOC_MAX_SUB_JOB;
			debug2("job submit for user %s(%u): account max submit job limit exceeded %u (used:%u + requested:%d) for account '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_ptr->max_submit_jobs,
			       assoc_ptr->usage->used_submit_jobs, job_cnt,
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		if (!update_call && !_validate_tres_time_limits(
			    &tres_pos,
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    job_desc->tres_req_cnt,
			    assoc_ptr->max_tres_mins_ctld,
			    qos_rec.max_tres_mins_pj_ctld,
			    &acct_policy_limit_set->time,
			    strict_checking)) {
			if (reason)
				*reason = _get_tres_state_reason(
					tres_pos,
					WAIT_ASSOC_MAX_UNK_MINS_PER_JOB);
			debug2("job submit for user %s(%u): "
			       "tres(%s) time limit request %"PRIu64" "
			       "exceeds max per-job limit %"PRIu64" "
			       "for account '%s'",
			       user_name,
			       job_desc->user_id,
			       assoc_mgr_tres_name_array[tres_pos],
			       ((uint64_t)job_desc->time_limit *
				job_desc->tres_req_cnt[tres_pos]),
			       assoc_ptr->max_tres_mins_ctld[tres_pos],
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		if (!update_call && !_validate_time_limit(
			    &job_desc->time_limit,
			    part_ptr->max_time,
			    1,
			    assoc_ptr->max_wall_pj,
			    &qos_rec.max_wall_pj,
			    &acct_policy_limit_set->time,
			    strict_checking, false)) {
			if (reason)
				*reason = WAIT_ASSOC_MAX_WALL_PER_JOB;
			debug2("job submit for user %s(%u): "
			       "time limit %u exceeds max %u for "
			       "account '%s'",
			       user_name,
			       job_desc->user_id,
			       job_desc->time_limit,
			       assoc_ptr->max_wall_pj,
			       assoc_ptr->acct);
			rc = false;
			break;
		}

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	if (!locked)
		assoc_mgr_unlock(&locks);
	slurmdb_free_qos_rec_members(&qos_rec);

	return rc;
}

static int _list_acct_policy_validate(void *x, void *arg)
{
	part_record_t *part_ptr = (part_record_t *) x;
	acct_policy_validate_args_t *args = (acct_policy_validate_args_t *) arg;
	slurmdb_qos_rec_t *qos_ptr_1 = NULL, *qos_ptr_2 = NULL;
	job_record_t job_rec;
	bool rc;

	job_rec.qos_ptr = args->job_qos_ptr;
	job_rec.part_ptr = part_ptr;
	acct_policy_set_qos_order(&job_rec, &qos_ptr_1, &qos_ptr_2);
	rc = _acct_policy_validate(args->job_desc, part_ptr, args->assoc_in,
				   qos_ptr_1, qos_ptr_2, args->reason,
				   args->acct_policy_limit_set,
				   args->update_call, true);
	if (!rc)
		return SLURM_ERROR; /* Break out of list_for_each. */
	return rc;
}

/*
 * acct_policy_validate - validate that a job request can be satisfied without
 * exceeding any association or QOS limit.
 * job_desc IN - job descriptor being submitted
 * part_ptr IN - first partition to which the job is being submitted
 * part_ptr_list IN - list of partitions to which the job is being submitted
 *                    (can be NULL)
 * assoc_in IN - pointer to association to which the job is being submitted
 * qos_ptr IN - pointer to QOS to which the job is being submitted
 * state_reason OUT - if non-NULL, set to reason for rejecting the job
 * acct_policy_limit_set IN/OUT - limits set for the job, pre-allocated storage
 *		is filled in by acct_policy_validate
 * update_call IN - true if request to update existing job request
 * RET true if valid
 */
extern bool acct_policy_validate(job_desc_msg_t *job_desc,
				 part_record_t *part_ptr,
				 list_t *part_ptr_list,
				 slurmdb_assoc_rec_t *assoc_in,
				 slurmdb_qos_rec_t *qos_ptr,
				 uint32_t *reason,
				 acct_policy_limit_set_t *acct_policy_limit_set,
				 bool update_call)
{
	int rc = true;
	assoc_mgr_lock_t locks =
		{ .assoc = READ_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };
	acct_policy_validate_args_t args = {
		.acct_policy_limit_set = acct_policy_limit_set,
		.assoc_in = assoc_in, .job_desc = job_desc,
		.job_qos_ptr = qos_ptr, .reason = reason,
		.update_call = update_call };

	assoc_mgr_lock(&locks);
	if (!part_ptr_list) {
		if (_list_acct_policy_validate(part_ptr, &args) == SLURM_ERROR)
			rc = false;
		assoc_mgr_unlock(&locks);
		return rc;
	}

	if (list_for_each(part_ptr_list, _list_acct_policy_validate, &args) < 0)
		rc = false;
	assoc_mgr_unlock(&locks);

	return rc;
}

/*
 * acct_policy_validate_het_job - validate that a hetjob as a whole (all
 * components at once) can be satisfied without exceeding any association
 * limit. Build a list of every job's association and QOS information then combine
 * usage information for every job sharing an association and test that against
 * the appropriate limit.
 *
 * NOTE: This test is imperfect. Each job actually has up to 3 sets of limits
 * to test (association, job QOS and partition QOS). Ideally each would be tested
 * independently, but that is complicated due to QOS limits overriding the
 * association limits and the ability to have 3 sets of limits for each job.
 * This only tests the association limit for each hetjob component based
 * upon that component's job and partition QOS.
 *
 * NOTE: That a hetjob passes this test does not mean that it will be able
 * to run. For example, this test assumes resource allocation at the CPU level.
 * If each task is allocated one core, with 2 CPUs, then the CPU limit test
 * would not be accurate.
 *
 * submit_job_list IN - list of job_record_t entries (already created)
 * RET true if valid
 */
extern bool acct_policy_validate_het_job(list_t *submit_job_list)
{
	assoc_mgr_lock_t locks =
		{ .assoc = READ_LOCK, .qos = READ_LOCK, .tres = READ_LOCK };
	list_t *het_job_limit_list = NULL;
	list_itr_t *iter1, *iter2;
	job_record_t *job_ptr1, *job_ptr2;
	het_job_limits_t *job_limit1, *job_limit2;
	bool rc = true;
	job_desc_msg_t job_desc;
	bool build_job_desc = true;
	acct_policy_limit_set_t acct_policy_limit_set;
	int i, job_cnt;
	uint32_t reason = 0;
	int tres_req_size = sizeof(uint64_t) * g_tres_count;

	memset(&acct_policy_limit_set, 0, sizeof(acct_policy_limit_set_t));
	acct_policy_limit_set.tres =
		xmalloc(sizeof(uint16_t) * slurmctld_tres_cnt);

	/* Build list of QOS, association, and job pointers */
	het_job_limit_list = list_create(xfree_ptr);
	iter1 = list_iterator_create(submit_job_list);
	assoc_mgr_lock(&locks);
	while ((job_ptr1 = list_next(iter1))) {
		job_limit1 = xmalloc(sizeof(het_job_limits_t));
		job_limit1->assoc_ptr = job_ptr1->assoc_ptr;
		job_limit1->job_ptr   = job_ptr1;
		list_append(het_job_limit_list, job_limit1);
	}
	assoc_mgr_unlock(&locks);
	list_iterator_destroy(iter1);

	iter1 = list_iterator_create(het_job_limit_list);
	while ((job_limit1 = list_next(iter1))) {
		job_ptr1 = job_limit1->job_ptr;
		if (build_job_desc) {
			build_job_desc = false;
			job_desc.time_limit = job_ptr1->time_limit;
			job_desc.tres_req_cnt = xmalloc(tres_req_size);
			job_desc.user_id = job_ptr1->user_id;
		}
		if (job_limit1->assoc_ptr) {
			job_cnt = 1;
			memcpy(job_desc.tres_req_cnt, job_ptr1->tres_req_cnt,
			       tres_req_size);
			iter2 = list_iterator_create(het_job_limit_list);
			while ((job_limit2 = list_next(iter2))) {
				if ((job_limit2 == job_limit1) ||
				    (job_limit2->assoc_ptr !=
				     job_limit1->assoc_ptr))
					continue;
				job_ptr2 = job_limit2->job_ptr;
				for (i = 0 ; i < g_tres_count; i++) {
					job_desc.tres_req_cnt[i] +=
						job_ptr2->tres_req_cnt[i];
				}
				job_cnt++;
			}
			list_iterator_destroy(iter2);
			if (job_cnt > 1) {
				job_desc.array_bitmap = bit_alloc(job_cnt);
				/*
				 * SET NO BITS. Make this look like zero jobs
				 * are being added. The job count was already
				 * validated when each individual component of
				 * the heterogeneous job was created.
				*/
				rc = acct_policy_validate(&job_desc,
						job_ptr1->part_ptr,
						job_ptr1->part_ptr_list,
						job_limit1->assoc_ptr,
						job_ptr1->qos_ptr,
						&reason,
						&acct_policy_limit_set,
						false);
				FREE_NULL_BITMAP(job_desc.array_bitmap);
				if (!rc)
					break;
			}
		}
	}
	list_iterator_destroy(iter1);

	xfree(job_desc.tres_req_cnt);
	FREE_NULL_LIST(het_job_limit_list);
	xfree(acct_policy_limit_set.tres);

	return rc;
}

/*
 * acct_policy_job_runnable_pre_select - Determine if the specified
 *	job can execute right now or not depending upon accounting
 *	policy (e.g. running job limit for this association). If the
 *	association limits prevent the job from ever running (lowered
 *	limits since job submission), then cancel the job.
 */
extern bool acct_policy_job_runnable_pre_select(job_record_t *job_ptr,
						bool assoc_mgr_locked)
{
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	slurmdb_qos_rec_t qos_rec;
	slurmdb_assoc_rec_t *assoc_ptr;
	uint32_t time_limit = NO_VAL;
	bool rc = true;
	uint32_t wall_mins;
	bool safe_limits = false;
	int parent = 0; /* flag to tell us if we are looking at the
			 * parent or not
			 */
	assoc_mgr_lock_t locks =
		{ .assoc = READ_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };

	/* check to see if we are enforcing associations */
	if (!accounting_enforce)
		return true;

	if (!_valid_job_assoc(job_ptr)) {
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = FAIL_ACCOUNT;
		return false;
	}

	/* now see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return true;

	/* clear old state reason */
	if (job_state_reason_check(job_ptr->state_reason, JSR_QOS_ASSOC)) {
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = WAIT_NO_REASON;
	}

	slurmdb_init_qos_rec(&qos_rec, 0, INFINITE);

	if (!assoc_mgr_locked)
		assoc_mgr_lock(&locks);

	assoc_mgr_set_qos_tres_cnt(&qos_rec);

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

	/* check the first QOS setting it's values in the qos_rec */
	if (qos_ptr_1 &&
	    !(rc = _qos_job_runnable_pre_select(job_ptr, qos_ptr_1, &qos_rec)))
		goto end_it;

	/* If qos_ptr_1 didn't set the value use the 2nd QOS to set the limit */
	if (qos_ptr_2 &&
	    !(rc = _qos_job_runnable_pre_select(job_ptr, qos_ptr_2, &qos_rec)))
		goto end_it;

	/*
	 * check to see if we should be using safe limits, if so we
	 * will only start a job if there are sufficient remaining
	 * cpu-minutes for it to run to completion
	 */
	if (accounting_enforce & ACCOUNTING_ENFORCE_SAFE)
		safe_limits = true;

	assoc_ptr = job_ptr->assoc_ptr;
	while (assoc_ptr) {
		/* This only trips when the grp_used_wall is divisible
		 * by 60, i.e if a limit is 1 min and you have only
		 * accumulated 59 seconds you will still be able to
		 * get another job in as 59/60 = 0 int wise.
		 */
		wall_mins = assoc_ptr->usage->grp_used_wall / 60;

#if _DEBUG
		info("acct_job_limits: %u of %u",
		     assoc_ptr->usage->used_jobs, assoc_ptr->max_jobs);
#endif
		/* we don't need to check grp_cpu_mins here */

		/* we don't need to check grp_cpus here */

		/* we don't need to check grp_mem here */

		if ((qos_rec.grp_jobs == INFINITE) &&
		    (assoc_ptr->grp_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_jobs >= assoc_ptr->grp_jobs)) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = WAIT_ASSOC_GRP_JOB;
			debug2("%pJ being held, assoc %u is at or exceeds group max jobs limit %u with %u for account %s",
			       job_ptr, assoc_ptr->id, assoc_ptr->grp_jobs,
			       assoc_ptr->usage->used_jobs, assoc_ptr->acct);

			rc = false;
			goto end_it;
		}

		/* we don't need to check grp_cpu_run_mins here */

		/* we don't need to check grp_nodes here */

		/* we don't need to check submit_jobs here */

		if ((job_ptr->limit_set.time != ADMIN_SET_LIMIT)
		    && (qos_rec.grp_wall == INFINITE)
		    && (assoc_ptr->grp_wall != INFINITE)) {
			if (time_limit == NO_VAL) {
				time_limit = job_ptr->time_limit;
				_set_time_limit(&time_limit,
						job_ptr->part_ptr->max_time,
						MIN(assoc_ptr->grp_wall,
						    assoc_ptr->max_wall_pj),
						&job_ptr->limit_set.time);

				/* Account for usage factor, if necessary */
				if ((job_ptr->qos_ptr &&
				     (job_ptr->qos_ptr->flags &
				      QOS_FLAG_USAGE_FACTOR_SAFE) &&
				     (job_ptr->qos_ptr->usage_factor >= 0)) &&
				    ((time_limit != INFINITE) ||
				     (job_ptr->qos_ptr->usage_factor < 1.0))) {
					time_limit *=
						job_ptr->qos_ptr->usage_factor;
				}
			}

			if (wall_mins >= assoc_ptr->grp_wall) {
				xfree(job_ptr->state_desc);
				job_ptr->state_reason = WAIT_ASSOC_GRP_WALL;
				debug2("%pJ being held, assoc %u is at or exceeds group wall limit %u with %u for account %s",
				       job_ptr, assoc_ptr->id,
				       assoc_ptr->grp_wall,
				       wall_mins, assoc_ptr->acct);
				rc = false;
				goto end_it;
			} else if (safe_limits &&
				   ((wall_mins + time_limit) >
				    assoc_ptr->grp_wall)) {
				xfree(job_ptr->state_desc);
				job_ptr->state_reason = WAIT_ASSOC_GRP_WALL;
				debug2("%pJ being held, the job request with assoc %u will exceed group wall limit %u if ran with %u for account %s",
				       job_ptr, assoc_ptr->id,
				       assoc_ptr->grp_wall,
				       wall_mins + time_limit, assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		/*
		 * We don't need to look at the regular limits for parents
		 * since we have pre-propagated them, so just continue with
		 * the next parent.
		 */
		if (parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		/* we don't need to check max_cpu_mins_pj here */

		/* we don't need to check max_cpus_pj here */

		if ((qos_rec.max_jobs_pa == INFINITE) &&
		    (qos_rec.max_jobs_pu == INFINITE) &&
		    (assoc_ptr->max_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_jobs >= assoc_ptr->max_jobs)) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = WAIT_ASSOC_MAX_JOBS;
			debug2("%pJ being held, assoc %u is at or exceeds max jobs limit %u with %u for account %s",
			       job_ptr, assoc_ptr->id,
			       assoc_ptr->max_jobs,
			       assoc_ptr->usage->used_jobs, assoc_ptr->acct);
			rc = false;
			goto end_it;
		}

		/* we don't need to check submit_jobs here */

		/*
		 * if the association limits have changed since job
		 * submission and job can not run, then kill it
		 */
		if ((job_ptr->limit_set.time != ADMIN_SET_LIMIT)
		    && (qos_rec.max_wall_pj == INFINITE)
		    && (assoc_ptr->max_wall_pj != INFINITE)) {
			if (time_limit == NO_VAL) {
				time_limit = job_ptr->time_limit;
				_set_time_limit(&time_limit,
						job_ptr->part_ptr->max_time,
						assoc_ptr->max_wall_pj,
						&job_ptr->limit_set.time);

				/* Account for usage factor, if necessary */
				if ((job_ptr->qos_ptr &&
				     (job_ptr->qos_ptr->flags &
				      QOS_FLAG_USAGE_FACTOR_SAFE) &&
				     (job_ptr->qos_ptr->usage_factor >= 0)) &&
				    ((time_limit != INFINITE) ||
				     (job_ptr->qos_ptr->usage_factor < 1.0))) {
					time_limit *=
						job_ptr->qos_ptr->usage_factor;
				}
			}

			if (time_limit > assoc_ptr->max_wall_pj) {
				xfree(job_ptr->state_desc);
				job_ptr->state_reason =
					WAIT_ASSOC_MAX_WALL_PER_JOB;
				debug2("%pJ being held, time limit %u exceeds account max %u",
				       job_ptr, job_ptr->time_limit,
				       time_limit);
				rc = false;
				goto end_it;
			}
		}

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	if (!assoc_mgr_locked)
		assoc_mgr_unlock(&locks);
	slurmdb_free_qos_rec_members(&qos_rec);

	return rc;
}

/*
 * acct_policy_job_runnable_post_select - After nodes have been
 *	selected for the job verify the counts don't exceed aggregated limits.
 */
extern bool acct_policy_job_runnable_post_select(job_record_t *job_ptr,
						 uint64_t *tres_req_cnt,
						 bool assoc_mgr_locked)
{
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	slurmdb_qos_rec_t qos_rec;
	slurmdb_assoc_rec_t *assoc_ptr;
	uint64_t grp_tres_ctld[slurmctld_tres_cnt];
	uint64_t max_tres_ctld[slurmctld_tres_cnt];
	uint64_t tres_usage_mins[slurmctld_tres_cnt];
	uint64_t tres_run_mins[slurmctld_tres_cnt];
	uint64_t job_tres_time_limit[slurmctld_tres_cnt];
	uint64_t orig_node_cnt;
	uint32_t time_limit;
	bool rc = true;
	bool safe_limits = false;
	int i, tres_pos = 0;
	acct_policy_tres_usage_t tres_usage;
	double usage_factor = 1.0;
	double limit_factor = -1.0;
	int parent = 0; /* flag to tell us if we are looking at the
			 * parent or not
			 */
	assoc_mgr_lock_t locks =
		{ .assoc = READ_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };

	xassert(job_ptr);
	xassert(job_ptr->part_ptr);
	xassert(tres_req_cnt);

	/* check to see if we are enforcing associations */
	if (!accounting_enforce)
		return true;

	/* probably don't need to check this here */
	/* if (!_valid_job_assoc(job_ptr)) { */
	/* 	job_ptr->state_reason = FAIL_ACCOUNT; */
	/* 	return false; */
	/* } */

	/* now see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return true;

	/* check to see if we should be using safe limits, if so we
	 * will only start a job if there are sufficient remaining
	 * cpu-minutes for it to run to completion */
	if (accounting_enforce & ACCOUNTING_ENFORCE_SAFE)
		safe_limits = true;

	/* clear old state reason */
	if (job_state_reason_check(job_ptr->state_reason, JSR_QOS_ASSOC)) {
		xfree(job_ptr->state_desc);
		job_ptr->state_reason = WAIT_NO_REASON;
	}

	job_ptr->qos_blocking_ptr = NULL;

	/* clang needs this memset to avoid a warning */
	memset(tres_run_mins, 0, sizeof(tres_run_mins));
	memset(tres_usage_mins, 0, sizeof(tres_usage_mins));
	memset(job_tres_time_limit, 0, sizeof(job_tres_time_limit));

	time_limit = job_ptr->time_limit;
	_set_time_limit(&time_limit, job_ptr->part_ptr->max_time,
			job_ptr->part_ptr->default_time, NULL);

	if (job_ptr->qos_ptr) {
		usage_factor = job_ptr->qos_ptr->usage_factor;

		if ((usage_factor >= 0) &&
		    (job_ptr->qos_ptr->flags & QOS_FLAG_USAGE_FACTOR_SAFE) &&
		    ((time_limit != INFINITE) || (usage_factor < 1.0))) {
			time_limit *= usage_factor;
		}
	}

	for (i=0; i<slurmctld_tres_cnt; i++)
		job_tres_time_limit[i] = (uint64_t)time_limit * tres_req_cnt[i];

	slurmdb_init_qos_rec(&qos_rec, 0, INFINITE);

	if (!assoc_mgr_locked)
		assoc_mgr_lock(&locks);

	assoc_mgr_set_qos_tres_cnt(&qos_rec);

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

	/* check the first QOS setting it's values in the qos_rec */
	if (qos_ptr_1 &&
	    !(rc = _qos_job_runnable_post_select(job_ptr, qos_ptr_1,
						 &qos_rec, tres_req_cnt,
						 job_tres_time_limit)))
		goto end_it;

	/* If qos_ptr_1 didn't set the value use the 2nd QOS to set the limit */
	if (qos_ptr_2 &&
	    !(rc = _qos_job_runnable_post_select(job_ptr, qos_ptr_2,
						 &qos_rec, tres_req_cnt,
						 job_tres_time_limit)))
		goto end_it;

	if (qos_ptr_1 && !fuzzy_equal(qos_ptr_1->limit_factor, INFINITE))
		limit_factor = qos_ptr_1->limit_factor;
	else if (qos_ptr_2 && !fuzzy_equal(qos_ptr_2->limit_factor, INFINITE))
                limit_factor = qos_ptr_2->limit_factor;

	assoc_ptr = job_ptr->assoc_ptr;
	while (assoc_ptr) {
		for (i = 0; i < slurmctld_tres_cnt; i++) {
			tres_usage_mins[i] =
				(uint64_t)(assoc_ptr->usage->usage_tres_raw[i]
					   / 60);
			tres_run_mins[i] =
				assoc_ptr->usage->grp_used_tres_run_secs[i] /
				60;

			/*
			 * Clear usage if factor is 0 so that jobs can run.
			 * Otherwise multiplying can cause more jobs to be run
			 * than the limit allows (e.g. usagefactor=.5).
			 */
			if (usage_factor == 0.0) {
				tres_usage_mins[i] *= usage_factor;
				tres_run_mins[i] *= usage_factor;
			}

			grp_tres_ctld[i] = assoc_ptr->grp_tres_ctld[i];
			max_tres_ctld[i] = assoc_ptr->max_tres_ctld[i];

			_apply_limit_factor(&grp_tres_ctld[i], limit_factor);
			_apply_limit_factor(&max_tres_ctld[i], limit_factor);
		}

#if _DEBUG
		info("acct_job_limits: %u of %u",
		     assoc_ptr->usage->used_jobs, assoc_ptr->max_jobs);
#endif
		/*
		 * If the association has a GrpCPUMins limit set (and there
		 * is no QOS with GrpCPUMins set) we may hold the job
		 */
		tres_usage = _validate_tres_usage_limits_for_assoc(
			&tres_pos, assoc_ptr->grp_tres_mins_ctld,
			qos_rec.grp_tres_mins_ctld,
			job_tres_time_limit, tres_run_mins,
			tres_usage_mins, job_ptr->limit_set.tres,
			safe_limits);
		switch (tres_usage) {
		case TRES_USAGE_CUR_EXCEEDS_LIMIT:
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK_MIN);
			debug2("%pJ being held, assoc %u(%s/%s/%s) group max tres(%s) minutes limit of %"PRIu64" is already at or exceeded with %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       assoc_ptr->grp_tres_mins_ctld[tres_pos],
			       tres_usage_mins[tres_pos]);
			rc = false;
			goto end_it;
			break;
		case TRES_USAGE_REQ_EXCEEDS_LIMIT:
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK_MIN);
			debug2("%pJ being held, the job is requesting more than allowed with assoc %u(%s/%s/%s) group max tres(%s) minutes of %"PRIu64" with %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       assoc_ptr->grp_tres_mins_ctld[tres_pos],
			       job_tres_time_limit[tres_pos]);
			rc = false;
			goto end_it;
			break;
		case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
			/*
			 * If we're using safe limits start
			 * the job only if there are
			 * sufficient cpu-mins left such that
			 * it will run to completion without
			 * being killed
			 */
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK_MIN);
			debug2("%pJ being held, the job is at or exceeds assoc %u(%s/%s/%s) group max tres(%s) minutes of %"PRIu64" of which %"PRIu64" are still available but request is for %"PRIu64" (plus %"PRIu64" already in use) tres minutes (request tres count %"PRIu64")",
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       assoc_ptr->grp_tres_mins_ctld[tres_pos],
			       assoc_ptr->grp_tres_mins_ctld[tres_pos] -
			       tres_usage_mins[tres_pos],
			       job_tres_time_limit[tres_pos],
			       tres_run_mins[tres_pos],
			       tres_req_cnt[tres_pos]);
			rc = false;
			goto end_it;
			break;
		case TRES_USAGE_OKAY:
			/* all good */
			break;
		}

		orig_node_cnt = tres_req_cnt[TRES_ARRAY_NODE];
		_get_unique_job_node_cnt(job_ptr,
					 assoc_ptr->usage->grp_node_bitmap,
					 &tres_req_cnt[TRES_ARRAY_NODE]);
		tres_usage = _validate_tres_usage_limits_for_assoc(
			&tres_pos,
			grp_tres_ctld, qos_rec.grp_tres_ctld,
			tres_req_cnt, assoc_ptr->usage->grp_used_tres,
			NULL, job_ptr->limit_set.tres, true);
		tres_req_cnt[TRES_ARRAY_NODE] = orig_node_cnt;
		switch (tres_usage) {
		case TRES_USAGE_CUR_EXCEEDS_LIMIT:
			/* not possible because the curr_usage sent in is NULL*/
			break;
		case TRES_USAGE_REQ_EXCEEDS_LIMIT:
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK);
			debug2("%pJ is being held, assoc %u(%s/%s/%s) min tres(%s) request %"PRIu64" exceeds group max tres limit %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       tres_req_cnt[tres_pos],
			       grp_tres_ctld[tres_pos]);
			rc = false;
			goto end_it;
			break;
		case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK);
			debug2("%pJ being held, if allowed the job request will exceed assoc %u(%s/%s/%s) group max tres(%s) limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       grp_tres_ctld[tres_pos],
			       assoc_ptr->usage->grp_used_tres[tres_pos],
			       tres_req_cnt[tres_pos]);
			rc = false;
			goto end_it;
		case TRES_USAGE_OKAY:
			/* all good */
			break;
		}

		/* we don't need to check grp_jobs here */

		tres_usage = _validate_tres_usage_limits_for_assoc(
			&tres_pos,
			assoc_ptr->grp_tres_run_mins_ctld,
			qos_rec.grp_tres_run_mins_ctld,
			job_tres_time_limit, tres_run_mins, NULL, NULL, true);
		switch (tres_usage) {
		case TRES_USAGE_CUR_EXCEEDS_LIMIT:
			/* not possible because the curr_usage sent in is NULL*/
			break;
		case TRES_USAGE_REQ_EXCEEDS_LIMIT:
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK_RUN_MIN);
			debug2("%pJ is being held, assoc %u(%s/%s/%s) group max running tres(%s) minutes request %"PRIu64" exceeds limit %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       job_tres_time_limit[tres_pos],
			       assoc_ptr->grp_tres_run_mins_ctld[tres_pos]);
			rc = false;
			goto end_it;
			break;
		case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_GRP_UNK_RUN_MIN);
			debug2("%pJ being held, if allowed the job request will exceed assoc %u(%s/%s/%s) group max running tres(%s) minutes limit %"PRIu64" with already used %"PRIu64" + requested %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       assoc_ptr->grp_tres_run_mins_ctld[tres_pos],
			       tres_run_mins[tres_pos],
			       job_tres_time_limit[tres_pos]);
			rc = false;
			goto end_it;
			break;
		case TRES_USAGE_OKAY:
			/* all good */
			break;
		}

		/* we don't need to check submit_jobs here */

		/* we don't need to check grp_wall here */


		/* We don't need to look at the regular limits for
		 * parents since we have pre-propagated them, so just
		 * continue with the next parent
		 */
		if (parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		if (!_validate_tres_limits_for_assoc(
			    &tres_pos, job_tres_time_limit, 0,
			    assoc_ptr->max_tres_mins_ctld,
			    qos_rec.max_tres_mins_pj_ctld,
			    job_ptr->limit_set.tres,
			    1, 0, 1)) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_MAX_UNK_MINS_PER_JOB);
			debug2("%pJ being held, the job is requesting more than allowed with assoc %u(%s/%s/%s) max tres(%s) minutes of %"PRIu64" with %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       assoc_ptr->max_tres_mins_ctld[tres_pos],
			       job_tres_time_limit[tres_pos]);
			rc = false;
			goto end_it;
		}

		if (!_validate_tres_limits_for_assoc(
			    &tres_pos, tres_req_cnt, 0,
			    max_tres_ctld,
			    qos_rec.max_tres_pj_ctld,
			    job_ptr->limit_set.tres,
			    1, 0, 1)) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_MAX_UNK_PER_JOB);
			debug2("%pJ is being held, the job is requesting more than allowed with assoc %u(%s/%s/%s) max tres(%s) limit of %"PRIu64" with %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       max_tres_ctld[tres_pos],
			       tres_req_cnt[tres_pos]);
			rc = false;
			break;
		}

		if (!_validate_tres_limits_for_assoc(
			    &tres_pos, tres_req_cnt,
			    tres_req_cnt[TRES_ARRAY_NODE],
			    assoc_ptr->max_tres_pn_ctld,
			    qos_rec.max_tres_pn_ctld,
			    job_ptr->limit_set.tres,
			    1, 0, 1)) {
			xfree(job_ptr->state_desc);
			job_ptr->state_reason = _get_tres_state_reason(
				tres_pos, WAIT_ASSOC_MAX_UNK_PER_NODE);
			debug2("%pJ is being held, the job is requesting more than allowed with assoc %u(%s/%s/%s) max tres(%s) per node limit of %"PRIu64" with %"PRIu64,
			       job_ptr, assoc_ptr->id, assoc_ptr->acct,
			       assoc_ptr->user, assoc_ptr->partition,
			       assoc_mgr_tres_name_array[tres_pos],
			       assoc_ptr->max_tres_pn_ctld[tres_pos],
			       tres_req_cnt[tres_pos]);
			rc = false;
			break;
		}

		/* we do not need to check max_jobs here */

		/* we don't need to check submit_jobs here */

		/* we don't need to check max_wall_pj here */

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	if (!assoc_mgr_locked)
		assoc_mgr_unlock(&locks);
	slurmdb_free_qos_rec_members(&qos_rec);

	FREE_NULL_BITMAP(job_ptr->node_bitmap_preempt);

	return rc;
}

extern uint32_t acct_policy_get_max_nodes(job_record_t *job_ptr,
					  uint32_t *wait_reason)
{
	uint64_t max_nodes_limit = INFINITE64, qos_max_p_limit = INFINITE64,
		grp_nodes = INFINITE64;
	assoc_mgr_lock_t locks = { .assoc = READ_LOCK, .qos = READ_LOCK };
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr;
	bool parent = 0; /* flag to tell us if we are looking at the
			  * parent or not
			  */
	bool grp_set = 0;
	double limit_factor = -1.0;

	/* check to see if we are enforcing associations */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return max_nodes_limit;

	xassert(wait_reason);

	assoc_mgr_lock(&locks);

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

	if (qos_ptr_1) {
		uint64_t max_nodes_pj =
			qos_ptr_1->max_tres_pj_ctld[TRES_ARRAY_NODE];
		uint64_t max_nodes_pu =
			qos_ptr_1->max_tres_pu_ctld[TRES_ARRAY_NODE];
		uint64_t max_nodes_pa =
			qos_ptr_1->max_tres_pa_ctld[TRES_ARRAY_NODE];

		grp_nodes = qos_ptr_1->grp_tres_ctld[TRES_ARRAY_NODE];

		if (!fuzzy_equal(qos_ptr_1->limit_factor, INFINITE))
			limit_factor = qos_ptr_1->limit_factor;

		if (qos_ptr_2) {
			if (max_nodes_pa == INFINITE64)
				max_nodes_pa = qos_ptr_2->max_tres_pa_ctld[
					TRES_ARRAY_NODE];
			if (max_nodes_pj == INFINITE64)
				max_nodes_pj = qos_ptr_2->max_tres_pj_ctld[
					TRES_ARRAY_NODE];
			if (max_nodes_pu == INFINITE64)
				max_nodes_pu = qos_ptr_2->max_tres_pu_ctld[
					TRES_ARRAY_NODE];
			if (grp_nodes == INFINITE64)
				grp_nodes = qos_ptr_2->grp_tres_ctld[
					TRES_ARRAY_NODE];
			if ((limit_factor == -1.0) &&
			    !fuzzy_equal(qos_ptr_1->limit_factor, INFINITE))
				limit_factor = qos_ptr_2->limit_factor;
		}

		if (max_nodes_pa < max_nodes_limit) {
			max_nodes_limit = max_nodes_pa;
			*wait_reason = WAIT_QOS_MAX_NODE_PER_ACCT;
		}

		if (max_nodes_pj < max_nodes_limit) {
			max_nodes_limit = max_nodes_pj;
			*wait_reason = WAIT_QOS_MAX_NODE_PER_JOB;
		}

		if (max_nodes_pu < max_nodes_limit) {
			max_nodes_limit = max_nodes_pu;
			*wait_reason = WAIT_QOS_MAX_NODE_PER_USER;
		}

		qos_max_p_limit = max_nodes_limit;

		if (grp_nodes < max_nodes_limit) {
			max_nodes_limit = grp_nodes;
			*wait_reason = WAIT_QOS_GRP_NODE;
		}
	}

	/* We have to traverse all the associations because QOS might
	   not override a particular limit.
	*/
	while (assoc_ptr) {
		uint64_t node_limit = assoc_ptr->grp_tres_ctld[TRES_ARRAY_NODE];

		_apply_limit_factor(&node_limit, limit_factor);

		if ((!qos_ptr_1 || (grp_nodes == INFINITE64))
		    && (node_limit != INFINITE64)
		    && (node_limit < max_nodes_limit)) {
			max_nodes_limit = node_limit;
			*wait_reason = WAIT_ASSOC_GRP_NODE;
			grp_set = 1;
		}

		node_limit = assoc_ptr->max_tres_ctld[TRES_ARRAY_NODE];

		_apply_limit_factor(&node_limit, limit_factor);
		if (!parent
		    && (qos_max_p_limit == INFINITE64)
		    && (node_limit != INFINITE64)
		    && (node_limit < max_nodes_limit)) {
			max_nodes_limit = node_limit;
			*wait_reason = WAIT_ASSOC_MAX_NODE_PER_JOB;
		}

		/* only check the first grp set */
		if (grp_set)
			break;

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
		continue;
	}

	assoc_mgr_unlock(&locks);
	return max_nodes_limit;
}

/*
 * acct_policy_update_pending_job - Make sure the limits imposed on a job on
 *	submission are correct after an update to a qos or association.  If
 *	the association/qos limits prevent the job from running (lowered
 *	limits since job submission), then reset its reason field.
 */
extern int acct_policy_update_pending_job(job_record_t *job_ptr)
{
	job_desc_msg_t job_desc;
	acct_policy_limit_set_t acct_policy_limit_set;
	bool update_accounting = false;
	job_details_t *details_ptr;
	int rc = SLURM_SUCCESS;
	uint64_t tres_req_cnt[slurmctld_tres_cnt];

	/* check to see if we are enforcing associations and the job
	 * is pending or if we are even enforcing limits. */
	if (!accounting_enforce || !IS_JOB_PENDING(job_ptr)
	    || !(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return SLURM_SUCCESS;

	details_ptr = job_ptr->details;

	if (!details_ptr) {
		error("acct_policy_update_pending_job: no details");
		return SLURM_ERROR;
	}

	/* set up the job desc to make sure things are the way we
	 * need.
	 */
	slurm_init_job_desc_msg(&job_desc);

	/* copy the limits set from the job the only one that
	 * acct_policy_validate changes is the time limit so we
	 * should be ok with the memcpy here */
	memcpy(&acct_policy_limit_set, &job_ptr->limit_set,
	       sizeof(acct_policy_limit_set_t));
	job_desc.tres_req_cnt = tres_req_cnt;
	/* copy all the tres requests over */
	memcpy(job_desc.tres_req_cnt, job_ptr->tres_req_cnt,
	       sizeof(uint64_t) * slurmctld_tres_cnt);

	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set.time == ADMIN_SET_LIMIT)
		acct_policy_limit_set.time = job_ptr->limit_set.time;
	else if ((job_ptr->time_limit != NO_VAL) && !job_ptr->limit_set.time)
		job_desc.time_limit = job_ptr->time_limit;

	if (!acct_policy_validate(&job_desc, job_ptr->part_ptr,
				  job_ptr->part_ptr_list,
				  job_ptr->assoc_ptr, job_ptr->qos_ptr,
				  &job_ptr->state_reason,
				  &acct_policy_limit_set, 0)) {
		info("%s: exceeded association/qos's cpu, node, memory or time limit for %pJ",
		     __func__, job_ptr);
		return SLURM_ERROR;
	}

	/* The only variable in acct_policy_limit_set that is changed
	 * in acct_policy_validate is the time limit so only worry
	 * about that one.
	 */

	/* If it isn't an admin set limit replace it. */
	if (!acct_policy_limit_set.time && (job_ptr->limit_set.time == 1)) {
		job_ptr->time_limit = NO_VAL;
		job_ptr->limit_set.time = 0;
		update_accounting = true;
	} else if (acct_policy_limit_set.time != ADMIN_SET_LIMIT) {
		if (job_ptr->time_limit != job_desc.time_limit) {
			job_ptr->time_limit = job_desc.time_limit;
			update_accounting = true;
		}
		job_ptr->limit_set.time = acct_policy_limit_set.time;
	}

	if (update_accounting) {
		last_job_update = time(NULL);
		debug("limits changed for %pJ: updating accounting", job_ptr);
		/* Update job record in accounting to reflect changes */
		jobacct_storage_g_job_start(acct_db_conn, job_ptr);
	}

	return rc;
}

/*
 * acct_policy_job_runnable - Determine if the specified job has timed
 *	out based on it's QOS or association.
 */
extern bool acct_policy_job_time_out(job_record_t *job_ptr)
{
	uint64_t job_tres_usage_mins[slurmctld_tres_cnt];
	uint64_t time_delta;
	uint64_t tres_usage_mins[slurmctld_tres_cnt];
	uint32_t wall_mins, orig_node_cnt;
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	slurmdb_qos_rec_t qos_rec;
	slurmdb_assoc_rec_t *assoc = NULL;
	assoc_mgr_lock_t locks =
		{ .assoc = READ_LOCK, .qos = WRITE_LOCK, .tres = READ_LOCK };
	time_t now;
	int i, tres_pos = 0;
	acct_policy_tres_usage_t tres_usage;

	/*
	 * Now see if we are enforcing limits.  If Safe is set then
	 * return false as well since we are being safe if the limit
	 * was changed after the job was already deemed safe to start.
	 */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)
	    || (accounting_enforce & ACCOUNTING_ENFORCE_SAFE))
		return false;

	slurmdb_init_qos_rec(&qos_rec, 0, INFINITE);
	assoc_mgr_lock(&locks);

	assoc_mgr_set_qos_tres_cnt(&qos_rec);

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

	assoc =	job_ptr->assoc_ptr;

	now = time(NULL);

	time_delta = (uint64_t)(((now - job_ptr->start_time) -
				 job_ptr->tot_sus_time) / 60);

	/* clang needs this memset to avoid a warning */
	memset(job_tres_usage_mins, 0, sizeof(tres_usage_mins));
	memset(tres_usage_mins, 0, sizeof(tres_usage_mins));

	/*
	 * find out how many CPU minutes this job has been running for.
	 * We add 1 here to make it so we can check for just > instead of
	 * >= in our checks.
	 */
	for (i = 0; i < slurmctld_tres_cnt; i++) {
		if (i == TRES_ARRAY_ENERGY)
			continue;
		if (job_ptr->tres_alloc_cnt[i] == NO_CONSUME_VAL64)
			continue;

		if (job_ptr->tres_alloc_cnt[i]) {
			job_tres_usage_mins[i] =
				(time_delta * job_ptr->tres_alloc_cnt[i]) + 1;
		}
	}

	/* check the first QOS setting it's values in the qos_rec */
	if (qos_ptr_1 && !_qos_job_time_out(job_ptr, qos_ptr_1,
					    &qos_rec, job_tres_usage_mins))
		goto job_failed;

	/* If qos_ptr_1 didn't set the value use the 2nd QOS to set the limit */
	if (qos_ptr_2 && !_qos_job_time_out(job_ptr, qos_ptr_2,
					    &qos_rec, job_tres_usage_mins))
		goto job_failed;

	/* handle any association stuff here */
	while (assoc) {
		for (i = 0; i < slurmctld_tres_cnt; i++)
			tres_usage_mins[i] =
				(uint64_t)(assoc->usage->usage_tres_raw[i]
					   / 60.0);
		wall_mins = assoc->usage->grp_used_wall / 60;

		tres_usage = _validate_tres_usage_limits_for_assoc(
			&tres_pos, assoc->grp_tres_mins_ctld,
			qos_rec.grp_tres_mins_ctld, job_tres_usage_mins,
			NULL, tres_usage_mins, NULL, false);
		switch (tres_usage) {
		case TRES_USAGE_CUR_EXCEEDS_LIMIT:
			last_job_update = now;
			info("%pJ timed out, the job is at or exceeds assoc %u(%s/%s/%s) group max tres(%s) minutes of %"PRIu64" with %"PRIu64,
			     job_ptr, assoc->id, assoc->acct,
			     assoc->user, assoc->partition,
			     assoc_mgr_tres_name_array[tres_pos],
			     assoc->grp_tres_mins_ctld[tres_pos],
			     tres_usage_mins[tres_pos]);
			job_ptr->state_reason = FAIL_TIMEOUT;
			xfree(job_ptr->state_desc);
			xstrfmtcat(job_ptr->state_desc, "Job is at or exceeds association (acc=%s/user=%s/part=%s) group max TRES(%s) minutes of %"PRIu64" with %"PRIu64,
				   assoc->acct, assoc->user, assoc->partition,
				   assoc_mgr_tres_name_array[tres_pos],
				   assoc->grp_tres_mins_ctld[tres_pos],
				   tres_usage_mins[tres_pos]);
			goto job_failed;
			break;
		case TRES_USAGE_REQ_EXCEEDS_LIMIT:
			/* not possible safe_limits is 0 */
		case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
			/* not possible safe_limits is 0 */
		case TRES_USAGE_OKAY:
			/* all good */
			break;
		}

		if ((qos_rec.grp_wall == INFINITE)
		    && (assoc->grp_wall != INFINITE)
		    && (wall_mins >= assoc->grp_wall)) {
			info("%pJ timed out, assoc %u is at or exceeds group wall limit %u with %u for account %s",
			     job_ptr, assoc->id, assoc->grp_wall,
			     wall_mins, assoc->acct);
			job_ptr->state_reason = FAIL_TIMEOUT;
			xfree(job_ptr->state_desc);
			xstrfmtcat(job_ptr->state_desc, "Job is at or exceeds association (acc=%s/user=%s/part=%s) group wall limit %u with %u",
				   assoc->acct, assoc->user, assoc->partition,
				   assoc->grp_wall, wall_mins);
			break;
		}

		orig_node_cnt = job_tres_usage_mins[TRES_ARRAY_NODE];
		job_tres_usage_mins[TRES_ARRAY_NODE] = 0;
		tres_usage = _validate_tres_usage_limits_for_assoc(
			&tres_pos, assoc->max_tres_mins_ctld,
			qos_rec.max_tres_mins_pj_ctld, job_tres_usage_mins,
			NULL, NULL, NULL, true);
		job_tres_usage_mins[TRES_ARRAY_NODE] = orig_node_cnt;
		switch (tres_usage) {
		case TRES_USAGE_CUR_EXCEEDS_LIMIT:
			/* not possible curr_usage is NULL */
			break;
		case TRES_USAGE_REQ_EXCEEDS_LIMIT:
			last_job_update = now;
			info("%pJ timed out, the job is at or exceeds assoc %u(%s/%s/%s) max tres(%s) minutes of %"PRIu64" with %"PRIu64,
			     job_ptr, assoc->id, assoc->acct,
			     assoc->user, assoc->partition,
			     assoc_mgr_tres_name_array[tres_pos],
			     assoc->max_tres_mins_ctld[tres_pos],
			     job_tres_usage_mins[tres_pos]);
			job_ptr->state_reason = FAIL_TIMEOUT;
			xfree(job_ptr->state_desc);
			xstrfmtcat(job_ptr->state_desc, "Job is at or exceeds association (acc=%s/user=%s/part=%s) max TRES(%s) minutes of %"PRIu64" with %"PRIu64,
				   assoc->acct, assoc->user, assoc->partition,
				   assoc_mgr_tres_name_array[tres_pos],
				   assoc->max_tres_mins_ctld[tres_pos],
				   job_tres_usage_mins[tres_pos]);
			goto job_failed;
			break;
		case TRES_USAGE_REQ_NOT_SAFE_WITH_USAGE:
			/* not possible tres_usage is NULL */
		case TRES_USAGE_OKAY:
			/* all good */
			break;
		}

		assoc = assoc->usage->parent_assoc_ptr;
		/* these limits don't apply to the root assoc */
		if (assoc == assoc_mgr_root_assoc)
			break;
	}
job_failed:
	assoc_mgr_unlock(&locks);
	slurmdb_free_qos_rec_members(&qos_rec);

	if (job_ptr->state_reason == FAIL_TIMEOUT)
		return true;

	return false;
}

static void _get_accrue_limits(acct_policy_accrue_t *acct_policy_accrue,
			       uint32_t *max_jobs_accrue_ptr,
			       int *create_cnt_ptr)
{
	job_record_t *job_ptr = acct_policy_accrue->job_ptr;
	slurmdb_assoc_rec_t *assoc_ptr;
	bool parent = false;

	xassert(verify_assoc_lock(ASSOC_LOCK, WRITE_LOCK));
	xassert(verify_assoc_lock(QOS_LOCK, WRITE_LOCK));

	if (job_ptr->qos_ptr) {
		_fill_in_qos_used_limits(job_ptr->qos_ptr, acct_policy_accrue);

		/* Find the most restrictive qos limit */
		_get_accrue_create_cnt(max_jobs_accrue_ptr, create_cnt_ptr,
				       job_ptr->qos_ptr->grp_jobs_accrue,
				       job_ptr->qos_ptr->usage->accrue_cnt);
		if (acct_policy_accrue->used_limits_acct)
			_get_accrue_create_cnt(
				max_jobs_accrue_ptr, create_cnt_ptr,
				job_ptr->qos_ptr->max_jobs_accrue_pa,
				acct_policy_accrue->used_limits_acct->
				accrue_cnt);

		if (acct_policy_accrue->used_limits_user)
			_get_accrue_create_cnt(
				max_jobs_accrue_ptr, create_cnt_ptr,
				job_ptr->qos_ptr->max_jobs_accrue_pu,
				acct_policy_accrue->used_limits_user->
				accrue_cnt);
	}

	assoc_ptr = job_ptr->assoc_ptr;
	while (assoc_ptr) {
		/*
		 * Find the first limit whether it be from the qos above or in
		 * the hierarchy.
		 */
		if (*max_jobs_accrue_ptr != INFINITE)
			break;

		_get_accrue_create_cnt(max_jobs_accrue_ptr, create_cnt_ptr,
				       assoc_ptr->grp_jobs_accrue,
				       assoc_ptr->usage->accrue_cnt);
		/*
		 * We don't need to look at the regular limits for
		 * parents since we have pre-propagated them, so just
		 * continue with the next parent
		 */
		if (!parent)
			_get_accrue_create_cnt(max_jobs_accrue_ptr,
					       create_cnt_ptr,
					       assoc_ptr->max_jobs_accrue,
					       assoc_ptr->usage->accrue_cnt);

		/* now go up the hierarchy */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = true;
	}

}

static void _handle_add_accrue(acct_policy_accrue_t *acct_policy_accrue)
{
	job_record_t *job_ptr = acct_policy_accrue->job_ptr;
	job_details_t *details_ptr = job_ptr->details;
	job_record_t *old_job_ptr;
	uint32_t max_jobs_accrue = INFINITE;

	_get_accrue_limits(acct_policy_accrue, &max_jobs_accrue,
			   &acct_policy_accrue->cnt);

	/* No limit (or there is space to accrue) */
	if ((max_jobs_accrue == INFINITE) ||
	    (acct_policy_accrue->cnt &&
	     (!job_ptr->array_recs || !job_ptr->array_recs->task_cnt))) {
		if (!details_ptr->accrue_time &&
		    job_ptr->details->begin_time) {
			/*
			 * If no limit and begin_time hasn't happened yet
			 * then set accrue_time to now.
			 */
			details_ptr->accrue_time =
				((max_jobs_accrue == INFINITE) &&
				 details_ptr->begin_time) ?
				details_ptr->begin_time : time(NULL);

			/*
			 * If we have an array here and no limit we want to add
			 * all the tasks in the array.
			 */
			if (job_ptr->array_recs &&
			    job_ptr->array_recs->task_cnt)
				acct_policy_accrue->cnt =
					job_ptr->array_recs->task_cnt;
			else
				acct_policy_accrue->cnt = 1;

			_add_accrue_time_internal(job_ptr->qos_ptr,
						  acct_policy_accrue);
		}

		return;
	}

	/* Looks like we are at the limit */
	if (!acct_policy_accrue->cnt) {
		log_flag(ACCRUE, "%s: %pJ can't accrue, we are over a limit",
			 __func__, job_ptr);
		return;
	}

	acct_policy_accrue->cnt = MIN(acct_policy_accrue->cnt,
				      job_ptr->array_recs->task_cnt);

	/* How many can we spin off? */
	for (int i = 0; i < acct_policy_accrue->cnt; i++) {
		/*
		 * After we split off the old_job_ptr is what we want to alter
		 * as the job_ptr returned from job_array_post_sched will be the
		 * master job_ptr for the array and we will use that to split
		 * more off if needed.
		 */
		old_job_ptr = job_ptr;

		job_array_pre_sched(job_ptr);
		job_ptr = job_array_post_sched(job_ptr, true);

		details_ptr = old_job_ptr->details;
		if (!details_ptr) {
			fatal_abort("%s: no details after split", __func__);
			return;
		}
		details_ptr->accrue_time = acct_policy_accrue->now;
		log_flag(ACCRUE, "%pJ is now accruing time %ld",
			 old_job_ptr, acct_policy_accrue->now);
	}

	/*
	 * Here we are ok to use all the same pointers from the main job_ptr as
	 * an array will always have the same pointers.  If this ever changes in
	 * the future some how we will need to address it.
	 */
	_add_accrue_time_internal(job_ptr->qos_ptr, acct_policy_accrue);
}

static void _handle_accrue_time(acct_policy_accrue_t *acct_policy_accrue)
{
	job_record_t *job_ptr = acct_policy_accrue->job_ptr;

	/* We have started running, let's clear us out of the mix. */
	if (job_ptr->details->accrue_time) {
		if (!(job_ptr->bit_flags & JOB_ACCRUE_OVER) &&
		    !IS_JOB_PENDING(job_ptr)) {
			/*
			 * Normally only single jobs come in here, but if we
			 * don't have any limits and an array is cancelled the
			 * array itself comes in so we need to remove all of it.
			 */

			if (job_ptr->array_recs &&
			    job_ptr->array_recs->task_cnt)
				acct_policy_accrue->cnt =
					job_ptr->array_recs->task_cnt;
			else
				acct_policy_accrue->cnt = 1;

			/* We only want to handle this once */
			job_ptr->bit_flags |= JOB_ACCRUE_OVER;

			(void) _for_each_qos_remove_accrue_time(
				job_ptr->qos_ptr, acct_policy_accrue);
		}

		/* We already have our time and we aren't an array, endit */
		if (!IS_JOB_PENDING(job_ptr) ||
		    !job_ptr->array_recs || !job_ptr->array_recs->task_cnt)
			return;
	} else if (!IS_JOB_PENDING(job_ptr))
		return;

	_handle_add_accrue(acct_policy_accrue);
}

extern int acct_policy_handle_accrue_time(job_record_t *job_ptr,
					  bool assoc_mgr_locked)
{
	job_details_t *details_ptr;
	int rc = SLURM_SUCCESS;
	time_t now = time(NULL);
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	details_ptr = job_ptr->details;
	if (!details_ptr) {
		error("%s: no details", __func__);
		return SLURM_ERROR;
	}

	/*
	 * ACCRUE_ALWAYS flag will always force the accrue_time to be the
	 * submit_time (Not begin).  Accrue limits don't work with this flag.
	 */
	if (slurm_conf.priority_flags & PRIORITY_FLAGS_ACCRUE_ALWAYS) {
		if (!details_ptr->accrue_time)
			details_ptr->accrue_time = details_ptr->submit_time;
		return SLURM_SUCCESS;
	}

	/* Always set accrue_time to begin time when not enforcing limits. */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)) {
		if (!details_ptr->accrue_time)
			details_ptr->accrue_time = details_ptr->begin_time;
		return SLURM_SUCCESS;
	}

	/*
	 * If the job is not eligible because it is either held, dependent or
	 * because its begin time is in the future don't accrue time.
	 */
	if (!job_ptr->priority || (job_ptr->bit_flags & JOB_DEPENDENT) ||
	    (details_ptr->begin_time && (details_ptr->begin_time > now)))
		return SLURM_SUCCESS;

	/* No accrue_time and the job isn't pending, bail */
	if (!details_ptr->accrue_time && !IS_JOB_PENDING(job_ptr))
		return SLURM_SUCCESS;

	if (!assoc_mgr_locked)
		assoc_mgr_lock(&locks);

	if (!job_ptr->assoc_ptr) {
		debug("%s: no assoc_ptr, this usually means the association was removed right after the job (%pJ) was started, but didn't make it to the database before it was removed.",
		      __func__, job_ptr);
		rc = SLURM_ERROR;
	} else {
		slurmdb_qos_rec_t *orig_qos_ptr = job_ptr->qos_ptr;
		acct_policy_accrue_t acct_policy_accrue = {
			.acct = job_ptr->assoc_ptr->acct,
			.assoc_ptr = job_ptr->assoc_ptr,
			.job_ptr = job_ptr,
			.now = now,
			.uid = job_ptr->user_id,
		};

		_set_highest_prio_qos_ptr(job_ptr);
		_handle_accrue_time(&acct_policy_accrue);
		/*
		 * Now that we are done with accrue set things back to the way
		 * it was qos wise. Accrue limits are always based on the
		 * highest priority QOS.
		 */
		if (job_ptr->qos_ptr != orig_qos_ptr) {
			job_ptr->qos_ptr = orig_qos_ptr;
			job_ptr->qos_id = orig_qos_ptr->id;
		}
	}

	if (!assoc_mgr_locked)
		assoc_mgr_unlock(&locks);

	return rc;
}

extern void acct_policy_add_accrue_time(job_record_t *job_ptr,
					bool assoc_mgr_locked)
{
	slurmdb_assoc_rec_t *assoc_ptr;
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };
	job_details_t *details_ptr = job_ptr->details;
	time_t now = time(NULL);
	acct_policy_accrue_t acct_policy_accrue = {
		.assoc_ptr = job_ptr->assoc_ptr,
		.job_ptr = job_ptr,
		.now = now,
		.uid = job_ptr->user_id,
	};

	/*
	 * ACCRUE_ALWAYS flag will always force the accrue_time to be the
	 * submit_time (Not begin). Accrue limits don't work with this flag.
	 */
	if (slurm_conf.priority_flags & PRIORITY_FLAGS_ACCRUE_ALWAYS)
		return;

	/* check to see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return;

	/*
	 * If the job is not eligible because it is either held, dependent or
	 * because its begin time is in the future don't accrue time.
	 */
	if (!job_ptr->priority || (job_ptr->bit_flags & JOB_DEPENDENT) ||
	    (details_ptr &&
	     (details_ptr->begin_time && (details_ptr->begin_time > now)))) {
		/*
		 * If the job was previously accruing time (for example,
		 * ACCRUE_ALWAYS could have been on or not having
		 * ACCOUNTING_ENFORCE_LIMITS), we need to remove the accrue_time.
		 */
		if (details_ptr)
			details_ptr->accrue_time = 0;
		return;
	}

	/* Job has to be pending to accrue time. */
	if (!IS_JOB_PENDING(job_ptr))
		return;

	if (!assoc_mgr_locked)
		assoc_mgr_lock(&locks);

	assoc_ptr = job_ptr->assoc_ptr;
	if (!assoc_ptr) {
		debug("%s: no assoc_ptr, this usually means the association was removed right after the job (%pJ) was started, but didn't make it to the database before it was removed.",
		      __func__, job_ptr);
		goto endit;
	}

	acct_policy_accrue.acct = job_ptr->assoc_ptr->acct;

	_set_highest_prio_qos_ptr(job_ptr);
	_handle_add_accrue(&acct_policy_accrue);
endit:
	if (!assoc_mgr_locked)
		assoc_mgr_unlock(&locks);
}

extern void acct_policy_remove_accrue_time(job_record_t *job_ptr,
					   bool assoc_mgr_locked)
{
	assoc_mgr_lock_t locks = { .assoc = WRITE_LOCK, .qos = WRITE_LOCK };
	acct_policy_accrue_t acct_policy_accrue = {
		.uid = job_ptr->user_id,
	};

	/*
	 * ACCRUE_ALWAYS flag will always force the accrue_time to be the
	 * submit_time (Not begin).  Accrue limits don't work with this flag.
	 */
	if (slurm_conf.priority_flags & PRIORITY_FLAGS_ACCRUE_ALWAYS)
		return;

	/* check to see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return;

	if (!job_ptr->details || !job_ptr->details->accrue_time)
		return;

	/* Job has to be pending to accrue time. */
	if (!IS_JOB_PENDING(job_ptr))
		return;

	if (!assoc_mgr_locked)
		assoc_mgr_lock(&locks);

	acct_policy_accrue.assoc_ptr = job_ptr->assoc_ptr;
	if (!acct_policy_accrue.assoc_ptr) {
		debug("%s: no assoc_ptr, this usually means the association was removed right after the job (%pJ) was started, but didn't make it to the database before it was removed.",
		      __func__, job_ptr);
		goto end_it;
	}
	acct_policy_accrue.acct = acct_policy_accrue.assoc_ptr->acct;

	/*
	 * Normally only single jobs come in here, but if we don't have any
	 * limits the array itself comes in so we need to add it all.
	 */
	if (job_ptr->array_recs && job_ptr->array_recs->task_cnt)
		acct_policy_accrue.cnt = job_ptr->array_recs->task_cnt;
	else
		acct_policy_accrue.cnt = 1;

	_set_highest_prio_qos_ptr(job_ptr);
	(void) _for_each_qos_remove_accrue_time(
		job_ptr->qos_ptr, &acct_policy_accrue);

	/* reset the job */
	job_ptr->details->accrue_time = 0;
	job_ptr->bit_flags &= ~JOB_ACCRUE_OVER;

end_it:
	if (!assoc_mgr_locked)
		assoc_mgr_unlock(&locks);
}

extern uint32_t acct_policy_get_prio_thresh(job_record_t *job_ptr,
					    bool assoc_mgr_locked)
{
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	slurmdb_assoc_rec_t *assoc_ptr;
	uint32_t prio_thresh = 0;
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	/* check to see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return 0;

	if (!assoc_mgr_locked)
		assoc_mgr_lock(&locks);

	assoc_ptr = job_ptr->assoc_ptr;
	if (!assoc_ptr) {
		debug("%s: no assoc_ptr, this usually means the association was removed right after the job (%pJ) was started, but didn't make it to the database before it was removed.",
		      __func__, job_ptr);
		goto endit;
	}

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);

	if (qos_ptr_1)
		_get_prio_thresh(&prio_thresh, qos_ptr_1->min_prio_thresh);

	if (qos_ptr_2)
		_get_prio_thresh(&prio_thresh, qos_ptr_2->min_prio_thresh);

	_get_prio_thresh(&prio_thresh, assoc_ptr->min_prio_thresh);

endit:
	if (!assoc_mgr_locked)
		assoc_mgr_unlock(&locks);

	return prio_thresh;
}

extern time_t acct_policy_get_preemptable_time(job_record_t *job_ptr)
{
	slurmdb_qos_rec_t *qos_ptr_1, *qos_ptr_2;
	uint32_t min1, min2, conf_min;
	time_t start = job_ptr->start_time;
	xassert(verify_lock(CONF_LOCK, READ_LOCK));
	xassert(verify_lock(JOB_LOCK, READ_LOCK));
	xassert(verify_assoc_lock(QOS_LOCK, READ_LOCK));

	acct_policy_set_qos_order(job_ptr, &qos_ptr_1, &qos_ptr_2);
	min1 = (qos_ptr_1) ? qos_ptr_1->preempt_exempt_time : INFINITE;
	min2 = (qos_ptr_2) ? qos_ptr_2->preempt_exempt_time : INFINITE;
	conf_min = slurm_conf.preempt_exempt_time;

	/* priority: min1 > min2 > conf_min. INFINITE means none. */
	if (min1 != INFINITE)
		return start + min1;
	else if (min2 != INFINITE)
		return start + min2;
	else if (conf_min != INFINITE)
		return start + conf_min;
	else
		return start;
}

extern bool acct_policy_is_job_preempt_exempt(job_record_t *job_ptr)
{
	time_t now = time(0);

	assoc_mgr_lock_t locks = { .qos = READ_LOCK };
	assoc_mgr_lock(&locks);
	time_t preempt_time = acct_policy_get_preemptable_time(job_ptr);
	assoc_mgr_unlock(&locks);

	return now < preempt_time;
}

/*
 * WARNING: Since we only look at the first partition's QOS, this function
 * must only be used in places where we loop over all partitions in the job.
 */
extern void acct_policy_set_qos_order(job_record_t *job_ptr,
				      slurmdb_qos_rec_t **qos_ptr_1,
				      slurmdb_qos_rec_t **qos_ptr_2)
{
	xassert(job_ptr);
	xassert(qos_ptr_1);
	xassert(qos_ptr_2);

	/* Initialize incoming pointers */
	*qos_ptr_1 = NULL;
	*qos_ptr_2 = NULL;

	if (job_ptr->qos_ptr) {
		if (job_ptr->part_ptr && job_ptr->part_ptr->qos_ptr) {
			/*
			 * If the job's QOS has the flag to over ride the
			 * partition then use that otherwise use the
			 * partition's QOS as the king.
			 */
			if (job_ptr->qos_ptr->flags & QOS_FLAG_OVER_PART_QOS) {
				*qos_ptr_1 = job_ptr->qos_ptr;
				*qos_ptr_2 = job_ptr->part_ptr->qos_ptr;
			} else {
				*qos_ptr_1 = job_ptr->part_ptr->qos_ptr;
				*qos_ptr_2 = job_ptr->qos_ptr;
			}

			/*
			 * No reason to look at the same QOS twice, actually
			 * we never want to do that ;).
			 */
			if (*qos_ptr_1 == *qos_ptr_2)
				*qos_ptr_2 = NULL;
		} else
			*qos_ptr_1 = job_ptr->qos_ptr;
	} else if (job_ptr->part_ptr && job_ptr->part_ptr->qos_ptr)
		*qos_ptr_1 = job_ptr->part_ptr->qos_ptr;
}

/*
 * Checks for record in *user_limit_list of user_id if
 * *user_limit_list doesn't exist it will create it, if the user_id
 * record doesn't exist it will add it to the list.
 * In all cases the user record is returned.
 */
extern slurmdb_used_limits_t *acct_policy_get_acct_used_limits(
	list_t **acct_limit_list, char *acct)
{
	slurmdb_used_limits_t *used_limits;

	xassert(acct_limit_list);

	if (!*acct_limit_list)
		*acct_limit_list = list_create(slurmdb_destroy_used_limits);

	if (!(used_limits = list_find_first(*acct_limit_list,
					    _find_used_limits_for_acct,
					    acct))) {
		int i = sizeof(uint64_t) * slurmctld_tres_cnt;

		used_limits = xmalloc(sizeof(slurmdb_used_limits_t));
		used_limits->acct = xstrdup(acct);

		used_limits->tres = xmalloc(i);
		used_limits->tres_run_secs = xmalloc(i);

		list_append(*acct_limit_list, used_limits);
	}

	return used_limits;
}

/*
 * Checks for record in *user_limit_list of user_id if
 * *user_limit_list doesn't exist it will create it, if the user_id
 * record doesn't exist it will add it to the list.
 * In all cases the user record is returned.
 */
extern slurmdb_used_limits_t *acct_policy_get_user_used_limits(
	list_t **user_limit_list, uint32_t user_id)
{
	slurmdb_used_limits_t *used_limits;

	xassert(user_limit_list);

	if (!*user_limit_list)
		*user_limit_list = list_create(slurmdb_destroy_used_limits);

	if (!(used_limits = list_find_first(*user_limit_list,
					    _find_used_limits_for_user,
					    &user_id))) {
		int i = sizeof(uint64_t) * slurmctld_tres_cnt;

		used_limits = xmalloc(sizeof(slurmdb_used_limits_t));
		used_limits->uid = user_id;

		used_limits->tres = xmalloc(i);
		used_limits->tres_run_secs = xmalloc(i);

		list_append(*user_limit_list, used_limits);
	}

	return used_limits;
}
