#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
# Test that checks if the QOS limits are enforced.
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
source ./globals_accounting
source ./inc21.30.1
source ./inc21.30.2
source ./inc21.30.3
source ./inc21.30.4
source ./inc21.30.5
source ./inc21.30.6
source ./inc21.30.7
source ./inc21.30.8
source ./inc21.30.9
source ./inc21.30.10
source ./inc21.30.11
source ./inc21.30.12
source ./inc21.30.13
source ./inc21.30.14
source ./inc21.30.15
source ./inc21.30.16
source ./inc21.30.17
if {[param_contains [get_config_param "SchedulerParameters"] "defer"]} {
skip "This test is not compatible with SchedulerParameters containing defer"
}
#
# Check accounting configuration and terminate if limits not enforced.
#
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
skip "This test can't be run without a usable AccountStorageType"
} elseif {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
skip "This test can't be run without enforcing limits"
}
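# i.e. slurm.conf must define something like the following for the checks
# above to pass:
#   AccountingStorageType=accounting_storage/slurmdbd
#   AccountingStorageEnforce=limits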
if {![is_super_user]} {
skip "Test can only be ran as SlurmUser"
}
# Determine what the selecttype param is
# cr_core = 1 / cr_cpu = 0; initialize these before the CR_* checks below set them
set selectparam 0
set one_task_pc 0
set select_type_parameters [get_config_param "SelectTypeParameters"]
if {[param_contains $select_type_parameters "CR_SOCKET*"]} {
skip "This test is incompatible with CR_SOCKET allocations"
}
if {[param_contains $select_type_parameters "CR_CORE*"]} {
set selectparam 1
}
if {[param_contains $select_type_parameters "CR_ONE_TASK_PER_CORE"]} {
set one_task_pc 1
}
#
# Some tests will not work properly when allocating sockets or whole nodes to
# jobs
#
if {[check_config_select "linear"] || [default_part_exclusive]} {
skip "This test is incompatible with exclusive node allocations"
}
set test_node ""
# Total cpus in test node
set totcpus 0
set nthreads 0
set acct "$test_id\_test_acct"
set user_name ""
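# "name" is the literal sacctmgr key used below, as in "add qos name=<qos>"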
set qosname name
set qostest "$test_id\_qostest"
set grn GrpNodes
set grn_num 0
set grcpu GrpCpus
set grcpu_num 0
set grpcpumin GrpCPUMins
set grpcpumin_num 0
# Set grpcpurunmin_num to a multiple of the CPUs per core to work with most
# configurations, and make sure it is at least 4 so we can add to and subtract from it
set grpcpurunmin GrpCPURunMins
set grpcpurunmin_num 40
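# e.g. with 2 threads per core, 40 / 2 = 20 is still a whole number of core
# minutes, and 40 leaves room for the +/- adjustments the subtests make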
set grjobs GrpJobs
set grjobs_num 2
set grpmem GrpMem
set grpmem_num 100
set grsub GrpSubmit
set grsub_num 2
set grpwall GrpWall
set grpwall_num 1
set maxcpu MaxCpus
set maxcpu_num 0
# Set maxcpumin_num to a multiple of the CPUs per core to work with most configurations
set maxcpumin MaxCPUMins
set maxcpumin_num 0
set maxwall MaxWall
set maxwall_num 2
set maxcpuspu MaxCPUSPerUser
set maxcpuspu_num 2
set maxnodes MaxNodes
set maxnode_num 0
set maxnodespu MaxNodesPerUser
set maxnodespu_num 0
set maxjobs MaxJobs
set maxjobs_num 2
set maxjobsub MaxSubmitJobs
set maxjobsub_num 2
set save_billing_weights ""
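# Delay (in seconds) after each mod_qos call, giving the new limits time to
# take effect before the next subtest submits jobs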
set time_spacing 1
set tres_cpu_mult 2
# QOS limit values passed to mod_qos; -1 clears the corresponding limit.
# Keys match the names used when each limit is set below.
array set mod_qos_vals {
	GrpNodes                  -1
	GrpCpus                   -1
	GrpJobs                   -1
	GrpSubmit                 -1
	GrpCpuMin                 -1
	GrpCpuRunMin              -1
	GrpMem                    -1
	GrpWall                   -1
	MaxCpus                   -1
	MaxNodes                  -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxCpusPerUser            -1
	MaxNodesPerUser           -1
	GrpTRES=billing           -1
	GrpTRESMins=billing       -1
	GrpTRESRunMins=billing    -1
	MaxTRESPerJob=billing     -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerUser=billing    -1
}
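# For reference: mod_qos (from globals_accounting) is assumed to flatten this
# array into a single modification call, roughly
#   sacctmgr -i modify qos <qostest> set GrpNodes=-1 GrpCpus=-1 ...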
# Check to see that there are enough resources in the default partition
set tmpc 0
set tmpn 0
set partition [default_partition]
spawn $scontrol show part $partition
expect {
-re "TotalCPUs=($number)" {
set tmpc [expr $expect_out(1,string) - 1]
exp_continue
}
-re "TotalNodes=($number)" {
set tmpn [expr $expect_out(1,string) - 1]
exp_continue
}
-re "TRESBillingWeights=(\\S+)" {
set save_billing_weights $expect_out(1,string)
exp_continue
}
timeout {
fail "scontrol is not responding"
}
eof {
wait
}
}
if {$tmpc == 0 || $tmpn == 0} {
skip "Not enough Nodes and/or CPUs"
}
if {[get_config_param "PriorityType"] eq "priority/multifactor"} {
set prio_multifactor 1
} else {
set prio_multifactor 0
}
set got_node 0
spawn $srun -N1 printenv SLURM_NODELIST
expect {
-re "($re_word_str)" {
set test_node $expect_out(1,string)
set got_node 1
exp_continue
}
timeout {
fail "srun is not responding"
}
eof {
wait
}
}
if {$got_node != 1} {
fail "Did not get node for testing"
}
# Get the number of cpus on a node
lassign [get_node_cpus $test_node] totcpus nthreads
if {$totcpus == 0} {
fail "No cpus were found"
} else {
# Set QoS CPU values
set grcpu_num [expr $totcpus - $nthreads]
set grpcpumin_num $totcpus
set maxcpu_num [expr $totcpus - $nthreads]
set maxcpumin_num $totcpus
}
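# Worked example, assuming a 16-CPU node with 2 threads per core:
#   grcpu_num = maxcpu_num = 16 - 2 = 14; grpcpumin_num = maxcpumin_num = 16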
# Get the number of nodes in the default partition, minus one.
# $maxnode_num is used as MaxNodes in the QOS and we will test requests of
# $maxnode_num + 1. Otherwise EnforcePartLimits could interfere.
set num_nodes [expr [llength [get_nodes_by_state]] -1]
if {$num_nodes == 0} {
fail "No cpus were found"
} else {
# Set QoS node values
set grn_num $num_nodes
set maxnode_num $num_nodes
set maxnodespu_num $num_nodes
}
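# Example: with 5 usable nodes the node limits become 4, so a request of
# $maxnode_num + 1 = 5 nodes still fits the partition but exceeds the QOS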
proc cleanup {} {
global sacctmgr qostest acct test_id
global scontrol save_billing_weights partition
# delete qos
run_command -none "$sacctmgr -i delete qos $qostest"
# delete account
run_command -none "$sacctmgr -i delete account $acct"
if {$save_billing_weights ne ""} {
run_command "$scontrol update partitionname=$partition TRESBillingWeights=$save_billing_weights"
}
}
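# Note: the deletions above run without -fail, so cleanup also succeeds on a
# clean system where the QOS/account does not exist yet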
# Make sure we have a clean system
cleanup
# Get the user name
set user_name [get_my_user_name]
# add qos
set qosmatch 0
spawn $sacctmgr -i add qos $qosname=$qostest
expect {
-re "Adding QOS" {
incr qosmatch
exp_continue
}
timeout {
fail "sacctmgr did not add QOS"
}
eof {
wait
}
}
if {$qosmatch != 1} {
	fail "sacctmgr had a problem adding the QOS"
}
# Add account with qos
set acctmatch 0
spawn $sacctmgr -i add account $acct qos=$qostest
expect {
-re "Adding Account" {
incr acctmatch
exp_continue
}
timeout {
fail "sacctmgr is not responding"
}
eof {
wait
}
}
if {$acctmatch != 1} {
fail "sacctmgr had a problem adding the account"
}
# Add user to account
run_command -fail "$sacctmgr -i create user name=$user_name account=$acct"
#
# Test GrpNodes limit
#
set mod_qos_vals(GrpNodes) $grn_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_1 "QOSGrpNodeLimit"
# Reset the value to 0
set mod_qos_vals(GrpNodes) "-1"
#
# Test GrpCpus
#
set mod_qos_vals(GrpCpus) $grcpu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set grcpu_num [expr $grcpu_num / $nthreads]
}
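# With CR_ONE_TASK_PER_CORE, tasks are placed one per core, so CPU counts
# expressed in threads are divided by threads-per-core here (and in the
# analogous adjustments below) before being passed to the subtest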
inc21_30_2 "QOSGrpCpuLimit"
set mod_qos_vals(GrpCpus) "-1"
#
# Test GrpJobs limit
#
set mod_qos_vals(GrpJobs) $grjobs_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_3 "QOSGrpJobsLimit"
set mod_qos_vals(GrpJobs) "-1"
#
# Test GrpSubmit limit
#
set mod_qos_vals(GrpSubmit) $grsub_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_4
set mod_qos_vals(GrpSubmit) "-1"
#
# Test MaxCpus limits
#
set mod_qos_vals(MaxCpus) $maxcpu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set maxcpu_num [expr $maxcpu_num / $nthreads]
}
inc21_30_5 "QOSMaxCpuPerJobLimit"
set mod_qos_vals(MaxCpus) "-1"
#
# Test MaxNodes limit
#
set mod_qos_vals(MaxNodes) $maxnode_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_6 [list "QOSMaxNodePerJobLimit" "PartitionConfig"]
set mod_qos_vals(MaxNodes) "-1"
#
# Test MaxJobs limit
#
set mod_qos_vals(MaxJobs) $maxjobs_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_7 "QOSMaxJobsPerUserLimit"
set mod_qos_vals(MaxJobs) "-1"
#
# Test MaxSubmitJobs limit
#
set mod_qos_vals(MaxSubmitJobs) $maxjobsub_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_8
set mod_qos_vals(MaxSubmitJobs) "-1"
#
# Test GrpCPUMins
#
set mod_qos_vals(GrpCpuMin) $grpcpumin_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set grpcpumin_num [expr $grpcpumin_num / $nthreads]
}
inc21_30_9 "QOSGrpCPUMinutesLimit"
set mod_qos_vals(GrpCpuMin) "-1"
#
# Test GrpCPURunMins
# Requires priority/multifactor to handle decay properly
#
if { $prio_multifactor != 0 } {
set mod_qos_vals(GrpCpuRunMin) $grpcpurunmin_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set grpcpurunmin_num [expr $grpcpurunmin_num / $nthreads]
}
inc21_30_10 "QOSGrpCPURunMinutesLimit"
set mod_qos_vals(GrpCpuRunMin) "-1"
}
#
# Test Group Memory
#
set mod_qos_vals(GrpMem) $grpmem_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_11 "QOSGrpMemLimit"
set mod_qos_vals(GrpMem) "-1"
#
# Test GrpWall
# Requires priority/multifactor to handle decay properly
#
if { $prio_multifactor != 0 } {
set mod_qos_vals(GrpWall) $grpwall_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_12
set mod_qos_vals(GrpWall) "-1"
}
#
# Test MaxCPUMins
#
set mod_qos_vals(MaxCpuMin) $maxcpumin_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set maxcpumin_num [expr $maxcpumin_num / $nthreads]
}
inc21_30_13 "QOSMaxCpuMinutesPerJobLimit"
set mod_qos_vals(MaxCpuMin) "-1"
#
# Test MaxWall
#
set mod_qos_vals(MaxWall) $maxwall_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_14 "QOSMaxWallDurationPerJobLimit"
set mod_qos_vals(MaxWall) "-1"
#
# Test Max CPUs Per User
#
# If CR_CORE is set, make maxcpuspu a multiple of the number of threads
if {$selectparam} {
set maxcpuspu_num [expr $maxcpuspu_num * $nthreads]
}
set mod_qos_vals(MaxCpusPerUser) $maxcpuspu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set maxcpuspu_num [expr $maxcpuspu_num / $nthreads]
}
inc21_30_15 "QOSMaxCpuPerUserLimit"
set mod_qos_vals(MaxCpusPerUser) "-1"
#
# Test MaxNodesPerUser
#
set mod_qos_vals(MaxNodesPerUser) $maxnodespu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_16 [list "QOSMaxNodePerUserLimit" "PartitionConfig"]
set mod_qos_vals(MaxNodesPerUser) "-1"
#
# Test that MaxWall is used as the job's time limit if the job was submitted
# without the --time option
#
set mod_qos_vals(MaxWall) $maxwall_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_17
set mod_qos_vals(MaxWall) "-1"
# Set TRESBillingWeights so the billing TRES limits below can be tested
spawn $scontrol update partitionname=$partition tresbillingweights=cpu=$tres_cpu_mult
expect {
-re "error" {
fail "Failed to set TRESBillingWeights"
}
timeout {
fail "scontrol is not responding"
}
eof {
wait
}
}
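# With TRESBillingWeights=cpu=2 a job's billing TRES is twice its CPU count
# (e.g. a 4-CPU job bills 8), so each billing limit below is the matching
# CPU limit multiplied by $tres_cpu_mult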
#
# Test GrpTRES=billing
#
set mod_qos_vals(GrpTRES=billing) [expr $grcpu_num * $tres_cpu_mult]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set grcpu_num [expr $grcpu_num / $nthreads]
}
inc21_30_2 "QOSGrpBilling"
set mod_qos_vals(GrpTRES=billing) "-1"
#
# Test GrpTRESMins=billing
#
set mod_qos_vals(GrpTRESMins=billing) [expr $grpcpumin_num * $tres_cpu_mult]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set grpcpumin_num [expr $grpcpumin_num / $nthreads]
}
inc21_30_9 "QOSGrpBillingMinutes"
set mod_qos_vals(GrpTRESMins=billing) "-1"
#
# Test GrpTRESRunMins=billing
# Requires priority/multifactor to handle decay properly
#
if { $prio_multifactor != 0 } {
set mod_qos_vals(GrpTRESRunMins=billing) [expr $grpcpurunmin_num * $tres_cpu_mult]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set grpcpurunmin_num [expr $grpcpurunmin_num / $nthreads]
}
inc21_30_10 "QOSGrpBillingRunMinutes"
set mod_qos_vals(GrpTRESRunMins=billing) "-1"
}
#
# Test MaxTRESPerJob=billing limit
#
set mod_qos_vals(MaxTRESPerJob=billing) [expr $maxcpu_num * $tres_cpu_mult]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set maxcpu_num [expr $maxcpu_num / $nthreads]
}
inc21_30_5 "QOSMaxBillingPerJob"
set mod_qos_vals(MaxTRESPerJob=billing) "-1"
#
# Test MaxTRESMinsPerJob=billing
#
set mod_qos_vals(MaxTRESMinsPerJob=billing) [expr $maxcpumin_num * $tres_cpu_mult]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set maxcpumin_num [expr $maxcpumin_num / $nthreads]
}
inc21_30_13 "QOSMaxBillingMinutesPerJob"
set mod_qos_vals(MaxTRESMinsPerJob=billing) "-1"
#
# Test MaxTRESPerUser=billing
#
# If CR_CORE is set, make maxcpuspu a multiple of the number of threads
if {$selectparam} {
set maxcpuspu_num [expr $maxcpuspu_num * $nthreads]
}
set mod_qos_vals(MaxTRESPerUser=billing) [expr $maxcpuspu_num * $tres_cpu_mult]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if { $one_task_pc } {
set maxcpuspu_num [expr $maxcpuspu_num / $nthreads]
}
inc21_30_15 "QOSMaxBillingPerUser"
set mod_qos_vals(MaxTRESPerUser=billing) "-1"