#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
# Validate that DenyOnLimit is enforced on QoS and
# Association limits.
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
source ./globals_accounting
# Names of the account and QoS created by this test, and the batch script path
set test_acct            "${test_name}_acct"
set test_qos             "${test_name}_qos"
set file_in              "$test_dir/input"

# Value used for the MaxWall limit; the job is submitted with one more than this
set max_wall_val         2
# Id of the last submitted job (0 until a submission is accepted)
set job_id               0
# Partition TRESBillingWeights captured before the test overrides them
set save_billing_weights ""
# CPU billing weight applied to the partition while the test runs
set tres_cpu_mult        2

# Base limit value for each TRES type under test
array set max_tres_limit {
	billing 2
	cpu     2
	node    1
}

# sacctmgr limit name -> field name matched in 'scontrol show assoc' output
array set limit_map {
	MaxTres        MaxTRESPJ
	MaxTresPerUser MaxTRESPU
	MaxTresMin     MaxTRESMinsPJ
}

# Values passed to mod_qos to clear the QoS limits between sub-tests
array set reset_qos_val {
	MaxCpus                   -1
	MaxNode                   -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxCpusPerUser            -1
	MaxNodesPerUser           -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerJob=billing     -1
	MaxTRESPerUser=billing    -1
}

# Values passed to mod_acct to clear the association limits between sub-tests
array set reset_assoc_val {
	MaxCpus                   -1
	MaxNode                   -1
	MaxJobs                   -1
	MaxSubmitJobs             -1
	MaxCpuMin                 -1
	MaxWall                   -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerJob=billing     -1
}
# Preconditions: the test needs slurmdbd-backed accounting, enforced limits
# and a privileged user; otherwise it is skipped.
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test can't be run without a usable AccountingStorageType"
}
if {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
	skip "This test can't be run without enforcing limits"
}
if {![is_super_user]} {
	skip "Test can only be ran as SlurmUser"
}

if {[check_config_select "linear"]} {
	# Adjust the cpu and billing limits to the CPU count of the first node
	# reported by scontrol. The expect returns on the first match, so only
	# the first CPUTot value is used.
	spawn $scontrol show nodes
	expect {
		-re {CPUTot=(\d+)} {
			set max_tres_limit(cpu) $expect_out(1,string)
			set max_tres_limit(billing) $expect_out(1,string)
		}
		timeout {
			fail "'scontrol show nodes' not responding"
		}
		eof {
			# No CPUTot seen before the command ended: reap the child and
			# keep the default limits.
			wait
		}
	}
}
# cleanup
#
# Undo everything this test may have left behind: cancel the last submitted
# job, delete the test account and QoS, and restore the partition's
# TRESBillingWeights if they were saved earlier.
proc cleanup { } {
	global sacctmgr scontrol
	global test_acct test_qos job_id
	global partition save_billing_weights

	# Make sure no job from this test is still around
	cancel_job $job_id

	# Remove the test account first, then the test QoS
	foreach {entity ident} [list account $test_acct qos $test_qos] {
		spawn $sacctmgr -i delete $entity $ident
		expect {
			eof {
				wait
			}
			timeout {
				fail "sacctmgr is not responding"
			}
		}
	}

	# Billing weights were never captured, so there is nothing to restore
	if {$save_billing_weights eq ""} {
		return
	}

	spawn $scontrol update partitionname=$partition TRESBillingWeights=$save_billing_weights
	expect {
		-re "error" {
			log_error "Failed to reset TRESBillingWeights"
		}
		eof {
			wait
		}
		timeout {
			fail "scontrol is not responding"
		}
	}
}
# set_limit
#
# Set a TRES limit on a QoS or account with sacctmgr and wait until
# 'scontrol show assoc' reports it.
#
# entity     - sacctmgr entity to modify ("qos" or "account" in this test)
# name       - name of that entity
# limit      - sacctmgr limit name; must be a key of limit_map
# tres_type  - TRES the limit applies to (e.g. cpu, node, billing)
# tres_value - value to set for that TRES
proc set_limit { entity name limit tres_type tres_value } {
	global sacctmgr scontrol srun test_acct limit_map

	# Apply the limit
	set modify_cmd "$sacctmgr -i modify $entity $name set $limit=$tres_type=$tres_value"
	set modify_out [run_command_output -fail $modify_cmd]
	if {![regexp "Modified" $modify_out]} {
		fail "sacctmgr did not modify tres=$tres_type=$tres_value"
	}

	# Verify that the limit has been transmitted to the controller.
	# Per-user limits only show up once a job has run as that user, so in
	# that case the scontrol call is wrapped in an srun.
	set query ""
	if {[string match "*PerUser" $limit]} {
		set query "$srun --account=$test_acct "
	}

	# Accounts are queried with flags=assoc; other entities use their own name
	if {$entity eq "account"} {
		set assoc_flag "assoc"
	} else {
		set assoc_flag $entity
	}
	append query "$scontrol show assoc -o flags=$assoc_flag $entity=$name"

	set expected "$limit_map($limit)=\[^ \]*$tres_type=$tres_value"
	wait_for_command_match -fail -timeout 15 $query $expected
}
# test_limit
#
# Set one TRES limit on a QoS or account, submit a job that exceeds it and
# check that the submission is rejected with the accounting/QOS policy error.
# The QoS and association limits are reset afterwards.
#
# type      - entity the limit is set on ("qos" or "account")
# name      - name of that entity
# limit     - sacctmgr limit name (a key of limit_map)
# tres_type - TRES to limit (a key of max_tres_limit)
proc test_limit { type name limit tres_type } {
	global sbatch max_tres_limit
	global job_id test_qos test_acct reset_qos_val reset_assoc_val
	global file_in number tres_cpu_mult

	# The billing limit is scaled by the CPU billing weight set on the partition
	set tres_value $max_tres_limit($tres_type)
	if {$tres_type eq "billing"} {
		set tres_value [expr {$tres_value * $tres_cpu_mult}]
	}
	set_limit $type $name $limit $tres_type $tres_value

	# Request one more node/task than the base limit allows
	set tmp [expr {$max_tres_limit($tres_type) + 1}]
	if {$tres_type eq "node"} {
		set flag "-N$tmp"
	} else {
		set flag "-n$tmp"
	}

	set output [run_command_output -xfail -subtest "$sbatch $flag -t1 -o/dev/null --account=$test_acct $file_in"]
	subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job should display the expected error message"

	# If the job was accepted despite the limit, cancel it right away so it
	# cannot interfere with the following sub-tests, and forget its id so a
	# stale value is not mistaken for a later submission.
	if {[regexp "Submitted batch job ($number)" $output - job_id]} {
		cancel_job $job_id
		set job_id 0
	}

	# Reset limits
	mod_qos $test_qos [array get reset_qos_val]
	mod_acct $test_acct "" "" [array get reset_assoc_val]
}
# Clean up any vestigial data
cleanup

set partition [default_partition]

# Batch script submitted by every sub-test
make_bash_script $file_in "
sleep 2"

# Add the test QoS. The flag starts at 0 so that the check below really
# detects a missing "Adding QOS" confirmation.
set added 0
spawn $sacctmgr -i create qos $test_qos
expect {
	-re "Adding QOS" {
		set added 1
		exp_continue
	}
	timeout {
		fail "sacctmgr is not responding"
	}
	eof {
		wait
	}
}
if {$added != 1} {
	fail "Test QoS ($test_qos) was not created"
}

# Add the test account, attached to the test QoS
set match 0
spawn $sacctmgr -i create account $test_acct qos=$test_qos
expect {
	-re "Associations" {
		set match 1
		exp_continue
	}
	timeout {
		fail "sacctmgr is not responding"
	}
	eof {
		wait
	}
}
if {$match != 1} {
	fail "Test account ($test_acct) was not created"
}

# Add the current user to the test account
spawn $sacctmgr -i create user name=[get_my_user_name] account=$test_acct
expect {
	timeout {
		fail "sacctmgr is not responding"
	}
	eof {
		wait
	}
}
# Set DenyOnLimit qos flag
set modified 0
spawn $sacctmgr -i mod qos $test_qos set flag=DenyOnLimit
expect {
	-re "Modified qos" {
		set modified 1
		exp_continue
	}
	timeout {
		fail "sacctmgr is not responding"
	}
	eof {
		wait
	}
}
# Without the flag the rest of the test is meaningless, so stop here
if {$modified != 1} {
	fail "DenyOnLimit flag was not set on QoS ($test_qos)"
}
# Remember the partition's current TRESBillingWeights so cleanup can restore them
spawn $scontrol show part $partition
expect {
	-re "TRESBillingWeights=(\\S+)" {
		set save_billing_weights $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		fail "scontrol is not responding"
	}
}

# Bill each CPU at tres_cpu_mult for the duration of the test
spawn $scontrol update partitionname=$partition tresbillingweights=cpu=$tres_cpu_mult
expect {
	-re "error" {
		fail "Failed to set TRESBillingWeights"
	}
	eof {
		wait
	}
	timeout {
		fail "scontrol is not responding"
	}
}
# Exercise every limit in limit_map against every TRES type in max_tres_limit
foreach limit [array names limit_map] {
	foreach tres_type [array names max_tres_limit] {
		# Check DenyOnLimit for QoS
		log_info "==== Testing QoS ===="
		test_limit "qos" $test_qos $limit $tres_type

		# MaxTresPerUser is only tested on the QoS
		if {$limit eq "MaxTresPerUser"} {
			continue
		}

		# Check DenyOnLimit for Association
		log_info "==== Testing Association ===="
		test_limit "account" $test_acct $limit $tres_type
	}
}
#
# Test Max Wall
#
# Set the limit
set output [run_command_output -fail "$sacctmgr -i modify qos $test_qos set MaxWall=$max_wall_val"]
if {![regexp "Modified" $output]} {
	fail "sacctmgr did not modify MaxWall=$max_wall_val"
}

# Verify that the limit has been transmitted to the controller.
set command "$scontrol show assoc -o flags=qos qos=$test_qos"
set pattern "MaxWallPJ=$max_wall_val"
wait_for_command_match -fail -timeout 10 $command $pattern

# Submit a job whose time limit is one above MaxWall; it must be rejected
set match 0
spawn $sbatch -N1 -t[expr {$max_wall_val + 1}] -o/dev/null --account=$test_acct $file_in
expect {
	-re "Job violates accounting/QOS policy" {
		set match 1
		exp_continue
	}
	-re "Submitted batch job ($number)" {
		# Record the id so cleanup can cancel the job, then keep reading
		# until eof so the sbatch process is reaped
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sbatch is not responding"
	}
	eof {
		wait
	}
}
subtest {$match == 1 && ! $job_id} "Job should not be accepted for submission"