| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test of Slurm functionality |
| # Validate that DenyOnLimit is enforced on QoS and |
| # Association limits. |
| ############################################################################ |
| # Copyright (C) SchedMD LLC. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| source ./globals_accounting |
| |
| # Names of the accounting entities this test creates and removes |
| set test_acct "${test_name}\_acct" |
| set test_qos "${test_name}\_qos" |
| # Batch script submitted by every sbatch call in this test |
| set file_in "$test_dir/input" |
| # Value given to MaxWall in the wall-clock check |
| set max_wall_val 2 |
| # Id of the last job accepted by sbatch; 0 means none |
| set job_id 0 |
|
| # Partition TRESBillingWeights captured before the test overrides them |
| set save_billing_weights "" |
| # CPU billing weight applied to the partition for the duration of the test |
| set tres_cpu_mult 2 |
|
| # Base value per TRES type; limits and over-limit requests derive from these |
| set max_tres_limit(billing) 2 |
| set max_tres_limit(cpu) 2 |
| set max_tres_limit(node) 1 |
|
| # sacctmgr limit name -> field name matched in 'scontrol show assoc' output |
| set limit_map(MaxTres) MaxTRESPJ |
| set limit_map(MaxTresPerUser) MaxTRESPU |
| set limit_map(MaxTresMin) MaxTRESMinsPJ |
|
| # Values passed to mod_qos to clear every QoS limit between checks |
| array set reset_qos_val { |
|     MaxCpus -1 |
|     MaxNode -1 |
|     MaxJobs -1 |
|     MaxSubmitJobs -1 |
|     MaxCpuMin -1 |
|     MaxWall -1 |
|     MaxCpusPerUser -1 |
|     MaxNodesPerUser -1 |
|     MaxTRESMinsPerJob=billing -1 |
|     MaxTRESPerJob=billing -1 |
|     MaxTRESPerUser=billing -1 |
| } |
|
| # Values passed to mod_acct to clear every association limit between checks |
| array set reset_assoc_val { |
|     MaxCpus -1 |
|     MaxNode -1 |
|     MaxJobs -1 |
|     MaxSubmitJobs -1 |
|     MaxCpuMin -1 |
|     MaxWall -1 |
|     MaxTRESMinsPerJob=billing -1 |
|     MaxTRESPerJob=billing -1 |
| } |
| |
| |
| # Preconditions: accounting must be stored through slurmdbd, limits must be |
| # enforced, and the caller must pass the is_super_user check. Otherwise the |
| # test is skipped. |
| if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} { |
|     skip "This test can't be run without a usable AccountingStorageType" |
| } |
| if {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} { |
|     skip "This test can't be run without enforcing limits" |
| } |
| if {![is_super_user]} { |
|     skip "Test can only be run as SlurmUser" |
| } |
| |
| if {[check_config_select "linear"]} { |
|     # Adjust the resources to reflect the first node: only the first CPUTot |
|     # value reported by scontrol is kept. |
|     set first_node_cpus "" |
|     spawn $scontrol show nodes |
|     expect { |
|         -re {CPUTot=(\d+)} { |
|             if {$first_node_cpus eq ""} { |
|                 set first_node_cpus $expect_out(1,string) |
|             } |
|             # Keep reading so the process is drained and reaped below |
|             exp_continue |
|         } |
|         timeout { |
|             fail "'scontrol show nodes' not responding" |
|         } |
|         eof { |
|             wait |
|         } |
|     } |
|
|     if {$first_node_cpus ne ""} { |
|         set max_tres_limit(cpu) $first_node_cpus |
|         set max_tres_limit(billing) $first_node_cpus |
|     } else { |
|         # Defaults are kept, as before, but the condition is no longer silent |
|         log_error "No CPUTot found in 'scontrol show nodes' output" |
|     } |
| } |
| |
| # cleanup - undo everything this test may have left behind. |
| # |
| # Cancels the job recorded in job_id, deletes the test account and the test |
| # QoS, and, when a previous TRESBillingWeights value was saved, writes it |
| # back to the partition. Takes no arguments. |
| proc cleanup { } { |
|     global sacctmgr test_acct test_qos |
|     global partition save_billing_weights scontrol job_id |
|
|     # Cancel the submitted job |
|     cancel_job $job_id |
|
|     # Remove the account first, then the QoS it was created with |
|     spawn $sacctmgr -i delete account $test_acct |
|     expect { |
|         eof { |
|             wait |
|         } |
|         timeout { |
|             fail "sacctmgr is not responding" |
|         } |
|     } |
|
|     spawn $sacctmgr -i delete qos $test_qos |
|     expect { |
|         eof { |
|             wait |
|         } |
|         timeout { |
|             fail "sacctmgr is not responding" |
|         } |
|     } |
|
|     # Nothing was saved, so there is nothing to restore |
|     if {$save_billing_weights eq ""} { |
|         return |
|     } |
|
|     spawn $scontrol update partitionname=$partition TRESBillingWeights=$save_billing_weights |
|     expect { |
|         -re "error" { |
|             log_error "Failed to reset TRESBillingWeights" |
|         } |
|         eof { |
|             wait |
|         } |
|         timeout { |
|             fail "scontrol is not responding" |
|         } |
|     } |
| } |
| |
| |
| # set_limit - set a TRES limit with sacctmgr and wait until scontrol shows it. |
| # |
| # entity      sacctmgr entity to modify ("qos" or "account" in this test) |
| # name        name of that entity |
| # limit       sacctmgr limit name; must be a key of limit_map |
| # tres_type   TRES the limit applies to |
| # tres_value  value assigned to the limit |
| proc set_limit { entity name limit tres_type tres_value } { |
|     global sacctmgr scontrol srun test_acct limit_map |
|
|     # Apply the limit |
|     set reply [run_command_output -fail "$sacctmgr -i modify $entity $name set $limit=$tres_type=$tres_value"] |
|     if {![regexp "Modified" $reply]} { |
|         fail "sacctmgr did not modify tres=$tres_type=$tres_value" |
|     } |
|
|     # Per-user limits only show up once a job has run as that user, so for |
|     # those the scontrol query is itself launched through srun. |
|     if {[string match "*PerUser" $limit]} { |
|         set prefix "$srun --account=$test_acct " |
|     } else { |
|         set prefix "" |
|     } |
|
|     # Accounts are reported under the "assoc" flag; other entities use their own name |
|     if {$entity eq "account"} { |
|         set flags "assoc" |
|     } else { |
|         set flags $entity |
|     } |
|
|     # Verify that the limit has been transmitted to the controller |
|     set query "${prefix}$scontrol show assoc -o flags=$flags $entity=$name" |
|     set expected "$limit_map($limit)=\[^ \]*$tres_type=$tres_value" |
|     wait_for_command_match -fail -timeout 15 $query $expected |
| } |
| |
| |
| # test_limit - check that a job exceeding one TRES limit is rejected at submit. |
| # |
| # type       entity carrying the limit ("qos" or "account" in this test) |
| # name       name of that entity |
| # limit      sacctmgr limit name, passed through to set_limit |
| # tres_type  key of max_tres_limit selecting the TRES under test |
| # |
| # Sets the limit, submits a job asking for one unit more than the base value, |
| # records a subtest on the rejection message, then resets all QoS and |
| # association limits. |
| proc test_limit { type name limit tres_type } { |
|     global sbatch max_tres_limit |
|     global job_id test_qos test_acct reset_qos_val reset_assoc_val |
|     global file_in number tres_cpu_mult |
|
|     # The billing limit is scaled by the CPU billing weight set on the partition |
|     set tres_value $max_tres_limit($tres_type) |
|     if {$tres_type eq "billing"} { |
|         set tres_value [expr {$tres_value * $tres_cpu_mult}] |
|     } |
|
|     set_limit $type $name $limit $tres_type $tres_value |
|
|     # Request one more node (-N) or task (-n) than the base value |
|     set over_limit [expr {$max_tres_limit($tres_type) + 1}] |
|     if {$tres_type eq "node"} { |
|         set flag "-N$over_limit" |
|     } else { |
|         set flag "-n$over_limit" |
|     } |
|
|     set output [run_command_output -xfail -subtest "$sbatch $flag -t1 -o/dev/null --account=$test_acct $file_in"] |
|     subtest {[regexp "Job violates accounting/QOS policy" $output]} "Job should display the expected error message" |
|
|     # If the job was accepted despite the limit, cancel it so it does not |
|     # linger. The original condition was inverted and only cancelled when |
|     # nothing had been submitted. |
|     if {[regexp "Submitted batch job ($number)" $output - job_id]} { |
|         cancel_job $job_id |
|     } |
|
|     # Reset limits |
|     mod_qos $test_qos [array get reset_qos_val] |
|     mod_acct $test_acct "" "" [array get reset_assoc_val] |
| } |
| |
| |
| # Clean up any vestigial data |
| cleanup |
|
| set partition [default_partition] |
|
| make_bash_script $file_in " |
| sleep 2" |
|
| # Add qos. The flag starts at 0 so the check below really depends on |
| # sacctmgr reporting the addition. |
| set added 0 |
| spawn $sacctmgr -i create qos $test_qos |
| expect { |
|     -re "Adding QOS" { |
|         set added 1 |
|         exp_continue |
|     } |
|     timeout { |
|         fail "sacctmgr is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
|
| if {$added != 1} { |
|     fail "Test QoS ($test_qos) was not created" |
| } |
|
| # Add the account, attached to the test QoS |
| set match 0 |
| spawn $sacctmgr -i create account $test_acct qos=$test_qos |
| expect { |
|     -re "Associations" { |
|         set match 1 |
|         exp_continue |
|     } |
|     timeout { |
|         fail "sacctmgr is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
|
| if {$match != 1} { |
|     fail "Test account ($test_acct) was not created" |
| } |
|
| # Add the current user to the test account |
| spawn $sacctmgr -i create user name=[get_my_user_name] account=$test_acct |
| expect { |
|     timeout { |
|         fail "sacctmgr is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # Set DenyOnLimit qos flag |
| set modified 0 |
| spawn $sacctmgr -i mod qos $test_qos set flag=DenyOnLimit |
| expect { |
|     -re "Modified qos" { |
|         set modified 1 |
|         exp_continue |
|     } |
|     timeout { |
|         fail "sacctmgr is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
|
| # Every later check relies on this flag, so stop if it was not applied |
| if {$modified != 1} { |
|     fail "DenyOnLimit flag was not set on QoS ($test_qos)" |
| } |
|
| # Remember the partition's current billing weights so cleanup can restore them |
| spawn $scontrol show part $partition |
| expect { |
|     -re "TRESBillingWeights=(\\S+)" { |
|         set save_billing_weights $expect_out(1,string) |
|         exp_continue |
|     } |
|     timeout { |
|         fail "scontrol is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
|
| # Weight CPUs by tres_cpu_mult for the duration of the test |
| spawn $scontrol update partitionname=$partition tresbillingweights=cpu=$tres_cpu_mult |
| expect { |
|     -re "error" { |
|         fail "Failed to set TRESBillingWeights" |
|     } |
|     timeout { |
|         fail "scontrol is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # Run every limit in limit_map against every TRES type in max_tres_limit |
| foreach limit [array names limit_map] { |
|     foreach tres_type [array names max_tres_limit] { |
|         # Check DenyOnLimit for QoS |
|         log_info "==== Testing QoS ====" |
|         test_limit "qos" $test_qos $limit $tres_type |
|
|         # MaxTresPerUser is exercised on the QoS only |
|         if {$limit eq "MaxTresPerUser"} { |
|             continue |
|         } |
|
|         # Check DenyOnLimit for Association |
|         log_info "==== Testing Association ====" |
|         test_limit "account" $test_acct $limit $tres_type |
|     } |
| } |
| |
| # |
| # Test Max Wall |
| # |
|
| # Set the limit |
| set output [run_command_output -fail "$sacctmgr -i modify qos $test_qos set MaxWall=$max_wall_val"] |
| if {![regexp "Modified" $output]} { |
|     fail "sacctmgr did not modify MaxWall=$max_wall_val" |
| } |
|
| # Verify that the limit has been transmitted to the controller. |
| set command "$scontrol show assoc -o flags=qos qos=$test_qos" |
| set pattern "MaxWallPJ=$max_wall_val" |
| wait_for_command_match -fail -timeout 10 $command $pattern |
|
| # A job id left over from an earlier step would make the final subtest fail |
| # for the wrong reason; cancel that job and start from a clean state. |
| if {$job_id != 0} { |
|     cancel_job $job_id |
|     set job_id 0 |
| } |
|
| # Ask for one more than the MaxWall value; the submission must be refused |
| set match 0 |
| spawn $sbatch -N1 -t[expr {$max_wall_val + 1}] -o/dev/null --account=$test_acct $file_in |
| expect { |
|     -re "Job violates accounting/QOS policy" { |
|         set match 1 |
|         exp_continue |
|     } |
|     -re "Submitted batch job ($number)" { |
|         set job_id $expect_out(1,string) |
|         # Keep reading so the eof branch reaps the process |
|         exp_continue |
|     } |
|     timeout { |
|         fail "sbatch is not responding" |
|     } |
|     eof { |
|         wait |
|     } |
| } |
|
| subtest {$match == 1 && ! $job_id} "Job should not be accepted for submission" |