#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Test that checks if the QOS limits are enforced.
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
# Shared test-suite helpers are loaded first, in the original order.
source ./globals
source ./globals_accounting

# Load the per-limit include files inc21.30.1 .. inc21.30.17 (presumably the
# definitions of the inc21_30_N procedures called further down).
for {set inc_idx 1} {$inc_idx <= 17} {incr inc_idx} {
	source ./inc21.30.$inc_idx
}
# Do not leave the loop counter behind in the global namespace.
unset inc_idx
| |
#
# Skip early when the cluster configuration cannot support this test.
#
if {[param_contains [get_config_param "SchedulerParameters"] "defer"]} {
	skip "This test is not compatible with SchedulerParameters containing defer"
}

#
# Check accounting configuration and terminate if limits not enforced.
# QOS limits are only testable with slurmdbd-backed accounting and with
# AccountingStorageEnforce containing "limits".
#
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test can't be run without a usable AccountingStorageType"
} elseif {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
	skip "This test can't be run without enforcing limits"
}

# The test creates and modifies QOS/accounts and updates the partition.
if {![is_super_user]} {
	skip "Test can only be run as SlurmUser"
}
| |
# Inspect SelectTypeParameters: socket-based allocation is unsupported, while
# core-based allocation and one-task-per-core are recorded as flags.
set select_type_parameters [get_config_param "SelectTypeParameters"]
if {[param_contains $select_type_parameters "CR_SOCKET*"]} {
	skip "This test is incompatible with CR_SOCKET allocations"
}
# Each pattern that is present sets the flag variable paired with it to 1;
# flags for absent patterns are left untouched.
foreach {sel_pattern sel_flag} {
	"CR_CORE*"             selectparam
	"CR_ONE_TASK_PER_CORE" one_task_pc
} {
	if {[param_contains $select_type_parameters $sel_pattern]} {
		set $sel_flag 1
	}
}
unset sel_pattern sel_flag

#
# Some tests will not work properly when allocating sockets or whole nodes to
# jobs
#
if {[check_config_select "linear"] || [default_part_exclusive]} {
	skip "This test is incompatible with exclusive node allocations"
}
| |
#
# Test-wide variables. For every limit there is a pair: the sacctmgr option
# name (e.g. grcpu) and the value used for it (e.g. grcpu_num). Values that
# start at 0 are filled in later, once the node/CPU counts are known.
#
set test_node        ""
# Total cpus in test node
set totcpus          0
set nthreads         0
set acct             "${test_id}_test_acct"
set user_name        ""
set qosname          name
set qostest          "${test_id}_qostest"
set grn              GrpNodes
set grn_num          0
set grcpu            GrpCpus
set grcpu_num        0
set grpcpumin        GrpCPUMins
set grpcpumin_num    0
# Set grpcpurunmin_num to multiple of CPUs per core to work with most configurations
# Also make sure that it is at least 4 so we can add and subtract from it
set grpcpurunmin     GrpCPURunMins
set grpcpurunmin_num 40
set grjobs           GrpJobs
set grjobs_num       2
set grpmem           GrpMem
set grpmem_num       100
set grsub            GrpSubmit
set grsub_num        2
set grpwall          GrpWall
set grpwall_num      1
set maxcpu           MaxCpus
set maxcpu_num       0
# maxcpumin_num is replaced by the test node's CPU count once it is known
set maxcpumin        MaxCPUMins
set maxcpumin_num    0
set maxwall          MaxWall
set maxwall_num      2
set maxcpuspu        MaxCPUSPerUser
set maxcpuspu_num    2
set maxnodes         MaxNodes
set maxnode_num      0
set maxnodespu       MaxNodesPerUser
set maxnodespu_num   0
set maxjobs          MaxJobs
set maxjobs_num      2
set maxjobsub        MaxSubmitJobs
set maxjobsub_num    2
# Original TRESBillingWeights of the partition, restored by cleanup
set save_billing_weights ""
# Seconds to wait after each QOS modification
set time_spacing     1
# Billing weight applied to each CPU in the billing-based tests
set tres_cpu_mult    2

# cr_core = 1 / cr_cpu = 0
# Only supply defaults here: both flags may already have been set to 1 by the
# SelectTypeParameters detection earlier in the script, and resetting them
# unconditionally would silently disable the CR_CORE / one-task-per-core
# adjustments made by the tests below.
if {![info exists selectparam]} {
	set selectparam 0
}
if {![info exists one_task_pc]} {
	set one_task_pc 0
}
| |
# Option/value pairs handed to mod_qos on every modification; -1 means the
# limit is not set. Each test sets one entry to a real value, runs, and puts
# it back to -1.
# The keys must be spelled exactly as the tests below spell them (GrpJobs,
# MaxNodes, MaxNodesPerUser): a differently spelled key would stay in the
# array as a separate "-1" entry next to the one the test sets, and both
# would be passed to mod_qos together.
# (No comments inside the braces: they would become array data.)
array set mod_qos_vals {
	GrpNodes        -1
	GrpCpus         -1
	GrpJobs         -1
	GrpSubmit       -1
	GrpCpuMin       -1
	GrpCpuRunMin    -1
	GrpMem          -1
	GrpWall         -1
	MaxCpus         -1
	MaxNodes        -1
	MaxJobs         -1
	MaxSubmitJobs   -1
	MaxCpuMin       -1
	MaxWall         -1
	MaxCpusPerUser  -1
	MaxNodesPerUser -1

	GrpTRES=billing           -1
	GrpTRESMins=billing       -1
	GrpTRESRunMins=billing    -1
	MaxTRESPerJob=billing     -1
	MaxTRESMinsPerJob=billing -1
	MaxTRESPerUser=billing    -1
}
| |
# Verify the default partition is big enough and remember its current
# TRESBillingWeights so cleanup can restore them.
# tmpc / tmpn hold (TotalCPUs - 1) and (TotalNodes - 1); either being 0
# means the partition has only a single CPU or a single node.
set tmpc 0
set tmpn 0
set partition [default_partition]
spawn $scontrol show part $partition
expect {
	-re "TotalCPUs=($number)" {
		set tmpc [expr {$expect_out(1,string) - 1}]
		exp_continue
	}
	-re "TotalNodes=($number)" {
		set tmpn [expr {$expect_out(1,string) - 1}]
		exp_continue
	}
	-re "TRESBillingWeights=(\\S+)" {
		set save_billing_weights $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "scontrol is not responding"
	}
	eof {
		wait
	}
}
if {!$tmpc || !$tmpn} {
	skip "Not enough Nodes and/or CPUs"
}
| |
# 1 when the multifactor priority plugin is configured, 0 otherwise; the
# GrpCPURunMins and GrpWall tests only run when this flag is set.
set prio_multifactor [expr {[get_config_param "PriorityType"] eq "priority/multifactor"}]
| |
# Run a one-node job to learn which node the scheduler hands out; that node
# becomes the reference node for all CPU-based limits.
set got_node 0
spawn $srun -N1 printenv SLURM_NODELIST
expect {
	-re "($re_word_str)" {
		set test_node $expect_out(1,string)
		set got_node 1
		exp_continue
	}
	timeout {
		fail "srun is not responding"
	}
	eof {
		wait
	}
}

if {$got_node != 1} {
	fail "Did not get node for testing"
}

# Get the number of cpus on a node
lassign [get_node_cpus $test_node] totcpus nthreads

if {$totcpus != 0} {
	# Set QoS CPU values: the CPU-count limits are set to the node's CPU
	# count minus $nthreads, the CPU-minute limits to the full CPU count.
	set grcpu_num     [expr {$totcpus - $nthreads}]
	set maxcpu_num    $grcpu_num
	set grpcpumin_num $totcpus
	set maxcpumin_num $totcpus
} else {
	fail "No cpus were found"
}
| |
| |
| |
# Get the number of nodes reported by get_nodes_by_state (called with its
# default arguments; presumably the usable nodes of the default partition),
# minus one.
# $maxnode_num is used as MaxNodes in QOS and we will test requests of
# $maxnode_num + 1. If not EnforcePartLimits could interfere.
set num_nodes [expr {[llength [get_nodes_by_state]] - 1}]

# A value below 1 means fewer than two nodes are available. Rejecting
# negative values as well matters: -1 would otherwise be written to the QOS
# as "no limit" and the node-limit tests would run without a limit in place.
if {$num_nodes < 1} {
	fail "Not enough nodes were found (at least 2 are needed)"
} else {
	# Set QoS node values
	set grn_num        $num_nodes
	set maxnode_num    $num_nodes
	set maxnodespu_num $num_nodes
}
| |
| |
# cleanup - remove everything this test created in the accounting database
# and put the partition's TRESBillingWeights back.
#
# Takes no arguments. The QOS and account deletions are run with -none, so
# their outcome is not evaluated here. The billing weights are only
# restored when a previous value was captured in save_billing_weights.
proc cleanup {} {
	global sacctmgr scontrol
	global qostest acct partition save_billing_weights

	# Drop the test QOS, then the test account
	run_command -none "$sacctmgr -i delete qos $qostest"
	run_command -none "$sacctmgr -i delete account $acct"

	# Nothing was saved, so there is nothing to restore
	if {$save_billing_weights eq ""} {
		return
	}
	run_command "$scontrol update partitionname=$partition TRESBillingWeights=$save_billing_weights"
}
| |
# Make sure we have a clean system
cleanup

# Gets user
set user_name [get_my_user_name]

# Add the test QOS; sacctmgr prints "Adding QOS" when it really creates one.
set qosmatch 0
spawn $sacctmgr -i add qos $qosname=$qostest
expect {
	-re "Adding QOS" {
		incr qosmatch
		exp_continue
	}
	timeout {
		fail "sacctmgr did not add QOS"
	}
	eof {
		wait
	}
}
# Every later step modifies this QOS, so stop here if it was not created
# (same check as for the account below).
if {$qosmatch != 1} {
	fail "sacctmgr had a problem adding the QOS"
}

# Add account with qos
set acctmatch 0
spawn $sacctmgr -i add account $acct qos=$qostest
expect {
	-re "Adding Account" {
		incr acctmatch
		exp_continue
	}
	timeout {
		fail "sacctmgr is not responding"
	}
	eof {
		wait
	}
}
if {$acctmatch != 1} {
	fail "sacctmgr had a problem adding the account"
}

# Add user to account
run_command -fail "$sacctmgr -i create user name=$user_name account=$acct"
| |
#
# Every test below follows the same pattern: set one limit in mod_qos_vals,
# push the whole array to the QOS, wait $time_spacing seconds, run the
# matching inc21_30_N procedure, then put the limit back to -1.
#

#
# Test GrpNode limit
#
set mod_qos_vals(GrpNodes) $grn_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_1 "QOSGrpNodeLimit"
# Reset the value to -1 (limit not set)
set mod_qos_vals(GrpNodes) -1

#
# Test GrpCpus
#
set mod_qos_vals(GrpCpus) $grcpu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing

# With one task per core, scale the count down by the threads per core
# after the limit itself has been set.
if {$one_task_pc} {
	set grcpu_num [expr {$grcpu_num / $nthreads}]
}

inc21_30_2 "QOSGrpCpuLimit"
set mod_qos_vals(GrpCpus) -1

#
# test GrpJob limits
#
set mod_qos_vals(GrpJobs) $grjobs_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_3 "QOSGrpJobsLimit"
set mod_qos_vals(GrpJobs) -1

#
# test GrpSubmit
#
set mod_qos_vals(GrpSubmit) $grsub_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_4
set mod_qos_vals(GrpSubmit) -1
| |
#
# Test MaxCpus limits
#
set mod_qos_vals(MaxCpus) $maxcpu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing

# With one task per core, scale the count down by the threads per core
# after the limit itself has been set.
if {$one_task_pc} {
	set maxcpu_num [expr {$maxcpu_num / $nthreads}]
}

inc21_30_5 "QOSMaxCpuPerJobLimit"
set mod_qos_vals(MaxCpus) -1

#
# Test MaxNode limit
# (two reason strings are passed: the QOS limit and PartitionConfig)
#
set mod_qos_vals(MaxNodes) $maxnode_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_6 [list "QOSMaxNodePerJobLimit" "PartitionConfig"]
set mod_qos_vals(MaxNodes) -1

#
# Test MaxJobs limit
#
set mod_qos_vals(MaxJobs) $maxjobs_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_7 "QOSMaxJobsPerUserLimit"
set mod_qos_vals(MaxJobs) -1

#
# Test MaxJobsSubmits limit
#
set mod_qos_vals(MaxSubmitJobs) $maxjobsub_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_8
set mod_qos_vals(MaxSubmitJobs) -1
| |
#
# Test GroupCPUMins
#
set mod_qos_vals(GrpCpuMin) $grpcpumin_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if {$one_task_pc} {
	set grpcpumin_num [expr {$grpcpumin_num / $nthreads}]
}
inc21_30_9 "QOSGrpCPUMinutesLimit"
set mod_qos_vals(GrpCpuMin) -1

#
# Test GroupCPURunMins
# Requires priority/multifactor to properly handle decay well
#
if {$prio_multifactor} {
	set mod_qos_vals(GrpCpuRunMin) $grpcpurunmin_num
	mod_qos $qostest [array get mod_qos_vals]
	sleep $time_spacing
	if {$one_task_pc} {
		set grpcpurunmin_num [expr {$grpcpurunmin_num / $nthreads}]
	}
	inc21_30_10 "QOSGrpCPURunMinutesLimit"
	set mod_qos_vals(GrpCpuRunMin) -1
}

#
# Test Group Memory
#
set mod_qos_vals(GrpMem) $grpmem_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_11 "QOSGrpMemLimit"
set mod_qos_vals(GrpMem) -1

#
# Test Group wall
# Requires priority/multifactor to properly handle decay well
#
if {$prio_multifactor} {
	set mod_qos_vals(GrpWall) $grpwall_num
	mod_qos $qostest [array get mod_qos_vals]
	sleep $time_spacing
	inc21_30_12
	set mod_qos_vals(GrpWall) -1
}
| |
#
# Test Max Cpu Mins
#
set mod_qos_vals(MaxCpuMin) $maxcpumin_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if {$one_task_pc} {
	set maxcpumin_num [expr {$maxcpumin_num / $nthreads}]
}
inc21_30_13 "QOSMaxCpuMinutesPerJobLimit"
set mod_qos_vals(MaxCpuMin) -1

#
# Test Max Wall
#
set mod_qos_vals(MaxWall) $maxwall_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_14 "QOSMaxWallDurationPerJobLimit"
set mod_qos_vals(MaxWall) -1

#
# Test Max CPUs Per User
#

# If CR_CORE set maxcpuspu a multiple number of threads
if {$selectparam} {
	set maxcpuspu_num [expr {$maxcpuspu_num * $nthreads}]
}

set mod_qos_vals(MaxCpusPerUser) $maxcpuspu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if {$one_task_pc} {
	set maxcpuspu_num [expr {$maxcpuspu_num / $nthreads}]
}
inc21_30_15 "QOSMaxCpuPerUserLimit"
set mod_qos_vals(MaxCpusPerUser) -1

#
# Test MaxNodesPerUser
# (two reason strings are passed: the QOS limit and PartitionConfig)
#
set mod_qos_vals(MaxNodesPerUser) $maxnodespu_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_16 [list "QOSMaxNodePerUserLimit" "PartitionConfig"]
set mod_qos_vals(MaxNodesPerUser) -1

#
# Test MaxWall is used as job's timelimit if job was requested
# without --time option
#
set mod_qos_vals(MaxWall) $maxwall_num
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
inc21_30_17
set mod_qos_vals(MaxWall) -1
| |
| |
# TRESBillingWeights
# Weight every CPU of the partition by $tres_cpu_mult for the billing-based
# limit tests that follow. Any "error" in the scontrol output fails the
# test.
spawn $scontrol update partitionname=$partition tresbillingweights=cpu=$tres_cpu_mult
expect {
	-re "error" {
		fail "Failed to set TRESBillingWeights"
	}
	timeout {
		fail "scontrol is not responding"
	}
	eof {
		wait
	}
}
| |
#
# The billing tests reuse the CPU-based include procedures: each limit is
# the corresponding CPU value multiplied by $tres_cpu_mult, the CPU weight
# set in TRESBillingWeights.
# NOTE(review): when one_task_pc is set, the *_num values were already
# divided by $nthreads in the CPU-based tests earlier in this script and are
# divided again here - confirm the repeated scaling is intended.
#

#
# Test GrpTRES=billing
#
set mod_qos_vals(GrpTRES=billing) [expr {$grcpu_num * $tres_cpu_mult}]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing

if {$one_task_pc} {
	set grcpu_num [expr {$grcpu_num / $nthreads}]
}

inc21_30_2 "QOSGrpBilling"
set mod_qos_vals(GrpTRES=billing) -1


#
# Test GrpTRESMins=billing
#
set mod_qos_vals(GrpTRESMins=billing) [expr {$grpcpumin_num * $tres_cpu_mult}]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if {$one_task_pc} {
	set grpcpumin_num [expr {$grpcpumin_num / $nthreads}]
}
inc21_30_9 "QOSGrpBillingMinutes"
set mod_qos_vals(GrpTRESMins=billing) -1


#
# Test GroupTRESRunMins=billing
# Requires priority/multifactor to properly handle decay well
#
if {$prio_multifactor} {
	set mod_qos_vals(GrpTRESRunMins=billing) [expr {$grpcpurunmin_num * $tres_cpu_mult}]
	mod_qos $qostest [array get mod_qos_vals]
	sleep $time_spacing
	if {$one_task_pc} {
		set grpcpurunmin_num [expr {$grpcpurunmin_num / $nthreads}]
	}
	inc21_30_10 "QOSGrpBillingRunMinutes"
	set mod_qos_vals(GrpTRESRunMins=billing) -1
}


#
# Test MaxTRESPerJob=billing limits
#
set mod_qos_vals(MaxTRESPerJob=billing) [expr {$maxcpu_num * $tres_cpu_mult}]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing

if {$one_task_pc} {
	set maxcpu_num [expr {$maxcpu_num / $nthreads}]
}

inc21_30_5 "QOSMaxBillingPerJob"
set mod_qos_vals(MaxTRESPerJob=billing) -1

#
# Test MaxTRESMinsPerJob=billing
#
set mod_qos_vals(MaxTRESMinsPerJob=billing) [expr {$maxcpumin_num * $tres_cpu_mult}]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if {$one_task_pc} {
	set maxcpumin_num [expr {$maxcpumin_num / $nthreads}]
}
inc21_30_13 "QOSMaxBillingMinutesPerJob"
set mod_qos_vals(MaxTRESMinsPerJob=billing) -1

#
# Test MaxTRESPerUser=billing
#

# If CR_CORE set maxcpuspu a multiple number of threads
# NOTE(review): the same multiplication is applied before the MaxCpusPerUser
# test earlier in this script - confirm applying it twice is intended.
if {$selectparam} {
	set maxcpuspu_num [expr {$maxcpuspu_num * $nthreads}]
}

set mod_qos_vals(MaxTRESPerUser=billing) [expr {$maxcpuspu_num * $tres_cpu_mult}]
mod_qos $qostest [array get mod_qos_vals]
sleep $time_spacing
if {$one_task_pc} {
	set maxcpuspu_num [expr {$maxcpuspu_num / $nthreads}]
}
inc21_30_15 "QOSMaxBillingPerUser"
set mod_qos_vals(MaxTRESPerUser=billing) -1