| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test of Slurm functionality |
| # Test that partition and job qos limits are enforced when using |
| # the OverPartQos flag for the job's qos |
| ############################################################################ |
| # Copyright (C) SchedMD LLC. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| source ./globals_accounting |
| source ./inc21.34_tests |
| |
#
# Resources discovered at run time
#
set test_node ""
# Total cpus in test node
set totcpus   0
set nthreads  0
set user_name ""

#
# Names of the accounting entities created (and later removed) by this test
#
set acct      test_acct
set test_part "${test_name}_part"
set part_qos  "${test_name}_part_qos"
set job_qos   "${test_name}_job_qos"
set qostest   ""

#
# QOS limits. Each limit has a variable holding its name and a companion
# *_num variable holding its value. Values of 0 for the node and CPU based
# limits are placeholders that are filled in further down, once the node
# and CPU counts are known (maxcpumin_num is likewise replaced there).
#

# Group limits
set grn          GrpNodes        ; set grn_num          0
set grcpu        GrpCpus         ; set grcpu_num        0
set grpcpumin    GrpCPUMins      ; set grpcpumin_num    0
# Set grpcpurunmin_num to multiple of CPUs per core to work with most configurations
# Also make sure that it is at least 4 so we can add and subtract from it
set grpcpurunmin GrpCPURunMins   ; set grpcpurunmin_num 40
set grjobs       GrpJobs         ; set grjobs_num       2
set grpmem       GrpMem          ; set grpmem_num       100
set grsub        GrpSubmit       ; set grsub_num        2
set grpwall      GrpWall         ; set grpwall_num      1

# Per-job and per-user limits
set maxcpu       MaxCpus         ; set maxcpu_num       0
# Set maxcpumin_num to multiple of CPUs per core to work with most configurations
set maxcpumin    MaxCPUMins      ; set maxcpumin_num    2
set maxwall      MaxWall         ; set maxwall_num      2
set maxcpuspu    MaxCPUSPerUser  ; set maxcpuspu_num    2
# Note: the value variable is maxnode_num (no "s"), unlike the name variable
set maxnodes     MaxNodes        ; set maxnode_num      0
set maxnodespu   MaxNodesPerUser ; set maxnodespu_num   0
set maxjobs      MaxJobs         ; set maxjobs_num      2
set maxjobsub    MaxSubmitJobs   ; set maxjobsub_num    2

# Miscellaneous tuning values; tres_cpu_mult is the cpu billing weight
# given to the test partition when it is created
set time_spacing  1
set tres_cpu_mult 2

# cr_core = 1 / cr_cpu = 0
set selectparam 0

# Remember the current default partition so cleanup can restore it
set def_part [default_partition]
| |
# mod qos
#
# Limit name -> -1 table for the job QOS, one entry per limit exercised by
# this test (classic limits first, then the billing TRES limits).
# MaxNode appears once here; a repeated key in "array set" would only
# rewrite the same element.
array set mod_job_qos {
	GrpNodes                    -1
	GrpCpus                     -1
	GrpJob                      -1
	GrpSubmit                   -1
	GrpCpuMin                   -1
	GrpCpuRunMin                -1
	GrpMem                      -1
	GrpWall                     -1
	MaxCpus                     -1
	MaxNode                     -1
	MaxJobs                     -1
	MaxSubmitJobs               -1
	MaxCpuMin                   -1
	MaxWall                     -1
	MaxCpusPerUser              -1
	MaxNodesPerUser             -1

	GrpTRES=billing             -1
	GrpTRESMins=billing         -1
	GrpTRESRunMins=billing      -1
	MaxTRESPerJob=billing       -1
	MaxTRESMinsPerJob=billing   -1
	MaxTRESPerUser=billing      -1
}
| |
# Limit name -> -1 table for the partition QOS; same set of limits as the
# job QOS table. MaxNode appears once here; a repeated key in "array set"
# would only rewrite the same element.
array set mod_part_qos {
	GrpNodes                    -1
	GrpCpus                     -1
	GrpJob                      -1
	GrpSubmit                   -1
	GrpCpuMin                   -1
	GrpCpuRunMin                -1
	GrpMem                      -1
	GrpWall                     -1
	MaxCpus                     -1
	MaxNode                     -1
	MaxJobs                     -1
	MaxSubmitJobs               -1
	MaxCpuMin                   -1
	MaxWall                     -1
	MaxCpusPerUser              -1
	MaxNodesPerUser             -1

	GrpTRES=billing             -1
	GrpTRESMins=billing         -1
	GrpTRESRunMins=billing      -1
	MaxTRESPerJob=billing       -1
	MaxTRESMinsPerJob=billing   -1
	MaxTRESPerUser=billing      -1
}
| |
#
# Cannot run the test if OverTimeLimit is set, since we test time limits.
#
# A value with no digits in it (slurm.conf also accepts UNLIMITED) makes
# regexp return 0 and leaves overtimelim unassigned. Treat that case as
# "set" and skip, instead of reading an undefined variable. The || only
# evaluates $overtimelim when the match succeeded.
#
if {![regexp "($number)" [get_config_param "OverTimeLimit"] {} overtimelim]
    || $overtimelim != 0} {
	skip "Cannot run this test when OverTimeLimit is set. Exiting now"
}

#
# Accounting prerequisites: limits are only enforced with slurmdbd
# accounting and AccountingStorageEnforce=limits, and the test must be
# allowed to create accounts, QOS and partitions.
#
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	skip "This test can't be run without a usable AccountStorageType"
}
if {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
	skip "This test can't be run without enforcing limits"
}
if {![is_super_user]} {
	skip "Test can only be ran as SlurmUser"
}

# Determine what the selecttype param is
set select_type_parameters [get_config_param "SelectTypeParameters"]
if {[param_contains $select_type_parameters "CR_SOCKET*"]} {
	skip "This test is incompatible with CR_SOCKET allocations"
}
if {[param_contains $select_type_parameters "CR_ONE_TASK_PER_CORE"]} {
	skip "This test is incompatible with CR_ONE_TASK_PER_CORE allocations"
}
# NOTE(review): whether "CR_CORE_*" also matches a bare CR_CORE depends on
# how param_contains interprets the pattern - confirm against ./globals.
if {[param_contains $select_type_parameters "CR_CORE_*"]} {
	set selectparam 1
}
if {[param_contains [get_config_param "SelectType"] "select/linear"]} {
	skip "This test is incompatible select/linear"
}

# Record whether the multifactor priority plugin is in use
if {[get_config_param "PriorityType"] eq "priority/multifactor"} {
	set prio_multifactor 1
} else {
	set prio_multifactor 0
}
| |
#
# cleanup - remove everything this test creates.
#
# Deletes both test QOS, the test account and the test partition, in that
# order, then marks the partition recorded in def_part as the default
# again (skipped when def_part is empty). Every command goes through
# run_command with the -none option; presumably that keeps a failure from
# being treated as an error, since the objects may not exist yet when this
# is called before the test starts - confirm against ./globals.
#
proc cleanup { } {
	global acct job_qos part_qos scontrol sacctmgr test_part def_part

	foreach teardown_cmd [list \
		"$sacctmgr -i delete qos $job_qos,$part_qos" \
		"$sacctmgr -i delete account $acct" \
		"$scontrol delete partitionname=$test_part" \
	] {
		run_command -none $teardown_cmd
	}

	# Give the default flag back to the original default partition
	if {$def_part ne ""} {
		run_command -none "$scontrol update partitionname=$def_part default=yes"
	}
}
| |
# Remove any vestigial data
cleanup

#
# Check to see that there are enough resources in the default partition.
# tmpc and tmpn hold the partition's TotalCPUs and TotalNodes minus one,
# so either being 0 means there is only a single CPU or node (or the
# field was never seen in the output).
#
set tmpc 0
set tmpn 0
spawn $scontrol show part [default_partition]
expect {
	# Keep the patterns in this order: expect tries them in sequence and
	# discards the buffered output up to the end of whichever one matches.
	-re "TotalCPUs=($number)" {
		set tmpc [expr {$expect_out(1,string) - 1}]
		exp_continue
	}
	-re "TotalNodes=($number)" {
		set tmpn [expr {$expect_out(1,string) - 1}]
		exp_continue
	}
	timeout {
		fail "scontrol is not responding"
	}
	eof {
		wait
	}
}

if {!($tmpc != 0 && $tmpn != 0)} {
	skip "Not enough Nodes and/or CPUs"
}
| |
# Get the number of nodes in the default partition, minus one.
# $maxnode_num is used as MaxNodes in QOS and we will test requests of
# $maxnode_num + 1. If not EnforcePartLimits could interfere.
# (Presumably get_nodes_by_state with no arguments lists the usable nodes
# of the default partition - confirm against ./globals.)
set num_nodes [expr {[llength [get_nodes_by_state]] - 1}]

# At least two nodes must be reported so that the limit (count - 1) is a
# positive number. An empty list yields -1, which must not be used as a
# QOS node limit, so reject anything below 1 rather than only 0.
if {$num_nodes < 1} {
	fail "Not enough nodes were found (at least 2 are needed)"
} else {
	# Set QoS node values
	set grn_num        $num_nodes
	set maxnode_num    $num_nodes
	set maxnodespu_num $num_nodes
}
| |
# Create the two QOS used by the test: one is attached to the test
# partition below, the other to the test account later on
add_qos $part_qos ""
add_qos $job_qos ""

# Create a temporary partition for testing. It spans all nodes, carries
# the partition QOS, bills each cpu at $tres_cpu_mult and takes over as
# the default partition (cleanup hands the default flag back).
spawn $scontrol create partitionname=$test_part \
	qos=$part_qos \
	tresbillingweights=cpu=$tres_cpu_mult \
	default=yes \
	nodes=ALL
expect {
	eof {
		wait
	}
	timeout {
		fail "scontrol is not responding"
	}
}
| |
#
# Run a one-node job that prints SLURM_NODELIST and use the node it ran on
# as the test node. Every match of $re_word_str overwrites test_node, so
# the last matching token in the srun output is the one that is kept.
#
set got_node 0
spawn $srun -N1 printenv SLURM_NODELIST
expect {
	-re "($re_word_str)" {
		set got_node 1
		set test_node $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		fail "srun is not responding"
	}
}

if {$got_node != 1} {
	fail "Did not get node for testing"
}
| |
# Get the number of cpus on the test node; get_node_cpus returns a list
# whose first two elements are stored as totcpus and nthreads
lassign [get_node_cpus $test_node] totcpus nthreads

if {$totcpus != 0} {
	# Set QoS CPU values: the cpu-count limits are totcpus minus nthreads,
	# while the cpu-minute limits use the full cpu count
	set grcpu_num     [expr {$totcpus - $nthreads}]
	set maxcpu_num    $grcpu_num
	set grpcpumin_num $totcpus
	set maxcpumin_num $totcpus
} else {
	fail "No cpus were found"
}
| |
# Name of the user running the test
set user_name [get_my_user_name]

# Add the test account with the job QOS attached; sacctmgr must report
# "Adding Account" exactly once for the creation to count as successful
set acctmatch 0
spawn $sacctmgr -i add account $acct qos=$job_qos
expect {
	-re "Adding Account" {
		incr acctmatch
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		fail "sacctmgr is not responding"
	}
}
if {$acctmatch != 1} {
	fail "sacctmgr had a problem adding the account"
}

# Add the current user to the test account (only a timeout is treated as
# a failure here; the command output is not inspected)
spawn $sacctmgr -i create user name=$user_name account=$acct
expect {
	eof {
		wait
	}
	timeout {
		fail "sacctmgr not responding"
	}
}
| |
| |
# First pass: part_test is defined outside this file (./inc21.34_tests is
# sourced at the top) and is run before the job QOS is given any flag.
log_info "========== Run limit test on partition's qos limits =========="
part_test

#
# Set overpartqos flag on job's qos
#
# "changed" records whether sacctmgr reported the modification. It is
# verified below so the second pass never runs against a QOS that does
# not actually carry the flag.
#
set changed 0
spawn $sacctmgr -i mod qos $job_qos set flag=overpartqos
expect {
	-re "Modified qos" {
		set changed 1
		exp_continue
	}
	timeout {
		fail "sacctmgr is not responding"
	}
	eof {
		wait
	}
}
if {$changed != 1} {
	fail "sacctmgr had a problem setting the OverPartQos flag on qos $job_qos"
}

# Second pass: qos_test (also defined outside this file) runs with the
# OverPartQos flag set on the job QOS.
log_info "========== Run limit test on job's qos limits =========="
qos_test