blob: cf88a2efe8492473d08516294ed1ff56da15274e [file] [log] [blame]
#!/usr/bin/env expect
############################################################################
# Purpose: Test normalized factors
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
source ./globals_accounting

# Expect timeout used by the spawn/expect blocks in this file.
set timeout 60

# Names of the accounts, QOSs and partitions this test creates; they are
# all derived from the test name.
set ta1 "${test_name}-account.1"
set ta2 "${test_name}-account.2"
set tq1 "${test_name}-qos.1"
set tq2 "${test_name}-qos.2"
set p1  "${test_name}-part.1"
set p2  "${test_name}-part.2"

# Filled in by the test body: configuration directory and the ids of the
# jobs submitted through sub_job.
set config_path {}
set jobid_list  {}

set cwd        [$bin_pwd]
set access_err 0
set user       [get_my_user_name]
#
# Check accounting config and bail if not found.
# The test needs slurmdbd-backed accounting and the multifactor priority
# plugin; it is skipped otherwise.
#
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
    skip "This test can't be run without a usable AccountingStorageType"
}
if {[get_config_param "PriorityType"] ne "priority/multifactor"} {
    skip "This test can't be run without a usable PriorityType"
}
#
# This test needs to be modified to use the core counts rather than CPU counts
#
set select_type_params [get_select_type_params ""]
if { [string first "CR_ONE_TASK_PER_CORE" $select_type_params] != -1 } {
    skip "This test can't be run with SelectTypeParameters=CR_ONE_TASK_PER_CORE"
}
# Remove the two accounts and the two QOSs that belong to this test.
# Accounts are removed first, then the QOSs.
proc clean_assocs {} {
    global ta1 ta2 tq1 tq2

    foreach acct [list $ta1 $ta2] {
        remove_acct "" $acct
    }
    foreach qos [list $tq1 $tq2] {
        remove_qos $qos
    }
}
# Undo what the test changed: cancel the jobs recorded in jobid_list,
# remove the test accounts/QOSs, put the saved slurm.conf back and ask
# the controller to reconfigure.
# NOTE(review): the caller is not visible in this file; presumably the
# test framework invokes cleanup when the test ends -- confirm.
proc cleanup { } {
    cancel_job $::jobid_list
    clean_assocs
    restore_conf $::config_file
    reconfigure
}
# Return a CPU count for the idle nodes of a partition.
#
# Runs 'sinfo -h -o "%C" -p <partition> --state=idle' and takes the second
# number of the first "<n>/<n>" pair on each output line (presumably the
# idle field of sinfo's allocated/idle/other/total report -- confirm
# against the sinfo man page). A trailing "K" on that number multiplies it
# by 1024. If several lines match, the last one wins.
#
# partition - name of the partition to query
#
# Returns the CPU count, or 1 if sinfo printed nothing that matched.
# Fails the test if sinfo does not answer before the expect timeout.
proc part_cpu_cnt { partition } {
    global sinfo number

    set cpu_cnt 1
    spawn $sinfo -h -o "%C" -p $partition --state=idle
    expect {
        -re "($number)(K?)\/($number)(K?)" {
            set cpu_cnt $expect_out(3,string)
            if {$expect_out(4,string) ne ""} {
                # Braced so the operands are substituted only once.
                set cpu_cnt [expr {$cpu_cnt * 1024}]
            }
            exp_continue
        }
        timeout {
            fail "sinfo not responding"
        }
        eof {
            wait
        }
    }
    return $cpu_cnt
}
# Submit an exclusive batch job that sleeps for 999 seconds and wait until
# it reaches the requested state.
#
# args  - string of extra sbatch options appended to the command line
#         (an ordinary parameter here: "args" is only variadic in Tcl
#         when it is the last parameter)
# state - job state to wait for, e.g. RUNNING or PENDING
#
# Returns the job id. Fails the test if sbatch does not respond, prints no
# job id, or the job does not reach the requested state.
proc sub_job { args state } {
    global number sbatch test_id bin_sleep jobid_list

    set jobid 0
    set cmd "$sbatch -J$test_id -o/dev/null -e/dev/null --exclusive --wrap \"$bin_sleep 999\""
    append cmd " $args"
    # cmd is expanded as a Tcl list, so the quoted --wrap value stays a
    # single argument.
    spawn {*}$cmd
    expect {
        -re "Submitted batch job ($number)" {
            set jobid $expect_out(1,string)
            exp_continue
        }
        timeout {
            fail "sbatch not responding"
        }
        eof {
            wait
        }
    }
    if {$jobid == 0} {
        fail "Did not get sbatch jobid"
    }
    # Record the job before waiting on it: if it never reaches the
    # requested state, cleanup can still cancel it through jobid_list.
    lappend jobid_list $jobid
    wait_for_job -fail $jobid $state
    return $jobid
}
# Poll sprio until its output matches a regular expression.
#
# args  - string of sprio options appended to the sprio command
#         (ordinary parameter, not variadic: it is not the last one)
# regex - pattern the sprio output must match
#
# sprio is re-run through wait_for (timeout 5, poll .1) until the pattern
# matches. Fails the test if it never does.
# NOTE(review): this relies on wait_for returning non-zero when the
# condition is not met in time, the same convention the add_*/mod_*
# helpers are used with in this file -- confirm against globals.
proc sprio_args { args regex } {
    global sprio

    set rc [wait_for -timeout 5 -poll .1 {
        [regexp $regex [run_command_output -fail "$sprio $args"]] == 1
    } { }]
    if {$rc != 0} {
        fail "sprio $args output did not match '$regex'"
    }
}
# Run scontrol and require its output to match a pattern an exact number
# of times.
#
# args      - scontrol arguments as one string, expanded as a Tcl list
# regex     - pattern counted in the output
# match_cnt - number of matches required
#
# Fails the test on timeout or when the match count differs.
proc scontrol { args regex match_cnt } {
    global scontrol

    set hits 0
    spawn $scontrol {*}$args
    expect {
        timeout {
            fail "scontrol not responding"
        }
        -re $regex {
            incr hits
            exp_continue
        }
        eof {
            wait
        }
    }

    if {$hits != $match_cnt} {
        fail "scontrol $args failed to match '$regex' $match_cnt times"
    }
}
# Set the priority of a job with "scontrol update".
#
# jobid - job to update
# prio  - priority value passed as priority=<prio>
# error - optional regular expression that scontrol's output must match;
#         when empty (the default) the output is not inspected
#
# The proc reads scontrol's output until EOF and reaps the process, so the
# update command has finished when it returns. Fails the test if scontrol
# does not respond or if an expected error pattern was not seen.
proc update_job { jobid prio {error ""} } {
    global scontrol

    set matches 0
    spawn $scontrol update jobid=$jobid priority=$prio
    if {$error ne ""} {
        expect {
            -re $error {
                incr matches
                # Keep reading so the eof branch runs and reaps scontrol.
                exp_continue
            }
            timeout {
                fail "scontrol not responding"
            }
            eof {
                wait
            }
        }
    } else {
        # No pattern to look for: just let the command run to completion.
        expect {
            timeout {
                fail "scontrol not responding"
            }
            eof {
                wait
            }
        }
    }
    if {($error ne "") && !$matches} {
        fail "Didn't get expected error '$error'"
    }
}
# Create a partition with scontrol and check that it can be shown.
#
# name  - partition name
# nodes - value for nodes=
# prio  - value for priorityjobfactor=
#
# Fails the test if scontrol prints "error", does not respond, or the
# partition does not appear in "scontrol show partitionname=<name>".
proc create_part { name nodes prio } {
    global scontrol

    # Step 1: create the partition.
    spawn $scontrol create partitionname=$name priorityjobfactor=$prio nodes=$nodes
    expect {
        timeout {
            fail "scontrol is not responding"
        }
        -re "error" {
            fail "scontrol found error"
        }
        eof {
            wait
        }
    }

    # Step 2: look the new partition up again.
    set seen 0
    spawn $scontrol show partitionname=$name
    expect {
        timeout {
            fail "scontrol is not responding"
        }
        -re "PartitionName=$name" {
            set seen 1
            exp_continue
        }
        eof {
            wait
        }
    }

    if {!$seen} {
        fail "scontrol did not create partition ($name)"
    }
}
# Update a partition with "scontrol update partitionname=<name> ...".
#
# name - partition to update
# args - remaining arguments (variadic here, since "args" is the last
#        parameter); they are appended to the scontrol command line
#
# Fails the test if scontrol prints "error" or does not respond.
proc update_part { name args } {
    global scontrol

    set cmd "$scontrol update partitionname=$name $args"
    spawn {*}$cmd
    expect {
        timeout {
            fail "scontrol is not responding"
        }
        -re "error" {
            fail "scontrol found error"
        }
        eof {
            wait
        }
    }
}
################################################################
# Drop SPRIO_FORMAT from the environment if it is set, so that only the
# -o formats given below are in effect.
if {[info exists env(SPRIO_FORMAT)]} {
    unset env(SPRIO_FORMAT)
}

# Save slurm.conf before editing it; cleanup restores it.
set config_path [get_conf_path]
set config_file $config_path/slurm.conf
save_conf $config_file

# Full and half values used for the association, QOS and TRES weights.
set weight 4000000
set weight_half [expr {$weight / 2}]
# max is uint16_t
set part_weight 60000
set part_weight_half [expr {$part_weight / 2}]

# Comment out every existing PriorityWeight* and PriorityFlags* line, then
# append the weights this test relies on.
exec $bin_sed -i "s/^\\(PriorityWeight.*\\)/#\\1/Ig" $config_file
exec $bin_sed -i "s/^\\(PriorityFlags.*\\)/#\\1/Ig" $config_file
set prio_conf "
PriorityWeightAssoc = $weight
PriorityWeightJobSize = 1
PriorityWeightPartition = $part_weight
PriorityWeightQOS = $weight
PriorityWeightTRES=cpu=$weight
"
exec $bin_echo $prio_conf >> $config_file
reconfigure -fail

# Both test partitions are built on the same single node: the first one
# returned by get_nodes_by_state.
set idle_node [lindex [get_nodes_by_state] 0]
if {$idle_node == ""} {
    fail "No idle nodes"
}
log_info "idle: $idle_node"
create_part $p1 $idle_node $part_weight
create_part $p2 $idle_node $part_weight_half

# The tests submit jobs with the full and with half the CPU count, so the
# count has to be even.
set cpu_cnt [part_cpu_cnt $p1]
if {$cpu_cnt % 2 != 0} {
    skip "This test requires an even number of cpus per node ($idle_node has $cpu_cnt cpus)"
}
set cpu_cnt_half [expr {$cpu_cnt / 2}]
# Remove anything left over under this test's account/QOS names before
# creating them.
clean_assocs

# QOS $tq1 gets the full priority, $tq2 half of it.
array set qos_req [list priority $weight]
if {[add_qos $tq1 [array get qos_req]]} {
    fail "Unable to create qos ($tq1)"
}
array set qos_req [list priority $weight_half]
if {[add_qos $tq2 [array get qos_req]]} {
    fail "Unable to create qos ($tq2)"
}

# Both accounts may use both QOSs; $ta1 gets the full priority, $ta2 half.
array set acct_req [list qos "$tq1,$tq2" priority $weight]
if {[add_acct $ta1 [array get acct_req]]} {
    fail "Unable to create account ($ta1)"
}
array set acct_req [list priority $weight_half]
if {[add_acct $ta2 [array get acct_req]]} {
    fail "Unable to create account ($ta2)"
}

# Add the current user to both accounts.
array set user_req [list account "$ta1,$ta2"]
if {[add_user $user [array get user_req]]} {
    fail "Unable to add user ($user)"
}
# test1: full-priority and half-priority jobs side by side.
log_info "test1"
# The controller must report the priorities just configured: two matching
# association records per account and one record per QOS.
scontrol "show assoc account=$ta1 flags=assoc" "(Account=$ta1 UserName= Partition= Priority=$weight)|(Account=$ta1 UserName=$user\\S+ Partition= Priority=$weight)" 2
scontrol "show assoc account=$ta2 flags=assoc" "(Account=$ta2 UserName= Partition= Priority=$weight_half)|(Account=$ta2 UserName=$user\\S+ Partition= Priority=$weight_half)" 2
scontrol "show assoc qos=$tq1 flags=qos" "Priority=$weight" 1
scontrol "show assoc qos=$tq2 flags=qos" "Priority=$weight_half" 1
# jobid1 asks for every CPU and must start running; the jobs submitted
# after it must stay pending so that sprio can report on them.
set jobid1 [sub_job "-n$cpu_cnt -p$p1 -A$ta1 --qos=$tq1" RUNNING]
set jobid2 [sub_job "-n$cpu_cnt -p$p1 -A$ta1 --qos=$tq1" PENDING]
set jobid3 [sub_job "-n$cpu_cnt_half -p$p2 -A$ta2 --qos=$tq2" PENDING]
# jobid2 uses the full-priority account/partition/QOS and all CPUs:
# expect the full weights, and 1.0 for every factor when sprio runs with -n.
sprio_args "-j $jobid2 -o \"%.15i %10B %10P %10Q %12T\"" "$jobid2\\s+$weight\\s+$part_weight\\s+$weight\\s+cpu=$weight"
sprio_args "-n -j $jobid2 -o \"%.15i %10b %10p %10q %12t\"" "$jobid2\\s+1\\.0+\\s+1\\.0+\\s+1\\.0+\\s+cpu=1\\.0+"
# jobid3 uses the half-priority account/partition/QOS and half the CPUs:
# expect half the weights, and 0.5 for every factor with -n.
sprio_args "-j $jobid3 -o \"%.15i %10B %10P %10Q %12T\"" "$jobid3\\s+$weight_half\\s+$part_weight_half\\s+$weight_half\\s+cpu=$weight_half"
sprio_args "-n -j $jobid3 -o \"%.15i %10b %10p %10q %12t\"" "$jobid3\\s+0\\.50+\\s+0\\.50+\\s+0\\.50+\\s+cpu=0\\.5+"
log_info "test2"
# Lower the priorities of $ta1, $tq1 and $p1 to the half value. A job that
# uses the half-priority account/QOS/partition should then get the full
# weights, because the high-water mark is adjusted to the new maximum.
array set acct_mod_desc {}
array set acct_mod_acct_vals {}
array set acct_mod_assoc_vals {}
set acct_mod_assoc_vals(priority) $weight_half
if {[mod_acct $ta1 [array get acct_mod_desc] [array get acct_mod_assoc_vals] [array get acct_mod_acct_vals]] } {
fail "Unable to modify account ($ta1)"
}
# The same priority array is reused for the QOS modification.
if {[mod_qos $tq1 [array get acct_mod_assoc_vals]] } {
fail "Unable to modify qos ($tq1)"
}
# Check that the controller has picked up the lowered values before
# submitting the next job.
scontrol "show assoc account=$ta1 flags=assoc" "(Account=$ta1 UserName= Partition= Priority=$weight_half)|(Account=$ta1 UserName=$user\\S+ Partition= Priority=$weight_half)" 2
scontrol "show assoc qos=$tq1 flags=qos" "Priority=$weight_half" 1
update_part $p1 "priorityjobfactor=$part_weight_half"
# This job requests all CPUs, so the cpu factor is expected at full weight too.
set jobid [sub_job "-n$cpu_cnt -p$p2 -A$ta2 --qos=$tq2" PENDING]
sprio_args "-j $jobid -o \"%.15i %10B %10P %10Q %12T\"" "$jobid\\s+$weight\\s+$part_weight\\s+$weight\\s+cpu=$weight"
sprio_args "-n -j $jobid -o \"%.15i %10b %10p %10q %12t\"" "$jobid\\s+1\\.0+\\s+1\\.0+\\s+1\\.0+\\s+cpu=1\\.0+"
log_info "test3"
# Move $ta1, $tq1 and $p1 back to the higher weight to test that the max
# priorities and the normalized values are recalculated.
array set acct_mod_desc {}
array set acct_mod_acct_vals {}
array set acct_mod_assoc_vals {}
set acct_mod_assoc_vals(priority) $weight
if {[mod_acct $ta1 [array get acct_mod_desc] [array get acct_mod_assoc_vals] [array get acct_mod_acct_vals]] } {
fail "Unable to modify account ($ta1)"
}
# The same priority array is reused for the QOS modification.
if {[mod_qos $tq1 [array get acct_mod_assoc_vals]] } {
fail "Unable to modify qos ($tq1)"
}
# Check that the controller has picked up the restored values before
# submitting the next job.
scontrol "show assoc account=$ta1 flags=assoc" "(Account=$ta1 UserName= Partition= Priority=$weight)|(Account=$ta1 UserName=$user\\S+ Partition= Priority=$weight)" 2
scontrol "show assoc qos=$tq1 flags=qos" "Priority=$weight" 1
update_part $p1 "priorityjobfactor=$part_weight"
# A half-priority job with half the CPUs is expected to be back at half
# the weights, and at 0.5 for every factor with -n.
set jobid [sub_job "-n$cpu_cnt_half -p$p2 -A$ta2 --qos=$tq2" PENDING]
sprio_args "-j $jobid -o \"%.15i %10B %10P %10Q %12T\"" "$jobid\\s+$weight_half\\s+$part_weight_half\\s+$weight_half\\s+cpu=$weight_half"
sprio_args "-n -j $jobid -o \"%.15i %10b %10p %10q %12t\"" "$jobid\\s+0\\.50+\\s+0\\.50+\\s+0\\.50+\\s+cpu=0\\.5+"
log_info "test4"
# test setting priority directly on a job
set jobid [sub_job "-n$cpu_cnt_half -p$p2 -A$ta2 --qos=$tq2" PENDING]
update_job $jobid 999
# Once the priority is set by hand, sprio is expected to print 999 in the
# column after the job id and 0 in the next eight columns, both without
# and with -n.
sprio_args "-j $jobid -o \"%.15i %.10Y %.10S %.10A %.10B %.10F %.10J %.10P %.10Q %.11N %.20T\"" "$jobid\\s+999\\s+0\\s+0\\s+0\\s+0\\s+0\\s+0\\s+0\\s+0"
sprio_args "-n -j $jobid -o \"%.15i %.10Y %.10S %.10A %.10B %.10F %.10J %.10P %.10Q %.11N %.20T\"" "$jobid\\s+999\\s+0\\s+0\\s+0\\s+0\\s+0\\s+0\\s+0\\s+0"