blob: 5dac5de93aa5438d962be63c214c89125aa713e1 [file] [log] [blame]
#!/usr/bin/env expect
############################################################################
# Purpose: Test basic functionality of backfill scheduler
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
# Node bookkeeping: the blocking job is submitted on every available node
# but one, and the test needs at least $nodes_min nodes to make sense.
set nodes_avail      [llength [get_nodes_by_state]]
set nodes_except_one [expr {$nodes_avail - 1}]
set nodes_min        2

# Time base for the test, derived from the bf_interval scheduler parameter
# (30 is passed to param_value as the fallback value).
set bf_interval  [param_value [get_config_param "SchedulerParameters"] "bf_interval" 30]
set bf_interval3 [expr {$bf_interval * 3}]
set bf_interval5 [expr {$bf_interval * 5}]

# Job ids used by cleanup; 0 means "not submitted yet".
set job_id 0
set job_pd 0
set job_bf 0

# Preconditions: skip the test when the configuration cannot support it.
if {[get_config_param "SchedulerType"] ne "sched/backfill"} {
	skip "This test requires SchedulerType = sched/backfill"
}
if {[string match "*bf_min_prio_reserve*" [get_config_param "SchedulerParameters"]]} {
	skip "This test cannot work with bf_min_prio_reserve"
}
if {$nodes_min > $nodes_avail} {
	skip "Not enough nodes currently available ($nodes_avail avail < $nodes_min needed)."
}
if {[get_partition_param [default_partition] "OverSubscribe"] ne "NO"} {
	skip "This tests not works if OverSubscribe is enabled"
}
# cleanup - Cancel every job this test may have submitted.
#
# Reads the global job ids (blocking job, higher-priority job and backfill
# candidate) and hands them to cancel_job as a single list.
proc cleanup {} {
	cancel_job [list $::job_id $::job_pd $::job_bf]
}
# test_bf - Run one backfill scenario and record its result as a subtest.
#
# A blocking job is started on all available nodes but one. Then a job with
# --nice=0 (job_pd) and a job with --nice=100 (job_bf) are submitted, and
# squeue is polled once per second (for at most $bf_interval5) until the
# relative start order of job_pd and job_bf can be decided.
#
# Arguments:
#	job_pd_args	extra sbatch options appended for the higher-priority job
#	job_bf_args	extra sbatch options appended for the lower-priority job
#	bf_expected	boolean; true when job_bf is expected to start before job_pd
#
# Side effects:
#	Sets the globals job_id, job_pd and job_bf, reports the outcome through
#	subpass/subfail/subtest, and calls cleanup before returning.
proc test_bf {job_pd_args job_bf_args bf_expected} {
	global job_id job_pd job_bf
	global nodes_except_one bf_interval3 bf_interval5
	global test_name number re_word_str bin_sleep squeue

	# Submit a first job that will block job_pd (it leaves one node free)
	set job_id [submit_job -fail "--exclusive --nice=0 -o /dev/null -J $test_name --time=$bf_interval3 --wrap '$bin_sleep $bf_interval3' -N$nodes_except_one"]

	# Wait until the blocking job is running before submitting the others
	wait_for_job -fail $job_id "RUNNING"

	# Submit job_pd with the higher priority, then job_bf with the lower one
	set job_pd [submit_job -fail "--exclusive --nice=0 -o /dev/null -J $test_name $job_pd_args"]
	set job_bf [submit_job -fail "--exclusive --nice=100 -o /dev/null -J $test_name $job_bf_args"]

	# Wording and patterns that do not change between polls
	if {$bf_expected} {
		set waiting_for "to be"
		set should "should be"
	} else {
		set waiting_for "to NOT be"
		set should "should NOT be"
	}
	set subtest_desc "Job 2 ($job_bf) $should backfilled before job 1 ($job_pd)"
	# Matches the "priority state start-time" fields that follow a job id
	set re_squeue "($number) ($re_word_str) ($number|N/A)"

	# Poll until the start order of job_bf and job_pd is known
	set end false
	wait_for -pollinterval 1 -timeout $bf_interval5 {$end} {
		log_info "Waiting for job $job_bf $waiting_for backfilled before job $job_pd..."

		set out [run_command_output -fail "SLURM_TIME_FORMAT=%s $squeue -o '%i %Q %t %S' --sort=i -j $job_pd,$job_bf -h"]
		set pd_found [regexp "$job_pd $re_squeue" $out - job_pd_prio job_pd_state job_pd_time]
		set bf_found [regexp "$job_bf $re_squeue" $out - job_bf_prio job_bf_state job_bf_time]

		# Sanity checks
		if {!$pd_found || !$bf_found} {
			fail "Could not find jobs"
		}
		if {$job_bf_prio > $job_pd_prio} {
			fail "Job to be backfilled has higher priority ($job_bf_prio > $job_pd_prio)"
		}

		# Backfill subtest
		set bf_started [expr {$job_bf_state ne "PD"}]
		set pd_started [expr {$job_pd_state ne "PD"}]

		if {$bf_started && !$pd_started} {
			# Only job_bf has left the pending state: it was backfilled
			if {$bf_expected} {
				subpass $subtest_desc
			} else {
				subfail $subtest_desc "$job_bf is not PD and $job_pd is still PD"
			}
			set end true
		} elseif {$bf_started && $pd_started} {
			# Both have left the pending state: decide by start time. The
			# diagnostic shows the relation that holds when the check fails.
			if {$bf_expected} {
				subtest {$job_bf_time < $job_pd_time} $subtest_desc "$job_bf_time >= $job_pd_time"
			} else {
				subtest {$job_bf_time > $job_pd_time} $subtest_desc "$job_bf_time <= $job_pd_time"
			}
			set end true
		} elseif {$pd_started} {
			# Only job_pd has left the pending state: no backfill happened
			if {$bf_expected} {
				subfail $subtest_desc "$job_pd is not PD and $job_bf is still PD"
			} else {
				subpass $subtest_desc
			}
			set end true
		}
		# Otherwise both jobs are still pending: keep polling
	}

	# The loop above gives up after $bf_interval5, so report that value
	if {!$end} {
		subfail "Job ($job_bf) $should backfilled before job ($job_pd) (condition not reached after $bf_interval5)"
	}

	cleanup
}
# Both scenarios use the same higher-priority job, which asks for every
# available node and therefore has to wait for the blocking job to finish.
set job_pd_submit_args "--time=$bf_interval --wrap '$bin_sleep $bf_interval' -N$nodes_avail"

# Positive test: job_bf should be scheduled because that would NOT delay a job with higher prio
testproc test_bf $job_pd_submit_args "--time=$bf_interval --wrap '$bin_sleep $bf_interval' -N1" true

# Negative test: job_bf should NOT be scheduled because that would delay a job with higher prio
testproc test_bf $job_pd_submit_args "--time=$bf_interval5 --wrap '$bin_sleep $bf_interval5' -N1" false