| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test basic functionality of backfill scheduler |
| ############################################################################ |
| # Copyright (C) SchedMD LLC. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
source ./globals

# Node counts: the blocking job submitted by test_bf uses every available
# node except one, so at least two nodes are required.
set nodes_avail      [llength [get_nodes_by_state]]
set nodes_except_one [expr {$nodes_avail - 1}]
set nodes_min        2

# Fetch SchedulerParameters once; it is needed both for bf_interval and for
# the bf_min_prio_reserve guard below.
set sched_params [get_config_param "SchedulerParameters"]

# Backfill interval and its multiples, used as job durations and timeouts.
# 30 is passed as the fallback value (presumably used when bf_interval is
# not configured -- confirm against param_value in globals).
set bf_interval  [param_value $sched_params "bf_interval" 30]
set bf_interval3 [expr {$bf_interval * 3}]
set bf_interval5 [expr {$bf_interval * 5}]

# Ids of the three jobs handled by test_bf and cleanup; 0 until submitted.
set job_id 0
set job_pd 0
set job_bf 0

# Skip guards: configurations under which this test is not meaningful.
if {[get_config_param "SchedulerType"] ne "sched/backfill"} {
	skip "This test requires SchedulerType = sched/backfill"
}

if {[string first "bf_min_prio_reserve" $sched_params] != -1} {
	skip "This test cannot work with bf_min_prio_reserve"
}

if {$nodes_avail < $nodes_min} {
	skip "Not enough nodes currently available ($nodes_avail avail < $nodes_min needed)."
}

if {[get_partition_param [default_partition] "OverSubscribe"] ne "NO"} {
	skip "This test does not work if OverSubscribe is enabled"
}
| |
#
# Cancel the three jobs tracked by this test (blocking, high-priority and
# low-priority job) in a single cancel_job call.
#
proc cleanup { } {
	global job_id job_pd job_bf

	# Collect the current value of each tracked job id, in the same order
	# as they are submitted.
	set tracked_jobs {}
	foreach id_var [list job_id job_pd job_bf] {
		lappend tracked_jobs [set $id_var]
	}

	cancel_job $tracked_jobs
}
| |
#
# Run one backfill scenario and record the outcome as a subtest.
#
# A blocking job is started on all nodes but one, then a higher-priority job
# (nice=0) and a lower-priority job (nice=100) are submitted. squeue is
# polled until one of them leaves the PD state, and the result is compared
# with the expectation.
#
# job_pd_args - extra submit_job arguments for the higher-priority job
# job_bf_args - extra submit_job arguments for the lower-priority job
# bf_expected - boolean, true if the lower-priority job must start first
#
# The jobs are cancelled through cleanup before returning.
#
proc test_bf {job_pd_args job_bf_args bf_expected} {
	global job_id job_pd job_bf
	global nodes_except_one bf_interval3 bf_interval5
	global test_name number re_word_str bin_sleep squeue

	# Submit a first job that will block the second one
	set job_id [submit_job -fail "--exclusive --nice=0 -o /dev/null -J $test_name --time=$bf_interval3 --wrap '$bin_sleep $bf_interval3' -N$nodes_except_one"]

	# Wait until the blocking job is running, so that the next jobs cannot
	# be scheduled ahead of it
	wait_for_job -fail $job_id "RUNNING"

	# Submit the job_pd with higher prio, and later the job_bf with less prio
	set job_pd [submit_job -fail "--exclusive --nice=0 -o /dev/null -J $test_name $job_pd_args"]
	set job_bf [submit_job -fail "--exclusive --nice=100 -o /dev/null -J $test_name $job_bf_args"]

	# Pattern for the three fields following the job id in the squeue
	# output requested below: priority, state and start time.
	# It does not change between polls, so build it once.
	set re_squeue "($number) ($re_word_str) ($number|N/A)"

	# Poll until the start order of job_bf and job_pd can be decided
	set end false
	wait_for -pollinterval 1 -timeout $bf_interval5 {$end} {
		if {$bf_expected} {
			log_info "Waiting for job $job_bf to be backfilled before job $job_pd..."
		} else {
			log_info "Waiting for job $job_bf to NOT be backfilled before job $job_pd..."
		}

		set bf_found false
		set pd_found false
		set out [run_command_output -fail "SLURM_TIME_FORMAT=%s $squeue -o '%i %Q %t %S' --sort=i -j $job_pd,$job_bf -h"]
		if {[regexp "$job_pd $re_squeue" $out - job_pd_prio job_pd_state job_pd_time]} {
			set pd_found true
		}
		if {[regexp "$job_bf $re_squeue" $out - job_bf_prio job_bf_state job_bf_time]} {
			set bf_found true
		}

		# Sanity checks
		if {!$pd_found || !$bf_found} {
			fail "Could not find jobs"
		}
		if {$job_bf_prio > $job_pd_prio} {
			fail "Job to be backfilled has higher priority ($job_bf_prio > $job_pd_prio)"
		}

		# Backfill subtest
		if {"$job_bf_state" ne "PD"} {
			if {"$job_pd_state" eq "PD"} {
				# Only job_bf has left PD: it went first
				if {$bf_expected} {
					subpass "Job 2 ($job_bf) should be backfilled before job 1 ($job_pd)"
				} else {
					subfail "Job 2 ($job_bf) should NOT be backfilled before job 1 ($job_pd)" "$job_bf is not PD and $job_pd is still PD"
				}
				set end true
			} else {
				# Both jobs have left PD: decide by the reported start times
				if {$bf_expected} {
					subtest {$job_bf_time < $job_pd_time} "Job 2 ($job_bf) should be backfilled before job 1 ($job_pd)" "$job_bf_time > $job_pd_time"
				} else {
					subtest {$job_bf_time > $job_pd_time} "Job 2 ($job_bf) should NOT be backfilled before job 1 ($job_pd)" "$job_bf_time < $job_pd_time"
				}
				set end true
			}
		} else {
			if {"$job_pd_state" ne "PD"} {
				# Only job_pd has left PD: job_bf was not backfilled
				if {$bf_expected} {
					subfail "Job 2 ($job_bf) should be backfilled before job 1 ($job_pd)" "$job_pd is not PD and $job_bf is still PD"
				} else {
					subpass "Job 2 ($job_bf) should NOT be backfilled before job 1 ($job_pd)"
				}
				set end true
			}
			# Both jobs still PD: nothing to decide yet, keep polling
		}
	}

	# Neither job left PD within the timeout; report the timeout that was
	# actually used by wait_for above
	if {!$end} {
		if {$bf_expected} {
			subfail "Job ($job_bf) should be backfilled before job ($job_pd) (condition not reached after $bf_interval5)"
		} else {
			subfail "Job ($job_bf) should NOT be backfilled before job ($job_pd) (condition not reached after $bf_interval5)"
		}
	}
	cleanup
}
| |
# Both scenarios use the same higher-priority job: it asks for every
# available node, so it cannot start while the blocking job of test_bf
# (all nodes but one) is running.
set pd_args "--time=$bf_interval --wrap '$bin_sleep $bf_interval' -N$nodes_avail"

# Positive test: the low-priority job should be backfilled, because running
# it would NOT delay the job with higher priority
set bf_args_short "--time=$bf_interval --wrap '$bin_sleep $bf_interval' -N1"
testproc test_bf $pd_args $bf_args_short true

# Negative test: the low-priority job should NOT be backfilled, because
# running it would delay the job with higher priority
set bf_args_long "--time=$bf_interval5 --wrap '$bin_sleep $bf_interval5' -N1"
testproc test_bf $pd_args $bf_args_long false