| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test of Slurm functionality |
| # Validate that preemption by qos is enforced |
| ############################################################################ |
| # Copyright (C) SchedMD LLC. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc. |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| source ./globals_accounting |
| |
| # Accounting entities used by this test (user is filled in later) |
| set user {} |
| set acct_1 ${test_name}_acct1 |
| set acct_2 ${test_name}_acct2 |
| set qos_1 ${test_name}_qos1 |
| set qos_2 ${test_name}_qos2 |
| |
| # Job ids: a generic one plus one per QOS-specific job |
| set job_id 0 |
| set qos_1_id 0 |
| set qos_2_id 0 |
| |
| # Batch script location and the number of nodes reported by get_nodes_by_state |
| set file_in $test_dir/job_script |
| set nodes [llength [get_nodes_by_state]] |
| |
| # Job state names the test waits for |
| set running_state RUNNING |
| set pending_state PENDING |
| set preempted_state PREEMPTED |
| set done_state DONE |
| |
| # scontrol job update directive that sets the eligible time to now |
| set eligible_now_job_directive EligibleTime=now |
| |
| # Prerequisites: skip the test unless the configuration matches what it needs. |
| if {[get_config_param "PreemptType"] ne "preempt/qos"} { |
|     skip "This test requires that PreemptType=preempt/qos" |
| } |
| if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} { |
|     skip "This test requires use of Slurmdbd" |
| } |
| if {$nodes < 2} { |
|     skip "Not enough available nodes ($nodes < 2)" |
| } |
| |
| # Extract the numeric part of MinJobAge. Check the regexp result so that an |
| # unparsable value is reported clearly instead of leaving min_job_age unset |
| # and aborting on the comparison below. |
| if {![regexp "($number)" [get_config_param "MinJobAge"] {} min_job_age]} { |
|     fail "Unable to parse MinJobAge from the Slurm configuration" |
| } |
| if {$min_job_age < 10} { |
|     skip "MinJobAge configured too low for this test ($min_job_age < 10)" |
| } |
| |
| # Create a QOS, an account tied to that QOS, and an association for the |
| # global user, all through sacctmgr. |
| # |
| # Arguments: |
| #   acct_name - account to create |
| #   qos_name  - QOS to create and assign to the account |
| #   pre_qos   - value passed as preempt= for the new QOS (may be empty) |
| #   pre_mode  - value passed as preemptmode= for the new QOS |
| # |
| # Calls fail when sacctmgr times out, or when the three expected |
| # confirmation messages were not all seen. |
| proc acct_setup { acct_name qos_name pre_qos pre_mode } { |
|     global user nodes sacctmgr |
| |
|     # Each step is: expected confirmation pattern, then the sacctmgr arguments |
|     set steps {} |
|     lappend steps [list "Adding QOS" -i create qos $qos_name preempt=$pre_qos preemptmode=$pre_mode maxnodes=$nodes] |
|     lappend steps [list "Adding Account" -i create account $acct_name qos=$qos_name] |
|     lappend steps [list "Associations" -i add user $user account=$acct_name] |
| |
|     set confirmed 0 |
|     foreach step $steps { |
|         set ack_pattern [lindex $step 0] |
|         set sacctmgr_args [lrange $step 1 end] |
| |
|         spawn $sacctmgr {*}$sacctmgr_args |
|         expect { |
|             -re $ack_pattern { |
|                 incr confirmed |
|                 exp_continue |
|             } |
|             timeout { |
|                 fail "sacctmgr is not responding" |
|             } |
|             eof { |
|                 wait |
|             } |
|         } |
|     } |
| |
|     # One confirmation is expected per step |
|     if {$confirmed != 3} { |
|         fail "Account was not created properly" |
|     } |
| } |
| |
| # Submit the test batch script with sbatch, using every available node |
| # exclusively. |
| # |
| # Arguments: |
| #   acct1   - account the job is charged to |
| #   het_job - when true, submit a heterogeneous job: one component with a |
| #             single node and a second component with the remaining nodes |
| # |
| # Returns the job id reported by sbatch. Calls fail when sbatch times out |
| # or reports no job id. |
| proc sub_job { acct1 {het_job false} } { |
|     global nodes file_in sbatch number |
| |
|     if {$het_job} { |
|         set remaining_nodes [expr {$nodes - 1}] |
|         set sbatch_args [list -o/dev/null --exclusive -N1 -A$acct1 : -N$remaining_nodes -A$acct1 $file_in] |
|     } else { |
|         set sbatch_args [list -o/dev/null --exclusive -N$nodes -A$acct1 $file_in] |
|     } |
| |
|     set submitted_id 0 |
|     spawn $sbatch {*}$sbatch_args |
|     expect { |
|         -re "Submitted batch job ($number)" { |
|             set submitted_id $expect_out(1,string) |
|             exp_continue |
|         } |
|         timeout { |
|             fail "sbatch is not responding" |
|         } |
|         eof { |
|             wait |
|         } |
|     } |
| |
|     if {$submitted_id != 0} { |
|         return $submitted_id |
|     } |
|     fail "sbatch did not submit job" |
| } |
| |
| # Remove the accounts and QOS used by this test. |
| # |
| # Waits for jobs of both accounts via wait_for_account_done, then for each |
| # account/QOS pair removes the user from the account, deletes the account |
| # and deletes the QOS. Calls fail when sacctmgr times out, and logs a |
| # warning unless exactly four delete acknowledgements were seen. |
| proc cleanup {} { |
|     global user acct_1 acct_2 qos_1 qos_2 qos_1_id qos_2_id sacctmgr |
| |
|     set acknowledged 0 |
| |
|     wait_for_account_done $acct_1,$acct_2 |
| |
|     foreach acct_name [list $acct_1 $acct_2] qos_name [list $qos_1 $qos_2] { |
|         run_command "$sacctmgr -i remove user $user where account=$acct_name" |
| |
|         # Delete the account first, then its QOS; either a deletion message |
|         # or "Nothing deleted" counts as an acknowledgement |
|         set targets {} |
|         lappend targets account $acct_name "(Deleting accounts|Nothing deleted)" |
|         lappend targets qos $qos_name "(Deleting QOS|Nothing deleted)" |
| |
|         foreach {entity entity_name ack_pattern} $targets { |
|             spawn $sacctmgr delete -i $entity $entity_name |
|             expect { |
|                 -re $ack_pattern { |
|                     incr acknowledged |
|                     exp_continue |
|                 } |
|                 timeout { |
|                     fail "sacctmgr is not responding" |
|                 } |
|                 eof { |
|                     wait |
|                 } |
|             } |
|         } |
|     } |
| |
|     if {$acknowledged != 4} { |
|         log_warn "Unable to clean up accounts and qos" |
|     } |
| } |
| |
| # Remove anything left behind by a previous run before creating new entities. |
| # The log line is emitted before the cleanup it announces. |
| # NOTE(review): user is still empty here (it is assigned just below), so the |
| # "remove user" command issued by cleanup carries no user name - confirm this |
| # is intended. |
| log_info "**** Cleanup from previous run ****" |
| cleanup |
| make_bash_script $file_in "sleep 30" |
| |
| set user [get_my_user_name] |
| |
| # Preempt modes to test |
| set preempt_mode_cancel "cancel" |
| set preempt_mode_requeue "requeue" |
| set preempt_mode_list "$preempt_mode_cancel $preempt_mode_requeue" |
| |
| # Job types to test for each mode |
| set job_type_batch "batch" |
| set job_type_het "het" |
| set job_type_list "$job_type_batch $job_type_het" |
| |
| # qos_1 is created with an empty preempt list; qos_2 is created with |
| # preempt=qos_1. The preempt mode of qos_1 is changed per iteration below. |
| acct_setup $acct_1 $qos_1 "" $preempt_mode_cancel |
| acct_setup $acct_2 $qos_2 $qos_1 cluster |
| |
| # Run every combination of preempt mode and job type |
| foreach preempt_mode $preempt_mode_list { |
|     foreach job_type $job_type_list { |
|         set mode_type_string [string toupper "$preempt_mode $job_type"] |
|         log_info "*** TEST PREEMPT $mode_type_string JOB ***" |
| |
|         # Switch qos_1 to the preempt mode under test |
|         set mod_qos_vals(preemptmode) $preempt_mode |
|         mod_qos $qos_1 [array get mod_qos_vals] |
| |
|         if { $job_type eq $job_type_batch } { |
|             set have_het_job 0 |
|         } else { |
|             set have_het_job 1 |
|         } |
| |
|         # Start the qos_1 job on all nodes and wait until it runs |
|         set qos_1_id [sub_job $acct_1 $have_het_job] |
|         wait_for_job -fail $qos_1_id $running_state $have_het_job |
| |
|         # Submit the qos_2 job on the same nodes; it must reach RUNNING |
|         set qos_2_id [sub_job $acct_2] |
|         wait_for_job -fail $qos_2_id $running_state |
| |
|         if { $preempt_mode eq $preempt_mode_cancel } { |
|             # Cancel mode: the qos_1 job should end up PREEMPTED |
|             set jobs_ok false |
|             wait_for {$jobs_ok} { |
|                 set jobs_ok [check_job_state $qos_1_id $preempted_state $have_het_job] |
|             } |
|             subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $preempted_state state" |
|         } else { |
|             # Requeue mode: the qos_1 job should go back to PENDING |
|             set jobs_ok false |
|             wait_for {$jobs_ok} { |
|                 set jobs_ok [check_job_state $qos_1_id $pending_state $have_het_job] |
|             } |
|             subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $pending_state state" |
| |
|             # Set EligibleTime=now on the qos_2 job and wait for it to finish |
|             run_command "$scontrol update job $qos_2_id $eligible_now_job_directive" |
|             wait_for_job -fail $qos_2_id $done_state |
| |
|             # Make the requeued qos_1 job eligible now to avoid a delay, |
|             # then wait for it to restart |
|             run_command "$scontrol update job $qos_1_id $eligible_now_job_directive" |
|             wait_for_job -fail $qos_1_id $running_state $have_het_job |
| |
|             set jobs_ok false |
|             wait_for {$jobs_ok} { |
|                 set jobs_ok [check_job_state $qos_1_id $running_state $have_het_job] |
|             } |
|             subtest {$jobs_ok} "Job ($qos_1_id) should be fully in the $running_state state" |
|         } |
| |
|         # Let all jobs of both accounts finish before the next combination |
|         wait_for_account_done $acct_1,$acct_2 |
|     } |
| } |
| |
| # Poll at most max_test times, pausing 10 seconds between polls, for as |
| # long as wait_for_node yields a zero/false result. |
| # NOTE(review): presumably a zero result means a node is still in the |
| # "planned" state, so this waits for planned nodes to clear - confirm |
| # against the definition of wait_for_node. |
| set max_test 3 |
| set test 0 |
| while {($test < $max_test) && ![wait_for_node -timeout 1 planned 1]} { |
|     incr test |
|     # Run the sleep program through exec. A bare "$bin_sleep 10" uses the |
|     # variable's value as a command name, which only works when that value |
|     # happens to name a Tcl/Expect command. |
|     exec $bin_sleep 10 |
| } |