#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          Check sinfo node information filtering (--state and --nodes
#          options).
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

# Pattern for a name token: one alphanumeric character followed by any run
# of non-space characters.
set name_string "\[a-zA-Z0-9\]\[^ ]*"

# Temporary batch job: its id (0 until a job has been submitted) and the
# path of the script that is submitted.
set tmp_id 0
set tmp_sc "${test_name}_tmp_sc"

# Node and CPU counts, filled in from sinfo output further down.
set num_cpus 0
set num_nodes 0

# Running total of sinfo lines that do not fit the filter under test. It is
# never reset between checks.
set mismatches 0

# Nodes selected for the test, keyed by a zero-based index.
array set node_list {}

# Zero-based index 2, i.e. the third entry of node_list. Entries 0 and 1 are
# used for the allocated-state check, because scontrol cannot change a node
# state to allocated.
set node_pos 2

# Not read by any code visible in this test.
set node_state ""
set matches 0

#
# change_state - run "scontrol update" to move nodes into a given state
#
# state: value passed to scontrol as state=<state>
# nodes: node name expression passed to scontrol as nodename=<nodes>
#
# The update is always tagged with reason=test. The test fails if scontrol
# has not exited when the expect timeout fires.
#
proc change_state { state nodes } {
	global scontrol

	set update_args [list update nodename=$nodes state=$state reason=test]
	spawn $scontrol {*}$update_args
	expect {
		eof {
			# scontrol finished; reap the child process
			wait
		}
		timeout {
			fail "scontrol is not responding"
		}
	}
}

#
# reset_state - set every node recorded in node_list back to state=idle
#
# One "scontrol update" is issued per node. The test fails if any of those
# scontrol invocations has not exited when the expect timeout fires.
#
proc reset_state { } {
	global scontrol node_list

	# Only the node names matter here; the array index is ignored.
	foreach {idx node} [array get node_list] {
		spawn $scontrol update nodename=$node state=idle
		expect {
			eof {
				wait
			}
			timeout {
				fail "scontrol is not responding"
			}
		}
	}
}

#
# cleanup - undo what the test changed
#
# In order: cancel the temporary job, set the selected nodes back to idle,
# and delete the temporary batch script.
#
proc cleanup {} {
	global tmp_id tmp_sc

	cancel_job $tmp_id

	# Return every node in node_list to the idle state
	reset_state

	file delete $tmp_sc
}

set partition [default_partition]

#
# Read the node count and CPU count that sinfo reports for the partition.
# If sinfo prints several lines, the values of the last one are kept.
#
spawn $sinfo -o%30D%30c --noheader -p$partition
expect {
	-re "($number) *($number)" {
		lassign [list $expect_out(1,string) $expect_out(2,string)] num_nodes num_cpus
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		fail "sinfo is not responding"
	}
}

# Eight nodes are needed: two per state exercised below.
if {8 > $num_nodes} {
	skip "This test requires 8 or more nodes but only found $num_nodes available"
}

# A CPU count of zero means the sinfo output was not parsed.
if {0 == $num_cpus} {
	fail "Number of CPUs reported is zero"
}

#
# Collect the names of the first 8 nodes that sinfo lists as idle into
# node_list(0) .. node_list(7)
#
set i 0
spawn $sinfo -o%N|%t|%o --noheader -p$partition -N
expect {
	-re "($re_word_str).idle" {
		if {$i < 8} {
			set node_list($i) $expect_out(1,string)
			incr i
		}
		# Keep consuming output even after 8 nodes have been stored, so
		# that the eof branch is always reached and the sinfo process is
		# reaped by wait.
		exp_continue
	}
	timeout {
		fail "sinfo is not responding"
	}
	eof {
		wait
	}
}

if {$i != 8} {
	skip "Unable to get all the required nodes ($i != 8)"
}

# Comma-separated list of the selected nodes, used for sinfo --node=...
set node_list_str [join [dict values [array get node_list]] ","]

#
# Submit an exclusive job on the first two selected nodes, then check that
# filtering on the allocated state reports exactly those two nodes
#
log_info "Testing ALLOCATED state"
make_bash_script $tmp_sc "sleep 20"
spawn $sbatch -p$partition -w$node_list(0),$node_list(1) -o/dev/null -n[expr {2 * $num_cpus}] --exclusive $tmp_sc
expect {
	-re "Submitted batch job ($number)" {
		set tmp_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sbatch is not responding"
	}
	eof {
		wait
	}
}

if {$tmp_id == 0} {
	fail "sbatch did not submit job"
}

wait_for_job -fail $tmp_id "RUNNING"

# The counter must exist before the final comparison: if sinfo prints no
# matching line it is never incremented, and reading an unset variable would
# abort the script with a Tcl error instead of a test failure.
set allocated_cnt 0
set allocated_nodes [list $node_list(0) $node_list(1)]
spawn $sinfo --Node --node=$node_list_str --long --exact --state=allocated -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$state eq "allocated" && $node in $allocated_nodes} {
			incr allocated_cnt
		} else {
			# Either the wrong state or a node that holds no job
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	timeout {
		fail "sinfo not responding"
	}
	eof {
		wait
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error"
}
if {$allocated_cnt != 2} {
	fail "Allocated count ($allocated_cnt) != 2"
}

cancel_job $tmp_id

# Put the remaining nodes into the states that the checks below filter on:
# entries 2-3 idle, 4-5 down, 6-7 drain.

change_state idle $node_list(2),$node_list(3)
change_state down $node_list(4),$node_list(5)
change_state drain $node_list(6),$node_list(7)

#
# Filter on the idle state. Entries 0-3 must be listed as idle; entries 6-7
# must be listed as drained, since drained nodes can be idle as well.
#
log_info "Testing IDLE state"
set idle_cnt 0
set idle_drain_cnt 0
set idle_nodes [list $node_list(0) $node_list(1) $node_list(2) $node_list(3)]
set drained_nodes [list $node_list(6) $node_list(7)]
spawn $sinfo --Node --node=$node_list_str --long --exact --state=idle -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$state eq "idle" && $node in $idle_nodes} {
			incr idle_cnt
		} elseif {$state eq "drained" && $node in $drained_nodes} {
			incr idle_drain_cnt
		} else {
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	eof {
		wait
	}
	timeout {
		fail "sinfo not responding"
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error"
}
if {$idle_cnt != 4} {
	fail "Idle count ($idle_cnt) != 4"
}
if {$idle_drain_cnt != 2} {
	fail "Idle drain count ($idle_drain_cnt) != 2"
}

#
# Filter on the down state: only entries 4 and 5 of node_list may be listed,
# and both must be reported as down
#
log_info "Testing DOWN state"
set down_cnt 0
set down_nodes [list $node_list(4) $node_list(5)]
spawn $sinfo --Node --node=$node_list_str --long --exact --state=down -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$state eq "down" && $node in $down_nodes} {
			incr down_cnt
		} else {
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	eof {
		wait
	}
	timeout {
		fail "sinfo not responding"
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error"
}
if {$down_cnt != 2} {
	fail "Down count ($down_cnt) != 2"
}

#
# Filter on the drain state: only entries 6 and 7 of node_list may be
# listed, and both must be reported as drained
#
log_info "Testing DRAIN state"
set drain_cnt 0
set drained_nodes [list $node_list(6) $node_list(7)]
spawn $sinfo --Node --node=$node_list_str --long --exact --state=drain -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$state eq "drained" && $node in $drained_nodes} {
			incr drain_cnt
		} else {
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	eof {
		wait
	}
	timeout {
		fail "sinfo not responding"
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error"
}
if {$drain_cnt != 2} {
	fail "Drain count ($drain_cnt) != 2"
}

#
# Filter on a comma-separated state list (down,drain): nodes in either state
# must be listed, i.e. entries 4-5 as down and entries 6-7 as drained
#
log_info "Testing DOWN,DRAIN state"
set down_cnt 0
set drain_cnt 0
set down_nodes [list $node_list(4) $node_list(5)]
set drained_nodes [list $node_list(6) $node_list(7)]
spawn $sinfo --Node --node=$node_list_str --long --exact --state=down,drain -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$state eq "down" && $node in $down_nodes} {
			incr down_cnt
		} elseif {$state eq "drained" && $node in $drained_nodes} {
			incr drain_cnt
		} else {
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	eof {
		wait
	}
	timeout {
		fail "sinfo not responding"
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error ($mismatches)"
}
if {$down_cnt != 2} {
	fail "Down count ($down_cnt) != 2"
}
if {$drain_cnt != 2} {
	fail "Drain count ($drain_cnt) != 2"
}

#
# Filter on nodes that are both down and drained (down&drain). Entries 4-5
# are already down and are drained here as well, so they are the only nodes
# expected in the output; sinfo is expected to show them as drained.
#
log_info "Testing \"DOWN&DRAIN\" state"
set drain_cnt 0
set down_drained_nodes [list $node_list(4) $node_list(5)]
change_state drain $node_list(4),$node_list(5)
spawn $sinfo --Node --node=$node_list_str --long --exact --state=down&drain -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$state eq "drained" && $node in $down_drained_nodes} {
			incr drain_cnt
		} else {
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	eof {
		wait
	}
	timeout {
		fail "sinfo not responding"
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error ($mismatches)"
}
if {$drain_cnt != 2} {
	fail "Drain count ($drain_cnt) != 2"
}