blob: 27890a5bab98e8b64d67b5c04d7d264d355cbac3 [file] [log] [blame]
#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
# Check sinfo node information filtering (--state and --nodes
# options).
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

# Batch script written for the allocated-state test, and the id of the job
# submitted from it (0 means no job has been submitted yet)
set tmp_sc "${test_name}_tmp_sc"
set tmp_id 0

# Regex for a name column in sinfo output: one alphanumeric character
# followed by any run of non-space characters
set name_string "\[a-zA-Z0-9\]\[^ ]*"

# Values and counters filled in while parsing sinfo output
set num_nodes 0
set num_cpus 0
set node_state ""
set matches 0
set mismatches 0

# Index 2 is the third slot of node_list: the first two nodes (indices 0
# and 1) are used for the allocated state, since scontrol cannot change a
# node's state to allocated
set node_pos 2

# node_list(0..7) will hold the names of the nodes used by this test
array set node_list {}
# Ask scontrol to move one or more nodes into a given state.
#
# state - value passed to scontrol as state=<state>
# nodes - value passed to scontrol as nodename=<nodes>
#
# The update is always issued with reason=test. The test fails if scontrol
# does not reach end-of-file before the expect timeout.
proc change_state { state nodes } {
	global scontrol

	set update_args [list update nodename=$nodes state=$state reason=test]
	spawn $scontrol {*}$update_args
	expect {
		eof {
			wait
		}
		timeout {
			fail "scontrol is not responding"
		}
	}
}
# Return every node recorded in the global node_list array to the idle
# state, issuing one scontrol update per node.
#
# The test fails if any scontrol invocation does not reach end-of-file
# before the expect timeout.
proc reset_state { } {
	global scontrol node_list node_pos

	foreach idx [array names node_list] {
		set node $node_list($idx)
		spawn $scontrol update nodename=$node state=idle
		expect {
			eof {
				wait
			}
			timeout {
				fail "scontrol is not responding"
			}
		}
	}
}
# Undo everything this test may have left behind: the batch job, the
# modified node states and the temporary batch script.
# NOTE(review): presumably invoked by the test framework loaded from
# ./globals when the test ends - confirm against that file.
proc cleanup {} {
	global tmp_id
	global tmp_sc

	# Cancel the job submitted for the allocated-state test
	cancel_job $tmp_id

	# Put all test nodes back into the idle state
	reset_state

	# Remove the generated batch script
	file delete $tmp_sc
}
# Every sinfo and sbatch call below is restricted to this partition
set partition [default_partition]
#
# Get some values to test against: parse two numbers from the sinfo output
# and store them as num_nodes and num_cpus. If several lines match, the
# values from the last matching line are kept (exp_continue keeps reading
# until end-of-file).
#
spawn $sinfo -o%30D%30c --noheader -p$partition
expect {
-re "($number) *($number)" {
set num_nodes $expect_out(1,string)
set num_cpus $expect_out(2,string)
exp_continue
}
timeout {
fail "sinfo is not responding"
}
eof {
wait
}
}
# The test places 8 distinct nodes into specific states, so fewer than 8
# nodes means it cannot run
if {$num_nodes < 8} {
skip "This test requires 8 or more nodes but only found $num_nodes available"
}
# num_cpus is still at its initial value of 0 if sinfo output was not parsed
if {$num_cpus == 0} {
fail "Number of CPUs reported is zero"
}
#
# Get a list of nodes that we can use: store the names of the first 8 nodes
# whose sinfo line shows the node name followed by "idle" in
# node_list(0) .. node_list(7)
#
set i 0
spawn $sinfo -o%N|%t|%o --noheader -p$partition -N
expect {
-re "($re_word_str).idle" {
if {$i<8} {
set node_list($i) $expect_out(1,string)
incr i 1
exp_continue
}
}
timeout {
fail "sinfo is not responding"
}
eof {
wait
}
}
# Fewer than 8 idle nodes were found, so the test cannot proceed
if {$i != 8} {
skip "Unable to get all the required nodes ($i != 8)"
}
# Comma-separated list of the 8 selected nodes, passed to sinfo --node below
set node_list_str [join [dict values [array get node_list]] ","]
#
# Submit a job to filter for allocated job state.
#
# An exclusive job is placed on node_list(0) and node_list(1); once it is
# running, sinfo --state=allocated restricted to the 8 test nodes must
# report exactly those two nodes, each in state "allocated".
#
log_info "Testing ALLOCATED state"
make_bash_script $tmp_sc "sleep 20"
spawn $sbatch -p$partition -w$node_list(0),$node_list(1) -o/dev/null -n[expr 2 * $num_cpus] --exclusive $tmp_sc
expect {
	-re "Submitted batch job ($number)" {
		set tmp_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sbatch is not responding"
	}
	eof {
		wait
	}
}
# tmp_id keeps its initial value of 0 when no job id was parsed
if {$tmp_id == 0} {
	fail "sbatch did not submit job"
}
wait_for_job -fail $tmp_id "RUNNING"

# The counter must be initialised under the same name that is incremented
# and checked below; otherwise reading it after zero matching lines raises
# a Tcl "no such variable" error instead of the intended failure message.
set allocated_cnt 0
spawn $sinfo --Node --node=$node_list_str --long --exact --state=allocated -p$partition
expect {
	-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
		# Group 2 is the node name, group 5 is the reported node state
		set node $expect_out(2,string)
		set state $expect_out(5,string)
		if {$expect_out(5,string) eq "allocated" &&
		    ($node eq $node_list(0) ||
		     $node eq $node_list(1))} {
			incr allocated_cnt
		} else {
			log_debug "$node:$state"
			incr mismatches
		}
		exp_continue
	}
	-re "error:" {
		fail "Unexpected error from sinfo"
	}
	-re "Unable to contact" {
		fail "Slurm appears to be down"
	}
	timeout {
		fail "sinfo not responding"
	}
	eof {
		wait
	}
}
if {$mismatches != 0} {
	fail "sinfo node filtering error"
}
if {$allocated_cnt != 2} {
	fail "Allocated count ($allocated_cnt) != 2"
}
# Release the two nodes so that the following tests can use them
cancel_job $tmp_id
# Change nodes to different states and see if they made it to that state:
# nodes 2,3 -> idle, nodes 4,5 -> down, nodes 6,7 -> drain.
# These states are relied upon by all of the filter tests that follow.
change_state idle $node_list(2),$node_list(3)
change_state down $node_list(4),$node_list(5)
change_state drain $node_list(6),$node_list(7)
#
# Use the idle state filter on the 8 test nodes. Expected output: nodes 0-3
# reported as "idle" and nodes 6,7 reported as "drained"; any other line
# counts as a mismatch.
#
log_info "Testing IDLE state"
set idle_cnt 0
set idle_drain_cnt 0
spawn $sinfo --Node --node=$node_list_str --long --exact --state=idle -p$partition
expect {
-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
set node $expect_out(2,string)
set state $expect_out(5,string)
if {$expect_out(5,string) eq "idle" &&
($node eq $node_list(0) ||
$node eq $node_list(1) ||
$node eq $node_list(2) ||
$node eq $node_list(3))} {
incr idle_cnt
# NOTE: drained nodes can be idle as well.
} elseif {$expect_out(5,string) eq "drained" &&
($node eq $node_list(6) ||
$node eq $node_list(7))} {
incr idle_drain_cnt
} else {
log_debug "$node:$state"
incr mismatches
}
exp_continue
}
-re "error:" {
fail "Unexpected error from sinfo"
}
-re "Unable to contact" {
fail "Slurm appears to be down"
}
timeout {
fail "sinfo not responding"
}
eof {
wait
}
}
# mismatches is never reset, so it accumulates across all filter tests
if {$mismatches != 0} {
fail "sinfo node filtering error"
}
if {$idle_cnt != 4} {
fail "Idle count ($idle_cnt) != 4"
}
if {$idle_drain_cnt != 2} {
fail "Idle drain count ($idle_drain_cnt) != 2"
}
#
# Use the down state filter on the 8 test nodes. Expected output: exactly
# nodes 4 and 5 (set to down earlier), each reported as "down"; any other
# line counts as a mismatch.
#
log_info "Testing DOWN state"
set down_cnt 0
spawn $sinfo --Node --node=$node_list_str --long --exact --state=down -p$partition
expect {
-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
set node $expect_out(2,string)
set state $expect_out(5,string)
if {$expect_out(5,string) eq "down" &&
($node eq $node_list(4) ||
$node eq $node_list(5))} {
incr down_cnt
} else {
log_debug "$node:$state"
incr mismatches
}
exp_continue
}
-re "error:" {
fail "Unexpected error from sinfo"
}
-re "Unable to contact" {
fail "Slurm appears to be down"
}
timeout {
fail "sinfo not responding"
}
eof {
wait
}
}
# mismatches is never reset, so it accumulates across all filter tests
if {$mismatches != 0} {
fail "sinfo node filtering error"
}
if {$down_cnt != 2} {
fail "Down count ($down_cnt) != 2"
}
#
# Use the drain state filter on the 8 test nodes. Expected output: exactly
# nodes 6 and 7 (set to drain earlier), each reported as "drained"; any
# other line counts as a mismatch.
#
log_info "Testing DRAIN state"
set drain_cnt 0
spawn $sinfo --Node --node=$node_list_str --long --exact --state=drain -p$partition
expect {
-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
set node $expect_out(2,string)
set state $expect_out(5,string)
if {$expect_out(5,string) eq "drained" &&
($node eq $node_list(6) ||
$node eq $node_list(7))} {
incr drain_cnt
} else {
log_debug "$node:$state"
incr mismatches
}
exp_continue
}
-re "error:" {
fail "Unexpected error from sinfo"
}
-re "Unable to contact" {
fail "Slurm appears to be down"
}
timeout {
fail "sinfo not responding"
}
eof {
wait
}
}
# mismatches is never reset, so it accumulates across all filter tests
if {$mismatches != 0} {
fail "sinfo node filtering error"
}
if {$drain_cnt != 2} {
fail "Drain count ($drain_cnt) != 2"
}
#
# Test the comma-separated filter "down,drain": nodes in either state are
# expected. Nodes 4,5 must be reported as "down" and nodes 6,7 as
# "drained"; any other line counts as a mismatch.
#
log_info "Testing DOWN,DRAIN state"
set drain_cnt 0
set down_cnt 0
spawn $sinfo --Node --node=$node_list_str --long --exact --state=down,drain -p$partition
expect {
-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
set node $expect_out(2,string)
set state $expect_out(5,string)
if {$expect_out(5,string) eq "down" &&
($node eq $node_list(4) ||
$node eq $node_list(5))} {
incr down_cnt
} elseif {$expect_out(5,string) eq "drained" &&
($node eq $node_list(6) ||
$node eq $node_list(7))} {
incr drain_cnt
} else {
log_debug "$node:$state"
incr mismatches
}
exp_continue
}
-re "error:" {
fail "Unexpected error from sinfo"
}
-re "Unable to contact" {
fail "Slurm appears to be down"
}
timeout {
fail "sinfo not responding"
}
eof {
wait
}
}
# mismatches is never reset, so it accumulates across all filter tests
if {$mismatches != 0} {
fail "sinfo node filtering error ($mismatches)"
}
if {$down_cnt != 2} {
fail "Down count ($down_cnt) != 2"
}
if {$drain_cnt != 2} {
fail "Drain count ($drain_cnt) != 2"
}
#
# Test the "down&drain" filter: only nodes that are in both states are
# expected. Nodes 4,5 (already set to down) are additionally set to drain,
# and the test expects exactly those two nodes, each reported as "drained";
# any other line (including nodes 6,7) counts as a mismatch.
#
log_info "Testing \"DOWN&DRAIN\" state"
set drain_cnt 0
change_state drain $node_list(4),$node_list(5)
spawn $sinfo --Node --node=$node_list_str --long --exact --state=down&drain -p$partition
expect {
-re "($eol)($name_string) *($number_with_suffix) *($name_string) *($re_word_str)" {
set node $expect_out(2,string)
set state $expect_out(5,string)
if {$expect_out(5,string) eq "drained" &&
($node eq $node_list(4) ||
$node eq $node_list(5))} {
incr drain_cnt
} else {
log_debug "$node:$state"
incr mismatches
}
exp_continue
}
-re "error:" {
fail "Unexpected error from sinfo"
}
-re "Unable to contact" {
fail "Slurm appears to be down"
}
timeout {
fail "sinfo not responding"
}
eof {
wait
}
}
# mismatches is never reset, so it accumulates across all filter tests
if {$mismatches != 0} {
fail "sinfo node filtering error ($mismatches)"
}
if {$drain_cnt != 2} {
fail "Drain count ($drain_cnt) != 2"
}