#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
# Test of --het-group option.
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

set task_num 2

# Skip unless the cluster is configured with the backfill scheduler.
set sched_type [get_config_param "SchedulerType"]
if {$sched_type ne "sched/backfill"} {
	skip "This test requires SchedulerType = sched/backfill"
}

# The allocation requested later has three single-node components, so the
# default partition must provide at least three nodes.
set nb_nodes [get_partition_param [default_partition] "TotalNodes"]
if {$nb_nodes < 3} {
	skip "Need 3 or more nodes in default partition"
}
# Cancel the job whose id is stored in the global het_job_id.
# NOTE(review): presumably invoked by the test framework when the test
# ends -- confirm against ./globals.
proc cleanup {} {
	cancel_job $::het_job_id
}
set het_job_id 0
set matches 0
set index 0
# Start from an EMPTY array.  Pre-seeding component(0) would let the
# "exactly three components" check below pass even when the record for
# het offset 0 was never found, and would leave "0" as its node regex.
array set component {}
set timeout $max_job_delay

# Request a heterogeneous allocation made of three single-node components
# and run an interactive shell inside it.
spawn $salloc -t1 -N1 : -N1 : -N1 $bin_bash
expect {
	-re "Granted job allocation ($number)" {
		# Remember the job id, then normalise the shell prompt so
		# that $test_prompt can be matched from here on.
		set het_job_id $expect_out(1,string)
		reset_bash_prompt
		exp_continue
	}
	-re "$test_prompt" {
	}
	timeout {
		fail "salloc : allocation not granted in $timeout seconds"
	}
}
if {$het_job_id == 0} {
	fail "salloc failure"
}

# Record the NodeList of every job record that belongs to this het job,
# indexed by its HetJobOffset.
log_debug "Collect Nodenames"
dict for {k v} [get_jobs] {
	if {[dict exists $v "HetJobId"]} {
		if {[dict get $v "HetJobId"] eq $het_job_id} {
			set het_offset [dict get $v "HetJobOffset"]
			set component($het_offset) [dict get $v "NodeList"]
		}
	}
}
if {[array size component] != 3} {
	fail "Unable to obtain the nodes of each component ([array size component] != 3)"
}
log_info "Test Sending Jobs To Various Components"

# Launch one step on each het group in turn.  The step prints
# SLURMD_NODENAME, which must show the node recorded for that component
# exactly once before the shell prompt returns.
foreach group_idx {0 1 2} {
	set matches 0
	send "$srun --mpi=none --het-group=$group_idx printenv SLURMD_NODENAME\r"
	expect {
		-re $component($group_idx) {
			incr matches
			exp_continue
		}
		-re "$test_prompt" {
			# Prompt is back: the step has finished.
		}
		timeout {
			fail "srun not responding"
		}
	}
	if {$matches != 1} {
		fail "srun failure for --het-group=$group_idx ($matches != 1: $expect_out(buffer))"
	}
}
# A het-group range (0-2) must run the step on all three components:
# one node name line is expected per component.
set matches 0
send "$srun --mpi=none --het-group=0-2 printenv SLURMD_NODENAME\r"
expect {
	-re "$component(0)|$component(1)|$component(2)" {
		incr matches
		exp_continue
	}
	-re "$test_prompt" {
		# Prompt is back: the step has finished.
	}
	timeout {
		fail "srun not responding"
	}
}
if {$matches != 3} {
	# Report the count that was actually expected (3).
	fail "srun failure ($matches != 3: $expect_out(buffer))"
}

# A het-group list (0,2) must run the step on exactly those two components.
set matches 0
send "$srun --mpi=none --het-group=0,2 printenv SLURMD_NODENAME\r"
expect {
	-re "$component(2)|$component(0)" {
		incr matches
		exp_continue
	}
	-re "$test_prompt" {
		# Prompt is back: the step has finished.
	}
	timeout {
		fail "srun not responding"
	}
}
if {$matches != 2} {
	# Report the count that was actually expected (2).
	fail "srun failure ($matches != 2: $expect_out(buffer))"
}
log_info "Testing Error Messages"

# Range 0-3 names het group 3; the expected error text says the
# allocation's maximum value is 2.
set matches 0
send "$srun --mpi=none --het-group=0-3 printenv SLURMD_NODENAME\r"
expect {
	timeout {
		fail "srun not responding"
	}
	-re "srun: error: Attempt to run a job step with het group value of 3, but the job allocation has maximum value of 2" {
		log_debug "Error is expected, no worries"
		incr matches
		exp_continue
	}
	-re "$test_prompt" {
		# Back at the prompt; stop waiting.
	}
}
if {$matches != 1} {
	fail "srun failure ($matches != 1: $expect_out(buffer))"
}

# List 0,5 names het group 5, which must be rejected the same way.
set matches 0
send "$srun --mpi=none --het-group=0,5 printenv SLURMD_NODENAME\r"
expect {
	timeout {
		fail "srun not responding"
	}
	-re "srun: error: Attempt to run a job step with het group value of 5, but the job allocation has maximum value of 2" {
		log_debug "Error is expected, no worries"
		incr matches
		exp_continue
	}
	-re "$test_prompt" {
		# Back at the prompt; stop waiting.
	}
}
if {$matches != 1} {
	fail "srun failure ($matches != 1: $expect_out(buffer))"
}

# Naming the same het group twice in one srun command line must be fatal.
set matches 0
send "$srun --mpi=none --het-group=0 printenv SLURMD_NODENAME : --het-group=0 env\r"
expect {
	timeout {
		fail "srun not responding"
	}
	-re "srun: fatal: Duplicate het groups in single srun not supported" {
		log_debug "Error is expected, no worries"
		incr matches
		exp_continue
	}
	-re "$test_prompt" {
		# Back at the prompt; stop waiting.
	}
}
if {$matches != 1} {
	fail "srun failure ($matches != 1: $expect_out(buffer))"
}
# Leave the interactive shell and wait for the spawned salloc to close.
send "exit\r"
expect {
	eof {
		# salloc terminated as expected.
	}
	timeout {
		fail "salloc not ending"
	}
}