blob: 0e9299632ae2e5bc3321647a0a9e837417ab8675 [file] [log] [blame]
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Test that switch windows in use are not re-used.
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2005-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# UCRL-CODE-226842.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
set test_id   "13.1"
set exit_code 0

print_header $test_id

#
# Query the slurm configuration with scontrol and choose the switch window
# parameters that go with whichever switch plugin it reports
#
# windows_per_node   - switch windows available per node
# windows_iterations - job steps to run after the initial switch window
#                      allocation for a persistent job step
# windows_used       - switch windows to use per job step
# step_delay         - run time of persistent job step (seconds); it stays
#                      0 when none of the known switch types is seen
#
# Each "foreach ... break" below assigns the four parameters in one
# statement, in the order listed above.
#
log_user 0
set step_delay 0
spawn $scontrol show config
expect {
	-re "switch/elan" {
		foreach {windows_per_node windows_iterations windows_used step_delay} {2048 50 48 10} break
		exp_continue
	}
	-re "switch/federation" {
		foreach {windows_per_node windows_iterations windows_used step_delay} {16 32 2 10} break
		exp_continue
	}
	-re "switch/none" {
		foreach {windows_per_node windows_iterations windows_used step_delay} {0 5 4 5} break
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		set exit_code 1
	}
}
log_user 1

# A zero step_delay means no known switch type was reported above,
# so there is nothing for this test to exercise
if {$step_delay == 0} {
	send_user "\nWARNING: not running compatable switch, this test is not applicable\n"
	exit $exit_code
}
#
# Submit a slurm allocate job, then run a bunch of job
# steps within that allocation
#
set timeout $max_job_delay
set job_id 0
set srun_pid [spawn $srun --allocate -N1 --verbose -t2]

# Pick up the job id reported by srun. Timeout and eof are handled
# explicitly: without a match expect_out(1,string) is not set by this
# expect, so reading it unconditionally would abort the script with a
# Tcl error (or pick up a stale value) instead of reporting a FAILURE.
expect {
	-re "jobid ($number).*" {
		set job_id $expect_out(1,string)
	}
	timeout {
		send_user "\nFAILURE: srun (from --allocate) not responding\n"
		slow_kill $srun_pid
		exit 1
	}
	eof {
		send_user "\nFAILURE: srun aborted\n"
		wait
		exit 1
	}
}

# start initial job step to claim some switch windows; it runs in the
# background for step_delay seconds while the steps below are launched
expect -re $prompt
send "$srun -N1 -O -n$windows_used $bin_sleep $step_delay &\n"

# start more job steps to see if any switch window conflicts occur;
# each pass scans the previous step's output for "error" before the
# next prompt is used to launch another step
for {set inx 0} {$inx < $windows_iterations} {incr inx} {
	expect {
		-re "error" {
			send_user "\nFAILURE: some error occurred\n"
			set exit_code 1
			exp_continue
		}
		-re $prompt {
			send_user "spawning step $inx\n"
			send "$srun -N1 -O -n$windows_used true\n"
		}
		timeout {
			send_user "\nFAILURE: srun (from --allocate) not responding\n"
			slow_kill $srun_pid
			exit 1
		}
		eof {
			send_user "\nFAILURE: srun aborted\n"
			# reap the spawned srun before exiting; wait must come
			# first because nothing after exit is ever executed
			wait
			exit 1
		}
	}
}
# Wait for the prompt that follows the last job step launched by the loop.
# That step's output has not been scanned yet, so look for "error" here as
# well; otherwise a failure in the final step would be consumed silently
# together with the prompt.
expect {
	-re "error" {
		send_user "\nFAILURE: some error occurred\n"
		set exit_code 1
		exp_continue
	}
	-re $prompt {
	}
	timeout {
		send_user "\nFAILURE: srun (from --allocate) not responding\n"
		slow_kill $srun_pid
		exit 1
	}
	eof {
		send_user "\nFAILURE: srun aborted\n"
		wait
		exit 1
	}
}

# wait for initial job step to complete
send_user "(sleeping for $step_delay seconds, for job step zero to complete) "
exec $bin_sleep $step_delay

# leave the allocation's shell and scan whatever output remains until
# the spawned srun closes its end
send "exit\n"
expect {
	-re "error" {
		send_user "\nFAILURE: some error occurred\n"
		set exit_code 1
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: srun (from --allocate) not responding\n"
		slow_kill $srun_pid
		exit 1
	}
	eof {
		wait
	}
}

# SUCCESS is only reported when no failure was recorded along the way
if { $exit_code == 0 } {
	send_user "\nSUCCESS\n"
}
exit $exit_code