blob: ef57d9c6af32d88eeb06f008cc86772e312ed633 [file]
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Test the launch of a batch job within an existing job allocation.
# This logic is used by LSF
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2005-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# UCRL-CODE-217948.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
# Load the shared test-suite definitions (commands and variables such as
# print_header, $srun and $bin_rm used below come from outside this file).
source ./globals

# Test identity and the per-test file names derived from it.
set test_id	"1.45"
set file_in	"test${test_id}.input"
set file_out1	"test${test_id}.output1"
set file_out2	"test${test_id}.output2"

# Job ids parsed from srun output: allocation, first step, second step.
# Zero means "not seen yet".
set job_id_0	0
set job_id_1	0
set job_id_2	0

# Overall result of the test; any check below may raise it to 1.
set exit_code	0

print_header $test_id
#
# Remove the script and output files left behind by an earlier run
#
exec $bin_rm -f $file_in $file_out1 $file_out2

#
# Write the batch script: it runs $bin_id and then "$bin_sleep 20"
#
set script_body "
$bin_id
$bin_sleep 20"
make_bash_script $file_in $script_body
#
# Pick the node-count range passed to srun -N for the allocation that is
# spawned below. test_xcpu is only consulted when test_bluegene is false.
#
if {[test_bluegene]} {
	set node_cnt 1-2048
} elseif {[test_xcpu]} {
	set node_cnt 1-1
} else {
	set node_cnt 1-4
}
#
# Create a job allocation with srun -A, then submit two batch job steps
# into it by sending "srun -b --jobid=<id>" commands to the spawned process.
#
#   job_id_0  job id reported when the allocation is created
#   job_id_1  job id reported for the batch submission numbered .0
#   job_id_2  job id reported for the batch submission numbered .1
#
# The "." separating job id and step id is escaped so that it only matches
# a literal period; unescaped it is a regex wildcard and could swallow a
# digit of the id.
#
# NOTE: $timeout set here remains in effect for the expect calls that follow.
#
set timeout $max_job_delay
spawn $srun -N$node_cnt -A -v -t1
expect {
	-re "jobid ($number):" {
		# Allocation granted: remember its id and submit the first batch step.
		set job_id_0 $expect_out(1,string)
		send "$srun -b --jobid=$job_id_0 -o $file_out1 $file_in \n"
		exp_continue
	}
	-re "jobid ($number)\\.0 submitted" {
		# First batch step accepted: submit the second one.
		set job_id_1 $expect_out(1,string)
		send "$srun -b --jobid=$job_id_0 -o $file_out2 $file_in \n"
		exp_continue
	}
	-re "jobid ($number)\\.1 submitted" {
		# Second batch step accepted: nothing more to wait for here.
		set job_id_2 $expect_out(1,string)
	}
	timeout {
		send_user "\nFAILURE: srun not responding\n"
		kill_srun
		set exit_code 1
		# Keep reading after kill_srun so the eof branch below can run.
		exp_continue
	}
	eof {
		wait
	}
}
#
# Validate the job ids captured from srun; each failure path cleans up and
# terminates the test with exit status 1.
#

# No allocation id was ever seen.
if {$job_id_0 == 0} {
	send_user "\nFAILURE: job allocation failure\n"
	kill_srun
	exit 1
}

# Both batch submissions must have been acknowledged.
if {!(($job_id_1 != 0) && ($job_id_2 != 0))} {
	send_user "\nFAILURE: batch job submit failure\n"
	cancel_job $job_id_0
	exit 1
}

# All three reported ids must be the same job, i.e. the batch steps were
# placed in the existing allocation.
if {!(($job_id_0 == $job_id_2) && ($job_id_1 == $job_id_2))} {
	send_user "\nFAILURE: batch job did not run in existing allocation\n"
	foreach stray_job [list $job_id_0 $job_id_1 $job_id_2] {
		cancel_job $stray_job
	}
	exit 1
}
#
# Check that the first batch job step (<job_id_0>.0) is reported by
# "scontrol show step".
#
# matches is set to 1 both when the step id is seen and when scontrol prints
# "Invalid"; in the latter case the failure has already been reported and
# the generic "not found" check after the expect must not report it again.
#
set matches 0
spawn $scontrol show step $job_id_0.0
expect {
	-re "Invalid" {
		send_user "\nFAILURE: batch step not found\n"
		set matches 1
		set exit_code 1
		exp_continue
	}
	-re "$job_id_0\\.0" {
		# The period is escaped so only the literal step id matches.
		set matches 1
		exp_continue
	}
	timeout {
		# The spawned process here is scontrol, not srun.
		send_user "\nFAILURE: scontrol not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
if {$matches == 0} {
	send_user "\nFAILURE: batch step not found\n"
	set exit_code 1
}
#
# Check batch job step output: the first step's output file must contain
# a "uid=" string (the batch script runs $bin_id).
#
# wait_for_file is treated as returning 0 when the file is available.
# Any other result is recorded as a test failure here; previously it was
# ignored and the test could still report SUCCESS.
#
if {[wait_for_file $file_out1] == 0} {
	set matches 0
	spawn $bin_cat $file_out1
	expect {
		-re "uid=" {
			set matches 1
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$matches == 0} {
		send_user "\nFAILURE: Job output missing\n"
		set exit_code 1
	}
} else {
	send_user "\nFAILURE: Job output file $file_out1 missing\n"
	set exit_code 1
}
#
# Check the second batch job step's output file. It must contain either a
# "uid=" string or an "srun ... command not found" message; the latter is
# only reported as a warning and still counts as output.
#
# wait_for_file is treated as returning 0 when the file is available.
# Any other result is recorded as a test failure here; previously it was
# ignored and the test could still report SUCCESS.
#
if {[wait_for_file $file_out2] == 0} {
	set matches 0
	spawn $bin_cat $file_out2
	expect {
		-re "uid=" {
			set matches 1
			exp_continue
		}
		-re "srun.*command not found" {
			send_user "\nWARNING: srun is not installed on this computer\n"
			set matches 1
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$matches == 0} {
		send_user "\nFAILURE: Job output missing\n"
		set exit_code 1
	}
} else {
	send_user "\nFAILURE: Job output file $file_out2 missing\n"
	set exit_code 1
}
#
# The allocation must outlive its batch steps: confirm that scontrol still
# reports the job as RUNNING, then cancel it.
#
set still_running 0
spawn $scontrol -o show job $job_id_0
expect {
	-re "JobState=RUNNING" {
		set still_running 1
		exp_continue
	}
	eof {
		wait
	}
}
if {!$still_running} {
	send_user "\nFAILURE: Job not still running\n"
	set exit_code 1
}
cancel_job $job_id_0

#
# Report the result. The work files are kept when any check failed and are
# removed only on success.
#
if {$exit_code != 0} {
	exit $exit_code
}
send_user "\nSUCCESS\n"
exec $bin_rm -f $file_in $file_out1 $file_out2
exit $exit_code