blob: 269e76c384321b47a8887705263599732218242d [file] [log] [blame]
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Test scancel signal option (--signal and --verbose options).
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2002-2007 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# UCRL-CODE-226842.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

# Identity of this test and the scratch files whose names derive from it.
# exit_code accumulates the overall result (0 = no failure seen so far).
set test_id   "6.7"
set exit_code 0
set file_in   "test${test_id}.input"
set file_out  "test${test_id}.output"
set file_prog "test${test_id}.prog"

print_header $test_id
#
# Remove artifacts left over from an earlier run, then rebuild the
# helper program from its C source (the file named $file_prog.c).
# NOTE(review): the helper's source is not shown here; the output check
# later in this test looks for "Received SIGUSR1", so the helper
# presumably reports the signals it receives -- confirm against the
# C source.
#
exec rm -f $file_prog $file_in $file_out
exec cc -o $file_prog ${file_prog}.c

# Batch script for the first job: launch the helper through srun.
make_bash_script $file_in "
$srun ./$file_prog
"
#
# Submit a job so we have something to work with.
# The job id is taken from the "jobid N submitted" line printed by
# srun; job_id stays 0 if that line never appears.
#
set job_id 0
set srun_pid [spawn $srun --batch --output=$file_out -t1 $file_in]
expect {
	-re "jobid ($number) submitted" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		send_user "\nFAILURE: srun not responding\n"
		slow_kill $srun_pid
		set exit_code 1
	}
}

# A non-zero result from wait_for_job is treated as fatal: nothing
# below can be tested without a running job.
set start_rc [wait_for_job $job_id RUNNING]
if {$start_rc != 0} {
	send_user "\nFAILURE: error starting job $job_id\n"
	cancel_job $job_id
	exit 1
}

# Allow time for the step to start
exec $bin_sleep 4
#
# Test verbose scancel: send USR1 to the job and tally the
# "Signal N to job ID" lines that scancel prints.
#
set verbose_hits 0
spawn $scancel --signal=USR1 --verbose $job_id
expect {
	-re "Signal ($number) to job $job_id" {
		# A report of signal 9 counts against the test; any
		# other signal number counts in its favour.
		set signo $expect_out(1,string)
		incr verbose_hits [expr {$signo != 9 ? 1 : -1}]
		exp_continue
	}
	-re "Job is in transistional state, retrying" {
		# NOTE(review): the spelling in this pattern is kept
		# as-is; it presumably mirrors the message scancel
		# prints -- verify before "correcting" it.
		incr verbose_hits -1
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		send_user "\nFAILURE: scancel not responding\n"
		set exit_code 1
	}
}

# The tally must come out at exactly one.
if {$verbose_hits != 1} {
	send_user "\nFAILURE: scancel verbose option failed\n"
	set exit_code 1
}

# The job has to finish before its output file is inspected.
set done_rc [wait_for_job $job_id DONE]
if {$done_rc != 0} {
	send_user "\nFAILURE: error completing job $job_id\n"
	cancel_job $job_id
	exit 1
}
#
# Test output file: it must contain "Received SIGUSR1" and must not
# contain "ERROR". A non-zero result from wait_for_file is itself
# recorded as a failure.
#
if {[wait_for_file $file_out] != 0} {
	set exit_code 1
} else {
	set usr1_seen 0
	spawn cat $file_out
	expect {
		-re "ERROR" {
			send_user "\nFAILURE: Failure running test program\n"
			set exit_code 1
			exp_continue
		}
		-re "Received SIGUSR1" {
			set usr1_seen 1
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$usr1_seen == 0} {
		send_user "\nFAILURE: SIGUSR1 not received\n"
		set exit_code 1
	}
}
#
# Variation of above to test signal of batch script processes.
# Note that job termination sends SIGCONT/SIGTERM, hence the
# duplicate sleep call. The trailing "echo FINI" is the marker the
# output check later looks for.
#
exec rm -f $file_in $file_out
set nap_cmd "$bin_sleep 10"
make_bash_script $file_in "
$nap_cmd
$nap_cmd
echo FINI
"
#
# Submit a job so we have something to work with.
# The job id is taken from the "jobid N submitted" line printed by
# srun; job_id stays 0 if that line never appears.
#
set job_id 0
set srun_pid [spawn $srun --batch --output=$file_out -t1 $file_in]
expect {
	-re "jobid ($number) submitted" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: srun not responding\n"
		# Pass the value of srun_pid, not the bare word: without
		# the "$" slow_kill receives the literal string
		# "srun_pid" and the hung srun is never killed.
		slow_kill $srun_pid
		set exit_code 1
		# Deliberately no exp_continue here (same as the first
		# submission in this test): re-arming after a timeout
		# could repeat this handler indefinitely if the process
		# never closes its output.
	}
	eof {
		wait
	}
}

# A non-zero result from wait_for_job is treated as fatal: nothing
# below can be tested without a running job.
if {[wait_for_job $job_id RUNNING] != 0} {
	send_user "\nFAILURE: error starting job $job_id\n"
	cancel_job $job_id
	exit 1
}

# Short pause before signalling the job.
exec $bin_sleep 2
#
# Test verbose scancel against the batch script: send STOP with
# --batch and tally the "Signal N to job ID" lines that scancel prints.
#
set verbose_hits 0
spawn $scancel --batch --signal=STOP --verbose $job_id
expect {
	-re "Signal ($number) to job $job_id" {
		# A report of signal 9 counts against the test; any
		# other signal number counts in its favour.
		set signo $expect_out(1,string)
		incr verbose_hits [expr {$signo != 9 ? 1 : -1}]
		exp_continue
	}
	-re "Job is in transistional state, retrying" {
		# NOTE(review): the spelling in this pattern is kept
		# as-is; it presumably mirrors the message scancel
		# prints -- verify before "correcting" it.
		incr verbose_hits -1
		exp_continue
	}
	eof {
		wait
	}
	timeout {
		send_user "\nFAILURE: scancel not responding\n"
		set exit_code 1
	}
}

# The tally must come out at exactly one.
if {$verbose_hits != 1} {
	send_user "\nFAILURE: scancel verbose option failed\n"
	set exit_code 1
}

# The job has to finish before its output file is inspected.
set done_rc [wait_for_job $job_id DONE]
if {$done_rc != 0} {
	send_user "\nFAILURE: error completing job $job_id\n"
	cancel_job $job_id
	exit 1
}
#
# Test output file: the batch script ends with "echo FINI", so finding
# FINI in the output means the script ran to completion and is reported
# as "Job not stopped". A non-zero result from wait_for_file is itself
# recorded as a failure.
#
if {[wait_for_file $file_out] != 0} {
	set exit_code 1
} else {
	spawn cat $file_out
	expect {
		-re "FINI" {
			send_user "\nFAILURE: Job not stopped\n"
			set exit_code 1
			exp_continue
		}
		eof {
			wait
		}
	}
}
# Report the result. On success remove every file this test generated,
# including the batch script ($file_in), which was previously left
# behind; on failure the files are kept for inspection.
if {$exit_code == 0} {
	exec rm -f $file_in $file_out $file_prog
	send_user "\nSUCCESS\n"
}
exit $exit_code