| #!/usr/bin/expect |
| ############################################################################ |
| # Purpose: Test of SLURM functionality |
| # Test the launch of a batch job within an existing job allocation. |
| # This logic is used by LSF |
| # |
| # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR |
| # "FAILURE: ..." otherwise with an explanation of the failure, OR |
| # anything else indicates a failure mode that must be investigated. |
| ############################################################################ |
| # Copyright (C) 2005-2006 The Regents of the University of California. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Morris Jette <jette1@llnl.gov> |
| # UCRL-CODE-217948. |
| # |
| # This file is part of SLURM, a resource management program. |
| # For details, see <http://www.llnl.gov/linux/slurm/>. |
| # |
| # SLURM is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with SLURM; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| |
| # Identity of this test and the scratch files named after it |
| set test_id "1.45" |
| set file_out1 "test${test_id}.output1" |
| set file_out2 "test${test_id}.output2" |
| set file_in "test${test_id}.input" |
| |
| # Job ids parsed from srun output; 0 means "not seen yet" |
| set job_id_0 0 |
| set job_id_1 0 |
| set job_id_2 0 |
| |
| # Overall test result, returned through exit at the end of the script |
| set exit_code 0 |
| |
| print_header $test_id |
| |
| # |
| # Remove input/output files left over from a previous run |
| # |
| exec $bin_rm -f $file_in $file_out1 $file_out2 |
| |
| # |
| # Build the batch input script (runs $bin_id, then $bin_sleep 20); the job output files are later searched for "uid=" |
| # |
| make_bash_script $file_in " |
| $bin_id |
| $bin_sleep 20" |
| |
| # |
| # Pick the node-count range passed to the srun allocation request below. |
| # The range depends on which of the test_bluegene / test_xcpu checks is true; |
| # the batch jobs launched in that allocation use stdout/err files whose |
| # contents are confirmed later. |
| # |
| if {[test_bluegene]} { |
|     set node_cnt 1-2048 |
| } elseif {[test_xcpu]} { |
|     set node_cnt 1-1 |
| } else { |
|     set node_cnt 1-4 |
| } |
| |
| # |
| # Request a job allocation with "srun -A", then send two batch submissions |
| # ("srun -b --jobid=<allocation id>") to the spawned process. The submits |
| # are expected to be acknowledged as step 0 and step 1 of that same job. |
| # |
| set timeout $max_job_delay |
| spawn $srun -N$node_cnt -A -v -t1 |
| expect { |
|     -re "jobid ($number):" { |
|         # Allocation reported: remember its id and submit the first batch job |
|         set job_id_0 $expect_out(1,string) |
|         send "$srun -b --jobid=$job_id_0 -o $file_out1 $file_in \n" |
|         exp_continue |
|     } |
|     -re "jobid ($number)\\.0 submitted" { |
|         # Step 0 acknowledged: submit the second batch job. |
|         # The dot is escaped so that only a literal "." separator matches; |
|         # unescaped, a plain job id ending in a digit and 0 would be split. |
|         set job_id_1 $expect_out(1,string) |
|         send "$srun -b --jobid=$job_id_0 -o $file_out2 $file_in \n" |
|         exp_continue |
|     } |
|     -re "jobid ($number)\\.1 submitted" { |
|         # Step 1 acknowledged: both submits are done, stop reading here |
|         set job_id_2 $expect_out(1,string) |
|     } |
|     timeout { |
|         send_user "\nFAILURE: srun not responding\n" |
|         kill_srun |
|         set exit_code 1 |
|         # Keep reading so the eof branch can reap the killed process |
|         exp_continue |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # |
| # Validate the ids parsed from srun: an allocation must have been reported, |
| # both batch submits must have been acknowledged, and both acknowledgements |
| # must carry the allocation's job id. Any violation ends the test here. |
| # |
| if {$job_id_0 == 0} { |
|     send_user "\nFAILURE: job allocation failure\n" |
|     kill_srun |
|     exit 1 |
| } |
| if {$job_id_1 == 0 || $job_id_2 == 0} { |
|     send_user "\nFAILURE: batch job submit failure\n" |
|     cancel_job $job_id_0 |
|     exit 1 |
| } |
| |
| if {!($job_id_0 == $job_id_2 && $job_id_1 == $job_id_2)} { |
|     send_user "\nFAILURE: batch job did not run in existing allocation\n" |
|     # The ids differ, so cancel every job that was reported |
|     foreach stale_job_id [list $job_id_0 $job_id_1 $job_id_2] { |
|         cancel_job $stale_job_id |
|     } |
|     exit 1 |
| } |
| |
| # |
| # Check that step 0 of the allocation is reported by "scontrol show step" |
| # |
| set matches 0 |
| spawn $scontrol show step $job_id_0.0 |
| expect { |
|     -re "Invalid" { |
|         send_user "\nFAILURE: batch step not found\n" |
|         # Mark as handled so the check after this block does not report twice |
|         set matches 1 |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     -re "$job_id_0\\.0" { |
|         # Dot escaped: match the literal step id, not any character |
|         set matches 1 |
|         exp_continue |
|     } |
|     timeout { |
|         # The spawned program here is scontrol, so name it in the message |
|         send_user "\nFAILURE: scontrol not responding\n" |
|         set exit_code 1 |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| if {$matches == 0} { |
|     send_user "\nFAILURE: batch step not found\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Check batch job step output: each output file must show up and contain |
| # "uid=" (the text expected from running $bin_id in the batch script). |
| # A file that never shows up is recorded as a failure instead of being |
| # skipped, so the test cannot report SUCCESS without its output. |
| # |
| if {[wait_for_file $file_out1] == 0} { |
|     set matches 0 |
|     spawn $bin_cat $file_out1 |
|     expect { |
|         -re "uid=" { |
|             set matches 1 |
|             exp_continue |
|         } |
|         eof { |
|             wait |
|         } |
|     } |
|     if {$matches == 0} { |
|         send_user "\nFAILURE: Job output missing\n" |
|         set exit_code 1 |
|     } |
| } else { |
|     # wait_for_file did not return the success value tested above |
|     send_user "\nFAILURE: Job output file $file_out1 missing\n" |
|     set exit_code 1 |
| } |
| if {[wait_for_file $file_out2] == 0} { |
|     set matches 0 |
|     spawn $bin_cat $file_out2 |
|     expect { |
|         -re "uid=" { |
|             set matches 1 |
|             exp_continue |
|         } |
|         -re "srun.*command not found" { |
|             # Tolerated with a warning rather than treated as missing output |
|             send_user "\nWARNING: srun is not installed on this computer\n" |
|             set matches 1 |
|             exp_continue |
|         } |
|         eof { |
|             wait |
|         } |
|     } |
|     if {$matches == 0} { |
|         send_user "\nFAILURE: Job output missing\n" |
|         set exit_code 1 |
|     } |
| } else { |
|     # wait_for_file did not return the success value tested above |
|     send_user "\nFAILURE: Job output file $file_out2 missing\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Confirm the allocation is still in the RUNNING state, then cancel it |
| # and report the overall result |
| # |
| set job_running 0 |
| spawn $scontrol -o show job $job_id_0 |
| expect { |
|     -re "JobState=RUNNING" { |
|         set job_running 1 |
|         exp_continue |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| if {!$job_running} { |
|     send_user "\nFAILURE: Job not still running\n" |
|     set exit_code 1 |
| } |
| cancel_job $job_id_0 |
| |
| # On failure leave the scratch files in place and exit with the error code |
| if {$exit_code != 0} { |
|     exit $exit_code |
| } |
| send_user "\nSUCCESS\n" |
| exec $bin_rm -f $file_in $file_out1 $file_out2 |
| exit $exit_code |