#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
#          Test that no files are open in spawned tasks (except stdin,
#          stdout, and stderr) to ensure successful checkpoint/restart.
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
#          anything else indicates a failure mode that must be investigated.
############################################################################
# Copyright (C) 2002-2006 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# UCRL-CODE-226842.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
############################################################################
# The helper procs and program-path variables used throughout this script
# (print_header, $scontrol, $bin_rm, ...) are not defined in this file, so
# they are expected to come from ./globals. It must be sourced first.
source ./globals

# Identity of this test and its overall result (0 = no failure seen so far).
set test_id    "7.9"
set exit_code  0

# Number of srun launches performed inside the batch script.
set iterations 50

# Scratch files, all named after the test id.
set file_prog  "test${test_id}.prog"
set file_in    "test${test_id}.input"
set file_out   "test${test_id}.output"

print_header $test_id

#
# This test cannot run with proctrack/aix, proctrack/rms, proctrack/sgi_job,
# or switch/elan.
#
# Each of those leaves files open. The proctrack-related files could be
# cleared by closing every file after fd=3, but closing all files from
# switch/elan would disable its use.
#

# Warn that the named plugin is incompatible and flag the test as invalid.
proc report_incompatible { plugin } {
	global invalid
	send_user "\nWARNING: test incompatible with $plugin\n"
	set invalid 1
}

set invalid 0
# Silence the raw scontrol output; only our own messages are shown.
log_user 0
spawn $scontrol show config
expect {
	-re "proctrack/aix" {
		report_incompatible "proctrack/aix"
		exp_continue
	}
	-re "proctrack/rms" {
		report_incompatible "proctrack/rms"
		exp_continue
	}
	-re "proctrack/sgi_job" {
		report_incompatible "proctrack/sgi_job"
		exp_continue
	}
	-re "switch/elan" {
		report_incompatible "switch/elan"
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
log_user 1

# An incompatible configuration ends the test here with the current exit code.
if {$invalid} {
	exit $exit_code
}

#
# Delete left-over files and rebuild the test program.
# "-f /dev/null" gives make an empty makefile, so $file_prog is built by
# make's built-in rules (presumably from a matching source file shipped
# alongside this test -- confirm against the test directory).
#
exec $bin_rm -f $file_prog $file_in $file_out
exec $bin_make -f /dev/null $file_prog

#
# Generate the batch script. It runs the program once directly and then
# $iterations more times as spawned tasks under srun. Both invocations use
# an explicit "./" path so that neither depends on PATH or on srun's own
# executable lookup to find the freshly built program.
#
make_bash_script $file_in "
  $bin_echo 'testing within script'
  ./$file_prog
  $bin_echo ' '
  $bin_echo 'testing $iterations sets of spawned tasks'
  for ((i=0; i<$iterations; i++)) ; do
    $srun ./$file_prog
  done
"

#
# Submit the batch script and capture the job id that sbatch reports.
# job_id stays 0 if no "Submitted batch job <id>" line is seen.
#
set job_id 0
spawn $sbatch --output=$file_out -t1 ./$file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		# Same handling as the other expect blocks in this test: report
		# the unresponsive command instead of silently falling through.
		send_user "\nFAILURE: sbatch not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Without a job id there is nothing further to test.
if {$job_id == 0} {
	send_user "\nFAILURE: batch submit failure\n"
	exit 1
}

#
# Wait for the job to finish. A non-zero return from wait_for_job is
# treated as a failure and ends the test immediately.
#
set job_wait_rc [wait_for_job $job_id "DONE"]
if {$job_wait_rc != 0} {
	send_user "\nFAILURE: waiting for job to complete\n"
	exit 1
}

#
# Inspect the job's output file.
# Every occurrence of "FAILED" in the output counts as one failed run.
#
if {[wait_for_file $file_out] != 0} {
	exit 1
}
set matches 0
spawn $bin_cat $file_out
expect {
	-re "FAILED" {
		incr matches
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: /bin/cat not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

#
# A single failed run only produces a warning; more than one fails the test.
#
if {$matches != 0} {
	# The batch script runs the program once directly plus $iterations
	# times under srun. The expression is braced so it is evaluated once,
	# without a second round of substitution.
	set tot [expr {$iterations + 1}]
	if {$matches <= 1} {
		send_user "\nWARNING: $matches of $tot tests failed\n"
	} else {
		send_user "\nFAILURE: $matches of $tot tests failed\n"
		set exit_code 1
	}
	send_user " This should happen infrequently, typically when\n"
	send_user " JobAcctFrequency is set to a small value and is\n"
	send_user " indicative of a non-checkpointable job. For details,\n"
	send_user " see src/plugins/jobacct/linux/jobacct_linux.c\n"
}

#
# Final verdict. On any failure, exit with the recorded code and leave the
# scratch files in place; on success, announce it and remove them.
#
if {$exit_code != 0} {
	exit $exit_code
}
send_user "\nSUCCESS\n"
exec $bin_rm -f $file_in $file_prog $file_out
exit $exit_code