| #!/usr/bin/expect |
| ############################################################################ |
| # Purpose: Test of SLURM functionality |
| # Validate scontrol update command for partitions. |
| # |
| # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR |
| # "FAILURE: ..." otherwise with an explanation of the failure, OR |
| # anything else indicates a failure mode that must be investigated. |
| ############################################################################ |
| # Copyright (C) 2002 The Regents of the University of California. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Morris Jette <jette1@llnl.gov> |
| # UCRL-CODE-226842. |
| # |
| # This file is part of SLURM, a resource management program. |
| # For details, see <http://www.llnl.gov/linux/slurm/>. |
| # |
| # SLURM is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with SLURM; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| |
| # Test identity and the bookkeeping state shared by the steps below. |
| set test_id "3.2" |
| # authorized is cleared when the state update fails for a reason other |
| # than an access error. |
| set authorized 1 |
| set exit_code 0 |
| # job_runable is 1 when the partition starts out UP and 0 when it starts |
| # out DOWN. Every later step reads and writes this exact spelling, so the |
| # default is assigned to the same name. |
| set job_runable 0 |
| set part_name "" |
| set part_old_state "" |
| set part_new_state "" |
| set read_state "" |
| # max_job_delay is not defined in this file; presumably ./globals sets it. |
| set timeout $max_job_delay |
| |
| print_header $test_id |
| |
| # Launch a short srun job in the named partition and report whether it ran. |
| # part_name: partition handed to srun through -p |
| # Returns 0 when srun output matched "uid" and srun then reached |
| # end-of-file; returns 1 otherwise (no match, slurm unreachable, timeout). |
| # NOTE(review): the job is $bin_hostname run with --output=none, yet success |
| # is keyed on the text "uid"; confirm this pattern can match as intended. |
| proc run_job { part_name } { |
|     global srun |
|     global bin_hostname |
|     global timeout |
| |
|     set job_pid [spawn $srun --output=none --error=none -p $part_name -N1-128 -t1 $bin_hostname] |
|     set status 1 |
|     expect { |
|         -re "Unable to contact" { |
|             send_user "\nFAILURE: slurm appears to be down\n" |
|             return 1 |
|         } |
|         -re "uid" { |
|             # Remember the match, then keep reading until eof. |
|             set status 0 |
|             exp_continue |
|         } |
|         -re "not available" { |
|             slow_kill $job_pid |
|             exp_continue |
|         } |
|         timeout { |
|             send_user "\nsrun not responding\n" |
|             slow_kill $job_pid |
|             return 1 |
|         } |
|         eof { |
|             wait |
|         } |
|     } |
| |
|     return $status |
| } |
| |
| # |
| # Identify a partition and its state |
| # |
| # List all partitions and keep the first name that is reported. |
| spawn $scontrol show part |
| expect { |
|     -re "PartitionName=($alpha_numeric) " { |
|         if {[string compare $part_name ""] == 0} { |
|             set part_name $expect_out(1,string) |
|         } |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: scontrol not responding\n" |
|         set exit_code 1 |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # Ask for the state only when a partition name was found. Without a name |
| # the query would be issued with an empty argument; part_old_state stays |
| # empty instead and the validation step reports the missing partition. |
| if {[string compare $part_name ""] != 0} { |
|     spawn $scontrol show part $part_name |
|     expect { |
|         -re "State=($alpha_cap) " { |
|             set part_old_state $expect_out(1,string) |
|             exp_continue |
|         } |
|         timeout { |
|             send_user "\nFAILURE: scontrol not responding\n" |
|             set exit_code 1 |
|         } |
|         eof { |
|             wait |
|         } |
|     } |
| } |
| |
| # |
| # Check what was learned about the partition and pick the opposite state |
| # |
| if {[string length $part_name] == 0} { |
|     send_user "\nFAILURE: scontrol unable to identify any partition\n" |
|     exit 1 |
| } |
| if {[string length $part_old_state] == 0} { |
|     send_user "\nFAILURE: scontrol unable to identify state of" |
|     send_user " partition $part_name\n" |
|     exit 1 |
| } |
| # Only UP and DOWN are handled; any other state leaves part_new_state |
| # empty and is rejected just below. |
| switch -exact -- $part_old_state { |
|     "UP" { |
|         set job_runable 1 |
|         set part_new_state "DOWN" |
|     } |
|     "DOWN" { |
|         set job_runable 0 |
|         set part_new_state "UP" |
|     } |
| } |
| if {[string length $part_new_state] == 0} { |
|     send_user "\nFAILURE: state of partition $part_name is" |
|     send_user " invalid: $part_old_state\n" |
|     exit 1 |
| } |
| |
| # |
| # A partition that starts out DOWN must not run a job |
| # |
| # run_job is only invoked when the partition started DOWN (&& short-circuits). |
| if {!$job_runable && [run_job $part_name] == 0} { |
|     send_user "\nFAILURE: Job ran in DOWN partition\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Flip the partition to its opposite state |
| # |
| # An error whose first word is "Invalid" or whose second word is "user" is |
| # treated as an access problem: warn and stop with the current exit code. |
| # Any other update error only clears authorized. |
| # NOTE(review): that second case does not set exit_code; confirm intended. |
| spawn $scontrol update PartitionName=$part_name State=$part_new_state |
| expect { |
|     -re "slurm_update error: ($alpha_numeric) ($alpha_numeric)" { |
|         set first_word $expect_out(1,string) |
|         set second_word $expect_out(2,string) |
|         set denied [expr {[string compare $first_word "Invalid"] == 0 || [string compare $second_word "user"] == 0}] |
|         if {$denied} { |
|             send_user "\nWARNING: user not authorized to change slurm state\n" |
|             exit $exit_code |
|         } |
|         set authorized 0 |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: scontrol not responding\n" |
|         set exit_code 1 |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # |
| # Read the state back and compare it with the requested one |
| # |
| spawn $scontrol show partition $part_name |
| expect { |
|     -re "State=($alpha_cap) " { |
|         set read_state $expect_out(1,string) |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: scontrol not responding\n" |
|         set exit_code 1 |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| # The comparison is skipped when the update step cleared authorized. |
| if {$authorized == 1 && [string compare $read_state $part_new_state] != 0} { |
|     send_user "\nFAILURE: scontrol state change error\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # A partition that was UP and has now been set DOWN must not run a job |
| # |
| # run_job is only invoked when the partition started UP (&& short-circuits). |
| if {$job_runable == 1 && [run_job $part_name] == 0} { |
|     send_user "\nFAILURE: Job ran in DOWN partition\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Return that partition's state to its old value |
| # |
| # The "Invalid user id" pattern is listed ahead of the generic |
| # "slurm_update error:" pattern, so that one message is consumed without |
| # marking the test as failed; any other update error sets exit_code. |
| # No comments are placed between the pattern/action pairs below because |
| # the braced expect body is parsed as a list of patterns and actions. |
| spawn $scontrol update PartitionName=$part_name State=$part_old_state |
| expect { |
| -re "slurm_update error: Invalid user id" { |
| exp_continue |
| } |
| -re "slurm_update error:" { |
| send_user "\nFAILURE: scontrol update error\n" |
| set exit_code 1 |
| exp_continue |
| } |
| timeout { |
| send_user "\nFAILURE: scontrol not responding\n" |
| set exit_code 1 |
| } |
| eof { |
| wait |
| } |
| } |
| |
| # |
| # Show the partition once more, then report the overall result |
| # |
| # No output patterns are matched here; the command is simply run until |
| # end-of-file or timeout. |
| spawn $scontrol show partition $part_name |
| expect { |
|     eof { |
|         wait |
|     } |
|     timeout { |
|         send_user "\nFAILURE: scontrol not responding\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| # SUCCESS is printed only when no step recorded a failure. |
| if {!$exit_code} { |
|     send_user "\nSUCCESS\n" |
| } |
| exit $exit_code |