| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test of srun functionality |
| # Test of hostfile option (-hostfile) inside of an allocation. |
| ############################################################################ |
| # Copyright (C) 2002-2006 The Regents of the University of California. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Danny Auble <da@llnl.gov> |
| # UCRL-CODE-217948. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
source ./globals

# Path of the hostfile that is rewritten before each hostfile-driven srun.
set hostfile   "$test_dir/hostfile"
# Id of the salloc allocation; stays 0 until salloc reports a grant.
set job_id     0
set node_count 0
# Number of nodes requested from salloc (-N) further down.
set num_nodes  3
set task_count 0

# The allocation requests $num_nodes nodes, so skip the test when the
# default partition cannot supply that many.  The guard is tied to
# num_nodes rather than a literal so the two cannot drift apart.
set node_count [get_partition_param [default_partition] "TotalNodes"]
if { $node_count < $num_nodes } {
    skip "Insufficient nodes in default partition ($node_count < $num_nodes)"
}
| |
# Teardown hook: cancel the job whose id is stored in the global job_id.
# job_id is still 0 when no allocation was ever granted; cancel_job is
# called with that value unchanged (presumably it tolerates 0 -- confirm
# against ./globals).
proc cleanup {} {
    cancel_job $::job_id
}
| |
# node0..node4 hold the node name printed by task ranks 0..4 of the most
# recent srun.  0 means "no output seen for that rank".
foreach slot {node0 node1 node2 node3 node4} {
    set $slot 0
}

# Start an interactive shell inside a new allocation.  Every later srun
# in this test is typed into this shell.
set timeout $max_job_delay
spawn $salloc -N$num_nodes -v -t2 $bin_bash
expect {
    timeout {
        fail "salloc not responding"
    }
    eof {
        wait
    }
    -re "salloc: Granted job allocation ($number)" {
        # Remember the job id, then keep reading until the shell prompt
        # shows up.
        set job_id $expect_out(1,string)
        reset_bash_prompt
        exp_continue
    }
    -re $test_prompt {
        #log_debug "Job initiated"
    }
}

# No grant message was seen: nothing to test against.
if {0 == $job_id} {
    fail "salloc failure"
}
| |
#
# Phase 1: task placement driven by SLURM_HOSTFILE.
#
#   i == 0  plain srun; only records which node each task rank ran on.
#   i == 1  hostfile lists the three nodes rotated (node2, node0, node1);
#           each rank must land on the listed node.
#   i == 2  hostfile lists five entries and srun is given -n5.
#   i == 3  same five-entry hostfile but srun is given -n4, so rank 4
#           must produce no output (its slot stays 0).
#
# Each iteration builds its hostfile from the node names captured by the
# previous iteration's srun.
#
for {set i 0} {$i < 4} {incr i} {
    set extra ""
    # Lines to write into the hostfile for this iteration.
    set hosts {}
    # Node expected for each task rank, indexed by rank.  Empty means
    # there is nothing to verify (i == 0).
    set expected {}

    if {$i > 0} {
        # Every later iteration needs the names learned by the previous
        # srun.
        if { $node0 == 0 || $node1 == 0 || $node2 == 0 } {
            fail "Node names not set from previous srun"
        }
    }

    if { $i == 1 } {
        send "export SLURM_HOSTFILE=$hostfile\r"
        expect {
            -re $test_prompt {
                #log_debug "Srun completed"
            }
            timeout {
                # Without the prompt the following srun output cannot be
                # attributed reliably, so stop here.
                fail "Shell not responding after export of SLURM_HOSTFILE"
            }
        }
        set hosts    [list $node2 $node0 $node1]
        set expected $hosts
    } elseif { $i == 2 } {
        set hosts    [list $node1 $node0 $node0 $node0 $node0]
        set expected $hosts
        set extra "-n5"
    } elseif { $i == 3 } {
        set hosts    [list $node1 $node0 $node0 $node0 $node0]
        # Only four tasks are launched, so rank 4 keeps its reset value.
        set expected [list $node1 $node0 $node0 $node0 0]
        set extra "-n4"
    }

    if {[llength $hosts] > 0} {
        set file [open $hostfile "w"]
        puts $file [join $hosts "\n"]
        close $file
    }

    # Forget the previous run's placement before collecting the new one.
    set node0 0
    set node1 0
    set node2 0
    set node3 0
    set node4 0

    #
    # execute srun with a specific node count
    #
    send "$srun -l $extra -O $bin_printenv SLURMD_NODENAME\r"
    expect {
        -re "($number): *($re_word_str)" {
            # "<rank>: <nodename>" line; store the name in node<rank>.
            # At most five tasks are ever launched, so only ranks 0..4
            # are recorded.
            set task_id $expect_out(1,string)
            foreach rank {0 1 2 3 4} {
                if {$task_id == $rank} {
                    set node$rank $expect_out(2,string)
                    break
                }
            }
            exp_continue
        }
        -re $test_prompt {
            #log_debug "Srun completed"
        }
        -re "Granted job allocation ($number)" {
            set job_id $expect_out(1,string)
            exp_continue
        }
        timeout {
            fail "srun not responding"
        }
        eof {
        }
    }

    # Compare what each rank reported with what the hostfile asked for.
    set rank 0
    foreach want $expected {
        set got [set node$rank]
        if {$got ne $want} {
            log_debug "Task $rank ran on '$got', expected '$want'"
            fail "Tasks not distributed by hostfile"
        }
        incr rank
    }
}

# Drop the hostfile setting so the -w tests that follow are not affected
# by it.
send "unset SLURM_HOSTFILE\r"
expect {
    -re $test_prompt {
        #log_debug "Srun completed"
    }
    timeout {
        fail "Shell not responding after unset of SLURM_HOSTFILE"
    }
}
| |
#
# Phase 2: the same placement checks, driven by the srun command line
# (-w, with "-m arbitrary" where a per-task layout is requested).
#
#   i == 0  plain srun; only records which node each task rank ran on.
#   i == 1  -m arbitrary -w node2,node0,node1; ranks must follow the list.
#   i == 2  -m arbitrary -w with five entries; ranks must follow the list.
#   i == 3  -w with a single node name and no -m arbitrary.
#   i == 4  five-entry -w list combined with -n4; srun is expected to
#           print an error, and no placement is verified.
#
for {set i 0} {$i < 5} {incr i} {
    set extra ""
    # Node expected for each task rank, indexed by rank.  Empty means no
    # per-rank verification for this iteration.
    set expected {}
    # Node named by the single-node -w request (i == 3 only).
    set single_node ""

    if { $i == 3 } {
        if { $node1 == 0 } {
            fail "Node names not set from previous srun"
        }
        set single_node $node1
        set extra " -w $node1"
    } elseif { $i > 0 } {
        if { $node0 == 0 || $node1 == 0 || $node2 == 0 } {
            fail "Node names not set from previous srun"
        }
        if { $i == 1 } {
            set expected [list $node2 $node0 $node1]
            set extra " -m arbitrary -w [join $expected ,]"
        } elseif { $i == 2 } {
            set expected [list $node1 $node0 $node0 $node0 $node0]
            set extra " -m arbitrary -w [join $expected ,]"
        } else {
            # i == 4: five nodes listed but only four tasks requested.
            set extra " -m arbitrary -w $node1,$node0,$node0,$node0,$node0 -n4"
        }
    }

    # Forget the previous run's placement before collecting the new one.
    set node0 0
    set node1 0
    set node2 0
    set node3 0
    set node4 0

    #
    # execute srun with a specific node count
    #
    send "$srun -l $extra -O $bin_printenv SLURMD_NODENAME\r"

    expect {
        -re "($number): *($re_word_str)" {
            # "<rank>: <nodename>" line; store the name in node<rank>.
            # At most five tasks are ever launched, so only ranks 0..4
            # are recorded.
            set task_id $expect_out(1,string)
            foreach rank {0 1 2 3 4} {
                if {$task_id == $rank} {
                    set node$rank $expect_out(2,string)
                    break
                }
            }
            exp_continue
        }
        -re "error" {
            if { $i == 4 } {
                log_debug "This error is expected, no worries"
            } else {
                fail "Unexpected error received"
            }
            exp_continue
        }
        -re $test_prompt {
            #log_debug "Srun completed"
        }
        -re "slurm job ($number)" {
            set job_id $expect_out(1,string)
            exp_continue
        }
        timeout {
            fail "srun not responding"
        }
        eof {
        }
    }

    if { $i == 3 } {
        # NOTE(review): this compares rank 2's node, not rank 0's, with
        # the node passed to -w.  Kept exactly as in the original check;
        # confirm that rank 2 is the intended rank.
        if {$node2 ne $single_node} {
            fail "We only asked for 1 node in the allocation and we were unable to get it"
        }
    } else {
        # Compare what each rank reported with the requested layout.
        set rank 0
        foreach want $expected {
            set got [set node$rank]
            if {$got ne $want} {
                log_debug "Task $rank ran on '$got', expected '$want'"
                fail "Tasks not distributed by -w"
            }
            incr rank
        }
    }
}
| |
# Leave the shell spawned under salloc and wait for the process to end.
send "exit\r"
expect {
    eof {
        # Spawned process closed its output: reap it.
        wait
    }
    timeout {
        kill_srun
        fail "srun not responding"
    }
    -re "error.*Exit 1" {
        # Error text followed by "Exit 1" is tolerated here; keep
        # waiting for the process to finish.
        log_debug "This error is expected, no worries"
        exp_continue
    }
}