| #!/usr/bin/expect |
| ############################################################################ |
| # Purpose: Test of SLURM functionality |
| # Test of CPU affinity support for multi-core systems. |
| # |
| # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR |
| # "WARNING: ..." with an explanation of why the test can't be made, OR |
| # "FAILURE: ..." otherwise with an explanation of the failure, OR |
| # anything else indicates a failure mode that must be investigated. |
| ############################################################################ |
| # Copyright (C) 2005-2007 The Regents of the University of California. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Morris Jette <jette1@llnl.gov> |
| # UCRL-CODE-226842. |
| # |
| # This file is part of SLURM, a resource management program. |
| # For details, see <http://www.llnl.gov/linux/slurm/>. |
| # |
| # SLURM is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with SLURM; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| # Shared test-suite definitions. The variables and procedures used below |
| # that this script never sets itself (for example $scontrol, $srun, |
| # $salloc, $prompt, $number, print_header, slow_kill) are expected to |
| # come from this file. |
| source ./globals |
| |
| # Test identity, name of the helper program, and the overall result that |
| # is handed to the final "exit" (0 until some check fails). |
| set test_id     "1.91" |
| set file_prog   "test${test_id}.prog" |
| set exit_code   0 |
| |
| print_header $test_id |
| |
| # |
| # Check whether "task/affinity" appears in the output of |
| # "scontrol show config". If it does not, there is nothing to test: |
| # print a WARNING and stop with exit status 0. |
| # |
| set affinity 0 |
| log_user 0 |
| spawn $scontrol show config |
| expect { |
|     -re "task/affinity" { |
|         # Seen in the configuration dump; keep reading until eof. |
|         set affinity 1 |
|         exp_continue |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| log_user 1 |
| if {$affinity} { |
|     send_user "\ntask affinity plugin installed\n" |
| } else { |
|     send_user "\nWARNING: task affinity not supported on this system\n" |
|     exit 0 |
| } |
| |
| # |
| # Read the Sockets/Cores/Threads counts from "scontrol show node". |
| # Every match overwrites the previous value, so when the output carries |
| # several values for a field the last one seen is kept. If any of the |
| # three counts is still 0 afterwards, print a WARNING and stop with exit |
| # status 0. |
| # |
| foreach count_var {num_sockets num_cores num_threads} { |
|     set $count_var 0 |
| } |
| log_user 0 |
| spawn $scontrol show node |
| expect { |
|     -re "Sockets=($number)" { |
|         set num_sockets $expect_out(1,string) |
|         exp_continue |
|     } |
|     -re "Cores=($number)" { |
|         set num_cores $expect_out(1,string) |
|         exp_continue |
|     } |
|     -re "Threads=($number)" { |
|         set num_threads $expect_out(1,string) |
|         exp_continue |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| log_user 1 |
| if {$num_sockets != 0 && $num_cores != 0 && $num_threads != 0} { |
|     send_user "Node config: Sockets=$num_sockets Cores=$num_cores Threads=$num_threads\n\n" |
| } else { |
|     send_user "\nWARNING: Could not determine number of Sockets:Cores:Threads (saw $num_sockets:$num_cores:$num_threads)\n" |
|     exit 0 |
| } |
| |
| # |
| # Build a test program to report affinity by task. |
| # |
| # Any previous copy is removed first so the program is always rebuilt. |
| # Tcl's exec raises an error both when make exits non-zero and when it |
| # merely writes to stderr (e.g. compiler warnings), so the build runs |
| # under catch: if the program exists afterwards the build is accepted, |
| # otherwise an explicit FAILURE is reported instead of an uncaught Tcl |
| # error. |
| # |
| exec $bin_rm -f $file_prog |
| if {[catch {exec $bin_make -f /dev/null $file_prog} build_output]} { |
|     if {![file exists $file_prog]} { |
|         send_user "\nFAILURE: unable to build $file_prog: $build_output\n" |
|         exit 1 |
|     } |
| } |
| exec $bin_chmod 700 $file_prog |
| |
| # |
| # Create an allocation: one node, exclusive, two-minute limit, running |
| # $bin_bash so that job steps can be typed at its prompt. |
| # SLURM_CPU_BIND is set to "verbose" in the environment before salloc is |
| # spawned. salloc_pid is kept so the allocation can be killed on timeout. |
| # |
| global env |
| set env(SLURM_CPU_BIND) verbose |
| set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash] |
| |
| ############################################################################# |
| # |
| # Run a job step to get allocated processor count and affinity |
| # |
| # task_cnt counts the TASK_ID:<n>,MASK:<m> lines seen; the later sections |
| # use it as the task count (-n) and as the upper bound for -c. mask keeps |
| # the MASK value of the last line seen. |
| # |
| expect -re $prompt |
| set mask 0 |
| set task_cnt 0 |
| send "$srun -c1 $file_prog\n" |
| expect { |
|     -re "TASK_ID:($number),MASK:($number)" { |
|         incr task_cnt |
|         set mask $expect_out(2,string) |
|         exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: salloc not responding " |
|         send_user "or failure to recognize prompt\n" |
|         slow_kill $salloc_pid |
|         exit 1 |
|     } |
|     -re $prompt |
| } |
| # Without at least one reported task the remaining sections cannot work: |
| # the next step would be sent with "-n 0" and the cpus-per-task loop would |
| # not run at all. Fail here with a clear message and release the |
| # allocation, as the timeout branch above does. |
| if {$task_cnt == 0} { |
|     send_user "\nFAILURE: no task affinity reported by initial job step\n" |
|     slow_kill $salloc_pid |
|     exit 1 |
| } |
| |
| ############################################################################# |
| # |
| # Run one task per counted processor with -c1 and -B 1:1:1 and add up the |
| # reported masks. The sum is compared with a value that has the low |
| # $task_cnt bits set, i.e. (1 << task_cnt) - 1. |
| # |
| set expected_mask [expr {(1 << $task_cnt) - 1}] |
| set task_mask 0 |
| send "$srun -c1 -n $task_cnt -B 1:1:1 $file_prog\n" |
| expect { |
|     timeout { |
|         send_user "\nFAILURE: srun (from --allocate) not responding " |
|         send_user "or failure to recognize prompt\n" |
|         set exit_code 1 |
|     } |
|     -re "TASK_ID:($number),MASK:($number)" { |
|         # Accumulate this task's mask into the running sum |
|         incr task_mask $expect_out(2,string) |
|         exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     -re $prompt |
| } |
| if {$task_mask != $expected_mask} { |
|     send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n" |
|     set exit_code 1 |
| } |
| |
| ############################################################################# |
| # |
| # Sweep the socket count from 1 to $num_sockets, with cores and threads |
| # held at $num_cores and $num_threads. For every step the number of |
| # reported tasks and the total number of set bits over all reported masks |
| # must both equal sockets * cores * threads for that step. |
| # |
| for {set this_cnt 1} {$this_cnt <= $num_sockets} {incr this_cnt 1} { |
|     set expected_tasks [expr {$this_cnt * $num_cores * $num_threads}] |
|     set num_tasks 0 |
|     set num_bits 0 |
|     set task_mask 0 |
|     send "$srun -B $this_cnt-$this_cnt:$num_cores:$num_threads $file_prog\n" |
|     expect { |
|         timeout { |
|             send_user "\nFAILURE: srun (from --allocate) not responding " |
|             send_user "or failure to recognize prompt\n" |
|             set exit_code 1 |
|         } |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             # task_mask is summed here but not checked in this section |
|             incr task_mask $expect_out(2,string) |
|             incr num_tasks 1 |
|             # Shift the mask right one bit at a time, counting the 1 bits |
|             for {set this_mask $expect_out(2,string)} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} { |
|                 if {$this_mask & 1} { |
|                     incr num_bits 1 |
|                 } |
|             } |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
| |
|     if {$num_tasks != $expected_tasks} { |
|         send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
|     if {$num_bits != $expected_tasks} { |
|         send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| |
| ############################################################################# |
| # |
| # Sweep the core count from 1 to $num_cores, with sockets and threads |
| # held at $num_sockets and $num_threads. For every step the number of |
| # reported tasks and the total number of set bits over all reported masks |
| # must both equal sockets * cores * threads for that step. |
| # |
| for {set this_cnt 1} {$this_cnt <= $num_cores} {incr this_cnt 1} { |
|     set expected_tasks [expr {$num_sockets * $this_cnt * $num_threads}] |
|     set num_tasks 0 |
|     set num_bits 0 |
|     set task_mask 0 |
|     send "$srun -B $num_sockets:$this_cnt-$this_cnt:$num_threads $file_prog\n" |
|     expect { |
|         timeout { |
|             send_user "\nFAILURE: salloc not responding " |
|             send_user "or failure to recognize prompt\n" |
|             set exit_code 1 |
|         } |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             # task_mask is summed here but not checked in this section |
|             incr task_mask $expect_out(2,string) |
|             incr num_tasks 1 |
|             # Shift the mask right one bit at a time, counting the 1 bits |
|             for {set this_mask $expect_out(2,string)} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} { |
|                 if {$this_mask & 1} { |
|                     incr num_bits 1 |
|                 } |
|             } |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
| |
|     if {$num_tasks != $expected_tasks} { |
|         send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
|     if {$num_bits != $expected_tasks} { |
|         send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| |
| ############################################################################# |
| # |
| # Sweep the thread count from 1 to $num_threads, with sockets and cores |
| # held at $num_sockets and $num_cores. For every step the number of |
| # reported tasks and the total number of set bits over all reported masks |
| # must both equal sockets * cores * threads for that step. |
| # |
| for {set this_cnt 1} {$this_cnt <= $num_threads} {incr this_cnt 1} { |
|     set expected_tasks [expr {$num_sockets * $num_cores * $this_cnt}] |
|     set num_tasks 0 |
|     set num_bits 0 |
|     set task_mask 0 |
|     send "$srun -B $num_sockets:$num_cores:$this_cnt-$this_cnt $file_prog\n" |
|     expect { |
|         timeout { |
|             send_user "\nFAILURE: salloc not responding " |
|             send_user "or failure to recognize prompt\n" |
|             set exit_code 1 |
|         } |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             # task_mask is summed here but not checked in this section |
|             incr task_mask $expect_out(2,string) |
|             incr num_tasks 1 |
|             # Shift the mask right one bit at a time, counting the 1 bits |
|             for {set this_mask $expect_out(2,string)} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} { |
|                 if {$this_mask & 1} { |
|                     incr num_bits 1 |
|                 } |
|             } |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
| |
|     if {$num_tasks != $expected_tasks} { |
|         send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
|     if {$num_bits != $expected_tasks} { |
|         send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| ############################################################################# |
| # |
| # Sweep cpus per task (-c) from 1 to $task_cnt with -B 1:1:1. Every step |
| # must report exactly one task, and the number of set bits over the |
| # reported masks must equal the -c value used for that step. |
| # |
| for {set this_cnt 1} {$this_cnt <= $task_cnt} {incr this_cnt 1} { |
|     set expected_tasks 1 |
|     set num_tasks 0 |
|     set num_bits 0 |
|     set task_mask 0 |
|     send "$srun -c$this_cnt -B 1:1:1 $file_prog\n" |
|     expect { |
|         timeout { |
|             send_user "\nFAILURE: salloc not responding " |
|             send_user "or failure to recognize prompt\n" |
|             set exit_code 1 |
|         } |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             # task_mask is summed here but not checked in this section |
|             incr task_mask $expect_out(2,string) |
|             incr num_tasks 1 |
|             # Shift the mask right one bit at a time, counting the 1 bits |
|             for {set this_mask $expect_out(2,string)} {$this_mask > 0} {set this_mask [expr {$this_mask >> 1}]} { |
|                 if {$this_mask & 1} { |
|                     incr num_bits 1 |
|                 } |
|             } |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
| |
|     if {$num_tasks != $expected_tasks} { |
|         send_user "\nFAILURE: number of tasks inconsistent ($num_tasks,$expected_tasks)\n" |
|         set exit_code 1 |
|     } |
|     if {$num_bits != $this_cnt} { |
|         send_user "\nFAILURE: number of set bits inconsistent ($num_bits,$this_cnt)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| ############################################################################# |
| # |
| # Run $task_cnt tasks with plane distribution (-m plane=4) to exercise |
| # the option. The reported masks are added up and compared with |
| # ((1 << task_cnt) - 1) * task_cnt, which is the sum obtained when each |
| # of the $task_cnt tasks reports a mask with the low $task_cnt bits set. |
| # |
| set expected_mask [expr {((1 << $task_cnt) - 1) * $task_cnt}] |
| set task_mask 0 |
| send "$srun -n $task_cnt -m plane=4 $file_prog\n" |
| expect { |
|     timeout { |
|         send_user "\nFAILURE: salloc not responding " |
|         send_user "or failure to recognize prompt\n" |
|         set exit_code 1 |
|     } |
|     -re "TASK_ID:($number),MASK:($number)" { |
|         # Accumulate this task's mask into the running sum |
|         incr task_mask $expect_out(2,string) |
|         exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     -re $prompt |
| } |
| if {$task_mask != $expected_mask} { |
|     send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n" |
|     set exit_code 1 |
| } |
| |
| ############################################################################# |
| # |
| # Terminate the job, free the allocation |
| # |
| # "exit" is typed at the shell spawned under salloc; the script then |
| # waits for end-of-file on that process. An "error" in the output marks |
| # the test as failed but, like the error branches of the job steps above, |
| # keeps reading so that eof is still reached and the process is waited |
| # for (or the timeout branch kills it). |
| # |
| send "exit\n" |
| expect { |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: salloc not responding " |
|         send_user "or failure to recognize prompt\n" |
|         slow_kill $salloc_pid |
|         set exit_code 1 |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # Any recorded failure: exit with that status and leave the test program |
| # in place. |
| if {$exit_code != 0} { |
|     exit $exit_code |
| } |
| |
| # Every check passed: remove the test program and report success. |
| exec $bin_rm -f $file_prog |
| send_user "\nSUCCESS\n" |
| exit $exit_code |
| |