| #!/usr/bin/expect |
| ############################################################################ |
| # Purpose: Test of SLURM functionality |
| # Test of CPU affinity support. |
| # |
| # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR |
| # "WARNING: ..." with an explanation of why the test can't be made, OR |
| # "FAILURE: ..." otherwise with an explanation of the failure, OR |
| # anything else indicates a failure mode that must be investigated. |
| ############################################################################ |
| # Copyright (C) 2005-2006 The Regents of the University of California. |
| # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| # Written by Morris Jette <jette1@llnl.gov> |
| # UCRL-CODE-226842. |
| # |
| # This file is part of SLURM, a resource management program. |
| # For details, see <http://www.llnl.gov/linux/slurm/>. |
| # |
| # SLURM is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with SLURM; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| |
| # Identity of this test and the helper program built for it |
| set test_id "18.36" |
| set file_prog "test${test_id}.prog" |
| |
| # Running state: overall result and the allocation's job id (0 = none) |
| set exit_code 0 |
| set job_id 0 |
| |
| print_header $test_id |
| |
| # |
| # Scan the "scontrol show config" output for the task/affinity plugin. |
| # Output is hidden (log_user 0) while scanning; without the plugin the |
| # test is skipped with a WARNING and a zero exit status. |
| # |
| set affinity 0 |
| log_user 0 |
| spawn $scontrol show config |
| expect { |
|     -re "task/affinity" { set affinity 1; exp_continue } |
|     eof { wait } |
| } |
| log_user 1 |
| if {$affinity} { |
|     send_user "\ntask affinity plugin installed\n" |
| } else { |
|     send_user "\nWARNING: task affinity not supported on this system\n" |
|     exit 0 |
| } |
| |
| # |
| # Build a test program to report affinity by task: |
| # remove any stale binary, build it with make's built-in rules |
| # (empty makefile), then restrict it to owner rwx. |
| # Steps run in order; a failing exec aborts the script. |
| # |
| set build_steps [list] |
| lappend build_steps [list $bin_rm -f $file_prog] |
| lappend build_steps [list $bin_make -f /dev/null $file_prog] |
| lappend build_steps [list $bin_chmod 700 $file_prog] |
| foreach build_step $build_steps { |
|     eval exec $build_step |
| } |
| |
| # |
| # Create an allocation: one node, two minute limit, running a shell |
| # that the following job steps are typed into. |
| # |
| set salloc_pid [spawn $salloc -N1 --verbose -t2 $bin_bash] |
| expect { |
|     -re "Granted job allocation ($number)" { |
|         # Store the id in job_id: that is the variable every cleanup |
|         # path tests before calling cancel_job. |
|         set job_id $expect_out(1,string) |
|     } |
|     timeout { |
|         send_user "\nFAILURE: salloc not responding\n" |
|         if {$job_id != 0} { |
|             cancel_job $job_id |
|         } |
|         # Negative pid is passed to slow_kill, as elsewhere in this file |
|         slow_kill [expr 0 - $salloc_pid] |
|         exit 1 |
|     } |
| } |
| |
| # |
| # Ask the allocation's shell how many CPUs the job has on the node. |
| # available_cpus starts at 0 so that a timeout or EOF is detected |
| # below instead of surfacing later as an unset-variable Tcl error. |
| # |
| set available_cpus 0 |
| send "env |grep SLURM_JOB_CPUS_PER_NODE\n" |
| expect { |
|     -re "SLURM_JOB_CPUS_PER_NODE=($number)" { |
|         set available_cpus $expect_out(1,string) |
|     } |
| } |
| if {$available_cpus == 0} { |
|     send_user "\nFAILURE: unable to determine SLURM_JOB_CPUS_PER_NODE\n" |
|     if {$job_id != 0} { |
|         cancel_job $job_id |
|     } |
|     slow_kill [expr 0 - $salloc_pid] |
|     exit 1 |
| } |
| |
| # |
| # Run a job step to get allocated processor count and affinity. |
| # task_cnt counts the TASK_ID lines reported; mask keeps the last |
| # mask value seen. |
| # |
| expect -re $prompt |
| set mask 0 |
| set task_cnt 0 |
| send "$slaunch -n $available_cpus $file_prog\n" |
| expect { |
|     -re "TASK_ID:($number),MASK:($number)" { |
|         incr task_cnt |
|         set mask $expect_out(2,string) |
|         exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|         set exit_code 1 |
|     } |
|     -re $prompt |
| } |
| # Every later check is scaled by task_cnt; with zero tasks they would |
| # all compare 0 to 0 and the per-CPU loops would not run at all. |
| if {$task_cnt == 0} { |
|     send_user "\nFAILURE: no task affinity masks reported\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Run a job step with affinity (--cpu_bind=rank). |
| # The per-task masks are summed and compared with a mask that has |
| # one bit set for each of the task_cnt tasks. |
| # |
| set expected_mask [expr {(1 << $task_cnt) - 1}] |
| set task_mask 0 |
| send "$slaunch -n $available_cpus --cpu_bind=rank $file_prog\n" |
| expect { |
|     -re "TASK_ID:($number),MASK:($number)" { |
|         incr task_mask $expect_out(2,string) |
|         exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|         set exit_code 1 |
|     } |
|     -re $prompt |
| } |
| if {$task_mask != $expected_mask} { |
|     # Print the two values that were actually compared |
|     send_user "\nFAILURE: affinity mask inconsistency ($task_mask,$expected_mask)\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Verbose job step with every task mapped to CPU 0 (map_cpu:0). |
| # The reported masks are summed; the total must equal the task count. |
| # |
| set task_mask 0 |
| send "$slaunch -n $available_cpus --cpu_bind=verbose,map_cpu:0 $file_prog\n" |
| expect { |
|     -re "TASK_ID:($number),MASK:($number)" { |
|         incr task_mask $expect_out(2,string); exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1; exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|         set exit_code 1 |
|     } |
|     -re $prompt |
| } |
| if {$task_cnt != $task_mask} { |
|     send_user "\nFAILURE: affinity mask inconsistent ($task_mask,$task_cnt)\n" |
|     set exit_code 1 |
| } |
| # |
| # Repeat the verbose map_cpu:0 step, this time counting the |
| # "cpu_bind=MAP" messages; one is expected per task. |
| # |
| set verbose_cnt 0 |
| send "$slaunch -n $available_cpus --cpu_bind=verbose,map_cpu:0 $file_prog\n" |
| expect { |
|     -re "cpu_bind=MAP" { |
|         incr verbose_cnt; exp_continue |
|     } |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1; exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|         set exit_code 1 |
|     } |
|     -re $prompt |
| } |
| if {$task_cnt != $verbose_cnt} { |
|     send_user "\nFAILURE: verbose messages count inconsisent ($verbose_cnt,$task_cnt)\n" |
|     set exit_code 1 |
| } |
| |
| # |
| # Run all tasks all bound to the same CPU by specifying a map (for each CPU). |
| # With every task on CPU n the summed masks must be task_cnt * (1 << n). |
| # |
| for {set cpu_cnt 0} {$cpu_cnt < $task_cnt} {incr cpu_cnt} { |
|     set mask_sum 0 |
|     set mask [expr {1 << $cpu_cnt}] |
|     set expected_sum [expr {$task_cnt * $mask}] |
|     send "$slaunch -n $available_cpus --cpu_bind=map_cpu:$cpu_cnt $file_prog\n" |
|     expect { |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             incr mask_sum $expect_out(2,string) |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         timeout { |
|             send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|             set exit_code 1 |
|             # Keep waiting for the prompt after reporting the timeout |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
|     if {$mask_sum != $expected_sum} { |
|         # Show the expected total, not the bare task count, so the two |
|         # printed numbers are the ones that actually differ |
|         send_user "\nFAILURE: affinity mask inconsistent ($mask_sum,$expected_sum)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| # |
| # Run all tasks all bound to the same CPU by specifying a mask (for each CPU). |
| # The mask for CPU n is 1 << n, passed to slaunch in the form produced by |
| # dec2hex16; the summed masks must be task_cnt * (1 << n). |
| # |
| for {set cpu_cnt 0} {$cpu_cnt < $task_cnt} {incr cpu_cnt} { |
|     set mask_sum 0 |
|     set mask [expr {1 << $cpu_cnt}] |
|     set mstr [dec2hex16 $mask] |
|     set expected_sum [expr {$task_cnt * $mask}] |
|     send "$slaunch -n $available_cpus --cpu_bind=mask_cpu:$mstr $file_prog\n" |
|     expect { |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             incr mask_sum $expect_out(2,string) |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         timeout { |
|             send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|             set exit_code 1 |
|             # Keep waiting for the prompt after reporting the timeout |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
|     if {$mask_sum != $expected_sum} { |
|         # Show the expected total, not the bare task count, so the two |
|         # printed numbers are the ones that actually differ |
|         send_user "\nFAILURE: affinity mask inconsistent ($mask_sum,$expected_sum)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| # |
| # Generate forward, reverse and alternating masks and maps. |
| # Each layout is collected as a list and comma-joined afterwards: |
| #   fwd_*  CPUs in ascending order |
| #   rev_*  CPUs in descending order |
| #   alt_*  odd-numbered CPUs prepended, even-numbered CPUs appended |
| # The *_map strings hold CPU numbers, the *_mask strings hold the |
| # matching single-bit masks as formatted by dec2hex16. |
| # full_mask has one bit set per task. |
| # |
| set full_mask [expr {(1 << $task_cnt) - 1}] |
| set fwd_map_list [list] |
| set fwd_mask_list [list] |
| set rev_map_list [list] |
| set rev_mask_list [list] |
| set alt_map_list [list] |
| set alt_mask_list [list] |
| for {set cpu_cnt 0} {$cpu_cnt < $task_cnt} {incr cpu_cnt} { |
|     set mask [expr {1 << $cpu_cnt}] |
|     set mstr [dec2hex16 $mask] |
|     lappend fwd_map_list $cpu_cnt |
|     lappend fwd_mask_list $mstr |
|     set rev_map_list [linsert $rev_map_list 0 $cpu_cnt] |
|     set rev_mask_list [linsert $rev_mask_list 0 $mstr] |
|     if {$cpu_cnt % 2} { |
|         set alt_map_list [linsert $alt_map_list 0 $cpu_cnt] |
|         set alt_mask_list [linsert $alt_mask_list 0 $mstr] |
|     } else { |
|         lappend alt_map_list $cpu_cnt |
|         lappend alt_mask_list $mstr |
|     } |
| } |
| set fwd_map [join $fwd_map_list ","] |
| set fwd_mask [join $fwd_mask_list ","] |
| set rev_map [join $rev_map_list ","] |
| set rev_mask [join $rev_mask_list ","] |
| set alt_map [join $alt_map_list ","] |
| set alt_mask [join $alt_mask_list ","] |
| |
| send_user "\n" |
| send_user "full_mask: $full_mask\n" |
| send_user "fwd_map: $fwd_map\n" |
| send_user "fwd_mask: $fwd_mask\n" |
| send_user "rev_map: $rev_map\n" |
| send_user "rev_mask: $rev_mask\n" |
| send_user "alt_map: $alt_map\n" |
| send_user "alt_mask: $alt_mask\n" |
| |
| # |
| # Run all tasks bound to a different CPU, once for each binding layout: |
| # a forward, reverse and alternating map, then a forward, reverse and |
| # alternating mask. The six runs differ only in the --cpu_bind argument, |
| # so they share one loop body. For each run the per-task masks are |
| # summed and must equal full_mask. |
| # |
| set bind_specs [list] |
| lappend bind_specs "map_cpu:$fwd_map" "map_cpu:$rev_map" "map_cpu:$alt_map" |
| lappend bind_specs "mask_cpu:$fwd_mask" "mask_cpu:$rev_mask" "mask_cpu:$alt_mask" |
| foreach bind_spec $bind_specs { |
|     set task_mask 0 |
|     send "$slaunch -n $available_cpus --cpu_bind=$bind_spec $file_prog\n" |
|     expect { |
|         -re "TASK_ID:($number),MASK:($number)" { |
|             incr task_mask $expect_out(2,string) |
|             exp_continue |
|         } |
|         -re "error" { |
|             send_user "\nFAILURE: some error occurred\n" |
|             set exit_code 1 |
|             exp_continue |
|         } |
|         timeout { |
|             send_user "\nFAILURE: slaunch not responding or failure to recognize prompt\n" |
|             set exit_code 1 |
|             # Keep waiting for the prompt after reporting the timeout |
|             exp_continue |
|         } |
|         -re $prompt |
|     } |
|     if {$task_mask != $full_mask} { |
|         send_user "\nFAILURE: affinity mask inconsistent ($task_mask,$full_mask)\n" |
|         set exit_code 1 |
|     } |
| } |
| |
| # |
| # Terminate the job, free the allocation. |
| # Leaving the shell should end salloc; eof is the normal outcome. |
| # |
| send "exit\n" |
| expect { |
|     -re "error" { |
|         send_user "\nFAILURE: some error occurred\n" |
|         set exit_code 1 |
|         # Keep going so salloc is still reaped by the eof branch |
|         exp_continue |
|     } |
|     timeout { |
|         send_user "\nFAILURE: salloc not responding or failure to recognize prompt\n" |
|         if {$job_id != 0} { |
|             cancel_job $job_id |
|         } |
|         slow_kill [expr 0 - $salloc_pid] |
|         set exit_code 1 |
|     } |
|     eof { |
|         wait |
|     } |
| } |
| |
| # |
| # Report the outcome. The test program is removed only on success; |
| # on failure a hint about slurm.conf is printed instead. |
| # |
| if {$exit_code != 0} { |
|     send_user "\nNOTE: This test can fail if the node configuration in slurm.conf \n" |
|     send_user " (sockets, cores, threads) differs from the actual configuration\n" |
| } else { |
|     exec $bin_rm -f $file_prog |
|     send_user "\nSUCCESS\n" |
| } |
| exit $exit_code |
| |