| #!/usr/bin/env expect | 
 | ############################################################################ | 
 | # Purpose: Test of Slurm functionality | 
 | #          Test that the thread-spec option in sbatch allocates the correct | 
 | #          number of cores and that tasks spread over multiple nodes | 
 | #          when there is not enough resources on one node. | 
 | ############################################################################ | 
 | # Copyright (C) SchedMD LLC. | 
 | # | 
 | # This file is part of Slurm, a resource management program. | 
 | # For details, see <https://slurm.schedmd.com/>. | 
 | # Please also read the included file: DISCLAIMER. | 
 | # | 
 | # Slurm is free software; you can redistribute it and/or modify it under | 
 | # the terms of the GNU General Public License as published by the Free | 
 | # Software Foundation; either version 2 of the License, or (at your option) | 
 | # any later version. | 
 | # | 
 | # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY | 
 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
 | # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
 | # details. | 
 | # | 
 | # You should have received a copy of the GNU General Public License along | 
 | # with Slurm; if not, write to the Free Software Foundation, Inc., | 
 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
 | ############################################################################ | 
 | source ./globals | 
 |  | 
 | # Scratch files, all created under $test_dir: | 
 | #   file_in  - batch script used to look up the first node's description | 
 | #   file_out - output file shared by every job this test submits | 
 | #   spec_in  - batch script run by the thread-spec jobs | 
 | set file_in  ${test_dir}/input | 
 | set file_out ${test_dir}/output | 
 | set spec_in  ${test_dir}/spec_thread_script.in | 
 |  | 
 | # Id of the most recently submitted job; stays 0 until a job is submitted. | 
 | set job_id 0 | 
 |  | 
 | # Cancels the job whose id is currently stored in the global job_id. | 
 | proc cleanup {} { | 
 | 	cancel_job $::job_id | 
 | } | 
 |  | 
 | ############################################################################# | 
 | # | 
 | # Submits a batch job with --thread-spec that must include the given node, | 
 | # then checks that the job reports the correct number of specialized threads | 
 | # and that the number of nodes the job uses is correct. | 
 | # | 
 | # task:        offset added to the number of tasks that fit on the node | 
 | #              (CPUTot minus thread_spec, minimum 1); the absolute value | 
 | #              of the sum is passed to sbatch as -n | 
 | # node:        node name passed to sbatch as -w | 
 | # thread_spec: value passed to sbatch as --thread-spec | 
 | # exp_nodes: | 
 | #    0: job must only use the specified node | 
 | #    1: job must use more than the specified node | 
 | #   -1: job must fail because the job exceeds the number of threads | 
 | # | 
 | # The id of the submitted job is stored in the global job_id. | 
 | # | 
 | ############################################################################# | 
 | proc thread_spec_job {task node thread_spec exp_nodes} { | 
 | 	global sbatch scontrol spec_in file_out number cpu_tot job_id | 
 |  | 
 | 	set job_id 0 | 
 | 	set num_nodes 0 | 
 |  | 
 | 	# Determine the number of tasks that can be run: every CPU that is | 
 | 	# not consumed by thread specialization, but never fewer than one. | 
 | 	if {$cpu_tot > $thread_spec} { | 
 | 		set task_limit [expr {$cpu_tot - $thread_spec}] | 
 | 	} else { | 
 | 		set task_limit 1 | 
 | 	} | 
 | 	# Braced so that expr performs the only round of substitution | 
 | 	set task_cnt [expr {abs($task_limit + $task)}] | 
 |  | 
 | 	set error_chk 0 | 
 | 	spawn $sbatch -t1 -w$node --thread-spec=$thread_spec -n$task_cnt -o$file_out $spec_in | 
 | 	expect { | 
 | 		-re "Submitted batch job ($number)" { | 
 | 			set job_id $expect_out(1,string) | 
 | 			exp_continue | 
 | 		} | 
 | 		-re "error" { | 
 | 			# An error is only acceptable for the must-fail case | 
 | 			if {$exp_nodes != -1} { | 
 | 				fail "sbatch should not have produced an error" | 
 | 			} | 
 | 			set error_chk 1 | 
 | 			exp_continue | 
 | 		} | 
 | 		timeout { | 
 | 			fail "sbatch is not responding" | 
 | 		} | 
 | 		eof { | 
 | 			wait | 
 | 		} | 
 | 	} | 
 |  | 
 | 	if {$job_id == 0 && $error_chk == 0} { | 
 | 		fail "Job was not submitted" | 
 |  | 
 | 	} elseif {$exp_nodes == -1 && $job_id != 0} { | 
 | 		fail "This job should have failed but did not" | 
 |  | 
 | 	} elseif {$exp_nodes == -1 && $error_chk != 0} { | 
 | 		log_debug "This error is expected do not worry" | 
 |  | 
 | 	} else { | 
 | 		wait_for_job -fail $job_id "RUNNING" | 
 | 		set thread_chk 0 | 
 | 		spawn $scontrol show job $job_id | 
 | 		# The ThreadSpec pattern requires a non-digit after the value so | 
 | 		# that, for example, ThreadSpec=2 is not satisfied by | 
 | 		# ThreadSpec=20. | 
 | 		expect { | 
 | 			-re "NumNodes=($number)" { | 
 | 				set num_nodes $expect_out(1,string) | 
 | 				exp_continue | 
 | 			} | 
 | 			-re "ThreadSpec=$thread_spec\[^0-9\]" { | 
 | 				set thread_chk 1 | 
 | 				exp_continue | 
 | 			} | 
 | 			timeout { | 
 | 				fail "scontrol is not responding" | 
 | 			} | 
 | 			eof { | 
 | 				wait | 
 | 			} | 
 | 		} | 
 |  | 
 | 		# Checks that the node uses the correct number of specialized | 
 | 		# threads and that the number of nodes the job uses is correct. | 
 | 		subtest {$thread_chk != 0} "Check that the node uses the correct number of specialized threads" | 
 |  | 
 | 		wait_for_job -fail $job_id "DONE" | 
 | 	} | 
 |  | 
 | 	# num_nodes is still 0 here unless scontrol reported NumNodes above | 
 | 	if {$exp_nodes == 1} { | 
 | 		subtest {$num_nodes > 1} "Check that the number of nodes that the job uses is correct" "Job $job_id should use more then 1 node" | 
 | 	} | 
 |  | 
 | 	if {$exp_nodes == 0} { | 
 | 		subtest {$num_nodes == 1} "Check that the number of nodes that the job uses is correct" "Job $job_id should use only $node" | 
 | 	} | 
 | } | 
 |  | 
 | ############################################################################# | 
 | # | 
 | # Tests begin here | 
 | # | 
 | ############################################################################# | 
 |  | 
 | # Client-side option validation comes first; it is checked before any of | 
 | # the configuration-dependent skips below. | 
 |  | 
 | # --thread-spec together with -S on the command line | 
 | set output [run_command_output -xfail -subtest "$sbatch --thread-spec=1 -S1 --wrap='/bin/true'"] | 
 | subtest {[regexp "sbatch: fatal: -S/--core-spec and --thred-spec options are mutually exclusive" $output]} "sbatch should fail for both --thread-spec and -S given" | 
 |  | 
 | # Both options supplied only through the environment | 
 | set output [run_command_output -xfail -subtest "SBATCH_CORE_SPEC=1 SBATCH_THREAD_SPEC=1 $sbatch --wrap='/bin/true'"] | 
 | subtest {[regexp "Both --core-spec and --thread-spec set using environment variables. Those options are mutually exclusive" $output]} "sbatch should fail when both SBATCH_CORE_SPEC SBATCH_THREAD_SPEC set in environment and no cli given" | 
 |  | 
 | # Configurations under which the rest of the test is skipped | 
 | set select_is_linear [check_config_select "linear"] | 
 | if {$select_is_linear} { | 
 | 	skip "This test is incompatible with select/linear" | 
 | } | 
 |  | 
 | set select_type_params [get_config_param "SelectTypeParameters"] | 
 | if {[param_contains $select_type_params "CR_SOCKET*"]} { | 
 | 	skip "This test is incompatible with CR_SOCKET allocations" | 
 | } | 
 |  | 
 | set spec_usage_allowed [get_config_param "AllowSpecResourcesUsage"] | 
 | if {!$spec_usage_allowed} { | 
 | 	skip "AllowSpecResourcesUsage not configured to permit thread specialization" | 
 | } | 
 |  | 
 | # Remove any vestigial files | 
 | exec $bin_rm -f $file_in $file_out $spec_in | 
 |  | 
 | # Node-discovery script: take the first hostname of the job's node list | 
 | # (SLURM_JOB_NODELIST) and print that node's "scontrol show node" record. | 
 | # \$ keeps the dollar sign literal so the shell, not Tcl, expands it. | 
 | make_bash_script $file_in " | 
 | first=\$($scontrol show hostnames \$SLURM_JOB_NODELIST\ | head -n1)\ | 
 |  | 
 | $scontrol show node \$first\ | 
 |  | 
 | " | 
 | # Script run by the thread-spec jobs: it only sleeps for 5 seconds. | 
 | make_bash_script $spec_in "sleep 5" | 
 |  | 
 | # Submit the node-discovery script on two exclusive nodes. A rejected | 
 | # submission skips the test; otherwise the job id is captured in job_id. | 
 | # NOTE(review): the skip message mentions srun although this test only | 
 | # uses sbatch - looks copied from another test, confirm before changing. | 
 | spawn $sbatch --exclusive -t1 -N2 -o$file_out $file_in | 
 | expect { | 
 | 	-re "Batch job submission failed" { | 
 | 		skip "Can't test srun task distribution" | 
 | 	} | 
 | 	-re "Submitted batch job ($number)" { | 
 | 		set job_id $expect_out(1,string) | 
 | 		exp_continue | 
 | 	} | 
 | 	timeout { | 
 | 		fail "sbatch is not responding" | 
 | 	} | 
 | 	eof { | 
 | 		wait | 
 | 	} | 
 | } | 
 | # job_id keeps its initial value of 0 when no "Submitted batch job" line | 
 | # was seen. | 
 | if {$job_id == 0} { | 
 | 	fail "sbatch did not submit job" | 
 | } | 
 |  | 
 | # Wait for the node-discovery job to produce its output file, then pull | 
 | # the first node's name and CPU layout out of it. | 
 | wait_for_file -fail $file_out | 
 |  | 
 | # core_cnt starts at 0 so that a missing CoresPerSocket field makes the | 
 | # thread count computed below 0, which is reported as a failure. | 
 | set first_node "" | 
 | set core_cnt   0 | 
 | set cpu_tot    1 | 
 | set socket_cnt 1 | 
 | set thread_cnt 1 | 
 |  | 
 | spawn $bin_cat $file_out | 
 | expect { | 
 | 	-re "NodeName=($re_word_str)" { | 
 | 		set first_node $expect_out(1,string) | 
 | 		exp_continue | 
 | 	} | 
 | 	-re "CoresPerSocket=($number)" { | 
 | 		set core_cnt $expect_out(1,string) | 
 | 		exp_continue | 
 | 	} | 
 | 	-re "CPUTot=($number)" { | 
 | 		set cpu_tot $expect_out(1,string) | 
 | 		exp_continue | 
 | 	} | 
 | 	-re "Sockets=($number)" { | 
 | 		set socket_cnt $expect_out(1,string) | 
 | 		exp_continue | 
 | 	} | 
 | 	-re "ThreadsPerCore=($number)" { | 
 | 		set thread_cnt $expect_out(1,string) | 
 | 		exp_continue | 
 | 	} | 
 | 	timeout { | 
 | 		fail "cat is not responding" | 
 | 	} | 
 | 	eof { | 
 | 		wait | 
 | 	} | 
 | } | 
 |  | 
 | # Without a node name the later "-w$node" would collapse to a bare "-w" | 
 | # and sbatch would take its next argument as the node list, so stop here | 
 | # with a clear message instead. | 
 | if {$first_node eq ""} { | 
 | 	fail "Unable to find the node name in the job output" | 
 | } | 
 |  | 
 | # From here on thread_cnt is the total number of threads on the node: | 
 | # ThreadsPerCore * CoresPerSocket * Sockets. | 
 | set thread_cnt [expr {$thread_cnt * $core_cnt * $socket_cnt}] | 
 | if {$thread_cnt == 0} { | 
 | 	fail "sbatch did not find the number of thread" | 
 | } | 
 | if {$thread_cnt < 4} { | 
 | 	skip "Thread thread too low for testing ($thread_cnt < 4)" | 
 | } | 
 |  | 
 | # The environment variable should get overwritten (presumably by the | 
 | # --thread-spec option given on every sbatch command line below). | 
 | set env(SBATCH_CORE_SPEC) 99 | 
 |  | 
 | # Thread-spec values that leave two threads, or one thread, for the job | 
 | set spec_leave_two [expr {$thread_cnt - 2}] | 
 | set spec_leave_one [expr {$thread_cnt - 1}] | 
 |  | 
 | # | 
 | # Thread spec that stays within the limits of the node | 
 | # | 
 | log_info "Run within the specified node" | 
 | foreach task_offset {0 -1} { | 
 | 	thread_spec_job $task_offset $first_node $spec_leave_two 0 | 
 | } | 
 |  | 
 | # | 
 | # Thread spec with more tasks than the node can handle. This should | 
 | # cause the tasks to spread across multiple nodes as needed | 
 | # | 
 | log_info "Spread job across multiple nodes" | 
 | foreach spec [list $spec_leave_two $spec_leave_one] { | 
 | 	thread_spec_job 1 $first_node $spec 1 | 
 | } | 
 |  | 
 | # | 
 | # Thread spec asking for more threads than the specified node has | 
 | # | 
 | log_info "Fail by trying to use more threads than exist" | 
 | foreach excess {1 3} { | 
 | 	thread_spec_job 1 $first_node [expr {$thread_cnt + $excess}] -1 | 
 | } | 