| #!/usr/bin/env expect | 
 | ############################################################################ | 
 | # Purpose: Test heterogeneous job steps | 
 | ############################################################################ | 
 | # Copyright (C) SchedMD LLC. | 
 | # | 
 | # This file is part of Slurm, a resource management program. | 
 | # For details, see <https://slurm.schedmd.com/>. | 
 | # Please also read the included file: DISCLAIMER. | 
 | # | 
 | # Slurm is free software; you can redistribute it and/or modify it under | 
 | # the terms of the GNU General Public License as published by the Free | 
 | # Software Foundation; either version 2 of the License, or (at your option) | 
 | # any later version. | 
 | # | 
 | # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY | 
 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
 | # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
 | # details. | 
 | # | 
 | # You should have received a copy of the GNU General Public License along | 
 | # with Slurm; if not, write to the Free Software Foundation, Inc., | 
 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
 | ############################################################################ | 
 |  | 
 | source ./globals | 
 |  | 
 | # Paths of the three generated scripts and of the batch output file, | 
 | # all located under $test_dir | 
 | foreach {var_name base_name} { | 
 | 	file_in1 input1 | 
 | 	file_in2 input2 | 
 | 	file_in3 input3 | 
 | 	file_out output | 
 | } { | 
 | 	set $var_name "$test_dir/$base_name" | 
 | } | 
 | unset var_name base_name | 
 | # Ids of the jobs started by this test; cleanup cancels all of them | 
 | set job_id_list  {} | 
 | # Number of expected patterns seen by the current subtest | 
 | set matches      0 | 
 |  | 
 | # | 
 | # Return the name of the first node allocated to a job. | 
 | # | 
 | # job_id - id of the job to query | 
 | # | 
 | # The job's NodeList parameter is expanded with "scontrol show hostnames" | 
 | # and the first entry of that expansion is returned. | 
 | # | 
 | proc get_first_node_of_job {job_id} { | 
 | 	global scontrol | 
 |  | 
 | 	set expanded [run_command_output -fail "$scontrol show hostnames [get_job_param $job_id NodeList]"] | 
 | 	return [lindex $expanded 0] | 
 | } | 
 |  | 
 | # | 
 | # Requirements | 
 | # | 
 | # Two nodes must be available for a -N2 request | 
 | set nodes    [get_nodes_by_request "-N2"] | 
 | set nb_nodes [llength $nodes] | 
 | if {$nb_nodes != 2} { | 
 | 	skip "Insufficient nodes in default partition ($nodes)" | 
 | } | 
 |  | 
 | # At least two GPUs are needed for the gres subtest | 
 | set gpu_cnt [get_highest_gres_count $nb_nodes "gpu"] | 
 | if {$gpu_cnt < 2} { | 
 | 	skip "This test requires 2 or more GPUs on $nb_nodes nodes of the default partition" | 
 | } | 
 |  | 
 | # The test is only run with the backfill scheduler | 
 | if {![string equal [get_config_param "SchedulerType"] "sched/backfill"]} { | 
 | 	skip "This test requires SchedulerType = sched/backfill" | 
 | } | 
 |  | 
 | # | 
 | # Test teardown: cancel every job recorded in the global job_id_list. | 
 | # | 
 | proc cleanup {} { | 
 | 	cancel_job $::job_id_list | 
 | } | 
 |  | 
 | # Scripts for the het job: input1 runs srun with input2 and input3 as the | 
 | # two ":"-separated components; each of those echoes its own marker | 
 | make_bash_script $file_in1 "$srun -l --mpi=none $file_in2 : $file_in3" | 
 | foreach {script_path marker} [list $file_in2 FILE2 $file_in3 FILE3] { | 
 | 	make_bash_script $script_path "$bin_echo $marker" | 
 | } | 
 | unset script_path marker | 
 |  | 
 | log_info "SUBTEST: Confirm het job srun runs within non-het job salloc" | 
 | set matches 0 | 
 | set output [run_command_output -fail "$salloc -N2 $file_in1"] | 
 | if {![regexp "Granted job allocation ($number)" $output - job_id]} { | 
 | 	fail "Allocation not granted ($output)" | 
 | } else { | 
 | 	# Remember the job so that cleanup can cancel it | 
 | 	lappend job_id_list $job_id | 
 | } | 
 |  | 
 | # Every match contributes three list items: the whole match, the label | 
 | # in front of the colon (srun runs with -l) and the number after FILE | 
 | set labelled [regexp -all -inline "($number): FILE($number)" $output] | 
 | foreach {whole label file_no} $labelled { | 
 | 	# Only the pairs 0/FILE2 and 1/FILE3 are valid | 
 | 	set expected_pair [expr {($label == 0 && $file_no == 2) || ($label == 1 && $file_no == 3)}] | 
 | 	if {!$expected_pair} { | 
 | 		fail "Invalid output ($whole from $labelled)" | 
 | 	} else { | 
 | 		incr matches | 
 | 	} | 
 | } | 
 | subtest {$matches == 2} "Job should be granted and output match the expected" "$matches != 2" | 
 |  | 
 | log_info "SUBTEST: Confirm het job srun runs within non-het job sbatch" | 
 | set matches 0 | 
 | # Submit the batch job and record its id for cleanup in one step | 
 | lappend job_id_list [set job_id [submit_job -fail "-N2 -o $file_out $file_in1"]] | 
 | wait_for_job -fail $job_id "DONE" | 
 | wait_for_file -fail $file_out | 
 | # Count the FILE2/FILE3 markers found in the batch output file | 
 | set output [run_command_output -fail "$bin_cat $file_out"] | 
 | set matches [regexp -all -- {FILE[23]} $output] | 
 | subtest {$matches == 2} "Job should be granted and output match the expected" "$matches != 2" | 
 |  | 
 | log_info "SUBTEST: Confirm het job srun runs on different nodes in non-het job sbatch" | 
 | set matches 0 | 
 | # The previous subtest wrote its output to the same file. Remove it so | 
 | # that wait_for_file really waits for this job's output: the stale file | 
 | # holds two distinct lines (FILE2/FILE3) and would satisfy the check | 
 | # below even if this job had produced nothing. | 
 | file delete -force $file_out | 
 | set job_id [submit_job -fail "-N2 -o $file_out --wrap=\"$srun $bin_printenv SLURMD_NODENAME\""] | 
 | lappend job_id_list $job_id | 
 | wait_for_job -fail $job_id "DONE" | 
 | wait_for_file -fail $file_out | 
 | # Each task prints its SLURMD_NODENAME; two distinct lines are expected | 
 | # for the two-node allocation | 
 | set matches [run_command_output -fail "sort -u $file_out | wc -l"] | 
 | subtest {$matches == 2} "Job should be granted and output match the expected" "$matches != 2" | 
 |  | 
 | log_info "SUBTEST: Confirm het job srun with gres runs in non-het job sbatch" | 
 | if {[check_config_select "cons_tres"]} { | 
 | 	set matches 0 | 
 | 	# Remove the output left by the previous subtest so that | 
 | 	# wait_for_file cannot return early on stale content | 
 | 	file delete -force $file_out | 
 | 	# Two het components, each asking for one of the job's two GPUs | 
 | 	set job_id [submit_job -fail "-N2 --gpus=2 -o $file_out --wrap=\"$srun --gpus=1 $bin_echo ok : --gpus=1 $bin_echo ok\""] | 
 | 	lappend job_id_list $job_id | 
 | 	wait_for_job -fail $job_id "DONE" | 
 | 	wait_for_file -fail $file_out | 
 | 	# One "ok" line is expected from each component | 
 | 	set matches [run_command_output -fail "grep ok $file_out | wc -l"] | 
 | 	subtest {$matches == 2} "Job should be granted and output match the expected" "$matches != 2" | 
 | } else { | 
 | 	subskip "Subtest requires SelectType=select/cons_tres (Job should be granted and output match the expected)" | 
 | } | 
 |  | 
 | log_info "SUBTEST: Confirm het job srun fails in het job salloc" | 
 | set matches 0 | 
 | # Both components of the het salloc run input1, which itself starts a | 
 | # het srun; the command is run with -xfail | 
 | set output [run_command_output -xfail -fail "$salloc $file_in1 : $file_in1"] | 
 | if {![regexp "Granted job allocation ($number)" $output - job_id]} { | 
 | 	fail "Allocation not granted ($output)" | 
 | } else { | 
 | 	lappend job_id_list $job_id | 
 | } | 
 | # regexp yields 1 when the srun error is present and 0 otherwise | 
 | incr matches [regexp "srun: error: Allocation failure" $output] | 
 | subtest {$matches == 1} "Job should be granted but allocation should fail" "$matches != 1" | 
 |  | 
 | log_info "SUBTEST: Confirm het job srun fails when trying to overlap nodes" | 
 | set matches 0 | 
 | # Forget the id of the previous subtest's job so that a stale value can | 
 | # never be used if this allocation is not granted | 
 | set job_id 0 | 
 | set timeout $max_job_delay | 
 | spawn $salloc -N2 $bin_bash | 
 | expect { | 
 | 	-re "Granted job allocation ($number)" { | 
 | 		set job_id $expect_out(1,string) | 
 | 		lappend job_id_list $job_id | 
 | 		reset_bash_prompt | 
 | 		exp_continue | 
 | 	} | 
 | 	-re "$test_prompt" { | 
 | 		# job started | 
 | 	} | 
 | 	timeout { | 
 | 		fail "salloc not responding" | 
 | 	} | 
 | 	eof { | 
 | 		fail "salloc exited before providing a shell" | 
 | 	} | 
 | } | 
 | if {$job_id == 0} { | 
 | 	fail "Allocation not granted" | 
 | } | 
 |  | 
 | set node [get_first_node_of_job $job_id] | 
 | log_info "node=$node for job $job_id" | 
 |  | 
 | # Request the same node for both het components | 
 | send "$srun -w $node : -w $node $bin_true\r" | 
 | expect { | 
 | 	# The error pattern is listed before the prompt: expect tries the | 
 | 	# patterns in order, so with the prompt first the error text would | 
 | 	# be discarded whenever both arrive in the same read | 
 | 	-re "overlaps with excluded $node" { | 
 | 		set matches 1 | 
 | 		exp_continue | 
 | 	} | 
 | 	-re "$test_prompt" { | 
 | 		# srun returned to the shell | 
 | 	} | 
 | 	timeout { | 
 | 		fail "srun not responding" | 
 | 	} | 
 | 	eof { | 
 | 		fail "salloc shell exited unexpectedly" | 
 | 	} | 
 | } | 
 | send "exit\r" | 
 | subtest {$matches == 1} "srun should return overlaps with excluded" "$matches != 1" | 