#!/usr/bin/env expect
############################################################################
# Purpose: Test gres.conf-specified and system-detected GPU device merging
# logic
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
# Load the shared test suite definitions.
# NOTE(review): $test_name, $test_dir, $re_word_str, $number and the helper
# procs used below are not defined in the visible part of this file, so they
# presumably come from here -- confirm.
source ./globals
# Test program built further down with compile_against_libslurm
set test_prog "$test_name.prog"
set nodename_base "tux"
# Node name handed to the test program on its command line by test_cfg
set nodename "${nodename_base}0"
set nodename_diff "${nodename_base}1"
# Helper program used to determine the size of unsigned long in bits
set test_ulong_prog "$test_name.ulong"
# Bash script written by test_cfg, and the file its output is redirected to
set file_in "$test_dir/test_script"
set file_out "$test_dir/output"
# Assume unsigned long is 64 bits unless overwritten
set ulong_bits 64
# Log message patterns. Tests hand these to test_cfg as err_msgs, where they
# are matched against the program output with regexp, so they are regular
# expressions rather than plain strings.
set dup_err "error: gpu duplicate device file name"
set cpu_set_err "fatal: _set_cpu_set_bitstr: cpu_set_bitstr is empty"
set mismatch_err "error: This GPU specified in \\\[slurm\\\|gres\\\].conf has mismatching Cores or Links"
set no_name_err "error: Invalid GRES data, no type name"
set parse_err "error: Parse error in file"
set links_err "error: gres.conf: Ignoring invalid Link"
set conf_mismatch_err "warning: A line in gres.conf for GRES $re_word_str has $number more configured than expected in slurm.conf"
set slurm_conf_type_err "GRES in slurm.conf have a type while others do not"
set gres_conf_type_err "fatal: gres.conf for $re_word_str, some records have \"Type=\" specification while others do not"
set gres_conf_file_err "fatal: gres.conf for $re_word_str, some records have \"File\" specification while others do not"
set flags_mismatch_err "fatal: Invalid GRES record name=${re_word_str} type=${re_word_str}: Flags (${re_word_str}) does not match env flags for previous GRES of same node and name"
set flags_no_gpu_err "fatal: Invalid GRES record name=${re_word_str} type=${re_word_str}: Flags (${re_word_str}) contains \"no_gpu_env\", which must be mutually exclusive to all other GRES env flags of same node and name"
set key_parse_err "error: _parse_next_key: Parsing error at unrecognized key"
# Flag lists used as the final field of the expected GRES_PARSABLE lines
set flags_default "HAS_FILE,ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT"
set flags_default_type "HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT"
set flags_default_type_shared "HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT,SHARED,ONE_SHARING"
set flags_type "HAS_TYPE"
set flags_file "HAS_FILE"
set flags_file_type "HAS_FILE,HAS_TYPE"
set flags_file_type_shared "HAS_FILE,HAS_TYPE,SHARED,ONE_SHARING"
# When nonzero, test_cfg runs the generated script without -nolog so that the
# program output is logged. Set by the command-line argument handling below.
set debug_mode 0
# Bail out early when the configured SwitchType is switch/cray
if {[get_config_param "SwitchType"] eq "switch/cray"} {
# Many Cray-specific changes required in slurm.conf test files
skip "This test can not run on a Cray system"
}
#
## testproc configuration
#
# Reduce verbosity
set testsuite_testproc_log_calls no
# Delete the two programs generated by this test: the GRES test program and
# the unsigned-long size probe. Files that are already gone are ignored.
# NOTE(review): nothing in the visible part of this file calls this proc;
# presumably the test framework invokes it when the test ends -- confirm.
proc cleanup {} {
	global test_prog test_ulong_prog

	foreach generated [list $test_prog $test_ulong_prog] {
		file delete $generated
	}
}
# The optional first command-line argument selects how the tests are run:
#   v | valgrind     - run the test program under valgrind
#   i | info         - debug mode, log level INFO
#   verbose          - debug mode, log level VERBOSE
#   d | d1 | debug   - debug mode, log level DEBUG
#   d2 | debug2      - debug mode, log level DEBUG2
#   d3 | debug3      - debug mode, log level DEBUG3
# Any other value, or no argument at all, keeps the defaults set here.
set use_valgrind 0

# log_level_t values in log.h
# NOTE: The test requires output from log level INFO, so can't go any lower
set SLURM_LOG_LEVEL_INFO 3
set SLURM_LOG_LEVEL_VERBOSE 4
set SLURM_LOG_LEVEL_DEBUG 5
set SLURM_LOG_LEVEL_DEBUG2 6
set SLURM_LOG_LEVEL_DEBUG3 7
set debug_level $SLURM_LOG_LEVEL_INFO

switch -exact -- [lindex $argv 0] {
	v -
	valgrind {
		set use_valgrind 1
		log_info "Running tests under valgrind!"
	}
	i -
	info {
		set debug_mode 1
		log_info "Running tests under debug mode with log level INFO"
		set debug_level $SLURM_LOG_LEVEL_INFO
	}
	verbose {
		set debug_mode 1
		log_info "Running tests under debug mode with log level VERBOSE"
		set debug_level $SLURM_LOG_LEVEL_VERBOSE
	}
	d -
	d1 -
	debug {
		set debug_mode 1
		log_info "Running tests under debug mode with log level DEBUG"
		set debug_level $SLURM_LOG_LEVEL_DEBUG
	}
	d2 -
	debug2 {
		set debug_mode 1
		log_info "Running tests under debug mode with log level DEBUG2"
		set debug_level $SLURM_LOG_LEVEL_DEBUG2
	}
	d3 -
	debug3 {
		set debug_mode 1
		log_info "Running tests under debug mode with log level DEBUG3"
		set debug_level $SLURM_LOG_LEVEL_DEBUG3
	}
}
# Run the external touch utility on $file, creating it when it does not exist
# yet. Returns whatever exec returns for that command.
proc touch_file {file} {
	return [exec touch $file]
}
# Write $text followed by a single newline to $filepath, replacing any
# previous content of that file.
#
# text     - Content to write. May be empty, in which case the file holds
#            just the newline.
# filepath - Path of the file to create or overwrite.
#
# The file is written with Tcl's own I/O instead of `exec echo $text > file`.
# exec interprets arguments that start with <, > or 2>, or that equal |, as
# redirection or pipeline syntax, and echo may treat a leading -n or -e as an
# option, so such text would not have been written verbatim.
#
# Raises an error if the file cannot be opened or written. Returns an empty
# string.
proc generate_file {text filepath} {
	set fd [open $filepath w]
	# Always close the channel, even when the write fails
	set write_failed [catch {puts $fd $text} write_err]
	close $fd
	if {$write_failed} {
		error $write_err
	}
	return
}
# Create or overwrite $test_dir/slurm.conf, the configuration file used by the
# test runner program.
#
# Only the few settings needed to trigger loading the GRES GPU plugin are
# written. There are deliberately no node definitions: the node name and the
# node gres info are passed into the test program directly, not through this
# slurm.conf.
proc create_slurm_conf {} {
	global test_name test_dir

	# Assemble the file content line by line; it starts with an empty line
	set conf_text "\n"
	append conf_text "# This file was autogenerated by $test_name\n"
	append conf_text "ControlMachine=test_machine\n"
	append conf_text "ClusterName=test_cluster\n"
	append conf_text "GresTypes=gpu,mps,nic,mic,tmpdisk\n"

	generate_file $conf_text $test_dir/slurm.conf
}
# Write the slurm.conf used by every test case
create_slurm_conf

# Set up dummy device files for testing. They just need to exist.
# This defines $dev0 through $dev8, each being $dev with the digit appended,
# and then creates the corresponding files.
set dev "$test_dir/nvidia"
foreach dev_suffix {0 1 2 3 4 5 6 7 8} {
	set dev$dev_suffix "${dev}$dev_suffix"
}
foreach dev_suffix {0 1 2 3 4 5 6 7 8} {
	touch_file [set dev$dev_suffix]
}
# Do not leave the loop variable behind as an extra global
unset dev_suffix
# Build test program; -DUSING_VALGRIND is added only for valgrind runs
if {!$use_valgrind} {
	set rc [compile_against_libslurm -full $test_prog]
} else {
	set rc [compile_against_libslurm -full $test_prog "-DUSING_VALGRIND"]
}
if {$rc} {
	fail "Cannot compile test program"
}

# Build program to determine size of unsigned long in C
set build_cmd "$bin_cc ${test_ulong_prog}.c -o ${test_ulong_prog}"
log_debug "Build command: $build_cmd"
eval exec $build_cmd

# Run it and adopt the reported width when it differs from the assumed one
set output [run_command_output -fail -nolog "./$test_ulong_prog"]
if {$ulong_bits != $output} {
	log_warn "We are running on a non-64-bit architecture, where sizeof(unsigned long) * 8 = $output bits"
	set ulong_bits $output
}
#
# Runs $test_prog for one configuration and checks that its GRES_PARSABLE
# output, and optionally its error messages, match what is expected
#
# test_minor      - The minor test number (39.18.X).
# slurm_conf_gres - This is the `Gres=` value that would be in
#                   slurm.conf for this node (it's easier to pass it into
#                   the program directly than to regenerate a slurm.conf
#                   file with different node definitions).
# gres_conf       - The gres.conf to use.
# fake_gpus_conf  - The fake_gpus.conf to use. This file tells Slurm to
#                   pretend to have devices detected by system, in effect
#                   emulating the result of AutoDetect=nvml.
# output_expected - The expected "GRES_PARSABLE" output.
# err_msgs (optional) - The error message regex to look for. Use | for
#                   multiple. If using with $output_expected, it will only
#                   reliably match if $err_msgs appear BEFORE lines with
#                   GRES_PARSABLE.
# errs_expected (optional) - The number of times you expect to see $err_msgs
#                   in the output. Defaults to 0. If this number isn't hit,
#                   then the test will fail. Only checked when $err_msgs is
#                   not empty.
#
proc test_cfg {test_minor slurm_conf_gres gres_conf fake_gpus_conf output_expected {err_msgs ""} {errs_expected 0} } {
	global test_dir nodename test_prog re_word_str
	global test_name use_valgrind file_in file_out
	global bin_cat debug_mode debug_level

	log_info "====$test_name.$test_minor===="

	# Install the per-test configuration files next to slurm.conf
	generate_file $gres_conf $test_dir/gres.conf
	generate_file $fake_gpus_conf $test_dir/fake_gpus.conf

	# A valgrind run and a plain run differ only in what precedes the
	# program name on the command line
	if {$use_valgrind} {
		set launcher "valgrind --tool=memcheck --error-limit=no --leak-check=full --show-reachable=yes --max-stackframe=16777216 --num-callers=20 --child-silent-after-fork=yes --track-origins=yes "
	} else {
		set launcher ""
	}

	# The script captures stdout and stderr in $file_out, prints that file
	# and removes it again
	make_bash_script $file_in "
${launcher}./$test_prog \"$test_dir\" \"$nodename\" \"$slurm_conf_gres\" $debug_level > $file_out 2>&1
$bin_cat $file_out
rm -rf $file_out"

	##
	### Parse the `GRES_PARSABLE` output
	##
	# The order of GPUs is important because it directly corresponds to the
	# bits of the GRES bitmaps used to track the GPUs, and this shouldn't
	# change unless we deliberately change it.
	#
	# Also, we will eventually want to sort GPUs by PCI bus ID if AutoDetect
	# is used, so we need to test the order to make sure that works. We may
	# also want to guarantee that the GPU order in gres.conf is preserved if
	# AutoDetect is NOT used, so users can manually specify the PCI bus
	# order.
	if {!$debug_mode} {
		set output [run_command_output -fail -nolog "$file_in"]
	} else {
		set output [run_command_output -fail "$file_in"]
	}

	# NOTE: actual, expected and errs_actual keep their names because the
	# subtest conditions below refer to them by name
	set parsable_re "GRES_PARSABLE$re_word_str"
	set actual [regexp -all -inline $parsable_re $output]

	# Count occurrences of the expected errors, if any were requested
	set errs_actual 0
	if {$err_msgs != ""} {
		set errs_actual [regexp -all "$err_msgs" $output]
	}

	set expected [regexp -all -inline $parsable_re $output_expected]

	file delete $file_in

	# Check the output with what is expected
	log_debug "Actual:"
	foreach parsed_line $actual {
		log_debug $parsed_line
	}
	if {![subtest {$actual == $expected} "Expected output does not match actual output"]} {
		log_debug "Expected:"
		foreach wanted_line $expected {
			log_debug $wanted_line
		}
	}

	# Check for expected errors, if applicable
	if {$err_msgs != ""} {
		subtest {$errs_actual == $errs_expected} "Verify expected errors" "$errs_actual != $errs_expected (regexp='$err_msgs')"
	}
}
################################################################################
# NOTES:
################################################################################
# fake_gpus.conf is of the following format, with each line representing one
# GPU device:
#
# type|cpu_count|cpu_range|links|device_files[|unique_id]
#
# Optionally, cpu_range allows special keywords starting in `~` to aid in
# testing the conversion from an NVML cpu affinity array to Slurm bitstring.
#
# The GRES_PARSABLE output is of the following format:
#
# GRES_PARSABLE[name](device_count):type|cpu_count|cpu_range|links|device_files[|unique_id]
#
# To better decipher the tests, temporarily increase the log verbosity in
# test39.18.prog.c. E.g.:
#
# opts.stderr_level = SLURM_LOG_LEVEL_DEBUG2;
#
# Note that there is a known issue with Expect where the regex parsing fails
# when the output is somewhat large. So to avoid random errors, each test output
# should be kept concise.
#
################################################################################
# ##############################################################################
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# ##############################################################################
# ##############################################################################
# # Test a2 - Type-less gpu specification in slurm.conf and empty gres.conf
# # NOTE the device numbers being out of order
# #
# # The fake GPUs are listed as devices 1, 3, 4, 2. The expected output lists
# # them as devices 1, 2, 3, 4, each with a (null) type and only the HAS_FILE
# # flag.
# ##############################################################################
set slurm_conf_gres "gpu:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|(null)|${dev}1
tesla|4|2-3|(null)|${dev}3
tesla|4|2-3|(null)|${dev}4
tesla|4|0-1|(null)|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}1|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}2|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}3|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}4|$flags_file
"
testproc_alias "a2" test_cfg "a2" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a4 - Test empty and null identifiers in fake_gpus.conf
# #
# # The eight fake GPUs cover every combination of a (null) or set CPU range,
# # links value and device file. In the expected output the four entries
# # without a device file come first and have an empty flags field; the four
# # entries with a device file follow and carry the HAS_FILE flag.
# ##############################################################################
set slurm_conf_gres "gpu:8"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|(null)|(null)|(null)
tesla|4|(null)|(null)|${dev}1
tesla|4|(null)|-1,0|(null)
tesla|4|(null)|-1,0|${dev}3
tesla|4|0-1|(null)|(null)
tesla|4|0-1|(null)|${dev}2
tesla|4|0-1|-1,0|(null)
tesla|4|0-1|-1,0|${dev}4
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|4|(null)|-1,0|(null)|
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|-1,0|(null)|
GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|(null)|
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|(null)|
GRES_PARSABLE\[gpu\](1):(null)|4|(null)|-1,0|${dev}3|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|-1,0|${dev}4|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}1|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}2|$flags_file
"
testproc_alias "a4" test_cfg "a4" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# The following tests are NVML-specific.
# They will only execute if HAVE_NVML = 1 in config.h.
# In fake_gpus_conf, a CPU range starting with `~` will trigger the GPU plugin's
# gpu_p_test_cpu_conv(). In order for this to exercise gpu/nvml-specific code,
# explicitly specify `AutoDetect=nvml` in the ad-hoc gres.conf. If not,
# gpu/generic will be used and CPU ranges will be set to null, failing the tests
#
# All arithmetic below uses braced expr arguments so that each operand is
# substituted exactly once and the expression can be compiled. The computed
# values are the same as with the unbraced form.
# ##############################################################################
if {[have_nvml]} {
# ##############################################################################
# # Test a6 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity of 20 ulongs with all bits set
# ##############################################################################
set arr_len 20
# One X per unsigned long in the affinity array
set cpu_array [string repeat "X" $arr_len]
set cpus_count [expr {$arr_len * $ulong_bits}]
# Highest CPU index, used as the end of the expected range
set cpus_count_m1 [expr {$cpus_count - 1}]
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "# This file was autogenerated by $test_name
tesla|$cpus_count|~$cpu_array|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file
"
testproc_alias "a6" test_cfg "a6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a8 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity of 1 ulong with all bits set
# ##############################################################################
set arr_len 1
set cpu_array [string repeat "X" $arr_len]
set cpus_count [expr {$arr_len * $ulong_bits}]
set cpus_count_m1 [expr {$cpus_count - 1}]
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "# This file was autogenerated by $test_name
tesla|$cpus_count|~$cpu_array|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file
"
testproc_alias "a8" test_cfg "a8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a10 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity of 3 ulongs with all bits set
# ##############################################################################
set arr_len 3
set cpu_array [string repeat "X" $arr_len]
set cpus_count [expr {$arr_len * $ulong_bits}]
set cpus_count_m1 [expr {$cpus_count - 1}]
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "# This file was autogenerated by $test_name
tesla|$cpus_count|~$cpu_array|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file
"
testproc_alias "a10" test_cfg "a10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a12 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity for max # of CPUs supported (2^15)
# ##############################################################################
# pow() yields a floating point value, hence the int() when deriving arr_len
set max_cpus [expr {pow(2, 15)}]
set arr_len [expr {int($max_cpus / $ulong_bits)}]
set cpus_count [expr {$arr_len * $ulong_bits}]
set cpus_count_m1 [expr {$cpus_count - 1}]
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "# This file was autogenerated by $test_name
tesla|$cpus_count|~max|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file
"
testproc_alias "a12" test_cfg "a12" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a14 - Test conversion of NVML-style CPU affinity array to Slurm bitstr
# Convert device CPU affinity for all zeros (expect this to fatal)
# ##############################################################################
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|0|~zero|(null)|${dev}1
"
# No GRES_PARSABLE lines are expected, only one occurrence of the fatal message
set expected_output ""
set err_msgs $cpu_set_err
set expected_errs 1
testproc_alias "a14" test_cfg "a14" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test a16 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity for CPU 0 set out of 16 total CPUs
# ##############################################################################
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|16|~one|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|16|0|(null)|${dev}1|$flags_file
"
testproc_alias "a16" test_cfg "a16" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a18 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity for CPUs 0-1 set out of 4 total CPUs
# ##############################################################################
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|~three|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}1|$flags_file
"
testproc_alias "a18" test_cfg "a18" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a20 - Test conversion of NVML-style cpu affinity array to Slurm bitstr
# Convert device CPU affinity for CPUs 8-15 set out of 16 CPUs
# ##############################################################################
set slurm_conf_gres "gpu:1"
set gres_conf "AutoDetect=nvml"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|16|~half|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|16|8-15|(null)|${dev}1|$flags_file
"
testproc_alias "a20" test_cfg "a20" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# End NVML-specific tests
# ##############################################################################
} else {
log_warn "====Omitting NVML-specific tests===="
}
# ##############################################################################
# # Test a22 - Test that when no GRES type is specified in slurm.conf,
# the GRES type is NOT set to what the system detects, but rather
# is set to NULL.
#
# fake_gpus.conf reports the type tesla_v100-pcie-16gb for all four devices,
# while every expected line has (null) in the type field and no HAS_TYPE flag.
# ##############################################################################
set slurm_conf_gres "gpu:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}1|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}2|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}3|$flags_file
GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}4|$flags_file
"
testproc_alias "a22" test_cfg "a22" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a24 - Test that the GRES type specified in slurm.conf overrides the
# GRES type specified in the system, if it's a substring.
#
# slurm.conf asks for type v100, which occurs in the middle of the detected
# type tesla_v100-pcie-16gb. The expected lines report the type as v100 and
# carry the HAS_FILE and HAS_TYPE flags.
# ##############################################################################
set slurm_conf_gres "gpu:v100:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):v100|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):v100|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):v100|4|2-3|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):v100|4|2-3|(null)|${dev}4|$flags_file_type
"
testproc_alias "a24" test_cfg "a24" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a26 - Test that the GRES type specified in slurm.conf doesn't need
# to match the case of the system-detected GRES type, and that it
# can also match the first part.
#
# The expected lines report the type exactly as written in slurm.conf
# (TESLA_V100, upper case), not as detected.
# ##############################################################################
set slurm_conf_gres "gpu:TESLA_V100:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):TESLA_V100|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):TESLA_V100|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):TESLA_V100|4|2-3|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):TESLA_V100|4|2-3|(null)|${dev}4|$flags_file_type
"
testproc_alias "a26" test_cfg "a26" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a28 - Test that the GRES type specified in slurm.conf can match the
# last part of the system-detected GRES type
#
# slurm.conf asks for pcie-16gb, the tail of the detected type
# tesla_v100-pcie-16gb; the expected lines report the type as pcie-16gb.
# ##############################################################################
set slurm_conf_gres "gpu:pcie-16gb:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):pcie-16gb|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):pcie-16gb|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):pcie-16gb|4|2-3|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):pcie-16gb|4|2-3|(null)|${dev}4|$flags_file_type
"
testproc_alias "a28" test_cfg "a28" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a30 - Test that the GRES type specified in slurm.conf can match the
# full system-detected GRES type
#
# The slurm.conf type and the detected type are both tesla_v100-pcie-16gb,
# and that is also the type in every expected line.
# ##############################################################################
set slurm_conf_gres "gpu:tesla_v100-pcie-16gb:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3
tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4
tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4|$flags_file_type
"
testproc_alias "a30" test_cfg "a30" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a32 - Test that GRES types work as expected when one GRES type is a
# substring of another GRES type (e.g. k20m + k20m1). See bug 7345
#
# Expected: the device detected as tesla_k20m1 is reported as k20m1 and the
# device detected as tesla_k20m as k20m, even though k20m is listed first in
# slurm.conf and is a substring of tesla_k20m1.
# ##############################################################################
set slurm_conf_gres "gpu:k20m:1,gpu:k20m1:1,gpu:v100:1"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla_k20m1|4|0-1|(null)|${dev}1
tesla_k20m|4|0-1|(null)|${dev}2
tesla_v100-sxm2-32gb|4|0-1|(null)|${dev}3
"
set expected_output "
GRES_PARSABLE\[gpu\](1):k20m1|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):v100|4|0-1|(null)|${dev}3|$flags_file_type
"
testproc_alias "a32" test_cfg "a32" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a34 - Test that only the first system device that matches a conf gres
# is used, and that the gres type is truncated.
#
# Six nvidia-p100 devices are detected (listed as devices 2, 6, 3, 4, 1, 5)
# but slurm.conf asks for a single p100. The one expected line is for
# device 1, i.e. the lowest-numbered device file rather than the first line
# listed, and its type is p100.
# ##############################################################################
set slurm_conf_gres "gpu:p100:1"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
nvidia-p100|4|0-1|(null)|${dev}2
nvidia-p100|4|0-1|(null)|${dev}6
nvidia-p100|4|0-1|(null)|${dev}3
nvidia-p100|4|0-1|(null)|${dev}4
nvidia-p100|4|0-1|(null)|${dev}1
nvidia-p100|4|0-1|(null)|${dev}5
"
set expected_output "
GRES_PARSABLE\[gpu\](1):p100|4|0-1|(null)|${dev}1|$flags_file_type
"
testproc_alias "a34" test_cfg "a34" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a36 - Test that the shorter GRES types don't match against the longer
# GRES types.
#
# slurm.conf lists the types aaaa, a, aa, aaa (one GPU each) and the system
# detects exactly one device of each type. Every device is expected to keep
# its own exact type, ordered by device file number.
# ##############################################################################
set slurm_conf_gres "gpu:aaaa:1,gpu:a:1,gpu:aa:1,gpu:aaa:1"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
aa|4|0-1|(null)|${dev}2
aaa|4|0-1|(null)|${dev}1
a|4|0-1|(null)|${dev}3
aaaa|4|0-1|(null)|${dev}4
"
set expected_output "
GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):aa|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):a|4|0-1|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):aaaa|4|0-1|(null)|${dev}4|$flags_file_type
"
testproc_alias "a36" test_cfg "a36" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a38 - Test that the shorter GRES types don't match against the longer
# GRES types prematurely.
#
# Four k20 devices (2, 3, 5, 6) and four k20m devices (1, 4, 7, 8) are
# detected, while slurm.conf asks for two k20 and four k20m. The expected
# output keeps all four k20m devices and only the k20 devices 2 and 3, so no
# k20m device is reported as k20.
# ##############################################################################
set slurm_conf_gres "gpu:k20:2,gpu:k20m:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
k20|4|0-1|(null)|${dev}2
k20m|4|0-1|(null)|${dev}1
k20|4|0-1|(null)|${dev}3
k20m|4|0-1|(null)|${dev}4
k20|4|0-1|(null)|${dev}5
k20m|4|0-1|(null)|${dev}7
k20|4|0-1|(null)|${dev}6
k20m|4|0-1|(null)|${dev}8
"
set expected_output "
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}4|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}7|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}8|$flags_file_type
"
testproc_alias "a38" test_cfg "a38" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a39 - Show that shorter types will match against longer types if the
# longer types aren't used up, and that the longer type will be
# truncated accordingly.
#
# Three k20m devices (1, 2, 3) and two k20 devices (4, 5) are detected, while
# slurm.conf asks for two of each. Expected: devices 1 and 2 are k20m,
# device 3 (detected as k20m) is reported as k20, device 4 is k20, and
# device 5 does not appear.
# ##############################################################################
set slurm_conf_gres "gpu:k20:2,gpu:k20m:2"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
k20m|4|0-1|(null)|${dev}1
k20m|4|0-1|(null)|${dev}2
k20m|4|0-1|(null)|${dev}3
k20|4|0-1|(null)|${dev}4
k20|4|0-1|(null)|${dev}5
"
set expected_output "
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}4|$flags_file_type
"
testproc_alias "a39" test_cfg "a39" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a40 - Test GRES types that are the same length
# Note: In a real system, the controller would set the node to
# drain, since the node reports one less aaa GPU than the
# controller is expecting.
#
# Two devices each of test-aaa-test, test-bbb-test and test-ccc-test are
# detected; slurm.conf asks for aaa:3, bbb:2 and ccc:1. Expected: both aaa
# devices, both bbb devices and only one ccc device (device 5, not 6).
# ##############################################################################
set slurm_conf_gres "gpu:aaa:3,gpu:bbb:2,gpu:ccc:1"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
test-bbb-test|4|0-1|(null)|${dev}3
test-aaa-test|4|0-1|(null)|${dev}2
test-ccc-test|4|0-1|(null)|${dev}5
test-aaa-test|4|0-1|(null)|${dev}1
test-bbb-test|4|0-1|(null)|${dev}4
test-ccc-test|4|0-1|(null)|${dev}6
"
set expected_output "
GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):bbb|4|0-1|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):bbb|4|0-1|(null)|${dev}4|$flags_file_type
GRES_PARSABLE\[gpu\](1):ccc|4|0-1|(null)|${dev}5|$flags_file_type
"
testproc_alias "a40" test_cfg "a40" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a42 - Test multiple identical GRES type specifications
#
# slurm.conf repeats gpu:aaa:1 three times and four test-aaa-test devices are
# detected (listed as devices 4, 3, 2, 1). Expected: three aaa entries, for
# devices 1, 2 and 3.
# ##############################################################################
set slurm_conf_gres "gpu:aaa:1,gpu:aaa:1,gpu:aaa:1"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
test-aaa-test|4|0-1|(null)|${dev}4
test-aaa-test|4|0-1|(null)|${dev}3
test-aaa-test|4|0-1|(null)|${dev}2
test-aaa-test|4|0-1|(null)|${dev}1
"
set expected_output "
GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}3|$flags_file_type
"
testproc_alias "a42" test_cfg "a42" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# Test a52 - Test slurm.conf, gres.conf, and detected device interaction
#
# The node returns a list of only 3 GPUs, not 4 (since gpu:special is not
# detected on the system). So the controller should set the node to drain.
#
# gres.conf is empty here, so the expected output holds only the three
# detected tesla devices and no entry of type special.
# ##############################################################################
set slurm_conf_gres "gpu:tesla:3,gpu:special:1"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type
"
testproc_alias "a52" test_cfg "a52" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# Test a53 - Test slurm.conf, gres.conf, and detected device interaction
#
# gpu:special is added on via gres.conf. The final GPU list count matches what
# is expected in slurm.conf. No errors.
# ##############################################################################
set slurm_conf_gres "gpu:tesla:3,gpu:special:1"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=special File=${dev}5
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type
"
testproc_alias "a53" test_cfg "a53" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# Test a54 - Test slurm.conf, gres.conf, and detected device interaction
#
# The gres conf record for nvidia1 *does* match a system GPU with the same Type
# and File. However, Cores is mismatched, so an error is printed in slurmd and
# the system device is omitted from the final GPU list. The total GPU count is 3
# instead of 4, so the controller sets the node to drain.
# ##############################################################################
set slurm_conf_gres "gpu:tesla:3,gpu:special:1"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}1 Cores=0
Name=gpu Type=special File=${dev}5
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type
"
testproc_alias "a54" test_cfg "a54" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# Test a55 - Test slurm.conf, gres.conf, and detected device interaction
#
# nvidia[0-2] matches exactly what is found on the system, so no problem there.
# tesla + nvidia3 does not match any type and file combo found in the system
# GPUs, so this is assumed to be an "extra" GPU. However, it is not added, since
# there are already 3 teslas found as outlined in slurm.conf.
# gres.conf configures 5 GPUs in total instead of the 4 in slurm.conf, which is
# fine (i.e. won't set node to drain).
# ##############################################################################
set slurm_conf_gres "gpu:tesla:3,gpu:special:1"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[0-2\] Cores=0-1
Name=gpu Type=tesla File=${dev}3 Cores=0
Name=gpu Type=special File=${dev}5
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
# Four devices are expected: the three detected teslas plus the gres.conf-only
# special GPU. The fourth tesla from gres.conf is not listed.
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type
"
# Exactly one "more configured than expected in slurm.conf" warning is
# expected -- presumably for the extra tesla line in gres.conf.
set err_msgs $conf_mismatch_err
set expected_errs 1
testproc_alias "a55" test_cfg "a55" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# Test a56 - Test slurm.conf, gres.conf, and detected device interaction
#
# nvidia[0-2] are found on the system, but nvidia3 is not. However, nvidia3 is
# assumed to be an extra GPU (like gpu:special), so it's ok.
# The total GPU count is 5, so there are no errors or warnings.
# ##############################################################################
# Unlike a55, slurm.conf allows 4 teslas here, leaving room for the extra one.
set slurm_conf_gres "gpu:tesla:4,gpu:special:1"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[0-3\] Cores=0-1
Name=gpu Type=special File=${dev}5
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
# The fourth tesla is defined only in gres.conf, so its line shows (null) links
# and $flags_default_type instead of the detected -1 links and $flags_file_type.
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}3|$flags_default_type
GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type
"
testproc_alias "a56" test_cfg "a56" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test a57 - Test that an empty slurm.conf yields no devices
# ##############################################################################
# No GRES is configured in slurm.conf at all.
set slurm_conf_gres ""
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[0-3\] Cores=0-1
Name=gpu Type=special File=${dev}5
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
# Nothing should be reported, even though both gres.conf and the fake system
# list GPUs.
set expected_output ""
# Two mismatch warnings are expected -- presumably one per gres.conf line.
set err_msgs $conf_mismatch_err
set expected_errs 2
testproc_alias "a57" test_cfg "a57" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# Test a58 - Test that a non-GPU GRES doesn't need an explicit entry in
# gres.conf. Also test that GPUs are rejected unless they have a File
# specification in gres.conf when AutoDetect is turned off.
# ##############################################################################
# Four GPUs plus a count-only tmpdisk GRES are requested in slurm.conf.
set slurm_conf_gres "gpu:tesla:3,gpu:special:1,tmpdisk:100"
# Nothing in gres.conf
set gres_conf ""
# Effectively "turn off" AutoDetect
set fake_gpus_conf ""
# No gpu lines are expected; only tmpdisk is reported, with its full count of
# 100 and no type, cores, links or file.
set expected_output "
GRES_PARSABLE\[tmpdisk\](100):(null)|4|(null)|(null)|(null)|CountOnly
"
testproc_alias "a58" test_cfg "a58" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# Test a59 - Test over-specified gres.conf compared to slurm.conf
#
# This tests that a larger gres.conf line is truncated down to match what
# slurm.conf specifies. This also tests that user errors are printed whenever
# more GRES are found in gres.conf than specified in slurm.conf.
# ##############################################################################
# slurm.conf only allows 2 tesla and 1 gtx.
set slurm_conf_gres "gpu:tesla:2,gpu:gtx:1"
# gres.conf lists 6 teslas across two lines and adds GRES that slurm.conf does
# not mention at all (gpu:special, nic, tmpdisk). Only the gtx line matches.
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[0-4\]
Name=gpu Type=tesla File=${dev}5
Name=gpu Type=special File=${dev}6
Name=gpu Type=gtx File=${dev}7
Name=nic Count=100
Name=tmpdisk Count=10G
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|-1|${dev}0
tesla|4|0-1|-1|${dev}1
tesla|4|0-1|-1|${dev}2
"
# Only the first two teslas and the single gtx are expected to survive.
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):gtx|4|(null)|(null)|${dev}7|$flags_default_type
"
# Five warnings are expected -- presumably one for each gres.conf line except
# the gtx line, which is the only one that agrees with slurm.conf.
set err_msgs $conf_mismatch_err
set expected_errs 5
testproc_alias "a59" test_cfg "a59" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# Test a60 - Test unique_id parsing (for MIGs)
# ##############################################################################
set slurm_conf_gres "gpu:tesla:4"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|(null)|${dev}1|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/7/0
tesla|4|0-1|(null)|${dev}2|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/10/0
tesla|4|2-3|(null)|${dev}3|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/8/0
tesla|4|2-3|(null)|${dev}4|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/9/0
"
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}1|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/7/0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}2|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/10/0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/8/0|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/9/0|$flags_file_type
"
testproc_alias "a60" test_cfg "a60" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# ##############################################################################
# ##############################################################################
# # Test b2 - Test that all MPS is distributed across multiple GPU types
# ##############################################################################
set slurm_conf_gres "gpu:tesla:1,gpu:1080:1,gpu:gtx560:1,mps:300"
set gres_conf ""
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|(null)|${dev}1
1080ti|4|0-1|(null)|${dev}2
gtx560|4|0-1|(null)|${dev}0
"
set expected_output "
GRES_PARSABLE\[mps\](100):gtx560|4|0-1|(null)|${dev}0|$flags_file_type_shared
GRES_PARSABLE\[gpu\](1):gtx560|4|0-1|(null)|${dev}0|$flags_file_type
GRES_PARSABLE\[mps\](100):tesla|4|0-1|(null)|${dev}1|$flags_file_type_shared
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[mps\](100):1080|4|0-1|(null)|${dev}2|$flags_file_type_shared
GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}2|$flags_file_type
"
testproc_alias "b2" test_cfg "b2" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test b4 - Test that errors are emitted and configured devices omitted when
# Cores or Links mismatch with the corresponding system devices
# ##############################################################################
# The slurm.conf type names (ti, gtx) are substrings of the detected type names
# (1080ti, gtx560) used in the fake system list below.
set slurm_conf_gres "gpu:ti:3,gpu:gtx:3"
# The first ti line has Cores=0 (detected: 0-1). The gtx line applies a single
# Links value to three devices, which can only agree with one of them.
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=ti File=${dev}0 COREs=0
Name=gpu Type=ti File=${dev}\[1-2\] COREs=0-1
Name=gpu Type=gtx File=${dev}\[3-5\] COREs=0-1 Links=-1,0,0,0,0,0
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
1080ti|4|0-1|(null)|${dev}0
1080ti|4|0-1|(null)|${dev}1
1080ti|4|0-1|(null)|${dev}2
gtx560|4|0-1|0,0,0,-1,0,0|${dev}3
gtx560|4|0-1|0,0,0,0,-1,0|${dev}4
gtx560|4|0-1|-1,0,0,0,0,0|${dev}5
"
# Only the devices whose Cores and Links agree remain: the last gtx device and
# the second and third ti devices.
set expected_output "
GRES_PARSABLE\[gpu\](1):gtx|4|0-1|-1,0,0,0,0,0|${dev}5|$flags_file_type
GRES_PARSABLE\[gpu\](1):ti|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):ti|4|0-1|(null)|${dev}2|$flags_file_type
"
# Three mismatch errors are expected -- presumably one per omitted device (one
# Cores mismatch and two Links mismatches).
set err_msgs $mismatch_err
set expected_errs 3
testproc_alias "b4" test_cfg "b4" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test b5 - Test that GPUs are sorted according to links, not device file
# ##############################################################################
set slurm_conf_gres "gpu:a100:8"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=a100 File=${dev}0 COREs=0-1 Links=0,0,0,-1,0,0,0,0
Name=gpu Type=a100 File=${dev}1 COREs=0-1 Links=0,0,-1,0,0,0,0,0
Name=gpu Type=a100 File=${dev}2 COREs=0-1 Links=0,-1,0,0,0,0,0,0
Name=gpu Type=a100 File=${dev}3 COREs=0-1 Links=-1,0,0,0,0,0,0,0
Name=gpu Type=a100 File=${dev}4 COREs=2-3 Links=0,0,0,0,0,0,0,-1
Name=gpu Type=a100 File=${dev}5 COREs=2-3 Links=0,0,0,0,0,0,-1,0
Name=gpu Type=a100 File=${dev}6 COREs=2-3 Links=0,0,0,0,0,-1,0,0
Name=gpu Type=a100 File=${dev}7 COREs=2-3 Links=0,0,0,0,-1,0,0,0
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
a100|4|0-1|-1,0,0,0,0,0,0,0|${dev}3
a100|4|0-1|0,-1,0,0,0,0,0,0|${dev}2
a100|4|0-1|0,0,-1,0,0,0,0,0|${dev}1
a100|4|0-1|0,0,0,-1,0,0,0,0|${dev}0
a100|4|2-3|0,0,0,0,-1,0,0,0|${dev}7
a100|4|2-3|0,0,0,0,0,-1,0,0|${dev}6
a100|4|2-3|0,0,0,0,0,0,-1,0|${dev}5
a100|4|2-3|0,0,0,0,0,0,0,-1|${dev}4
"
set expected_output "
GRES_PARSABLE\[gpu\](1):a100|4|0-1|-1,0,0,0,0,0,0,0|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,-1,0,0,0,0,0,0|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,-1,0,0,0,0,0|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,0,-1,0,0,0,0|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,-1,0,0,0|${dev}7|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,0,-1,0,0|${dev}6|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,0,0,-1,0|${dev}5|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,0,0,0,-1|${dev}4|$flags_file_type
"
testproc_alias "b5" test_cfg "b5" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test b6 - Test that "extra" GPUs are still used when not found on system
# ##############################################################################
set slurm_conf_gres "gpu:1080:3"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=1080 File=${dev}\[0-2\] COREs=0-1
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}0|$flags_default_type
GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}1|$flags_default_type
GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}2|$flags_default_type
"
testproc_alias "b6" test_cfg "b6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test b7 - Test that GPUs are sorted according to links, if specified, and
# device file, if not specified. Test also that null links are
# sorted after non-null links.
# ##############################################################################
set slurm_conf_gres "gpu:a100:8"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=a100 File=${dev}0 COREs=0-1 Links=0,0,0,-1
Name=gpu Type=a100 File=${dev}1 COREs=0-1 Links=0,0,-1,0
Name=gpu Type=a100 File=${dev}2 COREs=0-1 Links=0,-1,0,0
Name=gpu Type=a100 File=${dev}3 COREs=0-1 Links=-1,0,0,0
Name=gpu Type=a100 File=${dev}4 COREs=2-3
Name=gpu Type=a100 File=${dev}5 COREs=2-3
Name=gpu Type=a100 File=${dev}6 COREs=2-3
Name=gpu Type=a100 File=${dev}7 COREs=2-3
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
a100|4|0-1|-1,0,0,0|${dev}3
a100|4|0-1|0,-1,0,0|${dev}2
a100|4|0-1|0,0,-1,0|${dev}1
a100|4|0-1|0,0,0,-1|${dev}0
a100|4|2-3|(null)|${dev}7
a100|4|2-3|(null)|${dev}6
a100|4|2-3|(null)|${dev}5
a100|4|2-3|(null)|${dev}4
"
set expected_output "
GRES_PARSABLE\[gpu\](1):a100|4|0-1|-1,0,0,0|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,-1,0,0|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,-1,0|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,0,-1|${dev}0|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}4|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}5|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}6|$flags_file_type
GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}7|$flags_file_type
"
testproc_alias "b7" test_cfg "b7" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test b8 - Test that separate "extra" GPUs in gres.conf with different Cores
# and Links are properly accounted under the typeless slurm.conf
# gres specification.
# ##############################################################################
set slurm_conf_gres "gpu:5"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu File=${dev}\[0-1\] Cores=0,1
Name=gpu File=${dev}\[2-3\] Cores=0,1 Links=-1
Name=gpu File=${dev}4 Cores=0
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[gpu\](1):(null)|4|0,1|-1|${dev}2|$flags_default
GRES_PARSABLE\[gpu\](1):(null)|4|0,1|-1|${dev}3|$flags_default
GRES_PARSABLE\[gpu\](1):(null)|4|0,1|(null)|${dev}0|$flags_default
GRES_PARSABLE\[gpu\](1):(null)|4|0,1|(null)|${dev}1|$flags_default
GRES_PARSABLE\[gpu\](1):(null)|4|0|(null)|${dev}4|$flags_default
"
testproc_alias "b8" test_cfg "b8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test b10 - Test that slurm.conf doesn't allow a mix of Type and no Type
# ##############################################################################
set slurm_conf_gres "gpu:tesla:1,gpu:1"
set gres_conf ""
set fake_gpus_conf ""
set expected_output ""
set err_msgs $slurm_conf_type_err
set expected_errs 1
testproc_alias "b10" test_cfg "b10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test b12 - Test that gres.conf doesn't allow a mix of Type and no Type
# ##############################################################################
set slurm_conf_gres "gpu:tesla:2,gpu:v100:2"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu File=${dev}\[0-1\]
Name=gpu Type=v100 File=${dev}\[2-3\]
"
set fake_gpus_conf ""
set expected_output ""
set err_msgs $gres_conf_type_err
set expected_errs 1
testproc_alias "b12" test_cfg "b12" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test b14 - Test that gres.conf doesn't allow a mix of File and no File
# ##############################################################################
set slurm_conf_gres "gpu:tesla:2,gpu:v100:2"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[0-1\]
Name=gpu Type=v100
"
set fake_gpus_conf ""
set expected_output ""
set err_msgs $gres_conf_file_err
set expected_errs 1
testproc_alias "b14" test_cfg "b14" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# ------------------------------------------------------------------------------
# Test gres.conf parsing
# ------------------------------------------------------------------------------
# ##############################################################################
# ##############################################################################
# # Test c2 - Test gres/gpu plus gres/mps with count
# # NOTE the device numbers being out of order
# ##############################################################################
set slurm_conf_gres "gpu:tesla:1,gpu:gtx560:1,mps:200"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=gtx560 File=${dev}0 COREs=0,1
Name=gpu Type=tesla File=${dev}1 COREs=2,3
Name=mps Count=100 File=${dev}1
Name=mps Count=100 File=${dev}0
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[mps\](100):gtx560|4|0,1|(null)|${dev}0|$flags_file_type_shared
GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type
GRES_PARSABLE\[mps\](100):tesla|4|2,3|(null)|${dev}1|$flags_file_type_shared
GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type
"
testproc_alias "c2" test_cfg "c2" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c4 - Test gres/mps with count and no file names
# # NOTE the device numbers being out of order
# ##############################################################################
set slurm_conf_gres "gpu:tesla:1,gpu:gtx560:1,mps:210"
# The mps line carries only a total Count and names no device file.
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}1 COREs=2,3
Name=gpu Type=gtx560 File=${dev}0 COREs=0,1
Name=mps Count=210
"
set fake_gpus_conf ""
# The 210 mps are expected to show up as 105 on each of the two GPUs, with each
# mps line repeating the type, cores and file of its GPU.
set expected_output "
GRES_PARSABLE\[mps\](105):gtx560|4|0,1|(null)|${dev}0|$flags_default_type_shared
GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type
GRES_PARSABLE\[mps\](105):tesla|4|2,3|(null)|${dev}1|$flags_default_type_shared
GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type
"
testproc_alias "c4" test_cfg "c4" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c6 - Test using only a subset of the system devices found
# ##############################################################################
set slurm_conf_gres "gpu:tesla:3"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[3-4\] Cores=2-3
Name=gpu Type=tesla File=${dev}1 Cores=0-1
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|(null)|${dev}1
tesla|4|0-1|(null)|${dev}2
tesla|4|2-3|(null)|${dev}3
tesla|4|2-3|(null)|${dev}4
"
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}1|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|$flags_file_type
"
testproc_alias "c6" test_cfg "c6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c8 - Test disjoint sets of conf and system devices
# ##############################################################################
# Six teslas are requested: four come from gres.conf only and two from the fake
# system list only; no device file appears in both.
set slurm_conf_gres "gpu:tesla:6"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}1 Cores=2-3
Name=gpu Type=tesla File=${dev}2 Cores=2-3
Name=gpu Type=tesla File=${dev}3 Cores=2-3
Name=gpu Type=tesla File=${dev}4 Cores=2-3
"
set fake_gpus_conf "
# This file was autogenerated by $test_name
tesla|4|0-1|(null)|${dev}5
tesla|4|0-1|(null)|${dev}6
"
# All six devices are expected. The gres.conf-only devices carry
# $flags_default_type, the detected ones $flags_file_type.
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}1|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}2|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}5|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}6|$flags_file_type
"
testproc_alias "c8" test_cfg "c8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c10 - Different links, different records
# ##############################################################################
# Devices 0-2 are all doubly linked to each other
# Device 5 is singly linked to 3-4
# Devices 6-7 are doubly linked to each other
set slurm_conf_gres "gpu:tesla:8"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}0 Cores=0-3 Links=-1,2,2,0,0,0,0,0
Name=gpu Type=tesla File=${dev}1 Cores=0-3 Links=2,-1,2,0,0,0,0,0
Name=gpu Type=tesla File=${dev}2 Cores=0-3 Links=2,2,-1,0,0,0,0,0
Name=gpu Type=tesla File=${dev}3 Cores=0-3 Links=0,0,0,-1,0,1,0,0
Name=gpu Type=tesla File=${dev}4 Cores=0-3 Links=0,0,0,0,-1,1,0,0
Name=gpu Type=tesla File=${dev}5 Cores=0-3 Links=0,0,0,1,1,-1,0,0
Name=gpu Type=tesla File=${dev}6 Cores=0-3 Links=0,0,0,0,0,0,-1,2
Name=gpu Type=tesla File=${dev}7 Cores=0-3 Links=0,0,0,0,0,0,2,-1
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|-1,2,2,0,0,0,0,0|${dev}0|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|2,-1,2,0,0,0,0,0|${dev}1|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|2,2,-1,0,0,0,0,0|${dev}2|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,-1,0,1,0,0|${dev}3|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,0,-1,1,0,0|${dev}4|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,1,1,-1,0,0|${dev}5|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,0,0,0,-1,2|${dev}6|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,0,0,0,2,-1|${dev}7|$flags_default_type
"
testproc_alias "c10" test_cfg "c10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c12 - Empty everything
# ##############################################################################
set slurm_conf_gres ""
set gres_conf ""
set fake_gpus_conf ""
set expected_output ""
testproc_alias "c12" test_cfg "c12" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c14 - Empty system devices
# ##############################################################################
set slurm_conf_gres "gpu:tesla:4"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}\[1-4\] Cores=2-3
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}1|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}2|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|$flags_default_type
GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|$flags_default_type
"
testproc_alias "c14" test_cfg "c14" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c16 - Test non-GPU GRESs with types
# ##############################################################################
set slurm_conf_gres "gpu:tesla:1,tmpdisk:disky:1,nic:nikki:1,mic:mickey:1"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=tesla File=${dev}1 Cores=0-3
Name=tmpdisk Type=disky File=${dev}2 Cores=0-3
Name=nic Type=nikki File=${dev}3 Cores=0-3
Name=mic Type=mickey File=${dev}4 Cores=0-3
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[nic\](1):nikki|4|0-3|(null)|${dev}3|$flags_file_type
GRES_PARSABLE\[mic\](1):mickey|4|0-3|(null)|${dev}4|$flags_file_type
GRES_PARSABLE\[tmpdisk\](1):disky|4|0-3|(null)|${dev}2|$flags_file_type
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type
"
testproc_alias "c16" test_cfg "c16" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c17 - Test non-GPU GRESs without types
# ##############################################################################
set slurm_conf_gres "gpu:1,tmpdisk:1,nic:1,mic:1"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu File=${dev}1 Cores=0-3
Name=tmpdisk File=${dev}2 Cores=0-3
Name=nic File=${dev}3 Cores=0-3
Name=mic File=${dev}4 Cores=0-3
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[nic\](1):(null)|4|0-3|(null)|${dev}3|HAS_FILE
GRES_PARSABLE\[mic\](1):(null)|4|0-3|(null)|${dev}4|HAS_FILE
GRES_PARSABLE\[tmpdisk\](1):(null)|4|0-3|(null)|${dev}2|HAS_FILE
GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}1|$flags_default
"
testproc_alias "c17" test_cfg "c17" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c18 - Test NodeName when Name isn't specified
# ##############################################################################
set slurm_conf_gres "gpu:tesla:2"
# Both lines are scoped with NodeName, but the second one omits Name=.
set gres_conf "
# This file was autogenerated by $test_name
NodeName=$nodename Name=gpu Type=tesla File=${dev}1 Cores=0-3
NodeName=$nodename Type=tesla File=${dev}2 Cores=0-3
"
set fake_gpus_conf ""
# Only the device from the line that has Name= is expected in the output.
set expected_output "
GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type
"
# One "no type name" error is expected -- presumably for the Name-less line.
set err_msgs $no_name_err
set expected_errs 1
testproc_alias "c18" test_cfg "c18" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c20 - Test that empty Name does not parse and produces error
# ##############################################################################
set slurm_conf_gres ""
set gres_conf "
# This file was autogenerated by $test_name
Type=tesla File=${dev}1 Cores=0-3
"
set fake_gpus_conf ""
set expected_output ""
set err_msgs $key_parse_err
set expected_errs 1
testproc_alias "c20" test_cfg "c20" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c22 - Ensure no malloc error for large count with non-GPU GRES
# See bug 6014
# ##############################################################################
# The count uses a "g" suffix in both slurm.conf and gres.conf.
set slurm_conf_gres "tmpdisk:10g"
set gres_conf "Name=tmpdisk count=10g"
set fake_gpus_conf ""
# 10g is expected to be reported as 10 * 2^30 = 10737418240, in a single record
# with an empty flags field.
set expected_output "
GRES_PARSABLE\[tmpdisk\](10737418240):(null)|4|(null)|(null)|(null)|
"
testproc_alias "c22" test_cfg "c22" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c23 - Ensure no errors for large count with non-GPU GRES *and* with
# Type and Cores specified in gres.conf.
# ##############################################################################
set slurm_conf_gres "tmpdisk:tempy:10g"
set gres_conf "Name=tmpdisk Type=tempy Count=10g Cores=0-1"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[tmpdisk\](10737418240):tempy|4|0-1|(null)|(null)|HAS_TYPE
"
testproc_alias "c23" test_cfg "c23" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c24 - Different types, different records
# ##############################################################################
set slurm_conf_gres "gpu:a:2,gpu:b:2"
set gres_conf "
# This file was autogenerated by $test_name
Name=gpu Type=a File=${dev}1 Cores=0-3
Name=gpu Type=a File=${dev}2 Cores=0-3
Name=gpu Type=b File=${dev}5 Cores=0-3
Name=gpu Type=b File=${dev}6 Cores=0-3
"
set fake_gpus_conf ""
set expected_output "
GRES_PARSABLE\[gpu\](1):a|4|0-3|(null)|${dev}1|$flags_default_type
GRES_PARSABLE\[gpu\](1):a|4|0-3|(null)|${dev}2|$flags_default_type
GRES_PARSABLE\[gpu\](1):b|4|0-3|(null)|${dev}5|$flags_default_type
GRES_PARSABLE\[gpu\](1):b|4|0-3|(null)|${dev}6|$flags_default_type
"
testproc_alias "c24" test_cfg "c24" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c28 - Types tesla_a and tesla_b alternate from line to line, mixing
# device ranges with single devices; the expected output lists one record per
# device, in device-number order
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:tesla_a:4,gpu:tesla_b:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla_a File=${dev}\[1-2\] Cores=0-3" \
    "Name=gpu Type=tesla_b File=${dev}\[3-4\] Cores=0-3" \
    "Name=gpu Type=tesla_a File=${dev}5 Cores=0-3" \
    "Name=gpu Type=tesla_b File=${dev}6 Cores=0-3" \
    "Name=gpu Type=tesla_a File=${dev}7 Cores=0-3" \
    "Name=gpu Type=tesla_b File=${dev}8 Cores=0-3" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}2|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}3|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}4|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}5|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}6|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}7|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}8|$flags_default_type" \
    ""] "\n"]
testproc_alias c28 test_cfg c28 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c30 - Devices 3 and 4 are each listed twice and then once more as a
# range; no output is expected, only the duplicate-device error (expected_errs 1)
# ##############################################################################
set fake_gpus_conf {}
set expected_output {}
set slurm_conf_gres {gpu:tesla:6}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}3 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}3 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}4 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}4 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[3-4\] Cores=0-3" \
    ""] "\n"]
# Error text and match count handed to test_cfg
set expected_errs 1
set err_msgs $dup_err
testproc_alias c30 test_cfg c30 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c32 - Each gres.conf line repeats the previous devices and adds one
# more (1, 1-2, 1-3, 1-4); the duplicate-device error is expected
# ##############################################################################
set fake_gpus_conf {}
set expected_output {}
set slurm_conf_gres {gpu:tesla:10}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[1-2\] Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[1-3\] Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[1-4\] Cores=0-3" \
    ""] "\n"]
# Error text and match count handed to test_cfg
set expected_errs 1
set err_msgs $dup_err
testproc_alias c32 test_cfg c32 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c34 - Mirror image of c32: the overlapping ranges grow downwards
# (4, 3-4, 2-4, 1-4); the duplicate-device error is expected
# ##############################################################################
set fake_gpus_conf {}
set expected_output {}
set slurm_conf_gres {gpu:tesla:10}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}4 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[3-4\] Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[2-4\] Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}\[1-4\] Cores=0-3" \
    ""] "\n"]
# Error text and match count handed to test_cfg
set expected_errs 1
set err_msgs $dup_err
testproc_alias c34 test_cfg c34 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c36 - The same device file appears on every line, each time with a
# different Cores value; the duplicate-device error is still expected
# ##############################################################################
set fake_gpus_conf {}
set expected_output {}
set slurm_conf_gres {gpu:tesla:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0-1" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0-2" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0-3" \
    ""] "\n"]
# Error text and match count handed to test_cfg
set expected_errs 1
set err_msgs $dup_err
testproc_alias c36 test_cfg c36 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c38 - Distinct devices whose Cores ranges overlap (0, 0-1, 0-2, 0-3);
# every device is expected back with exactly the range it was given
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:tesla:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0" \
    "Name=gpu Type=tesla File=${dev}2 Cores=0-1" \
    "Name=gpu Type=tesla File=${dev}3 Cores=0-2" \
    "Name=gpu Type=tesla File=${dev}4 Cores=0-3" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}2|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-2|(null)|${dev}3|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}4|$flags_default_type" \
    ""] "\n"]
testproc_alias c38 test_cfg c38 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c40 - Links given as an empty string, as "null" and as "0" are each
# expected to raise the invalid-Link error (3 in total); all four devices are
# still expected in the output, with no links
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:tesla:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0-3" \
    "Name=gpu Type=tesla File=${dev}2 Cores=0-3 Links=\"\"" \
    "Name=gpu Type=tesla File=${dev}3 Cores=0-3 Links=null" \
    "Name=gpu Type=tesla File=${dev}4 Cores=0-3 Links=0" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}2|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}3|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}4|$flags_default_type" \
    ""] "\n"]
# One invalid-Link error per bad Links value
set expected_errs 3
set err_msgs $links_err
testproc_alias c40 test_cfg c40 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test c42 - Links accepts comma-separated lists only: the dash ranges on
# devices 1 and 3 come back as (null), while the comma lists on devices 2 and 4
# are kept. The expected output lists the records with links first.
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:tesla:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1 Cores=0-3 Links=0-1" \
    "Name=gpu Type=tesla File=${dev}2 Cores=0-3 Links=0,-1" \
    "Name=gpu Type=tesla File=${dev}3 Cores=0-3 Links=0-2" \
    "Name=gpu Type=tesla File=${dev}4 Cores=0-3 Links=0,-1,2" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,-1|${dev}2|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,-1,2|${dev}4|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}3|$flags_default_type" \
    ""] "\n"]
testproc_alias c42 test_cfg c42 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c46 - No Type in slurm.conf or gres.conf; records are expected with a
# (null) type and the untyped default flags
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu File=${dev}1 Cores=0-3" \
    "Name=gpu File=${dev}2 Cores=0-3" \
    "Name=gpu File=${dev}3 Cores=0-3" \
    "Name=gpu File=${dev}4 Cores=0-3" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}1|$flags_default" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}2|$flags_default" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}3|$flags_default" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}4|$flags_default" \
    ""] "\n"]
testproc_alias c46 test_cfg c46 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c48 - Cores omitted, empty, literal "null", or a single core.
# Expected Cores field: (null) when omitted, empty for "", "null" verbatim, and
# "0" for Cores=0
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:tesla:5}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1" \
    "Name=gpu Type=tesla File=${dev}2" \
    "Name=gpu Type=tesla File=${dev}3 Cores=\"\"" \
    "Name=gpu Type=tesla File=${dev}4 Cores=null" \
    "Name=gpu Type=tesla File=${dev}5 Cores=0" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|(null)|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|(null)|(null)|${dev}2|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4||(null)|${dev}3|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|null|(null)|${dev}4|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0|(null)|${dev}5|$flags_default_type" \
    ""] "\n"]
testproc_alias c48 test_cfg c48 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# TODO: What other tests should we add?
# Make sure current gres.conf files still work as expected
# Test to make sure CPU affinity is correct? Machine vs abstract?
# Invalid CPU counts
# Invalid CPU range
# ##############################################################################
# # Test c50 - gres.conf documentation example: two GPUs of different types,
# each with a gres/mps entry on the same device file. The expected mps records
# carry the type and cores of the GPU on that device.
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:gtx560:1,gpu:tesla:1,mps:200}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=gtx560 File=${dev}0 COREs=0,1" \
    "Name=gpu Type=tesla File=${dev}1 COREs=2,3" \
    "Name=mps Count=100 File=${dev}0" \
    "Name=mps Count=100 File=${dev}1" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[mps\](100):gtx560|4|0,1|(null)|${dev}0|$flags_file_type_shared" \
    "GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type" \
    "GRES_PARSABLE\[mps\](100):tesla|4|2,3|(null)|${dev}1|$flags_file_type_shared" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type" \
    ""] "\n"]
testproc_alias c50 test_cfg c50 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c52 - gres.conf documentation example: device 2 is left out (its line
# is commented out inside the generated gres.conf), so only devices 0, 1 and 3
# are expected
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:tesla:3}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}\[0-1\] COREs=0,1" \
    "# NOTE: nvidia2 device is out of service" \
    "# Name=gpu Type=tesla File=${dev}\[2-3\] COREs=2,3" \
    "Name=gpu Type=tesla File=${dev}3 COREs=2,3" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0,1|(null)|${dev}0|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|0,1|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}3|$flags_default_type" \
    ""] "\n"]
testproc_alias c52 test_cfg c52 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c54 - gres.conf documentation example using NodeName= prefixes
# # Note: ${nodename_base}0 == $nodename, so only the line covering node 0
# # (the [0-2] range) applies here; the lines for nodes 3 and 4-15 are not
# # expected to contribute any records
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "## Explicitly specify devices on nodes ${nodename_base}0-${nodename_base}15" \
    "# NodeName=${nodename_base}\[0-15\] Name=gpu File=${dev}\[0-3\]" \
    "# NOTE: ${nodename_base}3 ${dev}1 device is out of service" \
    "NodeName=${nodename_base}\[0-2\] Name=gpu File=${dev}\[0-3\]" \
    "NodeName=${nodename_base}3 Name=gpu File=${dev}\[0,2-3\]" \
    "NodeName=${nodename_base}\[4-15\] Name=gpu File=${dev}\[0-3\]" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}0|$flags_default" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}1|$flags_default" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}2|$flags_default" \
    "GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}3|$flags_default" \
    ""] "\n"]
testproc_alias c54 test_cfg c54 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test c56 - gres/mps with a count on only some of the GPU devices
# # NOTE: GPU lines are deliberately out of device order. Device 0 has a GPU
# # but no gres/mps line (an mps record with count 0 is expected for it), and
# # the gres/mps line for device 8 matches no configured GPU (no record is
# # expected for it).
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:gtx560:1,gpu:tesla:2,mps:900}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=tesla File=${dev}1 COREs=2,3" \
    "Name=gpu Type=gtx560 File=${dev}0 COREs=0,1" \
    "Name=gpu Type=tesla File=${dev}3 COREs=2,3" \
    "Name=mps Count=200 File=${dev}1" \
    "Name=mps Count=300 File=${dev}3" \
    "Name=mps Count=400 File=${dev}8" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[mps\](0):gtx560|4|0,1|(null)|${dev}0|$flags_default_type_shared" \
    "GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type" \
    "GRES_PARSABLE\[mps\](200):tesla|4|2,3|(null)|${dev}1|$flags_file_type_shared" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[mps\](300):tesla|4|2,3|(null)|${dev}3|$flags_file_type_shared" \
    "GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}3|$flags_default_type" \
    ""] "\n"]
testproc_alias c56 test_cfg c56 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ==============================================================================
# ==============================================================================
# # Tests dX - Test Flags parsing with and without AutoDetect
# ==============================================================================
# ==============================================================================
# ##############################################################################
# # Test d1 - No Flags in gres.conf: the default typed flag set (all envs set)
# is expected on every record
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1" \
    "Name=gpu Type=a100 File=${dev}1 Cores=0-1" \
    "Name=gpu Type=a100 File=${dev}2 Cores=0-1" \
    "Name=gpu Type=a100 File=${dev}3 Cores=0-1" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_default_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_default_type" \
    ""] "\n"]
testproc_alias d1 test_cfg d1 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d2 - Flags=no_gpu_env on every line: records are expected with the
# file/type flags only (no envs set)
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=no_gpu_env" \
    "Name=gpu Type=a100 File=${dev}1 Cores=0-1 Flags=no_gpu_env" \
    "Name=gpu Type=a100 File=${dev}2 Cores=0-1 Flags=no_gpu_env" \
    "Name=gpu Type=a100 File=${dev}3 Cores=0-1 Flags=no_gpu_env" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type" \
    ""] "\n"]
testproc_alias d2 test_cfg d2 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d3 - Flags combined with NodeName: only the $nodename lines
# (amd_gpu_env) are expected in the output; the $nodename_diff lines
# (nvidia_gpu_env) are expected to contribute nothing
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "NodeName=$nodename Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=amd_gpu_env" \
    "NodeName=$nodename Name=gpu Type=a100 File=${dev}1 Cores=0-1 Flags=amd_gpu_env" \
    "NodeName=$nodename_diff Name=gpu Type=a100 File=${dev}2 Cores=0-1 Flags=nvidia_gpu_env" \
    "NodeName=$nodename_diff Name=gpu Type=a100 File=${dev}3 Cores=0-1 Flags=nvidia_gpu_env" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_RSMI" \
    ""] "\n"]
testproc_alias d3 test_cfg d3 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d4 - Device 0 is both in gres.conf (no_gpu_env) and in the fake
# AutoDetect list (nvidia_gpu_env); the gres.conf no_gpu_env must NOT be
# overwritten, so no ENV_* flag is expected on any record
# ##############################################################################
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=no_gpu_env" \
    "Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=no_gpu_env" \
    ""] "\n"]
set fake_gpus_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type" \
    ""] "\n"]
testproc_alias d4 test_cfg d4 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d5 - gres.conf and the fake AutoDetect entry agree on nvidia_gpu_env
# for device 0; all four records are expected with ENV_NVML
# ##############################################################################
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=nvidia_gpu_env" \
    "Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=nvidia_gpu_env" \
    ""] "\n"]
set fake_gpus_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_NVML" \
    ""] "\n"]
testproc_alias d5 test_cfg d5 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d6 - Device 0 has no gres.conf line and comes only from the fake
# AutoDetect list; it is still expected in the output, with the same ENV_NVML
# flag as the gres.conf devices 1-3
# ##############################################################################
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=nvidia_gpu_env" \
    ""] "\n"]
set fake_gpus_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_NVML" \
    ""] "\n"]
testproc_alias d6 test_cfg d6 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d7 - As d6, but the gres.conf devices use a different env flag
# (amd_gpu_env) than the AutoDetect-only device 0 (nvidia_gpu_env); each
# record is expected to keep its own flag
# NOTE: The stepd will combine all env flags of records on the same
# node, so in reality, both CUDA_* and ROCR_* would be set on all.
# ##############################################################################
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=amd_gpu_env" \
    ""] "\n"]
set fake_gpus_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_RSMI" \
    ""] "\n"]
testproc_alias d7 test_cfg d7 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d8 - Flags are given on the first gres.conf line only; the following
# lines are expected to inherit them (ENV_NVML,ENV_RSMI on every record)
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=amd_gpu_env,nvidia_gpu_env" \
    "Name=gpu Type=a100 File=${dev}1 Cores=0-1" \
    "Name=gpu Type=a100 File=${dev}2 Cores=0-1" \
    "Name=gpu Type=a100 File=${dev}3 Cores=0-1" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_NVML,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_NVML,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_NVML,ENV_RSMI" \
    ""] "\n"]
testproc_alias d8 test_cfg d8 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output
# ##############################################################################
# # Test d9 - Two gres.conf lines for the same GRES with conflicting env flags
# (no_gpu_env vs nvidia_gpu_env): the flags-mismatch fatal error is expected
# and no output
# ##############################################################################
set fake_gpus_conf {}
set expected_output {}
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}\[0-1\] Cores=0-1 Flags=no_gpu_env" \
    "Name=gpu Type=a100 File=${dev}\[2-3\] Cores=0-1 Flags=nvidia_gpu_env" \
    ""] "\n"]
# Error text and match count handed to test_cfg
set expected_errs 1
set err_msgs $flags_mismatch_err
testproc_alias d9 test_cfg d9 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test d10 - The first gres.conf line has no Flags (defaults) and the next
# one sets nvidia_gpu_env; the flags-mismatch fatal error is expected even
# though the fake AutoDetect entry for device 0 reports nvidia_gpu_env
# ##############################################################################
set expected_output {}
set slurm_conf_gres {gpu:a100:4}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1" \
    "Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=nvidia_gpu_env" \
    ""] "\n"]
set fake_gpus_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env" \
    ""] "\n"]
# Error text and match count handed to test_cfg
set expected_errs 1
set err_msgs $flags_mismatch_err
testproc_alias d10 test_cfg d10 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs
# ##############################################################################
# # Test d11 - Env flag propagation across different Types of the same GRES
# name: amd_gpu_env is set on the a100 line only, yet ENV_RSMI is expected
# on the b100, c100 and d100 records as well
# ##############################################################################
set fake_gpus_conf {}
set slurm_conf_gres {gpu:a100:1,gpu:b100:1,gpu:c100:1,gpu:d100:1}
set gres_conf [join [list "" \
    "# This file was autogenerated by $test_name" \
    "Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=amd_gpu_env" \
    "Name=gpu Type=b100 File=${dev}1 Cores=0-1" \
    "Name=gpu Type=c100 File=${dev}2 Cores=0-1" \
    "Name=gpu Type=d100 File=${dev}3 Cores=0-1" \
    ""] "\n"]
set expected_output [join [list "" \
    "GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):b100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):c100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_RSMI" \
    "GRES_PARSABLE\[gpu\](1):d100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_RSMI" \
    ""] "\n"]
testproc_alias d11 test_cfg d11 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output