| #!/usr/bin/env expect |
| ############################################################################ |
| # Purpose: Test gres.conf-specified and system-detected GPU device merging |
| # logic |
| ############################################################################ |
| # Copyright (C) SchedMD LLC. |
| # |
| # This file is part of Slurm, a resource management program. |
| # For details, see <https://slurm.schedmd.com/>. |
| # Please also read the included file: DISCLAIMER. |
| # |
| # Slurm is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 2 of the License, or (at your option) |
| # any later version. |
| # |
| # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| # details. |
| # |
| # You should have received a copy of the GNU General Public License along |
| # with Slurm; if not, write to the Free Software Foundation, Inc., |
| # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| ############################################################################ |
| source ./globals |
| |
| # Programs built by this test |
| set test_prog $test_name.prog |
| set test_ulong_prog $test_name.ulong |
| |
| # Node names handed to the test program |
| set nodename_base tux |
| set nodename ${nodename_base}0 |
| set nodename_diff ${nodename_base}1 |
| |
| # Generated wrapper script and its captured output |
| set file_in $test_dir/test_script |
| set file_out $test_dir/output |
| |
| # Assume unsigned long is 64 bits unless overwritten |
| set ulong_bits 64 |
| |
| # Regular expressions for the error/warning messages the tests look for |
| set dup_err "error: gpu duplicate device file name" |
| set cpu_set_err "fatal: _set_cpu_set_bitstr: cpu_set_bitstr is empty" |
| set mismatch_err "error: This GPU specified in \\\[slurm\\\|gres\\\].conf has mismatching Cores or Links" |
| set no_name_err "error: Invalid GRES data, no type name" |
| set parse_err "error: Parse error in file" |
| set links_err "error: gres.conf: Ignoring invalid Link" |
| set conf_mismatch_err "warning: A line in gres.conf for GRES $re_word_str has $number more configured than expected in slurm.conf" |
| set slurm_conf_type_err "GRES in slurm.conf have a type while others do not" |
| set gres_conf_type_err "fatal: gres.conf for $re_word_str, some records have \"Type=\" specification while others do not" |
| set gres_conf_file_err "fatal: gres.conf for $re_word_str, some records have \"File\" specification while others do not" |
| set flags_mismatch_err "fatal: Invalid GRES record name=${re_word_str} type=${re_word_str}: Flags (${re_word_str}) does not match env flags for previous GRES of same node and name" |
| set flags_no_gpu_err "fatal: Invalid GRES record name=${re_word_str} type=${re_word_str}: Flags (${re_word_str}) contains \"no_gpu_env\", which must be mutually exclusive to all other GRES env flags of same node and name" |
| set key_parse_err "error: _parse_next_key: Parsing error at unrecognized key" |
| |
| # Flag strings that appear as the last field of the expected GRES_PARSABLE lines |
| set flags_type HAS_TYPE |
| set flags_file HAS_FILE |
| set flags_file_type HAS_FILE,HAS_TYPE |
| set flags_file_type_shared HAS_FILE,HAS_TYPE,SHARED,ONE_SHARING |
| set flags_default HAS_FILE,ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT |
| set flags_default_type HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT |
| set flags_default_type_shared HAS_FILE,HAS_TYPE,ENV_NVML,ENV_RSMI,ENV_ONEAPI,ENV_OPENCL,ENV_DEFAULT,SHARED,ONE_SHARING |
| |
| # Off unless a log-level argument is given on the command line |
| set debug_mode 0 |
| |
| # Many Cray-specific changes would be required in the slurm.conf test files, |
| # so the test is skipped there. This check stays after the variable |
| # definitions above so they exist by the time the test is skipped. |
| if {[string equal [get_config_param "SwitchType"] "switch/cray"]} { |
|     skip "This test can not run on a Cray system" |
| } |
| |
| # |
| ## testproc configuration |
| # |
| # Reduce verbosity |
| set testsuite_testproc_log_calls no |
| |
| proc cleanup {} { |
|     # Remove the two binaries compiled by this test. |
|     global test_prog test_ulong_prog |
| |
|     foreach generated [list $test_prog $test_ulong_prog] { |
|         file delete $generated |
|     } |
| } |
| |
| # The first command-line argument (if any) selects a run mode: |
| #   v, valgrind     - run the test program under valgrind |
| #   i, info         - debug mode, log level INFO |
| #   verbose         - debug mode, log level VERBOSE |
| #   d, d1, debug    - debug mode, log level DEBUG |
| #   d2, debug2      - debug mode, log level DEBUG2 |
| #   d3, debug3      - debug mode, log level DEBUG3 |
| # Any other value, or no argument at all, keeps the defaults set here. |
| set use_valgrind 0 |
| |
| # log_level_t values in log.h |
| # NOTE: The test requires output from log level INFO, so can't go any lower |
| set SLURM_LOG_LEVEL_INFO 3 |
| set SLURM_LOG_LEVEL_VERBOSE 4 |
| set SLURM_LOG_LEVEL_DEBUG 5 |
| set SLURM_LOG_LEVEL_DEBUG2 6 |
| set SLURM_LOG_LEVEL_DEBUG3 7 |
| |
| set debug_level $SLURM_LOG_LEVEL_INFO |
| |
| switch -exact -- [lindex $argv 0] { |
|     v - |
|     valgrind { |
|         set use_valgrind 1 |
|         log_info "Running tests under valgrind!" |
|     } |
|     i - |
|     info { |
|         set debug_mode 1 |
|         log_info "Running tests under debug mode with log level INFO" |
|         set debug_level $SLURM_LOG_LEVEL_INFO |
|     } |
|     verbose { |
|         set debug_mode 1 |
|         log_info "Running tests under debug mode with log level VERBOSE" |
|         set debug_level $SLURM_LOG_LEVEL_VERBOSE |
|     } |
|     d - |
|     d1 - |
|     debug { |
|         set debug_mode 1 |
|         log_info "Running tests under debug mode with log level DEBUG" |
|         set debug_level $SLURM_LOG_LEVEL_DEBUG |
|     } |
|     d2 - |
|     debug2 { |
|         set debug_mode 1 |
|         log_info "Running tests under debug mode with log level DEBUG2" |
|         set debug_level $SLURM_LOG_LEVEL_DEBUG2 |
|     } |
|     d3 - |
|     debug3 { |
|         set debug_mode 1 |
|         log_info "Running tests under debug mode with log level DEBUG3" |
|         set debug_level $SLURM_LOG_LEVEL_DEBUG3 |
|     } |
| } |
| |
| proc touch_file {file} { |
|     # Run touch(1) on the given path and hand back whatever exec returns. |
|     return [exec touch $file] |
| } |
| |
| # Write $text followed by a newline to $filepath, replacing any existing |
| # content. |
| # |
| # text     - The content to write (may be empty). |
| # filepath - The file to create or overwrite. |
| # |
| # Uses Tcl channel I/O instead of `exec echo $text > $filepath`, so text |
| # that starts with `<`, `>`, `|` or `2>` is not taken as an exec |
| # redirection and `-n`/`-e` is not taken as an echo option. It also avoids |
| # spawning a process for every generated file. |
| proc generate_file {text filepath} { |
|     set fd [open $filepath w] |
|     # Close the channel even if the write fails, then re-raise the error |
|     if {[catch {puts $fd $text} err]} { |
|         close $fd |
|         error $err |
|     } |
|     close $fd |
| } |
| |
| # Write (or overwrite) $test_dir/slurm.conf, the slurm.conf read by the test |
| # runner program. |
| # |
| # The file holds no node definitions: the node name and the node's GRES |
| # string are passed to the test program directly. What is written here is all |
| # that is needed to trigger loading the GRES GPU plugin. |
| proc create_slurm_conf {} { |
|     global test_name test_dir |
| |
|     set conf_text " |
| # This file was autogenerated by $test_name |
| ControlMachine=test_machine |
| ClusterName=test_cluster |
| GresTypes=gpu,mps,nic,mic,tmpdisk |
| " |
| |
|     generate_file $conf_text $test_dir/slurm.conf |
| } |
| |
| create_slurm_conf |
| |
| # Set up dummy device files for testing. They just need to exist. |
| # Defines dev0 .. dev8 as "${dev}0" .. "${dev}8" and touches each one. |
| set dev "$test_dir/nvidia" |
| foreach dev_idx {0 1 2 3 4 5 6 7 8} { |
|     set dev_path "${dev}${dev_idx}" |
|     set dev${dev_idx} $dev_path |
|     touch_file $dev_path |
| } |
| # Drop the loop helpers so only dev and dev0..dev8 remain defined |
| unset dev_idx dev_path |
| |
| |
| # Build test program (with -DUSING_VALGRIND when running under valgrind) |
| if {$use_valgrind} { |
|     set rc [compile_against_libslurm -full $test_prog "-DUSING_VALGRIND"] |
| } else { |
|     set rc [compile_against_libslurm -full $test_prog] |
| } |
| if {$rc} { |
|     fail "Cannot compile test program" |
| } |
| |
| # Build program to determine size of unsigned long in C. |
| # A failed build is reported through fail, like the main test program above, |
| # instead of surfacing as an uncaught Tcl error from exec. |
| set build_cmd "$bin_cc ${test_ulong_prog}.c -o ${test_ulong_prog}" |
| log_debug "Build command: $build_cmd" |
| if {[catch {eval exec $build_cmd} build_output]} { |
|     fail "Cannot compile $test_ulong_prog: $build_output" |
| } |
| |
| # Trim the program output and check it is an integer before it is stored in |
| # ulong_bits and used in arithmetic by later tests. |
| set output [string trim [run_command_output -fail -nolog "./$test_ulong_prog"]] |
| if {![string is integer -strict $output]} { |
|     fail "Unexpected output from $test_ulong_prog: '$output'" |
| } |
| if {$output != $ulong_bits} { |
|     log_warn "We are running on a non-64-bit architecture, where sizeof(unsigned long) * 8 = $output bits" |
|     set ulong_bits $output |
| } |
| |
| # |
| # Run $test_prog against one configuration and compare its GRES_PARSABLE |
| # output (and, optionally, its error messages) with what is expected. |
| # |
| # test_minor               - The minor test number (39.18.X). |
| # slurm_conf_gres          - The `Gres=` value that slurm.conf would hold for |
| #                            this node. It is passed straight to the program, |
| #                            which is easier than regenerating slurm.conf with |
| #                            different node definitions. |
| # gres_conf                - Contents written to gres.conf. |
| # fake_gpus_conf           - Contents written to fake_gpus.conf. This file |
| #                            tells Slurm to pretend the system detected these |
| #                            devices, emulating the result of AutoDetect=nvml. |
| # output_expected          - The expected "GRES_PARSABLE" output. |
| # err_msgs (optional)      - Error message regex to look for. Use | for |
| #                            multiple. Combined with $output_expected it only |
| #                            matches reliably when the messages appear BEFORE |
| #                            the GRES_PARSABLE lines. |
| # errs_expected (optional) - How many times $err_msgs must match the output. |
| #                            Defaults to 0. Any other count fails the test. |
| # |
| proc test_cfg {test_minor slurm_conf_gres gres_conf fake_gpus_conf output_expected {err_msgs ""} {errs_expected 0} } { |
|     global test_dir nodename test_prog re_word_str |
|     global test_name use_valgrind file_in file_out |
|     global bin_cat debug_mode debug_level |
| |
|     log_info "====$test_name.$test_minor====" |
| |
|     generate_file $gres_conf $test_dir/gres.conf |
|     generate_file $fake_gpus_conf $test_dir/fake_gpus.conf |
| |
|     # Optional valgrind prefix; the rest of the wrapper script is the same |
|     # for both modes. |
|     set launcher "" |
|     if {$use_valgrind} { |
|         set launcher "valgrind --tool=memcheck --error-limit=no --leak-check=full --show-reachable=yes --max-stackframe=16777216 --num-callers=20 --child-silent-after-fork=yes --track-origins=yes " |
|     } |
|     make_bash_script $file_in " |
| ${launcher}./$test_prog \"$test_dir\" \"$nodename\" \"$slurm_conf_gres\" $debug_level > $file_out 2>&1 |
| $bin_cat $file_out |
| rm -rf $file_out" |
| |
|     ## |
|     ### Parse the `GRES_PARSABLE` output |
|     ## |
|     # The order of GPUs is important because it directly corresponds to the |
|     # bits of the GRES bitmaps used to track the GPUs, and this shouldn't |
|     # change unless we deliberately change it. |
|     # |
|     # Also, we will eventually want to sort GPUs by PCI bus ID if AutoDetect |
|     # is used, so we need to test the order to make sure that works. We may |
|     # also want to guarantee that the GPU order in gres.conf is preserved if |
|     # AutoDetect is NOT used, so users can manually specify the PCI bus |
|     # order. |
|     if {$debug_mode} { |
|         # Debug mode: let the command runner log the program output |
|         set output [run_command_output -fail "$file_in"] |
|     } else { |
|         set output [run_command_output -fail -nolog "$file_in"] |
|     } |
| |
|     # The same pattern extracts the records from actual and expected text |
|     set parsable_re "GRES_PARSABLE$re_word_str" |
|     set actual [regexp -all -inline $parsable_re $output] |
| |
|     # Count occurrences of the expected error messages, if any were given |
|     set errs_actual 0 |
|     if {$err_msgs ne ""} { |
|         set errs_actual [regexp -all "$err_msgs" $output] |
|     } |
| |
|     set expected [regexp -all -inline $parsable_re $output_expected] |
| |
|     file delete $file_in |
| |
|     # Check the output with what is expected |
|     log_debug "Actual:" |
|     foreach record $actual { |
|         log_debug "$record" |
|     } |
|     if {![subtest {$actual == $expected} "Expected output does not match actual output"]} { |
|         log_debug "Expected:" |
|         foreach record $expected { |
|             log_debug "$record" |
|         } |
|     } |
| |
|     # Check for expected errors, if applicable |
|     if {$err_msgs ne ""} { |
|         subtest {$errs_actual == $errs_expected} "Verify expected errors" "$errs_actual != $errs_expected (regexp='$err_msgs')" |
|     } |
| } |
| |
| ################################################################################ |
| # NOTES: |
| ################################################################################ |
| # fake_gpus.conf is of the following format, with each line representing one |
| # GPU device: |
| # |
| # type|cpu_count|cpu_range|links|device_files[|unique_id] |
| # |
| # Optionally, cpu_range allows special keywords starting in `~` to aid in |
| # testing the conversion from an NVML cpu affinity array to Slurm bitstring. |
| # |
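| # The GRES_PARSABLE output is of the following format: |
| # |
| # GRES_PARSABLE[name](device_count):type|cpu_count|cpu_range|links|device_files[|unique_id] |
| # |
| # The expected outputs in this file additionally end with a `|flags` field |
| # (e.g. `|HAS_FILE,HAS_TYPE`), which is empty when no flags are set. |
| # |
| # To better decipher the tests, run this script with a log-level argument |
| # (e.g. `debug2`), which is passed on to the test program, or temporarily |
| # increase the log verbosity in test39.18.prog.c. E.g.: |
| # |
| # opts.stderr_level = SLURM_LOG_LEVEL_DEBUG2; |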
| # |
| # Note that there is a known issue with Expect where the regex parsing fails |
| # when the output is somewhat large. So to avoid random errors, each test output |
| # should be kept concise. |
| # |
| ################################################################################ |
| |
| |
| |
| # ############################################################################## |
| # ------------------------------------------------------------------------------ |
| # ------------------------------------------------------------------------------ |
| # ############################################################################## |
| |
| # ############################################################################## |
| # # Test a2 - Type-less gpu specification in slurm.conf and empty gres.conf |
| # # NOTE the device numbers being out of order |
| # ############################################################################## |
| |
| # fake_gpus.conf lists the devices as 1,3,4,2; the expected output lists |
| # them as 1,2,3,4 with a (null) type. |
| set gres_conf {} |
| set slurm_conf_gres gpu:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|(null)|${dev}1 |
| tesla|4|2-3|(null)|${dev}3 |
| tesla|4|2-3|(null)|${dev}4 |
| tesla|4|0-1|(null)|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}1|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}2|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}3|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}4|$flags_file |
| " |
| testproc_alias a2 test_cfg a2 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a4 - Test empty and null identifiers in fake_gpus.conf |
| # ############################################################################## |
| |
| # Eight fake devices cover every combination of null/non-null cpu range, |
| # links and device file. The expected output lists the four entries without |
| # a device file first (empty flags), then the four with one (HAS_FILE). |
| set gres_conf {} |
| set slurm_conf_gres gpu:8 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|(null)|(null)|(null) |
| tesla|4|(null)|(null)|${dev}1 |
| tesla|4|(null)|-1,0|(null) |
| tesla|4|(null)|-1,0|${dev}3 |
| tesla|4|0-1|(null)|(null) |
| tesla|4|0-1|(null)|${dev}2 |
| tesla|4|0-1|-1,0|(null) |
| tesla|4|0-1|-1,0|${dev}4 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|-1,0|(null)| |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|-1,0|(null)| |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|(null)| |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|(null)| |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|-1,0|${dev}3|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|-1,0|${dev}4|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}1|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}2|$flags_file |
| " |
| testproc_alias a4 test_cfg a4 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| |
| # ############################################################################## |
| # The following tests are NVML-specific. |
| # They will only execute if HAVE_NVML = 1 in config.h. |
| # In fake_gpus_conf, a CPU range starting with `~` will trigger the GPU plugin's |
| # gpu_p_test_cpu_conv(). In order for this to exercise gpu/nvml-specific code, |
| # explicitly specify `AutoDetect=nvml` in the ad-hoc gres.conf. If not, |
| # gpu/generic will be used and CPU ranges will be set to null, failing the tests |
| # ############################################################################## |
| |
| # NVML-only tests a6 - a20. Each one writes `AutoDetect=nvml` to gres.conf and |
| # gives a `~keyword` cpu range in fake_gpus.conf, then checks the cpu range |
| # reported in the GRES_PARSABLE output. |
| # The `if` condition and every `expr` argument are braced so each is |
| # substituted exactly once. |
| if {[have_nvml]} { |
| |
| # ############################################################################## |
| # # Test a6 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity of 20 ulongs with all bits set |
| # ############################################################################## |
| |
| set arr_len 20 |
| set cpu_array [string repeat "X" $arr_len] |
| set cpus_count [expr {$arr_len * $ulong_bits}] |
| set cpus_count_m1 [expr {$cpus_count - 1}] |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf "# This file was autogenerated by $test_name |
| tesla|$cpus_count|~$cpu_array|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a6" test_cfg "a6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a8 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity of 1 ulong with all bits set |
| # ############################################################################## |
| |
| set arr_len 1 |
| set cpu_array [string repeat "X" $arr_len] |
| set cpus_count [expr {$arr_len * $ulong_bits}] |
| set cpus_count_m1 [expr {$cpus_count - 1}] |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf "# This file was autogenerated by $test_name |
| tesla|$cpus_count|~$cpu_array|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a8" test_cfg "a8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a10 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity of 3 ulongs with all bits set |
| # ############################################################################## |
| |
| set arr_len 3 |
| set cpu_array [string repeat "X" $arr_len] |
| set cpus_count [expr {$arr_len * $ulong_bits}] |
| set cpus_count_m1 [expr {$cpus_count - 1}] |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf "# This file was autogenerated by $test_name |
| tesla|$cpus_count|~$cpu_array|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a10" test_cfg "a10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a12 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity for max # of CPUs supported (2^15) |
| # ############################################################################## |
| |
| # pow() returns a floating-point value; int() truncates the quotient so |
| # arr_len is a whole number of ulongs |
| set max_cpus [expr {pow(2, 15)}] |
| set arr_len [expr {int($max_cpus / $ulong_bits)}] |
| set cpus_count [expr {$arr_len * $ulong_bits}] |
| set cpus_count_m1 [expr {$cpus_count - 1}] |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf "# This file was autogenerated by $test_name |
| tesla|$cpus_count|~max|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|$cpus_count|0-$cpus_count_m1|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a12" test_cfg "a12" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a14 - Test conversion of NVML-style CPU affinity array to Slurm bitstr |
| # Convert device CPU affinity for all zeros (expect this to fatal) |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|0|~zero|(null)|${dev}1 |
| " |
| set expected_output "" |
| set err_msgs $cpu_set_err |
| set expected_errs 1 |
| testproc_alias "a14" test_cfg "a14" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test a16 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity for CPU 0 set out of 16 total CPUs |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|16|~one|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|16|0|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a16" test_cfg "a16" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a18 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity for CPUs 0-1 set out of 4 total CPUs |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|~three|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a18" test_cfg "a18" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a20 - Test conversion of NVML-style cpu affinity array to Slurm bitstr |
| # Convert device CPU affinity for CPUs 8-15 set out of 16 CPUs |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:1" |
| set gres_conf "AutoDetect=nvml" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|16|~half|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|16|8-15|(null)|${dev}1|$flags_file |
| " |
| testproc_alias "a20" test_cfg "a20" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # End NVML-specific tests |
| # ############################################################################## |
| |
| } else { |
| log_warn "====Omitting NVML-specific tests====" |
| } |
| |
| # ############################################################################## |
| # # Test a22 - Test that when no GRES type is specified in slurm.conf, |
| # the GRES type is NOT set to what the system detects, but rather |
| # is set to NULL. |
| # ############################################################################## |
| |
| # slurm.conf names no type, so every expected record carries a (null) type |
| # even though the detected devices are tesla_v100-pcie-16gb. |
| set gres_conf {} |
| set slurm_conf_gres gpu:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4 |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}1|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-1|(null)|${dev}2|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}3|$flags_file |
| GRES_PARSABLE\[gpu\](1):(null)|4|2-3|(null)|${dev}4|$flags_file |
| " |
| testproc_alias a22 test_cfg a22 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a24 - Test that the GRES type specified in slurm.conf overrides the |
| # GRES type specified in the system, if it's a substring. |
| # ############################################################################## |
| |
| # slurm.conf type `v100` is a substring of the detected type; the expected |
| # records report `v100` with HAS_FILE,HAS_TYPE. |
| set gres_conf {} |
| set slurm_conf_gres gpu:v100:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4 |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):v100|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):v100|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):v100|4|2-3|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):v100|4|2-3|(null)|${dev}4|$flags_file_type |
| " |
| testproc_alias a24 test_cfg a24 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test a26 - Test that the GRES type specified in slurm.conf doesn't need |
| # to match the case of the system-detected GRES type, and that it |
| # can also match the first part. |
| # ############################################################################## |
| |
| # slurm.conf type is the upper-cased leading part of the detected type; the |
| # expected records keep the slurm.conf spelling (TESLA_V100). |
| set gres_conf {} |
| set slurm_conf_gres gpu:TESLA_V100:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4 |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):TESLA_V100|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):TESLA_V100|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):TESLA_V100|4|2-3|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):TESLA_V100|4|2-3|(null)|${dev}4|$flags_file_type |
| " |
| testproc_alias a26 test_cfg a26 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test a28 - Test that the GRES type specified in slurm.conf can match the |
| # last part of the system-detected GRES type |
| # ############################################################################## |
| |
| # slurm.conf type `pcie-16gb` is the trailing part of the detected type. |
| set gres_conf {} |
| set slurm_conf_gres gpu:pcie-16gb:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4 |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):pcie-16gb|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):pcie-16gb|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):pcie-16gb|4|2-3|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):pcie-16gb|4|2-3|(null)|${dev}4|$flags_file_type |
| " |
| testproc_alias a28 test_cfg a28 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a30 - Test that the GRES type specified in slurm.conf can match the |
| # full system-detected GRES type |
| # ############################################################################## |
| |
| # slurm.conf type equals the complete detected type string. |
| set gres_conf {} |
| set slurm_conf_gres gpu:tesla_v100-pcie-16gb:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3 |
| tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4 |
| tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|2-3|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla_v100-pcie-16gb|4|2-3|(null)|${dev}4|$flags_file_type |
| " |
| testproc_alias a30 test_cfg a30 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a32 - Test that GRES types work as expected when one GRES type is a |
| # substring of another GRES type (e.g. k20m + k20m1). See bug 7345 |
| # ############################################################################## |
| |
| # `k20m` is a substring of `k20m1`; each detected device must still be |
| # reported under its own configured type. |
| set gres_conf {} |
| set slurm_conf_gres gpu:k20m:1,gpu:k20m1:1,gpu:v100:1 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla_k20m1|4|0-1|(null)|${dev}1 |
| tesla_k20m|4|0-1|(null)|${dev}2 |
| tesla_v100-sxm2-32gb|4|0-1|(null)|${dev}3 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):k20m1|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):v100|4|0-1|(null)|${dev}3|$flags_file_type |
| " |
| testproc_alias a32 test_cfg a32 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a34 - Test that only the first system device that matches a conf gres |
| # is used, and that the gres type is truncated. |
| # ############################################################################## |
| |
| # Six nvidia-p100 devices are detected but only one p100 is configured; the |
| # expected output holds a single record, for ${dev}1, with type `p100`. |
| set gres_conf {} |
| set slurm_conf_gres gpu:p100:1 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| nvidia-p100|4|0-1|(null)|${dev}2 |
| nvidia-p100|4|0-1|(null)|${dev}6 |
| nvidia-p100|4|0-1|(null)|${dev}3 |
| nvidia-p100|4|0-1|(null)|${dev}4 |
| nvidia-p100|4|0-1|(null)|${dev}1 |
| nvidia-p100|4|0-1|(null)|${dev}5 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):p100|4|0-1|(null)|${dev}1|$flags_file_type |
| " |
| testproc_alias a34 test_cfg a34 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a36 - Test that the shorter GRES types don't match against the longer |
| # GRES types. |
| # ############################################################################## |
| |
| # Types a, aa, aaa and aaaa are all substrings of one another; each detected |
| # device is expected under the type of exactly its own name. |
| set gres_conf {} |
| set slurm_conf_gres gpu:aaaa:1,gpu:a:1,gpu:aa:1,gpu:aaa:1 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| aa|4|0-1|(null)|${dev}2 |
| aaa|4|0-1|(null)|${dev}1 |
| a|4|0-1|(null)|${dev}3 |
| aaaa|4|0-1|(null)|${dev}4 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):aa|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a|4|0-1|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):aaaa|4|0-1|(null)|${dev}4|$flags_file_type |
| " |
| |
| testproc_alias a36 test_cfg a36 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a38 - Test that the shorter GRES types don't match against the longer |
| # GRES types prematurely. |
| # ############################################################################## |
| |
| # Four k20 and four k20m devices are detected; slurm.conf asks for 2 k20 and |
| # 4 k20m. Expected: k20 on ${dev}2 and ${dev}3, k20m on ${dev}1, 4, 7 and 8. |
| set gres_conf {} |
| set slurm_conf_gres gpu:k20:2,gpu:k20m:4 |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| k20|4|0-1|(null)|${dev}2 |
| k20m|4|0-1|(null)|${dev}1 |
| k20|4|0-1|(null)|${dev}3 |
| k20m|4|0-1|(null)|${dev}4 |
| k20|4|0-1|(null)|${dev}5 |
| k20m|4|0-1|(null)|${dev}7 |
| k20|4|0-1|(null)|${dev}6 |
| k20m|4|0-1|(null)|${dev}8 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}4|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}7|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}8|$flags_file_type |
| " |
| |
| testproc_alias a38 test_cfg a38 $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a39 - Show that shorter types will match against longer types if the |
| # longer types aren't used up, and that the longer type will be |
| # truncated accordingly. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:k20:2,gpu:k20m:2" |
| set gres_conf "" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| k20m|4|0-1|(null)|${dev}1 |
| k20m|4|0-1|(null)|${dev}2 |
| k20m|4|0-1|(null)|${dev}3 |
| k20|4|0-1|(null)|${dev}4 |
| k20|4|0-1|(null)|${dev}5 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20m|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):k20|4|0-1|(null)|${dev}4|$flags_file_type |
| " |
| |
| testproc_alias "a39" test_cfg "a39" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a40 - Test GRES types that are the same length |
| # Note: In a real system, the controller would set the node to |
| # drain, since the node reports one less aaa GPU than the |
| # controller is expecting. |
| # ############################################################################## |
| |
| # slurm.conf asks for three aaa, two bbb and one ccc GPU. |
| set slurm_conf_gres "gpu:aaa:3,gpu:bbb:2,gpu:ccc:1" |
| set gres_conf "" |
| # The fake system reports two devices for each of three names that embed |
| # the configured types (test-aaa-test etc.), listed out of device order. |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| test-bbb-test|4|0-1|(null)|${dev}3 |
| test-aaa-test|4|0-1|(null)|${dev}2 |
| test-ccc-test|4|0-1|(null)|${dev}5 |
| test-aaa-test|4|0-1|(null)|${dev}1 |
| test-bbb-test|4|0-1|(null)|${dev}4 |
| test-ccc-test|4|0-1|(null)|${dev}6 |
| " |
| # Expected: each device is reported under the configured type its name |
| # contains, listed as devices 1 through 5. Only the two aaa devices that |
| # exist are listed, and the second ccc device (device 6) does not appear. |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):bbb|4|0-1|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):bbb|4|0-1|(null)|${dev}4|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):ccc|4|0-1|(null)|${dev}5|$flags_file_type |
| " |
| |
| testproc_alias "a40" test_cfg "a40" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test a42 - Test multiple identical GRES type specifications |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:aaa:1,gpu:aaa:1,gpu:aaa:1" |
| set gres_conf "" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| test-aaa-test|4|0-1|(null)|${dev}4 |
| test-aaa-test|4|0-1|(null)|${dev}3 |
| test-aaa-test|4|0-1|(null)|${dev}2 |
| test-aaa-test|4|0-1|(null)|${dev}1 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):aaa|4|0-1|(null)|${dev}3|$flags_file_type |
| " |
| |
| testproc_alias "a42" test_cfg "a42" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # Test a52 - Test slurm.conf, gres.conf, and detected device interaction |
| # |
| # The node returns a list of only 3 GPUs, not 4 (since gpu:special is not |
| # detected on the system). So the controller should set the node to drain. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3,gpu:special:1" |
| set gres_conf "" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type |
| " |
| |
| testproc_alias "a52" test_cfg "a52" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # Test a53 - Test slurm.conf, gres.conf, and detected device interaction |
| # |
| # gpu:special is added on via gres.conf. The final GPU list count matches what |
| # is expected in slurm.conf. No errors. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3,gpu:special:1" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=special File=${dev}5 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type |
| " |
| |
| testproc_alias "a53" test_cfg "a53" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # Test a54 - Test slurm.conf, gres.conf, and detected device interaction |
| # |
| # The gres conf record for nvidia1 *does* match a system GPU with the same Type |
| # and File. However, Cores is mismatched, so an error is printed in slurmd and |
| # the system device is omitted from the final GPU list. The total GPU count is 3 |
| # instead of 4, so the controller sets the node to drain. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3,gpu:special:1" |
| # gres.conf gives device 1 Cores=0, whereas the fake system below reports |
| # Cores 0-1 for that same device file. |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0 |
| Name=gpu Type=special File=${dev}5 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| # Device 1 is absent from the expected list; devices 0 and 2 keep the |
| # Cores and Links values listed for them in the fake system. |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type |
| " |
| |
| # NOTE(review): no err_msgs/expected_errs are passed here, unlike test b4, |
| # which passes $mismatch_err for a Cores mismatch -- confirm whether the |
| # error described in the header should be asserted here as well. |
| testproc_alias "a54" test_cfg "a54" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # Test a55 - Test slurm.conf, gres.conf, and detected device interaction |
| # |
| # nvidia[0-2] matches exactly what is found on the system, so no problem there. |
| # tesla + nvidia3 does not match any type and file combo found in the system |
| # GPUs, so this is assumed to be an “extra” GPU. However, it is not added, since |
| # there are already 3 teslas found as outlined in slurm.conf. |
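| # gres.conf lists 5 GPUs instead of the 4 in slurm.conf; the extra tesla is |
| # rejected with an error and the final GPU list has 4 entries, which is fine |
| # (i.e. won't set node to drain). |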
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3,gpu:special:1" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[0-2\] Cores=0-1 |
| Name=gpu Type=tesla File=${dev}3 Cores=0 |
| Name=gpu Type=special File=${dev}5 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type |
| " |
| set err_msgs $conf_mismatch_err |
| set expected_errs 1 |
| |
| testproc_alias "a55" test_cfg "a55" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # Test a56 - Test slurm.conf, gres.conf, and detected device interaction |
| # |
| # nvidia[0-2] are found on the system, but nvidia3 is not. However, nvidia3 is |
| # assumed to be an extra GPU (like gpu:special), so it’s ok. |
| # The total GPU count is 5, so there are no errors or warnings. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4,gpu:special:1" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[0-3\] Cores=0-1 |
| Name=gpu Type=special File=${dev}5 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):special|4|(null)|(null)|${dev}5|$flags_default_type |
| " |
| |
| testproc_alias "a56" test_cfg "a56" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test a57 - Test that an empty slurm.conf yields no devices |
| # ############################################################################## |
| |
| set slurm_conf_gres "" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[0-3\] Cores=0-1 |
| Name=gpu Type=special File=${dev}5 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| set expected_output "" |
| |
| set err_msgs $conf_mismatch_err |
| set expected_errs 2 |
| |
| testproc_alias "a57" test_cfg "a57" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # Test a58 - Test that a non-GPU GRES doesn't need an explicit entry in |
| # gres.conf. Also test that GPUs are rejected unless they have a File |
| # specification in gres.conf when AutoDetect is turned off. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3,gpu:special:1,tmpdisk:100" |
| # Nothing in gres.conf |
| set gres_conf "" |
| # Effectively "turn off" AutoDetect |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[tmpdisk\](100):(null)|4|(null)|(null)|(null)|CountOnly |
| " |
| testproc_alias "a58" test_cfg "a58" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # Test a59 - Test over-specified gres.conf compared to slurm.conf |
| # |
| # This tests that a larger gres.conf line is truncated down to match what |
| # slurm.conf specifies. This also tests that user errors are printed whenever |
| # more GRES are found in gres.conf than specified in slurm.conf. |
| # ############################################################################## |
| |
| # slurm.conf only asks for two tesla GPUs and one gtx GPU. |
| set slurm_conf_gres "gpu:tesla:2,gpu:gtx:1" |
| # gres.conf lists more than that: six tesla device files (0-4 and 5), a |
| # special GPU, and nic and tmpdisk entries that slurm_conf_gres lacks. |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[0-4\] |
| Name=gpu Type=tesla File=${dev}5 |
| Name=gpu Type=special File=${dev}6 |
| Name=gpu Type=gtx File=${dev}7 |
| Name=nic Count=100 |
| Name=tmpdisk Count=10G |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|-1|${dev}0 |
| tesla|4|0-1|-1|${dev}1 |
| tesla|4|0-1|-1|${dev}2 |
| " |
| # Expected: only tesla devices 0 and 1 plus the gtx device remain, which |
| # matches the counts in slurm_conf_gres. |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|-1|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):gtx|4|(null)|(null)|${dev}7|$flags_default_type |
| " |
| |
| set err_msgs $conf_mismatch_err |
| # presumably one error per over-specified gres.conf line (every line but |
| # the gtx one) -- TODO confirm against test_cfg |
| set expected_errs 5 |
| |
| testproc_alias "a59" test_cfg "a59" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| |
| # ############################################################################## |
| # Test a60 - Test unique_id parsing (for MIGs) |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4" |
| set gres_conf "" |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|(null)|${dev}1|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/7/0 |
| tesla|4|0-1|(null)|${dev}2|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/10/0 |
| tesla|4|2-3|(null)|${dev}3|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/8/0 |
| tesla|4|2-3|(null)|${dev}4|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/9/0 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}1|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/7/0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}2|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/10/0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/8/0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|MIG-GPU-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee/9/0|$flags_file_type |
| " |
| testproc_alias "a60" test_cfg "a60" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # ------------------------------------------------------------------------------ |
| # ------------------------------------------------------------------------------ |
| # ############################################################################## |
| |
| |
| |
| # ############################################################################## |
| # # Test b2 - Test that all MPS is distributed across multiple GPU types |
| # ############################################################################## |
| |
| # Three GPU types with one device each, plus an mps count of 300. |
| set slurm_conf_gres "gpu:tesla:1,gpu:1080:1,gpu:gtx560:1,mps:300" |
| set gres_conf "" |
| # The fake system names the second device 1080ti, while slurm.conf uses 1080. |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|(null)|${dev}1 |
| 1080ti|4|0-1|(null)|${dev}2 |
| gtx560|4|0-1|(null)|${dev}0 |
| " |
| # Expected: one mps record with count 100 and one gpu record per device, |
| # listed as devices 0, 1, 2; the 1080ti device is reported as type 1080. |
| set expected_output " |
| GRES_PARSABLE\[mps\](100):gtx560|4|0-1|(null)|${dev}0|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):gtx560|4|0-1|(null)|${dev}0|$flags_file_type |
| GRES_PARSABLE\[mps\](100):tesla|4|0-1|(null)|${dev}1|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[mps\](100):1080|4|0-1|(null)|${dev}2|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}2|$flags_file_type |
| " |
| |
| testproc_alias "b2" test_cfg "b2" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test b4 - Test that errors are emitted and configured devices omitted when |
| # Cores or Links mismatch with the corresponding system devices |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:ti:3,gpu:gtx:3" |
| # gres.conf gives device 0 COREs=0 and gives devices 3-5 the same Links |
| # value (-1,0,0,0,0,0). |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=ti File=${dev}0 COREs=0 |
| Name=gpu Type=ti File=${dev}\[1-2\] COREs=0-1 |
| Name=gpu Type=gtx File=${dev}\[3-5\] COREs=0-1 Links=-1,0,0,0,0,0 |
| " |
| |
| # The fake system reports Cores 0-1 for device 0, and reports the Links |
| # value -1,0,0,0,0,0 only for device 5 (devices 3 and 4 differ). |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| 1080ti|4|0-1|(null)|${dev}0 |
| 1080ti|4|0-1|(null)|${dev}1 |
| 1080ti|4|0-1|(null)|${dev}2 |
| gtx560|4|0-1|0,0,0,-1,0,0|${dev}3 |
| gtx560|4|0-1|0,0,0,0,-1,0|${dev}4 |
| gtx560|4|0-1|-1,0,0,0,0,0|${dev}5 |
| " |
| |
| # Expected: only devices 5, 1 and 2 remain, reported under the configured |
| # types gtx and ti. |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):gtx|4|0-1|-1,0,0,0,0,0|${dev}5|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):ti|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):ti|4|0-1|(null)|${dev}2|$flags_file_type |
| " |
| |
| set err_msgs $mismatch_err |
| # presumably one mismatch error each for devices 0, 3 and 4 -- TODO confirm |
| set expected_errs 3 |
| testproc_alias "b4" test_cfg "b4" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| |
| # ############################################################################## |
| # # Test b5 - Test that GPUs are sorted according to links, not device file |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:8" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 COREs=0-1 Links=0,0,0,-1,0,0,0,0 |
| Name=gpu Type=a100 File=${dev}1 COREs=0-1 Links=0,0,-1,0,0,0,0,0 |
| Name=gpu Type=a100 File=${dev}2 COREs=0-1 Links=0,-1,0,0,0,0,0,0 |
| Name=gpu Type=a100 File=${dev}3 COREs=0-1 Links=-1,0,0,0,0,0,0,0 |
| Name=gpu Type=a100 File=${dev}4 COREs=2-3 Links=0,0,0,0,0,0,0,-1 |
| Name=gpu Type=a100 File=${dev}5 COREs=2-3 Links=0,0,0,0,0,0,-1,0 |
| Name=gpu Type=a100 File=${dev}6 COREs=2-3 Links=0,0,0,0,0,-1,0,0 |
| Name=gpu Type=a100 File=${dev}7 COREs=2-3 Links=0,0,0,0,-1,0,0,0 |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|-1,0,0,0,0,0,0,0|${dev}3 |
| a100|4|0-1|0,-1,0,0,0,0,0,0|${dev}2 |
| a100|4|0-1|0,0,-1,0,0,0,0,0|${dev}1 |
| a100|4|0-1|0,0,0,-1,0,0,0,0|${dev}0 |
| a100|4|2-3|0,0,0,0,-1,0,0,0|${dev}7 |
| a100|4|2-3|0,0,0,0,0,-1,0,0|${dev}6 |
| a100|4|2-3|0,0,0,0,0,0,-1,0|${dev}5 |
| a100|4|2-3|0,0,0,0,0,0,0,-1|${dev}4 |
| " |
| |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|-1,0,0,0,0,0,0,0|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,-1,0,0,0,0,0,0|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,-1,0,0,0,0,0|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,0,-1,0,0,0,0|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,-1,0,0,0|${dev}7|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,0,-1,0,0|${dev}6|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,0,0,-1,0|${dev}5|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|0,0,0,0,0,0,0,-1|${dev}4|$flags_file_type |
| " |
| |
| testproc_alias "b5" test_cfg "b5" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test b6 - Test that "extra" GPUs are still used when not found on system |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:1080:3" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=1080 File=${dev}\[0-2\] COREs=0-1 |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):1080|4|0-1|(null)|${dev}2|$flags_default_type |
| " |
| |
| testproc_alias "b6" test_cfg "b6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| |
| # ############################################################################## |
| # # Test b7 - Test that GPUs are sorted according to links, if specified, and |
| # device file, if not specified. Test also that null links are |
| # sorted after non-null links. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:8" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 COREs=0-1 Links=0,0,0,-1 |
| Name=gpu Type=a100 File=${dev}1 COREs=0-1 Links=0,0,-1,0 |
| Name=gpu Type=a100 File=${dev}2 COREs=0-1 Links=0,-1,0,0 |
| Name=gpu Type=a100 File=${dev}3 COREs=0-1 Links=-1,0,0,0 |
| Name=gpu Type=a100 File=${dev}4 COREs=2-3 |
| Name=gpu Type=a100 File=${dev}5 COREs=2-3 |
| Name=gpu Type=a100 File=${dev}6 COREs=2-3 |
| Name=gpu Type=a100 File=${dev}7 COREs=2-3 |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|-1,0,0,0|${dev}3 |
| a100|4|0-1|0,-1,0,0|${dev}2 |
| a100|4|0-1|0,0,-1,0|${dev}1 |
| a100|4|0-1|0,0,0,-1|${dev}0 |
| a100|4|2-3|(null)|${dev}7 |
| a100|4|2-3|(null)|${dev}6 |
| a100|4|2-3|(null)|${dev}5 |
| a100|4|2-3|(null)|${dev}4 |
| " |
| |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|-1,0,0,0|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,-1,0,0|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,-1,0|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|0,0,0,-1|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}4|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}5|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}6|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|2-3|(null)|${dev}7|$flags_file_type |
| " |
| |
| testproc_alias "b7" test_cfg "b7" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| |
| # ############################################################################## |
| # # Test b8 - Test that separate "extra" GPUs in gres.conf with different Cores |
| # and Links are properly accounted under the typeless slurm.conf |
| # gres specification. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:5" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu File=${dev}\[0-1\] Cores=0,1 |
| Name=gpu File=${dev}\[2-3\] Cores=0,1 Links=-1 |
| Name=gpu File=${dev}4 Cores=0 |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|0,1|-1|${dev}2|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0,1|-1|${dev}3|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0,1|(null)|${dev}0|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0,1|(null)|${dev}1|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0|(null)|${dev}4|$flags_default |
| " |
| |
| testproc_alias "b8" test_cfg "b8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test b10 - Test that slurm.conf doesn't allow a mix of Type and no Type |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:1,gpu:1" |
| set gres_conf "" |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $slurm_conf_type_err |
| set expected_errs 1 |
| |
| testproc_alias "b10" test_cfg "b10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test b12 - Test that gres.conf doesn't allow a mix of Type and no Type |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:2,gpu:v100:2" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu File=${dev}\[0-1\] |
| Name=gpu Type=v100 File=${dev}\[2-3\] |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $gres_conf_type_err |
| set expected_errs 1 |
| |
| testproc_alias "b12" test_cfg "b12" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test b14 - Test that gres.conf doesn't allow a mix of File and no File |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:2,gpu:v100:2" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[0-1\] |
| Name=gpu Type=v100 |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $gres_conf_file_err |
| set expected_errs 1 |
| |
| testproc_alias "b14" test_cfg "b14" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| |
| # ############################################################################## |
| # ------------------------------------------------------------------------------ |
| # Test gres.conf parsing |
| # ------------------------------------------------------------------------------ |
| # ############################################################################## |
| |
| |
| # ############################################################################## |
| # # Test c2 - Test gres/gpu plus gres/mps with count |
| # # NOTE the device numbers being out of order |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:1,gpu:gtx560:1,mps:200" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=gtx560 File=${dev}0 COREs=0,1 |
| Name=gpu Type=tesla File=${dev}1 COREs=2,3 |
| Name=mps Count=100 File=${dev}1 |
| Name=mps Count=100 File=${dev}0 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[mps\](100):gtx560|4|0,1|(null)|${dev}0|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[mps\](100):tesla|4|2,3|(null)|${dev}1|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type |
| " |
| testproc_alias "c2" test_cfg "c2" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c4 - Test gres/mps with count and no file names |
| # # NOTE the device numbers being out of order |
| # ############################################################################## |
| |
| # Two GPUs of different types plus an mps count of 210. |
| set slurm_conf_gres "gpu:tesla:1,gpu:gtx560:1,mps:210" |
| # The mps line carries only a Count (no File), and the GPU lines list |
| # device 1 before device 0. |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 COREs=2,3 |
| Name=gpu Type=gtx560 File=${dev}0 COREs=0,1 |
| Name=mps Count=210 |
| " |
| set fake_gpus_conf "" |
| # Expected: an mps record with count 105 for each of the two GPUs, with |
| # device 0 listed before device 1. |
| set expected_output " |
| GRES_PARSABLE\[mps\](105):gtx560|4|0,1|(null)|${dev}0|$flags_default_type_shared |
| GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[mps\](105):tesla|4|2,3|(null)|${dev}1|$flags_default_type_shared |
| GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type |
| " |
| testproc_alias "c4" test_cfg "c4" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c6 - Test using only a subset of the system devices found |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[3-4\] Cores=2-3 |
| Name=gpu Type=tesla File=${dev}1 Cores=0-1 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|(null)|${dev}1 |
| tesla|4|0-1|(null)|${dev}2 |
| tesla|4|2-3|(null)|${dev}3 |
| tesla|4|2-3|(null)|${dev}4 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|$flags_file_type |
| " |
| testproc_alias "c6" test_cfg "c6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c8 - Test disjoint sets of conf and system devices |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:6" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=2-3 |
| Name=gpu Type=tesla File=${dev}2 Cores=2-3 |
| Name=gpu Type=tesla File=${dev}3 Cores=2-3 |
| Name=gpu Type=tesla File=${dev}4 Cores=2-3 |
| " |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| tesla|4|0-1|(null)|${dev}5 |
| tesla|4|0-1|(null)|${dev}6 |
| " |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}5|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}6|$flags_file_type |
| " |
| testproc_alias "c8" test_cfg "c8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c10 - Different links, different records |
| # ############################################################################## |
| |
| # Devices 0-2 are all doubly linked to each other |
| # Device 5 is singly linked to 3-4 |
| # Devices 6-7 are doubly linked to each other |
| set slurm_conf_gres "gpu:tesla:8" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}0 Cores=0-3 Links=-1,2,2,0,0,0,0,0 |
| Name=gpu Type=tesla File=${dev}1 Cores=0-3 Links=2,-1,2,0,0,0,0,0 |
| Name=gpu Type=tesla File=${dev}2 Cores=0-3 Links=2,2,-1,0,0,0,0,0 |
| Name=gpu Type=tesla File=${dev}3 Cores=0-3 Links=0,0,0,-1,0,1,0,0 |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 Links=0,0,0,0,-1,1,0,0 |
| Name=gpu Type=tesla File=${dev}5 Cores=0-3 Links=0,0,0,1,1,-1,0,0 |
| Name=gpu Type=tesla File=${dev}6 Cores=0-3 Links=0,0,0,0,0,0,-1,2 |
| Name=gpu Type=tesla File=${dev}7 Cores=0-3 Links=0,0,0,0,0,0,2,-1 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|-1,2,2,0,0,0,0,0|${dev}0|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|2,-1,2,0,0,0,0,0|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|2,2,-1,0,0,0,0,0|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,-1,0,1,0,0|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,0,-1,1,0,0|${dev}4|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,1,1,-1,0,0|${dev}5|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,0,0,0,-1,2|${dev}6|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,0,0,0,0,0,2,-1|${dev}7|$flags_default_type |
| " |
| testproc_alias "c10" test_cfg "c10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c12 - Empty everything |
| # ############################################################################## |
| |
| set slurm_conf_gres "" |
| set gres_conf "" |
| set fake_gpus_conf "" |
| set expected_output "" |
| testproc_alias "c12" test_cfg "c12" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c14 - Empty system devices |
| # # gres.conf names devices 1-4 with a single bracketed File range while the |
| # # fake GPU list is empty. The expected output has one record per device, |
| # # each with Cores 2-3 and (null) in the Links field. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[1-4\] Cores=2-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2-3|(null)|${dev}4|$flags_default_type |
| " |
| testproc_alias "c14" test_cfg "c14" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test c16 - Test non-GPU GRESs with types |
| # # One typed record each for gpu, tmpdisk, nic and mic. The expected output |
| # # lists them as nic, mic, tmpdisk, gpu (not the gres.conf order); the |
| # # non-GPU records expect $flags_file_type, the gpu one $flags_default_type. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:1,tmpdisk:disky:1,nic:nikki:1,mic:mickey:1" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0-3 |
| Name=tmpdisk Type=disky File=${dev}2 Cores=0-3 |
| Name=nic Type=nikki File=${dev}3 Cores=0-3 |
| Name=mic Type=mickey File=${dev}4 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[nic\](1):nikki|4|0-3|(null)|${dev}3|$flags_file_type |
| GRES_PARSABLE\[mic\](1):mickey|4|0-3|(null)|${dev}4|$flags_file_type |
| GRES_PARSABLE\[tmpdisk\](1):disky|4|0-3|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type |
| " |
| testproc_alias "c16" test_cfg "c16" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c17 - Test non-GPU GRESs without types |
| # # Same four GRES names as a typed setup would use, but with no Type key. |
| # # The expected type field is (null); the non-GPU records expect the |
| # # literal flag HAS_FILE and the gpu record expects $flags_default. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:1,tmpdisk:1,nic:1,mic:1" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu File=${dev}1 Cores=0-3 |
| Name=tmpdisk File=${dev}2 Cores=0-3 |
| Name=nic File=${dev}3 Cores=0-3 |
| Name=mic File=${dev}4 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[nic\](1):(null)|4|0-3|(null)|${dev}3|HAS_FILE |
| GRES_PARSABLE\[mic\](1):(null)|4|0-3|(null)|${dev}4|HAS_FILE |
| GRES_PARSABLE\[tmpdisk\](1):(null)|4|0-3|(null)|${dev}2|HAS_FILE |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}1|$flags_default |
| " |
| testproc_alias "c17" test_cfg "c17" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c18 - Test NodeName when Name isn't specified |
| # # The second gres.conf line has NodeName but no Name key. Only the first |
| # # device is expected in the output, together with $no_name_err. |
| # # NOTE(review): expected_errs is presumably the number of times the |
| # # message must appear - confirm against test_cfg. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:2" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| NodeName=$nodename Name=gpu Type=tesla File=${dev}1 Cores=0-3 |
| NodeName=$nodename Type=tesla File=${dev}2 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type |
| " |
| |
| set err_msgs $no_name_err |
| set expected_errs 1 |
| testproc_alias "c18" test_cfg "c18" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c20 - Test that empty Name does not parse and produces error |
| # # The only gres.conf line starts with Type and has neither Name nor |
| # # NodeName. No output is expected, and $key_parse_err is the expected |
| # # error message. |
| # ############################################################################## |
| |
| set slurm_conf_gres "" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Type=tesla File=${dev}1 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $key_parse_err |
| set expected_errs 1 |
| testproc_alias "c20" test_cfg "c20" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c22 - Ensure no malloc error for large count with non-GPU GRES |
| # See bug 6014 |
| # # A count-only tmpdisk record (note the lower-case count key) with no File, |
| # # Type or Cores. The expected count is 10737418240, i.e. 10 * 1024^3, and |
| # # the expected flags field is empty. |
| # ############################################################################## |
| |
| set slurm_conf_gres "tmpdisk:10g" |
| set gres_conf "Name=tmpdisk count=10g" |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[tmpdisk\](10737418240):(null)|4|(null)|(null)|(null)| |
| " |
| testproc_alias "c22" test_cfg "c22" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c23 - Ensure no errors for large count with non-GPU GRES *and* with |
| # Type and Cores specified in gres.conf. |
| # # The expected record keeps the 10737418240 count (10 * 1024^3), carries |
| # # the type and cores from gres.conf, has no device file, and expects the |
| # # literal flag HAS_TYPE. |
| # ############################################################################## |
| |
| set slurm_conf_gres "tmpdisk:tempy:10g" |
| set gres_conf "Name=tmpdisk Type=tempy Count=10g Cores=0-1" |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[tmpdisk\](10737418240):tempy|4|0-1|(null)|(null)|HAS_TYPE |
| " |
| testproc_alias "c23" test_cfg "c23" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c24 - Different types, different records |
| # # Two GPU types (a and b), each configured with two single-device lines. |
| # # One record per device is expected, each keeping its own type. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a:2,gpu:b:2" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a File=${dev}1 Cores=0-3 |
| Name=gpu Type=a File=${dev}2 Cores=0-3 |
| Name=gpu Type=b File=${dev}5 Cores=0-3 |
| Name=gpu Type=b File=${dev}6 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a|4|0-3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):a|4|0-3|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):b|4|0-3|(null)|${dev}5|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):b|4|0-3|(null)|${dev}6|$flags_default_type |
| " |
| testproc_alias "c24" test_cfg "c24" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c28 - Test alternating device/cpu ranges |
| # # Types tesla_a and tesla_b alternate across the gres.conf lines, first as |
| # # two-device ranges and then as single devices. The expected output has one |
| # # record per device, 1 through 8, each with the type of its own line. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla_a:4,gpu:tesla_b:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla_a File=${dev}\[1-2\] Cores=0-3 |
| Name=gpu Type=tesla_b File=${dev}\[3-4\] Cores=0-3 |
| Name=gpu Type=tesla_a File=${dev}5 Cores=0-3 |
| Name=gpu Type=tesla_b File=${dev}6 Cores=0-3 |
| Name=gpu Type=tesla_a File=${dev}7 Cores=0-3 |
| Name=gpu Type=tesla_b File=${dev}8 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}4|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}5|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}6|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_a|4|0-3|(null)|${dev}7|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla_b|4|0-3|(null)|${dev}8|$flags_default_type |
| " |
| testproc_alias "c28" test_cfg "c28" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c30 - Test duplicate devices in gres.conf |
| # # Devices 3 and 4 are each listed twice on their own lines and once more |
| # # through a range. No output is expected, and $dup_err is the expected |
| # # error message. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:6" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}3 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}3 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[3-4\] Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $dup_err |
| set expected_errs 1 |
| testproc_alias "c30" test_cfg "c30" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c32 - Test increasing duplicate devices in gres.conf |
| # # Each line widens the File range (1, 1-2, 1-3, 1-4), so every line repeats |
| # # devices from the previous ones; 1+2+3+4 entries match the slurm.conf |
| # # count of 10. No output is expected, only $dup_err. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:10" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[1-2\] Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[1-3\] Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[1-4\] Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $dup_err |
| set expected_errs 1 |
| testproc_alias "c32" test_cfg "c32" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c34 - Test decreasing duplicate devices in gres.conf |
| # # The File ranges grow downwards (4, 3-4, 2-4, 1-4), so every line repeats |
| # # devices from the previous ones; 1+2+3+4 entries match the slurm.conf |
| # # count of 10. No output is expected, only $dup_err. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:10" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[3-4\] Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[2-4\] Cores=0-3 |
| Name=gpu Type=tesla File=${dev}\[1-4\] Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $dup_err |
| set expected_errs 1 |
| testproc_alias "c34" test_cfg "c34" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c36 - Test duplicate devices |
| # # The same device file (device 1) appears on all four lines, each time |
| # # with a different Cores value. No output is expected, only $dup_err. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0 |
| Name=gpu Type=tesla File=${dev}1 Cores=0-1 |
| Name=gpu Type=tesla File=${dev}1 Cores=0-2 |
| Name=gpu Type=tesla File=${dev}1 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $dup_err |
| set expected_errs 1 |
| testproc_alias "c36" test_cfg "c36" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c38 - Test overlapping Core ranges |
| # # Four distinct device files whose Cores values overlap (0, 0-1, 0-2, 0-3). |
| # # Each device is expected in the output with its own Cores value, and no |
| # # error arguments are passed to the test. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0 |
| Name=gpu Type=tesla File=${dev}2 Cores=0-1 |
| Name=gpu Type=tesla File=${dev}3 Cores=0-2 |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-1|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-2|(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}4|$flags_default_type |
| " |
| testproc_alias "c38" test_cfg "c38" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test c40 - Test that nulled, empty, and zeroed links produce errors |
| # # Device 1 has no Links key; devices 2-4 use an empty string, null and 0. |
| # # All four devices are still expected in the output with (null) links. |
| # # NOTE(review): expected_errs 3 presumably corresponds to the three lines |
| # # that carry a Links key - confirm against test_cfg. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0-3 |
| Name=gpu Type=tesla File=${dev}2 Cores=0-3 Links=\"\" |
| Name=gpu Type=tesla File=${dev}3 Cores=0-3 Links=null |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 Links=0 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}4|$flags_default_type |
| " |
| set err_msgs $links_err |
| set expected_errs 3 |
| testproc_alias "c40" test_cfg "c40" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test c42 - Show link ranges do NOT work - only commas |
| # # Devices 1 and 3 use range-style Links (0-1, 0-2) and are expected with |
| # # (null) links; devices 2 and 4 use comma lists, which are expected |
| # # unchanged. The expected output lists devices 2 and 4 before 1 and 3. |
| # # NOTE(review): no err_msgs/expected_errs are passed here even though the |
| # # range-style Links are dropped - confirm that no error is expected. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 Cores=0-3 Links=0-1 |
| Name=gpu Type=tesla File=${dev}2 Cores=0-3 Links=0,-1 |
| Name=gpu Type=tesla File=${dev}3 Cores=0-3 Links=0-2 |
| Name=gpu Type=tesla File=${dev}4 Cores=0-3 Links=0,-1,2 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,-1|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|0,-1,2|${dev}4|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0-3|(null)|${dev}3|$flags_default_type |
| " |
| testproc_alias "c42" test_cfg "c42" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c46 - Type not specified |
| # # Neither slurm.conf nor gres.conf gives a GPU type. The expected type |
| # # field is (null) and the expected flags are $flags_default. The gres.conf |
| # # text also ends with an extra blank line. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu File=${dev}1 Cores=0-3 |
| Name=gpu File=${dev}2 Cores=0-3 |
| Name=gpu File=${dev}3 Cores=0-3 |
| Name=gpu File=${dev}4 Cores=0-3 |
| |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}1|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}2|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}3|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|0-3|(null)|${dev}4|$flags_default |
| " |
| testproc_alias "c46" test_cfg "c46" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c48 - Cores not specified or null |
| # # Per the expected output: an omitted Cores key gives (null), Cores="" |
| # # gives an empty field, Cores=null is kept as the literal text null, and |
| # # Cores=0 gives 0. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:5" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 |
| Name=gpu Type=tesla File=${dev}2 |
| Name=gpu Type=tesla File=${dev}3 Cores=\"\" |
| Name=gpu Type=tesla File=${dev}4 Cores=null |
| Name=gpu Type=tesla File=${dev}5 Cores=0 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|(null)|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|(null)|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4||(null)|${dev}3|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|null|(null)|${dev}4|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0|(null)|${dev}5|$flags_default_type |
| " |
| testproc_alias "c48" test_cfg "c48" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # TODO: What other tests should we add? |
| # Make sure current gres.conf files still work as expected |
| # Test to make sure CPU affinity is correct? Machine vs abstract? |
| # Invalid CPU counts |
| # Invalid cpu range |
| |
| # ############################################################################## |
| # # Test c50 - Test examples in gres.conf docs |
| # # Two GPUs of different types plus one mps line per GPU device file |
| # # (100 + 100 matches mps:200 in slurm.conf). The mps lines give no Type or |
| # # Cores, yet the expected mps records carry the type and cores of the GPU |
| # # with the same device file. Each mps record is listed before its GPU. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:gtx560:1,gpu:tesla:1,mps:200" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=gtx560 File=${dev}0 COREs=0,1 |
| Name=gpu Type=tesla File=${dev}1 COREs=2,3 |
| Name=mps Count=100 File=${dev}0 |
| Name=mps Count=100 File=${dev}1 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[mps\](100):gtx560|4|0,1|(null)|${dev}0|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[mps\](100):tesla|4|2,3|(null)|${dev}1|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type |
| " |
| testproc_alias "c50" test_cfg "c50" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c52 - Test examples in gres.conf docs |
| # # The generated gres.conf text contains a commented-out range for devices |
| # # 2-3 and an active line for device 3 only. The expected output has |
| # # devices 0, 1 and 3, matching gpu:tesla:3 in slurm.conf. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:tesla:3" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}\[0-1\] COREs=0,1 |
| # NOTE: nvidia2 device is out of service |
| # Name=gpu Type=tesla File=${dev}\[2-3\] COREs=2,3 |
| Name=gpu Type=tesla File=${dev}3 COREs=2,3 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):tesla|4|0,1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|0,1|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}3|$flags_default_type |
| " |
| testproc_alias "c52" test_cfg "c52" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c54 - Test examples in gres.conf docs |
| # # Note: ${nodename_base}0 == $nodename, so only node 0's GRES |
| # # definitions will apply to this test |
| # # Node 0 falls in the first NodeName range, which lists devices 0-3 with |
| # # no Type or Cores, so four records with (null) type and cores are |
| # # expected. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| ## Explicitly specify devices on nodes ${nodename_base}0-${nodename_base}15 |
| # NodeName=${nodename_base}\[0-15\] Name=gpu File=${dev}\[0-3\] |
| # NOTE: ${nodename_base}3 ${dev}1 device is out of service |
| NodeName=${nodename_base}\[0-2\] Name=gpu File=${dev}\[0-3\] |
| NodeName=${nodename_base}3 Name=gpu File=${dev}\[0,2-3\] |
| NodeName=${nodename_base}\[4-15\] Name=gpu File=${dev}\[0-3\] |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}0|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}1|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}2|$flags_default |
| GRES_PARSABLE\[gpu\](1):(null)|4|(null)|(null)|${dev}3|$flags_default |
| " |
| testproc_alias "c54" test_cfg "c54" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test c56 - Test gres/mps with count on only some devices |
| # # NOTE the device numbers being out of order, one GPU without a gres/mps and |
| # # a gres/mps with a device file not valid for any configured GPU |
| # # The mps counts 200 + 300 + 400 match mps:900 in slurm.conf. In the |
| # # expected output, device 0 (GPU with no mps line) gets an mps record with |
| # # count 0 and $flags_default_type_shared, while device 8 (mps line with no |
| # # GPU) has no record at all. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:gtx560:1,gpu:tesla:2,mps:900" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=tesla File=${dev}1 COREs=2,3 |
| Name=gpu Type=gtx560 File=${dev}0 COREs=0,1 |
| Name=gpu Type=tesla File=${dev}3 COREs=2,3 |
| Name=mps Count=200 File=${dev}1 |
| Name=mps Count=300 File=${dev}3 |
| Name=mps Count=400 File=${dev}8 |
| " |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[mps\](0):gtx560|4|0,1|(null)|${dev}0|$flags_default_type_shared |
| GRES_PARSABLE\[gpu\](1):gtx560|4|0,1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[mps\](200):tesla|4|2,3|(null)|${dev}1|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[mps\](300):tesla|4|2,3|(null)|${dev}3|$flags_file_type_shared |
| GRES_PARSABLE\[gpu\](1):tesla|4|2,3|(null)|${dev}3|$flags_default_type |
| " |
| testproc_alias "c56" test_cfg "c56" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ============================================================================== |
| # ============================================================================== |
| # # Tests dX - Test Flags parsing with and without AutoDetect |
| # ============================================================================== |
| # ============================================================================== |
| |
| # ############################################################################## |
| # # Test d1 - Default (all envs set) |
| # # No gres.conf line has a Flags key and the fake GPU list is empty; every |
| # # record is expected with $flags_default_type. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 |
| Name=gpu Type=a100 File=${dev}1 Cores=0-1 |
| Name=gpu Type=a100 File=${dev}2 Cores=0-1 |
| Name=gpu Type=a100 File=${dev}3 Cores=0-1 |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_default_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_default_type |
| " |
| |
| testproc_alias "d1" test_cfg "d1" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test d2 - No envs set |
| # # Every gres.conf line sets Flags=no_gpu_env; every record is expected |
| # # with $flags_file_type and no ENV_* flag appended. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=no_gpu_env |
| Name=gpu Type=a100 File=${dev}1 Cores=0-1 Flags=no_gpu_env |
| Name=gpu Type=a100 File=${dev}2 Cores=0-1 Flags=no_gpu_env |
| Name=gpu Type=a100 File=${dev}3 Cores=0-1 Flags=no_gpu_env |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type |
| " |
| |
| testproc_alias "d2" test_cfg "d2" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test d3 - Test with NodeName |
| # # Lines for $nodename use amd_gpu_env and lines for $nodename_diff use |
| # # nvidia_gpu_env. Only the two $nodename devices are expected, with |
| # # ENV_RSMI appended to $flags_file_type. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| NodeName=$nodename Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=amd_gpu_env |
| NodeName=$nodename Name=gpu Type=a100 File=${dev}1 Cores=0-1 Flags=amd_gpu_env |
| NodeName=$nodename_diff Name=gpu Type=a100 File=${dev}2 Cores=0-1 Flags=nvidia_gpu_env |
| NodeName=$nodename_diff Name=gpu Type=a100 File=${dev}3 Cores=0-1 Flags=nvidia_gpu_env |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_RSMI |
| " |
| |
| testproc_alias "d3" test_cfg "d3" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test d4 - AutoDetect match with conf does NOT overwrite no_gpu_env |
| # # The fake GPU list reports device 0 with nvidia_gpu_env while gres.conf |
| # # sets no_gpu_env on all four devices. All records, including device 0, |
| # # are expected with plain $flags_file_type (no ENV_NVML). |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=no_gpu_env |
| Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=no_gpu_env |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env |
| " |
| |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type |
| " |
| |
| testproc_alias "d4" test_cfg "d4" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test d5 - Test that AutoDetect and conf can match with correct Flags |
| # # Both the fake GPU list (device 0) and every gres.conf line use |
| # # nvidia_gpu_env; all four records are expected with ENV_NVML appended. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=nvidia_gpu_env |
| Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=nvidia_gpu_env |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env |
| " |
| |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_NVML |
| " |
| |
| testproc_alias "d5" test_cfg "d5" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test d6 - Test that AutoDetect works with no conf |
| # # Here "no conf" applies to device 0 only: gres.conf covers devices 1-3 |
| # # and device 0 appears solely in the fake GPU list. All four devices are |
| # # expected in the output with ENV_NVML appended. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=nvidia_gpu_env |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env |
| " |
| |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_NVML |
| " |
| |
| testproc_alias "d6" test_cfg "d6" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test d7 - Test that AutoDetect works with no conf, and other conf different |
| # NOTE: The stepd will combine all env flags of records on the same |
| # node, so in reality, both CUDA_* and ROCR_* would be set on all. |
| # # Device 0 appears only in the fake GPU list (nvidia_gpu_env) and is |
| # # expected with ENV_NVML; devices 1-3 come from gres.conf (amd_gpu_env) |
| # # and are expected with ENV_RSMI. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=amd_gpu_env |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env |
| " |
| |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_RSMI |
| " |
| |
| testproc_alias "d7" test_cfg "d7" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| |
| # ############################################################################## |
| # # Test d8 - Test that flags propagate to next GRES line |
| # # Only the first gres.conf line has a Flags key (amd_gpu_env and |
| # # nvidia_gpu_env together); all four records are expected with both |
| # # ENV_NVML and ENV_RSMI appended. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=amd_gpu_env,nvidia_gpu_env |
| Name=gpu Type=a100 File=${dev}1 Cores=0-1 |
| Name=gpu Type=a100 File=${dev}2 Cores=0-1 |
| Name=gpu Type=a100 File=${dev}3 Cores=0-1 |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_NVML,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_NVML,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_NVML,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_NVML,ENV_RSMI |
| " |
| |
| testproc_alias "d8" test_cfg "d8" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |
| |
| # ############################################################################## |
| # # Test d9 - Fatal on env flag mismatch |
| # # Devices 0-1 use no_gpu_env and devices 2-3 use nvidia_gpu_env for the |
| # # same GRES name and type. No output is expected, and $flags_mismatch_err |
| # # is the expected error message. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}\[0-1\] Cores=0-1 Flags=no_gpu_env |
| Name=gpu Type=a100 File=${dev}\[2-3\] Cores=0-1 Flags=nvidia_gpu_env |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output "" |
| set err_msgs $flags_mismatch_err |
| set expected_errs 1 |
| testproc_alias "d9" test_cfg "d9" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test d10 - Fatal on env flag mismatch with default |
| # # The first gres.conf line has no Flags key while the second sets |
| # # nvidia_gpu_env; the fake GPU list also reports device 0 with |
| # # nvidia_gpu_env. No output is expected, only $flags_mismatch_err. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:4" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 |
| Name=gpu Type=a100 File=${dev}\[1-3\] Cores=0-1 Flags=nvidia_gpu_env |
| " |
| |
| set fake_gpus_conf " |
| # This file was autogenerated by $test_name |
| a100|4|0-1|(null)|${dev}0|(null)|nvidia_gpu_env |
| " |
| |
| set expected_output "" |
| set err_msgs $flags_mismatch_err |
| set expected_errs 1 |
| |
| testproc_alias "d10" test_cfg "d10" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output $err_msgs $expected_errs |
| |
| # ############################################################################## |
| # # Test d11 - Make sure that env flag propagation works with different types |
| # of the same GRES name |
| # # Only the a100 line sets Flags=amd_gpu_env; the b100, c100 and d100 |
| # # lines have no Flags key. All four records are expected with ENV_RSMI |
| # # appended. |
| # ############################################################################## |
| |
| set slurm_conf_gres "gpu:a100:1,gpu:b100:1,gpu:c100:1,gpu:d100:1" |
| set gres_conf " |
| # This file was autogenerated by $test_name |
| Name=gpu Type=a100 File=${dev}0 Cores=0-1 Flags=amd_gpu_env |
| Name=gpu Type=b100 File=${dev}1 Cores=0-1 |
| Name=gpu Type=c100 File=${dev}2 Cores=0-1 |
| Name=gpu Type=d100 File=${dev}3 Cores=0-1 |
| " |
| |
| set fake_gpus_conf "" |
| set expected_output " |
| GRES_PARSABLE\[gpu\](1):a100|4|0-1|(null)|${dev}0|$flags_file_type,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):b100|4|0-1|(null)|${dev}1|$flags_file_type,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):c100|4|0-1|(null)|${dev}2|$flags_file_type,ENV_RSMI |
| GRES_PARSABLE\[gpu\](1):d100|4|0-1|(null)|${dev}3|$flags_file_type,ENV_RSMI |
| " |
| |
| testproc_alias "d11" test_cfg "d11" $slurm_conf_gres $gres_conf $fake_gpus_conf $expected_output |