|  | #!/usr/bin/env expect | 
|  | ############################################################################ | 
|  | # Purpose: Establish global state information for Slurm test suite | 
|  | # | 
|  | # To define site-specific state information, set the values in a file | 
|  | # named 'globals.local'. Those values will override any specified here. | 
|  | # for example: | 
|  | # | 
|  | # $ cat globals.local | 
|  | # set slurm_dir  "/usr/local" | 
|  | # set build_dir  "/home/mine/SLURM/build_smd" | 
|  | # set src_dir    "/home/mine/SLURM/slurm.git" | 
|  | # set mpicc      "/usr/local/bin/mpicc" | 
|  | # | 
|  | # If you want to have more than one test going at the same time for multiple | 
|  | # installs you can have multiple globals.local files and set the | 
|  | # SLURM_LOCAL_GLOBALS_FILE env var, and have that set to the correct | 
|  | # globals.local file for your various installs.  The file can be named anything, | 
|  | # not just globals.local. | 
|  | # | 
|  | ############################################################################ | 
|  | # Copyright (C) 2002-2007 The Regents of the University of California. | 
|  | # Copyright (C) 2008-2010 Lawrence Livermore National Security. | 
|  | # Copyright (C) SchedMD LLC. | 
|  | # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | 
|  | # Written by Morris Jette <jette1@llnl.gov> | 
|  | # Additions by Joseph Donaghy <donaghy1@llnl.gov> | 
|  | # CODE-OCEC-09-009. All rights reserved. | 
|  | # | 
|  | # This file is part of Slurm, a resource management program. | 
|  | # For details, see <https://slurm.schedmd.com/>. | 
|  | # Please also read the supplied file: DISCLAIMER. | 
|  | # | 
|  | # Slurm is free software; you can redistribute it and/or modify it under | 
|  | # the terms of the GNU General Public License as published by the Free | 
|  | # Software Foundation; either version 2 of the License, or (at your option) | 
|  | # any later version. | 
|  | # | 
|  | # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
|  | # details. | 
|  | # | 
|  | # You should have received a copy of the GNU General Public License along | 
|  | # with Slurm; if not, write to the Free Software Foundation, Inc., | 
|  | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
|  | ############################################################################ | 
|  |  | 
|  | # Avoid sourcing this file multiple times | 
|  | if {[info procs exit] eq "exit"} { | 
|  | return | 
|  | } | 
|  |  | 
|  | # Read in unified testsuite properties from testsuite/testsuite.conf | 
|  | set unified_testsuite_dir [file dirname [file normalize "[info script]/.."]] | 
|  | set unified_testsuite_conf_file "$unified_testsuite_dir/testsuite.conf" | 
|  | if {[info exists env(SLURM_TESTSUITE_CONF)]} { | 
|  | set unified_testsuite_conf_file $env(SLURM_TESTSUITE_CONF) | 
|  | } | 
|  | set unified_testsuite_config_dict [dict create] | 
|  | if [file readable "$unified_testsuite_conf_file"] { | 
|  | set fp [open "$unified_testsuite_conf_file" r] | 
|  | set file_data [read $fp] | 
|  | close $fp | 
|  | foreach line [split $file_data "\n"] { | 
|  | if [regexp {^\s*(\S+)\s*=\s*(\S+)\s*$} $line {} parameter_name parameter_value] { | 
|  | dict set unified_testsuite_config_dict [string tolower $parameter_name] $parameter_value | 
|  | } | 
|  | } | 
|  | } else { | 
|  | puts stderr "Warning: Unable to open the unified testsuite configuration file ($unified_testsuite_conf_file) for reading. This file can be created from a copy of the autogenerated sample found in BUILDDIR/testsuite/testsuite.conf.sample. By default, this file is expected to be found in SRCDIR/testsuite ($unified_testsuite_dir). If placed elsewhere, set the SLURM_TESTSUITE_CONF environment variable to the full path of your testsuite.conf file." | 
|  | flush stderr | 
|  | } | 
|  | if [dict exists $unified_testsuite_config_dict "slurmsourcedir"] { | 
|  | set src_dir [dict get $unified_testsuite_config_dict "slurmsourcedir"] | 
|  | } | 
|  | if [dict exists $unified_testsuite_config_dict "slurmbuilddir"] { | 
|  | set build_dir [dict get $unified_testsuite_config_dict "slurmbuilddir"] | 
|  | } | 
|  | if [dict exists $unified_testsuite_config_dict "slurminstalldir"] { | 
|  | set slurm_dir [dict get $unified_testsuite_config_dict "slurminstalldir"] | 
|  | } | 
|  |  | 
|  | global sacctmgr sacct salloc sattach sbatch sbcast scancel scontrol sinfo | 
|  | global smd squeue sreport srun sstat strigger | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	cset - conditional set | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	cset name value | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Conditional set.  Only set variable if variable does not yet exist. | 
|  | # | 
|  | # Input: name  -- name of the variable to set | 
|  | #	 value -- value to set to 'name' | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc cset {name value} { | 
|  | if {![uplevel 1 info exists $name]} { | 
|  | upvar $name tmp | 
|  | set tmp $value | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | # | 
|  | # Defining colors here to be able to use them in globals.local. | 
|  | # By default, these colors are bold | 
|  | # | 
|  | set COLOR_RED         "\033\[1;31m" | 
|  | set COLOR_RED_NORMAL  "\033\[31m" | 
|  | set COLOR_ORANGE      "\033\[1;38;5;208m" | 
|  | set COLOR_YELLOW      "\033\[1;33m" | 
|  | set COLOR_GREEN       "\033\[1;32m" | 
|  | set COLOR_BLUE        "\033\[1;34m" | 
|  | set COLOR_MAGENTA     "\033\[1;35m" | 
|  | set COLOR_CYAN        "\033\[1;36m" | 
|  | set COLOR_NONE        "\033\[0m" | 
|  |  | 
|  | cset local_globals_file "./globals.local" | 
|  |  | 
|  | # Log level "enum" | 
|  | # Define log levels here so they are available in globals.local | 
|  | set LOG_LEVEL_QUIET   0 | 
|  | set LOG_LEVEL_FATAL   1 | 
|  | set LOG_LEVEL_ERROR   2 | 
|  | set LOG_LEVEL_WARNING 3 | 
|  | set LOG_LEVEL_INFO    4 | 
|  | set LOG_LEVEL_PASS    4 | 
|  | set LOG_LEVEL_COMMAND 4 | 
|  | set LOG_LEVEL_DEBUG   5 | 
|  | set LOG_LEVEL_TRACE   6 | 
|  |  | 
|  | if {[info exists env(SLURM_LOCAL_GLOBALS_FILE)]} { | 
|  | set local_globals_file $env(SLURM_LOCAL_GLOBALS_FILE) | 
|  | } | 
|  |  | 
|  | if [file exists $local_globals_file] { | 
|  | source $local_globals_file | 
|  | } | 
|  |  | 
|  | # | 
|  | # Specify the slurm install directory. | 
|  | # Used to locate binaries, libraries, and header files. | 
|  | # | 
|  | cset slurm_dir   "/usr" | 
|  | cset build_dir   "../../" | 
|  | cset src_dir     "../../" | 
|  | cset config_h    "${build_dir}/config.h" | 
|  | cset sacctmgr    "${slurm_dir}/bin/sacctmgr" | 
|  | cset sacct       "${slurm_dir}/bin/sacct" | 
|  | cset salloc      "${slurm_dir}/bin/salloc" | 
|  | cset sattach     "${slurm_dir}/bin/sattach" | 
|  | cset sbatch      "${slurm_dir}/bin/sbatch" | 
|  | cset sbcast      "${slurm_dir}/bin/sbcast" | 
|  | cset scancel     "${slurm_dir}/bin/scancel" | 
|  | cset scontrol    "${slurm_dir}/bin/scontrol" | 
|  | cset sdiag       "${slurm_dir}/bin/sdiag" | 
|  | cset sgather     "${slurm_dir}/bin/sgather" | 
|  | cset sh5util     "${slurm_dir}/bin/sh5util" | 
|  | cset sinfo       "${slurm_dir}/bin/sinfo" | 
|  | cset smd         "${slurm_dir}/bin/smd" | 
|  | cset sprio       "${slurm_dir}/bin/sprio" | 
|  | cset squeue      "${slurm_dir}/bin/squeue" | 
|  | cset srun        "${slurm_dir}/bin/srun" | 
|  | cset sreport     "${slurm_dir}/bin/sreport" | 
|  | cset sshare      "${slurm_dir}/bin/sshare" | 
|  | cset sstat       "${slurm_dir}/bin/sstat" | 
|  | cset strigger    "${slurm_dir}/bin/strigger" | 
|  |  | 
|  | cset slurmd      "${slurm_dir}/sbin/slurmd" | 
|  | cset slurmrestd  "${slurm_dir}/sbin/slurmrestd" | 
|  |  | 
|  | cset pbsnodes    "${slurm_dir}/bin/pbsnodes" | 
|  | cset qdel        "${slurm_dir}/bin/qdel" | 
|  | cset qstat       "${slurm_dir}/bin/qstat" | 
|  | cset qsub        "${slurm_dir}/bin/qsub" | 
|  | cset qalter      "${slurm_dir}/bin/qalter" | 
|  | cset qrerun      "${slurm_dir}/bin/qrerun" | 
|  |  | 
|  | cset seff        "${slurm_dir}/bin/seff" | 
|  |  | 
|  | cset lsid      	 "${slurm_dir}/bin/lsid" | 
|  | cset bjobs     	 "${slurm_dir}/bin/bjobs" | 
|  | cset bkill     	 "${slurm_dir}/bin/bkill" | 
|  | cset bsub     	 "${slurm_dir}/bin/bsub" | 
|  |  | 
|  | cset influx	"/usr/bin/influx" | 
|  | # If using MPICH-2 or other version of MPI requiring pmi libary, use this | 
|  | #cset mpicc	"/home/jette/mpich2-install/bin/mpicc" | 
|  | #cset use_pmi	1 | 
|  | # OR for other versions of MPICH, use this | 
|  | cset mpicc	"/usr/local/bin/mpicc" | 
|  | cset nvcc	"/usr/bin/nvcc" | 
|  | cset use_pmi	0 | 
|  | #cset upcc       "/usr/local/bin/upcc" | 
|  | cset upcc       "/usr/bin/xlupc" | 
|  | cset oshcc      "/usr/local/bin/oshcc" | 
|  |  | 
|  | cset mpirun	"mpirun" | 
|  | cset totalviewcli	"/usr/local/bin/totalviewcli" | 
|  |  | 
|  | # Set if using "--enable-memory-leak-debug" configuration option | 
|  | cset enable_memory_leak_debug 0 | 
|  |  | 
|  | # test_prompt: to be used as prompt for interactive shells (see proc reset_bash_prompt) | 
|  | set test_prompt  "TEST_PROMPT: " | 
|  |  | 
|  | # | 
|  | # Specify locations of other executable files used | 
|  | # Only the shell names (e.g. bin_bash) must be full pathnames | 
|  | # | 
|  | cset bin_awk	"awk" | 
|  | cset bin_bash   [exec which bash | tail -n 1] | 
|  | cset bin_cat	"cat" | 
|  | cset bin_cc	"gcc" | 
|  | cset bin_chmod	"chmod" | 
|  | cset bin_cmp	"cmp" | 
|  | cset bin_cp	"cp" | 
|  | cset bin_date	"date" | 
|  | cset bin_diff	"diff" | 
|  | cset bin_echo	"echo" | 
|  | cset bin_env	"env" | 
|  | cset bin_file	"file" | 
|  | cset bin_id	"id" | 
|  | cset bin_ip	"ip" | 
|  | cset bin_jq	"jq" | 
|  | cset bin_grep   "grep" | 
|  | cset bin_head   "head" | 
|  | cset bin_ln     "ln" | 
|  | cset bin_perldoc "/usr/bin/perldoc" | 
|  | cset bin_py3     "python3" | 
|  | cset bin_oasgen  "openapi-generator-cli" | 
|  |  | 
|  | # Don't user $bin_hostname, use $bin_printenv SLURMD_NODENAME | 
|  | cset bin_hostname "hostname" | 
|  |  | 
|  | cset bin_kill	"kill" | 
|  | cset bin_lscpu	"lscpu" | 
|  | cset bin_make	"make" | 
|  | cset bin_mv	"mv" | 
|  | cset bin_od	"od" | 
|  | cset bin_pkill	"pkill" | 
|  | cset bin_printenv "printenv" | 
|  | cset bin_ps	"ps" | 
|  | cset bin_pwd	"pwd" | 
|  | cset bin_rm	"rm" | 
|  | cset bin_sed	"sed" | 
|  | cset bin_sleep  "sleep" | 
|  | cset bin_sort	"sort" | 
|  | cset bin_socat	"socat" | 
|  | cset bin_sum	"sum" | 
|  | cset bin_sudo	"sudo" | 
|  | cset bin_systemd_detect_virt "systemd-detect-virt" | 
|  | cset bin_touch	"touch" | 
|  | cset bin_true	"true" | 
|  | cset bin_uname	"uname" | 
|  | cset bin_uniq	"uniq" | 
|  | cset bin_unshare "unshare" | 
|  | cset bin_virtualenv "virtualenv" | 
|  | cset bin_wc	"wc" | 
|  |  | 
|  | # | 
|  | # Let the commands complete without expect timing out waiting for a | 
|  | # response. Single node jobs submitted to the default partition should | 
|  | # be initiated within this number of seconds. | 
|  | # for interactive slurm jobs: cset timeout $max_job_delay | 
|  | # | 
|  | cset max_job_delay 120 | 
|  |  | 
|  | # | 
|  | # Specify the maximum number of tasks to use in the stress tests. | 
|  | # | 
|  | cset max_stress_tasks 4 | 
|  |  | 
|  | # | 
|  | # The error message that the "sleep" command prints when we run "sleep aaa". | 
|  | # | 
|  | cset sleep_error_message "(invalid time interval)|(bad character in argument)|(usage: sleep seconds)" | 
|  |  | 
|  | # Force LANG, as the expect tests aren't localized | 
|  | set ::env(LANG)          "en_US.UTF-8" | 
|  |  | 
|  | # Testsuite level variables | 
|  | cset testsuite_dir "[file dirname [file normalize [info script]]]" | 
|  | cset testsuite_shared_dir "[$bin_pwd]" | 
|  |  | 
|  | # Testsuite non-privileged user (set it in globals.local) | 
|  | cset testsuite_user "" | 
|  |  | 
|  | # Testsuite log variables | 
|  | cset testsuite_log_level $LOG_LEVEL_DEBUG | 
|  | cset testsuite_log_format "\[%{timestamp}s.%{msecs}03d] %{loglevel}-7s %{message}s \(%{backtrace}s)" | 
|  | cset testsuite_time_format "%Y-%m-%d %H:%M:%S" | 
|  |  | 
|  | # Default to using color if writing to a terminal and not if writing to a file | 
|  | cset testsuite_colorize        [dict exists [fconfigure stdout] -mode] | 
|  | cset testsuite_color_fatal     $COLOR_RED | 
|  | cset testsuite_color_error     $COLOR_RED_NORMAL | 
|  | cset testsuite_color_warn      $COLOR_ORANGE | 
|  | cset testsuite_color_info      $COLOR_YELLOW | 
|  | cset testsuite_color_pass      $COLOR_GREEN | 
|  | cset testsuite_color_command   $COLOR_CYAN | 
|  | cset testsuite_color_debug     $COLOR_BLUE | 
|  | cset testsuite_color_trace     $COLOR_MAGENTA | 
|  | cset testsuite_color_header    $COLOR_NONE | 
|  | cset testsuite_color_success   $COLOR_GREEN | 
|  | cset testsuite_color_failure   $COLOR_RED | 
|  | cset testsuite_color_skipped   $COLOR_ORANGE | 
|  |  | 
|  | # Set to true to cause the first subtest failure to immediately end the test | 
|  | cset testsuite_subtest_fatal   false | 
|  | # Set to all, fail_skip, fail or none print datails of subtests and testprocs | 
|  | cset testsuite_subtest_details  fail | 
|  | cset testsuite_testproc_details fail | 
|  | cset testsuite_testproc_log_calls ignore_skips | 
|  |  | 
|  | # To automatically call cleanup or not when ending the test | 
|  | cset testsuite_cleanup_on_failure true | 
|  | if {[info exists env(SLURM_TESTSUITE_CLEANUP_ON_FAILURE)]} { | 
|  | set testsuite_cleanup_on_failure $env(SLURM_TESTSUITE_CLEANUP_ON_FAILURE) | 
|  | } | 
|  |  | 
|  | # To avoid potential infinite loops due calls to fail/pass/skip inside | 
|  | # custom cleanup procs (_test_fini should be called only once) | 
|  | set _test_fini_called false | 
|  |  | 
|  | # Testproc internal variables | 
|  | set _testproc_included    [list] | 
|  | set _testproc_excluded    [list] | 
|  | set _testproc_pass_list   [list] | 
|  | set _testproc_skip_list   [list] | 
|  | set _testproc_fail_list   [list] | 
|  | set _testproc_messages    [dict create] | 
|  | set _testproc_skip_next   false | 
|  | set _testproc_skip_reason "" | 
|  | set _incomplete_reason    "" | 
|  | set _subtest_messages     [dict create] | 
|  |  | 
|  | set _subtest_pass_count   0 | 
|  | set _subtest_skip_count   0 | 
|  | set _subtest_fail_count   0 | 
|  |  | 
|  | set STATUS_PASS 0 | 
|  | set STATUS_FAIL 1 | 
|  | set STATUS_SKIP -1 | 
|  |  | 
|  | # Final test status | 
|  | set test_status $STATUS_PASS | 
|  |  | 
|  | # Other common variables | 
|  | set re_word_str          "\\S+" | 
|  | set digit                "\\d" | 
|  | set eol                  "\r?\n" | 
|  | set float                "\\d+\\.?\\d*" | 
|  | set number               "\\d+" | 
|  | set format_time          "\\d+\\:\\d+\\:\\d+" | 
|  | set number_with_suffix   "\\d+\[KM\]*" | 
|  | set slash                "/" | 
|  | set whitespace		 "\\s+" | 
|  | set controlmachine_regex "\\S+" | 
|  | # Any characters except ( , : newline | 
|  | set no_delim             "\[^(,:\r\n\]" | 
|  | set no_delim_slash       "\[^(,:/\r\n\]" | 
|  | # The first group matches GRES name | 
|  | # The second **optional** group matches GRES type. | 
|  | # The third group matches GRES count. | 
|  | # Test out the regex here: https://regex101.com/r/FlNYKM/7 | 
|  | set gres_regex "($no_delim_slash*):($no_delim*)?:?($no_delim*)?:?($no_delim*)" | 
|  |  | 
|  | #basic #defines in slurm.h | 
|  | set NO_VAL 0xfffffffe | 
|  | set INFINITE 0xffffffff | 
|  | set SLURM_MAX_NORMAL_STEP_ID 0xfffffff0 | 
|  | set SLURM_EXTERN_CONT 0xfffffffc | 
|  | set SLURM_BATCH_SCRIPT 0xfffffffb | 
|  |  | 
|  |  | 
|  | # | 
|  | # Global variable used in multiple functions in "globals" file | 
|  | # | 
|  | set gpu_sock_list {} | 
|  |  | 
|  | # | 
|  | # Procedure return values | 
|  | # | 
|  | set RETURN_SUCCESS 0 | 
|  | set RETURN_ERROR   1 | 
|  | set RETURN_TIMEOUT 110 ; # ETIMEDOUT | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	fail - fails a test | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	fail message | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	To be used when an error is fatal for the test. This routine | 
|  | #	prints the specified error message, optionally cleans up, prints | 
|  | #	a final test failure message, and exits the test with exit code 1. | 
|  | # | 
|  | # ENVIRONMENT | 
|  | #	Whether or not the cleanup procedure is called depends on the setting | 
|  | #	of the $testsuite_cleanup_on_failure set in the globals.local file or | 
|  | #	overridden with the SLURM_TESTSUITE_CLEANUP_ON_FAILURE environment | 
|  | #	variable. | 
|  | # | 
|  | # NOTE | 
|  | #	DO NOT call this within your local cleanup procedure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc fail { message } { | 
|  | global _incomplete_reason STATUS_FAIL | 
|  |  | 
|  | # Avoid recursive calls from within cleanup | 
|  | if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} { | 
|  | log_error "Local cleanup shouldn't call pass, fail or skip" | 
|  | return | 
|  | } | 
|  |  | 
|  | log_fatal $message | 
|  | set _incomplete_reason $message | 
|  |  | 
|  | # _test_fini will handle cleanup and print the failure message. | 
|  | _test_fini $STATUS_FAIL | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	skip - skips a test | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	skip message | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	To be used when a precondition for the test fails and the test | 
|  | #	should be skipped. This routine prints the specified warning message, | 
|  | #	calls the cleanup procedure if defined, prints a final test skipped | 
|  | #	message, and exits the test with exit code -1 (aka 255). | 
|  | # | 
|  | # NOTE | 
|  | #	DO NOT call this within your local cleanup procedure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc skip { message } { | 
|  | global _incomplete_reason STATUS_SKIP | 
|  |  | 
|  | # Avoid recursive calls from within cleanup | 
|  | if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} { | 
|  | log_error "Local cleanup shouldn't call pass, fail or skip" | 
|  | return | 
|  | } | 
|  |  | 
|  | log_warn $message | 
|  | set _incomplete_reason $message | 
|  |  | 
|  | # _test_fini will handle cleanup and print the skipped message. | 
|  | _test_fini $STATUS_SKIP | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	pass - passes a test | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	pass | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	To be used when a test passes and should complete with success. | 
|  | #	This routine calls the cleanup procedure if defined, prints a final | 
|  | #	test success message, and exits with exit code 0. | 
|  | # | 
|  | # NOTE | 
|  | #	DO NOT call this within your local cleanup procedure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc pass { } { | 
|  | global STATUS_PASS | 
|  |  | 
|  | # Avoid recursive calls from within cleanup | 
|  | if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} { | 
|  | log_error "Local cleanup shouldn't call pass, fail or skip" | 
|  | return | 
|  | } | 
|  |  | 
|  | # _test_fini will handle cleanup and print the success message. | 
|  | _test_fini $STATUS_PASS | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	subpass - registers a passing subtest result | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	subpass ?description? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Increments the subtest pass count and logs a passing subtest message | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	description | 
|  | #		A single-line string describing the subtest being verified | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc subpass args { | 
|  | global _subtest_fail_count _subtest_pass_count _subtest_skip_count | 
|  | global _subtest_messages | 
|  |  | 
|  | set description "" | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count == 1} { set args [lassign $args description] } | 
|  | if {$argument_count > 1} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  |  | 
|  | set subtest_count [expr $_subtest_pass_count + $_subtest_fail_count + $_subtest_skip_count + 1] | 
|  | incr _subtest_pass_count | 
|  | set message [format "Subtest %2d passed" $subtest_count] | 
|  | if {$description ne ""} { append message "  : $description" } | 
|  | log_pass $message | 
|  | dict set _subtest_messages $subtest_count [list pass $message] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	subfail - registers a failing subtest result | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	subfail ?options? ?description? ?diagnostics? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Increments the subtest failure count and logs a failing subtest message | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fatal | 
|  | #		Causes this subtest failure to be fatal, ending the test | 
|  | # ARGUMENTS | 
|  | #	description | 
|  | #		A single-line string describing the condition being verified | 
|  | #	diagnostics | 
|  | #		A string providing additional diagnostic information that will | 
|  | #		be included with the log message | 
|  | # | 
|  | # ENVIRONMENT | 
|  | #	testsuite_subtest_fatal | 
|  | #		Specifies whether first failing subtest aborts the test | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc subfail args { | 
|  | global _subtest_fail_count _subtest_pass_count _subtest_skip_count | 
|  | global testsuite_subtest_fatal | 
|  | global _subtest_messages | 
|  |  | 
|  | set description "" | 
|  | set fatal       false | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fatal   {set fatal true; set args [lrange $args 1 end]} | 
|  | -*       {fail "Unknown option: [lindex $args 0]"} | 
|  | default  break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count >= 1} { set args [lassign $args description] } | 
|  |  | 
|  | set subtest_count [expr $_subtest_pass_count + $_subtest_fail_count + $_subtest_skip_count + 1] | 
|  | incr _subtest_fail_count | 
|  | set message [format "Subtest %2d failed" $subtest_count] | 
|  | if {$description ne ""} { append message "  : $description" } | 
|  | if [llength $args] { append message " (" [join $args ", "] ")" } | 
|  | if {$fatal || $testsuite_subtest_fatal} { | 
|  | fail $message | 
|  | } else { | 
|  | log_error $message | 
|  | } | 
|  | dict set _subtest_messages $subtest_count [list fail $message] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	subskip - registers a skipped subtest result | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	subskip ?options? ?description? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Increments the subtest skip count and logs a skipped subtest message | 
|  | # | 
|  | # OPTIONS | 
|  | #	-count NUMBER | 
|  | #		When used with -skip, indicates the number of subtests that | 
|  | #		were skipped | 
|  | # ARGUMENTS | 
|  | #	description | 
|  | #		A single-line string describing the reason the subtest is | 
|  | #		being skipped | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc subskip args { | 
|  | global _subtest_fail_count _subtest_pass_count _subtest_skip_count | 
|  | global _subtest_messages | 
|  |  | 
|  | set description "" | 
|  | set count       1 | 
|  | set nolog       0 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -nolog { | 
|  | set nolog 1 | 
|  | # Shift args down one | 
|  | set args [lassign $args -] | 
|  | } | 
|  | -count   {set args [lassign $args - count]} | 
|  | -*       {fail "Unknown option: [lindex $args 0]"} | 
|  | default  break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count == 1} { set args [lassign $args description] } | 
|  | if {$argument_count > 1} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  |  | 
|  | set subtest_count [expr $_subtest_pass_count + $_subtest_fail_count + $_subtest_skip_count + 1] | 
|  | incr _subtest_skip_count $count | 
|  | if {$count > 1} { | 
|  | set message "Subtest $subtest_count-[expr $subtest_count+$count-1] skipped" | 
|  | } else { | 
|  | set message [format "Subtest %2d skipped" $subtest_count] | 
|  | } | 
|  | if {$description ne ""} { append message " : $description" } | 
|  | if {$nolog != 1} { | 
|  | log_warn $message | 
|  | } | 
|  | dict set _subtest_messages [expr $subtest_count] [list skip $message] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_subtest_fail_count - returns the current subtest failure count | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_subtest_fail_count | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_subtest_fail_count {} { | 
|  | global _subtest_fail_count | 
|  |  | 
|  | return $_subtest_fail_count | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	reset_bash_prompt - used in expect body to reset bash prompt | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	reset_bash_prompt | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	The procedure must be called within an expect body to | 
|  | #	reset the bash prompt to the $test_prompt. | 
|  | # | 
|  | # EXAMPLE | 
|  | #	spawn $salloc $bin_bash | 
|  | #	expect { | 
|  | #		-re "Granted job allocation ($number)" { | 
|  | #			set job_id $expect_out(1,string) | 
|  | #			reset_bash_prompt | 
|  | #			exp_continue | 
|  | #		} | 
|  | #		-re $test_prompt {}; # Expect initial test prompt from bash | 
|  | #		timeout { | 
|  | #			fail "salloc not responding or prompt not recognized" | 
|  | #		} | 
|  | #	} | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc reset_bash_prompt {} { | 
|  | global testsuite_dir | 
|  |  | 
|  | upvar spawn_id sid | 
|  | send -i $sid "source $testsuite_dir/reset_bash_prompt.sh\n" | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	print_time - prints the current date and time | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	print_time | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc print_time { } { | 
|  | global bin_date | 
|  |  | 
|  | spawn $bin_date | 
|  | expect { | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | return | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	dict_getdef - 'dict get' with ability to specify the default value | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	dict_getdef dictionary_value key default_value | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Tcl < 8.7 lacks a built in 'dict get' with ability to specify the | 
|  | #	default value. Tcl 8.7 adds a dict getdef. | 
|  | #	This proc returns the value from the dictionary corresponding to the | 
|  | #	keys if it exists, or the default value otherwise. | 
|  | # | 
|  | # EXAMPLE | 
|  | #	dict_getdef $option_dict action "warn" | 
|  | # | 
|  | # SOURCE | 
|  | #	https://core.tcl-lang.org/tips/doc/trunk/tip/342.md | 
|  | #	https://core.tcl-lang.org/tcl/tktview/2370575 | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc dict_getdef {D args} { | 
|  | if {[dict exists $D {*}[lrange $args 0 end-1]]} then { | 
|  | dict get $D {*}[lrange $args 0 end-1] | 
|  | } else { | 
|  | lindex $args end | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_line_trace - returns an abbreviated call stack trace with line numbers | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_line_trace | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _line_trace {} { | 
|  | set line_trace  "" | 
|  | set first_entry true | 
|  | for {set f [expr [info frame] - 3]} {$f >= 1} {incr f -1} { | 
|  | set frame_dict [info frame $f] | 
|  | if [dict exists $frame_dict file] { | 
|  | if [regexp uplevel [dict get $frame_dict cmd]] { | 
|  | continue | 
|  | } | 
|  | if {$first_entry} { | 
|  | set first_entry false | 
|  | } else { | 
|  | append line_trace "," | 
|  | } | 
|  | if [dict exists $frame_dict proc] { | 
|  | set proc [namespace tail [dict get $frame_dict proc]] | 
|  | if {$proc ne ""} { | 
|  | append line_trace "$proc\@" | 
|  | } | 
|  | } | 
|  | append line_trace [file tail [dict get $frame_dict file]] | 
|  | if [dict exists $frame_dict line] { | 
|  | append line_trace ":[dict get $frame_dict line]" | 
|  | } | 
|  | } | 
|  | } | 
|  | return $line_trace | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	tolerance - determines whether a value is within a specified tolerance | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	tolerance expected observed tolerance_expression | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	expected | 
|  | #		the expected (numeric) value | 
|  | #	observed | 
|  | #		the observed (numeric) value | 
|  | #	tolerance_expression | 
|  | #		a string of the form: [~][+|-]<tolerance>[%] | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	tolerance | 
|  | #		A numeric tolerance | 
|  | #	symmetry | 
|  | #		By default the permitted range of values is symetric: | 
|  | #			[expected - tolerance, expected + tolerance] | 
|  | #		If the + sign is specified, the tolerance is limited to the | 
|  | #		the higher side only: | 
|  | #			[expected, expected + tolerance] | 
|  | #		If the - sign is specified, the tolerance is limited to the | 
|  | #		the lower side only: | 
|  | #			[expected - tolerance, expected] | 
|  | #	percent | 
|  | #		By default the permitted range is computed as absolute values: | 
|  | #			[expected - tolerance, expected + tolerance] | 
|  | #		If % is specified, the permitted range is computed as a | 
|  | #		percentage of the expected value: | 
|  | #			[expected*(1-tolerance/100), expected*(1+tolerance/100)] | 
|  | #	exclusivity | 
|  | #		By default the permitted range of values is inclusive, ie | 
|  | #		the min and max tolerated values are included in the range: | 
|  | #			[expected - tolerance, expected + tolerance] | 
|  | #		If ~ (exclusive) is specified, the tolerance limits are | 
|  | #		exclusive, ie the min and max tolerated values are excluded: | 
|  | #			(expected - tolerance, expected + tolerance) | 
|  | #	expression | 
|  | #		any combination of symetry, percent and exclusivity is allowed | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if the observed value is within the specified tolerance | 
|  | #	range of the expected value, otherwise false | 
|  | # | 
|  | # EXAMPLES | 
|  | #	The indicated tolerance_expression is true if: | 
|  | #	"5"     expected - 5  <= observed <= expected + 5 | 
|  | #	"-5"    expected - 5  <= observed <= expected | 
|  | #	"+5"    expected      <= observed <= expected + 5 | 
|  | #	"5%"    expected - 5% <= observed <= expected + 5% | 
|  | #	"~5"    expected - 5  <  observed <  expected + 5 | 
|  | #	"~+5%"  expected      <= observed <  expected + 5% | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc tolerance { expected observed tolerance_expression } { | 
|  | if {![regexp {^(~?)([-+]?)([0-9\.]+)(%?)$} $tolerance_expression {} exclusive sign tolerance percent]} { | 
|  | fail "Invalid tolerance expression ($tolerance_expression)" | 
|  | } | 
|  |  | 
|  | set lower_bound_expression $observed | 
|  | if {$sign eq "+" || $exclusive ne "~"} { | 
|  | append lower_bound_expression " >=" | 
|  | } else { | 
|  | append lower_bound_expression " >" | 
|  | } | 
|  | append lower_bound_expression " $expected" | 
|  | if {$sign eq "-" || $sign eq ""} { | 
|  | if {$percent eq "%"} { | 
|  | append lower_bound_expression " - $tolerance * $expected / 100" | 
|  | } else { | 
|  | append lower_bound_expression " - $tolerance" | 
|  | } | 
|  | } | 
|  |  | 
|  | set upper_bound_expression $observed | 
|  | if {$sign eq "-" || $exclusive ne "~"} { | 
|  | append upper_bound_expression " <=" | 
|  | } else { | 
|  | append upper_bound_expression " <" | 
|  | } | 
|  | append upper_bound_expression " $expected" | 
|  | if {$sign eq "+" || $sign eq ""} { | 
|  | if {$percent eq "%"} { | 
|  | append upper_bound_expression " + $tolerance * $expected / 100" | 
|  | } else { | 
|  | append upper_bound_expression " + $tolerance" | 
|  | } | 
|  | } | 
|  |  | 
|  | if {[expr $lower_bound_expression] && [expr $upper_bound_expression]} { | 
|  | log_debug "$observed is within tolerance $tolerance_expression of $expected" | 
|  | return true | 
|  | } else { | 
|  | log_warn "$observed is not within tolerance $tolerance_expression of $expected" | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_run_as_user - check if the caller may run_command as the supplied user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_run_as_user user | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Note that a proper sudo config needs to be set in orther to pass this | 
|  | #	check. Calling user should be permitted to run_commands as the | 
|  | #	supplied user using sudo without password. | 
|  | #	See the -user option of run_command. | 
|  | #	This proc also log_warn a message if user already exists in the DB | 
|  | #	because most probably this user is testsuite_user and that user is | 
|  | #	expected NOT to be in the DB and could potentially be removed from it by | 
|  | #	the test. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns a boolean value indicating whether the calling user may | 
|  | #	run_command as user. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_run_as_user user { | 
|  | global bin_id | 
|  |  | 
|  | if {$user eq ""} { | 
|  | return false | 
|  | } | 
|  | if {[run_command_status -none -user $user "$bin_id -un"]} { | 
|  | return false | 
|  | } | 
|  |  | 
|  | if {[get_admin_level $user] != ""} { | 
|  | log_warn "User $user already exists in DB, but it's probable that it's going to be removed by the test cleanup" | 
|  | } | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_user_id - check if user exists in the system | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_user_id user | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This proc also log_warn a message if user already exists in the DB | 
|  | #	because most probably this user is testsuite_user and that user is | 
|  | #	expected NOT to be in the DB and could potentially be removed from | 
|  | #	it by the test. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if the user passed exists in the system, false otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_user_id user { | 
|  | global bin_id | 
|  |  | 
|  | if {$user eq ""} { | 
|  | return false | 
|  | } | 
|  | if {[run_command_status -none "$bin_id -un $user"]} { | 
|  | return false | 
|  | } | 
|  |  | 
|  | if {[get_admin_level $user] != ""} { | 
|  | log_warn "User $user already exists in DB, but it's probable that it's going to be removed by the test cleanup" | 
|  | } | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	run_command - executes a command and returns a dictionary result | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	run_command ?options? command | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Executes a command and returns a dictionary that includes the output, | 
|  | #	exit code, etc. An action can be taken (fail, warn, subtest, none) if | 
|  | #	the command's exit code is unexpected. By default, the action | 
|  | #	will be applied if the command fails. If the -xfail option is | 
|  | #	specified, the behavior will be reversed to apply the action if the | 
|  | #	command ran successfully. | 
|  | #	A timeout is always treated as unexpected, so log_error will be shown | 
|  | #	by default, or fail/subfail will be called if -fail/-subtest are used. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		If the exit code is unexpected, the action that will | 
|  | #		be taken is to fail the test | 
|  | #	-subtest | 
|  | #		If the exit code is unexpected, the action that will | 
|  | #		be taken is to subfail, otherwise subpass will be called | 
|  | #	-warn | 
|  | #		If the exit code is unexpected, the action that will | 
|  | #		be taken is to log a warning (this is the default) | 
|  | #	-none | 
|  | #		If the exit code is unexpected, no action will be taken | 
|  | #	-xfail | 
|  | #		If the command exits with zero the action will be applied. | 
|  | #		Without this option, the action will be applied if the | 
|  | #		command exits with a non-zero exit code. | 
|  | #	-timeout <float_number> | 
|  | #		Time in seconds to wait for the command to complete before | 
|  | #		timing out (default is 60.0) | 
|  | #	-nolog | 
|  | #		Logging for this command will occur at trace threshold only | 
|  | #	-stdin | 
|  | #		Provide standard in to be piped into command | 
|  | #	-user <user> | 
|  | #		Attempt to execute command as <user>. Note that sudo must be | 
|  | #		properly configured to permit the caller to execute as <user>. | 
|  | #		See check_run_as_user. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	command | 
|  | #		a string containing the command and arguments to execute | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A dictionary containing the following elements: | 
|  | #		command    - The command that was invoked | 
|  | #		exit_code  - Exit code | 
|  | #		output     - The combined standard output and standard error | 
|  | #		start_time - The time (with ms) the command was executed | 
|  | #		duration   - The duration (seconds and milliseconds) the | 
|  | #		             command took to run | 
|  | # | 
|  | ################################################################ | 
|  | proc run_command args { | 
|  | global bin_bash bin_sudo | 
|  |  | 
|  | set alt_user "" | 
|  | set exit_status 0 | 
|  | set timedout false | 
|  | set output "" | 
|  | set action "warn" | 
|  | set timeout 60 | 
|  | set expect_failure false | 
|  | set log_at_trace_level false | 
|  | set stdin "" | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fail    {set action "fail"; set args [lrange $args 1 end]} | 
|  | -subtest {set action "subtest"; set args [lrange $args 1 end]} | 
|  | -none    {set action "none"; set args [lrange $args 1 end]} | 
|  | -timeout {set args [lassign $args - timeout]} | 
|  | -warn    {set action "warn"; set args [lrange $args 1 end]} | 
|  | -xfail   {set expect_failure true; set args [lrange $args 1 end]} | 
|  | -nolog   {set log_at_trace_level true; set args [lrange $args 1 end]} | 
|  | -stdin   {set stdin [lindex $args 1]; set args [lassign $args - stdin]} | 
|  | -user    {set args [lassign $args - alt_user]} | 
|  | -*       {fail "Unknown option: [lindex $args 0]"} | 
|  | default  break | 
|  | } | 
|  | } | 
|  | if {[llength $args] == 1} { | 
|  | lassign $args command | 
|  | } else { | 
|  | fail "Invalid number of arguments [llength $args]: $args" | 
|  | } | 
|  |  | 
|  | if {$action eq "subtest"} { | 
|  | if {$expect_failure} { | 
|  | set test_description "Command \"$command\" should fail" | 
|  | } else { | 
|  | set test_description "Command \"$command\" should succeed" | 
|  | } | 
|  | } | 
|  |  | 
|  | if {$log_at_trace_level} { | 
|  | interp alias {} log_run     {} log_trace | 
|  | interp alias {} log_details {} log_trace | 
|  | } else { | 
|  | interp alias {} log_run     {} log_command | 
|  | interp alias {} log_details {} log_debug | 
|  | } | 
|  |  | 
|  | set orig_log_user [log_user -info] | 
|  | log_user 0 | 
|  |  | 
|  | if {$alt_user ne ""} { | 
|  | log_run "Run Command as user $alt_user: $command" | 
|  | } else { | 
|  | log_run "Run Command: $command" | 
|  | } | 
|  | set start_clock_ms [clock milliseconds] | 
|  | set stty_init raw ; # Prevent the terminal from inserting \r | 
|  | if {$alt_user ne ""} { | 
|  | set expect_pid [spawn -noecho $bin_sudo -nu $alt_user $bin_bash -c "$command"] | 
|  | } else { | 
|  | set expect_pid [spawn -noecho $bin_bash -c "$command"] | 
|  | } | 
|  | if { $stdin != "" } { | 
|  | exp_send "$stdin" | 
|  | set command "$command <<< $stdin" | 
|  | } | 
|  | expect { | 
|  | -re "(.+)" { | 
|  | append output $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | slow_kill $expect_pid | 
|  | set exit_status $::RETURN_TIMEOUT | 
|  | set timedout true | 
|  | } | 
|  | eof { | 
|  | lassign [wait] pid spawnid os_error_flag errno | 
|  | set exit_status [expr $errno > 128 ? $errno - 256 : $errno] | 
|  | } | 
|  | } | 
|  | set start_time [format "%.3f" [expr $start_clock_ms / 1000.000]] | 
|  | set end_time [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  | set duration [format "%.3f" [expr $end_time - $start_time]] | 
|  |  | 
|  | log_details "Command Results:" | 
|  | log_details "	Duration:   $duration" | 
|  | log_details "	Exit Code:  $exit_status" | 
|  | if {[info exists output]} { | 
|  | log_details "	Output: $output" | 
|  | } | 
|  |  | 
|  | if {$timedout} { | 
|  | set message "Command \"$command\" timed out after $timeout seconds" | 
|  | if {$action eq "fail"} { | 
|  | fail $message | 
|  | } elseif {$action eq "subtest"} { | 
|  | subfail $test_description $message | 
|  | } else { | 
|  | log_error $message | 
|  | } | 
|  | } elseif {! $expect_failure && $exit_status != 0} { | 
|  | set message "Command \"$command\" failed with rc=$exit_status" | 
|  | if {[info exists output] && $output != ""} { | 
|  | append message ": [string trimright $output]" | 
|  | } | 
|  | if {$action eq "warn"} { | 
|  | log_warn $message | 
|  | } elseif {$action eq "subtest"} { | 
|  | subfail $test_description $message | 
|  | } elseif {$action eq "fail"} { | 
|  | fail $message | 
|  | } | 
|  | } elseif {$expect_failure && $exit_status == 0} { | 
|  | set message "Command \"$command\" was expected to fail but succeeeded" | 
|  | if {$action eq "warn"} { | 
|  | log_warn $message | 
|  | } elseif {$action eq "subtest"} { | 
|  | subfail $test_description $message | 
|  | } elseif {$action eq "fail"} { | 
|  | fail $message | 
|  | } | 
|  | } elseif {$action eq "subtest"} { | 
|  | subpass $test_description | 
|  | } | 
|  | log_user $orig_log_user | 
|  |  | 
|  | dict set result command    $command | 
|  | dict set result exit_code  $exit_status | 
|  | dict set result output     $output | 
|  | dict set result start_time $start_time | 
|  | dict set result duration   $duration | 
|  |  | 
|  | return $result | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	run_command_output - executes a command and returns the output | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	run_command_output ?options? command | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Executes a command and returns a dictionary that includes the output, | 
|  | #	exit code, etc. An action can be taken (fail, warn, none)  if the | 
|  | #	command's exit code or timeout is unexpected. By default, the action | 
|  | #	will be applied if the command fails. If the -xfail option is | 
|  | #	specified, the behavior will be reversed to apply the action if the | 
|  | #	command ran successfully. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		if the exit code or timeout is unexpected, the action that will | 
|  | #		be taken is to fail the test | 
|  | #	-warn | 
|  | #		if the exit code or timeout is unexpected, the action that will | 
|  | #		be taken is to log a warning (this is the default) | 
|  | #	-none | 
|  | #		if the exit code or timeout is unexpected, no action will be | 
|  | #		taken | 
|  | #	-xfail | 
|  | #		if the command exits with zero and does not time out, the | 
|  | #		action will be applied. Without this option, the action will | 
|  | #		be applied if the command exits with a non-zero exit code or | 
|  | #		times out. | 
|  | #	-timeout <float_number> | 
|  | #		time in seconds to wait for the command to complete before | 
|  | #		timing out (default is 60.0) | 
|  | #	-stdin | 
|  | #		Provide standard in to be piped into command | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	command | 
|  | #		a string containing the command and arguments to execute | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A string containing the combined standard output and standard error | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc run_command_output args { | 
|  |  | 
|  | set result [run_command {*}$args] | 
|  |  | 
|  | if [dict exists $result output] { | 
|  | return [dict get $result output] | 
|  | } else { | 
|  | return "" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	run_command_status - executes a command and returns the exit code | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	run_command_status ?options? command | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Executes a command and returns a dictionary that includes the output, | 
|  | #	exit code, etc. An action can be taken (fail, warn, none)  if the | 
|  | #	command's exit code or timeout is unexpected. By default, the action | 
|  | #	will be applied if the command fails. If the -xfail option is | 
|  | #	specified, the behavior will be reversed to apply the action if the | 
|  | #	command ran successfully. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		if the exit code or timeout is unexpected, the action that will | 
|  | #		be taken is to fail the test | 
|  | #	-warn | 
|  | #		if the exit code or timeout is unexpected, the action that will | 
|  | #		be taken is to log a warning (this is the default) | 
|  | #	-none | 
|  | #		if the exit code or timeout is unexpected, no action will be | 
|  | #		taken | 
|  | #	-xfail | 
|  | #		if the command exits with zero and does not time out, the | 
|  | #		action will be applied. Without this option, the action will | 
|  | #		be applied if the command exits with a non-zero exit code or | 
|  | #		times out. | 
|  | #	-timeout <float_number> | 
|  | #		time in seconds to wait for the command to complete before | 
|  | #		timing out (default is 60.0) | 
|  | #	-stdin | 
|  | #		Provide standard in to be piped into command | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	command | 
|  | #		a string containing the command and arguments to execute | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	The exit code for the invoked command | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc run_command_status args { | 
|  |  | 
|  | set result [run_command {*}$args] | 
|  |  | 
|  | return [dict get $result exit_code] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	cancel_job - cancels the specified job list | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	cancel_job job_id_list ?het_job? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Cancels one or more jobs. A job_id of 0 will be silently ignored. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		if scancel fails with exit code or timeout, or the job doesn't | 
|  | #		end, test will will fail | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_id_list | 
|  | #		The list of Slurm job ids that we want to cancel | 
|  | #	het_job | 
|  | #		1 if jobs are hetjobs and we want to confirm each | 
|  | #		component has completed | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if jobs are cancelled, or non-zero value otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc cancel_job args { | 
|  | global scancel | 
|  |  | 
|  | set fatal         false | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fail   {set fatal true; set args [lrange $args 1 end]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  |  | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 1} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } elseif {$argument_count > 2} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  | lassign $args job_id_list | 
|  | if {$argument_count == 2} { | 
|  | set het_job [lindex $args 1] | 
|  | } else { | 
|  | set het_job 0 | 
|  | } | 
|  |  | 
|  | set job_list_clean [list] | 
|  | foreach job_id $job_id_list { | 
|  | if {$job_id != 0} { | 
|  | lappend job_list_clean $job_id | 
|  | } | 
|  | } | 
|  |  | 
|  | if {![llength $job_list_clean]} { | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  | log_debug "Cancelling $job_list_clean" | 
|  | set result [run_command "$scancel -Q $job_list_clean"] | 
|  | if {[dict get $result exit_code]} { | 
|  | set message "scancel command returned an error ([dict get $result output])" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } else { | 
|  | log_warn $message | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | } | 
|  | foreach job_id $job_list_clean { | 
|  | if {[wait_for_job $job_id "DONE" $het_job]} { | 
|  | set message "Job ($job_id) did not end when cancelled" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } else { | 
|  | log_warn $message | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | } | 
|  | } | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_line_cnt - returns the size of the specified file | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_line_cnt file_name | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Number of lines in the specified file. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_line_cnt { file_name } { | 
|  | global bin_wc number | 
|  | set lines 0 | 
|  | spawn $bin_wc -l $file_name | 
|  | expect { | 
|  | -re "($number) " { | 
|  | set lines $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | return $lines | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	convert_time_str - Converts a string like [HH:]MM:SS[.sss] to secs | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	Convert the string to a number. | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Units can be hours, mins, secs. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A non-zero return code indicates a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc convert_time_str { time_str units } { | 
|  | subtest {[regexp {(?:(\d+):)?(\d+):(\d+)(?:\.(\d+))?} $time_str - hours mins secs msecs]} "Time should be in the format \[HH:\]MM:SS\[.sss\]" "$time_str" | 
|  | if { $hours eq "" } { | 
|  | set hours 0 | 
|  | } | 
|  |  | 
|  | # Use scan to avoid TCL octals | 
|  | set value  [expr ([scan $hours "%d"] * 3600) + ([scan $mins "%d"] * 60) + [scan $secs "%d"]] | 
|  |  | 
|  | if { $units eq "hours" } { | 
|  | set value [expr $value / 3600] | 
|  | } elseif { $units eq "mins" } { | 
|  | set value [expr $value / 60] | 
|  | } | 
|  |  | 
|  | return $value | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	slow_kill - kills a process slowly | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	slow_kill pid | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Kill a process slowly, first trying SIGINT, pausing for | 
|  | #       a second, then sending SIGKILL. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A non-zero return code indicates a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc slow_kill { pid } { | 
|  | global bin_kill | 
|  |  | 
|  | catch {exec $bin_kill -INT $pid} | 
|  | catch {exec $bin_kill -INT $pid} | 
|  | sleep  1 | 
|  | catch {exec $bin_kill -KILL $pid} | 
|  |  | 
|  | return 0 | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_my_id - gets the id from the running user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_my_id | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	output of id | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_my_id {} { | 
|  |  | 
|  | global bin_id number | 
|  | set login_info -1 | 
|  |  | 
|  | log_user 0 | 
|  |  | 
|  | spawn $bin_id | 
|  | expect { | 
|  | -re "(uid=.*\n)" { | 
|  | set login_info $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | log_user 1 | 
|  |  | 
|  | if {$login_info == -1} { | 
|  | fail "Unable to get user info" | 
|  | } | 
|  |  | 
|  | return $login_info | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_my_user_name - gets the name from the running user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_my_user_name | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A non-zero return code indicates a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_my_user_name {  } { | 
|  | global bin_id re_word_str | 
|  |  | 
|  | set user_name -1 | 
|  |  | 
|  | log_user 0 | 
|  | spawn $bin_id -nu | 
|  | expect { | 
|  | -re "($re_word_str)" { | 
|  | set user_name $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | if {$user_name == -1} { | 
|  | fail "Unable to get user name" | 
|  | } | 
|  |  | 
|  | return $user_name | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_my_uid - gets the uid from the running user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_my_uid | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	The uid of the current user, or fails. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_my_uid {  } { | 
|  | global bin_id number | 
|  |  | 
|  | set out [run_command_output -nolog -fail "$bin_id -u"] | 
|  | if {![regexp "($number)" $out - uid]} { | 
|  | fail "Unable to get UID with $bin_id ($out)" | 
|  | } | 
|  |  | 
|  | return $uid | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_my_gid - gets the gid from the running user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_my_gid | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A non-zero return code indicates a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_my_gid {  } { | 
|  | global bin_id number | 
|  |  | 
|  | set gid -1 | 
|  |  | 
|  | log_user 0 | 
|  | spawn $bin_id -g | 
|  | expect { | 
|  | -re "($number)" { | 
|  | set gid $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $gid | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	kill_salloc - kills all salloc commands associated with this user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	kill_salloc | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Kill all salloc commands associated with this user. | 
|  | #	Issue two SIGINT, sleep 1 and a SIGKILL | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A non-zero return code indicates a failure. | 
|  | # | 
|  | # NOTE | 
|  | #	Use slow_kill instead of kill_salloc if you can capture | 
|  | #	the process id | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc kill_salloc {  } { | 
|  | global bin_id bin_pkill bin_sleep number | 
|  |  | 
|  | set uid [get_my_uid] | 
|  | catch {exec $bin_pkill -INT -u $uid salloc} | 
|  | catch {exec $bin_pkill -INT -u $uid salloc} | 
|  | sleep  1 | 
|  | catch {exec $bin_pkill -KILL -u $uid salloc} | 
|  |  | 
|  | return 0 | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	kill_srun - kills all srun commands associated with this user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	kill_srun | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Kill all srun commands associated with this user. | 
|  | #	Issue two SIGINT, sleep 1 and a SIGKILL | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A non-zero return code indicates a failure. | 
|  | # | 
|  | # NOTE | 
|  | #	Use slow_kill instead of kill_srun if you can capture | 
|  | #	the process id | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc kill_srun {  } { | 
|  | global bin_id bin_pkill bin_sleep number | 
|  |  | 
|  | set uid [get_my_uid] | 
|  | catch {exec $bin_pkill -INT -u $uid srun} | 
|  | catch {exec $bin_pkill -INT -u $uid srun} | 
|  | sleep  1 | 
|  | catch {exec $bin_pkill -KILL -u $uid srun} | 
|  |  | 
|  | return 0 | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for - generic wait utility | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for ?options? condition body | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Generic wait utility allowing you to repeatedly execute a generic block | 
|  | #	of code until a specified boolean expression is met. The code block and | 
|  | #	condition check occur every poll interval until a timeout is reached. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		fail the test if the condition is not met within the timeout | 
|  | #	-subtest | 
|  | #		if the condition is met within the timeout call subpass, otherwise | 
|  | #		call subfail | 
|  | #	-timeout <float_number> | 
|  | #		time in seconds to wait for the condition to be met before | 
|  | #		timing out (default is 60.0) | 
|  | #	-pollinterval <float_number> | 
|  | #		time in seconds between each loop execution and condition check | 
|  | #		(default is 1.0) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	condition | 
|  | #		The boolean expression to test | 
|  | #	body | 
|  | #		A block of code to evaluate in the invoking stack frame | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if the condition is met before the timeout occurs, | 
|  | #	RETURN_TIMEOUT if the timeout occurs before the condition is met | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for args { | 
|  | set action          "none" | 
|  | set subtest         false | 
|  | set timeout         60 | 
|  | set poll_interval   1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fail       {set action "fail"; set args [lrange $args 1 end]} | 
|  | -subtest    {set action "subtest"; set args [lrange $args 1 end]} | 
|  | -time*      {set args [lassign $args - timeout]} | 
|  | -poll*      {set args [lassign $args - poll_interval]} | 
|  | -*          {fail "Unknown option: [lindex $args 0]"} | 
|  | default     break | 
|  | } | 
|  | } | 
|  | if {[llength $args] == 2} { | 
|  | lassign $args condition body | 
|  | } else { | 
|  | fail "Invalid number of arguments [llength $args]: $args" | 
|  | } | 
|  |  | 
|  | set start_time [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  |  | 
|  | log_debug "Waiting for $condition" | 
|  |  | 
|  | while {1} { | 
|  | # Evaluate code block | 
|  | log_trace "Evaluating code block ([string trim $body])" | 
|  | uplevel $body | 
|  |  | 
|  | # Check condition | 
|  | if {[uplevel expr [format "{%s}" $condition]]} { | 
|  | set now [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  | log_debug "Condition ($condition) was met" | 
|  | if {$action eq "subtest"} { | 
|  | subpass "Condition ($condition) should be met (within $timeout seconds)" | 
|  | } | 
|  | return $::RETURN_SUCCESS | 
|  | } else { | 
|  | log_trace "Condition ($condition) was not met" | 
|  | } | 
|  |  | 
|  | # Sleep poll interval | 
|  | log_trace "Sleeping for $poll_interval seconds" | 
|  | after [expr {int($poll_interval * 1000)}] | 
|  |  | 
|  | # Check if we have surpassed our timeout | 
|  | set now [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  | log_trace "Checking whether the current time ([clock format [expr int($now)] -format %Y-%m-%dT%X].[lindex [split $now '.'] 1]) is greater than the start time plus the timeout ([clock format [expr int($start_time + $timeout)] -format %Y-%m-%dT%X].[lindex [split [expr $start_time + $timeout] '.'] 1])" | 
|  | if {$now > $start_time + $timeout} { | 
|  | set message "Condition ($condition) was not met before timeout ($timeout seconds)" | 
|  | if {$action eq "fail"} { | 
|  | fail $message | 
|  | } else { | 
|  | log_warn $message | 
|  | if {$action eq "subtest"} { | 
|  | subfail "Condition ($condition) should be met (within $timeout seconds)" "Timed out" | 
|  | } | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_command - repeat a command until it is successful or meets a specified condition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_command ?options? command ?condition? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	A command is repeated until it meets a condition or a timeout is reached. | 
|  | #	If a condition is not specified, the command will be repeated until it | 
|  | #	is successful (the exit code is zero). | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		fail the testif the condition is not met within the timeout | 
|  | #	-subtest | 
|  | #		if the condition is met within the timeout call subpass, otherwise | 
|  | #		call subfail | 
|  | #	-timeout <float_number> | 
|  | #		time in seconds to wait for the condition to be met before | 
|  | #		timing out (default is 60.0) | 
|  | #	-pollinterval <float_number> | 
|  | #		time in seconds between each loop execution and condition | 
|  | #		check (default is 1.0) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	command | 
|  | #		a string containing the command and arguments to execute | 
|  | #	condition | 
|  | #		The boolean expression to test. For each command invocation, | 
|  | #		the result variable will be set to the dictionary returned | 
|  | #		from run_command. | 
|  | #		The condition expression will normally involve a comparison | 
|  | #		with one or more values of this dictionary. If a condition is | 
|  | #		not specified, this condition will be used: | 
|  | #		{ [dict get $result exit_code] == 0 } | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if the condition is met before the timeout occurs, | 
|  | #	RETURN_TIMEOUT if the timeout occurs before the condition is met | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_command args { | 
|  | set action          "none" | 
|  | set timeout         60 | 
|  | set poll_interval   1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fail       {set action "fail"; set args [lrange $args 1 end]} | 
|  | -subtest    {set action "subtest"; set args [lrange $args 1 end]} | 
|  | -time*      {set args [lassign $args - timeout]} | 
|  | -poll*      {set args [lassign $args - poll_interval]} | 
|  | -*          {fail "Unknown option: [lindex $args 0]"} | 
|  | default     break | 
|  | } | 
|  | } | 
|  |  | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 1} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } elseif {$argument_count > 2} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  | lassign $args command | 
|  | if {$argument_count == 2} { | 
|  | set condition [lindex $args 1] | 
|  | } else { | 
|  | set condition { [dict get $result exit_code] == 0 } | 
|  | } | 
|  |  | 
|  | set start_time [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  |  | 
|  | log_debug "Waiting for $condition" | 
|  |  | 
|  | while {1} { | 
|  | # Run command | 
|  | set result [run_command $command] | 
|  |  | 
|  | # Check condition | 
|  | if {[eval expr [format "{%s}" $condition]]} { | 
|  | set now [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  | log_debug "Condition ($condition) was met" | 
|  | if {$action eq "subtest"} { | 
|  | subpass "Condition ($condition) should be met (within $timeout seconds)" | 
|  | } | 
|  | return $::RETURN_SUCCESS | 
|  | } else { | 
|  | log_trace "Condition ($condition) was not met" | 
|  | } | 
|  |  | 
|  | # Sleep poll interval | 
|  | log_trace "Sleeping for $poll_interval seconds" | 
|  | after [expr {int($poll_interval * 1000)}] | 
|  |  | 
|  | # Check whether we have surpassed our timeout | 
|  | set now [format "%.3f" [expr [clock milliseconds] / 1000.000]] | 
|  | log_trace "Checking whether the current time ([clock format [expr int($now)] -format %Y-%m-%dT%X].[lindex [split $now '.'] 1]) is greater than the start time plus the timeout ([clock format [expr int($start_time + $timeout)] -format %Y-%m-%dT%X].[lindex [split [expr $start_time + $timeout] '.'] 1])" | 
|  | if {$now > $start_time + $timeout} { | 
|  | set message "Condition ($condition) was not met before timeout ($timeout seconds)" | 
|  | if {$action eq "fail"} { | 
|  | fail $message | 
|  | } else { | 
|  | log_warn $message | 
|  | if {$action eq "subtest"} { | 
|  | subfail "Condition ($condition) should be met (within $timeout seconds)" "Timed out" | 
|  | } | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_command_match - repeat a command until its output matches the specified pattern | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_command_match ?options? command pattern | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	A command is repeated until its output matches the specified pattern | 
|  | # | 
|  | # OPTIONS | 
|  | #	-negate | 
|  | #		negates the match. The command is repeated until its output does not | 
|  | #		match the specified pattern | 
|  | #	-fail | 
|  | #		fail the test if the output does not match the pattern within the | 
|  | #		timeout | 
|  | #	-subfail | 
|  | #		if the condition is met within the timeout call subpass, otherwise | 
|  | #		call subfail | 
|  | #	-timeout <float_number> | 
|  | #		time in seconds to wait for the pattern to be matched before | 
|  | #		timing out (default is 60.0) | 
|  | #	-pollinterval <float_number> | 
|  | #		time in seconds between each loop execution and match check | 
|  | #		(default is 1.0) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	command | 
|  | #		a string containing the command and arguments to execute | 
|  | #	pattern | 
|  | #		The regular expression to match against the command output | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if the pattern is matched before the timeout occurs, | 
|  | #	RETURN_TIMEOUT if the timeout occurs before the pattern is matched | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_command_match args { | 
|  |  | 
|  | set pattern [lindex $args end] | 
|  | set args [lrange $args 0 end-1] | 
|  | set match 1 | 
|  | set idx [lsearch $args "-negate"] | 
|  | if {$idx != -1} { | 
|  | set match 0 | 
|  | set args [lreplace $args $idx $idx]; # Remove the -negate argument | 
|  | } | 
|  |  | 
|  | return [wait_for_command {*}$args "\[regexp -- {$pattern} \[dict get \$result output\]\] == $match"] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_file - waits for a file to exist with non-zero size | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_file ?options? file_name | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		If an error occurs or the file does not become present | 
|  | #		by the timeout, fail the test rather than returning an error | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the file to exist before | 
|  | #		timing out (default is 90) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each file existence test (default is 1) | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait for the specified file to exist and have a non-zero size. | 
|  | #	Note that if JobFileAppend=0 is configured, a file can exist and | 
|  | #	be purged then be re-created. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if the file becomes present within the timeout, or | 
|  | #	non-zero value otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_file args { | 
|  | global bin_sleep | 
|  |  | 
|  | set fatal         false | 
|  | set timeout       90 | 
|  | set poll_interval 1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fatal  - | 
|  | -fail   {set fatal true; set args [lrange $args 1 end]} | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count != 1} { | 
|  | fail "Invalid number of arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args file_name | 
|  | } | 
|  |  | 
|  | for {set my_delay 0} {$my_delay <= $timeout} \ | 
|  | {set my_delay [expr $my_delay + $poll_interval]} { | 
|  | if {[file exists $file_name]} { | 
|  | # Add small delay for I/O buffering | 
|  | exec $bin_sleep 1 | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | exec $bin_sleep $poll_interval | 
|  |  | 
|  | # Expect may fail to load current NFS info. | 
|  | # Use the ls command to load current info. | 
|  | set slash_pos [string last $file_name "/"] | 
|  | if {$slash_pos < 1} { | 
|  | set dir_name "." | 
|  | } else { | 
|  | decr slash_pos | 
|  | set dir_name [string $file_name 0 $slash_pos] | 
|  | } | 
|  | exec /bin/ls $dir_name | 
|  | } | 
|  | set message "Timeout waiting for file ($file_name)" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } | 
|  |  | 
|  | log_warn $message | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_wait_for_single_job - waits for a job to reach the desired state | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_wait_for_single_job ?options? job_id desired_state | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait for a previously submitted Slurm job to reach the desired state. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		If an error occurs or the job does not reach the desired state | 
|  | #		by the timeout, fail the test rather than returning an error | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the job to be in the desired state | 
|  | #		before timing out (default is 360) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each job state check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_id | 
|  | #		The Slurm job id of a job we want to wait for. | 
|  | #	desired_state | 
|  | #		The state you want the job to attain before | 
|  | #		returning.  Currently supports: | 
|  | #			CANCELLED - job is cancelled | 
|  | #			DONE      - any terminated state (includes cancelled) | 
|  | #			PENDING   - job is pending | 
|  | #			RUNNING   - job is running | 
|  | #			SPECIAL_EXIT | 
|  | #			SUSPENDED - job is suspended | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS, or non-zero on error. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _wait_for_single_job args { | 
|  | global scontrol | 
|  |  | 
|  | set fatal         false | 
|  | set timeout       360 | 
|  | set poll_interval 1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fatal  - | 
|  | -fail   {set fatal true; set args [lrange $args 1 end]} | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count != 2} { | 
|  | fail "Invalid number of arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args job_id desired_state | 
|  | } | 
|  |  | 
|  | # First verify that desired_state is supported | 
|  | switch $desired_state { | 
|  | "CANCELLED" {} | 
|  | "DONE" {} | 
|  | "PENDING" {} | 
|  | "RUNNING" {} | 
|  | "SPECIAL_EXIT" {} | 
|  | "SUSPENDED" {} | 
|  | default { | 
|  | set message "Invalid desired state: $desired_state" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } | 
|  | log_warn $message | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | } | 
|  |  | 
|  | if {$job_id == 0} { | 
|  | set message "Invalid job ID ($job_id)" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } | 
|  | log_warn $message | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | set my_delay    0 | 
|  | while 1 { | 
|  | set fd [open "|$scontrol -o show job $job_id"] | 
|  | gets $fd line | 
|  | catch {close $fd} | 
|  | if {[regexp {JobState\s*=\s*(\w+)} $line foo state] != 1} { | 
|  | set state "NOT_FOUND" | 
|  | } | 
|  |  | 
|  | switch $state { | 
|  | "CANCELLED" { | 
|  | if {$desired_state eq "CANCELLED"} { | 
|  | log_debug "Job $job_id is CANCELLED" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | if {$desired_state eq "DONE"} { | 
|  | log_debug "Job $job_id is DONE ($state)" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | set message "Job ($job_id) is $state, but we wanted $desired_state" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } | 
|  | log_debug $message | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | "NOT_FOUND" - | 
|  | "BOOT_FAIL" - | 
|  | "COMPLETED" - | 
|  | "DEADLINE" - | 
|  | "FAILED" - | 
|  | "NODE_FAIL" - | 
|  | "OUT_OF_MEMORY" - | 
|  | "PREEMPTED" - | 
|  | "TIMEOUT" { | 
|  | if {$desired_state eq "DONE"} { | 
|  | log_debug "Job $job_id is DONE ($state)" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | set message "Job ($job_id) is $state, but we wanted $desired_state" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } | 
|  | log_debug $message | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | "PENDING" { | 
|  | if {$desired_state eq "PENDING"} { | 
|  | log_debug "Job $job_id is PENDING" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | log_debug "Job $job_id is in state $state, desire $desired_state" | 
|  | } | 
|  | "RUNNING" { | 
|  | if {$desired_state eq "RUNNING"} { | 
|  | log_debug "Job $job_id is RUNNING" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | log_debug "Job $job_id is in state $state, desire $desired_state" | 
|  | } | 
|  | "SPECIAL_EXIT" { | 
|  | if {$desired_state eq "SPECIAL_EXIT"} { | 
|  | log_debug "Job $job_id is SPECIAL_EXIT" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | log_debug "Job $job_id is in state $state, desire $desired_state" | 
|  | } | 
|  | "SUSPENDED" { | 
|  | if {$desired_state eq "SUSPENDED"} { | 
|  | log_debug "Job $job_id is SUSPENDED" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | log_debug "Job $job_id is in state $state, desire $desired_state" | 
|  | } | 
|  | default { | 
|  | log_debug "Job $job_id is in state $state, desire $desired_state" | 
|  | } | 
|  | } | 
|  |  | 
|  | if { $my_delay > $timeout } { | 
|  | set message "Timeout waiting for job state $desired_state" | 
|  | if {$fatal} { | 
|  | fail $message | 
|  | } | 
|  | log_warn "Timeout waiting for job state $desired_state" | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  |  | 
|  | exec sleep $poll_interval | 
|  | set my_delay [expr $my_delay + $poll_interval] | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_job - waits for job to be in desired state | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_job ?options? job_id desired_state ?het_job? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait for job to be in desired state. Can handle het job components. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		If an error occurs or the job does not reach the desired state | 
|  | #		by the timeout, fail the test rather than returning an error | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the job to be in the desired state | 
|  | #		before timing out (default is 90) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each job state check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_id | 
|  | #		The Slurm job id of a job we want to wait for. | 
|  | #	desired_state | 
|  | #		The state you want the job to attain before returning. | 
|  | #		Currently supports: | 
|  | #			CANCELLED - job is cancelled | 
|  | #			DONE      - any terminated state (including cancelled) | 
|  | #			PENDING   - job is pending | 
|  | #			RUNNING   - job is running | 
|  | #			SPECIAL_EXIT | 
|  | #			SUSPENDED - job is suspended | 
|  | #	het_job | 
|  | #		If set, checks the state of each component job if the job | 
|  | #		is a het one. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if job reaches the desired state, or non-zero value | 
|  | #	otherwise. | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_wait_for_single_job | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_job args { | 
|  |  | 
|  | set options  [list] | 
|  | set het_job  0 | 
|  |  | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fatal  - | 
|  | -fail   { | 
|  | lappend options [lindex $args 0] | 
|  | set args [lrange $args 1 end] | 
|  | } | 
|  | -time*  - | 
|  | -poll*  { | 
|  | lappend options {*}[lrange $args 0 1] | 
|  | set args [lrange $args 2 end] | 
|  | } | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 2} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } elseif {$argument_count > 3}  { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args job_id desired_state | 
|  | } | 
|  | if {$argument_count == 3} { set hetjob [lindex $args 2] } | 
|  |  | 
|  | if { $het_job } { | 
|  | # get component job ids | 
|  | set jid_list [get_het_job_ids $job_id 1] | 
|  | } | 
|  |  | 
|  | set rc       0 | 
|  | set jid_list "" | 
|  | if { $jid_list == "" } { | 
|  | # non-het job | 
|  | set jid_list $job_id | 
|  | } | 
|  |  | 
|  | foreach jid $jid_list { | 
|  | set rc [_wait_for_single_job {*}$options $jid $desired_state] | 
|  | if { $rc } { | 
|  | # bail out on first failure | 
|  | break | 
|  | } | 
|  | } | 
|  | return $rc | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_job_acct - waits for the job accounting record to be updated | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_job_acct ?options? job_id ?field? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait for evidence that a specific job stage has been recorded in the | 
|  | #	accounting database by verifying that a field has been populated. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		If an error occurs or the job does has not populated the field | 
|  | #		by the timeout, fail the test rather than returning an error | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the job's field to be populated | 
|  | #		before timing out (default is 90) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each field check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_id | 
|  | #		The Slurm job id of a job we want to wait for. | 
|  | #	field | 
|  | #		The field that we are waiting to be populated, not empty and | 
|  | #		not "Unknown". (Default: End) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if the desired field has been populated for the job | 
|  | #	in the accounting database, or non-zero value otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_job_acct args { | 
|  | global sacct | 
|  |  | 
|  | set options [list] | 
|  | set timeout 10 | 
|  | set pollinterval .2 | 
|  | set field End | 
|  |  | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fail   { | 
|  | lappend options [lindex $args 0] | 
|  | set args [lrange $args 1 end] | 
|  | } | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - pollinterval]} | 
|  | default break | 
|  | } | 
|  | } | 
|  | lappend options -timeout $timeout | 
|  | lappend options -pollinterval $pollinterval | 
|  |  | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 1} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } elseif {$argument_count > 2}  { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  | lassign $args job_id | 
|  | if {$argument_count == 2} {set field [lindex $args 1]} | 
|  |  | 
|  | # Wait for sacct to populate the specified field | 
|  | # We want to see the field be populated with anything but "Unknown" | 
|  | # We can't use a negation match here because empty would match | 
|  | return [wait_for_command_match {*}$options "$sacct -j $job_id --allocation --format $field --parsable2 --noheader" (?n)^(?!.*Unknown$).] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_account_done - cancels and waits on jobs in specified accounts | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_account_done ?options? accounts | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Cancel jobs on and wait for them to be finished in account(s) given. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the jobs to be finished before | 
|  | #		timing out (default is 360) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each job state check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	accounts | 
|  | #		Comma-delimited list of accounts | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if all jobs of the account are finished, or non-zero | 
|  | #	otherwise. | 
|  | # | 
|  | # NOTE | 
|  | #	We sleep for two seconds before replying that a job is | 
|  | #	done to give time for I/O completion (stdout/stderr files) | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_account_done args { | 
|  | global scancel squeue re_word_str | 
|  |  | 
|  | set timeout       360 | 
|  | set poll_interval 1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count != 1} { | 
|  | fail "Invalid number of arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args accounts | 
|  | } | 
|  |  | 
|  | if { $accounts == "" } { | 
|  | fail "An account must be specified" | 
|  | } | 
|  |  | 
|  | log_user 0 | 
|  | set account_list [split $accounts ","] | 
|  | foreach item $account_list { | 
|  | spawn $scancel -A $item | 
|  | expect { | 
|  | timeout { | 
|  | log_warn "No response from scancel" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | set my_delay    0 | 
|  | while 1 { | 
|  | set found 0 | 
|  | spawn $squeue -o Account=%a -h -A$accounts | 
|  | expect { | 
|  | -re "Account=($re_word_str)" { | 
|  | set found 1 | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if { !$found } { | 
|  | log_debug "Account(s) $accounts is/are empty" | 
|  | break | 
|  | } | 
|  |  | 
|  | if { $my_delay > $timeout } { | 
|  | log_error "Timeout waiting for account(s) ($accounts) to be finished" | 
|  | log_user 1 | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  |  | 
|  | exec sleep $poll_interval | 
|  | set my_delay [expr $my_delay + $poll_interval] | 
|  | } | 
|  | log_user 1 | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_part_done - cancels and waits on jobs in specified partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_part_done ?options? partition | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Cancel jobs on and wait for them to be finished in partition given. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the jobs to be finished before | 
|  | #		timing out (default is 360) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each job state check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	partition | 
|  | #		partition name | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if all jobs of the partition are finished, or non-zero | 
|  | #	otherwise. | 
|  | # | 
|  | # NOTE | 
|  | #	We sleep for two seconds before replying that a job is | 
|  | #	done to give time for I/O completion (stdout/stderr files) | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_part_done args { | 
|  | global scancel squeue re_word_str | 
|  |  | 
|  | set timeout       360 | 
|  | set poll_interval 1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count != 1} { | 
|  | fail "Invalid number of arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args partition | 
|  | } | 
|  |  | 
|  | if { $partition == "" } { | 
|  | fail "A partition must be specified" | 
|  | } | 
|  |  | 
|  | run_command -fail -nolog "$scancel -p $partition" | 
|  |  | 
|  | set my_delay    0 | 
|  | while 1 { | 
|  | set found 0 | 
|  | spawn $squeue -o Part=%P -h -p$partition | 
|  | expect { | 
|  | -re "Part=($re_word_str)" { | 
|  | set found 1 | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if { !$found } { | 
|  | log_debug "Partition $partition is empty" | 
|  | break | 
|  | } | 
|  |  | 
|  | if { $my_delay > $timeout } { | 
|  | log_error "Timeout waiting for partition ($partition) to be finished" | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  |  | 
|  | exec sleep $poll_interval | 
|  | set my_delay [expr $my_delay + $poll_interval] | 
|  | } | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_step - waits for a job step to be found | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_step ?options? step_id | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait for a job step to be found. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the job step to be found before | 
|  | #		timing out (default is 30) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each step existence check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	step_id | 
|  | #		job step id | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS if step_id is found, or non-zero otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_step args { | 
|  | global scontrol | 
|  |  | 
|  | set timeout       30 | 
|  | set poll_interval 1 | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count != 1} { | 
|  | fail "Invalid number of arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args step_id | 
|  | } | 
|  |  | 
|  | set my_delay 0 | 
|  | while 1 { | 
|  | set fd [open "|$scontrol -o show step $step_id"] | 
|  | gets $fd line | 
|  | catch {close $fd} | 
|  | if {[regexp {Nodes=} $line foo] == 1} { | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | if {[regexp {MidplaneList=} $line foo] == 1} { | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  | if { $my_delay > $timeout } { | 
|  | log_warn "Timeout waiting for job step" | 
|  | return $::RETURN_TIMEOUT | 
|  | } | 
|  |  | 
|  | log_debug "Step $step_id not done yet. Waiting for $poll_interval seconds" | 
|  | exec sleep $poll_interval | 
|  | set my_delay [expr $my_delay + $poll_interval] | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_job_reason - waits for a desired job state and reason | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_job_reason ?options? job_id ?desired_state? ?desired_reason_list? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait until the job is in desired state and reason is one | 
|  | #	of the desired ones or until the timeout. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the job state and reason before | 
|  | #		timing out (default is 360) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each job state check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_id | 
|  | #		The job to wait for | 
|  | #	desired_state | 
|  | #		Desired state. | 
|  | #	desired_reason_list | 
|  | #		List of desired reasons. Empty list means that any reason | 
|  | #		is ok. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS when job is in the desired state and reason is one | 
|  | #	of the desired ones, or non-zero otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_job_reason args { | 
|  | global scontrol re_word_str | 
|  |  | 
|  | set final_state "COMPLETED CANCELLED FAILED TIMEOUT DEADLINE | 
|  | OUT_OF_MEMORY" | 
|  |  | 
|  | set timeout       360 | 
|  | set poll_interval 1 | 
|  | set desired_state "PENDING" | 
|  | set desired_reason_list "" | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 1} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args job_id | 
|  | } | 
|  | if {$argument_count >= 2} { set desired_state [lindex $args 1] } | 
|  | if {$argument_count == 3} { set desired_reason_list [lindex $args 2] } | 
|  | if {$argument_count > 3} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  |  | 
|  | set log_user_prev [log_user -info] | 
|  | log_user 0 | 
|  |  | 
|  | set my_delay 0 | 
|  | set rc $::RETURN_ERROR | 
|  | while true { | 
|  | set pending 0 | 
|  | set has_reason 1 | 
|  | spawn $scontrol show job $job_id | 
|  | expect { | 
|  | -re "JobState=($re_word_str) Reason=(\\S+)" { | 
|  | set job_state $expect_out(1,string) | 
|  | set job_reason $expect_out(2,string) | 
|  | } | 
|  | timeout { | 
|  | log_error "No response from scontrol show job" | 
|  | set rc $::RETURN_TIMEOUT | 
|  | break | 
|  | } | 
|  | } | 
|  |  | 
|  | # Check if both state and reason are the desired ones | 
|  | if {$job_state == $desired_state} { | 
|  | set found 0 | 
|  | set reason_msg "" | 
|  | if {$desired_reason_list == ""} { | 
|  | set found 1 | 
|  | } | 
|  | foreach desired_reason $desired_reason_list { | 
|  | if {$job_reason == $desired_reason } { | 
|  | set reason_msg " with reason $job_reason" | 
|  | set found 1 | 
|  | } | 
|  | } | 
|  | if {$found} { | 
|  | log_debug "Job $job_id found $job_state$reason_msg" | 
|  | set rc $::RETURN_SUCCESS | 
|  | break | 
|  | } | 
|  | } elseif {[lsearch -exact final_state $job_state] >= 0} { | 
|  | # Job is in final step no need to wait longer | 
|  | log_error [format "Job in final state/reason '%s' / '%s' instead of the desired '%s' / '%s'." \ | 
|  | $job_state $job_reason \ | 
|  | $desired_state $desired_reason_list] | 
|  | set rc $::RETURN_ERROR | 
|  | break | 
|  | } | 
|  |  | 
|  | # Check if this was the last poll | 
|  | if {$my_delay > $timeout} { | 
|  | log_error "Timeout" | 
|  | set rc $::RETURN_TIMEOUT | 
|  | break | 
|  | } | 
|  | set remamining_sec [expr $timeout - $my_delay] | 
|  | log_debug [format "Job in state/reason '%s' / '%s' instead of the desired '%s' / '%s'." \ | 
|  | $job_state $job_reason \ | 
|  | $desired_state $desired_reason_list] | 
|  | log_debug [format "Polling again in %ss, %ss to timeout." \ | 
|  | $poll_interval $remamining_sec] | 
|  |  | 
|  | sleep $poll_interval | 
|  | set my_delay [expr $my_delay + $poll_interval] | 
|  | } | 
|  |  | 
|  | log_user $log_user_prev | 
|  | return $rc | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_config - returns a dictionary of slurm configuration parameters | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_config ?options? | 
|  | # | 
|  | # OPTIONS | 
|  | #	-dbd | 
|  | #		uses `sacctmgr show config` to return slurmdbd configuration | 
|  | #		parameters | 
|  | #	-slurm | 
|  | #		uses `scontrol show config` to return slurm configuration | 
|  | #		parameters (this is the default) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns a dictionary of parameter values | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_config args { | 
|  | global sacctmgr scontrol | 
|  |  | 
|  | set config_dict [list] | 
|  | set command "$scontrol" | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -slurm  {set command "$scontrol"; set args [lrange $args 1 end]} | 
|  | -dbd    {set command "$sacctmgr"; set args [lrange $args 1 end]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | if {[llength $args] > 0} { | 
|  | fail "[lindex [info level 0] 0]: No arguments allowed: $args" | 
|  | } | 
|  |  | 
|  | set output [run_command_output -fail -nolog "$command show config"] | 
|  |  | 
|  | foreach line [split $output "\n"] { | 
|  | if {[regexp {^(\S+) += (.*)$} $line {} param_name param_value] == 1} { | 
|  | dict set config_dict $param_name $param_value | 
|  | } | 
|  | } | 
|  |  | 
|  | return $config_dict | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_config_param - returns a slurm configuration parameter value | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_config_param ?options? parameter_name | 
|  | # | 
|  | # OPTIONS | 
|  | #	-dbd | 
|  | #		uses `sacctmgr show config` to return the specified slurmdbd | 
|  | #		configuration parameter value | 
|  | #	-slurm | 
|  | #		uses `scontrol show config` to return the specified slurm | 
|  | #		configuration parameter value (this is the default) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	parameter_name | 
|  | #		the parameter to return the value for | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Returns a specific configuration parameter value. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns the value of the specified parameter or MISSING if it does not | 
|  | #	exist. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_config_param args { | 
|  |  | 
|  | set options [list] | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -* { | 
|  | lappend options [lindex $args 0] | 
|  | set args [lrange $args 1 end] | 
|  | } | 
|  | default break | 
|  | } | 
|  | } | 
|  | if {[llength $args] == 1} { | 
|  | lassign $args parameter_name | 
|  | } else { | 
|  | fail "[lindex [info level 0] 0]: Invalid number of arguments ([llength $args]): $args" | 
|  | } | 
|  |  | 
|  | set config_dict [get_config {*}$options] | 
|  |  | 
|  | if [dict exists $config_dict $parameter_name] { | 
|  | return [dict get $config_dict $parameter_name] | 
|  | } else { | 
|  | return "MISSING" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	param_contains - test whether a comma-separated-list contains a specified value | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	param_contains haystack needle | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Searches for the specified value (needle) in the comma-separated-list | 
|  | #	string (haystack). Needle can be a glob-style pattern. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns a boolean value indicating whether the value (needle) was found | 
|  | #	in the comma-separated-list string (haystack) | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc param_contains { haystack needle } { | 
|  | if {[lsearch [split $haystack ","] $needle] != -1} { | 
|  | return true | 
|  | } else { | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	param_value - returns the value of a parameter in a comma-separated-list | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	param_value params_list param ?default? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Searches for the specified param in the comma-separated-list | 
|  | #	string (params_list) and returns its value. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns the value found or the optional default value if not found. | 
|  | #	If the param is found without a value, returns true (ie like | 
|  | #	param_contains). | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc param_value {params_list param {default false}} { | 
|  | global re_word_str | 
|  | foreach pair [split $params_list ","] { | 
|  | if {[regexp "$param" $pair] == 1} { | 
|  | if {[regexp "$param=($re_word_str)" $pair - value] == 1} { | 
|  | return $value | 
|  | } else { | 
|  | return true | 
|  | } | 
|  | } | 
|  | } | 
|  | return $default | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_affinity_types - gets the task plugins running with task/ stripped | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_affinity_types | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns comma separated list of task plugins running without the task/ | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_affinity_types { } { | 
|  | global scontrol re_word_str | 
|  |  | 
|  | log_user 0 | 
|  | set affinity "" | 
|  | spawn $scontrol show config | 
|  | expect { | 
|  | -re "TaskPlugin *= ($re_word_str)" { | 
|  | set parts [split $expect_out(1,string) ",/"] | 
|  | while 1 { | 
|  | set task_found [lsearch $parts "task"] | 
|  | if { $task_found == -1 } break | 
|  | set parts [lreplace $parts $task_found $task_found] | 
|  | } | 
|  | set affinity [join $parts ","] | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $affinity | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_mps_count_by_index - gets the count of a specific gres/mps device | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_mps_count_by_index index hostname | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns the Count of a specific gres/mps device | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_mps_count_by_index { index hostname } { | 
|  | global slurmd number re_word_str | 
|  |  | 
|  | log_user 0 | 
|  | set count 0 | 
|  | spawn $slurmd -G -N $hostname | 
|  | expect { | 
|  | -re "Gres Name=mps Type=$re_word_str Count=($number) Index=$index" { | 
|  | set count $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $count | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_mpi - determines if mpicc and the desired --mpi option are available | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_mpi desired_mpi | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Runs srun --mpi=list to determine if the desired mpi is available, as | 
|  | #	well as if mpicc set in globals.local is too. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if the desired mpi and mpicc are available, false otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_mpi {desired_mpi} { | 
|  | global srun eol mpicc | 
|  | set output [run_command_output -fail "$srun --mpi=list"] | 
|  |  | 
|  | return [regexp "${desired_mpi}${eol}" $output] && [file executable $mpicc] | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_influxdb_access - determines if user can access to the desired influxdb | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_influxdb_access host port database | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	It uses the global $influx CLI command to try to connect and use | 
|  | #	the desired database. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if we can connect to host:port and use the database, | 
|  | #	false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_influxdb_access {host port database} { | 
|  | global influx | 
|  | set connected 0 | 
|  | set access 0 | 
|  |  | 
|  | if {![file executable $influx]} { | 
|  | log_warn "Cannot execute influx command: $influx" | 
|  | return false | 
|  | } | 
|  |  | 
|  | set log_user_save [log_user -info] | 
|  | log_user 0 | 
|  | spawn $influx -host $host -port $port | 
|  | expect { | 
|  | -re "Connected to" { | 
|  | set connected 1 | 
|  | send "use $database\r" | 
|  | exp_continue | 
|  | } | 
|  | -re "Using database" { | 
|  | set access 1 | 
|  | send "quit\r" | 
|  | } | 
|  | -re "unable to parse authentication credentials" { | 
|  | send "quit\r" | 
|  | } | 
|  | -re "authorization failed" { | 
|  | send "quit\r" | 
|  | } | 
|  | timeout { | 
|  | fail "InfluxDB instance not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user $log_user_save | 
|  |  | 
|  | if {!$connected} { | 
|  | log_warn "Cannot connect to $host:$port" | 
|  | return false | 
|  | } | 
|  | if {!$access} { | 
|  | log_warn "Connected to $host:$port, but cannot use $database" | 
|  | return false | 
|  | } | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_bb_emulate - determines if Cray burst buffers API is emulated | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_bb_emulate | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if Cray burst buffers API is emulated, false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_bb_emulate { } { | 
|  | global scontrol | 
|  |  | 
|  | log_user 0 | 
|  | set bb_emulate false | 
|  | spawn $scontrol show burst | 
|  | expect { | 
|  | -re "EmulateCray" { | 
|  | set bb_emulate true | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  | return $bb_emulate | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_bb_persistent - determines if persistent burst buffers can be created by users | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_bb_persistent | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if Cray burst buffers can be created by users, | 
|  | #	false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_bb_persistent { } { | 
|  | global scontrol | 
|  |  | 
|  | log_user 0 | 
|  | set bb_persistent false | 
|  | spawn $scontrol show burst | 
|  | expect { | 
|  | -re "EnablePersistent" { | 
|  | set bb_persistent true | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  | return $bb_persistent | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_default_acct - gets user's default account | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_default_acct user | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns name of default account if exists, NULL otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_default_acct { user } { | 
|  | global sacctmgr re_word_str bin_id | 
|  |  | 
|  | log_user 0 | 
|  | set def_acct "" | 
|  |  | 
|  | if { !$user } { | 
|  | set user [get_my_user_name] | 
|  | } | 
|  |  | 
|  | spawn $sacctmgr -n list -P user $user format="DefaultAccount" | 
|  | expect { | 
|  | -re "($re_word_str)" { | 
|  | set def_acct $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $def_acct | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_cycle_count - get desired iteration count | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_cycle_count | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	For tests with iteration counts (e.g. test9.1, test9.2) | 
|  | #	return the desired iteration count | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns desired iteration count | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_cycle_count { } { | 
|  | global enable_memory_leak_debug | 
|  |  | 
|  | if {$enable_memory_leak_debug != 0} { | 
|  | return 2 | 
|  | } | 
|  | return 100 | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_select_type_params - determines SelectTypeParameters being used for a given partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_select_type_params ?partition? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Determine SelectTypeParameters being used for a given partition. | 
|  | #	If the partition is not specified, the default partition will be used. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns a string containing SelectTypeParameters | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_select_type_params { {partition ""} } { | 
|  | global scontrol bin_bash bin_grep re_word_str | 
|  |  | 
|  | log_user 0 | 
|  | set params "" | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | if {$partition ne ""} { | 
|  | spawn -noecho $bin_bash -c "exec $scontrol show part $partition | $bin_grep SelectTypeParameters" | 
|  | expect { | 
|  | -re "SelectTypeParameters *= *NONE" { | 
|  | exp_continue | 
|  | } | 
|  | -re "SelectTypeParameters *= *($re_word_str)" { | 
|  | set params $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | } | 
|  | if {$params eq ""} { | 
|  | spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectTypeParameters" | 
|  | expect { | 
|  | -re "SelectTypeParameters *= *($re_word_str)" { | 
|  | set params $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $params | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_config_select - checks if effectively using the select type | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_config_select type | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Determine if SelectType is equivalent to the passed type. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	type | 
|  | #		the desired SelectType to check (e.g. cons_tres) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if configured, false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_config_select { type } { | 
|  | set select_type [get_config_param "SelectType"] | 
|  | set select_type_parameters [get_config_param "SelectTypeParameters"] | 
|  |  | 
|  | if {$select_type eq "select/$type"} { | 
|  | return true | 
|  | } | 
|  |  | 
|  | return false | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_total_cpus - gets the total available CPUs on the default partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_total_cpus | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	The total available CPUs on the default partition. | 
|  | # | 
|  | # NOTE | 
|  | #	CoreSpecCount are not part of the total. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_total_cpus {} { | 
|  |  | 
|  | set total_cpu_count 0 | 
|  |  | 
|  | # Obtain the list of available nodes in the default partition | 
|  | set node_list [get_nodes_by_state] | 
|  |  | 
|  | # Tally the cpus on these nodes | 
|  | set nodes_dict [get_nodes] | 
|  | foreach node_name $node_list { | 
|  | set node_dict [dict get $nodes_dict $node_name] | 
|  | set node_cpu_count [dict get $node_dict "CPUTot"] | 
|  |  | 
|  | # Subtract out any spec cores | 
|  | if {[dict exists $node_dict "CoreSpecCount"] && [dict exists $node_dict "ThreadsPerCore"]} { | 
|  | set spec_cpu_count [expr [dict get $node_dict "CPUTot"] * [dict get $node_dict "ThreadsPerCore"]] | 
|  | incr node_cpu_count -$spec_cpu_count | 
|  | } | 
|  |  | 
|  | incr total_cpu_count $node_cpu_count | 
|  | } | 
|  |  | 
|  | return $total_cpu_count | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	is_running_in_container | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	is_running_in_container | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Determine if test script is running inside of a container. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	true if container detected or systemd-detect-virt is not found, | 
|  | #	false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc is_running_in_container {} { | 
|  | global bin_systemd_detect_virt | 
|  |  | 
|  | if {[run_command_status -nolog -none "$bin_systemd_detect_virt --version"]} { | 
|  | log_warn "$bin_systemd_detect_virt not found, assuming container" | 
|  | return true | 
|  | } | 
|  |  | 
|  | set result [run_command -nolog -none "$bin_systemd_detect_virt -c"] | 
|  | set output [string trimright [dict get $result output] "\r\n"] | 
|  | if { $output != "none"} { | 
|  | log_debug "Detected container type: $output" | 
|  | } | 
|  | if {[dict get $result exit_code]} { | 
|  | return false | 
|  | } | 
|  | return true | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	is_super_user - determines if user is root or SlurmUser | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	is_super_user ?user? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Determine if user is a Slurm super user (i.e. user | 
|  | #	root or configured SlurmUser) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	true is user is root or SlurmUser, false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc is_super_user {{user ""}} { | 
|  | global number | 
|  |  | 
|  | if {$user == ""} { | 
|  | set user [get_my_user_name] | 
|  | } | 
|  |  | 
|  | # Check if user is root | 
|  | if {$user eq "root"} { | 
|  | return true | 
|  | } | 
|  |  | 
|  | # Check if user is SlurmUser | 
|  | set slurm_user [get_config_param "SlurmUser"] | 
|  | if {[regexp "${user}\\($number\\)" $slurm_user match]} { | 
|  | return true | 
|  | } | 
|  |  | 
|  | return false | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	dec2hex - creates a 32 bit hex number from a signed decimal number | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	dec2hex value | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Create a 32 bit hex number from a signed decimal number | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	32 bit hex version of input 'value' | 
|  | # | 
|  | # SOURCE | 
|  | #	Courtesy of Chris Cornish | 
|  | #	http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982 | 
|  | # | 
|  | ################################################################ | 
|  | # Replace all non-decimal characters | 
|  |  | 
|  | proc dec2hex {value} { | 
|  | regsub -all {[^0-x\.-]} $value {} newtemp | 
|  | set value [string trim $newtemp] | 
|  | if {$value < 2147483647 && $value > -2147483648} { | 
|  | set tempvalue [format "%#010X" [expr $value]] | 
|  | return [string range $tempvalue 2 9] | 
|  | } elseif {$value < -2147483647} { | 
|  | return "80000000" | 
|  | } else { | 
|  | return "7FFFFFFF" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	uint2hex - creates a 32 bit hex number from an unsigned decimal | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	uint2hex value | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Create a 32 bit hex number from an unsigned decimal number. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	value | 
|  | #		unsigneddecimal number to convert | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	32 bit hex version of input 'value' | 
|  | # | 
|  | # SOURCE | 
|  | #	Courtesy of Chris Cornish | 
|  | #	http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982 | 
|  | # | 
|  | ################################################################ | 
|  | # Replace all non-decimal characters | 
|  |  | 
|  | proc uint2hex {value} { | 
|  | regsub -all {[^0-x\.-]} $value {} newtemp | 
|  | set value [string trim $newtemp] | 
|  | if {$value <= 4294967295 && $value >= 0} { | 
|  | set tempvalue [format "%#010X" [expr $value]] | 
|  | return [string range $tempvalue 2 9] | 
|  | } else { | 
|  | return "FFFFFFFF" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	partition_oversubscribe - determines the oversubscribe configuration of the specified partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	partition_oversubscribe ?partition? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Determine the oversubscribe configuration of the specified partition. | 
|  | #	If the partition is not specified, the default partition will be used. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Return the oversubscribe configuration of the specified partition. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc partition_oversubscribe { {partition ""} } { | 
|  | global sinfo | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | set oversubscribe "NO" | 
|  | log_debug "$sinfo --noheader --partition $partition --format %h" | 
|  | set fd [open "|$sinfo --noheader --partition $partition --format %h"] | 
|  | gets $fd line | 
|  | catch {close $fd} | 
|  | regexp {[a-zA-Z]+} $line oversubscribe | 
|  | return $oversubscribe | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	default_partition - determines the name of the default partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	default_partition | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Use scontrol to determine the name of the default partition | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Name of the current default partition, or fail if not found. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc default_partition {} { | 
|  | global scontrol | 
|  |  | 
|  | set name "" | 
|  | set fd [open "|$scontrol --all --oneliner show partition"] | 
|  | while {[gets $fd line] != -1} { | 
|  | if {[regexp {^PartitionName=([^ ]*).*Default=YES} $line frag name] | 
|  | == 1} { | 
|  | break | 
|  | } | 
|  | } | 
|  | catch {close $fd} | 
|  |  | 
|  | if {[string length $name] == 0} { | 
|  | fail "Could not identify the default partition" | 
|  | } | 
|  |  | 
|  | return $name | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	default_part_exclusive - determines if the default partition allocates whole nodes to jobs | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	default_part_exclusive | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Use scontrol to determine if the default partition | 
|  | #	allocates whole nodes to jobs | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Name of the current default partition | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc default_part_exclusive {} { | 
|  | set def_part [default_partition] | 
|  | set oversubscribe [partition_oversubscribe $def_part] | 
|  | if {$oversubscribe eq "EXCLUSIVE"} { | 
|  | return 1 | 
|  | } else { | 
|  | return 0 | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	make_bash_script - creates a bash script | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	make_bash_script script_name script_contents | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Create a bash script of name "script_name", and | 
|  | #	make the body of the script "script_contents". | 
|  | #	make_bash_script removes the file if it already exists, | 
|  | #	then generates the #! line, and then dumps "script_contents" | 
|  | #	to the file.  Finally, it makes certain that the script | 
|  | #	is executable. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	script_name | 
|  | #		file name for the bash script | 
|  | #	script_contents | 
|  | #		body of the script, not including the initial #! line. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Nothing. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc make_bash_script { script_name script_contents } { | 
|  | global bin_bash bin_chmod | 
|  |  | 
|  | file delete $script_name | 
|  | set fd [open $script_name "w"] | 
|  | puts $fd "#!$bin_bash" | 
|  | puts $fd $script_contents | 
|  | close $fd | 
|  | exec $bin_chmod 777 $script_name | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_acct_associations - checks associations | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_acct_associations | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Use sacctmgr to check associations | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	true if no error is found, false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_acct_associations { } { | 
|  | global sacctmgr number re_word_str | 
|  |  | 
|  | # 2 versions after 23.11 we can remove the below function | 
|  | # until then we should check older clusters lft/rgt | 
|  | set rc [check_acct_associations_lft] | 
|  |  | 
|  | if { !$rc } { | 
|  | return $rc; | 
|  | } | 
|  |  | 
|  | log_user 0 | 
|  | log_debug "Sanity-Checking Associations" | 
|  | # | 
|  | # Use sacctmgr to check associations | 
|  | # | 
|  | spawn $sacctmgr -n -p list assoc wopi wopl withd format=lineage,cluster | 
|  | expect { | 
|  | -re "($re_word_str)\\|($re_word_str)\\|" { | 
|  | # Here we are checking if we have duplicates and | 
|  | # setting up an array to check for holes later | 
|  |  | 
|  | set lineage $expect_out(1,string) | 
|  | set cluster $expect_out(2,string) | 
|  | set first [info exists found($cluster,$lineage)] | 
|  |  | 
|  | if { $first } { | 
|  | log_error "$cluster found lineage $lineage again" | 
|  | set rc false | 
|  | } else { | 
|  | set found($cluster,$lineage) 1 | 
|  | } | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sacctmgr add not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | log_user 1 | 
|  | return $rc | 
|  | } | 
|  |  | 
|  | proc check_acct_associations_lft { } { | 
|  | global sacctmgr number re_word_str | 
|  |  | 
|  | set rc true | 
|  | log_user 0 | 
|  |  | 
|  | log_debug "Sanity-Checking Associations" | 
|  |  | 
|  | set clusters "" | 
|  |  | 
|  | spawn $sacctmgr show cluster format=cluster,rpc -p | 
|  | expect { | 
|  | -re "($re_word_str)\\|($number)\\|" { | 
|  | # 9984 == 23.02, the last version where lft/rgt matter | 
|  | if { $expect_out(2,string) > 9984 } { | 
|  | exp_continue | 
|  | } | 
|  | set clusters [ concat $clusters "," $expect_out(1,string) ] | 
|  | exp_continue | 
|  | } | 
|  | } | 
|  |  | 
|  | # | 
|  | # Use sacctmgr to check associations | 
|  | # | 
|  | spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt,cluster clusters="$clusters" | 
|  | expect { | 
|  | -re "($number)\\|($number)\\|($re_word_str)\\|" { | 
|  | # Here we are checking if we have duplicates and | 
|  | # setting up an array to check for holes later | 
|  |  | 
|  | set cluster $expect_out(3,string) | 
|  | if { ![info exists c_min($cluster)] } { | 
|  | set c_min($cluster) -1 | 
|  | set c_max($cluster) -1 | 
|  | } | 
|  |  | 
|  | set num1 $expect_out(1,string) | 
|  | set num2 $expect_out(2,string) | 
|  | set first [info exists found($cluster,$num1)] | 
|  | set sec [info exists found($cluster,$num2)] | 
|  | #log_debug "$first=$num1 $sec=$num2" | 
|  | if { $first } { | 
|  | log_error "$cluster found lft $num1 again" | 
|  | set rc false | 
|  | } elseif { $sec } { | 
|  | log_error "$cluster found rgt $num2 again" | 
|  | set rc false | 
|  | } else { | 
|  | set found($cluster,$num1) 1 | 
|  | set found($cluster,$num2) 1 | 
|  | if { $c_min($cluster) == -1 | 
|  | || $c_min($cluster) > $num1 } { | 
|  | set c_min($cluster) $num1 | 
|  | } | 
|  | if { $c_max($cluster) == -1 | 
|  | || $c_max($cluster) < $num2 } { | 
|  | set c_max($cluster) $num2 | 
|  | } | 
|  | } | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sacctmgr add not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | foreach cluster [array names c_min] { | 
|  | # Here we are checking for holes in the list from above | 
|  | for {set inx $c_min($cluster)} {$inx < $c_max($cluster)} {incr inx} { | 
|  | if { ![info exists found($cluster,$inx)] } { | 
|  | log_error "$cluster No index at $inx" | 
|  | set rc false | 
|  | } | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  | return $rc | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_job_acct_freq - gets the value of the job account gather frequency | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_job_acct_freq | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	job account gather frequency | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_job_acct_freq { } { | 
|  | global scontrol number | 
|  |  | 
|  | log_user 0 | 
|  | set freq_val 0 | 
|  |  | 
|  | spawn $scontrol show config | 
|  | expect { | 
|  | -re "JobAcctGatherFrequency *= ($number)" { | 
|  | set freq_val $expect_out(1,string) | 
|  | if {$freq_val == 0} { | 
|  | set freq_val 0 | 
|  | } | 
|  | } | 
|  | -re "JobAcctGatherFrequency *= task=($number)" { | 
|  | set freq_val $expect_out(1,string) | 
|  | if {$freq_val == 0} { | 
|  | set freq_val 0 | 
|  | } | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | log_user 1 | 
|  | return $freq_val | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_admin_level - gets the AdminLevel of the user | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_admin_level ?user? | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	AdminLevel for the current user | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_admin_level {{user_name ""}} { | 
|  | global sacctmgr re_word_str re_word_str bin_id | 
|  |  | 
|  | set admin_level "" | 
|  |  | 
|  | if {$user_name == ""} { | 
|  | set user_name [get_my_user_name] | 
|  | if { ![string length $user_name] } { | 
|  | log_error "No name returned from id" | 
|  | return "" | 
|  | } | 
|  | } | 
|  |  | 
|  | if {[is_super_user $user_name]} { | 
|  | return "Administrator" | 
|  | } | 
|  |  | 
|  | # | 
|  | # Use sacctmgr to check admin_level | 
|  | # | 
|  | log_user 0 | 
|  | spawn $sacctmgr -n -P list user $user_name format=admin | 
|  | expect { | 
|  | -re "($re_word_str)" { | 
|  | set admin_level $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sacctmgr add not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $admin_level | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################# | 
|  | # | 
|  | # NAME | 
|  | #	scale_to_megs - scales the value by the factor T|G|M to megabytes | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	scale_to_megs value factor | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	scale the value by the factor T|G|M to megabytes | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the scaled variable | 
|  | # | 
|  | ################################################# | 
|  |  | 
|  | proc scale_to_megs { value factor } { | 
|  |  | 
|  | if {$factor == "T"} { | 
|  | set value [expr $value * 1024 * 1024] | 
|  | } elseif {$factor == "G"} { | 
|  | set value [expr $value * 1024] | 
|  | } elseif {$factor == "M"} { | 
|  | set value [expr $value * 1] | 
|  | } elseif {$factor == "K"} { | 
|  | set value [expr $value / 1024] | 
|  | set value [expr {round($value)}] | 
|  | } else { | 
|  | set value [expr $value / (1024 * 1024)] | 
|  | set value [expr {round($value)}] | 
|  | } | 
|  |  | 
|  | return $value | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################# | 
|  | # | 
|  | # NAME | 
|  | #	scale_to_ks - scales the value by the factor G|M|K to kilobytes | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	scale_to_ks value factor | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	scale the value by the factor G|M|K to kilobytes | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the scaled variable | 
|  | # | 
|  | ################################################# | 
|  |  | 
|  | proc scale_to_ks { value factor } { | 
|  |  | 
|  | if {$factor == "G"} { | 
|  | set value [expr $value * 1024 * 1024] | 
|  | } elseif {$factor == "M"} { | 
|  | set value [expr $value * 1024] | 
|  | } elseif {$factor == "K"} { | 
|  | set value [expr $value * 1] | 
|  | } else { | 
|  | set value [expr $value / 1024] | 
|  | set value [expr {round($value)}] | 
|  | } | 
|  |  | 
|  | return $value | 
|  | } | 
|  |  | 
|  |  | 
|  | ############################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_config_node_mem - checks that the nodes have memory configured | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_config_node_mem | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	true if all nodes have memory, false otherwise | 
|  | # | 
|  | ############################################################ | 
|  |  | 
|  | proc check_config_node_mem { } { | 
|  |  | 
|  | set nodes_dict [get_nodes] | 
|  | dict for {node_name node_dict} $nodes_dict { | 
|  | if [dict exists $node_dict "RealMemory"] { | 
|  | if {[dict get $node_dict "RealMemory"] == 1} { | 
|  | return false | 
|  | } | 
|  | } else { | 
|  | log_warn "Parameter RealMemory not found on node $node_name" | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	wait_for_node - waits for nodes in a partition to reach a certain state | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	wait_for_node ?options? state num_nodes ?partition? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Wait for a certain number of nodes in a partition to reach a certain | 
|  | #	state. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-timeout <integer_number> | 
|  | #		time in seconds to wait for the node state before | 
|  | #		timing out (default is 3) | 
|  | #	-pollinterval <integer_number> | 
|  | #		time in seconds between each node state check (default is 1) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	state | 
|  | #		The node state to wait for | 
|  | #	num_nodes | 
|  | #		The number of nodes we want to be in the specified state | 
|  | #	partition | 
|  | #		Partition name (the default partition is used if not specified) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS, or non-zero on failure | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc wait_for_node args { | 
|  | global sinfo number | 
|  |  | 
|  | set partition     "" | 
|  | set timeout       3 | 
|  | set poll_interval 1 | 
|  | set desired_state "PENDING" | 
|  | set desired_reason_list "" | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -time*  {set args [lassign $args - timeout]} | 
|  | -poll*  {set args [lassign $args - poll_interval]} | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 2} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args state num_nodes | 
|  | } | 
|  | if {$argument_count == 3} { set partition [lindex $args 2] } | 
|  | if {$argument_count > 3} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  |  | 
|  | set wait_time 0 | 
|  | set done      0 | 
|  | set cnt       0 | 
|  | set rt        $::RETURN_SUCCESS | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | while {$done != 1 && $wait_time < $timeout} { | 
|  | set output [run_command_output -fail "$sinfo --noheader --partition $partition --state $state --format %D"] | 
|  | regexp "$number" $output cnt | 
|  |  | 
|  | if {$num_nodes <= $cnt} { | 
|  | set done 1 | 
|  | } else { | 
|  | log_debug "Partition $partition has $cnt nodes idle and we want $num_nodes" | 
|  | sleep $poll_interval | 
|  | incr wait_time 1 | 
|  | } | 
|  | } | 
|  | if {$done != 1} { | 
|  | set rt $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | return $rt | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	node_list_to_range - converts a TCL list into a Slurm hostlist using scontrol | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	node_list_to_range nodes_list | 
|  | # | 
|  | # ARGUMENTS | 
|  | # | 
|  | #	nodes_list | 
|  | #		a TCL list of node names | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the hostlist form returned by scrontrol show hostlist | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc node_list_to_range {nodes_list} { | 
|  | global scontrol | 
|  |  | 
|  | set comalist [join $nodes_list ,] | 
|  | set hostlist [run_command_output -nolog -fail "$scontrol show hostlist $comalist"] | 
|  | set hostlist [string trimright $hostlist "\r\n"] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	list_to_range - converts a list of integer numbers to a range expression | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	list_to_range ?numeric_list? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Collapse a numeric list to a range expression defined by the following | 
|  | #	EBNF: | 
|  | #		<expression> ::= <range> {, <range>}* | 
|  | #		<range> ::= <integer> - <integer> | <integer> | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	a range expression representing the numeric elements on the given list. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc list_to_range {numeric_list} { | 
|  | set node_range_expression [node_list_to_range $numeric_list] | 
|  | set range_expression [regsub {^\[(.*)\]$} $node_range_expression {\1}] | 
|  | return $range_expression | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	node_range_to_list - converts a node range expression into a list of nodes using scontrol | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	node_range_to_list node_range_expression | 
|  | # | 
|  | # ARGUMENTS | 
|  | # | 
|  | #	node_range_expression | 
|  | #		a node range expression accepted by scontrol | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	a list contining all node names expanded from the node range expression | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc node_range_to_list {node_range_expression} { | 
|  | global scontrol | 
|  |  | 
|  | set node_list [list] | 
|  | set output [run_command_output -nolog -fail "$scontrol show hostnames $node_range_expression"] | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { | 
|  | break | 
|  | } | 
|  | lappend node_list $line | 
|  | } | 
|  | return $node_list | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	range_to_list - converts a range expression into a list with the numbers of the range | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	range_to_list ?range_expression? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Expands a range expression defined by the following EBNF into a numeric list | 
|  | #		<expression> ::= <range> {, <range>}* | 
|  | #		<range> ::= <integer> - <integer> | <integer> | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns the list of integer numbers defined by the range_expression | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc range_to_list {range_expression} { | 
|  | set node_range_expression \[$range_expression\] | 
|  | set range_list [node_range_to_list $node_range_expression] | 
|  | return $range_list | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	get_nodes_by_state - gets the list of node names in a given partition/states | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_nodes_by_state partition states | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	sinfo is used to list node names and states in the specified partition. | 
|  | #	This list of nodes is filtered to return only the nodes matching one of | 
|  | #	the requested states. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	partition | 
|  | #		partition to get nodes off | 
|  | #	states | 
|  | #		comma-separated list of allowed states | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	list of nodes having the required state | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc get_nodes_by_state {{states ""} {partition ""}} { | 
|  | global sinfo | 
|  |  | 
|  | set node_list [list] | 
|  |  | 
|  | if {$partition eq ""} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | if {$states eq ""} { | 
|  | set states "idle" | 
|  | } | 
|  |  | 
|  | set output [run_command_output -fail "$sinfo -h -N -p $partition -o '%N' -e -t $states"] | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { continue } | 
|  | lappend node_list $line | 
|  | } | 
|  |  | 
|  | return $node_list | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	set_partition_maximum_time_limit - sets the maximum time limit in a given partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	set_partition_maximum_time_limit partition limit | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS, or non-zero on error | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc set_partition_maximum_time_limit {partition limit} { | 
|  | global scontrol | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | if { $partition == "" } { | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | } | 
|  |  | 
|  | if { $limit < -1 } { | 
|  | fail "Trying to set invalid partition time limit of $limit" | 
|  | } | 
|  | if { $limit == -1 } { | 
|  | set expected_lim "UNLIMITED" | 
|  | } else { | 
|  | set expected_lim limit | 
|  | } | 
|  |  | 
|  | run_command -fail "$scontrol update partitionname=$partition MaxTime=-1" | 
|  |  | 
|  | set maxtime [get_partition_maximum_time_limit $partition] | 
|  | if { $maxtime != $limit } { | 
|  | log_error "Unable to update partition MaxTime, got $maxtime, wanted $limit" | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	get_partition_maximum_time_limit - gets the maximum time limit in a given partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_partition_maximum_time_limit partition | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Get the maximum time limit in a given partition | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	time limit in seconds, -1 if undefined or error | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc get_partition_maximum_time_limit {partition} { | 
|  | global sinfo number | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | set secs 0 | 
|  | log_user 0 | 
|  | spawn -noecho $sinfo -h -p $partition -O time -e | 
|  | expect { | 
|  | -re "infinite" { | 
|  | set secs -1 | 
|  | exp_continue | 
|  | } | 
|  | -re "n/a" { | 
|  | set secs -1 | 
|  | exp_continue | 
|  | } | 
|  | -re "($number)-($number):($number):($number)" { | 
|  | set days  [expr $expect_out(1,string) * 24 * 60 * 60] | 
|  | set hours [expr $expect_out(2,string) * 60 * 60] | 
|  | set mins  [expr $expect_out(3,string) * 60] | 
|  | set secs  [expr $days + $hours + $mins + $expect_out(4,string)] | 
|  | exp_continue | 
|  | } | 
|  | -re "($number):($number):($number)" { | 
|  | set hours [expr $expect_out(1,string) * 60 * 60] | 
|  | set mins  [expr $expect_out(2,string) * 60] | 
|  | set secs  [expr $hours + $mins + $expect_out(3,string)] | 
|  | exp_continue | 
|  | } | 
|  | -re "($number):($number)" { | 
|  | set mins  [expr $expect_out(1,string) * 60] | 
|  | set secs  [expr $mins + $expect_out(2,string)] | 
|  | exp_continue | 
|  | } | 
|  | -re "($number)" { | 
|  | set secs  [expr $expect_out(1,string) * 60] | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sinfo not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | log_user 1 | 
|  | return $secs | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_partition_default_time_limit - gets the default time limit in a given partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_partition_default_time_limit ?partition? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Get the default time limit in a given partition. | 
|  | #	If the partition is not specified, the default partition will be used. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns: time limit in seconds, -1 if undefined or error. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_partition_default_time_limit { {partition ""} } { | 
|  | global sinfo number | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | set secs 0 | 
|  | log_user 0 | 
|  | spawn -noecho $sinfo -h -p $partition -O defaulttime -e | 
|  | expect { | 
|  | -re "infinite" { | 
|  | set secs -1 | 
|  | exp_continue | 
|  | } | 
|  | -re "n/a" { | 
|  | set secs -1 | 
|  | exp_continue | 
|  | } | 
|  | -re "($number)-($number):($number):($number)" { | 
|  | set days  [expr $expect_out(1,string) * 24 * 60 * 60] | 
|  | set hours [expr $expect_out(2,string) * 60 * 60] | 
|  | set mins  [expr $expect_out(3,string) * 60] | 
|  | set secs  [expr $days + $hours + $mins + $expect_out(4,string)] | 
|  | exp_continue | 
|  | } | 
|  | -re "($number):($number):($number)" { | 
|  | set hours [expr $expect_out(1,string) * 60 * 60] | 
|  | set mins  [expr $expect_out(2,string) * 60] | 
|  | set secs  [expr $hours + $mins + $expect_out(3,string)] | 
|  | exp_continue | 
|  | } | 
|  | -re "($number):($number)" { | 
|  | set mins  [expr $expect_out(1,string) * 60] | 
|  | set secs  [expr $mins + $expect_out(2,string)] | 
|  | exp_continue | 
|  | } | 
|  | -re "($number)" { | 
|  | set secs  [expr $expect_out(1,string) * 60] | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sinfo not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | log_user 1 | 
|  | return $secs | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	get_node_cores - given a node, returns its total number of cores | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_node_cores node | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Given a node, return its total number of cores | 
|  | #	(not the CoresPerSocket, but the total cores) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	node cores if retrieved, -1 otherwise | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc get_node_cores {node} { | 
|  | global sinfo number | 
|  |  | 
|  | set cores -1 | 
|  | set sockets_per_node 0 | 
|  | set cores_per_socket 0 | 
|  |  | 
|  | if {[string length $node] == 0} { | 
|  | return $cores | 
|  | } | 
|  |  | 
|  | log_user 0 | 
|  | spawn -noecho $sinfo -o "%X %Y" -h -n $node | 
|  | expect { | 
|  | -re "($number)" { | 
|  | if {$sockets_per_node == 0} { | 
|  | set sockets_per_node $expect_out(1,string) | 
|  | } else { | 
|  | set cores_per_socket $expect_out(1,string) | 
|  | } | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sinfo not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | set cores [expr $sockets_per_node * $cores_per_socket] | 
|  |  | 
|  | return $cores | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	get_node_cpus - given a node, returns its total number of threads we account for | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_node_cpus node | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Given a node, return its total number of threads we account for. | 
|  | #	(not always ThreadsPerCore, but how many threads are in use. | 
|  | #	i.e. CPUs=6 CoresPerSocket=6 ThreadsPerCore=2 Socket=1 would | 
|  | #	result in only 1 thread we care about instead of the 2 listed.) | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	list of node [ tot_cpus threads ] if retrieved, [ -1 -1 ] otherwise | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc get_node_cpus {node} { | 
|  | global scontrol number | 
|  |  | 
|  | set nthreads -1 | 
|  | set nsockets 0 | 
|  | set ncores 0 | 
|  | set totcpus -1 | 
|  |  | 
|  | if {[string length $node] == 0} { | 
|  | return [list $totcpus $nthreads] | 
|  | } | 
|  |  | 
|  | # Get the number of CPUs on a node | 
|  | spawn $scontrol show node $node | 
|  | expect { | 
|  | -re "CoresPerSocket=($number)" { | 
|  | set ncores $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "CPUTot=($number)" { | 
|  | set totcpus $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "Sockets=($number)" { | 
|  | set nsockets $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "ThreadsPerCore=($number)" { | 
|  | set nthreads $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "scontrol is not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | set core_cnt [expr $nsockets * $ncores] | 
|  | set thread_cnt [expr $ncores * $nthreads] | 
|  | if {$totcpus != $nthreads && $totcpus == $ncores} { | 
|  | log_debug "Cores rather than threads are being allocated" | 
|  | set nthreads 1 | 
|  | } | 
|  |  | 
|  | return [list $totcpus $nthreads] | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	get_part_total_cores - given a partition and/or states, return its total cores | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_part_total_cores partition states | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Given a partition and/or states, return its total cores | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	partition | 
|  | #		partition to check cores | 
|  | #	states | 
|  | #		states to filter on partition cores | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	partition cores | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc get_part_total_cores {part states} { | 
|  | global sinfo number | 
|  |  | 
|  | set cores 0 | 
|  | set tmp 0 | 
|  | set i 0 | 
|  |  | 
|  | if {[string length $part] == 0} { | 
|  | set part [default_partition] | 
|  | } | 
|  |  | 
|  | log_user 0 | 
|  | if {[string length $states] == 0} { | 
|  | spawn -noecho $sinfo -h -N -p $part -o "%X %Y" | 
|  | } else { | 
|  | spawn -noecho $sinfo -h -N -p $part -t $states -o "%X %Y" | 
|  | } | 
|  | expect { | 
|  | -re "($number)" { | 
|  | set is_even [expr {($i % 2) == 0}] | 
|  | if {$is_even == 1} { | 
|  | set tmp $expect_out(1,string) | 
|  | } else { | 
|  | set tmp [expr $tmp * $expect_out(1,string)] | 
|  | set cores [expr $cores + $tmp] | 
|  | } | 
|  | incr i | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "sinfo not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $cores | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	check_hosts_contiguous - verify if all hosts belong to the partition and are contiguous | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_hosts_contiguous check_hosts_list partition | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Given a partition and a list of hosts, verify if all | 
|  | #	hosts belong to the partition and are contiguous. | 
|  | #	If the partition argument is empty, the default partition | 
|  | #	will be used. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns: true if hosts are contiguous, false otherwise. | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc check_hosts_contiguous { check_hosts_list {partition ""} } { | 
|  | global sinfo re_word_str | 
|  |  | 
|  | if {[string length $partition] == 0} { | 
|  | set partition [default_partition] | 
|  | } | 
|  |  | 
|  | set part_hosts_list {} | 
|  |  | 
|  | log_user 0 | 
|  | spawn $sinfo --noheader -p $partition -N -o %N | 
|  | expect { | 
|  | -re "($re_word_str)" { | 
|  | lappend part_hosts_list $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "Unable to contact" { | 
|  | fail "Slurm appears to be down" | 
|  | } | 
|  | timeout { | 
|  | fail "sinfo not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | foreach host $check_hosts_list { | 
|  | set idx_cur [lsearch $part_hosts_list $host] | 
|  | if {$idx_cur == -1} { | 
|  | fail "Host ($host) not found in list of hosts from partition $partition" | 
|  | } | 
|  | if {[info exists idx_old]} { | 
|  | if {$idx_cur != [expr $idx_old + 1]} { | 
|  | log_error "Node sequence number not contiguous" | 
|  | return false | 
|  | } | 
|  | } | 
|  | set idx_old $idx_cur | 
|  | } | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_het_job_ids - gets list of component job ids for a het job | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_het_job_ids job_id ?use_offset? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Gets list of component job ids for a het job. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_id | 
|  | #		Slurm job id | 
|  | #	use_offset | 
|  | #		If zero, returns list of integer job ids, else returns ids in | 
|  | #		the form of X+Y where X is het job master id and Y is the | 
|  | #		offset. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A list of ids for a hetjob or an empty list if jobid | 
|  | #	is not a het one. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_het_job_ids { jobid {use_offset 0}} { | 
|  | global scontrol number | 
|  |  | 
|  | set id_list "" | 
|  | set log_user_save [log_user -info] | 
|  | log_user 0 | 
|  | spawn $scontrol show job $jobid | 
|  | expect { | 
|  | -re "JobId=($number) HetJobId=($number) HetJobOffset=($number)" { | 
|  | if { $use_offset } { | 
|  | lappend id_list "$expect_out(2,string)+$expect_out(3,string)" | 
|  | } else { | 
|  | lappend id_list $expect_out(1,string) | 
|  | } | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "scontrol not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user $log_user_save | 
|  | return $id_list | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	reconfigure - calls scontrol reconfigure | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	reconfigure ?options? ?cluster? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Calls scontrol reconfigure. This routine takes the same options as | 
|  | #	run_command, passing them to the underlying run_command invocation. | 
|  | #	This command waits an additional 5 seconds before returning. | 
|  | # | 
|  | # OPTIONS | 
|  | #	See OPTIONS of run_command proc. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	cluster | 
|  | #		The cluster to reconfigure | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS on success, otherwise RETURN_ERROR | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc reconfigure args { | 
|  | global scontrol | 
|  |  | 
|  | set options [list] | 
|  | set cluster "" | 
|  |  | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -* { | 
|  | lappend options {*}[lrange $args 0 1] | 
|  | set args [lrange $args 2 end] | 
|  | } | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count > 1}  { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } elseif {$argument_count == 1} { | 
|  | lassign $args cluster | 
|  | } | 
|  |  | 
|  | set command $scontrol | 
|  | if {$cluster ne ""} { | 
|  | append command " -M$cluster" | 
|  | } | 
|  | append command " reconfigure" | 
|  | set rc [run_command_status {*}$options "$command"] | 
|  |  | 
|  | # | 
|  | # Wait 5 seconds for reconfigure to complete, then return. | 
|  | # | 
|  | sleep 5 | 
|  | return $rc | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_fatal - prints a fatal message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_fatal message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_fatal {message} { | 
|  | global testsuite_log_level LOG_LEVEL_FATAL | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_FATAL} { | 
|  | _log_format "fatal" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_error - prints an error message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_error message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_error {message} { | 
|  | global testsuite_log_level LOG_LEVEL_ERROR | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_ERROR} { | 
|  | _log_format "error" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_warn - prints a warning message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_warn message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_warn {message} { | 
|  | global testsuite_log_level LOG_LEVEL_WARNING | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_WARNING} { | 
|  | _log_format "warning" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_info - prints an information message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_info message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_info {message} { | 
|  | global testsuite_log_level LOG_LEVEL_INFO | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_INFO} { | 
|  | _log_format "info" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_pass - prints a pass level message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_pass message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_pass {message} { | 
|  | global testsuite_log_level LOG_LEVEL_PASS | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_PASS} { | 
|  | _log_format "pass" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_command - prints a command level message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_command message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_command {message} { | 
|  | global testsuite_log_level LOG_LEVEL_COMMAND | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_COMMAND} { | 
|  | _log_format "command" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_debug - prints a debug level message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_debug message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_debug {message} { | 
|  | global testsuite_log_level LOG_LEVEL_DEBUG | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_DEBUG} { | 
|  | _log_format "debug" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ##################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	log_trace - prints a trace level message | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	log_trace message | 
|  | # | 
|  | # SEE ALSO | 
|  | #	_log_format for options governing the message format and colorization | 
|  | # | 
|  | ##################################################################### | 
|  |  | 
|  | proc log_trace {message} { | 
|  | global testsuite_log_level LOG_LEVEL_TRACE | 
|  |  | 
|  | if {$testsuite_log_level >= $LOG_LEVEL_TRACE} { | 
|  | _log_format "trace" "$message" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	in_fed - checks whether this cluster is in a federation | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	in_fed | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if this cluster is in a federation, false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc in_fed {} { | 
|  | global scontrol | 
|  |  | 
|  | set output [run_command_output -fail -nolog "$scontrol show fed"] | 
|  | if {[regexp "Federation" $output]} { | 
|  | return true | 
|  | } | 
|  |  | 
|  | return false | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	check_job_state - checks if the state of a job is the expected one | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	check_job_state job state ?het_job? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Checks if the state of a job is the expected one. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job | 
|  | #		Job ID to check | 
|  | #	state | 
|  | #		Desired state of the job to match | 
|  | #	het_job | 
|  | #		If set, checks state of each component job if the | 
|  | #		job is a hetjob. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	true if job was on the desired state, or the number of job components | 
|  | #	on that state if it's a hetjob and het_job option enabled, false | 
|  | #	otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc check_job_state { job state {het_job 0}} { | 
|  | global scontrol | 
|  |  | 
|  | set jid_list "" | 
|  | if { $het_job } { | 
|  | set jid_list [get_het_job_ids $job 1] | 
|  | } | 
|  |  | 
|  | if { $jid_list == "" } { | 
|  | # non-het job | 
|  | set jid_list $job | 
|  | } | 
|  |  | 
|  | foreach jid $jid_list { | 
|  | set state_match 0 | 
|  | spawn $scontrol show job $jid | 
|  | expect { | 
|  | -re "JobState=($state)" { | 
|  | incr state_match | 
|  | } | 
|  | timeout { | 
|  | fail "scontrol not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if {$state_match != 1} { | 
|  | log_error "job $jid should be in $state state, but is not" | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_gres_count - returns a dict of nodes and GRES counts | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_gres_count gres_name ?node_list? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Returns a dict of node names and the count of a specifed | 
|  | #	GRES aggregating all its types on each node. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	If the node_list is not specified node name is specified, | 
|  | #	this function will return a dict with the GRES count for all | 
|  | #	the nodes of the default partition. | 
|  | #	If specified, a dict only with the nodes of the node_list. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_gres_count { gres_name {node_list ""} } { | 
|  | set nodes_dict [get_nodes $node_list] | 
|  | set nodes_gres_dict [dict create] | 
|  | dict for {node_name node_dict} $nodes_dict { | 
|  | if [dict exists $node_dict "Gres"] { | 
|  | set gres_param [dict get $node_dict "Gres"] | 
|  | set gres_dict  [count_gres $gres_param] | 
|  | } | 
|  | if [dict exists $gres_dict $gres_name] { | 
|  | set gres_count [dict get $gres_dict $gres_name] | 
|  | dict set nodes_gres_dict $node_name $gres_count | 
|  | } | 
|  | } | 
|  | return $nodes_gres_dict | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	count_gres - returns a dict of GRES names and their total counts | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	count_gres gres_param | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Parses a GRES parameter string typically obtained from nodes or | 
|  | #	jobs info, and returns a dict of GRES names and their count | 
|  | #	aggregating all the types of each GRES. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	gres_param | 
|  | #		The usual coma-separated list of Gres (e.g gpu:2,craynetwork:1). | 
|  | #	only_consumable | 
|  | #		If true, the no_consume gres are ignored. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A dict of GRES names and their count aggregating all types of | 
|  | #	each GRES. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc count_gres {gres_param {only_consumable false}} { | 
|  | global gres_regex | 
|  | set gres_dict [dict create] | 
|  | foreach gres [split $gres_param ","] { | 
|  | if {[regexp $gres_regex $gres {} name type nc count] == 1} { | 
|  | if {$only_consumable && ($type eq "no_consume" || $nc eq "no_consume")} { | 
|  | continue | 
|  | } elseif {$nc eq ""} { | 
|  | set count $type | 
|  | } elseif {$count eq ""} { | 
|  | set count $nc | 
|  | } | 
|  |  | 
|  | if {[dict exists $gres_dict $name]} { | 
|  | dict set gres_dict $name [expr [dict get $gres_dict $name] + $count] | 
|  | } else { | 
|  | dict set gres_dict $name $count | 
|  | } | 
|  | } | 
|  | } | 
|  | return $gres_dict | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_highest_gres_count - returns highest number of GRES per node on node_count nodes | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_highest_gres_count node_count gres_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	For a given number of nodes, returns the highest GRES count per | 
|  | #	node available on at least that number of nodes. | 
|  | # | 
|  | # EXAMPLE | 
|  | #	For example: node1 has 1 GPU, node2 has 2 GPUs and node3 has 3 GPUs | 
|  | #	[get_highest_gres_count 1 "gpu"] returns 3 (i.e. 1 node 3 GPUs) | 
|  | #	[get_highest_gres_count 2 "gpu"] returns 2 (i.e. 2 nodes have at least 2 GPUs each) | 
|  | #	[get_highest_gres_count 3 "gpu"] returns 1 (i.e. 3 nodes have at least 1 GPU each) | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_highest_gres_count { node_count gres_name } { | 
|  |  | 
|  | set available_nodes [node_list_to_range [get_nodes_by_state]] | 
|  | set gres_dict [get_gres_count $gres_name $available_nodes] | 
|  | set gres_count [list] | 
|  |  | 
|  | dict for {node gres} $gres_dict { | 
|  | lappend gres_count $gres | 
|  | } | 
|  |  | 
|  | set count [lindex [lsort -decreasing -integer $gres_count] [expr $node_count - 1]] | 
|  | return $count | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_set_gpu_socket_inx - adds a socket index to the gpu_sock_list if not already on it | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_set_gpu_socket_inx sock_inx | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Add a socket index to the array gpu_sock_list if not already | 
|  | #	on the list. Subroutine used by get_gpu_socket_count | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _set_gpu_socket_inx { sock_inx } { | 
|  | global gpu_sock_list | 
|  |  | 
|  | if {$sock_inx == -1} { | 
|  | set gpu_sock_list [lreplace $gpu_sock_list 0 99] | 
|  | return | 
|  | } | 
|  |  | 
|  | set sock_cnt [llength $gpu_sock_list] | 
|  | for {set i 0} {$i < $sock_cnt} {incr i} { | 
|  | if {[lindex $gpu_sock_list $i] == $sock_inx} { | 
|  | return | 
|  | } | 
|  | } | 
|  | lappend gpu_sock_list $sock_inx | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # Subroutine used by get_gpu_socket_count | 
|  | # Add a socket index to the array gpu_sock_list if not already | 
|  | # on the list. | 
|  | ################################################################ | 
|  |  | 
|  | proc _set_gpu_socket_range { sock_first_inx sock_last_inx } { | 
|  | global gpu_sock_list | 
|  |  | 
|  | set sock_cnt [llength $gpu_sock_list] | 
|  | for {set s $sock_first_inx} {$s <= $sock_last_inx} {incr s} { | 
|  | set found 0 | 
|  | for {set i 0} {$i < $sock_cnt} {incr i} { | 
|  | if {[lindex $gpu_sock_list $i] == $s} { | 
|  | set found 1 | 
|  | break | 
|  | } | 
|  | } | 
|  | if {$found == 0} { | 
|  | lappend gpu_sock_list $s | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_gpu_socket_count - returns the number of sockets with GPUS on a node with the given per-node GPU count | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_gpu_socket_count gpu_cnt sockets_per_node | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Given a per-node GPU count, return the number of sockets with | 
|  | #	GPUs on a node with the given per-node GPU count. | 
|  | #	If the sockets_per_node has a value of 1 then just return 1 | 
|  | #	rather than determine the count (for performance reasons). | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_gpu_socket_count { gpu_cnt sockets_per_node } { | 
|  | global test_dir re_word_str bin_rm number scontrol srun gpu_sock_list | 
|  |  | 
|  | set sockets_with_gpus 1 | 
|  | set file_in "$test_dir/test_get_gpu_socket_count" | 
|  |  | 
|  | if {$sockets_per_node == 1} { | 
|  | return 1 | 
|  | } | 
|  |  | 
|  | log_user 0 | 
|  | _set_gpu_socket_inx -1 | 
|  | make_bash_script $file_in "$scontrol show node \$SLURMD_NODENAME" | 
|  | spawn $srun -N1 --gres=gpu:$gpu_cnt $file_in | 
|  | expect { | 
|  | -re "gpu:${number}.S:($number)-($number)" { | 
|  | _set_gpu_socket_range $expect_out(1,string) $expect_out(2,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${re_word_str}:${number}.S:($number),($number),($number),($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | _set_gpu_socket_inx $expect_out(2,string) | 
|  | _set_gpu_socket_inx $expect_out(3,string) | 
|  | _set_gpu_socket_inx $expect_out(4,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${re_word_str}:${number}.S:($number),($number),($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | _set_gpu_socket_inx $expect_out(2,string) | 
|  | _set_gpu_socket_inx $expect_out(3,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${re_word_str}:${number}.S:($number),($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | _set_gpu_socket_inx $expect_out(2,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${re_word_str}:${number}.S:($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${number}.S:($number),($number),($number),($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | _set_gpu_socket_inx $expect_out(2,string) | 
|  | _set_gpu_socket_inx $expect_out(3,string) | 
|  | _set_gpu_socket_inx $expect_out(4,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${number}.S:($number),($number),($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | _set_gpu_socket_inx $expect_out(2,string) | 
|  | _set_gpu_socket_inx $expect_out(3,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${number}.S:($number),($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | _set_gpu_socket_inx $expect_out(2,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "gpu:${number}.S:($number)" { | 
|  | _set_gpu_socket_inx $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | set sock_cnt [llength $gpu_sock_list] | 
|  | if {$sock_cnt > 1} { | 
|  | set sockets_with_gpus $sock_cnt | 
|  | } | 
|  |  | 
|  | return $sockets_with_gpus | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_highest_mps_count - get_highest_gres_count nodes mps, but for "mps per GPU" | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_highest_mps_count node_count | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	For a given number of nodes, returns the higest number of MPS per GPU | 
|  | #	available at least on those number of nodes. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_highest_mps_count { node_count } { | 
|  | # We cannot use get_highest_gres_count because we need "per gpu", | 
|  | # so we get all the mps per node and all gpus per node, to create | 
|  | # a mps_per_gpu list to sort and get the count. | 
|  | set available_nodes [node_list_to_range [get_nodes_by_state]] | 
|  | set mps_dict [get_gres_count "mps" $available_nodes] | 
|  | set gpu_dict [get_gres_count "gpu" $available_nodes] | 
|  | set mps_per_gpu [list] | 
|  |  | 
|  | dict for {node mps} $mps_dict { | 
|  | if { $mps > 0 } { | 
|  | if [dict exists $gpu_dict $node] { | 
|  | set gpu [dict get $gpu_dict $node] | 
|  | if { $gpu > 0 } { | 
|  | lappend mps_per_gpu [expr $mps / $gpu] | 
|  | } else { | 
|  | fail "All nodes with MPS should have a GPU" | 
|  | } | 
|  | } else { | 
|  | fail "All nodes with MPS should have a GPU" | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | set count [lindex [lsort -decreasing -integer $mps_per_gpu] [expr $node_count - 1]] | 
|  | return $count | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_mps_node_count - gets the number of nodes with a positive number of GRES MPS | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_mps_node_count | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Return the count of nodes with a non-zero count of GRES MPS | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_mps_node_count { } { | 
|  | global number sinfo re_word_str | 
|  | set fini 0 | 
|  | set node_inx 0 | 
|  | set def_part [default_partition] | 
|  |  | 
|  | log_user 0 | 
|  | spawn $sinfo -N -p$def_part -oGRES=%G -h | 
|  | expect { | 
|  | -re "GRES=($re_word_str)" { | 
|  | set mps_count 0 | 
|  | set parts [split $expect_out(1,string) ",/"] | 
|  | while 1 { | 
|  | set mps_found [lsearch $parts "mps*"] | 
|  | if { $mps_found == -1 } break | 
|  |  | 
|  | set parts2 [split [lindex $parts $mps_found] ":(/"] | 
|  | set col [lsearch -regexp $parts2 ^$number$] | 
|  | if { $col == -1 } { | 
|  | incr mps_count | 
|  | } else { | 
|  | set mps_count [expr $mps_count + [lindex $parts2 $col]] | 
|  | } | 
|  | set parts [lreplace $parts $mps_found $mps_found] | 
|  | } | 
|  |  | 
|  | if {$mps_count > 0} { | 
|  | incr node_inx | 
|  | } | 
|  | exp_continue | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | return $node_inx | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	cuda_count - determines the count of allocated GPUs | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	cuda_count cuda_string | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	cuda_string | 
|  | #		Contents of a CUDA_VISIBLE_DEVICES environment variable | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Return the number of GPUs or -1 on error | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc cuda_count { cuda_string } { | 
|  | set cuda_count 0 | 
|  | set has_number 0 | 
|  | set len [string length $cuda_string] | 
|  | for {set char_inx 0} {$char_inx < $len} {incr char_inx} { | 
|  | set cuda_char [string index $cuda_string $char_inx] | 
|  | if {[string match , $cuda_char]} { | 
|  | if {$has_number > 0} { | 
|  | incr cuda_count | 
|  | set has_number 0 | 
|  | } else { | 
|  | log_error "Invalid input ($cuda_string)" | 
|  | return -1 | 
|  | } | 
|  | } elseif {[string is digit $cuda_char]} { | 
|  | set has_number 1 | 
|  | } | 
|  | } | 
|  | if {$has_number > 0} { | 
|  | incr cuda_count | 
|  | } else { | 
|  | log_error "Invalid input ($cuda_string)" | 
|  | return -1 | 
|  | } | 
|  | return $cuda_count | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # NAME | 
|  | #	get_conf_path - gets the path to the slurm.conf file | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_conf_path | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns the path to the slurm.conf file | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_conf_path { } { | 
|  | global scontrol re_word_str eol | 
|  |  | 
|  | if [regexp {(.*)/slurm.conf} [get_config_param "SLURM_CONF"] {} config_dir] { | 
|  | return $config_dir | 
|  | } else { | 
|  | fail "Unable to determine config dir" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	save_conf - saves a backup of the specfied configuration file | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	save_conf file_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	If the specified file_name exists, a backup is made which will be | 
|  | #	restored when restore_conf is called. | 
|  | #	If the specified file_name does not exist, a special backup will be | 
|  | #	made that will cause the file to be removed when restore_conf is | 
|  | #	called. | 
|  | #	If a backup already exists, a warning is issued and no backup is made | 
|  | #	(honoring the existing backup). | 
|  | # | 
|  | # SEE ALSO | 
|  | #	restore_conf | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc save_conf { file_name } { | 
|  | global bin_chmod bin_cp bin_mv bin_touch test_name | 
|  |  | 
|  | log_debug "Saving backup of $file_name" | 
|  |  | 
|  | # | 
|  | # Check for existing backup | 
|  | # If a backup exists, issue a warning and return (honor existing backup) | 
|  | # | 
|  | set conf_dir [file dirname $file_name] | 
|  | set dir_files [glob -nocomplain -directory $conf_dir *] | 
|  | set preexisting_backup_file [lsearch -inline -regexp $dir_files "$file_name\\\.test\\d+\\\.\\d+\$"] | 
|  | if {$preexisting_backup_file ne ""} { | 
|  | log_warn "Backup file already exists: ($preexisting_backup_file)" | 
|  | return | 
|  | } | 
|  |  | 
|  | # | 
|  | # Check if file to backup exists. | 
|  | # If it doesn't exist, warn the user, touch an empty backup file with | 
|  | # the sticky bit set and allow the test to continue. | 
|  | # restore_conf will remove the file. | 
|  | # | 
|  | set new_backup_file "$file_name.$test_name" | 
|  | if {![file exists $file_name]} { | 
|  | log_warn "Backup of a nonexistent file requested: $file_name" | 
|  | run_command -fail -nolog "$bin_touch $new_backup_file" | 
|  | run_command -fail -nolog "$bin_chmod +t $new_backup_file" | 
|  | return | 
|  | } | 
|  |  | 
|  | run_command -fail -nolog "$bin_mv $file_name $new_backup_file" | 
|  | run_command -fail -nolog "$bin_cp $new_backup_file $file_name" | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	restore_conf - restores the original confiration file from backup | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	restore_conf file_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	If a backup exists for the specified file_name, it is restored. | 
|  | #	If the specified file_name did not exist when originally backed up, | 
|  | #	it will be removed. | 
|  | #	If no backup exists, a warning is issued. | 
|  | # | 
|  | # SEE ALSO | 
|  | #	save_conf | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc restore_conf { file_name } { | 
|  | global bin_mv bin_rm | 
|  |  | 
|  | log_debug "Restoring backup of $file_name" | 
|  |  | 
|  | set conf_dir [file dirname $file_name] | 
|  | set dir_files [glob -nocomplain -directory $conf_dir *] | 
|  | set backup_file [lsearch -inline -regexp $dir_files "$file_name\\\.test\\d+\\\.\\d+\$"] | 
|  | if {$backup_file ne ""} { | 
|  | file stat $backup_file stat | 
|  |  | 
|  | # If the sticky bit is set and the file is empty, remove both | 
|  | if {! $stat(size) && [expr $stat(mode) & 512]} { | 
|  | log_debug "Removing file used for the test: $file_name" | 
|  | run_command -fail -nolog "$bin_rm -f $backup_file $file_name" | 
|  | # Else replace the original with the backup | 
|  | } else { | 
|  | run_command -fail -nolog "$bin_mv $backup_file $file_name" | 
|  | } | 
|  | } else { | 
|  | # | 
|  | # If backup file doesn't exist, it has probably already been | 
|  | # restored by a previous call to restore_conf | 
|  | # | 
|  | log_warn "Backup file does not exist for $file_name. It has probably already been restored" | 
|  | return | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	have_nvml - checks if HAVE_NVML is set in config.h | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	have_nvml | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if HAVE_NVML is set in config.h. Else, returns false | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc have_nvml { } { | 
|  | global bin_grep number config_h | 
|  |  | 
|  | return [expr [run_command_status -none -nolog "$bin_grep \"HAVE_NVML 1\" $config_h"] == 0] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	delete_part - deletes partition on system | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	delete_part partition | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc delete_part { part_name } { | 
|  | global scontrol | 
|  |  | 
|  | # Remove part | 
|  | spawn $scontrol delete partition=$part_name | 
|  | expect { | 
|  | timeout { | 
|  | fail "scontrol is not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	have_lua - checks if HAVE_LUA is set in config.h | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	have_lua | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Returns true if HAVE_LUA is set in config.h. Else, returns false | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc have_lua { } { | 
|  | global bin_grep config_h | 
|  |  | 
|  | return [expr [run_command_status -none -nolog "$bin_grep HAVE_LUA $config_h"] == 0] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_reservations - returns a dictionary of dictionaries of reservation parameters | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_reservations ?resv_name? | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	Uses `scontrol show reservation` to return a dictionary of dictionaries | 
|  | #	of job parameters. Specifying an invalid resv_name result in a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_reservations { {resv_name ""} } { | 
|  | global scontrol | 
|  |  | 
|  | set command "$scontrol show reservation -o" | 
|  | if {$resv_name ne ""} { | 
|  | append command " $resv_name" | 
|  | } | 
|  | set output [run_command_output -fail "$command"] | 
|  |  | 
|  | # Iterate over each reservation's parameter list | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { continue } | 
|  |  | 
|  | # Peel off the resv parameters one at a time | 
|  | # The first quantifier sets the greediness for the whole RE | 
|  | while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} { | 
|  | # Remove the consumed parameter from the line | 
|  | set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}] | 
|  | # Add it to the temporary job dictionary | 
|  | dict set resv_dict $param_name $param_value | 
|  | } | 
|  |  | 
|  | set resv_name_dict [dict get $resv_dict "ReservationName"] | 
|  |  | 
|  | # Add the resv dictionary to resvs dictionary | 
|  | dict set resvs_dict $resv_name_dict $resv_dict | 
|  |  | 
|  | # Clear the resv dictionary for the next resv | 
|  | set resv_dict {} | 
|  | } | 
|  |  | 
|  | return $resvs_dict | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_reservation_param - returns a specific parameter value for a specific reservation | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_reservation_param resv_name parameter_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Returns a specific parameter value for a specified reservation if the | 
|  | #	parameter exists for the reservation, or MISSING if it does not exist. | 
|  | #	Specifying an invalid reservation name will result in a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_reservation_param { resv_name parameter_name } { | 
|  |  | 
|  | set resvs_dict [get_reservations $resv_name] | 
|  |  | 
|  | if [dict exists $resvs_dict $resv_name $parameter_name] { | 
|  | return [dict get $resvs_dict $resv_name $parameter_name] | 
|  | } else { | 
|  | return "MISSING" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	create_res - create new reservation in system | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	create_res ?res_name? ?res_params? | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the exit code of the scontrol command run | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc create_res { res_name res_params } { | 
|  | global scontrol | 
|  |  | 
|  | set result   [run_command "$scontrol create res ReservationName=$res_name $res_params"] | 
|  | set output   [dict get $result output] | 
|  | set ret_code [dict get $result exit_code] | 
|  |  | 
|  | if { $ret_code } { | 
|  | log_warn "[lindex [info level 0] 0]: error from scontrol: $output" | 
|  | } else { | 
|  | log_debug "[lindex [info level 0] 0]: success from scontrol: $output" | 
|  | } | 
|  |  | 
|  | return $ret_code | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	update_res - update exisiting reservation in system | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	update_res ?res_name? ?res_params? | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the exit code of the scontrol command run | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc update_res { res_name res_params } { | 
|  | global scontrol | 
|  |  | 
|  | set result   [run_command "$scontrol update ReservationName=$res_name $res_params"] | 
|  | set output   [dict get $result output] | 
|  | set ret_code [dict get $result exit_code] | 
|  |  | 
|  | if { $ret_code } { | 
|  | log_warn "Return code from scontrol: $ret_code. Output: $output" | 
|  | } | 
|  |  | 
|  | return $ret_code | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	delete_res - delete reservation from system | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	delete_res ?res_name? | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the exit code of the scontrol command run | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc delete_res { res_name } { | 
|  | global scontrol | 
|  |  | 
|  | set result   [run_command "$scontrol delete ReservationName=$res_name"] | 
|  | set output   [dict get $result output] | 
|  | set ret_code [dict get $result exit_code] | 
|  |  | 
|  | if { $ret_code } { | 
|  | log_warn "Return code from scontrol: $ret_code. Output: $output" | 
|  | } | 
|  |  | 
|  | return $ret_code | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	create_part - creates a partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	create_part partition num_nodes | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	partition | 
|  | #		Name of partition to create | 
|  | #	num_nodes | 
|  | #		Number of nodes of partition to create | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	RETURN_SUCCESS, or non-zero on error | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc create_part { part_name num_nodes_in } { | 
|  | global scontrol srun bin_printenv number re_word_str | 
|  |  | 
|  | set nodes "" | 
|  | set num_nodes_out 0 | 
|  |  | 
|  | set found 0 | 
|  | spawn $scontrol show partitionname=$part_name | 
|  | expect { | 
|  | -re "PartitionName=$part_name" { | 
|  | set found 1 | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "scontrol is not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if {$found == 1} { | 
|  | log_error "There is already a partition $part_name" | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | if {[string length [default_partition]] == 0} { | 
|  | log_warn "create_part does not work without a default partition" | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | if { $num_nodes_in } { | 
|  | set num_nodes $num_nodes_in | 
|  | } else { | 
|  | set num_nodes [llength [get_nodes_by_state]] | 
|  | } | 
|  |  | 
|  | log_user 0 | 
|  | # Get a list of nodes | 
|  | spawn $srun -t1 -N1-$num_nodes $bin_printenv | 
|  | expect { | 
|  | -re "SLURM_JOB_NUM_NODES=($number)" { | 
|  | set num_nodes_out $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | -re "SLURM_NODELIST=($re_word_str)" { | 
|  | set nodes $expect_out(1,string) | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "srun is not responding getting number of nodes creating part" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if {[string length $nodes] == 0} { | 
|  | log_error "Did not get a valid node list" | 
|  | return $::RETURN_ERROR | 
|  | } elseif { $num_nodes_out != $num_nodes_in } { | 
|  | log_error "Did not get enough nodes ($num_nodes_out != $num_nodes_in) to run test" | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | spawn $scontrol create partitionname=$part_name nodes=$nodes | 
|  | expect { | 
|  | timeout { | 
|  | fail "scontrol is not responding creating partition" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | set found 0 | 
|  | spawn $scontrol show partitionname=$part_name | 
|  | expect { | 
|  | -re "PartitionName=$part_name" { | 
|  | set found 1 | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | fail "scontrol is not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if { $found == 0 } { | 
|  | log_error "scontrol did not create partition $part_name" | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  | log_user 1 | 
|  |  | 
|  | log_debug "Created partition $part_name with $num_nodes_in nodes" | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_nodes - returns a dictionary of dictionaries of node parameters | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_nodes ?hostlist_expression? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Uses `scontrol show node` to query node parameters, returning a | 
|  | #	dictionary of dictionaries with the node names as keys of the first | 
|  | #	level dictionary and with the parameters as keys of the second level | 
|  | #	dictionary. Specifying an invalid node name will result in a failure. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	If the optional node expression argument is specified, the result will | 
|  | #	be constrained by the specified hostlist expression. Otherwise, the | 
|  | #	results for all nodes will be returned. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_nodes { {hostlist_expression ""} } { | 
|  | global scontrol | 
|  |  | 
|  | set command "$scontrol show node -o" | 
|  | if {$hostlist_expression ne ""} { | 
|  | append command " $hostlist_expression" | 
|  | } | 
|  | set output [run_command_output -fail -nolog "$command"] | 
|  |  | 
|  | # Iterate over each node parameter line | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { continue } | 
|  |  | 
|  | # Peel off the node parameters one at a time | 
|  | # The first quantifier sets the greediness for the whole RE | 
|  | while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} { | 
|  | # Remove the consumed parameter from the line | 
|  | set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}] | 
|  | # Add it to the temporary node dictionary | 
|  | dict set node_dict $param_name $param_value | 
|  | } | 
|  |  | 
|  | set node_name [dict get $node_dict "NodeName"] | 
|  |  | 
|  | # Add the node dictionary to nodes dictionary | 
|  | dict set nodes_dict $node_name $node_dict | 
|  |  | 
|  | # Clear the node dictionary for use by the next node | 
|  | set node_dict {} | 
|  | } | 
|  |  | 
|  | return $nodes_dict | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_node_param - returns a specific parameter value for a specific node | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_node_param node_name parameter_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Returns a specific parameter value for a specified node if the | 
|  | #	parameter exists for the node, or MISSING if it does not exist. | 
|  | #	Specifying an invalid node name will result in a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_node_param { node_name parameter_name } { | 
|  |  | 
|  | set nodes_dict [get_nodes $node_name] | 
|  |  | 
|  | if [dict exists $nodes_dict $node_name $parameter_name] { | 
|  | return [dict get $nodes_dict $node_name $parameter_name] | 
|  | } else { | 
|  | return "MISSING" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_nodes_by_request - get a list of nodes satisfying requested resources | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_nodes_by_request ?options? ?request_args? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Using srun (optionally with the specified arguments), returns a list | 
|  | #	of nodes having the requested resources. | 
|  | #	If an error occurs, the invoking test will fail. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fail | 
|  | #		fail the test if the execution of srun results in an error or timeout | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	request_args | 
|  | #		Desired resources of a node in form of srun arguments, | 
|  | #		e.g. "--gres=gpu:1 -n1 -t1" | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	A list of nodes with at least the requested resources, or an empty | 
|  | #	list otherwise. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_nodes_by_request args { | 
|  | global srun | 
|  |  | 
|  | set options [list] | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fail { | 
|  | lappend options [lrange $args 0 0] | 
|  | set args [lrange $args 1 end] | 
|  | } | 
|  | default break | 
|  | } | 
|  | } | 
|  |  | 
|  | if {[llength $args] == 1} { | 
|  | lassign $args request_args | 
|  | } elseif {[llength $args] == 0} { | 
|  | set request_args "-n1 -t1" | 
|  | } else { | 
|  | fail "[lindex [info level 0] 0]: Invalid number of arguments ([llength $args]): $args" | 
|  | } | 
|  |  | 
|  | log_debug "Getting nodes that can be allocated with request: $request_args" | 
|  | set command "$srun -Q $request_args printenv SLURMD_NODENAME" | 
|  | set result [run_command {*}$options $command] | 
|  |  | 
|  | if [dict get $result exit_code] { | 
|  | return {} | 
|  | } | 
|  |  | 
|  | set output [dict get $result output] | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { continue } | 
|  | dict incr allocated_nodes $line | 
|  | } | 
|  |  | 
|  | return [lsort [dict keys $allocated_nodes]] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_partitions - returns a dictionary of dictionaries of partition parameters | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_partitions ?partition_name? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Uses `scontrol show partitions` to query partition parameters, | 
|  | #	returning a dictionary of dictionaries with the partition names | 
|  | #	as keys of the first level dictionary and with the parameters as | 
|  | #	keys of the second level dictionary. Specifying an invalid partition | 
|  | #	name will result in a failure. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	If the optional partition_name argument is specified, the result will | 
|  | #	contain only the one patition. Otherwise, the results for all | 
|  | #	partitions will be returned. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_partitions { {partition_name ""} } { | 
|  | global scontrol | 
|  |  | 
|  | set command "$scontrol show partition -o" | 
|  | if {$partition_name ne ""} { | 
|  | append command " $partition_name" | 
|  | } | 
|  | set output [run_command_output -fail -nolog "$command"] | 
|  |  | 
|  | # Iterate over each partition parameter line | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { continue } | 
|  |  | 
|  | # Peel off the partition parameters one at a time | 
|  | # The first quantifier sets the greediness for the whole RE | 
|  | while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} { | 
|  | # Remove the consumed parameter from the line | 
|  | set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}] | 
|  | # Add it to the temporary node dictionary | 
|  | dict set part_dict $param_name $param_value | 
|  | } | 
|  |  | 
|  | set part_name [dict get $part_dict "PartitionName"] | 
|  |  | 
|  | # Add the node dictionary to nodes dictionary | 
|  | dict set parts_dict $part_name $part_dict | 
|  |  | 
|  | # Clear the node dictionary for use by the next node | 
|  | set part_dict {} | 
|  | } | 
|  |  | 
|  | return $parts_dict | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_partition_param - returns a specific parameter value for a specific partition | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_partition_param partitoin_name parameter_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Returns a specific parameter value for a specified partition if the | 
|  | #	parameter exists for the partition, or MISSING if it does not exist. | 
|  | #	Specifying an invalid partition name will result in a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_partition_param { partition_name parameter_name } { | 
|  |  | 
|  | set partitions_dict [get_partitions $partition_name] | 
|  |  | 
|  | if [dict exists $partitions_dict $partition_name $parameter_name] { | 
|  | return [dict get $partitions_dict $partition_name $parameter_name] | 
|  | } else { | 
|  | return "MISSING" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_jobs - returns a dictionary of dictionaries of job parameters | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_jobs ?job_id? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Uses `scontrol show job` to return a dictionary of dictionaries of job | 
|  | #	parameters. Specifying an invalid job id will result in a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_jobs { {job_id_in ""} } { | 
|  | global scontrol | 
|  |  | 
|  | set command "$scontrol show job -d -o" | 
|  | if {$job_id_in ne ""} { | 
|  | append command " $job_id_in" | 
|  | } | 
|  | set output [run_command_output -fail "$command"] | 
|  |  | 
|  | # Iterate over each job's parameter list | 
|  | foreach line [split $output "\n"] { | 
|  | if {$line eq ""} { continue } | 
|  |  | 
|  | # Peel off the job parameters one at a time | 
|  | # The first quantifier sets the greediness for the whole RE | 
|  | while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} { | 
|  | # Remove the consumed parameter from the line | 
|  | set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}] | 
|  | # Add it to the temporary job dictionary | 
|  | dict set job_dict $param_name $param_value | 
|  | } | 
|  |  | 
|  | set job_id [dict get $job_dict "JobId"] | 
|  |  | 
|  | # Add the job dictionary to jobs dictionary | 
|  | dict set jobs_dict $job_id $job_dict | 
|  |  | 
|  | # Clear the job dictionary for the next job | 
|  | set job_dict {} | 
|  | } | 
|  |  | 
|  | return $jobs_dict | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	get_job_param - returns a specific parameter value for a specific job | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	get_job_param job_id parameter_name | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Returns a specific parameter value for a specified job if the | 
|  | #	parameter exists for the job, or MISSING if it does not exist. | 
|  | #	Specifying an invalid job id will result in a failure. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc get_job_param { job_id parameter_name } { | 
|  |  | 
|  | set jobs_dict [get_jobs $job_id] | 
|  |  | 
|  | if [dict exists $jobs_dict $job_id $parameter_name] { | 
|  | return [dict get $jobs_dict $job_id $parameter_name] | 
|  | } else { | 
|  | return "MISSING" | 
|  | } | 
|  | } | 
|  |  | 
|  | proc check_reason { job_id reason } { | 
|  | global squeue | 
|  |  | 
|  | set found 0 | 
|  | spawn $squeue -j $job_id --noheader -o "%r" | 
|  | expect { | 
|  | -re "$reason" { | 
|  | set found 1 | 
|  | exp_continue | 
|  | } | 
|  | timeout { | 
|  | log_error "squeue not responding" | 
|  | } | 
|  | eof { | 
|  | wait | 
|  | } | 
|  | } | 
|  |  | 
|  | if {$found == 0} { | 
|  | log_error "Job $job_id should have a wait reason of $reason" | 
|  | } | 
|  | return $found | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	submit_job - submits a job with sbatch and returns its job id | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	submit_job ?options? job_args | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Submits a job with sbatch and returns its jobid, or 0 if error. | 
|  | #	It accepts all the options of run_command. | 
|  | # | 
|  | # OPTIONS | 
|  | #	It accepts and passes all the options of/to run_command and also: | 
|  | #	-env env | 
|  | #		Prepend $env to the actual sbatch command to set environment | 
|  | #		variables. For example "-env 'SLURM_NTASKS_PER_GPU=2'". | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	job_args | 
|  | #		a string containing all the arguments to pass to sbatch | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the job id, or 0 if an error happen | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc submit_job args { | 
|  | global sbatch | 
|  |  | 
|  | set env    "" | 
|  | set job_id 0 | 
|  |  | 
|  | set idx [lsearch $args -env] | 
|  | if {$idx >= 0} { | 
|  | set env  [lindex   $args [expr $idx+1]] | 
|  | set args [lreplace $args $idx [expr $idx+1]] | 
|  | } | 
|  |  | 
|  | if {[llength $args] < 1} { | 
|  | fail "Wrong number of parameters, should be >=1" | 
|  | } | 
|  | set job_options [lindex $args [expr [llength $args] - 1 ]] | 
|  | set run_options   "" | 
|  | if {[llength $args] > 1} { | 
|  | set run_options [lrange $args 0 [expr [llength $args] - 2 ]] | 
|  | } | 
|  |  | 
|  | set command "" | 
|  | if {$env ne ""} { | 
|  | set command "$env " | 
|  | } | 
|  | set command "$command$sbatch $job_options" | 
|  | set output [run_command_output {*}$run_options "$command"] | 
|  | regexp {Submitted \S+ job (\d+)} $output - job_id | 
|  |  | 
|  | return $job_id | 
|  | } | 
|  |  | 
|  | ############################################################################### | 
|  | # | 
|  | # NAME | 
|  | #	check_exclusive_gres - Given a job_id and node_name of an exclusive | 
|  | #                              allocation make sure we allocated all GRES. | 
|  | # | 
|  | ################################################################################ | 
|  |  | 
|  | proc check_exclusive_gres { job_id node_name } { | 
|  | # Check all consumable GRES of the node were allocated on the job | 
|  | set gres_dict_job  [count_gres [get_job_param  $job_id "JOB_GRES"]] | 
|  | set gres_dict_node [count_gres [get_node_param $node_name "Gres"] true] | 
|  |  | 
|  | dict for {gres_name gres_count} $gres_dict_node { | 
|  | # We don't want mps to be allocated with --exclusive | 
|  | if {$gres_name eq "mps" || $gres_name eq "shard"} { | 
|  | if {[dict exists $gres_dict_job $gres_name]} { | 
|  | subfail "Gres ($gres_name) on node ($node_name) allocated on job ($job_id) with --exclusive but it shouldn't be" | 
|  | } else { | 
|  | subpass "Gres ($gres_name) on node ($node_name) was not allocated on job ($job_id) with --exclusive, this is expected" | 
|  | } | 
|  | } elseif {![dict exists $gres_dict_job $gres_name]} { | 
|  | subfail "Gres ($gres_name) on node ($node_name) not allocated on job ($job_id) with --exclusive" | 
|  | } else { | 
|  | subpass "Gres ($gres_name) on node ($node_name) was allocated on job ($job_id) with --exclusive" | 
|  |  | 
|  | set gres_count_job [dict get $gres_dict_job $gres_name] | 
|  | subtest { $gres_count_job == $gres_count } "Gres ($gres_name) on node ($node_name) fully allocated on job ($job_id) with --exclusive" "$gres_count_job != $gres_count" | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	compile_against_libslurm - compiles a test program against either libslurm.so or libslurmfull.so | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	compile_against_libslurm ?options? test_prog ?build_args? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Compile a test program against either libslurm.so or libslurmfull.so. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-full | 
|  | #		use libslurmfull.so instead of libslurm.so | 
|  | #	-shared | 
|  | #		produces a shared library (adds the -shared compiler option | 
|  | #		and adds a .so suffix to the output file name) | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	test_prog | 
|  | #		The name of the test program (and .c file) | 
|  | #	build_args | 
|  | #		Additional string to be appended to the build command. | 
|  | #		E.g. "-DUSING_VALGRIND -lm ${build_dir}/src/slurmctld/locks.o" | 
|  | #		(initial space will be added automatically). | 
|  | # | 
|  | ################################################################################ | 
|  |  | 
|  | proc compile_against_libslurm args { | 
|  | global slurm_dir bin_cc src_dir build_dir bin_chmod | 
|  |  | 
|  | set use_full   false | 
|  | set shared     false | 
|  | set build_args "" | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -full    {set use_full true; set args [lrange $args 1 end]} | 
|  | -shared  {set shared true; set args [lrange $args 1 end]} | 
|  | -*       {fail "Unknown option: [lindex $args 0]"} | 
|  | default  break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 1} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } else { | 
|  | lassign $args test_prog | 
|  | } | 
|  | if {$argument_count == 2} { set build_args [lindex $args 1] } | 
|  | if {$argument_count > 2} { | 
|  | fail "Too many arguments ($argument_count): $args" | 
|  | } | 
|  |  | 
|  | if {$use_full} { | 
|  | set libfile "libslurmfull.so" | 
|  | } else { | 
|  | set libfile "libslurm.so" | 
|  | } | 
|  |  | 
|  | if [file exists $slurm_dir/lib64/slurm/$libfile] { | 
|  | set libdir "lib64" | 
|  | } else { | 
|  | set libdir "lib" | 
|  | } | 
|  |  | 
|  | if {$use_full} { | 
|  | set libline "$slurm_dir/$libdir/slurm" | 
|  | set libfile "slurmfull" | 
|  | } else { | 
|  | set libline "$slurm_dir/$libdir" | 
|  | set libfile "slurm" | 
|  | } | 
|  |  | 
|  | set build_cmd "$bin_cc ${test_prog}.c -g -pthread" | 
|  |  | 
|  | if {$shared} { | 
|  | set out "${test_prog}.so" | 
|  | append build_cmd " -fPIC -shared" | 
|  | } else { | 
|  | set out "${test_prog}" | 
|  | } | 
|  | append build_cmd " -o $out" | 
|  |  | 
|  | append build_cmd " -I$src_dir -I$build_dir -I$slurm_dir/include -Wl,-rpath=$libline -L$libline -l$libfile -lresolv" | 
|  |  | 
|  | # Add additional arguments to the build command | 
|  | if {$build_args != ""} { | 
|  | append build_cmd " $build_args" | 
|  | } | 
|  |  | 
|  | log_debug "Build command: $build_cmd" | 
|  | catch {exec {*}$build_cmd} out_str out_dict | 
|  | if {[dict get $out_dict -code]} { | 
|  | log_error $out_str | 
|  | return $::RETURN_ERROR | 
|  | } | 
|  |  | 
|  | exec $bin_chmod 700 $out | 
|  | return $::RETURN_SUCCESS | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	subtest - tests a boolean condition and updates subtest tallies | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	subtest ?options? condition description ?diagnostics? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Based on the results of testing a boolean expression, increments the | 
|  | #	relevant subtest count (pass, fail or skip) and logs a message. | 
|  | # | 
|  | # OPTIONS | 
|  | #	-fatal | 
|  | #		If the subtest fails, causes a fatal error ending the test | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	condition | 
|  | #		The boolean expression to test | 
|  | #	description | 
|  | #		A single-line string describing what is being tested. This is | 
|  | #		a subtest "name" that is displayed with the log message | 
|  | #		whether the subtest passes or fails | 
|  | #	diagnostics | 
|  | #		A string providing additional diagnostic information that is | 
|  | #		only included in the log message on failure | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	the boolean condition evaluated | 
|  | # | 
|  | # ENVIRONMENT | 
|  | #	testsuite_subtest_fatal | 
|  | #		Specifies whether first failing subtest aborts the test | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc subtest args { | 
|  |  | 
|  | set options [list] | 
|  | while {[llength $args]} { | 
|  | switch -glob -- [lindex $args 0] { | 
|  | -fatal - | 
|  | -fail  { | 
|  | lappend options -fatal | 
|  | set args [lrange $args 1 end] | 
|  | } | 
|  | -*      {fail "Unknown option: [lindex $args 0]"} | 
|  | default break | 
|  | } | 
|  | } | 
|  | set argument_count [llength $args] | 
|  | if {$argument_count < 2} { | 
|  | fail "Too few arguments ($argument_count): $args" | 
|  | } else { | 
|  | set args [lassign $args condition description] | 
|  | } | 
|  |  | 
|  | if [uplevel 1 expr [format "{%s}" $condition]] { | 
|  | subpass $description | 
|  | return true | 
|  | } else { | 
|  | subfail {*}$options $description {*}$args | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_is_testproc_included - returns if testproc_id was included or not excluded in argv | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_is_testproc_included testproc_id testproc_alias | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	From command line the test runner can use -i and -e to include or | 
|  | #	exclude some test functions by their number. This function must be | 
|  | #	used to check if the test runner included or excluded the given | 
|  | #	testproc_id. | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	true is testproc_id or testproc_alias was included with -i or not | 
|  | #	excluded -e, false otherwise | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _is_testproc_included {testproc_id testproc_alias} { | 
|  | global _testproc_included _testproc_excluded | 
|  |  | 
|  | if {[llength $_testproc_included]} { | 
|  | if {[lsearch $_testproc_included $testproc_id] >= 0 || \ | 
|  | [lsearch $_testproc_included $testproc_alias] >= 0} { | 
|  | return true | 
|  | } | 
|  | return false | 
|  | } | 
|  |  | 
|  | if {[llength $_testproc_excluded]} { | 
|  | if {[lsearch $_testproc_excluded $testproc_id] >= 0 || \ | 
|  | [lsearch $_testproc_excluded $testproc_alias] >= 0} { | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  | return true | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	skip_following_testprocs - the following testproc calls will be skipped | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	skip_following_testprocs reason | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	reason | 
|  | #		The string with the reason message to add on the skip message | 
|  | #		on each skipped testproc. | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This function disables normal execution of testproc calls. | 
|  | #	It is meant to be used when some testprocs cannot be run due config | 
|  | #	limitations, but still call testproc to register what testprocs | 
|  | #	are skipped for a given reason. | 
|  | #	Use run_following_testprocs to reenable the norma execution of testprocs. | 
|  |  | 
|  | ################################################################ | 
|  |  | 
|  | proc skip_following_testprocs {reason} { | 
|  | global _testproc_skip_next _testproc_skip_reason | 
|  |  | 
|  | set _testproc_skip_next   true | 
|  | set _testproc_skip_reason $reason | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	run_following_testprocs - the following testproc call will be run (if not excluded from command line) | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	run_following_testprocs | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This function reenables the normal execution of testproc calls. | 
|  | #	It is meant to be used when skip_following_testprocs was called to skip | 
|  | #	previous testproc calls, and we want to normally run the following ones. | 
|  | #	Note that it does NOT overwrite what -i and -e included are passed | 
|  | #	from command line. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc run_following_testprocs {} { | 
|  | global _testproc_skip_next _testproc_skip_reason | 
|  |  | 
|  | set _testproc_skip_next   false | 
|  | set _testproc_skip_reason "" | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	testproc - launcher to run or skip a testproc_call | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	testproc testproc_call | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	A testproc_call is any normal call to a proc with any arguments that | 
|  | #	could be done normally without the testproc launcher. | 
|  | #	For example, we could normally do: | 
|  | # | 
|  | #	test_my_feature $some_args $expected_out | 
|  | # | 
|  | #	Or use the launcher like: | 
|  | # | 
|  | #	testproc test_my_feature $some_args $expected_out | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	Using the testproc launcher has the following main benefits: | 
|  | #	a) Handles the -i and -e terminal options to include or exclude some | 
|  | #	   testprocs numbers. | 
|  | #	b) Runs or skips based on the last call of testproc_{skip,run}_following. | 
|  | #	c) Creates extra sections in the status summary | 
|  | #	   (see testsuite_testproc_details). | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	The rc of the testproc_call if it has been run, or $::RETURN_SUCCESS | 
|  | #	otherwise. Using it is not recommended, though. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc testproc args { | 
|  | return [testproc_alias "" {*}$args] | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	testproc_alias - launcher to run or skip a testproc_call, with an alias | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	testproc alias testproc_call | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	See testproc | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc testproc_alias {alias args} { | 
|  | global _testproc_pass_list _testproc_skip_list _testproc_fail_list | 
|  | global _testproc_skip_next _testproc_skip_reason | 
|  | global _testproc_messages testsuite_testproc_log_calls | 
|  | global _subtest_pass_count | 
|  | global _subtest_skip_count | 
|  | global _subtest_fail_count | 
|  |  | 
|  | # Avoid integer alias to avoid confusions with testproc_id | 
|  | if {[llength $alias] && [string is integer -strict $alias]} { | 
|  | fail "testproc_alias doesn't support integer alias, use alphanumeric" | 
|  | } | 
|  |  | 
|  | # Save previous subtest counts and next subtest num | 
|  | set prev_pass $_subtest_pass_count | 
|  | set prev_skip $_subtest_skip_count | 
|  | set prev_fail $_subtest_fail_count | 
|  | set prev_subtest [expr $_subtest_pass_count + \ | 
|  | $_subtest_skip_count + \ | 
|  | $_subtest_fail_count + 1] | 
|  |  | 
|  | # Get the testproc number | 
|  | set testproc_id [expr [llength $_testproc_pass_list] + \ | 
|  | [llength $_testproc_skip_list] + \ | 
|  | [llength $_testproc_fail_list] + 1] | 
|  |  | 
|  | # Run or skip the testproc | 
|  | set rc $::RETURN_SUCCESS | 
|  | set reason "" | 
|  | if {![_is_testproc_included $testproc_id $alias]} { | 
|  | set reason "(Excluded from command line)" | 
|  | if {$testsuite_testproc_log_calls == yes} { | 
|  | subskip -nolog "Skipping testproc $testproc_id: {$args} $reason" | 
|  | } else { | 
|  | subskip -nolog "Skipping testproc $testproc_id: $reason" | 
|  | } | 
|  | } else { | 
|  | if {$_testproc_skip_next} { | 
|  | set reason "($_testproc_skip_reason)" | 
|  | if {$testsuite_testproc_log_calls == yes} { | 
|  | subskip  "Skipping testproc $testproc_id: {$args} $reason" | 
|  | } else { | 
|  | subskip  "Skipping testproc $testproc_id: $reason" | 
|  | } | 
|  | } else { | 
|  | if {$testsuite_testproc_log_calls != no} { | 
|  | log_info "Running testproc $testproc_id: $args" | 
|  | } else { | 
|  | log_info "Running testproc $testproc_id" | 
|  | } | 
|  | set rc [{*}$args] | 
|  | } | 
|  | } | 
|  |  | 
|  | # Get current subtest counts | 
|  | set curr_subtest [expr $_subtest_pass_count + \ | 
|  | $_subtest_skip_count + \ | 
|  | $_subtest_fail_count] | 
|  |  | 
|  | # Register the testproc as fail, skip or pass (based on subtests) | 
|  | if {[llength $alias]} { | 
|  | set alias "${alias}: " | 
|  | } | 
|  | if {$_subtest_fail_count > $prev_fail} { | 
|  | set reason "(Subtests: $prev_subtest to $curr_subtest)" | 
|  | lappend  _testproc_fail_list $testproc_id | 
|  | if {$testsuite_testproc_log_calls != no} { | 
|  | dict set _testproc_messages  $testproc_id [list failed "$alias{$args} $reason"] | 
|  | } else { | 
|  | dict set _testproc_messages  $testproc_id [list failed "$alias$reason"] | 
|  | } | 
|  | } elseif {$_subtest_skip_count > $prev_skip} { | 
|  | lappend  _testproc_skip_list $testproc_id | 
|  | if {$testsuite_testproc_log_calls == yes} { | 
|  | dict set _testproc_messages  $testproc_id [list skipped "$alias{$args} $reason"] | 
|  | } else { | 
|  | dict set _testproc_messages  $testproc_id [list skipped "$alias$reason"] | 
|  | } | 
|  | } else { | 
|  | set reason "(Subtests: $prev_subtest to $curr_subtest)" | 
|  | lappend  _testproc_pass_list $testproc_id | 
|  | if {$testsuite_testproc_log_calls == yes} { | 
|  | dict set _testproc_messages  $testproc_id [list passed "$alias{$args} $reason"] | 
|  | } else { | 
|  | dict set _testproc_messages  $testproc_id [list passed "$alias$reason"] | 
|  | } | 
|  | } | 
|  |  | 
|  | return $rc | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_log_format - prints a log message with colorization and formatting | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_log_format log_level message | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This procedure is called by the log_<level> procedures and | 
|  | #	derives the relevant log level from the caller's procedure name. | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	log_level | 
|  | #		The logging threshold that triggered the log statement | 
|  | #	message | 
|  | #		The message to print with colorization and formatting | 
|  | # | 
|  | # ENVIRONMENT | 
|  | #	testsuite_log_format | 
|  | #		Used as the template for the fields to be output. | 
|  | #		Fields must be expressed in the form: | 
|  | #			%{<field_name>}<format_conversion_specifier> | 
|  | #		Supported fields include the following: | 
|  | #			message | 
|  | #				The log message | 
|  | #			filename | 
|  | #				The file name where the log_<log_level> | 
|  | #				procedure was called from | 
|  | #			lineno | 
|  | #				The line number where the log_<log_level> | 
|  | #				procedure was called from | 
|  | #			timestamp | 
|  | #				The date and time when the log_<log_level> | 
|  | #				procedure was called at | 
|  | #			msecs | 
|  | #				The milliseconds when the log_<log_level> | 
|  | #				procedure was called at | 
|  | #			loglevel | 
|  | #				The log level that triggers the log_<log_level> | 
|  | #				procedure to be called | 
|  | #			backtrace | 
|  | #				An abbreviated call stack trace with line | 
|  | #				numbers | 
|  | #	testsuite_time_format | 
|  | #		Used as a template for the timestamp. See the format groups | 
|  | #		for the tcl clock format command. | 
|  | #	testsuite_colorize | 
|  | #		Boolean that turns colorization on or off | 
|  | #	testsuite_color_<log_level> | 
|  | #		Can be set to define the color used for each log level | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _log_format { log_level message } { | 
|  | global testsuite_colorize testsuite_log_format testsuite_time_format | 
|  | global COLOR_NONE | 
|  | global testsuite_color_fatal testsuite_color_error testsuite_color_warn | 
|  | global testsuite_color_info testsuite_color_pass testsuite_color_command | 
|  | global testsuite_color_debug testsuite_color_trace | 
|  |  | 
|  | set format_string $testsuite_log_format | 
|  | set milliseconds_since_epoch [clock milliseconds] | 
|  | set date_time [clock format [expr {$milliseconds_since_epoch / 1000}] -format "$testsuite_time_format"] | 
|  | set milliseconds [expr {$milliseconds_since_epoch % 1000}] | 
|  |  | 
|  | set frame_level -2 | 
|  | while { [dict get [info frame $frame_level] type] != "source" } { | 
|  | incr frame_level -1 | 
|  | } | 
|  |  | 
|  | set format_args {} | 
|  | while {[regexp "%{\[a-z]+}" $format_string format_field]} { | 
|  | if {$format_field eq "%{message}"} { | 
|  | lappend format_args $message | 
|  | } elseif {$format_field eq "%{filename}"} { | 
|  | lappend format_args [file tail [dict get [info frame $frame_level] file]] | 
|  | } elseif {$format_field eq "%{lineno}"} { | 
|  | lappend format_args [dict get [info frame $frame_level] line] | 
|  | } elseif {$format_field eq "%{timestamp}"} { | 
|  | lappend format_args $date_time | 
|  | } elseif {$format_field eq "%{msecs}" || $format_field eq "%{milliseconds}"} { | 
|  | lappend format_args $milliseconds | 
|  | } elseif {$format_field eq "%{loglevel}" || $format_field eq "%{levelname}"} { | 
|  | lappend format_args [string totitle $log_level] | 
|  | } elseif {$format_field eq "%{backtrace}"} { | 
|  | lappend format_args [_line_trace] | 
|  | } else { | 
|  | fail "Invalid field ($format_field) specified in testsuite_log_format" | 
|  | } | 
|  | regsub $format_field $format_string "%" format_string | 
|  | } | 
|  |  | 
|  | if ($testsuite_colorize) { | 
|  | switch $log_level { | 
|  | fatal   { append output $testsuite_color_fatal } | 
|  | error   { append output $testsuite_color_error } | 
|  | warning { append output $testsuite_color_warn } | 
|  | info    { append output $testsuite_color_info } | 
|  | pass    { append output $testsuite_color_pass } | 
|  | command { append output $testsuite_color_command } | 
|  | debug   { append output $testsuite_color_debug } | 
|  | trace   { append output $testsuite_color_trace } | 
|  | } | 
|  | } | 
|  | append output [format $format_string {*}$format_args] | 
|  | if ($testsuite_colorize) { | 
|  | append output $COLOR_NONE | 
|  | } | 
|  | puts $output | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_print_header - prints a test header | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_print_header | 
|  | # | 
|  | # ENVIRONMENT | 
|  | #	testsuite_colorize | 
|  | #		Boolean that turns colorization on or off | 
|  | #	testsuite_color_header | 
|  | #		Can be set to define the color used for the header | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _print_header { } { | 
|  | global test_name testsuite_color_header testsuite_colorize COLOR_NONE | 
|  |  | 
|  | if ($testsuite_colorize) { | 
|  | append output $testsuite_color_header | 
|  | } | 
|  | append output [string repeat = 78]\n | 
|  | append output [format "%-9s" "TEST:"]${test_name}\n | 
|  | append output [string repeat = 78] | 
|  | if ($testsuite_colorize) { | 
|  | append output $COLOR_NONE | 
|  | } | 
|  | puts $output | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_print_summary - prints the final status summary | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_print_summary status completed | 
|  | # | 
|  | # ARGUMENTS | 
|  | #	status | 
|  | #		The final status of the test. | 
|  | #		When status is zero, we print SUCCESS. | 
|  | #		When status is negative, we print SKIPPED. | 
|  | #		When status is positive, we print FAILURE. | 
|  | # | 
|  | #	completed | 
|  | #		A boolean value that is true if the test completed and false | 
|  | #		if aborted (ended early with exit status != 0) | 
|  | # | 
|  | # ENVIRONMENT | 
|  | #	testsuite_colorize | 
|  | #		Boolean that turns colorization on or off | 
|  | #	testsuite_color_<test_status> | 
|  | #		Can be set to define the color used for each test status | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _print_summary {status completed} { | 
|  | global test_name testsuite_colorize COLOR_NONE | 
|  | global testsuite_color_success testsuite_color_skipped | 
|  | global testsuite_color_failure | 
|  | global _subtest_fail_count _subtest_pass_count _subtest_skip_count | 
|  | global _testproc_pass_list _testproc_skip_list _testproc_fail_list | 
|  | global _incomplete_reason | 
|  | global _subtest_messages _testproc_messages | 
|  | global testsuite_subtest_details testsuite_testproc_details | 
|  |  | 
|  | if {$status == 0} { | 
|  | set color $testsuite_color_success | 
|  | set header "SUCCESS" | 
|  | } elseif {$status < 0} { | 
|  | set color $testsuite_color_skipped | 
|  | set header "SKIPPED" | 
|  | } elseif {$status > 0} { | 
|  | set color $testsuite_color_failure; | 
|  | set header "FAILURE" | 
|  | } | 
|  |  | 
|  | if ($testsuite_colorize) { | 
|  | append output $color | 
|  | } | 
|  |  | 
|  | append output [string repeat = 78]\n | 
|  |  | 
|  | # Get subtest and testproc counts | 
|  | set testproc_fail  [llength $_testproc_fail_list] | 
|  | set testproc_skip  [llength $_testproc_skip_list] | 
|  | set testproc_pass  [llength $_testproc_pass_list] | 
|  | set testproc_count [expr $testproc_pass + \ | 
|  | $testproc_skip + \ | 
|  | $testproc_fail] | 
|  | set subtest_count  [expr $_subtest_fail_count + \ | 
|  | $_subtest_pass_count + \ | 
|  | $_subtest_skip_count] | 
|  |  | 
|  | # Initial summary | 
|  | append output [format "%s             : %s\n" $header  $test_name] | 
|  | if {$testproc_count > 0} { | 
|  | append output [format "  Testprocs failed  : %3d (%3d%%)%s\n" $testproc_fail [expr $testproc_fail * 100 / $testproc_count] \ | 
|  | [expr {$testproc_fail ? " List: [join $_testproc_fail_list ,]" : ""}]] | 
|  | append output [format "  Testprocs skipped : %3d (%3d%%)%s\n" $testproc_skip [expr $testproc_skip * 100 / $testproc_count] \ | 
|  | [expr {$testproc_skip ? " List: [join $_testproc_skip_list ,]" : ""}]] | 
|  | append output [format "  Testprocs passed  : %3d (%3d%%)\n"   $testproc_pass [expr $testproc_pass * 100 / $testproc_count]] | 
|  | append output [format "  Testprocs total   : %3d %s\n"        $testproc_count [expr {$completed ? "COMPLETE" : "INCOMPLETE: $_incomplete_reason"}]] | 
|  | } | 
|  | if {$subtest_count > 0} { | 
|  | if {$testproc_count > 0} { | 
|  | append output \n | 
|  | } | 
|  | append output [format "  Subtests failed   : %3d (%3d%%)\n" $_subtest_fail_count [expr $_subtest_fail_count * 100 / $subtest_count]] | 
|  | append output [format "  Subtests skipped  : %3d (%3d%%)\n" $_subtest_skip_count [expr $_subtest_skip_count * 100 / $subtest_count]] | 
|  | append output [format "  Subtests passed   : %3d (%3d%%)\n" $_subtest_pass_count [expr $_subtest_pass_count * 100 / $subtest_count]] | 
|  | append output [format "  Subtests total    : %3d %s\n"      $subtest_count [expr {$completed ? "COMPLETE" : "INCOMPLETE: $_incomplete_reason"}]] | 
|  | } | 
|  | append output [string repeat = 78]\n | 
|  |  | 
|  | # Detailed information | 
|  | if {$testsuite_subtest_details ne "none"} { | 
|  | set show_details false | 
|  | if {$testsuite_subtest_details eq "all" && $subtest_count > 0} { | 
|  | set show_details true | 
|  | } elseif {$testsuite_subtest_details eq "fail_skip" && $_subtest_skip_count > 0} { | 
|  | set show_details true | 
|  | } elseif {$_subtest_fail_count > 0} { | 
|  | set show_details true | 
|  | } | 
|  |  | 
|  | if {$show_details} { | 
|  | append output [format "SUBTESTS DETAILS     : %s\n" $test_name] | 
|  | dict for {id result_msg} $_subtest_messages { | 
|  | set result [lindex $result_msg 0] | 
|  | if {$testsuite_subtest_details eq "all"} { | 
|  | append output [format "  %s\n" [lindex $result_msg 1]] | 
|  | } elseif {$testsuite_subtest_details eq "fail_skip" && $result eq "skip"} { | 
|  | append output [format "  %s\n" [lindex $result_msg 1]] | 
|  | } elseif {$result eq "fail"} { | 
|  | append output [format "  %s\n" [lindex $result_msg 1]] | 
|  | } | 
|  | } | 
|  | append output [string repeat = 78]\n | 
|  | } | 
|  | } | 
|  | if {$testsuite_testproc_details ne "none"} { | 
|  | set show_details false | 
|  | if {$testsuite_testproc_details eq "all" && $testproc_count > 0} { | 
|  | set show_details true | 
|  | } elseif {$testsuite_testproc_details eq "fail_skip" && $testproc_skip > 0} { | 
|  | set show_details true | 
|  | } elseif {$testproc_fail > 0} { | 
|  | set show_details true | 
|  | } | 
|  |  | 
|  | if {$show_details} { | 
|  | append output [format "TESTPROCS DETAILS     : %s\n" $test_name] | 
|  | dict for {num result_msg} $_testproc_messages { | 
|  | set result [lindex $result_msg 0] | 
|  | if {$testsuite_testproc_details eq "all"} { | 
|  | append output [format "  Testproc %2d %-7s : %s\n" $num [lindex $result_msg 0] [lindex $result_msg 1]] | 
|  | } elseif {$testsuite_testproc_details eq "fail_skip" && $result eq "skipped"} { | 
|  | append output [format "  Testproc %2d %-7s : %s\n" $num [lindex $result_msg 0] [lindex $result_msg 1]] | 
|  | } elseif {$result eq "failed"} { | 
|  | append output [format "  Testproc %2d %-7s : %s\n" $num [lindex $result_msg 0] [lindex $result_msg 1]] | 
|  | } | 
|  | } | 
|  | append output [string repeat = 78]\n | 
|  | } | 
|  | } | 
|  |  | 
|  | if ($testsuite_colorize) { | 
|  | append output $COLOR_NONE | 
|  | } | 
|  |  | 
|  | puts -nonewline $output | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_get_test_name - gets the name of the invoking source script | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_get_test_name | 
|  | # | 
|  | # RETURN VALUE | 
|  | #	The name of the originally called script | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _get_test_name { } { | 
|  |  | 
|  | set test_name unknown | 
|  | set frame_level 1 | 
|  | while { $frame_level <= [info frame] } { | 
|  | if { [dict get [info frame $frame_level] type] == "source" } { | 
|  | set test_name [file tail [dict get [info frame $frame_level] file]] | 
|  | break | 
|  | } | 
|  | incr frame_level | 
|  | } | 
|  |  | 
|  | return $test_name | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_test_cleanup - performs the test cleanup | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_test_cleanup | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This procedure removes the temporary test_dir and calls the | 
|  | #	test-defined cleanup procedure. | 
|  | # | 
|  | # NOTES | 
|  | #	This function should be called only from _test_init and _test_fini. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _test_cleanup {} { | 
|  | global log_warn test_dir | 
|  |  | 
|  | set rc 0 | 
|  |  | 
|  | # Call global cleanup procedure if it is defined by the test | 
|  | if {[info procs cleanup] eq "cleanup"} { | 
|  | if {[catch {cleanup} cleanup_error ]} { | 
|  | log_error "Cleanup had errors: $cleanup_error" | 
|  | set rc 1 | 
|  | } | 
|  | } | 
|  |  | 
|  | # Remove the temporary test directory | 
|  | exec rm -rf $test_dir | 
|  |  | 
|  | return $rc | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_test_init - performs test initialization | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_test_init | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This procedure is called automatically at the beginning of each test. | 
|  | #	It prints the header, creates the temporary test dir, etc. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _test_init {} { | 
|  | global test_dir test_id test_name testsuite_shared_dir | 
|  | global _testproc_included _testproc_excluded argv bin_chmod scontrol | 
|  |  | 
|  | # parse argv to get and remove _testproc_included and _testproc_excluded params | 
|  | set idx [expr {[info exists argv] ? [lsearch $argv -i] : -1}] | 
|  | if {$idx >= 0} { | 
|  | set _testproc_included [split [lindex $argv [expr $idx + 1]] ,] | 
|  | set argv [lreplace $argv $idx [expr $idx + 1]] | 
|  | } | 
|  | set idx [expr {[info exists argv] ? [lsearch $argv -e] : -1}] | 
|  | if {$idx >= 0} { | 
|  | set _testproc_excluded [split [lindex $argv [expr $idx + 1]] ,] | 
|  | set argv [lreplace $argv $idx [expr $idx + 1]] | 
|  | } | 
|  |  | 
|  | # Set test name to name of originally invoked test script, e.g. test1.1 | 
|  | set test_name [_get_test_name] | 
|  |  | 
|  | # Set test id to suffix of the test script, e.g. 1.1 | 
|  | set test_id [string map {test ""} $test_name] | 
|  |  | 
|  | # Temporary test directory used to stash saved configs, output files... | 
|  | set test_dir  "$testsuite_shared_dir/${test_name}dir" | 
|  |  | 
|  | # Print test header | 
|  | _print_header | 
|  |  | 
|  | # Cleanup in case test was not cleaned up on last execution | 
|  | if { [_test_cleanup] } { | 
|  | fail "Error in the initial cleanup" | 
|  | } | 
|  |  | 
|  | # Create temporary shared test directory | 
|  | exec mkdir -p $test_dir | 
|  | exec $bin_chmod a+rwx $test_dir | 
|  |  | 
|  | # Default precondition checks | 
|  | if {![regexp "is UP" [run_command_output -nolog -none "$scontrol ping"]]} { | 
|  | skip "The expect testsuite requires Slurm to be running, but '$scontrol ping' failed." | 
|  | } | 
|  | set idle_nodes [get_nodes_by_state] | 
|  | if {[llength $idle_nodes] < 1} { | 
|  | skip "The expect testsuite requires there to be at least one idle node in the default partition." | 
|  | } | 
|  | # Ensure that idle nodes are not drained/down | 
|  | # Checking one by one to be able to print a warning if some was drain/down | 
|  | foreach idle_node $idle_nodes { | 
|  | run_command -nolog -xfail "$scontrol update nodename=$idle_node state=resume" | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # NAME | 
|  | #	_test_fini - performs test finalization | 
|  | # | 
|  | # SYNOPSIS | 
|  | #	_test_fini ?status? | 
|  | # | 
|  | # DESCRIPTION | 
|  | #	This procedure is called automatically from the ending functions | 
|  | #	pass, skip and failure. | 
|  | #	It cleans up based on the status and the testsuite_cleanup_on_failure | 
|  | #	variable, prints the final test status/summary, and exits the test. | 
|  | # | 
|  | #	We will always cleanup for SUCCESS or SKIPPED tests. | 
|  | #	Whether or not the cleanup procedure is called for FAILURE tests | 
|  | #	depends on the setting of the $testsuite_cleanup_on_failure variable | 
|  | #	which can be set in the globals.local file or overridden with the | 
|  | #	SLURM_TESTSUITE_CLEANUP_ON_FAILURE environment variable. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | proc _test_fini { status } { | 
|  | global testsuite_cleanup_on_failure _test_fini_called | 
|  | global _subtest_fail_count _subtest_skip_count | 
|  | global STATUS_FAIL STATUS_PASS STATUS_SKIP test_status | 
|  |  | 
|  | # Avoid potential infinite recursive calls. | 
|  | # _test_fini should be called only once, but custom cleanup procs | 
|  | # called from _test_cleanup can potentially call it (eg fail) | 
|  | if {$_test_fini_called} { | 
|  | log_debug "Recursive _test_fini call detected, most probably a fail on a cleanup function" | 
|  | return | 
|  | } | 
|  | set _test_fini_called true | 
|  |  | 
|  |  | 
|  | # Determine if test completed or was aborted | 
|  | set completed [expr $status == $STATUS_PASS ? true : false] | 
|  |  | 
|  | # Override status with subtest status if available and necessary | 
|  | if {$status != $STATUS_FAIL} { | 
|  | if {$_subtest_fail_count > 0} { | 
|  | set status $STATUS_FAIL | 
|  | } elseif {$_subtest_skip_count > 0} { | 
|  | set status $STATUS_SKIP | 
|  | } | 
|  | } | 
|  |  | 
|  | # Set final test status global variable so it can be used in cleanup | 
|  | set test_status $status | 
|  |  | 
|  | # Only cleanup if test not failed or configured to do so | 
|  | if {$status != $STATUS_FAIL || $testsuite_cleanup_on_failure} { | 
|  | _test_cleanup | 
|  | } | 
|  | _print_summary $status $completed | 
|  |  | 
|  | __exit $status | 
|  | } | 
|  |  | 
|  |  | 
|  | ################################################################ | 
|  | # | 
|  | # Overload the exit routine to ensure that no one is explicitly | 
|  | # calling it, and to enforce _test_fini if exit is called when | 
|  | # the test reach its EOF. | 
|  | # | 
|  | # All tests should exit using pass, skip or fail. | 
|  | # | 
|  | ################################################################ | 
|  |  | 
|  | rename exit __exit | 
|  |  | 
|  | proc exit { {status 0} } { | 
|  | global test_name | 
|  |  | 
|  | # To avoid failures when using "expect -c 'source globals'" | 
|  | if {$test_name eq "globals"} { | 
|  | pass | 
|  | } | 
|  |  | 
|  | # | 
|  | # Minor sanity check to detect if exit was explicitly called (not | 
|  | # allowed) or automatically executed when the test ends | 
|  | # | 
|  | if {[info level] > 1} { | 
|  | # exit was called from a function, and it shouldn't | 
|  | fail "Exit should not be directly called, use pass, skip or fail instead" | 
|  | } else { | 
|  | set frame_level 1 | 
|  | while { $frame_level <= [info frame] } { | 
|  | if { [dict get [info frame $frame_level] type] == "source" } { | 
|  | if { [file tail [dict get [info frame $frame_level] file]] eq $test_name } { | 
|  | # exit was called explicitly from the | 
|  | # test, and it shouldn't | 
|  | fail "Exit should not be directly called, use pass, skip or fail instead" | 
|  | } | 
|  | break | 
|  | } | 
|  | incr frame_level | 
|  | } | 
|  | } | 
|  |  | 
|  | if {$status != 0} { | 
|  | fail "An Expect/TCL exception occurred" | 
|  | } | 
|  |  | 
|  | # The exit was called implicitly when the test ends, allowed but | 
|  | # _test_fini call enforced | 
|  | pass | 
|  | } | 
|  |  | 
|  |  | 
|  | # Call _test_init at the beginning of each test | 
|  | _test_init |