testsuite/expect/globals - SchedMD/slurm - Git at Google

 #!/usr/bin/env expect
 ############################################################################
 # Purpose: Establish global state information for Slurm test suite
 #
 # To define site-specific state information, set the values in a file
 # named 'globals.local'. Those values will override any specified here.
 # for example:
 #
 # $ cat globals.local
 # set slurm_dir  "/usr/local"
 # set build_dir  "/home/mine/SLURM/build_smd"
 # set src_dir    "/home/mine/SLURM/slurm.git"
 # set mpicc      "/usr/local/bin/mpicc"
 #
 # If you want to have more than one test going at the same time for multiple
 # installs you can have multiple globals.local files and set the
 # SLURM_LOCAL_GLOBALS_FILE env var, and have that set to the correct
 # globals.local file for your various installs.  The file can be named anything,
 # not just globals.local.
 #
 ############################################################################
 # Copyright (C) 2002-2007 The Regents of the University of California.
 # Copyright (C) 2008-2010 Lawrence Livermore National Security.
 # Copyright (C) SchedMD LLC.
 # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 # Written by Morris Jette <jette1@llnl.gov>
 # Additions by Joseph Donaghy <donaghy1@llnl.gov>
 # CODE-OCEC-09-009. All rights reserved.
 #
 # This file is part of Slurm, a resource management program.
 # For details, see <https://slurm.schedmd.com/>.
 # Please also read the supplied file: DISCLAIMER.
 #
 # Slurm is free software; you can redistribute it and/or modify it under
 # the terms of the GNU General Public License as published by the Free
 # Software Foundation; either version 2 of the License, or (at your option)
 # any later version.
 #
 # Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 # FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 # details.
 #
 # You should have received a copy of the GNU General Public License along
 # with Slurm; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 ############################################################################

 # Avoid sourcing this file multiple times
 if {[info procs exit] eq "exit"} {
 	return
 }

 # Read in unified testsuite properties from testsuite/testsuite.conf
 set unified_testsuite_dir [file dirname [file normalize "[info script]/.."]]
 set unified_testsuite_conf_file "$unified_testsuite_dir/testsuite.conf"
 if {[info exists env(SLURM_TESTSUITE_CONF)]} {
 	set unified_testsuite_conf_file $env(SLURM_TESTSUITE_CONF)
 }
 set unified_testsuite_config_dict [dict create]
 if [file readable "$unified_testsuite_conf_file"] {
 	set fp [open "$unified_testsuite_conf_file" r]
 	set file_data [read $fp]
 	close $fp
 	foreach line [split $file_data "\n"] {
 		if [regexp {^\s*(\S+)\s*=\s*(\S+)\s*$} $line {} parameter_name parameter_value] {
 			dict set unified_testsuite_config_dict [string tolower $parameter_name] $parameter_value
 		}
 	}
 } else {
 	puts stderr "Warning: Unable to open the unified testsuite configuration file ($unified_testsuite_conf_file) for reading. This file can be created from a copy of the autogenerated sample found in BUILDDIR/testsuite/testsuite.conf.sample. By default, this file is expected to be found in SRCDIR/testsuite ($unified_testsuite_dir). If placed elsewhere, set the SLURM_TESTSUITE_CONF environment variable to the full path of your testsuite.conf file."
 	flush stderr
 }
 if [dict exists $unified_testsuite_config_dict "slurmsourcedir"] {
 	set src_dir [dict get $unified_testsuite_config_dict "slurmsourcedir"]
 }
 if [dict exists $unified_testsuite_config_dict "slurmbuilddir"] {
 	set build_dir [dict get $unified_testsuite_config_dict "slurmbuilddir"]
 }
 if [dict exists $unified_testsuite_config_dict "slurminstalldir"] {
 	set slurm_dir [dict get $unified_testsuite_config_dict "slurminstalldir"]
 }

 global sacctmgr sacct salloc sattach sbatch sbcast scancel scontrol sinfo
 global smd squeue sreport srun sstat strigger

 ################################################################
 #
 # NAME
 #	cset - conditional set
 #
 # SYNOPSIS
 #	cset name value
 #
 # DESCRIPTION
 #	Conditional set.  Only set variable if variable does not yet exist.
 #
 # Input: name  -- name of the variable to set
 #	 value -- value to set to 'name'
 #
 ################################################################

 proc cset {name value} {
 	if {![uplevel 1 info exists $name]} {
 		upvar $name tmp
 		set tmp $value
 	}
 }


 #
 # Defining colors here to be able to use them in globals.local.
 # By default, these colors are bold
 #
 set COLOR_RED         "\033\[1;31m"
 set COLOR_RED_NORMAL  "\033\[31m"
 set COLOR_ORANGE      "\033\[1;38;5;208m"
 set COLOR_YELLOW      "\033\[1;33m"
 set COLOR_GREEN       "\033\[1;32m"
 set COLOR_BLUE        "\033\[1;34m"
 set COLOR_MAGENTA     "\033\[1;35m"
 set COLOR_CYAN        "\033\[1;36m"
 set COLOR_NONE        "\033\[0m"

 cset local_globals_file "./globals.local"

 # Log level "enum"
 # Define log levels here so they are available in globals.local
 set LOG_LEVEL_QUIET   0
 set LOG_LEVEL_FATAL   1
 set LOG_LEVEL_ERROR   2
 set LOG_LEVEL_WARNING 3
 set LOG_LEVEL_INFO    4
 set LOG_LEVEL_PASS    4
 set LOG_LEVEL_COMMAND 4
 set LOG_LEVEL_DEBUG   5
 set LOG_LEVEL_TRACE   6

 if {[info exists env(SLURM_LOCAL_GLOBALS_FILE)]} {
 	set local_globals_file $env(SLURM_LOCAL_GLOBALS_FILE)
 }

 if [file exists $local_globals_file] {
 	source $local_globals_file
 }

 #
 # Specify the slurm install directory.
 # Used to locate binaries, libraries, and header files.
 #
 cset slurm_dir   "/usr"
 cset build_dir   "../../"
 cset src_dir     "../../"
 cset config_h    "${build_dir}/config.h"
 cset sacctmgr    "${slurm_dir}/bin/sacctmgr"
 cset sacct       "${slurm_dir}/bin/sacct"
 cset salloc      "${slurm_dir}/bin/salloc"
 cset sattach     "${slurm_dir}/bin/sattach"
 cset sbatch      "${slurm_dir}/bin/sbatch"
 cset sbcast      "${slurm_dir}/bin/sbcast"
 cset scancel     "${slurm_dir}/bin/scancel"
 cset scontrol    "${slurm_dir}/bin/scontrol"
 cset sdiag       "${slurm_dir}/bin/sdiag"
 cset sgather     "${slurm_dir}/bin/sgather"
 cset sh5util     "${slurm_dir}/bin/sh5util"
 cset sinfo       "${slurm_dir}/bin/sinfo"
 cset smd         "${slurm_dir}/bin/smd"
 cset sprio       "${slurm_dir}/bin/sprio"
 cset squeue      "${slurm_dir}/bin/squeue"
 cset srun        "${slurm_dir}/bin/srun"
 cset sreport     "${slurm_dir}/bin/sreport"
 cset sshare      "${slurm_dir}/bin/sshare"
 cset sstat       "${slurm_dir}/bin/sstat"
 cset strigger    "${slurm_dir}/bin/strigger"

 cset slurmd      "${slurm_dir}/sbin/slurmd"
 cset slurmrestd  "${slurm_dir}/sbin/slurmrestd"

 cset pbsnodes    "${slurm_dir}/bin/pbsnodes"
 cset qdel        "${slurm_dir}/bin/qdel"
 cset qstat       "${slurm_dir}/bin/qstat"
 cset qsub        "${slurm_dir}/bin/qsub"
 cset qalter      "${slurm_dir}/bin/qalter"
 cset qrerun      "${slurm_dir}/bin/qrerun"

 cset seff        "${slurm_dir}/bin/seff"

 cset lsid      	 "${slurm_dir}/bin/lsid"
 cset bjobs     	 "${slurm_dir}/bin/bjobs"
 cset bkill     	 "${slurm_dir}/bin/bkill"
 cset bsub     	 "${slurm_dir}/bin/bsub"

 cset influx	"/usr/bin/influx"
 # If using MPICH-2 or other version of MPI requiring pmi libary, use this
 #cset mpicc	"/home/jette/mpich2-install/bin/mpicc"
 #cset use_pmi	1
 # OR for other versions of MPICH, use this
 cset mpicc	"/usr/local/bin/mpicc"
 cset nvcc	"/usr/bin/nvcc"
 cset use_pmi	0
 #cset upcc       "/usr/local/bin/upcc"
 cset upcc       "/usr/bin/xlupc"
 cset oshcc      "/usr/local/bin/oshcc"

 cset mpirun	"mpirun"
 cset totalviewcli	"/usr/local/bin/totalviewcli"

 # Set if using "--enable-memory-leak-debug" configuration option
 cset enable_memory_leak_debug 0

 # test_prompt: to be used as prompt for interactive shells (see proc reset_bash_prompt)
 set test_prompt  "TEST_PROMPT: "

 #
 # Specify locations of other executable files used
 # Only the shell names (e.g. bin_bash) must be full pathnames
 #
 cset bin_awk	"awk"
 cset bin_bash   [exec which bash | tail -n 1]
 cset bin_cat	"cat"
 cset bin_cc	"gcc"
 cset bin_chmod	"chmod"
 cset bin_cmp	"cmp"
 cset bin_cp	"cp"
 cset bin_date	"date"
 cset bin_diff	"diff"
 cset bin_echo	"echo"
 cset bin_env	"env"
 cset bin_file	"file"
 cset bin_id	"id"
 cset bin_ip	"ip"
 cset bin_jq	"jq"
 cset bin_grep   "grep"
 cset bin_head   "head"
 cset bin_ln     "ln"
 cset bin_perldoc "/usr/bin/perldoc"
 cset bin_py3     "python3"
 cset bin_oasgen  "openapi-generator-cli"

 # Don't user $bin_hostname, use $bin_printenv SLURMD_NODENAME
 cset bin_hostname "hostname"

 cset bin_kill	"kill"
 cset bin_lscpu	"lscpu"
 cset bin_make	"make"
 cset bin_mv	"mv"
 cset bin_od	"od"
 cset bin_pkill	"pkill"
 cset bin_printenv "printenv"
 cset bin_ps	"ps"
 cset bin_pwd	"pwd"
 cset bin_rm	"rm"
 cset bin_sed	"sed"
 cset bin_sleep  "sleep"
 cset bin_sort	"sort"
 cset bin_socat	"socat"
 cset bin_sum	"sum"
 cset bin_sudo	"sudo"
 cset bin_systemd_detect_virt "systemd-detect-virt"
 cset bin_touch	"touch"
 cset bin_true	"true"
 cset bin_uname	"uname"
 cset bin_uniq	"uniq"
 cset bin_unshare "unshare"
 cset bin_virtualenv "virtualenv"
 cset bin_wc	"wc"

 #
 # Let the commands complete without expect timing out waiting for a
 # response. Single node jobs submitted to the default partition should
 # be initiated within this number of seconds.
 # for interactive slurm jobs: cset timeout $max_job_delay
 #
 cset max_job_delay 120

 #
 # Specify the maximum number of tasks to use in the stress tests.
 #
 cset max_stress_tasks 4

 #
 # The error message that the "sleep" command prints when we run "sleep aaa".
 #
 cset sleep_error_message "(invalid time interval)|(bad character in argument)|(usage: sleep seconds)"

 # Force LANG, as the expect tests aren't localized
 set ::env(LANG)          "en_US.UTF-8"

 # Testsuite level variables
 cset testsuite_dir "[file dirname [file normalize [info script]]]"
 cset testsuite_shared_dir "[$bin_pwd]"

 # Testsuite non-privileged user (set it in globals.local)
 cset testsuite_user ""

 # Testsuite log variables
 cset testsuite_log_level $LOG_LEVEL_DEBUG
 cset testsuite_log_format "\[%{timestamp}s.%{msecs}03d] %{loglevel}-7s %{message}s \(%{backtrace}s)"
 cset testsuite_time_format "%Y-%m-%d %H:%M:%S"

 # Default to using color if writing to a terminal and not if writing to a file
 cset testsuite_colorize        [dict exists [fconfigure stdout] -mode]
 cset testsuite_color_fatal     $COLOR_RED
 cset testsuite_color_error     $COLOR_RED_NORMAL
 cset testsuite_color_warn      $COLOR_ORANGE
 cset testsuite_color_info      $COLOR_YELLOW
 cset testsuite_color_pass      $COLOR_GREEN
 cset testsuite_color_command   $COLOR_CYAN
 cset testsuite_color_debug     $COLOR_BLUE
 cset testsuite_color_trace     $COLOR_MAGENTA
 cset testsuite_color_header    $COLOR_NONE
 cset testsuite_color_success   $COLOR_GREEN
 cset testsuite_color_failure   $COLOR_RED
 cset testsuite_color_skipped   $COLOR_ORANGE

 # Set to true to cause the first subtest failure to immediately end the test
 cset testsuite_subtest_fatal   false
 # Set to all, fail_skip, fail or none print datails of subtests and testprocs
 cset testsuite_subtest_details  fail
 cset testsuite_testproc_details fail
 cset testsuite_testproc_log_calls ignore_skips

 # To automatically call cleanup or not when ending the test
 cset testsuite_cleanup_on_failure true
 if {[info exists env(SLURM_TESTSUITE_CLEANUP_ON_FAILURE)]} {
 	set testsuite_cleanup_on_failure $env(SLURM_TESTSUITE_CLEANUP_ON_FAILURE)
 }

 # To avoid potential infinite loops due calls to fail/pass/skip inside
 # custom cleanup procs (_test_fini should be called only once)
 set _test_fini_called false

 # Testproc internal variables
 set _testproc_included    [list]
 set _testproc_excluded    [list]
 set _testproc_pass_list   [list]
 set _testproc_skip_list   [list]
 set _testproc_fail_list   [list]
 set _testproc_messages    [dict create]
 set _testproc_skip_next   false
 set _testproc_skip_reason ""
 set _incomplete_reason    ""
 set _subtest_messages     [dict create]

 set _subtest_pass_count   0
 set _subtest_skip_count   0
 set _subtest_fail_count   0

 set STATUS_PASS 0
 set STATUS_FAIL 1
 set STATUS_SKIP -1

 # Final test status
 set test_status $STATUS_PASS

 # Other common variables
 set re_word_str          "\\S+"
 set digit                "\\d"
 set eol                  "\r?\n"
 set float                "\\d+\\.?\\d*"
 set number               "\\d+"
 set format_time          "\\d+\\:\\d+\\:\\d+"
 set number_with_suffix   "\\d+\[KM\]*"
 set slash                "/"
 set whitespace		 "\\s+"
 set controlmachine_regex "\\S+"
 # Any characters except ( , : newline
 set no_delim             "\[^(,:\r\n\]"
 set no_delim_slash       "\[^(,:/\r\n\]"
 # The first group matches GRES name
 # The second **optional** group matches GRES type.
 # The third group matches GRES count.
 # Test out the regex here: https://regex101.com/r/FlNYKM/7
 set gres_regex "($no_delim_slash*):($no_delim*)?:?($no_delim*)?:?($no_delim*)"

 #basic #defines in slurm.h
 set NO_VAL 0xfffffffe
 set INFINITE 0xffffffff
 set SLURM_MAX_NORMAL_STEP_ID 0xfffffff0
 set SLURM_EXTERN_CONT 0xfffffffc
 set SLURM_BATCH_SCRIPT 0xfffffffb


 #
 # Global variable used in multiple functions in "globals" file
 #
 set gpu_sock_list {}

 #
 # Procedure return values
 #
 set RETURN_SUCCESS 0
 set RETURN_ERROR   1
 set RETURN_TIMEOUT 110 ; # ETIMEDOUT


 ################################################################
 #
 # NAME
 #	fail - fails a test
 #
 # SYNOPSIS
 #	fail message
 #
 # DESCRIPTION
 #	To be used when an error is fatal for the test. This routine
 #	prints the specified error message, optionally cleans up, prints
 #	a final test failure message, and exits the test with exit code 1.
 #
 # ENVIRONMENT
 #	Whether or not the cleanup procedure is called depends on the setting
 #	of the $testsuite_cleanup_on_failure set in the globals.local file or
 #	overridden with the SLURM_TESTSUITE_CLEANUP_ON_FAILURE environment
 #	variable.
 #
 # NOTE
 #	DO NOT call this within your local cleanup procedure.
 #
 ################################################################

 proc fail { message } {
 	global _incomplete_reason STATUS_FAIL

 	# Avoid recursive calls from within cleanup
 	if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} {
 		log_error "Local cleanup shouldn't call pass, fail or skip"
 		return
 	}

 	log_fatal $message
 	set _incomplete_reason $message

 	# _test_fini will handle cleanup and print the failure message.
 	_test_fini $STATUS_FAIL
 }


 ################################################################
 #
 # NAME
 #	skip - skips a test
 #
 # SYNOPSIS
 #	skip message
 #
 # DESCRIPTION
 #	To be used when a precondition for the test fails and the test
 #	should be skipped. This routine prints the specified warning message,
 #	calls the cleanup procedure if defined, prints a final test skipped
 #	message, and exits the test with exit code -1 (aka 255).
 #
 # NOTE
 #	DO NOT call this within your local cleanup procedure.
 #
 ################################################################

 proc skip { message } {
 	global _incomplete_reason STATUS_SKIP

 	# Avoid recursive calls from within cleanup
 	if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} {
 		log_error "Local cleanup shouldn't call pass, fail or skip"
 		return
 	}

 	log_warn $message
 	set _incomplete_reason $message

 	# _test_fini will handle cleanup and print the skipped message.
 	_test_fini $STATUS_SKIP
 }


 ################################################################
 #
 # NAME
 #	pass - passes a test
 #
 # SYNOPSIS
 #	pass
 #
 # DESCRIPTION
 #	To be used when a test passes and should complete with success.
 #	This routine calls the cleanup procedure if defined, prints a final
 #	test success message, and exits with exit code 0.
 #
 # NOTE
 #	DO NOT call this within your local cleanup procedure.
 #
 ################################################################

 proc pass { } {
 	global STATUS_PASS

 	# Avoid recursive calls from within cleanup
 	if {[info level] > 1 && [lindex [info level -1] 0] eq "cleanup"} {
 		log_error "Local cleanup shouldn't call pass, fail or skip"
 		return
 	}

 	# _test_fini will handle cleanup and print the success message.
 	_test_fini $STATUS_PASS
 }


 ################################################################
 #
 # NAME
 #	subpass - registers a passing subtest result
 #
 # SYNOPSIS
 #	subpass ?description?
 #
 # DESCRIPTION
 #	Increments the subtest pass count and logs a passing subtest message
 #
 # ARGUMENTS
 #	description
 #		A single-line string describing the subtest being verified
 #
 ################################################################

 proc subpass args {
 	global _subtest_fail_count _subtest_pass_count _subtest_skip_count
 	global _subtest_messages

 	set description ""
 	set argument_count [llength $args]
 	if {$argument_count == 1} { set args [lassign $args description] }
 	if {$argument_count > 1} {
 		fail "Too many arguments ($argument_count): $args"
 	}

 	set subtest_count [expr $_subtest_pass_count + $_subtest_fail_count + $_subtest_skip_count + 1]
 	incr _subtest_pass_count
 	set message [format "Subtest %2d passed" $subtest_count]
 	if {$description ne ""} { append message "  : $description" }
 	log_pass $message
 	dict set _subtest_messages $subtest_count [list pass $message]
 }


 ################################################################
 #
 # NAME
 #	subfail - registers a failing subtest result
 #
 # SYNOPSIS
 #	subfail ?options? ?description? ?diagnostics?
 #
 # DESCRIPTION
 #	Increments the subtest failure count and logs a failing subtest message
 #
 # OPTIONS
 #	-fatal
 #		Causes this subtest failure to be fatal, ending the test
 # ARGUMENTS
 #	description
 #		A single-line string describing the condition being verified
 #	diagnostics
 #		A string providing additional diagnostic information that will
 #		be included with the log message
 #
 # ENVIRONMENT
 #	testsuite_subtest_fatal
 #		Specifies whether first failing subtest aborts the test
 #
 ################################################################

 proc subfail args {
 	global _subtest_fail_count _subtest_pass_count _subtest_skip_count
 	global testsuite_subtest_fatal
 	global _subtest_messages

 	set description ""
 	set fatal       false
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fatal   {set fatal true; set args [lrange $args 1 end]}
 			-*       {fail "Unknown option: [lindex $args 0]"}
 			default  break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count >= 1} { set args [lassign $args description] }

 	set subtest_count [expr $_subtest_pass_count + $_subtest_fail_count + $_subtest_skip_count + 1]
 	incr _subtest_fail_count
 	set message [format "Subtest %2d failed" $subtest_count]
 	if {$description ne ""} { append message "  : $description" }
 	if [llength $args] { append message " (" [join $args ", "] ")" }
 	if {$fatal || $testsuite_subtest_fatal} {
 		fail $message
 	} else {
 		log_error $message
 	}
 	dict set _subtest_messages $subtest_count [list fail $message]
 }


 ################################################################
 #
 # NAME
 #	subskip - registers a skipped subtest result
 #
 # SYNOPSIS
 #	subskip ?options? ?description?
 #
 # DESCRIPTION
 #	Increments the subtest skip count and logs a skipped subtest message
 #
 # OPTIONS
 #	-count NUMBER
 #		When used with -skip, indicates the number of subtests that
 #		were skipped
 # ARGUMENTS
 #	description
 #		A single-line string describing the reason the subtest is
 #		being skipped
 #
 ################################################################

 proc subskip args {
 	global _subtest_fail_count _subtest_pass_count _subtest_skip_count
 	global _subtest_messages

 	set description ""
 	set count       1
 	set nolog       0
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-nolog {
 				set nolog 1
 				# Shift args down one
 				set args [lassign $args -]
 			}
 			-count   {set args [lassign $args - count]}
 			-*       {fail "Unknown option: [lindex $args 0]"}
 			default  break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count == 1} { set args [lassign $args description] }
 	if {$argument_count > 1} {
 		fail "Too many arguments ($argument_count): $args"
 	}

 	set subtest_count [expr $_subtest_pass_count + $_subtest_fail_count + $_subtest_skip_count + 1]
 	incr _subtest_skip_count $count
 	if {$count > 1} {
 		set message "Subtest $subtest_count-[expr $subtest_count+$count-1] skipped"
 	} else {
 		set message [format "Subtest %2d skipped" $subtest_count]
 	}
 	if {$description ne ""} { append message " : $description" }
 	if {$nolog != 1} {
 		log_warn $message
 	}
 	dict set _subtest_messages [expr $subtest_count] [list skip $message]
 }


 ################################################################
 #
 # NAME
 #	get_subtest_fail_count - returns the current subtest failure count
 #
 # SYNOPSIS
 #	get_subtest_fail_count
 #
 ################################################################

 proc get_subtest_fail_count {} {
 	global _subtest_fail_count

 	return $_subtest_fail_count
 }


 ################################################################
 #
 # NAME
 #	reset_bash_prompt - used in expect body to reset bash prompt
 #
 # SYNOPSIS
 #	reset_bash_prompt
 #
 # DESCRIPTION
 #	The procedure must be called within an expect body to
 #	reset the bash prompt to the $test_prompt.
 #
 # EXAMPLE
 #	spawn $salloc $bin_bash
 #	expect {
 #		-re "Granted job allocation ($number)" {
 #			set job_id $expect_out(1,string)
 #			reset_bash_prompt
 #			exp_continue
 #		}
 #		-re $test_prompt {}; # Expect initial test prompt from bash
 #		timeout {
 #			fail "salloc not responding or prompt not recognized"
 #		}
 #	}
 #
 ################################################################

 proc reset_bash_prompt {} {
 	global testsuite_dir

 	upvar spawn_id sid
 	send -i $sid "source $testsuite_dir/reset_bash_prompt.sh\n"
 }


 ################################################################
 #
 # NAME
 #	print_time - prints the current date and time
 #
 # SYNOPSIS
 #	print_time
 #
 ################################################################

 proc print_time { } {
 	global bin_date

 	spawn $bin_date
 	expect {
 		eof {
 			wait
 		}
 	}

 	return
 }

 ################################################################
 #
 # NAME
 #	dict_getdef - 'dict get' with ability to specify the default value
 #
 # SYNOPSIS
 #	dict_getdef dictionary_value key default_value
 #
 # DESCRIPTION
 #	Tcl < 8.7 lacks a built in 'dict get' with ability to specify the
 #	default value. Tcl 8.7 adds a dict getdef.
 #	This proc returns the value from the dictionary corresponding to the
 #	keys if it exists, or the default value otherwise.
 #
 # EXAMPLE
 #	dict_getdef $option_dict action "warn"
 #
 # SOURCE
 #	https://core.tcl-lang.org/tips/doc/trunk/tip/342.md
 #	https://core.tcl-lang.org/tcl/tktview/2370575
 #
 ################################################################

 proc dict_getdef {D args} {
 	if {[dict exists $D {*}[lrange $args 0 end-1]]} then {
 		dict get $D {*}[lrange $args 0 end-1]
 	} else {
 		lindex $args end
 	}
 }


 ################################################################
 #
 # NAME
 #	_line_trace - returns an abbreviated call stack trace with line numbers
 #
 # SYNOPSIS
 #	_line_trace
 #
 ################################################################

 proc _line_trace {} {
 	set line_trace  ""
 	set first_entry true
 	for {set f [expr [info frame] - 3]} {$f >= 1} {incr f -1} {
 		set frame_dict [info frame $f]
 		if [dict exists $frame_dict file] {
 			if [regexp uplevel [dict get $frame_dict cmd]] {
 				continue
 			}
 			if {$first_entry} {
 				set first_entry false
 			} else {
 				append line_trace ","
 			}
 			if [dict exists $frame_dict proc] {
 				set proc [namespace tail [dict get $frame_dict proc]]
 				if {$proc ne ""} {
 					append line_trace "$proc\@"
 				}
 			}
 			append line_trace [file tail [dict get $frame_dict file]]
 			if [dict exists $frame_dict line] {
 				append line_trace ":[dict get $frame_dict line]"
 			}
 		}
 	}
 	return $line_trace
 }


 ################################################################
 #
 # NAME
 #	tolerance - determines whether a value is within a specified tolerance
 #
 # SYNOPSIS
 #	tolerance expected observed tolerance_expression
 #
 # ARGUMENTS
 #	expected
 #		the expected (numeric) value
 #	observed
 #		the observed (numeric) value
 #	tolerance_expression
 #		a string of the form: [~][+|-]<tolerance>[%]
 #
 # DESCRIPTION
 #	tolerance
 #		A numeric tolerance
 #	symmetry
 #		By default the permitted range of values is symetric:
 #			[expected - tolerance, expected + tolerance]
 #		If the + sign is specified, the tolerance is limited to the
 #		the higher side only:
 #			[expected, expected + tolerance]
 #		If the - sign is specified, the tolerance is limited to the
 #		the lower side only:
 #			[expected - tolerance, expected]
 #	percent
 #		By default the permitted range is computed as absolute values:
 #			[expected - tolerance, expected + tolerance]
 #		If % is specified, the permitted range is computed as a
 #		percentage of the expected value:
 #			[expected*(1-tolerance/100), expected*(1+tolerance/100)]
 #	exclusivity
 #		By default the permitted range of values is inclusive, ie
 #		the min and max tolerated values are included in the range:
 #			[expected - tolerance, expected + tolerance]
 #		If ~ (exclusive) is specified, the tolerance limits are
 #		exclusive, ie the min and max tolerated values are excluded:
 #			(expected - tolerance, expected + tolerance)
 #	expression
 #		any combination of symetry, percent and exclusivity is allowed
 #
 # RETURN VALUE
 #	Returns true if the observed value is within the specified tolerance
 #	range of the expected value, otherwise false
 #
 # EXAMPLES
 #	The indicated tolerance_expression is true if:
 #	"5"     expected - 5  <= observed <= expected + 5
 #	"-5"    expected - 5  <= observed <= expected
 #	"+5"    expected      <= observed <= expected + 5
 #	"5%"    expected - 5% <= observed <= expected + 5%
 #	"~5"    expected - 5  <  observed <  expected + 5
 #	"~+5%"  expected      <= observed <  expected + 5%
 #
 ################################################################

 proc tolerance { expected observed tolerance_expression } {
 	if {![regexp {^(~?)([-+]?)([0-9\.]+)(%?)$} $tolerance_expression {} exclusive sign tolerance percent]} {
 		fail "Invalid tolerance expression ($tolerance_expression)"
 	}

 	set lower_bound_expression $observed
 	if {$sign eq "+" || $exclusive ne "~"} {
 		append lower_bound_expression " >="
 	} else {
 		append lower_bound_expression " >"
 	}
 	append lower_bound_expression " $expected"
 	if {$sign eq "-" || $sign eq ""} {
 		if {$percent eq "%"} {
 			append lower_bound_expression " - $tolerance * $expected / 100"
 		} else {
 			append lower_bound_expression " - $tolerance"
 		}
 	}

 	set upper_bound_expression $observed
 	if {$sign eq "-" || $exclusive ne "~"} {
 		append upper_bound_expression " <="
 	} else {
 		append upper_bound_expression " <"
 	}
 	append upper_bound_expression " $expected"
 	if {$sign eq "+" || $sign eq ""} {
 		if {$percent eq "%"} {
 			append upper_bound_expression " + $tolerance * $expected / 100"
 		} else {
 			append upper_bound_expression " + $tolerance"
 		}
 	}

 	if {[expr $lower_bound_expression] && [expr $upper_bound_expression]} {
 		log_debug "$observed is within tolerance $tolerance_expression of $expected"
 		return true
 	} else {
 		log_warn "$observed is not within tolerance $tolerance_expression of $expected"
 		return false
 	}
 }


 ################################################################
 #
 # NAME
 #	check_run_as_user - check if the caller may run_command as the supplied user
 #
 # SYNOPSIS
 #	check_run_as_user user
 #
 # DESCRIPTION
 #	Note that a proper sudo config needs to be set in orther to pass this
 #	check. Calling user should be permitted to run_commands as the
 #	supplied user using sudo without password.
 #	See the -user option of run_command.
 #	This proc also log_warn a message if user already exists in the DB
 #	because most probably this user is testsuite_user and that user is
 #	expected NOT to be in the DB and could potentially be removed from it by
 #	the test.
 #
 # RETURN VALUE
 #	Returns a boolean value indicating whether the calling user may
 #	run_command as user.
 #
 ################################################################

 proc check_run_as_user user {
 	global bin_id

 	if {$user eq ""} {
 		return false
 	}
 	if {[run_command_status -none -user $user "$bin_id -un"]} {
 		return false
 	}

 	if {[get_admin_level $user] != ""} {
 		log_warn "User $user already exists in DB, but it's probable that it's going to be removed by the test cleanup"
 	}

 	return true
 }


 ################################################################
 #
 # NAME
 #	check_user_id - check if user exists in the system
 #
 # SYNOPSIS
 #	check_user_id user
 #
 # DESCRIPTION
 #	This proc also log_warn a message if user already exists in the DB
 #	because most probably this user is testsuite_user and that user is
 #	expected NOT to be in the DB and could potentially be removed from
 #	it by the test.
 #
 # RETURN VALUE
 #	Returns true if the user passed exists in the system, false otherwise.
 #
 ################################################################

 proc check_user_id user {
 	global bin_id

 	if {$user eq ""} {
 		return false
 	}
 	if {[run_command_status -none "$bin_id -un $user"]} {
 		return false
 	}

 	if {[get_admin_level $user] != ""} {
 		log_warn "User $user already exists in DB, but it's probable that it's going to be removed by the test cleanup"
 	}

 	return true
 }


 ################################################################
 #
 # NAME
 #	run_command - executes a command and returns a dictionary result
 #
 # SYNOPSIS
 #	run_command ?options? command
 #
 # DESCRIPTION
 #	Executes a command and returns a dictionary that includes the output,
 #	exit code, etc. An action can be taken (fail, warn, subtest, none) if
 #	the command's exit code is unexpected. By default, the action
 #	will be applied if the command fails. If the -xfail option is
 #	specified, the behavior will be reversed to apply the action if the
 #	command ran successfully.
 #	A timeout is always treated as unexpected, so log_error will be shown
 #	by default, or fail/subfail will be called if -fail/-subtest are used.
 #
 # OPTIONS
 #	-fail
 #		If the exit code is unexpected, the action that will
 #		be taken is to fail the test
 #	-subtest
 #		If the exit code is unexpected, the action that will
 #		be taken is to subfail, otherwise subpass will be called
 #	-warn
 #		If the exit code is unexpected, the action that will
 #		be taken is to log a warning (this is the default)
 #	-none
 #		If the exit code is unexpected, no action will be taken
 #	-xfail
 #		If the command exits with zero the action will be applied.
 #		Without this option, the action will be applied if the
 #		command exits with a non-zero exit code.
 #	-timeout <float_number>
 #		Time in seconds to wait for the command to complete before
 #		timing out (default is 60.0)
 #	-nolog
 #		Logging for this command will occur at trace threshold only
 #	-stdin
 #		Provide standard in to be piped into command
 #	-user <user>
 #		Attempt to execute command as <user>. Note that sudo must be
 #		properly configured to permit the caller to execute as <user>.
 #		See check_run_as_user.
 #
 # ARGUMENTS
 #	command
 #		a string containing the command and arguments to execute
 #
 # RETURN VALUE
 #	A dictionary containing the following elements:
 #		command    - The command that was invoked
 #		exit_code  - Exit code
 #		output     - The combined standard output and standard error
 #		start_time - The time (with ms) the command was executed
 #		duration   - The duration (seconds and milliseconds) the
 #		             command took to run
 #
 ################################################################
 proc run_command args {
 	global bin_bash bin_sudo

 	set alt_user ""
 	set exit_status 0
 	set timedout false
 	set output ""
 	set action "warn"
 	set timeout 60
 	set expect_failure false
 	set log_at_trace_level false
 	set stdin ""
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fail    {set action "fail"; set args [lrange $args 1 end]}
 			-subtest {set action "subtest"; set args [lrange $args 1 end]}
 			-none    {set action "none"; set args [lrange $args 1 end]}
 			-timeout {set args [lassign $args - timeout]}
 			-warn    {set action "warn"; set args [lrange $args 1 end]}
 			-xfail   {set expect_failure true; set args [lrange $args 1 end]}
 			-nolog   {set log_at_trace_level true; set args [lrange $args 1 end]}
 			-stdin   {set stdin [lindex $args 1]; set args [lassign $args - stdin]}
 			-user    {set args [lassign $args - alt_user]}
 			-*       {fail "Unknown option: [lindex $args 0]"}
 			default  break
 		}
 	}
 	if {[llength $args] == 1} {
 		lassign $args command
 	} else {
 		fail "Invalid number of arguments [llength $args]: $args"
 	}

 	if {$action eq "subtest"} {
 		if {$expect_failure} {
 			set test_description "Command \"$command\" should fail"
 		} else {
 			set test_description "Command \"$command\" should succeed"
 		}
 	}

 	if {$log_at_trace_level} {
 		interp alias {} log_run     {} log_trace
 		interp alias {} log_details {} log_trace
 	} else {
 		interp alias {} log_run     {} log_command
 		interp alias {} log_details {} log_debug
 	}

 	set orig_log_user [log_user -info]
 	log_user 0

 	if {$alt_user ne ""} {
 		log_run "Run Command as user $alt_user: $command"
 	} else {
 		log_run "Run Command: $command"
 	}
 	set start_clock_ms [clock milliseconds]
 	set stty_init raw ; # Prevent the terminal from inserting \r
 	if {$alt_user ne ""} {
 		set expect_pid [spawn -noecho $bin_sudo -nu $alt_user $bin_bash -c "$command"]
 	} else {
 		set expect_pid [spawn -noecho $bin_bash -c "$command"]
 	}
 	if { $stdin != "" } {
 		exp_send "$stdin"
 		set command "$command <<< $stdin"
 	}
 	expect {
 		-re "(.+)" {
 			append output $expect_out(1,string)
 			exp_continue
 		}
 		timeout {
 			slow_kill $expect_pid
 			set exit_status $::RETURN_TIMEOUT
 			set timedout true
 		}
 		eof {
 			lassign [wait] pid spawnid os_error_flag errno
 			set exit_status [expr $errno > 128 ? $errno - 256 : $errno]
 		}
 	}
 	set start_time [format "%.3f" [expr $start_clock_ms / 1000.000]]
 	set end_time [format "%.3f" [expr [clock milliseconds] / 1000.000]]
 	set duration [format "%.3f" [expr $end_time - $start_time]]

 	log_details "Command Results:"
 	log_details "	Duration:   $duration"
 	log_details "	Exit Code:  $exit_status"
 	if {[info exists output]} {
 		log_details "	Output: $output"
 	}

 	if {$timedout} {
 		set message "Command \"$command\" timed out after $timeout seconds"
 		if {$action eq "fail"} {
 			fail $message
 		} elseif {$action eq "subtest"} {
 			subfail $test_description $message
 		} else {
 			log_error $message
 		}
 	} elseif {! $expect_failure && $exit_status != 0} {
 		set message "Command \"$command\" failed with rc=$exit_status"
 		if {[info exists output] && $output != ""} {
 			append message ": [string trimright $output]"
 		}
 		if {$action eq "warn"} {
 			log_warn $message
 		} elseif {$action eq "subtest"} {
 			subfail $test_description $message
 		} elseif {$action eq "fail"} {
 			fail $message
 		}
 	} elseif {$expect_failure && $exit_status == 0} {
 		set message "Command \"$command\" was expected to fail but succeeeded"
 		if {$action eq "warn"} {
 			log_warn $message
 		} elseif {$action eq "subtest"} {
 			subfail $test_description $message
 		} elseif {$action eq "fail"} {
 			fail $message
 		}
 	} elseif {$action eq "subtest"} {
 		subpass $test_description
 	}
 	log_user $orig_log_user

 	dict set result command    $command
 	dict set result exit_code  $exit_status
 	dict set result output     $output
 	dict set result start_time $start_time
 	dict set result duration   $duration

 	return $result
 }


 ################################################################
 #
 # NAME
 #	run_command_output - executes a command and returns the output
 #
 # SYNOPSIS
 #	run_command_output ?options? command
 #
 # DESCRIPTION
 #	Executes a command and returns a dictionary that includes the output,
 #	exit code, etc. An action can be taken (fail, warn, none)  if the
 #	command's exit code or timeout is unexpected. By default, the action
 #	will be applied if the command fails. If the -xfail option is
 #	specified, the behavior will be reversed to apply the action if the
 #	command ran successfully.
 #
 # OPTIONS
 #	-fail
 #		if the exit code or timeout is unexpected, the action that will
 #		be taken is to fail the test
 #	-warn
 #		if the exit code or timeout is unexpected, the action that will
 #		be taken is to log a warning (this is the default)
 #	-none
 #		if the exit code or timeout is unexpected, no action will be
 #		taken
 #	-xfail
 #		if the command exits with zero and does not time out, the
 #		action will be applied. Without this option, the action will
 #		be applied if the command exits with a non-zero exit code or
 #		times out.
 #	-timeout <float_number>
 #		time in seconds to wait for the command to complete before
 #		timing out (default is 60.0)
 #	-stdin
 #		Provide standard in to be piped into command
 #
 # ARGUMENTS
 #	command
 #		a string containing the command and arguments to execute
 #
 # RETURN VALUE
 #	A string containing the combined standard output and standard error
 #
 ################################################################

 proc run_command_output args {

 	set result [run_command {*}$args]

 	if [dict exists $result output] {
 		return [dict get $result output]
 	} else {
 		return ""
 	}
 }


 ################################################################
 #
 # NAME
 #	run_command_status - executes a command and returns the exit code
 #
 # SYNOPSIS
 #	run_command_status ?options? command
 #
 # DESCRIPTION
 #	Executes a command and returns a dictionary that includes the output,
 #	exit code, etc. An action can be taken (fail, warn, none)  if the
 #	command's exit code or timeout is unexpected. By default, the action
 #	will be applied if the command fails. If the -xfail option is
 #	specified, the behavior will be reversed to apply the action if the
 #	command ran successfully.
 #
 # OPTIONS
 #	-fail
 #		if the exit code or timeout is unexpected, the action that will
 #		be taken is to fail the test
 #	-warn
 #		if the exit code or timeout is unexpected, the action that will
 #		be taken is to log a warning (this is the default)
 #	-none
 #		if the exit code or timeout is unexpected, no action will be
 #		taken
 #	-xfail
 #		if the command exits with zero and does not time out, the
 #		action will be applied. Without this option, the action will
 #		be applied if the command exits with a non-zero exit code or
 #		times out.
 #	-timeout <float_number>
 #		time in seconds to wait for the command to complete before
 #		timing out (default is 60.0)
 #	-stdin
 #		Provide standard in to be piped into command
 #
 # ARGUMENTS
 #	command
 #		a string containing the command and arguments to execute
 #
 # RETURN VALUE
 #	The exit code for the invoked command
 #
 ################################################################

 proc run_command_status args {

 	set result [run_command {*}$args]

 	return [dict get $result exit_code]
 }


 ################################################################
 #
 # NAME
 #	cancel_job - cancels the specified job list
 #
 # SYNOPSIS
 #	cancel_job job_id_list ?het_job?
 #
 # DESCRIPTION
 #	Cancels one or more jobs. A job_id of 0 will be silently ignored.
 #
 # OPTIONS
 #	-fail
 #		if scancel fails with exit code or timeout, or the job doesn't
 #		end, test will will fail
 #
 # ARGUMENTS
 #	job_id_list
 #		The list of Slurm job ids that we want to cancel
 #	het_job
 #		1 if jobs are hetjobs and we want to confirm each
 #		component has completed
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if jobs are cancelled, or non-zero value otherwise.
 #
 ################################################################

 proc cancel_job args {
 	global scancel

 	set fatal         false
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fail   {set fatal true; set args [lrange $args 1 end]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}

 	set argument_count [llength $args]
 	if {$argument_count < 1} {
 		fail "Too few arguments ($argument_count): $args"
 	} elseif {$argument_count > 2} {
 		fail "Too many arguments ($argument_count): $args"
 	}
 	lassign $args job_id_list
 	if {$argument_count == 2} {
 		set het_job [lindex $args 1]
 	} else {
 		set het_job 0
 	}

 	set job_list_clean [list]
 	foreach job_id $job_id_list {
 		if {$job_id != 0} {
 			lappend job_list_clean $job_id
 		}
 	}

 	if {![llength $job_list_clean]} {
 		return $::RETURN_SUCCESS
 	}

 	log_debug "Cancelling $job_list_clean"
 	set result [run_command "$scancel -Q $job_list_clean"]
 	if {[dict get $result exit_code]} {
 		set message "scancel command returned an error ([dict get $result output])"
 		if {$fatal} {
 			fail $message
 		} else {
 			log_warn $message
 			return $::RETURN_ERROR
 		}
 	}
 	foreach job_id $job_list_clean {
 		if {[wait_for_job $job_id "DONE" $het_job]} {
 			set message "Job ($job_id) did not end when cancelled"
 			if {$fatal} {
 				fail $message
 			} else {
 				log_warn $message
 				return $::RETURN_ERROR
 			}
 		}
 	}
 	return $::RETURN_SUCCESS
 }


 ################################################################
 #
 # NAME
 #	get_line_cnt - returns the size of the specified file
 #
 # SYNOPSIS
 #	get_line_cnt file_name
 #
 # RETURN VALUE
 #	Number of lines in the specified file.
 #
 ################################################################

 proc get_line_cnt { file_name } {
 	global bin_wc number
 	set lines 0
 	spawn $bin_wc -l $file_name
 	expect {
 		-re "($number) " {
 			set lines $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	return $lines
 }


 ################################################################
 #
 # NAME
 #	convert_time_str - Converts a string like [HH:]MM:SS[.sss] to secs
 #
 # SYNOPSIS
 #	Convert the string to a number.
 #
 # DESCRIPTION
 #	Units can be hours, mins, secs.
 #
 # RETURN VALUE
 #	A non-zero return code indicates a failure.
 #
 ################################################################

 proc convert_time_str { time_str units } {
 	subtest {[regexp {(?:(\d+):)?(\d+):(\d+)(?:\.(\d+))?} $time_str - hours mins secs msecs]} "Time should be in the format \[HH:\]MM:SS\[.sss\]" "$time_str"
 	if { $hours eq "" } {
 		set hours 0
 	}

 	# Use scan to avoid TCL octals
 	set value  [expr ([scan $hours "%d"] * 3600) + ([scan $mins "%d"] * 60) + [scan $secs "%d"]]

 	if { $units eq "hours" } {
 		set value [expr $value / 3600]
 	} elseif { $units eq "mins" } {
 		set value [expr $value / 60]
 	}

 	return $value
 }


 ################################################################
 #
 # NAME
 #	slow_kill - kills a process slowly
 #
 # SYNOPSIS
 #	slow_kill pid
 #
 # DESCRIPTION
 #	Kill a process slowly, first trying SIGINT, pausing for
 #       a second, then sending SIGKILL.
 #
 # RETURN VALUE
 #	A non-zero return code indicates a failure.
 #
 ################################################################

 proc slow_kill { pid } {
 	global bin_kill

 	catch {exec $bin_kill -INT $pid}
 	catch {exec $bin_kill -INT $pid}
 	sleep  1
 	catch {exec $bin_kill -KILL $pid}

 	return 0
 }


 ################################################################
 #
 # NAME
 #	get_my_id - gets the id from the running user
 #
 # SYNOPSIS
 #	get_my_id
 #
 # RETURN VALUE
 #	output of id
 #
 ################################################################

 proc get_my_id {} {

 	global bin_id number
 	set login_info -1

 	log_user 0

 	spawn $bin_id
 	expect {
 		-re "(uid=.*\n)" {
 			set login_info $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}

 	log_user 1

 	if {$login_info == -1} {
 		fail "Unable to get user info"
 	}

 	return $login_info
 }


 ################################################################
 #
 # NAME
 #	get_my_user_name - gets the name from the running user
 #
 # SYNOPSIS
 #	get_my_user_name
 #
 # RETURN VALUE
 #	A non-zero return code indicates a failure.
 #
 ################################################################

 proc get_my_user_name {  } {
 	global bin_id re_word_str

 	set user_name -1

 	log_user 0
 	spawn $bin_id -nu
 	expect {
 		-re "($re_word_str)" {
 			set user_name $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	if {$user_name == -1} {
 		fail "Unable to get user name"
 	}

 	return $user_name
 }


 ################################################################
 #
 # NAME
 #	get_my_uid - gets the uid from the running user
 #
 # SYNOPSIS
 #	get_my_uid
 #
 # RETURN VALUE
 #	The uid of the current user, or fails.
 #
 ################################################################

 proc get_my_uid {  } {
 	global bin_id number

 	set out [run_command_output -nolog -fail "$bin_id -u"]
 	if {![regexp "($number)" $out - uid]} {
 		fail "Unable to get UID with $bin_id ($out)"
 	}

 	return $uid
 }


 ################################################################
 #
 # NAME
 #	get_my_gid - gets the gid from the running user
 #
 # SYNOPSIS
 #	get_my_gid
 #
 # RETURN VALUE
 #	A non-zero return code indicates a failure.
 #
 ################################################################

 proc get_my_gid {  } {
 	global bin_id number

 	set gid -1

 	log_user 0
 	spawn $bin_id -g
 	expect {
 		-re "($number)" {
 			set gid $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	return $gid
 }


 ################################################################
 #
 # NAME
 #	kill_salloc - kills all salloc commands associated with this user
 #
 # SYNOPSIS
 #	kill_salloc
 #
 # DESCRIPTION
 #	Kill all salloc commands associated with this user.
 #	Issue two SIGINT, sleep 1 and a SIGKILL
 #
 # RETURN VALUE
 #	A non-zero return code indicates a failure.
 #
 # NOTE
 #	Use slow_kill instead of kill_salloc if you can capture
 #	the process id
 #
 ################################################################

 proc kill_salloc {  } {
 	global bin_id bin_pkill bin_sleep number

 	set uid [get_my_uid]
 	catch {exec $bin_pkill -INT -u $uid salloc}
 	catch {exec $bin_pkill -INT -u $uid salloc}
 	sleep  1
 	catch {exec $bin_pkill -KILL -u $uid salloc}

 	return 0
 }


 ################################################################
 #
 # NAME
 #	kill_srun - kills all srun commands associated with this user
 #
 # SYNOPSIS
 #	kill_srun
 #
 # DESCRIPTION
 #	Kill all srun commands associated with this user.
 #	Issue two SIGINT, sleep 1 and a SIGKILL
 #
 # RETURN VALUE
 #	A non-zero return code indicates a failure.
 #
 # NOTE
 #	Use slow_kill instead of kill_srun if you can capture
 #	the process id
 #
 ################################################################

 proc kill_srun {  } {
 	global bin_id bin_pkill bin_sleep number

 	set uid [get_my_uid]
 	catch {exec $bin_pkill -INT -u $uid srun}
 	catch {exec $bin_pkill -INT -u $uid srun}
 	sleep  1
 	catch {exec $bin_pkill -KILL -u $uid srun}

 	return 0
 }


 ################################################################
 #
 # NAME
 #	wait_for - generic wait utility
 #
 # SYNOPSIS
 #	wait_for ?options? condition body
 #
 # DESCRIPTION
 #	Generic wait utility allowing you to repeatedly execute a generic block
 #	of code until a specified boolean expression is met. The code block and
 #	condition check occur every poll interval until a timeout is reached.
 #
 # OPTIONS
 #	-fail
 #		fail the test if the condition is not met within the timeout
 #	-subtest
 #		if the condition is met within the timeout call subpass, otherwise
 #		call subfail
 #	-timeout <float_number>
 #		time in seconds to wait for the condition to be met before
 #		timing out (default is 60.0)
 #	-pollinterval <float_number>
 #		time in seconds between each loop execution and condition check
 #		(default is 1.0)
 #
 # ARGUMENTS
 #	condition
 #		The boolean expression to test
 #	body
 #		A block of code to evaluate in the invoking stack frame
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if the condition is met before the timeout occurs,
 #	RETURN_TIMEOUT if the timeout occurs before the condition is met
 #
 ################################################################

 proc wait_for args {
 	set action          "none"
 	set subtest         false
 	set timeout         60
 	set poll_interval   1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fail       {set action "fail"; set args [lrange $args 1 end]}
 			-subtest    {set action "subtest"; set args [lrange $args 1 end]}
 			-time*      {set args [lassign $args - timeout]}
 			-poll*      {set args [lassign $args - poll_interval]}
 			-*          {fail "Unknown option: [lindex $args 0]"}
 			default     break
 		}
 	}
 	if {[llength $args] == 2} {
 		lassign $args condition body
 	} else {
 		fail "Invalid number of arguments [llength $args]: $args"
 	}

 	set start_time [format "%.3f" [expr [clock milliseconds] / 1000.000]]

 	log_debug "Waiting for $condition"

 	while {1} {
 		# Evaluate code block
 		log_trace "Evaluating code block ([string trim $body])"
 		uplevel $body

 		# Check condition
 		if {[uplevel expr [format "{%s}" $condition]]} {
 			set now [format "%.3f" [expr [clock milliseconds] / 1000.000]]
 			log_debug "Condition ($condition) was met"
 			if {$action eq "subtest"} {
 				subpass "Condition ($condition) should be met (within $timeout seconds)"
 			}
 			return $::RETURN_SUCCESS
 		} else {
 			log_trace "Condition ($condition) was not met"
 		}

 		# Sleep poll interval
 		log_trace "Sleeping for $poll_interval seconds"
 		after [expr {int($poll_interval * 1000)}]

 		# Check if we have surpassed our timeout
 		set now [format "%.3f" [expr [clock milliseconds] / 1000.000]]
 		log_trace "Checking whether the current time ([clock format [expr int($now)] -format %Y-%m-%dT%X].[lindex [split $now '.'] 1]) is greater than the start time plus the timeout ([clock format [expr int($start_time + $timeout)] -format %Y-%m-%dT%X].[lindex [split [expr $start_time + $timeout] '.'] 1])"
 		if {$now > $start_time + $timeout} {
 			set message "Condition ($condition) was not met before timeout ($timeout seconds)"
 			if {$action eq "fail"} {
 				fail $message
 			} else {
 				log_warn $message
 				if {$action eq "subtest"} {
 					subfail "Condition ($condition) should be met (within $timeout seconds)" "Timed out"
 				}
 				return $::RETURN_TIMEOUT
 			}
 		}
 	}
 }


 ################################################################
 #
 # NAME
 #	wait_for_command - repeat a command until it is successful or meets a specified condition
 #
 # SYNOPSIS
 #	wait_for_command ?options? command ?condition?
 #
 # DESCRIPTION
 #	A command is repeated until it meets a condition or a timeout is reached.
 #	If a condition is not specified, the command will be repeated until it
 #	is successful (the exit code is zero).
 #
 # OPTIONS
 #	-fail
 #		fail the testif the condition is not met within the timeout
 #	-subtest
 #		if the condition is met within the timeout call subpass, otherwise
 #		call subfail
 #	-timeout <float_number>
 #		time in seconds to wait for the condition to be met before
 #		timing out (default is 60.0)
 #	-pollinterval <float_number>
 #		time in seconds between each loop execution and condition
 #		check (default is 1.0)
 #
 # ARGUMENTS
 #	command
 #		a string containing the command and arguments to execute
 #	condition
 #		The boolean expression to test. For each command invocation,
 #		the result variable will be set to the dictionary returned
 #		from run_command.
 #		The condition expression will normally involve a comparison
 #		with one or more values of this dictionary. If a condition is
 #		not specified, this condition will be used:
 #		{ [dict get $result exit_code] == 0 }
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if the condition is met before the timeout occurs,
 #	RETURN_TIMEOUT if the timeout occurs before the condition is met
 #
 ################################################################

 proc wait_for_command args {
 	set action          "none"
 	set timeout         60
 	set poll_interval   1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fail       {set action "fail"; set args [lrange $args 1 end]}
 			-subtest    {set action "subtest"; set args [lrange $args 1 end]}
 			-time*      {set args [lassign $args - timeout]}
 			-poll*      {set args [lassign $args - poll_interval]}
 			-*          {fail "Unknown option: [lindex $args 0]"}
 			default     break
 		}
 	}

 	set argument_count [llength $args]
 	if {$argument_count < 1} {
 		fail "Too few arguments ($argument_count): $args"
 	} elseif {$argument_count > 2} {
 		fail "Too many arguments ($argument_count): $args"
 	}
 	lassign $args command
 	if {$argument_count == 2} {
 		set condition [lindex $args 1]
 	} else {
 		set condition { [dict get $result exit_code] == 0 }
 	}

 	set start_time [format "%.3f" [expr [clock milliseconds] / 1000.000]]

 	log_debug "Waiting for $condition"

 	while {1} {
 		# Run command
 		set result [run_command $command]

 		# Check condition
 		if {[eval expr [format "{%s}" $condition]]} {
 			set now [format "%.3f" [expr [clock milliseconds] / 1000.000]]
 			log_debug "Condition ($condition) was met"
 			if {$action eq "subtest"} {
 				subpass "Condition ($condition) should be met (within $timeout seconds)"
 			}
 			return $::RETURN_SUCCESS
 		} else {
 			log_trace "Condition ($condition) was not met"
 		}

 		# Sleep poll interval
 		log_trace "Sleeping for $poll_interval seconds"
 		after [expr {int($poll_interval * 1000)}]

 		# Check whether we have surpassed our timeout
 		set now [format "%.3f" [expr [clock milliseconds] / 1000.000]]
 		log_trace "Checking whether the current time ([clock format [expr int($now)] -format %Y-%m-%dT%X].[lindex [split $now '.'] 1]) is greater than the start time plus the timeout ([clock format [expr int($start_time + $timeout)] -format %Y-%m-%dT%X].[lindex [split [expr $start_time + $timeout] '.'] 1])"
 		if {$now > $start_time + $timeout} {
 			set message "Condition ($condition) was not met before timeout ($timeout seconds)"
 			if {$action eq "fail"} {
 				fail $message
 			} else {
 				log_warn $message
 				if {$action eq "subtest"} {
 					subfail "Condition ($condition) should be met (within $timeout seconds)" "Timed out"
 				}
 				return $::RETURN_TIMEOUT
 			}
 		}
 	}
 }


 ################################################################
 #
 # NAME
 #	wait_for_command_match - repeat a command until its output matches the specified pattern
 #
 # SYNOPSIS
 #	wait_for_command_match ?options? command pattern
 #
 # DESCRIPTION
 #	A command is repeated until its output matches the specified pattern
 #
 # OPTIONS
 #	-negate
 #		negates the match. The command is repeated until its output does not
 #		match the specified pattern
 #	-fail
 #		fail the test if the output does not match the pattern within the
 #		timeout
 #	-subfail
 #		if the condition is met within the timeout call subpass, otherwise
 #		call subfail
 #	-timeout <float_number>
 #		time in seconds to wait for the pattern to be matched before
 #		timing out (default is 60.0)
 #	-pollinterval <float_number>
 #		time in seconds between each loop execution and match check
 #		(default is 1.0)
 #
 # ARGUMENTS
 #	command
 #		a string containing the command and arguments to execute
 #	pattern
 #		The regular expression to match against the command output
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if the pattern is matched before the timeout occurs,
 #	RETURN_TIMEOUT if the timeout occurs before the pattern is matched
 #
 ################################################################

 proc wait_for_command_match args {

 	set pattern [lindex $args end]
 	set args [lrange $args 0 end-1]
 	set match 1
 	set idx [lsearch $args "-negate"]
 	if {$idx != -1} {
 		set match 0
 		set args [lreplace $args $idx $idx]; # Remove the -negate argument
 	}

 	return [wait_for_command {*}$args "\[regexp -- {$pattern} \[dict get \$result output\]\] == $match"]
 }


 ################################################################
 #
 # NAME
 #	wait_for_file - waits for a file to exist with non-zero size
 #
 # SYNOPSIS
 #	wait_for_file ?options? file_name
 #
 # OPTIONS
 #	-fail
 #		If an error occurs or the file does not become present
 #		by the timeout, fail the test rather than returning an error
 #	-timeout <integer_number>
 #		time in seconds to wait for the file to exist before
 #		timing out (default is 90)
 #	-pollinterval <integer_number>
 #		time in seconds between each file existence test (default is 1)
 #
 # DESCRIPTION
 #	Wait for the specified file to exist and have a non-zero size.
 #	Note that if JobFileAppend=0 is configured, a file can exist and
 #	be purged then be re-created.
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if the file becomes present within the timeout, or
 #	non-zero value otherwise.
 #
 ################################################################

 proc wait_for_file args {
 	global bin_sleep

 	set fatal         false
 	set timeout       90
 	set poll_interval 1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fatal  -
 			-fail   {set fatal true; set args [lrange $args 1 end]}
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count != 1} {
 		fail "Invalid number of arguments ($argument_count): $args"
 	} else {
 		lassign $args file_name
 	}

 	for {set my_delay 0} {$my_delay <= $timeout} \
 	                     {set my_delay [expr $my_delay + $poll_interval]} {
 		if {[file exists $file_name]} {
 			# Add small delay for I/O buffering
 			exec $bin_sleep 1
 			return $::RETURN_SUCCESS
 		}
 		exec $bin_sleep $poll_interval

 		# Expect may fail to load current NFS info.
 		# Use the ls command to load current info.
 		set slash_pos [string last $file_name "/"]
 		if {$slash_pos < 1} {
 			set dir_name "."
 		} else {
 			decr slash_pos
 			set dir_name [string $file_name 0 $slash_pos]
 		}
 		exec /bin/ls $dir_name
 	}
 	set message "Timeout waiting for file ($file_name)"
 	if {$fatal} {
 		fail $message
 	}

 	log_warn $message
 	return $::RETURN_TIMEOUT
 }


 ################################################################
 #
 # NAME
 #	_wait_for_single_job - waits for a job to reach the desired state
 #
 # SYNOPSIS
 #	_wait_for_single_job ?options? job_id desired_state
 #
 # DESCRIPTION
 #	Wait for a previously submitted Slurm job to reach the desired state.
 #
 # OPTIONS
 #	-fail
 #		If an error occurs or the job does not reach the desired state
 #		by the timeout, fail the test rather than returning an error
 #	-timeout <integer_number>
 #		time in seconds to wait for the job to be in the desired state
 #		before timing out (default is 360)
 #	-pollinterval <integer_number>
 #		time in seconds between each job state check (default is 1)
 #
 # ARGUMENTS
 #	job_id
 #		The Slurm job id of a job we want to wait for.
 #	desired_state
 #		The state you want the job to attain before
 #		returning.  Currently supports:
 #			CANCELLED - job is cancelled
 #			DONE      - any terminated state (includes cancelled)
 #			PENDING   - job is pending
 #			RUNNING   - job is running
 #			SPECIAL_EXIT
 #			SUSPENDED - job is suspended
 #
 # RETURN VALUE
 #	RETURN_SUCCESS, or non-zero on error.
 #
 ################################################################

 proc _wait_for_single_job args {
 	global scontrol

 	set fatal         false
 	set timeout       360
 	set poll_interval 1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fatal  -
 			-fail   {set fatal true; set args [lrange $args 1 end]}
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count != 2} {
 		fail "Invalid number of arguments ($argument_count): $args"
 	} else {
 		lassign $args job_id desired_state
 	}

 	# First verify that desired_state is supported
 	switch $desired_state {
 		"CANCELLED" {}
 		"DONE" {}
 		"PENDING" {}
 		"RUNNING" {}
 		"SPECIAL_EXIT" {}
 		"SUSPENDED" {}
 		default {
 			set message "Invalid desired state: $desired_state"
 			if {$fatal} {
 				fail $message
 			}
 			log_warn $message
 			return $::RETURN_ERROR
 		}
 	}

 	if {$job_id == 0} {
 		set message "Invalid job ID ($job_id)"
 		if {$fatal} {
 			fail $message
 		}
 		log_warn $message
 		return $::RETURN_ERROR
 	}

 	set my_delay    0
 	while 1 {
 		set fd [open "|$scontrol -o show job $job_id"]
 		gets $fd line
 		catch {close $fd}
 		if {[regexp {JobState\s*=\s*(\w+)} $line foo state] != 1} {
 			set state "NOT_FOUND"
 		}

 		switch $state {
 			"CANCELLED" {
 				if {$desired_state eq "CANCELLED"} {
 					log_debug "Job $job_id is CANCELLED"
 					return $::RETURN_SUCCESS
 				}
 				if {$desired_state eq "DONE"} {
 					log_debug "Job $job_id is DONE ($state)"
 					return $::RETURN_SUCCESS
 				}
 				set message "Job ($job_id) is $state, but we wanted $desired_state"
 				if {$fatal} {
 					fail $message
 				}
 				log_debug $message
 				return $::RETURN_ERROR
 			}
 			"NOT_FOUND" -
 			"BOOT_FAIL" -
 			"COMPLETED" -
 			"DEADLINE" -
 			"FAILED" -
 			"NODE_FAIL" -
 			"OUT_OF_MEMORY" -
 			"PREEMPTED" -
 			"TIMEOUT" {
 				if {$desired_state eq "DONE"} {
 					log_debug "Job $job_id is DONE ($state)"
 					return $::RETURN_SUCCESS
 				}
 				set message "Job ($job_id) is $state, but we wanted $desired_state"
 				if {$fatal} {
 					fail $message
 				}
 				log_debug $message
 				return $::RETURN_ERROR
 			}
 			"PENDING" {
 				if {$desired_state eq "PENDING"} {
 					log_debug "Job $job_id is PENDING"
 					return $::RETURN_SUCCESS
 				}
 				log_debug "Job $job_id is in state $state, desire $desired_state"
 			}
 			"RUNNING" {
 				if {$desired_state eq "RUNNING"} {
 					log_debug "Job $job_id is RUNNING"
 					return $::RETURN_SUCCESS
 				}
 				log_debug "Job $job_id is in state $state, desire $desired_state"
 			}
 			"SPECIAL_EXIT" {
 				if {$desired_state eq "SPECIAL_EXIT"} {
 					log_debug "Job $job_id is SPECIAL_EXIT"
 					return $::RETURN_SUCCESS
 				}
 				log_debug "Job $job_id is in state $state, desire $desired_state"
 			}
 			"SUSPENDED" {
 				if {$desired_state eq "SUSPENDED"} {
 					log_debug "Job $job_id is SUSPENDED"
 					return $::RETURN_SUCCESS
 				}
 				log_debug "Job $job_id is in state $state, desire $desired_state"
 			}
 			default {
 				log_debug "Job $job_id is in state $state, desire $desired_state"
 			}
 		}

 		if { $my_delay > $timeout } {
 			set message "Timeout waiting for job state $desired_state"
 			if {$fatal} {
 				fail $message
 			}
 			log_warn "Timeout waiting for job state $desired_state"
 			return $::RETURN_TIMEOUT
 		}

 		exec sleep $poll_interval
 		set my_delay [expr $my_delay + $poll_interval]
 	}
 }


 ################################################################
 #
 # NAME
 #	wait_for_job - waits for job to be in desired state
 #
 # SYNOPSIS
 #	wait_for_job ?options? job_id desired_state ?het_job?
 #
 # DESCRIPTION
 #	Wait for job to be in desired state. Can handle het job components.
 #
 # OPTIONS
 #	-fail
 #		If an error occurs or the job does not reach the desired state
 #		by the timeout, fail the test rather than returning an error
 #	-timeout <integer_number>
 #		time in seconds to wait for the job to be in the desired state
 #		before timing out (default is 90)
 #	-pollinterval <integer_number>
 #		time in seconds between each job state check (default is 1)
 #
 # ARGUMENTS
 #	job_id
 #		The Slurm job id of a job we want to wait for.
 #	desired_state
 #		The state you want the job to attain before returning.
 #		Currently supports:
 #			CANCELLED - job is cancelled
 #			DONE      - any terminated state (including cancelled)
 #			PENDING   - job is pending
 #			RUNNING   - job is running
 #			SPECIAL_EXIT
 #			SUSPENDED - job is suspended
 #	het_job
 #		If set, checks the state of each component job if the job
 #		is a het one.
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if job reaches the desired state, or non-zero value
 #	otherwise.
 #
 # SEE ALSO
 #	_wait_for_single_job
 #
 ################################################################

 proc wait_for_job args {

 	set options  [list]
 	set het_job  0

 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fatal  -
 			-fail   {
 				lappend options [lindex $args 0]
 				set args [lrange $args 1 end]
 			}
 			-time*  -
 			-poll*  {
 				lappend options {*}[lrange $args 0 1]
 				set args [lrange $args 2 end]
 			}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count < 2} {
 		fail "Too few arguments ($argument_count): $args"
 	} elseif {$argument_count > 3}  {
 		fail "Too many arguments ($argument_count): $args"
 	} else {
 		lassign $args job_id desired_state
 	}
 	if {$argument_count == 3} { set hetjob [lindex $args 2] }

 	if { $het_job } {
 		# get component job ids
 		set jid_list [get_het_job_ids $job_id 1]
 	}

 	set rc       0
 	set jid_list ""
 	if { $jid_list == "" } {
 		# non-het job
 		set jid_list $job_id
 	}

 	foreach jid $jid_list {
 		set rc [_wait_for_single_job {*}$options $jid $desired_state]
 		if { $rc } {
 			# bail out on first failure
 			break
 		}
 	}
 	return $rc
 }


 ################################################################
 #
 # NAME
 #	wait_for_job_acct - waits for the job accounting record to be updated
 #
 # SYNOPSIS
 #	wait_for_job_acct ?options? job_id ?field?
 #
 # DESCRIPTION
 #	Wait for evidence that a specific job stage has been recorded in the
 #	accounting database by verifying that a field has been populated.
 #
 # OPTIONS
 #	-fail
 #		If an error occurs or the job does has not populated the field
 #		by the timeout, fail the test rather than returning an error
 #	-timeout <integer_number>
 #		time in seconds to wait for the job's field to be populated
 #		before timing out (default is 90)
 #	-pollinterval <integer_number>
 #		time in seconds between each field check (default is 1)
 #
 # ARGUMENTS
 #	job_id
 #		The Slurm job id of a job we want to wait for.
 #	field
 #		The field that we are waiting to be populated, not empty and
 #		not "Unknown". (Default: End)
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if the desired field has been populated for the job
 #	in the accounting database, or non-zero value otherwise.
 #
 ################################################################

 proc wait_for_job_acct args {
 	global sacct

 	set options [list]
 	set timeout 10
 	set pollinterval .2
 	set field End

 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fail   {
 				lappend options [lindex $args 0]
 				set args [lrange $args 1 end]
 			}
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - pollinterval]}
 			default break
 		}
 	}
 	lappend options -timeout $timeout
 	lappend options -pollinterval $pollinterval

 	set argument_count [llength $args]
 	if {$argument_count < 1} {
 		fail "Too few arguments ($argument_count): $args"
 	} elseif {$argument_count > 2}  {
 		fail "Too many arguments ($argument_count): $args"
 	}
 	lassign $args job_id
 	if {$argument_count == 2} {set field [lindex $args 1]}

 	# Wait for sacct to populate the specified field
 	# We want to see the field be populated with anything but "Unknown"
 	# We can't use a negation match here because empty would match
 	return [wait_for_command_match {*}$options "$sacct -j $job_id --allocation --format $field --parsable2 --noheader" (?n)^(?!.*Unknown$).]
 }


 ################################################################
 #
 # NAME
 #	wait_for_account_done - cancels and waits on jobs in specified accounts
 #
 # SYNOPSIS
 #	wait_for_account_done ?options? accounts
 #
 # DESCRIPTION
 #	Cancel jobs on and wait for them to be finished in account(s) given.
 #
 # OPTIONS
 #	-timeout <integer_number>
 #		time in seconds to wait for the jobs to be finished before
 #		timing out (default is 360)
 #	-pollinterval <integer_number>
 #		time in seconds between each job state check (default is 1)
 #
 # ARGUMENTS
 #	accounts
 #		Comma-delimited list of accounts
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if all jobs of the account are finished, or non-zero
 #	otherwise.
 #
 # NOTE
 #	We sleep for two seconds before replying that a job is
 #	done to give time for I/O completion (stdout/stderr files)
 #
 ################################################################

 proc wait_for_account_done args {
 	global scancel squeue re_word_str

 	set timeout       360
 	set poll_interval 1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count != 1} {
 		fail "Invalid number of arguments ($argument_count): $args"
 	} else {
 		lassign $args accounts
 	}

 	if { $accounts == "" } {
 		fail "An account must be specified"
 	}

 	log_user 0
 	set account_list [split $accounts ","]
 	foreach item $account_list {
 		spawn $scancel -A $item
 		expect {
 			timeout {
 				log_warn "No response from scancel"
 			}
 			eof {
 				wait
 			}
 		}
 	}

 	set my_delay    0
 	while 1 {
 		set found 0
 		spawn $squeue -o Account=%a -h -A$accounts
 		expect {
 			-re "Account=($re_word_str)" {
 				set found 1
 				exp_continue
 			}
 			eof {
 				wait
 			}
 		}

 		if { !$found } {
 			log_debug "Account(s) $accounts is/are empty"
 			break
 		}

 		if { $my_delay > $timeout } {
 			log_error "Timeout waiting for account(s) ($accounts) to be finished"
 			log_user 1
 			return $::RETURN_TIMEOUT
 		}

 		exec sleep $poll_interval
 		set my_delay [expr $my_delay + $poll_interval]
 	}
 	log_user 1
 	return $::RETURN_SUCCESS
 }


 ################################################################
 #
 # NAME
 #	wait_for_part_done - cancels and waits on jobs in specified partition
 #
 # SYNOPSIS
 #	wait_for_part_done ?options? partition
 #
 # DESCRIPTION
 #	Cancel jobs on and wait for them to be finished in partition given.
 #
 # OPTIONS
 #	-timeout <integer_number>
 #		time in seconds to wait for the jobs to be finished before
 #		timing out (default is 360)
 #	-pollinterval <integer_number>
 #		time in seconds between each job state check (default is 1)
 #
 # ARGUMENTS
 #	partition
 #		partition name
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if all jobs of the partition are finished, or non-zero
 #	otherwise.
 #
 # NOTE
 #	We sleep for two seconds before replying that a job is
 #	done to give time for I/O completion (stdout/stderr files)
 #
 ################################################################

 proc wait_for_part_done args {
 	global scancel squeue re_word_str

 	set timeout       360
 	set poll_interval 1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count != 1} {
 		fail "Invalid number of arguments ($argument_count): $args"
 	} else {
 		lassign $args partition
 	}

 	if { $partition == "" } {
 		fail "A partition must be specified"
 	}

 	run_command -fail -nolog "$scancel -p $partition"

 	set my_delay    0
 	while 1 {
 		set found 0
 		spawn $squeue -o Part=%P -h -p$partition
 		expect {
 			-re "Part=($re_word_str)" {
 				set found 1
 				exp_continue
 			}
 			eof {
 				wait
 			}
 		}

 		if { !$found } {
 			log_debug "Partition $partition is empty"
 			break
 		}

 		if { $my_delay > $timeout } {
 			log_error "Timeout waiting for partition ($partition) to be finished"
 			return $::RETURN_TIMEOUT
 		}

 		exec sleep $poll_interval
 		set my_delay [expr $my_delay + $poll_interval]
 	}
 	return $::RETURN_SUCCESS
 }


 ################################################################
 #
 # NAME
 #	wait_for_step - waits for a job step to be found
 #
 # SYNOPSIS
 #	wait_for_step ?options? step_id
 #
 # DESCRIPTION
 #	Wait for a job step to be found.
 #
 # OPTIONS
 #	-timeout <integer_number>
 #		time in seconds to wait for the job step to be found before
 #		timing out (default is 30)
 #	-pollinterval <integer_number>
 #		time in seconds between each step existence check (default is 1)
 #
 # ARGUMENTS
 #	step_id
 #		job step id
 #
 # RETURN VALUE
 #	RETURN_SUCCESS if step_id is found, or non-zero otherwise.
 #
 ################################################################

 proc wait_for_step args {
 	global scontrol

 	set timeout       30
 	set poll_interval 1
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count != 1} {
 		fail "Invalid number of arguments ($argument_count): $args"
 	} else {
 		lassign $args step_id
 	}

 	set my_delay 0
 	while 1 {
 		set fd [open "|$scontrol -o show step $step_id"]
 		gets $fd line
 		catch {close $fd}
 		if {[regexp {Nodes=} $line foo] == 1} {
 			return $::RETURN_SUCCESS
 		}
 		if {[regexp {MidplaneList=} $line foo] == 1} {
 			return $::RETURN_SUCCESS
 		}
 		if { $my_delay > $timeout } {
 			log_warn "Timeout waiting for job step"
 			return $::RETURN_TIMEOUT
 		}

 		log_debug "Step $step_id not done yet. Waiting for $poll_interval seconds"
 		exec sleep $poll_interval
 		set my_delay [expr $my_delay + $poll_interval]
 	}
 }


 ################################################################
 #
 # NAME
 #	wait_job_reason - waits for a desired job state and reason
 #
 # SYNOPSIS
 #	wait_job_reason ?options? job_id ?desired_state? ?desired_reason_list?
 #
 # DESCRIPTION
 #	Wait until the job is in desired state and reason is one
 #	of the desired ones or until the timeout.
 #
 # OPTIONS
 #	-timeout <integer_number>
 #		time in seconds to wait for the job state and reason before
 #		timing out (default is 360)
 #	-pollinterval <integer_number>
 #		time in seconds between each job state check (default is 1)
 #
 # ARGUMENTS
 #	job_id
 #		The job to wait for
 #	desired_state
 #		Desired state.
 #	desired_reason_list
 #		List of desired reasons. Empty list means that any reason
 #		is ok.
 #
 # RETURN VALUE
 #	RETURN_SUCCESS when job is in the desired state and reason is one
 #	of the desired ones, or non-zero otherwise.
 #
 ################################################################

 proc wait_job_reason args {
 	global scontrol re_word_str

 	set final_state "COMPLETED CANCELLED FAILED TIMEOUT DEADLINE
 	                 OUT_OF_MEMORY"

 	set timeout       360
 	set poll_interval 1
 	set desired_state "PENDING"
 	set desired_reason_list ""
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count < 1} {
 		fail "Too few arguments ($argument_count): $args"
 	} else {
 		lassign $args job_id
 	}
 	if {$argument_count >= 2} { set desired_state [lindex $args 1] }
 	if {$argument_count == 3} { set desired_reason_list [lindex $args 2] }
 	if {$argument_count > 3} {
 		fail "Too many arguments ($argument_count): $args"
 	}

 	set log_user_prev [log_user -info]
 	log_user 0

 	set my_delay 0
 	set rc $::RETURN_ERROR
 	while true {
 		set pending 0
 		set has_reason 1
 		spawn $scontrol show job $job_id
 		expect {
 			-re "JobState=($re_word_str) Reason=(\\S+)" {
 				set job_state $expect_out(1,string)
 				set job_reason $expect_out(2,string)
 			}
 			timeout {
 				log_error "No response from scontrol show job"
 				set rc $::RETURN_TIMEOUT
 				break
 			}
 		}

 		# Check if both state and reason are the desired ones
 		if {$job_state == $desired_state} {
 			set found 0
 			set reason_msg ""
 			if {$desired_reason_list == ""} {
 				set found 1
 			}
 			foreach desired_reason $desired_reason_list {
 				if {$job_reason == $desired_reason } {
 					set reason_msg " with reason $job_reason"
 					set found 1
 				}
 			}
 			if {$found} {
 				log_debug "Job $job_id found $job_state$reason_msg"
 				set rc $::RETURN_SUCCESS
 				break
 			}
 		} elseif {[lsearch -exact final_state $job_state] >= 0} {
 			# Job is in final step no need to wait longer
 			log_error [format "Job in final state/reason '%s' / '%s' instead of the desired '%s' / '%s'." \
 			                  $job_state $job_reason \
 			                  $desired_state $desired_reason_list]
 			set rc $::RETURN_ERROR
 			break
 		}

 		# Check if this was the last poll
 		if {$my_delay > $timeout} {
 			log_error "Timeout"
 			set rc $::RETURN_TIMEOUT
 			break
 		}
 		set remamining_sec [expr $timeout - $my_delay]
 		log_debug [format "Job in state/reason '%s' / '%s' instead of the desired '%s' / '%s'." \
 		                  $job_state $job_reason \
 		                  $desired_state $desired_reason_list]
 		log_debug [format "Polling again in %ss, %ss to timeout." \
 		                  $poll_interval $remamining_sec]

 		sleep $poll_interval
 		set my_delay [expr $my_delay + $poll_interval]
 	}

 	log_user $log_user_prev
 	return $rc
 }


 ################################################################
 #
 # NAME
 #	get_config - returns a dictionary of slurm configuration parameters
 #
 # SYNOPSIS
 #	get_config ?options?
 #
 # OPTIONS
 #	-dbd
 #		uses `sacctmgr show config` to return slurmdbd configuration
 #		parameters
 #	-slurm
 #		uses `scontrol show config` to return slurm configuration
 #		parameters (this is the default)
 #
 # RETURN VALUE
 #	Returns a dictionary of parameter values
 #
 ################################################################

 proc get_config args {
 	global sacctmgr scontrol

 	set config_dict [list]
 	set command "$scontrol"
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-slurm  {set command "$scontrol"; set args [lrange $args 1 end]}
 			-dbd    {set command "$sacctmgr"; set args [lrange $args 1 end]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	if {[llength $args] > 0} {
 		fail "[lindex [info level 0] 0]: No arguments allowed: $args"
 	}

 	set output [run_command_output -fail -nolog "$command show config"]

 	foreach line [split $output "\n"] {
 		if {[regexp {^(\S+) += (.*)$} $line {} param_name param_value] == 1} {
 			dict set config_dict $param_name $param_value
 		}
 	}

 	return $config_dict
 }


 ################################################################
 #
 # NAME
 #	get_config_param - returns a slurm configuration parameter value
 #
 # SYNOPSIS
 #	get_config_param ?options? parameter_name
 #
 # OPTIONS
 #	-dbd
 #		uses `sacctmgr show config` to return the specified slurmdbd
 #		configuration parameter value
 #	-slurm
 #		uses `scontrol show config` to return the specified slurm
 #		configuration parameter value (this is the default)
 #
 # ARGUMENTS
 #	parameter_name
 #		the parameter to return the value for
 #
 # DESCRIPTION
 #	Returns a specific configuration parameter value.
 #
 # RETURN VALUE
 #	Returns the value of the specified parameter or MISSING if it does not
 #	exist.
 #
 ################################################################

 proc get_config_param args {

 	set options [list]
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-* {
 				lappend options [lindex $args 0]
 				set args [lrange $args 1 end]
 			}
 			default break
 		}
 	}
 	if {[llength $args] == 1} {
 		lassign $args parameter_name
 	} else {
 		fail "[lindex [info level 0] 0]: Invalid number of arguments ([llength $args]): $args"
 	}

 	set config_dict [get_config {*}$options]

 	if [dict exists $config_dict $parameter_name] {
 		return [dict get $config_dict $parameter_name]
 	} else {
 		return "MISSING"
 	}
 }


 ################################################################
 #
 # NAME
 #	param_contains - test whether a comma-separated-list contains a specified value
 #
 # SYNOPSIS
 #	param_contains haystack needle
 #
 # DESCRIPTION
 #	Searches for the specified value (needle) in the comma-separated-list
 #	string (haystack). Needle can be a glob-style pattern.
 #
 # RETURN VALUE
 #	Returns a boolean value indicating whether the value (needle) was found
 #	in the comma-separated-list string (haystack)
 #
 ################################################################

 proc param_contains { haystack needle } {
 	if {[lsearch [split $haystack ","] $needle] != -1} {
 		return true
 	} else {
 		return false
 	}
 }


 ################################################################
 #
 # NAME
 #	param_value - returns the value of a parameter in a comma-separated-list
 #
 # SYNOPSIS
 #	param_value params_list param ?default?
 #
 # DESCRIPTION
 #	Searches for the specified param in the comma-separated-list
 #	string (params_list) and returns its value.
 #
 # RETURN VALUE
 #	Returns the value found or the optional default value if not found.
 #	If the param is found without a value, returns true (ie like
 #	param_contains).
 #
 ################################################################

 proc param_value {params_list param {default false}} {
 	global re_word_str
 	foreach pair [split $params_list ","] {
 		if {[regexp "$param" $pair] == 1} {
 			if {[regexp "$param=($re_word_str)" $pair - value] == 1} {
 				return $value
 			} else {
 				return true
 			}
 		}
 	}
 	return $default
 }


 ################################################################
 #
 # NAME
 #	get_affinity_types - gets the task plugins running with task/ stripped
 #
 # SYNOPSIS
 #	get_affinity_types
 #
 # RETURN VALUE
 #	Returns comma separated list of task plugins running without the task/
 #
 ################################################################

 proc get_affinity_types { } {
 	global scontrol re_word_str

 	log_user 0
 	set affinity ""
 	spawn $scontrol show config
 	expect {
 		-re "TaskPlugin *= ($re_word_str)" {
 			set parts [split $expect_out(1,string) ",/"]
 			while 1 {
 				set task_found [lsearch $parts "task"]
 				if { $task_found == -1 } break
 				set parts [lreplace $parts $task_found $task_found]
 			}
 			set affinity [join $parts ","]
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	return $affinity
 }


 ################################################################
 #
 # NAME
 #	get_mps_count_by_index - gets the count of a specific gres/mps device
 #
 # SYNOPSIS
 #	get_mps_count_by_index index hostname
 #
 # RETURN VALUE
 #	Returns the Count of a specific gres/mps device
 #
 ################################################################

 proc get_mps_count_by_index { index hostname } {
 	global slurmd number re_word_str

 	log_user 0
 	set count 0
 	spawn $slurmd -G -N $hostname
 	expect {
 		-re "Gres Name=mps Type=$re_word_str Count=($number) Index=$index" {
 			set count $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	return $count
 }

 ################################################################
 #
 # NAME
 #	check_mpi - determines if mpicc and the desired --mpi option are available
 #
 # SYNOPSIS
 #	check_mpi desired_mpi
 #
 # DESCRIPTION
 #	Runs srun --mpi=list to determine if the desired mpi is available, as
 #	well as if mpicc set in globals.local is too.
 #
 # RETURN VALUE
 #	Returns true if the desired mpi and mpicc are available, false otherwise.
 #
 ################################################################

 proc check_mpi {desired_mpi} {
 	global srun eol mpicc
 	set output [run_command_output -fail "$srun --mpi=list"]

 	return [regexp "${desired_mpi}${eol}" $output] && [file executable $mpicc]
 }

 ################################################################
 #
 # NAME
 #	check_influxdb_access - determines if user can access to the desired influxdb
 #
 # SYNOPSIS
 #	check_influxdb_access host port database
 #
 # DESCRIPTION
 #	It uses the global $influx CLI command to try to connect and use
 #	the desired database.
 #
 # RETURN VALUE
 #	Returns true if we can connect to host:port and use the database,
 #	false otherwise
 #
 ################################################################

 proc check_influxdb_access {host port database} {
 	global influx
 	set connected 0
 	set access 0

 	if {![file executable $influx]} {
 		log_warn "Cannot execute influx command: $influx"
 		return false
 	}

 	set log_user_save [log_user -info]
 	log_user 0
 	spawn $influx -host $host -port $port
 	expect {
 		-re "Connected to" {
 			set connected 1
 			send "use $database\r"
 			exp_continue
 		}
 		-re "Using database" {
 			set access 1
 			send "quit\r"
 		}
 		-re "unable to parse authentication credentials" {
 			send "quit\r"
 		}
 		-re "authorization failed" {
 			send "quit\r"
 		}
 		timeout {
 			fail "InfluxDB instance not responding"
 		}
 		eof {
 			wait
 		}
 	}
 	log_user $log_user_save

 	if {!$connected} {
 		log_warn "Cannot connect to $host:$port"
 		return false
 	}
 	if {!$access} {
 		log_warn "Connected to $host:$port, but cannot use $database"
 		return false
 	}
 	return true
 }


 ################################################################
 #
 # NAME
 #	check_bb_emulate - determines if Cray burst buffers API is emulated
 #
 # SYNOPSIS
 #	check_bb_emulate
 #
 # RETURN VALUE
 #	Returns true if Cray burst buffers API is emulated, false otherwise
 #
 ################################################################

 proc check_bb_emulate { } {
 	global scontrol

 	log_user 0
 	set bb_emulate false
 	spawn $scontrol show burst
 	expect {
 		-re "EmulateCray" {
 			set bb_emulate true
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1
 	return $bb_emulate
 }


 ################################################################
 #
 # NAME
 #	check_bb_persistent - determines if persistent burst buffers can be created by users
 #
 # SYNOPSIS
 #	check_bb_persistent
 #
 # RETURN VALUE
 #	Returns true if Cray burst buffers can be created by users,
 #	false otherwise
 #
 ################################################################

 proc check_bb_persistent { } {
 	global scontrol

 	log_user 0
 	set bb_persistent false
 	spawn $scontrol show burst
 	expect {
 		-re "EnablePersistent" {
 			set bb_persistent true
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1
 	return $bb_persistent
 }


 ################################################################
 #
 # NAME
 #	get_default_acct - gets user's default account
 #
 # SYNOPSIS
 #	get_default_acct user
 #
 # RETURN VALUE
 #	Returns name of default account if exists, NULL otherwise
 #
 ################################################################

 proc get_default_acct { user } {
 	global sacctmgr re_word_str bin_id

 	log_user 0
 	set def_acct ""

 	if { !$user } {
 		set user [get_my_user_name]
 	}

 	spawn $sacctmgr -n list -P user $user format="DefaultAccount"
 	expect {
 		-re "($re_word_str)" {
 			set def_acct $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	return $def_acct
 }


 ################################################################
 #
 # NAME
 #	get_cycle_count - get desired iteration count
 #
 # SYNOPSIS
 #	get_cycle_count
 #
 # DESCRIPTION
 #	For tests with iteration counts (e.g. test9.1, test9.2)
 #	return the desired iteration count
 #
 # RETURN VALUE
 #	Returns desired iteration count
 #
 ################################################################

 proc get_cycle_count { } {
 	global enable_memory_leak_debug

 	if {$enable_memory_leak_debug != 0} {
 		return 2
 	}
 	return 100
 }


 ################################################################
 #
 # NAME
 #	get_select_type_params - determines SelectTypeParameters being used for a given partition
 #
 # SYNOPSIS
 #	get_select_type_params ?partition?
 #
 # DESCRIPTION
 #	Determine SelectTypeParameters being used for a given partition.
 #	If the partition is not specified, the default partition will be used.
 #
 # RETURN VALUE
 #	Returns a string containing SelectTypeParameters
 #
 ################################################################

 proc get_select_type_params { {partition ""} } {
 	global scontrol bin_bash bin_grep re_word_str

 	log_user 0
 	set params ""

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 	}

 	if {$partition ne ""} {
 		spawn -noecho $bin_bash -c "exec $scontrol show part $partition | $bin_grep SelectTypeParameters"
 		expect {
 			-re "SelectTypeParameters *= *NONE" {
 				exp_continue
 			}
 			-re "SelectTypeParameters *= *($re_word_str)" {
 				set params $expect_out(1,string)
 				exp_continue
 			}
 			eof {
 				wait
 			}
 		}
 	}
 	if {$params eq ""} {
 		spawn -noecho $bin_bash -c "exec $scontrol show config | $bin_grep SelectTypeParameters"
 		expect {
 			-re "SelectTypeParameters *= *($re_word_str)" {
 				set params $expect_out(1,string)
 				exp_continue
 			}
 			eof {
 				wait
 			}
 		}
 	}
 	log_user 1

 	return $params
 }


 ################################################################
 #
 # NAME
 #	check_config_select - checks if effectively using the select type
 #
 # SYNOPSIS
 #	check_config_select type
 #
 # DESCRIPTION
 #	Determine if SelectType is equivalent to the passed type.
 #
 # ARGUMENTS
 #	type
 #		the desired SelectType to check (e.g. cons_tres)
 #
 # RETURN VALUE
 #	Returns true if configured, false otherwise
 #
 ################################################################

 proc check_config_select { type } {
 	set select_type [get_config_param "SelectType"]
 	set select_type_parameters [get_config_param "SelectTypeParameters"]

 	if {$select_type eq "select/$type"} {
 		return true
 	}

 	return false
 }


 ################################################################
 #
 # NAME
 #	get_total_cpus - gets the total available CPUs on the default partition
 #
 # SYNOPSIS
 #	get_total_cpus
 #
 # RETURN VALUE
 #	The total available CPUs on the default partition.
 #
 # NOTE
 #	CoreSpecCount are not part of the total.
 #
 ################################################################

 proc get_total_cpus {} {

 	set total_cpu_count 0

 	# Obtain the list of available nodes in the default partition
 	set node_list [get_nodes_by_state]

 	# Tally the cpus on these nodes
 	set nodes_dict [get_nodes]
 	foreach node_name $node_list {
 		set node_dict [dict get $nodes_dict $node_name]
 		set node_cpu_count [dict get $node_dict "CPUTot"]

 		# Subtract out any spec cores
 		if {[dict exists $node_dict "CoreSpecCount"] && [dict exists $node_dict "ThreadsPerCore"]} {
 			set spec_cpu_count [expr [dict get $node_dict "CPUTot"] * [dict get $node_dict "ThreadsPerCore"]]
 			incr node_cpu_count -$spec_cpu_count
 		}

 		incr total_cpu_count $node_cpu_count
 	}

 	return $total_cpu_count
 }


 ################################################################
 #
 # NAME
 #	is_running_in_container
 #
 # SYNOPSIS
 #	is_running_in_container
 #
 # DESCRIPTION
 #	Determine if test script is running inside of a container.
 #
 # RETURN VALUE
 #	true if container detected or systemd-detect-virt is not found,
 #	false otherwise
 #
 ################################################################

 proc is_running_in_container {} {
 	global bin_systemd_detect_virt

 	if {[run_command_status -nolog -none "$bin_systemd_detect_virt --version"]} {
 		log_warn "$bin_systemd_detect_virt not found, assuming container"
 		return true
 	}

 	set result [run_command -nolog -none "$bin_systemd_detect_virt -c"]
 	set output [string trimright [dict get $result output] "\r\n"]
 	if { $output != "none"} {
 		log_debug "Detected container type: $output"
 	}
 	if {[dict get $result exit_code]} {
 		return false
 	}
 	return true
 }

 ################################################################
 #
 # NAME
 #	is_super_user - determines if user is root or SlurmUser
 #
 # SYNOPSIS
 #	is_super_user ?user?
 #
 # DESCRIPTION
 #	Determine if user is a Slurm super user (i.e. user
 #	root or configured SlurmUser)
 #
 # RETURN VALUE
 #	true is user is root or SlurmUser, false otherwise
 #
 ################################################################

 proc is_super_user {{user ""}} {
 	global number

 	if {$user == ""} {
 		set user [get_my_user_name]
 	}

 	# Check if user is root
 	if {$user eq "root"} {
 		return true
 	}

 	# Check if user is SlurmUser
 	set slurm_user [get_config_param "SlurmUser"]
 	if {[regexp "${user}\\($number\\)" $slurm_user match]} {
 		return true
 	}

 	return false
 }


 ################################################################
 #
 # NAME
 #	dec2hex - creates a 32 bit hex number from a signed decimal number
 #
 # SYNOPSIS
 #	dec2hex value
 #
 # DESCRIPTION
 #	Create a 32 bit hex number from a signed decimal number
 #
 # RETURN VALUE
 #	32 bit hex version of input 'value'
 #
 # SOURCE
 #	Courtesy of Chris Cornish
 #	http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982
 #
 ################################################################
 # Replace all non-decimal characters

 proc dec2hex {value} {
 	regsub -all {[^0-x\.-]} $value {} newtemp
 	set value [string trim $newtemp]
 	if {$value < 2147483647 && $value > -2147483648} {
 		set tempvalue [format "%#010X" [expr $value]]
 		return [string range $tempvalue 2 9]
 	} elseif {$value < -2147483647} {
 		return "80000000"
 	} else {
 		return "7FFFFFFF"
 	}
 }


 ################################################################
 #
 # NAME
 #	uint2hex - creates a 32 bit hex number from an unsigned decimal
 #
 # SYNOPSIS
 #	uint2hex value
 #
 # DESCRIPTION
 #	Create a 32 bit hex number from an unsigned decimal number.
 #
 # ARGUMENTS
 #	value
 #		unsigneddecimal number to convert
 #
 # RETURN VALUE
 #	32 bit hex version of input 'value'
 #
 # SOURCE
 #	Courtesy of Chris Cornish
 #	http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982
 #
 ################################################################
 # Replace all non-decimal characters

 proc uint2hex {value} {
 	regsub -all {[^0-x\.-]} $value {} newtemp
 	set value [string trim $newtemp]
 	if {$value <= 4294967295 && $value >= 0} {
 		set tempvalue [format "%#010X" [expr $value]]
 		return [string range $tempvalue 2 9]
 	} else {
 		return "FFFFFFFF"
 	}
 }


 ################################################################
 #
 # NAME
 #	partition_oversubscribe - determines the oversubscribe configuration of the specified partition
 #
 # SYNOPSIS
 #	partition_oversubscribe ?partition?
 #
 # DESCRIPTION
 #	Determine the oversubscribe configuration of the specified partition.
 #	If the partition is not specified, the default partition will be used.
 #
 # RETURN VALUE
 #	Return the oversubscribe configuration of the specified partition.
 #
 ################################################################

 proc partition_oversubscribe { {partition ""} } {
 	global sinfo

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 	}

 	set oversubscribe "NO"
 	log_debug "$sinfo --noheader --partition $partition --format %h"
 	set fd [open "|$sinfo --noheader --partition $partition --format %h"]
 	gets $fd line
 	catch {close $fd}
 	regexp {[a-zA-Z]+} $line oversubscribe
 	return $oversubscribe
 }


 ################################################################
 #
 # NAME
 #	default_partition - determines the name of the default partition
 #
 # SYNOPSIS
 #	default_partition
 #
 # DESCRIPTION
 #	Use scontrol to determine the name of the default partition
 #
 # RETURN VALUE
 #	Name of the current default partition, or fail if not found.
 #
 ################################################################

 proc default_partition {} {
 	global scontrol

 	set name ""
 	set fd [open "|$scontrol --all --oneliner show partition"]
 	while {[gets $fd line] != -1} {
 		if {[regexp {^PartitionName=([^ ]*).*Default=YES} $line frag name]
 				== 1} {
 			break
 		}
 	}
 	catch {close $fd}

 	if {[string length $name] == 0} {
 		fail "Could not identify the default partition"
 	}

 	return $name
 }


 ################################################################
 #
 # NAME
 #	default_part_exclusive - determines if the default partition allocates whole nodes to jobs
 #
 # SYNOPSIS
 #	default_part_exclusive
 #
 # DESCRIPTION
 #	Use scontrol to determine if the default partition
 #	allocates whole nodes to jobs
 #
 # RETURN VALUE
 #	Name of the current default partition
 #
 ################################################################

 proc default_part_exclusive {} {
 	set def_part [default_partition]
 	set oversubscribe [partition_oversubscribe $def_part]
 	if {$oversubscribe eq "EXCLUSIVE"} {
 		return 1
 	} else {
 		return 0
 	}
 }


 ################################################################
 #
 # NAME
 #	make_bash_script - creates a bash script
 #
 # SYNOPSIS
 #	make_bash_script script_name script_contents
 #
 # DESCRIPTION
 #	Create a bash script of name "script_name", and
 #	make the body of the script "script_contents".
 #	make_bash_script removes the file if it already exists,
 #	then generates the #! line, and then dumps "script_contents"
 #	to the file.  Finally, it makes certain that the script
 #	is executable.
 #
 # ARGUMENTS
 #	script_name
 #		file name for the bash script
 #	script_contents
 #		body of the script, not including the initial #! line.
 #
 # RETURN VALUE
 #	Nothing.
 #
 ################################################################

 proc make_bash_script { script_name script_contents } {
 	global bin_bash bin_chmod

 	file delete $script_name
 	set fd [open $script_name "w"]
 	puts $fd "#!$bin_bash"
 	puts $fd $script_contents
 	close $fd
 	exec $bin_chmod 777 $script_name
 }


 ################################################################
 #
 # NAME
 #	check_acct_associations - checks associations
 #
 # SYNOPSIS
 #	check_acct_associations
 #
 # DESCRIPTION
 #	Use sacctmgr to check associations
 #
 # RETURN VALUE
 #	true if no error is found, false otherwise
 #
 ################################################################

 proc check_acct_associations { } {
 	global sacctmgr number re_word_str

 	# 2 versions after 23.11 we can remove the below function
 	# until then we should check older clusters lft/rgt
 	set rc [check_acct_associations_lft]

 	if { !$rc } {
 		return $rc;
 	}

 	log_user 0
 	log_debug "Sanity-Checking Associations"
 	#
 	# Use sacctmgr to check associations
 	#
 	spawn $sacctmgr -n -p list assoc wopi wopl withd format=lineage,cluster
 	expect {
 	       -re "($re_word_str)\\|($re_word_str)\\|" {
 		      # Here we are checking if we have duplicates and
 		      # setting up an array to check for holes later

 		      set lineage $expect_out(1,string)
 		      set cluster $expect_out(2,string)
 		      set first [info exists found($cluster,$lineage)]

 		      if { $first } {
 			     log_error "$cluster found lineage $lineage again"
 			     set rc false
 		      } else {
 			     set found($cluster,$lineage) 1
 		      }
 		      exp_continue
 	       }
 	       timeout {
 			fail "sacctmgr add not responding"
 	       }
 	       eof {
 		      wait
 	       }
 	}

 	log_user 1
 	return $rc
 }

 proc check_acct_associations_lft { } {
 	global sacctmgr number re_word_str

 	set rc true
 	log_user 0

 	log_debug "Sanity-Checking Associations"

 	set clusters ""

 	spawn $sacctmgr show cluster format=cluster,rpc -p
 	expect {
 		-re "($re_word_str)\\|($number)\\|" {
 			# 9984 == 23.02, the last version where lft/rgt matter
 			if { $expect_out(2,string) > 9984 } {
 				exp_continue
 			}
 			set clusters [ concat $clusters "," $expect_out(1,string) ]
 			exp_continue
 		}
 	}

 	#
 	# Use sacctmgr to check associations
 	#
 	spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt,cluster clusters="$clusters"
 	expect {
 	       -re "($number)\\|($number)\\|($re_word_str)\\|" {
 		      # Here we are checking if we have duplicates and
 		      # setting up an array to check for holes later

 		      set cluster $expect_out(3,string)
 		      if { ![info exists c_min($cluster)] } {
 			      set c_min($cluster) -1
 			      set c_max($cluster) -1
 		      }

 		      set num1 $expect_out(1,string)
 		      set num2 $expect_out(2,string)
 		      set first [info exists found($cluster,$num1)]
 		      set sec [info exists found($cluster,$num2)]
 		      #log_debug "$first=$num1 $sec=$num2"
 		      if { $first } {
 			     log_error "$cluster found lft $num1 again"
 			     set rc false
 		      } elseif { $sec } {
 			     log_error "$cluster found rgt $num2 again"
 			     set rc false
 		      } else {
 			     set found($cluster,$num1) 1
 			     set found($cluster,$num2) 1
 			     if { $c_min($cluster) == -1
 				  || $c_min($cluster) > $num1 } {
 				    set c_min($cluster) $num1
 			     }
 			     if { $c_max($cluster) == -1
 				  || $c_max($cluster) < $num2 } {
 				    set c_max($cluster) $num2
 			     }
 		      }
 		      exp_continue
 	       }
 	       timeout {
 			fail "sacctmgr add not responding"
 	       }
 	       eof {
 		      wait
 	       }
 	}

 	foreach cluster [array names c_min] {
 		# Here we are checking for holes in the list from above
 		for {set inx $c_min($cluster)} {$inx < $c_max($cluster)} {incr inx} {
 			if { ![info exists found($cluster,$inx)] } {
 				log_error "$cluster No index at $inx"
 				set rc false
 			}
 		}
 	}
 	log_user 1
 	return $rc
 }

 ################################################################
 #
 # NAME
 #	get_job_acct_freq - gets the value of the job account gather frequency
 #
 # SYNOPSIS
 #	get_job_acct_freq
 #
 # RETURN VALUE
 #	job account gather frequency
 #
 ################################################################

 proc get_job_acct_freq { } {
 	global scontrol number

 	log_user 0
 	set freq_val 0

 	spawn $scontrol show config
 	expect {
 		-re "JobAcctGatherFrequency *= ($number)" {
 			set freq_val $expect_out(1,string)
 			if {$freq_val == 0} {
 				set freq_val 0
 			}
 		}
 		-re "JobAcctGatherFrequency *= task=($number)" {
 			set freq_val $expect_out(1,string)
 			if {$freq_val == 0} {
 				set freq_val 0
 			}
 		}
 		eof {
 			wait
 		}
 	}

 	log_user 1
 	return $freq_val
 }


 ################################################################
 #
 # NAME
 #	get_admin_level - gets the AdminLevel of the user
 #
 # SYNOPSIS
 #	get_admin_level ?user?
 #
 # RETURN VALUE
 #	AdminLevel for the current user
 #
 ################################################################

 proc get_admin_level {{user_name ""}} {
 	global sacctmgr re_word_str re_word_str bin_id

 	set admin_level ""

 	if {$user_name == ""} {
 		set user_name [get_my_user_name]
 		if { ![string length $user_name] } {
 			log_error "No name returned from id"
 			return ""
 		}
 	}

 	if {[is_super_user $user_name]} {
 		return "Administrator"
 	}

 	#
 	# Use sacctmgr to check admin_level
 	#
 	log_user 0
 	spawn $sacctmgr -n -P list user $user_name format=admin
 	expect {
 		-re "($re_word_str)" {
 		      set admin_level $expect_out(1,string)
 		      exp_continue
 	       }
 	       timeout {
 		      fail "sacctmgr add not responding"
 	       }
 	       eof {
 		      wait
 	       }
 	}
 	log_user 1

 	return $admin_level
 }


 #################################################
 #
 # NAME
 #	scale_to_megs - scales the value by the factor T|G|M to megabytes
 #
 # SYNOPSIS
 #	scale_to_megs value factor
 #
 # DESCRIPTION
 #	scale the value by the factor T|G|M to megabytes
 #
 # RETURN VALUE
 #	the scaled variable
 #
 #################################################

 proc scale_to_megs { value factor } {

 	if {$factor == "T"} {
 		set value [expr $value * 1024 * 1024]
 	} elseif {$factor == "G"} {
 		set value [expr $value * 1024]
 	} elseif {$factor == "M"} {
 		set value [expr $value * 1]
 	} elseif {$factor == "K"} {
 		set value [expr $value / 1024]
 		set value [expr {round($value)}]
 	} else {
 		set value [expr $value / (1024 * 1024)]
 		set value [expr {round($value)}]
 	}

 	return $value
 }


 #################################################
 #
 # NAME
 #	scale_to_ks - scales the value by the factor G|M|K to kilobytes
 #
 # SYNOPSIS
 #	scale_to_ks value factor
 #
 # DESCRIPTION
 #	scale the value by the factor G|M|K to kilobytes
 #
 # RETURN VALUE
 #	the scaled variable
 #
 #################################################

 proc scale_to_ks { value factor } {

 	if {$factor == "G"} {
 		set value [expr $value * 1024 * 1024]
 	} elseif {$factor == "M"} {
 		set value [expr $value * 1024]
 	} elseif {$factor == "K"} {
 		set value [expr $value * 1]
 	} else {
 		set value [expr $value / 1024]
 		set value [expr {round($value)}]
 	}

 	return $value
 }


 ############################################################
 #
 # NAME
 #	check_config_node_mem - checks that the nodes have memory configured
 #
 # SYNOPSIS
 #	check_config_node_mem
 #
 # RETURN VALUE
 #	true if all nodes have memory, false otherwise
 #
 ############################################################

 proc check_config_node_mem { } {

 	set nodes_dict [get_nodes]
 	dict for {node_name node_dict} $nodes_dict {
 		if [dict exists $node_dict "RealMemory"] {
 			if {[dict get $node_dict "RealMemory"] == 1} {
 				return false
 			}
 		} else {
 			log_warn "Parameter RealMemory not found on node $node_name"
 			return false
 		}
 	}

 	return true
 }


 ################################################################
 #
 # NAME
 #	wait_for_node - waits for nodes in a partition to reach a certain state
 #
 # SYNOPSIS
 #	wait_for_node ?options? state num_nodes ?partition?
 #
 # DESCRIPTION
 #	Wait for a certain number of nodes in a partition to reach a certain
 #	state.
 #
 # OPTIONS
 #	-timeout <integer_number>
 #		time in seconds to wait for the node state before
 #		timing out (default is 3)
 #	-pollinterval <integer_number>
 #		time in seconds between each node state check (default is 1)
 #
 # ARGUMENTS
 #	state
 #		The node state to wait for
 #	num_nodes
 #		The number of nodes we want to be in the specified state
 #	partition
 #		Partition name (the default partition is used if not specified)
 #
 # RETURN VALUE
 #	RETURN_SUCCESS, or non-zero on failure
 #
 ################################################################

 proc wait_for_node args {
 	global sinfo number

 	set partition     ""
 	set timeout       3
 	set poll_interval 1
 	set desired_state "PENDING"
 	set desired_reason_list ""
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-time*  {set args [lassign $args - timeout]}
 			-poll*  {set args [lassign $args - poll_interval]}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count < 2} {
 		fail "Too few arguments ($argument_count): $args"
 	} else {
 		lassign $args state num_nodes
 	}
 	if {$argument_count == 3} { set partition [lindex $args 2] }
 	if {$argument_count > 3} {
 		fail "Too many arguments ($argument_count): $args"
 	}

 	set wait_time 0
 	set done      0
 	set cnt       0
 	set rt        $::RETURN_SUCCESS

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 	}

 	while {$done != 1 && $wait_time < $timeout} {
 		set output [run_command_output -fail "$sinfo --noheader --partition $partition --state $state --format %D"]
 		regexp "$number" $output cnt

 		if {$num_nodes <= $cnt} {
 			set done 1
 		} else {
 			log_debug "Partition $partition has $cnt nodes idle and we want $num_nodes"
 			sleep $poll_interval
 			incr wait_time 1
 		}
 	}
 	if {$done != 1} {
 		set rt $::RETURN_ERROR
 	}

 	return $rt
 }


 #####################################################################
 #
 # NAME
 #	node_list_to_range - converts a TCL list into a Slurm hostlist using scontrol
 #
 # SYNOPSIS
 #	node_list_to_range nodes_list
 #
 # ARGUMENTS
 #
 #	nodes_list
 #		a TCL list of node names
 #
 # RETURN VALUE
 #	the hostlist form returned by scrontrol show hostlist
 #
 #####################################################################

 proc node_list_to_range {nodes_list} {
 	global scontrol

 	set comalist [join $nodes_list ,]
 	set hostlist [run_command_output -nolog -fail "$scontrol show hostlist $comalist"]
 	set hostlist [string trimright $hostlist "\r\n"]
 }


 ################################################################
 #
 # NAME
 #	list_to_range - converts a list of integer numbers to a range expression
 #
 # SYNOPSIS
 #	list_to_range ?numeric_list?
 #
 # DESCRIPTION
 #	Collapse a numeric list to a range expression defined by the following
 #	EBNF:
 #		<expression> ::= <range> {, <range>}*
 #		<range> ::= <integer> - <integer> | <integer>
 #
 # RETURN VALUE
 #	a range expression representing the numeric elements on the given list.
 #
 ################################################################

 proc list_to_range {numeric_list} {
 	set node_range_expression [node_list_to_range $numeric_list]
 	set range_expression [regsub {^\[(.*)\]$} $node_range_expression {\1}]
 	return $range_expression
 }


 #####################################################################
 #
 # NAME
 #	node_range_to_list - converts a node range expression into a list of nodes using scontrol
 #
 # SYNOPSIS
 #	node_range_to_list node_range_expression
 #
 # ARGUMENTS
 #
 #	node_range_expression
 #		a node range expression accepted by scontrol
 #
 # RETURN VALUE
 #	a list contining all node names expanded from the node range expression
 #
 #####################################################################

 proc node_range_to_list {node_range_expression} {
 	global scontrol

 	set node_list [list]
 	set output [run_command_output -nolog -fail "$scontrol show hostnames $node_range_expression"]
 	foreach line [split $output "\n"] {
 		if {$line eq ""} {
 			break
 		}
 		lappend node_list $line
 	}
 	return $node_list
 }


 ################################################################
 #
 # NAME
 #	range_to_list - converts a range expression into a list with the numbers of the range
 #
 # SYNOPSIS
 #	range_to_list ?range_expression?
 #
 # DESCRIPTION
 #	Expands a range expression defined by the following EBNF into a numeric list
 #		<expression> ::= <range> {, <range>}*
 #		<range> ::= <integer> - <integer> | <integer>
 #
 # RETURN VALUE
 #	Returns the list of integer numbers defined by the range_expression
 #
 ################################################################

 proc range_to_list {range_expression} {
 	set node_range_expression \[$range_expression\]
 	set range_list [node_range_to_list $node_range_expression]
 	return $range_list
 }


 #####################################################################
 #
 # NAME
 #	get_nodes_by_state - gets the list of node names in a given partition/states
 #
 # SYNOPSIS
 #	get_nodes_by_state partition states
 #
 # DESCRIPTION
 #	sinfo is used to list node names and states in the specified partition.
 #	This list of nodes is filtered to return only the nodes matching one of
 #	the requested states.
 #
 # ARGUMENTS
 #	partition
 #		partition to get nodes off
 #	states
 #		comma-separated list of allowed states
 #
 # RETURN VALUE
 #	list of nodes having the required state
 #
 #####################################################################

 proc get_nodes_by_state {{states ""} {partition ""}} {
 	global sinfo

 	set node_list [list]

 	if {$partition eq ""} {
 		set partition [default_partition]
 	}

 	if {$states eq ""} {
 		set states "idle"
 	}

 	set output [run_command_output -fail "$sinfo -h -N -p $partition -o '%N' -e -t $states"]
 	foreach line [split $output "\n"] {
 		if {$line eq ""} { continue }
 		lappend node_list $line
 	}

 	return $node_list
 }


 #####################################################################
 #
 # NAME
 #	set_partition_maximum_time_limit - sets the maximum time limit in a given partition
 #
 # SYNOPSIS
 #	set_partition_maximum_time_limit partition limit
 #
 # RETURN VALUE
 #	RETURN_SUCCESS, or non-zero on error
 #
 #####################################################################

 proc set_partition_maximum_time_limit {partition limit} {
 	global scontrol

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 		if { $partition == "" } {
 			return $::RETURN_ERROR
 		}
 	}

 	if { $limit < -1 } {
 		fail "Trying to set invalid partition time limit of $limit"
 	}
 	if { $limit == -1 } {
 		set expected_lim "UNLIMITED"
 	} else {
 		set expected_lim limit
 	}

 	run_command -fail "$scontrol update partitionname=$partition MaxTime=-1"

 	set maxtime [get_partition_maximum_time_limit $partition]
 	if { $maxtime != $limit } {
 		log_error "Unable to update partition MaxTime, got $maxtime, wanted $limit"
 		return $::RETURN_ERROR
 	}

 	return $::RETURN_SUCCESS
 }


 #####################################################################
 #
 # NAME
 #	get_partition_maximum_time_limit - gets the maximum time limit in a given partition
 #
 # SYNOPSIS
 #	get_partition_maximum_time_limit partition
 #
 # DESCRIPTION
 #	Get the maximum time limit in a given partition
 #
 # RETURN VALUE
 #	time limit in seconds, -1 if undefined or error
 #
 #####################################################################

 proc get_partition_maximum_time_limit {partition} {
 	global sinfo number

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 	}

 	set secs 0
 	log_user 0
 	spawn -noecho $sinfo -h -p $partition -O time -e
 	expect {
 		-re "infinite" {
 			set secs -1
 			exp_continue
 		}
 		-re "n/a" {
 			set secs -1
 			exp_continue
 		}
 		-re "($number)-($number):($number):($number)" {
 			set days  [expr $expect_out(1,string) * 24 * 60 * 60]
 			set hours [expr $expect_out(2,string) * 60 * 60]
 			set mins  [expr $expect_out(3,string) * 60]
 			set secs  [expr $days + $hours + $mins + $expect_out(4,string)]
 			exp_continue
 		}
 		-re "($number):($number):($number)" {
 			set hours [expr $expect_out(1,string) * 60 * 60]
 			set mins  [expr $expect_out(2,string) * 60]
 			set secs  [expr $hours + $mins + $expect_out(3,string)]
 			exp_continue
 		}
 		-re "($number):($number)" {
 			set mins  [expr $expect_out(1,string) * 60]
 			set secs  [expr $mins + $expect_out(2,string)]
 			exp_continue
 		}
 		-re "($number)" {
 			set secs  [expr $expect_out(1,string) * 60]
 			exp_continue
 		}
 		timeout {
 			fail "sinfo not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	log_user 1
 	return $secs
 }


 ################################################################
 #
 # NAME
 #	get_partition_default_time_limit - gets the default time limit in a given partition
 #
 # SYNOPSIS
 #	get_partition_default_time_limit ?partition?
 #
 # DESCRIPTION
 #	Get the default time limit in a given partition.
 #	If the partition is not specified, the default partition will be used.
 #
 # RETURN VALUE
 #	Returns: time limit in seconds, -1 if undefined or error.
 #
 ################################################################

 proc get_partition_default_time_limit { {partition ""} } {
 	global sinfo number

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 	}

 	set secs 0
 	log_user 0
 	spawn -noecho $sinfo -h -p $partition -O defaulttime -e
 	expect {
 		-re "infinite" {
 			set secs -1
 			exp_continue
 		}
 		-re "n/a" {
 			set secs -1
 			exp_continue
 		}
 		-re "($number)-($number):($number):($number)" {
 			set days  [expr $expect_out(1,string) * 24 * 60 * 60]
 			set hours [expr $expect_out(2,string) * 60 * 60]
 			set mins  [expr $expect_out(3,string) * 60]
 			set secs  [expr $days + $hours + $mins + $expect_out(4,string)]
 			exp_continue
 		}
 		-re "($number):($number):($number)" {
 			set hours [expr $expect_out(1,string) * 60 * 60]
 			set mins  [expr $expect_out(2,string) * 60]
 			set secs  [expr $hours + $mins + $expect_out(3,string)]
 			exp_continue
 		}
 		-re "($number):($number)" {
 			set mins  [expr $expect_out(1,string) * 60]
 			set secs  [expr $mins + $expect_out(2,string)]
 			exp_continue
 		}
 		-re "($number)" {
 			set secs  [expr $expect_out(1,string) * 60]
 			exp_continue
 		}
 		timeout {
 			fail "sinfo not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	log_user 1
 	return $secs
 }


 #####################################################################
 #
 # NAME
 #	get_node_cores - given a node, returns its total number of cores
 #
 # SYNOPSIS
 #	get_node_cores node
 #
 # DESCRIPTION
 #	Given a node, return its total number of cores
 #	(not the CoresPerSocket, but the total cores)
 #
 # RETURN VALUE
 #	node cores if retrieved, -1 otherwise
 #
 #####################################################################

 proc get_node_cores {node} {
 	global sinfo number

 	set cores -1
 	set sockets_per_node 0
 	set cores_per_socket 0

 	if {[string length $node] == 0} {
 		return $cores
 	}

 	log_user 0
 	spawn -noecho $sinfo -o "%X %Y" -h -n $node
 	expect {
 		-re "($number)" {
 			if {$sockets_per_node == 0} {
 				set sockets_per_node $expect_out(1,string)
 			} else {
 				set cores_per_socket $expect_out(1,string)
 			}
 			exp_continue
 		}
 		timeout {
 			fail "sinfo not responding"
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	set cores [expr $sockets_per_node * $cores_per_socket]

 	return $cores
 }


 #####################################################################
 #
 # NAME
 #	get_node_cpus - given a node, returns its total number of threads we account for
 #
 # SYNOPSIS
 #	get_node_cpus node
 #
 # DESCRIPTION
 #	Given a node, return its total number of threads we account for.
 #	(not always ThreadsPerCore, but how many threads are in use.
 #	i.e. CPUs=6 CoresPerSocket=6 ThreadsPerCore=2 Socket=1 would
 #	result in only 1 thread we care about instead of the 2 listed.)
 #
 # RETURN VALUE
 #	list of node [ tot_cpus threads ] if retrieved, [ -1 -1 ] otherwise
 #
 #####################################################################

 proc get_node_cpus {node} {
 	global scontrol number

 	set nthreads -1
 	set nsockets 0
 	set ncores 0
 	set totcpus -1

 	if {[string length $node] == 0} {
 		return [list $totcpus $nthreads]
 	}

 	# Get the number of CPUs on a node
 	spawn $scontrol show node $node
 	expect {
 		-re "CoresPerSocket=($number)" {
 			set ncores $expect_out(1,string)
 			exp_continue
 		}
 		-re "CPUTot=($number)" {
 			set totcpus $expect_out(1,string)
 			exp_continue
 		}
 		-re "Sockets=($number)" {
 			set nsockets $expect_out(1,string)
 			exp_continue
 		}
 		-re "ThreadsPerCore=($number)" {
 			set nthreads $expect_out(1,string)
 			exp_continue
 		}
 		timeout {
 			fail "scontrol is not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	set core_cnt [expr $nsockets * $ncores]
 	set thread_cnt [expr $ncores * $nthreads]
 	if {$totcpus != $nthreads && $totcpus == $ncores} {
 		log_debug "Cores rather than threads are being allocated"
 		set nthreads 1
 	}

 	return [list $totcpus $nthreads]
 }


 #####################################################################
 #
 # NAME
 #	get_part_total_cores - given a partition and/or states, return its total cores
 #
 # SYNOPSIS
 #	get_part_total_cores partition states
 #
 # DESCRIPTION
 #	Given a partition and/or states, return its total cores
 #
 # ARGUMENTS
 #	partition
 #		partition to check cores
 #	states
 #		states to filter on partition cores
 #
 # RETURN VALUE
 #	partition cores
 #
 #####################################################################

 proc get_part_total_cores {part states} {
 	global sinfo number

 	set cores 0
 	set tmp 0
 	set i 0

 	if {[string length $part] == 0} {
 		set part [default_partition]
 	}

 	log_user 0
 	if {[string length $states] == 0} {
 		spawn -noecho $sinfo -h -N -p $part -o "%X %Y"
 	} else {
 		spawn -noecho $sinfo -h -N -p $part -t $states -o "%X %Y"
 	}
 	expect {
 		-re "($number)" {
 			set is_even [expr {($i % 2) == 0}]
 			if {$is_even == 1} {
 				set tmp $expect_out(1,string)
 			} else {
 				set tmp [expr $tmp * $expect_out(1,string)]
 				set cores [expr $cores + $tmp]
 			}
 			incr i
 			exp_continue
 		}
 		timeout {
 			fail "sinfo not responding"
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	return $cores
 }


 #####################################################################
 #
 # NAME
 #	check_hosts_contiguous - verify if all hosts belong to the partition and are contiguous
 #
 # SYNOPSIS
 #	check_hosts_contiguous check_hosts_list partition
 #
 # DESCRIPTION
 #	Given a partition and a list of hosts, verify if all
 #	hosts belong to the partition and are contiguous.
 #	If the partition argument is empty, the default partition
 #	will be used.
 #
 # RETURN VALUE
 #	Returns: true if hosts are contiguous, false otherwise.
 #
 #####################################################################

 proc check_hosts_contiguous { check_hosts_list {partition ""} } {
 	global sinfo re_word_str

 	if {[string length $partition] == 0} {
 		set partition [default_partition]
 	}

 	set part_hosts_list {}

 	log_user 0
 	spawn $sinfo --noheader -p $partition -N -o %N
 	expect {
 		-re "($re_word_str)" {
 			lappend part_hosts_list $expect_out(1,string)
 			exp_continue
 		}
 		-re "Unable to contact" {
 			fail "Slurm appears to be down"
 		}
 		timeout {
 			fail "sinfo not responding"
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	foreach host $check_hosts_list {
 		set idx_cur [lsearch $part_hosts_list $host]
 		if {$idx_cur == -1} {
 			fail "Host ($host) not found in list of hosts from partition $partition"
 		}
 		if {[info exists idx_old]} {
 			if {$idx_cur != [expr $idx_old + 1]} {
 				log_error "Node sequence number not contiguous"
 				return false
 			}
 		}
 		set idx_old $idx_cur
 	}

 	return true
 }


 ################################################################
 #
 # NAME
 #	get_het_job_ids - gets list of component job ids for a het job
 #
 # SYNOPSIS
 #	get_het_job_ids job_id ?use_offset?
 #
 # DESCRIPTION
 #	Gets list of component job ids for a het job.
 #
 # ARGUMENTS
 #	job_id
 #		Slurm job id
 #	use_offset
 #		If zero, returns list of integer job ids, else returns ids in
 #		the form of X+Y where X is het job master id and Y is the
 #		offset.
 #
 # RETURN VALUE
 #	A list of ids for a hetjob or an empty list if jobid
 #	is not a het one.
 #
 ################################################################

 proc get_het_job_ids { jobid {use_offset 0}} {
 	global scontrol number

 	set id_list ""
 	set log_user_save [log_user -info]
 	log_user 0
 	spawn $scontrol show job $jobid
 	expect {
 		-re "JobId=($number) HetJobId=($number) HetJobOffset=($number)" {
 			if { $use_offset } {
 				lappend id_list "$expect_out(2,string)+$expect_out(3,string)"
 			} else {
 				lappend id_list $expect_out(1,string)
 			}
 			exp_continue
 		}
 		timeout {
 			fail "scontrol not responding"
 		}
 		eof {
 			wait
 		}
 	}
 	log_user $log_user_save
 	return $id_list
 }


 ################################################################
 #
 # NAME
 #	reconfigure - calls scontrol reconfigure
 #
 # SYNOPSIS
 #	reconfigure ?options? ?cluster?
 #
 # DESCRIPTION
 #	Calls scontrol reconfigure. This routine takes the same options as
 #	run_command, passing them to the underlying run_command invocation.
 #	This command waits an additional 5 seconds before returning.
 #
 # OPTIONS
 #	See OPTIONS of run_command proc.
 #
 # ARGUMENTS
 #	cluster
 #		The cluster to reconfigure
 #
 # RETURN VALUE
 #	RETURN_SUCCESS on success, otherwise RETURN_ERROR
 #
 ################################################################

 proc reconfigure args {
 	global scontrol

 	set options [list]
 	set cluster ""

 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-* {
 				lappend options {*}[lrange $args 0 1]
 				set args [lrange $args 2 end]
 			}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count > 1}  {
 		fail "Too many arguments ($argument_count): $args"
 	} elseif {$argument_count == 1} {
 		lassign $args cluster
 	}

 	set command $scontrol
 	if {$cluster ne ""} {
 		append command " -M$cluster"
 	}
 	append command " reconfigure"
 	set rc [run_command_status {*}$options "$command"]

 	#
 	# Wait 5 seconds for reconfigure to complete, then return.
 	#
 	sleep 5
 	return $rc
 }


 #####################################################################
 #
 # NAME
 #	log_fatal - prints a fatal message
 #
 # SYNOPSIS
 #	log_fatal message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_fatal {message} {
 	global testsuite_log_level LOG_LEVEL_FATAL

 	if {$testsuite_log_level >= $LOG_LEVEL_FATAL} {
 		_log_format "fatal" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_error - prints an error message
 #
 # SYNOPSIS
 #	log_error message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_error {message} {
 	global testsuite_log_level LOG_LEVEL_ERROR

 	if {$testsuite_log_level >= $LOG_LEVEL_ERROR} {
 		_log_format "error" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_warn - prints a warning message
 #
 # SYNOPSIS
 #	log_warn message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_warn {message} {
 	global testsuite_log_level LOG_LEVEL_WARNING

 	if {$testsuite_log_level >= $LOG_LEVEL_WARNING} {
 		_log_format "warning" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_info - prints an information message
 #
 # SYNOPSIS
 #	log_info message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_info {message} {
 	global testsuite_log_level LOG_LEVEL_INFO

 	if {$testsuite_log_level >= $LOG_LEVEL_INFO} {
 		_log_format "info" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_pass - prints a pass level message
 #
 # SYNOPSIS
 #	log_pass message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_pass {message} {
 	global testsuite_log_level LOG_LEVEL_PASS

 	if {$testsuite_log_level >= $LOG_LEVEL_PASS} {
 		_log_format "pass" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_command - prints a command level message
 #
 # SYNOPSIS
 #	log_command message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_command {message} {
 	global testsuite_log_level LOG_LEVEL_COMMAND

 	if {$testsuite_log_level >= $LOG_LEVEL_COMMAND} {
 		_log_format "command" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_debug - prints a debug level message
 #
 # SYNOPSIS
 #	log_debug message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_debug {message} {
 	global testsuite_log_level LOG_LEVEL_DEBUG

 	if {$testsuite_log_level >= $LOG_LEVEL_DEBUG} {
 		_log_format "debug" "$message"
 	}
 }


 #####################################################################
 #
 # NAME
 #	log_trace - prints a trace level message
 #
 # SYNOPSIS
 #	log_trace message
 #
 # SEE ALSO
 #	_log_format for options governing the message format and colorization
 #
 #####################################################################

 proc log_trace {message} {
 	global testsuite_log_level LOG_LEVEL_TRACE

 	if {$testsuite_log_level >= $LOG_LEVEL_TRACE} {
 		_log_format "trace" "$message"
 	}
 }


 ################################################################
 #
 # NAME
 #	in_fed - checks whether this cluster is in a federation
 #
 # SYNOPSIS
 #	in_fed
 #
 # RETURN VALUE
 #	Returns true if this cluster is in a federation, false otherwise
 #
 ################################################################

 proc in_fed {} {
 	global scontrol

 	set output [run_command_output -fail -nolog "$scontrol show fed"]
 	if {[regexp "Federation" $output]} {
 		return true
 	}

 	return false
 }


 ################################################################
 #
 # NAME
 #	check_job_state - checks if the state of a job is the expected one
 #
 # SYNOPSIS
 #	check_job_state job state ?het_job?
 #
 # DESCRIPTION
 #	Checks if the state of a job is the expected one.
 #
 # ARGUMENTS
 #	job
 #		Job ID to check
 #	state
 #		Desired state of the job to match
 #	het_job
 #		If set, checks state of each component job if the
 #		job is a hetjob.
 #
 # RETURN VALUE
 #	true if job was on the desired state, or the number of job components
 #	on that state if it's a hetjob and het_job option enabled, false
 #	otherwise.
 #
 ################################################################

 proc check_job_state { job state {het_job 0}} {
 	global scontrol

 	set jid_list ""
 	if { $het_job } {
 		set jid_list [get_het_job_ids $job 1]
 	}

 	if { $jid_list == "" } {
 		# non-het job
 		set jid_list $job
 	}

 	foreach jid $jid_list {
 		set state_match 0
 		spawn $scontrol show job $jid
 		expect {
 			-re "JobState=($state)" {
 				incr state_match
 			}
 			timeout {
 				fail "scontrol not responding"
 			}
 			eof {
 				wait
 			}
 		}

 		if {$state_match != 1} {
 			log_error "job $jid should be in $state state, but is not"
 			return false
 		}
 	}

 	return true
 }


 ################################################################
 #
 # NAME
 #	get_gres_count - returns a dict of nodes and GRES counts
 #
 # SYNOPSIS
 #	get_gres_count gres_name ?node_list?
 #
 # DESCRIPTION
 #	Returns a dict of node names and the count of a specifed
 #	GRES aggregating all its types on each node.
 #
 # RETURN VALUE
 #	If the node_list is not specified node name is specified,
 #	this function will return a dict with the GRES count for all
 #	the nodes of the default partition.
 #	If specified, a dict only with the nodes of the node_list.
 #
 ################################################################

 proc get_gres_count { gres_name {node_list ""} } {
 	set nodes_dict [get_nodes $node_list]
 	set nodes_gres_dict [dict create]
 	dict for {node_name node_dict} $nodes_dict {
 		if [dict exists $node_dict "Gres"] {
 			set gres_param [dict get $node_dict "Gres"]
 			set gres_dict  [count_gres $gres_param]
 		}
 		if [dict exists $gres_dict $gres_name] {
 			set gres_count [dict get $gres_dict $gres_name]
 			dict set nodes_gres_dict $node_name $gres_count
 		}
 	}
 	return $nodes_gres_dict
 }


 ################################################################
 #
 # NAME
 #	count_gres - returns a dict of GRES names and their total counts
 #
 # SYNOPSIS
 #	count_gres gres_param
 #
 # DESCRIPTION
 #	Parses a GRES parameter string typically obtained from nodes or
 #	jobs info, and returns a dict of GRES names and their count
 #	aggregating all the types of each GRES.
 #
 # ARGUMENTS
 #	gres_param
 #		The usual coma-separated list of Gres (e.g gpu:2,craynetwork:1).
 #	only_consumable
 #		If true, the no_consume gres are ignored.
 #
 # RETURN VALUE
 #	A dict of GRES names and their count aggregating all types of
 #	each GRES.
 #
 ################################################################

 proc count_gres {gres_param {only_consumable false}} {
 	global gres_regex
 	set gres_dict [dict create]
 	foreach gres [split $gres_param ","] {
 		if {[regexp $gres_regex $gres {} name type nc count] == 1} {
 			if {$only_consumable && ($type eq "no_consume" || $nc eq "no_consume")} {
 				continue
 			} elseif {$nc eq ""} {
 				set count $type
 			} elseif {$count eq ""} {
 				set count $nc
 			}

 			if {[dict exists $gres_dict $name]} {
 				dict set gres_dict $name [expr [dict get $gres_dict $name] + $count]
 			} else {
 				dict set gres_dict $name $count
 			}
 		}
 	}
 	return $gres_dict
 }

 ################################################################
 #
 # NAME
 #	get_highest_gres_count - returns highest number of GRES per node on node_count nodes
 #
 # SYNOPSIS
 #	get_highest_gres_count node_count gres_name
 #
 # DESCRIPTION
 #	For a given number of nodes, returns the highest GRES count per
 #	node available on at least that number of nodes.
 #
 # EXAMPLE
 #	For example: node1 has 1 GPU, node2 has 2 GPUs and node3 has 3 GPUs
 #	[get_highest_gres_count 1 "gpu"] returns 3 (i.e. 1 node 3 GPUs)
 #	[get_highest_gres_count 2 "gpu"] returns 2 (i.e. 2 nodes have at least 2 GPUs each)
 #	[get_highest_gres_count 3 "gpu"] returns 1 (i.e. 3 nodes have at least 1 GPU each)
 #
 ################################################################

 proc get_highest_gres_count { node_count gres_name } {

 	set available_nodes [node_list_to_range [get_nodes_by_state]]
 	set gres_dict [get_gres_count $gres_name $available_nodes]
 	set gres_count [list]

 	dict for {node gres} $gres_dict {
 		lappend gres_count $gres
 	}

 	set count [lindex [lsort -decreasing -integer $gres_count] [expr $node_count - 1]]
 	return $count
 }


 ################################################################
 #
 # NAME
 #	_set_gpu_socket_inx - adds a socket index to the gpu_sock_list if not already on it
 #
 # SYNOPSIS
 #	_set_gpu_socket_inx sock_inx
 #
 # DESCRIPTION
 #	Add a socket index to the array gpu_sock_list if not already
 #	on the list. Subroutine used by get_gpu_socket_count
 #
 ################################################################

 proc _set_gpu_socket_inx { sock_inx } {
 	global gpu_sock_list

 	if {$sock_inx == -1} {
 		set gpu_sock_list [lreplace $gpu_sock_list 0 99]
 		return
 	}

 	set sock_cnt [llength $gpu_sock_list]
 	for {set i 0} {$i < $sock_cnt} {incr i} {
 		if {[lindex $gpu_sock_list $i] == $sock_inx} {
 			return
 		}
 	}
 	lappend gpu_sock_list $sock_inx
 }


 ################################################################
 # Subroutine used by get_gpu_socket_count
 # Add a socket index to the array gpu_sock_list if not already
 # on the list.
 ################################################################

 proc _set_gpu_socket_range { sock_first_inx sock_last_inx } {
 	global gpu_sock_list

 	set sock_cnt [llength $gpu_sock_list]
 	for {set s $sock_first_inx} {$s <= $sock_last_inx} {incr s} {
 		set found 0
 		for {set i 0} {$i < $sock_cnt} {incr i} {
 			if {[lindex $gpu_sock_list $i] == $s} {
 				set found 1
 				break
 			}
 		}
 		if {$found == 0} {
 			lappend gpu_sock_list $s
 		}
 	}
 }


 ################################################################
 #
 # NAME
 #	get_gpu_socket_count - returns the number of sockets with GPUS on a node with the given per-node GPU count
 #
 # SYNOPSIS
 #	get_gpu_socket_count gpu_cnt sockets_per_node
 #
 # DESCRIPTION
 #	Given a per-node GPU count, return the number of sockets with
 #	GPUs on a node with the given per-node GPU count.
 #	If the sockets_per_node has a value of 1 then just return 1
 #	rather than determine the count (for performance reasons).
 #
 ################################################################

 proc get_gpu_socket_count { gpu_cnt sockets_per_node } {
 	global test_dir re_word_str bin_rm number scontrol srun gpu_sock_list

 	set sockets_with_gpus 1
 	set file_in "$test_dir/test_get_gpu_socket_count"

 	if {$sockets_per_node == 1} {
 		return 1
 	}

 	log_user 0
 	_set_gpu_socket_inx -1
 	make_bash_script $file_in "$scontrol show node \$SLURMD_NODENAME"
 	spawn $srun -N1 --gres=gpu:$gpu_cnt $file_in
 	expect {
 		-re "gpu:${number}.S:($number)-($number)" {
 			_set_gpu_socket_range $expect_out(1,string) $expect_out(2,string)
 			exp_continue
 		}
 		-re "gpu:${re_word_str}:${number}.S:($number),($number),($number),($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			_set_gpu_socket_inx $expect_out(2,string)
 			_set_gpu_socket_inx $expect_out(3,string)
 			_set_gpu_socket_inx $expect_out(4,string)
 			exp_continue
 		}
 		-re "gpu:${re_word_str}:${number}.S:($number),($number),($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			_set_gpu_socket_inx $expect_out(2,string)
 			_set_gpu_socket_inx $expect_out(3,string)
 			exp_continue
 		}
 		-re "gpu:${re_word_str}:${number}.S:($number),($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			_set_gpu_socket_inx $expect_out(2,string)
 			exp_continue
 		}
 		-re "gpu:${re_word_str}:${number}.S:($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			exp_continue
 		}
 		-re "gpu:${number}.S:($number),($number),($number),($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			_set_gpu_socket_inx $expect_out(2,string)
 			_set_gpu_socket_inx $expect_out(3,string)
 			_set_gpu_socket_inx $expect_out(4,string)
 			exp_continue
 		}
 		-re "gpu:${number}.S:($number),($number),($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			_set_gpu_socket_inx $expect_out(2,string)
 			_set_gpu_socket_inx $expect_out(3,string)
 			exp_continue
 		}
 		-re "gpu:${number}.S:($number),($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			_set_gpu_socket_inx $expect_out(2,string)
 			exp_continue
 		}
 		-re "gpu:${number}.S:($number)" {
 			_set_gpu_socket_inx $expect_out(1,string)
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	set sock_cnt [llength $gpu_sock_list]
 	if {$sock_cnt > 1} {
 		set sockets_with_gpus $sock_cnt
 	}

 	return $sockets_with_gpus
 }


 ################################################################
 #
 # NAME
 #	get_highest_mps_count - get_highest_gres_count nodes mps, but for "mps per GPU"
 #
 # SYNOPSIS
 #	get_highest_mps_count node_count
 #
 # DESCRIPTION
 #	For a given number of nodes, returns the higest number of MPS per GPU
 #	available at least on those number of nodes.
 #
 ################################################################

 proc get_highest_mps_count { node_count } {
 	# We cannot use get_highest_gres_count because we need "per gpu",
 	# so we get all the mps per node and all gpus per node, to create
 	# a mps_per_gpu list to sort and get the count.
 	set available_nodes [node_list_to_range [get_nodes_by_state]]
 	set mps_dict [get_gres_count "mps" $available_nodes]
 	set gpu_dict [get_gres_count "gpu" $available_nodes]
 	set mps_per_gpu [list]

 	dict for {node mps} $mps_dict {
 		if { $mps > 0 } {
 			if [dict exists $gpu_dict $node] {
 				set gpu [dict get $gpu_dict $node]
 				if { $gpu > 0 } {
 					lappend mps_per_gpu [expr $mps / $gpu]
 				} else {
 					fail "All nodes with MPS should have a GPU"
 				}
 			} else {
 				fail "All nodes with MPS should have a GPU"
 			}
 		}
 	}

 	set count [lindex [lsort -decreasing -integer $mps_per_gpu] [expr $node_count - 1]]
 	return $count
 }


 ################################################################
 #
 # NAME
 #	get_mps_node_count - gets the number of nodes with a positive number of GRES MPS
 #
 # SYNOPSIS
 #	get_mps_node_count
 #
 # RETURN VALUE
 #	Return the count of nodes with a non-zero count of GRES MPS
 #
 ################################################################

 proc get_mps_node_count { } {
 	global number sinfo re_word_str
 	set fini 0
 	set node_inx 0
 	set def_part [default_partition]

 	log_user 0
 	spawn $sinfo -N -p$def_part -oGRES=%G -h
 	expect {
 		-re "GRES=($re_word_str)" {
 			set mps_count 0
 			set parts [split $expect_out(1,string) ",/"]
 			while 1 {
 				set mps_found [lsearch $parts "mps*"]
 				if { $mps_found == -1 } break

 				set parts2 [split [lindex $parts $mps_found] ":(/"]
 				set col [lsearch -regexp $parts2 ^$number$]
 				if { $col == -1 } {
 					incr mps_count
 				} else {
 					set mps_count [expr $mps_count + [lindex $parts2 $col]]
 				}
 				set parts [lreplace $parts $mps_found $mps_found]
 			}

 			if {$mps_count > 0} {
 				incr node_inx
 			}
 			exp_continue
 		}
 		eof {
 			wait
 		}
 	}
 	log_user 1

 	return $node_inx
 }


 ################################################################
 #
 # NAME
 #	cuda_count - determines the count of allocated GPUs
 #
 # SYNOPSIS
 #	cuda_count cuda_string
 #
 # ARGUMENTS
 #	cuda_string
 #		Contents of a CUDA_VISIBLE_DEVICES environment variable
 #
 # RETURN VALUE
 #	Return the number of GPUs or -1 on error
 #
 ################################################################

 proc cuda_count { cuda_string } {
 	set cuda_count 0
 	set has_number 0
 	set len [string length $cuda_string]
 	for {set char_inx 0} {$char_inx < $len} {incr char_inx} {
 		set cuda_char [string index $cuda_string $char_inx]
 		if {[string match , $cuda_char]} {
 			if {$has_number > 0} {
 				incr cuda_count
 				set has_number 0
 			} else {
 				log_error "Invalid input ($cuda_string)"
 				return -1
 			}
 		} elseif {[string is digit $cuda_char]} {
 			set has_number 1
 		}
 	}
 	if {$has_number > 0} {
 		incr cuda_count
 	} else {
 		log_error "Invalid input ($cuda_string)"
 		return -1
 	}
 	return $cuda_count
 }


 ################################################################
 # NAME
 #	get_conf_path - gets the path to the slurm.conf file
 #
 # SYNOPSIS
 #	get_conf_path
 #
 # RETURN VALUE
 #	Returns the path to the slurm.conf file
 #
 ################################################################

 proc get_conf_path { } {
 	global scontrol re_word_str eol

 	if [regexp {(.*)/slurm.conf} [get_config_param "SLURM_CONF"] {} config_dir] {
 		return $config_dir
 	} else {
 		fail "Unable to determine config dir"
 	}
 }


 ################################################################
 #
 # NAME
 #	save_conf - saves a backup of the specfied configuration file
 #
 # SYNOPSIS
 #	save_conf file_name
 #
 # DESCRIPTION
 #	If the specified file_name exists, a backup is made which will be
 #	restored when restore_conf is called.
 #	If the specified file_name does not exist, a special backup will be
 #	made that will cause the file to be removed when restore_conf is
 #	called.
 #	If a backup already exists, a warning is issued and no backup is made
 #	(honoring the existing backup).
 #
 # SEE ALSO
 #	restore_conf
 #
 ################################################################

 proc save_conf { file_name } {
 	global bin_chmod bin_cp bin_mv bin_touch test_name

 	log_debug "Saving backup of $file_name"

 	#
 	# Check for existing backup
 	# If a backup exists, issue a warning and return (honor existing backup)
 	#
 	set conf_dir [file dirname $file_name]
 	set dir_files [glob -nocomplain -directory $conf_dir *]
 	set preexisting_backup_file [lsearch -inline -regexp $dir_files "$file_name\\\.test\\d+\\\.\\d+\$"]
 	if {$preexisting_backup_file ne ""} {
 		log_warn "Backup file already exists: ($preexisting_backup_file)"
 		return
 	}

 	#
 	# Check if file to backup exists.
 	# If it doesn't exist, warn the user, touch an empty backup file with
 	# the sticky bit set and allow the test to continue.
 	# restore_conf will remove the file.
 	#
 	set new_backup_file "$file_name.$test_name"
 	if {![file exists $file_name]} {
 		log_warn "Backup of a nonexistent file requested: $file_name"
 		run_command -fail -nolog "$bin_touch $new_backup_file"
 		run_command -fail -nolog "$bin_chmod +t $new_backup_file"
 		return
 	}

 	run_command -fail -nolog "$bin_mv $file_name $new_backup_file"
 	run_command -fail -nolog "$bin_cp $new_backup_file $file_name"
 }


 ################################################################
 #
 # NAME
 #	restore_conf - restores the original confiration file from backup
 #
 # SYNOPSIS
 #	restore_conf file_name
 #
 # DESCRIPTION
 #	If a backup exists for the specified file_name, it is restored.
 #	If the specified file_name did not exist when originally backed up,
 #	it will be removed.
 #	If no backup exists, a warning is issued.
 #
 # SEE ALSO
 #	save_conf
 #
 ################################################################

 proc restore_conf { file_name } {
 	global bin_mv bin_rm

 	log_debug "Restoring backup of $file_name"

 	set conf_dir [file dirname $file_name]
 	set dir_files [glob -nocomplain -directory $conf_dir *]
 	set backup_file [lsearch -inline -regexp $dir_files "$file_name\\\.test\\d+\\\.\\d+\$"]
 	if {$backup_file ne ""} {
 		file stat $backup_file stat

 		# If the sticky bit is set and the file is empty, remove both
 		if {! $stat(size) && [expr $stat(mode) & 512]} {
 			log_debug "Removing file used for the test: $file_name"
 			run_command -fail -nolog "$bin_rm -f $backup_file $file_name"
 		# Else replace the original with the backup
 		} else {
 			run_command -fail -nolog "$bin_mv $backup_file $file_name"
 		}
 	} else {
 		#
 		# If backup file doesn't exist, it has probably already been
 		# restored by a previous call to restore_conf
 		#
 		log_warn "Backup file does not exist for $file_name. It has probably already been restored"
 		return
 	}
 }


 ################################################################
 #
 # NAME
 #	have_nvml - checks if HAVE_NVML is set in config.h
 #
 # SYNOPSIS
 #	have_nvml
 #
 # RETURN VALUE
 #	Returns true if HAVE_NVML is set in config.h. Else, returns false
 #
 ################################################################

 proc have_nvml { } {
 	global bin_grep number config_h

 	return [expr [run_command_status -none -nolog "$bin_grep \"HAVE_NVML 1\" $config_h"] == 0]
 }


 ################################################################
 #
 # NAME
 #	delete_part - deletes partition on system
 #
 # SYNOPSIS
 #	delete_part partition
 #
 ################################################################

 proc delete_part { part_name } {
 	global scontrol

 	# Remove part
 	spawn $scontrol delete partition=$part_name
 	expect {
 		timeout {
 			fail "scontrol is not responding"
 		}
 		eof {
 			wait
 		}
 	}
 }


 ################################################################
 #
 # NAME
 #	have_lua - checks if HAVE_LUA is set in config.h
 #
 # SYNOPSIS
 #	have_lua
 #
 # RETURN VALUE
 #	Returns true if HAVE_LUA is set in config.h. Else, returns false
 #
 ################################################################

 proc have_lua { } {
 	global bin_grep config_h

 	return [expr [run_command_status -none -nolog "$bin_grep HAVE_LUA $config_h"] == 0]
 }


 ################################################################
 #
 # NAME
 #	get_reservations - returns a dictionary of dictionaries of reservation parameters
 #
 # SYNOPSIS
 #	get_reservations ?resv_name?
 #
 # RETURN VALUE
 #	Uses `scontrol show reservation` to return a dictionary of dictionaries
 #	of job parameters. Specifying an invalid resv_name result in a failure.
 #
 ################################################################

 proc get_reservations { {resv_name ""} } {
 	global scontrol

 	set command "$scontrol show reservation -o"
 	if {$resv_name ne ""} {
 		append command " $resv_name"
 	}
 	set output [run_command_output -fail "$command"]

 	# Iterate over each reservation's parameter list
 	foreach line [split $output "\n"] {
 		if {$line eq ""} { continue }

 		# Peel off the resv parameters one at a time
 		# The first quantifier sets the greediness for the whole RE
 		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
 			# Remove the consumed parameter from the line
 			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
 			# Add it to the temporary job dictionary
 			dict set resv_dict $param_name $param_value
 		}

 		set resv_name_dict [dict get $resv_dict "ReservationName"]

 		# Add the resv dictionary to resvs dictionary
 		dict set resvs_dict $resv_name_dict $resv_dict

 		# Clear the resv dictionary for the next resv
 		set resv_dict {}
 	}

 	return $resvs_dict
 }


 ################################################################
 #
 # NAME
 #	get_reservation_param - returns a specific parameter value for a specific reservation
 #
 # SYNOPSIS
 #	get_reservation_param resv_name parameter_name
 #
 # DESCRIPTION
 #	Returns a specific parameter value for a specified reservation if the
 #	parameter exists for the reservation, or MISSING if it does not exist.
 #	Specifying an invalid reservation name will result in a failure.
 #
 ################################################################

 proc get_reservation_param { resv_name parameter_name } {

 	set resvs_dict [get_reservations $resv_name]

 	if [dict exists $resvs_dict $resv_name $parameter_name] {
 		return [dict get $resvs_dict $resv_name $parameter_name]
 	} else {
 		return "MISSING"
 	}
 }


 ################################################################
 #
 # NAME
 #	create_res - create new reservation in system
 #
 # SYNOPSIS
 #	create_res ?res_name? ?res_params?
 #
 # RETURN VALUE
 #	the exit code of the scontrol command run
 #
 ################################################################

 proc create_res { res_name res_params } {
 	global scontrol

 	set result   [run_command "$scontrol create res ReservationName=$res_name $res_params"]
 	set output   [dict get $result output]
 	set ret_code [dict get $result exit_code]

 	if { $ret_code } {
 		log_warn "[lindex [info level 0] 0]: error from scontrol: $output"
 	} else {
 		log_debug "[lindex [info level 0] 0]: success from scontrol: $output"
 	}

 	return $ret_code
 }


 ################################################################
 #
 # NAME
 #	update_res - update exisiting reservation in system
 #
 # SYNOPSIS
 #	update_res ?res_name? ?res_params?
 #
 # RETURN VALUE
 #	the exit code of the scontrol command run
 #
 ################################################################

 proc update_res { res_name res_params } {
 	global scontrol

 	set result   [run_command "$scontrol update ReservationName=$res_name $res_params"]
 	set output   [dict get $result output]
 	set ret_code [dict get $result exit_code]

 	if { $ret_code } {
 		log_warn "Return code from scontrol: $ret_code. Output: $output"
 	}

 	return $ret_code
 }


 ################################################################
 #
 # NAME
 #	delete_res - delete reservation from system
 #
 # SYNOPSIS
 #	delete_res ?res_name?
 #
 # RETURN VALUE
 #	the exit code of the scontrol command run
 #
 ################################################################

 proc delete_res { res_name } {
 	global scontrol

 	set result   [run_command "$scontrol delete ReservationName=$res_name"]
 	set output   [dict get $result output]
 	set ret_code [dict get $result exit_code]

 	if { $ret_code } {
 		log_warn "Return code from scontrol: $ret_code. Output: $output"
 	}

 	return $ret_code
 }


 ################################################################
 #
 # NAME
 #	create_part - creates a partition
 #
 # SYNOPSIS
 #	create_part partition num_nodes
 #
 # ARGUMENTS
 #	partition
 #		Name of partition to create
 #	num_nodes
 #		Number of nodes of partition to create
 #
 # RETURN VALUE
 #	RETURN_SUCCESS, or non-zero on error
 #
 ################################################################

 proc create_part { part_name num_nodes_in } {
 	global scontrol srun bin_printenv number re_word_str

 	set nodes ""
 	set num_nodes_out 0

 	set found 0
 	spawn $scontrol show partitionname=$part_name
 	expect {
 		-re "PartitionName=$part_name" {
 			set found 1
 			exp_continue
 		}
 		timeout {
 			fail "scontrol is not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	if {$found == 1} {
 		log_error "There is already a partition $part_name"
 		return $::RETURN_ERROR
 	}

 	if {[string length [default_partition]] == 0} {
 		log_warn "create_part does not work without a default partition"
 		return $::RETURN_ERROR
 	}

 	if { $num_nodes_in } {
 		set num_nodes $num_nodes_in
 	} else {
 		set num_nodes [llength [get_nodes_by_state]]
 	}

 	log_user 0
 	# Get a list of nodes
 	spawn $srun -t1 -N1-$num_nodes $bin_printenv
 	expect {
 		-re "SLURM_JOB_NUM_NODES=($number)" {
 			set num_nodes_out $expect_out(1,string)
 			exp_continue
 		}
 		-re "SLURM_NODELIST=($re_word_str)" {
 			set nodes $expect_out(1,string)
 			exp_continue
 		}
 		timeout {
 			fail "srun is not responding getting number of nodes creating part"
 		}
 		eof {
 			wait
 		}
 	}

 	if {[string length $nodes] == 0} {
 		log_error "Did not get a valid node list"
 		return $::RETURN_ERROR
 	} elseif { $num_nodes_out != $num_nodes_in } {
 		log_error "Did not get enough nodes ($num_nodes_out != $num_nodes_in) to run test"
 		return $::RETURN_ERROR
 	}

 	spawn $scontrol create partitionname=$part_name nodes=$nodes
 	expect {
 		timeout {
 			fail "scontrol is not responding creating partition"
 		}
 		eof {
 			wait
 		}
 	}

 	set found 0
 	spawn $scontrol show partitionname=$part_name
 	expect {
 		-re "PartitionName=$part_name" {
 			set found 1
 			exp_continue
 		}
 		timeout {
 			fail "scontrol is not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	if { $found == 0 } {
 		log_error "scontrol did not create partition $part_name"
 		return $::RETURN_ERROR
 	}
 	log_user 1

 	log_debug "Created partition $part_name with $num_nodes_in nodes"
 	return $::RETURN_SUCCESS
 }


 ################################################################
 #
 # NAME
 #	get_nodes - returns a dictionary of dictionaries of node parameters
 #
 # SYNOPSIS
 #	get_nodes ?hostlist_expression?
 #
 # DESCRIPTION
 #	Uses `scontrol show node` to query node parameters, returning a
 #	dictionary of dictionaries with the node names as keys of the first
 #	level dictionary and with the parameters as keys of the second level
 #	dictionary. Specifying an invalid node name will result in a failure.
 #
 # RETURN VALUE
 #	If the optional node expression argument is specified, the result will
 #	be constrained by the specified hostlist expression. Otherwise, the
 #	results for all nodes will be returned.
 #
 ################################################################

 proc get_nodes { {hostlist_expression ""} } {
 	global scontrol

 	set command "$scontrol show node -o"
 	if {$hostlist_expression ne ""} {
 		append command " $hostlist_expression"
 	}
 	set output [run_command_output -fail -nolog "$command"]

 	# Iterate over each node parameter line
 	foreach line [split $output "\n"] {
 		if {$line eq ""} { continue }

 		# Peel off the node parameters one at a time
 		# The first quantifier sets the greediness for the whole RE
 		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
 			# Remove the consumed parameter from the line
 			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
 			# Add it to the temporary node dictionary
 			dict set node_dict $param_name $param_value
 		}

 		set node_name [dict get $node_dict "NodeName"]

 		# Add the node dictionary to nodes dictionary
 		dict set nodes_dict $node_name $node_dict

 		# Clear the node dictionary for use by the next node
 		set node_dict {}
 	}

 	return $nodes_dict
 }


 ################################################################
 #
 # NAME
 #	get_node_param - returns a specific parameter value for a specific node
 #
 # SYNOPSIS
 #	get_node_param node_name parameter_name
 #
 # DESCRIPTION
 #	Returns a specific parameter value for a specified node if the
 #	parameter exists for the node, or MISSING if it does not exist.
 #	Specifying an invalid node name will result in a failure.
 #
 ################################################################

 proc get_node_param { node_name parameter_name } {

 	set nodes_dict [get_nodes $node_name]

 	if [dict exists $nodes_dict $node_name $parameter_name] {
 		return [dict get $nodes_dict $node_name $parameter_name]
 	} else {
 		return "MISSING"
 	}
 }


 ################################################################
 #
 # NAME
 #	get_nodes_by_request - get a list of nodes satisfying requested resources
 #
 # SYNOPSIS
 #	get_nodes_by_request ?options? ?request_args?
 #
 # DESCRIPTION
 #	Using srun (optionally with the specified arguments), returns a list
 #	of nodes having the requested resources.
 #	If an error occurs, the invoking test will fail.
 #
 # OPTIONS
 #	-fail
 #		fail the test if the execution of srun results in an error or timeout
 #
 # ARGUMENTS
 #	request_args
 #		Desired resources of a node in form of srun arguments,
 #		e.g. "--gres=gpu:1 -n1 -t1"
 #
 # RETURN VALUE
 #	A list of nodes with at least the requested resources, or an empty
 #	list otherwise.
 #
 ################################################################

 proc get_nodes_by_request args {
 	global srun

 	set options [list]
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fail {
 				lappend options [lrange $args 0 0]
 				set args [lrange $args 1 end]
 			}
 			default break
 		}
 	}

 	if {[llength $args] == 1} {
 		lassign $args request_args
 	} elseif {[llength $args] == 0} {
 		set request_args "-n1 -t1"
 	} else {
 		fail "[lindex [info level 0] 0]: Invalid number of arguments ([llength $args]): $args"
 	}

 	log_debug "Getting nodes that can be allocated with request: $request_args"
 	set command "$srun -Q $request_args printenv SLURMD_NODENAME"
 	set result [run_command {*}$options $command]

 	if [dict get $result exit_code] {
 		return {}
 	}

 	set output [dict get $result output]
 	foreach line [split $output "\n"] {
 		if {$line eq ""} { continue }
 		dict incr allocated_nodes $line
 	}

 	return [lsort [dict keys $allocated_nodes]]
 }


 ################################################################
 #
 # NAME
 #	get_partitions - returns a dictionary of dictionaries of partition parameters
 #
 # SYNOPSIS
 #	get_partitions ?partition_name?
 #
 # DESCRIPTION
 #	Uses `scontrol show partitions` to query partition parameters,
 #	returning a dictionary of dictionaries with the partition names
 #	as keys of the first level dictionary and with the parameters as
 #	keys of the second level dictionary. Specifying an invalid partition
 #	name will result in a failure.
 #
 # RETURN VALUE
 #	If the optional partition_name argument is specified, the result will
 #	contain only the one patition. Otherwise, the results for all
 #	partitions will be returned.
 #
 ################################################################

 proc get_partitions { {partition_name ""} } {
 	global scontrol

 	set command "$scontrol show partition -o"
 	if {$partition_name ne ""} {
 		append command " $partition_name"
 	}
 	set output [run_command_output -fail -nolog "$command"]

 	# Iterate over each partition parameter line
 	foreach line [split $output "\n"] {
 		if {$line eq ""} { continue }

 		# Peel off the partition parameters one at a time
 		# The first quantifier sets the greediness for the whole RE
 		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
 			# Remove the consumed parameter from the line
 			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
 			# Add it to the temporary node dictionary
 			dict set part_dict $param_name $param_value
 		}

 		set part_name [dict get $part_dict "PartitionName"]

 		# Add the node dictionary to nodes dictionary
 		dict set parts_dict $part_name $part_dict

 		# Clear the node dictionary for use by the next node
 		set part_dict {}
 	}

 	return $parts_dict
 }


 ################################################################
 #
 # NAME
 #	get_partition_param - returns a specific parameter value for a specific partition
 #
 # SYNOPSIS
 #	get_partition_param partitoin_name parameter_name
 #
 # DESCRIPTION
 #	Returns a specific parameter value for a specified partition if the
 #	parameter exists for the partition, or MISSING if it does not exist.
 #	Specifying an invalid partition name will result in a failure.
 #
 ################################################################

 proc get_partition_param { partition_name parameter_name } {

 	set partitions_dict [get_partitions $partition_name]

 	if [dict exists $partitions_dict $partition_name $parameter_name] {
 		return [dict get $partitions_dict $partition_name $parameter_name]
 	} else {
 		return "MISSING"
 	}
 }


 ################################################################
 #
 # NAME
 #	get_jobs - returns a dictionary of dictionaries of job parameters
 #
 # SYNOPSIS
 #	get_jobs ?job_id?
 #
 # DESCRIPTION
 #	Uses `scontrol show job` to return a dictionary of dictionaries of job
 #	parameters. Specifying an invalid job id will result in a failure.
 #
 ################################################################

 proc get_jobs { {job_id_in ""} } {
 	global scontrol

 	set command "$scontrol show job -d -o"
 	if {$job_id_in ne ""} {
 		append command " $job_id_in"
 	}
 	set output [run_command_output -fail "$command"]

 	# Iterate over each job's parameter list
 	foreach line [split $output "\n"] {
 		if {$line eq ""} { continue }

 		# Peel off the job parameters one at a time
 		# The first quantifier sets the greediness for the whole RE
 		while {[regexp {^ *?([^ =]+)=(.*)(?= +[^ =]+=| *$)} $line {} param_name param_value]} {
 			# Remove the consumed parameter from the line
 			set line [regsub {^ *?[^ =]+=.*(?= +[^ =]+=| *$)} $line {}]
 			# Add it to the temporary job dictionary
 			dict set job_dict $param_name $param_value
 		}

 		set job_id [dict get $job_dict "JobId"]

 		# Add the job dictionary to jobs dictionary
 		dict set jobs_dict $job_id $job_dict

 		# Clear the job dictionary for the next job
 		set job_dict {}
 	}

 	return $jobs_dict
 }


 ################################################################
 #
 # NAME
 #	get_job_param - returns a specific parameter value for a specific job
 #
 # SYNOPSIS
 #	get_job_param job_id parameter_name
 #
 # DESCRIPTION
 #	Returns a specific parameter value for a specified job if the
 #	parameter exists for the job, or MISSING if it does not exist.
 #	Specifying an invalid job id will result in a failure.
 #
 ################################################################

 proc get_job_param { job_id parameter_name } {

 	set jobs_dict [get_jobs $job_id]

 	if [dict exists $jobs_dict $job_id $parameter_name] {
 		return [dict get $jobs_dict $job_id $parameter_name]
 	} else {
 		return "MISSING"
 	}
 }

 proc check_reason { job_id reason } {
 	global squeue

 	set found 0
 	spawn $squeue -j $job_id --noheader -o "%r"
 	expect {
 		-re "$reason" {
 			set found 1
 			exp_continue
 		}
 		timeout {
 			log_error "squeue not responding"
 		}
 		eof {
 			wait
 		}
 	}

 	if {$found == 0} {
 		log_error "Job $job_id should have a wait reason of $reason"
 	}
 	return $found
 }


 ################################################################
 #
 # NAME
 #	submit_job - submits a job with sbatch and returns its job id
 #
 # SYNOPSIS
 #	submit_job ?options? job_args
 #
 # DESCRIPTION
 #	Submits a job with sbatch and returns its jobid, or 0 if error.
 #	It accepts all the options of run_command.
 #
 # OPTIONS
 #	It accepts and passes all the options of/to run_command and also:
 #	-env env
 #		Prepend $env to the actual sbatch command to set environment
 #		variables. For example "-env 'SLURM_NTASKS_PER_GPU=2'".
 #
 # ARGUMENTS
 #	job_args
 #		a string containing all the arguments to pass to sbatch
 #
 # RETURN VALUE
 #	the job id, or 0 if an error happen
 #
 ################################################################

 proc submit_job args {
 	global sbatch

 	set env    ""
 	set job_id 0

 	set idx [lsearch $args -env]
 	if {$idx >= 0} {
 		set env  [lindex   $args [expr $idx+1]]
 		set args [lreplace $args $idx [expr $idx+1]]
 	}

 	if {[llength $args] < 1} {
 		fail "Wrong number of parameters, should be >=1"
 	}
 	set job_options [lindex $args [expr [llength $args] - 1 ]]
 	set run_options   ""
 	if {[llength $args] > 1} {
 		set run_options [lrange $args 0 [expr [llength $args] - 2 ]]
 	}

 	set command ""
 	if {$env ne ""} {
 		set command "$env "
 	}
 	set command "$command$sbatch $job_options"
 	set output [run_command_output {*}$run_options "$command"]
 	regexp {Submitted \S+ job (\d+)} $output - job_id

 	return $job_id
 }

 ###############################################################################
 #
 # NAME
 #	check_exclusive_gres - Given a job_id and node_name of an exclusive
 #                              allocation make sure we allocated all GRES.
 #
 ################################################################################

 proc check_exclusive_gres { job_id node_name } {
 	# Check all consumable GRES of the node were allocated on the job
 	set gres_dict_job  [count_gres [get_job_param  $job_id "JOB_GRES"]]
 	set gres_dict_node [count_gres [get_node_param $node_name "Gres"] true]

 	dict for {gres_name gres_count} $gres_dict_node {
 		# We don't want mps to be allocated with --exclusive
 		if {$gres_name eq "mps" || $gres_name eq "shard"} {
 			if {[dict exists $gres_dict_job $gres_name]} {
 				subfail "Gres ($gres_name) on node ($node_name) allocated on job ($job_id) with --exclusive but it shouldn't be"
 			} else {
 				subpass "Gres ($gres_name) on node ($node_name) was not allocated on job ($job_id) with --exclusive, this is expected"
 			}
 		} elseif {![dict exists $gres_dict_job $gres_name]} {
 			subfail "Gres ($gres_name) on node ($node_name) not allocated on job ($job_id) with --exclusive"
 		} else {
 			subpass "Gres ($gres_name) on node ($node_name) was allocated on job ($job_id) with --exclusive"

 			set gres_count_job [dict get $gres_dict_job $gres_name]
 			subtest { $gres_count_job == $gres_count } "Gres ($gres_name) on node ($node_name) fully allocated on job ($job_id) with --exclusive" "$gres_count_job != $gres_count"
 		}
 	}
 }

 ################################################################
 #
 # NAME
 #	compile_against_libslurm - compiles a test program against either libslurm.so or libslurmfull.so
 #
 # SYNOPSIS
 #	compile_against_libslurm ?options? test_prog ?build_args?
 #
 # DESCRIPTION
 #	Compile a test program against either libslurm.so or libslurmfull.so.
 #
 # OPTIONS
 #	-full
 #		use libslurmfull.so instead of libslurm.so
 #	-shared
 #		produces a shared library (adds the -shared compiler option
 #		and adds a .so suffix to the output file name)
 #
 # ARGUMENTS
 #	test_prog
 #		The name of the test program (and .c file)
 #	build_args
 #		Additional string to be appended to the build command.
 #		E.g. "-DUSING_VALGRIND -lm ${build_dir}/src/slurmctld/locks.o"
 #		(initial space will be added automatically).
 #
 ################################################################################

 proc compile_against_libslurm args {
 	global slurm_dir bin_cc src_dir build_dir bin_chmod

 	set use_full   false
 	set shared     false
 	set build_args ""
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-full    {set use_full true; set args [lrange $args 1 end]}
 			-shared  {set shared true; set args [lrange $args 1 end]}
 			-*       {fail "Unknown option: [lindex $args 0]"}
 			default  break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count < 1} {
 		fail "Too few arguments ($argument_count): $args"
 	} else {
 		lassign $args test_prog
 	}
 	if {$argument_count == 2} { set build_args [lindex $args 1] }
 	if {$argument_count > 2} {
 		fail "Too many arguments ($argument_count): $args"
 	}

 	if {$use_full} {
 		set libfile "libslurmfull.so"
 	} else {
 		set libfile "libslurm.so"
 	}

 	if [file exists $slurm_dir/lib64/slurm/$libfile] {
 		set libdir "lib64"
 	} else {
 		set libdir "lib"
 	}

 	if {$use_full} {
 		set libline "$slurm_dir/$libdir/slurm"
 		set libfile "slurmfull"
 	} else {
 		set libline "$slurm_dir/$libdir"
 		set libfile "slurm"
 	}

 	set build_cmd "$bin_cc ${test_prog}.c -g -pthread"

 	if {$shared} {
 		set out "${test_prog}.so"
 		append build_cmd " -fPIC -shared"
 	} else {
 		set out "${test_prog}"
 	}
 	append build_cmd " -o $out"

 	append build_cmd " -I$src_dir -I$build_dir -I$slurm_dir/include -Wl,-rpath=$libline -L$libline -l$libfile -lresolv"

 	# Add additional arguments to the build command
 	if {$build_args != ""} {
 		append build_cmd " $build_args"
 	}

 	log_debug "Build command: $build_cmd"
 	catch {exec {*}$build_cmd} out_str out_dict
 	if {[dict get $out_dict -code]} {
 		log_error $out_str
 		return $::RETURN_ERROR
 	}

 	exec $bin_chmod 700 $out
 	return $::RETURN_SUCCESS
 }


 ################################################################
 #
 # NAME
 #	subtest - tests a boolean condition and updates subtest tallies
 #
 # SYNOPSIS
 #	subtest ?options? condition description ?diagnostics?
 #
 # DESCRIPTION
 #	Based on the results of testing a boolean expression, increments the
 #	relevant subtest count (pass, fail or skip) and logs a message.
 #
 # OPTIONS
 #	-fatal
 #		If the subtest fails, causes a fatal error ending the test
 #
 # ARGUMENTS
 #	condition
 #		The boolean expression to test
 #	description
 #		A single-line string describing what is being tested. This is
 #		a subtest "name" that is displayed with the log message
 #		whether the subtest passes or fails
 #	diagnostics
 #		A string providing additional diagnostic information that is
 #		only included in the log message on failure
 #
 # RETURN VALUE
 #	the boolean condition evaluated
 #
 # ENVIRONMENT
 #	testsuite_subtest_fatal
 #		Specifies whether first failing subtest aborts the test
 #
 ################################################################

 proc subtest args {

 	set options [list]
 	while {[llength $args]} {
 		switch -glob -- [lindex $args 0] {
 			-fatal -
 			-fail  {
 				lappend options -fatal
 				set args [lrange $args 1 end]
 			}
 			-*      {fail "Unknown option: [lindex $args 0]"}
 			default break
 		}
 	}
 	set argument_count [llength $args]
 	if {$argument_count < 2} {
 		fail "Too few arguments ($argument_count): $args"
 	} else {
 		set args [lassign $args condition description]
 	}

 	if [uplevel 1 expr [format "{%s}" $condition]] {
 		subpass $description
 		return true
 	} else {
 		subfail {*}$options $description {*}$args
 		return false
 	}
 }


 ################################################################
 #
 # NAME
 #	_is_testproc_included - returns if testproc_id was included or not excluded in argv
 #
 # SYNOPSIS
 #	_is_testproc_included testproc_id testproc_alias
 #
 # DESCRIPTION
 #	From command line the test runner can use -i and -e to include or
 #	exclude some test functions by their number. This function must be
 #	used to check if the test runner included or excluded the given
 #	testproc_id.
 #
 # RETURN VALUE
 #	true is testproc_id or testproc_alias was included with -i or not
 #	excluded -e, false otherwise
 #
 ################################################################

 proc _is_testproc_included {testproc_id testproc_alias} {
 	global _testproc_included _testproc_excluded

 	if {[llength $_testproc_included]} {
 		if {[lsearch $_testproc_included $testproc_id] >= 0 || \
 		    [lsearch $_testproc_included $testproc_alias] >= 0} {
 			return true
 		}
 		return false
 	}

 	if {[llength $_testproc_excluded]} {
 		if {[lsearch $_testproc_excluded $testproc_id] >= 0 || \
 		    [lsearch $_testproc_excluded $testproc_alias] >= 0} {
 			return false
 		}
 	}

 	return true
 }


 ################################################################
 #
 # NAME
 #	skip_following_testprocs - the following testproc calls will be skipped
 #
 # SYNOPSIS
 #	skip_following_testprocs reason
 #
 # ARGUMENTS
 #	reason
 #		The string with the reason message to add on the skip message
 #		on each skipped testproc.
 #
 # DESCRIPTION
 #	This function disables normal execution of testproc calls.
 #	It is meant to be used when some testprocs cannot be run due config
 #	limitations, but still call testproc to register what testprocs
 #	are skipped for a given reason.
 #	Use run_following_testprocs to reenable the norma execution of testprocs.

 ################################################################

 proc skip_following_testprocs {reason} {
 	global _testproc_skip_next _testproc_skip_reason

 	set _testproc_skip_next   true
 	set _testproc_skip_reason $reason
 }


 ################################################################
 #
 # NAME
 #	run_following_testprocs - the following testproc call will be run (if not excluded from command line)
 #
 # SYNOPSIS
 #	run_following_testprocs
 #
 # DESCRIPTION
 #	This function reenables the normal execution of testproc calls.
 #	It is meant to be used when skip_following_testprocs was called to skip
 #	previous testproc calls, and we want to normally run the following ones.
 #	Note that it does NOT overwrite what -i and -e included are passed
 #	from command line.
 #
 ################################################################

 proc run_following_testprocs {} {
 	global _testproc_skip_next _testproc_skip_reason

 	set _testproc_skip_next   false
 	set _testproc_skip_reason ""
 }


 ################################################################
 #
 # NAME
 #	testproc - launcher to run or skip a testproc_call
 #
 # SYNOPSIS
 #	testproc testproc_call
 #
 # ARGUMENTS
 #	A testproc_call is any normal call to a proc with any arguments that
 #	could be done normally without the testproc launcher.
 #	For example, we could normally do:
 #
 #	test_my_feature $some_args $expected_out
 #
 #	Or use the launcher like:
 #
 #	testproc test_my_feature $some_args $expected_out
 #
 # DESCRIPTION
 #	Using the testproc launcher has the following main benefits:
 #	a) Handles the -i and -e terminal options to include or exclude some
 #	   testprocs numbers.
 #	b) Runs or skips based on the last call of testproc_{skip,run}_following.
 #	c) Creates extra sections in the status summary
 #	   (see testsuite_testproc_details).
 #
 # RETURN VALUE
 #	The rc of the testproc_call if it has been run, or $::RETURN_SUCCESS
 #	otherwise. Using it is not recommended, though.
 #
 ################################################################

 proc testproc args {
 	return [testproc_alias "" {*}$args]
 }


 ################################################################
 #
 # NAME
 #	testproc_alias - launcher to run or skip a testproc_call, with an alias
 #
 # SYNOPSIS
 #	testproc alias testproc_call
 #
 # DESCRIPTION
 #	See testproc
 #
 ################################################################

 proc testproc_alias {alias args} {
 	global _testproc_pass_list _testproc_skip_list _testproc_fail_list
 	global _testproc_skip_next _testproc_skip_reason
 	global _testproc_messages testsuite_testproc_log_calls
 	global _subtest_pass_count
 	global _subtest_skip_count
 	global _subtest_fail_count

 	# Avoid integer alias to avoid confusions with testproc_id
 	if {[llength $alias] && [string is integer -strict $alias]} {
 		fail "testproc_alias doesn't support integer alias, use alphanumeric"
 	}

 	# Save previous subtest counts and next subtest num
 	set prev_pass $_subtest_pass_count
 	set prev_skip $_subtest_skip_count
 	set prev_fail $_subtest_fail_count
 	set prev_subtest [expr $_subtest_pass_count + \
 	                       $_subtest_skip_count + \
 	                       $_subtest_fail_count + 1]

 	# Get the testproc number
 	set testproc_id [expr [llength $_testproc_pass_list] + \
 	                      [llength $_testproc_skip_list] + \
 	                      [llength $_testproc_fail_list] + 1]

 	# Run or skip the testproc
 	set rc $::RETURN_SUCCESS
 	set reason ""
 	if {![_is_testproc_included $testproc_id $alias]} {
 		set reason "(Excluded from command line)"
 		if {$testsuite_testproc_log_calls == yes} {
 			subskip -nolog "Skipping testproc $testproc_id: {$args} $reason"
 		} else {
 			subskip -nolog "Skipping testproc $testproc_id: $reason"
 		}
 	} else {
 		if {$_testproc_skip_next} {
 			set reason "($_testproc_skip_reason)"
 			if {$testsuite_testproc_log_calls == yes} {
 				subskip  "Skipping testproc $testproc_id: {$args} $reason"
 			} else {
 				subskip  "Skipping testproc $testproc_id: $reason"
 			}
 		} else {
 			if {$testsuite_testproc_log_calls != no} {
 				log_info "Running testproc $testproc_id: $args"
 			} else {
 				log_info "Running testproc $testproc_id"
 			}
 			set rc [{*}$args]
 		}
 	}

 	# Get current subtest counts
 	set curr_subtest [expr $_subtest_pass_count + \
 	                       $_subtest_skip_count + \
 	                       $_subtest_fail_count]

 	# Register the testproc as fail, skip or pass (based on subtests)
 	if {[llength $alias]} {
 		set alias "${alias}: "
 	}
 	if {$_subtest_fail_count > $prev_fail} {
 		set reason "(Subtests: $prev_subtest to $curr_subtest)"
 		lappend  _testproc_fail_list $testproc_id
 		if {$testsuite_testproc_log_calls != no} {
 			dict set _testproc_messages  $testproc_id [list failed "$alias{$args} $reason"]
 		} else {
 			dict set _testproc_messages  $testproc_id [list failed "$alias$reason"]
 		}
 	} elseif {$_subtest_skip_count > $prev_skip} {
 		lappend  _testproc_skip_list $testproc_id
 		if {$testsuite_testproc_log_calls == yes} {
 			dict set _testproc_messages  $testproc_id [list skipped "$alias{$args} $reason"]
 		} else {
 			dict set _testproc_messages  $testproc_id [list skipped "$alias$reason"]
 		}
 	} else {
 		set reason "(Subtests: $prev_subtest to $curr_subtest)"
 		lappend  _testproc_pass_list $testproc_id
 		if {$testsuite_testproc_log_calls == yes} {
 			dict set _testproc_messages  $testproc_id [list passed "$alias{$args} $reason"]
 		} else {
 			dict set _testproc_messages  $testproc_id [list passed "$alias$reason"]
 		}
 	}

 	return $rc
 }


 ################################################################
 #
 # NAME
 #	_log_format - prints a log message with colorization and formatting
 #
 # SYNOPSIS
 #	_log_format log_level message
 #
 # DESCRIPTION
 #	This procedure is called by the log_<level> procedures and
 #	derives the relevant log level from the caller's procedure name.
 #
 # ARGUMENTS
 #	log_level
 #		The logging threshold that triggered the log statement
 #	message
 #		The message to print with colorization and formatting
 #
 # ENVIRONMENT
 #	testsuite_log_format
 #		Used as the template for the fields to be output.
 #		Fields must be expressed in the form:
 #			%{<field_name>}<format_conversion_specifier>
 #		Supported fields include the following:
 #			message
 #				The log message
 #			filename
 #				The file name where the log_<log_level>
 #				procedure was called from
 #			lineno
 #				The line number where the log_<log_level>
 #				procedure was called from
 #			timestamp
 #				The date and time when the log_<log_level>
 #				procedure was called at
 #			msecs
 #				The milliseconds when the log_<log_level>
 #				procedure was called at
 #			loglevel
 #				The log level that triggers the log_<log_level>
 #				procedure to be called
 #			backtrace
 #				An abbreviated call stack trace with line
 #				numbers
 #	testsuite_time_format
 #		Used as a template for the timestamp. See the format groups
 #		for the tcl clock format command.
 #	testsuite_colorize
 #		Boolean that turns colorization on or off
 #	testsuite_color_<log_level>
 #		Can be set to define the color used for each log level
 #
 ################################################################

 proc _log_format { log_level message } {
 	global testsuite_colorize testsuite_log_format testsuite_time_format
 	global COLOR_NONE
 	global testsuite_color_fatal testsuite_color_error testsuite_color_warn
 	global testsuite_color_info testsuite_color_pass testsuite_color_command
 	global testsuite_color_debug testsuite_color_trace

 	set format_string $testsuite_log_format
 	set milliseconds_since_epoch [clock milliseconds]
 	set date_time [clock format [expr {$milliseconds_since_epoch / 1000}] -format "$testsuite_time_format"]
 	set milliseconds [expr {$milliseconds_since_epoch % 1000}]

 	set frame_level -2
 	while { [dict get [info frame $frame_level] type] != "source" } {
 		incr frame_level -1
 	}

 	set format_args {}
 	while {[regexp "%{\[a-z]+}" $format_string format_field]} {
 		if {$format_field eq "%{message}"} {
 			lappend format_args $message
 		} elseif {$format_field eq "%{filename}"} {
 			lappend format_args [file tail [dict get [info frame $frame_level] file]]
 		} elseif {$format_field eq "%{lineno}"} {
 			lappend format_args [dict get [info frame $frame_level] line]
 		} elseif {$format_field eq "%{timestamp}"} {
 			lappend format_args $date_time
 		} elseif {$format_field eq "%{msecs}" || $format_field eq "%{milliseconds}"} {
 			lappend format_args $milliseconds
 		} elseif {$format_field eq "%{loglevel}" || $format_field eq "%{levelname}"} {
 			lappend format_args [string totitle $log_level]
 		} elseif {$format_field eq "%{backtrace}"} {
 			lappend format_args [_line_trace]
 		} else {
 			fail "Invalid field ($format_field) specified in testsuite_log_format"
 		}
 		regsub $format_field $format_string "%" format_string
 	}

 	if ($testsuite_colorize) {
 		switch $log_level {
 			fatal   { append output $testsuite_color_fatal }
 			error   { append output $testsuite_color_error }
 			warning { append output $testsuite_color_warn }
 			info    { append output $testsuite_color_info }
 			pass    { append output $testsuite_color_pass }
 			command { append output $testsuite_color_command }
 			debug   { append output $testsuite_color_debug }
 			trace   { append output $testsuite_color_trace }
 		}
 	}
 	append output [format $format_string {*}$format_args]
 	if ($testsuite_colorize) {
 		append output $COLOR_NONE
 	}
 	puts $output
 }


 ################################################################
 #
 # NAME
 #	_print_header - prints a test header
 #
 # SYNOPSIS
 #	_print_header
 #
 # ENVIRONMENT
 #	testsuite_colorize
 #		Boolean that turns colorization on or off
 #	testsuite_color_header
 #		Can be set to define the color used for the header
 #
 ################################################################

 proc _print_header { } {
 	global test_name testsuite_color_header testsuite_colorize COLOR_NONE

 	if ($testsuite_colorize) {
 		append output $testsuite_color_header
 	}
 	append output [string repeat = 78]\n
 	append output [format "%-9s" "TEST:"]${test_name}\n
 	append output [string repeat = 78]
 	if ($testsuite_colorize) {
 		append output $COLOR_NONE
 	}
 	puts $output
 }


 ################################################################
 #
 # NAME
 #	_print_summary - prints the final status summary
 #
 # SYNOPSIS
 #	_print_summary status completed
 #
 # ARGUMENTS
 #	status
 #		The final status of the test.
 #		When status is zero, we print SUCCESS.
 #		When status is negative, we print SKIPPED.
 #		When status is positive, we print FAILURE.
 #
 #	completed
 #		A boolean value that is true if the test completed and false
 #		if aborted (ended early with exit status != 0)
 #
 # ENVIRONMENT
 #	testsuite_colorize
 #		Boolean that turns colorization on or off
 #	testsuite_color_<test_status>
 #		Can be set to define the color used for each test status
 #
 ################################################################

 proc _print_summary {status completed} {
 	global test_name testsuite_colorize COLOR_NONE
 	global testsuite_color_success testsuite_color_skipped
 	global testsuite_color_failure
 	global _subtest_fail_count _subtest_pass_count _subtest_skip_count
 	global _testproc_pass_list _testproc_skip_list _testproc_fail_list
 	global _incomplete_reason
 	global _subtest_messages _testproc_messages
 	global testsuite_subtest_details testsuite_testproc_details

 	if {$status == 0} {
 		set color $testsuite_color_success
 		set header "SUCCESS"
 	} elseif {$status < 0} {
 		set color $testsuite_color_skipped
 		set header "SKIPPED"
 	} elseif {$status > 0} {
 		set color $testsuite_color_failure;
 		set header "FAILURE"
 	}

 	if ($testsuite_colorize) {
 		append output $color
 	}

 	append output [string repeat = 78]\n

 	# Get subtest and testproc counts
 	set testproc_fail  [llength $_testproc_fail_list]
 	set testproc_skip  [llength $_testproc_skip_list]
 	set testproc_pass  [llength $_testproc_pass_list]
 	set testproc_count [expr $testproc_pass + \
 	                         $testproc_skip + \
 	                         $testproc_fail]
 	set subtest_count  [expr $_subtest_fail_count + \
 	                         $_subtest_pass_count + \
 	                         $_subtest_skip_count]

 	# Initial summary
 	append output [format "%s             : %s\n" $header  $test_name]
 	if {$testproc_count > 0} {
 		append output [format "  Testprocs failed  : %3d (%3d%%)%s\n" $testproc_fail [expr $testproc_fail * 100 / $testproc_count] \
 		                                                             [expr {$testproc_fail ? " List: [join $_testproc_fail_list ,]" : ""}]]
 		append output [format "  Testprocs skipped : %3d (%3d%%)%s\n" $testproc_skip [expr $testproc_skip * 100 / $testproc_count] \
 		                                                             [expr {$testproc_skip ? " List: [join $_testproc_skip_list ,]" : ""}]]
 		append output [format "  Testprocs passed  : %3d (%3d%%)\n"   $testproc_pass [expr $testproc_pass * 100 / $testproc_count]]
 		append output [format "  Testprocs total   : %3d %s\n"        $testproc_count [expr {$completed ? "COMPLETE" : "INCOMPLETE: $_incomplete_reason"}]]
 	}
 	if {$subtest_count > 0} {
 		if {$testproc_count > 0} {
 			append output \n
 		}
 		append output [format "  Subtests failed   : %3d (%3d%%)\n" $_subtest_fail_count [expr $_subtest_fail_count * 100 / $subtest_count]]
 		append output [format "  Subtests skipped  : %3d (%3d%%)\n" $_subtest_skip_count [expr $_subtest_skip_count * 100 / $subtest_count]]
 		append output [format "  Subtests passed   : %3d (%3d%%)\n" $_subtest_pass_count [expr $_subtest_pass_count * 100 / $subtest_count]]
 		append output [format "  Subtests total    : %3d %s\n"      $subtest_count [expr {$completed ? "COMPLETE" : "INCOMPLETE: $_incomplete_reason"}]]
 	}
 	append output [string repeat = 78]\n

 	# Detailed information
 	if {$testsuite_subtest_details ne "none"} {
 		set show_details false
 		if {$testsuite_subtest_details eq "all" && $subtest_count > 0} {
 			set show_details true
 		} elseif {$testsuite_subtest_details eq "fail_skip" && $_subtest_skip_count > 0} {
 			set show_details true
 		} elseif {$_subtest_fail_count > 0} {
 			set show_details true
 		}

 		if {$show_details} {
 			append output [format "SUBTESTS DETAILS     : %s\n" $test_name]
 			dict for {id result_msg} $_subtest_messages {
 				set result [lindex $result_msg 0]
 				if {$testsuite_subtest_details eq "all"} {
 					append output [format "  %s\n" [lindex $result_msg 1]]
 				} elseif {$testsuite_subtest_details eq "fail_skip" && $result eq "skip"} {
 					append output [format "  %s\n" [lindex $result_msg 1]]
 				} elseif {$result eq "fail"} {
 					append output [format "  %s\n" [lindex $result_msg 1]]
 				}
 			}
 			append output [string repeat = 78]\n
 		}
 	}
 	if {$testsuite_testproc_details ne "none"} {
 		set show_details false
 		if {$testsuite_testproc_details eq "all" && $testproc_count > 0} {
 			set show_details true
 		} elseif {$testsuite_testproc_details eq "fail_skip" && $testproc_skip > 0} {
 			set show_details true
 		} elseif {$testproc_fail > 0} {
 			set show_details true
 		}

 		if {$show_details} {
 			append output [format "TESTPROCS DETAILS     : %s\n" $test_name]
 			dict for {num result_msg} $_testproc_messages {
 				set result [lindex $result_msg 0]
 				if {$testsuite_testproc_details eq "all"} {
 					append output [format "  Testproc %2d %-7s : %s\n" $num [lindex $result_msg 0] [lindex $result_msg 1]]
 				} elseif {$testsuite_testproc_details eq "fail_skip" && $result eq "skipped"} {
 					append output [format "  Testproc %2d %-7s : %s\n" $num [lindex $result_msg 0] [lindex $result_msg 1]]
 				} elseif {$result eq "failed"} {
 					append output [format "  Testproc %2d %-7s : %s\n" $num [lindex $result_msg 0] [lindex $result_msg 1]]
 				}
 			}
 			append output [string repeat = 78]\n
 		}
 	}

 	if ($testsuite_colorize) {
 		append output $COLOR_NONE
 	}

 	puts -nonewline $output
 }


 ################################################################
 #
 # NAME
 #	_get_test_name - gets the name of the invoking source script
 #
 # SYNOPSIS
 #	_get_test_name
 #
 # RETURN VALUE
 #	The name of the originally called script
 #
 ################################################################

 proc _get_test_name { } {

 	set test_name unknown
 	set frame_level 1
 	while { $frame_level <= [info frame] } {
 		if { [dict get [info frame $frame_level] type] == "source" } {
 			set test_name [file tail [dict get [info frame $frame_level] file]]
 			break
 		}
 		incr frame_level
 	}

 	return $test_name
 }


 ################################################################
 #
 # NAME
 #	_test_cleanup - performs the test cleanup
 #
 # SYNOPSIS
 #	_test_cleanup
 #
 # DESCRIPTION
 #	This procedure removes the temporary test_dir and calls the
 #	test-defined cleanup procedure.
 #
 # NOTES
 #	This function should be called only from _test_init and _test_fini.
 #
 ################################################################

 proc _test_cleanup {} {
 	global log_warn test_dir

 	set rc 0

 	# Call global cleanup procedure if it is defined by the test
 	if {[info procs cleanup] eq "cleanup"} {
 		if {[catch {cleanup} cleanup_error ]} {
 			log_error "Cleanup had errors: $cleanup_error"
 			set rc 1
 		}
 	 }

 	# Remove the temporary test directory
 	exec rm -rf $test_dir

 	return $rc
 }


 ################################################################
 #
 # NAME
 #	_test_init - performs test initialization
 #
 # SYNOPSIS
 #	_test_init
 #
 # DESCRIPTION
 #	This procedure is called automatically at the beginning of each test.
 #	It prints the header, creates the temporary test dir, etc.
 #
 ################################################################

 proc _test_init {} {
 	global test_dir test_id test_name testsuite_shared_dir
 	global _testproc_included _testproc_excluded argv bin_chmod scontrol

 	# parse argv to get and remove _testproc_included and _testproc_excluded params
 	set idx [expr {[info exists argv] ? [lsearch $argv -i] : -1}]
 	if {$idx >= 0} {
 		set _testproc_included [split [lindex $argv [expr $idx + 1]] ,]
 		set argv [lreplace $argv $idx [expr $idx + 1]]
 	}
 	set idx [expr {[info exists argv] ? [lsearch $argv -e] : -1}]
 	if {$idx >= 0} {
 		set _testproc_excluded [split [lindex $argv [expr $idx + 1]] ,]
 		set argv [lreplace $argv $idx [expr $idx + 1]]
 	}

 	# Set test name to name of originally invoked test script, e.g. test1.1
 	set test_name [_get_test_name]

 	# Set test id to suffix of the test script, e.g. 1.1
 	set test_id [string map {test ""} $test_name]

 	# Temporary test directory used to stash saved configs, output files...
 	set test_dir  "$testsuite_shared_dir/${test_name}dir"

 	# Print test header
 	_print_header

 	# Cleanup in case test was not cleaned up on last execution
 	if { [_test_cleanup] } {
 		fail "Error in the initial cleanup"
 	}

 	# Create temporary shared test directory
 	exec mkdir -p $test_dir
 	exec $bin_chmod a+rwx $test_dir

 	# Default precondition checks
 	if {![regexp "is UP" [run_command_output -nolog -none "$scontrol ping"]]} {
 		skip "The expect testsuite requires Slurm to be running, but '$scontrol ping' failed."
 	}
 	set idle_nodes [get_nodes_by_state]
 	if {[llength $idle_nodes] < 1} {
 		skip "The expect testsuite requires there to be at least one idle node in the default partition."
 	}
 	# Ensure that idle nodes are not drained/down
 	# Checking one by one to be able to print a warning if some was drain/down
 	foreach idle_node $idle_nodes {
 		run_command -nolog -xfail "$scontrol update nodename=$idle_node state=resume"
 	}
 }


 ################################################################
 #
 # NAME
 #	_test_fini - performs test finalization
 #
 # SYNOPSIS
 #	_test_fini ?status?
 #
 # DESCRIPTION
 #	This procedure is called automatically from the ending functions
 #	pass, skip and failure.
 #	It cleans up based on the status and the testsuite_cleanup_on_failure
 #	variable, prints the final test status/summary, and exits the test.
 #
 #	We will always cleanup for SUCCESS or SKIPPED tests.
 #	Whether or not the cleanup procedure is called for FAILURE tests
 #	depends on the setting of the $testsuite_cleanup_on_failure variable
 #	which can be set in the globals.local file or overridden with the
 #	SLURM_TESTSUITE_CLEANUP_ON_FAILURE environment variable.
 #
 ################################################################

 proc _test_fini { status } {
 	global testsuite_cleanup_on_failure _test_fini_called
 	global _subtest_fail_count _subtest_skip_count
 	global STATUS_FAIL STATUS_PASS STATUS_SKIP test_status

 	# Avoid potential infinite recursive calls.
 	# _test_fini should be called only once, but custom cleanup procs
 	# called from _test_cleanup can potentially call it (eg fail)
 	if {$_test_fini_called} {
 		log_debug "Recursive _test_fini call detected, most probably a fail on a cleanup function"
 		return
 	}
 	set _test_fini_called true


 	# Determine if test completed or was aborted
 	set completed [expr $status == $STATUS_PASS ? true : false]

 	# Override status with subtest status if available and necessary
 	if {$status != $STATUS_FAIL} {
 		if {$_subtest_fail_count > 0} {
 			set status $STATUS_FAIL
 		} elseif {$_subtest_skip_count > 0} {
 			set status $STATUS_SKIP
 		}
 	}

 	# Set final test status global variable so it can be used in cleanup
 	set test_status $status

 	# Only cleanup if test not failed or configured to do so
 	if {$status != $STATUS_FAIL || $testsuite_cleanup_on_failure} {
 		_test_cleanup
 	}
 	_print_summary $status $completed

 	__exit $status
 }


 ################################################################
 #
 # Overload the exit routine to ensure that no one is explicitly
 # calling it, and to enforce _test_fini if exit is called when
 # the test reach its EOF.
 #
 # All tests should exit using pass, skip or fail.
 #
 ################################################################

 rename exit __exit

 proc exit { {status 0} } {
 	global test_name

 	# To avoid failures when using "expect -c 'source globals'"
 	if {$test_name eq "globals"} {
 		pass
 	}

 	#
 	# Minor sanity check to detect if exit was explicitly called (not
 	# allowed) or automatically executed when the test ends
 	#
 	if {[info level] > 1} {
 		# exit was called from a function, and it shouldn't
 		fail "Exit should not be directly called, use pass, skip or fail instead"
 	} else {
 		set frame_level 1
 		while { $frame_level <= [info frame] } {
 			if { [dict get [info frame $frame_level] type] == "source" } {
 				if { [file tail [dict get [info frame $frame_level] file]] eq $test_name } {
 					# exit was called explicitly from the
 					# test, and it shouldn't
 					fail "Exit should not be directly called, use pass, skip or fail instead"
 				}
 				break
 			}
 			incr frame_level
 		}
 	}

 	if {$status != 0} {
 		fail "An Expect/TCL exception occurred"
 	}

 	# The exit was called implicitly when the test ends, allowed but
 	# _test_fini call enforced
 	pass
 }


 # Call _test_init at the beginning of each test
 _test_init