#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
# to be called from test21.34
# Tests if the Grpwall limit is enforced
############################################################################
# Copyright (C) SchedMD LLC.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
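# qostest - name of the QOS whose GrpWall limit is under test; the limit
# itself (in minutes) arrives via the global grpwall_num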
proc inc21_34_2 { qostest } {
	global sbatch srun acct bin_sleep grpwall_num

	log_info "Starting Grpwall test"
	set job_id_list [list]
	set consumption_job_count 6	;# This should divide 60 evenly
	# Raise an error to abort the catch block
	set exception_code [catch {
		# Since wall is a decayed variable, reset it to make sure the test
		# gets exactly what we would expect.
		if {[reset_qos_usage "" $qostest]} {
			error "Unable to reset QOS usage"
		}

		# If we ran a single job to consume the full group wall time, the
		# priority decay would make it consume slightly less than the limit,
		# and a single job requesting more time than the limit would itself
		# be blocked by GrpWall. So, we use multiple jobs to consume the
		# GrpWall limit, with the last job consuming an additional second to
		# overcome the priority decay.
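		# For example, if grpwall_num is 1 (GrpWall = 1 minute), each of the
		# 6 jobs sleeps 1 * 60 / 6 = 10 seconds, except the last, which
		# sleeps 11, for a total of 61 seconds of usage, just over the limit.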
log_debug "Submitting multiple jobs to consume the usage of the GrpWall limit"
for {set i 0} {$i < $consumption_job_count} {incr i} {
set sleep_time [expr $grpwall_num * 60 / $consumption_job_count]
if {$i == $consumption_job_count - 1} {
incr sleep_time
}
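			# Note: with integer operands Tcl's / truncates, which is why
			# consumption_job_count must divide grpwall_num * 60 evenly, or
			# the jobs would undershoot the limit.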
			set result [run_command "$sbatch --account=$acct -N2 -t1 --wrap '$srun $bin_sleep $sleep_time' -o /dev/null -e /dev/null"]
			if {[dict get $result exit_code]} {
				error "Consumption job $i submission failed ([dict get $result output])"
			}
			if {[regexp {Submitted batch job (\d+)} [dict get $result output] - job_id]} {
				lappend job_id_list $job_id
			}
		}
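		# Wall-clock usage is charged to the QOS as the jobs run, so wait for
		# every consumption job to finish before checking the limit.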
		foreach job_id $job_id_list {
			if {[wait_for_job $job_id DONE] != $::RETURN_SUCCESS} {
				error "Job ($job_id) did not complete"
			}
		}
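		# The accumulated usage now exceeds GrpWall, so this job should stay
		# PENDING with reason QOSGrpWallLimit (assuming the QOS does not set
		# the DenyOnLimit flag, which would reject it at submission instead).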
log_debug "Submitting a final job to surpass the GrpWall limit"
set result [run_command "$sbatch --account=$acct -N1 -t1 --wrap '$srun $bin_sleep 10' -o /dev/null -e /dev/null"]
if [dict get $result exit_code] {
error "Final job submission failed ([dict get $result output])"
}
if {[regexp {Submitted batch job (\d+)} [dict get $result output] - job_id]} {
lappend job_id_list $job_id
}
subtest {[wait_job_reason $job_id PENDING QOSGrpWallLimit] == $::RETURN_SUCCESS} "Job should be pending with reason QOSGrpWallLimit"
} message] ; # Store the error message in $message
	# Reset QoS usage
	reset_qos_usage "" $qostest

	# Cancel jobs
	cancel_job $job_id_list

	# Convert any errors into failures (after cleaning up)
	if {$exception_code == 1} {	;# errors only
		fail "Failure testing GrpWall: $message"
	}
}