#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
# Test exclusive resource allocation for a step (--exclusive option).
############################################################################
# Copyright (C) SchedMD LLC.
# Copyright (C) 2007 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# CODE-OCEC-09-009. All rights reserved.
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals

# Scratch files for this test: the generated batch script and the file the
# batch job's output is redirected to (see --output in the submit calls).
set job_id   0
set file_in  $test_dir/test.input
set file_out $test_dir/test.output

# This test cannot run when the select plugin is "linear".
if {[check_config_select linear]} {
    skip "Test requires SelectType != linear"
}
#
# Cancel the current job (global job_id) and remove every file this test
# creates: the batch script, its output, and the step marker files.
#
proc cleanup {} {
    global job_id file_in file_out test_dir

    cancel_job $job_id

    file delete $file_in $file_out
    # Marker files touched by the individual steps / by the test driver
    foreach marker {step1 step2 step3 next} {
        file delete $test_dir/$marker
    }
}
#
# Delete left-over input script
# Build input script file
# Run one more step than allocated CPUs and make sure it waits
#
proc test_cpus {} {
    global file_in file_out test_dir
    global bin_touch srun bin_sleep job_id

    # Start from a clean slate: no stale job, script, or marker files
    cleanup

    #
    # The batch script launches two exclusive 2-CPU steps that fill the
    # allocation (submitted below with -n2 --cpus-per-task=2), then a third
    # exclusive step:
    #   step1 touches "step1" and spins until "step3" exists
    #   step2 touches "step2" and spins until "next" exists
    #   step3 touches "step3"
    # The script must not submit step3 until BOTH step1 and step2 have
    # created their marker files, hence the "||" between the two tests
    # (loop while either file is still missing).
    #
    make_bash_script $file_in "
echo starting step1 and step 2
$srun --cpus-per-task=2 --mem=1 --exclusive -n1 bash -c '
$bin_touch $test_dir/step1;
while \[ \! -f $test_dir/step3 \]
do $bin_sleep 0.25;
done
' &
$srun --cpus-per-task=2 --mem=1 --exclusive -n1 bash -xvc '
$bin_touch $test_dir/step2;
while \[ \! -f $test_dir/next \]
do $bin_sleep 0.25;
done
' &
echo waiting to verify step1 and step2 are running
while \[ \! -f $test_dir/step1 \] || \[ \! -f $test_dir/step2 \]
do
$bin_sleep 0.25;
done
echo \'starting step3 (should not be started until step2 ends)\'
$srun --cpus-per-task=2 --mem=1 -v --exclusive -n1 bash -c '
$bin_touch $test_dir/step3
' &
wait
"

    set job_id [submit_job -fail "--cpus-per-task=2 -N1 -n2 -t2 --gres=craynetwork:0 --mem=10 --output=$file_out $file_in"]

    subtest {![wait_for_job -timeout 30 $job_id RUNNING]} "Job ($job_id) should start"
    subtest {![wait_for_file $test_dir/step1]} "File ($test_dir/step1) should be created by step1"
    subtest {![wait_for_file $test_dir/step2]} "File ($test_dir/step2) should be created by step2"

    # Not negated on purpose: this subtest expects the 30 second wait for
    # step3's marker file to give up, i.e. step3 is still waiting for CPUs.
    subtest { [wait_for_file -timeout 30 $test_dir/step3]} "File ($test_dir/step3) shouldn't exist yet"

    # Creating "next" lets step2 exit; step3 can then run, and its marker
    # file in turn lets step1 exit so the whole job completes.
    run_command -fail "$bin_touch $test_dir/next"
    subtest {![wait_for_file $test_dir/step3]} "File ($test_dir/step3) should be created by step3"
    subtest {![wait_for_job -timeout 30 $job_id DONE]} "Job ($job_id) should finish"
}
#
# Delete left-over input script
# Build another input script file
# Run one more step than allocated CPUs with the immediate option and make sure it aborts
#
proc test_cpus_immediate {} {
    global file_in file_out test_dir
    global bin_sleep bin_touch srun job_id

    # Start from a clean slate: no stale job, script, or marker files
    cleanup

    #
    # The batch script launches two exclusive 2-CPU steps with --immediate=1
    # that fill the allocation (submitted below with -n2 --cpus-per-task=2);
    # both spin until the "next" marker exists. Once BOTH steps have created
    # their marker files (loop while either is still missing), a third
    # --immediate=1 step is submitted. The script waits for that srun, echoes
    # its exit status, and only then creates "next" to release step1/step2.
    #
    make_bash_script $file_in "
echo starting step1 and step2
$srun --cpus-per-task=2 --mem=1 --exclusive --immediate=1 -n1 bash -c '
$bin_touch $test_dir/step1;
while \[ \! -f $test_dir/next \]
do $bin_sleep 0.25;
done
' &
$srun --cpus-per-task=2 --mem=1 --exclusive --immediate=1 -n1 bash -xvc '
$bin_touch $test_dir/step2;
while \[ \! -f $test_dir/next \]
do $bin_sleep 0.25;
done
' &
echo waiting to verify step1 and step2 are running
while \[ \! -f $test_dir/step1 \] || \[ \! -f $test_dir/step2 \]
do
$bin_sleep 0.25;
done
echo submitting step3 with --immediate
$srun --cpus-per-task=2 --mem=1 -v --immediate=1 --exclusive -n1 bash -c '
$bin_touch $test_dir/step3
' &
s3=\$\!
echo waiting for step3 to fail
wait \$s3
echo step3: \$?
$bin_touch $test_dir/next
wait
"

    set job_id [submit_job -fail "--cpus-per-task=2 -N1 -n2 -t2 --gres=craynetwork:0 --mem=10 --output=$file_out $file_in"]

    subtest {![wait_for_job -timeout 30 $job_id DONE]} "Job ($job_id) should finish"
    subtest {![wait_for_file $test_dir/step1]} "File ($test_dir/step1) should be created by step1"
    subtest {![wait_for_file $test_dir/step2]} "File ($test_dir/step2) should be created by step2"
    subtest {![wait_for_file $test_dir/next]} "File ($test_dir/next) should be created after step3"

    # Not negated on purpose: this subtest expects the wait for step3's
    # marker file to give up, because step3 must never have run.
    subtest {[wait_for_file -timeout 5 $test_dir/step3]} "File ($test_dir/step3) should NOT be created, because step3 should fail"
}
#
# Verify that all GPUs and other GRES are allocated with the --exclusive flag
#
proc test_gpus {node_name} {
    # job_id must be the global one: cleanup cancels the job through the
    # global variable, so a proc-local job_id would leave this job running.
    global bin_sleep job_id

    # Start from a clean slate: no stale job, script, or marker files
    cleanup

    # Submit a job on node_name that asks for a single GPU together with
    # --exclusive, and keep it running long enough to be inspected.
    set job_id [submit_job -fail "-n1 -N1 -w $node_name --gres=gpu --exclusive -e none -o none --wrap '$bin_sleep 10'"]
    subtest {![wait_for_job $job_id RUNNING]} "Job ($job_id) should start"

    # Check all GRES of the node were allocated on the job
    check_exclusive_gres $job_id $node_name
}
# CPU exclusivity between steps
testproc test_cpus

# The --immediate variant is skipped when SchedulerParameters contains "defer"
if {[param_contains [get_config_param SchedulerParameters] defer]} {
    skip_following_testprocs "Skipping immediate test since SchedulerParameters=defer is set"
}
testproc test_cpus_immediate

# Re-enable test procs so the skip above does not carry over to the GPU test
run_following_testprocs

# The GPU test needs exactly one node name back from the --gres=gpu:2
# request, and gres/gpu listed in AccountingStorageTRES.
set node_name [get_nodes_by_request "--gres=gpu:2 -n1 -t1"]
if {[llength $node_name] != 1} {
    skip_following_testprocs "This test need to be able to submit jobs with at least --gres=gpu:2"
}
if {![param_contains [get_config_param AccountingStorageTRES] gres/gpu]} {
    skip_following_testprocs "This test requires AccountingStorageTRES=gres/gpu"
}
testproc test_gpus $node_name