# blob: 11a15036800134ad7f97a340b2e211ccef98b724 [file] [log] [blame] [edit]
############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
import atf
import pytest
import re
# Number of nodes the setup fixture requires and that srun is asked for (-N).
node_count = 1
# Setup
@pytest.fixture(scope="module", autouse=True)
def setup():
    """Prepare the module's test environment.

    Declared module-scoped and autouse, so pytest runs it once for this
    module without tests having to request it.  It calls
    atf.require_nodes() with the module-level node_count and then
    atf.require_slurm_running().
    """
    atf.require_nodes(node_count)
    atf.require_slurm_running()
def _reported_tasks(output, state):
    """Return the sorted task IDs that the srun output reports in ``state``.

    The list of tasks may not arrive in a single message but split, e.g.:
        srun: error: node0: tasks 0,2-6,8-9: Killed
        srun: error: node0: task 7: Killed
    so every matching range is collected and merged before being expanded
    with atf.node_range_to_list().

    Args:
        output: Text returned by atf.run_command_error() for the srun call.
        state: Label that follows the task list, e.g. "running" or "Killed".

    Returns:
        Sorted list of int task IDs; empty when no message mentions ``state``.
    """
    ranges = re.findall(
        rf"task[s]?\s+([0-9,\-]+): {re.escape(state)}", output
    )
    if not ranges:
        # Nothing was reported: return early rather than expanding an empty
        # "[]" expression, so the caller's assertion fails with its own
        # descriptive message.
        return []
    merged = "[" + ",".join(ranges) + "]"
    return sorted(map(int, atf.node_range_to_list(merged)))


def test_wait(tmp_path):
    """Verify srun --wait"""
    task_count = 10
    # Rank whose script exits immediately; all other ranks keep sleeping and
    # are expected to be reported as running and then Killed.
    early_task = 1

    file_in = str(tmp_path / "file_in.input")
    atf.make_bash_script(
        file_in,
        f"""if [[ -z "$SLURM_PROCID" ]]
then exit
fi
if [[ $SLURM_PROCID == {early_task} ]]
then exit
fi
sleep 20
""",
    )

    run_error = atf.run_command_error(
        f"srun -n{task_count} -N{node_count} -O -W2 {file_in}"
    )

    assert (
        re.search(r"First task exited", run_error) is not None
    ), "First task did not exit"

    # Every rank except the early one; derived from task_count so the
    # expectation cannot drift from the srun invocation above.
    expected_tasks = [rank for rank in range(task_count) if rank != early_task]

    # Assert running tasks
    running = _reported_tasks(run_error, "running")
    assert (
        running == expected_tasks
    ), f"Running tasks should be {expected_tasks}, but got {running}"

    # Assert killed tasks
    killed = _reported_tasks(run_error, "Killed")
    assert (
        killed == expected_tasks
    ), f"Killed tasks should be {expected_tasks}, but got {killed}"