# blob: 9decaa244f98b8890ac947b0733ff54a62fee65b [file] [edit]
############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
import atf
import pytest
# Setup
@pytest.fixture(scope="module", autouse=True)
def setup():
    """Declare the module-wide prerequisites before any test runs.

    Asks the test framework for 2 nodes with the listed properties
    (ThreadsPerCore=2, Cores=4, Sockets=1) and then requires Slurm to be
    running.
    """
    node_properties = [("ThreadsPerCore", 2), ("Cores", 4), ("Sockets", 1)]
    atf.require_nodes(2, node_properties)
    atf.require_slurm_running()
@pytest.mark.xfail(
    atf.get_version() < (26, 5)
    and atf.get_config_parameter("SelectType", live=False) != "select/linear",
    reason="Ticket 20799: Concurrent steps are not started and identified correctly",
)
def test_no_missing_step(tmp_path):
    """
    Check that three backgrounded steps are started and numbered as expected.

    The batch script launches three sruns a few seconds apart. Steps 0 and 1
    are expected to be listed together while step 2 is absent; afterwards
    step 2 is expected to be listed together with step 0 once step 1 is gone.

    The tmp_path fixture is requested but not referenced in this body.
    """
    # Built from adjacent literals: the resulting text starts with a newline
    # and ends with one, with every script line flush left.
    batch_script = (
        "\n"
        "srun -l -n 3 --hint=nomultithread --distribution=pack --exact sleep 20 &\n"
        "sleep 3\n"
        "srun -l -n 3 --hint=nomultithread --distribution=pack --exact sleep 10 &\n"
        "sleep 3\n"
        "srun -l -n 2 --hint=nomultithread --distribution=pack --exact sleep 10 &\n"
        "wait $(jobs -p)\n"
    )
    atf.make_bash_script("my_script.sh", batch_script)

    job_id = atf.submit_job_sbatch("-N2 -n16 my_script.sh", fatal=True)

    # Step identifiers as they are looked up in the get_steps() result.
    step_0 = f"{job_id}.0"
    step_1 = f"{job_id}.1"
    step_2 = f"{job_id}.2"

    def poll_steps():
        """Fetch the current steps of the submitted job."""
        return atf.get_steps(job_id)

    def first_two_only(steps):
        """True while steps 0 and 1 are listed and step 2 is not."""
        return step_0 in steps and step_1 in steps and step_2 not in steps

    def third_replaced_second(steps):
        """True while steps 0 and 2 are listed and step 1 is not."""
        return step_0 in steps and step_1 not in steps and step_2 in steps

    ran_in_parallel = atf.repeat_until(poll_steps, first_two_only)
    assert (
        ran_in_parallel
    ), "First two steps should run in parallel before the third one"

    third_started = atf.repeat_until(poll_steps, third_replaced_second)
    assert (
        third_started
    ), "The third step should run as soon as the second ends, so in parallel with the first one"