# blob: 1f830a68768f5115e4844ec4be411091b520121c [file] [edit]
############################################################################
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
############################################################################
import atf
import pytest
# Bind specifications that the tests below expect sbatch and scontrol update
# to reject, grouped by list type.
_INVALID_MAP_BINDS = ["gres/gpu:map:abcd", "gres/gpu:map:0x00"]
_INVALID_MASK_BINDS = ["gres/gpu:mask:x000", "gres/gpu:mask:wxyz"]
INVALID_TRES_BINDS = _INVALID_MAP_BINDS + _INVALID_MASK_BINDS
# Bind specifications that the tests below expect sbatch and scontrol update
# to accept, grouped by list type.
_VALID_MAP_BINDS = [
    "gres/gpu:map:",
    "gres/gpu:map:0",
    "gres/gpu:map:0,1",
    "gres/gpu:map:0*2",
]
_VALID_MASK_BINDS = [
    "gres/gpu:mask:",
    "gres/gpu:mask:abc",
    "gres/gpu:mask:0x1",
    "gres/gpu:mask:0x1,0x2",
    "gres/gpu:mask:0x1*2",
]
VALID_TRES_BINDS = _VALID_MAP_BINDS + _VALID_MASK_BINDS
@pytest.fixture(scope="module", autouse=True)
def setup():
    """Module-scoped, automatically applied prerequisite for every test here.

    Calls atf.require_slurm_running() once before the module's tests run.
    """
    atf.require_slurm_running()
@pytest.fixture
def held_job():
    """Yield the id of a job submitted with --hold; cancel it on teardown."""
    sbatch_args = "--hold -n1 --wrap 'true'"
    submitted_id = atf.submit_job_sbatch(sbatch_args, fatal=True)

    yield submitted_id

    # Teardown: remove the job this fixture created.
    atf.cancel_jobs([submitted_id], quiet=True)
def test_submit_offsets():
    """The full bind spec must be validated.

    Submits jobs whose map/mask list is a single invalid character ("z")
    preceded by zero to eight "0" characters, so the invalid character is
    placed at every offset from 0 through 8 of the list. Every such list must
    be rejected, except where the pre-26.05 behavior noted below applies.
    """
    # The version cannot change during the test, so query it only once.
    skips_four_chars = atf.get_version() < (26, 5)

    val = "z"
    while len(val) < 10:
        if skips_four_chars and len(val) <= 4:
            # Issue 50893: List validation for mask/map skipped four characters
            if len(val) == 4:
                # All four characters are skipped, so the submission is
                # expected to be accepted on these versions.
                assert (
                    _submit_job(f"gres/gpu:map:{val}", xfail=False) != 0
                ), f"Rejected empty string in --tres-bind=gres/gpu:map:{val}"
                assert (
                    _submit_job(f"gres/gpu:mask:{val}", xfail=False) != 0
                ), f"Rejected empty string in --tres-bind=gres/gpu:mask:{val}"
            # Shorter values are deliberately not submitted: don't test
            # potentially undefined behavior.
        else:
            # These checks must be real assertions; a bare
            # "call(), (message)" expression would verify nothing.
            assert not _submit_job(
                f"gres/gpu:map:{val}", xfail=True
            ), f"Accepted invalid --tres-bind=gres/gpu:map:{val}"
            assert not _submit_job(
                f"gres/gpu:mask:{val}", xfail=True
            ), f"Accepted invalid --tres-bind=gres/gpu:mask:{val}"

        # The *_gpu spellings are checked for every length on every version.
        assert not _submit_job(
            f"gres/gpu:map_gpu:{val}", xfail=True
        ), f"Accepted invalid --tres-bind=gres/gpu:map_gpu:{val}"
        assert not _submit_job(
            f"gres/gpu:mask_gpu:{val}", xfail=True
        ), f"Accepted invalid --tres-bind=gres/gpu:mask_gpu:{val}"

        # Move the invalid character one position further into the list.
        val = "0" + val
def _submit_job(tres_bind, *, xfail=False):
    """Submit a one-task sbatch job that uses the given --tres-bind value.

    Args:
        tres_bind: Value placed after "--tres-bind=" on the sbatch line.
        xfail: Forwarded unchanged to atf.submit_job_sbatch().

    Returns:
        Whatever atf.submit_job_sbatch() returns for the submission.
    """
    sbatch_args = f"--tres-bind={tres_bind} -n1 --wrap 'true'"
    return atf.submit_job_sbatch(sbatch_args, xfail=xfail)
@pytest.mark.xfail(
    atf.get_version("sbin/slurmctld") < (26, 5),
    reason="--tres-bind map/mask list validation skips four chars before 26.05",
)
@pytest.mark.parametrize("tres_bind", INVALID_TRES_BINDS)
def test_submit_invalid_tres_bind(tres_bind):
    """sbatch must refuse every malformed --tres-bind map or mask list."""
    # A falsy result from the helper means no job was created.
    job_id = _submit_job(tres_bind, xfail=True)
    assert not job_id, f"sbatch accepted invalid --tres-bind={tres_bind}."
@pytest.mark.xfail(
    atf.get_version("sbin/slurmctld") < (26, 5),
    reason="--tres-bind map/mask list validation skips four chars before 26.05",
)
@pytest.mark.parametrize("tres_bind", INVALID_TRES_BINDS)
def test_update_invalid_tres_bind(held_job, tres_bind):
    """Updating a held job with a malformed TresBind list must fail."""
    update_cmd = f"scontrol update JobId={held_job} TresBind={tres_bind}"
    # The command is expected to fail, hence xfail=True.
    outcome = atf.run_command(update_cmd, xfail=True)
    exit_code = outcome["exit_code"]
    assert exit_code != 0, f"scontrol update accepted invalid TresBind={tres_bind}."
@pytest.mark.parametrize("tres_bind", VALID_TRES_BINDS)
def test_submit_valid_tres_bind(tres_bind):
    """Valid map and mask lists must be accepted by sbatch."""
    # The submission is expected to succeed, so it must not be flagged as an
    # expected failure.
    assert _submit_job(
        tres_bind, xfail=False
    ), f"sbatch rejected valid --tres-bind={tres_bind}."
@pytest.mark.parametrize("tres_bind", VALID_TRES_BINDS)
def test_update_valid_tres_bind(held_job, tres_bind):
    """scontrol update must accept a valid TresBind list and store it as given."""
    update_cmd = f"scontrol update JobId={held_job} TresBind={tres_bind}"
    atf.run_command(update_cmd, fatal=True)

    # The job must report exactly the value that was just set.
    reported = atf.get_job_parameter(held_job, "TresBind")
    assert reported == tres_bind