# blob: 7ce371c496e921c2826f38c201f17392e5c0465f [file] [edit]
############################################################################
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
############################################################################
import atf
import logging
import pytest
# Ticket 24975: "sacctmgr load" failed when the input file contained a typed
# TRES (e.g. "gres/gpu:a100") because the ':' that separates the GRES name
# from its type was being treated as the file-format column separator,
# truncating the option early.
# GRES name and GRES type that are combined into the typed TRES under test.
gres_name = "gpu"
gres_type = "a100"

# Typed TRES in "gres/<name>:<type>" form.
typed_tres = "gres/{}:{}".format(gres_name, gres_type)

# QoS created for each test; it carries a MaxTRESPerUser limit on the typed TRES.
qos1 = "qos_test_102_12"

# File names (created under the module tmp path) for the dump taken before
# the load and the dump taken after it.
dump_file = "test.dump"
redump_file = "test.dump.after"
@pytest.fixture(scope="module", autouse=True)
def setup():
    """Declare the module-wide prerequisites; applied automatically.

    The requirements are registered in this order: the version (26, 5) for
    bin/sacctmgr, the GRES name in GresTypes, the typed TRES in
    AccountingStorageTRES, modifiable accounting, and a running Slurm.
    """
    required_sacctmgr_version = (26, 5)
    atf.require_version(required_sacctmgr_version, "bin/sacctmgr")

    # Both the bare GRES name and the typed TRES must be present in the config.
    for parameter, needed_value in (
        ("GresTypes", gres_name),
        ("AccountingStorageTRES", typed_tres),
    ):
        atf.require_config_parameter_includes(parameter, needed_value)

    atf.require_accounting(modify=True)
    atf.require_slurm_running()
@pytest.fixture(scope="function", autouse=True)
def setup_db():
    """Create the accounting records each test needs, then remove them.

    Before the test: adds an account and an Admin-level user, both named
    after the slurm user, plus the QoS ``qos1`` with a MaxTRESPerUser limit
    of 1 on the typed TRES. Each of these commands is run with fatal=True.

    After the test: removes the QoS, the user and the account, in the reverse
    order of creation. The removal commands are not marked fatal.
    """
    # Read the user once and interpolate the local below. Writing
    # atf.properties["slurm-user"] directly inside a double-quoted f-string
    # nests the same quote type, which is a SyntaxError before Python 3.12
    # (PEP 701).
    slurm_user = atf.properties["slurm-user"]

    # User and account necessary for sacctmgr dump to work
    atf.run_command(
        f"sacctmgr -i add account {slurm_user}",
        user=slurm_user,
        fatal=True,
    )
    atf.run_command(
        f"sacctmgr -i add user {slurm_user} Account={slurm_user} AdminLevel=Admin",
        user=slurm_user,
        fatal=True,
    )
    # QoS whose limit references the typed TRES (the value containing ':').
    atf.run_command(
        f"sacctmgr -i add qos {qos1} MaxTRESPerUser={typed_tres}=1",
        user=slurm_user,
        fatal=True,
    )

    yield

    # Teardown: undo the additions above, last created first.
    atf.run_command(
        f"sacctmgr -i remove qos {qos1}",
        user=slurm_user,
        quiet=True,
    )
    atf.run_command(
        f"sacctmgr -i remove user {slurm_user} Account={slurm_user}",
        user=slurm_user,
        quiet=True,
    )
    atf.run_command(
        f"sacctmgr -i remove account {slurm_user}",
        user=slurm_user,
        quiet=True,
    )
def test_sacctmgr_load_typed_tres():
    """Round-trip a typed TRES through 'sacctmgr dump' and 'sacctmgr load'.

    'sacctmgr load' must accept a dumped file containing a typed TRES
    (e.g. gres/gpu:a100) without failing to parse the colon, and must leave
    the relevant database contents unchanged: the QoS limit shown before and
    after the load must match, and so must a dump taken before and after.
    """
    cluster_name = atf.get_config_parameter("ClusterName")
    slurm_user = atf.properties["slurm-user"]
    tmp_dir = atf.module_tmp_path
    dump_path = f"{tmp_dir}/{dump_file}"
    redump_path = f"{tmp_dir}/{redump_file}"

    # The same query is issued before and after the load.
    show_limit_cmd = f"sacctmgr show -nP qos {qos1} format=MaxTRESPU"

    # Appended to failure messages that concern the load itself.
    hint = (
        f"Hint: confirm that 'sacctmgr load' parses the typed TRES '{typed_tres}' "
        "without treating its ':' as the file-format column separator."
    )

    def dump_cluster_to(target_path):
        # Write the cluster's accounting dump to target_path; failure is fatal.
        atf.run_command(
            f"sacctmgr -i dump Cluster={cluster_name} file={target_path}",
            user=slurm_user,
            fatal=True,
        )

    # Sanity check: the fixture-created QoS really carries the typed TRES.
    limit_before = atf.run_command_output(show_limit_cmd, fatal=True)
    assert typed_tres in limit_before, (
        f"QoS {qos1} should have MaxTRESPerUser={typed_tres}=1, "
        f"got: {limit_before.rstrip()}"
    )

    # Generate the dump file
    dump_cluster_to(dump_path)

    # Load it back. Before the fix, the ':' inside the typed TRES was treated
    # as a column separator and the load failed with a parse error. The
    # command is not run as fatal so the assertion below can report details.
    load_outcome = atf.run_command(
        f"sacctmgr -i load file={dump_path}",
        user=slurm_user,
    )
    assert load_outcome["exit_code"] == 0, "\n".join(
        (
            f"sacctmgr load failed (exit={load_outcome['exit_code']}):",
            f"stderr={load_outcome['stderr'].strip()}",
            f"stdout={load_outcome['stdout'].strip()}",
            hint,
        )
    )

    # Verify the typed-TRES limit on the QoS specifically survived the load.
    limit_after = atf.run_command_output(show_limit_cmd, fatal=True)
    assert limit_before == limit_after, "\n".join(
        (
            f"QoS {qos1} MaxTRESPerUser changed across 'sacctmgr load':",
            f" before: {limit_before.rstrip()}",
            f" after: {limit_after.rstrip()}",
            hint,
        )
    )

    # Re-dump the cluster after load and compare against the original dump.
    dump_cluster_to(redump_path)
    with open(dump_path) as before_fh, open(redump_path) as after_fh:
        dump_before = before_fh.read()
        dump_after = after_fh.read()

    assert dump_before == dump_after, (
        "sacctmgr dump output differs before vs after 'sacctmgr load' — "
        "database contents changed.\n"
        f"--- before ---\n{dump_before}--- after ---\n{dump_after}"
    )

    logging.info(
        f"sacctmgr load completed successfully and DB unchanged for '{typed_tres}'"
    )