# blob: 87aa855b3afd03214f0f8b227037e6949fac966e
############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
import atf
import pytest
# Maps a 0-based index to each node used by the tests; filled in by the
# module-scoped setup() fixture from the nodes returned by atf.run_job_nodes().
node_list = {}
# Range expression covering every node in node_list, built by setup() with
# atf.node_list_to_range(); passed to "sinfo -n" and "scontrol nodename=".
node_range = ""
# Module-wide setup shared by every test in this file
@pytest.fixture(scope="module", autouse=True)
def setup():
    """Pick the eight nodes the tests operate on and record them globally.

    Requires eight nodes and a running Slurm, then runs a trivial 8-node job
    and stores the nodes it reports in ``node_list`` (keyed by position) and
    their range expression in ``node_range``.
    """
    global node_range

    atf.require_nodes(8)
    atf.require_slurm_running()

    # A throwaway "true" job on 8 nodes tells us which nodes to use later
    job_nodes = atf.run_job_nodes("-N8 true", fatal=True)
    node_list.update(enumerate(job_nodes))

    node_range = atf.node_list_to_range(node_list.values())
@pytest.fixture(scope="function", autouse=True)
def reset_node_states():
    """Cancel all jobs and resume the test nodes once each test has finished."""
    # Nothing to prepare before the test; everything below is teardown.
    yield

    atf.cancel_all_jobs()

    # Resume the whole node range so state changes made by one test
    # (down/drain) are not carried into the next one.
    resume_command = f"scontrol update nodename={node_range} state=resume"
    atf.run_command(resume_command, user=atf.properties["slurm-user"], quiet=True)
def test_allocated_state():
    """Test sinfo filtering for ALLOCATED state.

    Runs an exclusive two-node job on the first two test nodes and checks
    that ``sinfo --state=allocated`` reports exactly those two nodes.
    """

    # Submit a job to allocate the first two nodes
    job_id = atf.submit_job_sbatch(
        f"-N2 -w {node_list[0]},{node_list[1]} --exclusive --wrap 'sleep 60'"
    )
    atf.wait_for_job_state(job_id, "RUNNING", fatal=True)

    # Get sinfo output for allocated nodes
    output = atf.run_command_output(
        f"sinfo -Nh -n {node_range} -O NodeList --state=allocated"
    )

    # Compare whole node names rather than substrings of the raw output:
    # a substring test would accept "node10" as evidence for "node1".
    # Each -O field is padded, so strip it; skip blank lines.
    reported = [line.strip() for line in output.splitlines() if line.strip()]

    # Verify that only the two allocated nodes are in the output
    assert len(reported) == 2, "Expected 2 allocated nodes"
    assert node_list[0] in reported, f"{node_list[0]} should be allocated"
    assert node_list[1] in reported, f"{node_list[1]} should be allocated"
def test_idle_state():
    """Check that ``sinfo --state=idle`` lists idle and idle+drain nodes.

    Nodes 4-5 are set down and nodes 6-7 are drained, so the idle filter is
    expected to return the four untouched nodes plus the two drained ones.
    """

    # Put two nodes down and drain two others
    down_cmd = f"scontrol update nodename={node_list[4]},{node_list[5]} state=down reason=test"
    atf.run_command(down_cmd, user=atf.properties["slurm-user"])
    drain_cmd = f"scontrol update nodename={node_list[6]},{node_list[7]} state=drain reason=test"
    atf.run_command(drain_cmd, user=atf.properties["slurm-user"])

    # Ask sinfo for the idle nodes together with their complete state
    sinfo_cmd = f"sinfo -Nh -n {node_range} -O NodeList,StateComplete --state=idle"
    output = atf.run_command_output(sinfo_cmd)

    # Each row is "<node> <state>"; build a node -> state mapping
    state_by_node = dict(row.split() for row in output.strip().splitlines())
    only_idle = {name for name, state in state_by_node.items() if state == "idle"}
    drained = {name for name, state in state_by_node.items() if "drain" in state}

    assert (
        len(state_by_node) == 6
    ), "Expected 6 idle nodes, 4 only idle and 2 idle+drain nodes"
    assert len(only_idle) == 4, "Expected 4 idle nodes"
    for idx in (0, 1, 2, 3):
        assert node_list[idx] in only_idle, f"{node_list[idx]} should be only idle"

    # Drained nodes still count as idle for the filter
    assert len(drained) == 2, "Expected 2 drained* nodes (that are also idle)"
    for idx in (6, 7):
        assert node_list[idx] in drained, f"{node_list[idx]} should be idle+drain"
def test_down_state():
    """Check that ``sinfo --state=down`` lists exactly the two downed nodes."""

    # Mark nodes 4 and 5 as down
    down_cmd = f"scontrol update nodename={node_list[4]},{node_list[5]} state=down reason=test"
    atf.run_command(down_cmd, user=atf.properties["slurm-user"])

    # Query sinfo for down nodes and their complete state
    sinfo_cmd = f"sinfo -Nh -n {node_range} -O NodeList,StateComplete --state=down"
    output = atf.run_command_output(sinfo_cmd)

    # Each row is "<node> <state>"; collect them into a mapping
    state_by_node = {}
    for row in output.strip().splitlines():
        name, state = row.split()
        state_by_node[name] = state
    down_names = {name for name, state in state_by_node.items() if "down" in state}

    # Only the two downed nodes may be reported
    assert len(state_by_node) == 2, "Expected 2 down nodes"
    for idx in (4, 5):
        assert node_list[idx] in down_names, f"{node_list[idx]} should be down"
def test_drain_state():
    """Test sinfo filtering for DRAIN state.

    Drains nodes 6 and 7 and checks that ``sinfo --state=drain`` returns
    those two nodes and nothing else.
    """

    # Change the state of some nodes
    atf.run_command(
        f"scontrol update nodename={node_list[6]},{node_list[7]} state=drain reason=test",
        user=atf.properties["slurm-user"],
    )

    # Get sinfo output for drained nodes
    output = atf.run_command_output(
        f"sinfo -Nh -n {node_range} -O NodeList,StateComplete --state=drain"
    )

    # Each row is "<node> <state>"; build a node -> state mapping
    lines = [line.strip().split() for line in output.strip().splitlines()]
    nodes = {node: status for node, status in lines}
    nodes_drain = [n for n, s in nodes.items() if "drain" in s]

    # Count every returned row, not only the drain ones: otherwise the test
    # would still pass if sinfo ignored --state and listed all nodes.
    assert len(nodes) == 2, "Expected only the 2 drained nodes in the output"

    # Verify that the correct nodes are reported as drained
    assert len(nodes_drain) == 2, "Expected 2 drained nodes"
    assert node_list[6] in nodes_drain, f"{node_list[6]} should be drained"
    assert node_list[7] in nodes_drain, f"{node_list[7]} should be drained"
def test_down_and_drain_state():
    """Test sinfo filtering for DOWN and DRAIN states with a comma.

    Sets nodes 4-5 down and drains nodes 6-7, then checks that
    ``sinfo --state=down,drain`` returns exactly those four nodes.
    """

    # Change the state of some nodes
    atf.run_command(
        f"scontrol update nodename={node_list[4]},{node_list[5]} state=down reason=test",
        user=atf.properties["slurm-user"],
    )
    atf.run_command(
        f"scontrol update nodename={node_list[6]},{node_list[7]} state=drain reason=test",
        user=atf.properties["slurm-user"],
    )

    # Get sinfo output for down and drained nodes
    output = atf.run_command_output(
        f"sinfo -Nh -n {node_range} -O NodeList,StateComplete --state=down,drain"
    )

    # Each row is "<node> <state>"; build a node -> state mapping
    lines = [line.strip().split() for line in output.strip().splitlines()]
    nodes = {node: status for node, status in lines}
    nodes_down = [n for n, s in nodes.items() if "down" in s]
    nodes_drain = [n for n, s in nodes.items() if "drain" in s]

    # Count every returned row: the per-state checks below would still pass
    # if sinfo ignored --state and also listed the four untouched nodes.
    assert len(nodes) == 4, "Expected only the 2 down and 2 drained nodes in the output"

    # Verify the output
    assert len(nodes_down) == 2, "Expected 2 down nodes"
    assert len(nodes_drain) == 2, "Expected 2 drained nodes"
    assert node_list[4] in nodes_down, f"{node_list[4]} should be down"
    assert node_list[5] in nodes_down, f"{node_list[5]} should be down"
    assert node_list[6] in nodes_drain, f"{node_list[6]} should be drained"
    assert node_list[7] in nodes_drain, f"{node_list[7]} should be drained"
def test_down_and_drain_state_ampersand():
    """Test sinfo filtering for DOWN&DRAIN states with an ampersand.

    Sets nodes 4 and 5 down and then drains the same two nodes, and checks
    that ``sinfo --state='down&drain'`` returns exactly those two nodes.
    """

    # Change the state of some nodes to be both down and drained
    atf.run_command(
        f"scontrol update nodename={node_list[4]},{node_list[5]} state=down reason=test",
        user=atf.properties["slurm-user"],
    )
    atf.run_command(
        f"scontrol update nodename={node_list[4]},{node_list[5]} state=drain reason=test",
        user=atf.properties["slurm-user"],
    )

    # Get sinfo output for down&drained nodes (quoted so the shell does not
    # treat "&" as a background operator)
    output = atf.run_command_output(
        f"sinfo -Nh -n {node_range} -O NodeList,StateComplete --state='down&drain'"
    )

    # Each row is "<node> <state>"; keep the rows whose state mentions "drain"
    lines = [line.strip().split() for line in output.strip().splitlines()]
    nodes = {node: status for node, status in lines}
    down_drained_nodes = [n for n, s in nodes.items() if "drain" in s]

    # Count every returned row: the checks below would still pass if sinfo
    # ignored --state and also listed the six untouched nodes.
    assert len(nodes) == 2, "Expected only the 2 down&drained nodes in the output"

    # Verify the output for nodes that are both down and drained
    assert len(down_drained_nodes) == 2, "Expected 2 drained nodes"
    assert node_list[4] in down_drained_nodes, f"{node_list[4]} should be down&drained"
    assert node_list[5] in down_drained_nodes, f"{node_list[5]} should be down&drained"