blob: 44d41cf272e0b2efaaf7a4f6feb76e3a39b6167e [file] [log] [blame]
############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
import atf
import pytest
# import re
import pexpect
import time
# User name read from the "slurm-user" entry of atf.properties; the tests
# below pass it as `user=` to atf.run_job_output and to `sudo -u`.
suser = atf.properties["slurm-user"]
# Setup
@pytest.fixture(autouse=True, scope="module")
def setup():
    """Module-scoped, auto-used fixture that calls atf.require_slurm_running()."""
    atf.require_slurm_running()
def test_single_job():
    """
    Submit one job straight to slurmd (srun --no-allocate), bypassing the
    slurmctld scheduler, and check that it prints the name of the node it
    was sent to.

    The job runs on the first node returned by atf.get_nodes() as the
    user held in `suser`.
    """
    nodes = atf.get_nodes()
    node = list(nodes.keys())[0]

    srun_args = f"-N1 --nodelist={node} --no-allocate printenv SLURMD_NODENAME"
    raw_output = atf.run_job_output(srun_args, user=suser)

    # Only newlines are stripped, so any other stray whitespace in the
    # output still counts as a mismatch.
    reported_node = raw_output.strip("\n")
    assert (
        reported_node == node
    ), f"The job failed to print out the node name: {node}"
def test_multiple_jobs():
    """
    Launch three sruns at a time on one node, 100 times over, to look for
    slurmd race conditions.

    Each cycle spawns:
      * child 1: a plain `srun ... true`
      * child 2: `srun -Z sleep 0.5` run through `sudo -u <suser>`
      * child 3: `srun -Z sleep 0.25` run through `sudo -u <suser>`
    (-Z is srun's short form of --no-allocate.)

    The children are then checked in the order child 2, child 3, child 1.
    A child fails the test only when the generic "error:" pattern matches;
    a match of the "configuring interconnect" error pattern or EOF passes.

    The sleep at the end of each cycle gives the job step completion
    logic time to be processed (slurmd -> slurmctld messages).
    """
    nodes = atf.get_nodes()
    node = list(nodes.keys())[0]

    # expect() returns the index of the pattern that matched; the position
    # of each entry is therefore significant.
    outcomes = [r"error:.*configuring interconnect", r"error:", pexpect.EOF]
    generic_error = 1  # index of the bare r"error:" pattern in `outcomes`

    for it in range(100):
        plain_cmd = f"srun -N1 --nodelist={node} true"
        long_sleep_cmd = (
            f'sudo -u {suser} bash -lc "srun -N1 --nodelist={node} -Z sleep 0.5"'
        )
        short_sleep_cmd = (
            f'sudo -u {suser} bash -lc "srun -N1 --nodelist={node} -Z sleep 0.25"'
        )

        # Start all three before waiting on any of them so they overlap.
        child1 = pexpect.spawn(plain_cmd)
        child2 = pexpect.spawn(long_sleep_cmd)
        child3 = pexpect.spawn(short_sleep_cmd)

        matched = child2.expect(outcomes)
        assert matched != generic_error, f"Child 2 failed to run on iteration {it}"

        matched = child3.expect(outcomes)
        assert matched != generic_error, f"Child 3 failed to run on iteration {it}"

        matched = child1.expect(outcomes)
        assert matched != generic_error, f"Child 1 failed to run on iteration {it}"

        time.sleep(0.25)