blob: a6dd9b3214ec2fd6818dce33a6c2e212b3c07c2d [file] [log] [blame]
############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
import pytest
import atf
# Path of the bash script that make_and_run_bash() generates and executes.
script_name = "./script.sh"
# File that receives the combined stdout of the commands run by that script;
# the tests grep this file to count how many commands produced output.
script_out = "./script.out"
@pytest.fixture(scope="module", autouse=True)
def setup():
    """Module-scoped autouse fixture.

    Calls atf.require_slurm_running() once, before the tests of this
    module are executed.
    """
    atf.require_slurm_running()
@pytest.fixture(scope="function", autouse=True)
def cancel_jobs():
    """Function-scoped autouse fixture that cleans up after every test.

    There is no setup step; once the test has finished,
    atf.cancel_all_jobs() is called as teardown.
    """
    # Hand control to the test straight away.
    yield
    # Teardown: runs after each test function.
    atf.cancel_all_jobs()
def make_and_run_bash(command: str, count: int = 1000) -> None:
    """Generate and run a bash script that launches a command many times.

    The generated script starts ``count`` copies of ``command`` in the
    background, redirects the standard output of the whole loop (and so of
    every copy) to ``script_out``, and then waits for the background
    commands to finish. The script is written to ``script_name`` and run
    through atf.run_command().

    Args:
        command: A single shell command line (it may contain a pipeline).
            It is interpolated verbatim into the script body.
        count: Number of background copies to launch. Defaults to 1000,
            which is what the tests in this module assert on.
    """
    atf.make_bash_script(
        script_name,
        # The script lines are kept at column 0 so the generated file is
        # exactly the text below, independent of Python indentation.
        f"""
for i in $(seq 1 {count})
do {command} &
done > {script_out}
wait
""",
    )
    atf.run_command(script_name)
@pytest.mark.parametrize(
    "command, phrase",
    [
        ("sinfo --Format=NODELIST", "^NODELIST"),
        ("scontrol show node | head -1", "^NodeName="),
        ("squeue --only-job-state --Format=NODELIST", "^NODELIST"),
    ],
)
def test_parallel(command, phrase):
    """Test that sinfo, scontrol and squeue can be run in parallel.

    The parametrized command is launched 1000 times at once to make sure
    slurm doesn't crash. Afterwards the lines of the output file matching
    the grep pattern 'phrase' are counted; one matching line is expected
    per command that was run.
    """
    make_and_run_bash(command)

    # Count the lines of the collected output that match the pattern.
    count_cmd = f"cat {script_out} | grep -c '{phrase}'"
    output = atf.run_command_output(count_cmd)
    matches = int(output)

    failure = f"We expected 1000 commands to be run in parallel, but got {output}"
    assert matches == 1000, failure
def test_squeue_parallel():
    """Test that many simultaneous squeue calls all still run correctly.

    100 jobs are submitted first so that the queue is not empty, then
    squeue is launched 1000 times in parallel. The test passes when the
    collected output contains 1000 lines with 'JOBID' (presumably the
    header line printed by each squeue call).
    """
    # Fill up the queue; the returned job ids are not needed here.
    for _ in range(100):
        atf.submit_job_sbatch("--wrap='sleep 100'")

    make_and_run_bash("squeue")

    count_cmd = f"cat {script_out} | grep -c 'JOBID'"
    output = atf.run_command_output(count_cmd)
    matches = int(output)

    failure = f"We expected 1000 user commands to run, but got {int(output)}"
    assert matches == 1000, failure
def test_show_jobs_parallel():
    """Test that 'scontrol show job' works when run in parallel.

    One job is submitted with srun, then 'scontrol show job <job_id>' is
    launched 1000 times in the background. The test passes when the
    collected output contains 1000 lines with 'JobId='.
    """
    job_id = atf.submit_job_srun("true")
    show_job = f"scontrol show job {job_id}"
    make_and_run_bash(show_job)

    count_cmd = f'cat {script_out} | grep -c "JobId="'
    output = atf.run_command_output(count_cmd)
    matches = int(output)

    failure = f"We expected 1000 commands to be run in parallel, but got {output}"
    assert matches == 1000, failure