############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
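"""Test GPU GRES options (--gpus, --gpus-per-node, --gpus-per-task) at the job
and step level: parallel step execution, step delays when more GPUs are
requested than are free, and CUDA_VISIBLE_DEVICES contents with and without
ConstrainDevices."""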
import atf
import pytest
import re
from pathlib import Path

job_file = None
step_file = None
job_output_file = None
constrain_devices = False


# Setup
@pytest.fixture(scope="module", autouse=True)
def setup():
    global job_file, step_file, job_output_file, constrain_devices

    atf.require_config_parameter("SelectType", "select/cons_tres")
    atf.require_config_parameter("SelectTypeParameters", "CR_CPU")
    atf.require_config_parameter_includes("GresTypes", "gpu")

    # Require 8 ttys because one test requests 8 GPUs (4 GPUs on each of 2 nodes)
    for tty_num in range(8):
        atf.require_tty(tty_num)
    atf.require_config_parameter(
        "Name", {"gpu": {"File": "/dev/tty[0-7]"}}, source="gres"
    )
    atf.require_nodes(2, [("Gres", "gpu:4"), ("CPUs", 8)])
    atf.require_slurm_running()

    job_file = Path(atf.module_tmp_path) / "job_file"
    step_file = Path(atf.module_tmp_path) / "step_file"
    job_output_file = Path(atf.module_tmp_path) / "job_output_file"
    constrain_devices = atf.get_config_parameter("ConstrainDevices") == "yes"


def test_gpus_per_node_parallel_1_delayed():
    """Test --gpus-per-node option by job step"""

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n1 --gpus-per-node=1 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-node=1 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-node=1 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 ]; then
            squeue -s --name=test_job
        fi
        exit 0""",
    )
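    # The job allocates 2 GPUs on one node; the three exclusive 1-GPU steps
    # can only run two at a time, so one step should be delayed.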
    job_id = atf.submit_job_sbatch(
        "--cpus-per-gpu=1 --gpus-per-node=2 -N1 -n3 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify every step used only 1 GPU
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,", output
    ), "Not all steps used only 1 GPU"

    # Verify a GPU was used 3 times (once per step)
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+", output)) == 3
    ), "A GPU was not used 3 times"

    # Verify one step was delayed
    assert atf.check_steps_delayed(
        job_id, output, 1
    ), "One step should have been delayed"

    if constrain_devices:
        # Verify all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 3
        ), "Not all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)"
    else:
        # Verify steps were split between the two GPUs (without ConstrainDevices)
        cuda_devices_used = re.findall(
            r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:(\d+)", output
        )
        assert len(set(cuda_devices_used)) > 1, (
            "The job steps weren't split among the two GPUs "
            f"(CUDA devices {set(cuda_devices_used)} used instead of 0 and 1, "
            "without ConstrainDevices)"
        )


@pytest.mark.parametrize("step_args", ["-n1 --gpus-per-task=1", "-n1 --gpus=1"])
def test_gpus_per_node_parallel(step_args):
    """Test parallel step args with a job with --gpus-per-node"""

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun --exact --gpus-per-node=0 --mem=0 {step_args} {step_file} &
        srun --exact --gpus-per-node=0 --mem=0 {step_args} {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 1 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
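    # The job allocates 2 GPUs; the two 1-GPU steps fit side by side, so both
    # should run in parallel with no "Step completed ... retrying" retries.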
    job_id = atf.submit_job_sbatch(
        "--cpus-per-gpu=2 --gpus-per-node=2 -N1 -n2 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify every step used only 1 GPU
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,", output
    ), "Not all steps used only 1 GPU"

    # Verify a GPU was used 2 times (once per step)
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "A GPU was not used 2 times"

    # Verify both steps ran in parallel
    assert not re.search(
        r"Step completed in JobId=\d+, retrying", output
    ), "Not all steps ran in parallel"

    if constrain_devices:
        # Verify all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 2
        ), "Not all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)"
    else:
        # Verify exactly 1 step saw CUDA_VISIBLE_DEVICES:0 (without ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 1
        ), "Expected exactly 1 step with CUDA_VISIBLE_DEVICES:0 (without ConstrainDevices)"

        # Verify exactly 1 step saw CUDA_VISIBLE_DEVICES:1 (without ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:1", output)) == 1
        ), "Expected exactly 1 step with CUDA_VISIBLE_DEVICES:1 (without ConstrainDevices)"


def test_gpus_per_node_different_gpus():
    """Test --gpus (per job or step) option by job step"""

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun --exact -n2 --gpus=2 --gpus-per-node=0 --mem=0 {step_file} &
        srun --exact -n1 --gpus=1 --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        exit 0""",
    )
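    # The job allocates 3 GPUs on one node, so the 2-GPU and 1-GPU steps fit
    # concurrently; without ConstrainDevices their devices must not overlap.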
    job_id = atf.submit_job_sbatch(
        "--cpus-per-gpu=1 --gpus-per-node=3 -N1 -n3 -t2 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    step_2gpu = re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:(\d+),(\d+)", output
    ).groups()
    step_1gpu = re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:(\d+$)", output, re.MULTILINE
    ).groups()

    # Verify one step used 1 GPU and the other step used 2 GPUs
    assert (
        len(step_1gpu) == 1 and len(step_2gpu) == 2
    ), f"Failed to obtain all GPU indices ({len(step_1gpu)} != 1 or {len(step_2gpu)} != 2)"

    if constrain_devices:
        # Verify that with constrained devices, CUDA_VISIBLE_DEVICES always
        # starts with 0 in a step
        assert (
            step_2gpu[0] == "0" and step_1gpu[0] == "0"
        ), "CUDA_VISIBLE_DEVICES did not always start with 0 in a step"
    else:
        # Verify that without constrained devices, all CUDA_VISIBLE_DEVICES
        # values are unique
        assert step_1gpu[0] not in step_2gpu, "Not all CUDA_VISIBLE_DEVICES are unique"


def test_gpus_per_node_with_gpus_per_task():
    """Test --gpus-per-task option by job step"""
    job_gpus = 3
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
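    # The job allocates 3 GPUs; each 2-GPU step excludes the others, so only
    # one step runs at a time: two steps get delayed, one of them twice.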
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=1 --gpus-per-node={job_gpus} -N1 -n3 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no step has more than 2 GPUs
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,", output
    ), "A step has more than 2 GPUs"

    # Verify all steps have 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 3
    ), "Not all steps have 2 GPUs"

    # Verify two steps were delayed, one of them twice
    assert atf.check_steps_delayed(
        job_id, output, 2
    ), "Two steps were not delayed or one of them was not delayed twice"


def test_gpus_per_node_with_gpus():
    """Test --gpus option by job step"""
    job_gpus = 2
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n2 --gpus={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'HOST:'$SLURMD_NODENAME 'NODE_ID:'$SLURM_NODEID 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 -a $SLURM_NODEID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
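    # The job allocates 2 GPUs on each of 2 nodes (4 total); only two of the
    # three 2-GPU steps fit at once, so one step should be delayed.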
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=2 --gpus-per-node={job_gpus} -N2 -n6 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no more than 1 GPU is visible (per node)
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,", output
    ), "More than 1 GPU is visible (per node)"

    # Verify step 0 had access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 0 did not have access to 2 GPUs"

    # Verify step 1 had access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 1 did not have access to 2 GPUs"

    # Verify step 2 had access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 2 did not have access to 2 GPUs"

    # Verify one step was delayed
    assert atf.check_steps_delayed(job_id, output, 1), "One step was not delayed"

    if constrain_devices:
        # Verify all GPUs are CUDA_VISIBLE_DEVICES:0 due to ConstrainDevices
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 6
        ), "Not all GPUs are CUDA_VISIBLE_DEVICES:0 due to ConstrainDevices"
    else:
        cuda_val = []
        cuda_val.append(
            re.search(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:(\d+)", output).group(1)
        )
        cuda_val.append(
            re.search(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:(\d+)", output).group(1)
        )
        cuda_val.append(
            re.search(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:(\d+)", output).group(1)
        )

        # Verify the first two steps used different GPUs (without ConstrainDevices)
        assert (
            cuda_val[0] != cuda_val[1]
        ), "The first two steps did not use different GPUs (without ConstrainDevices)"

        # Verify the last step reused one of the previous GPUs (without ConstrainDevices)
        assert (
            cuda_val[2] == cuda_val[0] or cuda_val[2] == cuda_val[1]
        ), "The last step did not reuse one of the previous GPUs (without ConstrainDevices)"


def test_gpus_per_node_with_gpus_2_nodes():
    """Test --gpus option across 2 nodes"""
    job_gpus = 4

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n2 --gpus=6 --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus=7 --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus=8 --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'HOST:'$SLURMD_NODENAME 'NODE_ID:'$SLURM_NODEID 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 -a $SLURM_NODEID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
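    # The job allocates 4 GPUs on each of 2 nodes (8 total); the steps request
    # 6, 7, and 8 GPUs, so no two can run together: two steps get delayed, one
    # of them twice.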
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=2 --gpus-per-node={job_gpus} -N2 -n6 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no step sees more than 4 GPUs per node
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,\d+,", output
    ), "A step sees more than 4 GPUs per node"

    # Verify step 0 used 2 nodes
    assert (
        len(re.findall(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 0 did not use 2 nodes"

    # Verify step 1 used 2 nodes
    assert (
        len(re.findall(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 1 did not use 2 nodes"

    # Verify step 2 used 2 nodes
    assert (
        len(re.findall(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 2 did not use 2 nodes"

    # Verify two steps were delayed, one of them twice
    assert atf.check_steps_delayed(
        job_id, output, 2
    ), "Two steps were not delayed or one of them was not delayed twice"


def test_gpus_per_node_with_gpus_per_task_3():
    """Test --gpus-per-task option by job step"""
    job_gpus = 4
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv {step_file}
        srun -vv --exact -n3 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n3 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 1 -a $SLURM_PROCID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
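    # Step 0 runs alone and sees all 4 GPUs on each node; steps 1 and 2 each
    # need 6 GPUs (3 tasks x 2 GPUs) out of the 8 allocated, so one of them
    # should be delayed.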
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=1 --gpus-per-node={job_gpus} -N2 -n4 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no more than 4 GPUs are visible in any step
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,\d+,", output
    ), "More than 4 GPUs are visible in a step"

    # Verify step 0 sees 4 GPUs in each of its 4 tasks
    assert (
        len(re.findall(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,\d+", output)) == 4
    ), "Step 0 does not see 4 GPUs in each of its 4 tasks"

    # Verify step 1 has 3 tasks with 2 GPUs per task
    assert (
        len(re.findall(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 3
    ), "Step 1 does not have 3 tasks with 2 GPUs per task"

    # Verify step 2 has 3 tasks with 2 GPUs per task
    assert (
        len(re.findall(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 3
    ), "Step 2 does not have 3 tasks with 2 GPUs per task"

    # Verify one step was delayed
    assert atf.check_steps_delayed(job_id, output, 1), "One step was not delayed"


def test_gpus_per_node_with_gpus_per_task_5():
    """Test --gpus-per-task option by job step"""
    job_gpus = 4
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 1 -a $SLURM_PROCID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
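    # The five single-node 2-GPU steps need 10 GPUs in total but only 8 are
    # allocated (4 per node on 2 nodes), so one step should be delayed.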
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=1 --gpus-per-node={job_gpus} -N2 -n5 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", timeout=30, fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no more than 2 GPUs are visible in any step
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,", output
    ), "More than 2 GPUs are visible in a step"

    # Verify all 5 steps have access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 5
    ), "Not all 5 steps have access to 2 GPUs"

    # Verify one step was delayed
    assert atf.check_steps_delayed(job_id, output, 1), "One step was not delayed"