############################################################################
# Copyright (C) SchedMD LLC.
############################################################################
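"""Test GPU GRES options (--gpus, --gpus-per-node, --gpus-per-task) at the job
and step level: parallel step execution, step delays when more GPUs are
requested than are free, and CUDA_VISIBLE_DEVICES contents with and without
ConstrainDevices."""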
import atf
import pytest
import re
from pathlib import Path

job_file = None
step_file = None
job_output_file = None
constrain_devices = False


# Setup
@pytest.fixture(scope="module", autouse=True)
def setup():
    global job_file, step_file, job_output_file, constrain_devices

    atf.require_config_parameter("SelectType", "select/cons_tres")
    atf.require_config_parameter("SelectTypeParameters", "CR_CPU")
    atf.require_config_parameter_includes("GresTypes", "gpu")

    # Require 8 ttys because one test requests 8 GPUs (4 GPUs on each of 2 nodes)
    for tty_num in range(8):
        atf.require_tty(tty_num)
    atf.require_config_parameter(
        "Name", {"gpu": {"File": "/dev/tty[0-7]"}}, source="gres"
    )
    atf.require_nodes(2, [("Gres", "gpu:4"), ("CPUs", 8)])
    atf.require_slurm_running()

    job_file = Path(atf.module_tmp_path) / "job_file"
    step_file = Path(atf.module_tmp_path) / "step_file"
    job_output_file = Path(atf.module_tmp_path) / "job_output_file"
    constrain_devices = atf.get_config_parameter("ConstrainDevices") == "yes"


def test_gpus_per_node_parallel_1_delayed():
    """Test --gpus-per-node option by job step"""

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n1 --gpus-per-node=1 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-node=1 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-node=1 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 ]; then
            squeue -s --name=test_job
        fi
        exit 0""",
    )
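    # The job allocates 2 GPUs on one node; the three exclusive 1-GPU steps
    # can only run two at a time, so one step should be delayed.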
    job_id = atf.submit_job_sbatch(
        "--cpus-per-gpu=1 --gpus-per-node=2 -N1 -n3 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify every step used only 1 GPU
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,", output
    ), "Not all steps used only 1 GPU"

    # Verify a GPU was used 3 times (once per step)
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+", output)) == 3
    ), "A GPU was not used 3 times"

    # Verify one step was delayed
    assert atf.check_steps_delayed(
        job_id, output, 1
    ), "One step should have been delayed"

    if constrain_devices:
        # Verify all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 3
        ), "Not all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)"
    else:
        # Verify steps were split between the two GPUs (without ConstrainDevices)
        cuda_devices_used = re.findall(
            r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:(\d+)", output
        )
        assert len(set(cuda_devices_used)) > 1, (
            "The job steps weren't split among the two GPUs "
            f"(CUDA devices {set(cuda_devices_used)} used instead of 0 and 1, "
            "without ConstrainDevices)"
        )


@pytest.mark.parametrize("step_args", ["-n1 --gpus-per-task=1", "-n1 --gpus=1"])
def test_gpus_per_node_parallel(step_args):
    """Test parallel step args with a job with --gpus-per-node"""

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun --exact --gpus-per-node=0 --mem=0 {step_args} {step_file} &
        srun --exact --gpus-per-node=0 --mem=0 {step_args} {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 1 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
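    # The job allocates 2 GPUs; the two 1-GPU steps fit side by side, so both
    # should run in parallel with no "Step completed ... retrying" retries.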
    job_id = atf.submit_job_sbatch(
        "--cpus-per-gpu=2 --gpus-per-node=2 -N1 -n2 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify every step used only 1 GPU
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,", output
    ), "Not all steps used only 1 GPU"

    # Verify a GPU was used 2 times (once per step)
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "A GPU was not used 2 times"

    # Verify both steps ran in parallel
    assert not re.search(
        r"Step completed in JobId=\d+, retrying", output
    ), "Not all steps ran in parallel"

    if constrain_devices:
        # Verify all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 2
        ), "Not all GPUs are CUDA_VISIBLE_DEVICES:0 (with ConstrainDevices)"
    else:
        # Verify exactly 1 step saw CUDA_VISIBLE_DEVICES:0 (without ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 1
        ), "Expected exactly 1 step with CUDA_VISIBLE_DEVICES:0 (without ConstrainDevices)"

        # Verify exactly 1 step saw CUDA_VISIBLE_DEVICES:1 (without ConstrainDevices)
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:1", output)) == 1
        ), "Expected exactly 1 step with CUDA_VISIBLE_DEVICES:1 (without ConstrainDevices)"


def test_gpus_per_node_different_gpus():
    """Test --gpus (per job or step) option by job step"""

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun --exact -n2 --gpus=2 --gpus-per-node=0 --mem=0 {step_file} &
        srun --exact -n1 --gpus=1 --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        exit 0""",
    )
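    # The job allocates 3 GPUs on one node, so the 2-GPU and 1-GPU steps fit
    # concurrently; without ConstrainDevices their devices must not overlap.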
    job_id = atf.submit_job_sbatch(
        "--cpus-per-gpu=1 --gpus-per-node=3 -N1 -n3 -t2 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    step_2gpu = re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:(\d+),(\d+)", output
    ).groups()
    step_1gpu = re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:(\d+$)", output, re.MULTILINE
    ).groups()

    # Verify one step used 1 GPU and the other step used 2 GPUs
    assert (
        len(step_1gpu) == 1 and len(step_2gpu) == 2
    ), f"Failed to obtain all GPU indices ({len(step_1gpu)} != 1 or {len(step_2gpu)} != 2)"

    if constrain_devices:
        # Verify that with constrained devices, CUDA_VISIBLE_DEVICES always
        # starts with 0 in a step
        assert (
            step_2gpu[0] == "0" and step_1gpu[0] == "0"
        ), "CUDA_VISIBLE_DEVICES did not always start with 0 in a step"
    else:
        # Verify that without constrained devices, all CUDA_VISIBLE_DEVICES
        # values are unique
        assert step_1gpu[0] not in step_2gpu, "Not all CUDA_VISIBLE_DEVICES are unique"


def test_gpus_per_node_with_gpus_per_task():
    """Test --gpus-per-task option by job step"""
    job_gpus = 3
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
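    # The job allocates 3 GPUs; each 2-GPU step excludes the others, so only
    # one step runs at a time: two steps get delayed, one of them twice.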
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=1 --gpus-per-node={job_gpus} -N1 -n3 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no step has more than 2 GPUs
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,", output
    ), "A step has more than 2 GPUs"

    # Verify all steps have 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 3
    ), "Not all steps have 2 GPUs"

    # Verify two steps were delayed, one of them twice
    assert atf.check_steps_delayed(
        job_id, output, 2
    ), "Two steps were not delayed or one of them was not delayed twice"


def test_gpus_per_node_with_gpus():
    """Test --gpus option by job step"""
    job_gpus = 2
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n2 --gpus={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'HOST:'$SLURMD_NODENAME 'NODE_ID:'$SLURM_NODEID 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 -a $SLURM_NODEID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
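    # The job allocates 2 GPUs on each of 2 nodes (4 total); only two of the
    # three 2-GPU steps fit at once, so one step should be delayed.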
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=2 --gpus-per-node={job_gpus} -N2 -n6 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no more than 1 GPU is visible (per node)
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,", output
    ), "More than 1 GPU is visible (per node)"

    # Verify step 0 had access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 0 did not have access to 2 GPUs"

    # Verify step 1 had access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 1 did not have access to 2 GPUs"

    # Verify step 2 had access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 2 did not have access to 2 GPUs"

    # Verify one step was delayed
    assert atf.check_steps_delayed(job_id, output, 1), "One step was not delayed"

    if constrain_devices:
        # Verify all GPUs are CUDA_VISIBLE_DEVICES:0 due to ConstrainDevices
        assert (
            len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:0", output)) == 6
        ), "Not all GPUs are CUDA_VISIBLE_DEVICES:0 due to ConstrainDevices"
    else:
        cuda_val = []
        cuda_val.append(
            re.search(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:(\d+)", output).group(1)
        )
        cuda_val.append(
            re.search(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:(\d+)", output).group(1)
        )
        cuda_val.append(
            re.search(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:(\d+)", output).group(1)
        )

        # Verify the first two steps used different GPUs (without ConstrainDevices)
        assert (
            cuda_val[0] != cuda_val[1]
        ), "The first two steps did not use different GPUs (without ConstrainDevices)"

        # Verify the last step reused one of the previous GPUs (without ConstrainDevices)
        assert (
            cuda_val[2] == cuda_val[0] or cuda_val[2] == cuda_val[1]
        ), "The last step did not reuse one of the previous GPUs (without ConstrainDevices)"


def test_gpus_per_node_with_gpus_2_nodes():
    """Test --gpus option across 2 nodes"""
    job_gpus = 4

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -n2 --gpus=6 --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus=7 --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n2 --gpus=8 --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'HOST:'$SLURMD_NODENAME 'NODE_ID:'$SLURM_NODEID 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 2 -a $SLURM_NODEID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
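    # The job allocates 4 GPUs on each of 2 nodes (8 total); the steps request
    # 6, 7, and 8 GPUs, so no two can run together: two steps get delayed, one
    # of them twice.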
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=2 --gpus-per-node={job_gpus} -N2 -n6 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no step sees more than 4 GPUs per node
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,\d+,", output
    ), "A step sees more than 4 GPUs per node"

    # Verify step 0 used 2 nodes
    assert (
        len(re.findall(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 0 did not use 2 nodes"

    # Verify step 1 used 2 nodes
    assert (
        len(re.findall(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 1 did not use 2 nodes"

    # Verify step 2 used 2 nodes
    assert (
        len(re.findall(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:\d+", output)) == 2
    ), "Step 2 did not use 2 nodes"

    # Verify two steps were delayed, one of them twice
    assert atf.check_steps_delayed(
        job_id, output, 2
    ), "Two steps were not delayed or one of them was not delayed twice"


def test_gpus_per_node_with_gpus_per_task_3():
    """Test --gpus-per-task option by job step"""
    job_gpus = 4
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv {step_file}
        srun -vv --exact -n3 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -n3 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 1 -a $SLURM_PROCID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
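    # Step 0 runs alone and sees all 4 GPUs on each node; steps 1 and 2 each
    # need 6 GPUs (3 tasks x 2 GPUs) out of the 8 allocated, so one of them
    # should be delayed.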
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=1 --gpus-per-node={job_gpus} -N2 -n4 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no more than 4 GPUs are visible in any step
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,\d+,", output
    ), "More than 4 GPUs are visible in a step"

    # Verify step 0 sees 4 GPUs in each of its 4 tasks
    assert (
        len(re.findall(r"STEP_ID:0 CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,\d+", output)) == 4
    ), "Step 0 does not see 4 GPUs in each of its 4 tasks"

    # Verify step 1 has 3 tasks with 2 GPUs per task
    assert (
        len(re.findall(r"STEP_ID:1 CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 3
    ), "Step 1 does not have 3 tasks with 2 GPUs per task"

    # Verify step 2 has 3 tasks with 2 GPUs per task
    assert (
        len(re.findall(r"STEP_ID:2 CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 3
    ), "Step 2 does not have 3 tasks with 2 GPUs per task"

    # Verify one step was delayed
    assert atf.check_steps_delayed(job_id, output, 1), "One step was not delayed"


def test_gpus_per_node_with_gpus_per_task_5():
    """Test --gpus-per-task option by job step"""
    job_gpus = 4
    step_gpus = 2

    # Delete previous job output file and prepare job scripts
    job_output_file.unlink(missing_ok=True)
    atf.make_bash_script(
        job_file,
        f"""
        scontrol -dd show job ${{SLURM_JOBID}}
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        srun -vv --exact -N1 -n1 --gpus-per-task={step_gpus} --gpus-per-node=0 --mem=0 {step_file} &
        wait
        exit 0""",
    )
    atf.make_bash_script(
        step_file,
        """
        echo 'STEP_ID:'$SLURM_STEP_ID 'CUDA_VISIBLE_DEVICES:'$CUDA_VISIBLE_DEVICES
        sleep 3
        if [ $SLURM_STEP_ID -eq 1 -a $SLURM_PROCID -eq 0 ]; then
            scontrol show step $SLURM_JOB_ID.$SLURM_STEP_ID
        fi
        exit 0""",
    )
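    # The five single-node 2-GPU steps need 10 GPUs in total but only 8 are
    # allocated (4 per node on 2 nodes), so one step should be delayed.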
    job_id = atf.submit_job_sbatch(
        f"--cpus-per-gpu=1 --gpus-per-node={job_gpus} -N2 -n5 -t1 "
        + f"-o {job_output_file} -J test_job {job_file}",
        fatal=True,
    )
    atf.wait_for_job_state(job_id, "DONE", timeout=30, fatal=True)
    atf.wait_for_file(job_output_file, fatal=True)
    output = atf.run_command_output(f"cat {job_output_file}", fatal=True)

    # Verify no more than 2 GPUs are visible in any step
    assert not re.search(
        r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+,\d+,", output
    ), "More than 2 GPUs are visible in a step"

    # Verify all 5 steps have access to 2 GPUs
    assert (
        len(re.findall(r"STEP_ID:\d+ CUDA_VISIBLE_DEVICES:\d+,\d+", output)) == 5
    ), "Not all 5 steps have access to 2 GPUs"

    # Verify one step was delayed
    assert atf.check_steps_delayed(job_id, output, 1), "One step was not delayed"