| --[[ |
| --Example job_submit.lua file for Slurm |
| --For more information check: |
| -- https://slurm.schedmd.com/job_submit_plugins.html |
| --For the list of available fields check: |
| -- src/plugins/job_submit/lua/job_submit_lua.c |
| --]] |
| |
-- Prefix passed as the leading "%s" argument of the messages logged below.
log_prefix = "slurm_job_submit"
| |
--[[
-- Search str for the Lua pattern arg, tolerating an unset (nil) str.
--
-- str: string to search, or nil
-- arg: pattern handed unchanged to string.find
--
-- Returns false when str is nil. Otherwise returns exactly what
-- string.find(str, arg) returns: the start and end indices of the first
-- match, or nil when nothing matches.
--]]
function _find_in_str(str, arg)
	if str == nil then
		return false
	end
	return string.find(str, arg)
end
| |
--[[
-- Emit one formatted message through both slurm.log_user and
-- slurm.log_debug.
--
-- fmt: format string passed to both logging functions
-- ...: values substituted into fmt, forwarded unchanged
--
-- Each distinct message logged to the end user should be associated with
-- a unique return code, so that messages are displayed properly when an
-- array job is modified.
--
-- Returning a message to the user from slurm_job_modify is supported
-- since Slurm 23.02; using it in older versions will result in an error
-- message in the slurmctld logs.
--]]
function _log_user_and_debug(fmt, ...)
	--[[
	-- Forward the varargs directly instead of packing them into a table
	-- and unpacking it again: this keeps nil arguments (including
	-- trailing ones) intact and does not depend on table.unpack, which
	-- is only available from Lua 5.2 on (unpack in earlier versions).
	--]]
	slurm.log_user(fmt, ...)
	slurm.log_debug(fmt, ...)
end
| |
| |
--[[
-- Enforce a 4 hour time limit on interactive jobs.
--
-- job_desc: job description; the 'script' and 'time_limit' fields are read
-- uid:      id of the requesting user
--
-- A job whose 'script' field is set is treated as a batch job and is not
-- limited. uid 0 is always allowed (other privileged uids can be added to
-- that test). Everyone else must request a time limit, and it must not
-- exceed 4 * 60; otherwise slurm.ESLURM_INVALID_TIME_LIMIT is returned
-- after the reason has been logged. Returns slurm.SUCCESS when accepted.
--]]
function validate_interactive_job(job_desc, uid)
	local max_interactive_limit = 4 * 60

	-- no limit for batch jobs
	if job_desc['script'] ~= nil then
		return slurm.SUCCESS
	end

	-- privileged users: extend with "or uid == SpecialUser" as needed
	if uid == 0 then
		_log_user_and_debug("Interactive job allowed for uid: %u", uid)
		return slurm.SUCCESS
	end

	local requested = job_desc['time_limit']
	if requested == slurm.NO_VAL then
		_log_user_and_debug("You must request a time limit within 4 hours for interactive jobs")
		return slurm.ESLURM_INVALID_TIME_LIMIT
	end
	if requested > max_interactive_limit then
		_log_user_and_debug("Interactive jobs for time longer than 4h forbidden")
		return slurm.ESLURM_INVALID_TIME_LIMIT
	end

	return slurm.SUCCESS
end
| |
--[[
-- Reject submissions that request unlimited memory or no memory at all.
--
-- Returns slurm.SUCCESS when the request is acceptable, otherwise logs the
-- reason and returns slurm.ESLURM_ACCOUNTING_POLICY.
--]]
local function _check_submit_memory(job_desc)
	local mem_per_node = job_desc['min_mem_per_node']
	if mem_per_node ~= nil then
		-- Forbid unlimited --mem if memory specified
		if mem_per_node == 0 then
			_log_user_and_debug("%s: --mem=0 is not allowed",
					    log_prefix)
			return slurm.ESLURM_ACCOUNTING_POLICY
		end
		return slurm.SUCCESS
	end

	-- Enforce --mem specification
	local mem_per_cpu = job_desc['min_mem_per_cpu']
	if mem_per_cpu == nil then
		_log_user_and_debug("%s: Neither --mem nor --mem-per-cpu specified",
				    log_prefix)
		return slurm.ESLURM_ACCOUNTING_POLICY
	end
	if mem_per_cpu == 0 then
		_log_user_and_debug("%s: --mem-per-cpu=0 is not allowed",
				    log_prefix)
		return slurm.ESLURM_ACCOUNTING_POLICY
	end

	return slurm.SUCCESS
end

--[[
-- Look for the partition with the highest priority_job_factor in part_list.
--
-- Returns two values: a boolean telling whether a partition was selected,
-- and that partition's name. Nothing is selected as soon as a partition
-- whose flag_default is not 0 is met, i.e. when a default partition is
-- already defined in slurm.conf.
--]]
local function _pick_fallback_partition(part_list)
	local best_name = nil
	local best_priority = -1
	local inx = 0

	for _, part in pairs(part_list) do
		slurm.log_debug2("%s: part name[%d]:%s",
				 log_prefix,
				 inx,
				 part['name'])
		inx = inx + 1

		-- Don't change partition if the default is defined in slurm.conf
		if part['flag_default'] ~= 0 then
			return false, nil
		end

		local priority = part['priority_job_factor']
		if priority ~= nil then
			slurm.log_debug3("%s: last priority is %d",
					 log_prefix,
					 priority)
			if priority > best_priority then
				best_priority = priority
				best_name = part.name
			end
		end
	end

	return best_priority >= 0, best_name
end

--[[
-- Job submission hook.
--
-- job_desc:   description of the job being submitted (may be modified)
-- part_list:  table of partitions, iterated to choose a fallback partition
--             (presumably already limited to the partitions this user may
--             use -- confirm against the job_submit plugin documentation)
-- submit_uid: id of the submitting user
--
-- Steps, in order:
--   1. root (uid 0) is never blocked or modified;
--   2. interactive jobs are validated by validate_interactive_job;
--   3. jobs requesting a GPU are moved to the 'gpu' partition;
--   4. a non-zero --mem or --mem-per-cpu request is required;
--   5. the job name MAINT is refused;
--   6. when no partition is set and no default partition exists, the
--      partition with the highest priority_job_factor is used.
--
-- Returns slurm.SUCCESS, or the error code of the first failing check.
--]]
function slurm_job_submit(job_desc, part_list, submit_uid)
	--[[
	-- Don't block any activity from root.
	-- This may make reproduction of user errors difficult
	--]]
	if submit_uid == 0 then
		return slurm.SUCCESS
	end

	local rc = validate_interactive_job(job_desc, submit_uid)
	if rc ~= slurm.SUCCESS then
		return rc
	end

	--[[
	-- Change partition to GPU if job requested any GPU
	-- depending on option used this may be visible in
	-- different job_desc field:
	-- --gres=gpu:         -> tres_per_node
	-- --gpus-per-task     -> tres_per_task
	-- --gpus-per-socket   -> tres_per_socket
	-- --gpus              -> tres_per_job
	--]]
	if _find_in_str(job_desc['tres_per_node'], "gpu") or
	   _find_in_str(job_desc['tres_per_task'], "gpu") or
	   _find_in_str(job_desc['tres_per_socket'], "gpu") or
	   _find_in_str(job_desc['tres_per_job'], "gpu") then
		job_desc.partition = 'gpu'
		_log_user_and_debug("%s: Set partition to: %s",
				    log_prefix,
				    job_desc.partition)
	end

	rc = _check_submit_memory(job_desc)
	if rc ~= slurm.SUCCESS then
		return rc
	end

	-- Forbid usage of MAINT job name
	if job_desc['name'] == "MAINT" then
		_log_user_and_debug("%s: JobName=MAINT reserved. Please use other name.",
				    log_prefix)
		return slurm.ERROR
	end

	--[[
	-- If no partition was requested and no default partition is
	-- defined, fall back to the highest priority partition in part_list
	--]]
	if job_desc['partition'] == nil then
		local found, new_partition = _pick_fallback_partition(part_list)
		if found then
			_log_user_and_debug("%s: setting default partition value: %s",
					    log_prefix,
					    new_partition)
			job_desc.partition = new_partition
		end
	end

	return slurm.SUCCESS
end
| |
--[[
-- Job modification hook.
--
-- job_desc:   the requested changes
-- job_ptr:    the existing job record (not consulted here)
-- part_list:  table of partitions (not consulted here)
-- modify_uid: id of the user requesting the update
--
-- While working on this it is important to understand that modification
-- of a job array metarecord may differ from modification of a specific
-- element. When a job is not yet split into tasks it is treated as one
-- element.
--
-- Returns slurm.SUCCESS, or the error code of the first failing check.
--]]
function slurm_job_modify(job_desc, job_ptr, part_list, modify_uid)
	-- Don't block/modify any update from root
	if modify_uid == 0 then
		return slurm.SUCCESS
	end

	--[[
	-- Only validate the time limit when this update actually sets one.
	-- A time_limit of slurm.NO_VAL means the update leaves it untouched;
	-- validating it anyway would reject every such update with "You
	-- must request a time limit" before the checks below could run.
	-- NOTE(review): validate_interactive_job tells batch jobs apart by
	-- job_desc['script'], which an update request presumably does not
	-- carry -- consider consulting job_ptr here; confirm the available
	-- fields in job_submit_lua.c.
	--]]
	local time_limit = job_desc['time_limit']
	if time_limit ~= nil and time_limit ~= slurm.NO_VAL then
		local rc = validate_interactive_job(job_desc, modify_uid)
		if rc ~= slurm.SUCCESS then
			return rc
		end
	end

	--[[
	-- Forbid unlimited --mem in update
	-- for instance: scontrol update job=XX MinMemoryNode=0
	--]]
	if job_desc['min_mem_per_node'] == 0 then
		_log_user_and_debug("%s: Updating MinMemPerNode=0 is not allowed",
				    log_prefix)
		return slurm.ESLURM_ACCOUNTING_POLICY
	end
	if job_desc['min_mem_per_cpu'] == 0 then
		_log_user_and_debug("%s: Updating MinMemoryCPU=0 is not allowed",
				    log_prefix)
		return slurm.ESLURM_ACCOUNTING_POLICY
	end

	-- Forbid usage of MAINT job name
	if job_desc['name'] == "MAINT" then
		_log_user_and_debug("%s: Updating JobName to MAINT is not allowed from non-root user",
				    log_prefix)
		--[[
		-- slurm.ERROR is shown as 'Unspecified error'. The message
		-- above only reaches the user on Slurm 23.02 or newer; on
		-- older versions logging to the user from slurm_job_modify
		-- results in an error message in the slurmctld logs.
		--]]
		return slurm.ERROR
	end

	return slurm.SUCCESS
end