blob: 0b6d726f3cb0c13890928ac26036bb2bf97dca6a [file] [log] [blame]
--[[
--Example burst_buffer.lua file for Slurm
--
--In order to use this file, it must be called "burst_buffer.lua" and must
--exist in the same directory as slurm.conf and burst_buffer.conf.
--BurstBufferType=burst_buffer/lua must also be specified in slurm.conf.
--
--This file implements each required burst buffer function, but does not
--do anything particularly useful.
--
--The required functions begin with "slurm_bb_". Other functions in this
--example file are provided merely as examples.
--
--Some functions in this file are expected to be very fast because they are
--called from slurmctld while it is has some mutexes locked. If these functions
--run for very long (even 1 second), it may severely impact slurmctld
--performance. These functions cannot be killed, so they will never time out.
--The remaining functions are called asynchronously and can potentially run for
--as long as needed without harming slurmctld performance. These functions are
--called from a separate process and will be killed if they exceed the time
--limit specified by burst_buffer.conf.
--A comment above each function will specify whether or not the function must
--return quickly.
--
--
--Function parameters:
--
--All parameters for "slurm_bb_" functions except for job_info are strings.
--job_info is a table of information about the job. The function print_job_info
--demonstrates how to read the data in this table. A complete list of fields is
--in the Slurm source code in the following location:
--
--src/plugins/burst_buffer/lua/burst_buffer_lua.c:_lua_job_info_field
--
--NOTE: job_info is read-only. It is a snapshot of job information
--just before this function was called. The actual job record can be changed
--while this script is running, making the job_info not in sync with the real
--job record.
--
--
--Return values:
--
--Each function may return 1 or 2 values. The first value must be the return
--code. The second value is optional. If given, the second return value is
--a string. It may be useful to pass a message back to the job, for example a
--reason why a particular burst buffer function failed, but is not required.
--If a "slurm_bb_" function returns an error and a string, the string may
--appear in the job's reason field.
--
--
--External "slurm" functions:
--
--You may log to the slurmctld log file with Slurm logging functions such as
--slurm.log_info(). Replace "info" with the desired debug level.
--
--
--This file also provides an example of how to use a module in lua-posix.
--lua-posix provides posix bindings to lua, which can be very useful, but it is
--not required to run this file and may be removed.
--]]
--Print job_info to the log file
function print_job_info(job_info)
account = job_info["account"]
array_job_id = job_info["array_job_id"]
array_task_id = job_info["array_task_id"]
array_max_tasks = job_info["array_max_tasks"]
array_task_str = job_info["array_task_str"]
gres_detail_cnt = job_info["gres_detail_cnt"]
if (gres_detail_cnt ~= 0) then
--[[
--This keys of this table are the index starting with 1 and
--ending with gres_detail_cnt. The index is the offset of the
--node in the job (index==1 is the first node in the job).
--
--The values of this table are strings representing the gres
--currently allocated to the job on each node. The format
--is a comma-separated list of:
--
--For gres with a file:
--<gres_name>[:<gres_type>]:<count>(IDX:<gres_index>)
--
--For count-only gres:
--<gres_name>[:<gres_type>](CNT:<count>)
--
--This field is only non-nil if the job is running and has
--allocated gres; hence it only applies
--to slurm_bb_pre_run since that is the only hook called with
--a job in the running state.
--]]
gres_table = job_info["gres_detail_str"]
sep = "\n\t\t"
gres_detail_str = string.format("%s%s",
sep, table.concat(gres_table, sep))
else
gres_detail_str = nil
end
gres_total = job_info["gres_total"]
group_id = job_info["group_id"]
het_job_id = job_info["het_job_id"]
het_job_id_set = job_info["het_job_id_set"]
het_job_offset = job_info["het_job_offset"]
job_id = job_info["job_id"]
job_state = job_info["job_state"]
nodes = job_info["nodes"]
partition = job_info["partition"]
user_name = job_info["user_name"]
--user_name is not guaranteed to be set, but is accurate when it is.
--See slurm_job_info_t in slurm.h. This is for performance reasons.
--The script may do a lookup using the UID if needed.
if (user_name == nil) then
user_name = "NULL"
--Or do lookup with UID here
end
slurm.log_info("JobId=%u\
account=%s\
array_job_id=%u\
array_task_id=%u\
array_max_tasks=%u\
array_task_str=%s\
gres_total=%s\
group_id=%u\
het_job_id=%u\
het_job_offset=%u\
job_state=%u\
nodes=%s\
partition=%s\
user_name=%s\
",
job_id, account, array_job_id, array_task_id,
array_max_tasks, array_task_str, gres_total, group_id,
het_job_id, het_job_offset, job_state, nodes, partition,
user_name)
if (gres_detail_cnt ~= 0) then
slurm.log_info("complete gres_detail_str=\n%s",
gres_detail_str)
for i,v in ipairs(gres_table) do
slurm.log_info("Node index = %u, gres_detail_str = %s",
i, gres_table[i])
end
end
end
--This requires lua-posix to be installed
function posix_sleep(n)
local Munistd = require("posix.unistd")
local rc
slurm.log_info("sleep for %u seconds", n)
rc = Munistd.sleep(n)
--rc will be 0 if successful or non-zero for amount of time left
--to sleep
return rc
end
--This commented out function is a wrapper for the posix "sleep"
--function in the lua-posix posix.unistd module.
function sleep_wrapper(n)
return slurm.SUCCESS, ""
--local rc, ret_str
--rc = posix_sleep(n)
--if (rc ~= 0) then
-- ret_str = "Sleep interrupted, " .. tostring(rc) .. " seconds left"
-- rc = slurm.ERROR
--else
-- ret_str = "Success"
-- rc = slurm.SUCCESS
--end
--return rc, ret_str
end
--[[
--slurm_bb_job_process
--
--WARNING: This function is called synchronously from slurmctld and must
--return quickly.
--
--This function is called on job submission.
--This example reads, logs, and returns the job script.
--If this function returns an error, the job is rejected and the second return
--value (if given) is printed where salloc, sbatch, or srun was called.
--]]
function slurm_bb_job_process(job_script, uid, gid, job_info)
local contents
slurm.log_info("slurm_bb_job_process(). job_script=%s, uid=%s, gid=%s",
job_script, uid, gid)
io.input(job_script)
contents = io.read("*all")
return slurm.SUCCESS, contents
end
--[[
--slurm_bb_pools
--
--WARNING: This function is called from slurmctld and must return quickly.
--
--This function is called on slurmctld startup, and then periodically while
--slurmctld is running.
--
--You may specify "pools" of resources here. If you specify pools, a job may
--request a specific pool and the amount it wants from the pool. Slurm will
--subtract the job's usage from the pool at slurm_bb_data_in and Slurm will
--add the job's usage of those resources back to the pool after
--slurm_bb_teardown.
--A job may choose not to specify a pool even you pools are provided.
--If pools are not returned here, Slurm does not track burst buffer resources
--used by jobs.
--
--If pools are desired, they must be returned as the second return value
--of this function. It must be a single JSON string representing the pools.
--]]
function slurm_bb_pools()
--This commented out code specifies pools in a file:
--local pools_file, pools
--pools_file = "/path/to/file"
--io.input(pools_file)
--pools = io.read("*all")
--slurm.log_info("Pools file:\n%s", pools)
--This specifies pools inline:
local pools
pools ="\
{\
\"pools\":\
[\
{ \"id\":\"pool1\", \"quantity\":1000, \"granularity\":1024 },\
{ \"id\":\"pool2\", \"quantity\":5, \"granularity\":2 },\
{ \"id\":\"pool3\", \"quantity\":4, \"granularity\":1 },\
{ \"id\":\"pool4\", \"quantity\":25000, \"granularity\":1 }\
]\
}"
return slurm.SUCCESS, pools
end
--[[
--slurm_bb_job_teardown
--
--This function is called asynchronously and is not required to return quickly.
--This function is normally called after the job completes (or is cancelled).
--]]
function slurm_bb_job_teardown(job_id, job_script, hurry, uid, gid)
slurm.log_info("slurm_bb_job_teardown(). job id:%s, job script:%s, hurry:%s, uid:%s, gid:%s",
job_id, job_script, hurry, uid, gid)
local rc, ret_str = sleep_wrapper(1)
return rc, ret_str
end
--[[
--slurm_bb_setup
--
--This function is called asynchronously and is not required to return quickly.
--This function is called while the job is pending.
--]]
function slurm_bb_setup(job_id, uid, gid, pool, bb_size, job_script, job_info)
slurm.log_info("slurm_bb_setup(). job id:%s, uid: %s, gid:%s, pool:%s, size:%s, job script:%s",
job_id, uid, gid, pool, bb_size, job_script)
return slurm.SUCCESS
end
--[[
--slurm_bb_data_in
--
--This function is called asynchronously and is not required to return quickly.
--This function is called immediately after slurm_bb_setup while the job is
--pending.
--]]
function slurm_bb_data_in(job_id, job_script, uid, gid, job_info)
slurm.log_info("slurm_bb_data_in(). job id:%s, job script:%s, uid:%s, gid:%s",
job_id, job_script, uid, gid)
local rc, ret_str = sleep_wrapper(1)
return rc, ret_str
end
--[[
--slurm_bb_test_data_in
--
--This function is called asynchronously and is not required to return quickly.
--This function is called immediately after slurm_bb_data_in while the job is
--pending.
--
--This function is meant to be used to poll if data_in has completed.
--If the first return value is slurm.SUCCESS and the second return value is
--"BUSY" (or slurm.SLURM_BB_BUSY), then the job will continue to pend and
--this function will continue to be called periodically.
--If the first return value is slurm.SUCCESS and the second return value is
--empty or any other string besides "BUSY", then job and burst buffer state
--will proceed. If the first return value is not slurm.SUCCESS, then the job
--will be placed in a held state.
--
--If this function returns slurm.SUCCESS, slurm.SLURM_BB_BUSY for longer than
--StageInTimeout, then the job will be placed in a held state.
--]]
function slurm_bb_test_data_in(job_id, job_script, uid, gid, job_info)
slurm.log_info("%s: slurm_bb_test_data_in(). job id:%s, job script:%s, uid:%s, gid:%s",
lua_script_name, job_id, job_script, uid, gid)
local rc, ret_str = sleep_wrapper(1)
--return rc, slurm.SLURM_BB_BUSY
return rc, ret_str
end
--[[
--slurm_bb_real_size
--
--This function is called asynchronously and is not required to return quickly.
--This function is called immediately after slurm_bb_test_data_in while the job
--is pending.
--
--This function is only called if pools are specified and the job requested a
--pool. This function may return a number (surrounded by quotes to make it a
--string) as the second return value. If it does, the job's usage of the pool
--will be changed to this number. A commented out example is given.
--]]
function slurm_bb_real_size(job_id, uid, gid, job_info)
slurm.log_info("slurm_bb_real_size(). job id:%s, uid:%s, gid:%s",
job_id, uid, gid)
--return slurm.SUCCESS, "10000"
return slurm.SUCCESS
end
--[[
--slurm_bb_paths
--
--WARNING: This function is called synchronously from slurmctld and must
--return quickly.
--This function is called after the job is scheduled but before the
--job starts running when the job is in a "running + configuring" state.
--
--The file specified by path_file is an empty file. If environment variables are
--written to path_file, these environment variables are added to the job's
--environment. A commented out example is given.
--]]
function slurm_bb_paths(job_id, job_script, path_file, uid, gid, job_info)
slurm.log_info("slurm_bb_paths(). job id:%s, job script:%s, path file:%s, uid:%s, gid:%s",
job_id, job_script, path_file, uid, gid)
--io.output(path_file)
--io.write("FOO=BAR")
return slurm.SUCCESS
end
--[[
--slurm_bb_pre_run
--
--This function is called asynchronously and is not required to return quickly.
--This function is called after the job is scheduled but before the
--job starts running when the job is in a "running + configuring" state.
--]]
function slurm_bb_pre_run(job_id, job_script, uid, gid, job_info)
slurm.log_info("slurm_bb_pre_run(). job id:%s, job script:%s, uid:%s, gid:%s",
job_id, job_script, uid, gid)
local rc, ret_str
rc, ret_str = sleep_wrapper(1)
print_job_info(job_info)
-- Generate a list of nodes allocated to the job.
-- A hostlist expression of the nodes allocated to the job is in
-- job_info["nodes"].
-- scontrol show hostnames expands a hostlist expression to one node
-- per line. It does not send an RPC to slurmctld.
--[[
local slurm_install_path = "/opt/slurm/install"
local scontrol = string.format("%s/bin/scontrol show hostnames %s",
slurm_install_path, job_info["nodes"])
slurm.log_info("Running %s", scontrol)
local fd = io.popen(scontrol)
local nodelist = {}
for node in fd:lines() do
nodelist[#nodelist + 1] = node
end
fd:close()
for i,v in ipairs(nodelist) do
slurm.log_info("slurm_bb_pre_run: node(%u)=%s", i, v)
end
--]]
return rc, ret_str
end
--[[
--slurm_bb_post_run
--
--This function is called asynchronously and is not required to return quickly.
--This function is called after the job finishes. The job is in a "stage out"
--state.
--]]
function slurm_bb_post_run(job_id, job_script, uid, gid, job_info)
slurm.log_info("slurm_post_run(). job id:%s, job script:%s, uid:%s, gid:%s",
job_id, job_script, uid, gid)
local rc, ret_str = sleep_wrapper(1)
return rc, ret_str
end
--[[
--slurm_bb_data_out
--
--This function is called asynchronously and is not required to return quickly.
--This function is called after the job finishes immediately after
--slurm_bb_post_run. The job is in a "stage out" state.
--]]
function slurm_bb_data_out(job_id, job_script, uid, gid, job_info)
slurm.log_info("slurm_bb_data_out(). job id:%s, job script:%s, uid:%s, gid:%s",
job_id, job_script, uid, gid)
local rc, ret_str = sleep_wrapper(1)
return rc, ret_str
end
--[[
--slurm_bb_test_data_out
--
--This function is called asynchronously and is not required to return quickly.
--This function is called immediately after slurm_bb_data_out while the job is
--pending.
--
--This function is meant to be used to poll if data_out has completed.
--If the first return value is slurm.SUCCESS and the second return value is
--"BUSY" (or slurm.SLURM_BB_BUSY), then the job will stay in the completing
--state and this function will continue to be called periodically.
--If the first return value is slurm.SUCCESS and the second return value is
--empty or any other string besides "BUSY", then job and burst buffer state
--will proceed. If the first return value is not slurm.SUCCESS, then the job
--will be placed in a held state.
--]]
function slurm_bb_test_data_out(job_id, job_script, uid, gid, job_info)
slurm.log_info("%s: slurm_bb_test_data_out(). job id:%s, job script:%s, uid:%s, gid:%s",
lua_script_name, job_id, job_script, uid, gid)
local rc, ret_str = sleep_wrapper(1)
--return rc, slurm.SLURM_BB_BUSY
return rc, ret_str
end
--[[
--slurm_bb_get_status
--
--This function is called asynchronously and is not required to return quickly.
--
--This function is called when "scontrol show bbstat" is run. It receives the
--authenticated user id and group id of the caller, as well as a variable
--number of arguments - whatever arguments are after "bbstat".
--For example:
--
-- scontrol show bbstat foo bar
--
--This command will pass 2 arguments after uid and gid to this function:
-- "foo" and "bar".
--
--If this function returns slurm.SUCCESS, then this function's second return
--value will be printed where the scontrol command was run. If this function
--returns slurm.ERROR, then this function's second return value is ignored and
--an error message will be printed instead.
--
--The example in this function simply returns the arguments that were given.
--Example usage:
--
--$ scontrol show bbstat foo bar
--Status return message.
--Args:
--arg1
--arg2
--]]
function slurm_bb_get_status(uid, gid, ...)
local i, v, args, outstr, arr
slurm.log_info("slurm_bb_get_status(), uid: %s, gid:%s",
uid, gid)
arr = { }
-- Create a table from variable arg list
args = {...}
args.n = select("#", ...)
for i,v in ipairs(args) do
arr[#arr+1] = tostring(v)
end
outstr = table.concat(arr, "\n")
return slurm.SUCCESS, "Status return message.\nArgs:\n" .. outstr .. "\n"
end