|  | --[[ | 
|  | --Example burst_buffer.lua file for Slurm | 
|  | -- | 
|  | --In order to use this file, it must be called "burst_buffer.lua" and must | 
|  | --exist in the same directory as slurm.conf and burst_buffer.conf. | 
|  | --BurstBufferType=burst_buffer/lua must also be specified in slurm.conf. | 
|  | -- | 
|  | --This file implements each required burst buffer function, but does not | 
|  | --do anything particularly useful. | 
|  | -- | 
|  | --The required functions begin with "slurm_bb_". Other functions in this | 
|  | --example file are provided merely as examples. | 
|  | -- | 
|  | --Some functions in this file are expected to be very fast because they are | 
|  | --called from slurmctld while it has some mutexes locked. If these functions | 
|  | --run for very long (even 1 second), it may severely impact slurmctld | 
|  | --performance. These functions cannot be killed, so they will never time out. | 
|  | --The remaining functions are called asynchronously and can potentially run for | 
|  | --as long as needed without harming slurmctld performance. These functions are | 
|  | --called from a separate process and will be killed if they exceed the time | 
|  | --limit specified by burst_buffer.conf. | 
|  | --A comment above each function will specify whether or not the function must | 
|  | --return quickly. | 
|  | -- | 
|  | -- | 
|  | --Function parameters: | 
|  | -- | 
|  | --All parameters for "slurm_bb_" functions except for job_info are strings. | 
|  | --job_info is a table of information about the job. The function print_job_info | 
|  | --demonstrates how to read the data in this table. A complete list of fields is | 
|  | --in the Slurm source code in the following location: | 
|  | -- | 
|  | --src/plugins/burst_buffer/lua/burst_buffer_lua.c:_lua_job_info_field | 
|  | -- | 
|  | --NOTE: job_info is read-only. It is a snapshot of job information | 
|  | --just before this function was called. The actual job record can be changed | 
|  | --while this script is running, making the job_info not in sync with the real | 
|  | --job record. | 
|  | -- | 
|  | -- | 
|  | --Return values: | 
|  | -- | 
|  | --Each function may return 1 or 2 values. The first value must be the return | 
|  | --code. The second value is optional. If given, the second return value is | 
|  | --a string. It may be useful to pass a message back to the job, for example a | 
|  | --reason why a particular burst buffer function failed, but is not required. | 
|  | --If a "slurm_bb_" function returns an error and a string, the string may | 
|  | --appear in the job's reason field. | 
|  | -- | 
|  | -- | 
|  | --External "slurm" functions: | 
|  | -- | 
|  | --You may log to the slurmctld log file with Slurm logging functions such as | 
|  | --slurm.log_info(). Replace "info" with the desired debug level. | 
|  | -- | 
|  | -- | 
|  | --This file also provides an example of how to use a module in lua-posix. | 
|  | --lua-posix provides posix bindings to lua, which can be very useful, but it is | 
|  | --not required to run this file and may be removed. | 
|  | --]] | 
|  |  | 
|  | --Print job_info to the log file | 
--Log the fields of a job_info table with slurm.log_info().
--
--job_info: table of job information. Only the fields read below are used.
--Returns nothing.
function print_job_info(job_info)
	--Every variable is declared local so that nothing leaks into the global
	--environment shared with the other functions in this script.
	local account = job_info["account"]
	local array_job_id = job_info["array_job_id"]
	local array_task_id = job_info["array_task_id"]
	local array_max_tasks = job_info["array_max_tasks"]
	local array_task_str = job_info["array_task_str"]
	local gres_detail_cnt = job_info["gres_detail_cnt"]
	--[[
	--The keys of this table are the index starting with 1 and
	--ending with gres_detail_cnt. The index is the offset of the
	--node in the job (index==1 is the first node in the job).
	--
	--The values of this table are strings representing the gres
	--currently allocated to the job on each node. The format
	--is a comma-separated list of:
	--
	--For gres with a file:
	--<gres_name>[:<gres_type>]:<count>(IDX:<gres_index>)
	--
	--For count-only gres:
	--<gres_name>[:<gres_type>](CNT:<count>)
	--
	--This field is only non-nil if the job is running and has
	--allocated gres; hence it only applies
	--to slurm_bb_pre_run since that is the only hook called with
	--a job in the running state.
	--]]
	local gres_table = job_info["gres_detail_str"]
	--Require the table itself to be present: a non-zero (or missing) count
	--with a nil table would otherwise make table.concat() raise an error.
	local have_gres_detail = (gres_detail_cnt ~= 0) and (gres_table ~= nil)
	local gres_detail_str
	if have_gres_detail then
		local sep = "\n\t\t"
		gres_detail_str = string.format("%s%s",
			sep, table.concat(gres_table, sep))
	end
	local gres_total = job_info["gres_total"]
	local group_id = job_info["group_id"]
	local het_job_id = job_info["het_job_id"]
	--Read only to demonstrate the field; it is not logged below.
	local het_job_id_set = job_info["het_job_id_set"]
	local het_job_offset = job_info["het_job_offset"]
	local job_id = job_info["job_id"]
	local job_state = job_info["job_state"]
	local nodes = job_info["nodes"]
	local partition = job_info["partition"]
	local user_name = job_info["user_name"]

	--user_name is not guaranteed to be set, but is accurate when it is.
	--See slurm_job_info_t in slurm.h. This is for performance reasons.
	--The script may do a lookup using the UID if needed.

	if (user_name == nil) then
		user_name = "NULL"
		--Or do lookup with UID here
	end

	local fmt = table.concat({
		"JobId=%u",
		"account=%s",
		"array_job_id=%u",
		"array_task_id=%u",
		"array_max_tasks=%u",
		"array_task_str=%s",
		"gres_total=%s",
		"group_id=%u",
		"het_job_id=%u",
		"het_job_offset=%u",
		"job_state=%u",
		"nodes=%s",
		"partition=%s",
		"user_name=%s",
		"",
	}, "\n")

	--String fields may be nil; tostring() keeps "%s" from raising an error
	--on Lua versions whose string.format requires a string argument.
	slurm.log_info(fmt,
		job_id, tostring(account), array_job_id, array_task_id,
		array_max_tasks, tostring(array_task_str), tostring(gres_total),
		group_id, het_job_id, het_job_offset, job_state,
		tostring(nodes), tostring(partition), user_name)

	if have_gres_detail then
		slurm.log_info("complete gres_detail_str=\n%s",
			gres_detail_str)
		for i, v in ipairs(gres_table) do
			slurm.log_info("Node index = %u, gres_detail_str = %s",
				i, v)
		end
	end
end
|  |  | 
|  |  | 
--Sleep for n seconds using the posix.unistd module.
--This requires lua-posix to be installed.
--
--Returns the value given back by posix.unistd.sleep(): 0 if successful or
--non-zero for the amount of time left to sleep.
function posix_sleep(n)
	local unistd = require("posix.unistd")
	slurm.log_info("sleep for %u seconds", n)
	local remaining = unistd.sleep(n)
	return remaining
end
|  |  | 
--Wrapper for the posix "sleep" function in the lua-posix posix.unistd
--module (see posix_sleep). The code that actually sleeps is left commented
--out at the end of the function body, so as written this function sleeps for
--no time at all and always returns slurm.SUCCESS with an empty string.
function sleep_wrapper(n)
	local rc, ret_str = slurm.SUCCESS, ""
	return rc, ret_str
	--To really sleep, replace the two lines above with:
	--local rc, ret_str
	--rc = posix_sleep(n)
	--if (rc ~= 0) then
	--	ret_str = "Sleep interrupted, " .. tostring(rc) .. " seconds left"
	--	rc = slurm.ERROR
	--else
	--	ret_str = "Success"
	--	rc = slurm.SUCCESS
	--end
	--return rc, ret_str
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_job_process | 
|  | -- | 
|  | --WARNING: This function is called synchronously from slurmctld and must | 
|  | --return quickly. | 
|  | -- | 
|  | --This function is called on job submission. | 
|  | --This example reads, logs, and returns the job script. | 
|  | --If this function returns an error, the job is rejected and the second return | 
|  | --value (if given) is printed where salloc, sbatch, or srun was called. | 
|  | --]] | 
--Log the call, read the whole job script and return it.
--
--job_script: path of the job script file to read.
--uid, gid:   only logged.
--job_info:   not used by this example.
--
--Returns slurm.SUCCESS and the script contents, or slurm.ERROR and a message
--if the script cannot be opened or read.
function slurm_bb_job_process(job_script, uid, gid, job_info)
	slurm.log_info("slurm_bb_job_process(). job_script=%s, uid=%s, gid=%s",
		job_script, uid, gid)

	--Use a private handle rather than io.input(): io.input() replaces the
	--process-wide default input file and leaves the handle open, and it
	--raises an error instead of returning one when the file is unreadable.
	local script_file, open_err = io.open(job_script, "r")
	if (script_file == nil) then
		return slurm.ERROR,
			"Unable to open job script: " .. tostring(open_err)
	end

	local contents, read_err = script_file:read("*a")
	script_file:close()
	if (contents == nil) then
		return slurm.ERROR,
			"Unable to read job script: " .. tostring(read_err)
	end

	return slurm.SUCCESS, contents
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_pools | 
|  | -- | 
|  | --WARNING: This function is called from slurmctld and must return quickly. | 
|  | -- | 
|  | --This function is called on slurmctld startup, and then periodically while | 
|  | --slurmctld is running. | 
|  | -- | 
|  | --You may specify "pools" of resources here. If you specify pools, a job may | 
|  | --request a specific pool and the amount it wants from the pool. Slurm will | 
|  | --subtract the job's usage from the pool at slurm_bb_data_in and Slurm will | 
|  | --add the job's usage of those resources back to the pool after | 
|  | --slurm_bb_teardown. | 
|  | --A job may choose not to specify a pool even if pools are provided. | 
|  | --If pools are not returned here, Slurm does not track burst buffer resources | 
|  | --used by jobs. | 
|  | -- | 
|  | --If pools are desired, they must be returned as the second return value | 
|  | --of this function. It must be a single JSON string representing the pools. | 
|  | --]] | 
--Return slurm.SUCCESS and a JSON string describing four example pools.
function slurm_bb_pools()
	--This commented out code specifies pools in a file:
	--local pools_file, pools
	--pools_file = "/path/to/file"

	--io.input(pools_file)
	--pools = io.read("*all")
	--slurm.log_info("Pools file:\n%s", pools)

	--This specifies pools inline, one element per line of the JSON text.
	--The leading empty element makes the string start with a newline.
	local json_lines = {
		"",
		"{",
		"\"pools\":",
		"[",
		"{ \"id\":\"pool1\", \"quantity\":1000, \"granularity\":1024 },",
		"{ \"id\":\"pool2\", \"quantity\":5, \"granularity\":2 },",
		"{ \"id\":\"pool3\", \"quantity\":4, \"granularity\":1 },",
		"{ \"id\":\"pool4\", \"quantity\":25000, \"granularity\":1 }",
		"]",
		"}",
	}

	return slurm.SUCCESS, table.concat(json_lines, "\n")
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_job_teardown | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is normally called after the job completes (or is cancelled). | 
|  | --]] | 
--Log the teardown request, then hand back the return code and message
--produced by sleep_wrapper(1).
function slurm_bb_job_teardown(job_id, job_script, hurry, uid, gid)
	local fmt = "slurm_bb_job_teardown(). job id:%s, job script:%s, hurry:%s, uid:%s, gid:%s"
	slurm.log_info(fmt, job_id, job_script, hurry, uid, gid)

	local status, message = sleep_wrapper(1)
	return status, message
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_setup | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called while the job is pending. | 
|  | --]] | 
--Log the setup request and report success. job_info is not used by this
--example. Only a return code is returned (no message).
function slurm_bb_setup(job_id, uid, gid, pool, bb_size, job_script, job_info)
	local fmt = "slurm_bb_setup(). job id:%s, uid: %s, gid:%s, pool:%s, size:%s, job script:%s"
	slurm.log_info(fmt, job_id, uid, gid, pool, bb_size, job_script)

	return slurm.SUCCESS
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_data_in | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called immediately after slurm_bb_setup while the job is | 
|  | --pending. | 
|  | --]] | 
--Log the stage-in request, then hand back the return code and message
--produced by sleep_wrapper(1). job_info is not used by this example.
function slurm_bb_data_in(job_id, job_script, uid, gid, job_info)
	local fmt = "slurm_bb_data_in(). job id:%s, job script:%s, uid:%s, gid:%s"
	slurm.log_info(fmt, job_id, job_script, uid, gid)

	local status, message = sleep_wrapper(1)
	return status, message
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_test_data_in | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called immediately after slurm_bb_data_in while the job is | 
|  | --pending. | 
|  | -- | 
|  | --This function is meant to be used to poll if data_in has completed. | 
|  | --If the first return value is slurm.SUCCESS and the second return value is | 
|  | --"BUSY" (or slurm.SLURM_BB_BUSY), then the job will continue to pend and | 
|  | --this function will continue to be called periodically. | 
|  | --If the first return value is slurm.SUCCESS and the second return value is | 
|  | --empty or any other string besides "BUSY", then job and burst buffer state | 
|  | --will proceed. If the first return value is not slurm.SUCCESS, then the job | 
|  | --will be placed in a held state. | 
|  | -- | 
|  | --If this function returns slurm.SUCCESS, slurm.SLURM_BB_BUSY for longer than | 
|  | --StageInTimeout, then the job will be placed in a held state. | 
|  | --]] | 
--Log the poll request, then hand back the return code and message produced
--by sleep_wrapper(1). job_info is not used by this example.
--
--To report that stage-in is still in progress, return slurm.SLURM_BB_BUSY as
--the second value instead (see the commented out return below).
function slurm_bb_test_data_in(job_id, job_script, uid, gid, job_info)
	--lua_script_name is not defined anywhere in this file. Fall back to the
	--name this file is required to have so the log prefix is never nil; a
	--global lua_script_name is still honoured if one has been set.
	local script_name = lua_script_name or "burst_buffer.lua"
	slurm.log_info("%s: slurm_bb_test_data_in(). job id:%s, job script:%s, uid:%s, gid:%s",
		script_name, job_id, job_script, uid, gid)
	local rc, ret_str = sleep_wrapper(1)
	--return rc, slurm.SLURM_BB_BUSY
	return rc, ret_str
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_real_size | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called immediately after slurm_bb_test_data_in while the job | 
|  | --is pending. | 
|  | -- | 
|  | --This function is only called if pools are specified and the job requested a | 
|  | --pool. This function may return a number (surrounded by quotes to make it a | 
|  | --string) as the second return value. If it does, the job's usage of the pool | 
|  | --will be changed to this number. A commented out example is given. | 
|  | --]] | 
--Log the request and report success without changing the job's pool usage.
--job_info is not used by this example.
function slurm_bb_real_size(job_id, uid, gid, job_info)
	local fmt = "slurm_bb_real_size(). job id:%s, uid:%s, gid:%s"
	slurm.log_info(fmt, job_id, uid, gid)

	--To change the job's usage of the pool, return the new size as a string:
	--return slurm.SUCCESS, "10000"
	return slurm.SUCCESS
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_paths | 
|  | -- | 
|  | --WARNING: This function is called synchronously from slurmctld and must | 
|  | --return quickly. | 
|  | --This function is called after the job is scheduled but before the | 
|  | --job starts running when the job is in a "running + configuring" state. | 
|  | -- | 
|  | --The file specified by path_file is an empty file. If environment variables are | 
|  | --written to path_file, these environment variables are added to the job's | 
|  | --environment. A commented out example is given. | 
|  | --]] | 
--Log the request and report success. As written nothing is written to
--path_file; job_info is not used by this example.
function slurm_bb_paths(job_id, job_script, path_file, uid, gid, job_info)
	local fmt = "slurm_bb_paths(). job id:%s, job script:%s, path file:%s, uid:%s, gid:%s"
	slurm.log_info(fmt, job_id, job_script, path_file, uid, gid)

	--Example of writing an environment variable to path_file:
	--io.output(path_file)
	--io.write("FOO=BAR")
	return slurm.SUCCESS
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_pre_run | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called after the job is scheduled but before the | 
|  | --job starts running when the job is in a "running + configuring" state. | 
|  | --]] | 
--Log the request, call sleep_wrapper(1), log the job information with
--print_job_info(), then hand back what sleep_wrapper returned.
function slurm_bb_pre_run(job_id, job_script, uid, gid, job_info)
	local fmt = "slurm_bb_pre_run(). job id:%s, job script:%s, uid:%s, gid:%s"
	slurm.log_info(fmt, job_id, job_script, uid, gid)

	local status, message = sleep_wrapper(1)

	print_job_info(job_info)

	-- Generate a list of nodes allocated to the job.
	-- A hostlist expression of the nodes allocated to the job is in
	-- job_info["nodes"].
	-- scontrol show hostnames expands a hostlist expression to one node
	-- per line. It does not send an RPC to slurmctld.
	--[[
	local slurm_install_path = "/opt/slurm/install"
	local scontrol = string.format("%s/bin/scontrol show hostnames %s",
		slurm_install_path, job_info["nodes"])
	slurm.log_info("Running %s", scontrol)
	local fd = io.popen(scontrol)
	local nodelist = {}

	for node in fd:lines() do
		nodelist[#nodelist + 1] = node
	end
	fd:close()

	for i,v in ipairs(nodelist) do
		slurm.log_info("slurm_bb_pre_run: node(%u)=%s", i, v)
	end
	--]]

	return status, message
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_post_run | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called after the job finishes. The job is in a "stage out" | 
|  | --state. | 
|  | --]] | 
--Log the request, then hand back the return code and message produced by
--sleep_wrapper(1). job_info is not used by this example.
function slurm_bb_post_run(job_id, job_script, uid, gid, job_info)
	--The logged name matches the function name, like the other hooks, so
	--that log lines can be traced back to this hook.
	slurm.log_info("slurm_bb_post_run(). job id:%s, job script:%s, uid:%s, gid:%s",
		job_id, job_script, uid, gid)
	local rc, ret_str = sleep_wrapper(1)
	return rc, ret_str
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_data_out | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called after the job finishes immediately after | 
|  | --slurm_bb_post_run. The job is in a "stage out" state. | 
|  | --]] | 
--Log the stage-out request, then hand back the return code and message
--produced by sleep_wrapper(1). job_info is not used by this example.
function slurm_bb_data_out(job_id, job_script, uid, gid, job_info)
	local fmt = "slurm_bb_data_out(). job id:%s, job script:%s, uid:%s, gid:%s"
	slurm.log_info(fmt, job_id, job_script, uid, gid)

	local status, message = sleep_wrapper(1)
	return status, message
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_test_data_out | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | --This function is called immediately after slurm_bb_data_out while the job is | 
|  | --in a "stage out" state. | 
|  | -- | 
|  | --This function is meant to be used to poll if data_out has completed. | 
|  | --If the first return value is slurm.SUCCESS and the second return value is | 
|  | --"BUSY" (or slurm.SLURM_BB_BUSY), then the job will stay in the completing | 
|  | --state and this function will continue to be called periodically. | 
|  | --If the first return value is slurm.SUCCESS and the second return value is | 
|  | --empty or any other string besides "BUSY", then job and burst buffer state | 
|  | --will proceed. If the first return value is not slurm.SUCCESS, then the job | 
|  | --will be placed in a held state. | 
|  | --]] | 
--Log the poll request, then hand back the return code and message produced
--by sleep_wrapper(1). job_info is not used by this example.
--
--To report that stage-out is still in progress, return slurm.SLURM_BB_BUSY as
--the second value instead (see the commented out return below).
function slurm_bb_test_data_out(job_id, job_script, uid, gid, job_info)
	--lua_script_name is not defined anywhere in this file. Fall back to the
	--name this file is required to have so the log prefix is never nil; a
	--global lua_script_name is still honoured if one has been set.
	local script_name = lua_script_name or "burst_buffer.lua"
	slurm.log_info("%s: slurm_bb_test_data_out(). job id:%s, job script:%s, uid:%s, gid:%s",
		script_name, job_id, job_script, uid, gid)
	local rc, ret_str = sleep_wrapper(1)
	--return rc, slurm.SLURM_BB_BUSY
	return rc, ret_str
end
|  |  | 
|  | --[[ | 
|  | --slurm_bb_get_status | 
|  | -- | 
|  | --This function is called asynchronously and is not required to return quickly. | 
|  | -- | 
|  | --This function is called when "scontrol show bbstat" is run. It receives the | 
|  | --authenticated user id and group id of the caller, as well as a variable | 
|  | --number of arguments - whatever arguments are after "bbstat". | 
|  | --For example: | 
|  | -- | 
|  | --  scontrol show bbstat foo bar | 
|  | -- | 
|  | --This command will pass 2 arguments after uid and gid to this function: | 
|  | --  "foo" and "bar". | 
|  | -- | 
|  | --If this function returns slurm.SUCCESS, then this function's second return | 
|  | --value will be printed where the scontrol command was run. If this function | 
|  | --returns slurm.ERROR, then this function's second return value is ignored and | 
|  | --an error message will be printed instead. | 
|  | -- | 
|  | --The example in this function simply returns the arguments that were given. | 
|  | --Example usage: | 
|  | -- | 
|  | --$ scontrol show bbstat foo bar | 
|  | --Status return message. | 
|  | --Args: | 
|  | --arg1 | 
|  | --arg2 | 
|  | --]] | 
--Log the caller's uid and gid and echo the extra arguments back.
--
--uid, gid: only logged.
--...:      any number of additional arguments.
--
--Returns slurm.SUCCESS and a string that lists each extra argument on its
--own line.
function slurm_bb_get_status(uid, gid, ...)
	slurm.log_info("slurm_bb_get_status(), uid: %s, gid:%s",
		uid, gid)

	--select("#", ...) is the true argument count. Iterating up to it (rather
	--than with ipairs) keeps every argument even if one of them is nil.
	local argc = select("#", ...)
	local args = {...}
	local arg_lines = {}
	for i = 1, argc do
		arg_lines[i] = tostring(args[i])
	end
	local outstr = table.concat(arg_lines, "\n")

	return slurm.SUCCESS, "Status return message.\nArgs:\n" .. outstr .. "\n"
end