blob: 3fa1a9157861b247828727aa14626ce26d514af1 [file] [log] [blame]
/*****************************************************************************\
* burst_buffer.h - driver for burst buffer infrastructure and plugin
*****************************************************************************
* Copyright (C) SchedMD LLC.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _INTERFACES_BURST_BUFFER_H
#define _INTERFACES_BURST_BUFFER_H
#include "slurm/slurm.h"
#include "src/common/pack.h"
#include "src/slurmctld/slurmctld.h"
/*
* Initialize the burst buffer infrastructure.
* See bb_g_fini() for the matching termination call.
*
* Returns a Slurm errno.
*/
extern int bb_g_init(void);
/*
* Terminate the burst buffer infrastructure and free its memory.
* See bb_g_init() for the matching initialization call.
*
* Returns a Slurm errno.
*/
extern int bb_g_fini(void);
/*
**************************************************************************
* P L U G I N C A L L S *
**************************************************************************
*/
/*
* Load the current burst buffer state (e.g. how much space is available now).
* Run at the beginning of each scheduling cycle in order to recognize external
* changes to the burst buffer state (e.g. capacity is added, removed, fails,
* etc.).
*
* init_config IN - true if called as part of slurmctld initialization
*
* Returns a Slurm errno.
*/
extern int bb_g_load_state(bool init_config);
/*
* Return a string containing the current burst buffer status.
*
* argc IN - count of status command arguments
* argv IN - status command arguments
* uid IN - authenticated UID
* gid IN - authenticated GID
*
* RET status string; the caller must release its memory using xfree()
*/
extern char *bb_g_get_status(uint32_t argc, char **argv, uint32_t uid,
uint32_t gid);
/*
* Pack current burst buffer state information for network transmission to
* user (e.g. "scontrol show burst").
*
* NOTE(review): the parameter roles below are inferred from their names and
* types only; confirm against the plugin implementation.
* uid - presumably the user ID of the requester
* buffer - presumably the buffer the state is packed into
* protocol_version - presumably the protocol version used for packing
*
* Returns a Slurm errno.
*/
extern int bb_g_state_pack(uid_t uid, buf_t *buffer, uint16_t protocol_version);
/*
* Note that the configuration may have changed and handle any changes in
* BurstBufferParameters.
*
* Returns a Slurm errno.
*/
extern int bb_g_reconfig(void);
/*
* Give the total burst buffer size in MB of a given plugin name (e.g. "cray").
* If "name" is NULL, return the total space of all burst buffer plugins.
*
* name IN - burst buffer plugin name, or NULL for all plugins
*
* RET total burst buffer size in MB
*/
extern uint64_t bb_g_get_system_size(char *name);
/*
* Preliminary validation of a job submit request with respect to burst buffer
* options. Performed after setting the default account and QOS, but prior to
* establishing the job ID or creating the script file.
* See bb_g_job_validate2() for the secondary validation step.
*
* job_desc IN - Job submission request
* submit_uid IN - ID of the user submitting the job.
* err_msg IN/OUT - Message to send to the user in case of error.
*
* Returns a Slurm errno.
*/
extern int bb_g_job_validate(job_desc_msg_t *job_desc, uid_t submit_uid,
char **err_msg);
/*
* Secondary validation of a job submit request with respect to burst buffer
* options. Performed after establishing the job ID and creating the script
* file.
*
* job_ptr IN - job record
* err_msg IN/OUT - NOTE(review): presumably the message to send to the user in
*                  case of error, as for bb_g_job_validate() - confirm
*
* Returns a Slurm errno.
*/
extern int bb_g_job_validate2(job_record_t *job_ptr, char **err_msg);
/*
* Convert a hetjob batch script into a script containing only the portions
* relevant to a specific hetjob component.
*
* script IN - Whole job batch script
* het_job_offset IN - Zero origin hetjob component ID
*
* RET script for that job component; the caller must call xfree() to release
*     its memory
*/
extern char *bb_g_build_het_job_script(char *script, uint32_t het_job_offset);
/*
* Fill in the tres_cnt (in MB) based off the job record.
* NOTE: Based upon job-specific burst buffers, excludes persistent buffers
*
* job_ptr IN - job record
* tres_cnt IN/OUT - fill in this already allocated array with tres_cnts
* locked IN - whether the assoc_mgr tres read lock is already held or not
*/
extern void bb_g_job_set_tres_cnt(job_record_t *job_ptr, uint64_t *tres_cnt,
bool locked);
/*
* For a given job, return our best guess of when it might be able to start.
*
* job_ptr IN - job record
*
* RET estimated start time
*/
extern time_t bb_g_job_get_est_start(job_record_t *job_ptr);
/*
* Allocate burst buffers to the jobs expected to start soonest.
* NOTE: Job records must be read locked.
*
* Returns a Slurm errno.
*/
extern int bb_g_job_try_stage_in(void);
/*
* Determine if a job's burst buffer stage-in is complete.
*
* job_ptr IN - Job to test
* test_only IN - If false, then attempt to load burst buffer if possible
*
* RET:  0 - stage-in is underway
*       1 - stage-in complete
*      -1 - stage-in not started or burst buffer in some unexpected state
*/
extern int bb_g_job_test_stage_in(job_record_t *job_ptr, bool test_only);
/*
* Attempt to claim burst buffer resources.
* At this time, bb_g_job_test_stage_in() should have been run successfully AND
* the compute nodes selected for the job.
*
* job_ptr IN - job record
*
* Returns a Slurm errno.
*/
extern int bb_g_job_begin(job_record_t *job_ptr);
/*
* Revoke allocation, but do not release previously allocated resources.
* Executed after bb_g_job_begin() if there was an allocation failure.
*
* job_ptr IN - job record
*
* Returns a Slurm errno.
*/
extern int bb_g_job_revoke_alloc(job_record_t *job_ptr);
/*
* Trigger a job's burst buffer stage-out to begin.
*
* job_ptr IN - job record
*
* Returns a Slurm errno.
*/
extern int bb_g_job_start_stage_out(job_record_t *job_ptr);
/*
* Determine if a job's burst buffer post_run operation is complete.
*
* job_ptr IN - job record
*
* RET:  0 - post_run is underway
*       1 - post_run complete
*      -1 - fatal error
*/
extern int bb_g_job_test_post_run(job_record_t *job_ptr);
/*
* Determine if a job's burst buffer stage-out is complete.
*
* job_ptr IN - job record
*
* RET:  0 - stage-out is underway
*       1 - stage-out complete
*      -1 - fatal error
*/
extern int bb_g_job_test_stage_out(job_record_t *job_ptr);
/*
* Terminate any file staging and completely release burst buffer resources.
*
* job_ptr IN - job record
*
* Returns a Slurm errno.
*/
extern int bb_g_job_cancel(job_record_t *job_ptr);
/*
* Run a script in the burst buffer plugin
*
* func IN - script function to run
* job_id IN - job id for which we are running the script (0 if not for a job)
* argc IN - number of arguments to pass to script
* argv IN - argument list to pass to script
* job_info - NOTE(review): job information message; its role and direction
*            are not visible in this header - confirm against the plugin
* resp_msg OUT - string returned by script
*
* NOTE: This is currently only called for the lua plugin.
*
* Returns the status of the script.
*/
extern int bb_g_run_script(char *func, uint32_t job_id, uint32_t argc,
char **argv, job_info_msg_t *job_info,
char **resp_msg);
/*
* Translate a burst buffer string to its equivalent TRES string
* (e.g. "cray:2G,generic:4M" -> "1004=2048,1005=4").
*
* burst_buffer IN - burst buffer string to translate
*
* RET TRES string; the caller must xfree the return value
*/
extern char *bb_g_xlate_bb_2_tres_str(char *burst_buffer);
#endif