/*****************************************************************************\
* node_scheduler.h - definitions of functions in node_scheduler.c
*****************************************************************************
* Copyright (C) 2004-2007 The Regents of the University of California.
* Copyright (C) 2008 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Morris Jette <jette@llnl.gov> et al.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _HAVE_NODE_SCHEDULER_H
#define _HAVE_NODE_SCHEDULER_H
typedef struct {
        char **err_msg;
        job_record_t *job_ptr;
        int rc;
        int rc_best;
        int rc_part_limits;
        int rc_qos;
        int rc_resv;
        bool test_only;
} job_node_select_t;
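/*
 * Illustrative sketch (not part of the original header): how a caller might
 * set up job_node_select_t before passing it to select_nodes() declared
 * below.  job_ptr stands in for a job record the calling scheduler already
 * holds; the rc_* members are assumed to be filled in during the call.
 *
 *        char *err_msg = NULL;
 *        job_node_select_t job_node_select = {
 *                .err_msg = &err_msg,
 *                .job_ptr = job_ptr,
 *        };
 */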
/*
* allocate_nodes - change state of specified nodes to NODE_STATE_ALLOCATED
* also claim required licenses
* IN job_ptr - job being allocated resources
*/
extern void allocate_nodes(job_record_t *job_ptr);
/* For a given job, if the available nodes differ from those with currently
* active features, return a bitmap of nodes with the job's required
* features currently active
* IN job_ptr - job requesting resource allocation
* IN avail_bitmap - nodes currently available for this job
* OUT active_bitmap - nodes with job's features currently active, NULL if
* identical to avail_bitmap
* NOTE: Currently supports only simple AND of features
*/
extern void build_active_feature_bitmap(job_record_t *job_ptr,
                                        bitstr_t *avail_bitmap,
                                        bitstr_t **active_bitmap);
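/*
 * Illustrative sketch (not part of the original header): distinguishing the
 * case where every available node already has the required features active
 * (active_bitmap comes back NULL) from the case where only a subset does.
 * avail_bitmap is assumed to come from the caller; FREE_NULL_BITMAP() is the
 * usual Slurm bitmap cleanup macro.
 *
 *        bitstr_t *active_bitmap = NULL;
 *        build_active_feature_bitmap(job_ptr, avail_bitmap, &active_bitmap);
 *        if (active_bitmap) {
 *                // only nodes set here are usable without a feature change
 *                FREE_NULL_BITMAP(active_bitmap);
 *        }
 */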
/* Return bitmap of nodes with all specified features currently active */
extern bitstr_t *build_active_feature_bitmap2(char *reboot_features);
/*
* build_node_details - sets addresses for allocated nodes
* IN job_ptr - pointer to a job record
* IN new_alloc - set if new job allocation, cleared if state recovery
*/
extern void build_node_details(job_record_t *job_ptr, bool new_alloc);
/*
* deallocate_nodes - for a given job, deallocate its nodes and make
* their state NODE_STATE_COMPLETING
* also release the job's licenses
* IN job_ptr - pointer to terminating job (already in some COMPLETING state)
* IN timeout - true if job exhausted time limit, send REQUEST_KILL_TIMELIMIT
* RPC instead of REQUEST_TERMINATE_JOB
* IN suspended - true if job was already suspended (node's run_job_cnt
 * already decremented)
* IN preempted - true if job is being preempted
*/
extern void deallocate_nodes(job_record_t *job_ptr, bool timeout,
                             bool suspended, bool preempted);
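/*
 * Illustrative sketch (not part of the original header): releasing the
 * allocation of a job that simply ran to completion, so none of the special
 * cases apply (no time limit kill, not suspended, not preempted).
 *
 *        deallocate_nodes(job_ptr, false, false, false);
 */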
/* Remove nodes from consideration for allocation based upon "mcs" by
 * other users
 * job_ptr IN - Job to be scheduled
 * mcs_select IN - set if MCS node filtering should be applied to this job
 * usable_node_mask IN/OUT - Nodes available for use by this job's mcs
 */
extern void filter_by_node_mcs(job_record_t *job_ptr, int mcs_select,
                               bitstr_t *usable_node_mask);
/* Remove nodes from consideration for allocation based upon "ownership" by
* other users
* job_ptr IN - Job to be scheduled
* usable_node_mask IN/OUT - Nodes available for use by this job's user
*/
extern void filter_by_node_owner(job_record_t *job_ptr,
                                 bitstr_t *usable_node_mask);
/*
* For every element in the feature_list, identify the nodes with that feature
* either active or available and set the feature_list's node_bitmap_active and
* node_bitmap_avail fields accordingly.
*/
extern void find_feature_nodes(list_t *feature_list, bool can_reboot);
/*
* re_kill_job - for a given job, deallocate its nodes for a second time,
 * basically a cleanup for failed deallocate_nodes() calls
* IN job_ptr - pointer to terminating job (already in some COMPLETING state)
* globals: node_record_count - number of nodes in the system
* node_record_table_ptr - pointer to global node table
*/
extern void re_kill_job(job_record_t *job_ptr);
/*
* select_nodes - select and allocate nodes to a specific job
 * IN job_node_select - pointer to a job_node_select_t holding at least the
 * job record pointer (job_ptr)
* IN test_only - if set do not allocate nodes, just confirm they
* could be allocated now
* IN submission - if set ignore reservations
* IN scheduler_type - which scheduler is calling this
 * (e.g. SLURMDB_JOB_FLAG_BACKFILL, SLURMDB_JOB_FLAG_SCHED, etc.)
* RET 0 on success, ESLURM code from slurm_errno.h otherwise
* globals: list_part - global list of partition info
* default_part_loc - pointer to default partition
* config_list - global list of node configuration info
* Notes: The algorithm is
* 1) Build a table (node_set_ptr) of nodes with the requisite
* configuration. Each table entry includes their weight,
* node_list, features, etc.
* 2) Call _pick_best_nodes() to select those nodes best satisfying
 * the request (e.g. best-fit or other criterion)
* 3) Call allocate_nodes() to perform the actual allocation
*/
extern int select_nodes(job_node_select_t *job_node_select,
                        bool test_only, bool submission,
                        uint32_t scheduler_type);
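/*
 * Illustrative sketch (not part of the original header): a scheduler might
 * first probe with test_only set and only request the real allocation if the
 * job could run now.  job_node_select is assumed to be prepared as in the
 * sketch after job_node_select_t above; SLURMDB_JOB_FLAG_BACKFILL is one of
 * the scheduler_type values named in the comment above.
 *
 *        int error_code = select_nodes(&job_node_select, true, false,
 *                                      SLURMDB_JOB_FLAG_BACKFILL);
 *        if (error_code == SLURM_SUCCESS)
 *                error_code = select_nodes(&job_node_select, false, false,
 *                                          SLURMDB_JOB_FLAG_BACKFILL);
 */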
/*
* get_node_cnts - determine the number of nodes for the requested job.
* IN job_ptr - pointer to the job record.
 * IN qos_flags - Flags of the job_ptr's QOS, passed directly so we don't
 * have to send in a pointer or take the QOS read lock before calling.
* IN part_ptr - pointer to the job's partition.
* OUT min_nodes - The minimum number of nodes for the job.
 * OUT req_nodes - The number of nodes the select plugin should target.
* OUT max_nodes - The max number of nodes for the job.
* RET SLURM_SUCCESS on success, ESLURM code from slurm_errno.h otherwise.
*/
extern int get_node_cnts(job_record_t *job_ptr, uint32_t qos_flags,
                         part_record_t *part_ptr, uint32_t *min_nodes,
                         uint32_t *req_nodes, uint32_t *max_nodes);
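/*
 * Illustrative sketch (not part of the original header): resolving the node
 * count window before calling the select plugin.  qos_flags is assumed to
 * have been read from the job's QOS record by the caller.
 *
 *        uint32_t min_nodes = 0, req_nodes = 0, max_nodes = 0;
 *        int rc = get_node_cnts(job_ptr, qos_flags, job_ptr->part_ptr,
 *                               &min_nodes, &req_nodes, &max_nodes);
 *        if (rc != SLURM_SUCCESS)
 *                return rc;
 */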
/* launch_prolog - launch job prolog script by slurmd on allocated nodes
* IN job_ptr - pointer to the job record
*/
extern void launch_prolog(job_record_t *job_ptr);
/*
* valid_feature_counts - validate a job's features can be satisfied
* by the selected nodes (NOTE: does not process MOR or XAND operators)
* IN job_ptr - job to operate on
* IN use_active - if set, then only consider nodes with the identified features
* active, otherwise use available features
* IN/OUT node_bitmap - nodes available for use, clear if unusable
* OUT has_mor - set if MOR/XAND found in feature expression
* RET SLURM_SUCCESS or error
*/
extern int valid_feature_counts(job_record_t *job_ptr, bool use_active,
                                bitstr_t *node_bitmap, bool *has_mor);
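/*
 * Illustrative sketch (not part of the original header): pruning a working
 * copy of an availability bitmap against the job's feature expression.
 * avail_bitmap stands in for whatever node set the caller starts from;
 * bit_copy(), bit_set_count() and FREE_NULL_BITMAP() are the usual Slurm
 * bitmap helpers.
 *
 *        bool has_mor = false;
 *        bitstr_t *node_bitmap = bit_copy(avail_bitmap);
 *        int rc = valid_feature_counts(job_ptr, false, node_bitmap, &has_mor);
 *        if ((rc != SLURM_SUCCESS) || (bit_set_count(node_bitmap) == 0))
 *                rc = ESLURM_NODES_BUSY;
 *        FREE_NULL_BITMAP(node_bitmap);
 */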
#endif /* !_HAVE_NODE_SCHEDULER_H */