blob: 8eb96d5d3f0d8a778218cfc37198e3c0fcf91de9 [file] [log] [blame] [edit]
/*****************************************************************************\
* node_conf.h - definitions for reading the node part of slurm configuration
* file and work with the corresponding structures
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov> et. al.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.schedmd.com/slurmdocs/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef _HAVE_NODE_CONF_H
#define _HAVE_NODE_CONF_H
#if HAVE_CONFIG_H
# include "config.h"
# if HAVE_INTTYPES_H
# include <inttypes.h>
# else
# if HAVE_STDINT_H
# include <stdint.h>
# endif
# endif /* HAVE_INTTYPES_H */
#endif
#include <time.h>
#include "src/common/bitstring.h"
#include "src/common/list.h"
#include "src/common/slurm_protocol_defs.h"
#include "src/common/slurm_protocol_socket_common.h"
/*
 * Magic cookie values.
 * NOTE(review): presumably each value is stored in the "magic" field of the
 * matching record type (config_record, features_record, node_record) and
 * checked to detect corrupted or stale pointers -- confirm in node_conf.c.
 */
#define CONFIG_MAGIC 0xc065eded
#define FEATURE_MAGIC 0x34dfd8b5
#define NODE_MAGIC 0x0de575ed
/*
 * config_record - configuration shared by a set of nodes. Each record
 * corresponds to a line in the slurm.conf file and typically describes
 * the configuration of a large number of nodes.
 */
struct config_record {
uint32_t magic; /* magic cookie to test data integrity */
uint16_t cpus; /* count of processors running on the node */
uint16_t sockets; /* number of sockets per node */
uint16_t cores; /* number of cores per CPU */
uint16_t threads; /* number of threads per core */
uint32_t real_memory; /* MB real memory on the node */
uint32_t tmp_disk; /* MB total storage in TMP_FS file system */
uint32_t weight; /* arbitrary priority of node for
* scheduling work on */
char *feature; /* arbitrary list of node's features */
char **feature_array; /* array of feature names */
char *gres; /* arbitrary list of node's generic resources */
char *nodes; /* names of the nodes with this configuration */
bitstr_t *node_bitmap; /* bitmap of nodes with this configuration */
};
/* Global lists; only declared here, defined elsewhere */
extern List config_list; /* list of config_record entries */
extern List front_end_list; /* list of slurm_conf_frontend_t entries */
/*
 * features_record - associates one feature name with the set of nodes
 * having that feature. Entries are kept in feature_list.
 */
struct features_record {
uint32_t magic; /* magic cookie to test data integrity */
char *name; /* name of a feature */
bitstr_t *node_bitmap; /* bitmap of nodes with this feature */
};
/* Global feature list; only declared here, defined elsewhere */
extern List feature_list; /* list of features_record entries */
/*
 * node_record - state tracked for a single node.
 * NOTE: basil_node_id and down_time are only present when HAVE_CRAY is
 * defined, so the layout of this structure depends on the build
 * configuration.
 */
struct node_record {
uint32_t magic; /* magic cookie for data integrity */
char *name; /* name of the node. NULL==defunct */
char *node_hostname; /* hostname of the node */
uint16_t node_state; /* enum node_states, ORed with
* NODE_STATE_NO_RESPOND if not
* responding */
bool not_responding; /* set if fails to respond,
* clear after logging this */
time_t boot_time; /* Time of node boot,
* computed from up_time */
time_t slurmd_start_time; /* Time of slurmd startup */
time_t last_response; /* last response from the node */
time_t last_idle; /* time node last became idle */
uint16_t cpus; /* count of processors on the node */
uint16_t sockets; /* number of sockets per node */
uint16_t cores; /* number of cores per CPU */
uint16_t threads; /* number of threads per core */
uint32_t real_memory; /* MB real memory on the node */
uint32_t tmp_disk; /* MB total disk in TMP_FS */
uint32_t up_time; /* seconds since node boot */
struct config_record *config_ptr; /* configuration spec ptr */
uint16_t part_cnt; /* number of associated partitions */
struct part_record **part_pptr; /* array of pointers to partitions
* associated with this node */
char *comm_name; /* communications path name to node */
uint16_t port; /* TCP port number of the slurmd */
slurm_addr_t slurm_addr; /* network address */
uint16_t comp_job_cnt; /* count of jobs completing on node */
uint16_t run_job_cnt; /* count of jobs running on node */
uint16_t sus_job_cnt; /* count of jobs suspended on node */
uint16_t no_share_job_cnt; /* count of jobs running that will
* not share nodes */
char *reason; /* why a node is DOWN or DRAINING */
time_t reason_time; /* Time stamp when reason was
* set, ignore if no reason is set. */
uint32_t reason_uid; /* User that set the reason, ignore if
* no reason is set. */
char *features; /* node's features, used only
* for state save/restore, DO NOT
* use for scheduling purposes */
char *gres; /* node's generic resources, used only
* for state save/restore, DO NOT
* use for scheduling purposes */
List gres_list; /* list of gres state info managed by
* plugins */
uint32_t weight; /* original weight, used only for state
* save/restore, DO NOT use for
* scheduling purposes. */
char *arch; /* computer architecture */
char *os; /* operating system now running */
struct node_record *node_next; /* next entry with same hash index */
uint32_t node_rank; /* Hilbert number based on node name,
* or other sequence number used to
* order nodes by location,
* no need to save/restore */
#ifdef HAVE_CRAY
uint32_t basil_node_id; /* Cray-XT BASIL node ID,
* no need to save/restore */
time_t down_time; /* When first set to DOWN state */
#endif /* HAVE_CRAY */
dynamic_plugin_data_t *select_nodeinfo; /* opaque data structure,
* use select_g_get_nodeinfo()
* to access contents */
};
/* Global node table state; only declared here, defined elsewhere */
extern struct node_record *node_record_table_ptr; /* ptr to node records */
extern int node_record_count; /* count of records in
* node_record_table_ptr */
extern time_t last_node_update; /* time of last node record update */
/*
 * bitmap2node_name_sortable - given a bitmap, build a list of comma
 *	separated node names. names may include regular expressions
 *	(e.g. "lx[01-10]")
 * IN bitmap - bitmap pointer
 * IN sort - if true, return the node names as an ordered list
 * RET pointer to node list or NULL on error
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the returned node list when no longer required
 */
extern char * bitmap2node_name_sortable (bitstr_t *bitmap, bool sort);
/*
 * bitmap2node_name - given a bitmap, build a list of comma separated node
 *	names. names may include regular expressions (e.g. "lx[01-10]")
 * IN bitmap - bitmap pointer
 * RET pointer to node list or NULL on error
 * globals: node_record_table_ptr - pointer to node table
 * NOTE: the caller must xfree the returned node list when no longer required
 */
extern char * bitmap2node_name (bitstr_t *bitmap);
/*
 * build_all_nodeline_info - get an array of slurm_conf_node_t structures
 *	from the slurm.conf reader, build table, and set values
 * IN set_bitmap - if true, set node_bitmap in config record (used by slurmd)
 * RET 0 if no error, error code otherwise
 */
extern int build_all_nodeline_info (bool set_bitmap);
/*
 * build_all_frontend_info - get an array of slurm_conf_frontend_t structures
 *	from the slurm.conf reader, build table, and set values
 * IN is_slurmd_context - set to true if run from slurmd
 * RET 0 if no error, error code otherwise
 */
extern int build_all_frontend_info (bool is_slurmd_context);
/*
 * build_config_feature_list - given a config_record with its bitmap already
 *	set, update feature_list
 * IN config_ptr - configuration record whose features are to be recorded
 */
extern void build_config_feature_list (struct config_record *config_ptr);
/*
 * create_config_record - create a config_record entry and set its values to
 *	the defaults. each config record corresponds to a line in the
 *	slurm.conf file and typically describes the configuration of a
 *	large number of nodes
 * RET pointer to the config_record
 * NOTE: memory allocated will remain in existence until
 *	_delete_config_record() is called to delete all configuration records
 */
extern struct config_record *create_config_record (void);
/*
 * create_node_record - create a node record and set its values to defaults
 * IN config_ptr - pointer to the node's configuration information
 * IN node_name - name of the node
 * RET pointer to the record or NULL on error
 * NOTE: allocates memory at node_record_table_ptr that must be xfreed when
 *	the global node table is no longer required
 */
extern struct node_record *create_node_record (
struct config_record *config_ptr, char *node_name);
/*
 * find_node_record - find a record for node with specified name
 * IN name - name of the desired node
 * RET pointer to node record or NULL if not found
 * globals: node_hash_table - table of hash indices
 */
extern struct node_record *find_node_record (char *name);
/*
 * init_node_conf - initialize the node configuration tables and values.
 *	this should be called before creating any node or configuration
 *	entries (e.g. before create_config_record() or create_node_record()).
 * RET 0 if no error, otherwise an error code
 */
extern int init_node_conf (void);
/*
 * node_fini2 - free memory associated with node records (except bitmaps)
 */
extern void node_fini2 (void);
/*
 * node_name2bitmap - given a node name regular expression, build a bitmap
 *	representation
 * IN node_names - list of nodes
 * IN best_effort - if set, don't return an error on invalid node name entries
 * OUT bitmap - set to the resulting bitmap, which may not have all bits set
 *	on error
 * RET 0 if no error, otherwise EINVAL
 * NOTE: the caller must bit_free() memory at bitmap when no longer required
 */
extern int node_name2bitmap (char *node_names, bool best_effort,
bitstr_t **bitmap);
/*
 * purge_node_rec - purge the contents of a node record
 * IN node_ptr - node record to purge
 */
extern void purge_node_rec (struct node_record *node_ptr);
/*
 * rehash_node - build a hash table of the node_record entries.
 * NOTE: manages memory for node_hash_table; callers do not allocate or
 *	free that table themselves
 */
extern void rehash_node (void);
/*
 * state_str2int - convert a node state string to its equivalent enum value
 * IN state_str - node state string to convert
 * IN node_name - NOTE(review): not used by the conversion as far as this
 *	header shows; presumably only for diagnostics -- confirm in node_conf.c
 * RET the enum value as an int. NOTE(review): the value returned for an
 *	unrecognized string is not visible from this header -- confirm
 */
extern int state_str2int(const char *state_str, char *node_name);
#endif /* !_HAVE_NODE_CONF_H */