/*****************************************************************************\
* read_config.c - read the overall slurm configuration file
*****************************************************************************
* Copyright (C) 2002-2006 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov>.
* UCRL-CODE-226842.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <slurm/slurm.h>
#include "src/common/hostlist.h"
#include "src/common/slurm_protocol_defs.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/log.h"
#include "src/common/macros.h"
#include "src/common/parse_spec.h"
#include "src/common/read_config.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/common/slurm_rlimits_info.h"
#include "src/common/parse_config.h"
#include "src/common/slurm_selecttype_info.h"
#include "src/common/uid.h"
/* Instantiation of the "extern slurm_ctl_conf_t slurmctld_conf"
 * found in slurmctld.h */
slurm_ctl_conf_t slurmctld_conf;
static pthread_mutex_t conf_lock = PTHREAD_MUTEX_INITIALIZER;
static s_p_hashtbl_t *conf_hashtbl = NULL;
static slurm_ctl_conf_t *conf_ptr = &slurmctld_conf;
static bool conf_initialized = false;
static s_p_hashtbl_t *default_nodename_tbl;
static s_p_hashtbl_t *default_partition_tbl;
inline static void _normalize_debug_level(uint16_t *level);
static void _init_slurm_conf(const char *file_name);
#define NAME_HASH_LEN 512
typedef struct names_ll_s {
char *alias; /* NodeName */
char *hostname; /* NodeHostname */
char *address; /* NodeAddr */
uint16_t port;
uint16_t cpus;
uint16_t sockets;
uint16_t cores;
uint16_t threads;
slurm_addr addr;
bool addr_initialized;
struct names_ll_s *next_alias;
struct names_ll_s *next_hostname;
} names_ll_t;
bool nodehash_initialized = false;
static names_ll_t *host_to_node_hashtbl[NAME_HASH_LEN] = {NULL};
static names_ll_t *node_to_host_hashtbl[NAME_HASH_LEN] = {NULL};
static int parse_nodename(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover);
static void destroy_nodename(void *ptr);
static int parse_partitionname(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover);
static void destroy_partitionname(void *ptr);
static int parse_downnodes(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover);
static void destroy_downnodes(void *ptr);
static int defunct_option(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover);
static void validate_and_set_defaults(slurm_ctl_conf_t *conf,
s_p_hashtbl_t *hashtbl);
s_p_options_t slurm_conf_options[] = {
{"AuthType", S_P_STRING},
{"CheckpointType", S_P_STRING},
{"CacheGroups", S_P_UINT16},
{"BackupAddr", S_P_STRING},
{"BackupController", S_P_STRING},
{"ControlAddr", S_P_STRING},
{"ControlMachine", S_P_STRING},
{"DisableRootJobs", S_P_BOOLEAN},
{"Epilog", S_P_STRING},
{"FastSchedule", S_P_UINT16},
{"FirstJobId", S_P_UINT32},
{"HashBase", S_P_LONG, defunct_option},
{"HeartbeatInterval", S_P_LONG, defunct_option},
{"InactiveLimit", S_P_UINT16},
{"JobAcctLogFile", S_P_STRING},
{"JobAcctFrequency", S_P_UINT16},
{"JobAcctType", S_P_STRING},
{"JobCompLoc", S_P_STRING},
{"JobCompType", S_P_STRING},
{"JobCredentialPrivateKey", S_P_STRING},
{"JobCredentialPublicCertificate", S_P_STRING},
{"JobFileAppend", S_P_UINT16},
{"GetEnvTimeout", S_P_UINT16},
{"KillTree", S_P_UINT16, defunct_option},
{"KillWait", S_P_UINT16},
{"MailProg", S_P_STRING},
{"MaxJobCount", S_P_UINT16},
{"MessageTimeout", S_P_UINT16},
{"MinJobAge", S_P_UINT16},
{"MpichGmDirectSupport", S_P_LONG},
{"MpiDefault", S_P_STRING},
{"PluginDir", S_P_STRING},
{"PlugStackConfig", S_P_STRING},
{"ProctrackType", S_P_STRING},
{"Prolog", S_P_STRING},
{"PropagatePrioProcess", S_P_UINT16},
{"PropagateResourceLimitsExcept", S_P_STRING},
{"PropagateResourceLimits", S_P_STRING},
{"ReturnToService", S_P_UINT16},
{"SchedulerAuth", S_P_STRING},
{"SchedulerPort", S_P_UINT16},
{"SchedulerRootFilter", S_P_UINT16},
{"SchedulerType", S_P_STRING},
{"SelectType", S_P_STRING},
{"SelectTypeParameters", S_P_STRING},
{"SlurmUser", S_P_STRING},
{"SlurmctldDebug", S_P_UINT16},
{"SlurmctldLogFile", S_P_STRING},
{"SlurmctldPidFile", S_P_STRING},
{"SlurmctldPort", S_P_UINT32},
{"SlurmctldTimeout", S_P_UINT16},
{"SlurmdDebug", S_P_UINT16},
{"SlurmdLogFile", S_P_STRING},
{"SlurmdPidFile", S_P_STRING},
{"SlurmdPort", S_P_UINT32},
{"SlurmdSpoolDir", S_P_STRING},
{"SlurmdTimeout", S_P_UINT16},
{"SrunEpilog", S_P_STRING},
{"SrunProlog", S_P_STRING},
{"StateSaveLocation", S_P_STRING},
{"SwitchType", S_P_STRING},
{"TaskEpilog", S_P_STRING},
{"TaskProlog", S_P_STRING},
{"TaskPlugin", S_P_STRING},
{"TaskPluginParam", S_P_STRING},
{"TmpFS", S_P_STRING},
{"TreeWidth", S_P_UINT16},
{"UnkillableStepProgram", S_P_STRING},
{"UnkillableStepTimeout", S_P_UINT16},
{"UsePAM", S_P_BOOLEAN},
{"WaitTime", S_P_UINT16},
{"NodeName", S_P_ARRAY, parse_nodename, destroy_nodename},
{"PartitionName", S_P_ARRAY, parse_partitionname, destroy_partitionname},
{"DownNodes", S_P_ARRAY, parse_downnodes, destroy_downnodes},
{NULL}
};
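/*
 * Illustrative slurm.conf fragment (hypothetical values, not from any
 * real site) showing how a few of the keywords in the table above
 * appear in the file this parser reads; NodeName, PartitionName and
 * DownNodes lines are dispatched to the S_P_ARRAY callbacks registered
 * above:
 *
 *	ControlMachine=head0
 *	AuthType=auth/munge
 *	SlurmctldPort=6817
 *	SlurmdPort=6818
 *	StateSaveLocation=/var/spool/slurm.state
 */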
static int defunct_option(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover)
{
error("The option \"%s\" is defunct, see man slurm.conf.", key);
return 0;
}
static int parse_nodename(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover)
{
s_p_hashtbl_t *tbl, *dflt;
slurm_conf_node_t *n;
static s_p_options_t _nodename_options[] = {
{"NodeHostname", S_P_STRING},
{"NodeAddr", S_P_STRING},
{"CoresPerSocket", S_P_UINT16},
{"Feature", S_P_STRING},
{"Port", S_P_UINT16},
{"Procs", S_P_UINT16},
{"RealMemory", S_P_UINT32},
{"Reason", S_P_STRING},
{"Sockets", S_P_UINT16},
{"State", S_P_STRING},
{"ThreadsPerCore", S_P_UINT16},
{"TmpDisk", S_P_UINT32},
{"Weight", S_P_UINT32},
{NULL}
};
tbl = s_p_hashtbl_create(_nodename_options);
s_p_parse_line(tbl, *leftover, leftover);
/* s_p_dump_values(tbl, _nodename_options); */
if (strcasecmp(value, "DEFAULT") == 0) {
char *tmp;
if (s_p_get_string(&tmp, "NodeHostname", tbl)) {
error("NodeHostname not allowed with NodeName=DEFAULT");
xfree(tmp);
s_p_hashtbl_destroy(tbl);
return -1;
}
if (s_p_get_string(&tmp, "NodeAddr", tbl)) {
error("NodeAddr not allowed with NodeName=DEFAULT");
xfree(tmp);
s_p_hashtbl_destroy(tbl);
return -1;
}
if (default_nodename_tbl != NULL)
s_p_hashtbl_destroy(default_nodename_tbl);
default_nodename_tbl = tbl;
return 0;
} else {
bool no_cpus = false;
bool no_sockets = false;
bool no_cores = false;
bool no_threads = false;
n = xmalloc(sizeof(slurm_conf_node_t));
dflt = default_nodename_tbl;
n->nodenames = xstrdup(value);
if (!s_p_get_string(&n->hostnames, "NodeHostname", tbl))
n->hostnames = xstrdup(n->nodenames);
if (!s_p_get_string(&n->addresses, "NodeAddr", tbl))
n->addresses = xstrdup(n->hostnames);
if (!s_p_get_uint16(&n->cores, "CoresPerSocket", tbl)
&& !s_p_get_uint16(&n->cores, "CoresPerSocket", dflt)) {
n->cores = 1;
no_cores = true;
}
if (!s_p_get_string(&n->feature, "Feature", tbl))
s_p_get_string(&n->feature, "Feature", dflt);
if (!s_p_get_uint16(&n->port, "Port", tbl)
&& !s_p_get_uint16(&n->port, "Port", dflt)) {
/* This gets resolved in slurm_conf_get_port()
* and slurm_conf_get_addr(). For now just
* leave with a value of zero */
n->port = 0;
}
if (!s_p_get_uint16(&n->cpus, "Procs", tbl)
&& !s_p_get_uint16(&n->cpus, "Procs", dflt)) {
n->cpus = 1;
no_cpus = true;
}
if (!s_p_get_uint32(&n->real_memory, "RealMemory", tbl)
&& !s_p_get_uint32(&n->real_memory, "RealMemory", dflt))
n->real_memory = 1;
if (!s_p_get_string(&n->reason, "Reason", tbl))
s_p_get_string(&n->reason, "Reason", dflt);
if (!s_p_get_uint16(&n->sockets, "Sockets", tbl)
&& !s_p_get_uint16(&n->sockets, "Sockets", dflt)) {
n->sockets = 1;
no_sockets = true;
}
if (!s_p_get_string(&n->state, "State", tbl)
&& !s_p_get_string(&n->state, "State", dflt))
n->state = NULL;
if (!s_p_get_uint16(&n->threads, "ThreadsPerCore", tbl)
&& !s_p_get_uint16(&n->threads, "ThreadsPerCore", dflt)) {
n->threads = 1;
no_threads = true;
}
if (!s_p_get_uint32(&n->tmp_disk, "TmpDisk", tbl)
&& !s_p_get_uint32(&n->tmp_disk, "TmpDisk", dflt))
n->tmp_disk = 0;
if (!s_p_get_uint32(&n->weight, "Weight", tbl)
&& !s_p_get_uint32(&n->weight, "Weight", dflt))
n->weight = 1;
s_p_hashtbl_destroy(tbl);
if (n->cores == 0) /* make sure cores is non-zero */
n->cores = 1;
if (n->threads == 0) /* make sure threads is non-zero */
n->threads = 1;
		/* infer missing Sockets= from Procs= */
		if (!no_cpus && no_sockets)
			n->sockets = n->cpus / (n->cores * n->threads);
		if (n->sockets == 0)	/* make sure sockets is non-zero */
			n->sockets = 1;
		/* infer missing Procs= from Sockets= */
		if (no_cpus && !no_sockets)
			n->cpus = n->sockets * n->cores * n->threads;
		/* if only Procs= and Sockets= specified check for match */
		if (!no_cpus && !no_sockets && no_cores && no_threads) {
if (n->cpus != n->sockets) {
n->sockets = n->cpus;
error("Procs doesn't match Sockets, "
"setting Sockets to %d",
n->sockets);
}
}
*dest = (void *)n;
return 1;
}
/* should not get here */
}
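/*
 * Example NodeName lines (hypothetical node names and values): a
 * DEFAULT record fills default_nodename_tbl so later records inherit
 * its values, while an explicit record overrides them; here
 * lx[0001-0002] inherit TmpDisk=64000 but supply their own Procs:
 *
 *	NodeName=DEFAULT Procs=2 TmpDisk=64000 State=UNKNOWN
 *	NodeName=lx[0001-0002] Procs=4
 */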
static void destroy_nodename(void *ptr)
{
slurm_conf_node_t *n = (slurm_conf_node_t *)ptr;
xfree(n->nodenames);
xfree(n->hostnames);
xfree(n->addresses);
xfree(n->feature);
xfree(n->reason);
xfree(n->state);
xfree(ptr);
}
int slurm_conf_nodename_array(slurm_conf_node_t **ptr_array[])
{
int count;
slurm_conf_node_t **ptr;
if (s_p_get_array((void ***)&ptr, &count, "NodeName", conf_hashtbl)) {
*ptr_array = ptr;
return count;
} else {
*ptr_array = NULL;
return 0;
}
}
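/*
 * Usage sketch (hypothetical caller): iterate every parsed NodeName
 * record. The array and its entries remain owned by conf_hashtbl, so
 * the caller must not free them:
 *
 *	slurm_conf_node_t **nodes;
 *	int i, count = slurm_conf_nodename_array(&nodes);
 *	for (i = 0; i < count; i++)
 *		debug("NodeName(s) %s", nodes[i]->nodenames);
 */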
static int parse_partitionname(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover)
{
s_p_hashtbl_t *tbl, *dflt;
slurm_conf_partition_t *p;
char *tmp = NULL;
static s_p_options_t _partition_options[] = {
{"AllowGroups", S_P_STRING},
{"Default", S_P_BOOLEAN}, /* YES or NO */
{"DisableRootJobs", S_P_BOOLEAN}, /* YES or NO */
{"Hidden", S_P_BOOLEAN}, /* YES or NO */
{"MaxTime", S_P_UINT32}, /* INFINITE or a number */
{"MaxNodes", S_P_UINT32}, /* INFINITE or a number */
{"MinNodes", S_P_UINT32},
{"Nodes", S_P_STRING},
{"RootOnly", S_P_BOOLEAN}, /* YES or NO */
{"Shared", S_P_STRING}, /* YES, NO, or FORCE */
{"State", S_P_BOOLEAN}, /* UP or DOWN */
{NULL}
};
tbl = s_p_hashtbl_create(_partition_options);
s_p_parse_line(tbl, *leftover, leftover);
/* s_p_dump_values(tbl, _partition_options); */
if (strcasecmp(value, "DEFAULT") == 0) {
if (default_partition_tbl != NULL)
s_p_hashtbl_destroy(default_partition_tbl);
default_partition_tbl = tbl;
return 0;
} else {
p = xmalloc(sizeof(slurm_conf_partition_t));
dflt = default_partition_tbl;
p->name = xstrdup(value);
if (!s_p_get_string(&p->allow_groups, "AllowGroups", tbl))
s_p_get_string(&p->allow_groups, "AllowGroups", dflt);
if (p->allow_groups && strcasecmp(p->allow_groups, "ALL")==0) {
xfree(p->allow_groups);
p->allow_groups = NULL; /* NULL means allow all */
}
if (!s_p_get_boolean(&p->default_flag, "Default", tbl)
&& !s_p_get_boolean(&p->default_flag, "Default", dflt))
p->default_flag = false;
if (!s_p_get_boolean((bool *)&p->disable_root_jobs,
"DisableRootJobs", tbl))
p->disable_root_jobs = (uint16_t)NO_VAL;
if (!s_p_get_boolean(&p->hidden_flag, "Hidden", tbl)
&& !s_p_get_boolean(&p->hidden_flag, "Hidden", dflt))
p->hidden_flag = false;
if (!s_p_get_uint32(&p->max_time, "MaxTime", tbl)
&& !s_p_get_uint32(&p->max_time, "MaxTime", dflt))
p->max_time = INFINITE;
if (!s_p_get_uint32(&p->max_nodes, "MaxNodes", tbl)
&& !s_p_get_uint32(&p->max_nodes, "MaxNodes", dflt))
p->max_nodes = INFINITE;
if (!s_p_get_uint32(&p->min_nodes, "MinNodes", tbl)
&& !s_p_get_uint32(&p->min_nodes, "MinNodes", dflt))
p->min_nodes = 1;
if (!s_p_get_string(&p->nodes, "Nodes", tbl)
&& !s_p_get_string(&p->nodes, "Nodes", dflt))
p->nodes = NULL;
else {
int i;
for (i=0; p->nodes[i]; i++) {
if (isspace(p->nodes[i]))
p->nodes[i] = ',';
}
}
if (!s_p_get_boolean(&p->root_only_flag, "RootOnly", tbl)
&& !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt))
p->root_only_flag = false;
if (!s_p_get_string(&tmp, "Shared", tbl)
&& !s_p_get_string(&tmp, "Shared", dflt)) {
p->shared = SHARED_NO;
} else {
if (strcasecmp(tmp, "NO") == 0)
p->shared = SHARED_NO;
#ifndef HAVE_XCPU
/* Only "Shared=NO" is valid on XCPU systems */
else if (strcasecmp(tmp, "YES") == 0)
p->shared = SHARED_YES;
else if (strcasecmp(tmp, "EXCLUSIVE") == 0)
p->shared = SHARED_EXCLUSIVE;
else if (strcasecmp(tmp, "FORCE") == 0)
p->shared = SHARED_FORCE;
#endif
else {
error("Bad value \"%s\" for Shared", tmp);
destroy_partitionname(p);
s_p_hashtbl_destroy(tbl);
xfree(tmp);
return -1;
}
}
xfree(tmp);
if (!s_p_get_boolean(&p->state_up_flag, "State", tbl)
&& !s_p_get_boolean(&p->state_up_flag, "State", dflt))
p->state_up_flag = true;
s_p_hashtbl_destroy(tbl);
*dest = (void *)p;
return 1;
}
/* should not get here */
}
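/*
 * Example PartitionName lines (hypothetical): as with nodes, a DEFAULT
 * record seeds default_partition_tbl; the "debug" partition below
 * inherits MaxTime=30 from it:
 *
 *	PartitionName=DEFAULT MaxTime=30 MaxNodes=2
 *	PartitionName=debug Nodes=lx[0001-0002] Default=YES Shared=FORCE
 */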
static void destroy_partitionname(void *ptr)
{
slurm_conf_partition_t *p = (slurm_conf_partition_t *)ptr;
xfree(p->name);
xfree(p->nodes);
xfree(p->allow_groups);
xfree(ptr);
}
int slurm_conf_partition_array(slurm_conf_partition_t **ptr_array[])
{
int count;
slurm_conf_partition_t **ptr;
if (s_p_get_array((void ***)&ptr, &count, "PartitionName",
conf_hashtbl)) {
*ptr_array = ptr;
return count;
} else {
*ptr_array = NULL;
return 0;
}
}
static int parse_downnodes(void **dest, slurm_parser_enum_t type,
const char *key, const char *value,
const char *line, char **leftover)
{
s_p_hashtbl_t *tbl;
slurm_conf_downnodes_t *n;
static s_p_options_t _downnodes_options[] = {
{"Reason", S_P_STRING},
{"State", S_P_STRING},
{NULL}
};
tbl = s_p_hashtbl_create(_downnodes_options);
s_p_parse_line(tbl, *leftover, leftover);
/* s_p_dump_values(tbl, _downnodes_options); */
n = xmalloc(sizeof(slurm_conf_downnodes_t));
n->nodenames = xstrdup(value);
if (!s_p_get_string(&n->reason, "Reason", tbl))
n->reason = xstrdup("Set in slurm.conf");
if (!s_p_get_string(&n->state, "State", tbl))
n->state = NULL;
s_p_hashtbl_destroy(tbl);
*dest = (void *)n;
return 1;
}
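/*
 * Example DownNodes line (hypothetical); Reason defaults to
 * "Set in slurm.conf" when omitted:
 *
 *	DownNodes=lx[0003-0004] State=DOWN Reason="power failure"
 */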
static void destroy_downnodes(void *ptr)
{
slurm_conf_downnodes_t *n = (slurm_conf_downnodes_t *)ptr;
xfree(n->nodenames);
xfree(n->reason);
xfree(n->state);
xfree(ptr);
}
int slurm_conf_downnodes_array(slurm_conf_downnodes_t **ptr_array[])
{
int count;
slurm_conf_downnodes_t **ptr;
if (s_p_get_array((void ***)&ptr, &count, "DownNodes", conf_hashtbl)) {
*ptr_array = ptr;
return count;
} else {
*ptr_array = NULL;
return 0;
}
}
static void _free_name_hashtbl(void)
{
int i;
names_ll_t *p, *q;
for (i=0; i<NAME_HASH_LEN; i++) {
p = node_to_host_hashtbl[i];
while (p) {
xfree(p->alias);
xfree(p->hostname);
xfree(p->address);
q = p->next_alias;
xfree(p);
p = q;
}
node_to_host_hashtbl[i] = NULL;
host_to_node_hashtbl[i] = NULL;
}
nodehash_initialized = false;
}
static void _init_name_hashtbl()
{
return;
}
static int _get_hash_idx(const char *s)
{
int i;
i = 0;
while (*s) i += (int)*s++;
return i % NAME_HASH_LEN;
}
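/*
 * Worked example: "ab" hashes to ('a' + 'b') % NAME_HASH_LEN
 * = (97 + 98) % 512 = 195. Names with the same character sum collide
 * and are chained through the next_alias/next_hostname links.
 */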
static void _push_to_hashtbls(char *alias, char *hostname,
char *address, uint16_t port,
uint16_t cpus, uint16_t sockets,
uint16_t cores, uint16_t threads)
{
int hostname_idx, alias_idx;
names_ll_t *p, *new;
alias_idx = _get_hash_idx(alias);
hostname_idx = _get_hash_idx(hostname);
#if !defined(HAVE_FRONT_END) && !defined(MULTIPLE_SLURMD)
/* Ensure only one slurmd configured on each host */
p = host_to_node_hashtbl[hostname_idx];
while (p) {
if (strcmp(p->hostname, hostname)==0) {
error("Duplicated NodeHostname %s in the config file",
hostname);
return;
}
p = p->next_hostname;
}
#endif
/* Ensure only one instance of each NodeName */
p = node_to_host_hashtbl[alias_idx];
while (p) {
if (strcmp(p->alias, alias)==0) {
fatal("Duplicated NodeName %s in the config file",
p->alias);
return;
}
p = p->next_alias;
}
/* Create the new data structure and link it into the hash tables */
new = (names_ll_t *)xmalloc(sizeof(*new));
new->alias = xstrdup(alias);
new->hostname = xstrdup(hostname);
new->address = xstrdup(address);
new->port = port;
new->cpus = cpus;
new->sockets = sockets;
new->cores = cores;
new->threads = threads;
new->addr_initialized = false;
new->next_hostname = host_to_node_hashtbl[hostname_idx];
host_to_node_hashtbl[hostname_idx] = new;
new->next_alias = node_to_host_hashtbl[alias_idx];
node_to_host_hashtbl[alias_idx] = new;
}
/*
 * Register the NodeName, NodeHostname and NodeAddr lists from one
 * configuration record in the lookup hash tables. The three lists are
 * expanded in parallel, so each NodeName alias is paired with its
 * corresponding hostname and address.
 */
static int _register_conf_node_aliases(slurm_conf_node_t *node_ptr)
{
hostlist_t alias_list = NULL;
hostlist_t hostname_list = NULL;
hostlist_t address_list = NULL;
char *alias = NULL;
char *hostname = NULL;
char *address = NULL;
int error_code = SLURM_SUCCESS;
if (node_ptr->nodenames == NULL || *node_ptr->nodenames == '\0')
return -1;
if ((alias_list = hostlist_create(node_ptr->nodenames)) == NULL) {
error("Unable to create NodeName list from %s",
node_ptr->nodenames);
error_code = errno;
goto cleanup;
}
if ((hostname_list = hostlist_create(node_ptr->hostnames)) == NULL) {
error("Unable to create NodeHostname list from %s",
node_ptr->hostnames);
error_code = errno;
goto cleanup;
}
if ((address_list = hostlist_create(node_ptr->addresses)) == NULL) {
error("Unable to create NodeAddr list from %s",
node_ptr->addresses);
error_code = errno;
goto cleanup;
}
/* some sanity checks */
#ifdef HAVE_FRONT_END
	if (hostlist_count(hostname_list) != 1
	    || hostlist_count(address_list) != 1) {
		error("Only one hostname and address allowed "
		      "in FRONT_END mode");
		error_code = SLURM_ERROR;
		goto cleanup;
	}
hostname = node_ptr->hostnames;
address = node_ptr->addresses;
#else
	if (hostlist_count(hostname_list) < hostlist_count(alias_list)) {
		error("At least as many NodeHostname values are required "
		      "as NodeName values");
		error_code = SLURM_ERROR;
		goto cleanup;
	}
	if (hostlist_count(address_list) < hostlist_count(alias_list)) {
		error("At least as many NodeAddr values are required "
		      "as NodeName values");
		error_code = SLURM_ERROR;
		goto cleanup;
	}
#endif
/* now build the individual node structures */
while ((alias = hostlist_shift(alias_list))) {
#ifndef HAVE_FRONT_END
hostname = hostlist_shift(hostname_list);
address = hostlist_shift(address_list);
#endif
_push_to_hashtbls(alias, hostname, address, node_ptr->port,
node_ptr->cpus, node_ptr->sockets,
node_ptr->cores, node_ptr->threads);
free(alias);
#ifndef HAVE_FRONT_END
free(hostname);
free(address);
#endif
}
/* free allocated storage */
cleanup:
if (alias_list)
hostlist_destroy(alias_list);
if (hostname_list)
hostlist_destroy(hostname_list);
if (address_list)
hostlist_destroy(address_list);
return error_code;
}
static void _init_slurmd_nodehash(void)
{
slurm_conf_node_t **ptr_array;
int count;
int i;
if (nodehash_initialized)
return;
else
nodehash_initialized = true;
	if (!conf_initialized) {
		_init_slurm_conf(NULL);
		conf_initialized = true;
	}
count = slurm_conf_nodename_array(&ptr_array);
if (count == 0) {
return;
}
for (i = 0; i < count; i++) {
_register_conf_node_aliases(ptr_array[i]);
}
}
extern void slurm_conf_nodehash_init(void)
{
slurm_conf_lock();
_init_slurmd_nodehash();
slurm_conf_unlock();
}
/*
* Caller needs to call slurm_conf_lock() and hold the lock before
* calling this function (and call slurm_conf_unlock() afterwards).
*/
static char *_internal_get_hostname(const char *node_name)
{
int idx;
names_ll_t *p;
_init_slurmd_nodehash();
idx = _get_hash_idx(node_name);
p = node_to_host_hashtbl[idx];
while (p) {
if (strcmp(p->alias, node_name) == 0) {
return xstrdup(p->hostname);
}
p = p->next_alias;
}
return NULL;
}
/*
* slurm_conf_get_hostname - Return the NodeHostname for given NodeName
*/
extern char *slurm_conf_get_hostname(const char *node_name)
{
char *hostname = NULL;
slurm_conf_lock();
hostname = _internal_get_hostname(node_name);
slurm_conf_unlock();
return hostname;
}
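/*
 * Usage sketch (hypothetical node name): the result is xstrdup()ed, so
 * the caller owns it and must xfree() it:
 *
 *	char *host = slurm_conf_get_hostname("lx0001");
 *	if (host == NULL)
 *		error("lx0001 not found in configuration");
 *	xfree(host);
 */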
/*
* slurm_conf_get_nodename - Return the NodeName for given NodeHostname
*/
extern char *slurm_conf_get_nodename(const char *node_hostname)
{
int idx;
names_ll_t *p;
slurm_conf_lock();
_init_slurmd_nodehash();
idx = _get_hash_idx(node_hostname);
p = host_to_node_hashtbl[idx];
while (p) {
if (strcmp(p->hostname, node_hostname) == 0) {
char *alias = xstrdup(p->alias);
slurm_conf_unlock();
return alias;
}
p = p->next_hostname;
}
slurm_conf_unlock();
return NULL;
}
/*
* slurm_conf_get_port - Return the port for a given NodeName
*/
extern uint16_t slurm_conf_get_port(const char *node_name)
{
int idx;
names_ll_t *p;
slurm_conf_lock();
_init_slurmd_nodehash();
idx = _get_hash_idx(node_name);
p = node_to_host_hashtbl[idx];
while (p) {
if (strcmp(p->alias, node_name) == 0) {
uint16_t port = p->port;
if (!port)
p->port = (uint16_t) conf_ptr->slurmd_port;
port = p->port;
slurm_conf_unlock();
return port;
}
p = p->next_alias;
}
slurm_conf_unlock();
return 0;
}
/*
* slurm_conf_get_addr - Return the slurm_addr for a given NodeName
* Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure.
*/
extern int slurm_conf_get_addr(const char *node_name, slurm_addr *address)
{
int idx;
names_ll_t *p;
slurm_conf_lock();
_init_slurmd_nodehash();
idx = _get_hash_idx(node_name);
p = node_to_host_hashtbl[idx];
while (p) {
if (strcmp(p->alias, node_name) == 0) {
if (!p->port)
p->port = (uint16_t) conf_ptr->slurmd_port;
if (!p->addr_initialized) {
slurm_set_addr(&p->addr, p->port, p->address);
p->addr_initialized = true;
}
*address = p->addr;
slurm_conf_unlock();
return SLURM_SUCCESS;
}
p = p->next_alias;
}
slurm_conf_unlock();
return SLURM_FAILURE;
}
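/*
 * Usage sketch (hypothetical node name): the address is resolved and
 * cached in the node hash table on first use:
 *
 *	slurm_addr addr;
 *	if (slurm_conf_get_addr("lx0001", &addr) != SLURM_SUCCESS)
 *		error("no address configured for lx0001");
 */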
/*
* slurm_conf_get_cpus_sct -
* Return the cpus, sockets, cores, and threads for a given NodeName
* Returns SLURM_SUCCESS on success, SLURM_FAILURE on failure.
*/
extern int slurm_conf_get_cpus_sct(const char *node_name,
uint16_t *cpus, uint16_t *sockets,
uint16_t *cores, uint16_t *threads)
{
int idx;
names_ll_t *p;
slurm_conf_lock();
_init_slurmd_nodehash();
idx = _get_hash_idx(node_name);
p = node_to_host_hashtbl[idx];
while (p) {
if (strcmp(p->alias, node_name) == 0) {
if (cpus)
*cpus = p->cpus;
if (sockets)
*sockets = p->sockets;
if (cores)
*cores = p->cores;
if (threads)
*threads = p->threads;
slurm_conf_unlock();
return SLURM_SUCCESS;
}
p = p->next_alias;
}
slurm_conf_unlock();
return SLURM_FAILURE;
}
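/*
 * Usage sketch (hypothetical node name): any OUT pointer may be NULL
 * when that value is not wanted:
 *
 *	uint16_t sockets, cores, threads;
 *	if (slurm_conf_get_cpus_sct("lx0001", NULL, &sockets,
 *				    &cores, &threads) == SLURM_SUCCESS)
 *		debug("lx0001: %u sockets x %u cores x %u threads",
 *		      sockets, cores, threads);
 */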
/* gethostname_short - equivalent to gethostname, but return only the first
* component of the fully qualified name
* (e.g. "linux123.foo.bar" becomes "linux123")
 * OUT name - buffer to receive the short hostname
 * IN len - size of the name buffer in bytes
*/
int
gethostname_short (char *name, size_t len)
{
int error_code, name_len;
char *dot_ptr, path_name[1024];
error_code = gethostname (path_name, sizeof(path_name));
if (error_code)
return error_code;
dot_ptr = strchr (path_name, '.');
if (dot_ptr == NULL)
dot_ptr = path_name + strlen(path_name);
else
dot_ptr[0] = '\0';
	name_len = (dot_ptr - path_name);
	if ((name_len + 1) > len)	/* leave room for the trailing '\0' */
		return ENAMETOOLONG;
strcpy (name, path_name);
return 0;
}
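/*
 * Usage sketch: the buffer must hold the short name plus its
 * terminating NUL, otherwise ENAMETOOLONG is returned:
 *
 *	char name[64];
 *	if (gethostname_short(name, sizeof(name)) == 0)
 *		debug("short hostname: %s", name);
 */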
/*
* free_slurm_conf - free all storage associated with a slurm_ctl_conf_t.
* IN/OUT ctl_conf_ptr - pointer to data structure to be freed
* IN purge_node_hash - purge system-wide node hash table if set,
* set to zero if clearing private copy of config data
*/
extern void
free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash)
{
xfree (ctl_conf_ptr->authtype);
xfree (ctl_conf_ptr->checkpoint_type);
xfree (ctl_conf_ptr->backup_addr);
xfree (ctl_conf_ptr->backup_controller);
xfree (ctl_conf_ptr->control_addr);
xfree (ctl_conf_ptr->control_machine);
xfree (ctl_conf_ptr->epilog);
xfree (ctl_conf_ptr->job_acct_logfile);
xfree (ctl_conf_ptr->job_acct_type);
xfree (ctl_conf_ptr->job_comp_loc);
xfree (ctl_conf_ptr->job_comp_type);
xfree (ctl_conf_ptr->job_credential_private_key);
xfree (ctl_conf_ptr->job_credential_public_certificate);
xfree (ctl_conf_ptr->mail_prog);
xfree (ctl_conf_ptr->mpi_default);
xfree (ctl_conf_ptr->plugindir);
xfree (ctl_conf_ptr->plugstack);
xfree (ctl_conf_ptr->proctrack_type);
xfree (ctl_conf_ptr->prolog);
xfree (ctl_conf_ptr->propagate_rlimits_except);
xfree (ctl_conf_ptr->propagate_rlimits);
xfree (ctl_conf_ptr->schedtype);
xfree (ctl_conf_ptr->select_type);
xfree (ctl_conf_ptr->slurm_conf);
xfree (ctl_conf_ptr->slurm_user_name);
xfree (ctl_conf_ptr->slurmctld_logfile);
xfree (ctl_conf_ptr->slurmctld_pidfile);
xfree (ctl_conf_ptr->slurmd_logfile);
xfree (ctl_conf_ptr->slurmd_pidfile);
xfree (ctl_conf_ptr->slurmd_spooldir);
xfree (ctl_conf_ptr->state_save_location);
xfree (ctl_conf_ptr->switch_type);
xfree (ctl_conf_ptr->tmp_fs);
xfree (ctl_conf_ptr->task_epilog);
xfree (ctl_conf_ptr->task_prolog);
xfree (ctl_conf_ptr->task_plugin);
xfree (ctl_conf_ptr->srun_prolog);
xfree (ctl_conf_ptr->srun_epilog);
xfree (ctl_conf_ptr->node_prefix);
xfree (ctl_conf_ptr->unkillable_program);
if (purge_node_hash)
_free_name_hashtbl();
}
/*
* init_slurm_conf - initialize or re-initialize the slurm configuration
* values to defaults (NULL or NO_VAL). Note that the configuration
* file pathname (slurm_conf) is not changed.
* IN/OUT ctl_conf_ptr - pointer to data structure to be initialized
*/
void
init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
{
ctl_conf_ptr->last_update = time(NULL);
xfree (ctl_conf_ptr->authtype);
ctl_conf_ptr->cache_groups = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->checkpoint_type);
xfree (ctl_conf_ptr->backup_addr);
xfree (ctl_conf_ptr->backup_controller);
xfree (ctl_conf_ptr->control_addr);
xfree (ctl_conf_ptr->control_machine);
ctl_conf_ptr->disable_root_jobs = 0;
xfree (ctl_conf_ptr->epilog);
ctl_conf_ptr->fast_schedule = (uint16_t) NO_VAL;
ctl_conf_ptr->first_job_id = (uint32_t) NO_VAL;
ctl_conf_ptr->inactive_limit = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->job_acct_logfile);
ctl_conf_ptr->job_acct_freq = 0;
xfree (ctl_conf_ptr->job_acct_type);
xfree (ctl_conf_ptr->job_comp_loc);
xfree (ctl_conf_ptr->job_comp_type);
xfree (ctl_conf_ptr->job_credential_private_key);
xfree (ctl_conf_ptr->job_credential_public_certificate);
ctl_conf_ptr->job_file_append = (uint16_t) NO_VAL;
ctl_conf_ptr->kill_wait = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->mail_prog);
ctl_conf_ptr->max_job_cnt = (uint16_t) NO_VAL;
ctl_conf_ptr->min_job_age = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->mpi_default);
ctl_conf_ptr->msg_timeout = (uint16_t) NO_VAL;
ctl_conf_ptr->next_job_id = (uint32_t) NO_VAL;
xfree (ctl_conf_ptr->plugindir);
xfree (ctl_conf_ptr->plugstack);
xfree (ctl_conf_ptr->proctrack_type);
xfree (ctl_conf_ptr->prolog);
ctl_conf_ptr->propagate_prio_process = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->propagate_rlimits_except);
xfree (ctl_conf_ptr->propagate_rlimits);
ctl_conf_ptr->ret2service = (uint16_t) NO_VAL;
ctl_conf_ptr->schedport = (uint16_t) NO_VAL;
ctl_conf_ptr->schedrootfltr = (uint16_t) NO_VAL;
xfree( ctl_conf_ptr->schedtype );
xfree( ctl_conf_ptr->select_type );
ctl_conf_ptr->select_type_param = (uint16_t) NO_VAL;
ctl_conf_ptr->slurm_user_id = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->slurm_user_name);
ctl_conf_ptr->slurmctld_debug = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->slurmctld_logfile);
xfree (ctl_conf_ptr->slurmctld_pidfile);
ctl_conf_ptr->slurmctld_port = (uint32_t) NO_VAL;
ctl_conf_ptr->slurmctld_timeout = (uint16_t) NO_VAL;
ctl_conf_ptr->slurmd_debug = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->slurmd_logfile);
xfree (ctl_conf_ptr->slurmd_pidfile);
ctl_conf_ptr->slurmd_port = (uint32_t) NO_VAL;
xfree (ctl_conf_ptr->slurmd_spooldir);
ctl_conf_ptr->slurmd_timeout = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->state_save_location);
xfree (ctl_conf_ptr->switch_type);
xfree (ctl_conf_ptr->task_epilog);
xfree (ctl_conf_ptr->task_prolog);
xfree (ctl_conf_ptr->task_plugin);
xfree (ctl_conf_ptr->tmp_fs);
ctl_conf_ptr->wait_time = (uint16_t) NO_VAL;
xfree (ctl_conf_ptr->srun_prolog);
xfree (ctl_conf_ptr->srun_epilog);
xfree (ctl_conf_ptr->node_prefix);
ctl_conf_ptr->tree_width = (uint16_t) NO_VAL;
ctl_conf_ptr->use_pam = 0;
xfree (ctl_conf_ptr->unkillable_program);
ctl_conf_ptr->unkillable_timeout = (uint16_t) NO_VAL;
_free_name_hashtbl();
_init_name_hashtbl();
return;
}
/* caller must lock conf_lock */
static void
_init_slurm_conf(const char *file_name)
{
char *name = (char *)file_name;
/* conf_ptr = (slurm_ctl_conf_t *)xmalloc(sizeof(slurm_ctl_conf_t)); */
if (name == NULL) {
name = getenv("SLURM_CONF");
if (name == NULL)
name = default_slurm_config_file;
}
	if (conf_initialized) {
		error("the conf_hashtbl has already been initialized");
	}
conf_hashtbl = s_p_hashtbl_create(slurm_conf_options);
conf_ptr->last_update = time(NULL);
	if (s_p_parse_file(conf_hashtbl, name) == SLURM_ERROR)
		fatal("Unable to open or read the configuration file %s",
		      name);
/* s_p_dump_values(conf_hashtbl, slurm_conf_options); */
validate_and_set_defaults(conf_ptr, conf_hashtbl);
conf_ptr->slurm_conf = xstrdup(name);
}
/* caller must lock conf_lock */
static void
_destroy_slurm_conf(void)
{
s_p_hashtbl_destroy(conf_hashtbl);
if (default_nodename_tbl != NULL) {
s_p_hashtbl_destroy(default_nodename_tbl);
default_nodename_tbl = NULL;
}
if (default_partition_tbl != NULL) {
s_p_hashtbl_destroy(default_partition_tbl);
default_partition_tbl = NULL;
}
free_slurm_conf(conf_ptr, true);
conf_initialized = false;
/* xfree(conf_ptr); */
}
/*
 * slurm_conf_init - load the slurm configuration from a file.
 * IN file_name - name of the slurm configuration file to be read
 *	If file_name is NULL, then this routine tries to use
 *	the value in the SLURM_CONF env variable. Failing that,
 *	it uses the compiled-in default file name.
 *	If the conf structures have already been initialized by a call to
 *	slurm_conf_init, any subsequent calls will do nothing until
 *	slurm_conf_destroy is called.
 * RET SLURM_SUCCESS if conf file is initialized. If the slurm conf
 *	was already initialized, return SLURM_ERROR.
 */
extern int
slurm_conf_init(const char *file_name)
{
pthread_mutex_lock(&conf_lock);
if (conf_initialized) {
pthread_mutex_unlock(&conf_lock);
return SLURM_ERROR;
}
_init_slurm_conf(file_name);
conf_initialized = true;
pthread_mutex_unlock(&conf_lock);
return SLURM_SUCCESS;
}
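/*
 * Lifecycle sketch (hypothetical caller): initialize once (NULL means
 * use SLURM_CONF or the built-in default path), read fields under the
 * lock, and tear down at exit:
 *
 *	slurm_ctl_conf_t *c;
 *	slurm_conf_init(NULL);
 *	c = slurm_conf_lock();
 *	info("ControlMachine=%s", c->control_machine);
 *	slurm_conf_unlock();
 *	slurm_conf_destroy();
 */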
static int _internal_reinit(const char *file_name)
{
char *name = (char *)file_name;
if (name == NULL) {
name = getenv("SLURM_CONF");
if (name == NULL)
name = default_slurm_config_file;
}
if (conf_initialized) {
/* could check modified time on slurm.conf here */
_destroy_slurm_conf();
}
_init_slurm_conf(name);
conf_initialized = true;
return SLURM_SUCCESS;
}
/*
* slurm_conf_reinit - reload the slurm configuration from a file.
* IN file_name - name of the slurm configuration file to be read
* If file_name is NULL, then this routine tries to use
* the value in the SLURM_CONF env variable. Failing that,
* it uses the compiled-in default file name.
* Unlike slurm_conf_init, slurm_conf_reinit will always reread the
* file and reinitialize the configuration structures.
* RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR.
*/
extern int
slurm_conf_reinit(const char *file_name)
{
int rc;
pthread_mutex_lock(&conf_lock);
rc = _internal_reinit(file_name);
pthread_mutex_unlock(&conf_lock);
return rc;
}
/*
* slurm_conf_reinit_nolock - reload the slurm configuration from a file.
* This does the same thing as slurm_conf_reinit, but it performs
* no internal locking. You are responsible for calling slurm_conf_lock()
* before calling this function, and calling slurm_conf_unlock()
* afterwards.
* IN file_name - name of the slurm configuration file to be read
* If file_name is NULL, then this routine tries to use
* the value in the SLURM_CONF env variable. Failing that,
* it uses the compiled-in default file name.
 * Unlike slurm_conf_init, slurm_conf_reinit_nolock will always reread the
 * file and reinitialize the configuration structures.
* RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR.
*/
extern int
slurm_conf_reinit_nolock(const char *file_name)
{
return _internal_reinit(file_name);
}
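/*
 * Usage sketch: the caller supplies the locking around the nolock
 * variant, e.g. when the configuration lock is already held:
 *
 *	slurm_conf_lock();
 *	slurm_conf_reinit_nolock(NULL);
 *	slurm_conf_unlock();
 */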
extern void
slurm_conf_mutex_init(void)
{
pthread_mutex_init(&conf_lock, NULL);
}
extern void
slurm_conf_install_fork_handlers(void)
{
int err;
if ((err = pthread_atfork(NULL, NULL, &slurm_conf_mutex_init)))
fatal("can't install slurm_conf atfork handler");
return;
}
extern int
slurm_conf_destroy(void)
{
pthread_mutex_lock(&conf_lock);
if (!conf_initialized) {
pthread_mutex_unlock(&conf_lock);
return SLURM_SUCCESS;
}
_destroy_slurm_conf();
pthread_mutex_unlock(&conf_lock);
return SLURM_SUCCESS;
}
extern slurm_ctl_conf_t *
slurm_conf_lock(void)
{
pthread_mutex_lock(&conf_lock);
if (!conf_initialized) {
_init_slurm_conf(NULL);
conf_initialized = true;
}
return conf_ptr;
}
extern void
slurm_conf_unlock(void)
{
pthread_mutex_unlock(&conf_lock);
}
/* Normalize supplied debug level to be in range per log.h definitions */
static void _normalize_debug_level(uint16_t *level)
{
	if (*level >= LOG_LEVEL_END) {
		error("Normalizing debug level from %u to %d",
		      *level, (LOG_LEVEL_END - 1));
		*level = (LOG_LEVEL_END - 1);
	}
	/* level is a uint16_t, so it can never drop below LOG_LEVEL_QUIET(0) */
}
/*
 * validate_and_set_defaults - validate the configuration values loaded
 *	from the hash table and supply defaults for any that are missing
 * IN/OUT ctl_conf_ptr - a configuration as loaded by read_slurm_conf_ctl
 *
 * NOTE: a backup_controller or control_machine of "localhost" is
 *	over-written with this machine's name.
 * NOTE: if backup_addr is NULL, it is over-written by backup_controller
 * NOTE: if control_addr is NULL, it is over-written by control_machine
 */
static void
validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
{
char *temp_str = NULL;
bool truth;
if (s_p_get_string(&conf->backup_controller, "BackupController",
hashtbl)
&& strcasecmp("localhost", conf->backup_controller) == 0) {
xfree(conf->backup_controller);
conf->backup_controller = xmalloc (MAX_SLURM_NAME);
if (gethostname_short(conf->backup_controller, MAX_SLURM_NAME))
			fatal("gethostname_short: %m");
}
if (s_p_get_string(&conf->backup_addr, "BackupAddr", hashtbl)) {
if (conf->backup_controller == NULL) {
error("BackupAddr specified without BackupController");
xfree(conf->backup_addr);
}
} else {
if (conf->backup_controller != NULL)
conf->backup_addr = xstrdup(conf->backup_controller);
}
if (!s_p_get_string(&conf->control_machine, "ControlMachine", hashtbl))
fatal ("validate_and_set_defaults: "
"ControlMachine not specified.");
else if (strcasecmp("localhost", conf->control_machine) == 0) {
xfree (conf->control_machine);
conf->control_machine = xmalloc(MAX_SLURM_NAME);
if (gethostname_short(conf->control_machine, MAX_SLURM_NAME))
			fatal("gethostname_short: %m");
}
if (!s_p_get_string(&conf->control_addr, "ControlAddr", hashtbl)
&& conf->control_machine != NULL)
conf->control_addr = xstrdup (conf->control_machine);
if ((conf->backup_controller != NULL)
&& (strcmp(conf->backup_controller, conf->control_machine) == 0)) {
error("ControlMachine and BackupController identical");
xfree(conf->backup_addr);
xfree(conf->backup_controller);
}
if (!s_p_get_string(&conf->job_credential_private_key,
"JobCredentialPrivateKey", hashtbl))
fatal("JobCredentialPrivateKey not set");
if (!s_p_get_string(&conf->job_credential_public_certificate,
"JobCredentialPublicCertificate", hashtbl))
fatal("JobCredentialPublicCertificate not set");
if (s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl)
&& conf->max_job_cnt < 1)
fatal("MaxJobCount=%u, No jobs permitted", conf->max_job_cnt);
if (!s_p_get_string(&conf->authtype, "AuthType", hashtbl))
conf->authtype = xstrdup(DEFAULT_AUTH_TYPE);
if (!s_p_get_uint16(&conf->cache_groups, "CacheGroups", hashtbl))
conf->cache_groups = DEFAULT_CACHE_GROUPS;
if (!s_p_get_string(&conf->checkpoint_type, "CheckpointType", hashtbl))
conf->checkpoint_type = xstrdup(DEFAULT_CHECKPOINT_TYPE);
if (!s_p_get_boolean((bool *) &conf->disable_root_jobs,
"DisableRootJobs", hashtbl))
conf->disable_root_jobs = DEFAULT_DISABLE_ROOT_JOBS;
s_p_get_string(&conf->epilog, "Epilog", hashtbl);
if (!s_p_get_uint16(&conf->fast_schedule, "FastSchedule", hashtbl))
conf->fast_schedule = DEFAULT_FAST_SCHEDULE;
if (!s_p_get_uint32(&conf->first_job_id, "FirstJobId", hashtbl))
conf->first_job_id = DEFAULT_FIRST_JOB_ID;
if (s_p_get_uint16(&conf->inactive_limit, "InactiveLimit", hashtbl)) {
#ifdef HAVE_BG
		/* InactiveLimit must be zero on Blue Gene */
		if (conf->inactive_limit) {
			error("InactiveLimit=%u is invalid on Blue Gene",
			      conf->inactive_limit);
		}
		conf->inactive_limit = 0;
#endif
	} else {
#ifdef HAVE_BG
		conf->inactive_limit = 0;
#else
		conf->inactive_limit = DEFAULT_INACTIVE_LIMIT;
#endif
	}
if (!s_p_get_string(&conf->job_acct_logfile,
"JobAcctLogFile", hashtbl))
conf->job_acct_logfile = xstrdup(DEFAULT_JOB_ACCT_LOGFILE);
if (!s_p_get_uint16(&conf->job_acct_freq, "JobAcctFrequency", hashtbl))
conf->job_acct_freq = DEFAULT_JOB_ACCT_FREQ;
if (!s_p_get_string(&conf->job_acct_type, "JobAcctType", hashtbl))
conf->job_acct_type = xstrdup(DEFAULT_JOB_ACCT_TYPE);
s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl);
if (!s_p_get_string(&conf->job_comp_type, "JobCompType", hashtbl))
conf->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE);
if (!s_p_get_uint16(&conf->job_file_append, "JobFileAppend", hashtbl))
conf->job_file_append = 0;
if (!s_p_get_uint16(&conf->get_env_timeout, "GetEnvTimeout", hashtbl))
conf->get_env_timeout = DEFAULT_GET_ENV_TIMEOUT;
if (!s_p_get_uint16(&conf->kill_wait, "KillWait", hashtbl))
conf->kill_wait = DEFAULT_KILL_WAIT;
if (!s_p_get_string(&conf->mail_prog, "MailProg", hashtbl))
conf->mail_prog = xstrdup(DEFAULT_MAIL_PROG);
if (!s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl))
conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT;
if (!s_p_get_uint16(&conf->msg_timeout, "MessageTimeout", hashtbl))
conf->msg_timeout = DEFAULT_MSG_TIMEOUT;
else if (conf->msg_timeout > 100)
info("WARNING: MessageTimeout is too high for effective fault-tolerance");
if (!s_p_get_uint16(&conf->min_job_age, "MinJobAge", hashtbl))
conf->min_job_age = DEFAULT_MIN_JOB_AGE;
if (!s_p_get_string(&conf->mpi_default, "MpiDefault", hashtbl))
conf->mpi_default = xstrdup(DEFAULT_MPI_DEFAULT);
if (!s_p_get_string(&conf->plugindir, "PluginDir", hashtbl))
conf->plugindir = xstrdup(default_plugin_path);
if (!s_p_get_string(&conf->plugstack, "PlugStackConfig", hashtbl))
conf->plugstack = xstrdup(default_plugstack);
if (!s_p_get_string(&conf->switch_type, "SwitchType", hashtbl))
conf->switch_type = xstrdup(DEFAULT_SWITCH_TYPE);
if (!s_p_get_string(&conf->proctrack_type, "ProctrackType", hashtbl)) {
if (!strcmp(conf->switch_type,"switch/elan"))
conf->proctrack_type = xstrdup("proctrack/rms");
else
conf->proctrack_type =
xstrdup(DEFAULT_PROCTRACK_TYPE);
}
if ((!strcmp(conf->switch_type, "switch/elan"))
&& (!strcmp(conf->proctrack_type,"proctrack/linuxproc")))
		fatal("proctrack/linuxproc is incompatible with switch/elan");
s_p_get_string(&conf->prolog, "Prolog", hashtbl);
if (!s_p_get_uint16(&conf->propagate_prio_process,
"PropagatePrioProcess", hashtbl)) {
conf->propagate_prio_process = DEFAULT_PROPAGATE_PRIO_PROCESS;
} else if (conf->propagate_prio_process > 1) {
fatal("Bad PropagatePrioProcess: %u",
conf->propagate_prio_process);
}
if (s_p_get_string(&conf->propagate_rlimits_except,
"PropagateResourceLimitsExcept", hashtbl)) {
if ((parse_rlimits(conf->propagate_rlimits_except,
NO_PROPAGATE_RLIMITS)) < 0)
fatal("Bad PropagateResourceLimitsExcept: %s",
conf->propagate_rlimits_except);
} else {
if (!s_p_get_string(&conf->propagate_rlimits,
"PropagateResourceLimits", hashtbl))
conf->propagate_rlimits = xstrdup( "ALL" );
if ((parse_rlimits(conf->propagate_rlimits,
PROPAGATE_RLIMITS )) < 0)
fatal("Bad PropagateResourceLimits: %s",
conf->propagate_rlimits);
}
if (!s_p_get_uint16(&conf->ret2service, "ReturnToService", hashtbl))
conf->ret2service = DEFAULT_RETURN_TO_SERVICE;
if (s_p_get_uint16(&conf->schedport, "SchedulerPort", hashtbl)) {
if (conf->schedport == 0) {
error("SchedulerPort=0 is invalid");
conf->schedport = DEFAULT_SCHEDULER_PORT;
}
} else {
conf->schedport = DEFAULT_SCHEDULER_PORT;
}
if (!s_p_get_uint16(&conf->schedrootfltr,
"SchedulerRootFilter", hashtbl))
conf->schedrootfltr = DEFAULT_SCHEDROOTFILTER;
if (!s_p_get_string(&conf->schedtype, "SchedulerType", hashtbl))
conf->schedtype = xstrdup(DEFAULT_SCHEDTYPE);
if (!s_p_get_string(&conf->select_type, "SelectType", hashtbl))
conf->select_type = xstrdup(DEFAULT_SELECT_TYPE);
	if (s_p_get_string(&temp_str,
			   "SelectTypeParameters", hashtbl)) {
		select_type_plugin_info_t type_param;
		if (parse_select_type_param(temp_str, &type_param) < 0)
			fatal("Bad SelectTypeParameters: %s", temp_str);
		conf->select_type_param = (uint16_t) type_param;
		xfree(temp_str);
} else {
if (strcmp(conf->select_type,"select/cons_res") == 0)
conf->select_type_param = CR_CPU;
else
conf->select_type_param = SELECT_TYPE_INFO_NONE;
}
if (!s_p_get_string( &conf->slurm_user_name, "SlurmUser", hashtbl)) {
conf->slurm_user_name = xstrdup("root");
conf->slurm_user_id = 0;
} else {
uid_t my_uid = uid_from_string(conf->slurm_user_name);
if (my_uid == (uid_t) -1) {
error ("Invalid user for SlurmUser %s, ignored",
conf->slurm_user_name);
xfree(conf->slurm_user_name);
} else {
if (my_uid > 0xffff)
error("SlurmUser numeric overflow, "
"will be fixed soon");
else
conf->slurm_user_id = my_uid;
}
}
if (s_p_get_uint16(&conf->slurmctld_debug, "SlurmctldDebug", hashtbl))
_normalize_debug_level(&conf->slurmctld_debug);
else
conf->slurmctld_debug = LOG_LEVEL_INFO;
if (!s_p_get_string(&conf->slurmctld_pidfile,
"SlurmctldPidFile", hashtbl))
conf->slurmctld_pidfile = xstrdup(DEFAULT_SLURMCTLD_PIDFILE);
s_p_get_string(&conf->slurmctld_logfile, "SlurmctldLogFile", hashtbl);
if (!s_p_get_uint32(&conf->slurmctld_port, "SlurmctldPort", hashtbl))
conf->slurmctld_port = SLURMCTLD_PORT;
if (!s_p_get_uint16(&conf->slurmctld_timeout,
"SlurmctldTimeout", hashtbl))
conf->slurmctld_timeout = DEFAULT_SLURMCTLD_TIMEOUT;
if (s_p_get_uint16(&conf->slurmd_debug, "SlurmdDebug", hashtbl))
_normalize_debug_level(&conf->slurmd_debug);
else
conf->slurmd_debug = LOG_LEVEL_INFO;
s_p_get_string(&conf->slurmd_logfile, "SlurmdLogFile", hashtbl);
if (!s_p_get_string(&conf->slurmd_pidfile, "SlurmdPidFile", hashtbl))
conf->slurmd_pidfile = xstrdup(DEFAULT_SLURMD_PIDFILE);
if (!s_p_get_uint32(&conf->slurmd_port, "SlurmdPort", hashtbl))
conf->slurmd_port = SLURMD_PORT;
if (!s_p_get_string(&conf->slurmd_spooldir, "SlurmdSpoolDir", hashtbl))
conf->slurmd_spooldir = xstrdup(DEFAULT_SPOOLDIR);
if (!s_p_get_uint16(&conf->slurmd_timeout, "SlurmdTimeout", hashtbl))
conf->slurmd_timeout = DEFAULT_SLURMD_TIMEOUT;
s_p_get_string(&conf->srun_prolog, "SrunProlog", hashtbl);
s_p_get_string(&conf->srun_epilog, "SrunEpilog", hashtbl);
if (!s_p_get_string(&conf->state_save_location,
"StateSaveLocation", hashtbl))
conf->state_save_location = xstrdup(DEFAULT_SAVE_STATE_LOC);
/* see above for switch_type, order dependent */
if (!s_p_get_string(&conf->task_plugin, "TaskPlugin", hashtbl))
conf->task_plugin = xstrdup(DEFAULT_TASK_PLUGIN);
if (s_p_get_string(&temp_str, "TaskPluginParam", hashtbl)) {
if (strcasecmp(temp_str, "cpusets") == 0)
conf->task_plugin_param = TASK_PARAM_CPUSETS;
else if (strcasecmp(temp_str, "sched") == 0)
conf->task_plugin_param = TASK_PARAM_SCHED;
		else
			fatal("Bad TaskPluginParam: %s", temp_str);
xfree(temp_str);
}
s_p_get_string(&conf->task_epilog, "TaskEpilog", hashtbl);
s_p_get_string(&conf->task_prolog, "TaskProlog", hashtbl);
if (!s_p_get_string(&conf->tmp_fs, "TmpFS", hashtbl))
conf->tmp_fs = xstrdup(DEFAULT_TMP_FS);
if (!s_p_get_uint16(&conf->wait_time, "WaitTime", hashtbl))
conf->wait_time = DEFAULT_WAIT_TIME;
if (s_p_get_uint16(&conf->tree_width, "TreeWidth", hashtbl)) {
if (conf->tree_width == 0) {
error("TreeWidth=0 is invalid");
			conf->tree_width = DEFAULT_TREE_WIDTH;
}
} else {
conf->tree_width = DEFAULT_TREE_WIDTH;
}
if (s_p_get_boolean(&truth, "UsePAM", hashtbl) && truth) {
conf->use_pam = 1;
} else {
conf->use_pam = 0;
}
s_p_get_string(&conf->unkillable_program,
"UnkillableStepProgram", hashtbl);
if (!s_p_get_uint16(&conf->unkillable_timeout,
"UnkillableStepTimeout", hashtbl))
conf->unkillable_timeout = DEFAULT_UNKILLABLE_TIMEOUT;
}
/*
* Replace first "%h" in path string with NodeHostname.
* Replace first "%n" in path string with NodeName.
*
* NOTE: Caller should be holding slurm_conf_lock() when calling this function.
*
* Returns an xmalloc()ed string which the caller must free with xfree().
*/
extern char *
slurm_conf_expand_slurmd_path(const char *path, const char *node_name)
{
char *hostname;
char *dir = NULL;
dir = xstrdup(path);
hostname = _internal_get_hostname(node_name);
xstrsubstitute(dir, "%h", hostname);
xfree(hostname);
xstrsubstitute(dir, "%n", node_name);
return dir;
}
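/*
 * Usage sketch (hypothetical path and node name; hold slurm_conf_lock()
 * across the call per the note above): with NodeName "lx0001", the
 * pattern below expands to "/var/spool/slurmd.lx0001"; the caller must
 * xfree() the result:
 *
 *	char *dir = slurm_conf_expand_slurmd_path("/var/spool/slurmd.%n",
 *						  "lx0001");
 *	...
 *	xfree(dir);
 */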