blob: d4b14a26ce6080f150442a3d1c2e2136557c69df [file] [log] [blame]
/*****************************************************************************\
* opt.c - options processing for srun
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Mark Grondona <grondona1@llnl.gov>, et. al.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h> /* strcpy, strncasecmp */
#include <ctype.h> /* isdigit() */
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#if HAVE_GETOPT_H
# include <getopt.h>
#else
# include "src/common/getopt.h"
#endif
#ifdef HAVE_LIMITS_H
# include <limits.h>
#endif
#include <fcntl.h>
#include <stdarg.h> /* va_start */
#include <stdio.h>
#include <stdlib.h> /* getenv */
#include <sys/param.h> /* MAXPATHLEN */
#include <unistd.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include "src/common/cpu_frequency.h"
#include "src/common/list.h"
#include "src/common/log.h"
#include "src/common/mpi.h"
#include "src/common/optz.h"
#include "src/common/parse_time.h"
#include "src/common/plugstack.h"
#include "src/common/proc_args.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/slurm_protocol_interface.h"
#include "src/common/slurm_rlimits_info.h"
#include "src/common/slurm_resource_info.h"
#include "src/common/slurm_acct_gather_profile.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/common/util-net.h"
#include "src/api/pmi_server.h"
#include "debugger.h"
#include "launch.h"
#include "multi_prog.h"
#include "opt.h"
/*
 * Generic OPT_ type codes -- mainly for use with env vars.
 *
 * Every env_vars[] entry carries one of these codes in its "type" field and
 * _process_env_var() switches on it to decide how the value is applied.
 * The codes are plain identifiers, not bit flags; 0x07 and 0x1b-0x1f are
 * unassigned.
 *
 * As far as the visible part of this file shows:
 *  - OPT_MULTI, OPT_NSOCKETS, OPT_NCORES and OPT_NTHREADS are used by
 *    env_vars[] entries but have no case in _process_env_var(), so those
 *    variables fall through to its "do nothing" default.
 *  - OPT_EXPORT has a case in _process_env_var() but no env_vars[] entry
 *    uses it (SLURM_EXPORT_ENV is registered as OPT_STRING).
 *  - OPT_NONE, OPT_ACCTG_FREQ, OPT_WCKEY and OPT_CORE_SPEC are not
 *    referenced by the table or the switch.
 */
#define OPT_NONE 0x00
#define OPT_INT 0x01
#define OPT_STRING 0x02
#define OPT_IMMEDIATE 0x03
#define OPT_DISTRIB 0x04
#define OPT_NODES 0x05
#define OPT_OVERCOMMIT 0x06
#define OPT_CONN_TYPE 0x08
#define OPT_RESV_PORTS 0x09
#define OPT_NO_ROTATE 0x0a
#define OPT_GEOMETRY 0x0b
#define OPT_MPI 0x0c
#define OPT_CPU_BIND 0x0d
#define OPT_MEM_BIND 0x0e
#define OPT_MULTI 0x0f
#define OPT_NSOCKETS 0x10
#define OPT_NCORES 0x11
#define OPT_NTHREADS 0x12
#define OPT_EXCLUSIVE 0x13
#define OPT_OPEN_MODE 0x14
#define OPT_ACCTG_FREQ 0x15
#define OPT_WCKEY 0x16
#define OPT_SIGNAL 0x17
#define OPT_TIME_VAL 0x18
#define OPT_CPU_FREQ 0x19
#define OPT_CORE_SPEC 0x1a
#define OPT_PROFILE 0x20
#define OPT_EXPORT 0x21
#define OPT_HINT 0x22
/*
 * Generic getopt_long flags: integers that are *not* valid characters.
 *
 * These are the "val" members of the long_options[] entries in
 * _set_options() for options that have no single-letter form. They start at
 * 0x100 so they cannot collide with any single-character option code.
 * The numbering has gaps; the values only need to be unique.
 */
#define LONG_OPT_HELP 0x100
#define LONG_OPT_USAGE 0x101
#define LONG_OPT_XTO 0x102
#define LONG_OPT_LAUNCH 0x103
#define LONG_OPT_TIMEO 0x104
#define LONG_OPT_JOBID 0x105
#define LONG_OPT_TMP 0x106
#define LONG_OPT_MEM 0x107
#define LONG_OPT_MINCPUS 0x108
#define LONG_OPT_CONT 0x109
#define LONG_OPT_UID 0x10a
#define LONG_OPT_GID 0x10b
#define LONG_OPT_MPI 0x10c
#define LONG_OPT_RESV_PORTS 0x10d
#define LONG_OPT_DEBUG_TS 0x110
#define LONG_OPT_CONNTYPE 0x111
#define LONG_OPT_TEST_ONLY 0x113
#define LONG_OPT_NETWORK 0x114
#define LONG_OPT_EXCLUSIVE 0x115
#define LONG_OPT_PROPAGATE 0x116
#define LONG_OPT_PROLOG 0x117
#define LONG_OPT_EPILOG 0x118
#define LONG_OPT_BEGIN 0x119
#define LONG_OPT_MAIL_TYPE 0x11a
#define LONG_OPT_MAIL_USER 0x11b
#define LONG_OPT_TASK_PROLOG 0x11c
#define LONG_OPT_TASK_EPILOG 0x11d
#define LONG_OPT_NICE 0x11e
#define LONG_OPT_CPU_BIND 0x11f
#define LONG_OPT_MEM_BIND 0x120
#define LONG_OPT_MULTI 0x122
#define LONG_OPT_COMMENT 0x124
#define LONG_OPT_QOS 0x127
#define LONG_OPT_SOCKETSPERNODE 0x130
#define LONG_OPT_CORESPERSOCKET 0x131
#define LONG_OPT_THREADSPERCORE 0x132
#define LONG_OPT_MINSOCKETS 0x133
#define LONG_OPT_MINCORES 0x134
#define LONG_OPT_MINTHREADS 0x135
#define LONG_OPT_NTASKSPERNODE 0x136
#define LONG_OPT_NTASKSPERSOCKET 0x137
#define LONG_OPT_NTASKSPERCORE 0x138
#define LONG_OPT_MEM_PER_CPU 0x13a
#define LONG_OPT_HINT 0x13b
#define LONG_OPT_BLRTS_IMAGE 0x140
#define LONG_OPT_LINUX_IMAGE 0x141
#define LONG_OPT_MLOADER_IMAGE 0x142
#define LONG_OPT_RAMDISK_IMAGE 0x143
#define LONG_OPT_REBOOT 0x144
#define LONG_OPT_GET_USER_ENV 0x145
#define LONG_OPT_PTY 0x146
#define LONG_OPT_CHECKPOINT 0x147
#define LONG_OPT_CHECKPOINT_DIR 0x148
#define LONG_OPT_OPEN_MODE 0x149
#define LONG_OPT_ACCTG_FREQ 0x14a
#define LONG_OPT_WCKEY 0x14b
#define LONG_OPT_RESERVATION 0x14c
#define LONG_OPT_RESTART_DIR 0x14d
#define LONG_OPT_SIGNAL 0x14e
#define LONG_OPT_DEBUG_SLURMD 0x14f
#define LONG_OPT_TIME_MIN 0x150
#define LONG_OPT_GRES 0x151
#define LONG_OPT_ALPS 0x152
#define LONG_OPT_REQ_SWITCH 0x153
#define LONG_OPT_LAUNCHER_OPTS 0x154
#define LONG_OPT_CPU_FREQ 0x155
#define LONG_OPT_LAUNCH_CMD 0x156
#define LONG_OPT_PROFILE 0x157
#define LONG_OPT_EXPORT 0x158
#define LONG_OPT_PRIORITY 0x160
/* process environment, provided by the C runtime */
extern char **environ;
/*---- global variables, defined in opt.h ----*/
/* NOTE(review): presumably opt.h only *declares* these and this file holds
 * the definitions -- confirm against opt.h */
/* verbosity level; initialize_and_process_args() dumps all options when
 * it is greater than 3 */
int _verbose;
/* the option state filled in by defaults, environment and command line */
opt_t opt;
/* exit status used when option processing fails */
int error_exit = 1;
int immediate_exit = 1;
/* MPI plugin name; set from SLURM_MPI_TYPE in _process_env_var() */
char *mpi_type = NULL;
resource_allocation_response_msg_t *global_resp = NULL;
/*---- file-local state and forward declarations of static functions ----*/
/* set to true once mpi_hook_client_init() has succeeded */
static bool mpi_initialized = false;
/* the struct itself is defined further down, next to env_vars[] */
typedef struct env_vars env_vars_t;
/* work out a task count when the user did not give one */
static int _get_task_count(void);
/* Get a decimal integer from arg */
static int _get_int(const char *arg, const char *what, bool positive);
static void _help(void);
/* fill in default options */
static void _opt_default(void);
/* set options based upon env vars */
static void _opt_env(void);
/* set options based upon the command line */
static void _opt_args(int argc, char **argv);
/* list known options and their settings */
static void _opt_list(void);
/* verify options sanity */
static bool _opt_verify(void);
/* apply one environment variable described by an env_vars[] entry */
static void _process_env_var(env_vars_t *e, const char *val);
static bool _under_parallel_debugger(void);
static void _usage(void);
/* check (and possibly expand) a user supplied node list */
static bool _valid_node_list(char **node_list_pptr);
/*---[ end forward declarations of static functions ]---------------------*/
int initialize_and_process_args(int argc, char *argv[])
{
/* initialize option defaults */
_opt_default();
/* initialize options with env vars */
_opt_env();
/* initialize options with argv */
_opt_args(argc, argv);
if (!_opt_verify())
exit(error_exit);
if (_verbose > 3)
_opt_list();
if (opt.launch_cmd) {
char *launch_type = slurm_get_launch_type();
if (!strcmp(launch_type, "launch/slurm")) {
error("--launch-cmd option is invalid with %s",
launch_type);
xfree(launch_type);
exit(1);
}
xfree(launch_type);
/* Massage ntasks value earlier than normal */
if (!opt.ntasks_set)
opt.ntasks = _get_task_count();
launch_g_create_job_step(NULL, 0, NULL, NULL);
exit(0);
}
return 1;
}
/*
 * _get_task_count(): work out how many tasks to launch when the user did
 * not say.
 *
 * Order of preference:
 *  1. ntasks_per_node is set: min_nodes * ntasks_per_node.
 *  2. cpus_per_task is set and SLURM_JOB_CPUS_PER_NODE is in the
 *     environment: for every group in that variable, the number of tasks
 *     that fit on one node (cpus / cpus_per_task) times the number of nodes
 *     in the group, summed over all groups.
 *  3. Otherwise one task per node (min_nodes).
 *
 * SLURM_JOB_CPUS_PER_NODE is a comma separated list of groups, each of the
 * form "<cpus>" or "<cpus>(x<nodes>)", e.g. "16(x2),8".
 *
 * Every group is parsed in full: the cpu count is re-read after each comma
 * and the repeat factor applies to its own group only. A cpus_per_task that
 * is not positive is treated as 1 so the division below cannot trap.
 *
 * RET the task count. On a malformed variable an error is logged and the
 *     total accumulated from the groups before the bad one is returned.
 */
static int _get_task_count(void)
{
	char *cpus_per_node = NULL, *end_ptr = NULL, *group = NULL;
	long cpu_count, node_count;
	int cpus_per_task, total_tasks = 0;

	if (opt.ntasks_per_node != NO_VAL)
		return (opt.min_nodes * opt.ntasks_per_node);

	if (opt.cpus_set)
		cpus_per_node = getenv("SLURM_JOB_CPUS_PER_NODE");
	if (!cpus_per_node)
		return opt.min_nodes;

	/* defensive: never divide by zero or by a negative count */
	cpus_per_task = (opt.cpus_per_task > 0) ? opt.cpus_per_task : 1;

	end_ptr = cpus_per_node;
	while (end_ptr[0] != '\0') {
		/* "<cpus>" */
		group = end_ptr;
		cpu_count = strtol(group, &end_ptr, 10);

		/* optional "(x<nodes>)" repeat factor for this group */
		node_count = 1;
		if ((end_ptr != group) &&
		    (end_ptr[0] == '(') && (end_ptr[1] == 'x')) {
			end_ptr += 2;
			node_count = strtol(end_ptr, &end_ptr, 10);
			if (end_ptr[0] == ')')
				end_ptr++;
		}

		/* a group must start with digits and end at ',' or the
		 * end of the string */
		if ((end_ptr == group) ||
		    ((end_ptr[0] != ',') && (end_ptr[0] != '\0'))) {
			error("Invalid value for environment variable "
			      "SLURM_JOB_CPUS_PER_NODE (%s)",
			      cpus_per_node);
			break;
		}

		total_tasks += (int) ((cpu_count / cpus_per_task) *
				      node_count);

		if (end_ptr[0] == ',')
			end_ptr++;
	}
	return total_tasks;
}
/*
 * _valid_node_list(): validate a user supplied node list.
 *
 * The real work is done by verify_node_list(); this wrapper only decides
 * how many entries are wanted and passes that along with the current
 * distribution. If the list names a file it is translated into a list of
 * nodes (the string previously pointed to is not released here).
 *
 * IN/OUT node_list_pptr - node list, or file name to be replaced by one
 * RET true if the node list is a valid one
 */
static bool _valid_node_list(char **node_list_pptr)
{
	/*
	 * With an arbitrary layout and an explicit task count we need
	 * exactly that many entries, because the user is dictating the
	 * layout. The same holds for an explicit node count, where the
	 * maximum takes precedence over the minimum. In every other case
	 * NO_VAL lets the whole hostfile be read.
	 */
	int wanted = NO_VAL;

	if (opt.ntasks_set) {
		wanted = opt.ntasks;
	} else if (opt.nodes_set && opt.max_nodes) {
		wanted = opt.max_nodes;
	} else if (opt.nodes_set && opt.min_nodes) {
		wanted = opt.min_nodes;
	}

	return verify_node_list(node_list_pptr, opt.distribution, wanted);
}
/*
 * argerror(): report an option-processing error.
 *
 * Two implementations are provided, selected by USE_ARGERROR:
 *  - a private printf-style function that writes
 *    "<progname>: <message>\n" to stderr, using "srun" when opt.progname
 *    has not been set yet and truncating the message to a 256 byte buffer;
 *  - a plain alias for error().
 *
 * USE_ARGERROR is explicitly undefined just below, so the #if always
 * evaluates to false and the alias is what actually gets compiled.
 */
#undef USE_ARGERROR
#if USE_ARGERROR
/* prototype carries the format attribute so the compiler can type-check
 * the arguments against the format string */
static void argerror(const char *msg, ...)
__attribute__ ((format (printf, 1, 2)));
static void argerror(const char *msg, ...)
{
va_list ap;
char buf[256];
va_start(ap, msg);
/* format into a bounded buffer first; overlong messages are truncated */
vsnprintf(buf, sizeof(buf), msg, ap);
fprintf(stderr, "%s: %s\n",
opt.progname ? opt.progname : "srun", buf);
va_end(ap);
}
#else
/* active branch: argerror is simply another name for error() */
# define argerror error
#endif /* USE_ARGERROR */
/*
 * _opt_default(): used by initialize_and_process_args to set defaults
 *
 * Fills the global "opt" (and _verbose) with the built-in default for every
 * option before the environment and the command line are consulted.
 * Terminates the process if the calling user cannot be resolved or the
 * current working directory cannot be determined.
 *
 * The statement order matters in two places: pmi_server_max_threads() must
 * be called after opt.max_threads is assigned, and the parallel-debugger
 * overrides must come after the regular defaults they replace.
 */
static void _opt_default()
{
char buf[MAXPATHLEN + 1];
int i;
/* ---- identity of the caller ---- */
uid_t uid = getuid();
opt.user = uid_to_string(uid);
/* NOTE(review): presumably uid_to_string() yields "nobody" for a uid it
 * cannot resolve -- confirm against src/common/uid.c */
if (strcmp(opt.user, "nobody") == 0)
fatal("Invalid user id: %u", uid);
opt.uid = uid;
opt.gid = getgid();
/* ---- working directory: defaults to where srun was started ---- */
if ((getcwd(buf, MAXPATHLEN)) == NULL) {
error("getcwd failed: %m");
exit(error_exit);
}
opt.cwd = xstrdup(buf);
opt.cwd_set = false;
opt.progname = NULL;
/* ---- task, cpu and node counts ---- */
opt.ntasks = 1;
opt.ntasks_set = false;
opt.cpus_per_task = 0;
opt.cpus_set = false;
opt.min_nodes = 1;
opt.max_nodes = 0;
opt.sockets_per_node = NO_VAL; /* requested sockets */
opt.cores_per_socket = NO_VAL; /* requested cores */
opt.threads_per_core = NO_VAL; /* requested threads */
opt.ntasks_per_node = NO_VAL; /* ntask max limits */
opt.ntasks_per_socket = NO_VAL;
opt.ntasks_per_core = NO_VAL;
opt.nodes_set = false;
opt.nodes_set_env = false;
opt.nodes_set_opt = false;
/* ---- binding and core specialization ---- */
opt.cpu_bind_type = 0;
opt.cpu_bind = NULL;
opt.mem_bind_type = 0;
opt.mem_bind = NULL;
opt.core_spec = 0;
opt.core_spec_set = false;
/* ---- time limits and checkpointing ---- */
opt.time_limit = NO_VAL;
opt.time_limit_str = NULL;
opt.time_min = NO_VAL;
opt.time_min_str = NULL;
opt.ckpt_interval = 0;
opt.ckpt_interval_str = NULL;
opt.ckpt_dir = NULL;
opt.restart_dir = NULL;
opt.partition = NULL;
/* the PMI server is told about the thread limit right away */
opt.max_threads = MAX_THREADS;
pmi_server_max_threads(opt.max_threads);
opt.relative = NO_VAL;
opt.relative_set = false;
opt.resv_port_cnt = NO_VAL;
/* ---- job identification ---- */
opt.cmd_name = NULL;
opt.job_name = NULL;
opt.job_name_set_cmd = false;
opt.job_name_set_env = false;
opt.jobid = NO_VAL;
opt.jobid_set = false;
opt.dependency = NULL;
opt.account = NULL;
opt.comment = NULL;
opt.qos = NULL;
/* ---- task layout and standard I/O ---- */
opt.distribution = SLURM_DIST_UNKNOWN;
opt.plane_size = NO_VAL;
opt.ofname = NULL;
opt.ifname = NULL;
opt.efname = NULL;
opt.labelio = false;
opt.unbuffered = false;
opt.overcommit = false;
opt.shared = (uint16_t)NO_VAL;
opt.exclusive = false;
opt.export_env = NULL;
opt.no_kill = false;
opt.kill_bad_exit = NO_VAL;
opt.immediate = 0;
opt.join = false;
/* wait time comes from the SLURM configuration */
opt.max_wait = slurm_get_wait_time();
opt.quit_on_intr = false;
opt.disable_status = false;
opt.test_only = false;
opt.preserve_env = false;
/* ---- verbosity and signalling ---- */
opt.quiet = 0;
_verbose = 0;
opt.slurmd_debug = LOG_LEVEL_QUIET;
opt.warn_flags = 0;
opt.warn_signal = 0;
opt.warn_time = 0;
/* ---- per-node resource constraints ---- */
opt.pn_min_cpus = NO_VAL;
opt.pn_min_memory = NO_VAL;
opt.mem_per_cpu = NO_VAL;
opt.pn_min_tmp_disk= NO_VAL;
opt.hold = false;
opt.constraints = NULL;
opt.gres = NULL;
opt.contiguous = false;
opt.hostfile = NULL;
opt.nodelist = NULL;
opt.exc_nodes = NULL;
opt.max_launch_time = 120;/* 120 seconds to launch job */
opt.max_exit_timeout= 60; /* Warn user 60 seconds after task exit */
/* Default launch msg timeout */
opt.msg_timeout = slurm_get_msg_timeout();
/* ---- per-dimension connection type and geometry ---- */
for (i=0; i<HIGHEST_DIMENSIONS; i++) {
opt.conn_type[i] = (uint16_t) NO_VAL;
opt.geometry[i] = 0;
}
opt.reboot = false;
opt.no_rotate = false;
opt.blrtsimage = NULL;
opt.linuximage = NULL;
opt.mloaderimage = NULL;
opt.ramdiskimage = NULL;
/* (uid_t) -1 / (gid_t) -1 mark "no effective id requested" */
opt.euid = (uid_t) -1;
opt.egid = (gid_t) -1;
opt.propagate = NULL; /* propagate specific rlimits */
opt.profile = ACCT_GATHER_PROFILE_NOT_SET;
/* prolog and epilog start out with the configured srun values */
opt.prolog = slurm_get_srun_prolog();
opt.epilog = slurm_get_srun_epilog();
opt.begin = (time_t)0;
opt.task_prolog = NULL;
opt.task_epilog = NULL;
/*
 * Reset some default values if running under a parallel debugger
 * (max_launch_time is re-assigned the same 120 seconds it already has;
 * the thread limit drops to 1 and the message timeout becomes 15)
 */
if ((opt.parallel_debug = _under_parallel_debugger())) {
opt.max_launch_time = 120;
opt.max_threads = 1;
pmi_server_max_threads(opt.max_threads);
opt.msg_timeout = 15;
}
/* ---- remaining miscellaneous options ---- */
opt.pty = false;
opt.open_mode = 0;
opt.acctg_freq = NULL;
opt.cpu_freq = NO_VAL;
opt.reservation = NULL;
opt.wckey = NULL;
opt.req_switch = -1;
opt.wait4switch = -1;
opt.launcher_opts = NULL;
opt.launch_cmd = false;
opt.nice = 0;
opt.priority = 0;
}
/*---[ env var processing ]-----------------------------------------------*/
/*
 * Try to use a scheme similar to popt.
 *
 * In order to add a new env var (to be processed like an option):
 *
 * Define a new entry in env_vars[]. If the option is a simple int
 * or string you may be able to get away with adding a pointer to the
 * option to set (types OPT_INT and OPT_STRING). Otherwise give it its own
 * OPT_ type and handle that type in the switch in _process_env_var(),
 * which _opt_env() calls for every variable found in the environment.
 */
struct env_vars {
/* name of the environment variable; NULL terminates the table */
const char *var;
/* one of the OPT_ codes, selects the handling in _process_env_var() */
int type;
/* where to store the value: used as int * for OPT_INT and as char ** for
 * OPT_STRING; NULL for types that write to a fixed field of "opt" */
void *arg;
/* optional; when non-NULL it is treated as bool * and set to true when
 * the variable is processed */
void *set_flag;
};
/*
 * Table of the environment variables srun honours, walked by _opt_env().
 * Columns: variable name, OPT_ type, target for the value, optional
 * "was set" flag. The table ends with an all-NULL sentinel.
 *
 * Things to keep in mind when editing:
 *  - OPT_INT targets are written through an int *. Several of them
 *    (disable_status, labelio, unbuffered) are initialised with "false" in
 *    _opt_default(). NOTE(review): if those fields are declared bool in
 *    opt.h the store is wider than the field -- confirm.
 *  - Some fields are reachable through two names (linuximage, ramdiskimage,
 *    jobid, ntasks, cwd); when both variables are present the entry further
 *    down the table is processed last.
 *  - Entries whose type has no case in _process_env_var() (OPT_MULTI,
 *    OPT_NSOCKETS, OPT_NCORES, OPT_NTHREADS) currently have no effect.
 */
env_vars_t env_vars[] = {
{"SLURMD_DEBUG", OPT_INT, &opt.slurmd_debug, NULL },
{"SLURM_ACCOUNT", OPT_STRING, &opt.account, NULL },
{"SLURM_ACCTG_FREQ", OPT_STRING, &opt.acctg_freq, NULL },
{"SLURM_BLRTS_IMAGE", OPT_STRING, &opt.blrtsimage, NULL },
{"SLURM_CHECKPOINT", OPT_STRING, &opt.ckpt_interval_str, NULL },
{"SLURM_CHECKPOINT_DIR",OPT_STRING, &opt.ckpt_dir, NULL },
{"SLURM_CNLOAD_IMAGE", OPT_STRING, &opt.linuximage, NULL },
{"SLURM_CONN_TYPE", OPT_CONN_TYPE, NULL, NULL },
{"SLURM_CORE_SPEC", OPT_INT, &opt.core_spec, NULL },
{"SLURM_CPUS_PER_TASK", OPT_INT, &opt.cpus_per_task, &opt.cpus_set },
{"SLURM_CPU_BIND", OPT_CPU_BIND, NULL, NULL },
{"SLURM_CPU_FREQ_REQ", OPT_CPU_FREQ, NULL, NULL },
{"SLURM_DEPENDENCY", OPT_STRING, &opt.dependency, NULL },
{"SLURM_DISABLE_STATUS",OPT_INT, &opt.disable_status,NULL },
{"SLURM_DISTRIBUTION", OPT_DISTRIB, NULL, NULL },
{"SLURM_EPILOG", OPT_STRING, &opt.epilog, NULL },
{"SLURM_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL },
{"SLURM_EXPORT_ENV", OPT_STRING, &opt.export_env, NULL },
{"SLURM_GEOMETRY", OPT_GEOMETRY, NULL, NULL },
{"SLURM_GRES", OPT_STRING, &opt.gres, NULL },
{"SLURM_HINT", OPT_HINT, NULL, NULL },
{"SLURM_IMMEDIATE", OPT_IMMEDIATE, NULL, NULL },
{"SLURM_IOLOAD_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL },
/* SLURM_JOBID was used in slurm version 1.3 and below, it is now vestigial */
{"SLURM_JOBID", OPT_INT, &opt.jobid, NULL },
{"SLURM_JOB_ID", OPT_INT, &opt.jobid, NULL },
{"SLURM_JOB_NAME", OPT_STRING, &opt.job_name, &opt.job_name_set_env},
{"SLURM_KILL_BAD_EXIT", OPT_INT, &opt.kill_bad_exit, NULL },
{"SLURM_LABELIO", OPT_INT, &opt.labelio, NULL },
{"SLURM_LINUX_IMAGE", OPT_STRING, &opt.linuximage, NULL },
{"SLURM_MEM_BIND", OPT_MEM_BIND, NULL, NULL },
{"SLURM_MEM_PER_CPU", OPT_INT, &opt.mem_per_cpu, NULL },
{"SLURM_MEM_PER_NODE", OPT_INT, &opt.pn_min_memory, NULL },
{"SLURM_MLOADER_IMAGE", OPT_STRING, &opt.mloaderimage, NULL },
{"SLURM_MPI_TYPE", OPT_MPI, NULL, NULL },
{"SLURM_NCORES_PER_SOCKET",OPT_NCORES, NULL, NULL },
{"SLURM_NETWORK", OPT_STRING, &opt.network, &opt.network_set_env},
{"SLURM_NNODES", OPT_NODES, NULL, NULL },
{"SLURM_NODELIST", OPT_STRING, &opt.alloc_nodelist,NULL },
{"SLURM_NO_ROTATE", OPT_NO_ROTATE, NULL, NULL },
{"SLURM_NTASKS", OPT_INT, &opt.ntasks, &opt.ntasks_set },
{"SLURM_NPROCS", OPT_INT, &opt.ntasks, &opt.ntasks_set },
{"SLURM_NSOCKETS_PER_NODE",OPT_NSOCKETS,NULL, NULL },
{"SLURM_NTASKS_PER_NODE", OPT_INT, &opt.ntasks_per_node, NULL },
{"SLURM_NTHREADS_PER_CORE",OPT_NTHREADS,NULL, NULL },
{"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL },
{"SLURM_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL },
{"SLURM_PARTITION", OPT_STRING, &opt.partition, NULL },
{"SLURM_PROFILE", OPT_PROFILE, NULL, NULL },
{"SLURM_PROLOG", OPT_STRING, &opt.prolog, NULL },
{"SLURM_QOS", OPT_STRING, &opt.qos, NULL },
{"SLURM_RAMDISK_IMAGE", OPT_STRING, &opt.ramdiskimage, NULL },
{"SLURM_REMOTE_CWD", OPT_STRING, &opt.cwd, NULL },
{"SLURM_RESERVATION", OPT_STRING, &opt.reservation, NULL },
{"SLURM_RESTART_DIR", OPT_STRING, &opt.restart_dir , NULL },
{"SLURM_RESV_PORTS", OPT_RESV_PORTS, NULL, NULL },
{"SLURM_SIGNAL", OPT_SIGNAL, NULL, NULL },
{"SLURM_SRUN_MULTI", OPT_MULTI, NULL, NULL },
{"SLURM_STDERRMODE", OPT_STRING, &opt.efname, NULL },
{"SLURM_STDINMODE", OPT_STRING, &opt.ifname, NULL },
{"SLURM_STDOUTMODE", OPT_STRING, &opt.ofname, NULL },
{"SLURM_TASK_EPILOG", OPT_STRING, &opt.task_epilog, NULL },
{"SLURM_TASK_PROLOG", OPT_STRING, &opt.task_prolog, NULL },
{"SLURM_THREADS", OPT_INT, &opt.max_threads, NULL },
{"SLURM_TIMELIMIT", OPT_STRING, &opt.time_limit_str,NULL },
{"SLURM_UNBUFFEREDIO", OPT_INT, &opt.unbuffered, NULL },
{"SLURM_WAIT", OPT_INT, &opt.max_wait, NULL },
{"SLURM_WCKEY", OPT_STRING, &opt.wckey, NULL },
{"SLURM_WORKING_DIR", OPT_STRING, &opt.cwd, &opt.cwd_set },
{"SLURM_REQ_SWITCH", OPT_INT, &opt.req_switch, NULL },
{"SLURM_WAIT4SWITCH", OPT_TIME_VAL, NULL, NULL },
/* sentinel: a NULL name ends the table */
{NULL, 0, NULL, NULL}
};
/*
 * _opt_env(): second stage of initialize_and_process_args.
 *
 * Walks env_vars[] from top to bottom and, for each variable that is
 * present in the environment, lets _process_env_var() apply its value.
 * Variables that are not set are skipped. See the notes above env_vars[]
 * for how to teach srun about a new variable.
 */
static void _opt_env()
{
	env_vars_t *entry;

	for (entry = env_vars; entry->var != NULL; entry++) {
		char *value = getenv(entry->var);

		if (value == NULL)
			continue;
		_process_env_var(entry, value);
	}
}
/*
 * _process_env_var(): apply the value of one environment variable.
 *
 * IN e   - the env_vars[] entry describing the variable: its OPT_ type
 *          selects the handling, "arg" is the storage for the generic
 *          OPT_INT / OPT_STRING types and "set_flag", when non-NULL, is a
 *          bool that records that the variable was seen
 * IN val - the value found in the environment
 *
 * Handling of bad values depends on the type: most types log an error and
 * leave the option untouched, while cpu/memory binding, hints, the MPI type
 * and signal specifications terminate the process with error_exit.
 * Types without a case here are silently ignored.
 *
 * Two details are deliberate:
 *  - OPT_INT parses into a temporary first. A value with trailing garbage
 *    is reported as ignored and really is ignored: neither the option nor
 *    its set_flag is touched.
 *  - OPT_STRING releases the string it replaces, as the command line
 *    handlers do. This matters for options that already hold a default
 *    (e.g. opt.cwd) or that can be reached through two variables.
 *    NOTE(review): this relies on every OPT_STRING target being NULL or
 *    xmalloc()ed; opt.prolog/opt.epilog come from slurm_get_srun_prolog()/
 *    slurm_get_srun_epilog(), which are presumed to return xmalloc()ed
 *    strings -- confirm.
 */
static void
_process_env_var(env_vars_t *e, const char *val)
{
	char *end = NULL;
	task_dist_states_t dt;

	debug2("now processing env var %s=%s", e->var, val);

	/* OPT_INT raises its flag itself, once the value has been accepted */
	if (e->set_flag && (e->type != OPT_INT))
		*((bool *) e->set_flag) = true;

	switch (e->type) {
	case OPT_STRING:
		xfree(*((char **) e->arg));
		*((char **) e->arg) = xstrdup(val);
		break;
	case OPT_INT:
		if (val != NULL) {
			long num = strtol(val, &end, 10);

			if (!(end && *end == '\0')) {
				error("%s=%s invalid. ignoring...",
				      e->var, val);
				break;
			}
			*((int *) e->arg) = (int) num;
		}
		if (e->set_flag)
			*((bool *) e->set_flag) = true;
		break;
	case OPT_DISTRIB:
		if (strcmp(val, "unknown") == 0)
			break;	/* ignore it, passed from salloc */
		dt = verify_dist_type(val, &opt.plane_size);
		if (dt == SLURM_DIST_UNKNOWN) {
			error("\"%s=%s\" -- invalid distribution type. "
			      "ignoring...", e->var, val);
		} else
			opt.distribution = dt;
		break;
	case OPT_CPU_BIND:
		if (slurm_verify_cpu_bind(val, &opt.cpu_bind,
					  &opt.cpu_bind_type))
			exit(error_exit);
		break;
	case OPT_CPU_FREQ:
		if (cpu_freq_verify_param(val, &opt.cpu_freq))
			error("Invalid --cpu-freq argument: %s. Ignored", val);
		break;
	case OPT_HINT:
		/* Keep after other options filled in */
		if (verify_hint(val,
				&opt.sockets_per_node,
				&opt.cores_per_socket,
				&opt.threads_per_core,
				&opt.ntasks_per_core,
				&opt.cpu_bind_type)) {
			exit(error_exit);
		}
		break;
	case OPT_MEM_BIND:
		if (slurm_verify_mem_bind(val, &opt.mem_bind,
					  &opt.mem_bind_type))
			exit(error_exit);
		break;
	case OPT_NODES:
		opt.nodes_set_env = get_resource_arg_range( val ,"OPT_NODES",
							     &opt.min_nodes,
							     &opt.max_nodes,
							     false);
		if (opt.nodes_set_env == false) {
			error("\"%s=%s\" -- invalid node count. ignoring...",
			      e->var, val);
		} else
			opt.nodes_set = opt.nodes_set_env;
		break;
	case OPT_OVERCOMMIT:
		/* presence of the variable is enough, its value is unused */
		opt.overcommit = true;
		break;
	case OPT_EXCLUSIVE:
		opt.exclusive = true;
		opt.shared = 0;
		break;
	case OPT_EXPORT:
		xfree(opt.export_env);
		opt.export_env = xstrdup(val);
		break;
	case OPT_RESV_PORTS:
		if (val)
			opt.resv_port_cnt = strtol(val, NULL, 10);
		else
			opt.resv_port_cnt = 0;
		break;
	case OPT_OPEN_MODE:
		/* only the first letter is looked at: a(ppend), t(runcate) */
		if ((val[0] == 'a') || (val[0] == 'A'))
			opt.open_mode = OPEN_MODE_APPEND;
		else if ((val[0] == 't') || (val[0] == 'T'))
			opt.open_mode = OPEN_MODE_TRUNCATE;
		else
			error("Invalid SLURM_OPEN_MODE: %s. Ignored", val);
		break;
	case OPT_CONN_TYPE:
		verify_conn_type(val, opt.conn_type);
		break;
	case OPT_NO_ROTATE:
		opt.no_rotate = true;
		break;
	case OPT_GEOMETRY:
		if (verify_geometry(val, opt.geometry)) {
			error("\"%s=%s\" -- invalid geometry, ignoring...",
			      e->var, val);
		}
		break;
	case OPT_IMMEDIATE:
		if (val)
			opt.immediate = strtol(val, NULL, 10);
		else
			opt.immediate = DEFAULT_IMMEDIATE;
		break;
	case OPT_MPI:
		xfree(mpi_type);
		mpi_type = xstrdup(val);
		if (mpi_hook_client_init((char *)val) == SLURM_ERROR) {
			error("\"%s=%s\" -- invalid MPI type, "
			      "--mpi=list for acceptable types.",
			      e->var, val);
			exit(error_exit);
		}
		mpi_initialized = true;
		break;
	case OPT_SIGNAL:
		if (get_signal_opts((char *)val, &opt.warn_signal,
				    &opt.warn_time, &opt.warn_flags)) {
			error("Invalid signal specification: %s", val);
			exit(error_exit);
		}
		break;
	case OPT_TIME_VAL:
		opt.wait4switch = time_str2secs(val);
		break;
	case OPT_PROFILE:
		opt.profile = acct_gather_profile_from_string((char *)val);
		break;
	default:
		/* do nothing */
		break;
	}
}
/*
 * Get a decimal integer from arg.
 *
 * IN arg      - the text to convert
 * IN what     - name of the option, used in error messages only
 * IN positive - if true, zero is rejected as well as negative numbers
 *
 * Returns the integer on success, exits program (status error_exit) on
 * failure. Failure means: no digits at all, trailing characters after the
 * number, a negative number, zero when "positive" is requested, or a value
 * that does not fit into an int.
 */
static int
_get_int(const char *arg, const char *what, bool positive)
{
	char *p = NULL;
	long int result = strtol(arg, &p, 10);

	/* p == arg: strtol() consumed nothing, e.g. an empty argument */
	if ((p == arg) || (*p != '\0') || (result < 0L)
	    || (positive && (result <= 0L))) {
		error ("Invalid numeric value \"%s\" for %s.", arg, what);
		exit(error_exit);
	}

	/*
	 * Negative values were rejected above, so only the upper bound can
	 * still be violated. Returning the truncated value would hand the
	 * caller a number the user never typed, so bail out instead.
	 */
	if (result > INT_MAX) {
		error ("Numeric argument (%ld) too big for %s.", result, what);
		exit(error_exit);
	}

	return (int) result;
}
static void _set_options(const int argc, char **argv)
{
int opt_char, option_index = 0, max_val = 0, tmp_int;
struct utsname name;
static struct option long_options[] = {
{"account", required_argument, 0, 'A'},
{"extra-node-info", required_argument, 0, 'B'},
{"cpus-per-task", required_argument, 0, 'c'},
{"constraint", required_argument, 0, 'C'},
{"dependency", required_argument, 0, 'd'},
{"chdir", required_argument, 0, 'D'},
{"error", required_argument, 0, 'e'},
{"preserve-env", no_argument, 0, 'E'},
{"preserve-slurm-env", no_argument, 0, 'E'},
{"geometry", required_argument, 0, 'g'},
{"hold", no_argument, 0, 'H'},
{"input", required_argument, 0, 'i'},
{"immediate", optional_argument, 0, 'I'},
{"join", no_argument, 0, 'j'},
{"job-name", required_argument, 0, 'J'},
{"no-kill", no_argument, 0, 'k'},
{"kill-on-bad-exit", optional_argument, 0, 'K'},
{"label", no_argument, 0, 'l'},
{"licenses", required_argument, 0, 'L'},
{"distribution", required_argument, 0, 'm'},
{"ntasks", required_argument, 0, 'n'},
{"nodes", required_argument, 0, 'N'},
{"output", required_argument, 0, 'o'},
{"overcommit", no_argument, 0, 'O'},
{"partition", required_argument, 0, 'p'},
{"quit-on-interrupt", no_argument, 0, 'q'},
{"quiet", no_argument, 0, 'Q'},
{"relative", required_argument, 0, 'r'},
{"no-rotate", no_argument, 0, 'R'},
{"share", no_argument, 0, 's'},
{"core-spec", required_argument, 0, 'S'},
{"time", required_argument, 0, 't'},
{"threads", required_argument, 0, 'T'},
{"unbuffered", no_argument, 0, 'u'},
{"verbose", no_argument, 0, 'v'},
{"version", no_argument, 0, 'V'},
{"nodelist", required_argument, 0, 'w'},
{"wait", required_argument, 0, 'W'},
{"exclude", required_argument, 0, 'x'},
{"disable-status", no_argument, 0, 'X'},
{"no-allocate", no_argument, 0, 'Z'},
{"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ},
{"alps", required_argument, 0, LONG_OPT_ALPS},
{"begin", required_argument, 0, LONG_OPT_BEGIN},
{"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE},
{"checkpoint", required_argument, 0, LONG_OPT_CHECKPOINT},
{"checkpoint-dir", required_argument, 0, LONG_OPT_CHECKPOINT_DIR},
{"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE},
{"comment", required_argument, 0, LONG_OPT_COMMENT},
{"conn-type", required_argument, 0, LONG_OPT_CONNTYPE},
{"contiguous", no_argument, 0, LONG_OPT_CONT},
{"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET},
{"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND},
{"cpu-freq", required_argument, 0, LONG_OPT_CPU_FREQ},
{"debugger-test", no_argument, 0, LONG_OPT_DEBUG_TS},
{"epilog", required_argument, 0, LONG_OPT_EPILOG},
{"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE},
{"export", required_argument, 0, LONG_OPT_EXPORT},
{"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV},
{"gid", required_argument, 0, LONG_OPT_GID},
{"gres", required_argument, 0, LONG_OPT_GRES},
{"help", no_argument, 0, LONG_OPT_HELP},
{"hint", required_argument, 0, LONG_OPT_HINT},
{"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE},
{"jobid", required_argument, 0, LONG_OPT_JOBID},
{"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE},
{"launch-cmd", no_argument, 0, LONG_OPT_LAUNCH_CMD},
{"launcher-opts", required_argument, 0, LONG_OPT_LAUNCHER_OPTS},
{"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE},
{"mail-user", required_argument, 0, LONG_OPT_MAIL_USER},
{"max-exit-timeout", required_argument, 0, LONG_OPT_XTO},
{"max-launch-time", required_argument, 0, LONG_OPT_LAUNCH},
{"mem", required_argument, 0, LONG_OPT_MEM},
{"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU},
{"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND},
{"mincores", required_argument, 0, LONG_OPT_MINCORES},
{"mincpus", required_argument, 0, LONG_OPT_MINCPUS},
{"minsockets", required_argument, 0, LONG_OPT_MINSOCKETS},
{"minthreads", required_argument, 0, LONG_OPT_MINTHREADS},
{"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE},
{"mpi", required_argument, 0, LONG_OPT_MPI},
{"msg-timeout", required_argument, 0, LONG_OPT_TIMEO},
{"multi-prog", no_argument, 0, LONG_OPT_MULTI},
{"network", required_argument, 0, LONG_OPT_NETWORK},
{"nice", optional_argument, 0, LONG_OPT_NICE},
{"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE},
{"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE},
{"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET},
{"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE},
{"priority", required_argument, 0, LONG_OPT_PRIORITY},
{"profile", required_argument, 0, LONG_OPT_PROFILE},
{"prolog", required_argument, 0, LONG_OPT_PROLOG},
{"propagate", optional_argument, 0, LONG_OPT_PROPAGATE},
{"pty", no_argument, 0, LONG_OPT_PTY},
{"qos", required_argument, 0, LONG_OPT_QOS},
{"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE},
{"reboot", no_argument, 0, LONG_OPT_REBOOT},
{"reservation", required_argument, 0, LONG_OPT_RESERVATION},
{"restart-dir", required_argument, 0, LONG_OPT_RESTART_DIR},
{"resv-ports", optional_argument, 0, LONG_OPT_RESV_PORTS},
{"runjob-opts", required_argument, 0, LONG_OPT_LAUNCHER_OPTS},
{"signal", required_argument, 0, LONG_OPT_SIGNAL},
{"slurmd-debug", required_argument, 0, LONG_OPT_DEBUG_SLURMD},
{"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
{"switches", required_argument, 0, LONG_OPT_REQ_SWITCH},
{"task-epilog", required_argument, 0, LONG_OPT_TASK_EPILOG},
{"task-prolog", required_argument, 0, LONG_OPT_TASK_PROLOG},
{"tasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE},
{"test-only", no_argument, 0, LONG_OPT_TEST_ONLY},
{"time-min", required_argument, 0, LONG_OPT_TIME_MIN},
{"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE},
{"tmp", required_argument, 0, LONG_OPT_TMP},
{"uid", required_argument, 0, LONG_OPT_UID},
{"usage", no_argument, 0, LONG_OPT_USAGE},
{"wckey", required_argument, 0, LONG_OPT_WCKEY},
{NULL, 0, 0, 0}
};
char *opt_string = "+A:B:c:C:d:D:e:Eg:hHi:I::jJ:kK::lL:m:n:N:"
"o:Op:P:qQr:RsS:t:T:uU:vVw:W:x:XZ";
char *pos_delimit;
bool ntasks_set_opt = false;
#ifdef HAVE_PTY_H
char *tmp_str;
#endif
struct option *optz = spank_option_table_create (long_options);
if (!optz) {
error("Unable to create option table");
exit(error_exit);
}
if (opt.progname == NULL)
opt.progname = xbasename(argv[0]);
else
error("opt.progname is already set.");
optind = 0;
while((opt_char = getopt_long(argc, argv, opt_string,
optz, &option_index)) != -1) {
switch (opt_char) {
case (int)'?':
fprintf(stderr,
"Try \"srun --help\" for more information\n");
exit(error_exit);
break;
case (int)'A':
case (int)'U': /* backwards compatibility */
xfree(opt.account);
opt.account = xstrdup(optarg);
break;
case (int)'B':
opt.extra_set = verify_socket_core_thread_count(
optarg,
&opt.sockets_per_node,
&opt.cores_per_socket,
&opt.threads_per_core,
&opt.cpu_bind_type);
if (opt.extra_set == false) {
error("invalid resource allocation -B `%s'",
optarg);
exit(error_exit);
}
break;
case (int)'c':
tmp_int = _get_int(optarg, "cpus-per-task", false);
if (opt.cpus_set && (tmp_int > opt.cpus_per_task)) {
info("Job step's --cpus-per-task value exceeds"
" that of job (%d > %d). Job step may "
"never run.", tmp_int, opt.cpus_per_task);
}
opt.cpus_set = true;
opt.cpus_per_task = tmp_int;
break;
case (int)'C':
xfree(opt.constraints);
opt.constraints = xstrdup(optarg);
break;
case (int)'d':
xfree(opt.dependency);
opt.dependency = xstrdup(optarg);
break;
case (int)'D':
opt.cwd_set = true;
xfree(opt.cwd);
if (is_full_path(optarg))
opt.cwd = xstrdup(optarg);
else
opt.cwd = make_full_path(optarg);
break;
case (int)'e':
if (opt.pty) {
fatal("--error incompatible with --pty "
"option");
exit(error_exit);
}
xfree(opt.efname);
if (strcasecmp(optarg, "none") == 0)
opt.efname = xstrdup("/dev/null");
else
opt.efname = xstrdup(optarg);
break;
case (int)'E':
opt.preserve_env = true;
break;
case (int)'g':
if (verify_geometry(optarg, opt.geometry))
exit(error_exit);
break;
case (int)'H':
opt.hold = true;
break;
case (int)'i':
if (opt.pty) {
fatal("--input incompatible with "
"--pty option");
exit(error_exit);
}
xfree(opt.ifname);
if (strcasecmp(optarg, "none") == 0)
opt.ifname = xstrdup("/dev/null");
else
opt.ifname = xstrdup(optarg);
break;
case (int)'I':
if (optarg)
opt.immediate = strtol(optarg, NULL, 10);
else
opt.immediate = DEFAULT_IMMEDIATE;
break;
case (int)'j':
opt.join = true;
break;
case (int)'J':
opt.job_name_set_cmd = true;
xfree(opt.job_name);
opt.job_name = xstrdup(optarg);
break;
case (int)'k':
opt.no_kill = true;
break;
case (int)'K':
if (optarg)
opt.kill_bad_exit = strtol(optarg, NULL, 10);
else
opt.kill_bad_exit = 1;
break;
case (int)'l':
opt.labelio = true;
break;
case 'L':
xfree(opt.licenses);
opt.licenses = xstrdup(optarg);
break;
case (int)'m':
opt.distribution = verify_dist_type(optarg,
&opt.plane_size);
if (opt.distribution == SLURM_DIST_UNKNOWN) {
error("distribution type `%s' "
"is not recognized", optarg);
exit(error_exit);
}
break;
case (int)'n':
ntasks_set_opt = true;
opt.ntasks_set = true;
opt.ntasks =
_get_int(optarg, "number of tasks", true);
break;
case (int)'N':
opt.nodes_set_opt =
get_resource_arg_range( optarg,
"requested node count",
&opt.min_nodes,
&opt.max_nodes, true );
if (opt.nodes_set_opt == false) {
error("invalid resource allocation -N `%s'",
optarg);
exit(error_exit);
} else
opt.nodes_set = opt.nodes_set_opt;
break;
case (int)'o':
if (opt.pty) {
error("--output incompatible with --pty "
"option");
exit(error_exit);
}
xfree(opt.ofname);
if (strcasecmp(optarg, "none") == 0)
opt.ofname = xstrdup("/dev/null");
else
opt.ofname = xstrdup(optarg);
break;
case (int)'O':
opt.overcommit = true;
break;
case (int)'p':
xfree(opt.partition);
opt.partition = xstrdup(optarg);
break;
case (int)'P':
verbose("-P option is deprecated, use -d instead");
xfree(opt.dependency);
opt.dependency = xstrdup(optarg);
break;
case (int)'q':
opt.quit_on_intr = true;
break;
case (int) 'Q':
opt.quiet++;
break;
case (int)'r':
opt.relative = _get_int(optarg, "relative", false);
opt.relative_set = true;
break;
case (int)'R':
opt.no_rotate = true;
break;
case (int)'s':
opt.shared = 1;
break;
case (int)'S':
opt.core_spec = _get_int(optarg, "core_spec", true);
opt.core_spec_set = true;
break;
case (int)'t':
xfree(opt.time_limit_str);
opt.time_limit_str = xstrdup(optarg);
break;
case (int)'T':
opt.max_threads =
_get_int(optarg, "max_threads", true);
pmi_server_max_threads(opt.max_threads);
break;
case (int)'u':
opt.unbuffered = true;
break;
case (int)'v':
_verbose++;
break;
case (int)'V':
print_slurm_version();
exit(0);
break;
case (int)'w':
xfree(opt.nodelist);
opt.nodelist = xstrdup(optarg);
break;
case (int)'W':
opt.max_wait = _get_int(optarg, "wait", false);
break;
case (int)'x':
xfree(opt.exc_nodes);
opt.exc_nodes = xstrdup(optarg);
if (!_valid_node_list(&opt.exc_nodes))
exit(error_exit);
break;
case (int)'X':
opt.disable_status = true;
break;
case (int)'Z':
opt.no_alloc = true;
uname(&name);
if (strcasecmp(name.sysname, "AIX") == 0)
opt.network = xstrdup("ip");
break;
case LONG_OPT_CONT:
opt.contiguous = true;
break;
case LONG_OPT_EXCLUSIVE:
opt.exclusive = true;
opt.shared = 0;
break;
case LONG_OPT_EXPORT:
xfree(opt.export_env);
opt.export_env = xstrdup(optarg);
break;
case LONG_OPT_CPU_BIND:
if (slurm_verify_cpu_bind(optarg, &opt.cpu_bind,
&opt.cpu_bind_type))
exit(error_exit);
break;
case LONG_OPT_LAUNCH_CMD:
opt.launch_cmd = true;
break;
case LONG_OPT_MEM_BIND:
if (slurm_verify_mem_bind(optarg, &opt.mem_bind,
&opt.mem_bind_type))
exit(error_exit);
break;
case LONG_OPT_MINCPUS:
opt.pn_min_cpus = _get_int(optarg, "mincpus", true);
break;
case LONG_OPT_MINCORES:
verbose("mincores option has been deprecated, use "
"cores-per-socket");
opt.cores_per_socket = _get_int(optarg,
"mincores", true);
if (opt.cores_per_socket < 0) {
error("invalid mincores constraint %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_MINSOCKETS:
verbose("minsockets option has been deprecated, use "
"sockets-per-node");
opt.sockets_per_node = _get_int(optarg,
"minsockets",true);
if (opt.sockets_per_node < 0) {
error("invalid minsockets constraint %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_MINTHREADS:
verbose("minthreads option has been deprecated, use "
"threads-per-core");
opt.threads_per_core = _get_int(optarg,
"minthreads",true);
if (opt.threads_per_core < 0) {
error("invalid minthreads constraint %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_MEM:
opt.pn_min_memory = (int) str_to_mbytes(optarg);
if (opt.pn_min_memory < 0) {
error("invalid memory constraint %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_MEM_PER_CPU:
opt.mem_per_cpu = (int) str_to_mbytes(optarg);
if (opt.mem_per_cpu < 0) {
error("invalid memory constraint %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_MPI:
xfree(mpi_type);
mpi_type = xstrdup(optarg);
if (mpi_hook_client_init((char *)optarg)
== SLURM_ERROR) {
error("\"--mpi=%s\" -- long invalid MPI type, "
"--mpi=list for acceptable types.",
optarg);
exit(error_exit);
}
mpi_initialized = true;
break;
case LONG_OPT_RESV_PORTS:
if (optarg)
opt.resv_port_cnt = strtol(optarg, NULL, 10);
else
opt.resv_port_cnt = 0;
break;
case LONG_OPT_TMP:
opt.pn_min_tmp_disk = str_to_mbytes(optarg);
if (opt.pn_min_tmp_disk < 0) {
error("invalid tmp value %s", optarg);
exit(error_exit);
}
break;
case LONG_OPT_JOBID:
opt.jobid = _get_int(optarg, "jobid", true);
opt.jobid_set = true;
break;
case LONG_OPT_TIMEO:
opt.msg_timeout =
_get_int(optarg, "msg-timeout", true);
break;
case LONG_OPT_LAUNCH:
opt.max_launch_time =
_get_int(optarg, "max-launch-time", true);
break;
case LONG_OPT_XTO:
opt.max_exit_timeout =
_get_int(optarg, "max-exit-timeout", true);
break;
case LONG_OPT_UID:
if (opt.euid != (uid_t) -1) {
error("duplicate --uid option");
exit(error_exit);
}
if (uid_from_string (optarg, &opt.euid) < 0) {
error("--uid=\"%s\" invalid", optarg);
exit(error_exit);
}
break;
case LONG_OPT_GID:
if (opt.egid != (gid_t) -1) {
error("duplicate --gid option");
exit(error_exit);
}
if (gid_from_string (optarg, &opt.egid) < 0) {
error("--gid=\"%s\" invalid", optarg);
exit(error_exit);
}
break;
case LONG_OPT_DEBUG_SLURMD:
if (isdigit(optarg[0]))
opt.slurmd_debug =
_get_int(optarg, "slurmd-debug", false);
else
opt.slurmd_debug = log_string2num(optarg);
break;
case LONG_OPT_DEBUG_TS:
opt.debugger_test = true;
/* make other parameters look like debugger
* is really attached */
opt.parallel_debug = true;
opt.max_launch_time = 120;
opt.max_threads = 1;
pmi_server_max_threads(opt.max_threads);
opt.msg_timeout = 15;
break;
case 'h':
case LONG_OPT_HELP:
_help();
exit(0);
case LONG_OPT_USAGE:
_usage();
exit(0);
case LONG_OPT_CONNTYPE:
verify_conn_type(optarg, opt.conn_type);
break;
case LONG_OPT_TEST_ONLY:
opt.test_only = true;
break;
case LONG_OPT_NETWORK:
xfree(opt.network);
opt.network = xstrdup(optarg);
setenv("SLURM_NETWORK", opt.network, 1);
opt.network_set_env = false;
break;
case LONG_OPT_PROPAGATE:
xfree(opt.propagate);
if (optarg)
opt.propagate = xstrdup(optarg);
else
opt.propagate = xstrdup("ALL");
break;
case LONG_OPT_PROLOG:
xfree(opt.prolog);
opt.prolog = xstrdup(optarg);
break;
case LONG_OPT_EPILOG:
xfree(opt.epilog);
opt.epilog = xstrdup(optarg);
break;
case LONG_OPT_BEGIN:
opt.begin = parse_time(optarg, 0);
if (errno == ESLURM_INVALID_TIME_VALUE) {
error("Invalid time specification %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_MAIL_TYPE:
opt.mail_type |= parse_mail_type(optarg);
if (opt.mail_type == 0) {
error("--mail-type=%s invalid", optarg);
exit(error_exit);
}
break;
case LONG_OPT_MAIL_USER:
xfree(opt.mail_user);
opt.mail_user = xstrdup(optarg);
break;
case LONG_OPT_TASK_PROLOG:
xfree(opt.task_prolog);
opt.task_prolog = xstrdup(optarg);
break;
case LONG_OPT_TASK_EPILOG:
xfree(opt.task_epilog);
opt.task_epilog = xstrdup(optarg);
break;
case LONG_OPT_NICE:
if (optarg)
opt.nice = strtol(optarg, NULL, 10);
else
opt.nice = 100;
if (abs(opt.nice) > NICE_OFFSET) {
error("Invalid nice value, must be between "
"-%d and %d", NICE_OFFSET, NICE_OFFSET);
exit(error_exit);
}
if (opt.nice < 0) {
uid_t my_uid = getuid();
if ((my_uid != 0) &&
(my_uid != slurm_get_slurm_user_id())) {
error("Nice value must be non-negative, "
"value ignored");
opt.nice = 0;
}
}
break;
case LONG_OPT_PRIORITY: {
long long priority = strtoll(optarg, NULL, 10);
if (priority < 0) {
error("Priority must be >= 0");
exit(error_exit);
}
if (priority >= NO_VAL) {
error("Priority must be < %i", NO_VAL);
exit(error_exit);
}
opt.priority = priority;
break;
}
case LONG_OPT_MULTI:
opt.multi_prog = true;
break;
case LONG_OPT_COMMENT:
xfree(opt.comment);
opt.comment = xstrdup(optarg);
break;
case LONG_OPT_QOS:
xfree(opt.qos);
opt.qos = xstrdup(optarg);
break;
case LONG_OPT_SOCKETSPERNODE:
max_val = 0;
get_resource_arg_range( optarg, "sockets-per-node",
&opt.sockets_per_node,
&max_val, true );
if ((opt.sockets_per_node == 1) &&
(max_val == INT_MAX))
opt.sockets_per_node = NO_VAL;
break;
case LONG_OPT_CORESPERSOCKET:
max_val = 0;
get_resource_arg_range( optarg, "cores-per-socket",
&opt.cores_per_socket,
&max_val, true );
if ((opt.cores_per_socket == 1) &&
(max_val == INT_MAX))
opt.cores_per_socket = NO_VAL;
break;
case LONG_OPT_THREADSPERCORE:
max_val = 0;
get_resource_arg_range( optarg, "threads-per-core",
&opt.threads_per_core,
&max_val, true );
if ((opt.threads_per_core == 1) &&
(max_val == INT_MAX))
opt.threads_per_core = NO_VAL;
break;
case LONG_OPT_NTASKSPERNODE:
opt.ntasks_per_node = _get_int(optarg, "ntasks-per-node",
true);
break;
case LONG_OPT_NTASKSPERSOCKET:
opt.ntasks_per_socket = _get_int(optarg,
"ntasks-per-socket", true);
break;
case LONG_OPT_NTASKSPERCORE:
opt.ntasks_per_core = _get_int(optarg, "ntasks-per-core",
true);
break;
case LONG_OPT_HINT:
/* Keep after other options filled in */
if (verify_hint(optarg,
&opt.sockets_per_node,
&opt.cores_per_socket,
&opt.threads_per_core,
&opt.ntasks_per_core,
&opt.cpu_bind_type)) {
exit(error_exit);
}
break;
case LONG_OPT_BLRTS_IMAGE:
xfree(opt.blrtsimage);
opt.blrtsimage = xstrdup(optarg);
break;
case LONG_OPT_LINUX_IMAGE:
xfree(opt.linuximage);
opt.linuximage = xstrdup(optarg);
break;
case LONG_OPT_MLOADER_IMAGE:
xfree(opt.mloaderimage);
opt.mloaderimage = xstrdup(optarg);
break;
case LONG_OPT_RAMDISK_IMAGE:
xfree(opt.ramdiskimage);
opt.ramdiskimage = xstrdup(optarg);
break;
case LONG_OPT_REBOOT:
opt.reboot = true;
break;
case LONG_OPT_GET_USER_ENV:
error("--get-user-env is no longer supported in srun, "
"use sbatch");
break;
case LONG_OPT_PTY:
#ifdef HAVE_PTY_H
opt.pty = true;
opt.unbuffered = true; /* implicit */
if (opt.ifname)
tmp_str = "--input";
else if (opt.ofname)
tmp_str = "--output";
else if (opt.efname)
tmp_str = "--error";
else
tmp_str = NULL;
if (tmp_str) {
error("%s incompatible with --pty option",
tmp_str);
exit(error_exit);
}
#else
error("--pty not currently supported on this system "
"type, ignoring option");
#endif
break;
case LONG_OPT_CHECKPOINT:
xfree(opt.ckpt_interval_str);
opt.ckpt_interval_str = xstrdup(optarg);
break;
case LONG_OPT_OPEN_MODE:
if ((optarg[0] == 'a') || (optarg[0] == 'A'))
opt.open_mode = OPEN_MODE_APPEND;
else if ((optarg[0] == 't') || (optarg[0] == 'T'))
opt.open_mode = OPEN_MODE_TRUNCATE;
else {
error("Invalid --open-mode argument: %s. Ignored",
optarg);
}
break;
case LONG_OPT_ACCTG_FREQ:
xfree(opt.acctg_freq);
opt.acctg_freq = xstrdup(optarg);
break;
case LONG_OPT_CPU_FREQ:
if (cpu_freq_verify_param(optarg, &opt.cpu_freq))
error("Invalid --cpu-freq argument: %s. Ignored",
optarg);
break;
case LONG_OPT_WCKEY:
xfree(opt.wckey);
opt.wckey = xstrdup(optarg);
break;
case LONG_OPT_PROFILE:
opt.profile = acct_gather_profile_from_string(optarg);
break;
case LONG_OPT_RESERVATION:
xfree(opt.reservation);
opt.reservation = xstrdup(optarg);
break;
case LONG_OPT_LAUNCHER_OPTS:
xfree(opt.launcher_opts);
opt.launcher_opts = xstrdup(optarg);
break;
case LONG_OPT_CHECKPOINT_DIR:
xfree(opt.ckpt_dir);
opt.ckpt_dir = xstrdup(optarg);
break;
case LONG_OPT_RESTART_DIR:
xfree(opt.restart_dir);
opt.restart_dir = xstrdup(optarg);
break;
case LONG_OPT_SIGNAL:
if (get_signal_opts(optarg, &opt.warn_signal,
&opt.warn_time, &opt.warn_flags)) {
error("Invalid signal specification: %s",
optarg);
exit(error_exit);
}
break;
case LONG_OPT_TIME_MIN:
xfree(opt.time_min_str);
opt.time_min_str = xstrdup(optarg);
break;
case LONG_OPT_GRES:
if (!strcasecmp(optarg, "help") ||
!strcasecmp(optarg, "list")) {
print_gres_help();
exit(0);
}
xfree(opt.gres);
opt.gres = xstrdup(optarg);
break;
case LONG_OPT_ALPS:
error("Not running ALPS. --alps option ignored.");
break;
case LONG_OPT_REQ_SWITCH:
pos_delimit = strstr(optarg,"@");
if (pos_delimit != NULL) {
pos_delimit[0] = '\0';
pos_delimit++;
opt.wait4switch = time_str2secs(pos_delimit);
}
opt.req_switch = _get_int(optarg, "switches",
true);
break;
default:
if (spank_process_option (opt_char, optarg) < 0) {
exit(error_exit);
}
}
}
/* This means --ntasks was read from the environment. We will override
* it with what the user specified in the hostlist. POE launched
* jobs excluded (they have the SLURM_STARTED_STEP env var set). */
if (!ntasks_set_opt && (opt.distribution == SLURM_DIST_ARBITRARY) &&
!getenv("SLURM_STARTED_STEP"))
opt.ntasks_set = false;
spank_option_table_destroy (optz);
}
/*
 * _opt_args() : set options from the command line arguments
 *
 * Runs _set_options() over argv, applies several cross-option checks and
 * environment exports, then records the command to execute (the arguments
 * remaining after option parsing) in opt.argc / opt.argv.
 *
 * IN argc - number of entries in argv
 * IN argv - command line argument vector
 *
 * Calls exit(error_exit) or fatal() when the request can not be honored
 * (plane distribution too small, no command given, launch plugin failure).
 */
static void _opt_args(int argc, char **argv)
{
int i, command_pos = 0, command_args = 0;
char **rest = NULL;
/* Parse the option switches; optind is used further down to locate
 * the first argument that is not an option. */
_set_options(argc, argv);
/* When both --mem and --mem-per-cpu were given, the per-node value is
 * raised so it is never smaller than the per-cpu value. */
if ((opt.pn_min_memory > -1) && (opt.mem_per_cpu > -1)) {
if (opt.pn_min_memory < opt.mem_per_cpu) {
info("mem < mem-per-cpu - resizing mem to be equal "
"to mem-per-cpu");
opt.pn_min_memory = opt.mem_per_cpu;
}
}
/* Check to see if user has specified enough resources to
* satisfy the plane distribution with the specified
* plane_size.
* if (n/plane_size < N) and ((N-1) * plane_size >= n) -->
* problem Simple check will not catch all the problem/invalid
* cases.
* The limitations of the plane distribution in the cons_res
* environment are more extensive and are documented in the
* SLURM reference guide. */
if (opt.distribution == SLURM_DIST_PLANE && opt.plane_size) {
if ((opt.ntasks/opt.plane_size) < opt.min_nodes) {
if (((opt.min_nodes-1)*opt.plane_size) >= opt.ntasks) {
#if (0)
info("Too few processes ((n/plane_size) %d < N %d) "
"and ((N-1)*(plane_size) %d >= n %d)) ",
opt.ntasks/opt.plane_size, opt.min_nodes,
(opt.min_nodes-1)*opt.plane_size, opt.ntasks);
#endif
error("Too few processes for the requested "
"{plane,node} distribution");
exit(error_exit);
}
}
}
/* --pty is only honored with the "launch/slurm" launch type; for any
 * other configured type the option is reported and switched off. */
if (opt.pty) {
char *launch_type = slurm_get_launch_type();
if (strcmp(launch_type, "launch/slurm")) {
error("--pty not currently supported with %s "
"configuration, ignoring option", launch_type);
opt.pty = false;
}
xfree(launch_type);
}
#ifdef HAVE_AIX
/* Default network specification on AIX when none was requested.
 * NOTE(review): this stores a string literal, while other paths store
 * xstrdup()'d strings in opt.network and xfree() it before
 * reassignment -- confirm nothing frees opt.network after this. */
if (opt.network == NULL) {
opt.network = "us,sn_all,bulk_xfer";
setenv("SLURM_NETWORK", opt.network, 1);
}
#endif
#ifdef HAVE_NATIVE_CRAY
/* only fatal on the allocation */
if (opt.network && opt.shared && (opt.jobid == NO_VAL))
fatal("Requesting network performance counters requires "
"exclusive access. Please add the --exclusive option "
"to your request.");
if (opt.network)
setenv("SLURM_NETWORK", opt.network, 1);
#endif
/* Export the dependency specification through the environment. */
if (opt.dependency)
setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency);
/* The body below is only compiled for HAVE_BG builds, where it prints
 * an advisory about using a nodelist; otherwise the block is empty. */
if (opt.nodelist && (!opt.test_only)) {
#ifdef HAVE_BG
info("\tThe nodelist option should only be used if\n"
"\tthe block you are asking for can be created.\n"
"\tIt should also include all the midplanes you\n"
"\twant to use, partial lists will not work correctly.\n"
"\tPlease consult smap before using this option\n"
"\tor your job may be stuck with no way to run.");
#endif
}
/* Count the arguments left after option parsing: these form the
 * command (and its arguments) to be executed. */
opt.argc = 0;
if (optind < argc) {
rest = argv + optind;
while (rest[opt.argc] != NULL)
opt.argc++;
}
/* Remember how many command words the user supplied; compared against
 * opt.argc in the xassert() below. */
command_args = opt.argc;
/* A command is mandatory unless only a test was requested. */
if (!rest && !opt.test_only)
fatal("No command given to execute.");
#if defined HAVE_BG && !defined HAVE_BG_L_P
/* Since this is needed on an emulated system don't put this code in
* the launch plugin.
*/
bg_figure_nodes_tasks(&opt.min_nodes, &opt.max_nodes,
&opt.ntasks_per_node, &opt.ntasks_set,
&opt.ntasks, opt.nodes_set, opt.nodes_set_opt,
opt.overcommit, 1);
#endif
if (launch_init() != SLURM_SUCCESS) {
fatal("Unable to load launch plugin, check LaunchType "
"configuration");
}
/* The launch plugin returns the index in opt.argv at which the user's
 * command is to be stored.
 * NOTE(review): opt.argv is not allocated in this function;
 * presumably launch_g_setup_srun_opt() sizes it and may adjust
 * opt.argc -- confirm against the plugin interface. */
command_pos = launch_g_setup_srun_opt(rest);
/* Since this is needed on an emulated system don't put this code in
* the launch plugin.
*/
#if defined HAVE_BG && !defined HAVE_BG_L_P
if (opt.test_only && !opt.jobid_set && (opt.jobid != NO_VAL)) {
/* Do not perform allocate test, only disable use of "runjob" */
opt.test_only = false;
}
#endif
/* make sure we have allocated things correctly */
xassert((command_pos + command_args) <= opt.argc);
/* Copy the user's command words into opt.argv starting at
 * command_pos, stopping early if the source list runs out. */
for (i = command_pos; i < opt.argc; i++) {
if (!rest || !rest[i-command_pos])
break;
opt.argv[i] = xstrdup(rest[i-command_pos]);
}
opt.argv[i] = NULL; /* End of argv's (for possible execv) */
#if defined HAVE_BG && !defined HAVE_BG_L_P
/* BGQ's runjob command required a fully qualified path */
if (!launch_g_handle_multi_prog_verify(command_pos) &&
(opt.argc > command_pos)) {
char *fullpath;
if ((fullpath = search_path(opt.cwd,
opt.argv[command_pos],
false, X_OK))) {
xfree(opt.argv[command_pos]);
opt.argv[command_pos] = fullpath;
}
}
#else
/* Return value deliberately ignored on non-BG builds. */
(void) launch_g_handle_multi_prog_verify(command_pos);
#endif
#if 0
for (i=0; i<opt.argc; i++)
info("%d is '%s'", i, opt.argv[i]);
#endif
}
/*
 * _opt_verify : perform some post option processing verification
 *
 * Cross-checks the values stored in the global "opt" structure after all
 * options have been parsed, fills in defaults that depend on several
 * options at once (node list, node/task counts, time limits, uid/gid,
 * cpu binding, MPI type) and reports inconsistencies.
 *
 * RET true if every recoverable check passed, false if at least one
 *     error was reported.  A number of failures (bad hostfile, invalid
 *     time specification, -l together with -u, ...) call
 *     exit(error_exit) directly instead of returning.
 */
static bool _opt_verify(void)
{
	bool verified = true;
	hostlist_t hl = NULL;
	int hl_cnt = 0;

	/*
	 * Do not set slurmd debug level higher than DEBUG2,
	 * as DEBUG3 is used for slurmd IO operations, which
	 * are not appropriate to be sent back to srun. (because
	 * these debug messages cause the generation of more
	 * debug messages ad infinitum)
	 */
	if (opt.slurmd_debug + LOG_LEVEL_ERROR > LOG_LEVEL_DEBUG2) {
		opt.slurmd_debug = LOG_LEVEL_DEBUG2 - LOG_LEVEL_ERROR;
		info("Using srun's max debug increment of %d",
		     opt.slurmd_debug);
	}

	if (opt.quiet && _verbose) {
		error ("don't specify both --verbose (-v) and --quiet (-Q)");
		verified = false;
	}

	/* Option combinations that are mutually exclusive */
	if (opt.no_alloc && !opt.nodelist) {
		error("must specify a node list with -Z, --no-allocate.");
		verified = false;
	}

	if (opt.no_alloc && opt.exc_nodes) {
		error("can not specify --exclude list with -Z, --no-allocate.");
		verified = false;
	}

	if (opt.no_alloc && opt.relative_set) {
		error("do not specify -r,--relative with -Z,--no-allocate.");
		verified = false;
	}

	if (opt.relative_set && (opt.exc_nodes || opt.nodelist)) {
		error("-r,--relative not allowed with "
		      "-w,--nodelist or -x,--exclude.");
		verified = false;
	}

	/* A node must offer at least as many cpus as one task needs */
	if (opt.cpus_set && (opt.pn_min_cpus < opt.cpus_per_task))
		opt.pn_min_cpus = opt.cpus_per_task;

	if (opt.argc > 0)
		opt.cmd_name = base_name(opt.argv[0]);

	if (!opt.nodelist) {
		/* No -w given: fall back to the SLURM_HOSTFILE variable */
		if ((opt.nodelist = xstrdup(getenv("SLURM_HOSTFILE")))) {
			/* make sure the file being read in has a / in
			   it to make sure it is a file in the
			   valid_node_list function */
			if (!strstr(opt.nodelist, "/")) {
				char *add_slash = xstrdup("./");
				xstrcat(add_slash, opt.nodelist);
				xfree(opt.nodelist);
				opt.nodelist = add_slash;
			}
			opt.distribution = SLURM_DIST_ARBITRARY;
			opt.hostfile = xstrdup(opt.nodelist);
			if (!_valid_node_list(&opt.nodelist)) {
				error("Failure getting NodeNames from "
				      "hostfile");
				exit(error_exit);
			} else {
				debug("loaded nodes (%s) from hostfile",
				      opt.nodelist);
			}
		}
	} else {
		/* A nodelist containing '/' is remembered as the hostfile */
		if (strstr(opt.nodelist, "/"))
			opt.hostfile = xstrdup(opt.nodelist);
		if (!_valid_node_list(&opt.nodelist))
			exit(error_exit);
	}

	/* set up the proc and node counts based on the arbitrary list
	   of nodes */
	if ((opt.distribution == SLURM_DIST_ARBITRARY)
	    && (!opt.nodes_set || !opt.ntasks_set)) {
		hostlist_t arb_hl = hostlist_create(opt.nodelist);

		if (!opt.ntasks_set) {
			/* one task per entry, duplicates included */
			opt.ntasks_set = true;
			opt.ntasks = hostlist_count(arb_hl);
		}
		if (!opt.nodes_set) {
			/* node count is the number of distinct entries */
			opt.nodes_set = true;
			opt.nodes_set_opt = true;
			hostlist_uniq(arb_hl);
			opt.min_nodes = opt.max_nodes
				= hostlist_count(arb_hl);
		}
		hostlist_destroy(arb_hl);
	}

	/* now if max is set make sure we have <= max_nodes in the
	 * nodelist but only if it isn't arbitrary since the user has
	 * laid it out how it should be so don't mess with it print an
	 * error later if it doesn't work the way they wanted */
	if (opt.max_nodes && opt.nodelist
	    && opt.distribution != SLURM_DIST_ARBITRARY) {
		hostlist_t trim_hl = hostlist_create(opt.nodelist);
		int count = hostlist_count(trim_hl);

		if (count > opt.max_nodes) {
			int i = 0;

			error("Required nodelist includes more nodes than "
			      "permitted by max-node count (%d > %d). "
			      "Eliminating nodes from the nodelist.",
			      count, opt.max_nodes);
			/* pop the surplus entries off the end of the list */
			count -= opt.max_nodes;
			while (i < count) {
				char *name = hostlist_pop(trim_hl);
				if (name)
					free(name);
				else
					break;
				i++;
			}
			xfree(opt.nodelist);
			opt.nodelist = hostlist_ranged_string_xmalloc(trim_hl);
		}
		hostlist_destroy(trim_hl);
	}

	if ((opt.argc == 0) && (opt.test_only == false)) {
		error("must supply remote command");
		verified = false;
	}

	/* check for realistic arguments */
	if (opt.ntasks <= 0) {
		error("invalid number of tasks (-n %d)", opt.ntasks);
		verified = false;
	}

	if (opt.cpus_set && (opt.cpus_per_task <= 0)) {
		error("invalid number of cpus per task (-c %d)",
		      opt.cpus_per_task);
		verified = false;
	}

	if ((opt.min_nodes <= 0) || (opt.max_nodes < 0) ||
	    (opt.max_nodes && (opt.min_nodes > opt.max_nodes))) {
		error("invalid number of nodes (-N %d-%d)",
		      opt.min_nodes, opt.max_nodes);
		verified = false;
	}

	/* Image names may not contain blanks */
#if defined(HAVE_BGL)
	if (opt.blrtsimage && strchr(opt.blrtsimage, ' ')) {
		error("invalid BlrtsImage given '%s'", opt.blrtsimage);
		verified = false;
	}
#endif

	if (opt.linuximage && strchr(opt.linuximage, ' ')) {
#ifdef HAVE_BGL
		error("invalid LinuxImage given '%s'", opt.linuximage);
#else
		error("invalid CnloadImage given '%s'", opt.linuximage);
#endif
		verified = false;
	}

	if (opt.mloaderimage && strchr(opt.mloaderimage, ' ')) {
		error("invalid MloaderImage given '%s'", opt.mloaderimage);
		verified = false;
	}

	if (opt.ramdiskimage && strchr(opt.ramdiskimage, ' ')) {
#ifdef HAVE_BGL
		error("invalid RamDiskImage given '%s'", opt.ramdiskimage);
#else
		error("invalid IoloadImage given '%s'", opt.ramdiskimage);
#endif
		verified = false;
	}

	/* bound max_threads/cores from ntasks_cores/sockets */
	if (opt.ntasks_per_core > 0) {
		/* if cpu_bind_type doesn't already have a auto pref,
		 * choose the level based on the level of ntasks
		 */
		if (!(opt.cpu_bind_type & (CPU_BIND_TO_SOCKETS |
					   CPU_BIND_TO_CORES |
					   CPU_BIND_TO_THREADS |
					   CPU_BIND_TO_LDOMS |
					   CPU_BIND_TO_BOARDS))) {
			opt.cpu_bind_type |= CPU_BIND_TO_CORES;
		}
	}
	if (opt.ntasks_per_socket > 0) {
		/* if cpu_bind_type doesn't already have a auto pref,
		 * choose the level based on the level of ntasks
		 */
		if (!(opt.cpu_bind_type & (CPU_BIND_TO_SOCKETS |
					   CPU_BIND_TO_CORES |
					   CPU_BIND_TO_THREADS |
					   CPU_BIND_TO_LDOMS |
					   CPU_BIND_TO_BOARDS))) {
			opt.cpu_bind_type |= CPU_BIND_TO_SOCKETS;
		}
	}

	/* massage the numbers */
	if (opt.nodelist) {
		hl = hostlist_create(opt.nodelist);
		if (!hl) {
			error("memory allocation failure");
			exit(error_exit);
		}
		hostlist_uniq(hl);
		hl_cnt = hostlist_count(hl);
		if (opt.nodes_set)
			opt.min_nodes = MAX(hl_cnt, opt.min_nodes);
		else
			opt.min_nodes = hl_cnt;
	}

	if ((opt.nodes_set || opt.extra_set) &&
	    ((opt.min_nodes == opt.max_nodes) || (opt.max_nodes == 0)) &&
	    !opt.ntasks_set) {
		/* 1 proc / node default */
		opt.ntasks = opt.min_nodes;

		/* 1 proc / min_[socket * core * thread] default */
		if ((opt.sockets_per_node != NO_VAL) &&
		    (opt.cores_per_socket != NO_VAL) &&
		    (opt.threads_per_core != NO_VAL)) {
			opt.ntasks *= opt.sockets_per_node;
			opt.ntasks *= opt.cores_per_socket;
			opt.ntasks *= opt.threads_per_core;
			opt.ntasks_set = true;
		} else if (opt.ntasks_per_node != NO_VAL)
			opt.ntasks *= opt.ntasks_per_node;

		/* massage the numbers */
		if (opt.nodelist) {
			if (hl)	/* possibly built above */
				hostlist_destroy(hl);
			hl = hostlist_create(opt.nodelist);
			if (!hl) {
				error("memory allocation failure");
				exit(error_exit);
			}
			if (opt.distribution == SLURM_DIST_ARBITRARY
			    && !opt.ntasks_set) {
				opt.ntasks = hostlist_count(hl);
				opt.ntasks_set = true;
			}
			hostlist_uniq(hl);
			hl_cnt = hostlist_count(hl);
			if (opt.nodes_set)
				opt.min_nodes = MAX(hl_cnt, opt.min_nodes);
			else
				opt.min_nodes = hl_cnt;
			/* Don't destroy hl here since it may be used later */
		}
	} else if (opt.nodes_set && opt.ntasks_set) {
		/*
		 * Make sure that the number of
		 * max_nodes is <= number of tasks
		 */
		if (opt.ntasks < opt.max_nodes)
			opt.max_nodes = opt.ntasks;

		/*
		 * make sure # of procs >= min_nodes
		 */
		if ((opt.ntasks < opt.min_nodes) && (opt.ntasks > 0)) {
			info ("Warning: can't run %d processes on %d "
			      "nodes, setting nnodes to %d",
			      opt.ntasks, opt.min_nodes, opt.ntasks);
			opt.min_nodes = opt.ntasks;
			opt.nodes_set_opt = true;
			if (opt.max_nodes
			    && (opt.min_nodes > opt.max_nodes) )
				opt.max_nodes = opt.min_nodes;
			/* hl_cnt is only non-zero when hl was built above */
			if (hl_cnt > opt.min_nodes) {
				int del_cnt, i;
				char *host;

				del_cnt = hl_cnt - opt.min_nodes;
				for (i = 0; i < del_cnt; i++) {
					host = hostlist_pop(hl);
					free(host);
				}
				xfree(opt.nodelist);
				opt.nodelist =
					hostlist_ranged_string_xmalloc(hl);
			}
		}
	} /* else if (opt.ntasks_set && !opt.nodes_set) */

	if (hl)
		hostlist_destroy(hl);

	if (opt.max_threads <= 0) {	/* set default */
		error("Thread value invalid, reset to 1");
		opt.max_threads = 1;
		pmi_server_max_threads(opt.max_threads);
	} else if (opt.max_threads > MAX_THREADS) {
		error("Thread value exceeds defined limit, reset to %d",
		      MAX_THREADS);
		/* Actually apply the limit announced by the message above
		 * and keep the PMI server in step, as the branch for
		 * non-positive values does. */
		opt.max_threads = MAX_THREADS;
		pmi_server_max_threads(opt.max_threads);
	}

	if (opt.labelio && opt.unbuffered) {
		error("Do not specify both -l (--label) and "
		      "-u (--unbuffered)");
		exit(error_exit);
	}

	/*
	 * --wait always overrides hidden max_exit_timeout
	 */
	if (opt.max_wait)
		opt.max_exit_timeout = opt.max_wait;

	/* Convert the time strings; a value of 0 is stored as INFINITE */
	if (opt.time_limit_str) {
		opt.time_limit = time_str2mins(opt.time_limit_str);
		if ((opt.time_limit < 0) && (opt.time_limit != INFINITE)) {
			error("Invalid time limit specification");
			exit(error_exit);
		}
		if (opt.time_limit == 0)
			opt.time_limit = INFINITE;
	}
	if (opt.time_min_str) {
		opt.time_min = time_str2mins(opt.time_min_str);
		if ((opt.time_min < 0) && (opt.time_min != INFINITE)) {
			error("Invalid time-min specification");
			exit(error_exit);
		}
		if (opt.time_min == 0)
			opt.time_min = INFINITE;
	}

	if (opt.ckpt_interval_str) {
		opt.ckpt_interval = time_str2mins(opt.ckpt_interval_str);
		if ((opt.ckpt_interval < 0) &&
		    (opt.ckpt_interval != INFINITE)) {
			error("Invalid checkpoint interval specification");
			exit(error_exit);
		}
	}

	/* Checkpoint directory defaults to the working directory */
	if (! opt.ckpt_dir)
		opt.ckpt_dir = xstrdup(opt.cwd);

	/* --uid / --gid, when given, replace the stored uid / gid */
	if ((opt.euid != (uid_t) -1) && (opt.euid != opt.uid))
		opt.uid = opt.euid;

	if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid))
		opt.gid = opt.egid;

	if (slurm_verify_cpu_bind(NULL, &opt.cpu_bind,
				  &opt.cpu_bind_type))
		exit(error_exit);

	/* No --mpi option was processed: use the configured default */
	if (!mpi_initialized) {
		mpi_type = slurm_get_mpi_default();
		(void) mpi_hook_client_init(NULL);
	}
	if ((opt.resv_port_cnt == NO_VAL) && !strcmp(mpi_type, "openmpi"))
		opt.resv_port_cnt = 0;
	xfree(mpi_type);

	return verified;
}
/*
 * init_spank_env - Initialize the spank_job_env based upon environment
 *	variables set via salloc or sbatch commands
 *
 * Every entry of environ whose name starts with "SLURM_SPANK_" is passed,
 * with that prefix removed, to spank_set_job_env() with overwrite enabled.
 * An entry that carries the prefix but has no '=' is skipped; it does not
 * stop the processing of the entries that follow it.
 */
extern void init_spank_env(void)
{
	static const char prefix[] = "SLURM_SPANK_";
	const size_t prefix_len = sizeof(prefix) - 1;
	int i;
	char *name, *eq, *value;

	if (environ == NULL)
		return;

	for (i = 0; environ[i]; i++) {
		if (strncmp(environ[i], prefix, prefix_len))
			continue;
		/* Work on a private copy so environ itself is not modified */
		name = xstrdup(environ[i] + prefix_len);
		eq = strchr(name, (int)'=');
		if (eq == NULL) {
			/* Malformed entry: ignore it, keep scanning */
			xfree(name);
			continue;
		}
		/* Split "NAME=VALUE" in place into two strings */
		eq[0] = '\0';
		value = eq + 1;
		spank_set_job_env(name, value, 1);
		xfree(name);
	}
}
/* Functions used by SPANK plugins to read and write job environment
* variables for use within job's Prolog and/or Epilog */
/*
 * spank_get_job_env - look up a variable in opt.spank_job_env
 * IN name - variable name; must be non-NULL, non-empty and free of '='
 * RET pointer to the value text inside the stored "NAME=VALUE" entry
 *     (not a copy), or NULL if the name is not present.  For an invalid
 *     name, errno is set to EINVAL through slurm_seterrno() and NULL is
 *     returned.
 */
extern char *spank_get_job_env(const char *name)
{
	int i;
	size_t name_len;

	if ((name == NULL) || (name[0] == '\0') ||
	    (strchr(name, (int)'=') != NULL)) {
		slurm_seterrno(EINVAL);
		return NULL;
	}

	/* Compare in place instead of building a temporary "NAME=" key;
	 * the temporary key used to be leaked when nothing matched. */
	name_len = strlen(name);
	for (i = 0; i < opt.spank_job_env_size; i++) {
		char *entry = opt.spank_job_env[i];

		if (strncmp(entry, name, name_len) ||
		    (entry[name_len] != '='))
			continue;
		return entry + name_len + 1;
	}

	return NULL;
}
/*
 * spank_set_job_env - store "name=value" in opt.spank_job_env
 * IN name - variable name; must be non-NULL, non-empty and free of '='
 * IN value - text stored after the '='
 * IN overwrite - if non-zero an existing entry for name is replaced,
 *	otherwise an existing entry is left untouched
 * RET 0 on success (including "already present, not overwritten"),
 *     -1 with errno set to EINVAL through slurm_seterrno() for an
 *     invalid name
 */
extern int spank_set_job_env(const char *name, const char *value,
			     int overwrite)
{
	char *entry = NULL;
	int idx, key_len;

	if (!name || !name[0] || strchr(name, (int)'=')) {
		slurm_seterrno(EINVAL);
		return -1;
	}

	/* Build "name=" first so its length can serve as the match key,
	 * then append the value to obtain the complete entry. */
	xstrcat(entry, name);
	xstrcat(entry, "=");
	key_len = strlen(entry);
	xstrcat(entry, value);

	/* Locate an existing entry for this name, if there is one */
	idx = 0;
	while (idx < opt.spank_job_env_size) {
		if (strncmp(opt.spank_job_env[idx], entry, key_len) == 0)
			break;
		idx++;
	}

	if (idx < opt.spank_job_env_size) {
		if (!overwrite) {
			/* Keep the stored entry, discard the new one */
			xfree(entry);
			return 0;
		}
		xfree(opt.spank_job_env[idx]);
		opt.spank_job_env[idx] = entry;
		return 0;
	}

	/* Not present: grow the array by one slot and append */
	opt.spank_job_env_size++;
	xrealloc(opt.spank_job_env, sizeof(char *) * opt.spank_job_env_size);
	opt.spank_job_env[idx] = entry;
	return 0;
}
/*
 * spank_unset_job_env - remove a variable from opt.spank_job_env
 * IN name - variable name; must be non-NULL, non-empty and free of '='
 * RET 0 if the entry was removed or was not present, -1 with errno set
 *     to EINVAL through slurm_seterrno() for an invalid name
 */
extern int spank_unset_job_env(const char *name)
{
	int i, j;
	size_t name_len;

	if ((name == NULL) || (name[0] == '\0') ||
	    (strchr(name, (int)'=') != NULL)) {
		slurm_seterrno(EINVAL);
		return -1;
	}

	/* Compare in place instead of building a temporary "NAME=" key;
	 * the temporary key used to be leaked on every call. */
	name_len = strlen(name);
	for (i = 0; i < opt.spank_job_env_size; i++) {
		char *entry = opt.spank_job_env[i];

		if (strncmp(entry, name, name_len) ||
		    (entry[name_len] != '='))
			continue;

		xfree(opt.spank_job_env[i]);
		/* Close the gap by shifting the following entries down */
		for (j = i + 1; j < opt.spank_job_env_size; j++)
			opt.spank_job_env[j - 1] = opt.spank_job_env[j];
		opt.spank_job_env_size--;
		/* Release the array itself once the last entry is gone */
		if (opt.spank_job_env_size == 0)
			xfree(opt.spank_job_env);
		return 0;
	}

	return 0;	/* not found */
}
/*
 * print_constraints - helper function for printing options
 *
 * Builds a single space separated string out of the node constraint
 * related fields of "opt" that are set (values other than NO_VAL,
 * non-NULL strings, contiguous flag).
 *
 * RET string built with xstrdup()/xstrcat()/xstrfmtcat(), i.e. heap
 *     memory and not stack memory; it is empty when no constraint is
 *     set.  The caller owns the string and should release it with
 *     xfree().
 */
static char *print_constraints(void)
{
	char *buf = xstrdup("");

	if (opt.pn_min_cpus != NO_VAL)
		xstrfmtcat(buf, "mincpus-per-node=%d ", opt.pn_min_cpus);

	if (opt.pn_min_memory != NO_VAL)
		xstrfmtcat(buf, "mem-per-node=%dM ", opt.pn_min_memory);

	if (opt.mem_per_cpu != NO_VAL)
		xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu);

	if (opt.pn_min_tmp_disk != NO_VAL)
		xstrfmtcat(buf, "tmp-per-node=%ld ", opt.pn_min_tmp_disk);

	if (opt.contiguous == true)
		xstrcat(buf, "contiguous ");

	if (opt.nodelist != NULL)
		xstrfmtcat(buf, "nodelist=%s ", opt.nodelist);

	if (opt.exc_nodes != NULL)
		xstrfmtcat(buf, "exclude=%s ", opt.exc_nodes);

	if (opt.constraints != NULL)
		xstrfmtcat(buf, "constraints=`%s' ", opt.constraints);

	return buf;
}
/* tf_(b): yields the string "true" when b compares equal to true and
 * "false" otherwise.  Both the argument and the whole expansion are
 * parenthesized so the macro can be used with compound arguments and
 * inside larger expressions without precedence surprises. */
#define tf_(b) (((b) == true) ? "true" : "false")
/*
 * _opt_list - log the current srun option values, one per line, via info().
 *
 * Reads the global "opt" structure only; nothing is modified. Strings
 * returned by print_constraints(), conn_type_string_full(),
 * print_geometry() and print_commandline() are released here with xfree().
 */
static void _opt_list(void)
{
	char *str;

	info("defined options for program `%s'", opt.progname);
	info("--------------- ---------------------");
	info("user : `%s'", opt.user);
	info("uid : %ld", (long) opt.uid);
	info("gid : %ld", (long) opt.gid);
	info("cwd : %s", opt.cwd);
	info("ntasks : %d %s", opt.ntasks,
	     opt.ntasks_set ? "(set)" : "(default)");
	if (opt.cpus_set)
		info("cpus_per_task : %d", opt.cpus_per_task);
	if (opt.max_nodes)
		info("nodes : %d-%d", opt.min_nodes, opt.max_nodes);
	else {
		info("nodes : %d %s", opt.min_nodes,
		     opt.nodes_set ? "(set)" : "(default)");
	}
	info("jobid : %u %s", opt.jobid,
	     opt.jobid_set ? "(set)" : "(default)");
	info("partition : %s",
	     opt.partition == NULL ? "default" : opt.partition);
	info("profile : `%s'",
	     acct_gather_profile_to_string(opt.profile));
	info("job name : `%s'", opt.job_name);
	info("reservation : `%s'", opt.reservation);
	info("wckey : `%s'", opt.wckey);
	info("switches : %d", opt.req_switch);
	info("wait-for-switches : %d", opt.wait4switch);
	info("distribution : %s", format_task_dist_states(opt.distribution));
	if (opt.distribution == SLURM_DIST_PLANE)
		info("plane size : %u", opt.plane_size);
	info("cpu_bind : %s",
	     opt.cpu_bind == NULL ? "default" : opt.cpu_bind);
	info("mem_bind : %s",
	     opt.mem_bind == NULL ? "default" : opt.mem_bind);
	info("cpu_freq : %u", opt.cpu_freq);
	info("verbose : %d", _verbose);
	info("slurmd_debug : %d", opt.slurmd_debug);
	/* Values of 0 and 1 are reported as a flag; larger values are
	 * reported as a timeout of (immediate - 1) seconds. */
	if (opt.immediate <= 1)
		info("immediate : %s", tf_(opt.immediate));
	else
		info("immediate : %d secs", (opt.immediate - 1));
	info("label output : %s", tf_(opt.labelio));
	info("unbuffered IO : %s", tf_(opt.unbuffered));
	info("overcommit : %s", tf_(opt.overcommit));
	info("threads : %d", opt.max_threads);
	if (opt.time_limit == INFINITE)
		info("time_limit : INFINITE");
	else if (opt.time_limit != NO_VAL)
		info("time_limit : %d", opt.time_limit);
	if (opt.time_min != NO_VAL)
		info("time_min : %d", opt.time_min);
	if (opt.ckpt_interval)
		info("checkpoint : %d mins", opt.ckpt_interval);
	info("checkpoint_dir : %s", opt.ckpt_dir);
	if (opt.restart_dir)
		info("restart_dir : %s", opt.restart_dir);
	info("wait : %d", opt.max_wait);
	if (opt.nice)
		info("nice : %d", opt.nice);
	info("account : %s", opt.account);
	info("comment : %s", opt.comment);
	info("dependency : %s", opt.dependency);
	if (opt.gres)
		info("gres : %s", opt.gres);
	info("exclusive : %s", tf_(opt.exclusive));
	info("qos : %s", opt.qos);
	if (opt.shared != (uint16_t) NO_VAL)
		info("shared : %u", opt.shared);
	str = print_constraints();
	info("constraints : %s", str);
	xfree(str);
	if (opt.conn_type[0] != (uint16_t) NO_VAL) {
		str = conn_type_string_full(opt.conn_type);
		info("conn_type : %s", str);
		xfree(str);
	}
	str = print_geometry(opt.geometry);
	info("geometry : %s", str);
	xfree(str);
	/* opt.reboot set means a reboot was requested; opt.no_rotate set
	 * means rotation is disabled. Report each in the positive sense
	 * of its label (both were previously printed inverted). */
	info("reboot : %s", opt.reboot ? "yes" : "no");
	info("rotate : %s", opt.no_rotate ? "no" : "yes");
	info("preserve_env : %s", tf_(opt.preserve_env));
#ifdef HAVE_BGL
	if (opt.blrtsimage)
		info("BlrtsImage : %s", opt.blrtsimage);
#endif
	/* The same option field is labelled differently depending on
	 * whether HAVE_BGL is defined. */
	if (opt.linuximage)
#ifdef HAVE_BGL
		info("LinuxImage : %s", opt.linuximage);
#else
		info("CnloadImage : %s", opt.linuximage);
#endif
	if (opt.mloaderimage)
		info("MloaderImage : %s", opt.mloaderimage);
	if (opt.ramdiskimage)
#ifdef HAVE_BGL
		info("RamDiskImage : %s", opt.ramdiskimage);
#else
		info("IoloadImage : %s", opt.ramdiskimage);
#endif
	info("network : %s", opt.network);
	info("propagate : %s",
	     opt.propagate == NULL ? "NONE" : opt.propagate);
	if (opt.begin) {
		char time_str[32];
		slurm_make_time_str(&opt.begin, time_str, sizeof(time_str));
		info("begin : %s", time_str);
	}
	info("prolog : %s", opt.prolog);
	info("epilog : %s", opt.epilog);
	info("mail_type : %s", print_mail_type(opt.mail_type));
	info("mail_user : %s", opt.mail_user);
	info("task_prolog : %s", opt.task_prolog);
	info("task_epilog : %s", opt.task_epilog);
	info("multi_prog : %s", opt.multi_prog ? "yes" : "no");
	info("sockets-per-node : %d", opt.sockets_per_node);
	info("cores-per-socket : %d", opt.cores_per_socket);
	info("threads-per-core : %d", opt.threads_per_core);
	info("ntasks-per-node : %d", opt.ntasks_per_node);
	info("ntasks-per-socket : %d", opt.ntasks_per_socket);
	info("ntasks-per-core : %d", opt.ntasks_per_core);
	info("plane_size : %u", opt.plane_size);
	info("core-spec : %d", opt.core_spec);
	if (opt.resv_port_cnt != NO_VAL)
		info("resv_port_cnt : %d", opt.resv_port_cnt);
	str = print_commandline(opt.argc, opt.argv);
	info("remote command : `%s'", str);
	xfree(str);
}
/* Report whether this srun is being driven by a parallel debugger */
static bool _under_parallel_debugger (void)
{
#if !defined HAVE_BG_FILES || defined HAVE_BG_L_P
	/* Any nonzero MPIR_being_debugged counts as "under a debugger" */
	if (MPIR_being_debugged != 0)
		return true;
	return false;
#else
	/* Use symbols from the runjob.so library provided by IBM.
	 * Do NOT use debugger symbols local to the srun command */
	return false;
#endif
}
/*
 * _usage - print the brief srun usage synopsis to stdout.
 *
 * Every optional item is written as a balanced "[...]" group, the
 * positional "executable [args...]" comes last, and the text ends with a
 * newline. Blue Gene specific options are included only when the
 * corresponding HAVE_BG* macros are defined.
 */
static void _usage(void)
{
	printf(
"Usage: srun [-N nnodes] [-n ntasks] [-i in] [-o out] [-e err]\n"
" [-c ncpus] [-r n] [-p partition] [--hold] [-t minutes]\n"
" [-D path] [--immediate[=secs]] [--overcommit] [--no-kill]\n"
" [--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n"
" [--jobid=id] [--verbose] [--slurmd-debug=#] [--gres=list]\n"
" [-T threads] [-W sec] [--checkpoint=time]\n"
" [--checkpoint-dir=dir] [--licenses=names]\n"
" [--restart-dir=dir] [--qos=qos] [--time-min=minutes]\n"
" [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
" [--mpi=type] [--account=name] [--dependency=type:jobid]\n"
" [--launch-cmd] [--launcher-opts=options]\n"
" [--kill-on-bad-exit] [--propagate[=rlimits]] [--comment=name]\n"
" [--cpu_bind=...] [--mem_bind=...] [--network=type]\n"
" [--ntasks-per-node=n] [--ntasks-per-socket=n] [--reservation=name]\n"
" [--ntasks-per-core=n] [--mem-per-cpu=MB] [--preserve-env]\n"
" [--profile=...]\n"
#ifdef HAVE_BG		/* Blue gene specific options */
#ifdef HAVE_BG_L_P
" [--geometry=XxYxZ] "
#else
" [--export=NONE] [--geometry=AxXxYxZ] "
#endif
"[--conn-type=type] [--no-rotate] [--reboot]\n"
#ifdef HAVE_BGL
" [--blrts-image=path] [--linux-image=path]\n"
" [--mloader-image=path] [--ramdisk-image=path]\n"
#else
" [--cnload-image=path]\n"
" [--mloader-image=path] [--ioload-image=path]\n"
#endif
#endif
" [--mail-type=type] [--mail-user=user] [--nice[=value]]\n"
" [--prolog=fname] [--epilog=fname]\n"
" [--task-prolog=fname] [--task-epilog=fname]\n"
" [--ctrl-comm-ifhn=addr] [--multi-prog]\n"
" [--switches=max-switches{@max-time-to-wait}]\n"
" [--core-spec=cores]\n"
" [--acctg-freq=<datatype>=<interval>]\n"
" [-w hosts...] [-x hosts...] executable [args...]\n");
}
/*
 * _help - print the full srun option help text to stdout.
 *
 * Output order: general, constraint, consumable-resource and multi-core
 * options; the CPU/memory binding options (only when the configured task
 * plugin is "task/affinity"); options registered through SPANK; then the
 * platform-specific sections selected at compile time and the help and
 * version options.
 */
static void _help(void)
{
	slurm_ctl_conf_t *conf;

	printf (
"Usage: srun [OPTIONS...] executable [args...]\n"
"\n"
"Parallel run options:\n"
" -A, --account=name charge job to specified account\n"
" --begin=time defer job until HH:MM MM/DD/YY\n"
" --acctg-freq=<datatype>=<interval> accounting and profiling sampling\n"
" intervals. Supported datatypes:\n"
" task=<interval> energy=<interval>\n"
" network=<interval> filesystem=<interval>\n"
" -c, --cpus-per-task=ncpus number of cpus required per task\n"
" --checkpoint=time job step checkpoint interval\n"
" --checkpoint-dir=dir directory to store job step checkpoint image \n"
" files\n"
" --comment=name arbitrary comment\n"
" -d, --dependency=type:jobid defer job until condition on jobid is satisfied\n"
" -D, --chdir=path change remote current working directory\n"
" -e, --error=err location of stderr redirection\n"
" --epilog=program run \"program\" after job step completes\n"
" -E, --preserve-env env vars for node and task counts override\n"
" command-line flags\n"
" --get-user-env used by Moab. See srun man page.\n"
" --gres=list required generic resources\n"
" -H, --hold submit job in held state\n"
" -i, --input=in location of stdin redirection\n"
" -I, --immediate[=secs] exit if resources not available in \"secs\"\n"
" --jobid=id run under already allocated job\n"
" -J, --job-name=jobname name of job\n"
" -k, --no-kill do not kill job on node failure\n"
" -K, --kill-on-bad-exit kill the job if any task terminates with a\n"
" non-zero exit code\n"
" -l, --label prepend task number to lines of stdout/err\n"
" -L, --licenses=names required license, comma separated\n"
" --launch-cmd print external launcher command line if not SLURM\n"
" --launcher-opts= options for the external launcher command if not\n"
" SLURM\n"
" -m, --distribution=type distribution method for processes to nodes\n"
" (type = block|cyclic|arbitrary)\n"
" --mail-type=type notify on state change: BEGIN, END, FAIL or ALL\n"
" --mail-user=user who to send email notification for job state\n"
" changes\n"
" --mpi=type type of MPI being used\n"
" --multi-prog if set the program name specified is the\n"
" configuration specification for multiple programs\n"
" -n, --ntasks=ntasks number of tasks to run\n"
" --nice[=value] decrease scheduling priority by value\n"
" --ntasks-per-node=n number of tasks to invoke on each node\n"
" -N, --nodes=N number of nodes on which to run (N = min[-max])\n"
" -o, --output=out location of stdout redirection\n"
" -O, --overcommit overcommit resources\n"
" -p, --partition=partition partition requested\n"
" --priority=value set the priority of the job to value\n"
" --prolog=program run \"program\" before launching job step\n"
" --profile=value enable acct_gather_profile for detailed data\n"
" value is all or none or any combination of\n"
" energy, lustre, network or task\n"
" --propagate[=rlimits] propagate all [or specific list of] rlimits\n"
#ifdef HAVE_PTY_H
" --pty run task zero in pseudo terminal\n"
#endif
" -q, --quit-on-interrupt quit on single Ctrl-C\n"
" --qos=qos quality of service\n"
" -Q, --quiet quiet mode (suppress informational messages)\n"
" -r, --relative=n run job step relative to node n of allocation\n"
" --restart-dir=dir directory of checkpoint image files to restart\n"
" from\n"
" -s, --share share nodes with other jobs\n"
" -S, --core-spec=cores count of reserved cores\n"
" --signal=[B:]num[@time] send signal when time limit within time seconds\n"
" --slurmd-debug=level slurmd debug level\n"
" --switches=max-switches{@max-time-to-wait}\n"
" Optimum switches and max time to wait for optimum\n"
" --task-epilog=program run \"program\" after task terminates\n"
" --task-prolog=program run \"program\" before launching task\n"
" -T, --threads=threads set srun launch fanout\n"
" -t, --time=minutes time limit\n"
" --time-min=minutes minimum time limit (if distinct)\n"
" -u, --unbuffered do not line-buffer stdout/err\n"
" -v, --verbose verbose mode (multiple -v's increase verbosity)\n"
" -W, --wait=sec seconds to wait after first task exits\n"
" before killing job\n"
" -X, --disable-status Disable Ctrl-C status feature\n"
"\n"
"Constraint options:\n"
" --contiguous demand a contiguous range of nodes\n"
" -C, --constraint=list specify a list of constraints\n"
" --mem=MB minimum amount of real memory\n"
" --mincpus=n minimum number of logical processors (threads)\n"
" per node\n"
" --reservation=name allocate resources from named reservation\n"
" --tmp=MB minimum amount of temporary disk\n"
" -w, --nodelist=hosts... request a specific list of hosts\n"
" -x, --exclude=hosts... exclude a specific list of hosts\n"
" -Z, --no-allocate don't allocate nodes (must supply -w)\n"
"\n"
"Consumable resources related options:\n"
" --exclusive allocate nodes in exclusive mode when\n"
" cpu consumable resource is enabled\n"
" or don't share CPUs for job steps\n"
" --mem-per-cpu=MB maximum amount of real memory per allocated\n"
" cpu required by the job.\n"
" --mem >= --mem-per-cpu if --mem is specified.\n"
" --resv-ports reserve communication ports\n"
"\n"
"Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n"
" -B, --extra-node-info=S[:C[:T]] Expands to:\n"
" --sockets-per-node=S number of sockets per node to allocate\n"
" --cores-per-socket=C number of cores per socket to allocate\n"
" --threads-per-core=T number of threads per core to allocate\n"
" each field can be 'min' or wildcard '*'\n"
" total cpus requested = (N x S x C x T)\n"
"\n"
" --ntasks-per-core=n number of tasks to invoke on each core\n"
" --ntasks-per-socket=n number of tasks to invoke on each socket\n");

	/* The binding options are listed only when the configured task
	 * plugin is task/affinity; conf is read between
	 * slurm_conf_lock() and slurm_conf_unlock(). */
	conf = slurm_conf_lock();
	if (conf->task_plugin != NULL
	    && strcasecmp(conf->task_plugin, "task/affinity") == 0) {
		printf(
" --cpu_bind= Bind tasks to CPUs\n"
" (see \"--cpu_bind=help\" for options)\n"
" --hint= Bind tasks according to application hints\n"
" (see \"--hint=help\" for options)\n"
" --mem_bind= Bind memory to locality domains (ldom)\n"
" (see \"--mem_bind=help\" for options)\n");
	}
	slurm_conf_unlock();

	/* Options contributed by SPANK plugins */
	spank_print_options(stdout, 6, 30);

	printf("\n"
#if defined HAVE_AIX || defined HAVE_LIBNRT /* IBM PE specific options */
"PE related options:\n"
" --network=type communication protocol to be used\n"
"\n"
#endif
#ifdef HAVE_NATIVE_CRAY			/* Native Cray specific options */
"Cray related options:\n"
" --network=type Use network performance counters\n"
" (system, network, or processor)\n"
"\n"
#endif
#ifdef HAVE_BG				/* Blue gene specific options */
"Blue Gene related options:\n"
" --conn-type=type constraint on type of connection, MESH or TORUS\n"
" if not set, then tries to fit TORUS else MESH\n"
#ifdef HAVE_BG_L_P
" -g, --geometry=XxYxZ geometry constraints of the job\n"
#else
" --export=NONE do not pass environment variables to launcher\n"
" -g, --geometry=AxXxYxZ Midplane geometry constraints of the job,\n"
" sub-block allocations can not be allocated\n"
" with the geometry option\n"
#endif
" -R, --no-rotate disable geometry rotation\n"
" --reboot reboot block before starting job\n"
#ifndef HAVE_BGL
" If wanting to run in HTC mode (only for 1\n"
" midplane and below). You can use HTC_S for\n"
" SMP, HTC_D for Dual, HTC_V for\n"
" virtual node mode, and HTC_L for Linux mode.\n"
" --cnload-image=path path to compute node image for bluegene block. Default if not set\n"
" --mloader-image=path path to mloader image for bluegene block. Default if not set\n"
" --ioload-image=path path to ioload image for bluegene block. Default if not set\n"
#else
" --blrts-image=path path to blrts image for bluegene block. Default if not set\n"
" --linux-image=path path to linux image for bluegene block. Default if not set\n"
" --mloader-image=path path to mloader image for bluegene block. Default if not set\n"
" --ramdisk-image=path path to ramdisk image for bluegene block. Default if not set\n"
#endif
#endif
"\n"
"Help options:\n"
" -h, --help show this help message\n"
" --usage display brief usage message\n"
"\n"
"Other options:\n"
" -V, --version output version information and exit\n"
"\n"
		);
}