blob: de1b25d64ee66f54fa7f4838ce1edee2584b6a35 [file] [log] [blame]
/*****************************************************************************\
* opt.c - options processing for slaunch
*****************************************************************************
* Copyright (C) 2002-2006 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Mark Grondona <grondona1@llnl.gov>, et. al.
* UCRL-CODE-226842.
*
* This file is part of SLURM, a resource management program.
* For details, see <http://www.llnl.gov/linux/slurm/>.
*
* SLURM is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with SLURM; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
\*****************************************************************************/
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h> /* strcpy, strncasecmp */
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#if HAVE_GETOPT_H
# include <getopt.h>
#else
# include "src/common/getopt.h"
#endif
#include <fcntl.h>
#include <stdarg.h> /* va_start */
#include <stdio.h>
#include <stdlib.h> /* getenv */
#include <pwd.h> /* getpwuid */
#include <ctype.h> /* isdigit */
#include <sys/param.h> /* MAXPATHLEN */
#include <sys/stat.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <regex.h>
#include "src/slaunch/opt.h"
#include "src/common/list.h"
#include "src/common/log.h"
#include "src/common/parse_time.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/common/slurm_rlimits_info.h"
#include "src/common/plugstack.h"
#include "src/common/optz.h"
#include "src/common/read_config.h" /* getnodename() */
#include "src/common/hostlist.h"
#include "src/common/mpi.h"
#include "src/api/pmi_server.h"
#include "src/slaunch/attach.h"
/* generic OPT_ definitions -- mainly for use with env vars */
#define OPT_NONE 0x00
#define OPT_INT 0x01
#define OPT_STRING 0x02
#define OPT_DEBUG 0x03
#define OPT_DISTRIB 0x04
#define OPT_BOOL 0x06
#define OPT_CORE 0x07
#define OPT_MPI 0x0c
#define OPT_CPU_BIND 0x0d
#define OPT_MEM_BIND 0x0e
#define OPT_MULTI 0x0f
/* generic getopt_long flags, integers and *not* valid characters */
#define LONG_OPT_USAGE 0x100
#define LONG_OPT_LAUNCH 0x103
#define LONG_OPT_JOBID 0x105
#define LONG_OPT_UID 0x106
#define LONG_OPT_GID 0x107
#define LONG_OPT_MPI 0x108
#define LONG_OPT_CORE 0x109
#define LONG_OPT_DEBUG_TS 0x10a
#define LONG_OPT_NETWORK 0x10b
#define LONG_OPT_PROPAGATE 0x10c
#define LONG_OPT_PROLOG 0x10d
#define LONG_OPT_EPILOG 0x10e
#define LONG_OPT_TASK_PROLOG 0x10f
#define LONG_OPT_TASK_EPILOG 0x110
#define LONG_OPT_CPU_BIND 0x111
#define LONG_OPT_MEM_BIND 0x112
#define LONG_OPT_COMM_HOSTNAME 0x113
#define LONG_OPT_MULTI 0x114
#define LONG_OPT_PMI_THREADS 0x115
#define LONG_OPT_LIN_FILTER 0x116
#define LONG_OPT_LOUT_FILTER 0x117
#define LONG_OPT_LERR_FILTER 0x118
#define LONG_OPT_RIN_FILTER 0x119
#define LONG_OPT_ROUT_FILTER 0x11a
#define LONG_OPT_RERR_FILTER 0x11b
/*---- forward declarations of static functions ----*/
typedef struct env_vars env_vars_t;
/* return command name from its full path name */
static char * _base_name(char* command);
static List _create_path_list(void);
/* Get a POSITIVE decimal integer from arg */
static int _get_pos_int(const char *arg, const char *what);
/* Get a decimal integer from arg */
static int _get_int(const char *arg, const char *what, bool positive);
static void _help(void);
/* load a multi-program configuration file */
static void _load_multi(int *argc, char **argv);
/* fill in default options */
static void _opt_default(void);
/* set options based upon env vars */
static void _opt_env(void);
static void _opt_args(int argc, char **argv);
/* list known options and their settings */
static void _opt_list(void);
/* verify options sanity */
static bool _opt_verify(void);
static void _print_version(void);
static void _process_env_var(env_vars_t *e, const char *val);
/* search PATH for command returns full path */
static char *_search_path(char *, int);
static void _usage(void);
static int _verify_cpu_bind(const char *arg, char **cpu_bind,
cpu_bind_type_t *cpu_bind_type);
static int _verify_mem_bind(const char *arg, char **mem_bind,
mem_bind_type_t *mem_bind_type);
static task_dist_states_t _verify_dist_type(const char *arg, uint32_t *psize);
/*---[ end forward declarations of static functions ]---------------------*/
int initialize_and_process_args(int argc, char *argv[])
{
/* initialize option defaults */
_opt_default();
/* initialize options with env vars */
_opt_env();
/* initialize options with argv */
_opt_args(argc, argv);
if (opt.verbose > 1)
_opt_list();
return 1;
}
static void _print_version(void)
{
printf("%s %s\n", PACKAGE, SLURM_VERSION);
}
/*
* _isvalue
* returns 1 is the argument appears to be a value, 0 otherwise
*/
static int _isvalue(char *arg) {
if (isdigit(*arg)) { /* decimal values and 0x... hex values */
return 1;
}
while (isxdigit(*arg)) { /* hex values not preceded by 0x */
arg++;
}
if (*arg == ',' || *arg == '\0') { /* end of field or string */
return 1;
}
return 0; /* not a value */
}
/*
* verify cpu_bind arguments
* returns -1 on error, 0 otherwise
*/
static int _verify_cpu_bind(const char *arg, char **cpu_bind,
cpu_bind_type_t *cpu_bind_type)
{
char *buf, *p, *tok;
if (!arg) {
return 0;
}
/* we support different launch policy names
* we also allow a verbose setting to be specified
* --cpu_bind=threads
* --cpu_bind=cores
* --cpu_bind=sockets
* --cpu_bind=v
* --cpu_bind=rank,v
* --cpu_bind=rank
* --cpu_bind={MAP_CPU|MASK_CPU}:0,1,2,3,4
*/
buf = xstrdup(arg);
p = buf;
/* change all ',' delimiters not followed by a digit to ';' */
/* simplifies parsing tokens while keeping map/mask together */
while (p[0] != '\0') {
if ((p[0] == ',') && (!_isvalue(&(p[1]))))
p[0] = ';';
p++;
}
p = buf;
while ((tok = strsep(&p, ";"))) {
if (strcasecmp(tok, "help") == 0) {
printf(
"CPU bind options:\n"
" --cpu_bind= Bind tasks to CPUs\n"
" q[uiet] quietly bind before task runs (default)\n"
" v[erbose] verbosely report binding before task runs\n"
" no[ne] don't bind tasks to CPUs (default)\n"
" rank bind by task rank\n"
" map_cpu:<list> specify a CPU ID binding for each task\n"
" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n"
" mask_cpu:<list> specify a CPU ID binding mask for each task\n"
" where <list> is <mask1>,<mask2>,...<maskN>\n"
" sockets auto-generated masks bind to sockets\n"
" cores auto-generated masks bind to cores\n"
" threads auto-generated masks bind to threads\n"
" help show this help message\n");
return 1;
} else if ((strcasecmp(tok, "q") == 0) ||
(strcasecmp(tok, "quiet") == 0)) {
*cpu_bind_type &= ~CPU_BIND_VERBOSE;
} else if ((strcasecmp(tok, "v") == 0) ||
(strcasecmp(tok, "verbose") == 0)) {
*cpu_bind_type |= CPU_BIND_VERBOSE;
} else if ((strcasecmp(tok, "no") == 0) ||
(strcasecmp(tok, "none") == 0)) {
*cpu_bind_type |= CPU_BIND_NONE;
*cpu_bind_type &= ~CPU_BIND_RANK;
*cpu_bind_type &= ~CPU_BIND_MAP;
*cpu_bind_type &= ~CPU_BIND_MASK;
xfree(*cpu_bind);
} else if (strcasecmp(tok, "rank") == 0) {
*cpu_bind_type &= ~CPU_BIND_NONE;
*cpu_bind_type |= CPU_BIND_RANK;
*cpu_bind_type &= ~CPU_BIND_MAP;
*cpu_bind_type &= ~CPU_BIND_MASK;
xfree(*cpu_bind);
} else if ((strncasecmp(tok, "map_cpu", 7) == 0) ||
(strncasecmp(tok, "mapcpu", 6) == 0)) {
char *list;
list = strsep(&tok, ":=");
list = strsep(&tok, ":=");
*cpu_bind_type &= ~CPU_BIND_NONE;
*cpu_bind_type &= ~CPU_BIND_RANK;
*cpu_bind_type |= CPU_BIND_MAP;
*cpu_bind_type &= ~CPU_BIND_MASK;
xfree(*cpu_bind);
if (list && *list) {
*cpu_bind = xstrdup(list);
} else {
error("missing list for \"--cpu_bind=map_cpu:<list>\"");
xfree(buf);
return 1;
}
} else if ((strncasecmp(tok, "mask_cpu", 8) == 0) ||
(strncasecmp(tok, "maskcpu", 7) == 0)) {
char *list;
list = strsep(&tok, ":=");
list = strsep(&tok, ":=");
*cpu_bind_type &= ~CPU_BIND_NONE;
*cpu_bind_type &= ~CPU_BIND_RANK;
*cpu_bind_type &= ~CPU_BIND_MAP;
*cpu_bind_type |= CPU_BIND_MASK;
xfree(*cpu_bind);
if (list && *list) {
*cpu_bind = xstrdup(list);
} else {
error("missing list for \"--cpu_bind=mask_cpu:<list>\"");
xfree(buf);
return 1;
}
} else if ((strcasecmp(tok, "socket") == 0) ||
(strcasecmp(tok, "sockets") == 0)) {
*cpu_bind_type |= CPU_BIND_TO_SOCKETS;
*cpu_bind_type &= ~CPU_BIND_TO_CORES;
*cpu_bind_type &= ~CPU_BIND_TO_THREADS;
} else if ((strcasecmp(tok, "core") == 0) ||
(strcasecmp(tok, "cores") == 0)) {
*cpu_bind_type &= ~CPU_BIND_TO_SOCKETS;
*cpu_bind_type |= CPU_BIND_TO_CORES;
*cpu_bind_type &= ~CPU_BIND_TO_THREADS;
} else if ((strcasecmp(tok, "thread") == 0) ||
(strcasecmp(tok, "threads") == 0)) {
*cpu_bind_type &= ~CPU_BIND_TO_SOCKETS;
*cpu_bind_type &= ~CPU_BIND_TO_CORES;
*cpu_bind_type |= CPU_BIND_TO_THREADS;
} else {
error("unrecognized --cpu_bind argument \"%s\"", tok);
xfree(buf);
return 1;
}
}
xfree(buf);
return 0;
}
/*
* verify mem_bind arguments
* returns -1 on error, 0 otherwise
*/
static int _verify_mem_bind(const char *arg, char **mem_bind,
mem_bind_type_t *mem_bind_type)
{
char *buf, *p, *tok;
if (!arg) {
return 0;
}
/* we support different memory binding names
* we also allow a verbose setting to be specified
* --mem_bind=v
* --mem_bind=rank,v
* --mem_bind=rank
* --mem_bind={MAP_MEM|MASK_MEM}:0,1,2,3,4
*/
buf = xstrdup(arg);
p = buf;
/* change all ',' delimiters not followed by a digit to ';' */
/* simplifies parsing tokens while keeping map/mask together */
while (p[0] != '\0') {
if ((p[0] == ',') && (!_isvalue(&(p[1]))))
p[0] = ';';
p++;
}
p = buf;
while ((tok = strsep(&p, ";"))) {
if (strcasecmp(tok, "help") == 0) {
printf(
"Memory bind options:\n"
" --mem_bind= Bind memory to locality domains (ldom)\n"
" q[uiet] quietly bind before task runs (default)\n"
" v[erbose] verbosely report binding before task runs\n"
" no[ne] don't bind tasks to memory (default)\n"
" rank bind by task rank\n"
" local bind to memory local to processor\n"
" map_mem:<list> specify a memory binding for each task\n"
" where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n"
" mask_mem:<list> specify a memory binding mask for each tasks\n"
" where <list> is <mask1>,<mask2>,...<maskN>\n"
" help show this help message\n");
return 1;
} else if ((strcasecmp(tok, "q") == 0) ||
(strcasecmp(tok, "quiet") == 0)) {
*mem_bind_type &= ~MEM_BIND_VERBOSE;
} else if ((strcasecmp(tok, "v") == 0) ||
(strcasecmp(tok, "verbose") == 0)) {
*mem_bind_type |= MEM_BIND_VERBOSE;
} else if ((strcasecmp(tok, "no") == 0) ||
(strcasecmp(tok, "none") == 0)) {
*mem_bind_type |= MEM_BIND_NONE;
*mem_bind_type &= ~MEM_BIND_RANK;
*mem_bind_type &= ~MEM_BIND_LOCAL;
*mem_bind_type &= ~MEM_BIND_MAP;
*mem_bind_type &= ~MEM_BIND_MASK;
xfree(*mem_bind);
} else if (strcasecmp(tok, "rank") == 0) {
*mem_bind_type &= ~MEM_BIND_NONE;
*mem_bind_type |= MEM_BIND_RANK;
*mem_bind_type &= ~MEM_BIND_LOCAL;
*mem_bind_type &= ~MEM_BIND_MAP;
*mem_bind_type &= ~MEM_BIND_MASK;
xfree(*mem_bind);
} else if (strcasecmp(tok, "local") == 0) {
*mem_bind_type &= ~MEM_BIND_NONE;
*mem_bind_type &= ~MEM_BIND_RANK;
*mem_bind_type |= MEM_BIND_LOCAL;
*mem_bind_type &= ~MEM_BIND_MAP;
*mem_bind_type &= ~MEM_BIND_MASK;
xfree(*mem_bind);
} else if ((strncasecmp(tok, "map_mem", 7) == 0) ||
(strncasecmp(tok, "mapmem", 6) == 0)) {
char *list;
list = strsep(&tok, ":=");
list = strsep(&tok, ":=");
*mem_bind_type &= ~MEM_BIND_NONE;
*mem_bind_type &= ~MEM_BIND_RANK;
*mem_bind_type &= ~MEM_BIND_LOCAL;
*mem_bind_type |= MEM_BIND_MAP;
*mem_bind_type &= ~MEM_BIND_MASK;
xfree(*mem_bind);
if (list && *list) {
*mem_bind = xstrdup(list);
} else {
error("missing list for \"--mem_bind=map_mem:<list>\"");
xfree(buf);
return 1;
}
} else if ((strncasecmp(tok, "mask_mem", 8) == 0) ||
(strncasecmp(tok, "maskmem", 7) == 0)) {
char *list;
list = strsep(&tok, ":=");
list = strsep(&tok, ":=");
*mem_bind_type &= ~MEM_BIND_NONE;
*mem_bind_type &= ~MEM_BIND_RANK;
*mem_bind_type &= ~MEM_BIND_LOCAL;
*mem_bind_type &= ~MEM_BIND_MAP;
*mem_bind_type |= MEM_BIND_MASK;
xfree(*mem_bind);
if (list && *list) {
*mem_bind = xstrdup(list);
} else {
error("missing list for \"--mem_bind=mask_mem:<list>\"");
xfree(buf);
return 1;
}
} else {
error("unrecognized --mem_bind argument \"%s\"", tok);
xfree(buf);
return 1;
}
}
xfree(buf);
return 0;
}
/*
* verify that a distribution type in arg is of a known form
* returns the task_dist_states, or -1 if state is unknown
*/
static task_dist_states_t _verify_dist_type(const char *arg,
uint32_t *plane_size)
{
int len = strlen(arg);
char *dist_str = NULL;
task_dist_states_t result = SLURM_DIST_UNKNOWN;
bool lllp_dist = false, plane_dist = false;
dist_str = strchr(arg,':');
if (dist_str != NULL) {
/* -m cyclic|block:cyclic|block */
lllp_dist = true;
} else {
/* -m plane=<plane_size> */
dist_str = strchr(arg,'=');
if(dist_str != NULL) {
*plane_size=atoi(dist_str+1);
len = dist_str-arg;
plane_dist = true;
}
}
if (lllp_dist) {
if (strcasecmp(arg, "cyclic:cyclic") == 0) {
result = SLURM_DIST_CYCLIC_CYCLIC;
} else if (strcasecmp(arg, "cyclic:block") == 0) {
result = SLURM_DIST_CYCLIC_BLOCK;
} else if (strcasecmp(arg, "block:block") == 0) {
result = SLURM_DIST_BLOCK_BLOCK;
} else if (strcasecmp(arg, "block:cyclic") == 0) {
result = SLURM_DIST_BLOCK_CYCLIC;
}
} else if (plane_dist) {
if (strncasecmp(arg, "plane", len) == 0) {
result = SLURM_DIST_PLANE;
}
} else {
if (strncasecmp(arg, "cyclic", len) == 0) {
result = SLURM_DIST_CYCLIC;
} else if (strncasecmp(arg, "block", len) == 0) {
result = SLURM_DIST_BLOCK;
} else if ((strncasecmp(arg, "arbitrary", len) == 0) ||
(strncasecmp(arg, "hostfile", len) == 0)) {
result = SLURM_DIST_ARBITRARY;
}
}
return result;
}
/*
* Parse the next greatest of:
* CPUS(REPS),
* or
* CPUS(REPS)
* or
* CPUS,
* or
* CPUS
* moving "ptr" past the parsed cpu/reps pair
*
* Return 1 after succesfully parsing a new number or pair, and 0 otherwise.
*/
static int _parse_cpu_rep_pair(char **ptr, uint32_t *cpu, uint32_t *rep)
{
char *endptr;
*rep = 1;
*cpu = strtol(*ptr, &endptr, 10);
if (*cpu == 0 && endptr == *ptr) {
/* no more numbers */
return 0;
}
if (endptr[0] == (char)',') {
*ptr = endptr+1;
return 1;
} else if (endptr[0] == (char)'('
&& endptr[1] == (char)'x') {
*ptr = endptr+2;
*rep = strtol(*ptr, &endptr, 10);
if (*rep == 0 && endptr == *ptr) {
error("was expecting a number at \"%s\"", *ptr);
return 0;
}
if (endptr[0] != (char)')') {
error("was expecting a closing parenthasis at \"%s\"",
endptr);
return 0;
}
endptr = endptr+1;
/* finally, swallow the next comma, if there is one */
if (endptr[0] == (char)',') {
*ptr = endptr + 1;
} else {
*ptr = endptr;
}
return 1;
} else {
*ptr = endptr;
return 1;
}
}
/* Take a string representing cpus-per-node in compressed representation,
* and set variables in "alloc_info" pertaining to cpus-per-node.
*/
static int _set_cpus_per_node(const char *str,
resource_allocation_response_msg_t *alloc_info)
{
char *ptr = (char *)str;
uint16_t num_cpus_groups = 0;
uint32_t *cpus = NULL;
uint32_t *cpus_reps = NULL;
uint32_t cpu, rep;
while (_parse_cpu_rep_pair(&ptr, &cpu, &rep)) {
num_cpus_groups++;
xrealloc(cpus, sizeof(uint32_t)*num_cpus_groups);
xrealloc(cpus_reps, sizeof(uint32_t)*num_cpus_groups);
cpus[num_cpus_groups-1] = cpu;
cpus_reps[num_cpus_groups-1] = rep;
}
if (num_cpus_groups == 0)
return 0;
alloc_info->num_cpu_groups = num_cpus_groups;
alloc_info->cpus_per_node = cpus;
alloc_info->cpu_count_reps = cpus_reps;
return 1;
}
/* return command name from its full path name */
static char * _base_name(char* command)
{
char *char_ptr, *name;
int i;
if (command == NULL)
return NULL;
char_ptr = strrchr(command, (int)'/');
if (char_ptr == NULL)
char_ptr = command;
else
char_ptr++;
i = strlen(char_ptr);
name = xmalloc(i+1);
strcpy(name, char_ptr);
return name;
}
/*
* print error message to stderr with opt.progname prepended
*/
#undef USE_ARGERROR
#if USE_ARGERROR
static void argerror(const char *msg, ...)
{
va_list ap;
char buf[256];
va_start(ap, msg);
vsnprintf(buf, sizeof(buf), msg, ap);
fprintf(stderr, "%s: %s\n",
opt.progname ? opt.progname : "slaunch", buf);
va_end(ap);
}
#else
# define argerror error
#endif /* USE_ARGERROR */
/*
* _opt_default(): used by initialize_and_process_args to set defaults
*/
static void _opt_default()
{
char buf[MAXPATHLEN + 1];
struct passwd *pw;
if ((pw = getpwuid(getuid())) != NULL) {
strncpy(opt.user, pw->pw_name, MAX_USERNAME);
opt.uid = pw->pw_uid;
} else
error("who are you?");
opt.gid = getgid();
if ((getcwd(buf, MAXPATHLEN)) == NULL)
fatal("getcwd failed: %m");
opt.cwd = xstrdup(buf);
opt.progname = NULL;
opt.num_tasks = 1;
opt.num_tasks_set = false;
opt.cpus_per_task = 1;
opt.cpus_per_task_set = false;
opt.num_nodes = 1;
opt.num_nodes_set = false;
opt.cpu_bind_type = 0;
opt.cpu_bind = NULL;
opt.mem_bind_type = 0;
opt.mem_bind = NULL;
opt.relative = (uint16_t)NO_VAL;
opt.relative_set = false;
opt.job_name = NULL;
opt.jobid = NO_VAL;
opt.jobid_set = false;
opt.distribution = SLURM_DIST_UNKNOWN;
opt.plane_size = NO_VAL;
opt.local_ofname = NULL;
opt.local_ifname = NULL;
opt.local_efname = NULL;
opt.remote_ofname = NULL;
opt.remote_ifname = NULL;
opt.remote_efname = NULL;
opt.local_input_filter = (uint32_t)-1;
opt.local_input_filter_set = false;
opt.local_output_filter = (uint32_t)-1;
opt.local_output_filter_set = false;
opt.local_error_filter = (uint32_t)-1;
opt.local_error_filter_set = false;
opt.remote_input_filter = (uint32_t)-1;
opt.remote_output_filter = (uint32_t)-1;
opt.remote_error_filter = (uint32_t)-1;
opt.core_type = CORE_DEFAULT;
opt.labelio = false;
opt.unbuffered = false;
opt.overcommit = false;
opt.no_kill = false;
opt.kill_bad_exit = false;
opt.max_wait = slurm_get_wait_time();
opt.quiet = 0;
opt.verbose = 0;
opt.slurmd_debug = LOG_LEVEL_QUIET;
opt.nodelist = NULL;
opt.nodelist_byid = NULL;
opt.task_layout = NULL;
opt.task_layout_file_set = false;
opt.euid = (uid_t) -1;
opt.egid = (gid_t) -1;
opt.propagate = NULL; /* propagate specific rlimits */
opt.prolog = slurm_get_srun_prolog();
opt.epilog = slurm_get_srun_epilog();
opt.task_prolog = NULL;
opt.task_epilog = NULL;
opt.comm_hostname = xshort_hostname();
}
/*---[ env var processing ]-----------------------------------------------*/
/*
* try to use a similar scheme as popt.
*
* in order to add a new env var (to be processed like an option):
*
* define a new entry into env_vars[], if the option is a simple int
* or string you may be able to get away with adding a pointer to the
* option to set. Otherwise, process var based on "type" in _opt_env.
*/
struct env_vars {
const char *var;
int type;
void *arg;
void *set_flag;
};
env_vars_t env_vars[] = {
/* SLURM_JOBID is handled like SLAUNCH_JOBID as backwards compatibility
with LCRM. If we get LCRM to call a slurm API function which
tells LCRM which variables to set for a particular jobid number,
then there would be no need for LCRM's static SLURM_JOBID code or
the handling of SLURM_JOBID below.*/
{"SLURM_JOBID", OPT_INT, &opt.jobid, &opt.jobid_set },
{"SLAUNCH_JOBID", OPT_INT, &opt.jobid, &opt.jobid_set },
{"SLURMD_DEBUG", OPT_INT, &opt.slurmd_debug, NULL },
{"SLAUNCH_CORE_FORMAT", OPT_CORE, NULL, NULL },
{"SLAUNCH_CPU_BIND", OPT_CPU_BIND, NULL, NULL },
{"SLAUNCH_MEM_BIND", OPT_MEM_BIND, NULL, NULL },
{"SLAUNCH_DEBUG", OPT_DEBUG, NULL, NULL },
{"SLAUNCH_DISTRIBUTION", OPT_DISTRIB, NULL, NULL },
{"SLAUNCH_KILL_BAD_EXIT",OPT_BOOL, &opt.kill_bad_exit, NULL },
{"SLAUNCH_LABELIO", OPT_BOOL, &opt.labelio, NULL },
{"SLAUNCH_OVERCOMMIT", OPT_BOOL, &opt.overcommit, NULL },
{"SLAUNCH_WAIT", OPT_INT, &opt.max_wait, NULL },
{"SLAUNCH_MPI_TYPE", OPT_MPI, NULL, NULL },
{"SLAUNCH_COMM_HOSTNAME",OPT_STRING, &opt.comm_hostname, NULL },
{"SLAUNCH_PROLOG", OPT_STRING, &opt.prolog, NULL },
{"SLAUNCH_EPILOG", OPT_STRING, &opt.epilog, NULL },
{"SLAUNCH_TASK_PROLOG", OPT_STRING, &opt.task_prolog, NULL },
{"SLAUNCH_TASK_EPILOG", OPT_STRING, &opt.task_epilog, NULL },
{NULL, 0, NULL, NULL}
};
/*
* _opt_env(): used by initialize_and_process_args to set options via
* environment variables. See comments above for how to
* extend slaunch to process different vars
*/
static void _opt_env()
{
char *val = NULL;
env_vars_t *e = env_vars;
while (e->var) {
if ((val = getenv(e->var)) != NULL)
_process_env_var(e, val);
e++;
}
}
static void
_process_env_var(env_vars_t *e, const char *val)
{
char *end = NULL;
debug2("now processing env var %s=%s", e->var, val);
if (e->set_flag) {
*((bool *) e->set_flag) = true;
}
switch (e->type) {
case OPT_STRING:
*((char **) e->arg) = xstrdup(val);
break;
case OPT_INT:
if (val != NULL) {
*((int *) e->arg) = (int) strtol(val, &end, 10);
if (!(end && *end == '\0'))
error("%s=%s invalid. ignoring...", e->var, val);
}
break;
case OPT_BOOL:
/* A boolean env variable is true if:
* - set, but no argument
* - argument is "yes"
* - argument is a non-zero number
*/
if (val == NULL || strcmp(val, "") == 0) {
*((bool *)e->arg) = true;
} else if (strcasecmp(val, "yes") == 0) {
*((bool *)e->arg) = true;
} else if ((strtol(val, &end, 10) != 0)
&& end != val) {
*((bool *)e->arg) = true;
} else {
*((bool *)e->arg) = false;
}
break;
case OPT_DEBUG:
if (val != NULL) {
opt.verbose = (int) strtol(val, &end, 10);
if (!(end && *end == '\0'))
error("%s=%s invalid", e->var, val);
}
break;
case OPT_DISTRIB:
opt.plane_size = 0;
opt.distribution = _verify_dist_type(val, &opt.plane_size);
if (opt.distribution == SLURM_DIST_UNKNOWN) {
error("\"%s=%s\" -- invalid distribution type. ",
e->var, val);
exit(1);
}
break;
case OPT_CPU_BIND:
if (_verify_cpu_bind(val, &opt.cpu_bind,
&opt.cpu_bind_type))
exit(1);
break;
case OPT_MEM_BIND:
if (_verify_mem_bind(val, &opt.mem_bind,
&opt.mem_bind_type))
exit(1);
break;
case OPT_CORE:
opt.core_type = core_format_type (val);
break;
case OPT_MPI:
if (mpi_hook_client_init((char *)val) == SLURM_ERROR) {
fatal("\"%s=%s\" -- invalid MPI type, "
"--mpi=list for acceptable types.",
e->var, val);
}
break;
default:
/* do nothing */
break;
}
}
/*
* Get a POSITIVE decimal integer from arg.
*
* Returns the integer on success, exits program on failure.
*
*/
static int
_get_pos_int(const char *arg, const char *what)
{
char *p;
long int result = strtol(arg, &p, 10);
if (p == arg || !xstring_is_whitespace(p) || (result < 0L)) {
error ("Invalid numeric value \"%s\" for %s.", arg, what);
exit(1);
}
if (result > INT_MAX) {
error ("Numeric argument %ld to big for %s.", result, what);
exit(1);
}
return (int) result;
}
/*
* Get a decimal integer from arg.
*
* Returns the integer on success, exits program on failure.
*
*/
static int
_get_int(const char *arg, const char *what, bool positive)
{
char *p;
long int result = strtol(arg, &p, 10);
if ((p == arg) || (!xstring_is_whitespace(p))
|| (positive && (result <= 0L))) {
error ("Invalid numeric value \"%s\" for %s.", arg, what);
exit(1);
}
if (result > INT_MAX) {
error ("Numeric argument %ld to big for %s.", result, what);
} else if (result < INT_MIN) {
error ("Numeric argument %ld to small for %s.", result, what);
}
return (int) result;
}
void set_options(const int argc, char **argv)
{
int opt_char, option_index = 0;
char *tmp;
static struct option long_options[] = {
{"cpus-per-task", required_argument, 0, 'c'},
{"overcommit", no_argument, 0, 'C'},
{"slurmd-debug", required_argument, 0, 'd'},
{"workdir", required_argument, 0, 'D'},
{"slaunch-error", required_argument, 0, 'e'},
{"task-error", required_argument, 0, 'E'},
{"task-layout-file",required_argument,0,'F'},
{"help", no_argument, 0, 'h'},
{"slaunch-input", required_argument, 0, 'i'},
{"task-input", required_argument, 0, 'I'},
{"name", required_argument, 0, 'J'},
{"kill-on-bad-exit", no_argument, 0, 'K'},
{"label", no_argument, 0, 'l'},
{"nodelist-byid", required_argument, 0, 'L'},
{"distribution", required_argument, 0, 'm'},
{"tasks", required_argument, 0, 'n'},
{"nodes", required_argument, 0, 'N'},
{"slaunch-output",required_argument, 0, 'o'},
{"task-output", required_argument, 0, 'O'},
{"quiet", no_argument, 0, 'q'},
{"relative", required_argument, 0, 'r'},
{"unbuffered", no_argument, 0, 'u'},
{"task-layout-byid", required_argument, 0, 'T'},
{"verbose", no_argument, 0, 'v'},
{"version", no_argument, 0, 'V'},
{"nodelist-byname", required_argument, 0, 'w'},
{"wait", required_argument, 0, 'W'},
{"task-layout-byname", required_argument, 0, 'Y'},
{"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND},
{"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND},
{"core", required_argument, 0, LONG_OPT_CORE},
{"mpi", required_argument, 0, LONG_OPT_MPI},
{"jobid", required_argument, 0, LONG_OPT_JOBID},
{"uid", required_argument, 0, LONG_OPT_UID},
{"gid", required_argument, 0, LONG_OPT_GID},
/* debugger-test intentionally undocumented in the man page */
{"debugger-test", no_argument, 0, LONG_OPT_DEBUG_TS},
{"usage", no_argument, 0, LONG_OPT_USAGE},
{"network", required_argument, 0, LONG_OPT_NETWORK},
{"propagate", optional_argument, 0, LONG_OPT_PROPAGATE},
{"prolog", required_argument, 0, LONG_OPT_PROLOG},
{"epilog", required_argument, 0, LONG_OPT_EPILOG},
{"task-prolog", required_argument, 0, LONG_OPT_TASK_PROLOG},
{"task-epilog", required_argument, 0, LONG_OPT_TASK_EPILOG},
{"ctrl-comm-ifhn", required_argument, 0, LONG_OPT_COMM_HOSTNAME},
{"multi-prog", no_argument, 0, LONG_OPT_MULTI},
/* pmi-threads intentionally undocumented in the man page */
{"pmi-threads", required_argument, 0, LONG_OPT_PMI_THREADS},
{"slaunch-input-filter",required_argument,0, LONG_OPT_LIN_FILTER},
{"slaunch-output-filter",required_argument,0,LONG_OPT_LOUT_FILTER},
{"slaunch-error-filter",required_argument,0, LONG_OPT_LERR_FILTER},
/* task-*-filter are not yet functional, and intentionally
undocumented in the man page */
{"task-input-filter", required_argument, 0, LONG_OPT_RIN_FILTER},
{"task-output-filter",required_argument, 0, LONG_OPT_ROUT_FILTER},
{"task-error-filter", required_argument, 0, LONG_OPT_RERR_FILTER},
{NULL, 0, 0, 0}
};
char *opt_string =
"+c:Cd:D:e:E:F:hi:I:J:KlL:m:n:N:o:O:qr:T:uvVw:W:Y:";
struct option *optz = spank_option_table_create (long_options);
if (!optz) {
error ("Unable to create option table");
exit (1);
}
opt.progname = xbasename(argv[0]);
optind = 0;
while((opt_char = getopt_long(argc, argv, opt_string,
optz, &option_index)) != -1) {
switch (opt_char) {
case '?':
fprintf(stderr, "Try \"slaunch --help\" for more "
"information\n");
exit(1);
break;
case 'c':
opt.cpus_per_task_set = true;
opt.cpus_per_task =
_get_pos_int(optarg, "cpus-per-task");
break;
case 'C':
opt.overcommit = true;
break;
case 'd':
opt.slurmd_debug =
_get_pos_int(optarg, "slurmd-debug");
break;
case 'D':
xfree(opt.cwd);
opt.cwd = xstrdup(optarg);
break;
case 'e':
xfree(opt.local_efname);
if (strncasecmp(optarg, "none", (size_t) 4) == 0)
opt.local_efname = xstrdup("/dev/null");
else
opt.local_efname = xstrdup(optarg);
break;
case 'F':
xfree(opt.task_layout);
tmp = slurm_read_hostfile(optarg, 0);
if (tmp != NULL) {
opt.task_layout = xstrdup(tmp);
free(tmp);
opt.task_layout_file_set = true;
} else {
error("\"%s\" is not a valid task layout file");
exit(1);
}
break;
case 'E':
xfree(opt.remote_efname);
if (strncasecmp(optarg, "none", (size_t) 4) == 0)
opt.remote_efname = xstrdup("/dev/null");
else
opt.remote_efname = xstrdup(optarg);
break;
case 'h':
_help();
exit(0);
case 'i':
xfree(opt.local_ifname);
opt.local_ifname = xstrdup(optarg);
break;
case 'I':
xfree(opt.remote_ifname);
opt.remote_ifname = xstrdup(optarg);
break;
case 'J':
xfree(opt.job_name);
opt.job_name = xstrdup(optarg);
break;
case 'K':
opt.kill_bad_exit = true;
break;
case 'l':
opt.labelio = true;
break;
case 'L':
xfree(opt.nodelist_byid);
opt.nodelist_byid = xstrdup(optarg);
break;
case 'm':
opt.plane_size = 0;
opt.distribution = _verify_dist_type(optarg,
&opt.plane_size);
if (opt.distribution == SLURM_DIST_UNKNOWN) {
error("distribution type `%s' "
"is not recognized", optarg);
exit(1);
}
break;
case 'n':
opt.num_tasks_set = true;
opt.num_tasks = _get_pos_int(optarg, "number of tasks");
break;
case 'N':
opt.num_nodes_set = true;
opt.num_nodes = _get_pos_int(optarg, "number of nodes");
break;
case 'o':
xfree(opt.local_ofname);
if (strncasecmp(optarg, "none", (size_t) 4) == 0)
opt.local_ofname = xstrdup("/dev/null");
else
opt.local_ofname = xstrdup(optarg);
break;
case 'O':
xfree(opt.remote_ofname);
if (strncasecmp(optarg, "none", (size_t) 4) == 0)
opt.remote_ofname = xstrdup("/dev/null");
else
opt.remote_ofname = xstrdup(optarg);
break;
case 'q':
opt.quiet++;
break;
case 'r':
opt.relative_set = true;
opt.relative = _get_int(optarg, "relative start node",
false);
break;
case 'T':
xfree(opt.task_layout);
opt.task_layout_byid = xstrdup(optarg);
opt.task_layout_byid_set = true;
break;
case 'u':
opt.unbuffered = true;
break;
case 'v':
opt.verbose++;
break;
case 'V':
_print_version();
exit(0);
break;
case 'w':
xfree(opt.nodelist);
opt.nodelist = xstrdup(optarg);
#ifdef HAVE_BG
info("\tThe nodelist option should only be used if\n"
"\tthe block you are asking for can be created.\n"
"\tIt should also include all the midplanes you\n"
"\twant to use, partial lists may not\n"
"\twork correctly.\n"
"\tPlease consult smap before using this option\n"
"\tor your job may be stuck with no way to run.");
#endif
break;
case 'W':
opt.max_wait = _get_pos_int(optarg, "wait");
break;
case 'Y':
xfree(opt.task_layout);
opt.task_layout = xstrdup(optarg);
opt.task_layout_byname_set = true;
break;
case LONG_OPT_CPU_BIND:
if (_verify_cpu_bind(optarg, &opt.cpu_bind,
&opt.cpu_bind_type))
exit(1);
break;
case LONG_OPT_MEM_BIND:
if (_verify_mem_bind(optarg, &opt.mem_bind,
&opt.mem_bind_type))
exit(1);
break;
case LONG_OPT_CORE:
opt.core_type = core_format_type (optarg);
if (opt.core_type == CORE_INVALID)
error ("--core=\"%s\" Invalid -- ignoring.\n",
optarg);
break;
case LONG_OPT_MPI:
if (mpi_hook_client_init((char *)optarg)
== SLURM_ERROR) {
fatal("\"--mpi=%s\" -- long invalid MPI type, "
"--mpi=list for acceptable types.",
optarg);
}
break;
case LONG_OPT_JOBID:
opt.jobid = _get_pos_int(optarg, "jobid");
opt.jobid_set = true;
break;
case LONG_OPT_UID:
opt.euid = uid_from_string (optarg);
if (opt.euid == (uid_t) -1)
fatal ("--uid=\"%s\" invalid", optarg);
break;
case LONG_OPT_GID:
opt.egid = gid_from_string (optarg);
if (opt.egid == (gid_t) -1)
fatal ("--gid=\"%s\" invalid", optarg);
break;
case LONG_OPT_DEBUG_TS:
/* simulate running under a parallel debugger */
opt.debugger_test = true;
MPIR_being_debugged = 1;
break;
case LONG_OPT_USAGE:
_usage();
exit(0);
case LONG_OPT_NETWORK:
xfree(opt.network);
opt.network = xstrdup(optarg);
#ifdef HAVE_AIX
setenv("SLURM_NETWORK", opt.network, 1);
#endif
break;
case LONG_OPT_PROPAGATE:
xfree(opt.propagate);
if (optarg) opt.propagate = xstrdup(optarg);
else opt.propagate = xstrdup("ALL");
break;
case LONG_OPT_PROLOG:
xfree(opt.prolog);
opt.prolog = xstrdup(optarg);
break;
case LONG_OPT_EPILOG:
xfree(opt.epilog);
opt.epilog = xstrdup(optarg);
break;
case LONG_OPT_TASK_PROLOG:
xfree(opt.task_prolog);
opt.task_prolog = xstrdup(optarg);
break;
case LONG_OPT_TASK_EPILOG:
xfree(opt.task_epilog);
opt.task_epilog = xstrdup(optarg);
break;
case LONG_OPT_COMM_HOSTNAME:
xfree(opt.comm_hostname);
opt.comm_hostname = xstrdup(optarg);
break;
case LONG_OPT_MULTI:
opt.multi_prog = true;
break;
case LONG_OPT_PMI_THREADS: /* undocumented option */
pmi_server_max_threads(_get_pos_int(optarg,
"pmi-threads"));
break;
case LONG_OPT_LIN_FILTER:
if (strcmp(optarg, "-") != 0) {
opt.local_input_filter =
_get_pos_int(optarg,
"slaunch-input-filter");
}
opt.local_input_filter_set = true;
break;
case LONG_OPT_LOUT_FILTER:
if (strcmp(optarg, "-") != 0) {
opt.local_output_filter =
_get_pos_int(optarg,
"slaunch-output-filter");
}
opt.local_output_filter_set = true;
break;
case LONG_OPT_LERR_FILTER:
if (strcmp(optarg, "-") != 0) {
opt.local_error_filter =
_get_pos_int(optarg,
"slaunch-error-filter");
}
opt.local_error_filter_set = true;
break;
case LONG_OPT_RIN_FILTER:
opt.remote_input_filter =
_get_pos_int(optarg, "task-input-filter");
error("task-input-filter not yet implemented");
break;
case LONG_OPT_ROUT_FILTER:
opt.remote_output_filter =
_get_pos_int(optarg, "task-output-filter");
error("task-output-filter not yet implemented");
break;
case LONG_OPT_RERR_FILTER:
opt.remote_error_filter =
_get_pos_int(optarg, "task-error-filter");
error("task-error-filter not yet implemented");
break;
default:
if (spank_process_option (opt_char, optarg) < 0) {
exit (1);
}
}
}
spank_option_table_destroy (optz);
}
/*
* Use the supplied compiled regular expression "re" to convert a string
* into first and last numbers in the range.
*
* If there is only a single number in the "token" string, both
* "first" and "last" will be set to the same value.
*
* Returns 1 on success, 0 on failure
*/
static int _get_range(regex_t *re, char *token, int *first, int *last,
int num_nodes)
{
size_t nmatch = 8;
regmatch_t pmatch[8];
long f, l;
bool first_set = false;
char *ptr;
*first = *last = 0;
memset(pmatch, 0, sizeof(regmatch_t)*nmatch);
if (regexec(re, token, nmatch, pmatch, 0) == REG_NOMATCH) {
error("\"%s\" is not a valid node index range", token);
return 0;
}
/* convert the second, possibly only, number */
ptr = (char *)(xstrndup(token + pmatch[3].rm_so,
pmatch[3].rm_eo - pmatch[3].rm_so));
l = strtol(ptr, NULL, 10);
xfree(ptr);
if ((l >= 0 && l >= num_nodes)
|| (l < 0 && l < -num_nodes)) {
error("\"%ld\" is beyond the range of the"
" %d available nodes", l, num_nodes);
return 0;
}
*last = (int)l;
*first = (int)l;
/* convert the first number, if it exists */
if (pmatch[2].rm_so != -1) {
first_set = true;
ptr = (char *)(xstrndup(token + pmatch[2].rm_so,
pmatch[2].rm_eo - pmatch[2].rm_so));
f = strtol(ptr, NULL, 10);
xfree(ptr);
if ((f >= 0 && f >= num_nodes)
|| (f < 0 && f < -num_nodes)) {
error("\"%ld\" is beyond the range of the"
" %d available nodes", f, num_nodes);
return 0;
}
*first = (int)f;
}
return 1;
}
/*
* Convert a node index string into a nodelist string.
*
* A node index string is a string of single numbers and/or ranges seperated
* by commas. For instance: 2,6,-3,8,-3-2,16,2--4,7-9,0
*
* If both numbers in a range are of the same sign (both positive, or both
* negative), then the range counts directly from the first number to the
* second number; it will not wrap around the "end" of the node list.
*
* If the numbers in a range differ in sign, the range wraps around the
* end of the list of nodes.
*
* Examples: Given a node allocation of foo[1-16]:
*
* -2-3 (negative 2 to positive 3) becomes foo[15-16,1-4]
* 3--2 (positive 3 to negative 2) becomes foo[4,3,2,1,16,15]
* -3--2 becomes foo[14-15]
* -2--3 becomes foo[15,14]
* 2-3 becomes foo[3-4]
* 3-2 becomes foo[4,3]
*/
static char *_node_indices_to_nodelist(const char *indices_list,
resource_allocation_response_msg_t *alloc_info)
{
char *list;
int list_len;
char *start, *end;
hostlist_t node_l, alloc_l;
regex_t range_re;
char *range_re_pattern =
"^[[:space:]]*"
"((-?[[:digit:]]+)[[:space:]]*-)?" /* optional start */
"[[:space:]]*"
"(-?[[:digit:]]+)[[:space:]]*$";
char *nodelist = NULL;
int i, idx;
/* intialize the regular expression */
if (regcomp(&range_re, range_re_pattern, REG_EXTENDED) != 0) {
error("Node index range regex compilation failed\n");
return NULL;
}
/* Now break the string up into tokens between commas,
feed each token into the regular expression, and make
certain that the range numbers are valid. */
node_l = hostlist_create(NULL);
alloc_l = hostlist_create(alloc_info->node_list);
list = xstrdup(indices_list);
start = (char *)list;
list_len = strlen(list);
while (start != NULL && start < (list + list_len)) {
int first = 0;
int last = 0;
/* Find the next index range in the list */
end = strchr(start,',');
if (end == NULL) {
end = list + list_len;
}
*end = '\0';
/* Use the regexp to get the range numbers */
if (!_get_range(&range_re, start, &first, &last,
hostlist_count(alloc_l))) {
goto cleanup;
}
/* Now find all nodes in this range, and add them to node_l */
if (first <= last) {
char *node;
for (i = first; i <= last; i++) {
if (i < 0)
idx = i + hostlist_count(alloc_l);
else
idx = i;
node = hostlist_nth(alloc_l, idx);
hostlist_push(node_l, node);
free(node);
}
} else { /* first > last */
char *node;
for (i = first; i >= last; i--) {
if (i < 0)
idx = i + hostlist_count(alloc_l);
else
idx = i;
node = hostlist_nth(alloc_l, idx);
hostlist_push(node_l, node);
free(node);
}
}
start = end+1;
}
i = 2048;
nodelist = NULL;
do {
i *= 2;
xrealloc(nodelist, i);
} while (hostlist_ranged_string(node_l, i, nodelist) == -1);
cleanup:
xfree(list);
hostlist_destroy(alloc_l);
hostlist_destroy(node_l);
regfree(&range_re);
return nodelist;
}
/* Load the multi_prog config file into argv, pass the entire file contents
* in order to avoid having to read the file on every node. We could parse
* the infomration here too for loading the MPIR records for TotalView */
static void _load_multi(int *argc, char **argv)
{
int config_fd, data_read = 0, i;
struct stat stat_buf;
char *data_buf;
if ((config_fd = open(argv[0], O_RDONLY)) == -1) {
error("Could not open multi_prog config file %s",
argv[0]);
exit(1);
}
if (fstat(config_fd, &stat_buf) == -1) {
error("Could not stat multi_prog config file %s",
argv[0]);
exit(1);
}
if (stat_buf.st_size > 60000) {
error("Multi_prog config file %s is too large",
argv[0]);
exit(1);
}
data_buf = xmalloc(stat_buf.st_size);
while ((i = read(config_fd, &data_buf[data_read], stat_buf.st_size
- data_read)) != 0) {
if (i < 0) {
error("Error reading multi_prog config file %s",
argv[0]);
exit(1);
} else
data_read += i;
}
close(config_fd);
for (i=1; i<*argc; i++)
xfree(argv[i]);
argv[1] = data_buf;
*argc = 2;
}
/*
* _opt_args() : set options via commandline args and popt
*/
static void _opt_args(int argc, char **argv)
{
int i;
char **rest = NULL;
set_options(argc, argv);
#ifdef HAVE_AIX
if (opt.network == NULL) {
opt.network = "us,sn_all,bulk_xfer";
setenv("SLURM_NETWORK", opt.network, 1);
}
#endif
opt.argc = 0;
if (optind < argc) {
rest = argv + optind;
while (rest[opt.argc] != NULL)
opt.argc++;
}
opt.argv = (char **) xmalloc((opt.argc + 1) * sizeof(char *));
for (i = 0; i < opt.argc; i++)
opt.argv[i] = xstrdup(rest[i]);
opt.argv[i] = NULL; /* End of argv's (for possible execv) */
if (opt.multi_prog) {
if (opt.argc < 1) {
error("configuration file not specified");
exit(1);
}
_load_multi(&opt.argc, opt.argv);
}
else if (opt.argc > 0) {
char *fullpath;
if ((fullpath = _search_path(opt.argv[0], R_OK|X_OK))) {
xfree(opt.argv[0]);
opt.argv[0] = fullpath;
}
}
if (!_opt_verify())
exit(1);
}
static bool
_allocation_lookup_env(resource_allocation_response_msg_t **alloc_info)
{
char *ptr, *val;
resource_allocation_response_msg_t *alloc;
long l;
alloc = (resource_allocation_response_msg_t *)xmalloc(
sizeof(resource_allocation_response_msg_t));
/* get SLURM_JOB_ID */
val = getenv("SLURM_JOB_ID");
if (val == NULL)
goto fail1;
l = strtol(val, &ptr, 10);
if (ptr == val || !xstring_is_whitespace(ptr) || l < 0)
goto fail1;
alloc->job_id = (uint32_t)l;
/* get SLURM_JOB_NUM_NODES */
val = getenv("SLURM_JOB_NUM_NODES");
if (val == NULL)
goto fail1;
l = strtol(val, &ptr, 10);
if (ptr == val || !xstring_is_whitespace(ptr) || l < 1)
goto fail1;
alloc->node_cnt = (uint16_t)l;
/* get SLURM_JOB_NODELIST */
val = getenv("SLURM_JOB_NODELIST");
if (val == NULL)
goto fail1;
alloc->node_list = xstrdup(val);
/* get SLURM_JOB_CPUS_PER_NODE */
val = getenv("SLURM_JOB_CPUS_PER_NODE");
if (val == NULL)
goto fail2;
if (!_set_cpus_per_node(val, alloc))
goto fail2;
*alloc_info = alloc;
return true;
fail2:
xfree(alloc->node_list);
fail1:
xfree(alloc);
*alloc_info = NULL;
return false;
}
static bool
_set_allocation_info(resource_allocation_response_msg_t **alloc_info)
{
bool env_flag;
/* First, try to set the allocation info from the environment */
env_flag = _allocation_lookup_env(alloc_info);
/* If that fails, we need to try to get the allocation info
* from the slurmctld. We also need to talk to the slurmctld if
* opt.job_id is set and does not match the information from the
* environment variables.
*/
if (!env_flag || (env_flag
&& opt.jobid_set
&& opt.jobid != (*alloc_info)->job_id)) {
verbose("Need to look up allocation info with the controller");
if (slurm_allocation_lookup_lite(opt.jobid, alloc_info) < 0) {
error("Unable to look up job ID %u: %m", opt.jobid);
return false;
}
} else if (!env_flag && !opt.jobid_set) {
error("A job ID MUST be specified on the command line,");
error("or through the SLAUNCH_JOBID environment variable.");
return false;
}
return true;
}
/*
* _opt_verify : perform some post option processing verification
*
*/
static bool _opt_verify(void)
{
bool verified = true;
hostlist_t task_l = NULL;
hostlist_t node_l = NULL;
resource_allocation_response_msg_t *alloc_info;
if (!_set_allocation_info(&alloc_info)) {
/* error messages printed under _set_allocation_info */
exit(1);
}
/*
* Now set default options based on allocation info.
*/
if (!opt.jobid_set)
opt.jobid = alloc_info->job_id;
if (!opt.num_nodes_set)
opt.num_nodes = alloc_info->node_cnt;
if (opt.task_layout_byid_set && opt.task_layout == NULL) {
opt.task_layout = _node_indices_to_nodelist(
opt.task_layout_byid, alloc_info);
if (opt.task_layout == NULL)
verified = false;
}
if (opt.nodelist_byid != NULL && opt.nodelist == NULL) {
hostlist_t hl;
char *nodenames;
nodenames = _node_indices_to_nodelist(opt.nodelist_byid,
alloc_info);
if (nodenames == NULL) {
verified = false;
} else {
hl = hostlist_create(nodenames);
hostlist_uniq(hl);
/* assumes that the sorted unique hostlist must be a
shorter string than unsorted (or equal lenght) */
hostlist_ranged_string(hl, strlen(nodenames)+1,
nodenames);
opt.nodelist = nodenames;
}
}
/*
* Now, all the rest of the checks and setup.
*/
if (opt.task_layout_byid_set && opt.task_layout_file_set) {
error("-T/--task-layout-byid and -F/--task-layout-file"
" are incompatible.");
verified = false;
}
if (opt.task_layout_byname_set && opt.task_layout_file_set) {
error("-Y/--task-layout-byname and -F/--task-layout-file"
" are incompatible.");
verified = false;
}
if (opt.task_layout_byname_set && opt.task_layout_byid_set) {
error("-Y/--task-layout-byname and -T/--task-layout-byid"
" are incompatible.");
verified = false;
}
if (opt.nodelist && (opt.task_layout_byid_set
|| opt.task_layout_byname_set
|| opt.task_layout_file_set)) {
error("-w/--nodelist is incompatible with task layout"
" options.");
verified = false;
}
if (opt.nodelist && opt.task_layout_file_set) {
error("Only one of -w/--nodelist or -F/--task-layout-file"
" may be used.");
verified = false;
}
if (opt.num_nodes_set && (opt.task_layout_byid_set
|| opt.task_layout_byname_set
|| opt.task_layout_file_set)) {
error("-N/--node is incompatible with task layout options.");
verified = false;
}
if (opt.task_layout != NULL) {
task_l = hostlist_create(opt.task_layout);
if (opt.num_tasks_set) {
if (opt.num_tasks < hostlist_count(task_l)) {
/* shrink the hostlist */
int i, shrink;
char buf[8192];
shrink = hostlist_count(task_l) - opt.num_tasks;
for (i = 0; i < shrink; i++)
free(hostlist_pop(task_l));
xfree(opt.task_layout);
hostlist_ranged_string(task_l, 8192, buf);
opt.task_layout = xstrdup(buf);
} else if (opt.num_tasks > hostlist_count(task_l)) {
error("Asked for more tasks (%d) than listed"
" in the task layout (%d)",
opt.num_tasks, hostlist_count(task_l));
verified = false;
} else {
/* they are equal, no problemo! */
}
} else {
opt.num_tasks = hostlist_count(task_l);
opt.num_tasks_set = true;
}
node_l = hostlist_copy(task_l);
hostlist_uniq(node_l);
opt.num_nodes = hostlist_count(node_l);
opt.num_nodes_set = true;
/* task_layout parameters implicitly trigger
arbitrary task layout mode */
opt.distribution = SLURM_DIST_ARBITRARY;
} else if (opt.nodelist != NULL) {
hostlist_t tmp;
tmp = hostlist_create(opt.nodelist);
node_l = hostlist_copy(tmp);
hostlist_uniq(node_l);
if (hostlist_count(node_l) != hostlist_count(tmp)) {
error("Node names may only appear once in the"
" nodelist (-w/--nodelist)");
verified = false;
}
hostlist_destroy(tmp);
if (opt.num_nodes_set
&& (opt.num_nodes != hostlist_count(node_l))) {
error("You asked for %d nodes (-N/--nodes), but there"
" are %d nodes in the nodelist (-w/--nodelist)",
opt.num_nodes, hostlist_count(node_l));
verified = false;
} else {
opt.num_nodes = hostlist_count(node_l);
opt.num_nodes_set = true;
}
}
if (opt.overcommit && opt.cpus_per_task_set) {
error("--overcommit/-C and --cpus-per-task/-c are incompatible");
verified = false;
}
if (!opt.num_nodes_set && opt.num_tasks_set
&& opt.num_tasks < opt.num_nodes)
opt.num_nodes = opt.num_tasks;
if (!opt.num_tasks_set) {
if (opt.nodelist != NULL) {
opt.num_tasks = hostlist_count(node_l);
} else {
opt.num_tasks = opt.num_nodes;
}
}
if (opt.quiet && opt.verbose) {
error ("don't specify both --verbose (-v) and --quiet (-q)");
verified = false;
}
if (opt.relative_set) {
if (opt.nodelist != NULL) {
error("-r/--relative not allowed with"
" -w/--nodelist.");
verified = false;
}
if (opt.task_layout_byid_set) {
error("-r/--relative not allowed with"
" -T/--task-layout-byid");
verified = false;
}
if (opt.task_layout_byname_set) {
error("-r/--relative not allowed with"
" -Y/--task-layout-byname");
verified = false;
}
if (opt.task_layout_file_set) {
error("-r/--relative not allowed with"
" -F/--task-layout-file");
verified = false;
}
/* convert a negative relative number into a positive number
that the slurmctld will accept */
if (opt.relative < 0 && opt.relative >= -(alloc_info->node_cnt))
opt.relative += alloc_info->node_cnt;
}
if ((opt.job_name == NULL) && (opt.argc > 0))
opt.job_name = _base_name(opt.argv[0]);
if (opt.argc == 0) {
error("must supply remote command");
verified = false;
}
/* check for realistic arguments */
if (opt.num_tasks <= 0) {
error("%s: invalid number of tasks (-n %d)",
opt.progname, opt.num_tasks);
verified = false;
}
if (opt.cpus_per_task <= 0) {
error("%s: invalid number of cpus per task (-c %d)\n",
opt.progname, opt.cpus_per_task);
verified = false;
}
if (opt.num_nodes <= 0) {
error("%s: invalid number of nodes (-N %d)\n",
opt.progname, opt.num_nodes);
verified = false;
}
core_format_enable (opt.core_type);
if (opt.labelio && opt.unbuffered) {
error("Do not specify both -l (--label) and "
"-u (--unbuffered)");
exit(1);
}
if ((opt.euid != (uid_t) -1) && (opt.euid != opt.uid))
opt.uid = opt.euid;
if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid))
opt.gid = opt.egid;
if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid))
opt.gid = opt.egid;
if (opt.propagate && parse_rlimits( opt.propagate, PROPAGATE_RLIMITS)) {
error( "--propagate=%s is not valid.", opt.propagate );
verified = false;
}
/* FIXME - figure out the proper way to free alloc_info */
hostlist_destroy(task_l);
hostlist_destroy(node_l);
return verified;
}
static void
_freeF(void *data)
{
xfree(data);
}
static List
_create_path_list(void)
{
List l;
char *path = xstrdup(getenv("PATH"));
char *c, *lc;
if (path == NULL) {
error("No PATH environment variable (or empty PATH)");
return NULL;
}
l = list_create(_freeF);
c = lc = path;
while (*c != '\0') {
if (*c == ':') {
/* nullify and push token onto list */
*c = '\0';
if (lc != NULL && strlen(lc) > 0)
list_append(l, xstrdup(lc));
lc = ++c;
} else
c++;
}
if (strlen(lc) > 0)
list_append(l, xstrdup(lc));
xfree(path);
return l;
}
static char *
_search_path(char *cmd, int access_mode)
{
List l = _create_path_list();
ListIterator i = NULL;
char *path, *fullpath = NULL;
if (l == NULL)
return NULL;
if ((cmd[0] == '.' || cmd[0] == '/')
&& (access(cmd, access_mode) == 0 ) ) {
if (cmd[0] == '.')
xstrfmtcat(fullpath, "%s/", opt.cwd);
xstrcat(fullpath, cmd);
goto done;
}
i = list_iterator_create(l);
while ((path = list_next(i))) {
xstrfmtcat(fullpath, "%s/%s", path, cmd);
if (access(fullpath, access_mode) == 0)
goto done;
xfree(fullpath);
fullpath = NULL;
}
done:
list_destroy(l);
return fullpath;
}
static char *
print_remote_command()
{
int i;
char *buf = NULL;
char *space;
for (i = 0; i < opt.argc; i++) {
if (i == opt.argc-1) {
space = "";
} else {
space = " ";
}
xstrfmtcat(buf, "\"%s\"%s", opt.argv[i], space);
}
return buf;
}
#define tf_(b) (b == true) ? "true" : "false"
static void _opt_list()
{
char *str;
info("defined options for program \"%s\"", opt.progname);
info("--------------- ---------------------");
info("user : \"%s\"", opt.user);
info("uid : %ld", (long) opt.uid);
info("gid : %ld", (long) opt.gid);
info("cwd : %s", opt.cwd);
info("num_tasks : %d %s", opt.num_tasks,
opt.num_tasks_set ? "(set)" : "(default)");
info("cpus_per_task : %d %s", opt.cpus_per_task,
opt.cpus_per_task_set ? "(set)" : "(default)");
info("nodes : %d %s",
opt.num_nodes, opt.num_nodes_set ? "(set)" : "(default)");
info("jobid : %u %s", opt.jobid,
opt.jobid_set ? "(set)" : "(default)");
info("job name : \"%s\"", opt.job_name);
info("distribution : %s",
format_task_dist_states(opt.distribution));
info("cpu_bind : %s",
opt.cpu_bind == NULL ? "default" : opt.cpu_bind);
info("mem_bind : %s",
opt.mem_bind == NULL ? "default" : opt.mem_bind);
info("core format : %s", core_format_name (opt.core_type));
info("verbose : %d", opt.verbose);
info("slurmd_debug : %d", opt.slurmd_debug);
info("label output : %s", tf_(opt.labelio));
info("unbuffered IO : %s", tf_(opt.unbuffered));
info("overcommit : %s", tf_(opt.overcommit));
info("wait : %d", opt.max_wait);
info("required nodes : %s", opt.nodelist);
info("network : %s", opt.network);
info("propagate : %s",
opt.propagate == NULL ? "NONE" : opt.propagate);
info("prolog : %s", opt.prolog);
info("epilog : %s", opt.epilog);
info("task_prolog : %s", opt.task_prolog);
info("task_epilog : %s", opt.task_epilog);
info("comm_hostname : %s", opt.comm_hostname);
info("multi_prog : %s", opt.multi_prog ? "yes" : "no");
info("plane_size : %u", opt.plane_size);
str = print_remote_command();
info("remote command : %s", str);
xfree(str);
}
static void _usage(void)
{
printf(
"Usage: slaunch [-N nnodes] [-n ntasks] [-i in] [-o out] [-e err]\n"
" [-c ncpus] [-r n] [-t minutes]\n"
" [-D path] [--overcommit] [--no-kill]\n"
" [--label] [--unbuffered] [-m dist] [-J jobname]\n"
" [--jobid=id] [--batch] [--verbose] [--slurmd_debug=#]\n"
" [--core=type] [-W sec]\n"
" [--mpi=type]\n"
" [--kill-on-bad-exit] [--propagate[=rlimits] ]\n"
" [--cpu_bind=...] [--mem_bind=...]\n"
" [--prolog=fname] [--epilog=fname]\n"
" [--task-prolog=fname] [--task-epilog=fname]\n"
" [--comm-hostname=<hostname|address>] [--multi-prog]\n"
" [-w hosts...] [-L hostids...] executable [args...]\n");
}
static void _help(void)
{
slurm_ctl_conf_t *conf;
printf (
"Usage: slaunch [OPTIONS...] executable [args...]\n"
"\n"
"Parallel run options:\n"
" -n, --ntasks=ntasks number of tasks to run\n"
" -N, --nodes=N number of nodes on which to run\n"
" -c, --cpus-per-task=ncpus number of cpus required per task\n"
" -i, --slaunch-input=file slaunch will read stdin from \"file\"\n"
" -o, --slaunch-output=file slaunch will write stdout to \"file\"\n"
" -e, --slaunch-error=file slaunch will write stderr to \"file\"\n"
" --slaunch-input-filter=taskid send stdin to only the specified task\n"
" --slaunch-output-filter=taskid only print stdout from the specified task\n"
" --slaunch-error-filter=taskid only print stderr from the specified task\n"
" -I, --task-input=file connect task stdin to \"file\"\n"
" -O, --task-output=file connect task stdout to \"file\"\n"
" -E, --task-error=file connect task stderr to \"file\"\n"
" -r, --relative=n run job step relative to node n of allocation\n"
" -t, --time=minutes time limit\n"
" -D, --workdir=path the working directory for the launched tasks\n"
" -C, --overcommit overcommit resources\n"
" -k, --no-kill do not kill job on node failure\n"
" -K, --kill-on-bad-exit kill the job if any task terminates with a\n"
" non-zero exit code\n"
" -l, --label prepend task number to lines of stdout/err\n"
" -u, --unbuffered do not line-buffer stdout/err\n"
" -m, --distribution=type distribution method for processes to nodes\n"
" (type = block|cyclic|hostfile)\n"
" -J, --job-name=jobname name of job\n"
" --jobid=id run under already allocated job\n"
" -W, --wait=sec seconds to wait after first task exits\n"
" before killing job\n"
" -v, --verbose verbose mode (multiple -v's increase verbosity)\n"
" -q, --quiet quiet mode (suppress informational messages)\n"
" -d, --slurmd-debug=level slurmd debug level\n"
" --core=type change default corefile format type\n"
" (type=\"list\" to list of valid formats)\n"
" --propagate[=rlimits] propagate all [or specific list of] rlimits\n"
" --mpi=type specifies version of MPI to use\n"
" --prolog=program run \"program\" before launching job step\n"
" --epilog=program run \"program\" after launching job step\n"
" --task-prolog=program run \"program\" before launching task\n"
" --task-epilog=program run \"program\" after launching task\n"
" --comm-hostname=hostname hostname for PMI communications with slaunch\n"
" --multi-prog if set the program name specified is the\n"
" configuration specificaiton for multiple programs\n"
" -w, --nodelist-byname=hosts... request a specific list of hosts\n"
" -L, --nodelist-byid=hosts... request a specific list of hosts\n");
conf = slurm_conf_lock();
if (conf->task_plugin != NULL
&& strcasecmp(conf->task_plugin, "task/affinity") == 0) {
printf(
" --cpu_bind= Bind tasks to CPUs\n"
" (see \"--cpu_bind=help\" for options)\n"
" --mem_bind= Bind memory to locality domains (ldom)\n"
" (see \"--mem_bind=help\" for options)\n"
);
}
slurm_conf_unlock();
spank_print_options (stdout, 6, 30);
printf("\n");
printf(
#ifdef HAVE_AIX /* AIX/Federation specific options */
"AIX related options:\n"
" --network=type communication protocol to be used\n"
"\n"
#endif
"Help options:\n"
" -h, --help show this help message\n"
" --usage display brief usage message\n"
"\n"
"Other options:\n"
" -V, --version output version information and exit\n"
"\n"
);
}