| /*****************************************************************************\ |
| * proc_args.c - helper functions for command argument processing |
| * $Id: opt.h 11996 2007-08-10 20:36:26Z jette $ |
| ***************************************************************************** |
| * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. |
| * Written by Christopher Holmes <cholmes@hp.com>, who borrowed heavily |
| * from existing SLURM source code, particularly src/srun/opt.c |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://slurm.schedmd.com/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #if HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| #include <string.h> /* strcpy, strncasecmp */ |
| |
| #ifdef HAVE_STRINGS_H |
| # include <strings.h> |
| #endif |
| |
| #ifndef __USE_ISOC99 |
| #define __USE_ISOC99 |
| #endif |
| |
| #ifdef HAVE_LIMITS_H |
| # include <limits.h> |
| #endif |
| |
| #ifndef _GNU_SOURCE |
| # define _GNU_SOURCE |
| #endif |
| |
| #ifndef SYSTEM_DIMENSIONS |
| # define SYSTEM_DIMENSIONS 1 |
| #endif |
| |
| #include <fcntl.h> |
| #include <stdarg.h> /* va_start */ |
| #include <stdio.h> |
| #include <stdlib.h> /* getenv, strtoll */ |
| #include <pwd.h> /* getpwuid */ |
| #include <ctype.h> /* isdigit */ |
| #include <sys/param.h> /* MAXPATHLEN */ |
| #include <sys/stat.h> |
| #include <unistd.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <sys/utsname.h> |
| |
| #include "src/common/gres.h" |
| #include "src/common/list.h" |
| #include "src/common/proc_args.h" |
| #include "src/common/xmalloc.h" |
| #include "src/common/xstring.h" |
| |
| |
| /* print this version of SLURM */ |
| void print_slurm_version(void) |
| { |
| printf("%s %s\n", PACKAGE_NAME, SLURM_VERSION_STRING); |
| } |
| |
/*
 * print_gres_help - print the gres help text to stdout
 *
 * gres_plugin_help_msg() is handed a local, initially empty buffer; if it
 * is still empty afterwards a fixed fallback line is printed instead.
 */
void print_gres_help(void)
{
	char help_msg[1024] = "";

	gres_plugin_help_msg(help_msg, sizeof(help_msg));
	if (help_msg[0] == '\0') {
		printf("No gres help is available\n");
		return;
	}
	printf("%s", help_msg);
}
| |
| void set_distribution(task_dist_states_t distribution, |
| char **dist, char **lllp_dist) |
| { |
| if (((int)distribution >= 0) |
| && (distribution != SLURM_DIST_UNKNOWN)) { |
| switch (distribution) { |
| case SLURM_DIST_CYCLIC: |
| *dist = "cyclic"; |
| break; |
| case SLURM_DIST_BLOCK: |
| *dist = "block"; |
| break; |
| case SLURM_DIST_PLANE: |
| *dist = "plane"; |
| *lllp_dist = "plane"; |
| break; |
| case SLURM_DIST_ARBITRARY: |
| *dist = "arbitrary"; |
| break; |
| case SLURM_DIST_CYCLIC_CYCLIC: |
| *dist = "cyclic:cyclic"; |
| *lllp_dist = "cyclic"; |
| break; |
| case SLURM_DIST_CYCLIC_BLOCK: |
| *dist = "cyclic:block"; |
| *lllp_dist = "block"; |
| break; |
| case SLURM_DIST_BLOCK_CYCLIC: |
| *dist = "block:cyclic"; |
| *lllp_dist = "cyclic"; |
| break; |
| case SLURM_DIST_BLOCK_BLOCK: |
| *dist = "block:block"; |
| *lllp_dist = "block"; |
| break; |
| case SLURM_DIST_CYCLIC_CFULL: |
| *dist = "cyclic:fcyclic"; |
| *lllp_dist = "fcyclic"; |
| break; |
| case SLURM_DIST_BLOCK_CFULL: |
| *dist = "block:fcyclic"; |
| *lllp_dist = "cyclic"; |
| break; |
| default: |
| error("unknown dist, type %d", distribution); |
| break; |
| } |
| } |
| } |
| |
| /* |
| * verify that a distribution type in arg is of a known form |
| * returns the task_dist_states, or -1 if state is unknown |
| */ |
| task_dist_states_t verify_dist_type(const char *arg, uint32_t *plane_size) |
| { |
| int len = strlen(arg); |
| char *dist_str = NULL; |
| task_dist_states_t result = SLURM_DIST_UNKNOWN; |
| bool lllp_dist = false, plane_dist = false; |
| |
| dist_str = strchr(arg,':'); |
| if (dist_str != NULL) { |
| /* -m cyclic|block:cyclic|block */ |
| lllp_dist = true; |
| } else { |
| /* -m plane=<plane_size> */ |
| dist_str = strchr(arg,'='); |
| if (dist_str != NULL) { |
| *plane_size=atoi(dist_str+1); |
| len = dist_str-arg; |
| plane_dist = true; |
| } |
| } |
| |
| if (lllp_dist) { |
| if (strcasecmp(arg, "cyclic:cyclic") == 0) { |
| result = SLURM_DIST_CYCLIC_CYCLIC; |
| } else if (strcasecmp(arg, "cyclic:block") == 0) { |
| result = SLURM_DIST_CYCLIC_BLOCK; |
| } else if (strcasecmp(arg, "block:block") == 0) { |
| result = SLURM_DIST_BLOCK_BLOCK; |
| } else if (strcasecmp(arg, "block:cyclic") == 0) { |
| result = SLURM_DIST_BLOCK_CYCLIC; |
| } else if (strcasecmp(arg, "block:fcyclic") == 0) { |
| result = SLURM_DIST_BLOCK_CFULL; |
| } else if (strcasecmp(arg, "cyclic:fcyclic") == 0) { |
| result = SLURM_DIST_CYCLIC_CFULL; |
| } |
| } else if (plane_dist) { |
| if (strncasecmp(arg, "plane", len) == 0) { |
| result = SLURM_DIST_PLANE; |
| } |
| } else { |
| if (strncasecmp(arg, "cyclic", len) == 0) { |
| result = SLURM_DIST_CYCLIC; |
| } else if (strncasecmp(arg, "block", len) == 0) { |
| result = SLURM_DIST_BLOCK; |
| } else if ((strncasecmp(arg, "arbitrary", len) == 0) || |
| (strncasecmp(arg, "hostfile", len) == 0)) { |
| result = SLURM_DIST_ARBITRARY; |
| } |
| } |
| |
| return result; |
| } |
| |
| static uint16_t _get_conn_type(char *arg, bool bgp) |
| { |
| uint16_t len = strlen(arg); |
| if (!len) { |
| /* no input given */ |
| error("no conn-type argument given."); |
| return (uint16_t)NO_VAL; |
| } else if (!strncasecmp(arg, "MESH", len)) |
| return SELECT_MESH; |
| else if (!strncasecmp(arg, "TORUS", len)) |
| return SELECT_TORUS; |
| else if (!strncasecmp(arg, "NAV", len)) |
| return SELECT_NAV; |
| else if (!strncasecmp(arg, "SMALL", len)) |
| return SELECT_SMALL; |
| else if (bgp) { |
| if (!strncasecmp(arg, "HTC", len) || |
| !strncasecmp(arg, "HTC_S", len)) |
| return SELECT_HTC_S; |
| else if (!strncasecmp(arg, "HTC_D", len)) |
| return SELECT_HTC_D; |
| else if (!strncasecmp(arg, "HTC_V", len)) |
| return SELECT_HTC_V; |
| else if (!strncasecmp(arg, "HTC_L", len)) |
| return SELECT_HTC_L; |
| } |
| |
| error("invalid conn-type argument '%s' ignored.", arg); |
| return (uint16_t)NO_VAL; |
| } |
| |
| /* |
| * verify comma separated list of connection types to array of uint16_t |
| * connection_types or NO_VAL if not recognized |
| */ |
| extern void verify_conn_type(const char *arg, uint16_t *conn_type) |
| { |
| bool got_bgp = 0; |
| int inx = 0; |
| int highest_dims = 1; |
| char *arg_tmp = xstrdup(arg), *tok, *save_ptr = NULL; |
| |
| if (working_cluster_rec) { |
| if (working_cluster_rec->flags & CLUSTER_FLAG_BGP) |
| got_bgp = 1; |
| else if (working_cluster_rec->flags & CLUSTER_FLAG_BGQ) |
| highest_dims = 4; |
| } else { |
| #ifdef HAVE_BGP |
| got_bgp = 1; |
| # elif defined HAVE_BGQ |
| highest_dims = 4; |
| #endif |
| } |
| |
| tok = strtok_r(arg_tmp, ",", &save_ptr); |
| while (tok) { |
| if (inx >= highest_dims) { |
| error("too many conn-type arguments: %s", arg); |
| break; |
| } |
| conn_type[inx++] = _get_conn_type(tok, got_bgp); |
| tok = strtok_r(NULL, ",", &save_ptr); |
| } |
| if (inx == 0) |
| error("invalid conn-type argument '%s' ignored.", arg); |
| /* Fill the rest in with NO_VALS (use HIGHEST_DIMS here |
| * instead of highest_dims since that is the size of the |
| * array. */ |
| for ( ; inx < HIGHEST_DIMENSIONS; inx++) { |
| conn_type[inx] = (uint16_t)NO_VAL; |
| } |
| |
| xfree(arg_tmp); |
| } |
| |
| /* |
| * verify geometry arguments, must have proper count |
| * returns -1 on error, 0 otherwise |
| */ |
| int verify_geometry(const char *arg, uint16_t *geometry) |
| { |
| char* token, *delimiter = ",x", *next_ptr; |
| int i, rc = 0; |
| char* geometry_tmp = xstrdup(arg); |
| char* original_ptr = geometry_tmp; |
| int dims = slurmdb_setup_cluster_dims(); |
| |
| token = strtok_r(geometry_tmp, delimiter, &next_ptr); |
| for (i=0; i<dims; i++) { |
| if (token == NULL) { |
| error("insufficient dimensions in --geometry"); |
| rc = -1; |
| break; |
| } |
| geometry[i] = (uint16_t)atoi(token); |
| if (geometry[i] == 0 || geometry[i] == (uint16_t)NO_VAL) { |
| error("invalid --geometry argument"); |
| rc = -1; |
| break; |
| } |
| geometry_tmp = next_ptr; |
| token = strtok_r(geometry_tmp, delimiter, &next_ptr); |
| } |
| if (token != NULL) { |
| error("too many dimensions in --geometry"); |
| rc = -1; |
| } |
| |
| if (original_ptr) |
| xfree(original_ptr); |
| |
| return rc; |
| } |
| |
/*
 * base_name - return a copy of the command name without its directory part
 * IN command - path name, may be NULL
 * RET NULL if command is NULL, otherwise a buffer obtained from xmalloc()
 *	holding everything after the last '/' (the whole string when there
 *	is no '/')
 */
char * base_name(char* command)
{
	char *last_slash, *start, *copy;
	size_t len;

	if (!command)
		return NULL;

	last_slash = strrchr(command, '/');
	start = last_slash ? (last_slash + 1) : command;

	len = strlen(start);
	copy = xmalloc(len + 1);
	memcpy(copy, start, len + 1);	/* includes the terminating NUL */
	return copy;
}
| |
/*
 * str_to_mbytes(): verify that arg is numeric with optional "K", "M", "G"
 *	or "T" at end and return the number in mega-bytes
 * IN arg - decimal number, optionally followed by a unit letter (either
 *	case). No unit letter means mega-bytes. Only the first character
 *	after the number is examined.
 * RET the size in mega-bytes (kilo-byte values are rounded up), or -1 if
 *	arg holds no digits, has an unknown unit, or the result does not
 *	fit in a long
 */
long str_to_mbytes(const char *arg)
{
	long result, scale;
	char *endptr;

	errno = 0;
	result = strtol(arg, &endptr, 10);
	if ((errno != 0) && ((result == LONG_MIN) || (result == LONG_MAX)))
		return -1;
	if (endptr == arg)
		return -1;	/* no digits: "" or a bare unit letter */

	switch (endptr[0]) {
	case '\0':
	case 'm':
	case 'M':
		return result;
	case 'k':
	case 'K':
		/* round up; avoid overflowing in "result + 1023" */
		if (result > (LONG_MAX - 1023))
			return (result / 1024) + ((result % 1024) ? 1 : 0);
		return (result + 1023) / 1024;
	case 'g':
	case 'G':
		scale = 1024;
		break;
	case 't':
	case 'T':
		scale = 1024 * 1024;
		break;
	default:
		return -1;
	}

	if ((result > (LONG_MAX / scale)) || (result < (LONG_MIN / scale)))
		return -1;	/* would overflow */

	return result * scale;
}
| |
/*
 * _str_to_nodes - convert a string into a node count
 * IN num_str - decimal number optionally followed by ONE of 'k'/'K'
 *	(multiply by 1024) or 'm'/'M' (multiply by 1024 * 1024)
 * OUT leftover - first character not consumed. Set to num_str itself when
 *	num_str holds no digits or when the scaled value does not fit in an
 *	int, so that a caller which insists on only whitespace being left
 *	over rejects such input.
 * RET the node count, or 0 if nothing valid was parsed
 */
static int
_str_to_nodes(const char *num_str, char **leftover)
{
	long int num, scale = 1;
	char *endptr;

	num = strtol(num_str, &endptr, 10);
	if (endptr == num_str) { /* no valid digits */
		*leftover = (char *)num_str;
		return 0;
	}

	if ((*endptr == 'k') || (*endptr == 'K')) {
		scale = 1024;
		endptr++;
	} else if ((*endptr == 'm') || (*endptr == 'M')) {
		scale = 1024 * 1024;
		endptr++;
	}

	/* refuse anything that would overflow or be truncated to int */
	if ((num > (INT_MAX / scale)) || (num < (INT_MIN / scale))) {
		*leftover = (char *)num_str;
		return 0;
	}

	*leftover = endptr;
	return (int)(num * scale);
}
| |
/*
 * verify that a node count in arg is of a known form (count or min-max)
 * IN arg - either "<count>" or "<min>-<max>"; each number is parsed by
 *	_str_to_nodes() and may only be followed by whitespace
 * OUT min_nodes, max_nodes - specified minimum and maximum node counts
 * RET true if valid, false (after logging an error) otherwise
 *
 * In the range form a minimum below the floor is raised to 1; in the
 * single count form it is an error. The floor test is "< 0" when
 * HAVE_ALPS_CRAY is defined and "== 0" otherwise.
 */
bool verify_node_count(const char *arg, int *min_nodes, int *max_nodes)
{
	char *dash, *piece, *rest;
	bool below_floor;

	/* A '-' anywhere in the string makes it a range, otherwise it is
	 * an absolute node count. */
	dash = strchr(arg, '-');

	if (dash == NULL) {
		*min_nodes = *max_nodes = _str_to_nodes(arg, &rest);
		if (!xstring_is_whitespace(rest)) {
			error("\"%s\" is not a valid node count", arg);
			return false;
		}
#ifdef HAVE_ALPS_CRAY
		below_floor = (*min_nodes < 0);
#else
		below_floor = (*min_nodes == 0);
#endif
		if (below_floor) {
			/* whitespace does not a valid node count make */
			error("\"%s\" is not a valid node count", arg);
			return false;
		}
	} else {
		/* text before the '-' is the minimum */
		piece = xstrndup(arg, dash - arg);
		*min_nodes = _str_to_nodes(piece, &rest);
		if (!xstring_is_whitespace(rest)) {
			error("\"%s\" is not a valid node count", piece);
			xfree(piece);
			return false;
		}
		xfree(piece);
#ifdef HAVE_ALPS_CRAY
		below_floor = (*min_nodes < 0);
#else
		below_floor = (*min_nodes == 0);
#endif
		if (below_floor)
			*min_nodes = 1;

		/* everything after the '-' is the maximum */
		piece = xstrndup(dash + 1, strlen(dash + 1));
		*max_nodes = _str_to_nodes(piece, &rest);
		if (!xstring_is_whitespace(rest)) {
			error("\"%s\" is not a valid node count", piece);
			xfree(piece);
			return false;
		}
		xfree(piece);
	}

	if ((*max_nodes != 0) && (*max_nodes < *min_nodes)) {
		error("Maximum node count %d is less than"
		      " minimum node count %d",
		      *max_nodes, *min_nodes);
		return false;
	}

	return true;
}
| |
| /* |
| * If the node list supplied is a file name, translate that into |
| * a list of nodes, we orphan the data pointed to |
| * RET true if the node list is a valid one |
| */ |
| bool verify_node_list(char **node_list_pptr, enum task_dist_states dist, |
| int task_count) |
| { |
| char *nodelist = NULL; |
| |
| xassert (node_list_pptr); |
| xassert (*node_list_pptr); |
| |
| if (strchr(*node_list_pptr, '/') == NULL) |
| return true; /* not a file name */ |
| |
| /* If we are using Arbitrary grab count out of the hostfile |
| using them exactly the way we read it in since we are |
| saying, lay it out this way! */ |
| if (dist == SLURM_DIST_ARBITRARY) |
| nodelist = slurm_read_hostfile(*node_list_pptr, task_count); |
| else |
| nodelist = slurm_read_hostfile(*node_list_pptr, NO_VAL); |
| |
| if (!nodelist) |
| return false; |
| |
| xfree(*node_list_pptr); |
| *node_list_pptr = xstrdup(nodelist); |
| free(nodelist); |
| |
| return true; |
| } |
| |
/*
 * Parse "<number>[k|K|m|M]" starting at str.
 * OUT end - first character after the number and its optional suffix
 * RET the number scaled by 1024 (k) or 1048576 (m); saturates at
 *	LLONG_MIN/LLONG_MAX instead of overflowing
 */
static long long _parse_scaled_count(const char *str, char **end)
{
	long long value = strtoll(str, end, 10);
	long long scale = 1;

	if ((**end == 'k') || (**end == 'K'))
		scale = 1024;
	else if ((**end == 'm') || (**end == 'M'))
		scale = 1048576;
	if (scale == 1)
		return value;

	(*end)++;
	if (value > (LLONG_MAX / scale))
		return LLONG_MAX;
	if (value < (LLONG_MIN / scale))
		return LLONG_MIN;
	return value * scale;
}

/*
 * Check one parsed value of a resource range, logging an error (and
 * exiting if isFatal) when it is unusable.
 * IN tail_ok - whether the text following the value was acceptable
 * RET true if value is in 1..INT_MAX and tail_ok is set
 */
static bool _resource_value_ok(long long value, bool tail_ok,
			       const char *arg, const char *what,
			       bool isFatal)
{
	if (!tail_ok || (value <= 0)) {
		error ("Invalid numeric value \"%s\" for %s.", arg, what);
	} else if (value > INT_MAX) {
		long shown = (value > LONG_MAX) ? LONG_MAX : (long) value;
		error ("Numeric argument (%ld) to big for %s.", shown, what);
	} else {
		return true;
	}

	if (isFatal)
		exit(1);
	return false;
}

/*
 * get either 1 or 2 integers for a resource count in the form of either
 * (count, min-max, or '*')
 * A partial error message is passed in via the 'what' param.
 * IN arg - argument; an empty string is treated like '*'. Each number may
 *	carry a k/K (x1024) or m/M (x1048576) suffix.
 * IN what - variable name (for errors)
 * OUT min - first number (1 for '*')
 * OUT max - maximum value if specified (INT_MAX for '*'), NULL if don't
 *	care. NOT written when arg is a single count.
 * IN isFatal - if set, exit on error
 * RET true if valid; false if a number is missing, not positive, larger
 *	than INT_MAX, or followed by unexpected text
 */
bool get_resource_arg_range(const char *arg, const char *what, int* min,
			    int *max, bool isFatal)
{
	char *p;
	long long result;

	/* wildcard meaning every possible value in range */
	if ((*arg == '\0') || (*arg == '*' )) {
		*min = 1;
		if (max)
			*max = INT_MAX;
		return true;
	}

	result = _parse_scaled_count(arg, &p);
	if (!_resource_value_ok(result, (*p == '\0') || (*p == '-'),
				arg, what, isFatal))
		return false;
	*min = (int) result;

	if (*p == '\0')
		return true;
	p++;	/* step over the '-' */

	/* nothing may follow the maximum ("1-2-3" is not a range) */
	result = _parse_scaled_count(p, &p);
	if (!_resource_value_ok(result, (*p == '\0'), arg, what, isFatal))
		return false;

	if (max)
		*max = (int) result;

	return true;
}
| |
| /* |
| * verify that a resource counts in arg are of a known form X, X:X, X:X:X, or |
| * X:X:X:X, where X is defined as either (count, min-max, or '*') |
| * RET true if valid |
| */ |
| bool verify_socket_core_thread_count(const char *arg, int *min_sockets, |
| int *min_cores, int *min_threads, |
| cpu_bind_type_t *cpu_bind_type) |
| { |
| bool tmp_val,ret_val; |
| int i,j; |
| int max_sockets = 0, max_cores, max_threads; |
| const char *cur_ptr = arg; |
| char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */ |
| buf[0][0] = '\0'; |
| buf[1][0] = '\0'; |
| buf[2][0] = '\0'; |
| |
| for (j=0;j<3;j++) { |
| for (i=0;i<47;i++) { |
| if (*cur_ptr == '\0' || *cur_ptr ==':') |
| break; |
| buf[j][i] = *cur_ptr++; |
| } |
| if (*cur_ptr == '\0') |
| break; |
| xassert(*cur_ptr == ':'); |
| buf[j][i] = '\0'; |
| cur_ptr++; |
| } |
| /* if cpu_bind_type doesn't already have a auto preference, choose |
| * the level based on the level of the -E specification |
| */ |
| if (!(*cpu_bind_type & (CPU_BIND_TO_SOCKETS | |
| CPU_BIND_TO_CORES | |
| CPU_BIND_TO_THREADS))) { |
| if (j == 0) { |
| *cpu_bind_type |= CPU_BIND_TO_SOCKETS; |
| } else if (j == 1) { |
| *cpu_bind_type |= CPU_BIND_TO_CORES; |
| } else if (j == 2) { |
| *cpu_bind_type |= CPU_BIND_TO_THREADS; |
| } |
| } |
| buf[j][i] = '\0'; |
| |
| ret_val = true; |
| tmp_val = get_resource_arg_range(&buf[0][0], "first arg of -B", |
| min_sockets, &max_sockets, true); |
| if ((*min_sockets == 1) && (max_sockets == INT_MAX)) |
| *min_sockets = NO_VAL; /* Use full range of values */ |
| ret_val = ret_val && tmp_val; |
| |
| tmp_val = get_resource_arg_range(&buf[1][0], "second arg of -B", |
| min_cores, &max_cores, true); |
| if ((*min_cores == 1) && (max_cores == INT_MAX)) |
| *min_cores = NO_VAL; /* Use full range of values */ |
| ret_val = ret_val && tmp_val; |
| |
| tmp_val = get_resource_arg_range(&buf[2][0], "third arg of -B", |
| min_threads, &max_threads, true); |
| if ((*min_threads == 1) && (max_threads == INT_MAX)) |
| *min_threads = NO_VAL; /* Use full range of values */ |
| ret_val = ret_val && tmp_val; |
| |
| return ret_val; |
| } |
| |
| /* |
| * verify that a hint is valid and convert it into the implied settings |
| * RET true if valid |
| */ |
| bool verify_hint(const char *arg, int *min_sockets, int *min_cores, |
| int *min_threads, int *ntasks_per_core, |
| cpu_bind_type_t *cpu_bind_type) |
| { |
| char *buf, *p, *tok; |
| if (!arg) { |
| return true; |
| } |
| |
| buf = xstrdup(arg); |
| p = buf; |
| /* change all ',' delimiters not followed by a digit to ';' */ |
| /* simplifies parsing tokens while keeping map/mask together */ |
| while (p[0] != '\0') { |
| if ((p[0] == ',') && (!isdigit((int)p[1]))) |
| p[0] = ';'; |
| p++; |
| } |
| |
| p = buf; |
| while ((tok = strsep(&p, ";"))) { |
| if (strcasecmp(tok, "help") == 0) { |
| printf( |
| "Application hint options:\n" |
| " --hint= Bind tasks according to application hints\n" |
| " compute_bound use all cores in each socket\n" |
| " memory_bound use only one core in each socket\n" |
| " [no]multithread [don't] use extra threads with in-core multi-threading\n" |
| " help show this help message\n"); |
| return 1; |
| } else if (strcasecmp(tok, "compute_bound") == 0) { |
| *min_sockets = NO_VAL; |
| *min_cores = NO_VAL; |
| *min_threads = 1; |
| *cpu_bind_type |= CPU_BIND_TO_CORES; |
| } else if (strcasecmp(tok, "memory_bound") == 0) { |
| *min_cores = 1; |
| *min_threads = 1; |
| *cpu_bind_type |= CPU_BIND_TO_CORES; |
| } else if (strcasecmp(tok, "multithread") == 0) { |
| *min_threads = NO_VAL; |
| *cpu_bind_type |= CPU_BIND_TO_THREADS; |
| *cpu_bind_type &= (~CPU_BIND_ONE_THREAD_PER_CORE); |
| if (*ntasks_per_core == NO_VAL) |
| *ntasks_per_core = INFINITE; |
| } else if (strcasecmp(tok, "nomultithread") == 0) { |
| *min_threads = 1; |
| *cpu_bind_type |= CPU_BIND_TO_THREADS; |
| *cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE; |
| } else { |
| error("unrecognized --hint argument \"%s\", " |
| "see --hint=help", tok); |
| xfree(buf); |
| return 1; |
| } |
| } |
| |
| xfree(buf); |
| return 0; |
| } |
| |
| uint16_t parse_mail_type(const char *arg) |
| { |
| uint16_t rc; |
| |
| if (strcasecmp(arg, "BEGIN") == 0) |
| rc = MAIL_JOB_BEGIN; |
| else if (strcasecmp(arg, "END") == 0) |
| rc = MAIL_JOB_END; |
| else if (strcasecmp(arg, "FAIL") == 0) |
| rc = MAIL_JOB_FAIL; |
| else if (strcasecmp(arg, "REQUEUE") == 0) |
| rc = MAIL_JOB_REQUEUE; |
| else if (strcasecmp(arg, "ALL") == 0) |
| rc = MAIL_JOB_BEGIN | MAIL_JOB_END | MAIL_JOB_FAIL | |
| MAIL_JOB_REQUEUE; |
| else |
| rc = 0; /* failure */ |
| |
| return rc; |
| } |
| char *print_mail_type(const uint16_t type) |
| { |
| if (type == 0) |
| return "NONE"; |
| |
| if (type == MAIL_JOB_BEGIN) |
| return "BEGIN"; |
| if (type == MAIL_JOB_END) |
| return "END"; |
| if (type == MAIL_JOB_FAIL) |
| return "FAIL"; |
| if (type == MAIL_JOB_REQUEUE) |
| return "REQUEUE"; |
| if (type == (MAIL_JOB_BEGIN | MAIL_JOB_END | MAIL_JOB_FAIL | |
| MAIL_JOB_REQUEUE)) |
| return "ALL"; |
| |
| return "MULTIPLE"; |
| } |
| |
/* List element destructor handed to list_create(): xfree() one element */
static void
_freeF(void *item)
{
	xfree(item);
}
| |
| static List |
| _create_path_list(void) |
| { |
| List l = list_create(_freeF); |
| char *path; |
| char *c, *lc; |
| |
| c = getenv("PATH"); |
| if (!c) { |
| error("No PATH environment variable"); |
| return l; |
| } |
| path = xstrdup(c); |
| c = lc = path; |
| |
| while (*c != '\0') { |
| if (*c == ':') { |
| /* nullify and push token onto list */ |
| *c = '\0'; |
| if (lc != NULL && strlen(lc) > 0) |
| list_append(l, xstrdup(lc)); |
| lc = ++c; |
| } else |
| c++; |
| } |
| |
| if (strlen(lc) > 0) |
| list_append(l, xstrdup(lc)); |
| |
| xfree(path); |
| |
| return l; |
| } |
| |
| char * |
| search_path(char *cwd, char *cmd, bool check_current_dir, int access_mode) |
| { |
| List l = NULL; |
| ListIterator i = NULL; |
| char *path, *fullpath = NULL; |
| |
| #if defined HAVE_BG && !defined HAVE_BG_L_P |
| /* BGQ's runjob command required a fully qualified path */ |
| if ( (cmd[0] == '.' || cmd[0] == '/') && |
| (access(cmd, access_mode) == 0 ) ) { |
| if (cmd[0] == '.') |
| xstrfmtcat(fullpath, "%s/", cwd); |
| xstrcat(fullpath, cmd); |
| goto done; |
| } |
| #else |
| if ((cmd[0] == '.') || (cmd[0] == '/')) |
| return NULL; |
| #endif |
| |
| l = _create_path_list(); |
| if (l == NULL) |
| return NULL; |
| |
| if (check_current_dir) |
| list_prepend(l, xstrdup(cwd)); |
| |
| i = list_iterator_create(l); |
| while ((path = list_next(i))) { |
| xstrfmtcat(fullpath, "%s/%s", path, cmd); |
| |
| if (access(fullpath, access_mode) == 0) |
| goto done; |
| |
| xfree(fullpath); |
| fullpath = NULL; |
| } |
| done: |
| if (l) |
| list_destroy(l); |
| return fullpath; |
| } |
| |
| char *print_commandline(const int script_argc, char **script_argv) |
| { |
| int i; |
| char tmp[256], *out_buf = NULL, *prefix; |
| |
| for (i = 0; i < script_argc; i++) { |
| if (out_buf) |
| prefix = " "; |
| else |
| prefix = ""; |
| snprintf(tmp, 256, "%s%s", prefix, script_argv[i]); |
| xstrcat(out_buf, tmp); |
| } |
| return out_buf; |
| } |
| |
| char *print_geometry(const uint16_t *geometry) |
| { |
| int i; |
| char buf[32], *rc = NULL; |
| int dims = slurmdb_setup_cluster_dims(); |
| |
| if ((dims == 0) || !geometry[0] |
| || (geometry[0] == (uint16_t)NO_VAL)) |
| return NULL; |
| |
| for (i=0; i<dims; i++) { |
| if (i > 0) |
| snprintf(buf, sizeof(buf), "x%u", geometry[i]); |
| else |
| snprintf(buf, sizeof(buf), "%u", geometry[i]); |
| xstrcat(rc, buf); |
| } |
| |
| return rc; |
| } |
| |
| /* Translate a signal option string "--signal=<int>[@<time>]" into |
| * it's warn_signal and warn_time components. |
| * RET 0 on success, -1 on failure */ |
| int get_signal_opts(char *optarg, uint16_t *warn_signal, uint16_t *warn_time, |
| uint16_t *warn_flags) |
| { |
| char *endptr; |
| long num; |
| |
| if (optarg == NULL) |
| return -1; |
| |
| if (!strncasecmp(optarg, "B:", 2)) { |
| *warn_flags = KILL_JOB_BATCH; |
| optarg += 2; |
| } |
| |
| endptr = strchr(optarg, '@'); |
| if (endptr) |
| endptr[0] = '\0'; |
| num = (uint16_t) sig_name2num(optarg); |
| if (endptr) |
| endptr[0] = '@'; |
| if ((num < 1) || (num > 0x0ffff)) |
| return -1; |
| *warn_signal = (uint16_t) num; |
| |
| if (!endptr) { |
| *warn_time = 60; |
| return 0; |
| } |
| |
| num = strtol(endptr+1, &endptr, 10); |
| if ((num < 0) || (num > 0x0ffff)) |
| return -1; |
| *warn_time = (uint16_t) num; |
| if (endptr[0] == '\0') |
| return 0; |
| return -1; |
| } |
| |
/* Convert a signal name to its numeric equivalent.
 * IN signal_name - either a decimal number, or one of HUP, INT, QUIT,
 *	KILL, TERM, USR1, USR2, CONT (case-insensitive, optionally
 *	preceded by whitespace and/or "SIG"); only whitespace may follow
 * RET the signal number; a number is returned as given, without checking
 *	that it names an existing signal. Returns 0 on failure (unknown
 *	name, trailing text, or a number that does not fit in an int) */
int sig_name2num(char *signal_name)
{
	static const char * const sig_name[] = {"HUP", "INT", "QUIT", "KILL",
						"TERM", "USR1", "USR2", "CONT",
						NULL};
	static const int sig_num[] = {SIGHUP, SIGINT, SIGQUIT, SIGKILL,
				      SIGTERM, SIGUSR1, SIGUSR2, SIGCONT};
	char *ptr;
	long tmp;
	int i;

	tmp = strtol(signal_name, &ptr, 10);
	if (ptr != signal_name) {		/* found a number */
		if (!xstring_is_whitespace(ptr))
			return 0;
		/* do not let the narrowing below wrap around */
		if ((tmp < INT_MIN) || (tmp > INT_MAX))
			return 0;
		return (int)tmp;
	}

	ptr = signal_name;
	while (isspace((unsigned char)*ptr))
		ptr++;
	if (strncasecmp(ptr, "SIG", 3) == 0)
		ptr += 3;

	for (i = 0; sig_name[i] != NULL; i++) {
		size_t name_len = strlen(sig_name[i]);

		if (strncasecmp(ptr, sig_name[i], name_len) != 0)
			continue;
		/* found the signal name; nothing but whitespace may follow */
		if (!xstring_is_whitespace(ptr + name_len))
			return 0;
		return sig_num[i];
	}

	return 0;
}
| |
| |
| /* |
| * parse_uint32 - Convert anscii string to a 32 bit unsigned int. |
| * IN aval - ascii string. |
| * IN/OUT ival - 32 bit pointer. |
| * RET 0 if no error, 1 otherwise. |
| */ |
| extern int parse_uint32(char *aval, uint32_t *ival) |
| { |
| /* |
| * First, convert the ascii value it to a |
| * long long int. If the result is greater |
| * than or equal to 0 and less than NO_VAL |
| * set the value and return. Otherwise return |
| * an error. |
| */ |
| uint32_t max32uint = (uint32_t) NO_VAL; |
| long long tval; |
| char *p; |
| |
| /* |
| * Return error for invalid value. |
| */ |
| tval = strtoll(aval, &p, 10); |
| if (p[0] || (tval == LLONG_MIN) || (tval == LLONG_MAX) || |
| (tval < 0) || (tval >= max32uint)) |
| return 1; |
| |
| *ival = (uint32_t) tval; |
| |
| return 0; |
| } |
| |
| /* |
| * parse_uint16 - Convert anscii string to a 16 bit unsigned int. |
| * IN aval - ascii string. |
| * IN/OUT ival - 16 bit pointer. |
| * RET 0 if no error, 1 otherwise. |
| */ |
| extern int parse_uint16(char *aval, uint16_t *ival) |
| { |
| /* |
| * First, convert the ascii value it to a |
| * long long int. If the result is greater then |
| * or equal to 0 and less than (uint16_t) NO_VAL |
| * set the value and return. Otherwise |
| * return an error. |
| */ |
| uint16_t max16uint = (uint16_t) NO_VAL; |
| long long tval; |
| char *p; |
| |
| /* |
| * Return error for invalid value. |
| */ |
| tval = strtoll(aval, &p, 10); |
| if (p[0] || (tval == LLONG_MIN) || (tval == LLONG_MAX) || |
| (tval < 0) || (tval >= max16uint)) |
| return 1; |
| |
| *ival = (uint16_t) tval; |
| |
| return 0; |
| } |
| |
/* print_db_notok() - Log an error explaining that slurmdbd is
 *			unreachable or that the cluster name is wrong.
 * IN  cname - char * cluster name
 * IN  isenv - bool  cluster name from env or from command line option.
 *
 * Which message is logged depends on errno (non-zero: database
 * communication problem) and then on whether cname is "all".
 */
void print_db_notok(const char *cname, bool isenv)
{
	if (errno) {
		const char *remedy = isenv ?
			"SLURM_CLUSTERS from your environment" :
			"--cluster from your command line";

		error("There is a problem talking to the database: %m. "
		      "Only local cluster communication is available, remove "
		      "%s or contact your admin to resolve the problem.",
		      remedy);
		return;
	}

	if (strcasecmp(cname, "all") == 0) {
		error("No clusters can be reached now. "
		      "Contact your admin to resolve the problem.");
		return;
	}

	error("'%s' can't be reached now, "
	      "or it is an invalid entry for %s. "
	      "Use 'sacctmgr list clusters' to see available clusters.",
	      cname, isenv ? "SLURM_CLUSTERS" : "--cluster");
}
| |
/*
 * _check_is_pow_of_2 - test whether n is a positive power of 2
 * IN n - value to test
 * RET true for 1, 2, 4, 8, ...; false for 0 and every negative value
 */
static bool _check_is_pow_of_2(int32_t n) {
	/* A power of 2 has exactly one bit set, so clearing its lowest
	 * set bit (n & (n - 1)) leaves nothing. Testing n > 0 first keeps
	 * negative values, including INT32_MIN, out of the arithmetic.
	 */
	return ((n > 0) && ((n & (n - 1)) == 0));
}
| |
| extern void bg_figure_nodes_tasks(int *min_nodes, int *max_nodes, |
| int *ntasks_per_node, bool *ntasks_set, |
| int *ntasks, bool nodes_set, |
| bool nodes_set_opt, bool overcommit, |
| bool set_tasks) |
| { |
| /* BGQ has certain restrictions to run a job. So lets validate |
| * and correct what the user asked for if possible. |
| */ |
| int32_t node_cnt; |
| bool figured = false; |
| uint32_t cluster_flags = slurmdb_setup_cluster_flags(); |
| |
| if (!(cluster_flags & CLUSTER_FLAG_BGQ)) |
| fatal("bg_figure_nodes_tasks is only valid on a BGQ system."); |
| |
| if (!(*ntasks_set) |
| && (*ntasks_per_node) && (*ntasks_per_node != NO_VAL)) { |
| if ((*ntasks_per_node != 1) |
| && (*ntasks_per_node != 2) |
| && (*ntasks_per_node != 4) |
| && (*ntasks_per_node != 8) |
| && (*ntasks_per_node != 16) |
| && (*ntasks_per_node != 32) |
| && (*ntasks_per_node != 64)) |
| fatal("You requested --ntasks-per-node=%d, " |
| "which is not valid, it must be a power of 2. " |
| "Please validate your request and try again.", |
| *ntasks_per_node); |
| else if (!overcommit |
| && ((*ntasks_per_node == 32) |
| || (*ntasks_per_node == 64))) |
| fatal("You requested --ntasks-per-node=%d, " |
| "which is not valid without --overcommit.", |
| *ntasks_per_node); |
| } |
| |
| if (*max_nodes) |
| node_cnt = *max_nodes; |
| else |
| node_cnt = *min_nodes; |
| |
| if (*ntasks_set) { |
| int32_t ntpn; |
| |
| if (nodes_set) { |
| if (node_cnt > *ntasks) { |
| if (nodes_set_opt) |
| info("You asked for %d nodes, " |
| "but only %d tasks, resetting " |
| "node count to %u.", |
| node_cnt, *ntasks, *ntasks); |
| *max_nodes = *min_nodes = node_cnt |
| = *ntasks; |
| } |
| } |
| /* If nodes not set do not try to set min/max nodes |
| yet since that would result in an incorrect |
| allocation. For a step allocation it is figured |
| out later in srun_job.c _job_create_structure(). |
| */ |
| |
| if ((!*ntasks_per_node || (*ntasks_per_node == NO_VAL))) { |
| /* We always want the next larger number if |
| there is a fraction so we try to stay in |
| the allocation requested. |
| */ |
| *ntasks_per_node = |
| (*ntasks + node_cnt - 1) / node_cnt; |
| figured = true; |
| } |
| |
| /* On a Q we need ntasks_per_node to be a multiple of 2 */ |
| ntpn = *ntasks_per_node; |
| while (!_check_is_pow_of_2(ntpn)) |
| ntpn++; |
| if (!figured && ntpn > 64) |
| fatal("You requested --ntasks-per-node=%d, " |
| "which is not a power of 2. But the next " |
| "largest power of 2 (%d) is greater than the " |
| "largest valid power which is 64. Please " |
| "validate your request and try again.", |
| *ntasks_per_node, ntpn); |
| if (!figured && (ntpn != *ntasks_per_node)) { |
| info("You requested --ntasks-per-node=%d, which is not " |
| "a power of 2. Setting --ntasks-per-node=%d " |
| "for you.", *ntasks_per_node, ntpn); |
| figured = true; |
| } |
| *ntasks_per_node = ntpn; |
| |
| /* We always want the next larger number if |
| there is a fraction so we try to stay in |
| the allocation requested. |
| */ |
| ntpn = ((*ntasks) + (*ntasks_per_node) - 1) |
| / (*ntasks_per_node); |
| /* Make sure we are requesting the correct number of nodes. */ |
| if (node_cnt < ntpn) { |
| *max_nodes = *min_nodes = ntpn; |
| if (nodes_set && !figured) { |
| fatal("You requested -N %d and -n %d " |
| "with --ntasks-per-node=%d. " |
| "This isn't a valid request.", |
| node_cnt, *ntasks, |
| *ntasks_per_node); |
| } |
| node_cnt = *max_nodes; |
| } |
| |
| /* Do this again to make sure we have a legitimate |
| ratio. */ |
| ntpn = *ntasks_per_node; |
| if ((node_cnt * ntpn) < *ntasks) { |
| ntpn++; |
| while (!_check_is_pow_of_2(ntpn)) |
| ntpn++; |
| if (!figured && (ntpn != *ntasks_per_node)) |
| info("You requested --ntasks-per-node=%d, " |
| "which cannot spread across %d nodes " |
| "correctly. Setting --ntasks-per-node=%d " |
| "for you.", |
| *ntasks_per_node, node_cnt, ntpn); |
| *ntasks_per_node = ntpn; |
| } else if (!overcommit && ((node_cnt * ntpn) > *ntasks)) { |
| ntpn = (*ntasks + node_cnt - 1) / node_cnt; |
| while (!_check_is_pow_of_2(ntpn)) |
| ntpn++; |
| if (!figured && (ntpn != *ntasks_per_node)) |
| info("You requested --ntasks-per-node=%d, " |
| "which is more than the tasks you " |
| "requested. Setting --ntasks-per-node=%d " |
| "for you.", |
| *ntasks_per_node, ntpn); |
| *ntasks_per_node = ntpn; |
| } |
| } else if (set_tasks) { |
| if (*ntasks_per_node && (*ntasks_per_node != NO_VAL)) |
| *ntasks = node_cnt * (*ntasks_per_node); |
| else { |
| *ntasks = node_cnt; |
| *ntasks_per_node = 1; |
| } |
| *ntasks_set = true; |
| } |
| |
| /* If set_tasks isn't set we are coming in for the |
| allocation so verify it will work first before we |
| go any futher. |
| */ |
| if (nodes_set && (*ntasks_per_node && (*ntasks_per_node != NO_VAL))) { |
| if ((*ntasks_per_node != 1) |
| && (*ntasks_per_node != 2) |
| && (*ntasks_per_node != 4) |
| && (*ntasks_per_node != 8) |
| && (*ntasks_per_node != 16) |
| && (*ntasks_per_node != 32) |
| && (*ntasks_per_node != 64)) { |
| if (*ntasks_set) |
| fatal("You requested -N %d and -n %d " |
| "which gives --ntasks-per-node=%d. " |
| "This isn't a valid request.", |
| node_cnt, *ntasks, |
| *ntasks_per_node); |
| else |
| fatal("You requested -N %d and " |
| "--ntasks-per-node=%d. " |
| "This isn't a valid request.", |
| node_cnt, *ntasks_per_node); |
| } else if (!overcommit |
| && ((*ntasks_per_node == 32) |
| || (*ntasks_per_node == 64))) { |
| if (*ntasks_set) |
| fatal("You requested -N %d and -n %d " |
| "which gives --ntasks-per-node=%d. " |
| "This isn't a valid request " |
| "without --overcommit.", |
| node_cnt, *ntasks, |
| *ntasks_per_node); |
| else |
| fatal("You requested -N %d and " |
| "--ntasks-per-node=%d. " |
| "This isn't a valid request " |
| "without --overcommit.", |
| node_cnt, *ntasks_per_node); |
| } |
| } |
| |
| /* If we aren't setting tasks reset ntasks_per_node as well. */ |
| if (!set_tasks && figured) |
| *ntasks_per_node = 0; |
| |
| } |