| /*****************************************************************************\ |
| * scontrol.c - administration tool for slurm. |
| * provides interface to read, write, update, and configurations. |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Portions Copyright (C) 2008 Vijay Ramasubramanian. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov> |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://www.schedmd.com/slurmdocs/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include "scontrol.h" |
| #include "src/plugins/select/bluegene/bg_enums.h" |
| #include "src/common/proc_args.h" |
| |
| #define OPT_LONG_HIDE 0x102 |
| |
| char *command_name; |
| List clusters = NULL; |
| int all_flag; /* display even hidden partitions */ |
| int detail_flag; /* display additional details */ |
| int exit_code; /* scontrol's exit code, =1 on any error at any time */ |
| int exit_flag; /* program to terminate if =1 */ |
| int input_words; /* number of words of input permitted */ |
| int one_liner; /* one record per line if =1 */ |
| int quiet_flag; /* quiet=1, verbose=-1, normal=0 */ |
| int verbosity; /* count of "-v" options */ |
| uint32_t cluster_flags; /* what type of cluster are we talking to */ |
| |
| block_info_msg_t *old_block_info_ptr = NULL; |
| front_end_info_msg_t *old_front_end_info_ptr = NULL; |
| job_info_msg_t *old_job_info_ptr = NULL; |
| node_info_msg_t *old_node_info_ptr = NULL; |
| partition_info_msg_t *old_part_info_ptr = NULL; |
| reserve_info_msg_t *old_res_info_ptr = NULL; |
| slurm_ctl_conf_info_msg_t *old_slurm_ctl_conf_ptr = NULL; |
| |
| static void _create_it (int argc, char *argv[]); |
| static void _delete_it (int argc, char *argv[]); |
| static void _show_it (int argc, char *argv[]); |
| static int _get_command (int *argc, char *argv[]); |
| static void _ping_slurmctld(char *control_machine, |
| char *backup_controller); |
| static void _print_config (char *config_param); |
| static void _print_daemons (void); |
| static void _print_aliases (char* node_hostname); |
| static void _print_ping (void); |
| static void _print_slurmd(char *hostlist); |
| static void _print_version( void ); |
| static int _process_command (int argc, char *argv[]); |
| static void _update_it (int argc, char *argv[]); |
| static int _update_bluegene_block (int argc, char *argv[]); |
| static int _update_bluegene_subbp (int argc, char *argv[]); |
| static int _update_slurmctld_debug(char *val); |
| static void _usage (); |
| |
| int |
| main (int argc, char *argv[]) |
| { |
| int error_code = SLURM_SUCCESS, i, opt_char, input_field_count; |
| char **input_fields, *env_val; |
| log_options_t opts = LOG_OPTS_STDERR_ONLY ; |
| |
| int option_index; |
| static struct option long_options[] = { |
| {"all", 0, 0, 'a'}, |
| {"cluster", 1, 0, 'M'}, |
| {"clusters", 1, 0, 'M'}, |
| {"details", 0, 0, 'd'}, |
| {"help", 0, 0, 'h'}, |
| {"hide", 0, 0, OPT_LONG_HIDE}, |
| {"oneliner", 0, 0, 'o'}, |
| {"quiet", 0, 0, 'Q'}, |
| {"usage", 0, 0, 'h'}, |
| {"verbose", 0, 0, 'v'}, |
| {"version", 0, 0, 'V'}, |
| {NULL, 0, 0, 0} |
| }; |
| |
| command_name = argv[0]; |
| all_flag = 0; |
| detail_flag = 0; |
| exit_code = 0; |
| exit_flag = 0; |
| input_field_count = 0; |
| quiet_flag = 0; |
| verbosity = 0; |
| log_init("scontrol", opts, SYSLOG_FACILITY_DAEMON, NULL); |
| |
| if (getenv ("SCONTROL_ALL")) |
| all_flag= 1; |
| if ((env_val = getenv("SLURM_CLUSTERS"))) { |
| if (!(clusters = slurmdb_get_info_cluster(env_val))) { |
| error("'%s' can't be reached now, " |
| "or it is an invalid entry for " |
| "SLURM_CLUSTERS. Use 'sacctmgr --list " |
| "cluster' to see available clusters.", |
| env_val); |
| exit(1); |
| } |
| working_cluster_rec = list_peek(clusters); |
| } |
| |
| while((opt_char = getopt_long(argc, argv, "adhM:oQvV", |
| long_options, &option_index)) != -1) { |
| switch (opt_char) { |
| case (int)'?': |
| fprintf(stderr, "Try \"scontrol --help\" for " |
| "more information\n"); |
| exit(1); |
| break; |
| case (int)'a': |
| all_flag = 1; |
| break; |
| case (int)'d': |
| detail_flag++; |
| break; |
| case (int)'h': |
| _usage (); |
| exit(exit_code); |
| break; |
| case OPT_LONG_HIDE: |
| all_flag = 0; |
| detail_flag = 0; |
| break; |
| case (int)'M': |
| if (clusters) { |
| list_destroy(clusters); |
| clusters = NULL; |
| working_cluster_rec = NULL; |
| } |
| if (!(clusters = slurmdb_get_info_cluster(optarg))) { |
| error("'%s' can't be reached now, " |
| "or it is an invalid entry for " |
| "--cluster. Use 'sacctmgr --list " |
| "cluster' to see available clusters.", |
| optarg); |
| exit(1); |
| } |
| working_cluster_rec = list_peek(clusters); |
| break; |
| case (int)'o': |
| one_liner = 1; |
| break; |
| case (int)'Q': |
| quiet_flag = 1; |
| break; |
| case (int)'v': |
| quiet_flag = -1; |
| verbosity++; |
| break; |
| case (int)'V': |
| _print_version(); |
| exit(exit_code); |
| break; |
| default: |
| exit_code = 1; |
| fprintf(stderr, "getopt error, returned %c\n", |
| opt_char); |
| exit(exit_code); |
| } |
| } |
| |
| if (clusters && (list_count(clusters) > 1)) |
| fatal("Only one cluster can be used at a time with scontrol"); |
| cluster_flags = slurmdb_setup_cluster_flags(); |
| |
| if (verbosity) { |
| opts.stderr_level += verbosity; |
| log_alter(opts, SYSLOG_FACILITY_USER, NULL); |
| } |
| |
| if (argc > MAX_INPUT_FIELDS) /* bogus input, but continue anyway */ |
| input_words = argc; |
| else |
| input_words = 128; |
| input_fields = (char **) xmalloc (sizeof (char *) * input_words); |
| if (optind < argc) { |
| for (i = optind; i < argc; i++) { |
| input_fields[input_field_count++] = argv[i]; |
| } |
| } |
| |
| if (input_field_count) |
| exit_flag = 1; |
| else |
| error_code = _get_command (&input_field_count, input_fields); |
| |
| while (error_code == SLURM_SUCCESS) { |
| error_code = _process_command (input_field_count, |
| input_fields); |
| if (error_code || exit_flag) |
| break; |
| error_code = _get_command (&input_field_count, input_fields); |
| } |
| if (clusters) |
| list_destroy(clusters); |
| exit(exit_code); |
| } |
| |
| static void _print_version(void) |
| { |
| print_slurm_version(); |
| if (quiet_flag == -1) { |
| long version = slurm_api_version(); |
| printf("slurm_api_version: %ld, %ld.%ld.%ld\n", version, |
| SLURM_VERSION_MAJOR(version), |
| SLURM_VERSION_MINOR(version), |
| SLURM_VERSION_MICRO(version)); |
| } |
| } |
| |
| |
| #if !HAVE_READLINE |
| /* |
| * Alternative to readline if readline is not available |
| */ |
| static char *_getline(const char *prompt) |
| { |
| char buf[4096]; |
| char *line; |
| int len; |
| |
| printf("%s", prompt); |
| |
| /* we only set this here to avoid a warning. We throw it away |
| later. */ |
| line = fgets(buf, 4096, stdin); |
| |
| len = strlen(buf); |
| if ((len > 0) && (buf[len-1] == '\n')) |
| buf[len-1] = '\0'; |
| else |
| len++; |
| line = malloc (len * sizeof(char)); |
| return strncpy(line, buf, len); |
| } |
| #endif |
| |
| /* |
| * _get_command - get a command from the user |
| * OUT argc - location to store count of arguments |
| * OUT argv - location to store the argument list |
| */ |
| static int |
| _get_command (int *argc, char **argv) |
| { |
| char *in_line; |
| static char *last_in_line = NULL; |
| int i, in_line_size; |
| static int last_in_line_size = 0; |
| |
| *argc = 0; |
| |
| #if HAVE_READLINE |
| in_line = readline ("scontrol: "); |
| #else |
| in_line = _getline("scontrol: "); |
| #endif |
| if (in_line == NULL) |
| return 0; |
| else if (strcmp (in_line, "!!") == 0) { |
| free (in_line); |
| in_line = last_in_line; |
| in_line_size = last_in_line_size; |
| } else { |
| if (last_in_line) |
| free (last_in_line); |
| last_in_line = in_line; |
| last_in_line_size = in_line_size = strlen (in_line); |
| } |
| |
| #if HAVE_READLINE |
| add_history(in_line); |
| #endif |
| |
| /* break in_line into tokens */ |
| for (i = 0; i < in_line_size; i++) { |
| bool double_quote = false, single_quote = false; |
| if (in_line[i] == '\0') |
| break; |
| if (isspace ((int) in_line[i])) |
| continue; |
| if (((*argc) + 1) > MAX_INPUT_FIELDS) { /* bogus input line */ |
| exit_code = 1; |
| fprintf (stderr, |
| "%s: can not process over %d words\n", |
| command_name, input_words); |
| return E2BIG; |
| } |
| argv[(*argc)++] = &in_line[i]; |
| for (i++; i < in_line_size; i++) { |
| if (in_line[i] == '\042') { |
| double_quote = !double_quote; |
| continue; |
| } |
| if (in_line[i] == '\047') { |
| single_quote = !single_quote; |
| continue; |
| } |
| if (in_line[i] == '\0') |
| break; |
| if (double_quote || single_quote) |
| continue; |
| if (isspace ((int) in_line[i])) { |
| in_line[i] = '\0'; |
| break; |
| } |
| } |
| } |
| return 0; |
| } |
| |
| /* |
| * _print_config - print the specified configuration parameter and value |
| * IN config_param - NULL to print all parameters and values |
| */ |
| static void |
| _print_config (char *config_param) |
| { |
| int error_code; |
| slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL; |
| |
| if (old_slurm_ctl_conf_ptr) { |
| error_code = slurm_load_ctl_conf ( |
| old_slurm_ctl_conf_ptr->last_update, |
| &slurm_ctl_conf_ptr); |
| if (error_code == SLURM_SUCCESS) |
| slurm_free_ctl_conf(old_slurm_ctl_conf_ptr); |
| else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) { |
| slurm_ctl_conf_ptr = old_slurm_ctl_conf_ptr; |
| error_code = SLURM_SUCCESS; |
| if (quiet_flag == -1) { |
| printf ("slurm_load_ctl_conf no change " |
| "in data\n"); |
| } |
| } |
| } |
| else |
| error_code = slurm_load_ctl_conf ((time_t) NULL, |
| &slurm_ctl_conf_ptr); |
| |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_load_ctl_conf error"); |
| } |
| else |
| old_slurm_ctl_conf_ptr = slurm_ctl_conf_ptr; |
| |
| |
| if (error_code == SLURM_SUCCESS) { |
| slurm_print_ctl_conf (stdout, slurm_ctl_conf_ptr) ; |
| fprintf(stdout, "\n"); |
| } |
| if (slurm_ctl_conf_ptr) |
| _ping_slurmctld (slurm_ctl_conf_ptr->control_machine, |
| slurm_ctl_conf_ptr->backup_controller); |
| } |
| |
| /* Print slurmd status on localhost. |
| * Parse hostlist in the future */ |
| static void _print_slurmd(char *hostlist) |
| { |
| slurmd_status_t *slurmd_status; |
| |
| if (slurm_load_slurmd_status(&slurmd_status)) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror("slurm_load_slurmd_status"); |
| } else { |
| slurm_print_slurmd_status(stdout, slurmd_status); |
| slurm_free_slurmd_status(slurmd_status); |
| } |
| } |
| |
| /* Print state of controllers only */ |
| static void |
| _print_ping (void) |
| { |
| slurm_ctl_conf_info_msg_t *conf; |
| char *primary, *secondary; |
| |
| slurm_conf_init(NULL); |
| |
| conf = slurm_conf_lock(); |
| primary = xstrdup(conf->control_machine); |
| secondary = xstrdup(conf->backup_controller); |
| slurm_conf_unlock(); |
| |
| _ping_slurmctld (primary, secondary); |
| |
| xfree(primary); |
| xfree(secondary); |
| } |
| |
| /* Report if slurmctld daemons are responding */ |
| static void |
| _ping_slurmctld(char *control_machine, char *backup_controller) |
| { |
| static char *state[2] = { "UP", "DOWN" }; |
| int primary = 1, secondary = 1; |
| int down_msg = 0; |
| |
| if (slurm_ping(1) == SLURM_SUCCESS) |
| primary = 0; |
| if (slurm_ping(2) == SLURM_SUCCESS) |
| secondary = 0; |
| fprintf(stdout, "Slurmctld(primary/backup) "); |
| if (control_machine || backup_controller) { |
| fprintf(stdout, "at "); |
| if (control_machine) { |
| fprintf(stdout, "%s/", control_machine); |
| if (primary) |
| down_msg = 1; |
| } else |
| fprintf(stdout, "(NULL)/"); |
| if (backup_controller) { |
| fprintf(stdout, "%s ", backup_controller); |
| if (secondary) |
| down_msg = 1; |
| } else |
| fprintf(stdout, "(NULL) "); |
| } |
| fprintf(stdout, "are %s/%s\n", |
| state[primary], state[secondary]); |
| |
| if (down_msg && (getuid() == 0)) { |
| fprintf(stdout, "*****************************************\n"); |
| fprintf(stdout, "** RESTORE SLURMCTLD DAEMON TO SERVICE **\n"); |
| fprintf(stdout, "*****************************************\n"); |
| } |
| } |
| |
| /* |
| * _print_daemons - report what daemons should be running on this node |
| */ |
| static void |
| _print_daemons (void) |
| { |
| slurm_ctl_conf_info_msg_t *conf; |
| char me[MAX_SLURM_NAME], *b, *c, *n, *token, *save_ptr = NULL; |
| int actld = 0, ctld = 0, d = 0; |
| char daemon_list[] = "slurmctld slurmd"; |
| |
| slurm_conf_init(NULL); |
| conf = slurm_conf_lock(); |
| |
| gethostname_short(me, MAX_SLURM_NAME); |
| if ((b = conf->backup_controller)) { |
| if ((strcmp(b, me) == 0) || |
| (strcasecmp(b, "localhost") == 0)) |
| ctld = 1; |
| } |
| if (conf->control_machine) { |
| actld = 1; |
| c = xstrdup(conf->control_machine); |
| token = strtok_r(c, ",", &save_ptr); |
| while (token) { |
| if ((strcmp(token, me) == 0) || |
| (strcasecmp(token, "localhost") == 0)) { |
| ctld = 1; |
| break; |
| } |
| token = strtok_r(NULL, ",", &save_ptr); |
| } |
| xfree(c); |
| } |
| slurm_conf_unlock(); |
| |
| if ((n = slurm_conf_get_nodename(me))) { |
| d = 1; |
| xfree(n); |
| } else if ((n = slurm_conf_get_aliased_nodename())) { |
| d = 1; |
| xfree(n); |
| } else if ((n = slurm_conf_get_nodename("localhost"))) { |
| d = 1; |
| xfree(n); |
| } |
| |
| strcpy(daemon_list, ""); |
| if (actld && ctld) |
| strcat(daemon_list, "slurmctld "); |
| if (actld && d) |
| strcat(daemon_list, "slurmd"); |
| fprintf (stdout, "%s\n", daemon_list) ; |
| } |
| |
| /* |
| * _print_aliases - report which aliases should be running on this node |
| */ |
| static void |
| _print_aliases (char* node_hostname) |
| { |
| char me[MAX_SLURM_NAME], *n = NULL, *a = NULL; |
| char *s; |
| |
| slurm_conf_init(NULL); |
| if (!node_hostname) { |
| gethostname_short(me, MAX_SLURM_NAME); |
| s = me; |
| } else |
| s = node_hostname; |
| |
| if (!(n = slurm_conf_get_aliases(s)) && (s == me)) { |
| |
| if (!(a = slurm_conf_get_aliased_nodename())) |
| a = slurm_conf_get_nodename("localhost"); |
| |
| if (a) { |
| n = slurm_conf_get_aliases(a); |
| xfree(a); |
| } |
| } |
| |
| if (n) { |
| fprintf(stdout, "%s\n", n); |
| xfree(n); |
| } |
| |
| } |
| |
| /* |
| * _reboot_nodes - issue RPC to have computing nodes reboot when idle |
| * RET 0 or a slurm error code |
| */ |
| static int _reboot_nodes(char *node_list) |
| { |
| slurm_ctl_conf_t *conf; |
| int rc; |
| slurm_msg_t msg; |
| reboot_msg_t req; |
| |
| conf = slurm_conf_lock(); |
| if (conf->reboot_program == NULL) { |
| error("RebootProgram isn't defined"); |
| slurm_conf_unlock(); |
| slurm_seterrno(SLURM_ERROR); |
| return SLURM_ERROR; |
| } |
| slurm_conf_unlock(); |
| |
| slurm_msg_t_init(&msg); |
| |
| req.node_list = node_list; |
| msg.msg_type = REQUEST_REBOOT_NODES; |
| msg.data = &req; |
| |
| if (slurm_send_recv_controller_rc_msg(&msg, &rc) < 0) |
| return SLURM_ERROR; |
| |
| if (rc) |
| slurm_seterrno_ret(rc); |
| |
| return rc; |
| } |
| |
| /* |
| * _process_command - process the user's command |
| * IN argc - count of arguments |
| * IN argv - the arguments |
| * RET 0 or errno (only for errors fatal to scontrol) |
| */ |
| static int |
| _process_command (int argc, char *argv[]) |
| { |
| int error_code = 0; |
| char *tag = argv[0]; |
| int tag_len = 0; |
| |
| if (argc < 1) { |
| exit_code = 1; |
| if (quiet_flag == -1) |
| fprintf(stderr, "no input"); |
| return 0; |
| } else if(tag) |
| tag_len = strlen(tag); |
| else { |
| if (quiet_flag == -1) |
| fprintf(stderr, "input problem"); |
| return 0; |
| } |
| |
| if (strncasecmp (tag, "abort", MAX(tag_len, 5)) == 0) { |
| /* require full command name */ |
| if (argc > 2) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| error_code = slurm_shutdown (1); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_shutdown error"); |
| } |
| } |
| else if (strncasecmp (tag, "all", MAX(tag_len, 2)) == 0) |
| all_flag = 1; |
| else if (strncasecmp (tag, "completing", MAX(tag_len, 2)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| scontrol_print_completing(); |
| } |
| else if (strncasecmp (tag, "cluster", MAX(tag_len, 2)) == 0) { |
| if (clusters) { |
| list_destroy(clusters); |
| clusters = NULL; |
| working_cluster_rec = NULL; |
| } |
| if (argc >= 2) { |
| if (!(clusters = slurmdb_get_info_cluster(argv[1]))) { |
| error("'%s' can't be reached now, " |
| "or it is an invalid entry for " |
| "--cluster. Use 'sacctmgr --list " |
| "cluster' to see available clusters.", |
| optarg); |
| exit(1); |
| } |
| working_cluster_rec = list_peek(clusters); |
| if (list_count(clusters) > 1) { |
| fatal("Only one cluster can be used at a time " |
| "with scontrol"); |
| } |
| } |
| cluster_flags = slurmdb_setup_cluster_flags(); |
| slurm_free_block_info_msg(old_block_info_ptr); |
| old_block_info_ptr = NULL; |
| slurm_free_front_end_info_msg(old_front_end_info_ptr); |
| old_front_end_info_ptr = NULL; |
| slurm_free_job_info_msg(old_job_info_ptr); |
| old_job_info_ptr = NULL; |
| slurm_free_node_info_msg(old_node_info_ptr); |
| old_node_info_ptr = NULL; |
| slurm_free_partition_info_msg(old_part_info_ptr); |
| old_part_info_ptr = NULL; |
| slurm_free_reservation_info_msg(old_res_info_ptr); |
| old_res_info_ptr = NULL; |
| slurm_free_ctl_conf(old_slurm_ctl_conf_ptr); |
| old_slurm_ctl_conf_ptr = NULL; |
| /* if(old_block_info_ptr) */ |
| /* old_block_info_ptr->last_update = 0; */ |
| /* if(old_job_info_ptr) */ |
| /* old_job_info_ptr->last_update = 0; */ |
| /* if(old_node_info_ptr) */ |
| /* old_node_info_ptr->last_update = 0; */ |
| /* if(old_part_info_ptr) */ |
| /* old_part_info_ptr->last_update = 0; */ |
| /* if(old_res_info_ptr) */ |
| /* old_res_info_ptr->last_update = 0; */ |
| /* if(old_slurm_ctl_conf_ptr) */ |
| /* old_slurm_ctl_conf_ptr->last_update = 0; */ |
| } |
| else if (strncasecmp (tag, "create", MAX(tag_len, 2)) == 0) { |
| if (argc < 2) { |
| exit_code = 1; |
| fprintf (stderr, "too few arguments for %s keyword\n", |
| tag); |
| return 0; |
| } |
| _create_it ((argc - 1), &argv[1]); |
| } |
| else if (strncasecmp (tag, "details", MAX(tag_len, 1)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| return 0; |
| } |
| detail_flag = 1; |
| } |
| else if (strncasecmp (tag, "script", MAX(tag_len, 3)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| return 0; |
| } |
| detail_flag = 2; |
| } |
| else if (strncasecmp (tag, "exit", MAX(tag_len, 1)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| exit_flag = 1; |
| } |
| else if (strncasecmp (tag, "help", MAX(tag_len, 2)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| _usage (); |
| } |
| else if (strncasecmp (tag, "hide", MAX(tag_len, 2)) == 0) { |
| all_flag = 0; |
| detail_flag = 0; |
| } |
| else if (strncasecmp (tag, "oneliner", MAX(tag_len, 1)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| one_liner = 1; |
| } |
| else if (strncasecmp (tag, "pidinfo", MAX(tag_len, 3)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| exit_code = 1; |
| fprintf (stderr, |
| "missing argument for keyword:%s\n", |
| tag); |
| } else |
| scontrol_pid_info ((pid_t) atol (argv[1]) ); |
| } |
| else if (strncasecmp (tag, "ping", MAX(tag_len, 3)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| _print_ping (); |
| } |
| else if ((strncasecmp (tag, "\\q", 2) == 0) || |
| (strncasecmp (tag, "quiet", MAX(tag_len, 4)) == 0)) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, "too many arguments for keyword:%s\n", |
| tag); |
| } |
| quiet_flag = 1; |
| } |
| else if (strncasecmp (tag, "quit", MAX(tag_len, 4)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| exit_flag = 1; |
| } |
| else if (strncasecmp (tag, "reboot_nodes", MAX(tag_len, 3)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| error_code = _reboot_nodes("ALL"); |
| } else |
| error_code = _reboot_nodes(argv[1]); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("scontrol_reboot_nodes error"); |
| } |
| } |
| else if (strncasecmp (tag, "reconfigure", MAX(tag_len, 3)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| fprintf (stderr, "too many arguments for keyword:%s\n", |
| tag); |
| } |
| error_code = slurm_reconfigure(); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_reconfigure error"); |
| } |
| } |
| else if (strncasecmp (tag, "checkpoint", MAX(tag_len, 2)) == 0) { |
| if (argc > 5) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| else if (argc < 3) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } |
| else { |
| error_code = scontrol_checkpoint(argv[1], argv[2], |
| argc - 3, &argv[3]); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror( |
| "scontrol_checkpoint error"); |
| } |
| } |
| } |
| else if (strncasecmp (tag, "requeue", MAX(tag_len, 3)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| error_code = scontrol_requeue(argv[1]); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_requeue error"); |
| } |
| } |
| |
| } |
| else if ((strncasecmp (tag, "hold", 4) == 0) || |
| (strncasecmp (tag, "holdu", 5) == 0) || |
| (strncasecmp (tag, "uhold", 5) == 0) || |
| (strncasecmp (tag, "release", MAX(tag_len, 3)) == 0)) { |
| if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| error_code = scontrol_hold(argv[0], argv[1]); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_update_job error"); |
| } |
| } |
| } |
| else if ((strncasecmp (tag, "suspend", MAX(tag_len, 2)) == 0) || |
| (strncasecmp (tag, "resume", MAX(tag_len, 3)) == 0)) { |
| if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| error_code = scontrol_suspend(argv[0], argv[1]); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_suspend error"); |
| } |
| } |
| } |
| else if (strncasecmp (tag, "wait_job", MAX(tag_len, 2)) == 0) { |
| if (cluster_flags & CLUSTER_FLAG_CRAYXT) { |
| fprintf(stderr, |
| "wait_job is handled automatically on Cray.\n"); |
| } else if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| error_code = scontrol_job_ready(argv[1]); |
| if (error_code) |
| exit_code = 1; |
| } |
| } |
| else if (strncasecmp (tag, "setdebugflags", MAX(tag_len, 9)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| int i, mode = 0; |
| uint32_t debug_flags_plus = 0; |
| uint32_t debug_flags_minus = 0, flags; |
| |
| for (i = 1; i < argc; i++) { |
| if (argv[i][0] == '+') |
| mode = 1; |
| else if (argv[i][0] == '-') |
| mode = -1; |
| else { |
| mode = 0; |
| break; |
| } |
| flags = debug_str2flags(&argv[i][1]); |
| if (flags == NO_VAL) |
| break; |
| if (mode == 1) |
| debug_flags_plus |= flags; |
| else |
| debug_flags_minus |= flags; |
| } |
| if (i < argc) { |
| exit_code = 1; |
| if (quiet_flag != 1) { |
| fprintf(stderr, "invalid debug " |
| "flag: %s\n", argv[i]); |
| } |
| if ((quiet_flag != 1) && (mode = 0)) { |
| fprintf(stderr, "Usage: setdebugflags" |
| " [+|-]NAME\n"); |
| } |
| } else { |
| error_code = slurm_set_debugflags( |
| debug_flags_plus, debug_flags_minus); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror( |
| "slurm_set_debug_flags" |
| " error"); |
| } |
| } |
| } |
| } |
| else if (strncasecmp (tag, "setdebug", MAX(tag_len, 2)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| int level = -1; |
| char *endptr; |
| char *levels[] = { |
| "quiet", "fatal", "error", "info", "verbose", |
| "debug", "debug2", "debug3", "debug4", |
| "debug5", NULL}; |
| int index = 0; |
| while (levels[index]) { |
| if (strcasecmp(argv[1], levels[index]) == 0) { |
| level = index; |
| break; |
| } |
| index ++; |
| } |
| if (level == -1) { |
| /* effective levels: 0 - 9 */ |
| level = (int)strtoul (argv[1], &endptr, 10); |
| if (*endptr != '\0' || level > 9) { |
| level = -1; |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, "invalid " |
| "debug level: %s\n", |
| argv[1]); |
| } |
| } |
| if (level != -1) { |
| error_code = slurm_set_debug_level( |
| level); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror( |
| "slurm_set_debug_level " |
| "error"); |
| } |
| } |
| } |
| } |
| else if (strncasecmp (tag, "schedloglevel", MAX(tag_len, 3)) == 0) { |
| if (argc > 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else { |
| int level = -1; |
| char *endptr; |
| char *levels[] = { |
| "disable", "enable", NULL}; |
| int index = 0; |
| while (levels[index]) { |
| if (strcasecmp(argv[1], levels[index]) == 0) { |
| level = index; |
| break; |
| } |
| index ++; |
| } |
| if (level == -1) { |
| /* effective levels: 0 - 1 */ |
| level = (int)strtoul (argv[1], &endptr, 10); |
| if (*endptr != '\0' || level > 1) { |
| level = -1; |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, "invalid schedlog " |
| "level: %s\n", argv[1]); |
| } |
| } |
| if (level != -1) { |
| error_code = slurm_set_schedlog_level( |
| level); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror( |
| "slurm_set_schedlog_level" |
| " error"); |
| } |
| } |
| } |
| } |
| else if (strncasecmp (tag, "show", MAX(tag_len, 3)) == 0) { |
| _show_it (argc, argv); |
| } |
| else if (strncasecmp (tag, "takeover", MAX(tag_len, 8)) == 0) { |
| char *secondary = NULL; |
| slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL; |
| |
| slurm_ctl_conf_ptr = slurm_conf_lock(); |
| secondary = xstrdup(slurm_ctl_conf_ptr->backup_controller); |
| slurm_conf_unlock(); |
| |
| if ( secondary && secondary[0] != '\0' ) { |
| error_code = slurm_takeover(); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror("slurm_takeover error"); |
| } |
| } else { |
| fprintf(stderr, "slurm_takeover error: no backup " |
| "controller defined\n"); |
| } |
| xfree(secondary); |
| } |
| else if (strncasecmp (tag, "shutdown", MAX(tag_len, 8)) == 0) { |
| /* require full command name */ |
| uint16_t options = 0; |
| if (argc == 2) { |
| if (strcmp(argv[1], "slurmctld") && |
| strcmp(argv[1], "controller")) { |
| error_code = 1; |
| exit_code = 1; |
| fprintf (stderr, |
| "invalid shutdown argument:%s\n", |
| argv[1]); |
| } else |
| options= 2; |
| } else if (argc > 2) { |
| error_code = 1; |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } |
| if (error_code == 0) { |
| error_code = slurm_shutdown(options); |
| if (error_code) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| slurm_perror ("slurm_shutdown error"); |
| } |
| } |
| } |
| else if (strncasecmp (tag, "update", MAX(tag_len, 1)) == 0) { |
| if (argc < 2) { |
| exit_code = 1; |
| fprintf (stderr, "too few arguments for %s keyword\n", |
| tag); |
| return 0; |
| } |
| _update_it ((argc - 1), &argv[1]); |
| } |
| else if (strncasecmp (tag, "delete", MAX(tag_len, 1)) == 0) { |
| if (argc < 2) { |
| exit_code = 1; |
| fprintf (stderr, "too few arguments for %s keyword\n", |
| tag); |
| return 0; |
| } |
| _delete_it ((argc - 1), &argv[1]); |
| } |
| else if (strncasecmp (tag, "verbose", MAX(tag_len, 4)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for %s keyword\n", |
| tag); |
| } |
| quiet_flag = -1; |
| } |
| else if (strncasecmp (tag, "version", MAX(tag_len, 4)) == 0) { |
| if (argc > 1) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for %s keyword\n", |
| tag); |
| } |
| _print_version(); |
| } |
| else if (strncasecmp (tag, "listpids", MAX(tag_len, 1)) == 0) { |
| if (argc > 3) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too many arguments for keyword:%s\n", |
| tag); |
| } else { |
| scontrol_list_pids (argc == 1 ? NULL : argv[1], |
| argc <= 2 ? NULL : argv[2]); |
| } |
| } |
| else if (strncasecmp (tag, "notify", MAX(tag_len, 1)) == 0) { |
| if (argc < 3) { |
| exit_code = 1; |
| fprintf (stderr, |
| "too few arguments for keyword:%s\n", |
| tag); |
| } else if (scontrol_job_notify(argc-1, &argv[1])) { |
| exit_code = 1; |
| slurm_perror("job notify failure"); |
| } |
| } |
| else { |
| exit_code = 1; |
| fprintf (stderr, "invalid keyword: %s\n", tag); |
| } |
| |
| return 0; |
| } |
| |
| |
| /* |
| * _create_it - create a slurm configuration per the supplied arguments |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| */ |
| static void |
| _create_it (int argc, char *argv[]) |
| { |
| /* Scan for "res" first, anywhere in the args. When creating |
| a reservation there is a partition= option, which we don't |
| want to mistake for a requestion to create a partition. */ |
| int i, error_code = SLURM_SUCCESS; |
| for (i=0; i<argc; i++) { |
| char *tag = argv[i]; |
| char *val = strchr(argv[i], '='); |
| int tag_len; |
| |
| if (val) { |
| tag_len = val - argv[i]; |
| val++; |
| } else { |
| tag_len = strlen(tag); |
| } |
| if (!strncasecmp(tag, "ReservationName", MAX(tag_len, 3))) { |
| error_code = scontrol_create_res(argc, argv); |
| break; |
| } else if (!strncasecmp(tag, "PartitionName", MAX(tag_len, 3))) { |
| error_code = scontrol_create_part(argc, argv); |
| break; |
| } |
| } |
| |
| if (i >= argc) { |
| exit_code = 1; |
| error("Invalid creation entity: %s", argv[0]); |
| } else if (error_code) |
| exit_code = 1; |
| } |
| |
| |
| |
| |
| /* |
| * _delete_it - delete the specified slurm entity |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| */ |
| static void |
| _delete_it (int argc, char *argv[]) |
| { |
| char *tag = NULL, *val = NULL; |
| int tag_len = 0; |
| |
| if (argc != 1) { |
| error("Only one option follows delete. %d given.", argc); |
| exit_code = 1; |
| return; |
| } |
| |
| tag = argv[0]; |
| val = strchr(argv[0], '='); |
| if (val) { |
| tag_len = val - argv[0]; |
| val++; |
| } else { |
| error("Proper format is 'delete Partition=p'" |
| " or 'delete Reservation=r'"); |
| exit_code = 1; |
| return; |
| } |
| |
| /* First identify the entity type to delete */ |
| if (strncasecmp (tag, "PartitionName", MAX(tag_len, 3)) == 0) { |
| delete_part_msg_t part_msg; |
| part_msg.name = val; |
| if (slurm_delete_partition(&part_msg)) { |
| char errmsg[64]; |
| snprintf(errmsg, 64, "delete_partition %s", argv[0]); |
| slurm_perror(errmsg); |
| } |
| } else if (strncasecmp (tag, "ReservationName", MAX(tag_len, 3)) == 0) { |
| reservation_name_msg_t res_msg; |
| res_msg.name = val; |
| if (slurm_delete_reservation(&res_msg)) { |
| char errmsg[64]; |
| snprintf(errmsg, 64, "delete_reservation %s", argv[0]); |
| slurm_perror(errmsg); |
| } |
| } else if (strncasecmp (tag, "BlockName", MAX(tag_len, 3)) == 0) { |
| if(cluster_flags & CLUSTER_FLAG_BG) { |
| update_block_msg_t block_msg; |
| slurm_init_update_block_msg ( &block_msg ); |
| block_msg.bg_block_id = val; |
| block_msg.state = BG_BLOCK_NAV; |
| if (slurm_update_block(&block_msg)) { |
| char errmsg[64]; |
| snprintf(errmsg, 64, "delete_block %s", |
| argv[0]); |
| slurm_perror(errmsg); |
| } |
| } else { |
| exit_code = 1; |
| fprintf(stderr, |
| "This only works on a bluegene system.\n"); |
| } |
| } else { |
| exit_code = 1; |
| fprintf(stderr, "Invalid deletion entity: %s\n", argv[0]); |
| } |
| } |
| |
| |
| /* |
| * _show_it - print a description of the specified slurm entity |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| */ |
| static void |
| _show_it (int argc, char *argv[]) |
| { |
| char *tag = NULL, *val = NULL; |
| int tag_len = 0; |
| |
| if (argc > 3) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| argv[0]); |
| return; |
| } |
| else if (argc < 2) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too few arguments for keyword:%s\n", argv[0]); |
| return; |
| } |
| |
| tag = argv[1]; |
| tag_len = strlen(tag); |
| val = strchr(argv[1], '='); |
| if (val) { |
| tag_len = val - argv[1]; |
| val++; |
| } else if (argc == 3) { |
| val = argv[2]; |
| } else { |
| val = NULL; |
| } |
| |
| if (strncasecmp (tag, "aliases", MAX(tag_len, 1)) == 0) { |
| if (val) |
| _print_aliases (val); |
| else |
| _print_aliases (NULL); |
| } else if (strncasecmp (tag, "blocks", MAX(tag_len, 1)) == 0) { |
| scontrol_print_block (val); |
| } else if (strncasecmp (tag, "config", MAX(tag_len, 1)) == 0) { |
| _print_config (val); |
| } else if (strncasecmp (tag, "daemons", MAX(tag_len, 1)) == 0) { |
| if (val) { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, |
| "too many arguments for keyword:%s\n", |
| argv[0]); |
| } |
| _print_daemons (); |
| } else if (strncasecmp (tag, "FrontendName", MAX(tag_len, 1)) == 0) { |
| scontrol_print_front_end_list(val); |
| } else if (strncasecmp (tag, "hostnames", MAX(tag_len, 5)) == 0) { |
| if (val) |
| scontrol_print_hosts(val); |
| else |
| scontrol_print_hosts(getenv("SLURM_NODELIST")); |
| } else if (strncasecmp (tag, "hostlist", MAX(tag_len, 5)) == 0) { |
| if (!val) { |
| exit_code = 1; |
| fprintf(stderr, "invalid encode argument\n"); |
| _usage(); |
| } else if (scontrol_encode_hostlist(val)) |
| exit_code = 1; |
| } else if (strncasecmp (tag, "jobs", MAX(tag_len, 1)) == 0 || |
| strncasecmp (tag, "jobid", MAX(tag_len, 1)) == 0 ) { |
| scontrol_print_job (val); |
| } else if (strncasecmp (tag, "nodes", MAX(tag_len, 1)) == 0) { |
| scontrol_print_node_list (val); |
| } else if (strncasecmp (tag, "partitions", MAX(tag_len, 1)) == 0 || |
| strncasecmp (tag, "partitionname", MAX(tag_len, 1)) == 0) { |
| scontrol_print_part (val); |
| } else if (strncasecmp (tag, "reservations", MAX(tag_len, 1)) == 0 || |
| strncasecmp (tag, "reservationname", MAX(tag_len, 1)) == 0) { |
| scontrol_print_res (val); |
| } else if (strncasecmp (tag, "slurmd", MAX(tag_len, 2)) == 0) { |
| _print_slurmd (val); |
| } else if (strncasecmp (tag, "steps", MAX(tag_len, 2)) == 0) { |
| scontrol_print_step (val); |
| } else if (strncasecmp (tag, "topology", MAX(tag_len, 1)) == 0) { |
| scontrol_print_topo (val); |
| } else { |
| exit_code = 1; |
| if (quiet_flag != 1) |
| fprintf (stderr, |
| "invalid entity:%s for keyword:%s \n", |
| tag, argv[0]); |
| } |
| |
| } |
| |
| |
| |
| /* |
| * _update_it - update the slurm configuration per the supplied arguments |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| */ |
| static void |
| _update_it (int argc, char *argv[]) |
| { |
| char *val = NULL; |
| int i, error_code = SLURM_SUCCESS; |
| int node_tag = 0, part_tag = 0, job_tag = 0; |
| int block_tag = 0, sub_tag = 0, res_tag = 0; |
| int debug_tag = 0, step_tag = 0, front_end_tag = 0; |
| |
| /* First identify the entity to update */ |
| for (i=0; i<argc; i++) { |
| char *tag = argv[i]; |
| int tag_len = 0; |
| val = strchr(argv[i], '='); |
| if (!val) |
| continue; |
| tag_len = val - argv[i]; |
| val++; |
| |
| if (!strncasecmp(tag, "NodeName", MAX(tag_len, 3))) { |
| node_tag = 1; |
| } else if (!strncasecmp(tag, "PartitionName", |
| MAX(tag_len, 3))) { |
| part_tag = 1; |
| } else if (!strncasecmp(tag, "JobId", MAX(tag_len, 3))) { |
| job_tag = 1; |
| } else if (!strncasecmp(tag, "StepId", MAX(tag_len, 4))) { |
| step_tag = 1; |
| } else if (!strncasecmp(tag, "BlockName", MAX(tag_len, 3))) { |
| block_tag = 1; |
| } else if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 3)) |
| || !strncasecmp(tag, "SubMPName", MAX(tag_len, 3))) { |
| sub_tag = 1; |
| } else if (!strncasecmp(tag, "FrontendName", |
| MAX(tag_len, 2))) { |
| front_end_tag = 1; |
| } else if (!strncasecmp(tag, "ReservationName", |
| MAX(tag_len, 3))) { |
| res_tag = 1; |
| } else if (!strncasecmp(tag, "SlurmctldDebug", |
| MAX(tag_len, 2))) { |
| debug_tag= 1; |
| } |
| } |
| |
| /* The order of tests matters here. An update job request can include |
| * partition and reservation tags, possibly before the jobid tag, but |
| * none of the other updates have a jobid tag, so check jobtag first. |
| * Likewise, check restag next, because reservations can have a |
| * partition tag. The order of the rest doesn't matter because there |
| * aren't any other duplicate tags. */ |
| |
| if (job_tag) |
| error_code = scontrol_update_job (argc, argv); |
| else if (step_tag) |
| error_code = scontrol_update_step (argc, argv); |
| else if (res_tag) |
| error_code = scontrol_update_res (argc, argv); |
| else if (node_tag) |
| error_code = scontrol_update_node (argc, argv); |
| else if (front_end_tag) |
| error_code = scontrol_update_front_end (argc, argv); |
| else if (part_tag) |
| error_code = scontrol_update_part (argc, argv); |
| else if (block_tag) |
| error_code = _update_bluegene_block (argc, argv); |
| else if (sub_tag) |
| error_code = _update_bluegene_subbp (argc, argv); |
| else if (debug_tag) |
| error_code = _update_slurmctld_debug(val); |
| else { |
| exit_code = 1; |
| fprintf(stderr, "No valid entity in update command\n"); |
| fprintf(stderr, "Input line must include \"NodeName\", "); |
| if(cluster_flags & CLUSTER_FLAG_BG) { |
| fprintf(stderr, "\"BlockName\", \"SubMPName\" " |
| "(i.e. bgl000[0-3]),"); |
| } |
| fprintf(stderr, "\"PartitionName\", \"Reservation\", " |
| "\"JobId\", or \"SlurmctldDebug\" \n"); |
| } |
| |
| if (error_code) { |
| exit_code = 1; |
| slurm_perror ("slurm_update error"); |
| } |
| } |
| |
| /* |
| * _update_bluegene_block - update the bluegene block per the |
| * supplied arguments |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| static int |
| _update_bluegene_block (int argc, char *argv[]) |
| { |
| int i, update_cnt = 0; |
| update_block_msg_t block_msg; |
| |
| if(!(cluster_flags & CLUSTER_FLAG_BG)) { |
| exit_code = 1; |
| fprintf(stderr, "This only works on a bluegene system.\n"); |
| return 0; |
| } |
| |
| slurm_init_update_block_msg ( &block_msg ); |
| |
| for (i=0; i<argc; i++) { |
| char *tag = argv[i]; |
| char *val = strchr(argv[i], '='); |
| int tag_len = 0, vallen = 0; |
| |
| if (val) { |
| tag_len = val - argv[i]; |
| val++; |
| vallen = strlen(val); |
| } else { |
| exit_code = 1; |
| error("Invalid input for BlueGene block " |
| "update %s", |
| argv[i]); |
| return 0; |
| } |
| |
| if (!strncasecmp(tag, "BlockName", MAX(tag_len, 2))) { |
| block_msg.bg_block_id = val; |
| } else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) { |
| if (!strncasecmp(val, "ERROR", MAX(vallen, 1))) |
| block_msg.state = BG_BLOCK_ERROR_FLAG; |
| else if (!strncasecmp(val, "FREE", MAX(vallen, 1))) |
| block_msg.state = BG_BLOCK_FREE; |
| else if (!strncasecmp(val, "RECREATE", MAX(vallen, 3))) |
| block_msg.state = BG_BLOCK_BOOTING; |
| else if (!strncasecmp(val, "REMOVE", MAX(vallen, 3))) |
| block_msg.state = BG_BLOCK_NAV; |
| else if (!strncasecmp(val, "RESUME", MAX(vallen, 3))) |
| block_msg.state = BG_BLOCK_TERM; |
| else { |
| exit_code = 1; |
| fprintf (stderr, "Invalid input: %s\n", |
| argv[i]); |
| fprintf (stderr, |
| "Acceptable State values " |
| "are ERROR, FREE, RECREATE, " |
| "REMOVE, RESUME\n"); |
| return 0; |
| } |
| update_cnt++; |
| } else { |
| exit_code = 1; |
| error("Invalid input for BlueGene block update %s", |
| argv[i]); |
| return 0; |
| } |
| } |
| |
| if(!block_msg.bg_block_id) { |
| error("You didn't supply a block name."); |
| return 0; |
| } else if (block_msg.state == (uint16_t)NO_VAL) { |
| error("You didn't give me a state to set %s to " |
| "(i.e. FREE, ERROR).", block_msg.mp_str); |
| return 0; |
| } |
| |
| if (slurm_update_block(&block_msg)) { |
| exit_code = 1; |
| return slurm_get_errno (); |
| } else |
| return 0; |
| } |
| |
| /* |
| * _update_bluegene_subbp - update the bluegene nodecards per the |
| * supplied arguments |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| static int |
| _update_bluegene_subbp (int argc, char *argv[]) |
| { |
| int i, update_cnt = 0; |
| update_block_msg_t block_msg; |
| |
| if(!(cluster_flags & CLUSTER_FLAG_BG)) { |
| exit_code = 1; |
| fprintf(stderr, "This only works on a bluegene system.\n"); |
| return 0; |
| } |
| |
| slurm_init_update_block_msg ( &block_msg ); |
| |
| for (i=0; i<argc; i++) { |
| char *tag = argv[i]; |
| char *val = strchr(argv[i], '='); |
| int tag_len = 0, vallen = 0; |
| |
| if (val) { |
| tag_len = val - argv[i]; |
| val++; |
| vallen = strlen(val); |
| } else { |
| exit_code = 1; |
| error("Invalid input for BlueGene SubMPName update %s", |
| argv[i]); |
| return 0; |
| } |
| |
| if (!strncasecmp(tag, "SubBPName", MAX(tag_len, 2)) |
| || !strncasecmp(tag, "SubMPName", MAX(tag_len, 2))) |
| block_msg.mp_str = val; |
| else if (!strncasecmp(tag, "State", MAX(tag_len, 2))) { |
| if (!strncasecmp(val, "ERROR", MAX(vallen, 1))) |
| block_msg.state = BG_BLOCK_ERROR_FLAG; |
| else if (!strncasecmp(val, "FREE", MAX(vallen, 1))) |
| block_msg.state = BG_BLOCK_FREE; |
| else { |
| exit_code = 1; |
| fprintf (stderr, "Invalid input: %s\n", |
| argv[i]); |
| fprintf (stderr, "Acceptable State values " |
| "are FREE and ERROR\n"); |
| return 0; |
| } |
| update_cnt++; |
| } else { |
| exit_code = 1; |
| error("Invalid input for BlueGene SubMPName update %s", |
| argv[i]); |
| return 0; |
| } |
| } |
| |
| if(!block_msg.mp_str) { |
| error("You didn't supply an ionode list."); |
| return 0; |
| } else if (block_msg.state == (uint16_t)NO_VAL) { |
| error("You didn't give me a state to set %s to " |
| "(i.e. FREE, ERROR).", block_msg.mp_str); |
| return 0; |
| } |
| |
| if (slurm_update_block(&block_msg)) { |
| exit_code = 1; |
| return slurm_get_errno (); |
| } else |
| return 0; |
| } |
| |
| /* |
| * _update_slurmctld_debug - update the slurmctld debug level |
| * IN val - new value |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| static int _update_slurmctld_debug(char *val) |
| { |
| char *endptr; |
| int error_code = SLURM_SUCCESS; |
| uint32_t level = (uint32_t)strtoul(val, &endptr, 10); |
| |
| if (*endptr != '\0' || level > 9) { |
| error_code = 1; |
| if (quiet_flag != 1) |
| fprintf(stderr, "invalid debug level: %s\n", |
| val); |
| } else { |
| error_code = slurm_set_debug_level(level); |
| } |
| |
| return error_code; |
| } |
| |
| /* _usage - show the valid scontrol commands */ |
| void |
| _usage () { |
| printf ("\ |
| scontrol [<OPTION>] [<COMMAND>] \n\ |
| Valid <OPTION> values are: \n\ |
| -a or --all: equivalent to \"all\" command \n\ |
| -d or --details: equivalent to \"details\" command \n\ |
| -h or --help: equivalent to \"help\" command \n\ |
| --hide: equivalent to \"hide\" command \n\ |
| -M or --cluster: equivalent to \"cluster\" command \n\ |
| -o or --oneliner: equivalent to \"oneliner\" command \n\ |
| -Q or --quiet: equivalent to \"quiet\" command \n\ |
| -v or --verbose: equivalent to \"verbose\" command \n\ |
| -V or --version: equivalent to \"version\" command \n\ |
| \n\ |
| <keyword> may be omitted from the execute line and scontrol will execute \n\ |
| in interactive mode. It will process commands as entered until explicitly\n\ |
| terminated. \n\ |
| \n\ |
| Valid <COMMAND> values are: \n\ |
| abort shutdown slurm controller immediately \n\ |
| generating a core file. \n\ |
| all display information about all partitions, \n\ |
| including hidden partitions. \n\ |
| cluster cluster to issue commands to. Default is \n\ |
| current cluster. cluster with no name will \n\ |
| reset to default. \n\ |
| checkpoint <CH_OP><ID> perform a checkpoint operation on identified \n\ |
| job or job step \n\ |
| completing display jobs in completing state along with \n\ |
| their completing or down nodes \n\ |
| create <SPECIFICATIONS> create a new partition or reservation \n\ |
| details evokes additional details from the \"show\" \n\ |
| command \n\ |
| delete <SPECIFICATIONS> delete the specified partition or reservation\n\ |
| On Dynamic layout Bluegene systems you can also\n\ |
| delete blocks. \n\ |
| exit terminate scontrol \n\ |
| help print this description of use. \n\ |
| hold <job_id> prevent specified job from starting (see release)\n\ |
| holdu <job_id> place user hold on specified job (see release)\n\ |
| hide do not display information about hidden \n\ |
| partitions \n\ |
| listpids <job_id<.step>> List pids associated with the given jobid, or\n\ |
| all jobs if no id is given (This will only \n\ |
| display the processes on the node which the \n\ |
| scontrol is ran on, and only for those \n\ |
| processes spawned by SLURM and their \n\ |
| descendants) \n\ |
| notify <job_id> msg send message to specified job \n\ |
| oneliner report output one record per line. \n\ |
| pidinfo <pid> return slurm job information for given pid. \n\ |
| ping print status of slurmctld daemons. \n\ |
| quiet print no messages other than error messages. \n\ |
| quit terminate this command. \n\ |
| reboot_nodes [<nodelist>] reboot the nodes when they become idle. \n\ |
| By default all nodes are rebooted. \n\ |
| reconfigure re-read configuration files. \n\ |
| release <job_id> permit specified job to start (see hold) \n\ |
| requeue <job_id> re-queue a batch job \n\ |
| resume <job_id> resume previously suspended job (see suspend)\n\ |
| setdebug <level> set slurmctld debug level \n\ |
| setdebugflags [+|-]<flag> add or remove slurmctld DebugFlags \n\ |
| schedloglevel <slevel> set scheduler log level \n\ |
| show <ENTITY> [<ID>] display state of identified entity, default \n\ |
| is all records. \n\ |
| shutdown <OPTS> shutdown slurm daemons \n\ |
| (the primary controller will be stopped) \n\ |
| suspend <job_id> susend specified job (see resume) \n\ |
| takeover ask slurm backup controller to take over \n\ |
| uhold <job_id> place user hold on specified job (see release)\n\ |
| update <SPECIFICATIONS> update job, node, partition, reservation, \n\ |
| step or bluegene block/subbp configuration \n\ |
| verbose enable detailed logging. \n\ |
| version display tool version number. \n\ |
| wait_job <job_id> wait until the nodes allocated to the job \n\ |
| are booted and usable \n\ |
| !! Repeat the last command entered. \n\ |
| \n\ |
| <ENTITY> may be \"aliases\", \"config\", \"daemons\", \"frontend\", \n\ |
| \"hostlist\", \"hostnames\", \"job\", \"node\", \"partition\", \n\ |
| \"reservation\", \"slurmd\", \"step\", or \"topology\" \n\ |
| (also for BlueGene only: \"block\" or \"subbp\"). \n\ |
| \n\ |
| <ID> may be a configuration parameter name, job id, node name, partition \n\ |
| name, reservation name, job step id, or hostlist or pathname to a \n\ |
| list of host names. \n\ |
| \n\ |
| <HOSTLIST> may either be a comma separated list of host names or the \n\ |
| absolute pathname of a file (with leading '/' containing host names \n\ |
| either separated by commas or new-lines \n\ |
| \n\ |
| <LEVEL> may be an integer value like SlurmctldDebug in the slurm.conf \n\ |
| file or the name of the most detailed errors to report (e.g. \"info\",\n\ |
| \"verbose\", \"debug\", \"debug2\", etc.). \n\ |
| \n\ |
| <SLEVEL> may be an integer value like SlurmSchedLogLevel in the \n\ |
| slurm.conf file or \"enable\" or \"disable\". \n\ |
| \n\ |
| <OPTS> may be \"slurmctld\" to shutdown just the slurmctld daemon, \n\ |
| otherwise all slurm daemons are shutdown \n\ |
| \n\ |
| Node names may be specified using simple range expressions, \n\ |
| (e.g. \"lx[10-20]\" corresponds to lx10, lx11, lx12, ...) \n\ |
| The job step id is the job id followed by a period and the step id. \n\ |
| \n\ |
| <SPECIFICATIONS> are specified in the same format as the configuration \n\ |
| file. You may wish to use the \"show\" keyword then use its output as \n\ |
| input for the update keyword, editing as needed. Bluegene blocks/subbps \n\ |
| are only able to be set to an error or free state. You can also remove \n\ |
| blocks by specifying 'remove' as the state. The remove option is only \n\ |
| valid on Dynamic layout systems. \n\ |
| (Bluegene systems only) \n\ |
| \n\ |
| <CH_OP> identify checkpoint operations and may be \"able\", \"disable\", \n\ |
| \"enable\", \"create\", \"vacate\", \"requeue\", \"restart\", or \"error\"\n\ |
| Additional options include \"ImageDir=<dir>\", \"MaxWait=<seconds>\" and \n\ |
| \"StickToNodes\" \n\ |
| \n\ |
| All commands and options are case-insensitive, although node names and \n\ |
| partition names tests are case-sensitive (node names \"LX\" and \"lx\" \n\ |
| are distinct). \n\n"); |
| |
| } |