| /*****************************************************************************\ |
| * update_job.c - update job functions for scontrol. |
| ***************************************************************************** |
| * Copyright (C) 2002-2007 The Regents of the University of California. |
| * Copyright (C) 2008-2010 Lawrence Livermore National Security. |
| * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| * Written by Morris Jette <jette1@llnl.gov> |
| * CODE-OCEC-09-009. All rights reserved. |
| * |
| * This file is part of SLURM, a resource management program. |
| * For details, see <http://www.schedmd.com/slurmdocs/>. |
| * Please also read the included file: DISCLAIMER. |
| * |
| * SLURM is free software; you can redistribute it and/or modify it under |
| * the terms of the GNU General Public License as published by the Free |
| * Software Foundation; either version 2 of the License, or (at your option) |
| * any later version. |
| * |
| * In addition, as a special exception, the copyright holders give permission |
| * to link the code of portions of this program with the OpenSSL library under |
| * certain conditions as described in each individual source file, and |
| * distribute linked combinations including the two. You must obey the GNU |
| * General Public License in all respects for all of the code used other than |
| * OpenSSL. If you modify file(s) with this exception, you may extend this |
| * exception to your version of the file(s), but you are not obligated to do |
| * so. If you do not wish to do so, delete this exception statement from your |
| * version. If you delete this exception statement from all source files in |
| * the program, then also delete it here. |
| * |
| * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| * details. |
| * |
| * You should have received a copy of the GNU General Public License along |
| * with SLURM; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| \*****************************************************************************/ |
| |
| #include "scontrol.h" |
| #include "src/common/env.h" |
| #include "src/common/proc_args.h" |
| |
/* Prototypes for the file-local helpers that are defined after their
 * first use further down in this file. */

/* Parse the "MaxWait=" / "ImageDir=" options of a checkpoint request */
static int _parse_checkpoint_args(int argc, char **argv, uint16_t *max_wait,
				  char **image_dir);

/* Parse the "StickToNodes" / "ImageDir=" options of a restart request */
static int _parse_restart_args(int argc, char **argv, uint16_t *stick,
			       char **image_dir);

/* Write shell scripts that reset the job's environment after a resize */
static void _update_job_size(uint32_t job_id);
| |
| /* |
| * scontrol_checkpoint - perform some checkpoint/resume operation |
| * IN op - checkpoint operation |
| * IN job_step_id_str - either a job name (for all steps of the given job) or |
| * a step name: "<jid>.<step_id>" |
| * IN argc - argument count |
| * IN argv - arguments of the operation |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| extern int |
| scontrol_checkpoint(char *op, char *job_step_id_str, int argc, char *argv[]) |
| { |
| int rc = SLURM_SUCCESS; |
| uint32_t job_id = 0, step_id = 0; |
| char *next_str; |
| uint32_t ckpt_errno; |
| char *ckpt_strerror = NULL; |
| int oplen = strlen(op); |
| uint16_t max_wait = CKPT_WAIT, stick = 0; |
| char *image_dir = NULL; |
| |
| if (job_step_id_str) { |
| job_id = (uint32_t) strtol (job_step_id_str, &next_str, 10); |
| if (next_str[0] == '.') { |
| step_id = (uint32_t) strtol (&next_str[1], &next_str, |
| 10); |
| } else |
| step_id = NO_VAL; |
| if (next_str[0] != '\0') { |
| fprintf(stderr, "Invalid job step name\n"); |
| return 0; |
| } |
| } else { |
| fprintf(stderr, "Invalid job step name\n"); |
| return 0; |
| } |
| |
| if (strncasecmp(op, "able", MAX(oplen, 1)) == 0) { |
| time_t start_time; |
| rc = slurm_checkpoint_able (job_id, step_id, &start_time); |
| if (rc == SLURM_SUCCESS) { |
| if (start_time) { |
| char time_str[32]; |
| slurm_make_time_str(&start_time, time_str, |
| sizeof(time_str)); |
| printf("Began at %s\n", time_str); |
| } else |
| printf("Yes\n"); |
| } else if (slurm_get_errno() == ESLURM_DISABLED) { |
| printf("No\n"); |
| rc = SLURM_SUCCESS; /* not real error */ |
| } |
| } |
| else if (strncasecmp(op, "complete", MAX(oplen, 2)) == 0) { |
| /* Undocumented option used for testing purposes */ |
| static uint32_t error_code = 1; |
| char error_msg[64]; |
| sprintf(error_msg, "test error message %d", error_code); |
| rc = slurm_checkpoint_complete(job_id, step_id, (time_t) 0, |
| error_code++, error_msg); |
| } |
| else if (strncasecmp(op, "disable", MAX(oplen, 1)) == 0) |
| rc = slurm_checkpoint_disable (job_id, step_id); |
| else if (strncasecmp(op, "enable", MAX(oplen, 2)) == 0) |
| rc = slurm_checkpoint_enable (job_id, step_id); |
| else if (strncasecmp(op, "create", MAX(oplen, 2)) == 0) { |
| if (_parse_checkpoint_args(argc, argv, &max_wait, &image_dir)){ |
| return 0; |
| } |
| rc = slurm_checkpoint_create (job_id, step_id, max_wait, |
| image_dir); |
| |
| } else if (strncasecmp(op, "requeue", MAX(oplen, 2)) == 0) { |
| if (_parse_checkpoint_args(argc, argv, &max_wait, &image_dir)){ |
| return 0; |
| } |
| rc = slurm_checkpoint_requeue (job_id, max_wait, image_dir); |
| |
| } else if (strncasecmp(op, "vacate", MAX(oplen, 2)) == 0) { |
| if (_parse_checkpoint_args(argc, argv, &max_wait, &image_dir)){ |
| return 0; |
| } |
| rc = slurm_checkpoint_vacate (job_id, step_id, max_wait, |
| image_dir); |
| |
| } else if (strncasecmp(op, "restart", MAX(oplen, 2)) == 0) { |
| if (_parse_restart_args(argc, argv, &stick, &image_dir)) { |
| return 0; |
| } |
| rc = slurm_checkpoint_restart (job_id, step_id, stick, |
| image_dir); |
| |
| } else if (strncasecmp(op, "error", MAX(oplen, 2)) == 0) { |
| rc = slurm_checkpoint_error (job_id, step_id, |
| &ckpt_errno, &ckpt_strerror); |
| if (rc == SLURM_SUCCESS) { |
| printf("error(%u): %s\n", ckpt_errno, ckpt_strerror); |
| free(ckpt_strerror); |
| } |
| } |
| |
| else { |
| fprintf (stderr, "Invalid checkpoint operation: %s\n", op); |
| return 0; |
| } |
| |
| return rc; |
| } |
| |
| static int |
| _parse_checkpoint_args(int argc, char **argv, uint16_t *max_wait, |
| char **image_dir) |
| { |
| int i; |
| |
| for (i=0; i< argc; i++) { |
| if (strncasecmp(argv[i], "MaxWait=", 8) == 0) { |
| *max_wait = (uint16_t) strtol(&argv[i][8], |
| (char **) NULL, 10); |
| } else if (strncasecmp(argv[i], "ImageDir=", 9) == 0) { |
| *image_dir = &argv[i][9]; |
| } else { |
| exit_code = 1; |
| error("Invalid input: %s", argv[i]); |
| error("Request aborted"); |
| return -1; |
| } |
| } |
| return 0; |
| } |
| |
| static int |
| _parse_restart_args(int argc, char **argv, uint16_t *stick, char **image_dir) |
| { |
| int i; |
| |
| for (i=0; i< argc; i++) { |
| if (strncasecmp(argv[i], "StickToNodes", 5) == 0) { |
| *stick = 1; |
| } else if (strncasecmp(argv[i], "ImageDir=", 9) == 0) { |
| *image_dir = &argv[i][9]; |
| } else { |
| exit_code = 1; |
| error("Invalid input: %s", argv[i]); |
| error("Request aborted"); |
| return -1; |
| } |
| } |
| return 0; |
| } |
| |
| /* Return the current time limit of the specified job_id or NO_VAL if the |
| * information is not available */ |
| static uint32_t _get_job_time(uint32_t job_id) |
| { |
| uint32_t time_limit = NO_VAL; |
| int i, rc; |
| job_info_msg_t *resp; |
| |
| rc = slurm_load_job(&resp, job_id, SHOW_ALL); |
| if (rc == SLURM_SUCCESS) { |
| for (i = 0; i < resp->record_count; i++) { |
| if (resp->job_array[i].job_id != job_id) |
| continue; /* should not happen */ |
| time_limit = resp->job_array[i].time_limit; |
| break; |
| } |
| slurm_free_job_info_msg(resp); |
| } else { |
| error("Could not load state information for job %u: %m", |
| job_id); |
| } |
| |
| return time_limit; |
| } |
| |
| /* |
| * scontrol_hold - perform some job hold/release operation |
| * IN op - suspend/resume operation |
| * IN job_id_str - a job id |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| extern int |
| scontrol_hold(char *op, char *job_id_str) |
| { |
| int rc = SLURM_SUCCESS; |
| char *next_str; |
| job_desc_msg_t job_msg; |
| uint16_t job_state; |
| |
| slurm_init_job_desc_msg (&job_msg); |
| |
| /* set current user, needed e.g., for AllowGroups checks */ |
| job_msg.user_id = getuid(); |
| |
| if (job_id_str) { |
| job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10); |
| if ((job_msg.job_id == 0) || (next_str[0] != '\0')) { |
| fprintf(stderr, "Invalid job id specified\n"); |
| exit_code = 1; |
| return 0; |
| } |
| } else { |
| fprintf(stderr, "Invalid job id specified\n"); |
| exit_code = 1; |
| return 0; |
| } |
| |
| job_state = scontrol_get_job_state(job_msg.job_id); |
| if (job_state == (uint16_t) NO_VAL) |
| return SLURM_ERROR; |
| if ((job_state & JOB_STATE_BASE) != JOB_PENDING) { |
| slurm_seterrno(ESLURM_JOB_NOT_PENDING); |
| return ESLURM_JOB_NOT_PENDING; |
| } |
| |
| if ((strncasecmp(op, "holdu", 5) == 0) || |
| (strncasecmp(op, "uhold", 5) == 0)) { |
| job_msg.priority = 0; |
| job_msg.alloc_sid = ALLOC_SID_USER_HOLD; |
| } else if (strncasecmp(op, "hold", 4) == 0) { |
| job_msg.priority = 0; |
| job_msg.alloc_sid = 0; |
| } else |
| job_msg.priority = INFINITE; |
| |
| if (slurm_update_job(&job_msg)) |
| return slurm_get_errno(); |
| |
| return rc; |
| } |
| |
| |
| /* |
| * scontrol_suspend - perform some suspend/resume operation |
| * IN op - suspend/resume operation |
| * IN job_id_str - a job id |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| extern int |
| scontrol_suspend(char *op, char *job_id_str) |
| { |
| uint32_t job_id = 0; |
| char *next_str; |
| |
| if (job_id_str) { |
| job_id = (uint32_t) strtol (job_id_str, &next_str, 10); |
| if (next_str[0] != '\0') { |
| fprintf(stderr, "Invalid job id specified\n"); |
| exit_code = 1; |
| return SLURM_SUCCESS; |
| } |
| } else { |
| fprintf(stderr, "Invalid job id specified\n"); |
| exit_code = 1; |
| return SLURM_SUCCESS; |
| } |
| |
| if (strncasecmp(op, "suspend", MAX(strlen(op), 2)) == 0) |
| return slurm_suspend(job_id); |
| else |
| return slurm_resume(job_id); |
| } |
| |
| /* |
| * scontrol_requeue - requeue a pending or running batch job |
| * IN job_id_str - a job id |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| extern int |
| scontrol_requeue(char *job_id_str) |
| { |
| int rc = SLURM_SUCCESS; |
| uint32_t job_id = 0; |
| char *next_str; |
| |
| if (job_id_str) { |
| job_id = (uint32_t) strtol (job_id_str, &next_str, 10); |
| if (next_str[0] != '\0') { |
| fprintf(stderr, "Invalid job id specified\n"); |
| exit_code = 1; |
| return 0; |
| } |
| } else { |
| fprintf(stderr, "Invalid job id specified\n"); |
| exit_code = 1; |
| return 0; |
| } |
| |
| rc = slurm_requeue (job_id); |
| return rc; |
| } |
| |
| /* |
| * scontrol_update_job - update the slurm job configuration per the supplied |
| * arguments |
| * IN argc - count of arguments |
| * IN argv - list of arguments |
| * RET 0 if no slurm error, errno otherwise. parsing error prints |
| * error message and returns 0 |
| */ |
| extern int |
| scontrol_update_job (int argc, char *argv[]) |
| { |
| bool update_size = false; |
| int i, update_cnt = 0; |
| char *tag, *val; |
| int taglen, vallen; |
| job_desc_msg_t job_msg; |
| |
| slurm_init_job_desc_msg (&job_msg); |
| |
| /* set current user, needed e.g., for AllowGroups checks */ |
| job_msg.user_id = getuid(); |
| |
| for (i=0; i<argc; i++) { |
| tag = argv[i]; |
| val = strchr(argv[i], '='); |
| if (val) { |
| taglen = val - argv[i]; |
| val++; |
| vallen = strlen(val); |
| } else if (strncasecmp(tag, "Nice", MAX(strlen(tag), 2)) == 0){ |
| /* "Nice" is the only tag that might not have an |
| equal sign, so it is handled specially. */ |
| job_msg.nice = NICE_OFFSET + 100; |
| update_cnt++; |
| continue; |
| } else { |
| exit_code = 1; |
| fprintf (stderr, "Invalid input: %s\n", argv[i]); |
| fprintf (stderr, "Request aborted\n"); |
| return -1; |
| } |
| |
| if (strncasecmp(tag, "JobId", MAX(taglen, 3)) == 0) { |
| if (parse_uint32(val, &job_msg.job_id)) { |
| error ("Invalid JobId value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| } |
| else if (strncasecmp(tag, "Comment", MAX(taglen, 3)) == 0) { |
| job_msg.comment = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "TimeLimit", MAX(taglen, 5)) == 0) { |
| bool incr, decr; |
| uint32_t job_current_time, time_limit; |
| |
| incr = (val[0] == '+'); |
| decr = (val[0] == '-'); |
| if (incr || decr) |
| val++; |
| time_limit = time_str2mins(val); |
| if ((time_limit < 0) && (time_limit != INFINITE)) { |
| error("Invalid TimeLimit value"); |
| exit_code = 1; |
| return 0; |
| } |
| if (incr || decr) { |
| job_current_time = _get_job_time(job_msg. |
| job_id); |
| if (job_current_time == NO_VAL) { |
| exit_code = 1; |
| return 0; |
| } |
| if (incr) { |
| time_limit += job_current_time; |
| } else if (time_limit > job_current_time) { |
| error("TimeLimit decrement larger than" |
| " current time limit (%u > %u)", |
| time_limit, job_current_time); |
| exit_code = 1; |
| return 0; |
| } else { |
| time_limit = job_current_time - |
| time_limit; |
| } |
| } |
| job_msg.time_limit = time_limit; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "TimeMin", MAX(taglen, 5)) == 0) { |
| int time_min = time_str2mins(val); |
| if ((time_min < 0) && (time_min != INFINITE)) { |
| error("Invalid TimeMin value"); |
| exit_code = 1; |
| return 0; |
| } |
| job_msg.time_min = time_min; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Priority", MAX(taglen, 2)) == 0) { |
| if (parse_uint32(val, &job_msg.priority)) { |
| error ("Invalid Priority value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) { |
| int nice; |
| nice = strtoll(val, (char **) NULL, 10); |
| if (abs(nice) > NICE_OFFSET) { |
| error("Invalid nice value, must be between " |
| "-%d and %d", NICE_OFFSET, |
| NICE_OFFSET); |
| exit_code = 1; |
| return 0; |
| } |
| job_msg.nice = NICE_OFFSET + nice; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "NumCPUs", MAX(taglen, 6)) == 0) { |
| int min_cpus, max_cpus=0; |
| if (!get_resource_arg_range(val, "NumCPUs", &min_cpus, |
| &max_cpus, false) || |
| (min_cpus <= 0) || |
| (max_cpus && (max_cpus < min_cpus))) { |
| error ("Invalid NumCPUs value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| job_msg.min_cpus = min_cpus; |
| if (max_cpus) |
| job_msg.max_cpus = max_cpus; |
| update_cnt++; |
| } |
| /* ReqProcs was removed in SLURM version 2.1 */ |
| else if (strncasecmp(tag, "ReqProcs", MAX(taglen, 8)) == 0) { |
| if (parse_uint32(val, &job_msg.num_tasks)) { |
| error ("Invalid ReqProcs value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Requeue", MAX(taglen, 4)) == 0) { |
| if (parse_uint16(val, &job_msg.requeue)) { |
| error ("Invalid Requeue value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| /* ReqNodes was replaced by NumNodes in SLURM version 2.1 */ |
| else if ((strncasecmp(tag, "ReqNodes", MAX(taglen, 8)) == 0) || |
| (strncasecmp(tag, "NumNodes", MAX(taglen, 8)) == 0)) { |
| int min_nodes, max_nodes, rc; |
| if (strcmp(val, "0") == 0) { |
| job_msg.min_nodes = 0; |
| } else if (strcasecmp(val, "ALL") == 0) { |
| job_msg.min_nodes = INFINITE; |
| } else { |
| min_nodes = (int) job_msg.min_nodes; |
| max_nodes = (int) job_msg.max_nodes; |
| rc = get_resource_arg_range( |
| val, "requested node count", |
| &min_nodes, &max_nodes, false); |
| if (!rc) |
| return rc; |
| job_msg.min_nodes = (uint32_t) min_nodes; |
| job_msg.max_nodes = (uint32_t) max_nodes; |
| } |
| update_size = true; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "ReqSockets", MAX(taglen, 4)) == 0) { |
| if (parse_uint16(val, &job_msg.sockets_per_node)) { |
| error ("Invalid ReqSockets value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "ReqCores", MAX(taglen, 4)) == 0) { |
| if (parse_uint16(val, &job_msg.cores_per_socket)) { |
| error ("Invalid ReqCores value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "TasksPerNode", MAX(taglen, 2))==0) { |
| if (parse_uint16(val, &job_msg.ntasks_per_node)) { |
| error ("Invalid TasksPerNode value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "ReqThreads", MAX(taglen, 4)) == 0) { |
| if (parse_uint16(val, &job_msg.threads_per_core)) { |
| error ("Invalid ReqThreads value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "MinCPUsNode", MAX(taglen, 4)) == 0) { |
| if (parse_uint16(val, &job_msg.pn_min_cpus)) { |
| error ("Invalid MinCPUsNode value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "MinMemoryNode", |
| MAX(taglen, 10)) == 0) { |
| if (parse_uint32(val, &job_msg.pn_min_memory)) { |
| error ("Invalid MinMemoryNode value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "MinMemoryCPU", |
| MAX(taglen, 10)) == 0) { |
| if (parse_uint32(val, &job_msg.pn_min_memory)) { |
| error ("Invalid MinMemoryCPU value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| job_msg.pn_min_memory |= MEM_PER_CPU; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "MinTmpDiskNode", |
| MAX(taglen, 5)) == 0) { |
| if (parse_uint32(val, &job_msg.pn_min_tmp_disk)) { |
| error ("Invalid MinTmpDiskNode value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Partition", MAX(taglen, 2)) == 0) { |
| job_msg.partition = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "QOS", MAX(taglen, 2)) == 0) { |
| job_msg.qos = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "ReservationName", |
| MAX(taglen, 3)) == 0) { |
| job_msg.reservation = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Name", MAX(taglen, 2)) == 0) { |
| job_msg.name = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "WCKey", MAX(taglen, 1)) == 0) { |
| job_msg.wckey = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Switches", MAX(taglen, 5)) == 0) { |
| char *sep_char; |
| job_msg.req_switch = |
| (uint32_t) strtol(val, &sep_char, 10); |
| update_cnt++; |
| if (sep_char && sep_char[0] == '@') { |
| job_msg.wait4switch = time_str2mins(sep_char+1) |
| * 60; |
| } |
| } |
| else if (strncasecmp(tag, "wait-for-switch", MAX(taglen, 5)) |
| == 0) { |
| if (parse_uint32(val, &job_msg.wait4switch)) { |
| error ("Invalid wait-for-switch value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Shared", MAX(taglen, 2)) == 0) { |
| if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0) |
| job_msg.shared = 1; |
| else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0) |
| job_msg.shared = 0; |
| else if (parse_uint16(val, &job_msg.shared)) { |
| error ("Invalid wait-for-switch value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Contiguous", MAX(taglen, 3)) == 0) { |
| if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0) |
| job_msg.contiguous = 1; |
| else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0) |
| job_msg.contiguous = 0; |
| else if (parse_uint16(val, &job_msg.contiguous)) { |
| error ("Invalid Contiguous value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "ExcNodeList", MAX(taglen, 3)) == 0){ |
| job_msg.exc_nodes = val; |
| update_cnt++; |
| } |
| else if (!strncasecmp(tag, "NodeList", MAX(taglen, 8)) || |
| !strncasecmp(tag, "ReqNodeList", MAX(taglen, 8))) { |
| job_msg.req_nodes = val; |
| update_size = true; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Features", MAX(taglen, 1)) == 0) { |
| job_msg.features = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Gres", MAX(taglen, 2)) == 0) { |
| if (!strcasecmp(val, "help") || |
| !strcasecmp(val, "list")) { |
| print_gres_help(); |
| } else { |
| job_msg.gres = val; |
| update_cnt++; |
| } |
| } |
| else if (strncasecmp(tag, "Account", MAX(taglen, 1)) == 0) { |
| job_msg.account = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Dependency", MAX(taglen, 1)) == 0) { |
| job_msg.dependency = val; |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Geometry", MAX(taglen, 2)) == 0) { |
| char* token, *delimiter = ",x", *next_ptr; |
| int j, rc = 0; |
| int dims = slurmdb_setup_cluster_dims(); |
| uint16_t geo[dims]; |
| char* geometry_tmp = xstrdup(val); |
| char* original_ptr = geometry_tmp; |
| token = strtok_r(geometry_tmp, delimiter, &next_ptr); |
| for (j=0; j<dims; j++) { |
| if (token == NULL) { |
| error("insufficient dimensions in " |
| "Geometry"); |
| rc = -1; |
| break; |
| } |
| geo[j] = (uint16_t) atoi(token); |
| if (geo[j] <= 0) { |
| error("invalid --geometry argument"); |
| rc = -1; |
| break; |
| } |
| geometry_tmp = next_ptr; |
| token = strtok_r(geometry_tmp, delimiter, |
| &next_ptr); |
| } |
| if (token != NULL) { |
| error("too many dimensions in Geometry"); |
| rc = -1; |
| } |
| |
| if (original_ptr) |
| xfree(original_ptr); |
| if (rc != 0) |
| exit_code = 1; |
| else { |
| for (j=0; j<dims; j++) |
| job_msg.geometry[j] = geo[j]; |
| update_cnt++; |
| } |
| } |
| |
| else if (strncasecmp(tag, "Rotate", MAX(taglen, 2)) == 0) { |
| if (strncasecmp(val, "YES", MAX(vallen, 1)) == 0) |
| job_msg.rotate = 1; |
| else if (strncasecmp(val, "NO", MAX(vallen, 1)) == 0) |
| job_msg.rotate = 0; |
| else if (parse_uint16(val, &job_msg.rotate)) { |
| error ("Invalid wait-for-switch value: %s", val); |
| exit_code = 1; |
| return 0; |
| } |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Conn-Type", MAX(taglen, 2)) == 0) { |
| verify_conn_type(val, job_msg.conn_type); |
| if(job_msg.conn_type[0] != (uint16_t)NO_VAL) |
| update_cnt++; |
| } |
| else if (strncasecmp(tag, "Licenses", MAX(taglen, 1)) == 0) { |
| job_msg.licenses = val; |
| update_cnt++; |
| } |
| else if (!strncasecmp(tag, "EligibleTime", MAX(taglen, 2)) || |
| !strncasecmp(tag, "StartTime", MAX(taglen, 2))) { |
| if ((job_msg.begin_time = parse_time(val, 0))) { |
| if (job_msg.begin_time < time(NULL)) |
| job_msg.begin_time = time(NULL); |
| update_cnt++; |
| } |
| } |
| else if (!strncasecmp(tag, "EndTime", MAX(taglen, 2))) { |
| job_msg.end_time = parse_time(val, 0); |
| update_cnt++; |
| } |
| else { |
| exit_code = 1; |
| fprintf (stderr, "Update of this parameter is not " |
| "supported: %s\n", argv[i]); |
| fprintf (stderr, "Request aborted\n"); |
| return 0; |
| } |
| } |
| |
| if (update_cnt == 0) { |
| exit_code = 1; |
| fprintf (stderr, "No changes specified\n"); |
| return 0; |
| } |
| |
| if (slurm_update_job(&job_msg)) |
| return slurm_get_errno (); |
| |
| if (update_size) |
| _update_job_size(job_msg.job_id); |
| |
| return SLURM_SUCCESS; |
| } |
| |
/*
 * scontrol_job_notify - send message to stdout of specified job
 * IN argc - count of arguments
 * IN argv - argv[0] is the job id, argv[1] onward are the words of the
 *	message; they are joined with single spaces
 * RET 0 on success, 1 if the job id is missing or is not a positive
 *	number (an error message is printed), the slurm errno if the
 *	notification fails
 */
extern int
scontrol_job_notify(int argc, char *argv[])
{
	int i;
	long id;
	uint32_t job_id;
	char *end_ptr = NULL;
	char *message = NULL;

	if ((argc < 1) || (argv[0] == NULL)) {
		fprintf(stderr, "Invalid job_id\n");
		return 1;
	}

	/* Validate in a signed type before narrowing: testing the
	 * uint32_t for "<= 0" would let negative input through */
	id = strtol(argv[0], &end_ptr, 10);
	if ((end_ptr == argv[0]) || (end_ptr[0] != '\0') ||
	    (id <= 0) || ((unsigned long) id > 0xffffffffUL)) {
		fprintf(stderr, "Invalid job_id %s\n", argv[0]);
		return 1;
	}
	job_id = (uint32_t) id;

	for (i=1; i<argc; i++) {
		if (message)
			xstrfmtcat(message, " %s", argv[i]);
		else
			xstrcat(message, argv[i]);
	}

	i = slurm_notify_job(job_id, message);
	xfree(message);

	if (i)
		return slurm_get_errno ();
	else
		return 0;
}
| |
| static void _update_job_size(uint32_t job_id) |
| { |
| resource_allocation_response_msg_t *alloc_info; |
| char *fname_csh = NULL, *fname_sh = NULL; |
| FILE *resize_csh = NULL, *resize_sh = NULL; |
| |
| if (!getenv("SLURM_JOBID")) |
| return; /*No job environment here to update */ |
| |
| if (slurm_allocation_lookup_lite(job_id, &alloc_info) != |
| SLURM_SUCCESS) { |
| slurm_perror("slurm_allocation_lookup_lite"); |
| return; |
| } |
| |
| xstrfmtcat(fname_csh, "slurm_job_%u_resize.csh", job_id); |
| xstrfmtcat(fname_sh, "slurm_job_%u_resize.sh", job_id); |
| (void) unlink(fname_csh); |
| (void) unlink(fname_sh); |
| if (!(resize_csh = fopen(fname_csh, "w"))) { |
| fprintf(stderr, "Could not create file %s: %s\n", fname_csh, |
| strerror(errno)); |
| goto fini; |
| } |
| if (!(resize_sh = fopen(fname_sh, "w"))) { |
| fprintf(stderr, "Could not create file %s: %s\n", fname_sh, |
| strerror(errno)); |
| goto fini; |
| } |
| chmod(fname_csh, 0700); /* Make file executable */ |
| chmod(fname_sh, 0700); |
| |
| if (getenv("SLURM_NODELIST")) { |
| fprintf(resize_sh, "export SLURM_NODELIST=\"%s\"\n", |
| alloc_info->node_list); |
| fprintf(resize_csh, "setenv SLURM_NODELIST \"%s\"\n", |
| alloc_info->node_list); |
| } |
| if (getenv("SLURM_JOB_NODELIST")) { |
| fprintf(resize_sh, "export SLURM_JOB_NODELIST=\"%s\"\n", |
| alloc_info->node_list); |
| fprintf(resize_csh, "setenv SLURM_JOB_NODELIST \"%s\"\n", |
| alloc_info->node_list); |
| } |
| if (getenv("SLURM_NNODES")) { |
| fprintf(resize_sh, "export SLURM_NNODES=%u\n", |
| alloc_info->node_cnt); |
| fprintf(resize_csh, "setenv SLURM_NNODES %u\n", |
| alloc_info->node_cnt); |
| } |
| if (getenv("SLURM_JOB_NUM_NODES")) { |
| fprintf(resize_sh, "export SLURM_JOB_NUM_NODES=%u\n", |
| alloc_info->node_cnt); |
| fprintf(resize_csh, "setenv SLURM_JOB_NUM_NODES %u\n", |
| alloc_info->node_cnt); |
| } |
| if (getenv("SLURM_JOB_CPUS_PER_NODE")) { |
| char *tmp; |
| tmp = uint32_compressed_to_str(alloc_info->num_cpu_groups, |
| alloc_info->cpus_per_node, |
| alloc_info->cpu_count_reps); |
| fprintf(resize_sh, "export SLURM_JOB_CPUS_PER_NODE=\"%s\"\n", |
| tmp); |
| fprintf(resize_csh, "setenv SLURM_JOB_CPUS_PER_NODE \"%s\"\n", |
| tmp); |
| xfree(tmp); |
| } |
| if (getenv("SLURM_TASKS_PER_NODE")) { |
| /* We don't have sufficient information to recreate this */ |
| fprintf(resize_sh, "unset SLURM_TASKS_PER_NODE\n"); |
| fprintf(resize_csh, "unsetenv SLURM_TASKS_PER_NODE\n"); |
| } |
| |
| printf("To reset SLURM environment variables, execute\n"); |
| printf(" For bash or sh shells: . ./%s\n", fname_sh); |
| printf(" For csh shells: source ./%s\n", fname_csh); |
| |
| fini: slurm_free_resource_allocation_response_msg(alloc_info); |
| xfree(fname_csh); |
| xfree(fname_sh); |
| if (resize_csh) |
| fclose(resize_csh); |
| if (resize_sh) |
| fclose(resize_sh); |
| } |