|  | /****************************************************************************\ | 
|  | *  srun_job.c - job data structure creation functions | 
|  | ***************************************************************************** | 
|  | *  Copyright (C) 2002-2007 The Regents of the University of California. | 
|  | *  Copyright (C) 2008 Lawrence Livermore National Security. | 
|  | *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | 
|  | *  Written by Mark Grondona <grondona@llnl.gov>. | 
|  | *  CODE-OCEC-09-009. All rights reserved. | 
|  | * | 
|  | *  This file is part of Slurm, a resource management program. | 
|  | *  For details, see <https://slurm.schedmd.com/>. | 
|  | *  Please also read the included file: DISCLAIMER. | 
|  | * | 
|  | *  Slurm is free software; you can redistribute it and/or modify it under | 
|  | *  the terms of the GNU General Public License as published by the Free | 
|  | *  Software Foundation; either version 2 of the License, or (at your option) | 
|  | *  any later version. | 
|  | * | 
|  | *  In addition, as a special exception, the copyright holders give permission | 
|  | *  to link the code of portions of this program with the OpenSSL library under | 
|  | *  certain conditions as described in each individual source file, and | 
|  | *  distribute linked combinations including the two. You must obey the GNU | 
|  | *  General Public License in all respects for all of the code used other than | 
|  | *  OpenSSL. If you modify file(s) with this exception, you may extend this | 
|  | *  exception to your version of the file(s), but you are not obligated to do | 
|  | *  so. If you do not wish to do so, delete this exception statement from your | 
|  | *  version.  If you delete this exception statement from all source files in | 
|  | *  the program, then also delete it here. | 
|  | * | 
|  | *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
|  | *  details. | 
|  | * | 
|  | *  You should have received a copy of the GNU General Public License along | 
|  | *  with Slurm; if not, write to the Free Software Foundation, Inc., | 
|  | *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
|  | \*****************************************************************************/ | 
|  |  | 
|  | #include "config.h" | 
|  |  | 
|  | #include <fcntl.h> | 
|  | #include <grp.h> | 
|  | #include <limits.h> | 
|  | #include <netdb.h> | 
|  | #include <signal.h> | 
|  | #include <stdlib.h> | 
|  | #include <string.h> | 
|  | #include <sys/resource.h> | 
|  | #include <sys/stat.h> | 
|  | #include <sys/types.h> | 
|  | #include <sys/wait.h> | 
|  | #include <unistd.h> | 
|  |  | 
|  | #include "src/common/bitstring.h" | 
|  | #include "src/interfaces/cli_filter.h" | 
|  | #include "src/common/cbuf.h" | 
|  | #include "src/common/fd.h" | 
|  | #include "src/common/forward.h" | 
|  | #include "src/common/hostlist.h" | 
|  | #include "src/common/log.h" | 
|  | #include "src/common/macros.h" | 
|  | #include "src/common/proc_args.h" | 
|  | #include "src/common/read_config.h" | 
|  | #include "src/common/slurm_opt.h" | 
|  | #include "src/common/slurm_protocol_api.h" | 
|  | #include "src/common/slurm_rlimits_info.h" | 
|  | #include "src/common/spank.h" | 
|  | #include "src/common/uid.h" | 
|  | #include "src/common/xmalloc.h" | 
|  | #include "src/common/xsignal.h" | 
|  | #include "src/common/xstring.h" | 
|  |  | 
|  | #include "src/api/step_launch.h" | 
|  |  | 
|  | #include "src/srun/allocate.h" | 
|  | #include "src/srun/debugger.h" | 
|  | #include "src/srun/fname.h" | 
|  | #include "src/srun/launch.h" | 
|  | #include "src/srun/opt.h" | 
|  | #include "src/srun/multi_prog.h" | 
|  | #include "src/srun/srun_job.h" | 
|  |  | 
|  | /* | 
|  | * Allocation information structure used to store general information | 
|  | * about a node allocation, to be passed to _job_create_structure(). | 
|  | */ | 
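|  | /* | 
|  | * Note: cpus_per_node/cpu_count_reps use Slurm's compressed form. | 
|  | * For example (illustrative values only), cpus_per_node = {16, 8} with | 
|  | * cpu_count_reps = {3, 2} describes 3 nodes with 16 CPUs each followed | 
|  | * by 2 nodes with 8 CPUs each, i.e. num_cpu_groups = 2. | 
|  | */ | 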
|  | typedef struct allocation_info { | 
|  | uint16_t               *cpus_per_node; | 
|  | uint32_t               *cpu_count_reps; | 
|  | uint32_t                nnodes; | 
|  | char                   *nodelist; | 
|  | uint16_t ntasks_per_board;/* number of tasks to invoke on each board */ | 
|  | uint16_t ntasks_per_core; /* number of tasks to invoke on each core */ | 
|  | uint16_t ntasks_per_tres; /* number of tasks that can access each gpu */ | 
|  | uint16_t ntasks_per_socket;/* number of tasks to invoke on | 
|  | * each socket */ | 
|  | uint32_t                num_cpu_groups; | 
|  | char                   *partition; | 
|  | slurm_step_id_t         step_id; | 
|  | uid_t uid; /* resolved user id of job */ | 
|  | char *user_name; /* resolved user name of job */ | 
|  | gid_t gid; /* resolved group id of job */ | 
|  | char *group_name; /* resolved group name of job */ | 
|  | } allocation_info_t; | 
|  |  | 
|  | typedef struct het_job_resp_struct { | 
|  | uint16_t *cpu_cnt; | 
|  | hostlist_t *host_list; | 
|  | uint32_t node_cnt; | 
|  | } het_job_resp_struct_t; | 
|  |  | 
|  |  | 
|  | static int shepherd_fd = -1; | 
|  | static pthread_t signal_thread = (pthread_t) 0; | 
|  | static int pty_sigarray[] = { SIGWINCH, 0 }; | 
|  |  | 
|  | extern char **environ; | 
|  |  | 
|  | /* | 
|  | * Prototypes: | 
|  | */ | 
|  |  | 
|  | static void _call_spank_fini(void); | 
|  | static int  _call_spank_local_user(srun_job_t *job, slurm_opt_t *opt_local); | 
|  | static long _diff_tv_str(struct timeval *tv1, struct timeval *tv2); | 
|  | static void _handle_intr(srun_job_t *job); | 
|  | static void _handle_pipe(void); | 
|  | static srun_job_t *_job_create_structure(allocation_info_t *ainfo, | 
|  | slurm_opt_t *opt_local); | 
|  | static char *_normalize_hostlist(const char *hostlist); | 
|  | static void _print_job_information(resource_allocation_response_msg_t *resp); | 
|  | static void _run_srun_epilog (srun_job_t *job); | 
|  | static void _run_srun_prolog (srun_job_t *job); | 
|  | static int  _run_srun_script (srun_job_t *job, char *script); | 
|  | static void _set_env_vars(resource_allocation_response_msg_t *resp, | 
|  | int het_job_offset); | 
|  | static void _set_env_vars2(resource_allocation_response_msg_t *resp, | 
|  | int het_job_offset); | 
|  | static void _set_ntasks(allocation_info_t *ai, slurm_opt_t *opt_local); | 
|  | static int  _set_rlimit_env(void); | 
|  | static void _set_submit_dir_env(void); | 
|  | static int  _set_umask_env(void); | 
|  | static void _shepherd_notify(int shepherd_fd); | 
|  | static int _shepherd_spawn(srun_job_t *job, list_t *srun_job_list, | 
|  | bool got_alloc); | 
|  | static void *_srun_signal_mgr(void *no_data); | 
|  | static void _srun_cli_filter_post_submit(uint32_t jobid, uint32_t stepid); | 
|  | static int  _validate_relative(resource_allocation_response_msg_t *resp, | 
|  | slurm_opt_t *opt_local); | 
|  |  | 
|  |  | 
|  | /* | 
|  | * Create an srun job structure w/out an allocation response msg. | 
|  | * (i.e. use the command line options) | 
|  | */ | 
|  | srun_job_t * | 
|  | job_create_noalloc(void) | 
|  | { | 
|  | srun_job_t *job = NULL; | 
|  | allocation_info_t *ai = xmalloc(sizeof(allocation_info_t)); | 
|  | uint16_t cpn[1]; | 
|  | uint32_t cpu_count_reps[1]; | 
|  | slurm_opt_t *opt_local = &opt; | 
|  | hostlist_t *hl = hostlist_create(opt_local->nodelist); | 
|  |  | 
|  | if (!hl) { | 
|  | error("Invalid node list `%s' specified", opt_local->nodelist); | 
|  | goto error; | 
|  | } | 
|  | srand48(getpid()); | 
|  | ai->step_id.job_id = MIN_NOALLOC_JOBID + | 
|  | ((uint32_t) lrand48() % | 
|  | (MAX_NOALLOC_JOBID - MIN_NOALLOC_JOBID + 1)); | 
|  | ai->step_id.step_id = (uint32_t) (lrand48()); | 
|  | ai->step_id.step_het_comp = NO_VAL; | 
|  | ai->nodelist       = opt_local->nodelist; | 
|  | ai->nnodes         = hostlist_count(hl); | 
|  | ai->uid = getuid(); | 
|  | ai->user_name = uid_to_string_or_null(ai->uid); | 
|  | ai->gid = getgid(); | 
|  | ai->group_name = gid_to_string_or_null(ai->gid); | 
|  |  | 
|  | hostlist_destroy(hl); | 
|  |  | 
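|  | /* | 
|  | * Describe the whole (fake) allocation as a single CPU group: every | 
|  | * node is assumed to provide ceil(ntasks / nnodes) CPUs, repeated | 
|  | * across all nnodes nodes. | 
|  | */ | 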
|  | cpn[0] = ROUNDUP(opt_local->ntasks, ai->nnodes); | 
|  | ai->cpus_per_node  = cpn; | 
|  | cpu_count_reps[0] = ai->nnodes; | 
|  | ai->cpu_count_reps = cpu_count_reps; | 
|  | ai->num_cpu_groups = 1; | 
|  |  | 
|  | /* | 
|  | * Create job, then fill in host addresses | 
|  | */ | 
|  | job = _job_create_structure(ai, opt_local); | 
|  |  | 
|  | if (job != NULL) | 
|  | job_update_io_fnames(job, opt_local); | 
|  | if (job && (job->ntasks == NO_VAL)) { | 
|  | job->ntasks = ai->nnodes; | 
|  | job->cpu_count = opt_local->cpus_per_task * job->ntasks; | 
|  | } | 
|  |  | 
|  | error: | 
|  | xfree(ai); | 
|  | return (job); | 
|  |  | 
|  | } | 
|  |  | 
|  | static void _set_min_node_count(allocation_info_t *ai, | 
|  | resource_allocation_response_msg_t *resp, | 
|  | slurm_opt_t *opt_local) | 
|  | { | 
|  | int num_tasks; | 
|  |  | 
|  | if (opt_local->nodes_set) | 
|  | return; | 
|  |  | 
|  | opt_local->nodes_set = true; | 
|  |  | 
|  | if (!local_het_step) { | 
|  | /* | 
|  | * We don't want to set the number of nodes equal to the | 
|  | * number of requested tasks unless we know it is less | 
|  | * than the number of nodes in the allocation. | 
|  | */ | 
|  | if (opt_local->ntasks_set && | 
|  | (opt_local->ntasks < ai->nnodes)) | 
|  | opt_local->min_nodes = opt_local->ntasks; | 
|  | else | 
|  | opt_local->min_nodes = ai->nnodes; | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Figure out the minimum number of nodes needed to fit this step | 
|  | * into the allocation. | 
|  | */ | 
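|  | /* | 
|  | * Example (illustrative numbers): with cpus_per_node = {8}, | 
|  | * cpu_count_reps = {4}, cpus_per_task = 2 and ntasks = 10, each node | 
|  | * fits 4 tasks, so min_nodes ends up as 3 (4 + 4 + 2 tasks). | 
|  | */ | 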
|  | num_tasks = 0; | 
|  | opt_local->min_nodes = 0; | 
|  | for (int i = 0; ((i < resp->num_cpu_groups) && | 
|  | (opt_local->min_nodes < resp->node_cnt)); | 
|  | i++) { | 
|  | for (int j = 0; j < resp->cpu_count_reps[i]; j++) { | 
|  | /* | 
|  | * Given this node, figure out how many tasks could fit | 
|  | * on it. | 
|  | */ | 
|  | int ntasks_per_node = resp->cpus_per_node[i]; | 
|  |  | 
|  | if (opt_local->cpus_per_task) | 
|  | ntasks_per_node /= | 
|  | opt_local->cpus_per_task; | 
|  |  | 
|  | if ((opt_local->ntasks_per_node != NO_VAL) && | 
|  | (ntasks_per_node >= opt_local->ntasks_per_node)) | 
|  | ntasks_per_node = opt_local->ntasks_per_node; | 
|  |  | 
|  | /* Then add it to the total task count */ | 
|  | num_tasks += ntasks_per_node; | 
|  |  | 
|  | opt_local->min_nodes++; | 
|  | if (num_tasks >= opt_local->ntasks) | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Create an srun job structure for a step w/out an allocation response msg. | 
|  | * (i.e. inside an allocation) | 
|  | */ | 
|  | extern srun_job_t *job_step_create_allocation( | 
|  | resource_allocation_response_msg_t *resp, | 
|  | slurm_opt_t *opt_local) | 
|  | { | 
|  | srun_opt_t *srun_opt = opt_local->srun_opt; | 
|  | uint32_t job_id = resp->job_id; | 
|  | srun_job_t *job = NULL; | 
|  | allocation_info_t *ai = xmalloc(sizeof(allocation_info_t)); | 
|  | hostlist_t *hl = NULL; | 
|  | char *buf = NULL; | 
|  | int count = 0; | 
|  | uint32_t alloc_count = 0; | 
|  | char *step_nodelist = NULL; | 
|  | xassert(srun_opt); | 
|  |  | 
|  | ai->step_id.job_id          = job_id; | 
|  | ai->step_id.step_id         = NO_VAL; | 
|  | ai->step_id.step_het_comp = NO_VAL; | 
|  | if (srun_opt->alloc_nodelist) | 
|  | ai->nodelist = xstrdup(srun_opt->alloc_nodelist); | 
|  | else | 
|  | ai->nodelist = xstrdup(resp->node_list); | 
|  |  | 
|  | hl = hostlist_create(ai->nodelist); | 
|  | hostlist_uniq(hl); | 
|  | alloc_count = hostlist_count(hl); | 
|  | ai->nnodes = alloc_count; | 
|  | hostlist_destroy(hl); | 
|  |  | 
|  | /* exclude is handled elsewhere for het steps */ | 
|  | if (!local_het_step && opt_local->exclude) { | 
|  | hostlist_t *exc_hl = hostlist_create(opt_local->exclude); | 
|  | hostlist_t *inc_hl = NULL; | 
|  | char *node_name = NULL; | 
|  |  | 
|  | hl = hostlist_create(ai->nodelist); | 
|  | if (opt_local->nodelist) | 
|  | inc_hl = hostlist_create(opt_local->nodelist); | 
|  | hostlist_uniq(hl); | 
|  | //info("using %s or %s", opt_local->nodelist, ai->nodelist); | 
|  | while ((node_name = hostlist_shift(exc_hl))) { | 
|  | int inx = hostlist_find(hl, node_name); | 
|  | if (inx >= 0) { | 
|  | debug("excluding node %s", node_name); | 
|  | hostlist_delete_nth(hl, inx); | 
|  | ai->nnodes--;	/* decrement node count */ | 
|  | } | 
|  | if (inc_hl) { | 
|  | inx = hostlist_find(inc_hl, node_name); | 
|  | if (inx >= 0) { | 
|  | error("Requested node %s is also " | 
|  | "in the excluded list.", | 
|  | node_name); | 
|  | error("Job not submitted."); | 
|  | hostlist_destroy(exc_hl); | 
|  | hostlist_destroy(inc_hl); | 
|  | goto error; | 
|  | } | 
|  | } | 
|  | free(node_name); | 
|  | } | 
|  | hostlist_destroy(exc_hl); | 
|  |  | 
|  | /* We need to set this here so that, if more nodes are | 
|  | * available than requested, we can set the count straight. | 
|  | * If there is no exclude list, these variables are set | 
|  | * later instead. | 
|  | */ | 
|  | if (!opt_local->nodes_set) { | 
|  | /* We don't want to set the number of nodes equal to | 
|  | * the number of requested tasks unless we know it is | 
|  | * less than the number of nodes in the allocation. | 
|  | */ | 
|  | if (opt_local->ntasks_set && | 
|  | (opt_local->ntasks < ai->nnodes)) | 
|  | opt_local->min_nodes = opt_local->ntasks; | 
|  | else | 
|  | opt_local->min_nodes = ai->nnodes; | 
|  | opt_local->nodes_set = true; | 
|  | } | 
|  | if (!opt_local->max_nodes) | 
|  | opt_local->max_nodes = opt_local->min_nodes; | 
|  | if ((opt_local->max_nodes > 0) && | 
|  | (opt_local->max_nodes < ai->nnodes)) | 
|  | ai->nnodes = opt_local->max_nodes; | 
|  |  | 
|  | count = hostlist_count(hl); | 
|  | if (!count) { | 
|  | error("Hostlist is empty!  Can't run job."); | 
|  | hostlist_destroy(hl); | 
|  | goto error; | 
|  | } | 
|  | if (inc_hl) { | 
|  | count = hostlist_count(inc_hl); | 
|  | if (count < ai->nnodes) { | 
|  | /* add more nodes to get correct number for | 
|  | allocation */ | 
|  | hostlist_t *tmp_hl = hostlist_copy(hl); | 
|  | int i = 0; | 
|  | int diff = ai->nnodes - count; | 
|  | buf = hostlist_ranged_string_xmalloc(inc_hl); | 
|  | hostlist_delete(tmp_hl, buf); | 
|  | xfree(buf); | 
|  | while ((i < diff) && | 
|  | (node_name = hostlist_shift(tmp_hl))) { | 
|  | hostlist_push_host(inc_hl, node_name); | 
|  | free(node_name); | 
|  | i++; | 
|  | } | 
|  | hostlist_destroy(tmp_hl); | 
|  | } | 
|  | buf = hostlist_ranged_string_xmalloc(inc_hl); | 
|  | hostlist_destroy(inc_hl); | 
|  | xfree(opt_local->nodelist); | 
|  | opt_local->nodelist = buf; | 
|  | } else { | 
|  | /* remove nodes in excess of what the allocation needs */ | 
|  | for (int i = count; i > ai->nnodes; i--) { | 
|  | hostlist_delete_nth(hl, i - 1); | 
|  | } | 
|  | xfree(opt_local->nodelist); | 
|  | opt_local->nodelist = hostlist_ranged_string_xmalloc(hl); | 
|  | } | 
|  |  | 
|  | hostlist_destroy(hl); | 
|  | } else { | 
|  | _set_min_node_count(ai, resp, opt_local); | 
|  |  | 
|  | if (!opt_local->max_nodes) | 
|  | opt_local->max_nodes = opt_local->min_nodes; | 
|  | if ((opt_local->max_nodes > 0) && | 
|  | (opt_local->max_nodes < ai->nnodes)) | 
|  | ai->nnodes = opt_local->max_nodes; | 
|  | /* Don't reset ai->nodelist because that is the nodelist | 
|  | * we want to report the allocation under; | 
|  | * opt_local->nodelist is what is used for the step. | 
|  | */ | 
|  | /* xfree(ai->nodelist); */ | 
|  | /* ai->nodelist = xstrdup(buf); */ | 
|  | } | 
|  |  | 
|  | /* get the correct number of hosts to run tasks on */ | 
|  | if (opt_local->nodelist) | 
|  | step_nodelist = opt_local->nodelist; | 
|  | else if (((opt_local->distribution & SLURM_DIST_STATE_BASE) == | 
|  | SLURM_DIST_ARBITRARY) && (count == 0)) | 
|  | step_nodelist = getenv("SLURM_ARBITRARY_NODELIST"); | 
|  | if (step_nodelist) { | 
|  | hl = hostlist_create(step_nodelist); | 
|  | if ((opt_local->distribution & SLURM_DIST_STATE_BASE) != | 
|  | SLURM_DIST_ARBITRARY) | 
|  | hostlist_uniq(hl); | 
|  | if (!hostlist_count(hl)) { | 
|  | error("Hostlist is empty!  Can not run job."); | 
|  | hostlist_destroy(hl); | 
|  | goto error; | 
|  | } | 
|  |  | 
|  | buf = hostlist_ranged_string_xmalloc(hl); | 
|  | count = hostlist_count(hl); | 
|  | hostlist_destroy(hl); | 
|  | /* | 
|  | * Don't reset ai->nodelist because that is the nodelist | 
|  | * we want to report the allocation under; | 
|  | * opt_local->nodelist is what is used for the step. | 
|  | */ | 
|  | /* xfree(ai->nodelist); */ | 
|  | /* ai->nodelist = xstrdup(buf); */ | 
|  | xfree(opt_local->nodelist); | 
|  | opt_local->nodelist = buf; | 
|  | } | 
|  |  | 
|  | if (((opt_local->distribution & SLURM_DIST_STATE_BASE) == | 
|  | SLURM_DIST_ARBITRARY) && (count != opt_local->ntasks)) { | 
|  | error("You asked for %d tasks but hostlist specified %d nodes", | 
|  | opt_local->ntasks, count); | 
|  | goto error; | 
|  | } | 
|  |  | 
|  | if (ai->nnodes == 0) { | 
|  | error("No nodes in allocation, can't run job"); | 
|  | goto error; | 
|  | } | 
|  |  | 
|  | ai->num_cpu_groups = resp->num_cpu_groups; | 
|  | ai->cpus_per_node  = resp->cpus_per_node; | 
|  | ai->cpu_count_reps = resp->cpu_count_reps; | 
|  | ai->ntasks_per_board = resp->ntasks_per_board; | 
|  |  | 
|  | /* Here let the srun options override the allocation resp */ | 
|  | ai->ntasks_per_core = (opt_local->ntasks_per_core != NO_VAL) ? | 
|  | opt_local->ntasks_per_core : resp->ntasks_per_core; | 
|  | ai->ntasks_per_socket = (opt_local->ntasks_per_socket != NO_VAL) ? | 
|  | opt_local->ntasks_per_socket : resp->ntasks_per_socket; | 
|  |  | 
|  | ai->partition = resp->partition; | 
|  |  | 
|  | /* 	info("looking for %d nodes out of %s with a must list of %s", */ | 
|  | /* 	     ai->nnodes, ai->nodelist, opt_local->nodelist); */ | 
|  | /* | 
|  | * Create job | 
|  | */ | 
|  | job = _job_create_structure(ai, opt_local); | 
|  | error: | 
|  | xfree(ai->nodelist); | 
|  | xfree(ai); | 
|  | return (job); | 
|  |  | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Create an srun job structure from a resource allocation response msg | 
|  | */ | 
|  | extern srun_job_t *job_create_allocation( | 
|  | resource_allocation_response_msg_t *resp, | 
|  | slurm_opt_t *opt_local) | 
|  | { | 
|  | srun_job_t *job; | 
|  | allocation_info_t *i = xmalloc(sizeof(allocation_info_t)); | 
|  |  | 
|  | i->nodelist       = _normalize_hostlist(resp->node_list); | 
|  | i->nnodes	  = resp->node_cnt; | 
|  | i->partition      = resp->partition; | 
|  | i->step_id.job_id          = resp->job_id; | 
|  | i->step_id.step_id         = NO_VAL; | 
|  | i->step_id.step_het_comp = NO_VAL; | 
|  | i->num_cpu_groups = resp->num_cpu_groups; | 
|  | i->cpus_per_node  = resp->cpus_per_node; | 
|  | i->cpu_count_reps = resp->cpu_count_reps; | 
|  | i->ntasks_per_board = resp->ntasks_per_board; | 
|  | i->ntasks_per_core = resp->ntasks_per_core; | 
|  | i->ntasks_per_socket = resp->ntasks_per_socket; | 
|  |  | 
|  | i->uid = resp->uid; | 
|  | i->user_name = xstrdup(resp->user_name); | 
|  | i->gid = resp->gid; | 
|  | i->group_name = xstrdup(resp->group_name); | 
|  |  | 
|  | job = _job_create_structure(i, opt_local); | 
|  | if (job) { | 
|  | job->account = xstrdup(resp->account); | 
|  | job->qos = xstrdup(resp->qos); | 
|  | job->resv_name = xstrdup(resp->resv_name); | 
|  | } | 
|  |  | 
|  | xfree(i->nodelist); | 
|  | xfree(i); | 
|  |  | 
|  | return (job); | 
|  | } | 
|  |  | 
|  | static void _copy_args(list_t *missing_argc_list, slurm_opt_t *opt_master) | 
|  | { | 
|  | list_itr_t *iter; | 
|  | slurm_opt_t *opt_local; | 
|  | int i; | 
|  |  | 
|  | iter = list_iterator_create(missing_argc_list); | 
|  | while ((opt_local = list_next(iter))) { | 
|  | opt_local->argc = opt_master->argc; | 
|  | opt_local->argv = xcalloc(sizeof(char *), | 
|  | (opt_local->argc + 1)); | 
|  | for (i = 0; i < opt_local->argc; i++) | 
|  | opt_local->argv[i] = xstrdup(opt_master->argv[i]); | 
|  | list_remove(iter); | 
|  | } | 
|  | list_iterator_destroy(iter); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Build the "het_group" string. If set on the command line, it may need | 
|  | * to be rebuilt for multiple option structures ("--het-group=1,2" becomes | 
|  | * two opt structures). Clear "het_grp_bits" if this is determined not to | 
|  | * be a hetjob. | 
|  | */ | 
|  | static void _het_grp_test(list_t *opt_list) | 
|  | { | 
|  | list_itr_t *iter; | 
|  | int het_job_offset; | 
|  | bitstr_t *master_map = NULL; | 
|  | list_t *missing_argv_list = NULL; | 
|  | bool multi_comp = false, multi_prog = false; | 
|  |  | 
|  | if (opt_list) { | 
|  | slurm_opt_t *opt_local; | 
|  | missing_argv_list = list_create(NULL); | 
|  | iter = list_iterator_create(opt_list); | 
|  | while ((opt_local = list_next(iter))) { | 
|  | srun_opt_t *srun_opt = opt_local->srun_opt; | 
|  | xassert(srun_opt); | 
|  | if (opt_local->argc == 0) | 
|  | list_append(missing_argv_list, opt_local); | 
|  | else | 
|  | _copy_args(missing_argv_list, opt_local); | 
|  | xfree(srun_opt->het_group); | 
|  | if (srun_opt->het_grp_bits && | 
|  | ((het_job_offset = | 
|  | bit_ffs(srun_opt->het_grp_bits)) >= 0)) { | 
|  | xstrfmtcat(srun_opt->het_group, "%d", | 
|  | het_job_offset); | 
|  | } | 
|  | if (!srun_opt->het_grp_bits) { | 
|  | error("%s: het_grp_bits is NULL", __func__); | 
|  | } else if (!master_map) { | 
|  | master_map | 
|  | = bit_copy(srun_opt->het_grp_bits); | 
|  | } else { | 
|  | if (bit_overlap_any(master_map, | 
|  | srun_opt->het_grp_bits)) { | 
|  | fatal("Duplicate het groups in single srun not supported"); | 
|  | } | 
|  | bit_or(master_map, srun_opt->het_grp_bits); | 
|  | } | 
|  | if (srun_opt->multi_prog) | 
|  | multi_prog = true; | 
|  | } | 
|  | if (master_map && (bit_set_count(master_map) > 1)) | 
|  | multi_comp = true; | 
|  | FREE_NULL_BITMAP(master_map); | 
|  | list_iterator_destroy(iter); | 
|  | FREE_NULL_LIST(missing_argv_list); | 
|  | } else if (!sropt.het_group && !getenv("SLURM_HET_SIZE")) { | 
|  | FREE_NULL_BITMAP(sropt.het_grp_bits); | 
|  | /* het_group is already NULL */ | 
|  | } else if (!sropt.het_group && sropt.het_grp_bits) { | 
|  | if ((het_job_offset = bit_ffs(sropt.het_grp_bits)) < 0) | 
|  | het_job_offset = 0; | 
|  | else if (bit_set_count(sropt.het_grp_bits) > 1) | 
|  | multi_comp = true; | 
|  | if (sropt.multi_prog) | 
|  | multi_prog = true; | 
|  | xstrfmtcat(sropt.het_group, "%d", het_job_offset); | 
|  | } | 
|  |  | 
|  | if (multi_comp && multi_prog) | 
|  | fatal("--multi-prog option not supported with multiple het groups"); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Copy the job name from the last component to all hetjob components | 
|  | * unless explicitly set, and default their output open mode to append. | 
|  | */ | 
|  | static void _match_job_name(list_t *opt_list) | 
|  | { | 
|  | int cnt; | 
|  | list_itr_t *iter; | 
|  | slurm_opt_t *opt_local; | 
|  |  | 
|  | if (!opt_list) | 
|  | return; | 
|  |  | 
|  | cnt = list_count(opt_list); | 
|  | if (cnt < 2) | 
|  | return; | 
|  |  | 
|  | iter = list_iterator_create(opt_list); | 
|  | while ((opt_local = list_next(iter))) { | 
|  | if (!opt_local->job_name) | 
|  | opt_local->job_name = xstrdup(opt.job_name); | 
|  | if (opt_local->open_mode == 0) { | 
|  | opt_local->open_mode = OPEN_MODE_APPEND; | 
|  | } | 
|  | } | 
|  | list_iterator_destroy(iter); | 
|  | } | 
|  |  | 
|  | static int _sort_by_offset(void *x, void *y) | 
|  | { | 
|  | slurm_opt_t *opt_local1 = *(slurm_opt_t **) x; | 
|  | slurm_opt_t *opt_local2 = *(slurm_opt_t **) y; | 
|  | int offset1 = -1, offset2 = -1; | 
|  |  | 
|  | if (opt_local1->srun_opt->het_grp_bits) | 
|  | offset1 = bit_ffs(opt_local1->srun_opt->het_grp_bits); | 
|  | if (opt_local2->srun_opt->het_grp_bits) | 
|  | offset2 = bit_ffs(opt_local2->srun_opt->het_grp_bits); | 
|  | if (offset1 < offset2) | 
|  | return -1; | 
|  | if (offset1 > offset2) | 
|  | return 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void _post_opts(list_t *opt_list) | 
|  | { | 
|  | _het_grp_test(opt_list); | 
|  | _match_job_name(opt_list); | 
|  | if (opt_list) | 
|  | list_sort(opt_list, _sort_by_offset); | 
|  | } | 
|  |  | 
|  | extern void init_srun(int argc, char **argv, log_options_t *logopt, | 
|  | bool handle_signals) | 
|  | { | 
|  | bool het_job_fini = false; | 
|  | int i, het_job_argc, het_job_argc_off; | 
|  | char **het_job_argv; | 
|  |  | 
|  | /* | 
|  | * This must happen before we spawn any threads | 
|  | * which are not designed to handle arbitrary signals | 
|  | */ | 
|  | if (handle_signals) { | 
|  | if (xsignal_block(sig_array) < 0) | 
|  | error("Unable to block signals"); | 
|  | } | 
|  | xsignal_block(pty_sigarray); | 
|  |  | 
|  | /* | 
|  | * Initialize plugin stack, read options from plugins, etc. | 
|  | */ | 
|  | init_spank_env(); | 
|  | if (spank_init(NULL)) { | 
|  | error("Plug-in initialization failed"); | 
|  | exit(error_exit); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Be sure to call spank_fini when srun exits. | 
|  | */ | 
|  | if (atexit(_call_spank_fini) < 0) | 
|  | error("Failed to register atexit handler for plugins: %m"); | 
|  |  | 
|  | opt.submit_line = slurm_option_get_argv_str(argc, argv); | 
|  |  | 
|  | het_job_argc = argc; | 
|  | het_job_argv = argv; | 
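|  |  | 
|  | /* | 
|  | * Parse the command line one hetjob component at a time. Components | 
|  | * are separated by ":" on the command line (e.g. | 
|  | * "srun -n1 prog_a : -n2 prog_b"), so after each pass advance | 
|  | * het_job_argv past the ":" and parse the next component. | 
|  | */ | 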
|  | while (!het_job_fini) { | 
|  | het_job_argc_off = -1; | 
|  | if (initialize_and_process_args(het_job_argc, het_job_argv, | 
|  | &het_job_argc_off) < 0) { | 
|  | error("srun parameter parsing"); | 
|  | exit(1); | 
|  | } | 
|  | if ((het_job_argc_off >= 0) && | 
|  | (het_job_argc_off < het_job_argc)) { | 
|  | for (i = het_job_argc_off; i < het_job_argc; i++) { | 
|  | if (!xstrcmp(het_job_argv[i], ":")) { | 
|  | het_job_argc_off = i; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | if ((het_job_argc_off >= 0) && | 
|  | (het_job_argc_off < het_job_argc) && | 
|  | !xstrcmp(het_job_argv[het_job_argc_off], ":")) { | 
|  | /* | 
|  | * move het_job_argv[0] from "srun" to ":" | 
|  | */ | 
|  | het_job_argc -= het_job_argc_off; | 
|  | het_job_argv += het_job_argc_off; | 
|  | colon_cnt++; | 
|  | } else { | 
|  | het_job_fini = true; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (!mpi_g_client_init(&sropt.mpi_type)) { | 
|  | error("Invalid MPI type '%s', --mpi=list for acceptable types", | 
|  | sropt.mpi_type); | 
|  | exit(error_exit); | 
|  | } | 
|  |  | 
|  | _post_opts(opt_list); | 
|  |  | 
|  | /* | 
|  | * reinit log with new verbosity (if changed by command line) | 
|  | */ | 
|  | if (logopt && (opt.verbose || opt.quiet)) { | 
|  | /* | 
|  | * If log level is already increased, only increment the | 
|  | * level to the difference of opt.verbose and LOG_LEVEL_INFO | 
|  | */ | 
|  | if ((opt.verbose -= (logopt->stderr_level - LOG_LEVEL_INFO)) > 0) | 
|  | logopt->stderr_level += opt.verbose; | 
|  | logopt->stderr_level -= opt.quiet; | 
|  | logopt->prefix_level = 1; | 
|  | log_alter(*logopt, 0, NULL); | 
|  | } | 
|  |  | 
|  | (void) _set_rlimit_env(); | 
|  | set_prio_process_env(); | 
|  | (void) _set_umask_env(); | 
|  | _set_submit_dir_env(); | 
|  |  | 
|  | /* | 
|  | * save process startup time to be used with -I<timeout> | 
|  | */ | 
|  | srun_begin_time = time(NULL); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Modify options for a job step (after job allocation is complete). | 
|  | */ | 
|  | static void _set_step_opts(slurm_opt_t *opt_local, | 
|  | resource_allocation_response_msg_t *resp) | 
|  | { | 
|  | srun_opt_t *srun_opt = opt_local->srun_opt; | 
|  | int new_cpt; | 
|  |  | 
|  | xassert(srun_opt); | 
|  |  | 
|  | opt_local->time_limit = NO_VAL;/* not applicable for step, only job */ | 
|  | xfree(opt_local->constraint);	/* not applicable for this step */ | 
|  | if ((srun_opt->core_spec_set || srun_opt->exclusive) | 
|  | && opt_local->cpus_set) { | 
|  | /* Step gets the specified CPU count, which may be only | 
|  | * part of the job allocation. */ | 
|  | srun_opt->exclusive = true; | 
|  | } else { | 
|  | /* Step gets all CPUs in the job allocation. */ | 
|  | srun_opt->exclusive = false; | 
|  | } | 
|  |  | 
|  | new_cpt = slurm_opt_get_tres_per_task_cpu_cnt(resp->tres_per_task); | 
|  | if (new_cpt) | 
|  | opt_local->cpus_per_task = new_cpt; | 
|  |  | 
|  | if (resp->tres_per_task) { | 
|  | xfree(opt_local->tres_per_task); | 
|  | SWAP(opt_local->tres_per_task, resp->tres_per_task); | 
|  | } | 
|  | } | 
|  |  | 
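|  | /* | 
|  | * For het steps within a single allocation, merge the nodes already | 
|  | * claimed by previously created step components (exclude_hl_in) into | 
|  | * this component's --exclude list, then verify that enough nodes | 
|  | * remain and that the merged exclude list does not overlap the | 
|  | * requested --nodelist. | 
|  | */ | 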
|  | static int _handle_het_step_exclude(srun_job_t *job, slurm_opt_t *opt_local, | 
|  | hostlist_t *exclude_hl_in) | 
|  | { | 
|  | hostlist_t *exclude_hl, *allocation_hl; | 
|  | int rc = SLURM_SUCCESS; | 
|  |  | 
|  | if (!exclude_hl_in || !hostlist_count(exclude_hl_in)) | 
|  | return rc; | 
|  |  | 
|  | allocation_hl = hostlist_create(job->nodelist); | 
|  | hostlist_uniq(allocation_hl); | 
|  |  | 
|  | exclude_hl = hostlist_copy(exclude_hl_in); | 
|  | hostlist_push(exclude_hl, opt_local->exclude); | 
|  | hostlist_uniq(exclude_hl); | 
|  | hostlist_sort(exclude_hl); | 
|  |  | 
|  | xfree(opt_local->exclude); | 
|  | opt_local->exclude = hostlist_ranged_string_xmalloc(exclude_hl); | 
|  |  | 
|  | if ((hostlist_count(allocation_hl) - hostlist_count(exclude_hl)) < | 
|  | opt_local->min_nodes) { | 
|  | error("Allocation failure of %d nodes: job size of %d, already allocated %d nodes to previous components.", | 
|  | opt_local->min_nodes, hostlist_count(allocation_hl), | 
|  | hostlist_count(exclude_hl)); | 
|  | rc = SLURM_ERROR; | 
|  | goto end_it; | 
|  | } | 
|  |  | 
|  | if (opt_local->nodelist) { | 
|  | char *node_name = NULL; | 
|  | hostlist_t *inc_hl = hostlist_create(opt_local->nodelist); | 
|  | while ((node_name = hostlist_shift(exclude_hl))) { | 
|  | if (hostlist_find(inc_hl, node_name) >= 0) { | 
|  | error("Requested nodelist %s overlaps with excluded %s.", | 
|  | opt_local->nodelist, | 
|  | opt_local->exclude); | 
|  | error("Job not submitted."); | 
|  | rc = SLURM_ERROR; | 
|  | break; | 
|  | } | 
|  | free(node_name); | 
|  | } | 
|  | FREE_NULL_HOSTLIST(inc_hl); | 
|  | } | 
|  | end_it: | 
|  | FREE_NULL_HOSTLIST(allocation_hl); | 
|  | FREE_NULL_HOSTLIST(exclude_hl); | 
|  |  | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Create the job step(s). For a heterogeneous job, each step is requested in | 
|  | * a separate RPC. create_job_step() references "opt", so we need to match up | 
|  | * the job allocation request with its requested options. | 
|  | */ | 
|  | static int _create_job_step(srun_job_t *job, bool use_all_cpus, | 
|  | list_t *srun_job_list, uint32_t het_job_id, | 
|  | char *het_job_nodelist) | 
|  | { | 
|  | list_itr_t *opt_iter = NULL, *job_iter; | 
|  | slurm_opt_t *opt_local = &opt; | 
|  | uint32_t node_offset = 0, het_job_nnodes = 0, step_id = NO_VAL; | 
|  | uint32_t het_job_ntasks = 0, task_offset = 0; | 
|  | bool update_het_nnodes = false; | 
|  | uint32_t updated_het_nnodes; | 
|  | uint32_t updated_het_ntasks = 0; | 
|  |  | 
|  | job_step_create_response_msg_t *step_resp; | 
|  | char *resv_ports = NULL; | 
|  | int rc = 0; | 
|  |  | 
|  | if (srun_job_list) { | 
|  | hostlist_t *exclude_hl = NULL; | 
|  |  | 
|  | if (local_het_step) | 
|  | exclude_hl = hostlist_create(NULL); | 
|  |  | 
|  | if (opt_list) | 
|  | opt_iter = list_iterator_create(opt_list); | 
|  | job_iter = list_iterator_create(srun_job_list); | 
|  | while ((job = list_next(job_iter))) { | 
|  | if (het_job_id) | 
|  | job->het_job_id = het_job_id; | 
|  | job->step_id.step_id = NO_VAL; | 
|  |  | 
|  | /* | 
|  | * Only set the step_het_comp if we are in a het step | 
|  | * from a single allocation | 
|  | */ | 
|  | if (local_het_step) | 
|  | job->step_id.step_het_comp = | 
|  | job->het_job_offset; | 
|  | else | 
|  | job->step_id.step_het_comp = NO_VAL; | 
|  |  | 
|  | het_job_nnodes += job->nhosts; | 
|  | if (job->ntasks == NO_VAL) | 
|  | het_job_ntasks = NO_VAL; | 
|  | else if (het_job_ntasks != NO_VAL) | 
|  | het_job_ntasks += job->ntasks; | 
|  | } | 
|  |  | 
|  | updated_het_nnodes = het_job_nnodes; | 
|  | list_iterator_reset(job_iter); | 
|  | while ((job = list_next(job_iter))) { | 
|  | uint32_t old_nhosts = job->nhosts; | 
|  | if (opt_list) | 
|  | opt_local = list_next(opt_iter); | 
|  | if (!opt_local) | 
|  | fatal("%s: opt_list too short", __func__); | 
|  | job->het_job_node_offset = node_offset; | 
|  | job->het_job_nnodes = het_job_nnodes; | 
|  | job->het_job_ntasks = het_job_ntasks; | 
|  | job->het_job_task_offset = task_offset; | 
|  | if (step_id != NO_VAL) | 
|  | job->step_id.step_id = step_id; | 
|  |  | 
|  | if ((rc = _handle_het_step_exclude( | 
|  | job, opt_local, exclude_hl)) != | 
|  | SLURM_SUCCESS) | 
|  | break; | 
|  |  | 
|  | rc = create_job_step(job, use_all_cpus, opt_local); | 
|  | if (rc < 0) | 
|  | break; | 
|  | if (step_id == NO_VAL) | 
|  | step_id = job->step_id.step_id; | 
|  | if (exclude_hl) { | 
|  | slurm_step_layout_t *step_layout = | 
|  | launch_common_get_slurm_step_layout( | 
|  | job); | 
|  | hostlist_push(exclude_hl, | 
|  | step_layout->node_list); | 
|  | } | 
|  | step_resp = job->step_ctx->step_resp; | 
|  | if (step_resp && step_resp->resv_ports && | 
|  | strcmp(step_resp->resv_ports, "(null)")) { | 
|  | if (resv_ports) | 
|  | xstrcat(resv_ports, ","); | 
|  | xstrcat(resv_ports, step_resp->resv_ports); | 
|  | } | 
|  | node_offset += job->nhosts; | 
|  | task_offset += job->ntasks; | 
|  |  | 
|  | /* | 
|  | * If packing nodes (SELECT_PACK_NODES, -mpack), the step | 
|  | * may have an updated layout. Need to update each | 
|  | * component's het_job_nnodes with the updated counts. | 
|  | */ | 
|  | if (job->nhosts < old_nhosts) { | 
|  | update_het_nnodes = true; | 
|  | updated_het_nnodes -= old_nhosts - job->nhosts; | 
|  | } | 
|  |  | 
|  | if (het_job_ntasks == NO_VAL) | 
|  | updated_het_ntasks += job->ntasks; | 
|  | } | 
|  |  | 
|  | if (update_het_nnodes) { | 
|  | list_iterator_reset(job_iter); | 
|  | while ((job = list_next(job_iter))) { | 
|  | job->het_job_nnodes = updated_het_nnodes; | 
|  | } | 
|  | } | 
|  | if (updated_het_ntasks) { | 
|  | list_iterator_reset(job_iter); | 
|  | while ((job = list_next(job_iter))) { | 
|  | job->het_job_ntasks = updated_het_ntasks; | 
|  | } | 
|  | } | 
|  |  | 
|  | FREE_NULL_HOSTLIST(exclude_hl); | 
|  |  | 
|  | if (!rc && resv_ports) { | 
|  | /* | 
|  | * Merge numeric values into single range | 
|  | * (e.g. "10-12,13-15,16-18" -> "10-18") | 
|  | */ | 
|  | hostset_t *hs; | 
|  | char *tmp = NULL, *sep; | 
|  | xstrfmtcat(tmp, "[%s]", resv_ports); | 
|  | hs = hostset_create(tmp); | 
|  | hostset_ranged_string(hs, strlen(tmp) + 1, tmp); | 
|  | sep = strchr(tmp, ']'); | 
|  | if (sep) | 
|  | sep[0] = '\0'; | 
|  | xfree(resv_ports); | 
|  | resv_ports = xstrdup(tmp + 1); | 
|  | xfree(tmp); | 
|  | hostset_destroy(hs); | 
|  |  | 
|  | list_iterator_reset(job_iter); | 
|  | while ((job = list_next(job_iter))) { | 
|  | if (!job->step_ctx->step_resp) | 
|  | continue; | 
|  | xfree(job->step_ctx->step_resp->resv_ports); | 
|  | job->step_ctx->step_resp->resv_ports = | 
|  | xstrdup(resv_ports); | 
|  | } | 
|  | } | 
|  | xfree(resv_ports); | 
|  | list_iterator_destroy(job_iter); | 
|  | if (opt_iter) | 
|  | list_iterator_destroy(opt_iter); | 
|  | return rc; | 
|  | } else if (job) { | 
|  | if (het_job_id) { | 
|  | job->het_job_id  = het_job_id; | 
|  | job->het_job_nnodes = job->nhosts; | 
|  | job->het_job_ntasks = job->ntasks; | 
|  | job->het_job_task_offset = 0; | 
|  | } | 
|  | if ((rc = create_job_step(job, use_all_cpus, &opt)) < 0) | 
|  | return rc; | 
|  |  | 
|  | if (het_job_id) { | 
|  | /* | 
|  | * If packing nodes (SELECT_PACK_NODES, -mpack), the step | 
|  | * may have an updated layout. | 
|  | */ | 
|  | job->het_job_nnodes = job->nhosts; | 
|  | /* The stepmgr logic can modify ntasks */ | 
|  | job->het_job_ntasks = job->ntasks; | 
|  | } | 
|  |  | 
|  | return rc; | 
|  | } else { | 
|  | return -1; | 
|  | } | 
|  | } | 
|  |  | 
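|  | /* | 
|  | * Send a REQUEST_STEP_COMPLETE RPC for every step that was already | 
|  | * created, so the controller releases those resources when a later | 
|  | * hetjob component fails to create its step. | 
|  | */ | 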
|  | static void _cancel_steps(list_t *srun_job_list) | 
|  | { | 
|  | srun_job_t *job; | 
|  | list_itr_t *job_iter; | 
|  | slurm_msg_t req; | 
|  | step_complete_msg_t msg; | 
|  | int rc = 0; | 
|  |  | 
|  | if (!srun_job_list) | 
|  | return; | 
|  |  | 
|  | slurm_msg_t_init(&req); | 
|  | req.msg_type = REQUEST_STEP_COMPLETE; | 
|  | req.data = &msg; | 
|  | memset(&msg, 0, sizeof(step_complete_msg_t)); | 
|  | msg.step_rc = 0; | 
|  |  | 
|  | job_iter = list_iterator_create(srun_job_list); | 
|  | while ((job = list_next(job_iter))) { | 
|  | if (job->step_id.step_id == NO_VAL) | 
|  | continue; | 
|  | memcpy(&msg.step_id, &job->step_id, sizeof(msg.step_id)); | 
|  | msg.range_first	= 0; | 
|  | msg.range_last	= job->nhosts - 1; | 
|  | (void) slurm_send_recv_controller_rc_msg(&req, &rc, | 
|  | working_cluster_rec); | 
|  | } | 
|  | list_iterator_destroy(job_iter); | 
|  | } | 
|  |  | 
|  | static void _het_job_struct_del(void *x) | 
|  | { | 
|  | het_job_resp_struct_t *het_job_resp = (het_job_resp_struct_t *) x; | 
|  |  | 
|  | xfree(het_job_resp->cpu_cnt); | 
|  | if (het_job_resp->host_list) | 
|  | hostlist_destroy(het_job_resp->host_list); | 
|  | xfree(het_job_resp); | 
|  | } | 
|  |  | 
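|  | /* | 
|  | * Merge the nodelists of all used hetjob components into a single | 
|  | * ranged nodelist (the returned string must be xfree()'d by the | 
|  | * caller) and set SLURM_JOB_CPUS_PER_NODE to the corresponding | 
|  | * compressed per-node CPU counts. | 
|  | */ | 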
|  | static char *_compress_het_job_nodelist(list_t *used_resp_list) | 
|  | { | 
|  | resource_allocation_response_msg_t *resp; | 
|  | het_job_resp_struct_t *het_job_resp; | 
|  | list_t *het_job_resp_list; | 
|  | list_itr_t *resp_iter; | 
|  | char *tmp; | 
|  | char *het_job_nodelist = NULL, *node_name; | 
|  | hostset_t *hs; | 
|  | int cnt, i, j, k; | 
|  | uint16_t *cpus; | 
|  | uint32_t *reps, cpu_inx; | 
|  |  | 
|  | if (!used_resp_list) | 
|  | return het_job_nodelist; | 
|  |  | 
|  | cnt = list_count(used_resp_list); | 
|  | het_job_resp_list = list_create(_het_job_struct_del); | 
|  | hs = hostset_create(""); | 
|  | resp_iter = list_iterator_create(used_resp_list); | 
|  | while ((resp = list_next(resp_iter))) { | 
|  | if (!resp->node_list) | 
|  | continue; | 
|  | hostset_insert(hs, resp->node_list); | 
|  | het_job_resp = xmalloc(sizeof(het_job_resp_struct_t)); | 
|  | het_job_resp->node_cnt = resp->node_cnt; | 
|  | het_job_resp->cpu_cnt = | 
|  | xmalloc(sizeof(uint16_t) * resp->node_cnt); | 
|  | het_job_resp->host_list = hostlist_create(resp->node_list); | 
|  | for (i = 0, k = 0; | 
|  | (i < resp->num_cpu_groups) && (k < resp->node_cnt); i++) { | 
|  | for (j = 0; j < resp->cpu_count_reps[i]; j++) { | 
|  | het_job_resp->cpu_cnt[k++] = | 
|  | resp->cpus_per_node[i]; | 
|  | if (k >= resp->node_cnt) | 
|  | break; | 
|  | } | 
|  | if (k >= resp->node_cnt) | 
|  | break; | 
|  | } | 
|  | list_append(het_job_resp_list, het_job_resp); | 
|  | } | 
|  | list_iterator_destroy(resp_iter); | 
|  |  | 
|  | het_job_nodelist = hostset_ranged_string_xmalloc(hs); | 
|  |  | 
|  | cpu_inx = 0; | 
|  | cnt = hostset_count(hs); | 
|  | cpus = xmalloc(sizeof(uint16_t) * (cnt + 1)); | 
|  | reps = xmalloc(sizeof(uint32_t) * (cnt + 1)); | 
|  | for (i = 0; i < cnt; i++) { | 
|  | node_name = hostset_nth(hs, i); | 
|  | resp_iter = list_iterator_create(het_job_resp_list); | 
|  | while ((het_job_resp = list_next(resp_iter))) { | 
|  | j = hostlist_find(het_job_resp->host_list, node_name); | 
|  | if ((j == -1) || !het_job_resp->cpu_cnt) | 
|  | continue;	/* node not in this hetjob */ | 
|  | if (cpus[cpu_inx] == het_job_resp->cpu_cnt[j]) { | 
|  | reps[cpu_inx]++; | 
|  | } else { | 
|  | if (cpus[cpu_inx] != 0) | 
|  | cpu_inx++; | 
|  | cpus[cpu_inx] = het_job_resp->cpu_cnt[j]; | 
|  | reps[cpu_inx]++; | 
|  | } | 
|  | break; | 
|  | } | 
|  | list_iterator_destroy(resp_iter); | 
|  | free(node_name); | 
|  | } | 
|  |  | 
|  | cpu_inx++; | 
|  | tmp = uint32_compressed_to_str(cpu_inx, cpus, reps); | 
|  | if (setenv("SLURM_JOB_CPUS_PER_NODE", tmp, 1) < 0) { | 
|  | error("%s: Unable to set SLURM_JOB_CPUS_PER_NODE in environment", | 
|  | __func__); | 
|  | } | 
|  | xfree(tmp); | 
|  |  | 
|  | xfree(reps); | 
|  | xfree(cpus); | 
|  | hostset_destroy(hs); | 
|  | FREE_NULL_LIST(het_job_resp_list); | 
|  |  | 
|  | return het_job_nodelist; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Here we have a regular job allocation, but we are requesting a het step in | 
|  | * that allocation. So here we will copy the resp_list to the number of | 
|  | * components we care about. | 
|  | */ | 
|  | static void _copy_job_resp(list_t *job_resp_list, int count) | 
|  | { | 
|  | resource_allocation_response_msg_t *new, *orig; | 
|  | xassert(job_resp_list); | 
|  | xassert(list_count(job_resp_list) == 1); | 
|  |  | 
|  | orig = list_peek(job_resp_list); | 
|  | for (int i = 0; i < count; i++) { | 
|  | new = slurm_copy_resource_allocation_response_msg(orig); | 
|  | list_append(job_resp_list, new); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void _check_gpus_per_socket(slurm_opt_t *opt_local) | 
|  | { | 
|  | static bool checked = false; /* Only log the warning once */ | 
|  |  | 
|  | if (!opt_local->gpus_per_socket || checked) | 
|  | return; | 
|  |  | 
|  | checked = true; | 
|  | if (opt_local->gpus_per_socket && | 
|  | !slurm_option_set_by_env(opt_local, LONG_OPT_GPUS_PER_SOCKET)) { | 
|  | /* | 
|  | * gpus_per_socket does not work for steps. | 
|  | * If it is set by env, it was likely inherited by the job. | 
|  | */ | 
|  | warning("Ignoring --gpus-per-socket because it can only be specified at job allocation time, not during step allocation."); | 
|  | } | 
|  | } | 
|  |  | 
|  | extern void create_srun_job(void **p_job, bool *got_alloc) | 
|  | { | 
|  | resource_allocation_response_msg_t *resp; | 
|  | list_t *job_resp_list = NULL, *srun_job_list = NULL; | 
|  | list_t *used_resp_list = NULL; | 
|  | list_itr_t *opt_iter, *resp_iter; | 
|  | srun_job_t *job = NULL; | 
|  | int i, max_list_offset, max_het_job_offset, het_job_offset = -1, | 
|  | het_step_offset = -1; | 
|  | uint32_t my_job_id = 0, het_job_id = 0; | 
|  | char *het_job_nodelist = NULL; | 
|  | bool begin_error_logged = false; | 
|  | bool core_spec_error_logged = false; | 
|  | bool node_cnt_error_logged = false; | 
|  | bool tres_license_error_logged = false; | 
|  | bool x11_error_logged = false; | 
|  |  | 
|  | /* | 
|  | * now global "opt" should be filled in and available, | 
|  | * create a job from opt | 
|  | */ | 
|  | if (sropt.test_only) { | 
|  | int rc = allocate_test(); | 
|  | if (rc) { | 
|  | slurm_perror("allocation failure"); | 
|  | exit (1); | 
|  | } | 
|  | exit (0); | 
|  |  | 
|  | } else if (sropt.no_alloc) { | 
|  | if (opt_list || | 
|  | (sropt.het_grp_bits && (bit_fls(sropt.het_grp_bits) > 0))) | 
|  | fatal("--no-allocation option not supported for heterogeneous jobs"); | 
|  | info("do not allocate resources"); | 
|  | job = job_create_noalloc(); | 
|  | if (job == NULL) { | 
|  | error("Job creation failure."); | 
|  | exit(error_exit); | 
|  | } | 
|  | if (create_job_step(job, false, &opt) < 0) | 
|  | exit(error_exit); | 
|  | } else if ((job_resp_list = existing_allocation())) { | 
|  | slurm_opt_t *opt_local; | 
|  |  | 
|  | max_list_offset = 0; | 
|  | max_het_job_offset = list_count(job_resp_list) - 1; | 
|  | if (opt_list) { | 
|  | opt_iter = list_iterator_create(opt_list); | 
|  | while ((opt_local = list_next(opt_iter))) { | 
|  | srun_opt_t *srun_opt = opt_local->srun_opt; | 
|  | xassert(srun_opt); | 
|  | if (srun_opt->het_grp_bits) { | 
|  | i = bit_fls(srun_opt->het_grp_bits); | 
|  | max_list_offset = MAX(max_list_offset, | 
|  | i); | 
|  | } | 
|  | } | 
|  | list_iterator_destroy(opt_iter); | 
|  | if (max_list_offset > max_het_job_offset) { | 
|  | if (list_count(job_resp_list) != 1) { | 
|  | error("Attempt to run a job step with het group value of %d, but the job allocation has maximum value of %d", | 
|  | max_list_offset, | 
|  | max_het_job_offset); | 
|  | exit(1); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Here we have a regular job allocation, but we | 
|  | * are requesting a het step in that | 
|  | * allocation. So here we will copy the | 
|  | * resp_list to the number of components we care | 
|  | * about. | 
|  | */ | 
|  | _copy_job_resp(job_resp_list, max_list_offset); | 
|  | max_het_job_offset = max_list_offset; | 
|  | local_het_step = true; | 
|  | } | 
|  | if (list_count(opt_list) > 1) | 
|  | het_step_offset = 0; | 
|  | } | 
|  | srun_job_list = list_create(NULL); | 
|  | used_resp_list = list_create(NULL); | 
|  | if (max_het_job_offset > 0) | 
|  | het_job_offset = 0; | 
|  | resp_iter = list_iterator_create(job_resp_list); | 
|  | while ((resp = list_next(resp_iter))) { | 
|  | bool merge_nodelist = true; | 
|  | if (my_job_id == 0) { | 
|  | my_job_id = resp->job_id; | 
|  | if (resp->working_cluster_rec) | 
|  | slurm_setup_remote_working_cluster(resp); | 
|  | } | 
|  | _print_job_information(resp); | 
|  | (void) get_next_opt(-2); | 
|  | /* | 
|  | * Check using het_job_offset here, but we use | 
|  | * het_step_offset for the job being added. | 
|  | */ | 
|  | while ((opt_local = get_next_opt(het_job_offset))) { | 
|  | srun_opt_t *srun_opt = opt_local->srun_opt; | 
|  | xassert(srun_opt); | 
|  |  | 
|  | if (local_het_step) | 
|  | opt_local->step_het_comp_cnt = | 
|  | max_het_job_offset; | 
|  |  | 
|  | if (merge_nodelist) { | 
|  | merge_nodelist = false; | 
|  | list_append(used_resp_list, resp); | 
|  | } | 
|  | if (slurm_option_set_by_env(opt_local, 'N') && | 
|  | (opt_local->min_nodes > resp->node_cnt)) { | 
|  | /* | 
|  | * This signifies that the job used the | 
|  | * --no-kill option and a node went DOWN, | 
|  | * or that it used a node count range | 
|  | * specification, was checkpointed at one | 
|  | * size, and restarted at a different | 
|  | * size. | 
|  | */ | 
|  | if (!node_cnt_error_logged) { | 
|  | error("SLURM_JOB_NUM_NODES environment variable conflicts with allocated node count (%u != %u).", | 
|  | opt_local->min_nodes, | 
|  | resp->node_cnt); | 
|  | node_cnt_error_logged = true; | 
|  | } | 
|  | /* | 
|  | * Modify options to match resource | 
|  | * allocation. | 
|  | * NOTE: Some options are not supported | 
|  | */ | 
|  | opt_local->min_nodes = resp->node_cnt; | 
|  | xfree(srun_opt->alloc_nodelist); | 
|  | if (!opt_local->ntasks_set) { | 
|  | opt_local->ntasks = | 
|  | opt_local->min_nodes; | 
|  | } | 
|  | } | 
|  | _check_gpus_per_socket(opt_local); | 
|  | if (!tres_license_error_logged && | 
|  | !slurm_option_set_by_env( | 
|  | opt_local, | 
|  | LONG_OPT_TRES_PER_TASK) && | 
|  | xstrstr(opt_local->tres_per_task, | 
|  | "license")) { | 
|  | warning("Ignoring --tres-per-task license specification because licenses can only be specified at job allocation time, not during step allocation."); | 
|  | tres_license_error_logged = true; | 
|  | } | 
|  | if (srun_opt->core_spec_set && | 
|  | !core_spec_error_logged) { | 
|  | /* | 
|  | * NOTE: Silently ignore specialized | 
|  | * core count set with SLURM_CORE_SPEC | 
|  | * environment variable | 
|  | */ | 
|  | error("Ignoring --core-spec value for a job step " | 
|  | "within an existing job. Set specialized cores " | 
|  | "at job allocation time."); | 
|  | core_spec_error_logged = true; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Here we send the het job groups to the | 
|  | * slurmctld to set up the interconnect | 
|  | * correctly.  We only ever need to send it to | 
|  | * the first component of the step. | 
|  | * | 
|  | * FIXME - is this still needed post-Cray? | 
|  | */ | 
|  | if (g_het_grp_bits) { | 
|  | xfree(opt_local->step_het_grps); | 
|  | opt_local->step_het_grps = | 
|  | bit_fmt_hexmask(g_het_grp_bits); | 
|  | } | 
|  |  | 
|  | _set_env_vars(resp, het_step_offset); | 
|  | if (_validate_relative(resp, opt_local)) | 
|  | exit(error_exit); | 
|  | if (opt_local->begin && !begin_error_logged) { | 
|  | error("--begin is ignored because nodes are already allocated."); | 
|  | begin_error_logged = true; | 
|  | } | 
|  | if (opt_local->x11 && !x11_error_logged) { | 
|  | error("Ignoring --x11 option for a job step within an " | 
|  | "existing job. Set x11 options at job allocation time."); | 
|  | x11_error_logged = true; | 
|  | } | 
|  | job = job_step_create_allocation(resp, | 
|  | opt_local); | 
|  | if (!job) | 
|  | exit(error_exit); | 
|  | if (max_het_job_offset > 0) | 
|  | job->het_job_offset = het_step_offset; | 
|  | list_append(srun_job_list, job); | 
|  | het_step_offset++; | 
|  | }	/* While more option structures */ | 
|  | het_job_offset++; | 
|  | }	/* More hetjob components */ | 
|  | list_iterator_destroy(resp_iter); | 
|  |  | 
|  | max_het_job_offset = get_max_het_group(); | 
|  | het_job_offset = list_count(job_resp_list) - 1; | 
|  | if (max_het_job_offset > het_job_offset) { | 
|  | error("Requested het-group offset exceeds highest hetjob index (%d > %d)", | 
|  | max_het_job_offset, het_job_offset); | 
|  | exit(error_exit); | 
|  | } | 
|  | i = list_count(srun_job_list); | 
|  | if (i == 0) { | 
|  | error("No directives to start application on any available hetjob components"); | 
|  | exit(error_exit); | 
|  | } | 
|  | if (i == 1) | 
|  | FREE_NULL_LIST(srun_job_list);	/* Just use "job" */ | 
|  | if (list_count(job_resp_list) > 1) { | 
|  | /* only set if actually a hetjob */ | 
|  | if (!local_het_step && my_job_id) | 
|  | het_job_id = my_job_id; | 
|  | het_job_nodelist = | 
|  | _compress_het_job_nodelist(used_resp_list); | 
|  | } | 
|  | FREE_NULL_LIST(used_resp_list); | 
|  | if (_create_job_step(job, false, srun_job_list, het_job_id, | 
|  | het_job_nodelist) < 0) { | 
|  | if (*got_alloc) | 
|  | slurm_complete_job(my_job_id, 1); | 
|  | else | 
|  | _cancel_steps(srun_job_list); | 
|  | exit(error_exit); | 
|  | } | 
|  | xfree(het_job_nodelist); | 
|  | } else { | 
|  | /* Combined job allocation and job step launch */ | 
|  | if (slurm_option_set_by_cli(&opt, 'J')) | 
|  | setenvfs("SLURM_JOB_NAME=%s", opt.job_name); | 
|  | else if (!slurm_option_set_by_env(&opt, 'J') && opt.argc) | 
|  | setenvfs("SLURM_JOB_NAME=%s", opt.argv[0]); | 
|  |  | 
|  | if (opt_list) { | 
|  | if (!colon_cnt) { | 
|  | error("--het-group expected to be used within an HetJob allocation"); | 
|  | exit(error_exit); | 
|  | } | 
|  | job_resp_list = allocate_het_job_nodes(); | 
|  | if (!job_resp_list) | 
|  | exit(error_exit); | 
|  | srun_job_list = list_create(NULL); | 
|  | opt_iter  = list_iterator_create(opt_list); | 
|  | resp_iter = list_iterator_create(job_resp_list); | 
|  | while ((resp = list_next(resp_iter))) { | 
|  | slurm_opt_t *opt_local; | 
|  |  | 
|  | if (my_job_id == 0) { | 
|  | my_job_id = resp->job_id; | 
|  | *got_alloc = true; | 
|  | } | 
|  | opt_local = list_next(opt_iter); | 
|  | if (!opt_local) | 
|  | break; | 
|  | _print_job_information(resp); | 
|  | _set_env_vars(resp, ++het_job_offset); | 
|  | _set_env_vars2(resp, het_job_offset); | 
|  | if (_validate_relative(resp, opt_local)) { | 
|  | slurm_complete_job(my_job_id, 1); | 
|  | exit(error_exit); | 
|  | } | 
|  | job = job_create_allocation(resp, opt_local); | 
|  | job->het_job_offset = het_job_offset; | 
|  | list_append(srun_job_list, job); | 
|  | _set_step_opts(opt_local, resp); | 
|  | } | 
|  | list_iterator_destroy(opt_iter); | 
|  | list_iterator_destroy(resp_iter); | 
|  | if (!local_het_step) { | 
|  | /* Continue support for old pack terminology. */ | 
|  | setenvfs("SLURM_PACK_SIZE=%d", | 
|  | het_job_offset + 1); | 
|  | setenvfs("SLURM_HET_SIZE=%d", | 
|  | het_job_offset + 1); | 
|  | } | 
|  | } else { | 
|  | if (sropt.het_grp_bits && | 
|  | (bit_fls(sropt.het_grp_bits) != -1)) { | 
|  | error("--het-group expected to be used within an HetJob allocation"); | 
|  | exit(error_exit); | 
|  | } | 
|  |  | 
|  | if (!(resp = allocate_nodes(&opt))) | 
|  | exit(error_exit); | 
|  | *got_alloc = true; | 
|  | my_job_id = resp->job_id; | 
|  | _print_job_information(resp); | 
|  | _set_env_vars(resp, -1); | 
|  | if (_validate_relative(resp, &opt)) { | 
|  | slurm_complete_job(resp->job_id, 1); | 
|  | exit(error_exit); | 
|  | } | 
|  | job = job_create_allocation(resp, &opt); | 
|  | _set_step_opts(&opt, resp); | 
|  | } | 
|  | if (srun_job_list && (list_count(srun_job_list) > 1) && | 
|  | opt_list && (list_count(opt_list) > 1) && my_job_id) { | 
|  | /* only set if actually a hetjob */ | 
|  | if (!local_het_step) | 
|  | het_job_id = my_job_id; | 
|  | het_job_nodelist = | 
|  | _compress_het_job_nodelist(job_resp_list); | 
|  | } | 
|  |  | 
|  | if (_create_job_step(job, true, srun_job_list, het_job_id, | 
|  | het_job_nodelist) < 0) { | 
|  | slurm_complete_job(my_job_id, 1); | 
|  | exit(error_exit); | 
|  | } | 
|  | xfree(het_job_nodelist); | 
|  |  | 
|  | if (opt_list) { | 
|  | resp_iter = list_iterator_create(job_resp_list); | 
|  | while ((resp = list_next(resp_iter))) { | 
|  | slurm_free_resource_allocation_response_msg( | 
|  | resp); | 
|  | } | 
|  | list_iterator_destroy(resp_iter); | 
|  | } else { | 
|  | slurm_free_resource_allocation_response_msg(resp); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Spawn process to ensure clean-up of job and/or step | 
|  | * on abnormal termination | 
|  | */ | 
|  | shepherd_fd = _shepherd_spawn(job, srun_job_list, *got_alloc); | 
|  |  | 
|  | if (opt_list) | 
|  | *p_job = (void *) srun_job_list; | 
|  | else | 
|  | *p_job = (void *) job; | 
|  |  | 
|  | if (job) | 
|  | _srun_cli_filter_post_submit(my_job_id, job->step_id.step_id); | 
|  | } | 
|  |  | 
|  | extern void pre_launch_srun_job(srun_job_t *job, slurm_opt_t *opt_local) | 
|  | { | 
|  | if (!signal_thread) | 
|  | slurm_thread_create(&signal_thread, _srun_signal_mgr, job); | 
|  |  | 
|  | _run_srun_prolog(job); | 
|  | if (_call_spank_local_user(job, opt_local)) { | 
|  | error("Failure in local plugin stack"); | 
|  | slurm_step_launch_abort(job->step_ctx); | 
|  | exit(error_exit); | 
|  | } | 
|  |  | 
|  | env_array_merge(&job->env, (const char **)environ); | 
|  | } | 
|  |  | 
|  | extern void fini_srun(srun_job_t *job, bool got_alloc, uint32_t *global_rc) | 
|  | { | 
|  | if (got_alloc) { | 
|  | cleanup_allocation(); | 
|  |  | 
|  | /* Tell slurmctld that we were cancelled */ | 
|  | if (job->state >= SRUN_JOB_CANCELLED) | 
|  | slurm_complete_job(job->step_id.job_id, NO_VAL); | 
|  | else | 
|  | slurm_complete_job(job->step_id.job_id, *global_rc); | 
|  | } | 
|  | _shepherd_notify(shepherd_fd); | 
|  |  | 
|  | if (signal_thread) { | 
|  | srun_shutdown = true; | 
|  | pthread_kill(signal_thread, SIGINT); | 
|  | slurm_thread_join(signal_thread); | 
|  | } | 
|  |  | 
|  | _run_srun_epilog(job); | 
|  |  | 
|  | step_ctx_destroy(job->step_ctx); | 
|  |  | 
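|  | /* | 
|  | * Convert a wait(2)-style status into a plain return code: the exit | 
|  | * status for a normal exit, or 128 + signal number if the step was | 
|  | * killed by a signal. | 
|  | */ | 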
|  | if (WIFEXITED(*global_rc)) | 
|  | *global_rc = WEXITSTATUS(*global_rc); | 
|  | else if (WIFSIGNALED(*global_rc)) | 
|  | *global_rc = 128 + WTERMSIG(*global_rc); | 
|  |  | 
|  | mpir_cleanup(); | 
|  | } | 
|  |  | 
|  | void | 
|  | update_job_state(srun_job_t *job, srun_job_state_t state) | 
|  | { | 
|  | slurm_mutex_lock(&job->state_mutex); | 
|  | if (job->state < state) { | 
|  | job->state = state; | 
|  | slurm_cond_signal(&job->state_cond); | 
|  |  | 
|  | } | 
|  | slurm_mutex_unlock(&job->state_mutex); | 
|  | return; | 
|  | } | 
|  |  | 
|  | srun_job_state_t | 
|  | job_state(srun_job_t *job) | 
|  | { | 
|  | srun_job_state_t state; | 
|  | slurm_mutex_lock(&job->state_mutex); | 
|  | state = job->state; | 
|  | slurm_mutex_unlock(&job->state_mutex); | 
|  | return state; | 
|  | } | 
|  |  | 
|  |  | 
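|  | /* | 
|  | * Escalating job termination: the first call sends SIGKILL directly to | 
|  | * the tasks, the second call also asks slurmctld to signal the step, | 
|  | * and subsequent calls only log that the abort is in progress. | 
|  | */ | 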
|  | void | 
|  | job_force_termination(srun_job_t *job) | 
|  | { | 
|  | static int kill_sent = 0; | 
|  | static time_t last_msg = 0; | 
|  |  | 
|  | if (kill_sent == 0) { | 
|  | info("forcing job termination"); | 
|  | /* Send SIGKILL to tasks directly */ | 
|  | update_job_state(job, SRUN_JOB_CANCELLED); | 
|  | launch_g_fwd_signal(SIGKILL); | 
|  | } else { | 
|  | time_t now = time(NULL); | 
|  | if (last_msg != now) { | 
|  | info("job abort in progress"); | 
|  | last_msg = now; | 
|  | } | 
|  | if (kill_sent == 1) { | 
|  | /* Try sending SIGKILL through slurmctld */ | 
|  | slurm_kill_job_step(job->step_id.job_id, | 
|  | job->step_id.step_id, SIGKILL, 0); | 
|  | } | 
|  | } | 
|  | kill_sent++; | 
|  | } | 
|  |  | 
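|  | /* | 
|  | * Derive an implicit task count when ntasks was not set explicitly: | 
|  | * nnodes * ntasks_per_node when --ntasks-per-node is set, NO_VAL when | 
|  | * only a CPU count constrains the step, otherwise one task per node. | 
|  | */ | 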
|  | static void _set_ntasks(allocation_info_t *ai, slurm_opt_t *opt_local) | 
|  | { | 
|  | int cnt = 0; | 
|  |  | 
|  | /* Distinguish between an explicitly and an implicitly set ntasks */ | 
|  | if (opt_local->ntasks_opt_set || | 
|  | (opt_local->ntasks_set && | 
|  | (opt_local->ntasks_per_node == NO_VAL))) | 
|  | return; | 
|  |  | 
|  | if (opt_local->ntasks_per_node != NO_VAL) { | 
|  | cnt = ai->nnodes * opt_local->ntasks_per_node; | 
|  | opt_local->ntasks_set = true;	/* implicit */ | 
|  | } else if (opt_local->cpus_set) { | 
|  | opt_local->ntasks = NO_VAL; | 
|  | opt_local->ntasks_set = true;	/* implicit */ | 
|  | return; | 
|  | } | 
|  |  | 
|  | opt_local->ntasks = (cnt < ai->nnodes) ? ai->nnodes : cnt; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Create an srun job structure from a resource allocation response msg | 
|  | */ | 
|  | static srun_job_t *_job_create_structure(allocation_info_t *ainfo, | 
|  | slurm_opt_t *opt_local) | 
|  | { | 
|  | srun_job_t *job = xmalloc(sizeof(srun_job_t)); | 
|  | int i; | 
|  |  | 
|  | _set_ntasks(ainfo, opt_local); | 
|  | debug2("creating job with %d tasks", opt_local->ntasks); | 
|  |  | 
|  | slurm_mutex_init(&job->state_mutex); | 
|  | slurm_cond_init(&job->state_cond, NULL); | 
|  | job->state = SRUN_JOB_INIT; | 
|  |  | 
|  | job->container = xstrdup(opt_local->container); | 
|  | job->nodelist = xstrdup(ainfo->nodelist); | 
|  | job->partition = xstrdup(ainfo->partition); | 
|  | memcpy(&job->step_id, &ainfo->step_id, sizeof(job->step_id)); | 
|  | job->het_job_id  = NO_VAL; | 
|  | job->het_job_nnodes = NO_VAL; | 
|  | job->het_job_ntasks = NO_VAL; | 
|  | job->het_job_offset = NO_VAL; | 
|  | job->het_job_task_offset = NO_VAL; | 
|  | job->nhosts   = ainfo->nnodes; | 
|  |  | 
|  | if (opt_local->min_nodes > job->nhosts) { | 
|  | error("Only allocated %d nodes asked for %d", | 
|  | job->nhosts, opt_local->min_nodes); | 
|  | if (opt_local->exclude) { | 
|  | /* When resources are pre-allocated and some nodes | 
|  | * are explicitly excluded, this error can occur. */ | 
|  | error("Are required nodes explicitly excluded?"); | 
|  | } | 
|  | xfree(job); | 
|  | return NULL; | 
|  | } | 
|  | if ((ainfo->cpus_per_node == NULL) || | 
|  | (ainfo->cpu_count_reps == NULL)) { | 
|  | error("cpus_per_node array is not set"); | 
|  | xfree(job); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | job->ntasks  = opt_local->ntasks; | 
|  | job->ntasks_per_board = ainfo->ntasks_per_board; | 
|  | job->ntasks_per_core = ainfo->ntasks_per_core; | 
|  | job->ntasks_per_socket = ainfo->ntasks_per_socket; | 
|  |  | 
|  | /* | 
|  | * If cpus_per_task is set then get the exact count of cpus for the | 
|  | * requested step (we might very well use less, especially if | 
|  | * --exclusive is used).  Else get the total for the allocation given. | 
|  | */ | 
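|  | /* | 
|  | * The allocation reports CPUs in compressed form: cpus_per_node[i] CPUs | 
|  | * on each of cpu_count_reps[i] consecutive nodes.  Illustrative example: | 
|  | * cpus_per_node = {8, 4} with cpu_count_reps = {2, 1} describes two | 
|  | * 8-CPU nodes plus one 4-CPU node, 20 CPUs in total. | 
|  | */ | 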
|  | if (opt_local->cpus_set) { | 
|  | if (opt_local->ntasks == NO_VAL) | 
|  | job->cpu_count = NO_VAL; | 
|  | else | 
|  | job->cpu_count = opt_local->ntasks * | 
|  | opt_local->cpus_per_task; | 
|  | } else { | 
|  | for (i = 0; i < ainfo->num_cpu_groups; i++) { | 
|  | job->cpu_count += ainfo->cpus_per_node[i] * | 
|  | ainfo->cpu_count_reps[i]; | 
|  | } | 
|  | } | 
|  |  | 
|  | job->rc       = -1; | 
|  |  | 
|  | job_update_io_fnames(job, opt_local); | 
|  |  | 
|  | job->uid = ainfo->uid; | 
|  | job->user_name = xstrdup(ainfo->user_name); | 
|  | job->gid = ainfo->gid; | 
|  | job->group_name = xstrdup(ainfo->group_name); | 
|  |  | 
|  | return (job); | 
|  | } | 
|  |  | 
|  | extern void job_update_io_fnames(srun_job_t *job, slurm_opt_t *opt_local) | 
|  | { | 
|  | job->ifname = fname_create(job, opt_local->ifname, opt_local->ntasks); | 
|  | job->ofname = fname_create(job, opt_local->ofname, opt_local->ntasks); | 
|  | job->efname = opt_local->efname ? | 
|  | fname_create(job, opt_local->efname, opt_local->ntasks) : | 
|  | job->ofname; | 
|  | } | 
|  |  | 
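|  | /* | 
|  | * Collapse a host list into Slurm's ranged form, e.g. "tux1,tux2,tux3" | 
|  | * becomes "tux[1-3]" (hostnames illustrative).  Falls back to a plain | 
|  | * copy of the input if the list cannot be parsed. | 
|  | */ | 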
|  | static char * | 
|  | _normalize_hostlist(const char *hostlist) | 
|  | { | 
|  | char *buf = NULL; | 
|  | hostlist_t *hl = hostlist_create(hostlist); | 
|  |  | 
|  | if (hl)	{ | 
|  | buf = hostlist_ranged_string_xmalloc(hl); | 
|  | hostlist_destroy(hl); | 
|  | } | 
|  | if (!buf) | 
|  | return xstrdup(hostlist); | 
|  |  | 
|  | return buf; | 
|  | } | 
|  |  | 
|  | static int _call_spank_local_user(srun_job_t *job, slurm_opt_t *opt_local) | 
|  | { | 
|  | struct spank_launcher_job_info info[1]; | 
|  |  | 
|  | info->argc = opt_local->argc; | 
|  | info->argv = opt_local->argv; | 
|  | info->gid	= opt_local->gid; | 
|  | info->jobid	= job->step_id.job_id; | 
|  | info->stepid	= job->step_id.step_id; | 
|  | info->step_layout = launch_common_get_slurm_step_layout(job); | 
|  | info->uid	= opt_local->uid; | 
|  |  | 
|  | return spank_local_user(info); | 
|  | } | 
|  |  | 
|  | /* Return the number of microseconds between tv1 and tv2 with a maximum | 
|  | * value of 10,000,000 to prevent overflows */ | 
|  | static long _diff_tv_str(struct timeval *tv1, struct timeval *tv2) | 
|  | { | 
|  | long delta_t; | 
|  |  | 
|  | delta_t  = MIN((tv2->tv_sec - tv1->tv_sec), 10); | 
|  | delta_t *= USEC_IN_SEC; | 
|  | delta_t +=  tv2->tv_usec - tv1->tv_usec; | 
|  | return delta_t; | 
|  | } | 
|  |  | 
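|  | /* | 
|  | * SIGINT policy: a single Ctrl-C prints the step status, a second one | 
|  | * within one second (or sropt.quit_on_intr) forwards SIGINT to the tasks | 
|  | * and forces job termination, and sropt.disable_status always forwards | 
|  | * the signal immediately. | 
|  | */ | 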
|  | static void _handle_intr(srun_job_t *job) | 
|  | { | 
|  | static struct timeval last_intr = { 0, 0 }; | 
|  | struct timeval now; | 
|  |  | 
|  | gettimeofday(&now, NULL); | 
|  | if (sropt.quit_on_intr || _diff_tv_str(&last_intr, &now) < 1000000) { | 
|  | info("sending Ctrl-C to %ps", &job->step_id); | 
|  | launch_g_fwd_signal(SIGINT); | 
|  | job_force_termination(job); | 
|  | } else { | 
|  | if (sropt.disable_status) { | 
|  | info("sending Ctrl-C to %ps", &job->step_id); | 
|  | launch_g_fwd_signal(SIGINT); | 
|  | } else if (job->state < SRUN_JOB_CANCELLED) { | 
|  | info("interrupt (one more within 1 sec to abort)"); | 
|  | launch_g_print_status(); | 
|  | } | 
|  | last_intr = now; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void _handle_pipe(void) | 
|  | { | 
|  | static int ending = 0; | 
|  |  | 
|  | if (ending) | 
|  | return; | 
|  | ending = 1; | 
|  | launch_g_fwd_signal(SIGKILL); | 
|  | } | 
|  |  | 
|  |  | 
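|  | /* | 
|  | * With --verbose, log the allocation using the compressed CPU notation, | 
|  | * e.g. "jobid 1234: nodes(2):`tux[1-2]', cpu counts: 8(x2)" (values | 
|  | * illustrative). | 
|  | */ | 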
|  | static void _print_job_information(resource_allocation_response_msg_t *resp) | 
|  | { | 
|  | int i; | 
|  | char *str = NULL; | 
|  | char *sep = ""; | 
|  |  | 
|  | if (!opt.verbose) | 
|  | return; | 
|  |  | 
|  | xstrfmtcat(str, "jobid %u: nodes(%u):`%s', cpu counts: ", | 
|  | resp->job_id, resp->node_cnt, resp->node_list); | 
|  |  | 
|  | for (i = 0; i < resp->num_cpu_groups; i++) { | 
|  | xstrfmtcat(str, "%s%u(x%u)", | 
|  | sep, resp->cpus_per_node[i], | 
|  | resp->cpu_count_reps[i]); | 
|  | sep = ","; | 
|  | } | 
|  | verbose("%s", str); | 
|  | xfree(str); | 
|  | } | 
|  |  | 
|  | /* NOTE: Executed once for entire hetjob */ | 
|  | static void _run_srun_epilog (srun_job_t *job) | 
|  | { | 
|  | int rc; | 
|  |  | 
|  | if (sropt.epilog && xstrcasecmp(sropt.epilog, "none") != 0) { | 
|  | if (setenvf(NULL, "SLURM_SCRIPT_CONTEXT", "epilog_srun") < 0) | 
|  | error("unable to set SLURM_SCRIPT_CONTEXT in environment"); | 
|  | rc = _run_srun_script(job, sropt.epilog); | 
|  | if (rc) { | 
|  | error("srun epilog failed status=%d", rc); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static void _run_srun_prolog (srun_job_t *job) | 
|  | { | 
|  | int rc; | 
|  |  | 
|  | if (sropt.prolog && xstrcasecmp(sropt.prolog, "none") != 0) { | 
|  | if (setenvf(NULL, "SLURM_SCRIPT_CONTEXT", "prolog_srun") < 0) | 
|  | error("unable to set SLURM_SCRIPT_CONTEXT in environment"); | 
|  | rc = _run_srun_script(job, sropt.prolog); | 
|  | if (rc) { | 
|  | error("srun prolog failed rc = %d. Aborting step.", rc); | 
|  | slurm_step_launch_abort(job->step_ctx); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Run srun prolog/epilog script. | 
|  | * | 
|  | * RET the exit status of the script, 1 on generic error, or 0 on success | 
|  | */ | 
|  | static int _run_srun_script (srun_job_t *job, char *script) | 
|  | { | 
|  | int status; | 
|  | pid_t cpid; | 
|  | int i; | 
|  | char **args = NULL; | 
|  |  | 
|  | if (script == NULL || script[0] == '\0') | 
|  | return 0; | 
|  |  | 
|  | if (access(script, R_OK | X_OK) < 0) { | 
|  | info("Access denied for %s: %m", script); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if ((cpid = fork()) < 0) { | 
|  | error ("run_srun_script: fork: %m"); | 
|  | return 1; | 
|  | } | 
|  | if (cpid == 0) { | 
|  | /* | 
|  | * set the prolog/epilog script's command line arguments to the | 
|  | * application arguments (for the last hetjob component), but | 
|  | * shifted one position higher | 
|  | */ | 
|  | args = xmalloc(sizeof(char *) * 1024); | 
|  | args[0] = script; | 
|  | for (i = 0; i < opt.argc; i++) { | 
|  | args[i + 1] = opt.argv[i]; | 
|  | } | 
|  | args[i + 1] = NULL; | 
|  | execv(script, args); | 
|  | error("Failed to execute srun prolog/epilog script: %m"); | 
|  | _exit(127); | 
|  | } | 
|  |  | 
|  | do { | 
|  | if (waitpid(cpid, &status, 0) < 0) { | 
|  | if (errno == EINTR) | 
|  | continue; | 
|  | error("waitpid: %m"); | 
|  | return 0; | 
|  | } else if (WIFEXITED(status)) { | 
|  | return WEXITSTATUS(status); | 
|  | } else { | 
|  | error("script did not exit normally"); | 
|  | return 1; | 
|  | } | 
|  | } while(1); | 
|  |  | 
|  | /* NOTREACHED */ | 
|  | } | 
|  |  | 
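|  | /* | 
|  | * Build an environment variable name, adding the hetjob component suffix | 
|  | * when needed: e.g. _build_key("SLURM_JOB_ID", 2) yields | 
|  | * "SLURM_JOB_ID_PACK_GROUP_2", while an offset of -1 (or a local het | 
|  | * step) keeps the plain name. | 
|  | */ | 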
|  | static char *_build_key(char *base, int het_job_offset) | 
|  | { | 
|  | char *key = NULL; | 
|  |  | 
|  | /* If we are a local_het_step we treat it like a normal step */ | 
|  | if (local_het_step || (het_job_offset == -1)) | 
|  | key = xstrdup(base); | 
|  | else | 
|  | xstrfmtcat(key, "%s_PACK_GROUP_%d", base, het_job_offset); | 
|  |  | 
|  | return key; | 
|  | } | 
|  |  | 
|  | static void _set_env_vars(resource_allocation_response_msg_t *resp, | 
|  | int het_job_offset) | 
|  | { | 
|  | char *key, *value, *tmp; | 
|  | int i; | 
|  |  | 
|  | key = _build_key("SLURM_JOB_CPUS_PER_NODE", het_job_offset); | 
|  | if (!getenv(key)) { | 
|  | tmp = uint32_compressed_to_str(resp->num_cpu_groups, | 
|  | resp->cpus_per_node, | 
|  | resp->cpu_count_reps); | 
|  | if (setenvf(NULL, key, "%s", tmp) < 0) | 
|  | error("unable to set %s in environment", key); | 
|  | xfree(tmp); | 
|  | } | 
|  | xfree(key); | 
|  |  | 
|  | if (resp->env_size) {	/* Used to set Burst Buffer environment */ | 
|  | for (i = 0; i < resp->env_size; i++) { | 
|  | tmp = xstrdup(resp->environment[i]); | 
|  | key = tmp; | 
|  | value = strchr(tmp, '='); | 
|  | if (value) { | 
|  | value[0] = '\0'; | 
|  | value++; | 
|  | setenv(key, value, 0); | 
|  | } | 
|  | xfree(tmp); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (resp->pn_min_memory & MEM_PER_CPU) { | 
|  | uint64_t tmp_mem = resp->pn_min_memory & (~MEM_PER_CPU); | 
|  | key = _build_key("SLURM_MEM_PER_CPU", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%"PRIu64, tmp_mem) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  | } else if (resp->pn_min_memory) { | 
|  | uint64_t tmp_mem = resp->pn_min_memory; | 
|  | key = _build_key("SLURM_MEM_PER_NODE", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%"PRIu64, tmp_mem) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  | } | 
|  |  | 
|  | if (resp->segment_size) { | 
|  | key = _build_key("SLURM_JOB_SEGMENT_SIZE", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%u", resp->segment_size) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  | } | 
|  |  | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Set some hetjob environment variables for combined job & step allocation | 
|  | */ | 
|  | static void _set_env_vars2(resource_allocation_response_msg_t *resp, | 
|  | int het_job_offset) | 
|  | { | 
|  | char *key; | 
|  |  | 
|  | if (resp->account) { | 
|  | key = _build_key("SLURM_JOB_ACCOUNT", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%s", resp->account) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  | } | 
|  |  | 
|  | key = _build_key("SLURM_JOB_ID", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%u", resp->job_id) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  |  | 
|  | key = _build_key("SLURM_JOB_NODELIST", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%s", resp->node_list) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  |  | 
|  | key = _build_key("SLURM_JOB_PARTITION", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%s", resp->partition) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  |  | 
|  | if (resp->qos) { | 
|  | key = _build_key("SLURM_JOB_QOS", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%s", resp->qos) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  | } | 
|  |  | 
|  | if (resp->resv_name) { | 
|  | key = _build_key("SLURM_JOB_RESERVATION", het_job_offset); | 
|  | if (!getenv(key) && | 
|  | (setenvf(NULL, key, "%s", resp->resv_name) < 0)) { | 
|  | error("unable to set %s in environment", key); | 
|  | } | 
|  | xfree(key); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Set SLURM_RLIMIT_* environment variables with current resource | 
|  | * limit values, reset RLIMIT_NOFILE to maximum possible value */ | 
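|  | /* | 
|  | * Each propagated limit appears as e.g. SLURM_RLIMIT_NOFILE=1024, or | 
|  | * SLURM_RLIMIT_NOFILE=U1024 when --propagate was given, the leading 'U' | 
|  | * marking a user-requested propagation (value illustrative). | 
|  | */ | 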
|  | static int _set_rlimit_env(void) | 
|  | { | 
|  | int                  rc = SLURM_SUCCESS; | 
|  | struct rlimit        rlim[1]; | 
|  | unsigned long        cur; | 
|  | char                 name[64], *format; | 
|  | slurm_rlimits_info_t *rli; | 
|  |  | 
|  | /* Modify limits with any command-line options */ | 
|  | if (sropt.propagate | 
|  | && parse_rlimits(sropt.propagate, PROPAGATE_RLIMITS)) { | 
|  | error( "--propagate=%s is not valid.", sropt.propagate ); | 
|  | exit(error_exit); | 
|  | } | 
|  |  | 
|  | for (rli = get_slurm_rlimits_info(); rli->name != NULL; rli++ ) { | 
|  |  | 
|  | if (rli->propagate_flag != PROPAGATE_RLIMITS) | 
|  | continue; | 
|  |  | 
|  | if (getrlimit (rli->resource, rlim) < 0) { | 
|  | error ("getrlimit (RLIMIT_%s): %m", rli->name); | 
|  | rc = SLURM_ERROR; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | cur = (unsigned long) rlim->rlim_cur; | 
|  | snprintf(name, sizeof(name), "SLURM_RLIMIT_%s", rli->name); | 
|  | if (sropt.propagate && (rli->propagate_flag == PROPAGATE_RLIMITS)) | 
|  | /* | 
|  | * Prepend 'U' to indicate user requested propagate | 
|  | */ | 
|  | format = "U%lu"; | 
|  | else | 
|  | format = "%lu"; | 
|  |  | 
|  | if (setenvf (NULL, name, format, cur) < 0) { | 
|  | error ("unable to set %s in environment", name); | 
|  | rc = SLURM_ERROR; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | debug ("propagating RLIMIT_%s=%lu", rli->name, cur); | 
|  | } | 
|  |  | 
|  | /* | 
|  | *  Now increase NOFILE to the max available for this srun | 
|  | */ | 
|  | rlimits_use_max_nofile(); | 
|  |  | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | /* Set SLURM_CLUSTER_NAME, SLURM_SUBMIT_DIR and SLURM_SUBMIT_HOST environment | 
|  | * variables with current state */ | 
|  | static void _set_submit_dir_env(void) | 
|  | { | 
|  | char buf[PATH_MAX], host[256]; | 
|  |  | 
|  | /* Only set these environment variables in new allocations */ | 
|  | if (sropt.jobid != NO_VAL) | 
|  | return; | 
|  |  | 
|  | if (setenvf(NULL, "SLURM_CLUSTER_NAME", "%s", | 
|  | slurm_conf.cluster_name) < 0) | 
|  | error("unable to set SLURM_CLUSTER_NAME in environment"); | 
|  |  | 
|  | if ((getcwd(buf, PATH_MAX)) == NULL) | 
|  | error("getcwd failed: %m"); | 
|  | else if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", buf) < 0) | 
|  | error("unable to set SLURM_SUBMIT_DIR in environment"); | 
|  |  | 
|  | if ((gethostname(host, sizeof(host)))) | 
|  | error("gethostname_short failed: %m"); | 
|  | else if (setenvf(NULL, "SLURM_SUBMIT_HOST", "%s", host) < 0) | 
|  | error("unable to set SLURM_SUBMIT_HOST in environment"); | 
|  | } | 
|  |  | 
|  | /* Set some environment variables with current state */ | 
|  | static int _set_umask_env(void) | 
|  | { | 
|  | if (!getenv("SRUN_DEBUG")) {	/* do not change current value */ | 
|  | /* NOTE: Default debug level is 3 (info) */ | 
|  | int log_level = LOG_LEVEL_INFO + opt.verbose - opt.quiet; | 
|  |  | 
|  | if (setenvf(NULL, "SRUN_DEBUG", "%d", log_level) < 0) | 
|  | error ("unable to set SRUN_DEBUG in environment"); | 
|  | } | 
|  |  | 
|  | if (!getenv("SLURM_UMASK")) {	/* do not change current value */ | 
|  | char mask_char[5]; | 
|  | mode_t mask; | 
|  |  | 
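|  | /* umask() has no read-only form: setting it to 0 returns the previous | 
|  | * mask, which is immediately restored. */ | 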
|  | mask = (int)umask(0); | 
|  | umask(mask); | 
|  |  | 
|  | sprintf(mask_char, "0%d%d%d", | 
|  | ((mask>>6)&07), ((mask>>3)&07), mask&07); | 
|  | if (setenvf(NULL, "SLURM_UMASK", "%s", mask_char) < 0) { | 
|  | error ("unable to set SLURM_UMASK in environment"); | 
|  | return SLURM_ERROR; | 
|  | } | 
|  | debug ("propagating UMASK=%s", mask_char); | 
|  | } | 
|  |  | 
|  | return SLURM_SUCCESS; | 
|  | } | 
|  |  | 
|  | static void _shepherd_notify(int shepherd_fd) | 
|  | { | 
|  | int rc; | 
|  |  | 
|  | while (1) { | 
|  | rc = write(shepherd_fd, "", 1); | 
|  | if (rc == -1) { | 
|  | if ((errno == EAGAIN) || (errno == EINTR)) | 
|  | continue; | 
|  | error("write(shepherd): %m"); | 
|  | } | 
|  | break; | 
|  | } | 
|  | close(shepherd_fd); | 
|  | } | 
|  |  | 
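|  | /* | 
|  | * Fork a shepherd process that blocks on the read end of a pipe.  On a | 
|  | * normal shutdown the parent writes one byte (see _shepherd_notify()) and | 
|  | * the shepherd exits quietly.  If srun dies unexpectedly the read returns | 
|  | * EOF instead, and the shepherd kills every step and, when an allocation | 
|  | * was created here, completes the job so its resources are released. | 
|  | * Returns the write end of the pipe, or -1 on error. | 
|  | */ | 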
|  | static int _shepherd_spawn(srun_job_t *job, list_t *srun_job_list, | 
|  | bool got_alloc) | 
|  | { | 
|  | int shepherd_pipe[2], rc; | 
|  | pid_t shepherd_pid; | 
|  | char buf[1]; | 
|  |  | 
|  | if (pipe(shepherd_pipe)) { | 
|  | error("pipe: %m"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | shepherd_pid = fork(); | 
|  | if (shepherd_pid == -1) { | 
|  | error("fork: %m"); | 
|  | return -1; | 
|  | } | 
|  | if (shepherd_pid != 0) { | 
|  | close(shepherd_pipe[0]); | 
|  | return shepherd_pipe[1]; | 
|  | } | 
|  |  | 
|  | /* Wait for parent to notify of completion or I/O error on abort */ | 
|  | close(shepherd_pipe[1]); | 
|  | while (1) { | 
|  | rc = read(shepherd_pipe[0], buf, 1); | 
|  | if (rc == 1) { | 
|  | _exit(0); | 
|  | } else if (rc == 0) { | 
|  | break;	/* EOF */ | 
|  | } else if (rc == -1) { | 
|  | if ((errno == EAGAIN) || (errno == EINTR)) | 
|  | continue; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (srun_job_list) { | 
|  | list_itr_t *job_iter; | 
|  | job_iter  = list_iterator_create(srun_job_list); | 
|  | while ((job = list_next(job_iter))) { | 
|  | (void) slurm_kill_job_step(job->step_id.job_id, job->step_id.step_id, | 
|  | SIGKILL, 0); | 
|  | if (got_alloc) | 
|  | slurm_complete_job(job->step_id.job_id, NO_VAL); | 
|  | } | 
|  | list_iterator_destroy(job_iter); | 
|  | } else { | 
|  | (void) slurm_kill_job_step(job->step_id.job_id, | 
|  | job->step_id.step_id, SIGKILL, 0); | 
|  | if (got_alloc) | 
|  | slurm_complete_job(job->step_id.job_id, NO_VAL); | 
|  | } | 
|  |  | 
|  | _exit(0); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | /* _srun_signal_mgr - Process daemon-wide signals */ | 
|  | static void *_srun_signal_mgr(void *job_ptr) | 
|  | { | 
|  | int sig; | 
|  | int i, rc; | 
|  | sigset_t set; | 
|  | srun_job_t *job = (srun_job_t *)job_ptr; | 
|  |  | 
|  | /* Make sure no required signals are ignored (possibly inherited) */ | 
|  | for (i = 0; sig_array[i]; i++) | 
|  | xsignal_default(sig_array[i]); | 
|  | while (!srun_shutdown) { | 
|  | xsignal_sigset_create(sig_array, &set); | 
|  | rc = sigwait(&set, &sig); | 
|  | if (rc == EINTR) | 
|  | continue; | 
|  | switch (sig) { | 
|  | case SIGINT: | 
|  | if (!srun_shutdown) | 
|  | _handle_intr(job); | 
|  | break; | 
|  | case SIGQUIT: | 
|  | info("Quit"); | 
|  | /* fall through: force termination below */ | 
|  | case SIGTERM: | 
|  | case SIGHUP: | 
|  | /* No need to call job_force_termination here since we | 
|  | * are ending the job now and we don't need to update | 
|  | * the state. */ | 
|  | info("forcing job termination"); | 
|  | launch_g_fwd_signal(SIGKILL); | 
|  | break; | 
|  | case SIGCONT: | 
|  | info("got SIGCONT"); | 
|  | break; | 
|  | case SIGPIPE: | 
|  | _handle_pipe(); | 
|  | break; | 
|  | case SIGALRM: | 
|  | if (srun_max_timer) { | 
|  | info("First task exited %ds ago", sropt.max_wait); | 
|  | launch_g_print_status(); | 
|  | launch_g_step_terminate(); | 
|  | } | 
|  | break; | 
|  | default: | 
|  | launch_g_fwd_signal(sig); | 
|  | break; | 
|  | } | 
|  | } | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static int _validate_relative(resource_allocation_response_msg_t *resp, | 
|  | slurm_opt_t *opt_local) | 
|  | { | 
|  | srun_opt_t *srun_opt = opt_local->srun_opt; | 
|  | xassert(srun_opt); | 
|  |  | 
|  | if ((srun_opt->relative != NO_VAL) && | 
|  | ((srun_opt->relative + opt_local->min_nodes) | 
|  | > resp->node_cnt)) { | 
|  | if (slurm_option_set_by_cli(opt_local, 'N')) { | 
|  | /* -N command line option used */ | 
|  | error("--relative and --nodes option incompatible " | 
|  | "with count of allocated nodes (%d+%d>%d)", | 
|  | srun_opt->relative, | 
|  | opt_local->min_nodes, | 
|  | resp->node_cnt); | 
|  | } else {		/* SLURM_JOB_NUM_NODES option used */ | 
|  | error("--relative and SLURM_JOB_NUM_NODES option incompatible with count of allocated nodes (%d+%d>%d)", | 
|  | srun_opt->relative, | 
|  | opt_local->min_nodes, | 
|  | resp->node_cnt); | 
|  | } | 
|  | return SLURM_ERROR; | 
|  | } | 
|  | return SLURM_SUCCESS; | 
|  | } | 
|  |  | 
|  | static void _call_spank_fini(void) | 
|  | { | 
|  | if (-1 != shepherd_fd) | 
|  | spank_fini(NULL); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Run cli_filter_post_submit on all opt structures | 
|  | * Convenience function since this might need to run in two spots | 
|  | */ | 
|  | static void _srun_cli_filter_post_submit(uint32_t jobid, uint32_t stepid) | 
|  | { | 
|  | static bool post_submit_ran = false; | 
|  | int idx = 0, components = 1; | 
|  |  | 
|  | if (post_submit_ran) | 
|  | return; | 
|  |  | 
|  | if (opt_list) | 
|  | components = list_count(opt_list); | 
|  |  | 
|  | for (idx = 0; idx < components; idx++) | 
|  | cli_filter_g_post_submit(idx, jobid, stepid); | 
|  |  | 
|  | post_submit_ran = true; | 
|  | } |