blob: 61421527a713e29aab2e9ff853f6402df0ffd927 [file] [log] [blame]
/*****************************************************************************\
* info_job.c - job information functions for scontrol.
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Morris Jette <jette1@llnl.gov>
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include <arpa/inet.h>
#include <grp.h>
#include <fcntl.h>
#include <pwd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "scontrol.h"
#include "src/common/bitstring.h"
#include "src/common/cpu_frequency.h"
#include "src/common/proc_args.h"
#include "src/common/slurm_time.h"
#include "src/common/stepd_api.h"
#include "src/interfaces/data_parser.h"
#include "src/common/openapi.h"
#define CONTAINER_ID_TAG "containerid="
#define POLL_SLEEP 3 /* retry interval in seconds */
/*
 * Argument bundle threaded through a list iterator when building the
 * "listjobs" output (consumer not visible in this chunk — presumably a
 * list_for_each() callback; confirm against the caller).
 */
typedef struct add_to_listjobs_list_args {
	list_t *jobs_seen;	/* jobs already handled — NOTE(review): looks
				 * like a de-duplication set; verify */
	list_t *listjobs_list;	/* accumulated records for output */
} add_to_listjobs_list_args_t;
/*
 * Return the node table needed to cross-reference a job's node_inx
 * entries.  A previously loaded table (old_node_info_ptr) is reused when
 * available.  All nodes, including hidden ones, must be loaded so that
 * the job-to-node index mapping stays valid.
 *
 * RET node info message, or NULL on load failure (exit_code is set).
 */
static node_info_msg_t *_get_node_info_for_jobs(void)
{
	node_info_msg_t *nodes = NULL;
	/* SHOW_ALL is always required for correct cross-indexing */
	uint16_t flags = SHOW_ALL;
	int rc;

	if (old_node_info_ptr)
		return old_node_info_ptr;

	if (federation_flag)
		flags |= SHOW_FEDERATION;
	if (local_flag)
		flags |= SHOW_LOCAL;

	rc = scontrol_load_nodes(&nodes, flags);
	if (rc) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_nodes error");
		return NULL;
	}
	return nodes;
}
/* This set of functions loads/free node information so that we can map a job's
* core bitmap to it's CPU IDs based upon the thread count on each node. */
static uint32_t _threads_per_core(char *host)
{
node_info_msg_t *node_info_msg = NULL;
uint32_t i, threads = 1;
if (!host)
return threads;
if (!(node_info_msg = _get_node_info_for_jobs()))
return threads;
for (i = 0; i < node_info_msg->record_count; i++) {
if (node_info_msg->node_array[i].name &&
!xstrcmp(host, node_info_msg->node_array[i].name)) {
threads = node_info_msg->node_array[i].threads;
break;
}
}
return threads;
}
/*
 * Format a numeric range into str: "lower-upper" when an upper bound is
 * present (non-zero), otherwise just "lower".  Output is truncated to
 * str_size bytes (snprintf semantics).
 */
static void _sprint_range(char *str, uint32_t str_size,
			  uint32_t lower, uint32_t upper)
{
	if (upper == 0) {
		snprintf(str, str_size, "%u", lower);
		return;
	}
	snprintf(str, str_size, "%u-%u", lower, upper);
}
/*
 * Append str2 to the line being built in *str1, inserting a single space
 * separator unless this is the first item on the line.  Clears the
 * new-line flag so subsequent items are space-separated.
 */
static void _add_to_line(char **str1, bool *is_new_line, char *str2)
{
	const char *sep = *is_new_line ? "" : " ";

	xstrfmtcat(*str1, "%s%s", sep, str2);
	*is_new_line = false;
}
/*
* _sprint_job_info - output information about a specific Slurm
* job based upon message as loaded using slurm_load_jobs
* IN job_ptr - an individual job information record pointer
* RET out - char * containing formatted output (must be freed after call)
* NULL is returned on failure.
*/
/*
 * _sprint_job_info - output information about a specific Slurm
 * job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * RET out - char * containing formatted output (must be freed after call)
 * NULL is returned on failure.
 *
 * NOTE: may modify job_ptr in place (state_desc whitespace is replaced
 * with underscores, and the MEM_PER_CPU flag is stripped from
 * pn_min_memory) — see the relevant sections below.
 */
static char *_sprint_job_info(job_info_t *job_ptr)
{
	int i, j, k;
	bool is_new_line;
	char time_str[256], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[PATH_MAX];
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *job_size_str = NULL;
	char *out = NULL;	/* xmalloc'd result, grown via xstrfmtcat */
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	char *sorted_nodelist = NULL;
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t *hl, *hl_last;
	uint32_t threads;
	/* record separators differ between one-line and multi-line output */
	char *line_end = (one_liner) ? " " : "\n ";
	char *indent = (one_liner) ? "" : " ";

	if (job_ptr->job_id == 0)	/* Duplicated sibling job record */
		return NULL;

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);
	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
		if (job_ptr->array_max_tasks) {
			xstrfmtcat(out, "ArrayTaskThrottle=%u ",
				   job_ptr->array_max_tasks);
		}
	} else if (job_ptr->het_job_id) {
		xstrfmtcat(out, "HetJobId=%u HetJobOffset=%u ",
			   job_ptr->het_job_id, job_ptr->het_job_offset);
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line ******/
	if (job_ptr->het_job_id_set) {
		xstrfmtcat(out, "HetJobIdSet=%s", job_ptr->het_job_id_set);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));
	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ",
			   job_state_reason_string(job_ptr->state_reason));
	if (job_ptr->failed_node)
		xstrfmtcat(out, "FailedNode=%s ", job_ptr->failed_node);
	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		   job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag,
		   job_ptr->reboot);
	/* Decode wait()-style exit code into "status:signal" */
	exit_status = term_sig = 0;
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	else if (WIFEXITED(job_ptr->exit_code))
		exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (detail_flag) {
		exit_status = term_sig = 0;
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else if (WIFEXITED(job_ptr->derived_ec))
			exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u", exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	/* Compute elapsed run time, excluding time spent suspended */
	if (IS_JOB_PENDING(job_ptr) || !job_ptr->start_time)
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);
	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}
	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);
	slurm_make_time_str(&job_ptr->eligible_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);
	xstrcat(out, line_end);

	/****** Line 7.5 ******/
	slurm_make_time_str(&job_ptr->accrue_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "AccrueTime=%s", time_str);
	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);
	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}
	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}
	xstrcat(out, line_end);

	/****** Line ******/
	if (job_ptr->bitflags & CRON_JOB || job_ptr->cronspec) {
		if (job_ptr->bitflags & CRON_JOB)
			xstrcat(out, "CronJob=Yes ");
		xstrfmtcat(out, "CrontabSpec=\"%s\"", job_ptr->cronspec);
		xstrcat(out, line_end);
	}

	/****** Line ******/
	/*
	 * only print this line if preemption is enabled and job started
	 * see src/slurmctld/job_mgr.c:pack_job, 'preemptable'
	 */
	if (job_ptr->preemptable_time) {
		slurm_make_time_str(&job_ptr->preemptable_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "PreemptEligibleTime=%s ", time_str);
		if (job_ptr->preempt_time == 0)
			xstrcat(out, "PreemptTime=None");
		else {
			slurm_make_time_str(&job_ptr->preempt_time, time_str,
					    sizeof(time_str));
			xstrfmtcat(out, "PreemptTime=%s", time_str);
		}
		xstrcat(out, line_end);
	}

	/****** Line 10 ******/
	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");
	xstrfmtcat(out, "SecsPreSuspend=%ld ", (long int)job_ptr->pre_sus_time);
	slurm_make_time_str(&job_ptr->last_sched_eval, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "LastSchedEval=%s Scheduler=%s%s", time_str,
		   job_ptr->bitflags & BACKFILL_SCHED ? "Backfill" : "Main",
		   job_ptr->bitflags & BACKFILL_LAST ? ":*" : "");
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	sorted_nodelist = slurm_sort_node_list_str(job_ptr->nodes);
	xstrfmtcat(out, "%s=%s", nodelist, sorted_nodelist);
	xfree(sorted_nodelist);
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist, job_ptr->sched_nodes);
	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	is_new_line = true;
	if (job_ptr->batch_features) {
		snprintf(tmp_line, sizeof(tmp_line), "BatchFeatures=%s",
			 job_ptr->batch_features);
		_add_to_line(&out, &is_new_line, tmp_line);
	}
	if (job_ptr->batch_host) {
		snprintf(tmp_line, sizeof(tmp_line), "BatchHost=%s",
			 job_ptr->batch_host);
		_add_to_line(&out, &is_new_line, tmp_line);
	}
	if (!is_new_line)
		xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->bitflags & STEPMGR_ENABLED) {
		xstrfmtcat(out, "StepMgrEnabled=Yes");
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings_active || job_ptr->fed_siblings_viable) {
		xstrfmtcat(out, "FedOrigin=%s FedViableSiblings=%s FedActiveSiblings=%s",
			   job_ptr->fed_origin_str,
			   job_ptr->fed_siblings_viable_str,
			   job_ptr->fed_siblings_active_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	/* Node range: only pending jobs retain a meaningful max_nodes */
	if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		job_size_str = job_ptr->job_size_str;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}
	if (job_size_str)
		snprintf(tmp_line, sizeof(tmp_line), "%s", job_size_str);
	else
		_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus,
		      job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);
	if (job_ptr->num_tasks == NO_VAL)
		xstrcat(out, "NumTasks=N/A ");
	else
		xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	if (job_ptr->cpus_per_task == NO_VAL16)
		xstrfmtcat(out, "CPUs/Task=N/A ");
	else
		xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);
	/* Requested boards:sockets:cores:threads; '*' means unspecified */
	if (job_ptr->boards_per_node == NO_VAL16)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);
	if (job_ptr->sockets_per_board == NO_VAL16)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);
	if (job_ptr->cores_per_socket == NO_VAL16)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);
	if (job_ptr->threads_per_core == NO_VAL16)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);
	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "ReqTRES=%s", job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "AllocTRES=%s", job_ptr->tres_alloc_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == NO_VAL16)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);
	if (job_ptr->ntasks_per_node == NO_VAL16)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:",
			   job_ptr->ntasks_per_node);
	if (job_ptr->ntasks_per_board == NO_VAL16)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);
	if ((job_ptr->ntasks_per_socket == NO_VAL16) ||
	    (job_ptr->ntasks_per_socket == INFINITE16))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);
	if ((job_ptr->ntasks_per_core == NO_VAL16) ||
	    (job_ptr->ntasks_per_core == INFINITE16))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);
	if (job_ptr->core_spec == NO_VAL16)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);
	xstrcat(out, line_end);

	/*
	 * Per-node allocation detail: walk the job's core bitmap and group
	 * consecutive hosts that share identical CPU-ID / memory / GRES
	 * allocations, emitting one "Nodes=... CPU_IDs=..." line per group.
	 */
	if (job_resrcs && job_resrcs->core_bitmap &&
	    ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		xstrfmtcat(out, "JOB_GRES=%s", job_ptr->gres_total);
		xstrcat(out, line_end);

		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("%s: hostlist_create: %s",
			      __func__, job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("%s: hostlist_create: NULL", __func__);
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];
		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {
			/* Advance through the socket/core repetition table */
			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			/* Expand each allocated core into its CPU (thread) IDs */
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr !=
			     job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
							hl_last);
					xstrfmtcat(out,
						   "%sNodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES=%s",
						   indent, last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						   gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx < job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						    gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr =
					job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			/* hostlist_shift() result is malloc'd; use free() */
			free(host);

			if (bit_inx > last)
				break;

			/* node_inx holds [start,end] index pairs */
			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		/* Flush the final group of identical hosts */
		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out, "%sNodes=%s CPU_IDs=%s Mem=%"PRIu64" GRES=%s",
				   indent, last_hosts, tmp2,
				   last_mem_alloc_ptr ? last_mem_alloc : 0,
				   gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}

	/****** Line 18 ******/
	/*
	 * If there is a mem_per_tres job->pn_min_memory will not be
	 * set, let's figure it from the first tres there.
	 * NOTE: clearing MEM_PER_CPU mutates job_ptr->pn_min_memory in place.
	 */
	if (job_ptr->mem_per_tres) {
		tmp6_ptr = "TRES";
	} else if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s",
		   tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (job_ptr->cluster_features) {
		xstrfmtcat(out, "ClusterFeatures=%s",
			   job_ptr->cluster_features);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->prefer) {
		xstrfmtcat(out, "Prefer=%s", job_ptr->prefer);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->resv_name) {
		xstrfmtcat(out, "Reservation=%s", job_ptr->resv_name);
		xstrcat(out, line_end);
	}

	/****** Line 20 ******/
	xstrfmtcat(out, "OverSubscribe=%s Contiguous=%d Licenses=%s LicensesAlloc=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->licenses_allocated,
		   job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);
	xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrfmtcat(out, "AdminComment=%s", job_ptr->admin_comment);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->system_comment) {
		xstrfmtcat(out, "SystemComment=%s", job_ptr->system_comment);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrfmtcat(out, "Comment=%s", job_ptr->comment);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->extra) {
		xstrfmtcat(out, "Extra=%s", job_ptr->extra);
		xstrcat(out, line_end);
	}

	/****** Line 30 (optional) ******/
	/* Std* paths only exist for batch jobs */
	if (job_ptr->batch_flag) {
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
		xstrcat(out, line_end);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
		xstrcat(out, line_end);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->segment_size) {
		xstrfmtcat(out, "SegmentSize=%u", job_ptr->segment_size);
		xstrcat(out, line_end);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s",
			   job_ptr->req_switch, time_buf);
		xstrcat(out, line_end);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
		xstrcat(out, line_end);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, tmp1);
		xstrcat(out, line_end);
	}

	/****** Line 38 (optional) ******/
	/* Boolean job bitflags, space-separated on one logical line */
	is_new_line = true;
	if (job_ptr->bitflags & GRES_ALLOW_TASK_SHARING)
		_add_to_line(&out, &is_new_line, "GresAllowTaskSharing=Yes");
	if (job_ptr->bitflags & GRES_DISABLE_BIND)
		_add_to_line(&out, &is_new_line, "GresEnforceBind=No");
	if (job_ptr->bitflags & GRES_ENFORCE_BIND)
		_add_to_line(&out, &is_new_line, "GresEnforceBind=Yes");
	if (job_ptr->bitflags & GRES_MULT_TASKS_PER_SHARING)
		_add_to_line(&out, &is_new_line, "GresOneTaskPerSharing=No");
	if (job_ptr->bitflags & GRES_ONE_TASK_PER_SHARING)
		_add_to_line(&out, &is_new_line, "GresOneTaskPerSharing=Yes");
	if (job_ptr->bitflags & KILL_INV_DEP)
		_add_to_line(&out, &is_new_line, "KillOnInvalidDependent=Yes");
	if (job_ptr->bitflags & NO_KILL_INV_DEP)
		_add_to_line(&out, &is_new_line, "KillOnInvalidDependent=No");
	if (job_ptr->bitflags & SPREAD_JOB)
		_add_to_line(&out, &is_new_line, "SpreadJob=Yes");
	if (!is_new_line)
		xstrcat(out, line_end);

	/****** Line (optional) ******/
	if (job_ptr->oom_kill_step != NO_VAL16) {
		xstrfmtcat(out, "OOMKillStep=%u", job_ptr->oom_kill_step);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->cpus_per_tres) {
		xstrfmtcat(out, "CpusPerTres=%s", job_ptr->cpus_per_tres);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->mem_per_tres) {
		xstrfmtcat(out, "MemPerTres=%s", job_ptr->mem_per_tres);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->tres_bind) {
		xstrfmtcat(out, "TresBind=%s", job_ptr->tres_bind);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->tres_freq) {
		xstrfmtcat(out, "TresFreq=%s", job_ptr->tres_freq);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->tres_per_job) {
		xstrfmtcat(out, "TresPerJob=%s", job_ptr->tres_per_job);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->tres_per_node) {
		xstrfmtcat(out, "TresPerNode=%s", job_ptr->tres_per_node);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->tres_per_socket) {
		xstrfmtcat(out, "TresPerSocket=%s", job_ptr->tres_per_socket);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->tres_per_task) {
		xstrfmtcat(out, "TresPerTask=%s", job_ptr->tres_per_task);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->mail_type && job_ptr->mail_user) {
		xstrfmtcat(out, "MailUser=%s MailType=%s",
			   job_ptr->mail_user,
			   print_mail_type(job_ptr->mail_type));
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if ((job_ptr->ntasks_per_tres) &&
	    (job_ptr->ntasks_per_tres != NO_VAL16) &&
	    (job_ptr->ntasks_per_tres != INFINITE16)) {
		xstrfmtcat(out, "NtasksPerTRES=%u", job_ptr->ntasks_per_tres);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->container || job_ptr->container_id) {
		xstrfmtcat(out, "Container=%s ContainerID=%s",
			   job_ptr->container, job_ptr->container_id);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->selinux_context) {
		xstrfmtcat(out, "SELinuxContext=%s", job_ptr->selinux_context);
		xstrcat(out, line_end);
	}

	/****** Line (optional) ******/
	if (job_ptr->resv_ports) {
		xstrfmtcat(out, "ResvPorts=%s", job_ptr->resv_ports);
		xstrcat(out, line_end);
	}

	xstrtrim(out);

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
/*
* _print_job_info - output information about a specific Slurm
* job based upon message as loaded using slurm_load_jobs
* IN out - file to write to
* IN job_ptr - an individual job information record pointer
*/
/*
 * _print_job_info - write one job's formatted record to a stream
 * IN out - file to write to
 * IN job_ptr - an individual job information record pointer
 */
static void _print_job_info(FILE *out, job_info_t *job_ptr)
{
	char *record = _sprint_job_info(job_ptr);

	if (!record)
		return;
	fprintf(out, "%s", record);
	xfree(record);
}
/* Load current job table information into *job_buffer_pptr */
/* Load current job table information into *job_buffer_pptr.
 * Results are cached in the global old_job_info_ptr; subsequent calls
 * fetch incrementally (only changes since last_update) unless the show
 * flags changed, which forces a full reload.
 * IN job_id - specific job to load, or 0 for the whole job table
 * RET SLURM_SUCCESS or a Slurm error code */
extern int
scontrol_load_job(job_info_msg_t ** job_buffer_pptr, uint32_t job_id)
{
	int error_code;
	/* flags used on the previous load; 0xffff forces first full load */
	static uint16_t last_show_flags = 0xffff;
	uint16_t show_flags = 0;
	job_info_msg_t * job_info_ptr = NULL;

	if (all_flag)
		show_flags |= SHOW_ALL;
	if (detail_flag)
		show_flags |= SHOW_DETAIL;
	if (federation_flag)
		show_flags |= SHOW_FEDERATION;
	if (local_flag)
		show_flags |= SHOW_LOCAL;
	if (sibling_flag)
		show_flags |= SHOW_FEDERATION | SHOW_SIBLING;

	if (old_job_info_ptr) {
		/* Different flags invalidate the cache: reset the update
		 * timestamp so the controller returns everything */
		if (last_show_flags != show_flags)
			old_job_info_ptr->last_update = (time_t) 0;
		if (job_id) {
			error_code = slurm_load_job(&job_info_ptr, job_id,
						    show_flags);
		} else {
			error_code = slurm_load_jobs(
				old_job_info_ptr->last_update,
				&job_info_ptr, show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_info_msg (old_job_info_ptr);
		else if (errno == SLURM_NO_CHANGE_IN_DATA) {
			/* Nothing changed since last load: reuse the cache */
			job_info_ptr = old_job_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_load_jobs no change in data\n");
		}
	} else if (job_id) {
		error_code = slurm_load_job(&job_info_ptr, job_id, show_flags);
	} else {
		error_code = slurm_load_jobs((time_t) NULL, &job_info_ptr,
					     show_flags);
	}

	if (error_code == SLURM_SUCCESS) {
		old_job_info_ptr = job_info_ptr;
		/* A single-job load is partial data: force a full reload
		 * next time by zeroing the cached timestamp */
		if (job_id)
			old_job_info_ptr->last_update = (time_t) 0;
		last_show_flags = show_flags;
		*job_buffer_pptr = job_info_ptr;
	}

	return error_code;
}
/*
* scontrol_pid_info - given a local process id, print the corresponding
* slurm job id and its expected end time
* IN job_pid - the local process id of interest
*/
/*
 * scontrol_pid_info - given a local process id, print the corresponding
 * slurm job id and its expected end time
 * IN job_pid - the local process id of interest
 */
extern void
scontrol_pid_info(pid_t job_pid)
{
	uint32_t job_id = 0;
	time_t end_time;
	long rem_time;

	if (slurm_pid2jobid(job_pid, &job_id)) {
		exit_code = 1;
		if (quiet_flag != 1)
			fprintf(stderr,
				"Failed to locate job for requested pid\n");
		return;
	}

	if (slurm_get_end_time(job_id, &end_time)) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror("Failed to get job end time");
		return;
	}
	printf("Slurm JobId=%u ends at %s\n", job_id, slurm_ctime2(&end_time));

	rem_time = slurm_get_rem_time(job_id);
	printf("Job remaining time is %ld seconds\n", rem_time);
}
/*
* scontrol_print_completing - print jobs in completing state and
* associated nodes in COMPLETING or DOWN state
*/
/*
 * scontrol_print_completing - print jobs in completing state and
 * associated nodes in COMPLETING or DOWN state
 */
extern void
scontrol_print_completing (void)
{
	int rc, inx;
	job_info_msg_t *job_info_msg;
	job_info_t *jobs;
	node_info_msg_t *node_info_msg;

	rc = scontrol_load_job(&job_info_msg, 0);
	if (rc) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_jobs error");
		return;
	}
	if (!(node_info_msg = _get_node_info_for_jobs()))
		return;

	/* Scan the jobs for completing state */
	jobs = job_info_msg->job_array;
	for (inx = 0; inx < job_info_msg->record_count; inx++) {
		if (jobs[inx].job_state & JOB_COMPLETING)
			scontrol_print_completing_job(&jobs[inx],
						      node_info_msg);
	}
}
/* Print one COMPLETING job: its end time, how long it has been completing,
 * and the names of its nodes still in COMPLETING or DOWN state.
 * IN job_ptr - the completing job's record
 * IN node_info_msg - node table used to resolve the job's node indexes */
extern void
scontrol_print_completing_job(job_info_t *job_ptr,
			      node_info_msg_t *node_info_msg)
{
	int i, c_offset = 0;
	node_info_t *node_info;
	hostlist_t *comp_nodes, *down_nodes;
	char *node_buf;
	char time_str[256];
	time_t completing_time = 0;

	comp_nodes = hostlist_create(NULL);
	down_nodes = hostlist_create(NULL);

	/* In a federation, node indexes are relative to the job's cluster;
	 * shift them into this node table's index space */
	if (job_ptr->cluster && federation_flag && !local_flag)
		c_offset = get_cluster_node_offset(job_ptr->cluster,
						   node_info_msg);

	/* node_inx is a -1 terminated list of [start,end] index pairs */
	for (i = 0; job_ptr->node_inx[i] != -1; i+=2) {
		int j = job_ptr->node_inx[i];
		for (; j <= job_ptr->node_inx[i+1]; j++) {
			int node_inx = j + c_offset;
			if (node_inx >= node_info_msg->record_count)
				break;
			node_info = &(node_info_msg->node_array[node_inx]);
			if (IS_NODE_COMPLETING(node_info))
				hostlist_push_host(comp_nodes, node_info->name);
			else if (IS_NODE_DOWN(node_info))
				hostlist_push_host(down_nodes, node_info->name);
		}
	}

	fprintf(stdout, "JobId=%u ", job_ptr->job_id);

	slurm_make_time_str(&job_ptr->end_time, time_str, sizeof(time_str));
	fprintf(stdout, "EndTime=%s ", time_str);

	/* Time elapsed since the job entered completing state */
	completing_time = time(NULL) - job_ptr->end_time;
	secs2time_str(completing_time, time_str, sizeof(time_str));
	fprintf(stdout, "CompletingTime=%s ", time_str);

	/* Sort the hostlists */
	hostlist_sort(comp_nodes);
	hostlist_sort(down_nodes);

	node_buf = hostlist_ranged_string_xmalloc(comp_nodes);
	if (node_buf && node_buf[0])
		fprintf(stdout, "Nodes(COMPLETING)=%s ", node_buf);
	xfree(node_buf);

	node_buf = hostlist_ranged_string_xmalloc(down_nodes);
	if (node_buf && node_buf[0])
		fprintf(stdout, "Nodes(DOWN)=%s ", node_buf);
	xfree(node_buf);
	fprintf(stdout, "\n");

	hostlist_destroy(comp_nodes);
	hostlist_destroy(down_nodes);
}
/*
 * Return true when het_job_offset matches the job's offset, or when no
 * offset filter was requested (NO_VAL).
 */
static bool _het_job_offset_match(job_info_t *job_ptr, uint32_t het_job_offset)
{
	return (het_job_offset == NO_VAL) ||
	       (het_job_offset == job_ptr->het_job_offset);
}
/*
 * Return true when array_id selects this job record: either no filter
 * was given (NO_VAL), the record's own task id matches, or the id is set
 * in the record's array task bitmap.
 */
static bool _task_id_in_job(job_info_t *job_ptr, uint32_t array_id)
{
	uint32_t bitmap_len;

	if ((array_id == NO_VAL) || (array_id == job_ptr->array_task_id))
		return true;

	if (!job_ptr->array_bitmap)
		return false;
	bitmap_len = bit_size(job_ptr->array_bitmap);
	if (array_id >= bitmap_len)
		return false;
	return bit_test(job_ptr->array_bitmap, array_id) ? true : false;
}
/*
* scontrol_print_job - print the specified job's information
* IN job_id - job's id or NULL to print information about all jobs
*/
extern void scontrol_print_job(char *job_id_str, int argc, char **argv)
{
	int error_code = SLURM_SUCCESS, i, print_cnt = 0;
	uint32_t job_id = 0;
	uint32_t array_id = NO_VAL, het_job_offset = NO_VAL;
	job_info_msg_t * job_buffer_ptr = NULL;
	job_info_t *job_ptr = NULL;
	char *end_ptr = NULL;

	if (job_id_str) {
		char *tmp_job_ptr = job_id_str;

		/*
		 * Check that the input is a valid job id (i.e. 123, 123_456
		 * for an array task, or 123+4 for a hetjob component).
		 * Cast to unsigned char before isdigit(): passing a plain
		 * char that may be negative is undefined behavior
		 * (CERT STR37-C).
		 */
		while (*tmp_job_ptr) {
			if (!isdigit((unsigned char) *tmp_job_ptr) &&
			    (*tmp_job_ptr != '_') && (*tmp_job_ptr != '+')) {
				exit_code = 1;
				errno = ESLURM_INVALID_JOB_ID;
				if (quiet_flag != 1)
					slurm_perror("scontrol_print_job error");
				return;
			}
			++tmp_job_ptr;
		}

		/* Split "jobid[_array][+offset]" into its components */
		job_id = (uint32_t) strtol (job_id_str, &end_ptr, 10);
		if (end_ptr[0] == '_')
			array_id = strtol(end_ptr + 1, &end_ptr, 10);
		if (end_ptr[0] == '+')
			het_job_offset = strtol(end_ptr + 1, &end_ptr, 10);
	}

	error_code = scontrol_load_job(&job_buffer_ptr, job_id);

	/* With --json/--yaml requested, dump the raw response and return */
	if (mime_type) {
		openapi_resp_job_info_msg_t resp = {
			.jobs = job_buffer_ptr,
		};

		if (job_buffer_ptr) {
			resp.last_update = job_buffer_ptr->last_update;
			resp.last_backfill = job_buffer_ptr->last_backfill;
		}

		DATA_DUMP_CLI(OPENAPI_JOB_INFO_RESP, resp, argc, argv, NULL,
			      mime_type, data_parser, error_code);

		if (error_code)
			exit_code = 1;

		return;
	}

	if (error_code) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror("slurm_load_jobs error");
		return;
	}

	/* Verbose mode: report when the job data was last refreshed */
	if (quiet_flag == -1) {
		char time_str[256];
		slurm_make_time_str ((time_t *)&job_buffer_ptr->last_update,
				     time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, job_buffer_ptr->record_count);
	}

	for (i = 0, job_ptr = job_buffer_ptr->job_array;
	     i < job_buffer_ptr->record_count; i++, job_ptr++) {
		char *save_array_str = NULL;
		uint32_t save_task_id = 0;

		if (!_het_job_offset_match(job_ptr, het_job_offset))
			continue;
		if (!_task_id_in_job(job_ptr, array_id))
			continue;

		/*
		 * When one specific array task was requested, temporarily
		 * present the record as that single task (instead of the
		 * whole pending-task list), then restore it afterwards.
		 */
		if ((array_id != NO_VAL) && job_ptr->array_task_str) {
			save_array_str = job_ptr->array_task_str;
			job_ptr->array_task_str = NULL;
			save_task_id = job_ptr->array_task_id;
			job_ptr->array_task_id = array_id;
		}
		_print_job_info(stdout, job_ptr);
		if (save_array_str) {
			job_ptr->array_task_str = save_array_str;
			job_ptr->array_task_id = save_task_id;
		}
		print_cnt++;
	}

	if (print_cnt == 0) {
		if (job_id_str) {
			exit_code = 1;
			if (quiet_flag != 1) {
				if (array_id != NO_VAL) {
					printf("Job %u_%u not found\n",
					       job_id, array_id);
				} else if (het_job_offset != NO_VAL) {
					printf("Job %u+%u not found\n",
					       job_id, het_job_offset);
				} else {
					printf("Job %u not found\n", job_id);
				}
			}
		} else if (quiet_flag != 1)
			printf ("No jobs in the system\n");
	}
}
/*
* scontrol_print_step - print the specified job step's information
* IN job_step_id_str - job step's id or NULL to print information
* about all job steps
*/
extern void scontrol_print_step(char *job_step_id_str, int argc, char **argv)
{
	int error_code = 0, print_cnt = 0;
	slurm_step_id_t step_id = {
		.job_id = NO_VAL,
		.step_het_comp = NO_VAL,
		.step_id = NO_VAL,
	};
	uint32_t array_id = NO_VAL;
	job_step_info_response_msg_t *job_step_info_ptr = NULL;
	/*
	 * Static cache of the previous query so that repeated interactive
	 * calls for the same job/step can use the incremental last_update
	 * path of slurm_get_job_steps().
	 */
	static uint32_t last_job_id = 0, last_array_id, last_step_id = 0;
	static job_step_info_response_msg_t *old_job_step_info_ptr = NULL;
	static uint16_t last_show_flags = 0xffff;
	uint16_t show_flags = 0;
	job_step_info_t **steps = NULL;

	/*
	 * Step selector parsing: no argument means "all steps";
	 * "CID:<container-id>" resolves to a step via the container id;
	 * otherwise parse a jobid[_array][.step] expression.
	 */
	if (!job_step_id_str) {
		/* do nothing */
	} else if (!xstrncasecmp(job_step_id_str, CONTAINER_ID_TAG,
				 strlen(CONTAINER_ID_TAG))) {
		uid_t uid = SLURM_AUTH_NOBODY;
		list_t *step_list = list_create((ListDelF) slurm_free_step_id);
		char *cid = job_step_id_str + strlen(CONTAINER_ID_TAG);

		error_code = slurm_find_step_ids_by_container_id(
			SHOW_ALL, uid, cid, step_list);

		if (error_code || list_is_empty(step_list)) {
			/* job_id 0 marks the lookup as failed (see below) */
			step_id.job_id = 0;
		} else {
			/* just clone out the first step id details */
			step_id = *(slurm_step_id_t *) list_peek(step_list);
			job_step_id_str = NULL;
		}

		FREE_NULL_LIST(step_list);
	} else {
		slurm_selected_step_t id = {0};

		if (!(error_code = unfmt_job_id_string(job_step_id_str, &id,
						       NO_VAL))) {
			if (id.array_task_id != NO_VAL)
				array_id = id.array_task_id;
			step_id = id.step_id;
		}
	}

	if (all_flag)
		show_flags |= SHOW_ALL;

	if (local_flag)
		show_flags |= SHOW_LOCAL;

	if (!step_id.job_id || error_code) {
		/* step lookup failed already - skip trying again */
	} else if ((old_job_step_info_ptr) && (last_job_id == step_id.job_id) &&
	    (last_array_id == array_id) && (last_step_id == step_id.step_id)) {
		/*
		 * Same query as last time: ask only for changes since the
		 * cached response. A changed show_flags forces a full fetch
		 * by zeroing the cached timestamp.
		 */
		if (last_show_flags != show_flags)
			old_job_step_info_ptr->last_update = (time_t) 0;
		error_code = slurm_get_job_steps(
			old_job_step_info_ptr->last_update,
			step_id.job_id, step_id.step_id, &job_step_info_ptr,
			show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_step_info_response_msg (
				old_job_step_info_ptr);
		else if (errno == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current - reuse it */
			job_step_info_ptr = old_job_step_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf("slurm_get_job_steps no change in data\n");
		} else {
			error_code = errno;
		}
	} else {
		/* Different query: drop the cache and fetch from scratch */
		if (old_job_step_info_ptr) {
			slurm_free_job_step_info_response_msg (
				old_job_step_info_ptr);
			old_job_step_info_ptr = NULL;
		}
		error_code = slurm_get_job_steps ( (time_t) 0, step_id.job_id,
						  step_id.step_id,
						  &job_step_info_ptr,
						  show_flags);
		if ((error_code == SLURM_ERROR) && errno)
			error_code = errno;
	}

	if (error_code || !job_step_info_ptr) {
		/* Even on failure, --json/--yaml gets a (possibly empty) dump */
		if (mime_type) {
			openapi_resp_job_step_info_msg_t resp = {
				.steps = job_step_info_ptr,
			};

			if (job_step_info_ptr)
				resp.last_update =
					job_step_info_ptr->last_update;

			DATA_DUMP_CLI(OPENAPI_STEP_INFO_MSG, resp, argc, argv,
				      NULL, mime_type, data_parser, error_code);

			if (error_code)
				exit_code = 1;

			return;
		}

		exit_code = 1;
		if (quiet_flag != 1) {
			if (!step_id.job_id)
				printf("No job steps found\n");
			else
				error("%s: slurm_get_job_steps(%s) failed: %s",
				      __func__, job_step_id_str,
				      slurm_strerror(error_code));
		}
		return;
	}

	/* Remember this response for the incremental path next time */
	old_job_step_info_ptr = job_step_info_ptr;
	last_show_flags = show_flags;
	last_job_id = step_id.job_id;
	last_step_id = step_id.step_id;

	if (!mime_type && (quiet_flag == -1)) {
		char time_str[256];
		slurm_make_time_str ((time_t *)&job_step_info_ptr->last_update,
			             time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, job_step_info_ptr->job_step_count);
	}

	/*
	 * Build a NULL-terminated array of pointers to the steps that match
	 * the requested array task (all of them when array_id == NO_VAL).
	 */
	if (job_step_info_ptr->job_step_count) {
		int s = 0;
		steps = xcalloc(job_step_info_ptr->job_step_count + 1,
				sizeof(*steps));

		for (int i = 0; i < job_step_info_ptr->job_step_count; i++) {
			job_step_info_t *step =
				&job_step_info_ptr->job_steps[i];

			if ((array_id != NO_VAL) &&
			    (array_id != step->array_task_id))
				continue;

			steps[s] = step;
			s++;
		}
	}

	if (mime_type) {
		openapi_resp_job_step_info_msg_t resp = {
			.steps = job_step_info_ptr,
		};

		if (job_step_info_ptr)
			resp.last_update = job_step_info_ptr->last_update;

		DATA_DUMP_CLI(OPENAPI_STEP_INFO_MSG, resp, argc, argv, NULL,
			      mime_type, data_parser, error_code);
	} else if (steps) {
		int i = 0;
		for (; steps[i]; i++)
			slurm_print_job_step_info(stdout, steps[i], one_liner);
		print_cnt = i;
	}

	if (!mime_type && !print_cnt) {
		if (job_step_id_str) {
			exit_code = 1;
			if (quiet_flag != 1) {
				char tmp_char[45];
				log_build_step_id_str(&step_id, tmp_char,
						      sizeof(tmp_char),
						      (STEP_ID_FLAG_NO_PREFIX |
						       STEP_ID_FLAG_NO_JOB));
				if (array_id == NO_VAL) {
					printf("Job step %u.%s not found\n",
					       step_id.job_id, tmp_char);
				} else {
					printf("Job step %u_%u.%s not found\n",
					       step_id.job_id, array_id,
					       tmp_char);
				}
			}
		} else if (quiet_flag != 1)
			printf ("No job steps in the system\n");
	}

	xfree(steps);
}
/*
 * list_for_each() callback: record the job id of one slurmstepd entry,
 * skipping job ids already seen (a job may have several steps).
 */
static int _add_to_listjobs_list(void *x, void *arg)
{
	step_loc_t *stepd = x;
	add_to_listjobs_list_args_t *args = arg;
	list_t *listjobs_list = args->listjobs_list;
	list_t *jobs_seen = args->jobs_seen;
	slurm_step_id_t step_id = stepd->step_id;
	listjobs_info_t *listjobs_info;
	uint32_t *seen_id;

	/* Don't add duplicate job ids to the list */
	if (list_find_first(jobs_seen, slurm_find_uint32_in_list,
			    &step_id.job_id))
		return 0;

	seen_id = xmalloc(sizeof(*seen_id));
	*seen_id = step_id.job_id;
	list_append(jobs_seen, seen_id);

	listjobs_info = xmalloc(sizeof(*listjobs_info));
	listjobs_info->job_id = step_id.job_id;
	list_append(listjobs_list, listjobs_info);

	return 0;
}
static int _print_listjobs_info(void *x, void *arg)
{
uint32_t *job_id = x;
printf("%-8d\n", *job_id);
return 0;
}
/*
 * Serialize the listjobs result in the requested mime type (--json/--yaml)
 * and flag a non-zero exit on serialization failure.
 */
static void _dump_listjobs(list_t *listjobs_list, int argc, char **argv)
{
	int err;
	openapi_resp_listjobs_info_t resp = {
		.listjobs_list = listjobs_list,
	};

	DATA_DUMP_CLI(OPENAPI_LISTJOBS_INFO_RESP, resp, argc, argv, NULL,
		      mime_type, data_parser, err);

	if (err != SLURM_SUCCESS)
		exit_code = 1;
}
/*
 * scontrol_list_jobs - Print the jobs that have steps running on a node.
 *
 * argv[1] - optional name of the node to query; defaults to the local node
 */
extern void scontrol_list_jobs(int argc, char **argv)
{
	char *node_name = NULL;
	list_t *steps = NULL;
	list_t *listjobs_list = NULL;
	list_t *jobs_seen = NULL;
	add_to_listjobs_list_args_t for_each_args = { 0 };

	/*
	 * argv[0] is the command itself; the optional node name is argv[1].
	 * Guard with (argc > 1) -- the old (argc) test was always true and
	 * read argv[1] past the end of the array when no argument was given.
	 */
	if (argc > 1)
		node_name = argv[1];

	steps = stepd_available(NULL, node_name);
	if (!steps || !list_count(steps)) {
		if (mime_type)
			_dump_listjobs(NULL, argc, argv);
		else {
			fprintf(stderr, "No slurmstepd's found on this node\n");
			exit_code = 1;
		}
		goto cleanup;
	}

	/* jobs_seen de-duplicates job ids across multiple steps */
	listjobs_list = list_create(xfree_ptr);
	jobs_seen = list_create(xfree_ptr);
	for_each_args.listjobs_list = listjobs_list;
	for_each_args.jobs_seen = jobs_seen;
	list_for_each(steps, _add_to_listjobs_list, &for_each_args);

	if (mime_type) {
		_dump_listjobs(listjobs_list, argc, argv);
		goto cleanup;
	}

	printf("JOBID\n");
	list_for_each(listjobs_list, _print_listjobs_info, NULL);

cleanup:
	FREE_NULL_LIST(listjobs_list);
	FREE_NULL_LIST(jobs_seen);
	FREE_NULL_LIST(steps);
}
/* Return 1 on success, 0 on failure to find a jobid in the string */
/*
 * Parse the job id portion of "jobid[.stepid]".
 * Rejects empty, non-numeric, negative, and out-of-range input (the old
 * code accepted an empty string as job id 0 and negative values wrapped
 * through the uint32_t cast).
 * Return 1 on success, 0 on failure to find a jobid in the string.
 */
static int _parse_jobid(const char *jobid_str, uint32_t *out_jobid)
{
	char *ptr, *job;
	long jobid;

	job = xstrdup(jobid_str);
	/* Truncate at the first '.' so only the job id part is parsed */
	ptr = xstrchr(job, '.');
	if (ptr != NULL) {
		*ptr = '\0';
	}

	errno = 0;
	jobid = strtol(job, &ptr, 10);
	if ((ptr == job) || (errno == ERANGE) || (jobid < 0) ||
	    !xstring_is_whitespace(ptr)) {
		fprintf(stderr, "\"%s\" does not look like a jobid\n", job);
		xfree(job);
		return 0;
	}

	*out_jobid = (uint32_t) jobid;
	xfree(job);
	return 1;
}
/* Return 1 on success, 0 on failure to find a stepid in the string */
/*
 * Parse the optional ".stepid[+hetcomp]" suffix of a job id string into
 * step_id. A string without a '.' is valid and leaves step_id untouched.
 * Return 1 on success, 0 on failure to find a stepid in the string.
 */
static int _parse_stepid(const char *jobid_str, slurm_step_id_t *step_id)
{
	int rc = 1;
	char *buf, *pos, *tok;

	buf = xstrdup(jobid_str);
	pos = xstrchr(buf, '.');
	if (!pos) {
		/* did not find a period, so no step ID in this string */
		xfree(buf);
		return rc;
	}
	tok = pos + 1;

	step_id->step_id = (uint32_t) strtol(tok, &pos, 10);

	tok = xstrchr(pos, '+');
	if (tok) {
		/* het step: component number follows the '+' */
		tok++;
		step_id->step_het_comp = (uint32_t) strtol(tok, &pos, 10);
	} else
		step_id->step_het_comp = NO_VAL;

	if (!xstring_is_whitespace(pos)) {
		fprintf(stderr, "\"%s\" does not look like a stepid\n",
			jobid_str);
		rc = 0;
	}

	xfree(buf);
	return rc;
}
/*
 * Return true when pid belongs to one of the step's launched tasks
 * (as opposed to an extra process in the proctrack container).
 */
static bool
_in_task_array(pid_t pid, slurmstepd_task_info_t *task_array,
	       uint32_t task_array_count)
{
	uint32_t idx;

	for (idx = 0; idx < task_array_count; idx++) {
		if (task_array[idx].pid == pid)
			return true;
	}

	return false;
}
/*
 * Query one slurmstepd for its task pids and its proctrack-container pids,
 * appending one listpids_info_t per live pid to listpids_list.
 * Sets exit_code on connection failure. (The duplicate exit_code
 * assignment inside the ENOENT branch was removed; it was already set
 * unconditionally just above.)
 */
static void _list_pids_one_step(const char *node_name, slurm_step_id_t *step_id,
				list_t *listpids_list)
{
	int fd;
	slurmstepd_task_info_t *task_info = NULL;
	uint32_t *pids = NULL;
	uint32_t count = 0;
	uint32_t tcount = 0;
	int i;
	uint16_t protocol_version;
	char tmp_char[64];

	fd = stepd_connect(NULL, node_name, step_id, &protocol_version);
	if (fd == -1) {
		exit_code = 1;
		if (errno == ENOENT) {
			fprintf(stderr,
				"%s does not exist on this node.\n",
				log_build_step_id_str(step_id, tmp_char,
						      sizeof(tmp_char),
						      STEP_ID_FLAG_NONE));
		} else {
			perror("Unable to connect to slurmstepd");
		}
		return;
	}

	/* Render the step id once; each list entry gets its own copy */
	log_build_step_id_str(step_id, tmp_char, sizeof(tmp_char),
			      STEP_ID_FLAG_NO_JOB | STEP_ID_FLAG_NO_PREFIX);

	/* Get all task pids */
	stepd_task_info(fd, protocol_version, &task_info, &tcount);
	for (i = 0; i < (int)tcount; i++) {
		if (task_info[i].exited)
			continue;
		listpids_info_t *listpids_info = xmalloc(
			sizeof(*listpids_info));
		listpids_info->global_task_id = task_info[i].gtid;
		listpids_info->job_id = step_id->job_id;
		listpids_info->local_task_id = task_info[i].id;
		listpids_info->pid = task_info[i].pid;
		listpids_info->step_id = xstrdup(tmp_char);
		list_append(listpids_list, listpids_info);
	}

	/* Get pids in proctrack container (slurmstepd, srun, etc.) */
	stepd_list_pids(fd, protocol_version, &pids, &count);
	for (i = 0; i < count; i++) {
		/* Skip pids already reported as tasks above */
		if (_in_task_array((pid_t) pids[i], task_info, tcount))
			continue;
		listpids_info_t *listpids_info = xmalloc(
			sizeof(*listpids_info));
		listpids_info->global_task_id = NO_VAL;
		listpids_info->job_id = step_id->job_id;
		listpids_info->local_task_id = NO_VAL;
		listpids_info->pid = pids[i];
		listpids_info->step_id = xstrdup(tmp_char);
		list_append(listpids_list, listpids_info);
	}

	xfree(pids);
	xfree(task_info);
	close(fd);
}
/*
 * Serialize the listpids result in the requested mime type (--json/--yaml)
 * and flag a non-zero exit on serialization failure.
 */
static void _dump_listpids(list_t *listpids_list, int argc, char **argv)
{
	int err;
	openapi_resp_listpids_info_t resp = {
		.listpids_list = listpids_list,
	};

	DATA_DUMP_CLI(OPENAPI_LISTPIDS_INFO_RESP, resp, argc, argv, NULL,
		      mime_type, data_parser, err);

	if (err != SLURM_SUCCESS)
		exit_code = 1;
}
/*
 * Collect pids for every step of step_id->job_id on node_name (or just the
 * one step when step_id->step_id != NO_VAL) into listpids_list.
 * Sets exit_code when no matching slurmstepd is found.
 */
static void _list_pids_all_steps(const char *node_name,
				 slurm_step_id_t *step_id,
				 list_t* listpids_list,
				 int argc, char **argv)
{
	list_t *steps;
	list_itr_t *itr;
	step_loc_t *stepd;
	int count = 0;
	char tmp_char[64];

	/* A fully-qualified het component names exactly one step */
	if (step_id->step_het_comp != NO_VAL) {
		_list_pids_one_step(node_name, step_id, listpids_list);
		return;
	}

	steps = stepd_available(NULL, node_name);
	if (!steps || list_count(steps) == 0) {
		if (mime_type) {
			/* emit an empty --json/--yaml document */
			_dump_listpids(NULL, argc, argv);
		} else {
			fprintf(stderr, "%s does not exist on node %s.\n",
				log_build_step_id_str(step_id, tmp_char,
						      sizeof(tmp_char),
						      STEP_ID_FLAG_NONE),
				node_name);
		}
		FREE_NULL_LIST(steps);
		exit_code = 1;
		return;
	}

	/* Walk all local stepds, keeping those that match job (and step) id */
	itr = list_iterator_create(steps);
	while ((stepd = list_next(itr))) {
		if (step_id->job_id != stepd->step_id.job_id)
			continue;

		if ((step_id->step_id != NO_VAL) &&
		    (step_id->step_id != stepd->step_id.step_id))
			continue;

		_list_pids_one_step(stepd->nodename, &stepd->step_id,
				    listpids_list);
		count++;
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(steps);

	/* Nothing matched: report either the missing step or missing job */
	if (count == 0) {
		if (step_id->step_id != NO_VAL) {
			fprintf(stderr, "%s does not exist on node %s.\n",
				log_build_step_id_str(step_id, tmp_char,
						      sizeof(tmp_char),
						      STEP_ID_FLAG_NONE),
				node_name);
		} else
			fprintf(stderr, "There are no steps for job %u on node %s.\n",
				step_id->job_id, node_name);
		exit_code = 1;
	}
}
/*
 * Collect pids for every step of every job on node_name into
 * listpids_list. Sets exit_code when no slurmstepd is found.
 */
static void _list_pids_all_jobs(const char *node_name, list_t *listpids_list,
				int argc, char **argv)
{
	list_t *stepd_list;
	list_itr_t *iter;
	step_loc_t *loc;

	stepd_list = stepd_available(NULL, node_name);
	if (!stepd_list || (list_count(stepd_list) == 0)) {
		if (mime_type)
			_dump_listpids(NULL, argc, argv);
		else
			fprintf(stderr, "No job steps exist on this node.\n");
		FREE_NULL_LIST(stepd_list);
		exit_code = 1;
		return;
	}

	iter = list_iterator_create(stepd_list);
	while ((loc = list_next(iter)))
		_list_pids_one_step(loc->nodename, &loc->step_id,
				    listpids_list);
	list_iterator_destroy(iter);
	FREE_NULL_LIST(stepd_list);
}
/*
 * list_for_each() callback: print one pid row. Task id columns show "-"
 * when the pid is not a launched task (NO_VAL).
 */
static int _print_listpids_info(void *x, void *arg)
{
	listpids_info_t *info = x;

	printf("%-8d %-8d %-8s ",
	       info->pid, info->job_id, info->step_id);

	if (info->local_task_id == NO_VAL)
		printf("%-7s ", "-");
	else
		printf("%-7d ", info->local_task_id);

	if (info->global_task_id == NO_VAL)
		printf("%-8s ", "-");
	else
		printf("%-8d ", info->global_task_id);

	printf("\n");
	return 0;
}
/* list destructor: release one listpids_info_t and its step_id string */
static void _free_listpids_info(void *x)
{
	listpids_info_t *info = x;

	if (!info)
		return;

	xfree(info->step_id);
	xfree(info);
}
/*
 * scontrol_list_pids - given a slurmd job ID or job ID + step ID,
 *	print the process IDs of the processes in each job step (or
 *	just the specified step ID).
 * argv[1] - optional string representing a jobid: jobid[.stepid],
 *	or "*" for all jobs
 * argv[2] - optional node name. May be omitted, in which case the
 *	NodeName of the local host is determined automatically.
 *	This is mostly of use when multiple-slurmd support is in use,
 *	because if omitted when there are multiple slurmd on the
 *	node, one of them will be selected more-or-less at random.
 */
extern void scontrol_list_pids(int argc, char **argv)
{
	char *jobid_str = NULL;
	char *node_name = NULL;
	list_t *listpids_list = NULL;
	slurm_step_id_t step_id = {
		.job_id = 0,
		.step_id = NO_VAL,
		.step_het_comp = NO_VAL,
	};

	/* argv[0] is the command; optional jobid and node name follow */
	if (argc >= 2)
		jobid_str = argv[1];
	if (argc >= 3)
		node_name = argv[2];

	/* Job ID is optional */
	if (jobid_str != NULL
	    && jobid_str[0] != '*'
	    && !_parse_jobid(jobid_str, &step_id.job_id)) {
		exit_code = 1;
		return;
	}

	listpids_list = list_create(_free_listpids_info);

	/* Step ID is optional */
	if (jobid_str == NULL || jobid_str[0] == '*') {
		_list_pids_all_jobs(node_name, listpids_list, argc, argv);
	} else if (_parse_stepid(jobid_str, &step_id))
		_list_pids_all_steps(node_name, &step_id, listpids_list, argc,
				     argv);
	else {
		/*
		 * Invalid step id: fail like the invalid-jobid path instead
		 * of printing the header over an empty table with exit 0
		 * (_parse_stepid already reported the error to stderr).
		 */
		exit_code = 1;
	}

	if (exit_code)
		goto cleanup;

	if (mime_type) {
		_dump_listpids(listpids_list, argc, argv);
		goto cleanup;
	}

	printf("%-8s %-8s %-8s %-7s %-8s\n",
	       "PID", "JOBID", "STEPID", "LOCALID", "GLOBALID");
	list_for_each(listpids_list, _print_listpids_info, NULL);

cleanup:
	FREE_NULL_LIST(listpids_list);
}
/*
 * list_for_each() callback: record one slurmstepd entry as a
 * (job_id, "step id" string) pair on liststeps_list.
 */
static int _add_to_liststeps_list(void *x, void *arg)
{
	step_loc_t *stepd = x;
	list_t *liststeps_list = arg;
	slurm_step_id_t step_id = stepd->step_id;
	liststeps_info_t *info;
	char sid_buf[32];

	/* Render the step id without the job id or "StepId=" prefix */
	log_build_step_id_str(&step_id, sid_buf, sizeof(sid_buf),
			      STEP_ID_FLAG_NO_JOB | STEP_ID_FLAG_NO_PREFIX);

	info = xmalloc(sizeof(*info));
	info->step_id = xstrdup(sid_buf);
	info->job_id = step_id.job_id;
	list_append(liststeps_list, info);

	return 0;
}
/* list_for_each() callback: print one "JOBID STEPID" row */
static int _print_liststeps_info(void *x, void *arg)
{
	liststeps_info_t *info = x;

	printf("%-8d %-8s\n", info->job_id, info->step_id);
	return 0;
}
/* list destructor: release one liststeps_info_t and its step_id string */
static void _free_liststeps_info(void *x)
{
	liststeps_info_t *info = x;

	if (!info)
		return;

	xfree(info->step_id);
	xfree(info);
}
/*
 * Serialize the liststeps result in the requested mime type (--json/--yaml)
 * and flag a non-zero exit on serialization failure.
 */
static void _dump_liststeps(list_t *liststeps_list, int argc, char **argv)
{
	int err;
	openapi_resp_liststeps_info_t resp = {
		.liststeps_list = liststeps_list,
	};

	DATA_DUMP_CLI(OPENAPI_LISTSTEPS_INFO_RESP, resp, argc, argv, NULL,
		      mime_type, data_parser, err);

	if (err != SLURM_SUCCESS)
		exit_code = 1;
}
/*
 * scontrol_list_steps - Print the steps running on a node.
 *
 * argv[1] - optional name of the node to query; defaults to the local node
 */
extern void scontrol_list_steps(int argc, char **argv)
{
	list_t *liststeps_list = NULL;
	char *node_name = NULL;
	list_t *steps;

	/*
	 * argv[0] is the command itself; the optional node name is argv[1].
	 * Guard with (argc > 1) -- the old (argc) test was always true and
	 * read argv[1] past the end of the array when no argument was given.
	 */
	if (argc > 1)
		node_name = argv[1];

	steps = stepd_available(NULL, node_name);
	if (!steps || !list_count(steps)) {
		if (mime_type)
			_dump_liststeps(NULL, argc, argv);
		else {
			fprintf(stderr, "No slurmstepd's found on this node\n");
			exit_code = 1;
		}
		goto cleanup;
	}

	liststeps_list = list_create(_free_liststeps_info);
	list_for_each(steps, _add_to_liststeps_list, liststeps_list);

	if (mime_type) {
		_dump_liststeps(liststeps_list, argc, argv);
		goto cleanup;
	}

	printf("%-8s %-8s\n", "JOBID", "STEPID");
	list_for_each(liststeps_list, _print_liststeps_info, NULL);

cleanup:
	FREE_NULL_LIST(liststeps_list);
	FREE_NULL_LIST(steps);
}
/*
 * For every step on node_name, connect to its slurmstepd and print the
 * passwd and group entries the stepd resolved for the job's user
 * (getent-style output, one section per step).
 */
extern void scontrol_getent(const char *node_name)
{
	list_t *steps = NULL;
	list_itr_t *itr = NULL;
	step_loc_t *stepd;
	int fd;
	struct passwd *pwd = NULL;
	struct group **grps = NULL;

	if (!(steps = stepd_available(NULL, node_name))) {
		fprintf(stderr, "No steps found on this node\n");
		return;
	}

	itr = list_iterator_create(steps);
	while ((stepd = list_next(itr))) {
		char tmp_char[45];

		/* Unreachable stepds are skipped silently */
		fd = stepd_connect(NULL, node_name, &stepd->step_id,
				   &stepd->protocol_version);
		if (fd < 0)
			continue;
		pwd = stepd_getpw(fd, stepd->protocol_version,
				  GETPW_MATCH_ALWAYS, 0, NULL);

		if (!pwd) {
			close(fd);
			continue;
		}

		log_build_step_id_str(&stepd->step_id, tmp_char,
				      sizeof(tmp_char), STEP_ID_FLAG_NO_PREFIX);
		printf("JobId=%s:\nUser:\n", tmp_char);

		/* passwd(5)-style colon-separated record */
		printf("%s:%s:%u:%u:%s:%s:%s\nGroups:\n",
		       pwd->pw_name, pwd->pw_passwd, pwd->pw_uid, pwd->pw_gid,
		       pwd->pw_gecos, pwd->pw_dir, pwd->pw_shell);

		xfree_struct_passwd(pwd);

		grps = stepd_getgr(fd, stepd->protocol_version,
				   GETGR_MATCH_ALWAYS, 0, NULL);
		if (!grps) {
			close(fd);
			printf("\n");
			continue;
		}

		/* group(5)-style records; grps is a NULL-terminated array */
		for (int i = 0; grps[i]; i++) {
			printf("%s:%s:%u:%s\n",
			       grps[i]->gr_name, grps[i]->gr_passwd,
			       grps[i]->gr_gid,
			       (grps[i]->gr_mem) ? grps[i]->gr_mem[0] : "");
		}
		close(fd);
		xfree_struct_group_array(grps);
		printf("\n");
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(steps);
}
/*
 * For every step on stepd_node, connect to its slurmstepd and print the
 * host entry (addresses, canonical name, aliases) that the stepd resolves
 * for node_name.
 */
extern void scontrol_gethost(const char *stepd_node, const char *node_name)
{
	list_t *steps = NULL;
	list_itr_t *itr = NULL;
	step_loc_t *stepd;
	int fd;

	if (!(steps = stepd_available(NULL, stepd_node))) {
		fprintf(stderr, "No steps found on this node\n");
		return;
	}

	itr = list_iterator_create(steps);
	while ((stepd = list_next(itr))) {
		char tmp_char[45], buf[INET6_ADDRSTRLEN];
		struct hostent *host = NULL;
		const char *ip;
		int i, j;

		/* Unreachable stepds are skipped silently */
		fd = stepd_connect(NULL, stepd_node, &stepd->step_id,
				   &stepd->protocol_version);
		if (fd < 0)
			continue;

		host = stepd_gethostbyname(fd, stepd->protocol_version,
					   (GETHOST_IPV4 | GETHOST_IPV6 |
					    GETHOST_NOT_MATCH_PID), node_name);
		log_build_step_id_str(&stepd->step_id, tmp_char,
				      sizeof(tmp_char), STEP_ID_FLAG_NO_PREFIX);
		printf("JobId=%s:\nHost:\n", tmp_char);
		for (i = 0; host && host->h_addr_list[i] != NULL; ++i) {
			ip = inet_ntop(host->h_addrtype, host->h_addr_list[i],
				       buf, sizeof (buf));
			printf("%-15s %s", ip, host->h_name);
			/*
			 * Bug fix: index the alias list with the inner loop
			 * variable j (it was h_aliases[i], which repeated one
			 * alias and could read past the end of the array).
			 */
			for (j = 0; host->h_aliases[j] != NULL; ++j) {
				printf(" %s", host->h_aliases[j]);
			}
			printf("\n");
		}
		xfree_struct_hostent(host);
		close(fd);
		printf("\n");
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(steps);
}
/*
* scontrol_print_hosts - given a node list expression, return
* a list of nodes, one per line
*/
extern void
scontrol_print_hosts (char * node_list)
{
	hostlist_t *host_list;
	char *name;

	if (!node_list) {
		error("host list is empty");
		return;
	}

	host_list = hostlist_create_client(node_list);
	if (!host_list) {
		fprintf(stderr, "Invalid hostlist: %s\n", node_list);
		return;
	}

	/* Pop hosts one at a time, printing one name per line */
	while ((name = hostlist_shift_dims(host_list, 0))) {
		printf("%s\n", name);
		free(name);
	}
	hostlist_destroy(host_list);
}
/* Replace '\n' with ',', remove duplicate comma */
/* Replace '\n' with ',', remove duplicate comma */
static void
_reformat_hostlist(char *hostlist)
{
	int rd, wr;

	/* First pass: turn every newline into a comma */
	for (rd = 0; hostlist[rd] != '\0'; rd++) {
		if (hostlist[rd] == '\n')
			hostlist[rd] = ',';
	}

	/* Second pass: compact runs of commas down to a single comma */
	wr = 0;
	for (rd = 0; hostlist[rd] != '\0'; rd++) {
		while ((hostlist[rd] == ',') && (hostlist[rd + 1] == ','))
			rd++;
		hostlist[wr++] = hostlist[rd];
	}
	hostlist[wr] = '\0';
}
/*
* scontrol_encode_hostlist - given a list of hostnames or the pathname
* of a file containing hostnames, translate them into a hostlist
* expression
*/
extern int scontrol_encode_hostlist(char *arg_hostlist, bool sorted)
{
	char *io_buf = NULL, *tmp_list, *ranged_string, *hostlist;
	int buf_size = 1024 * 1024;	/* 1MB cap on hostlist files */
	int data_read = 0;
	hostlist_t *hl;

	if (!arg_hostlist) {
		fprintf(stderr, "Hostlist is NULL\n");
		return SLURM_ERROR;
	}

	/* "-" means read the host names from standard input */
	if (!xstrcmp(arg_hostlist, "-"))
		hostlist = "/dev/stdin";
	else
		hostlist = arg_hostlist;

	/* A leading '/' means the argument is a path to a host name file */
	if (hostlist[0] == '/') {
		ssize_t buf_read;
		int fd = open(hostlist, O_RDONLY);

		if (fd < 0) {
			fprintf(stderr, "Can not open %s\n", hostlist);
			return SLURM_ERROR;
		}
		io_buf = xmalloc(buf_size + 1);
		/* Accumulate short reads until EOF or the buffer is full */
		while ((buf_read = read(fd, &io_buf[data_read],
					buf_size - data_read)) > 0) {
			data_read += buf_read;
		}
		close(fd);

		if (buf_read < 0) {
			xfree(io_buf);
			fprintf(stderr, "Error reading %s\n", hostlist);
			return SLURM_ERROR;
		}

		if (data_read >= buf_size) {
			/* If over 1MB, the file is almost certainly invalid */
			fprintf(stderr, "File %s is too large\n", hostlist);
			xfree(io_buf);
			return SLURM_ERROR;
		}
		io_buf[data_read] = '\0';
		/* One name per line -> comma-separated list */
		_reformat_hostlist(io_buf);
		tmp_list = io_buf;
	} else
		tmp_list = hostlist;

	hl = hostlist_create_client(tmp_list);
	if (hl == NULL) {
		fprintf(stderr, "Invalid hostlist: %s\n", tmp_list);
		xfree(io_buf);
		return SLURM_ERROR;
	}
	if (sorted)
		hostlist_sort(hl);
	/* Print the compacted range expression (e.g. "node[1-4]") */
	ranged_string = hostlist_ranged_string_xmalloc(hl);
	printf("%s\n", ranged_string);
	hostlist_destroy(hl);
	xfree(ranged_string);
	xfree(io_buf);
	return SLURM_SUCCESS;
}
/*
 * Poll slurm_job_node_ready() until the job's nodes are booted and the
 * prolog has completed, the job leaves a runnable state, or a bounded
 * delay (derived from SuspendTimeout + ResumeTimeout) expires.
 * Returns SLURM_SUCCESS when the nodes are ready, SLURM_ERROR otherwise.
 */
static int _wait_nodes_ready(uint32_t job_id)
{
	int is_ready = SLURM_ERROR, i, rc = 0;
	int cur_delay = 0;
	int max_delay;

	if (!slurm_conf.suspend_timeout || !slurm_conf.resume_timeout)
		return SLURM_SUCCESS;	/* Power save mode disabled */
	max_delay = slurm_conf.suspend_timeout + slurm_conf.resume_timeout;
	max_delay *= 5;		/* Allow for ResumeRate support */

	for (i=0; (cur_delay < max_delay); i++) {
		if (i) {
			/* Announce once, then sleep between polls */
			if (i == 1)
				info("Waiting for nodes to boot");
			sleep(POLL_SLEEP);
			cur_delay += POLL_SLEEP;
		}

		/* rc is a READY_* bit mask (or an error code) */
		rc = slurm_job_node_ready(job_id);

		if (rc == READY_JOB_FATAL)
			break;				/* fatal error */
		if ((rc == READY_JOB_ERROR) || (rc == EAGAIN))
			continue;			/* retry */
		if ((rc & READY_JOB_STATE) == 0)	/* job killed */
			break;
		if ((rc & READY_NODE_STATE) &&
		    (rc & READY_PROLOG_STATE)) {
			is_ready = SLURM_SUCCESS;
			break;
		}
	}
	if (is_ready == SLURM_SUCCESS)
     		info("Nodes are ready for job %u", job_id);
	else if ((rc & READY_JOB_STATE) == 0)
		info("Job %u no longer running", job_id);
	else
		info("Problem running job %u", job_id);

	return is_ready;
}
/*
* Wait until a job is ready to execute or enters some failed state
* RET 1: job ready to run
* 0: job can't run (cancelled, failure state, timeout, etc.)
*/
extern int scontrol_job_ready(char *job_id_str)
{
	long job_id;
	char *end_ptr = NULL;

	/*
	 * Parse with strtol and validate fully. The previous code stored
	 * atoi()'s result in a uint32_t and tested "<= 0", which can never
	 * be true for negative input (it wraps to a huge unsigned value),
	 * and non-numeric input silently became job id 0.
	 */
	errno = 0;
	job_id = strtol(job_id_str, &end_ptr, 10);
	if ((errno == ERANGE) || (end_ptr == job_id_str) ||
	    (end_ptr[0] != '\0') || (job_id <= 0) ||
	    (job_id > 0xffffffffL)) {
		fprintf(stderr, "Invalid job_id %s\n", job_id_str);
		return SLURM_ERROR;
	}

	return _wait_nodes_ready((uint32_t) job_id);
}
extern int scontrol_callerid(int argc, char **argv)
{
int af, ver = 4;
unsigned char ip_src[sizeof(struct in6_addr)],
ip_dst[sizeof(struct in6_addr)];
uint32_t port_src, port_dst, job_id;
network_callerid_msg_t req;
char node_name[HOST_NAME_MAX], *ptr;
if (argc == 5) {
ver = strtoul(argv[4], &ptr, 0);
if (ptr && ptr[0]) {
error("Address family not an integer");
return SLURM_ERROR;
}
}
if (ver != 4 && ver != 6) {
error("Invalid address family: %d", ver);
return SLURM_ERROR;
}
af = ver == 4 ? AF_INET : AF_INET6;
if (!inet_pton(af, argv[0], ip_src)) {
error("inet_pton failed for '%s'", argv[0]);
return SLURM_ERROR;
}
port_src = strtoul(argv[1], &ptr, 0);
if (ptr && ptr[0]) {
error("Source port not an integer");
return SLURM_ERROR;
}
if (!inet_pton(af, argv[2], ip_dst)) {
error("scontrol_callerid: inet_pton failed for '%s'", argv[2]);
return SLURM_ERROR;
}
port_dst = strtoul(argv[3], &ptr, 0);
if (ptr && ptr[0]) {
error("Destination port not an integer");
return SLURM_ERROR;
}
memcpy(req.ip_src, ip_src, 16);
memcpy(req.ip_dst, ip_dst, 16);
req.port_src = port_src;
req.port_dst = port_dst;
req.af = af;
if (slurm_network_callerid(req, &job_id, node_name, HOST_NAME_MAX)
!= SLURM_SUCCESS) {
fprintf(stderr,
"slurm_network_callerid: unable to retrieve callerid data from remote slurmd\n");
return SLURM_ERROR;
} else if (job_id == NO_VAL) {
fprintf(stderr,
"slurm_network_callerid: remote job id indeterminate\n");
return SLURM_ERROR;
} else {
printf("%u %s\n", job_id, node_name);
return SLURM_SUCCESS;
}
}
/*
 * Fetch a job's batch script and write it to a file (or stdout for "-").
 * argv[0] = job id; argv[1] (optional) = output file name, defaulting to
 * "slurm-<jobid>.sh". Returns SLURM_SUCCESS, SLURM_ERROR, or errno from
 * a failed fopen().
 */
extern int scontrol_batch_script(int argc, char **argv)
{
	char *filename;
	FILE *out;
	int rc;		/* was "exit_code": renamed to stop shadowing the global */
	uint32_t jobid;

	if (argc < 1)
		return SLURM_ERROR;

	jobid = atoll(argv[0]);

	if (argc > 1)
		filename = xstrdup(argv[1]);
	else
		filename = xstrdup_printf("slurm-%u.sh", jobid);

	if (!xstrcmp(filename, "-")) {
		out = stdout;
	} else {
		if (!(out = fopen(filename, "w"))) {
			fprintf(stderr, "failed to open file `%s`: %m\n",
				filename);
			xfree(filename);
			return errno;
		}
	}

	rc = slurm_job_batch_script(out, jobid);

	if (out != stdout)
		fclose(out);

	if (rc != SLURM_SUCCESS) {
		/* Don't leave an empty/partial script file behind */
		if (out != stdout)
			unlink(filename);
		slurm_perror("job script retrieval failed");
	} else if ((out != stdout) && (quiet_flag != 1)) {
		printf("batch script for job %u written to %s\n",
		       jobid, filename);
	}

	xfree(filename);
	return rc;
}