blob: 70f27685dbf81d66c21893af6257480d187c41d1 [file] [log] [blame]
.TH "Slurm API" "3" "December 2006" "Morris Jette" "Slurm job step launch functions"
.SH "NAME"
slurm_step_launch_params_t_init, slurm_step_launch, slurm_step_launch_wait_start,
slurm_step_launch_wait_finish, slurm_step_launch_abort \- Slurm job step launch functions
.SH "SYNTAX"
.LP
#include <slurm/slurm.h>
.LP
.LP
void \fBslurm_step_launch_params_t_init\fR (
.br
slurm_step_launch_params_t *\fIlaunch_req\fP
.br
);
.LP
int \fBslurm_step_launch\fR (
.br
slurm_step_ctx \fIctx\fP,
.br
const slurm_step_launch_params_t *\fIlaunch_req\fP,
.br
const slurm_step_launch_callbacks_t *\fIcallbacks\fP
.br
);
.LP
int \fBslurm_step_launch_wait_start\fR (
.br
slurm_step_ctx \fIctx\fP
.br
);
.LP
void \fBslurm_step_launch_wait_finish\fR (
.br
slurm_step_ctx \fIctx\fP
.br
);
.LP
void \fBslurm_step_launch_abort\fR (
.br
slurm_step_ctx \fIctx\fP
.br
);
.SH "ARGUMENTS"
.LP
.TP
\fIcallbacks\fP
Identify functions to be called when various events occur.
.TP
\fIctx\fP
Job step context. Created by \fBslurm_step_ctx_create\fR, used in subsequent
function calls, and destroyed by \fBslurm_step_ctx_destroy\fR.
.TP
\fIlaunch_req\fP
Pointer to a structure allocated by the user containing specifications of
the job step to be launched.
.SH "DESCRIPTION"
.LP
\fBslurm_step_launch_params_t_init\fR Initialize a user\-allocated
slurm_step_launch_params_t structure with default values.
This function will NOT allocate any new memory.
.LP
\fBslurm_step_launch\fR Launch a parallel job step.
.LP
\fBslurm_step_launch_wait_start\fR Block until all tasks have started.
.LP
\fBslurm_step_launch_wait_finish\fR Block until all tasks have finished
(or failed to start altogether).
.LP
\fBslurm_step_launch_abort\fR Abort an in-progress launch, or terminate
the fully launched job step. Can be called from a signal handler.
.SH "RETURN VALUE"
.LP
\fBslurm_step_launch\fR and \fBslurm_step_launch_wait_start\fR
will return SLURM_SUCCESS when all tasks have successfully started,
or SLURM_ERROR if the job step is aborted during launch.
.SH "ERRORS"
.LP
\fBEINVAL\fR Invalid argument
.LP
\fBSLURM_PROTOCOL_VERSION_ERROR\fR Protocol version has changed, re\-link your code.
.LP
\fBESLURM_INVALID_JOB_ID\fR the requested job id does not exist.
.LP
\fBESLURM_ALREADY_DONE\fR the specified job has already completed and can not be modified.
.LP
\fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested action (e.g. trying to delete or modify another user's job).
.LP
\fBESLURM_INTERCONNECT_FAILURE\fR failed to configure the node interconnect.
.LP
\fBESLURM_BAD_DIST\fR task distribution specification is invalid.
.LP
\fBSLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT\fR Timeout in communicating with
SLURM controller.
.SH "EXAMPLE"
.LP
.nf
/*
* To compile:
* gcc test.c \-o test \-g \-pthread \-lslurm
*
* Or if Slurm is not in your default search paths:
* gcc test.c \-o test \-g \-pthread \-I${SLURM_DIR}/include \\
* \-Wl,\-\-rpath=${SLURM_DIR}/lib \-L${SLURM_DIR}/lib \-lslurm
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>
static void _task_start(launch_tasks_response_msg_t *msg)
{
printf("%d tasks started on node %s\\n",
msg->count_of_pids, msg->node_name);
}
static void _task_finish(task_exit_msg_t *msg)
{
printf("%d tasks finished\\n", msg->num_tasks);
}
int main (int argc, char *argv[])
{
slurm_step_ctx_params_t step_params;
slurm_step_ctx step_ctx;
slurm_step_launch_params_t params;
slurm_step_launch_callbacks_t callbacks;
uint32_t job_id, step_id;
slurm_step_ctx_params_t_init(&step_params);
step_params.node_count = 1;
step_params.task_count = 4;
step_params.overcommit = true;
step_ctx = slurm_step_ctx_create(&step_params);
if (step_ctx == NULL) {
slurm_perror("slurm_step_ctx_create");
exit(1);
}
slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id);
slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id);
printf("Ready to start job %u step %u\\n", job_id, step_id);
slurm_step_launch_params_t_init(&params);
params.argc = argc \- 1;
params.argv = argv + 1;
callbacks.task_start = _task_start;
callbacks.task_finish = _task_finish;
if (slurm_step_launch(step_ctx, &params, &callbacks)
!= SLURM_SUCCESS) {
slurm_perror("slurm_step_launch");
exit(1);
}
printf("Sent step launch RPC\\n");
if (slurm_step_launch_wait_start(step_ctx) != SLURM_SUCCESS) {
fprintf(stderr, "job step was aborted during launch\\n");
} else {
printf("All tasks have started\\n");
}
slurm_step_launch_wait_finish(step_ctx);
printf("All tasks have finished\\n");
slurm_step_ctx_destroy(step_ctx);
exit(0);
}
.fi
.SH "NOTE"
These functions are included in the libslurm library,
which must be linked to your process for use
(e.g. "cc myprog.c \-lslurm").
.SH "COPYING"
Copyright (C) 2006 The Regents of the University of California.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
UCRL\-CODE\-226842.
.LP
This file is part of SLURM, a resource management program.
For details, see <https://computing.llnl.gov/linux/slurm/>.
.LP
SLURM is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
.LP
SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
.SH "SEE ALSO"
.LP
\fBslurm_step_ctx_create\fR(3), \fBslurm_step_ctx_destroy\fR(3),
\fBslurm_get_errno\fR(3), \fBslurm_perror\fR(3), \fBslurm_strerror\fR(3),
\fBsalloc\fR(1), \fBsrun\fR(1)