| .TH "Slurm API" "3" "December 2006" "Morris Jette" "Slurm job step launch functions" |
| |
| .SH "NAME" |
| |
| slurm_step_launch_params_t_init, slurm_step_launch, slurm_step_launch_wait_start, |
| slurm_step_launch_wait_finish, slurm_step_launch_abort \- Slurm job step launch functions |
| |
| .SH "SYNTAX" |
| .LP |
| #include <slurm/slurm.h> |
| .LP |
| .LP |
| void \fBslurm_step_launch_params_t_init\fR ( |
| .br |
| slurm_step_launch_params_t *\fIlaunch_req\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_step_launch\fR ( |
| .br |
| slurm_step_ctx \fIctx\fP, |
| .br |
| const slurm_step_launch_params_t *\fIlaunch_req\fP, |
| .br |
| const slurm_step_launch_callbacks_t *\fIcallbacks\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_step_launch_wait_start\fR ( |
| .br |
| slurm_step_ctx \fIctx\fP |
| .br |
| ); |
| .LP |
| void \fBslurm_step_launch_wait_finish\fR ( |
| .br |
| slurm_step_ctx \fIctx\fP |
| .br |
| ); |
| .LP |
| void \fBslurm_step_launch_abort\fR ( |
| .br |
| slurm_step_ctx \fIctx\fP |
| .br |
| ); |
| |
| .SH "ARGUMENTS" |
| .LP |
| .TP |
| \fIcallbacks\fP |
| Identify functions to be called when various events occur. |
| .TP |
| \fIctx\fP |
| Job step context. Created by \fBslurm_step_ctx_create\fR, used in subsequent |
| function calls, and destroyed by \fBslurm_step_ctx_destroy\fR. |
| .TP |
| \fIlaunch_req\fP |
| Pointer to a structure allocated by the user containing specifications of |
| the job step to be launched. |
| |
| .SH "DESCRIPTION" |
| .LP |
| \fBslurm_step_launch_params_t_init\fR Initialize a user-allocated |
| slurm_step_launch_params_t structure with default values. |
| This function will NOT allocate any new memory. |
| .LP |
| \fBslurm_step_launch\fR Launch a parallel job step. |
| .LP |
| \fBslurm_step_launch_wait_start\fR Block until all tasks have started. |
| .LP |
| \fBslurm_step_launch_wait_finish\fR Block until all tasks have finished |
| (or failed to start altogether). |
| .LP |
| \fBslurm_step_launch_abort\fR Abort an in-progress launch, or terminate |
| the fully launched job step. Can be called from a signal handler. |
| |
| .SH "RETURN VALUE" |
| .LP |
| \fBslurm_step_launch\fR and \fBslurm_step_launch_wait_start\fR |
| will return SLURM_SUCCESS when all tasks have successfully started, |
| or SLURM_ERROR if the job step is aborted during launch. |
| |
| .SH "ERRORS" |
| .LP |
| \fBEINVAL\fR Invalid argument |
| .LP |
| \fBSLURM_PROTOCOL_VERSION_ERROR\fR Protocol version has changed, re\-link your code. |
| .LP |
| \fBESLURM_INVALID_JOB_ID\fR the requested job id does not exist. |
| .LP |
| \fBESLURM_ALREADY_DONE\fR the specified job has already completed and can not be modified. |
| .LP |
| \fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested action (e.g. trying to delete or modify another user's job). |
| .LP |
| \fBESLURM_INTERCONNECT_FAILURE\fR failed to configure the node interconnect. |
| .LP |
| \fBESLURM_BAD_DIST\fR task distribution specification is invalid. |
| .LP |
| \fBSLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT\fR Timeout in communicating with |
| SLURM controller. |
| |
| .SH "EXAMPLE" |
| .LP |
| .nf |
| /* |
| * To compile: |
| * gcc test.c \-o test \-g \-pthread \-lslurm |
| * |
| * Or if Slurm is not in your default search paths: |
| * gcc test.c \-o test \-g \-pthread \-I${SLURM_DIR}/include \\ |
| * \-Wl,\-\-rpath=${SLURM_DIR}/lib \-L${SLURM_DIR}/lib \-lslurm |
| */ |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <slurm/slurm.h> |
| #include <slurm/slurm_errno.h> |
| |
| static void _task_start(launch_tasks_response_msg_t *msg) |
| { |
| printf("%d tasks started on node %s\\n", |
| msg->count_of_pids, msg->node_name); |
| } |
| |
| static void _task_finish(task_exit_msg_t *msg) |
| { |
| printf("%d tasks finished\\n", msg->num_tasks); |
| } |
| |
| int main (int argc, char *argv[]) |
| { |
| slurm_step_ctx_params_t step_params; |
| slurm_step_ctx step_ctx; |
| slurm_step_launch_params_t params; |
| slurm_step_launch_callbacks_t callbacks; |
| uint32_t job_id, step_id; |
| |
| slurm_step_ctx_params_t_init(&step_params); |
| step_params.node_count = 1; |
| step_params.task_count = 4; |
| step_params.overcommit = true; |
| |
| step_ctx = slurm_step_ctx_create(&step_params); |
| if (step_ctx == NULL) { |
| slurm_perror("slurm_step_ctx_create"); |
| exit(1); |
| } |
| slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id); |
| slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id); |
| printf("Ready to start job %u step %u\\n", job_id, step_id); |
| |
| slurm_step_launch_params_t_init(&params); |
| params.argc = argc \- 1; |
| params.argv = argv + 1; |
| callbacks.task_start = _task_start; |
| callbacks.task_finish = _task_finish; |
| if (slurm_step_launch(step_ctx, &params, &callbacks) |
| != SLURM_SUCCESS) { |
| slurm_perror("slurm_step_launch"); |
| exit(1); |
| } |
| printf("Sent step launch RPC\\n"); |
| |
| if (slurm_step_launch_wait_start(step_ctx) != SLURM_SUCCESS) { |
| fprintf(stderr, "job step was aborted during launch\\n"); |
| } else { |
| printf("All tasks have started\\n"); |
| } |
| |
| slurm_step_launch_wait_finish(step_ctx); |
| printf("All tasks have finished\\n"); |
| |
| slurm_step_ctx_destroy(step_ctx); |
| exit(0); |
| } |
| .fi |
| |
| .SH "NOTE" |
| These functions are included in the libslurm library, |
| which must be linked to your process for use |
| (e.g. "cc \-lslurm myprog.c"). |
| |
| .SH "COPYING" |
| Copyright (C) 2006 The Regents of the University of California. |
| Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| UCRL\-CODE\-226842. |
| .LP |
| This file is part of SLURM, a resource management program. |
| For details, see <https://computing.llnl.gov/linux/slurm/>. |
| .LP |
| SLURM is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 2 of the License, or (at your option) |
| any later version. |
| .LP |
| SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| details. |
| .SH "SEE ALSO" |
| .LP |
| \fBslurm_step_ctx_create\fR(3), \fBslurm_step_ctx_destroy\fR(3), |
| \fBslurm_get_errno\fR(3), \fBslurm_perror\fR(3), \fBslurm_strerror\fR(3), |
| \fBsalloc\fR(1), \fBsrun\fR(1) |