| .TH "Slurm API" "3" "December 2005" "Morris Jette" "Slurm checkpoint functions" |
| |
| .SH "NAME" |
| slurm_checkpoint_able, slurm_checkpoint_complete, slurm_checkpoint_create, |
| slurm_checkpoint_disable, slurm_checkpoint_enable, slurm_checkpoint_error, |
| slurm_checkpoint_restart, slurm_checkpoint_vacate \- Slurm checkpoint functions |
| |
| .SH "SYNTAX" |
| .LP |
| #include <slurm/slurm.h> |
| .LP |
| .LP |
| int \fBslurm_checkpoint_able\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP, |
| .br |
| time_t *\fIstart_time\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_complete\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP, |
| .br |
| time_t \fIstart_time\fP, |
| .br |
| uint32_t \fIerror_code\fP, |
| .br |
| char *\fIerror_msg\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_create\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP, |
| .br |
| uint16_t \fImax_wait\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_disable\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_enable\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_error\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP, |
| .br |
| uint32_t *\fIerror_code\fP, |
| .br |
| char ** \fIerror_msg\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_restart\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP |
| .br |
| ); |
| .LP |
| int \fBslurm_checkpoint_vacate\fR ( |
| .br |
| uint32_t \fIjob_id\fP, |
| .br |
| uint32_t \fIstep_id\fP, |
| .br |
| uint16_t \fImax_wait\fP |
| .br |
| ); |
| |
| .SH "ARGUMENTS" |
| .LP |
| .TP |
| \fIerror_code\fP |
| Error code for checkpoint operation. Only the highest value is preserved. |
| .TP |
| \fIerror_msg\fP |
| Error message for checkpoint operation. Only the \fIerror_msg\fP value for the highest |
| \fIerror_code\fP is preserved. |
| .TP |
| \fIjob_id\fP |
| SLURM job ID to perform the operation upon. |
| .TP |
| \fImax_wait\fP |
| Maximum time to allow for the operation to complete in seconds. |
| .TP |
| \fIstart_time\fP |
| Time at which last checkpoint operation began (if one is in progress), otherwise zero. |
| .TP |
| \fIstep_id\fP |
| SLURM job step ID to perform the operation upon. |
| May be NO_VAL if the operation is to be performed on all steps of the specified job. |
| |
| .SH "DESCRIPTION" |
| .LP |
| \fBslurm_checkpoint_able\fR |
| Report whether checkpoint operations can presently be issued for the specified job step. |
| If yes, returns SLURM_SUCCESS and sets \fIstart_time\fP if a checkpoint operation is |
| presently active. Returns ESLURM_DISABLED if checkpoint operations are disabled. |
| .LP |
| \fBslurm_checkpoint_complete\fR |
| Note that a requested checkpoint has been completed. |
| .LP |
| \fBslurm_checkpoint_create\fR |
| Request a checkpoint for the identified job step. |
| Continue its execution upon completion of the checkpoint. |
| .LP |
| \fBslurm_checkpoint_disable\fR |
| Make the identified job step non\-checkpointable. |
| This can be issued as needed to prevent checkpointing while |
| a job step is in a critical section or for other reasons. |
| .LP |
| \fBslurm_checkpoint_enable\fR |
| Make the identified job step checkpointable. |
| .LP |
| \fBslurm_checkpoint_error\fR |
| Get error information about the last checkpoint operation for a given job step. |
| .LP |
| \fBslurm_checkpoint_restart\fR |
| Request that a previously checkpointed job resume execution. |
| It may continue execution on different nodes than were |
| originally used. |
| Execution may be delayed if resources are not immediately |
| available. |
| .LP |
| \fBslurm_checkpoint_vacate\fR |
| Request a checkpoint for the identified job step. |
| Terminate its execution upon completion of the checkpoint. |
| |
| |
| .SH "RETURN VALUE" |
| .LP |
| Zero is returned upon success. |
| On error, \-1 is returned, and the Slurm error code is set appropriately. |
| .SH "ERRORS" |
| .LP |
| \fBESLURM_INVALID_JOB_ID\fR the requested job or job step id does not exist. |
| .LP |
| \fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested |
| action (e.g. trying to delete or modify another user's job). |
| .LP |
| \fBESLURM_JOB_PENDING\fR the requested job is still pending. |
| .LP |
| \fBESLURM_ALREADY_DONE\fR the requested job has already completed. |
| .LP |
| \fBESLURM_DISABLED\fR the requested operation has been disabled for this job step. |
| This will occur when a checkpoint request is issued while checkpoints are disabled. |
| .LP |
| \fBESLURM_NOT_SUPPORTED\fR the requested operation is not supported on this system. |
| |
| .SH "EXAMPLE" |
| .LP |
| #include <stdio.h> |
| .br |
| #include <stdlib.h> |
| .br |
| #include <slurm/slurm.h> |
| .br |
| #include <slurm/slurm_errno.h> |
| .LP |
| int main (int argc, char *argv[]) |
| .br |
| { |
| .br |
| uint32_t job_id, step_id; |
| .LP |
| if (argc < 3) { |
| .br |
| printf("Usage: %s job_id step_id\\n", argv[0]); |
| .br |
| exit(1); |
| .br |
| } |
| .LP |
| job_id = atoi(argv[1]); |
| .br |
| step_id = atoi(argv[2]); |
| .br |
| if (slurm_checkpoint_disable(job_id, step_id)) { |
| .br |
| slurm_perror ("slurm_checkpoint_disable:"); |
| .br |
| exit (1); |
| .br |
| } |
| .br |
| exit (0); |
| .br |
| } |
| |
| .SH "NOTE" |
| These functions are included in the libslurm library, |
| which must be linked to your process for use |
| (e.g. "cc myprog.c \-lslurm"). |
| |
| .SH "COPYING" |
| Copyright (C) 2004 The Regents of the University of California. |
| Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). |
| UCRL\-CODE\-226842. |
| .LP |
| This file is part of SLURM, a resource management program. |
| For details, see <https://computing.llnl.gov/linux/slurm/>. |
| .LP |
| SLURM is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 2 of the License, or (at your option) |
| any later version. |
| .LP |
| SLURM is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
| details. |
| |
| .SH "SEE ALSO" |
| .LP |
| \fBsrun\fR(1), \fBsqueue\fR(1), \fBfree\fR(3) |