.\" blob: fe98352bb2e3ffb05d45c9342dac9aaf8882ff5a [file] [log] [blame]
.TH "Slurm API" "3" "December 2005" "Morris Jette" "Slurm checkpoint functions"
.SH "NAME"
slurm_checkpoint_able, slurm_checkpoint_complete, slurm_checkpoint_create,
slurm_checkpoint_disable, slurm_checkpoint_enable, slurm_checkpoint_error,
slurm_checkpoint_restart, slurm_checkpoint_vacate \- Slurm checkpoint functions
.SH "SYNTAX"
.LP
#include <slurm/slurm.h>
.LP
.LP
int \fBslurm_checkpoint_able\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP,
.br
time_t *\fIstart_time\fP
.br
);
.LP
int \fBslurm_checkpoint_complete\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP,
.br
time_t \fIstart_time\fP,
.br
uint32_t \fIerror_code\fP,
.br
char *\fIerror_msg\fP
.br
);
.LP
int \fBslurm_checkpoint_create\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP,
.br
uint16_t \fImax_wait\fP
.br
);
.LP
int \fBslurm_checkpoint_disable\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP
.br
);
.LP
int \fBslurm_checkpoint_enable\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP
.br
);
.LP
int \fBslurm_checkpoint_error\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP,
.br
uint32_t *\fIerror_code\fP,
.br
char **\fIerror_msg\fP
.br
);
.LP
int \fBslurm_checkpoint_restart\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP
.br
);
.LP
int \fBslurm_checkpoint_vacate\fR (
.br
uint32_t \fIjob_id\fP,
.br
uint32_t \fIstep_id\fP,
.br
uint16_t \fImax_wait\fP
.br
);
.SH "ARGUMENTS"
.LP
.TP
\fIerror_code\fP
Error code for checkpoint operation. Only the highest value is preserved.
.TP
\fIerror_msg\fP
Error message for checkpoint operation. Only the \fIerror_msg\fP value for the highest
\fIerror_code\fP is preserved.
.TP
\fIjob_id\fP
SLURM job ID to perform the operation upon.
.TP
\fImax_wait\fP
Maximum time to allow for the operation to complete in seconds.
.TP
\fIstart_time\fP
Time at which last checkpoint operation began (if one is in progress), otherwise zero.
.TP
\fIstep_id\fP
SLURM job step ID to perform the operation upon.
May be NO_VAL if the operation is to be performed on all steps of the specified job.
.SH "DESCRIPTION"
.LP
\fBslurm_checkpoint_able\fR
Report if checkpoint operations can presently be issued for the specified job step.
If so, returns SLURM_SUCCESS and, if a checkpoint operation is presently active,
sets \fIstart_time\fP to the time at which it began.
Returns ESLURM_DISABLED if checkpoint operations are disabled.
.LP
\fBslurm_checkpoint_complete\fR
Note that a requested checkpoint has been completed.
.LP
\fBslurm_checkpoint_create\fR
Request a checkpoint for the identified job step.
Continue its execution upon completion of the checkpoint.
.LP
\fBslurm_checkpoint_disable\fR
Make the identified job step non\-checkpointable.
This can be issued as needed to prevent checkpointing while
a job step is in a critical section or for other reasons.
.LP
\fBslurm_checkpoint_enable\fR
Make the identified job step checkpointable.
.LP
\fBslurm_checkpoint_error\fR
Get error information about the last checkpoint operation for a given job step.
.LP
\fBslurm_checkpoint_restart\fR
Request that a previously checkpointed job resume execution.
It may continue execution on different nodes than were
originally used.
Execution may be delayed if resources are not immediately
available.
.LP
\fBslurm_checkpoint_vacate\fR
Request a checkpoint for the identified job step.
Terminate its execution upon completion of the checkpoint.
.SH "RETURN VALUE"
.LP
Zero is returned upon success.
On error, \-1 is returned, and the Slurm error code is set appropriately.
.SH "ERRORS"
.LP
\fBESLURM_INVALID_JOB_ID\fR the requested job or job step id does not exist.
.LP
\fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested
action (e.g. trying to delete or modify another user's job).
.LP
\fBESLURM_JOB_PENDING\fR the requested job is still pending.
.LP
\fBESLURM_ALREADY_DONE\fR the requested job has already completed.
.LP
\fBESLURM_DISABLED\fR the requested operation has been disabled for this job step.
This will occur when a checkpoint request is issued while checkpoints are disabled.
.LP
\fBESLURM_NOT_SUPPORTED\fR the requested operation is not supported on this system.
.SH "EXAMPLE"
.LP
#include <stdio.h>
.br
#include <stdlib.h>
.br
#include <slurm/slurm.h>
.br
#include <slurm/slurm_errno.h>
.LP
int main (int argc, char *argv[])
.br
{
.br
uint32_t job_id, step_id;
.LP
if (argc < 3) {
.br
printf("Usage: %s job_id step_id\\n", argv[0]);
.br
exit(1);
.br
}
.LP
job_id = atoi(argv[1]);
.br
step_id = atoi(argv[2]);
.br
if (slurm_checkpoint_disable(job_id, step_id)) {
.br
slurm_perror ("slurm_checkpoint_disable:");
.br
exit (1);
.br
}
.br
exit (0);
.br
}
.SH "NOTE"
These functions are included in the libslurm library,
which must be linked to your process for use
(e.g. "cc myprog.c \-lslurm").
.SH "COPYING"
Copyright (C) 2004 The Regents of the University of California.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
UCRL\-CODE\-226842.
.LP
This file is part of SLURM, a resource management program.
For details, see <https://computing.llnl.gov/linux/slurm/>.
.LP
SLURM is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
.LP
SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
.SH "SEE ALSO"
.LP
\fBsrun\fR(1), \fBsqueue\fR(1), \fBfree\fR(3)