blob: f7ec6db6fc935426b66a5e0776b67490ee5fa8c5 [file] [log] [blame] [edit]
.TH "Slurm API" "3" "April 2006" "Morris Jette" "Slurm job initiation functions"
.SH "NAME"
slurm_allocate_resources, slurm_allocate_resources_blocking,
slurm_allocation_msg_thr_create, slurm_allocation_msg_thr_destroy,
slurm_allocation_lookup, slurm_allocation_lookup_lite,
slurm_confirm_allocation, slurm_free_resource_allocation_response_msg,
slurm_free_submit_response_response_msg, slurm_init_job_desc_msg,
slurm_job_will_run, slurm_read_hostfile, slurm_submit_batch_job
\- Slurm job initiation functions
.SH "SYNTAX"
.LP
#include <slurm/slurm.h>
.LP
int \fBslurm_allocate_resources\fR (
.br
job_desc_msg_t *\fIjob_desc_msg_ptr\fP,
.br
resource_allocation_response_msg_t **\fIslurm_alloc_msg_pptr\fP
.br
);
.LP
resource_allocation_response_msg_t *\fBslurm_allocate_resources_blocking\fR (
.br
job_desc_msg_t *\fIjob_desc_msg_ptr\fP,
.br
time_t \fItimeout\fP, void \fI(*pending_callback)(uint32_t job_id)\fP
.br
);
.LP
allocation_msg_thread_t *\fBslurm_allocation_msg_thr_create\fR (
.br
uint16_t *\fIport\fP,
.br
slurm_allocation_callbacks_t *\fIcallbacks\fP
.br
);
.LP
void *\fBslurm_allocation_msg_thr_destroy\fR (
.br
allocation_msg_thread_t *\fIslurm_alloc_msg_thr_ptr\fP
.br
);
.LP
int \fBslurm_allocation_lookup\fR (
.br
uint32_t \fIjobid\fP,
.br
resource_allocation_response_msg_t **\fIslurm_alloc_msg_pptr\fP
.br
);
.LP
int \fBslurm_allocation_lookup_lite\fR (
.br
uint32_t \fIjobid\fP,
.br
resource_allocation_response_msg_t **\fIslurm_alloc_msg_pptr\fP
.br
);
.LP
int \fBslurm_confirm_allocation\fR (
.br
old_job_alloc_msg_t *\fIold_job_desc_msg_ptr\fP,
.br
resource_allocation_response_msg_t **\fIslurm_alloc_msg_pptr\fP
.br
);
.LP
void \fBslurm_free_resource_allocation_response_msg\fR (
.br
resource_allocation_response_msg_t *\fIslurm_alloc_msg_ptr\fP
.br
);
.LP
void \fBslurm_free_submit_response_response_msg\fR (
.br
submit_response_msg_t *\fIslurm_submit_msg_ptr\fP
.br
);
.LP
void \fBslurm_init_job_desc_msg\fR (
.br
job_desc_msg_t *\fIjob_desc_msg_ptr\fP
.br
);
.LP
int \fBslurm_job_will_run\fR (
.br
job_desc_msg_t *\fIjob_desc_msg_ptr\fP
.br
);
.LP
char *\fBslurm_read_hostfile\fR (
.br
char *\fIfilename\fP, int \fIn\fP
.br
);
.LP
int \fBslurm_submit_batch_job\fR (
.br
job_desc_msg_t *\fIjob_desc_msg_ptr\fP,
.br
submit_response_msg_t **\fIslurm_submit_msg_pptr\fP
.br
);
.SH "ARGUMENTS"
.LP
.TP
\fIjob_desc_msg_ptr\fP
Specifies the pointer to a job request specification. See slurm.h for full details
on the data structure's contents.
.TP
\fIcallbacks\fP
Specifies the pointer to an allocation callbacks structure. See
slurm.h for full details on the data structure's contents.
.TP
\fIold_job_desc_msg_ptr\fP
Specifies the pointer to a description of an existing job. See slurm.h for
full details on the data structure's contents.
.TP
\fIslurm_alloc_msg_pptr\fP
Specifies the double pointer to the structure to be created and filled with a
description of the created resource allocation (job): job ID, list of allocated nodes,
processor count per allocated node, etc. See slurm.h for full details on the data
structure's contents.
.TP
\fIslurm_alloc_msg_ptr\fP
Specifies the pointer to the structure to be created and filled in by the function
\fIslurm_allocate_resources\fP,
\fIslurm_allocate_resources_blocking\fP,
\fIslurm_allocation_lookup\fP, \fIslurm_allocation_lookup_lite\fP,
\fIslurm_confirm_allocation\fP or \fIslurm_job_will_run\fP.
.TP
\fIslurm_alloc_msg_thr_ptr\fP
Specifies the pointer to the structure created and returned by the
function \fIslurm_allocation_msg_thr_create\fP. Must be destroyed
with function \fIslurm_allocation_msg_thr_destroy\fP.
.TP
\fIslurm_submit_msg_pptr\fP
Specifies the double pointer to the structure to be created and filled with a description
of the created job: job ID, etc. See slurm.h for full details on the
data structure's contents.
.TP
\fIslurm_submit_msg_ptr\fP
Specifies the pointer to the structure to be created and filled in by the function \fIslurm_submit_batch_job\fP.
.SH "DESCRIPTION"
.LP
\fBslurm_allocate_resources\fR Request a resource allocation for a job. If
successful, a job entry is created. Note that if the job's requested node
count or time allocation are outside of the partition's limits then a job
entry will be created, a warning indication will be placed in the \fIerror_code\fP field of the response message, and the job will be left
queued until the partition's limits are changed.
Always release the response message when no longer required using
the function \fBslurm_free_resource_allocation_response_msg\fR. This
function only makes the request once. If the allocation is not
available immediately the node_cnt variable in the resp will be 0. If
you want a function that will block until either an error is received
or an allocation is granted you can use the
\fIslurm_allocate_resources_blocking\fP function described below.
.LP
\fBslurm_allocate_resources_blocking\fR Request a resource allocation for a
job. This call will block until the allocation is granted, an error
occurs, or the specified timeout limit is reached. The \fIpending_callback\fP
parameter will be called if the allocation is not available
immediately and the immediate flag is not set in the request. This can
be used to get the jobid of the job while waiting for the allocation
to become available. On failure NULL is returned and errno is set.
.LP
\fBslurm_allocation_msg_thr_create\fR Start up a message handler
talking with the controller dealing with messages from the controller
during an allocation. Callback functions are declared in the
\fIcallbacks\fP parameter and will be called when a corresponding
message is received from the controller. This message thread is
needed to receive messages from the controller about node failure in
an allocation and other important messages. Although technically not
required, it could be very helpful to inform about problems with the
allocation.
.LP
\fBslurm_allocation_msg_thr_destroy\fR Shut down the message handler
talking with the controller dealing with messages from the controller during
an allocation.
.LP
\fBslurm_confirm_allocation\fR Return detailed information on a specific
existing job allocation. \fBOBSOLETE FUNCTION: Use slurm_allocation_lookup
instead.\fR This function may only be successfully executed by the job's
owner or user root.
.LP
\fBslurm_free_resource_allocation_response_msg\fR Release the storage generated in response
to a call of the function \fBslurm_allocate_resources\fR,
\fBslurm_allocation_lookup\fR, or \fBslurm_allocation_lookup_lite\fR.
.LP
\fBslurm_free_submit_response_response_msg\fR Release the storage generated in response
to a call of the function \fBslurm_submit_batch_job\fR.
.LP
\fBslurm_init_job_desc_msg\fR Initialize the contents of a job descriptor with default values.
Execute this function before issuing a request to submit or modify a job.
.LP
\fBslurm_job_will_run\fR Determine if the supplied job description could be executed immediately.
.LP
\fBslurm_read_hostfile\fR Read a SLURM hostfile specified by
"filename". "filename" must contain a list of SLURM NodeNames, one
per line. Reads up to "n" number of hostnames from the file. Returns
a string representing a hostlist ranged string of the contents
of the file. This is a helper function; it does not contact any SLURM
daemons.
.LP
\fBslurm_submit_batch_job\fR Submit a job for later execution. Note that if
the job's requested node count or time allocation are outside of the partition's limits then a job entry will be created, a warning indication will be placed in the \fIerror_code\fP field of the response message, and the job will be left queued until the partition's limits are changed and resources are available. Always release the response message when no
longer required using the function \fBslurm_free_submit_response_response_msg\fR.
.SH "RETURN VALUE"
.LP
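For functions returning an integer, zero is returned on success. On error, \-1 is returned, and the Slurm error code is set appropriately. \fBslurm_allocate_resources_blocking\fR returns NULL on failure and sets errno.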
.SH "ERRORS"
.LP
\fBSLURM_PROTOCOL_VERSION_ERROR\fR Protocol version has changed, re\-link your code.
.LP
\fBESLURM_DEFAULT_PARTITION_NOT_SET\fR the system lacks a valid default partition.
.LP
\fBESLURM_JOB_MISSING_PARTITION_KEY\fR use of this partition is restricted through a credential provided only to user root. This job lacks such a valid credential.
.LP
\fBESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP\fR use of this partition is restricted to certain groups. This user is not a member of an authorized group.
.LP
\fBESLURM_REQUESTED_NODES_NOT_IN_PARTITION\fR the job requested use of specific nodes which are not in the requested (or default) partition.
.LP
\fBESLURM_TOO_MANY_REQUESTED_CPUS\fR the job requested use of more processors than can be made available in the requested (or default) partition.
.LP
\fBESLURM_TOO_MANY_REQUESTED_NODES\fR the job requested use of more nodes than can be made available in the requested (or default) partition.
.LP
\fBESLURM_ERROR_ON_DESC_TO_RECORD_COPY\fR unable to create the job due to internal resources being exhausted. Try again later.
.LP
\fBESLURM_JOB_MISSING_SIZE_SPECIFICATION\fR the job failed to specify some size specification. At least one of the following must be supplied: required processor count, required node count, or required node list.
.LP
\fBESLURM_JOB_SCRIPT_MISSING\fR failed to identify executable program to be queued.
.LP
\fBESLURM_USER_ID_MISSING\fR identification of the job's owner was not provided.
.LP
\fBESLURM_DUPLICATE_JOB_ID\fR the requested job id is already in use.
.LP
\fBESLURM_NOT_TOP_PRIORITY\fR job can not be started immediately because higher priority jobs are waiting to use this partition.
.LP
\fBESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE\fR the requested node configuration is not available (at least not in sufficient quantity) to satisfy the request.
.LP
\fBESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE\fR the requested partition
configuration is not available to satisfy the request. This is not a fatal
error, but indicates that the job will be left queued until the partition's
configuration is changed. This typically indicates that the job's requested
node count is outside of the node count range its partition is configured
to support (e.g. the job wants 64 nodes and the partition will only schedule
jobs using between 1 and 32 nodes). Alternately, the job's time limit exceeds
the partition's time limit.
.LP
\fBESLURM_NODES_BUSY\fR the requested nodes are already in use.
.LP
\fBESLURM_INVALID_FEATURE\fR the requested feature(s) does not exist.
.LP
\fBESLURM_INVALID_JOB_ID\fR the requested job id does not exist.
.LP
\fBESLURM_INVALID_NODE_NAME\fR the requested node name(s) is/are not valid.
.LP
\fBESLURM_INVALID_PARTITION_NAME\fR the requested partition name is not valid.
.LP
\fBESLURM_TRANSITION_STATE_NO_UPDATE\fR the requested job configuration change can not take place at this time. Try again later.
.LP
\fBESLURM_ALREADY_DONE\fR the specified job has already completed and can not be modified.
.LP
\fBESLURM_ACCESS_DENIED\fR the requesting user lacks authorization for the requested action (e.g. trying to delete or modify another user's job).
.LP
\fBESLURM_INTERCONNECT_FAILURE\fR failed to configure the node interconnect.
.LP
\fBESLURM_BAD_DIST\fR task distribution specification is invalid.
.LP
\fBSLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT\fR Timeout in communicating with
SLURM controller.
.SH "NON-BLOCKING EXAMPLE"
.LP
#include <stdio.h>
.br
#include <stdlib.h>
.br
#include <signal.h>
.br
#include <unistd.h>
.br
#include <slurm/slurm.h>
.br
#include <slurm/slurm_errno.h>
.LP
int main (int argc, char *argv[])
.br
{
.br
job_desc_msg_t job_desc_msg;
.br
resource_allocation_response_msg_t* slurm_alloc_msg_ptr ;
.LP
slurm_init_job_desc_msg( &job_desc_msg );
.br
job_desc_msg. name = ("job01\\0");
.br
job_desc_msg. job_min_memory = 1024;
.br
job_desc_msg. time_limit = 200;
.br
job_desc_msg. min_nodes = 400;
.br
job_desc_msg. user_id = getuid();
.br
job_desc_msg. group_id = getgid();
.br
if (slurm_allocate_resources(&job_desc_msg,
.br
&slurm_alloc_msg_ptr)) {
.br
slurm_perror ("slurm_allocate_resources error");
.br
exit (1);
.br
}
.br
printf ("Allocated nodes %s to job_id %u\\n",
.br
slurm_alloc_msg_ptr\->node_list,
.br
slurm_alloc_msg_ptr\->job_id );
.br
if (slurm_kill_job(slurm_alloc_msg_ptr\->job_id, SIGKILL, 0)) {
.br
printf ("kill errno %d\\n", slurm_get_errno());
.br
exit (1);
.br
}
.br
printf ("canceled job_id %u\\n",
.br
slurm_alloc_msg_ptr\->job_id );
.br
slurm_free_resource_allocation_response_msg(
.br
slurm_alloc_msg_ptr);
.br
exit (0);
.br
}
.SH "BLOCKING EXAMPLE"
.LP
#include <stdio.h>
.br
#include <stdlib.h>
.br
#include <signal.h>
.br
#include <unistd.h>
.br
#include <slurm/slurm.h>
.br
#include <slurm/slurm_errno.h>
.LP
int main (int argc, char *argv[])
.br
{
.br
job_desc_msg_t job_desc_msg;
.br
resource_allocation_response_msg_t* slurm_alloc_msg_ptr ;
.LP
slurm_init_job_desc_msg( &job_desc_msg );
.br
job_desc_msg. name = ("job01\\0");
.br
job_desc_msg. job_min_memory = 1024;
.br
job_desc_msg. time_limit = 200;
.br
job_desc_msg. min_nodes = 400;
.br
job_desc_msg. user_id = getuid();
.br
job_desc_msg. group_id = getgid();
.br
if (!(slurm_alloc_msg_ptr =
.br
slurm_allocate_resources_blocking(&job_desc_msg, 0, NULL))) {
.br
slurm_perror ("slurm_allocate_resources_blocking error");
.br
exit (1);
.br
}
.br
printf ("Allocated nodes %s to job_id %u\\n",
.br
slurm_alloc_msg_ptr\->node_list,
.br
slurm_alloc_msg_ptr\->job_id );
.br
if (slurm_kill_job(slurm_alloc_msg_ptr\->job_id, SIGKILL, 0)) {
.br
printf ("kill errno %d\\n", slurm_get_errno());
.br
exit (1);
.br
}
.br
printf ("canceled job_id %u\\n",
.br
slurm_alloc_msg_ptr\->job_id );
.br
slurm_free_resource_allocation_response_msg(
.br
slurm_alloc_msg_ptr);
.br
exit (0);
.br
}
.SH "NOTE"
These functions are included in the libslurm library,
which must be linked to your process for use
(e.g. "cc \-lslurm myprog.c").
.SH "COPYING"
Copyright (C) 2002\-2006 The Regents of the University of California.
Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
CODE\-OCEC\-09\-009. All rights reserved.
.LP
This file is part of SLURM, a resource management program.
For details, see <https://computing.llnl.gov/linux/slurm/>.
.LP
SLURM is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
.LP
SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
.SH "SEE ALSO"
.LP
\fBhostlist_create\fR(3), \fBhostlist_shift\fR(3), \fBhostlist_destroy\fR(3),
\fBscancel\fR(1), \fBsrun\fR(1), \fBslurm_free_job_info_msg\fR(3),
\fBslurm_get_errno\fR(3), \fBslurm_load_jobs\fR(3),
\fBslurm_perror\fR(3), \fBslurm_strerror\fR(3)