|  | /*****************************************************************************\ | 
|  | *  suspend.c - job step suspend and resume functions. | 
|  | ***************************************************************************** | 
|  | *  Copyright (C) 2005-2006 The Regents of the University of California. | 
|  | *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | 
|  | *  Written by Morris Jette <jette1@llnl.gov> et. al. | 
|  | *  CODE-OCEC-09-009. All rights reserved. | 
|  | * | 
|  | *  This file is part of Slurm, a resource management program. | 
|  | *  For details, see <https://slurm.schedmd.com/>. | 
|  | *  Please also read the included file: DISCLAIMER. | 
|  | * | 
|  | *  Slurm is free software; you can redistribute it and/or modify it under | 
|  | *  the terms of the GNU General Public License as published by the Free | 
|  | *  Software Foundation; either version 2 of the License, or (at your option) | 
|  | *  any later version. | 
|  | * | 
|  | *  In addition, as a special exception, the copyright holders give permission | 
|  | *  to link the code of portions of this program with the OpenSSL library under | 
|  | *  certain conditions as described in each individual source file, and | 
|  | *  distribute linked combinations including the two. You must obey the GNU | 
|  | *  General Public License in all respects for all of the code used other than | 
|  | *  OpenSSL. If you modify file(s) with this exception, you may extend this | 
|  | *  exception to your version of the file(s), but you are not obligated to do | 
|  | *  so. If you do not wish to do so, delete this exception statement from your | 
|  | *  version.  If you delete this exception statement from all source files in | 
|  | *  the program, then also delete it here. | 
|  | * | 
|  | *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 
|  | *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more | 
|  | *  details. | 
|  | * | 
|  | *  You should have received a copy of the GNU General Public License along | 
|  | *  with Slurm; if not, write to the Free Software Foundation, Inc., | 
|  | *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA. | 
|  | \*****************************************************************************/ | 
|  |  | 
|  | #include "slurm/slurm.h" | 
|  | #include "src/common/slurm_protocol_api.h" | 
|  | #include "src/common/xmalloc.h" | 
|  | #include "src/common/xstring.h" | 
|  |  | 
|  | /* | 
|  | * _suspend_op - perform a suspend/resume operation for some job. | 
|  | * IN op         - operation to perform | 
|  | * IN job_id     - job on which to perform operation or NO_VAL | 
|  | * RET 0 or a slurm error code | 
|  | * NOTE: Supply either job_id NO_VAL or job_id_str as NULL, not both | 
|  | */ | 
|  | static int _suspend_op(uint16_t op, uint32_t job_id) | 
|  | { | 
|  | int rc = SLURM_SUCCESS; | 
|  | suspend_msg_t sus_req; | 
|  | slurm_msg_t req_msg; | 
|  |  | 
|  | slurm_msg_t_init(&req_msg); | 
|  | memset(&sus_req, 0, sizeof(sus_req)); | 
|  | sus_req.op         = op; | 
|  | sus_req.job_id     = job_id; | 
|  | sus_req.job_id_str = NULL; | 
|  | req_msg.msg_type   = REQUEST_SUSPEND; | 
|  | req_msg.data       = &sus_req; | 
|  |  | 
|  | if (slurm_send_recv_controller_rc_msg(&req_msg, &rc, | 
|  | working_cluster_rec) < 0) | 
|  | return SLURM_ERROR; | 
|  |  | 
|  | errno = rc; | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * slurm_suspend - suspend execution of a job. | 
|  | * IN job_id  - job on which to perform operation | 
|  | * RET 0 or a slurm error code | 
|  | */ | 
|  | extern int slurm_suspend(uint32_t job_id) | 
|  | { | 
|  | return _suspend_op (SUSPEND_JOB, job_id); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * slurm_resume - resume execution of a previously suspended job. | 
|  | * IN job_id  - job on which to perform operation | 
|  | * RET 0 or a slurm error code | 
|  | */ | 
|  | extern int slurm_resume(uint32_t job_id) | 
|  | { | 
|  | return _suspend_op(RESUME_JOB, job_id); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * _suspend_op2 - perform a suspend/resume operation for some job. | 
|  | * IN op         - operation to perform | 
|  | * IN job_id_str - job on which to perform operation in string format or NULL | 
|  | * OUT resp      - slurm error codes by job array task ID | 
|  | * RET 0 or a slurm error code | 
|  | * NOTE: Supply either job_id NO_VAL or job_id_str as NULL, not both | 
|  | */ | 
|  | static int _suspend_op2(uint16_t op, char *job_id_str, | 
|  | job_array_resp_msg_t **resp) | 
|  | { | 
|  | int rc = SLURM_SUCCESS; | 
|  | suspend_msg_t sus_req; | 
|  | slurm_msg_t req_msg, resp_msg; | 
|  |  | 
|  | slurm_msg_t_init(&req_msg); | 
|  | slurm_msg_t_init(&resp_msg); | 
|  | memset(&sus_req, 0, sizeof(sus_req)); | 
|  | sus_req.op         = op; | 
|  | sus_req.job_id     = NO_VAL; | 
|  | sus_req.job_id_str = job_id_str; | 
|  | req_msg.msg_type   = REQUEST_SUSPEND; | 
|  | req_msg.data       = &sus_req; | 
|  |  | 
|  | rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg, | 
|  | working_cluster_rec); | 
|  | switch (resp_msg.msg_type) { | 
|  | case RESPONSE_JOB_ARRAY_ERRORS: | 
|  | *resp = (job_array_resp_msg_t *) resp_msg.data; | 
|  | break; | 
|  | case RESPONSE_SLURM_RC: | 
|  | rc = ((return_code_msg_t *) resp_msg.data)->return_code; | 
|  | if (rc) | 
|  | errno = rc; | 
|  | break; | 
|  | default: | 
|  | errno = SLURM_UNEXPECTED_MSG_ERROR; | 
|  | } | 
|  |  | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * slurm_suspend2 - suspend execution of a job. | 
|  | * IN job_id in string form  - job on which to perform operation | 
|  | * OUT resp - per task response to the request, | 
|  | *	      free using slurm_free_job_array_resp() | 
|  | * RET 0 or a slurm error code | 
|  | */ | 
|  | extern int slurm_suspend2(char *job_id, job_array_resp_msg_t **resp) | 
|  | { | 
|  | return _suspend_op2(SUSPEND_JOB, job_id, resp); | 
|  | } | 
|  |  | 
|  |  | 
|  | /* | 
|  | * slurm_resume2 - resume execution of a previously suspended job. | 
|  | * IN job_id in string form  - job on which to perform operation | 
|  | * OUT resp - per task response to the request, | 
|  | *	      free using slurm_free_job_array_resp() | 
|  | * RET 0 or a slurm error code | 
|  | */ | 
|  | extern int slurm_resume2(char *job_id, job_array_resp_msg_t **resp) | 
|  | { | 
|  | return _suspend_op2(RESUME_JOB, job_id, resp); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * slurm_requeue - re-queue a batch job, if already running | 
|  | *	then terminate it first | 
|  | * IN job_id  - job on which to perform operation | 
|  | * IN flags - JOB_SPECIAL_EXIT - job should be placed special exit state and | 
|  | *		  held. | 
|  | *            JOB_REQUEUE_HOLD - job should be placed JOB_PENDING state and | 
|  | *		  held. | 
|  | *            JOB_RECONFIG_FAIL - Node configuration for job failed | 
|  | *            JOB_RUNNING - Operate only on jobs in a state of | 
|  | *		  CONFIGURING, RUNNING, STOPPED or SUSPENDED. | 
|  | * RET 0 or a slurm error code | 
|  | */ | 
|  | extern int slurm_requeue(uint32_t job_id, uint32_t flags) | 
|  | { | 
|  | int rc = SLURM_SUCCESS; | 
|  | requeue_msg_t requeue_req; | 
|  | slurm_msg_t req_msg; | 
|  |  | 
|  | slurm_msg_t_init(&req_msg); | 
|  |  | 
|  | memset(&requeue_req, 0, sizeof(requeue_req)); | 
|  | requeue_req.job_id	= job_id; | 
|  | requeue_req.job_id_str	= NULL; | 
|  | requeue_req.flags	= flags; | 
|  | req_msg.msg_type	= REQUEST_JOB_REQUEUE; | 
|  | req_msg.data		= &requeue_req; | 
|  |  | 
|  | if (slurm_send_recv_controller_rc_msg(&req_msg, &rc, | 
|  | working_cluster_rec) < 0) | 
|  | return SLURM_ERROR; | 
|  |  | 
|  | errno = rc; | 
|  | return rc; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * slurm_requeue2 - re-queue a batch job, if already running | 
|  | *	then terminate it first | 
|  | * IN job_id in string form  - job on which to perform operation, may be job | 
|  | *            array specification (e.g. "123_1-20,44"); | 
|  | * IN flags - JOB_SPECIAL_EXIT - job should be placed special exit state and | 
|  | *		  held. | 
|  | *            JOB_REQUEUE_HOLD - job should be placed JOB_PENDING state and | 
|  | *		  held. | 
|  | *            JOB_RECONFIG_FAIL - Node configuration for job failed | 
|  | *            JOB_RUNNING - Operate only on jobs in a state of | 
|  | *		  CONFIGURING, RUNNING, STOPPED or SUSPENDED. | 
|  | * OUT resp - per task response to the request, | 
|  | *	      free using slurm_free_job_array_resp() | 
|  | * RET 0 or a slurm error code | 
|  | */ | 
|  | extern int slurm_requeue2(char *job_id_str, uint32_t flags, | 
|  | job_array_resp_msg_t **resp) | 
|  | { | 
|  | int rc = SLURM_SUCCESS; | 
|  | requeue_msg_t requeue_req; | 
|  | slurm_msg_t req_msg, resp_msg; | 
|  |  | 
|  | slurm_msg_t_init(&req_msg); | 
|  | slurm_msg_t_init(&resp_msg); | 
|  | memset(&requeue_req, 0, sizeof(requeue_req)); | 
|  | requeue_req.job_id	= NO_VAL; | 
|  | requeue_req.job_id_str	= job_id_str; | 
|  | requeue_req.flags	= flags; | 
|  | req_msg.msg_type	= REQUEST_JOB_REQUEUE; | 
|  | req_msg.data		= &requeue_req; | 
|  |  | 
|  | rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg, | 
|  | working_cluster_rec); | 
|  | switch (resp_msg.msg_type) { | 
|  | case RESPONSE_JOB_ARRAY_ERRORS: | 
|  | *resp = (job_array_resp_msg_t *) resp_msg.data; | 
|  | break; | 
|  | case RESPONSE_SLURM_RC: | 
|  | rc = ((return_code_msg_t *) resp_msg.data)->return_code; | 
|  | if (rc) | 
|  | errno = rc; | 
|  | break; | 
|  | default: | 
|  | errno = SLURM_UNEXPECTED_MSG_ERROR; | 
|  | } | 
|  |  | 
|  | return rc; | 
|  | } |