blob: fc82b504a005026d25f42c9601131734c8b94a69 [file] [log] [blame]
/*****************************************************************************\
* slurm_protocol_socket.c - slurm socket handling functions
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Written by Kevin Tew <tew1@llnl.gov>, et. al.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <errno.h>
#include <netdb.h>
#include <netinet/in.h>
#include <poll.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <unistd.h>
#include "slurm/slurm_errno.h"
#include "src/common/fd.h"
#include "src/common/log.h"
#include "src/common/net.h"
#include "src/common/read_config.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/slurm_protocol_socket.h"
#include "src/common/slurm_time.h"
#include "src/common/strlcpy.h"
#include "src/common/util-net.h"
#include "src/common/xmalloc.h"
#include "src/common/xsignal.h"
#include "src/common/xstring.h"
#include "src/interfaces/conn.h"
#define PORT_RETRIES 3
#define MIN_USER_PORT (IPPORT_RESERVED + 1)
#define MAX_USER_PORT 0xffff
#define RANDOM_USER_PORT ((uint16_t) ((lrand48() % \
(MAX_USER_PORT - MIN_USER_PORT + 1)) + MIN_USER_PORT))
/* Static functions */
static int _slurm_connect(int __fd, struct sockaddr const * __addr,
socklen_t __len);
/****************************************************************
* MIDDLE LAYER MSG FUNCTIONS
****************************************************************/
/*
* Pick a random port number to use. Use this if the system
* selected port can't connect. This may indicate that the
* port/address of both the client and server match a defunct
* socket record in TIME_WAIT state.
*/
static void _sock_bind_wild(int sockfd)
{
int rc, retry;
slurm_addr_t sin;
static bool seeded = false;
if (!seeded) {
seeded = true;
srand48((long int) (time(NULL) + getpid()));
}
slurm_setup_addr(&sin, RANDOM_USER_PORT);
for (retry=0; retry < PORT_RETRIES ; retry++) {
rc = bind(sockfd, (struct sockaddr *) &sin, sizeof(sin));
if (rc >= 0)
break;
slurm_set_port(&sin, RANDOM_USER_PORT);
}
return;
}
extern ssize_t slurm_msg_recvfrom_timeout(void *tls_conn, char **pbuf,
size_t *lenp, int timeout)
{
ssize_t len;
uint32_t msglen;
len = slurm_recv_timeout(tls_conn, (char *) &msglen, sizeof(msglen),
timeout);
if (len < ((ssize_t) sizeof(msglen)))
return SLURM_ERROR;
msglen = ntohl(msglen);
if (msglen > MAX_MSG_SIZE)
slurm_seterrno_ret(SLURM_PROTOCOL_INSANE_MSG_LENGTH);
/*
* Allocate memory on heap for message
*/
if (!(*pbuf = try_xmalloc(msglen)))
slurm_seterrno_ret(ENOMEM);
if (slurm_recv_timeout(tls_conn, *pbuf, msglen, timeout) != msglen) {
xfree(*pbuf);
*pbuf = NULL;
return SLURM_ERROR;
}
*lenp = msglen;
return (ssize_t) msglen;
}
static int _writev_timeout(int fd, void *tls_conn, struct iovec *iov,
int iovcnt, int timeout)
{
int tot_bytes_sent = 0;
size_t size = 0;
int fd_flags;
struct pollfd ufds;
struct timeval tstart;
char temp[2];
if (tls_conn)
fd = conn_g_get_fd(tls_conn);
ufds.fd = fd;
ufds.events = POLLOUT;
fd_flags = fcntl(fd, F_GETFL);
fd_set_nonblocking(fd);
gettimeofday(&tstart, NULL);
for (int i = 0; i < iovcnt; i++)
size += iov[i].iov_len;
while (true) {
ssize_t bytes_sent = 0;
int rc;
int timeleft = timeout - timeval_tot_wait(&tstart);
if (conn_g_peek(tls_conn))
goto ready;
if (timeleft <= 0) {
debug("%s at %d of %zu, timeout",
__func__, tot_bytes_sent, size);
errno = SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT;
tot_bytes_sent = SLURM_ERROR;
break;
}
if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
if ((rc == 0) || (errno == EINTR) || (errno == EAGAIN))
continue;
else {
debug("%s at %d of %zu, poll error: %s",
__func__, tot_bytes_sent, size,
strerror(errno));
errno = SLURM_COMMUNICATIONS_SEND_ERROR;
tot_bytes_sent = SLURM_ERROR;
break;
}
}
/*
* Check here to make sure the socket really is there.
* If not then exit out and notify the sender. This
* is here since a write doesn't always tell you the
* socket is gone, but getting 0 back from a
* nonblocking read means just that.
*/
if (ufds.revents & POLLERR) {
int e;
if ((rc = fd_get_socket_error(fd, &e)))
debug("%s: Socket POLLERR, fd_get_socket_error failed: %s",
__func__, slurm_strerror(rc));
else
debug("%s: Socket POLLERR: %s",
__func__, slurm_strerror(e));
errno = e;
tot_bytes_sent = SLURM_ERROR;
break;
}
if ((ufds.revents & POLLHUP) || (ufds.revents & POLLNVAL) ||
(recv(fd, &temp, 1, 0) == 0)) {
int so_err;
if ((rc = fd_get_socket_error(fd, &so_err)))
debug2("%s: Socket no longer there, fd_get_socket_error failed: %s",
__func__, slurm_strerror(rc));
else
debug2("%s: Socket no longer there: %s",
__func__, slurm_strerror(so_err));
errno = so_err;
tot_bytes_sent = SLURM_ERROR;
break;
}
if ((ufds.revents & POLLOUT) != POLLOUT) {
error("%s: Poll failure, revents:%d",
__func__, ufds.revents);
}
ready:
if (tls_conn) {
bytes_sent = conn_g_sendv(tls_conn, iov, iovcnt);
} else {
bytes_sent = writev(fd, iov, iovcnt);
}
if (bytes_sent < 0) {
if (errno == EINTR)
continue;
log_flag(NET, "%s: [fd:%d] writev() sent %zd/%zu bytes failed: %m",
__func__, fd, bytes_sent, size);
if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
/* poll() lied to us */
usleep(10000);
continue;
}
errno = SLURM_COMMUNICATIONS_SEND_ERROR;
tot_bytes_sent = SLURM_ERROR;
break;
}
if (bytes_sent == 0) {
/*
* If driver false reports POLLIN but then does not
* provide any output: try poll() again.
*/
log_flag(NET, "%s: [fd:%d] writev() sent zero bytes out of %d/%zu",
__func__, fd, tot_bytes_sent, size);
continue;
}
tot_bytes_sent += bytes_sent;
if (tot_bytes_sent >= size) {
log_flag(NET, "%s: [fd:%d] writev() completed sending %d/%zu bytes",
__func__, fd, tot_bytes_sent, size);
break;
}
/* partial write, need to adjust iovec before next call */
for (int i = 0; i < iovcnt; i++) {
if (bytes_sent < iov[i].iov_len) {
iov[i].iov_base += bytes_sent;
iov[i].iov_len -= bytes_sent;
break;
}
bytes_sent -= iov[i].iov_len;
iov[i].iov_base = NULL;
iov[i].iov_len = 0;
}
}
/* Reset fd flags to prior state, preserve errno */
if (fd_flags != -1) {
int slurm_err = errno;
if (fcntl(fd, F_SETFL, fd_flags) < 0)
error("%s: fcntl(F_SETFL) error: %m", __func__);
errno = slurm_err;
}
return tot_bytes_sent;
}
/*
* Send slurm message with timeout
* RET message size (as specified in argument) or SLURM_ERROR on error
*/
extern int slurm_send_timeout(void *tls_conn, char *buf, size_t size,
int timeout)
{
struct iovec iov = { .iov_base = buf, .iov_len = size };
return _writev_timeout(-1, tls_conn, &iov, 1, timeout);
}
extern ssize_t slurm_msg_sendto(void *tls_conn, char *buffer, size_t size)
{
struct iovec iov[2];
uint32_t usize;
int len;
SigFunc *ohandler;
int timeout = slurm_conf.msg_timeout * 1000;
/*
* Ignore SIGPIPE so that send can return a error code if the
* other side closes the socket
*/
ohandler = xsignal(SIGPIPE, SIG_IGN);
iov[0].iov_base = &usize;
iov[0].iov_len = sizeof(usize);
iov[1].iov_base = buffer;
iov[1].iov_len = size;
usize = htonl(iov[1].iov_len);
len = _writev_timeout(-1, tls_conn, iov, 2, timeout);
xsignal(SIGPIPE, ohandler);
/* Returned size should not include the 4-byte length header. */
if (len < 0)
return SLURM_ERROR;
return size;
}
extern ssize_t slurm_msg_sendto_socket(int fd, char *buffer, size_t size)
{
struct iovec iov[2];
uint32_t usize;
int len;
SigFunc *ohandler;
int timeout = slurm_conf.msg_timeout * 1000;
/*
* Ignore SIGPIPE so that send can return a error code if the
* other side closes the socket
*/
ohandler = xsignal(SIGPIPE, SIG_IGN);
iov[0].iov_base = &usize;
iov[0].iov_len = sizeof(usize);
iov[1].iov_base = buffer;
iov[1].iov_len = size;
usize = htonl(iov[1].iov_len);
len = _writev_timeout(fd, NULL, iov, 2, timeout);
xsignal(SIGPIPE, ohandler);
/* Returned size should not include the 4-byte length header. */
if (len < 0)
return SLURM_ERROR;
return size;
}
extern ssize_t slurm_bufs_sendto(void *tls_conn, msg_bufs_t *buffers)
{
struct iovec iov[4];
int len;
uint32_t usize;
SigFunc *ohandler;
int timeout = slurm_conf.msg_timeout * 1000;
xassert(buffers);
/*
* Ignore SIGPIPE so that send can return a error code if the other
* side closes the socket
*/
ohandler = xsignal(SIGPIPE, SIG_IGN);
/* auth portion is optional. header and body are mandatory. */
iov[0].iov_base = &usize;
iov[0].iov_len = sizeof(usize);
iov[1].iov_base = get_buf_data(buffers->header);
iov[1].iov_len = get_buf_offset(buffers->header);
iov[2].iov_base = buffers->auth ? get_buf_data(buffers->auth) : NULL;
iov[2].iov_len = buffers->auth ? get_buf_offset(buffers->auth) : 0;
iov[3].iov_base = get_buf_data(buffers->body);
iov[3].iov_len = get_buf_offset(buffers->body);
usize = htonl(iov[1].iov_len + iov[2].iov_len + iov[3].iov_len);
len = _writev_timeout(-1, tls_conn, iov, 4, timeout);
xsignal(SIGPIPE, ohandler);
return len;
}
/* Get slurm message with timeout
* RET message size (as specified in argument) or SLURM_ERROR on error */
extern int slurm_recv_timeout(void *tls_conn, char *buffer, size_t size,
int timeout)
{
int fd = -1;
int rc;
int recvlen = 0;
int fd_flags;
struct pollfd ufds;
struct timeval tstart;
int timeleft = timeout;
fd = conn_g_get_fd(tls_conn);
ufds.fd = fd;
ufds.events = POLLIN;
fd_flags = fcntl(fd, F_GETFL);
fd_set_nonblocking(fd);
gettimeofday(&tstart, NULL);
while (recvlen < size) {
timeleft = timeout - timeval_tot_wait(&tstart);
if (timeleft <= 0) {
debug("%s at %d of %zu, timeout", __func__, recvlen,
size);
errno = SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT;
recvlen = SLURM_ERROR;
goto done;
}
if (conn_g_peek(tls_conn))
goto ready;
if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
if ((errno == EINTR) || (errno == EAGAIN) || (rc == 0))
continue;
else {
debug("%s at %d of %zu, poll error: %m",
__func__, recvlen, size);
errno = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
recvlen = SLURM_ERROR;
goto done;
}
}
if (ufds.revents & POLLERR) {
int e, rc;
if ((rc = fd_get_socket_error(fd, &e)))
debug("%s: Socket POLLERR: fd_get_socket_error failed: %s",
__func__, slurm_strerror(rc));
else
debug("%s: Socket POLLERR: %s",
__func__, slurm_strerror(e));
errno = e;
recvlen = SLURM_ERROR;
goto done;
}
if ((ufds.revents & POLLNVAL) ||
((ufds.revents & POLLHUP) &&
((ufds.revents & POLLIN) == 0))) {
int so_err, rc;
if ((rc = fd_get_socket_error(fd, &so_err))) {
debug2("%s: Socket no longer there: fd_get_socket_error failed: %s",
__func__, slurm_strerror(rc));
errno = rc;
} else {
debug2("%s: Socket no longer there: %s",
__func__, slurm_strerror(so_err));
errno = so_err;
}
recvlen = SLURM_ERROR;
goto done;
}
if ((ufds.revents & POLLIN) != POLLIN) {
error("%s: Poll failure, revents:%d",
__func__, ufds.revents);
continue;
}
ready:
rc = conn_g_recv(tls_conn, &buffer[recvlen], (size - recvlen));
if (rc < 0) {
if ((errno == EINTR) || (errno == EAGAIN)) {
log_flag(NET, "%s: recv(fd:%d) got %m. retrying.",
__func__, fd);
continue;
} else {
debug("%s at %d of %zu, recv error: %m",
__func__, recvlen, size);
errno = SLURM_COMMUNICATIONS_RECEIVE_ERROR;
recvlen = SLURM_ERROR;
goto done;
}
}
if (rc == 0) {
log_flag(NET, "%s: recv(fd:%d)=0 at %d/%zu bytes",
__func__, fd, recvlen, size);
if (recvlen > 0)
errno = ESLURM_PROTOCOL_INCOMPLETE_PACKET;
else
errno = SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT;
recvlen = SLURM_ERROR;
goto done;
}
recvlen += rc;
}
done:
/* Reset fd flags to prior state, preserve errno */
if (fd_flags != -1) {
int slurm_err = errno;
if (fcntl(fd, F_SETFL, fd_flags) < 0)
error("%s: fcntl(F_SETFL) error: %m", __func__);
errno = slurm_err;
}
return recvlen;
}
extern int slurm_init_msg_engine(slurm_addr_t *addr, bool quiet)
{
int rc;
int fd;
int log_lvl = LOG_LEVEL_ERROR;
const int one = 1;
const size_t sz1 = sizeof(one);
if (quiet)
log_lvl = LOG_LEVEL_DEBUG;
if ((fd = socket(addr->ss_family, SOCK_STREAM | SOCK_CLOEXEC,
IPPROTO_TCP)) < 0) {
format_print(log_lvl, "Error creating slurm stream socket: %m");
return fd;
}
rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sz1);
if (rc < 0) {
format_print(log_lvl, "setsockopt SO_REUSEADDR failed: %m");
goto error;
}
rc = bind(fd, (struct sockaddr const *) addr, sizeof(*addr));
if (rc < 0) {
format_print(log_lvl, "Error binding slurm stream socket: %m");
goto error;
}
if (listen(fd, SLURM_DEFAULT_LISTEN_BACKLOG) < 0) {
format_print(log_lvl,
"Error listening on slurm stream socket: %m");
rc = SLURM_ERROR;
goto error;
}
return fd;
error:
(void) close(fd);
return rc;
}
/* Await a connection on socket FD.
* When a connection arrives, open a new socket to communicate with it,
* set *ADDR (which is *ADDR_LEN bytes long) to the address of the connecting
* peer and *ADDR_LEN to the address's actual length, and return the
* new socket's descriptor, or -1 for errors. */
extern int slurm_accept_conn(int fd, slurm_addr_t *addr)
{
socklen_t len = sizeof(*addr);
int sock = accept4(fd, (struct sockaddr *) addr, &len, SOCK_CLOEXEC);
net_set_nodelay(sock, true, NULL);
return sock;
}
extern void *slurm_accept_msg_conn(int fd, slurm_addr_t *addr)
{
socklen_t len = sizeof(*addr);
int sock = -1;
void *tls_conn = NULL;
conn_args_t tls_args = {
.mode = TLS_CONN_SERVER,
};
sock = accept4(fd, (struct sockaddr *) addr, &len, SOCK_CLOEXEC);
if (sock < 0) {
error("%s: Unable to accept() connection to address %pA: %m",
__func__, addr);
return NULL;
}
tls_args.input_fd = tls_args.output_fd = sock;
net_set_nodelay(sock, true, NULL);
if (!(tls_conn = conn_g_create(&tls_args))) {
error("%s: Unable to create server TLS connection to address %pA: %m",
__func__, addr);
(void) close(sock);
}
return tls_conn;
}
extern int slurm_open_stream(slurm_addr_t *addr, bool retry)
{
int retry_cnt = 0, ehostunreach_cnt = 0;
int fd, rc = SLURM_SUCCESS;
uint32_t sleep_ns = 500 * NSEC_IN_MSEC;
if ((slurm_addr_is_unspec(addr)) || (slurm_get_port(addr) == 0)) {
error("Error connecting, bad data: family = %u, port = %u",
addr->ss_family, slurm_get_port(addr));
return SLURM_ERROR;
}
while (true) {
fd = socket(addr->ss_family, SOCK_STREAM | SOCK_CLOEXEC,
IPPROTO_TCP);
if (fd < 0) {
error("Error creating slurm stream socket: %m");
return SLURM_ERROR;
}
net_set_nodelay(fd, true, NULL);
if (retry_cnt) {
if (retry_cnt == 1) {
debug3("Error connecting, "
"picking new stream port");
}
_sock_bind_wild(fd);
}
rc = _slurm_connect(fd, (struct sockaddr const *)addr,
sizeof(*addr));
if (!rc) {
/* success */
break;
}
if (slurm_conf.host_unreach_retry_count &&
(rc == EHOSTUNREACH)) {
if (ehostunreach_cnt >=
slurm_conf.host_unreach_retry_count) {
debug2("Error connecting to %pA (%s), host_unreach_retry_count (%d) reached",
addr, strerror(rc),
slurm_conf.host_unreach_retry_count);
goto error;
}
ehostunreach_cnt++;
debug2("Error connecting to %pA (%s), trying again",
addr, strerror(rc));
/*
* Assume issue was temporary network outage. Wait for
* a bit and hope it comes back.
*/
slurm_nanosleep(0, sleep_ns);
} else if (((rc != ECONNREFUSED) && (rc != ETIMEDOUT)) ||
(!retry) || (retry_cnt >= PORT_RETRIES)) {
goto error;
} else {
retry_cnt++;
}
(void) close(fd);
}
errno = rc;
return fd;
error:
debug2("Error connecting slurm stream socket at %pA: %s", addr,
strerror(rc));
(void) close(fd);
/* Always set errno as upstream callers expect it. */
errno = rc;
return SLURM_ERROR;
}
extern int slurm_open_unix_stream(char *addr_name, int sock_flags, int *fd)
{
struct sockaddr_un sa;
int rc;
xassert(addr_name);
if (strlen(addr_name) >= sizeof(sa.sun_path)) {
rc = ESLURMD_INVALID_SOCKET_NAME_LEN;
error("%s: [%s]: %s", __func__, addr_name, slurm_strerror(rc));
return rc;
}
if ((*fd = socket(AF_UNIX, SOCK_STREAM | sock_flags, 0)) < 0) {
rc = errno;
error("%s: [%s]: socket() failed: %m", __func__, addr_name);
return rc;
}
memset(&sa, 0, sizeof(sa));
sa.sun_family = AF_UNIX;
strcpy(sa.sun_path, addr_name);
while ((rc = connect(*fd, (struct sockaddr *) &sa, SUN_LEN(&sa))) &&
(errno == EINTR))
; /* empty loop */
if (rc < 0) {
rc = errno;
debug2("%s: [%s]: connect() failed: %m", __func__, addr_name);
fd_close(fd);
}
return rc;
}
/* Put the local address of FD into *ADDR and its length in *LEN. */
extern int slurm_get_stream_addr(int fd, slurm_addr_t *addr )
{
socklen_t size = sizeof(*addr);
return getsockname(fd, (struct sockaddr *) addr, &size);
}
/* Open a connection on socket FD to peer at ADDR (which LEN bytes long).
* For connectionless socket types, just set the default address to send to
* and the only address from which to accept transmissions.
* Return SLURM_SUCCESS or error */
static int _slurm_connect (int __fd, struct sockaddr const * __addr,
socklen_t __len)
{
/* From "man connect": Note that for IP sockets the timeout
* may be very long when syncookies are enabled on the server.
*
* Timeouts in excess of 3 minutes have been observed, resulting
* in serious problems for slurmctld. Making the connect call
* non-blocking and polling seems to fix the problem. */
int rc, flags, flags_save;
struct pollfd ufds;
flags = fcntl(__fd, F_GETFL);
flags_save = flags;
if (flags == -1) {
error("%s: fcntl(F_GETFL) error: %m", __func__);
flags = 0;
}
if (fcntl(__fd, F_SETFL, flags | O_NONBLOCK) < 0)
error("%s: fcntl(F_SETFL) error: %m", __func__);
rc = connect(__fd , __addr , __len);
if ((rc < 0) && (errno != EINPROGRESS))
return errno;
if (rc == 0)
goto done; /* connect completed immediately */
ufds.fd = __fd;
ufds.events = POLLIN | POLLOUT;
ufds.revents = 0;
again:
rc = poll(&ufds, 1, slurm_conf.tcp_timeout * 1000);
if (rc == -1) {
int lerrno = errno;
/* poll failed */
if (lerrno == EINTR) {
/* NOTE: connect() is non-interruptible in Linux */
debug2("%s: poll() failed for %pA: %s",
__func__, __addr, slurm_strerror(lerrno));
goto again;
}
error("%s: poll() failed for %pA: %s",
__func__, __addr, slurm_strerror(lerrno));
return lerrno;
} else if (rc == 0) {
/* poll timed out before any socket events */
debug2("%s: connect to %pA in %us: %s",
__func__, __addr, slurm_conf.tcp_timeout,
slurm_strerror(ETIMEDOUT));
return ETIMEDOUT;
} else if (ufds.revents & POLLERR) {
int err;
/* poll saw some event on the socket
* We need to check if the connection succeeded by
* using getsockopt. The revent is not necessarily
* POLLERR when the connection fails! */
if ((rc = fd_get_socket_error(__fd, &err)))
return rc;
/* NOTE: Connection refused is typically reported for
* non-responsived nodes plus attempts to communicate
* with terminated srun commands. */
debug2("%s: failed to connect to %pA: %s",
__func__, __addr, slurm_strerror(err));
return err;
}
done:
if (flags_save != -1) {
if (fcntl(__fd, F_SETFL, flags_save) < 0)
error("%s: fcntl(F_SETFL) error: %m", __func__);
}
return SLURM_SUCCESS;
}
extern void slurm_set_addr(slurm_addr_t *addr, uint16_t port, char *host)
{
struct addrinfo *ai_ptr, *ai_start;
log_flag(NET, "%s: called with port='%u' host='%s'",
__func__, port, host);
/*
* xgetaddrinfo uses hints from our config to determine what address
* families to return
*/
ai_start = xgetaddrinfo_port(host, port);
if (!ai_start) {
error_in_daemon("%s: Unable to resolve \"%s\"", __func__, host);
addr->ss_family = AF_UNSPEC;
return;
}
/*
* When host is null, assume we are trying to bind here.
* Make sure we return the v6 wildcard address first (when applicable)
* since we want v6 to be the default.
*/
if (host || !(slurm_conf.conf_flags & CONF_FLAG_IPV6_ENABLED)) {
ai_ptr = ai_start;
} else {
for (ai_ptr = ai_start; ai_ptr; ai_ptr = ai_ptr->ai_next) {
if (ai_ptr->ai_family == AF_INET6)
break;
}
/* fall back to whatever was available */
if (!ai_ptr)
ai_ptr = ai_start;
}
memcpy(addr, ai_ptr->ai_addr, ai_ptr->ai_addrlen);
log_flag(NET, "%s: update addr. addr='%pA'", __func__, addr);
freeaddrinfo(ai_start);
}
extern void slurm_pack_addr(slurm_addr_t *addr, buf_t *buffer)
{
pack16(addr->ss_family, buffer);
if (addr->ss_family == AF_INET6) {
struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) addr;
packmem(in6->sin6_addr.s6_addr, 16, buffer);
pack16(in6->sin6_port, buffer);
} else if (addr->ss_family == AF_INET) {
struct sockaddr_in *in = (struct sockaddr_in *) addr;
pack32(in->sin_addr.s_addr, buffer);
pack16(in->sin_port, buffer);
}
}
extern int slurm_unpack_addr_no_alloc(slurm_addr_t *addr, buf_t *buffer)
{
uint16_t tmp_uint16 = 0;
/* ss_family is only uint8_t on BSD. */
safe_unpack16(&tmp_uint16, buffer);
addr->ss_family = tmp_uint16;
if (addr->ss_family == AF_INET6) {
uint32_t size;
char *buffer_addr;
struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) addr;
safe_unpackmem_ptr(&buffer_addr, &size, buffer);
if (size != 16)
goto unpack_error;
memcpy(&in6->sin6_addr.s6_addr, buffer_addr, 16);
safe_unpack16(&in6->sin6_port, buffer);
} else if (addr->ss_family == AF_INET) {
struct sockaddr_in *in = (struct sockaddr_in *) addr;
safe_unpack32(&in->sin_addr.s_addr, buffer);
safe_unpack16(&in->sin_port, buffer);
} else {
memset(addr, 0, sizeof(*addr));
}
return SLURM_SUCCESS;
unpack_error:
return SLURM_ERROR;
}