blob: 5804fa84a35f62417ee0922670591a7628719948 [file] [log] [blame]
/*****************************************************************************\
** info.c - job/node info related functions
*****************************************************************************
* Copyright (C) 2011-2012 National University of Defense Technology.
* Written by Hongjia Cao <hjcao@nudt.edu.cn>.
* All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include "pmi.h"
#include "setup.h"
#include "client.h"
#if !defined(__FreeBSD__)
#include <net/if.h>
#endif
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <ifaddrs.h>
#include <netdb.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "slurm/slurm.h"
#include "src/srun/launch.h"
#include "src/common/strlcpy.h"
#include "src/interfaces/switch.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/xmalloc.h"
/* Number of char* slots added to node_attr each time node_attr_put() finds
 * the array full; every attribute uses two slots (key and value). */
#define NODE_ATTR_SIZE_INC 8
/* pending node attribute get request */
typedef struct nag_req {
/* descriptor the response is sent on once the key is put */
int fd;
/* rank of the requesting task; only used in log messages here */
int rank;
/* name of the attribute that was asked for */
char key[PMI2_MAX_KEYLEN];
/* next pending request in the singly linked list */
struct nag_req *next;
} nag_req_t;
/* head of the pending request list; enqueue_nag_req() inserts at the head */
static nag_req_t *nag_req_list = NULL;
/* node attributes */
/* number of key/value pairs currently stored in node_attr */
static int na_cnt = 0;
/* number of char* slots currently allocated for node_attr */
static int na_size = 0;
/* flat array of strings: key of pair i at KEY_INDEX(i), value at VAL_INDEX(i) */
static char **node_attr = NULL;
/*
 * Slots of the key and of the value string of attribute pair i in a flat
 * array that stores the pairs back to back (key, value, key, value, ...).
 * The argument is parenthesized so that an expression argument such as
 * KEY_INDEX(n + 1) expands to the intended slot.
 */
#define KEY_INDEX(i) ((i) * 2)
#define VAL_INDEX(i) ((i) * 2 + 1)
/* Forward declaration: ifconfig() is defined further down in this file but is
 * already needed by job_attr_get_netinfo(). */
static char *ifconfig(void);
/*
 * enqueue_nag_req()
 *
 * Record a pending node attribute get request so that it can be answered
 * when the attribute is put later on.
 *
 * fd   - descriptor stored with the request
 * rank - rank stored with the request
 * key  - attribute name; copied, truncated to PMI2_MAX_KEYLEN
 *
 * Always returns SLURM_SUCCESS.
 */
extern int
enqueue_nag_req(int fd, int rank, char *key)
{
	nag_req_t *pending = xmalloc(sizeof(nag_req_t));

	strlcpy(pending->key, key, PMI2_MAX_KEYLEN);
	pending->rank = rank;
	pending->fd = fd;

	/* the new request becomes the head of the list */
	pending->next = nag_req_list;
	nag_req_list = pending;

	return SLURM_SUCCESS;
}
/*
 * node_attr_put()
 *
 * Store a copy of the key/value pair in the node attribute table, then
 * answer and drop every pending get request whose key matches.
 *
 * key - attribute name
 * val - attribute value
 *
 * Always returns SLURM_SUCCESS; a failed send is only logged.
 */
extern int
node_attr_put(char *key, char *val)
{
	nag_req_t **link;
	client_resp_t *reply = NULL;

	debug3("mpi/pmi2: node_attr_put: %s=%s", key, val);

	/* make room for one more pair when the table is full */
	if (na_cnt * 2 >= na_size) {
		na_size += NODE_ATTR_SIZE_INC;
		xrealloc(node_attr, na_size * sizeof(char*));
	}
	node_attr[KEY_INDEX(na_cnt)] = xstrdup(key);
	node_attr[VAL_INDEX(na_cnt)] = xstrdup(val);
	na_cnt ++;

	/*
	 * Walk the pending list through the address of the link that points
	 * at the current request, so a match can be unlinked in place.
	 */
	link = &nag_req_list;
	while (*link != NULL) {
		nag_req_t *pending = *link;

		if (xstrncmp(key, pending->key, PMI2_MAX_KEYLEN) != 0) {
			/* some other key: leave it queued */
			link = &pending->next;
			continue;
		}

		debug("mpi/pmi2: found pending request from rank %d",
		      pending->rank);

		/* the response is built once and reused for every match */
		if (reply == NULL) {
			reply = client_resp_new();
			client_resp_append(reply,
					   CMD_KEY"="
					   GETNODEATTRRESP_CMD";"
					   RC_KEY"=0;"
					   FOUND_KEY"="TRUE_VAL";"
					   VALUE_KEY"=%s;", val);
		}
		if (client_resp_send(reply, pending->fd) != SLURM_SUCCESS) {
			error("mpi/pmi2: failed to send '"
			      GETNODEATTRRESP_CMD "' to task %d",
			      pending->rank);
		}

		/* unlink and release; *link now names the successor */
		*link = pending->next;
		xfree(pending);
	}

	if (reply != NULL) {
		client_resp_free (reply);
	}
	debug3("mpi/pmi2: out node_attr_put");
	return SLURM_SUCCESS;
}
/*
 * node_attr_get()
 *
 * Look up a node attribute by name. The first stored pair whose key equals
 * `key` wins.
 *
 * Returns the stored value string itself (not a copy, so the caller must
 * not free it), or NULL when no pair matches.
 */
extern char *
node_attr_get(char *key)
{
	char *found = NULL;
	int slot;

	debug3("mpi/pmi2: node_attr_get: key=%s", key);

	for (slot = 0; slot < na_cnt; slot++) {
		if (xstrcmp(key, node_attr[KEY_INDEX(slot)]) != 0)
			continue;
		found = node_attr[VAL_INDEX(slot)];
		break;
	}

	debug3("mpi/pmi2: out node_attr_get: val=%s", found);
	return found;
}
/* job_attr_get_netinfo()
 *
 * Write the network interface description produced by ifconfig() into attr,
 * truncated to PMI2_MAX_VALLEN bytes including the terminator.
 *
 * key  - requested attribute name (not used here)
 * attr - caller supplied buffer of at least PMI2_MAX_VALLEN bytes
 *
 * Returns attr, or NULL when the interface information could not be
 * obtained; attr is left untouched in that case.
 */
static char *
job_attr_get_netinfo(char *key, char *attr)
{
	char *netinfo;

	/* get network information of node in netinfo, xmalloc'ed
	 */
	netinfo = ifconfig();
	if (netinfo == NULL) {
		/* ifconfig() reports its own failure; never hand NULL to %s */
		return NULL;
	}

	snprintf(attr, PMI2_MAX_VALLEN, "%s", netinfo);
	xfree(netinfo);

	debug3("%s: netinfo %s", __func__, attr);

	return attr;
}
/* job_attr_get()
 *
 * Return the value of the job attribute named by key.
 *
 * JOB_ATTR_PROC_MAP   - job_info.proc_mapping itself is returned
 * JOB_ATTR_UNIV_SIZE  - job_info.ntasks formatted as a decimal number
 * JOB_ATTR_RESV_PORTS - copy of job_info.resv_ports, NULL if there is none
 * JOB_ATTR_NETINFO    - network interface description of this node; any key
 *                       that starts with JOB_ATTR_NETINFO is accepted
 *
 * Every value except the process mapping is written to a static buffer, so
 * the returned string is overwritten by the next call and must not be freed.
 *
 * Returns NULL for an unknown key or when the value is not available.
 */
extern char *
job_attr_get(char *key)
{
	static char attr[PMI2_MAX_VALLEN];

	if (!xstrcmp(key, JOB_ATTR_PROC_MAP)) {
		return job_info.proc_mapping;
	}

	if (!xstrcmp(key, JOB_ATTR_UNIV_SIZE)) {
		snprintf(attr, PMI2_MAX_VALLEN, "%d", job_info.ntasks);
		return attr;
	}

	if (!xstrcmp(key, JOB_ATTR_RESV_PORTS)) {
		if (! job_info.resv_ports)
			return NULL;
		debug3("%s: SLURM_STEP_RESV_PORTS %s", __func__, job_info.resv_ports);
		snprintf(attr, PMI2_MAX_VALLEN, "%s", job_info.resv_ports);
		return attr;
	}

	/*
	 * Match on the attribute name as a prefix. An ordering test
	 * (xstrcmp() >= 0) would also accept every unrelated key that merely
	 * sorts after JOB_ATTR_NETINFO and answer it with network info
	 * instead of reporting it as unknown.
	 */
	if (!xstrncmp(key, JOB_ATTR_NETINFO, strlen(JOB_ATTR_NETINFO))) {
		/* yields attr on success and NULL on failure */
		return job_attr_get_netinfo(key, attr);
	}

	return NULL;
}
/* ifconfig()
 *
 * Return information about network interfaces.
 *
 * The result has the form
 *   (hostname,(ifname,IP_V4,address),(ifname,IP_V6,address),...)
 * with one entry per IPv4/IPv6 address reported by getifaddrs(). Loopback
 * interfaces are skipped, except on FreeBSD where that check is compiled out.
 *
 * Returns an xmalloc'ed string the caller must xfree(), or NULL if
 * getifaddrs() fails. All writes are bounded by the allocated size: should
 * the size estimate ever be too small, the list is cut at the last complete
 * entry instead of overrunning the buffer.
 */
static char *
ifconfig(void)
{
	struct ifaddrs *ifaddr;
	struct ifaddrs *ifa;
	char addr[NI_MAXHOST];
	/* one extra byte so that a maximum length name is still terminated */
	char hostname[HOST_NAME_MAX + 1];
	char *buf;
	size_t size;
	size_t used;
	int n;

	if (getifaddrs(&ifaddr) == -1) {
		error("%s: getifaddrs failed %m", __func__);
		return NULL;
	}

	n = 0;
	for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next)
		++n;

	/* this should be a good guess of the size we need.
	 */
	size = ((size_t) HOST_NAME_MAX + n) * 64;
	buf = xmalloc(size);

	if (gethostname(hostname, sizeof(hostname)) != 0) {
		error("%s: gethostname failed %m", __func__);
		hostname[0] = '\0';
	}
	/* gethostname() need not terminate a truncated name */
	hostname[sizeof(hostname) - 1] = '\0';

	/* one byte is always held back for the closing ')' */
	n = snprintf(buf, size - 1, "(%s", hostname);
	if (n < 0) {
		buf[0] = '\0';
		used = 0;
	} else if ((size_t) n >= size - 1) {
		used = size - 2;
	} else {
		used = n;
	}

	/* Walk through linked list, maintaining head pointer so we
	 * can free list later
	 */
	for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
		const char *family;	/* name used in log messages */
		const char *label;	/* tag written to the result */
		socklen_t salen;
		size_t room;
		int s;

		if (ifa->ifa_addr == NULL)
			continue;
#if !defined(__FreeBSD__)
		if (ifa->ifa_flags & IFF_LOOPBACK)
			continue;
#endif
		if (ifa->ifa_addr->sa_family == AF_INET) {
			family = "AF_INET";
			label = "IP_V4";
			salen = sizeof(struct sockaddr_in);
		} else if (ifa->ifa_addr->sa_family == AF_INET6) {
			family = "AF_INET6";
			label = "IP_V6";
			salen = sizeof(struct sockaddr_in6);
		} else {
			continue;
		}

		s = getnameinfo(ifa->ifa_addr, salen,
				addr, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
		if (s != 0) {
			error("%s: %s getnameinfo() failed: %s",
			      __func__, family, gai_strerror(s));
			continue;
		}

		room = size - used - 1;
		n = snprintf(buf + used, room, ",(%s,%s,%s)",
			     ifa->ifa_name, label, addr);
		if (n < 0 || (size_t) n >= room) {
			/* drop the partial entry and stop adding more */
			buf[used] = '\0';
			error("%s: interface list truncated at %s",
			      __func__, ifa->ifa_name);
			break;
		}
		used += n;
	}

	buf[used++] = ')';
	buf[used] = '\0';

	debug("%s: ifconfig %s", __func__, buf);

	freeifaddrs(ifaddr);

	return buf;
}