| /* |
| * Copyright (c) 2005-2014 Intel Corporation. All rights reserved. |
| * |
| * This software is available to you under a choice of one of two |
| * licenses. You may choose to be licensed under the terms of the GNU |
| * General Public License (GPL) Version 2, available from the file |
| * COPYING in the main directory of this source tree, or the |
| * OpenIB.org BSD license below: |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials |
| * provided with the distribution. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <config.h> |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <glob.h> |
| #include <stdio.h> |
| #include <fcntl.h> |
| #include <errno.h> |
| #include <stdint.h> |
| #include <poll.h> |
| #include <unistd.h> |
| #include <pthread.h> |
| #include <endian.h> |
| #include <stddef.h> |
| #include <netdb.h> |
| #include <syslog.h> |
| #include <limits.h> |
| #include <sys/sysmacros.h> |
| |
| #include "cma.h" |
| #include "indexer.h" |
| #include <infiniband/driver.h> |
| #include <infiniband/marshall.h> |
| #include <rdma/rdma_cma.h> |
| #include <rdma/rdma_cma_abi.h> |
| #include <rdma/rdma_verbs.h> |
| #include <infiniband/ib.h> |
| #include <util/util.h> |
| #include <util/rdma_nl.h> |
| #include <ccan/list.h> |
| |
| #define CMA_INIT_CMD(req, req_size, op) \ |
| do { \ |
| memset(req, 0, req_size); \ |
| (req)->cmd = UCMA_CMD_##op; \ |
| (req)->in = req_size - sizeof(struct ucma_abi_cmd_hdr); \ |
| } while (0) |
| |
| #define CMA_INIT_CMD_RESP(req, req_size, op, resp, resp_size) \ |
| do { \ |
| CMA_INIT_CMD(req, req_size, op); \ |
| (req)->out = resp_size; \ |
| (req)->response = (uintptr_t) (resp); \ |
| } while (0) |
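
/*
 * Illustrative expansion (not part of the ABI headers): CMA_INIT_CMD_RESP
 * zeroes the request, selects the opcode, and tells the kernel where to
 * write its reply.  For the DESTROY_ID command used below it expands
 * roughly to:
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.cmd = UCMA_CMD_DESTROY_ID;
 *	cmd.in = sizeof(cmd) - sizeof(struct ucma_abi_cmd_hdr);
 *	cmd.out = sizeof(resp);
 *	cmd.response = (uintptr_t) &resp;
 */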
| |
| #define UCMA_INVALID_IB_INDEX -1 |
| |
| struct cma_port { |
| uint8_t link_layer; |
| }; |
| |
| struct cma_device { |
| struct ibv_device *dev; |
| struct list_node entry; |
| struct ibv_context *verbs; |
| struct ibv_pd *pd; |
| struct ibv_xrcd *xrcd; |
| struct cma_port *port; |
| __be64 guid; |
| int port_cnt; |
| int refcnt; |
| int max_qpsize; |
| uint8_t max_initiator_depth; |
| uint8_t max_responder_resources; |
| int ibv_idx; |
| uint8_t is_device_dead : 1; |
| }; |
| |
| struct cma_id_private { |
| struct rdma_cm_id id; |
| struct cma_device *cma_dev; |
| void *connect; |
| size_t connect_len; |
| int events_completed; |
| int connect_error; |
| int sync; |
| pthread_cond_t cond; |
| pthread_mutex_t mut; |
| uint32_t handle; |
| struct cma_multicast *mc_list; |
| struct ibv_qp_init_attr *qp_init_attr; |
| uint8_t initiator_depth; |
| uint8_t responder_resources; |
| struct ibv_ece local_ece; |
| struct ibv_ece remote_ece; |
| }; |
| |
| struct cma_multicast { |
| struct cma_multicast *next; |
| struct cma_id_private *id_priv; |
| void *context; |
| int events_completed; |
| pthread_cond_t cond; |
| uint32_t handle; |
| union ibv_gid mgid; |
| uint16_t mlid; |
| uint16_t join_flags; |
| struct sockaddr_storage addr; |
| }; |
| |
| struct cma_event { |
| struct rdma_cm_event event; |
| uint8_t private_data[RDMA_MAX_PRIVATE_DATA]; |
| struct cma_id_private *id_priv; |
| struct cma_multicast *mc; |
| }; |
| |
| static LIST_HEAD(cma_dev_list); |
/* sorted by index or guid, depending on kernel support */
| static struct ibv_device **dev_list; |
| static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; |
| static int abi_ver = -1; |
| static char dev_name[64] = "rdma_cm"; |
| static dev_t dev_cdev; |
| int af_ib_support; |
| static struct index_map ucma_idm; |
| static fastlock_t idm_lock; |
| |
| static int check_abi_version_nl_cb(struct nl_msg *msg, void *data) |
| { |
| struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; |
| uint64_t cdev64; |
| int ret; |
| |
| ret = nlmsg_parse(nlmsg_hdr(msg), 0, tb, RDMA_NLDEV_ATTR_MAX - 1, |
| rdmanl_policy); |
| if (ret < 0) |
| return ret; |
| if (!tb[RDMA_NLDEV_ATTR_CHARDEV] || !tb[RDMA_NLDEV_ATTR_CHARDEV_ABI] || |
| !tb[RDMA_NLDEV_ATTR_CHARDEV_NAME]) |
| return NLE_PARSE_ERR; |
| |
| /* Convert from huge_encode_dev to whatever glibc uses */ |
| cdev64 = nla_get_u64(tb[RDMA_NLDEV_ATTR_CHARDEV]); |
| dev_cdev = makedev((cdev64 & 0xfff00) >> 8, |
| (cdev64 & 0xff) | ((cdev64 >> 12) & 0xfff00)); |
| |
| if (!check_snprintf(dev_name, sizeof(dev_name), "%s", |
| nla_get_string(tb[RDMA_NLDEV_ATTR_CHARDEV_NAME]))) |
| return NLE_PARSE_ERR; |
| |
| /* |
| * The top 32 bits of CHARDEV_ABI are reserved for a future use, |
| * current kernels set them to 0 |
| */ |
| abi_ver = (uint32_t)nla_get_u64(tb[RDMA_NLDEV_ATTR_CHARDEV_ABI]); |
| |
| return 0; |
| } |
| |
| /* Ask the kernel for the uverbs char device information */ |
| static int check_abi_version_nl(void) |
| { |
| struct nl_sock *nl; |
| |
| nl = rdmanl_socket_alloc(); |
| if (!nl) |
| return -1; |
| if (rdmanl_get_chardev(nl, -1, "rdma_cm", check_abi_version_nl_cb, |
| NULL)) |
| goto err_socket; |
| if (abi_ver == -1) |
| goto err_socket; |
| nl_socket_free(nl); |
| return 0; |
| |
| err_socket: |
| nl_socket_free(nl); |
| return -1; |
| } |
| |
| static void check_abi_version_sysfs(void) |
| { |
| char value[8]; |
| |
| if ((ibv_read_sysfs_file(ibv_get_sysfs_path(), |
| "class/misc/rdma_cm/abi_version", |
| value, sizeof value) < 0) && |
| (ibv_read_sysfs_file(ibv_get_sysfs_path(), |
| "class/infiniband_ucma/abi_version", |
| value, sizeof value) < 0)) { |
| /* |
| * Older version of Linux do not have class/misc. To support |
| * backports, assume the most recent version of the ABI. If |
| * we're wrong, we'll simply fail later when calling the ABI. |
| */ |
| abi_ver = RDMA_USER_CM_MAX_ABI_VERSION; |
| return; |
| } |
| abi_ver = strtol(value, NULL, 10); |
| dev_cdev = 0; |
| } |
| |
| static int check_abi_version(void) |
| { |
| if (abi_ver == -1) { |
| if (check_abi_version_nl()) |
| check_abi_version_sysfs(); |
| } |
| |
| if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION || |
| abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) |
| return -1; |
| return 0; |
| } |
| |
| /* |
| * This function is called holding the mutex lock |
| * cma_dev_list must be not empty before calling this function to |
| * ensure that the lock is not acquired recursively. |
| */ |
| static void ucma_set_af_ib_support(void) |
| { |
| struct rdma_cm_id *id; |
| struct sockaddr_ib sib; |
| int ret; |
| |
| ret = rdma_create_id(NULL, &id, NULL, RDMA_PS_IB); |
| if (ret) |
| return; |
| |
| memset(&sib, 0, sizeof sib); |
| sib.sib_family = AF_IB; |
| sib.sib_sid = htobe64(RDMA_IB_IP_PS_TCP); |
| sib.sib_sid_mask = htobe64(RDMA_IB_IP_PS_MASK); |
| af_ib_support = 1; |
| ret = rdma_bind_addr(id, (struct sockaddr *) &sib); |
| af_ib_support = !ret; |
| |
| rdma_destroy_id(id); |
| } |
| |
| static struct cma_device *insert_cma_dev(struct ibv_device *dev) |
| { |
| struct cma_device *cma_dev, *p; |
| |
| cma_dev = calloc(1, sizeof(struct cma_device)); |
| if (!cma_dev) |
| return NULL; |
| |
| cma_dev->guid = ibv_get_device_guid(dev); |
| cma_dev->ibv_idx = ibv_get_device_index(dev); |
| cma_dev->dev = dev; |
| |
	/* reverse iteration, optimized for ibv_idx, which grows monotonically */
| list_for_each_rev(&cma_dev_list, p, entry) { |
| if (cma_dev->ibv_idx == UCMA_INVALID_IB_INDEX) { |
| /* index not available, sort by guid */ |
| if (be64toh(p->guid) < be64toh(cma_dev->guid)) |
| break; |
| } else { |
| if (p->ibv_idx < cma_dev->ibv_idx) |
| break; |
| } |
| } |
| list_add_after(&cma_dev_list, &p->entry, &cma_dev->entry); |
| |
| return cma_dev; |
| } |
| |
| static void remove_cma_dev(struct cma_device *cma_dev) |
| { |
| if (cma_dev->refcnt) { |
		/*
		 * Deletion was requested by sync_devices_list() while users
		 * still hold references; mark the device dead and let the
		 * last ucma_put_device() remove it.
		 */
| cma_dev->is_device_dead = true; |
| return; |
| } |
| |
| if (cma_dev->xrcd) |
| ibv_close_xrcd(cma_dev->xrcd); |
| if (cma_dev->pd) |
| ibv_dealloc_pd(cma_dev->pd); |
| if (cma_dev->verbs) |
| ibv_close_device(cma_dev->verbs); |
| free(cma_dev->port); |
| list_del_from(&cma_dev_list, &cma_dev->entry); |
| free(cma_dev); |
| } |
| |
| static int dev_cmp(const void *a, const void *b) |
| { |
| return (*(uintptr_t *)a > *(uintptr_t *)b) - (*(uintptr_t *)a < *(uintptr_t *)b); |
| } |
| |
| static int sync_devices_list(void) |
| { |
| struct ibv_device **new_list; |
| int i, j, numb_dev; |
| |
| new_list = ibv_get_device_list(&numb_dev); |
| if (!new_list) |
| return ERR(ENODEV); |
| |
| if (!numb_dev) { |
| ibv_free_device_list(new_list); |
| return ERR(ENODEV); |
| } |
| |
| qsort(new_list, numb_dev, sizeof(struct ibv_device *), dev_cmp); |
| if (unlikely(!dev_list)) { |
| /* first sync */ |
| for (j = 0; new_list[j]; j++) |
| insert_cma_dev(new_list[j]); |
| goto out; |
| } |
| |
| for (i = 0, j = 0; dev_list[i] || new_list[j];) { |
| if (dev_list[i] == new_list[j]) { |
| i++; |
| j++; |
| continue; |
| } |
| /* |
| * The device list is sorted by pointer address, |
| * so we need to compare the new list with old one. |
| * |
| * 1. If the device exists in new list, but doesn't exist in |
| * old list, we will add that device to the list. |
| * 2. If the device exists in old list, but doesn't exist in |
| * new list, we should delete it. |
| */ |
| if ((dev_list[i] > new_list[j] && new_list[j]) || |
| (!dev_list[i] && new_list[j])) { |
| insert_cma_dev(new_list[j++]); |
| continue; |
| } |
| if ((dev_list[i] < new_list[j] && dev_list[i]) || |
| (!new_list[j] && dev_list[i])) { |
| /* |
| * We will try our best to remove the entry, |
| * but if some process holds it, we will remove it |
| * later, when rdma-cm will put this resource back. |
| */ |
| struct cma_device *c, *t; |
| |
| list_for_each_safe(&cma_dev_list, c, t, entry) { |
| if (c->dev == dev_list[i]) |
| remove_cma_dev(c); |
| } |
| i++; |
| } |
| } |
| |
| ibv_free_device_list(dev_list); |
| out: |
| dev_list = new_list; |
| return 0; |
| } |
| |
| int ucma_init(void) |
| { |
| int ret; |
| |
| /* |
| * ucma_set_af_ib_support() below recursively calls to this function |
| * again under the &mut lock, so do this fast check and return |
| * immediately. |
| */ |
| if (!list_empty(&cma_dev_list)) |
| return 0; |
| |
| pthread_mutex_lock(&mut); |
| if (!list_empty(&cma_dev_list)) { |
| pthread_mutex_unlock(&mut); |
| return 0; |
| } |
| |
| fastlock_init(&idm_lock); |
| ret = check_abi_version(); |
| if (ret) { |
| ret = ERR(EPERM); |
| goto err1; |
| } |
| |
| ret = sync_devices_list(); |
| if (ret) |
| goto err1; |
| |
| ucma_set_af_ib_support(); |
| pthread_mutex_unlock(&mut); |
| return 0; |
| |
| err1: |
| fastlock_destroy(&idm_lock); |
| pthread_mutex_unlock(&mut); |
| return ret; |
| } |
| |
| static bool match(struct cma_device *cma_dev, __be64 guid, uint32_t idx) |
| { |
| if ((idx == UCMA_INVALID_IB_INDEX) || |
| (cma_dev->ibv_idx == UCMA_INVALID_IB_INDEX)) |
| return cma_dev->guid == guid; |
| |
| return cma_dev->ibv_idx == idx && cma_dev->guid == guid; |
| } |
| |
| static int ucma_init_device(struct cma_device *cma_dev) |
| { |
| struct ibv_port_attr port_attr; |
| struct ibv_device_attr attr; |
| int i, ret; |
| |
| if (cma_dev->verbs) |
| return 0; |
| |
| cma_dev->verbs = ibv_open_device(cma_dev->dev); |
| if (!cma_dev->verbs) |
| return ERR(ENODEV); |
| |
| ret = ibv_query_device(cma_dev->verbs, &attr); |
| if (ret) { |
| ret = ERR(ret); |
| goto err; |
| } |
| |
| cma_dev->port = malloc(sizeof(*cma_dev->port) * attr.phys_port_cnt); |
| if (!cma_dev->port) { |
| ret = ERR(ENOMEM); |
| goto err; |
| } |
| |
| for (i = 1; i <= attr.phys_port_cnt; i++) { |
| if (ibv_query_port(cma_dev->verbs, i, &port_attr)) |
| cma_dev->port[i - 1].link_layer = IBV_LINK_LAYER_UNSPECIFIED; |
| else |
| cma_dev->port[i - 1].link_layer = port_attr.link_layer; |
| } |
| |
| cma_dev->port_cnt = attr.phys_port_cnt; |
| cma_dev->max_qpsize = attr.max_qp_wr; |
| cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom; |
| cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom; |
| return 0; |
| |
| err: |
| ibv_close_device(cma_dev->verbs); |
| cma_dev->verbs = NULL; |
| return ret; |
| } |
| |
| static int ucma_init_all(void) |
| { |
| struct cma_device *dev; |
| int ret = 0; |
| |
| ret = ucma_init(); |
| if (ret) |
| return ret; |
| |
| pthread_mutex_lock(&mut); |
| list_for_each(&cma_dev_list, dev, entry) { |
| if (dev->is_device_dead) |
| continue; |
| |
| if (ucma_init_device(dev)) { |
| /* Couldn't initialize the device: mark it dead and continue */ |
| dev->is_device_dead = true; |
| } |
| } |
| pthread_mutex_unlock(&mut); |
| return 0; |
| } |
| |
| struct ibv_context **rdma_get_devices(int *num_devices) |
| { |
| struct ibv_context **devs = NULL; |
| struct cma_device *dev; |
| int cma_dev_cnt = 0; |
| int i = 0; |
| |
| if (ucma_init()) |
| goto err_init; |
| |
| pthread_mutex_lock(&mut); |
| if (sync_devices_list()) |
| goto out; |
| |
| list_for_each(&cma_dev_list, dev, entry) { |
| if (dev->is_device_dead) |
| continue; |
| |
| /* reinit newly added devices */ |
| if (ucma_init_device(dev)) { |
| /* Couldn't initialize the device: mark it dead and continue */ |
| dev->is_device_dead = true; |
| continue; |
| } |
| cma_dev_cnt++; |
| } |
| |
| devs = malloc(sizeof(*devs) * (cma_dev_cnt + 1)); |
| if (!devs) |
| goto out; |
| |
| list_for_each(&cma_dev_list, dev, entry) { |
| if (dev->is_device_dead) |
| continue; |
| devs[i++] = dev->verbs; |
| dev->refcnt++; |
| } |
| devs[i] = NULL; |
| out: |
| pthread_mutex_unlock(&mut); |
| err_init: |
| if (num_devices) |
| *num_devices = devs ? cma_dev_cnt : 0; |
| return devs; |
| } |
| |
| void rdma_free_devices(struct ibv_context **list) |
| { |
| struct cma_device *c, *tmp; |
| int i; |
| |
| pthread_mutex_lock(&mut); |
| list_for_each_safe(&cma_dev_list, c, tmp, entry) { |
| for (i = 0; list[i]; i++) { |
| if (list[i] != c->verbs) |
| /* |
| * Skip devices that were added after |
| * user received the list. |
| */ |
| continue; |
| c->refcnt--; |
| if (c->is_device_dead) |
| /* try to remove */ |
| remove_cma_dev(c); |
| } |
| } |
| pthread_mutex_unlock(&mut); |
| free(list); |
| } |
| |
| struct rdma_event_channel *rdma_create_event_channel(void) |
| { |
| struct rdma_event_channel *channel; |
| |
| if (ucma_init()) |
| return NULL; |
| |
| channel = malloc(sizeof(*channel)); |
| if (!channel) |
| return NULL; |
| |
| channel->fd = open_cdev(dev_name, dev_cdev); |
| if (channel->fd < 0) { |
| goto err; |
| } |
| return channel; |
| err: |
| free(channel); |
| return NULL; |
| } |
| |
| void rdma_destroy_event_channel(struct rdma_event_channel *channel) |
| { |
| close(channel->fd); |
| free(channel); |
| } |
| |
| static struct cma_device *ucma_get_cma_device(__be64 guid, uint32_t idx) |
| { |
| struct cma_device *cma_dev; |
| |
| list_for_each(&cma_dev_list, cma_dev, entry) |
| if (!cma_dev->is_device_dead && match(cma_dev, guid, idx)) |
| goto match; |
| |
| if (sync_devices_list()) |
| return NULL; |
| /* |
| * Kernel informed us that we have new device and it must |
| * be in global dev_list[], let's find the right one. |
| */ |
| list_for_each(&cma_dev_list, cma_dev, entry) |
| if (!cma_dev->is_device_dead && match(cma_dev, guid, idx)) |
| goto match; |
| cma_dev = NULL; |
| match: |
| if (cma_dev) |
| cma_dev->refcnt++; |
| return cma_dev; |
| } |
| |
| static int ucma_get_device(struct cma_id_private *id_priv, __be64 guid, |
| uint32_t idx) |
| { |
| struct cma_device *cma_dev; |
| int ret; |
| |
| pthread_mutex_lock(&mut); |
| cma_dev = ucma_get_cma_device(guid, idx); |
| if (!cma_dev) { |
| pthread_mutex_unlock(&mut); |
| return ERR(ENODEV); |
| } |
| |
| ret = ucma_init_device(cma_dev); |
| if (ret) |
| goto out; |
| |
| if (!cma_dev->pd) |
| cma_dev->pd = ibv_alloc_pd(cma_dev->verbs); |
| if (!cma_dev->pd) { |
| ret = -1; |
| goto out; |
| } |
| |
| id_priv->cma_dev = cma_dev; |
| id_priv->id.verbs = cma_dev->verbs; |
| id_priv->id.pd = cma_dev->pd; |
| out: |
| if (ret) |
| cma_dev->refcnt--; |
| pthread_mutex_unlock(&mut); |
| return ret; |
| } |
| |
| static void ucma_put_device(struct cma_device *cma_dev) |
| { |
| pthread_mutex_lock(&mut); |
| if (!--cma_dev->refcnt) { |
| ibv_dealloc_pd(cma_dev->pd); |
| if (cma_dev->xrcd) |
| ibv_close_xrcd(cma_dev->xrcd); |
| cma_dev->pd = NULL; |
| cma_dev->xrcd = NULL; |
| if (cma_dev->is_device_dead) |
| remove_cma_dev(cma_dev); |
| } |
| pthread_mutex_unlock(&mut); |
| } |
| |
| static struct ibv_xrcd *ucma_get_xrcd(struct cma_device *cma_dev) |
| { |
| struct ibv_xrcd_init_attr attr; |
| |
| pthread_mutex_lock(&mut); |
| if (!cma_dev->xrcd) { |
| memset(&attr, 0, sizeof attr); |
| attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; |
| attr.fd = -1; |
| attr.oflags = O_CREAT; |
| cma_dev->xrcd = ibv_open_xrcd(cma_dev->verbs, &attr); |
| } |
| pthread_mutex_unlock(&mut); |
| return cma_dev->xrcd; |
| } |
| |
| static void ucma_insert_id(struct cma_id_private *id_priv) |
| { |
| fastlock_acquire(&idm_lock); |
| idm_set(&ucma_idm, id_priv->handle, id_priv); |
| fastlock_release(&idm_lock); |
| } |
| |
| static void ucma_remove_id(struct cma_id_private *id_priv) |
| { |
| if (id_priv->handle <= IDX_MAX_INDEX) |
| idm_clear(&ucma_idm, id_priv->handle); |
| } |
| |
| static struct cma_id_private *ucma_lookup_id(int handle) |
| { |
| return idm_lookup(&ucma_idm, handle); |
| } |
| |
| static void ucma_free_id(struct cma_id_private *id_priv) |
| { |
| ucma_remove_id(id_priv); |
| if (id_priv->cma_dev) |
| ucma_put_device(id_priv->cma_dev); |
| pthread_cond_destroy(&id_priv->cond); |
| pthread_mutex_destroy(&id_priv->mut); |
| if (id_priv->id.route.path_rec) |
| free(id_priv->id.route.path_rec); |
| |
| if (id_priv->sync) |
| rdma_destroy_event_channel(id_priv->id.channel); |
| if (id_priv->connect_len) |
| free(id_priv->connect); |
| free(id_priv); |
| } |
| |
| static struct cma_id_private *ucma_alloc_id(struct rdma_event_channel *channel, |
| void *context, |
| enum rdma_port_space ps, |
| enum ibv_qp_type qp_type) |
| { |
| struct cma_id_private *id_priv; |
| |
| id_priv = calloc(1, sizeof(*id_priv)); |
| if (!id_priv) |
| return NULL; |
| |
| id_priv->id.context = context; |
| id_priv->id.ps = ps; |
| id_priv->id.qp_type = qp_type; |
| id_priv->handle = 0xFFFFFFFF; |
| |
| if (!channel) { |
| id_priv->id.channel = rdma_create_event_channel(); |
| if (!id_priv->id.channel) |
| goto err; |
| id_priv->sync = 1; |
| } else { |
| id_priv->id.channel = channel; |
| } |
| |
| pthread_mutex_init(&id_priv->mut, NULL); |
| if (pthread_cond_init(&id_priv->cond, NULL)) |
| goto err; |
| |
| return id_priv; |
| |
| err: ucma_free_id(id_priv); |
| return NULL; |
| } |
| |
| static int rdma_create_id2(struct rdma_event_channel *channel, |
| struct rdma_cm_id **id, void *context, |
| enum rdma_port_space ps, enum ibv_qp_type qp_type) |
| { |
| struct ucma_abi_create_id_resp resp; |
| struct ucma_abi_create_id cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| ret = ucma_init(); |
| if (ret) |
| return ret; |
| |
| id_priv = ucma_alloc_id(channel, context, ps, qp_type); |
| if (!id_priv) |
| return ERR(ENOMEM); |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_ID, &resp, sizeof resp); |
| cmd.uid = (uintptr_t) id_priv; |
| cmd.ps = ps; |
| cmd.qp_type = qp_type; |
| |
| ret = write(id_priv->id.channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof(cmd)) { |
| ret = (ret >= 0) ? ERR(ENODATA) : -1; |
| goto err; |
| } |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| id_priv->handle = resp.id; |
| ucma_insert_id(id_priv); |
| *id = &id_priv->id; |
| return 0; |
| |
| err: ucma_free_id(id_priv); |
| return ret; |
| } |
| |
| int rdma_create_id(struct rdma_event_channel *channel, |
| struct rdma_cm_id **id, void *context, |
| enum rdma_port_space ps) |
| { |
| enum ibv_qp_type qp_type; |
| |
| qp_type = (ps == RDMA_PS_IPOIB || ps == RDMA_PS_UDP) ? |
| IBV_QPT_UD : IBV_QPT_RC; |
| return rdma_create_id2(channel, id, context, ps, qp_type); |
| } |
| |
| static int ucma_destroy_kern_id(int fd, uint32_t handle) |
| { |
| struct ucma_abi_destroy_id_resp resp; |
| struct ucma_abi_destroy_id cmd; |
| int ret; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_ID, &resp, sizeof resp); |
| cmd.id = handle; |
| |
| ret = write(fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| return resp.events_reported; |
| } |
| |
| int rdma_destroy_id(struct rdma_cm_id *id) |
| { |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| ret = ucma_destroy_kern_id(id->channel->fd, id_priv->handle); |
| if (ret < 0) |
| return ret; |
| |
| if (id_priv->id.event) |
| rdma_ack_cm_event(id_priv->id.event); |
| |
| pthread_mutex_lock(&id_priv->mut); |
| while (id_priv->events_completed < ret) |
| pthread_cond_wait(&id_priv->cond, &id_priv->mut); |
| pthread_mutex_unlock(&id_priv->mut); |
| |
| ucma_free_id(id_priv); |
| return 0; |
| } |
| |
| int ucma_addrlen(struct sockaddr *addr) |
| { |
| if (!addr) |
| return 0; |
| |
| switch (addr->sa_family) { |
| case PF_INET: |
| return sizeof(struct sockaddr_in); |
| case PF_INET6: |
| return sizeof(struct sockaddr_in6); |
| case PF_IB: |
| return af_ib_support ? sizeof(struct sockaddr_ib) : 0; |
| default: |
| return 0; |
| } |
| } |
| |
| static int ucma_query_addr(struct rdma_cm_id *id) |
| { |
| struct ucma_abi_query_addr_resp resp; |
| struct ucma_abi_query cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY, &resp, sizeof resp); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.option = UCMA_QUERY_ADDR; |
| |
| /* |
| * If kernel doesn't support ibdev_index, this field will |
| * be left as is by the kernel. |
| */ |
| resp.ibdev_index = UCMA_INVALID_IB_INDEX; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| memcpy(&id->route.addr.src_addr, &resp.src_addr, resp.src_size); |
| memcpy(&id->route.addr.dst_addr, &resp.dst_addr, resp.dst_size); |
| |
| if (!id_priv->cma_dev && resp.node_guid) { |
| ret = ucma_get_device(id_priv, resp.node_guid, |
| resp.ibdev_index); |
| if (ret) |
| return ret; |
| id->port_num = resp.port_num; |
| id->route.addr.addr.ibaddr.pkey = resp.pkey; |
| } |
| |
| return 0; |
| } |
| |
| static int ucma_query_gid(struct rdma_cm_id *id) |
| { |
| struct ucma_abi_query_addr_resp resp; |
| struct ucma_abi_query cmd; |
| struct cma_id_private *id_priv; |
| struct sockaddr_ib *sib; |
| int ret; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY, &resp, sizeof resp); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.option = UCMA_QUERY_GID; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| sib = (struct sockaddr_ib *) &resp.src_addr; |
| memcpy(id->route.addr.addr.ibaddr.sgid.raw, sib->sib_addr.sib_raw, |
| sizeof id->route.addr.addr.ibaddr.sgid); |
| |
| sib = (struct sockaddr_ib *) &resp.dst_addr; |
| memcpy(id->route.addr.addr.ibaddr.dgid.raw, sib->sib_addr.sib_raw, |
| sizeof id->route.addr.addr.ibaddr.dgid); |
| |
| return 0; |
| } |
| |
| static void ucma_convert_path(struct ibv_path_data *path_data, |
| struct ibv_sa_path_rec *sa_path) |
| { |
| uint32_t fl_hop; |
| |
| sa_path->dgid = path_data->path.dgid; |
| sa_path->sgid = path_data->path.sgid; |
| sa_path->dlid = path_data->path.dlid; |
| sa_path->slid = path_data->path.slid; |
| sa_path->raw_traffic = 0; |
| |
| fl_hop = be32toh(path_data->path.flowlabel_hoplimit); |
| sa_path->flow_label = htobe32(fl_hop >> 8); |
| sa_path->hop_limit = (uint8_t) fl_hop; |
| |
| sa_path->traffic_class = path_data->path.tclass; |
| sa_path->reversible = path_data->path.reversible_numpath >> 7; |
| sa_path->numb_path = 1; |
| sa_path->pkey = path_data->path.pkey; |
| sa_path->sl = be16toh(path_data->path.qosclass_sl) & 0xF; |
| sa_path->mtu_selector = 2; /* exactly */ |
| sa_path->mtu = path_data->path.mtu & 0x1F; |
| sa_path->rate_selector = 2; |
| sa_path->rate = path_data->path.rate & 0x1F; |
| sa_path->packet_life_time_selector = 2; |
| sa_path->packet_life_time = path_data->path.packetlifetime & 0x1F; |
| |
| sa_path->preference = (uint8_t) path_data->flags; |
| } |
| |
| static int ucma_query_path(struct rdma_cm_id *id) |
| { |
| struct ucma_abi_query_path_resp *resp; |
| struct ucma_abi_query cmd; |
| struct cma_id_private *id_priv; |
| int ret, i, size; |
| |
| size = sizeof(*resp) + sizeof(struct ibv_path_data) * 6; |
| resp = alloca(size); |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY, resp, size); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.option = UCMA_QUERY_PATH; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| VALGRIND_MAKE_MEM_DEFINED(resp, size); |
| |
| if (resp->num_paths) { |
| id->route.path_rec = malloc(sizeof(*id->route.path_rec) * |
| resp->num_paths); |
| if (!id->route.path_rec) |
| return ERR(ENOMEM); |
| |
| id->route.num_paths = resp->num_paths; |
| for (i = 0; i < resp->num_paths; i++) |
| ucma_convert_path(&resp->path_data[i], &id->route.path_rec[i]); |
| } |
| |
| return 0; |
| } |
| |
| static int ucma_query_route(struct rdma_cm_id *id) |
| { |
| struct ucma_abi_query_route_resp resp; |
| struct ucma_abi_query cmd; |
| struct cma_id_private *id_priv; |
| int ret, i; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY_ROUTE, &resp, sizeof resp); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| |
| /* |
| * If kernel doesn't support ibdev_index, this field will |
| * be left as is by the kernel. |
| */ |
| resp.ibdev_index = UCMA_INVALID_IB_INDEX; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| if (resp.num_paths) { |
| id->route.path_rec = malloc(sizeof(*id->route.path_rec) * |
| resp.num_paths); |
| if (!id->route.path_rec) |
| return ERR(ENOMEM); |
| |
| id->route.num_paths = resp.num_paths; |
| for (i = 0; i < resp.num_paths; i++) |
| ibv_copy_path_rec_from_kern(&id->route.path_rec[i], |
| &resp.ib_route[i]); |
| } |
| |
| memcpy(id->route.addr.addr.ibaddr.sgid.raw, resp.ib_route[0].sgid, |
| sizeof id->route.addr.addr.ibaddr.sgid); |
| memcpy(id->route.addr.addr.ibaddr.dgid.raw, resp.ib_route[0].dgid, |
| sizeof id->route.addr.addr.ibaddr.dgid); |
| id->route.addr.addr.ibaddr.pkey = resp.ib_route[0].pkey; |
| memcpy(&id->route.addr.src_addr, &resp.src_addr, |
| sizeof resp.src_addr); |
| memcpy(&id->route.addr.dst_addr, &resp.dst_addr, |
| sizeof resp.dst_addr); |
| |
| if (!id_priv->cma_dev && resp.node_guid) { |
| ret = ucma_get_device(id_priv, resp.node_guid, |
| resp.ibdev_index); |
| if (ret) |
| return ret; |
| id_priv->id.port_num = resp.port_num; |
| } |
| |
| return 0; |
| } |
| |
| static int rdma_bind_addr2(struct rdma_cm_id *id, struct sockaddr *addr, |
| socklen_t addrlen) |
| { |
| struct ucma_abi_bind cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, BIND); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.addr_size = addrlen; |
| memcpy(&cmd.addr, addr, addrlen); |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| ret = ucma_query_addr(id); |
| if (!ret) |
| ret = ucma_query_gid(id); |
| return ret; |
| } |
| |
| int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) |
| { |
| struct ucma_abi_bind_ip cmd; |
| struct cma_id_private *id_priv; |
| int ret, addrlen; |
| |
| addrlen = ucma_addrlen(addr); |
| if (!addrlen) |
| return ERR(EINVAL); |
| |
| if (af_ib_support) |
| return rdma_bind_addr2(id, addr, addrlen); |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, BIND_IP); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| memcpy(&cmd.addr, addr, addrlen); |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| return ucma_query_route(id); |
| } |
| |
| int ucma_complete(struct rdma_cm_id *id) |
| { |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (!id_priv->sync) |
| return 0; |
| |
| if (id_priv->id.event) { |
| rdma_ack_cm_event(id_priv->id.event); |
| id_priv->id.event = NULL; |
| } |
| |
| ret = rdma_get_cm_event(id_priv->id.channel, &id_priv->id.event); |
| if (ret) |
| return ret; |
| |
| if (id_priv->id.event->status) { |
| if (id_priv->id.event->event == RDMA_CM_EVENT_REJECTED) |
| ret = ERR(ECONNREFUSED); |
| else if (id_priv->id.event->status < 0) |
| ret = ERR(-id_priv->id.event->status); |
| else |
| ret = ERR(id_priv->id.event->status); |
| } |
| return ret; |
| } |
| |
| static int rdma_resolve_addr2(struct rdma_cm_id *id, struct sockaddr *src_addr, |
| socklen_t src_len, struct sockaddr *dst_addr, |
| socklen_t dst_len, int timeout_ms) |
| { |
| struct ucma_abi_resolve_addr cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, RESOLVE_ADDR); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.src_size = src_len; |
| memcpy(&cmd.src_addr, src_addr, src_len); |
| memcpy(&cmd.dst_addr, dst_addr, dst_len); |
| cmd.dst_size = dst_len; |
| cmd.timeout_ms = timeout_ms; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| memcpy(&id->route.addr.dst_addr, dst_addr, dst_len); |
| return ucma_complete(id); |
| } |
| |
| int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, |
| struct sockaddr *dst_addr, int timeout_ms) |
| { |
| struct ucma_abi_resolve_ip cmd; |
| struct cma_id_private *id_priv; |
| int ret, dst_len, src_len; |
| |
| dst_len = ucma_addrlen(dst_addr); |
| if (!dst_len) |
| return ERR(EINVAL); |
| |
| src_len = ucma_addrlen(src_addr); |
| if (src_addr && !src_len) |
| return ERR(EINVAL); |
| |
| if (af_ib_support) |
| return rdma_resolve_addr2(id, src_addr, src_len, dst_addr, |
| dst_len, timeout_ms); |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, RESOLVE_IP); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| if (src_addr) |
| memcpy(&cmd.src_addr, src_addr, src_len); |
| memcpy(&cmd.dst_addr, dst_addr, dst_len); |
| cmd.timeout_ms = timeout_ms; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| memcpy(&id->route.addr.dst_storage, dst_addr, dst_len); |
| return ucma_complete(id); |
| } |
| |
| static int ucma_set_ib_route(struct rdma_cm_id *id) |
| { |
| struct rdma_addrinfo hint, *rai; |
| int ret; |
| |
| memset(&hint, 0, sizeof hint); |
| hint.ai_flags = RAI_ROUTEONLY; |
| hint.ai_family = id->route.addr.src_addr.sa_family; |
| hint.ai_src_len = ucma_addrlen((struct sockaddr *) &id->route.addr.src_addr); |
| hint.ai_src_addr = &id->route.addr.src_addr; |
| hint.ai_dst_len = ucma_addrlen((struct sockaddr *) &id->route.addr.dst_addr); |
| hint.ai_dst_addr = &id->route.addr.dst_addr; |
| |
| ret = rdma_getaddrinfo(NULL, NULL, &hint, &rai); |
| if (ret) |
| return ret; |
| |
| if (rai->ai_route_len) |
| ret = rdma_set_option(id, RDMA_OPTION_IB, RDMA_OPTION_IB_PATH, |
| rai->ai_route, rai->ai_route_len); |
| else |
| ret = -1; |
| |
| rdma_freeaddrinfo(rai); |
| return ret; |
| } |
| |
| int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) |
| { |
| struct ucma_abi_resolve_route cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (id->verbs->device->transport_type == IBV_TRANSPORT_IB) { |
| ret = ucma_set_ib_route(id); |
| if (!ret) |
| goto out; |
| } |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, RESOLVE_ROUTE); |
| cmd.id = id_priv->handle; |
| cmd.timeout_ms = timeout_ms; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| out: |
| return ucma_complete(id); |
| } |
| |
| static int ucma_is_ud_qp(enum ibv_qp_type qp_type) |
| { |
| return (qp_type == IBV_QPT_UD); |
| } |
| |
| int rdma_init_qp_attr(struct rdma_cm_id *id, struct ibv_qp_attr *qp_attr, |
| int *qp_attr_mask) |
| { |
| struct ucma_abi_init_qp_attr cmd; |
| struct ib_uverbs_qp_attr resp; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, INIT_QP_ATTR, &resp, sizeof resp); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.qp_state = qp_attr->qp_state; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| ibv_copy_qp_attr_from_kern(qp_attr, &resp); |
| *qp_attr_mask = resp.qp_attr_mask; |
| return 0; |
| } |
| |
| static int ucma_modify_qp_rtr(struct rdma_cm_id *id, uint8_t resp_res) |
| { |
| struct cma_id_private *id_priv; |
| struct ibv_qp_attr qp_attr; |
| int qp_attr_mask, ret; |
| uint8_t link_layer; |
| |
| if (!id->qp) |
| return 0; |
| |
| /* Need to update QP attributes from default values. */ |
| qp_attr.qp_state = IBV_QPS_INIT; |
| ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); |
| if (ret) |
| return ret; |
| |
| ret = ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask); |
| if (ret) |
| return ERR(ret); |
| |
| qp_attr.qp_state = IBV_QPS_RTR; |
| ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); |
| if (ret) |
| return ret; |
| |
| /* |
| * Workaround for rdma_ucm kernel bug: |
| * mask off qp_attr_mask bits 21-24 which are used for RoCE |
| */ |
| id_priv = container_of(id, struct cma_id_private, id); |
| link_layer = id_priv->cma_dev->port[id->port_num - 1].link_layer; |
| |
| if (link_layer == IBV_LINK_LAYER_INFINIBAND) |
| qp_attr_mask &= UINT_MAX ^ 0xe00000; |
| |
| if (resp_res != RDMA_MAX_RESP_RES) |
| qp_attr.max_dest_rd_atomic = resp_res; |
| return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask)); |
| } |
| |
| static int ucma_modify_qp_rts(struct rdma_cm_id *id, uint8_t init_depth) |
| { |
| struct ibv_qp_attr qp_attr; |
| int qp_attr_mask, ret; |
| |
| if (!id->qp) |
| return 0; |
| |
| qp_attr.qp_state = IBV_QPS_RTS; |
| ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); |
| if (ret) |
| return ret; |
| |
| if (init_depth != RDMA_MAX_INIT_DEPTH) |
| qp_attr.max_rd_atomic = init_depth; |
| return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask)); |
| } |
| |
| static int ucma_modify_qp_sqd(struct rdma_cm_id *id) |
| { |
| struct ibv_qp_attr qp_attr; |
| |
| if (!id->qp) |
| return 0; |
| |
| qp_attr.qp_state = IBV_QPS_SQD; |
| return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE)); |
| } |
| |
| static int ucma_modify_qp_err(struct rdma_cm_id *id) |
| { |
| struct ibv_qp_attr qp_attr; |
| |
| if (!id->qp) |
| return 0; |
| |
| qp_attr.qp_state = IBV_QPS_ERR; |
| return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE)); |
| } |
| |
| static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) |
| { |
| struct ibv_qp_attr qp_attr; |
| int ret; |
| |
| ret = ibv_get_pkey_index(id_priv->cma_dev->verbs, id_priv->id.port_num, |
| id_priv->id.route.addr.addr.ibaddr.pkey); |
| if (ret < 0) |
| return ERR(EINVAL); |
| |
| qp_attr.pkey_index = ret; |
| qp_attr.port_num = id_priv->id.port_num; |
| qp_attr.qp_state = IBV_QPS_INIT; |
| qp_attr.qp_access_flags = 0; |
| |
| ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS | |
| IBV_QP_PKEY_INDEX | IBV_QP_PORT); |
| return rdma_seterrno(ret); |
| } |
| |
| static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) |
| { |
| struct ibv_qp_attr qp_attr; |
| int qp_attr_mask, ret; |
| |
| if (abi_ver == 3) |
| return ucma_init_conn_qp3(id_priv, qp); |
| |
| qp_attr.qp_state = IBV_QPS_INIT; |
| ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); |
| if (ret) |
| return ret; |
| |
| return rdma_seterrno(ibv_modify_qp(qp, &qp_attr, qp_attr_mask)); |
| } |
| |
| static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) |
| { |
| struct ibv_qp_attr qp_attr; |
| int ret; |
| |
| ret = ibv_get_pkey_index(id_priv->cma_dev->verbs, id_priv->id.port_num, |
| id_priv->id.route.addr.addr.ibaddr.pkey); |
| if (ret < 0) |
| return ERR(EINVAL); |
| |
| qp_attr.pkey_index = ret; |
| qp_attr.port_num = id_priv->id.port_num; |
| qp_attr.qp_state = IBV_QPS_INIT; |
| qp_attr.qkey = RDMA_UDP_QKEY; |
| |
| ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY | |
| IBV_QP_PKEY_INDEX | IBV_QP_PORT); |
| if (ret) |
| return ERR(ret); |
| |
| qp_attr.qp_state = IBV_QPS_RTR; |
| ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); |
| if (ret) |
| return ERR(ret); |
| |
| qp_attr.qp_state = IBV_QPS_RTS; |
| qp_attr.sq_psn = 0; |
| ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); |
| return rdma_seterrno(ret); |
| } |
| |
| static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) |
| { |
| struct ibv_qp_attr qp_attr; |
| int qp_attr_mask, ret; |
| |
| if (abi_ver == 3) |
| return ucma_init_ud_qp3(id_priv, qp); |
| |
| qp_attr.qp_state = IBV_QPS_INIT; |
| ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); |
| if (ret) |
| return ret; |
| |
| ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask); |
| if (ret) |
| return ERR(ret); |
| |
| qp_attr.qp_state = IBV_QPS_RTR; |
| ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); |
| if (ret) |
| return ERR(ret); |
| |
| qp_attr.qp_state = IBV_QPS_RTS; |
| qp_attr.sq_psn = 0; |
| ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); |
| return rdma_seterrno(ret); |
| } |
| |
| static void ucma_destroy_cqs(struct rdma_cm_id *id) |
| { |
| if (id->qp_type == IBV_QPT_XRC_RECV && id->srq) |
| return; |
| |
| if (id->recv_cq) { |
| ibv_destroy_cq(id->recv_cq); |
| if (id->send_cq && (id->send_cq != id->recv_cq)) { |
| ibv_destroy_cq(id->send_cq); |
| id->send_cq = NULL; |
| } |
| id->recv_cq = NULL; |
| } |
| |
| if (id->recv_cq_channel) { |
| ibv_destroy_comp_channel(id->recv_cq_channel); |
| if (id->send_cq_channel && (id->send_cq_channel != id->recv_cq_channel)) { |
| ibv_destroy_comp_channel(id->send_cq_channel); |
| id->send_cq_channel = NULL; |
| } |
| id->recv_cq_channel = NULL; |
| } |
| } |
| |
| static int ucma_create_cqs(struct rdma_cm_id *id, uint32_t send_size, uint32_t recv_size) |
| { |
| if (recv_size) { |
| id->recv_cq_channel = ibv_create_comp_channel(id->verbs); |
| if (!id->recv_cq_channel) |
| goto err; |
| |
| id->recv_cq = ibv_create_cq(id->verbs, recv_size, |
| id, id->recv_cq_channel, 0); |
| if (!id->recv_cq) |
| goto err; |
| } |
| |
| if (send_size) { |
| id->send_cq_channel = ibv_create_comp_channel(id->verbs); |
| if (!id->send_cq_channel) |
| goto err; |
| |
| id->send_cq = ibv_create_cq(id->verbs, send_size, |
| id, id->send_cq_channel, 0); |
| if (!id->send_cq) |
| goto err; |
| } |
| |
| return 0; |
| err: |
| ucma_destroy_cqs(id); |
| return -1; |
| } |
| |
| int rdma_create_srq_ex(struct rdma_cm_id *id, struct ibv_srq_init_attr_ex *attr) |
| { |
| struct cma_id_private *id_priv; |
| struct ibv_srq *srq; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE)) |
| return ERR(EINVAL); |
| |
| if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_PD) || !attr->pd) { |
| attr->pd = id->pd; |
| attr->comp_mask |= IBV_SRQ_INIT_ATTR_PD; |
| } |
| |
| if (attr->srq_type == IBV_SRQT_XRC) { |
| if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) || !attr->xrcd) { |
| attr->xrcd = ucma_get_xrcd(id_priv->cma_dev); |
| if (!attr->xrcd) |
| return -1; |
| } |
| if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_CQ) || !attr->cq) { |
| ret = ucma_create_cqs(id, 0, attr->attr.max_wr); |
| if (ret) |
| return ret; |
| attr->cq = id->recv_cq; |
| } |
| attr->comp_mask |= IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ; |
| } |
| |
| srq = ibv_create_srq_ex(id->verbs, attr); |
| if (!srq) { |
| ret = -1; |
| goto err; |
| } |
| |
| if (!id->pd) |
| id->pd = attr->pd; |
| id->srq = srq; |
| return 0; |
| err: |
| ucma_destroy_cqs(id); |
| return ret; |
| } |
| |
| int rdma_create_srq(struct rdma_cm_id *id, struct ibv_pd *pd, |
| struct ibv_srq_init_attr *attr) |
| { |
| struct ibv_srq_init_attr_ex attr_ex; |
| int ret; |
| |
| memcpy(&attr_ex, attr, sizeof(*attr)); |
| attr_ex.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_PD; |
| if (id->qp_type == IBV_QPT_XRC_RECV) { |
| attr_ex.srq_type = IBV_SRQT_XRC; |
| } else { |
| attr_ex.srq_type = IBV_SRQT_BASIC; |
| } |
| attr_ex.pd = pd; |
| ret = rdma_create_srq_ex(id, &attr_ex); |
| memcpy(attr, &attr_ex, sizeof(*attr)); |
| return ret; |
| } |
| |
| void rdma_destroy_srq(struct rdma_cm_id *id) |
| { |
| ibv_destroy_srq(id->srq); |
| id->srq = NULL; |
| ucma_destroy_cqs(id); |
| } |
| |
| static int init_ece(struct rdma_cm_id *id, struct ibv_qp *qp) |
| { |
| struct cma_id_private *id_priv = |
| container_of(id, struct cma_id_private, id); |
| struct ibv_ece ece = {}; |
| int ret; |
| |
| ret = ibv_query_ece(qp, &ece); |
| if (ret && ret != EOPNOTSUPP) |
| return ERR(ret); |
| |
| id_priv->local_ece.vendor_id = ece.vendor_id; |
| id_priv->local_ece.options = ece.options; |
| |
| if (!id_priv->remote_ece.vendor_id) |
| /* |
| * This QP was created explicitly and we don't need |
| * to do anything additional to the setting local_ece values. |
| */ |
| return 0; |
| |
| /* This QP was created due to REQ event */ |
| if (id_priv->remote_ece.vendor_id != id_priv->local_ece.vendor_id) { |
| /* |
| * Signal to the provider that other ECE node is different |
| * vendor and clear ECE options. |
| */ |
| ece.vendor_id = id_priv->local_ece.vendor_id; |
| ece.options = 0; |
| } else { |
| ece.vendor_id = id_priv->remote_ece.vendor_id; |
| ece.options = id_priv->remote_ece.options; |
| } |
| ret = ibv_set_ece(qp, &ece); |
| return (ret && ret != EOPNOTSUPP) ? ERR(ret) : 0; |
| } |
| |
| static int set_local_ece(struct rdma_cm_id *id, struct ibv_qp *qp) |
| { |
| struct cma_id_private *id_priv = |
| container_of(id, struct cma_id_private, id); |
| struct ibv_ece ece = {}; |
| int ret; |
| |
| if (!id_priv->remote_ece.vendor_id) |
| return 0; |
| |
| ret = ibv_query_ece(qp, &ece); |
| if (ret && ret != EOPNOTSUPP) |
| return ERR(ret); |
| |
| id_priv->local_ece.options = ece.options; |
| return 0; |
| } |
| |
| int rdma_create_qp_ex(struct rdma_cm_id *id, |
| struct ibv_qp_init_attr_ex *attr) |
| { |
| struct cma_id_private *id_priv; |
| struct ibv_qp *qp; |
| int ret; |
| |
| if (id->qp) |
| return ERR(EINVAL); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD) || !attr->pd) { |
| attr->comp_mask |= IBV_QP_INIT_ATTR_PD; |
| attr->pd = id->pd; |
| } else if (id->verbs != attr->pd->context) |
| return ERR(EINVAL); |
| |
| if ((id->recv_cq && attr->recv_cq && id->recv_cq != attr->recv_cq) || |
| (id->send_cq && attr->send_cq && id->send_cq != attr->send_cq)) |
| return ERR(EINVAL); |
| |
| if (id->qp_type == IBV_QPT_XRC_RECV) { |
| if (!(attr->comp_mask & IBV_QP_INIT_ATTR_XRCD) || !attr->xrcd) { |
| attr->xrcd = ucma_get_xrcd(id_priv->cma_dev); |
| if (!attr->xrcd) |
| return -1; |
| attr->comp_mask |= IBV_QP_INIT_ATTR_XRCD; |
| } |
| } |
| |
| ret = ucma_create_cqs(id, attr->send_cq || id->send_cq ? 0 : attr->cap.max_send_wr, |
| attr->recv_cq || id->recv_cq ? 0 : attr->cap.max_recv_wr); |
| if (ret) |
| return ret; |
| |
| if (!attr->send_cq) |
| attr->send_cq = id->send_cq; |
| if (!attr->recv_cq) |
| attr->recv_cq = id->recv_cq; |
| if (id->srq && !attr->srq) |
| attr->srq = id->srq; |
| qp = ibv_create_qp_ex(id->verbs, attr); |
| if (!qp) { |
| ret = -1; |
| goto err1; |
| } |
| |
| ret = init_ece(id, qp); |
| if (ret) |
| goto err2; |
| |
| if (ucma_is_ud_qp(id->qp_type)) |
| ret = ucma_init_ud_qp(id_priv, qp); |
| else |
| ret = ucma_init_conn_qp(id_priv, qp); |
| if (ret) |
| goto err2; |
| ret = set_local_ece(id, qp); |
| if (ret) |
| goto err2; |
| |
| id->pd = qp->pd; |
| id->qp = qp; |
| return 0; |
| err2: |
| ibv_destroy_qp(qp); |
| err1: |
| ucma_destroy_cqs(id); |
| return ret; |
| } |
| |
| int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, |
| struct ibv_qp_init_attr *qp_init_attr) |
| { |
| struct ibv_qp_init_attr_ex attr_ex; |
| int ret; |
| |
| memcpy(&attr_ex, qp_init_attr, sizeof(*qp_init_attr)); |
| attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD; |
| attr_ex.pd = pd ? pd : id->pd; |
| ret = rdma_create_qp_ex(id, &attr_ex); |
| memcpy(qp_init_attr, &attr_ex, sizeof(*qp_init_attr)); |
| return ret; |
| } |
| |
| void rdma_destroy_qp(struct rdma_cm_id *id) |
| { |
| ibv_destroy_qp(id->qp); |
| id->qp = NULL; |
| ucma_destroy_cqs(id); |
| } |
| |
| static int ucma_valid_param(struct cma_id_private *id_priv, |
| struct rdma_conn_param *param) |
| { |
| if (id_priv->id.ps != RDMA_PS_TCP) |
| return 0; |
| |
| if (!id_priv->id.qp && !param) |
| goto err; |
| |
| if (!param) |
| return 0; |
| |
| if ((param->responder_resources != RDMA_MAX_RESP_RES) && |
| (param->responder_resources > id_priv->cma_dev->max_responder_resources)) |
| goto err; |
| |
| if ((param->initiator_depth != RDMA_MAX_INIT_DEPTH) && |
| (param->initiator_depth > id_priv->cma_dev->max_initiator_depth)) |
| goto err; |
| |
| return 0; |
| err: |
| return ERR(EINVAL); |
| } |
| |
| static void ucma_copy_conn_param_to_kern(struct cma_id_private *id_priv, |
| struct ucma_abi_conn_param *dst, |
| struct rdma_conn_param *src, |
| uint32_t qp_num, uint8_t srq) |
| { |
| dst->qp_num = qp_num; |
| dst->srq = srq; |
| dst->responder_resources = id_priv->responder_resources; |
| dst->initiator_depth = id_priv->initiator_depth; |
| dst->valid = 1; |
| |
| if (id_priv->connect_len) { |
| memcpy(dst->private_data, id_priv->connect, id_priv->connect_len); |
| dst->private_data_len = id_priv->connect_len; |
| } |
| |
| if (src) { |
| dst->flow_control = src->flow_control; |
| dst->retry_count = src->retry_count; |
| dst->rnr_retry_count = src->rnr_retry_count; |
| |
| if (src->private_data && src->private_data_len) { |
| memcpy(dst->private_data + dst->private_data_len, |
| src->private_data, src->private_data_len); |
| dst->private_data_len += src->private_data_len; |
| } |
| } else { |
| dst->retry_count = 7; |
| dst->rnr_retry_count = 7; |
| } |
| } |
| |
| static void ucma_copy_ece_param_to_kern_req(struct cma_id_private *id_priv, |
| struct ucma_abi_ece *dst) |
| { |
| dst->vendor_id = id_priv->local_ece.vendor_id; |
| dst->attr_mod = id_priv->local_ece.options; |
| } |
| |
| int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) |
| { |
| uint32_t qp_num = conn_param ? conn_param->qp_num : 0; |
| uint8_t srq = conn_param ? conn_param->srq : 0; |
| struct ucma_abi_connect cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| ret = ucma_valid_param(id_priv, conn_param); |
| if (ret) |
| return ret; |
| |
| if (conn_param && conn_param->initiator_depth != RDMA_MAX_INIT_DEPTH) |
| id_priv->initiator_depth = conn_param->initiator_depth; |
| else |
| id_priv->initiator_depth = id_priv->cma_dev->max_initiator_depth; |
| if (conn_param && conn_param->responder_resources != RDMA_MAX_RESP_RES) |
| id_priv->responder_resources = conn_param->responder_resources; |
| else |
| id_priv->responder_resources = id_priv->cma_dev->max_responder_resources; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, CONNECT); |
| cmd.id = id_priv->handle; |
| if (id->qp) { |
| qp_num = id->qp->qp_num; |
| srq = !!id->qp->srq; |
| } |
| |
| ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param, conn_param, |
| qp_num, srq); |
| |
| ucma_copy_ece_param_to_kern_req(id_priv, &cmd.ece); |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| if (id_priv->connect_len) { |
| free(id_priv->connect); |
| id_priv->connect_len = 0; |
| } |
| |
| return ucma_complete(id); |
| } |
| |
| int rdma_listen(struct rdma_cm_id *id, int backlog) |
| { |
| struct ucma_abi_listen cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, LISTEN); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.backlog = backlog; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| if (af_ib_support) |
| return ucma_query_addr(id); |
| else |
| return ucma_query_route(id); |
| } |
| |
| int rdma_get_request(struct rdma_cm_id *listen, struct rdma_cm_id **id) |
| { |
| struct cma_id_private *id_priv; |
| struct rdma_cm_event *event; |
| int ret; |
| |
| id_priv = container_of(listen, struct cma_id_private, id); |
| if (!id_priv->sync) |
| return ERR(EINVAL); |
| |
| if (listen->event) { |
| rdma_ack_cm_event(listen->event); |
| listen->event = NULL; |
| } |
| |
| ret = rdma_get_cm_event(listen->channel, &event); |
| if (ret) |
| return ret; |
| |
| if (event->event == RDMA_CM_EVENT_REJECTED) { |
| ret = ERR(ECONNREFUSED); |
| goto err; |
| } |
| |
| if (event->status) { |
| ret = ERR(-event->status); |
| goto err; |
| } |
| |
| if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) { |
| ret = ERR(EINVAL); |
| goto err; |
| } |
| |
| if (id_priv->qp_init_attr) { |
| struct ibv_qp_init_attr attr; |
| |
| attr = *id_priv->qp_init_attr; |
| ret = rdma_create_qp(event->id, listen->pd, &attr); |
| if (ret) |
| goto err; |
| } |
| |
| *id = event->id; |
| (*id)->event = event; |
| return 0; |
| |
| err: |
| listen->event = event; |
| return ret; |
| } |
| |
| static void ucma_copy_ece_param_to_kern_rep(struct cma_id_private *id_priv, |
| struct ucma_abi_ece *dst) |
| { |
	/* Return the response with the same vendor ID as received. */
| dst->vendor_id = id_priv->remote_ece.vendor_id; |
| dst->attr_mod = id_priv->local_ece.options; |
| } |
| |
| int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) |
| { |
| uint32_t qp_num = id->qp ? id->qp->qp_num : conn_param->qp_num; |
| uint8_t srq = id->qp ? !!id->qp->srq : conn_param->srq; |
| struct ucma_abi_accept cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| ret = ucma_valid_param(id_priv, conn_param); |
| if (ret) |
| return ret; |
| |
| if (!conn_param || conn_param->initiator_depth == RDMA_MAX_INIT_DEPTH) { |
| id_priv->initiator_depth = min(id_priv->initiator_depth, |
| id_priv->cma_dev->max_initiator_depth); |
| } else { |
| id_priv->initiator_depth = conn_param->initiator_depth; |
| } |
| if (!conn_param || conn_param->responder_resources == RDMA_MAX_RESP_RES) { |
| id_priv->responder_resources = min(id_priv->responder_resources, |
| id_priv->cma_dev->max_responder_resources); |
| } else { |
| id_priv->responder_resources = conn_param->responder_resources; |
| } |
| |
| if (!ucma_is_ud_qp(id->qp_type)) { |
| ret = ucma_modify_qp_rtr(id, id_priv->responder_resources); |
| if (ret) |
| return ret; |
| |
| ret = ucma_modify_qp_rts(id, id_priv->initiator_depth); |
| if (ret) |
| return ret; |
| } |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, ACCEPT); |
| cmd.id = id_priv->handle; |
| cmd.uid = (uintptr_t) id_priv; |
| ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param, conn_param, |
| qp_num, srq); |
| ucma_copy_ece_param_to_kern_rep(id_priv, &cmd.ece); |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| ucma_modify_qp_err(id); |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| } |
| |
| if (ucma_is_ud_qp(id->qp_type)) { |
| if (id_priv->sync && id_priv->id.event) { |
| rdma_ack_cm_event(id_priv->id.event); |
| id_priv->id.event = NULL; |
| } |
| |
| return 0; |
| } |
| |
| return ucma_complete(id); |
| } |
| |
| static int reject_with_reason(struct rdma_cm_id *id, const void *private_data, |
| uint8_t private_data_len, uint8_t reason) |
| { |
| struct ucma_abi_reject cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, REJECT); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| if (private_data && private_data_len) { |
| memcpy(cmd.private_data, private_data, private_data_len); |
| cmd.private_data_len = private_data_len; |
| } |
| cmd.reason = reason; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| return 0; |
| } |
| |
| int rdma_reject(struct rdma_cm_id *id, const void *private_data, |
| uint8_t private_data_len) |
| { |
| return reject_with_reason(id, private_data, private_data_len, 0); |
| } |
| |
| int rdma_reject_ece(struct rdma_cm_id *id, const void *private_data, |
| uint8_t private_data_len) |
| { |
| /* IBTA defines CM_REJ_VENDOR_OPTION_NOT_SUPPORTED as 35 */ |
| return reject_with_reason(id, private_data, private_data_len, 35); |
| } |
| |
| int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event) |
| { |
| struct ucma_abi_notify cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, NOTIFY); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.event = event; |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| return 0; |
| } |
| |
| int ucma_shutdown(struct rdma_cm_id *id) |
| { |
| if (!id->verbs || !id->verbs->device) |
| return ERR(EINVAL); |
| |
| switch (id->verbs->device->transport_type) { |
| case IBV_TRANSPORT_IB: |
| return ucma_modify_qp_err(id); |
| case IBV_TRANSPORT_IWARP: |
| return ucma_modify_qp_sqd(id); |
| default: |
| return ERR(EINVAL); |
| } |
| } |
| |
| int rdma_disconnect(struct rdma_cm_id *id) |
| { |
| struct ucma_abi_disconnect cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| ret = ucma_shutdown(id); |
| if (ret) |
| return ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, DISCONNECT); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| return ucma_complete(id); |
| } |
| |
| static int rdma_join_multicast2(struct rdma_cm_id *id, struct sockaddr *addr, |
| socklen_t addrlen, uint16_t join_flags, |
| void *context) |
| { |
| struct ucma_abi_create_id_resp resp; |
| struct cma_id_private *id_priv; |
| struct cma_multicast *mc, **pos; |
| int ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| mc = calloc(1, sizeof(*mc)); |
| if (!mc) |
| return ERR(ENOMEM); |
| |
| mc->context = context; |
| mc->id_priv = id_priv; |
| mc->join_flags = join_flags; |
| memcpy(&mc->addr, addr, addrlen); |
| if (pthread_cond_init(&mc->cond, NULL)) { |
| ret = -1; |
| goto err1; |
| } |
| |
| pthread_mutex_lock(&id_priv->mut); |
| mc->next = id_priv->mc_list; |
| id_priv->mc_list = mc; |
| pthread_mutex_unlock(&id_priv->mut); |
| |
| if (af_ib_support) { |
| struct ucma_abi_join_mcast cmd; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, JOIN_MCAST, &resp, sizeof resp); |
| cmd.id = id_priv->handle; |
| memcpy(&cmd.addr, addr, addrlen); |
| cmd.addr_size = addrlen; |
| cmd.uid = (uintptr_t) mc; |
| cmd.join_flags = join_flags; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| ret = (ret >= 0) ? ERR(ENODATA) : -1; |
| goto err2; |
| } |
| } else { |
| struct ucma_abi_join_ip_mcast cmd; |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, JOIN_IP_MCAST, &resp, sizeof resp); |
| cmd.id = id_priv->handle; |
| memcpy(&cmd.addr, addr, addrlen); |
| cmd.uid = (uintptr_t) mc; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| ret = (ret >= 0) ? ERR(ENODATA) : -1; |
| goto err2; |
| } |
| } |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| mc->handle = resp.id; |
| return ucma_complete(id); |
| |
| err2: |
| pthread_mutex_lock(&id_priv->mut); |
| for (pos = &id_priv->mc_list; *pos != mc; pos = &(*pos)->next) |
| ; |
| *pos = mc->next; |
| pthread_mutex_unlock(&id_priv->mut); |
| err1: |
| free(mc); |
| return ret; |
| } |
| |
| int rdma_join_multicast_ex(struct rdma_cm_id *id, |
| struct rdma_cm_join_mc_attr_ex *mc_join_attr, |
| void *context) |
| { |
| int addrlen; |
| |
| if (mc_join_attr->comp_mask >= RDMA_CM_JOIN_MC_ATTR_RESERVED) |
| return ERR(ENOTSUP); |
| |
| if (!(mc_join_attr->comp_mask & RDMA_CM_JOIN_MC_ATTR_ADDRESS)) |
| return ERR(EINVAL); |
| |
| if (!(mc_join_attr->comp_mask & RDMA_CM_JOIN_MC_ATTR_JOIN_FLAGS) || |
| (mc_join_attr->join_flags >= RDMA_MC_JOIN_FLAG_RESERVED)) |
| return ERR(EINVAL); |
| |
| addrlen = ucma_addrlen(mc_join_attr->addr); |
| if (!addrlen) |
| return ERR(EINVAL); |
| |
| return rdma_join_multicast2(id, mc_join_attr->addr, addrlen, |
| mc_join_attr->join_flags, context); |
| } |
| |
| int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, |
| void *context) |
| { |
| int addrlen; |
| |
| addrlen = ucma_addrlen(addr); |
| if (!addrlen) |
| return ERR(EINVAL); |
| |
| return rdma_join_multicast2(id, addr, addrlen, |
| RDMA_MC_JOIN_FLAG_FULLMEMBER, context); |
| } |
| |
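| /* |
| * Leave a multicast group: unlink the matching cma_multicast entry, |
| * detach the QP (unless the join was send-only, in which case it was |
| * never attached), issue LEAVE_MCAST, and then wait until all events |
| * reported against this join have been acknowledged before freeing it. |
| */ |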
| int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) |
| { |
| struct ucma_abi_destroy_id cmd; |
| struct ucma_abi_destroy_id_resp resp; |
| struct cma_id_private *id_priv; |
| struct cma_multicast *mc, **pos; |
| int ret, addrlen; |
| |
| addrlen = ucma_addrlen(addr); |
| if (!addrlen) |
| return ERR(EINVAL); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| pthread_mutex_lock(&id_priv->mut); |
| for (pos = &id_priv->mc_list; *pos; pos = &(*pos)->next) |
| if (!memcmp(&(*pos)->addr, addr, addrlen)) |
| break; |
| |
| mc = *pos; |
| if (*pos) |
| *pos = mc->next; |
| pthread_mutex_unlock(&id_priv->mut); |
| if (!mc) |
| return ERR(EADDRNOTAVAIL); |
| |
| if (id->qp && (mc->join_flags != RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)) |
| ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid); |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, LEAVE_MCAST, &resp, sizeof resp); |
| cmd.id = mc->handle; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| ret = (ret >= 0) ? ERR(ENODATA) : -1; |
| goto free; |
| } |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| pthread_mutex_lock(&id_priv->mut); |
| while (mc->events_completed < resp.events_reported) |
| pthread_cond_wait(&mc->cond, &id_priv->mut); |
| pthread_mutex_unlock(&id_priv->mut); |
| |
| ret = 0; |
| free: |
| free(mc); |
| return ret; |
| } |
| |
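| /* |
| * Event accounting: each event handed to the user pins its id (or |
| * multicast join) until rdma_ack_cm_event() is called.  The destroy |
| * and leave paths compare events_completed against the kernel's |
| * events_reported count to know when it is safe to free the object. |
| */ |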
| static void ucma_complete_event(struct cma_id_private *id_priv) |
| { |
| pthread_mutex_lock(&id_priv->mut); |
| id_priv->events_completed++; |
| pthread_cond_signal(&id_priv->cond); |
| pthread_mutex_unlock(&id_priv->mut); |
| } |
| |
| static void ucma_complete_mc_event(struct cma_multicast *mc) |
| { |
| pthread_mutex_lock(&mc->id_priv->mut); |
| mc->events_completed++; |
| pthread_cond_signal(&mc->cond); |
| mc->id_priv->events_completed++; |
| pthread_cond_signal(&mc->id_priv->cond); |
| pthread_mutex_unlock(&mc->id_priv->mut); |
| } |
| |
| int rdma_ack_cm_event(struct rdma_cm_event *event) |
| { |
| struct cma_event *evt; |
| |
| if (!event) |
| return ERR(EINVAL); |
| |
| evt = container_of(event, struct cma_event, event); |
| |
| if (evt->mc) |
| ucma_complete_mc_event(evt->mc); |
| else |
| ucma_complete_event(evt->id_priv); |
| free(evt); |
| return 0; |
| } |
| |
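| /* |
| * After address resolution, query the kernel for the resolved device |
| * and addressing information.  Failure to find a matching verbs |
| * device (or a failed query) converts the event into |
| * RDMA_CM_EVENT_ADDR_ERROR. |
| */ |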
| static void ucma_process_addr_resolved(struct cma_event *evt) |
| { |
| struct rdma_cm_id *id = &evt->id_priv->id; |
| |
| if (af_ib_support) { |
| evt->event.status = ucma_query_addr(id); |
| if (!evt->event.status && !id->verbs) |
| goto err_dev; |
| |
| if (!evt->event.status && |
| id->verbs->device->transport_type == IBV_TRANSPORT_IB) { |
| evt->event.status = ucma_query_gid(id); |
| } |
| } else { |
| evt->event.status = ucma_query_route(id); |
| if (!evt->event.status && !id->verbs) |
| goto err_dev; |
| } |
| |
| if (evt->event.status) |
| evt->event.event = RDMA_CM_EVENT_ADDR_ERROR; |
| return; |
| |
| err_dev: |
| evt->event.status = ERR(ENODEV); |
| evt->event.event = RDMA_CM_EVENT_ADDR_ERROR; |
| } |
| |
| static void ucma_process_route_resolved(struct cma_event *evt) |
| { |
| if (evt->id_priv->id.verbs->device->transport_type != IBV_TRANSPORT_IB) |
| return; |
| |
| if (af_ib_support) |
| evt->event.status = ucma_query_path(&evt->id_priv->id); |
| else |
| evt->event.status = ucma_query_route(&evt->id_priv->id); |
| |
| if (evt->event.status) |
| evt->event.event = RDMA_CM_EVENT_ROUTE_ERROR; |
| } |
| |
| static int ucma_query_req_info(struct rdma_cm_id *id) |
| { |
| int ret; |
| |
| if (!af_ib_support) |
| return ucma_query_route(id); |
| |
| ret = ucma_query_addr(id); |
| if (ret) |
| return ret; |
| |
| ret = ucma_query_gid(id); |
| if (ret) |
| return ret; |
| |
| ret = ucma_query_path(id); |
| if (ret) |
| return ret; |
| |
| return 0; |
| } |
| |
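| /* |
| * Build a new rdma_cm_id for an incoming connection request.  The |
| * kernel-assigned handle is adopted by the new id, the peer's |
| * connection parameters and ECE data are cached for the later accept, |
| * and, if the listener is synchronous, the new id is migrated onto its |
| * own channel.  On any failure the request is dropped and the |
| * listener's event is completed so the event loop can simply retry. |
| */ |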
| static int ucma_process_conn_req(struct cma_event *evt, uint32_t handle, |
| struct ucma_abi_ece *ece) |
| { |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| id_priv = ucma_alloc_id(evt->id_priv->id.channel, |
| evt->id_priv->id.context, evt->id_priv->id.ps, |
| evt->id_priv->id.qp_type); |
| if (!id_priv) { |
| ucma_destroy_kern_id(evt->id_priv->id.channel->fd, handle); |
| ret = ERR(ENOMEM); |
| goto err1; |
| } |
| |
| evt->event.listen_id = &evt->id_priv->id; |
| evt->event.id = &id_priv->id; |
| id_priv->handle = handle; |
| ucma_insert_id(id_priv); |
| id_priv->initiator_depth = evt->event.param.conn.initiator_depth; |
| id_priv->responder_resources = evt->event.param.conn.responder_resources; |
| id_priv->remote_ece.vendor_id = ece->vendor_id; |
| id_priv->remote_ece.options = ece->attr_mod; |
| |
| if (evt->id_priv->sync) { |
| ret = rdma_migrate_id(&id_priv->id, NULL); |
| if (ret) |
| goto err2; |
| } |
| |
| ret = ucma_query_req_info(&id_priv->id); |
| if (ret) |
| goto err2; |
| |
| return 0; |
| |
| err2: |
| rdma_destroy_id(&id_priv->id); |
| err1: |
| ucma_complete_event(evt->id_priv); |
| return ret; |
| } |
| |
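| /* |
| * Active-side completion of the connect handshake: bring the QP |
| * through RTR and RTS, then write ACCEPT, prompting the kernel to |
| * finish the handshake (the RTU, in IB CM terms).  Any failure moves |
| * the QP to the error state. |
| */ |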
| static int ucma_process_conn_resp(struct cma_id_private *id_priv) |
| { |
| struct ucma_abi_accept cmd; |
| int ret; |
| |
| ret = ucma_modify_qp_rtr(&id_priv->id, RDMA_MAX_RESP_RES); |
| if (ret) |
| goto err; |
| |
| ret = ucma_modify_qp_rts(&id_priv->id, RDMA_MAX_INIT_DEPTH); |
| if (ret) |
| goto err; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, ACCEPT); |
| cmd.id = id_priv->handle; |
| |
| ret = write(id_priv->id.channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| ret = (ret >= 0) ? ERR(ENODATA) : -1; |
| goto err; |
| } |
| |
| return 0; |
| err: |
| ucma_modify_qp_err(&id_priv->id); |
| return ret; |
| } |
| |
| static int ucma_process_conn_resp_ece(struct cma_id_private *id_priv, |
| struct ucma_abi_ece *ece) |
| { |
| struct ibv_ece ibv_ece = { .vendor_id = ece->vendor_id, |
| .options = ece->attr_mod }; |
| int ret; |
| |
| /* This is the response side of the ECE handshake */ |
| if (!ece->vendor_id) { |
| /* |
| * The kernel or user space doesn't support ECE transfer; |
| * clear everything. |
| */ |
| ibv_ece.vendor_id = id_priv->local_ece.vendor_id; |
| ibv_ece.options = 0; |
| } else if (ece->vendor_id != id_priv->local_ece.vendor_id) { |
| /* |
| * At this point the remote vendor_id should be the same as the |
| * local one; if not, something went wrong in the ECE handshake |
| * implementation. |
| */ |
| ucma_modify_qp_err(&id_priv->id); |
| return ERR(EINVAL); |
| } |
| |
| id_priv->remote_ece.vendor_id = ece->vendor_id; |
| ret = ibv_set_ece(id_priv->id.qp, &ibv_ece); |
| if (ret && ret != EOPNOTSUPP) |
| return ret; |
| |
| ret = ucma_process_conn_resp(id_priv); |
| if (ret) |
| return ret; |
| |
| ret = ibv_query_ece(id_priv->id.qp, &ibv_ece); |
| if (ret && ret != EOPNOTSUPP) { |
| ucma_modify_qp_err(&id_priv->id); |
| return ret; |
| } |
| |
| id_priv->local_ece.options = (ret == EOPNOTSUPP) ? 0 : ibv_ece.options; |
| return 0; |
| } |
| |
| static int ucma_process_join(struct cma_event *evt) |
| { |
| evt->mc->mgid = evt->event.param.ud.ah_attr.grh.dgid; |
| evt->mc->mlid = evt->event.param.ud.ah_attr.dlid; |
| |
| if (!evt->id_priv->id.qp) |
| return 0; |
| |
| /* Don't attach the QP to the multicast group if joined as a send-only full member */ |
| if (evt->mc->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) |
| return 0; |
| |
| return rdma_seterrno(ibv_attach_mcast(evt->id_priv->id.qp, |
| &evt->mc->mgid, evt->mc->mlid)); |
| } |
| |
| static void ucma_copy_conn_event(struct cma_event *event, |
| struct ucma_abi_conn_param *src) |
| { |
| struct rdma_conn_param *dst = &event->event.param.conn; |
| |
| dst->private_data_len = src->private_data_len; |
| if (src->private_data_len) { |
| dst->private_data = &event->private_data; |
| memcpy(&event->private_data, src->private_data, |
| src->private_data_len); |
| } |
| |
| dst->responder_resources = src->responder_resources; |
| dst->initiator_depth = src->initiator_depth; |
| dst->flow_control = src->flow_control; |
| dst->retry_count = src->retry_count; |
| dst->rnr_retry_count = src->rnr_retry_count; |
| dst->srq = src->srq; |
| dst->qp_num = src->qp_num; |
| } |
| |
| static void ucma_copy_ud_event(struct cma_event *event, |
| struct ucma_abi_ud_param *src) |
| { |
| struct rdma_ud_param *dst = &event->event.param.ud; |
| |
| dst->private_data_len = src->private_data_len; |
| if (src->private_data_len) { |
| dst->private_data = &event->private_data; |
| memcpy(&event->private_data, src->private_data, |
| src->private_data_len); |
| } |
| |
| ibv_copy_ah_attr_from_kern(&dst->ah_attr, &src->ah_attr); |
| dst->qp_num = src->qp_num; |
| dst->qkey = src->qkey; |
| } |
| |
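| /* |
| * Complete an externally driven (out-of-band) connection after a |
| * CONNECT_RESPONSE event on an id that has no QP attached. |
| */ |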
| int rdma_establish(struct rdma_cm_id *id) |
| { |
| if (id->qp) |
| return ERR(EINVAL); |
| |
| /* id->qp is NULL, so ucma_process_conn_resp() will only send ACCEPT to |
| * the passive side, and will not attempt to modify the QP. |
| */ |
| return ucma_process_conn_resp(container_of(id, struct cma_id_private, |
| id)); |
| } |
| |
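| /* |
| * Fetch the next event from an event channel.  A minimal synchronous |
| * event loop looks roughly like this (illustrative sketch only, with |
| * error handling omitted; handle_event() is a hypothetical dispatch |
| * helper, not part of this library): |
| * |
| *	struct rdma_cm_event *event; |
| * |
| *	while (!rdma_get_cm_event(channel, &event)) { |
| *		handle_event(event); |
| *		rdma_ack_cm_event(event); |
| *	} |
| */ |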
| int rdma_get_cm_event(struct rdma_event_channel *channel, |
| struct rdma_cm_event **event) |
| { |
| struct ucma_abi_event_resp resp = {}; |
| struct ucma_abi_get_event cmd; |
| struct cma_event *evt; |
| int ret; |
| |
| ret = ucma_init(); |
| if (ret) |
| return ret; |
| |
| if (!event) |
| return ERR(EINVAL); |
| |
| evt = malloc(sizeof(*evt)); |
| if (!evt) |
| return ERR(ENOMEM); |
| |
| retry: |
| memset(evt, 0, sizeof(*evt)); |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, GET_EVENT, &resp, sizeof resp); |
| ret = write(channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| free(evt); |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| } |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| evt->event.event = resp.event; |
| /* |
| * We should have a non-zero uid, except for connection requests. |
| * But a bug in older kernels can report a uid of 0.  Work around |
| * this issue by looking up the cma_id based on the kernel's id when |
| * the uid is 0 and we're processing a connection established event. |
| * In all other cases, if the uid is 0, we discard the event, as the |
| * kernel should have done. |
| */ |
| if (resp.uid) { |
| evt->id_priv = (void *) (uintptr_t) resp.uid; |
| } else { |
| evt->id_priv = ucma_lookup_id(resp.id); |
| if (!evt->id_priv) { |
| syslog(LOG_WARNING, PFX "Warning: discarding unmatched " |
| "event - rdma_destroy_id may hang.\n"); |
| goto retry; |
| } |
| if (resp.event != RDMA_CM_EVENT_ESTABLISHED) { |
| ucma_complete_event(evt->id_priv); |
| goto retry; |
| } |
| } |
| evt->event.id = &evt->id_priv->id; |
| evt->event.status = resp.status; |
| |
| switch (resp.event) { |
| case RDMA_CM_EVENT_ADDR_RESOLVED: |
| ucma_process_addr_resolved(evt); |
| break; |
| case RDMA_CM_EVENT_ROUTE_RESOLVED: |
| ucma_process_route_resolved(evt); |
| break; |
| case RDMA_CM_EVENT_CONNECT_REQUEST: |
| evt->id_priv = (void *) (uintptr_t) resp.uid; |
| if (ucma_is_ud_qp(evt->id_priv->id.qp_type)) |
| ucma_copy_ud_event(evt, &resp.param.ud); |
| else |
| ucma_copy_conn_event(evt, &resp.param.conn); |
| |
| ret = ucma_process_conn_req(evt, resp.id, &resp.ece); |
| if (ret) |
| goto retry; |
| break; |
| case RDMA_CM_EVENT_CONNECT_RESPONSE: |
| ucma_copy_conn_event(evt, &resp.param.conn); |
| if (!evt->id_priv->id.qp) { |
| evt->event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; |
| evt->id_priv->remote_ece.vendor_id = resp.ece.vendor_id; |
| evt->id_priv->remote_ece.options = resp.ece.attr_mod; |
| } else { |
| evt->event.status = ucma_process_conn_resp_ece( |
| evt->id_priv, &resp.ece); |
| if (!evt->event.status) |
| evt->event.event = RDMA_CM_EVENT_ESTABLISHED; |
| else { |
| evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR; |
| evt->id_priv->connect_error = 1; |
| } |
| } |
| break; |
| case RDMA_CM_EVENT_ESTABLISHED: |
| if (ucma_is_ud_qp(evt->id_priv->id.qp_type)) { |
| ucma_copy_ud_event(evt, &resp.param.ud); |
| break; |
| } |
| |
| ucma_copy_conn_event(evt, &resp.param.conn); |
| break; |
| case RDMA_CM_EVENT_REJECTED: |
| if (evt->id_priv->connect_error) { |
| ucma_complete_event(evt->id_priv); |
| goto retry; |
| } |
| ucma_copy_conn_event(evt, &resp.param.conn); |
| ucma_modify_qp_err(evt->event.id); |
| break; |
| case RDMA_CM_EVENT_DISCONNECTED: |
| if (evt->id_priv->connect_error) { |
| ucma_complete_event(evt->id_priv); |
| goto retry; |
| } |
| ucma_copy_conn_event(evt, &resp.param.conn); |
| break; |
| case RDMA_CM_EVENT_MULTICAST_JOIN: |
| evt->mc = (void *) (uintptr_t) resp.uid; |
| evt->id_priv = evt->mc->id_priv; |
| evt->event.id = &evt->id_priv->id; |
| ucma_copy_ud_event(evt, &resp.param.ud); |
| evt->event.param.ud.private_data = evt->mc->context; |
| evt->event.status = ucma_process_join(evt); |
| if (evt->event.status) |
| evt->event.event = RDMA_CM_EVENT_MULTICAST_ERROR; |
| break; |
| case RDMA_CM_EVENT_MULTICAST_ERROR: |
| evt->mc = (void *) (uintptr_t) resp.uid; |
| evt->id_priv = evt->mc->id_priv; |
| evt->event.id = &evt->id_priv->id; |
| evt->event.param.ud.private_data = evt->mc->context; |
| break; |
| default: |
| evt->id_priv = (void *) (uintptr_t) resp.uid; |
| evt->event.id = &evt->id_priv->id; |
| evt->event.status = resp.status; |
| if (ucma_is_ud_qp(evt->id_priv->id.qp_type)) |
| ucma_copy_ud_event(evt, &resp.param.ud); |
| else |
| ucma_copy_conn_event(evt, &resp.param.conn); |
| break; |
| } |
| |
| *event = &evt->event; |
| return 0; |
| } |
| |
| const char *rdma_event_str(enum rdma_cm_event_type event) |
| { |
| switch (event) { |
| case RDMA_CM_EVENT_ADDR_RESOLVED: |
| return "RDMA_CM_EVENT_ADDR_RESOLVED"; |
| case RDMA_CM_EVENT_ADDR_ERROR: |
| return "RDMA_CM_EVENT_ADDR_ERROR"; |
| case RDMA_CM_EVENT_ROUTE_RESOLVED: |
| return "RDMA_CM_EVENT_ROUTE_RESOLVED"; |
| case RDMA_CM_EVENT_ROUTE_ERROR: |
| return "RDMA_CM_EVENT_ROUTE_ERROR"; |
| case RDMA_CM_EVENT_CONNECT_REQUEST: |
| return "RDMA_CM_EVENT_CONNECT_REQUEST"; |
| case RDMA_CM_EVENT_CONNECT_RESPONSE: |
| return "RDMA_CM_EVENT_CONNECT_RESPONSE"; |
| case RDMA_CM_EVENT_CONNECT_ERROR: |
| return "RDMA_CM_EVENT_CONNECT_ERROR"; |
| case RDMA_CM_EVENT_UNREACHABLE: |
| return "RDMA_CM_EVENT_UNREACHABLE"; |
| case RDMA_CM_EVENT_REJECTED: |
| return "RDMA_CM_EVENT_REJECTED"; |
| case RDMA_CM_EVENT_ESTABLISHED: |
| return "RDMA_CM_EVENT_ESTABLISHED"; |
| case RDMA_CM_EVENT_DISCONNECTED: |
| return "RDMA_CM_EVENT_DISCONNECTED"; |
| case RDMA_CM_EVENT_DEVICE_REMOVAL: |
| return "RDMA_CM_EVENT_DEVICE_REMOVAL"; |
| case RDMA_CM_EVENT_MULTICAST_JOIN: |
| return "RDMA_CM_EVENT_MULTICAST_JOIN"; |
| case RDMA_CM_EVENT_MULTICAST_ERROR: |
| return "RDMA_CM_EVENT_MULTICAST_ERROR"; |
| case RDMA_CM_EVENT_ADDR_CHANGE: |
| return "RDMA_CM_EVENT_ADDR_CHANGE"; |
| case RDMA_CM_EVENT_TIMEWAIT_EXIT: |
| return "RDMA_CM_EVENT_TIMEWAIT_EXIT"; |
| default: |
| return "UNKNOWN EVENT"; |
| } |
| } |
| |
| int rdma_set_option(struct rdma_cm_id *id, int level, int optname, |
| void *optval, size_t optlen) |
| { |
| struct ucma_abi_set_option cmd; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| CMA_INIT_CMD(&cmd, sizeof cmd, SET_OPTION); |
| id_priv = container_of(id, struct cma_id_private, id); |
| cmd.id = id_priv->handle; |
| cmd.optval = (uintptr_t) optval; |
| cmd.level = level; |
| cmd.optname = optname; |
| cmd.optlen = optlen; |
| |
| ret = write(id->channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| |
| return 0; |
| } |
| |
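| /* |
| * Move an id to a different event channel.  Passing a NULL channel |
| * switches the id into synchronous mode on a private channel created |
| * here.  The wait loop at the end keeps the caller from seeing events |
| * for this id on the old channel after the call returns. |
| */ |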
| int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel) |
| { |
| struct ucma_abi_migrate_resp resp; |
| struct ucma_abi_migrate_id cmd; |
| struct cma_id_private *id_priv; |
| int ret, sync; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (id_priv->sync && !channel) |
| return ERR(EINVAL); |
| |
| if ((sync = (channel == NULL))) { |
| channel = rdma_create_event_channel(); |
| if (!channel) |
| return -1; |
| } |
| |
| CMA_INIT_CMD_RESP(&cmd, sizeof cmd, MIGRATE_ID, &resp, sizeof resp); |
| cmd.id = id_priv->handle; |
| cmd.fd = id->channel->fd; |
| |
| ret = write(channel->fd, &cmd, sizeof cmd); |
| if (ret != sizeof cmd) { |
| if (sync) |
| rdma_destroy_event_channel(channel); |
| return (ret >= 0) ? ERR(ENODATA) : -1; |
| } |
| |
| VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp); |
| |
| if (id_priv->sync) { |
| if (id->event) { |
| rdma_ack_cm_event(id->event); |
| id->event = NULL; |
| } |
| rdma_destroy_event_channel(id->channel); |
| } |
| |
| /* |
| * To support migrating a channel while events for this id may still |
| * be outstanding on the current channel, block here until those |
| * events have been processed.  This prevents the user from handling |
| * events for this id on the old channel after this call returns. |
| */ |
| pthread_mutex_lock(&id_priv->mut); |
| id_priv->sync = sync; |
| id->channel = channel; |
| while (id_priv->events_completed < resp.events_reported) |
| pthread_cond_wait(&id_priv->cond, &id_priv->mut); |
| pthread_mutex_unlock(&id_priv->mut); |
| |
| return 0; |
| } |
| |
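| /* |
| * Set up a passive (listening) endpoint for rdma_create_ep(): bind to |
| * the source address and stash a copy of the QP attributes so a QP |
| * can be created with them for each incoming request later. |
| */ |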
| static int ucma_passive_ep(struct rdma_cm_id *id, struct rdma_addrinfo *res, |
| struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) |
| { |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| if (af_ib_support) |
| ret = rdma_bind_addr2(id, res->ai_src_addr, res->ai_src_len); |
| else |
| ret = rdma_bind_addr(id, res->ai_src_addr); |
| if (ret) |
| return ret; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (pd) |
| id->pd = pd; |
| |
| if (qp_init_attr) { |
| id_priv->qp_init_attr = malloc(sizeof(*qp_init_attr)); |
| if (!id_priv->qp_init_attr) |
| return ERR(ENOMEM); |
| |
| *id_priv->qp_init_attr = *qp_init_attr; |
| id_priv->qp_init_attr->qp_type = res->ai_qp_type; |
| } |
| |
| return 0; |
| } |
| |
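| /* |
| * One-shot endpoint creation from an rdma_addrinfo.  Passive entries |
| * just bind; active ones resolve the address and route (2 second |
| * timeouts), optionally preload a caller-supplied IB path, create the |
| * QP if attributes were given, and cache any connect private data for |
| * the later rdma_connect(). |
| */ |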
| int rdma_create_ep(struct rdma_cm_id **id, struct rdma_addrinfo *res, |
| struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) |
| { |
| struct rdma_cm_id *cm_id; |
| struct cma_id_private *id_priv; |
| int ret; |
| |
| ret = rdma_create_id2(NULL, &cm_id, NULL, res->ai_port_space, res->ai_qp_type); |
| if (ret) |
| return ret; |
| |
| if (res->ai_flags & RAI_PASSIVE) { |
| ret = ucma_passive_ep(cm_id, res, pd, qp_init_attr); |
| if (ret) |
| goto err; |
| goto out; |
| } |
| |
| if (af_ib_support) |
| ret = rdma_resolve_addr2(cm_id, res->ai_src_addr, res->ai_src_len, |
| res->ai_dst_addr, res->ai_dst_len, 2000); |
| else |
| ret = rdma_resolve_addr(cm_id, res->ai_src_addr, res->ai_dst_addr, 2000); |
| if (ret) |
| goto err; |
| |
| if (res->ai_route_len) { |
| ret = rdma_set_option(cm_id, RDMA_OPTION_IB, RDMA_OPTION_IB_PATH, |
| res->ai_route, res->ai_route_len); |
| if (!ret) |
| ret = ucma_complete(cm_id); |
| } else { |
| ret = rdma_resolve_route(cm_id, 2000); |
| } |
| if (ret) |
| goto err; |
| |
| if (qp_init_attr) { |
| qp_init_attr->qp_type = res->ai_qp_type; |
| ret = rdma_create_qp(cm_id, pd, qp_init_attr); |
| if (ret) |
| goto err; |
| } |
| |
| if (res->ai_connect_len) { |
| id_priv = container_of(cm_id, struct cma_id_private, id); |
| id_priv->connect = malloc(res->ai_connect_len); |
| if (!id_priv->connect) { |
| ret = ERR(ENOMEM); |
| goto err; |
| } |
| memcpy(id_priv->connect, res->ai_connect, res->ai_connect_len); |
| id_priv->connect_len = res->ai_connect_len; |
| } |
| |
| out: |
| *id = cm_id; |
| return 0; |
| |
| err: |
| rdma_destroy_ep(cm_id); |
| return ret; |
| } |
| |
| void rdma_destroy_ep(struct rdma_cm_id *id) |
| { |
| struct cma_id_private *id_priv; |
| |
| if (id->qp) |
| rdma_destroy_qp(id); |
| |
| if (id->srq) |
| rdma_destroy_srq(id); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (id_priv->qp_init_attr) |
| free(id_priv->qp_init_attr); |
| |
| rdma_destroy_id(id); |
| } |
| |
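| /* |
| * Report the largest usable QP size: the id's own device limit when |
| * bound to a device, otherwise the smallest limit across all live |
| * devices.  Note that 'id' is the first member of struct |
| * cma_id_private, so the container_of() ahead of the NULL check |
| * below remains safe for a NULL id. |
| */ |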
| int ucma_max_qpsize(struct rdma_cm_id *id) |
| { |
| struct cma_id_private *id_priv; |
| struct cma_device *dev; |
| int max_size = 0; |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| if (id && id_priv->cma_dev) { |
| max_size = id_priv->cma_dev->max_qpsize; |
| } else { |
| ucma_init_all(); |
| pthread_mutex_lock(&mut); |
| list_for_each(&cma_dev_list, dev, entry) |
| if (!dev->is_device_dead && |
| (!max_size || max_size > dev->max_qpsize)) |
| max_size = dev->max_qpsize; |
| pthread_mutex_unlock(&mut); |
| } |
| return max_size; |
| } |
| |
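| /* |
| * Extract the port from a sockaddr.  For AF_IB the port travels in |
| * the low 16 bits of the 64-bit service ID. |
| */ |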
| __be16 ucma_get_port(struct sockaddr *addr) |
| { |
| switch (addr->sa_family) { |
| case AF_INET: |
| return ((struct sockaddr_in *) addr)->sin_port; |
| case AF_INET6: |
| return ((struct sockaddr_in6 *) addr)->sin6_port; |
| case AF_IB: |
| return htobe16((uint16_t) be64toh(((struct sockaddr_ib *) addr)->sib_sid)); |
| default: |
| return 0; |
| } |
| } |
| |
| __be16 rdma_get_src_port(struct rdma_cm_id *id) |
| { |
| return ucma_get_port(&id->route.addr.src_addr); |
| } |
| |
| __be16 rdma_get_dst_port(struct rdma_cm_id *id) |
| { |
| return ucma_get_port(&id->route.addr.dst_addr); |
| } |
| |
| int rdma_set_local_ece(struct rdma_cm_id *id, struct ibv_ece *ece) |
| { |
| struct cma_id_private *id_priv; |
| |
| if (!id || id->qp || !ece || !ece->vendor_id || ece->comp_mask) |
| return ERR(EINVAL); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| id_priv->local_ece.vendor_id = ece->vendor_id; |
| id_priv->local_ece.options = ece->options; |
| |
| return 0; |
| } |
| |
| int rdma_get_remote_ece(struct rdma_cm_id *id, struct ibv_ece *ece) |
| { |
| struct cma_id_private *id_priv; |
| |
| if (!id || id->qp || !ece) |
| return ERR(EINVAL); |
| |
| id_priv = container_of(id, struct cma_id_private, id); |
| ece->vendor_id = id_priv->remote_ece.vendor_id; |
| ece->options = id_priv->remote_ece.options; |
| ece->comp_mask = 0; |
| |
| return 0; |
| } |