| /* |
| * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. |
| * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. |
| * |
| * This software is available to you under a choice of one of two |
| * licenses. You may choose to be licensed under the terms of the GNU |
| * General Public License (GPL) Version 2, available from the file |
| * COPYING in the main directory of this source tree, or the |
| * OpenIB.org BSD license below: |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials |
| * provided with the distribution. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #define _GNU_SOURCE |
| #include <config.h> |
| |
| #include <stdlib.h> |
| #include <string.h> |
| #include <glob.h> |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <sys/time.h> |
| #include <sys/resource.h> |
| #include <fcntl.h> |
| #include <dirent.h> |
| #include <errno.h> |
| #include <assert.h> |
| #include <fnmatch.h> |
| #include <sys/sysmacros.h> |
| |
| #include <rdma/rdma_netlink.h> |
| |
| #include <util/util.h> |
| #include "driver.h" |
| #include "ibverbs.h" |
| #include <infiniband/cmd_write.h> |
| |
/* Kernel uverbs ABI version; stays zero until check_abi_version() reads it. */
int abi_ver;

/* Stream that __verbs_log() writes to; selected by verbs_set_log_file(). */
static FILE *verbs_log_fp;
/* Highest message level __verbs_log() will emit. */
static uint32_t verbs_log_level;
| |
| __attribute__((format(printf, 3, 4))) |
| void __verbs_log(struct verbs_context *ctx, uint32_t level, |
| const char *fmt, ...) |
| { |
| va_list args; |
| |
| if (level <= verbs_log_level) { |
| int tmp = errno; |
| va_start(args, fmt); |
| vfprintf(verbs_log_fp, fmt, args); |
| va_end(args); |
| errno = tmp; |
| } |
| } |
| |
/* One registered provider: its ops table plus the linkage on driver_list. */
struct ibv_driver {
	struct list_node entry;			/* link in driver_list */
	const struct verbs_device_ops *ops;	/* ops passed to verbs_register_driver() */
};

/* Providers added by verbs_register_driver(); new entries go at the tail. */
static LIST_HEAD(driver_list);
| |
| int try_access_device(const struct verbs_sysfs_dev *sysfs_dev) |
| { |
| struct stat cdev_stat; |
| char *devpath; |
| int ret; |
| |
| if (asprintf(&devpath, RDMA_CDEV_DIR"/%s", |
| sysfs_dev->sysfs_name) < 0) |
| return ENOMEM; |
| |
| ret = stat(devpath, &cdev_stat); |
| free(devpath); |
| return ret; |
| } |
| |
| enum ibv_node_type decode_knode_type(unsigned int knode_type) |
| { |
| switch (knode_type) { |
| case RDMA_NODE_IB_CA: |
| return IBV_NODE_CA; |
| case RDMA_NODE_IB_SWITCH: |
| return IBV_NODE_SWITCH; |
| case RDMA_NODE_IB_ROUTER: |
| return IBV_NODE_ROUTER; |
| case RDMA_NODE_RNIC: |
| return IBV_NODE_RNIC; |
| case RDMA_NODE_USNIC: |
| return IBV_NODE_USNIC; |
| case RDMA_NODE_USNIC_UDP: |
| return IBV_NODE_USNIC_UDP; |
| case RDMA_NODE_UNSPECIFIED: |
| return IBV_NODE_UNSPECIFIED; |
| } |
| return IBV_NODE_UNKNOWN; |
| } |
| |
| int setup_sysfs_uverbs(int uv_dirfd, const char *uverbs, |
| struct verbs_sysfs_dev *sysfs_dev) |
| { |
| unsigned int major; |
| unsigned int minor; |
| struct stat buf; |
| char value[32]; |
| |
| if (!check_snprintf(sysfs_dev->sysfs_name, |
| sizeof(sysfs_dev->sysfs_name), "%s", uverbs)) |
| return -1; |
| |
| if (stat(sysfs_dev->ibdev_path, &buf)) |
| return -1; |
| sysfs_dev->time_created = buf.st_mtim; |
| |
| if (ibv_read_sysfs_file_at(uv_dirfd, "dev", value, |
| sizeof(value)) < 0) |
| return -1; |
| if (sscanf(value, "%u:%u", &major, &minor) != 2) |
| return -1; |
| sysfs_dev->sysfs_cdev = makedev(major, minor); |
| |
| if (ibv_read_sysfs_file_at(uv_dirfd, "abi_version", value, |
| sizeof(value)) > 0) |
| sysfs_dev->abi_ver = strtoul(value, NULL, 10); |
| |
| return 0; |
| } |
| |
| static int setup_sysfs_dev(int dirfd, const char *uverbs, |
| struct list_head *tmp_sysfs_dev_list) |
| { |
| struct verbs_sysfs_dev *sysfs_dev = NULL; |
| char value[32]; |
| int uv_dirfd; |
| |
| sysfs_dev = calloc(1, sizeof(*sysfs_dev)); |
| if (!sysfs_dev) |
| return ENOMEM; |
| |
| sysfs_dev->ibdev_idx = -1; |
| |
| uv_dirfd = openat(dirfd, uverbs, O_RDONLY | O_DIRECTORY | O_CLOEXEC); |
| if (uv_dirfd == -1) |
| goto err_alloc; |
| |
| if (ibv_read_sysfs_file_at(uv_dirfd, "ibdev", sysfs_dev->ibdev_name, |
| sizeof(sysfs_dev->ibdev_name)) < 0) |
| goto err_fd; |
| |
| if (!check_snprintf( |
| sysfs_dev->ibdev_path, sizeof(sysfs_dev->ibdev_path), |
| "%s/class/infiniband/%s", ibv_get_sysfs_path(), |
| sysfs_dev->ibdev_name)) |
| goto err_fd; |
| |
| if (setup_sysfs_uverbs(uv_dirfd, uverbs, sysfs_dev)) |
| goto err_fd; |
| |
| if (ibv_read_ibdev_sysfs_file(value, sizeof(value), sysfs_dev, |
| "node_type") <= 0) |
| sysfs_dev->node_type = IBV_NODE_UNKNOWN; |
| else |
| sysfs_dev->node_type = |
| decode_knode_type(strtoul(value, NULL, 10)); |
| |
| if (try_access_device(sysfs_dev)) |
| goto err_fd; |
| |
| close(uv_dirfd); |
| list_add(tmp_sysfs_dev_list, &sysfs_dev->entry); |
| return 0; |
| |
| err_fd: |
| close(uv_dirfd); |
| err_alloc: |
| free(sysfs_dev); |
| return 0; |
| } |
| |
| static int find_sysfs_devs(struct list_head *tmp_sysfs_dev_list) |
| { |
| struct verbs_sysfs_dev *dev, *dev_tmp; |
| char class_path[IBV_SYSFS_PATH_MAX]; |
| DIR *class_dir; |
| struct dirent *dent; |
| int ret = 0; |
| |
| if (!check_snprintf(class_path, sizeof(class_path), |
| "%s/class/infiniband_verbs", ibv_get_sysfs_path())) |
| return ENOMEM; |
| |
| class_dir = opendir(class_path); |
| if (!class_dir) |
| return ENOSYS; |
| |
| while ((dent = readdir(class_dir))) { |
| if (dent->d_name[0] == '.') |
| continue; |
| |
| ret = setup_sysfs_dev(dirfd(class_dir), dent->d_name, |
| tmp_sysfs_dev_list); |
| if (ret) |
| break; |
| } |
| closedir(class_dir); |
| |
| if (ret) { |
| list_for_each_safe (tmp_sysfs_dev_list, dev, dev_tmp, entry) { |
| list_del(&dev->entry); |
| free(dev); |
| } |
| } |
| return ret; |
| } |
| |
| void verbs_register_driver(const struct verbs_device_ops *ops) |
| { |
| struct ibv_driver *driver; |
| |
| driver = malloc(sizeof *driver); |
| if (!driver) { |
| fprintf(stderr, |
| PFX "Warning: couldn't allocate driver for %s\n", |
| ops->name); |
| return; |
| } |
| |
| driver->ops = ops; |
| |
| list_add_tail(&driver_list, &driver->entry); |
| } |
| |
| /* Match a single modalias value */ |
| static bool match_modalias(const struct verbs_match_ent *ent, const char *value) |
| { |
| char pci_ma[100]; |
| |
| switch (ent->kind) { |
| case VERBS_MATCH_MODALIAS: |
| return fnmatch(ent->u.modalias, value, 0) == 0; |
| case VERBS_MATCH_PCI: |
| snprintf(pci_ma, sizeof(pci_ma), "pci:v%08Xd%08Xsv*", |
| ent->vendor, ent->device); |
| return fnmatch(pci_ma, value, 0) == 0; |
| default: |
| return false; |
| } |
| } |
| |
| /* Search a null terminated table of verbs_match_ent's and return the one |
| * that matches the device the verbs sysfs device is bound to or NULL. |
| */ |
| static const struct verbs_match_ent * |
| match_modalias_device(const struct verbs_device_ops *ops, |
| struct verbs_sysfs_dev *sysfs_dev) |
| { |
| const struct verbs_match_ent *i; |
| |
| if (!(sysfs_dev->flags & VSYSFS_READ_MODALIAS)) { |
| sysfs_dev->flags |= VSYSFS_READ_MODALIAS; |
| if (ibv_read_ibdev_sysfs_file( |
| sysfs_dev->modalias, sizeof(sysfs_dev->modalias), |
| sysfs_dev, "device/modalias") <= 0) { |
| sysfs_dev->modalias[0] = 0; |
| return NULL; |
| } |
| } |
| |
| for (i = ops->match_table; i->kind != VERBS_MATCH_SENTINEL; i++) |
| if (match_modalias(i, sysfs_dev->modalias)) |
| return i; |
| |
| return NULL; |
| } |
| |
| /* Match the device name itself */ |
| static const struct verbs_match_ent * |
| match_name(const struct verbs_device_ops *ops, |
| struct verbs_sysfs_dev *sysfs_dev) |
| { |
| char name_ma[100]; |
| const struct verbs_match_ent *i; |
| |
| if (!check_snprintf(name_ma, sizeof(name_ma), |
| "rdma_device:N%s", sysfs_dev->ibdev_name)) |
| return NULL; |
| |
| for (i = ops->match_table; i->kind != VERBS_MATCH_SENTINEL; i++) |
| if (match_modalias(i, name_ma)) |
| return i; |
| |
| return NULL; |
| } |
| |
| /* Match the driver id we get from netlink */ |
| static const struct verbs_match_ent * |
| match_driver_id(const struct verbs_device_ops *ops, |
| struct verbs_sysfs_dev *sysfs_dev) |
| { |
| const struct verbs_match_ent *i; |
| |
| if (sysfs_dev->driver_id == RDMA_DRIVER_UNKNOWN) |
| return NULL; |
| |
| for (i = ops->match_table; i->kind != VERBS_MATCH_SENTINEL; i++) |
| if (i->kind == VERBS_MATCH_DRIVER_ID && |
| i->u.driver_id == sysfs_dev->driver_id) |
| return i; |
| return NULL; |
| } |
| |
| /* True if the provider matches the selected rdma sysfs device */ |
| static bool match_device(const struct verbs_device_ops *ops, |
| struct verbs_sysfs_dev *sysfs_dev) |
| { |
| if (ops->match_table) { |
| sysfs_dev->match = match_driver_id(ops, sysfs_dev); |
| if (!sysfs_dev->match) |
| sysfs_dev->match = match_name(ops, sysfs_dev); |
| if (!sysfs_dev->match) |
| sysfs_dev->match = |
| match_modalias_device(ops, sysfs_dev); |
| } |
| |
| if (ops->match_device) { |
| /* If a matching function is provided then it is called |
| * unconditionally after the table match above, it is |
| * responsible for determining if the device matches based on |
| * the match pointer and any other internal information. |
| */ |
| if (!ops->match_device(sysfs_dev)) |
| return false; |
| } else { |
| /* With no match function, we must have a table match */ |
| if (!sysfs_dev->match) |
| return false; |
| } |
| |
| if (sysfs_dev->abi_ver < ops->match_min_abi_version || |
| sysfs_dev->abi_ver > ops->match_max_abi_version) { |
| fprintf(stderr, PFX |
| "Warning: Driver %s does not support the kernel ABI of %u (supports %u to %u) for device %s\n", |
| ops->name, sysfs_dev->abi_ver, |
| ops->match_min_abi_version, |
| ops->match_max_abi_version, |
| sysfs_dev->ibdev_path); |
| return false; |
| } |
| return true; |
| } |
| |
| static struct verbs_device *try_driver(const struct verbs_device_ops *ops, |
| struct verbs_sysfs_dev *sysfs_dev) |
| { |
| struct verbs_device *vdev; |
| struct ibv_device *dev; |
| |
| if (!match_device(ops, sysfs_dev)) |
| return NULL; |
| |
| vdev = ops->alloc_device(sysfs_dev); |
| if (!vdev) { |
| fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n", |
| sysfs_dev->ibdev_path); |
| return NULL; |
| } |
| |
| vdev->ops = ops; |
| |
| atomic_init(&vdev->refcount, 1); |
| dev = &vdev->device; |
| assert(dev->_ops._dummy1 == NULL); |
| assert(dev->_ops._dummy2 == NULL); |
| |
| dev->node_type = sysfs_dev->node_type; |
| switch (sysfs_dev->node_type) { |
| case IBV_NODE_CA: |
| case IBV_NODE_SWITCH: |
| case IBV_NODE_ROUTER: |
| dev->transport_type = IBV_TRANSPORT_IB; |
| break; |
| case IBV_NODE_RNIC: |
| dev->transport_type = IBV_TRANSPORT_IWARP; |
| break; |
| case IBV_NODE_USNIC: |
| dev->transport_type = IBV_TRANSPORT_USNIC; |
| break; |
| case IBV_NODE_USNIC_UDP: |
| dev->transport_type = IBV_TRANSPORT_USNIC_UDP; |
| break; |
| case IBV_NODE_UNSPECIFIED: |
| dev->transport_type = IBV_TRANSPORT_UNSPECIFIED; |
| break; |
| default: |
| dev->transport_type = IBV_TRANSPORT_UNKNOWN; |
| break; |
| } |
| |
| strcpy(dev->dev_name, sysfs_dev->sysfs_name); |
| if (!check_snprintf(dev->dev_path, sizeof(dev->dev_path), |
| "%s/class/infiniband_verbs/%s", |
| ibv_get_sysfs_path(), sysfs_dev->sysfs_name)) |
| goto err; |
| strcpy(dev->name, sysfs_dev->ibdev_name); |
| strcpy(dev->ibdev_path, sysfs_dev->ibdev_path); |
| vdev->sysfs = sysfs_dev; |
| |
| return vdev; |
| |
| err: |
| ops->uninit_device(vdev); |
| return NULL; |
| } |
| |
| static struct verbs_device *try_drivers(struct verbs_sysfs_dev *sysfs_dev) |
| { |
| struct ibv_driver *driver; |
| struct verbs_device *dev; |
| |
| /* |
| * Matching by driver_id takes priority over other match types, do it |
| * first. |
| */ |
| if (sysfs_dev->driver_id != RDMA_DRIVER_UNKNOWN) { |
| list_for_each (&driver_list, driver, entry) { |
| if (match_driver_id(driver->ops, sysfs_dev)) { |
| dev = try_driver(driver->ops, sysfs_dev); |
| if (dev) |
| return dev; |
| } |
| } |
| } |
| |
| list_for_each(&driver_list, driver, entry) { |
| dev = try_driver(driver->ops, sysfs_dev); |
| if (dev) |
| return dev; |
| } |
| |
| return NULL; |
| } |
| |
| static int check_abi_version(void) |
| { |
| char value[8]; |
| |
| if (abi_ver) |
| return 0; |
| |
| if (ibv_read_sysfs_file(ibv_get_sysfs_path(), |
| "class/infiniband_verbs/abi_version", value, |
| sizeof(value)) < 0) { |
| return ENOSYS; |
| } |
| |
| abi_ver = strtol(value, NULL, 10); |
| |
| if (abi_ver < IB_USER_VERBS_MIN_ABI_VERSION || |
| abi_ver > IB_USER_VERBS_MAX_ABI_VERSION) { |
| fprintf(stderr, PFX "Fatal: kernel ABI version %d " |
| "doesn't match library version %d.\n", |
| abi_ver, IB_USER_VERBS_MAX_ABI_VERSION); |
| return ENOSYS; |
| } |
| |
| return 0; |
| } |
| |
| static void check_memlock_limit(void) |
| { |
| struct rlimit rlim; |
| |
| if (!geteuid()) |
| return; |
| |
| if (getrlimit(RLIMIT_MEMLOCK, &rlim)) { |
| fprintf(stderr, PFX "Warning: getrlimit(RLIMIT_MEMLOCK) failed."); |
| return; |
| } |
| |
| if (rlim.rlim_cur <= 32768) |
| fprintf(stderr, PFX "Warning: RLIMIT_MEMLOCK is %llu bytes.\n" |
| " This will severely limit memory registrations.\n", |
| (unsigned long long)rlim.rlim_cur); |
| } |
| |
| static int same_sysfs_dev(struct verbs_sysfs_dev *sysfs1, |
| struct verbs_sysfs_dev *sysfs2) |
| { |
| if (strcmp(sysfs1->sysfs_name, sysfs2->sysfs_name) != 0) |
| return 0; |
| |
| /* In netlink mode the idx is a globally unique ID */ |
| if (sysfs1->ibdev_idx != sysfs2->ibdev_idx) |
| return 0; |
| |
| if (sysfs1->ibdev_idx == -1 && |
| ts_cmp(&sysfs1->time_created, &sysfs2->time_created, !=)) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Match every ibv_sysfs_dev in the sysfs_list to a driver and add a new entry |
| * to device_list. Once matched to a driver the entry in sysfs_list is |
| * removed. |
| */ |
| static void try_all_drivers(struct list_head *sysfs_list, |
| struct list_head *device_list, |
| unsigned int *num_devices) |
| { |
| struct verbs_sysfs_dev *sysfs_dev; |
| struct verbs_sysfs_dev *tmp; |
| struct verbs_device *vdev; |
| |
| list_for_each_safe(sysfs_list, sysfs_dev, tmp, entry) { |
| vdev = try_drivers(sysfs_dev); |
| if (vdev) { |
| list_del(&sysfs_dev->entry); |
| /* Ownership of sysfs_dev moves into vdev->sysfs */ |
| list_add(device_list, &vdev->entry); |
| (*num_devices)++; |
| } |
| } |
| } |
| |
| int ibverbs_get_device_list(struct list_head *device_list) |
| { |
| LIST_HEAD(sysfs_list); |
| struct verbs_sysfs_dev *sysfs_dev, *next_dev; |
| struct verbs_device *vdev, *tmp; |
| static int drivers_loaded; |
| unsigned int num_devices = 0; |
| int ret; |
| |
| ret = find_sysfs_devs_nl(&sysfs_list); |
| if (ret) { |
| ret = find_sysfs_devs(&sysfs_list); |
| if (ret) |
| return -ret; |
| } |
| |
| if (!list_empty(&sysfs_list)) { |
| ret = check_abi_version(); |
| if (ret) |
| return -ret; |
| } |
| |
| /* Remove entries from the sysfs_list that are already preset in the |
| * device_list, and remove entries from the device_list that are not |
| * present in the sysfs_list. |
| */ |
| list_for_each_safe(device_list, vdev, tmp, entry) { |
| struct verbs_sysfs_dev *old_sysfs = NULL; |
| |
| list_for_each(&sysfs_list, sysfs_dev, entry) { |
| if (same_sysfs_dev(vdev->sysfs, sysfs_dev)) { |
| old_sysfs = sysfs_dev; |
| break; |
| } |
| } |
| |
| if (old_sysfs) { |
| list_del(&old_sysfs->entry); |
| free(old_sysfs); |
| num_devices++; |
| } else { |
| list_del(&vdev->entry); |
| ibverbs_device_put(&vdev->device); |
| } |
| } |
| |
| try_all_drivers(&sysfs_list, device_list, &num_devices); |
| |
| if (list_empty(&sysfs_list) || drivers_loaded) |
| goto out; |
| |
| load_drivers(); |
| drivers_loaded = 1; |
| |
| try_all_drivers(&sysfs_list, device_list, &num_devices); |
| |
| out: |
| /* Anything left in sysfs_list was not assoicated with a |
| * driver. |
| */ |
| list_for_each_safe(&sysfs_list, sysfs_dev, next_dev, entry) { |
| if (getenv("IBV_SHOW_WARNINGS")) { |
| fprintf(stderr, PFX |
| "Warning: no userspace device-specific driver found for %s\n", |
| sysfs_dev->ibdev_name); |
| } |
| free(sysfs_dev); |
| } |
| |
| return num_devices; |
| } |
| |
| static void verbs_set_log_level(void) |
| { |
| char *env; |
| |
| env = getenv("VERBS_LOG_LEVEL"); |
| if (env) |
| verbs_log_level = strtol(env, NULL, 0); |
| } |
| |
| /* |
| * Fallback in case log file is not provided or can't be opened. |
| * Release mode: disable debug prints. |
| * Debug mode: Use stderr instead of a file. |
| */ |
| static void verbs_log_file_fallback(void) |
| { |
| #ifdef VERBS_DEBUG |
| verbs_log_fp = stderr; |
| #else |
| verbs_log_level = VERBS_LOG_LEVEL_NONE; |
| #endif |
| } |
| |
| static void verbs_set_log_file(void) |
| { |
| char *env; |
| |
| if (verbs_log_level == VERBS_LOG_LEVEL_NONE) |
| return; |
| |
| env = getenv("VERBS_LOG_FILE"); |
| if (!env) { |
| verbs_log_file_fallback(); |
| return; |
| } |
| |
| verbs_log_fp = fopen(env, "aw+"); |
| if (!verbs_log_fp) { |
| verbs_log_file_fallback(); |
| return; |
| } |
| } |
| |
| int ibverbs_init(void) |
| { |
| if (check_env("RDMAV_FORK_SAFE") || check_env("IBV_FORK_SAFE")) |
| if (ibv_fork_init()) |
| fprintf(stderr, PFX "Warning: fork()-safety requested " |
| "but init failed\n"); |
| |
| verbs_allow_disassociate_destroy = check_env("RDMAV_ALLOW_DISASSOC_DESTROY") |
| /* Backward compatibility for the mlx4 driver env */ |
| || check_env("MLX4_DEVICE_FATAL_CLEANUP"); |
| |
| if (!ibv_get_sysfs_path()) |
| return -errno; |
| |
| check_memlock_limit(); |
| verbs_set_log_level(); |
| verbs_set_log_file(); |
| |
| return 0; |
| } |
| |
| void ibverbs_device_hold(struct ibv_device *dev) |
| { |
| struct verbs_device *verbs_device = verbs_get_device(dev); |
| |
| atomic_fetch_add(&verbs_device->refcount, 1); |
| } |
| |
| void ibverbs_device_put(struct ibv_device *dev) |
| { |
| struct verbs_device *verbs_device = verbs_get_device(dev); |
| |
| if (atomic_fetch_sub(&verbs_device->refcount, 1) == 1) { |
| free(verbs_device->sysfs); |
| if (verbs_device->ops->uninit_device) |
| verbs_device->ops->uninit_device(verbs_device); |
| } |
| } |