| // SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) |
| /* Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file */ |
| |
| #define _GNU_SOURCE |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <stdbool.h> |
| #include <errno.h> |
| #include <unistd.h> |
| #include <getopt.h> |
| #include <sys/types.h> |
| #include <dirent.h> |
| #include <syslog.h> |
| #include <rdma/rdma_netlink.h> |
| #include <netlink/netlink.h> |
| #include <netlink/msg.h> |
| #include <netlink/attr.h> |
| #include <linux/pci_regs.h> |
| #include <util/rdma_nl.h> |
| |
/*
 * Rename modes:
 * NAME_FALLBACK - Try to name devices in the following order:
 *                 by-onboard -> by-pci -> kernel
 * NAME_KERNEL - leave the name as provided by the kernel
 * NAME_PCI - based on PCI/slot/function location
 * NAME_GUID - based on node GUID
 * NAME_ONBOARD - based on the on-board device index
 * NAME_FIXED - rename the device to the fixed name given in the next argument
 *
 * The stable names are a combination of device type technology and rename mode.
 * Infiniband - ib*
 * RoCE - roce*
 * iWARP - iw*
 * OPA - opa*
 * Default (unknown protocol) - rdma*
 *
 * Example:
 * NAME_PCI
 *  pci = 0000:00:0c.4
 *  Device type = IB
 *  mlx5_0 -> ibp0s12f4
 * NAME_GUID
 *  GUID = 5254:00c0:fe12:3455
 *  Device type = RoCE
 *  mlx5_0 -> rocex525400c0fe123455
 * NAME_ONBOARD
 *  Index = 3
 *  Device type = OPA
 *  hfi1_1 -> opao3
 */
| |
/*
 * State describing the one device this invocation is asked to rename.
 */
struct data {
	/* Current device name, taken from the command line. */
	const char *curr;
	/* Heap-allocated name prefix: the protocol string or "rdma". */
	char *prefix;
	/* Node GUID read from the netlink device dump. */
	uint64_t node_guid;
	/* Heap-allocated new name; NULL until a naming policy succeeds. */
	char *name;
	/* Device index read from the netlink device dump, -1 if not found. */
	int idx;
};
| |
/* Turned on by the -v command line switch; gates pr_dbg() output. */
static bool debug_mode;

/* Report an error through syslog at LOG_ERR priority. */
#define pr_err(args...) syslog(LOG_ERR, ##args)

/*
 * Report a debug message through syslog, but only when debug_mode is set.
 * The message goes out at the same LOG_ERR priority as pr_err().
 */
#define pr_dbg(args...)                \
	do {                           \
		if (debug_mode)        \
			pr_err(args);  \
	} while (0)
| |
| #define ONBOARD_INDEX_MAX (16*1024-1) |
| static int by_onboard(struct data *d) |
| { |
| char *index = NULL; |
| char *acpi = NULL; |
| unsigned int o; |
| FILE *fp; |
| int ret; |
| |
| /* |
| * ACPI_DSM - device specific method for naming |
| * PCI or PCI Express device |
| */ |
| ret = asprintf(&acpi, "/sys/class/infiniband/%s/device/acpi_index", |
| d->curr); |
| if (ret < 0) |
| return -ENOMEM; |
| |
| /* SMBIOS type 41 - Onboard Devices Extended Information */ |
| ret = asprintf(&index, "/sys/class/infiniband/%s/device/index", d->curr); |
| if (ret < 0) { |
| index = NULL; |
| ret = -ENOMEM; |
| goto out; |
| } |
| |
| fp = fopen(acpi, "r"); |
| if (!fp) |
| fp = fopen(index, "r"); |
| if (!fp) { |
| pr_dbg("%s: Device is not embedded onboard\n", d->curr); |
| ret = -ENOENT; |
| goto out; |
| } |
| |
| ret = fscanf(fp, "%u", &o); |
| fclose(fp); |
| /* https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L263 */ |
| if (!ret || o > ONBOARD_INDEX_MAX) { |
| pr_err("%s: Onboard index %d and ret %d\n", d->curr, o, ret); |
| ret = -ENOENT; |
| goto out; |
| } |
| |
| ret = asprintf(&d->name, "%so%u", d->prefix, o); |
| if (ret < 0) { |
| pr_err("%s: Failed to allocate name with prefix %s and onboard index %d\n", |
| d->curr, d->prefix, o); |
| ret = -ENOENT; |
| d->name = NULL; |
| goto out; |
| } |
| ret = 0; |
| out: |
| free(index); |
| free(acpi); |
| return ret; |
| } |
| |
/*
 * find_sun - look up the PCI slot number of a device.
 * @devname: sysfs path of the PCI device
 * @pci: PCI address of the device in "domain:bus:slot.function" form
 *
 * Walks the numbered entries of <devname>/subsystem/slots and compares the
 * content of each entry's "address" file with the device address stripped of
 * its ".function" suffix.
 *
 * NOTE(review): the previous "%s.%s" sscanf() could never match (the first
 * %s consumes the dot as well), so this function always returned 0 and the
 * "s<slot>" naming branch in the caller was never taken. With the parsing
 * fixed, devices that sit in a reported slot get "s<slot>" names instead of
 * "p<bus>s<slot>" - confirm that this is acceptable for existing setups.
 *
 * Return: the positive slot number on a match; 0 when nothing matches or on
 * any error.
 */
static int find_sun(char *devname, char *pci)
{
	char bof[256], address[256];
	struct dirent *dent;
	size_t bof_len;
	char *slots;
	char *dot;
	int sun = 0;
	DIR *dir;

	/* "domain:bus:slot" is everything in front of the function separator */
	dot = strrchr(pci, '.');
	if (!dot || dot == pci || dot[1] == '\0')
		return 0;
	bof_len = (size_t)(dot - pci);
	if (bof_len >= sizeof(bof))
		return 0;
	memcpy(bof, pci, bof_len);
	bof[bof_len] = '\0';

	if (asprintf(&slots, "%s/subsystem/slots", devname) < 0)
		return 0;

	dir = opendir(slots);
	if (!dir)
		goto err_dir;

	while ((dent = readdir(dir))) {
		char *str;
		FILE *fp;
		int i, ret;

		if (dent->d_name[0] == '.')
			continue;
		/* Only positive, numeric slot names are of interest */
		i = atoi(dent->d_name);
		if (i <= 0)
			continue;

		if (asprintf(&str, "%s/%s/address", slots, dent->d_name) < 0)
			break;

		fp = fopen(str, "r");
		free(str);
		if (!fp)
			break;

		ret = fscanf(fp, "%255s", address);
		fclose(fp);
		if (ret != 1)
			break;

		/*
		 * The result lives in its own variable: reusing the fscanf()
		 * return value would report slot 1 after a scan without match.
		 */
		if (!strcmp(bof, address)) {
			sun = i;
			break;
		}
	}

	closedir(dir);
err_dir:
	free(slots);
	return sun;
}
| |
/*
 * is_pci_multifunction - test the multi-function bit of a PCI device.
 * @devname: sysfs path of the PCI device
 *
 * Reads the first 64 bytes of <devname>/config and inspects the header type
 * byte.
 *
 * Return: non-zero (0x80) when the multi-function bit is set; 0 when it is
 * clear or when the config file cannot be read in full.
 */
static int is_pci_multifunction(char *devname)
{
	char header[64] = {0};
	size_t nread;
	char *path;
	FILE *cfg;

	if (asprintf(&path, "%s/config", devname) < 0)
		return 0;

	cfg = fopen(path, "r");
	free(path);
	if (!cfg)
		return 0;

	nread = fread(header, 1, sizeof(header), cfg);
	fclose(cfg);

	/* A short read leaves the header type byte untrustworthy */
	if (nread != sizeof(header))
		return 0;

	/* bit 0-6 header type, bit 7 multi/single function device */
	return header[PCI_HEADER_TYPE] & 0x80;
}
| |
/*
 * is_pci_ari_enabled - check the "ari_enabled" sysfs attribute of a device.
 * @devname: sysfs path of the PCI device
 *
 * Return: 1 when <devname>/ari_enabled exists and contains the value 1;
 * 0 otherwise, including when the file is missing, empty or not a number.
 */
static int is_pci_ari_enabled(char *devname)
{
	int ret, a = 0;
	char *ari;
	FILE *fp;

	ret = asprintf(&ari, "%s/ari_enabled", devname);
	if (ret < 0)
		return 0;

	fp = fopen(ari, "r");
	free(ari);
	if (!fp)
		return 0;

	ret = fscanf(fp, "%d", &a);
	fclose(fp);

	/*
	 * fscanf() returns EOF (-1) on an empty file; only a match count of 1
	 * means that "a" was actually parsed.
	 */
	return ret == 1 && a == 1;
}
| |
/*
 * PCI location of a device, as collected from sysfs.
 */
struct pci_info {
	/* sysfs path of the PCI device the other fields are read from */
	char *pcidev;

	/* Components of the "domain:bus:slot.function" PCI address */
	unsigned int domain;
	unsigned int bus;
	unsigned int slot;
	unsigned int func;
	/* Slot number found under subsystem/slots, 0 if none was found */
	unsigned int sun;
	/* Virtual function number; meaningful only when valid_vf is set */
	unsigned int vf;
	/* True when the device was detected to be a virtual function */
	bool valid_vf;
};
| |
| static int fill_pci_info(struct data *d, struct pci_info *p) |
| { |
| char buf[256] = {}; |
| char *pci; |
| int ret; |
| |
| ret = readlink(p->pcidev, buf, sizeof(buf)-1); |
| if (ret == -1 || ret == sizeof(buf)) |
| return -EINVAL; |
| |
| buf[ret] = 0; |
| |
| pci = basename(buf); |
| /* |
| * pci = 0000:00:0c.0 |
| */ |
| ret = sscanf(pci, "%x:%x:%x.%u", &p->domain, &p->bus, &p->slot, |
| &p->func); |
| if (ret != 4) { |
| pr_err("%s: Failed to read PCI BOF\n", d->curr); |
| return -ENOENT; |
| } |
| |
| if (is_pci_ari_enabled(p->pcidev)) { |
| /* |
| * ARI devices support up to 256 functions on a single device |
| * ("slot"), and interpret the traditional 5-bit slot and 3-bit |
| * function number as a single 8-bit function number, where the |
| * slot makes up the upper 5 bits. |
| * |
| * https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L344 |
| */ |
| p->func += p->slot * 8; |
| pr_dbg("%s: This is ARI device, new PCI BOF is %04x:%02x:%02x.%u\n", |
| d->curr, p->domain, p->bus, p->slot, p->func); |
| } |
| |
| p->sun = find_sun(p->pcidev, pci); |
| |
| return 0; |
| } |
| |
| static int get_virtfn_info(struct data *d, struct pci_info *p) |
| { |
| struct pci_info vf = {}; |
| char *physfn_pcidev; |
| struct dirent *dent; |
| DIR *dir; |
| int ret; |
| |
| /* Check if this is a virtual function. */ |
| ret = asprintf(&physfn_pcidev, "%s/physfn", p->pcidev); |
| if (ret < 0) |
| return -ENOMEM; |
| |
| /* We are VF, get VF number and replace pcidev to point to PF */ |
| dir = opendir(physfn_pcidev); |
| if (!dir) { |
| /* |
| * -ENOENT means that we are already in PF |
| * and pcidev points to right PCI. |
| */ |
| ret = (errno == ENOENT) ? 0 : -ENOMEM; |
| goto err_free; |
| } |
| |
| p->valid_vf = true; |
| vf.pcidev = p->pcidev; |
| ret = fill_pci_info(d, &vf); |
| if (ret) |
| goto err_dir; |
| |
| while ((dent = readdir(dir))) { |
| const char *s = "virtfn"; |
| struct pci_info v = {}; |
| |
| if (strncmp(dent->d_name, s, strlen(s)) || |
| strlen(dent->d_name) == strlen(s)) |
| continue; |
| |
| ret = asprintf(&v.pcidev, "%s/%s", physfn_pcidev, dent->d_name); |
| if (ret < 0) { |
| ret = -ENOMEM; |
| goto err_dir; |
| } |
| ret = fill_pci_info(d, &v); |
| free(v.pcidev); |
| if (ret) { |
| ret = -ENOMEM; |
| goto err_dir; |
| } |
| if (vf.func == v.func && |
| vf.slot == v.slot) { |
| p->vf = atoi(&dent->d_name[6]); |
| break; |
| } |
| } |
| |
| p->pcidev = physfn_pcidev; |
| closedir(dir); |
| |
| return 0; |
| |
| err_dir: |
| closedir(dir); |
| err_free: |
| free(physfn_pcidev); |
| return ret; |
| } |
| |
| static int by_pci(struct data *d) |
| { |
| struct pci_info p = {}; |
| char *subsystem; |
| char buf[256] = {}; |
| char *subs; |
| int ret; |
| |
| ret = asprintf(&subsystem, "/sys/class/infiniband/%s/device/subsystem", |
| d->curr); |
| if (ret < 0) |
| return -ENOMEM; |
| |
| ret = readlink(subsystem, buf, sizeof(buf)-1); |
| if (ret == -1 || ret == sizeof(buf)) { |
| ret = -EINVAL; |
| goto out; |
| } |
| buf[ret] = 0; |
| |
| subs = basename(buf); |
| if (strcmp(subs, "pci")) { |
| /* Ball out virtual devices */ |
| pr_dbg("%s: Non-PCI device (%s) was detected\n", d->curr, subs); |
| ret = -EINVAL; |
| goto out; |
| } |
| |
| /* Real devices */ |
| ret = asprintf(&p.pcidev, "/sys/class/infiniband/%s/device", d->curr); |
| if (ret < 0) { |
| ret = -ENOMEM; |
| p.pcidev = NULL; |
| goto out; |
| } |
| |
| ret = get_virtfn_info(d, &p); |
| if (ret) |
| goto out; |
| |
| ret = fill_pci_info(d, &p); |
| if (ret) { |
| pr_err("%s: Failed to fill PCI device information\n", d->curr); |
| goto out; |
| } |
| |
| d->name = calloc(256, sizeof(char)); |
| if (!d->name) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| |
| ret = sprintf(d->name, "%s", d->prefix); |
| if (ret == -1) { |
| ret = -EINVAL; |
| goto out; |
| } |
| |
| if (p.domain > 0) { |
| ret = sprintf(buf, "P%u", p.domain); |
| if (ret == -1) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| strcat(d->name, buf); |
| } |
| |
| if (p.sun > 0) |
| ret = sprintf(buf, "s%u", p.sun); |
| else |
| ret = sprintf(buf, "p%us%u", p.bus, p.slot); |
| if (ret == -1) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| |
| strcat(d->name, buf); |
| |
| if (p.func > 0 || is_pci_multifunction(p.pcidev)) { |
| ret = sprintf(buf, "f%u", p.func); |
| if (ret == -1) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| strcat(d->name, buf); |
| |
| if (p.valid_vf) { |
| ret = sprintf(buf, "v%u", p.vf); |
| if (ret == -1) { |
| ret = -ENOMEM; |
| goto out; |
| } |
| strcat(d->name, buf); |
| } |
| } |
| ret = 0; |
| out: |
| free(p.pcidev); |
| free(subsystem); |
| if (ret) { |
| free(d->name); |
| d->name = NULL; |
| } |
| |
| return ret; |
| } |
| |
| static int by_guid(struct data *d) |
| { |
| uint16_t vp[4]; |
| int ret = -1; |
| |
| if (!d->node_guid) |
| /* virtual devices start without GUID */ |
| goto out; |
| |
| memcpy(vp, &d->node_guid, sizeof(uint64_t)); |
| ret = asprintf(&d->name, "%sx%04x%04x%04x%04x", d->prefix, vp[3], vp[2], |
| vp[1], vp[0]); |
| out: |
| if (ret == -1) { |
| d->name = NULL; |
| return -ENOMEM; |
| } |
| |
| return 0; |
| } |
| |
| static int set_fixed_name(struct data *d, char *name) |
| { |
| int ret; |
| |
| ret = asprintf(&d->name, "%s", name); |
| if (ret == -1) { |
| d->name = NULL; |
| return -ENOMEM; |
| } |
| |
| return 0; |
| } |
| |
| static int device_rename(struct nl_sock *nl, struct data *d) |
| { |
| struct nlmsghdr *hdr; |
| struct nl_msg *msg; |
| int ret = -1; |
| |
| msg = nlmsg_alloc(); |
| if (!msg) |
| return -ENOMEM; |
| |
| hdr = nlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, |
| RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SET), |
| 0, 0); |
| if (!hdr) { |
| ret = -ENOMEM; |
| goto nla_put_failure; |
| } |
| |
| NLA_PUT_U32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, d->idx); |
| NLA_PUT_STRING(msg, RDMA_NLDEV_ATTR_DEV_NAME, d->name); |
| ret = nl_send_auto(nl, msg); |
| if (ret < 0) |
| return ret; |
| nla_put_failure: |
| nlmsg_free(msg); |
| return (ret < 0) ? ret : 0; |
| } |
| |
| static int get_nldata_cb(struct nl_msg *msg, void *data) |
| { |
| struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {}; |
| struct nlmsghdr *hdr = nlmsg_hdr(msg); |
| struct data *d = data; |
| int ret; |
| |
| ret = nlmsg_parse(hdr, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, rdmanl_policy); |
| if (ret < 0) |
| return NL_STOP; |
| |
| if (!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || |
| !tb[RDMA_NLDEV_ATTR_NODE_GUID]) |
| return NL_STOP; |
| |
| ret = strcmp(d->curr, nla_get_string(tb[RDMA_NLDEV_ATTR_DEV_NAME])); |
| if (ret) |
| return NL_OK; |
| |
| if (tb[RDMA_NLDEV_ATTR_DEV_PROTOCOL]) |
| d->prefix = strdup( |
| nla_get_string(tb[RDMA_NLDEV_ATTR_DEV_PROTOCOL])); |
| if (!d->prefix) |
| ret = asprintf(&d->prefix, "rdma"); |
| if (ret < 0) |
| return NL_STOP; |
| |
| d->idx = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); |
| d->node_guid = nla_get_u64(tb[RDMA_NLDEV_ATTR_NODE_GUID]); |
| return NL_STOP; |
| } |
| |
/*
 * Naming policies. The values are bit flags so that several policies can be
 * combined and tried one after another; NAME_ERROR marks an unknown policy.
 */
enum name_policy {
	NAME_KERNEL = 1 << 0,
	NAME_PCI = 1 << 1,
	NAME_GUID = 1 << 2,
	NAME_ONBOARD = 1 << 3,
	NAME_FIXED = 1 << 4,
	NAME_ERROR = 1 << 8
};

/*
 * str2policy - translate a policy name into its name_policy bit mask.
 * @np: policy name, e.g. "NAME_PCI"; may be NULL
 *
 * "NAME_FALLBACK" maps to NAME_ONBOARD | NAME_PCI.
 *
 * Return: the policy mask, or NAME_ERROR for NULL or an unknown name.
 */
static int str2policy(const char *np)
{
	static const struct {
		const char *name;
		int policy;
	} policies[] = {
		{ "NAME_KERNEL", NAME_KERNEL },
		{ "NAME_PCI", NAME_PCI },
		{ "NAME_GUID", NAME_GUID },
		{ "NAME_ONBOARD", NAME_ONBOARD },
		{ "NAME_FIXED", NAME_FIXED },
		{ "NAME_FALLBACK", NAME_ONBOARD | NAME_PCI },
	};
	size_t i;

	if (!np)
		return NAME_ERROR;

	for (i = 0; i < sizeof(policies) / sizeof(policies[0]); i++) {
		if (!strcmp(np, policies[i].name))
			return policies[i].policy;
	}

	return NAME_ERROR;
}
| |
| int main(int argc, char **argv) |
| { |
| struct data d = { .idx = -1 }; |
| struct nl_sock *nl; |
| int ret = -1; |
| int np, opt; |
| |
| if (argc < 3) |
| goto err; |
| |
| while ((opt = getopt(argc, argv, "v")) >= 0) { |
| switch (opt) { |
| case 'v': |
| debug_mode = true; |
| break; |
| default: |
| goto err; |
| } |
| } |
| |
| argc -= optind; |
| argv += optind; |
| |
| d.curr = argv[0]; |
| |
| np = str2policy(argv[1]); |
| if (np & NAME_ERROR) { |
| pr_err("%s: Unknown policy %s\n", d.curr, argv[1]); |
| goto err; |
| } |
| |
| if (np & NAME_FIXED && argc < 3) { |
| pr_err("%s: No name specified\n", d.curr); |
| goto err; |
| } |
| |
| pr_dbg("%s: Requested policy is %s\n", d.curr, argv[1]); |
| |
| if (np & NAME_KERNEL) { |
| pr_dbg("%s: Leave kernel names, do nothing\n", d.curr); |
| /* Do nothing */ |
| exit(0); |
| } |
| |
| nl = rdmanl_socket_alloc(); |
| if (!nl) { |
| pr_err("%s: Failed to allocate netlink socket\n", d.curr); |
| goto err; |
| } |
| |
| if (rdmanl_get_devices(nl, get_nldata_cb, &d)) { |
| pr_err("%s: Failed to connect to NETLINK_RDMA\n", d.curr); |
| goto out; |
| } |
| |
| if (d.idx == -1 || !d.prefix) { |
| pr_err("%s: Failed to get current device name and index\n", |
| d.curr); |
| goto out; |
| } |
| |
| ret = -1; |
| if (np & NAME_ONBOARD) |
| ret = by_onboard(&d); |
| if (ret && (np & NAME_PCI)) |
| ret = by_pci(&d); |
| if (ret && (np & NAME_GUID)) |
| ret = by_guid(&d); |
| if (ret && (np & NAME_FIXED)) |
| ret = set_fixed_name(&d, argv[2]); |
| if (ret) |
| goto out; |
| |
| ret = device_rename(nl, &d); |
| if (ret) { |
| pr_err("%s: Device rename to %s failed with error %d\n", d.curr, |
| d.name, ret); |
| goto out; |
| } |
| pr_dbg("%s: Successfully renamed device to be %s\n", d.curr, d.name); |
| |
| printf("%s\n", d.name); |
| free(d.name); |
| |
| out: |
| free(d.prefix); |
| nl_socket_free(nl); |
| err: |
| ret = (ret) ? 1 : 0; |
| exit(ret); |
| } |