|  | /* | 
|  | * Copyright (c) 2012 Mellanox Technologies, Inc.  All rights reserved. | 
|  | * Copyright (c) 2020 Intel Corporation.  All rights reserved. | 
|  | * | 
|  | * This software is available to you under a choice of one of two | 
|  | * licenses.  You may choose to be licensed under the terms of the GNU | 
|  | * General Public License (GPL) Version 2, available from the file | 
|  | * COPYING in the main directory of this source tree, or the | 
|  | * OpenIB.org BSD license below: | 
|  | * | 
|  | *     Redistribution and use in source and binary forms, with or | 
|  | *     without modification, are permitted provided that the following | 
|  | *     conditions are met: | 
|  | * | 
|  | *      - Redistributions of source code must retain the above | 
|  | *        copyright notice, this list of conditions and the following | 
|  | *        disclaimer. | 
|  | * | 
|  | *      - Redistributions in binary form must reproduce the above | 
|  | *        copyright notice, this list of conditions and the following | 
|  | *        disclaimer in the documentation and/or other materials | 
|  | *        provided with the distribution. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
|  | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | 
|  | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
|  | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | 
|  | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | 
|  | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
|  | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
|  | * SOFTWARE. | 
|  | */ | 
|  | #define _GNU_SOURCE | 
|  | #include <config.h> | 
|  |  | 
|  | #include <stdio.h> | 
|  | #include <stdlib.h> | 
|  | #include <unistd.h> | 
|  | #include <errno.h> | 
|  | #include <sys/mman.h> | 
|  | #include <pthread.h> | 
|  | #include <string.h> | 
|  | #include <sched.h> | 
|  | #include <sys/param.h> | 
|  |  | 
|  | #include <util/symver.h> | 
|  | #include <rdma/mlx5_user_ioctl_cmds.h> | 
|  |  | 
|  | #include "mlx5.h" | 
|  | #include "mlx5-abi.h" | 
|  | #include "wqe.h" | 
|  | #include "mlx5_ifc.h" | 
|  | #include "mlx5_vfio.h" | 
|  |  | 
/*
 * Forward declarations. mlx5_free_context() is needed before its body
 * because the common ops table below stores its address.
 */
static bool is_mlx5_dev(struct ibv_device *device);
static void mlx5_free_context(struct ibv_context *ibctx);
|  |  | 
/*
 * Fallbacks for environments whose <sched.h> does not provide the
 * cpu_set_t set helpers: CPU_OR() expands to a no-op statement and
 * CPU_EQUAL() always evaluates to 1.
 */
#if !defined(CPU_OR)
#define CPU_OR(x, y, z) do {} while (0)
#endif

#if !defined(CPU_EQUAL)
#define CPU_EQUAL(x, y) 1
#endif
|  |  | 
|  | #define HCA(v, d) VERBS_PCI_MATCH(PCI_VENDOR_ID_##v, d, NULL) | 
|  | const struct verbs_match_ent mlx5_hca_table[] = { | 
|  | VERBS_DRIVER_ID(RDMA_DRIVER_MLX5), | 
|  | HCA(MELLANOX, 0x1011),	/* MT4113 Connect-IB */ | 
|  | HCA(MELLANOX, 0x1012),	/* Connect-IB Virtual Function */ | 
|  | HCA(MELLANOX, 0x1013),	/* ConnectX-4 */ | 
|  | HCA(MELLANOX, 0x1014),	/* ConnectX-4 Virtual Function */ | 
|  | HCA(MELLANOX, 0x1015),	/* ConnectX-4LX */ | 
|  | HCA(MELLANOX, 0x1016),	/* ConnectX-4LX Virtual Function */ | 
|  | HCA(MELLANOX, 0x1017),	/* ConnectX-5, PCIe 3.0 */ | 
|  | HCA(MELLANOX, 0x1018),	/* ConnectX-5 Virtual Function */ | 
|  | HCA(MELLANOX, 0x1019),    /* ConnectX-5 Ex */ | 
|  | HCA(MELLANOX, 0x101a),	/* ConnectX-5 Ex VF */ | 
|  | HCA(MELLANOX, 0x101b),    /* ConnectX-6 */ | 
|  | HCA(MELLANOX, 0x101c),	/* ConnectX-6 VF */ | 
|  | HCA(MELLANOX, 0x101d),	/* ConnectX-6 DX */ | 
|  | HCA(MELLANOX, 0x101e),	/* ConnectX family mlx5Gen Virtual Function */ | 
|  | HCA(MELLANOX, 0x101f),	/* ConnectX-6 LX */ | 
|  | HCA(MELLANOX, 0x1021),  /* ConnectX-7 */ | 
|  | HCA(MELLANOX, 0x1023),  /* ConnectX-8 */ | 
|  | HCA(MELLANOX, 0xa2d2),	/* BlueField integrated ConnectX-5 network controller */ | 
|  | HCA(MELLANOX, 0xa2d3),	/* BlueField integrated ConnectX-5 network controller VF */ | 
|  | HCA(MELLANOX, 0xa2d6),  /* BlueField-2 integrated ConnectX-6 Dx network controller */ | 
|  | HCA(MELLANOX, 0xa2dc),  /* BlueField-3 integrated ConnectX-7 network controller */ | 
|  | HCA(MELLANOX, 0xa2df),  /* BlueField-4 integrated ConnectX-8 network controller */ | 
|  | {} | 
|  | }; | 
|  |  | 
/* Debug bitmask; mlx5_set_debug_mask() loads it from MLX5_DEBUG_MASK. */
uint32_t mlx5_debug_mask = 0;

/* Loaded from MLX5_FREEZE_ON_ERROR_CQE by set_freeze_on_error(). */
int mlx5_freeze_on_error_cqe;
|  |  | 
|  | static const struct verbs_context_ops mlx5_ctx_common_ops = { | 
|  | .query_port    = mlx5_query_port, | 
|  | .alloc_pd      = mlx5_alloc_pd, | 
|  | .async_event   = mlx5_async_event, | 
|  | .dealloc_pd    = mlx5_free_pd, | 
|  | .reg_mr	       = mlx5_reg_mr, | 
|  | .reg_dmabuf_mr = mlx5_reg_dmabuf_mr, | 
|  | .rereg_mr      = mlx5_rereg_mr, | 
|  | .dereg_mr      = mlx5_dereg_mr, | 
|  | .alloc_mw      = mlx5_alloc_mw, | 
|  | .dealloc_mw    = mlx5_dealloc_mw, | 
|  | .bind_mw       = mlx5_bind_mw, | 
|  | .create_cq     = mlx5_create_cq, | 
|  | .poll_cq       = mlx5_poll_cq, | 
|  | .req_notify_cq = mlx5_arm_cq, | 
|  | .cq_event      = mlx5_cq_event, | 
|  | .resize_cq     = mlx5_resize_cq, | 
|  | .destroy_cq    = mlx5_destroy_cq, | 
|  | .create_srq    = mlx5_create_srq, | 
|  | .modify_srq    = mlx5_modify_srq, | 
|  | .query_srq     = mlx5_query_srq, | 
|  | .destroy_srq   = mlx5_destroy_srq, | 
|  | .post_srq_recv = mlx5_post_srq_recv, | 
|  | .create_qp     = mlx5_create_qp, | 
|  | .query_qp      = mlx5_query_qp, | 
|  | .modify_qp     = mlx5_modify_qp, | 
|  | .destroy_qp    = mlx5_destroy_qp, | 
|  | .post_send     = mlx5_post_send, | 
|  | .post_recv     = mlx5_post_recv, | 
|  | .create_ah     = mlx5_create_ah, | 
|  | .destroy_ah    = mlx5_destroy_ah, | 
|  | .attach_mcast  = mlx5_attach_mcast, | 
|  | .detach_mcast  = mlx5_detach_mcast, | 
|  |  | 
|  | .advise_mr = mlx5_advise_mr, | 
|  | .alloc_dm = mlx5_alloc_dm, | 
|  | .alloc_parent_domain = mlx5_alloc_parent_domain, | 
|  | .alloc_td = mlx5_alloc_td, | 
|  | .attach_counters_point_flow = mlx5_attach_counters_point_flow, | 
|  | .close_xrcd = mlx5_close_xrcd, | 
|  | .create_counters = mlx5_create_counters, | 
|  | .create_cq_ex = mlx5_create_cq_ex, | 
|  | .create_flow = mlx5_create_flow, | 
|  | .create_flow_action_esp = mlx5_create_flow_action_esp, | 
|  | .create_qp_ex = mlx5_create_qp_ex, | 
|  | .create_rwq_ind_table = mlx5_create_rwq_ind_table, | 
|  | .create_srq_ex = mlx5_create_srq_ex, | 
|  | .create_wq = mlx5_create_wq, | 
|  | .dealloc_td = mlx5_dealloc_td, | 
|  | .destroy_counters = mlx5_destroy_counters, | 
|  | .destroy_flow = mlx5_destroy_flow, | 
|  | .destroy_flow_action = mlx5_destroy_flow_action, | 
|  | .destroy_rwq_ind_table = mlx5_destroy_rwq_ind_table, | 
|  | .destroy_wq = mlx5_destroy_wq, | 
|  | .free_dm = mlx5_free_dm, | 
|  | .get_srq_num = mlx5_get_srq_num, | 
|  | .import_dm = mlx5_import_dm, | 
|  | .import_mr = mlx5_import_mr, | 
|  | .import_pd = mlx5_import_pd, | 
|  | .modify_cq = mlx5_modify_cq, | 
|  | .modify_flow_action_esp = mlx5_modify_flow_action_esp, | 
|  | .modify_qp_rate_limit = mlx5_modify_qp_rate_limit, | 
|  | .modify_wq = mlx5_modify_wq, | 
|  | .open_qp = mlx5_open_qp, | 
|  | .open_xrcd = mlx5_open_xrcd, | 
|  | .post_srq_ops = mlx5_post_srq_ops, | 
|  | .query_device_ex = mlx5_query_device_ex, | 
|  | .query_ece = mlx5_query_ece, | 
|  | .query_rt_values = mlx5_query_rt_values, | 
|  | .read_counters = mlx5_read_counters, | 
|  | .reg_dm_mr = mlx5_reg_dm_mr, | 
|  | .alloc_null_mr = mlx5_alloc_null_mr, | 
|  | .free_context = mlx5_free_context, | 
|  | .set_ece = mlx5_set_ece, | 
|  | .unimport_dm = mlx5_unimport_dm, | 
|  | .unimport_mr = mlx5_unimport_mr, | 
|  | .unimport_pd = mlx5_unimport_pd, | 
|  | .query_qp_data_in_order = mlx5_query_qp_data_in_order, | 
|  | }; | 
|  |  | 
|  | static const struct verbs_context_ops mlx5_ctx_cqev1_ops = { | 
|  | .poll_cq = mlx5_poll_cq_v1, | 
|  | }; | 
|  |  | 
/*
 * Parse the decimal integer that follows the first ':' in @line.
 *
 * @line:  NUL-terminated "key : value" text (e.g. a /proc/cpuinfo line).
 * @value: receives the parsed number; left untouched on failure.
 *
 * Returns 0 on success. Returns 1 when the line has no ':', when no digits
 * follow the ':' (leading whitespace is skipped by strtol), or when the
 * number does not fit in an int. The caller's errno is preserved.
 */
static int read_number_from_line(const char *line, int *value)
{
	const char *ptr;
	char *end;
	long parsed;
	int saved_errno;
	int out_of_range;

	ptr = strchr(line, ':');
	if (!ptr)
		return 1;

	++ptr;

	/* strtol reports overflow only through errno, so isolate it. */
	saved_errno = errno;
	errno = 0;
	parsed = strtol(ptr, &end, 10);
	out_of_range = (errno == ERANGE);
	errno = saved_errno;

	/* end == ptr means strtol consumed nothing: there was no number. */
	if (end == ptr || out_of_range || parsed != (int)parsed)
		return 1;

	*value = (int)parsed;
	return 0;
}
|  | /** | 
|  | * The function looks for the first free user-index in all the | 
|  | * user-index tables. If all are used, returns -1, otherwise | 
|  | * a valid user-index. | 
|  | * In case the reference count of the table is zero, it means the | 
|  | * table is not in use and wasn't allocated yet, therefore the | 
|  | * mlx5_store_uidx allocates the table, and increment the reference | 
|  | * count on the table. | 
|  | */ | 
|  | static int32_t get_free_uidx(struct mlx5_context *ctx) | 
|  | { | 
|  | int32_t tind; | 
|  | int32_t i; | 
|  |  | 
|  | for (tind = 0; tind < MLX5_UIDX_TABLE_SIZE; tind++) { | 
|  | if (ctx->uidx_table[tind].refcnt < MLX5_UIDX_TABLE_MASK) | 
|  | break; | 
|  | } | 
|  |  | 
|  | if (tind == MLX5_UIDX_TABLE_SIZE) | 
|  | return -1; | 
|  |  | 
|  | if (!ctx->uidx_table[tind].refcnt) | 
|  | return tind << MLX5_UIDX_TABLE_SHIFT; | 
|  |  | 
|  | for (i = 0; i < MLX5_UIDX_TABLE_MASK + 1; i++) { | 
|  | if (!ctx->uidx_table[tind].table[i]) | 
|  | break; | 
|  | } | 
|  |  | 
|  | return (tind << MLX5_UIDX_TABLE_SHIFT) | i; | 
|  | } | 
|  |  | 
|  | int mlx5_cmd_status_to_err(uint8_t status) | 
|  | { | 
|  | switch (status) { | 
|  | case MLX5_CMD_STAT_OK:				return 0; | 
|  | case MLX5_CMD_STAT_INT_ERR:			return EIO; | 
|  | case MLX5_CMD_STAT_BAD_OP_ERR:			return EINVAL; | 
|  | case MLX5_CMD_STAT_BAD_PARAM_ERR:		return EINVAL; | 
|  | case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:		return EIO; | 
|  | case MLX5_CMD_STAT_BAD_RES_ERR:			return EINVAL; | 
|  | case MLX5_CMD_STAT_RES_BUSY:			return EBUSY; | 
|  | case MLX5_CMD_STAT_LIM_ERR:			return ENOMEM; | 
|  | case MLX5_CMD_STAT_BAD_RES_STATE_ERR:		return EINVAL; | 
|  | case MLX5_CMD_STAT_IX_ERR:			return EINVAL; | 
|  | case MLX5_CMD_STAT_NO_RES_ERR:			return EAGAIN; | 
|  | case MLX5_CMD_STAT_BAD_INP_LEN_ERR:		return EIO; | 
|  | case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:		return EIO; | 
|  | case MLX5_CMD_STAT_BAD_QP_STATE_ERR:		return EINVAL; | 
|  | case MLX5_CMD_STAT_BAD_PKT_ERR:			return EINVAL; | 
|  | case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:	return EINVAL; | 
|  | default:					return EIO; | 
|  | } | 
|  | } | 
|  |  | 
|  | int mlx5_get_cmd_status_err(int err, void *out) | 
|  | { | 
|  | if (err == EREMOTEIO) | 
|  | err = mlx5_cmd_status_to_err(DEVX_GET(mbox_out, out, status)); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | int32_t mlx5_store_uidx(struct mlx5_context *ctx, void *rsc) | 
|  | { | 
|  | int32_t tind; | 
|  | int32_t ret = -1; | 
|  | int32_t uidx; | 
|  |  | 
|  | pthread_mutex_lock(&ctx->uidx_table_mutex); | 
|  | uidx = get_free_uidx(ctx); | 
|  | if (uidx < 0) | 
|  | goto out; | 
|  |  | 
|  | tind = uidx >> MLX5_UIDX_TABLE_SHIFT; | 
|  |  | 
|  | if (!ctx->uidx_table[tind].refcnt) { | 
|  | ctx->uidx_table[tind].table = calloc(MLX5_UIDX_TABLE_MASK + 1, | 
|  | sizeof(struct mlx5_resource *)); | 
|  | if (!ctx->uidx_table[tind].table) | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | ++ctx->uidx_table[tind].refcnt; | 
|  | ctx->uidx_table[tind].table[uidx & MLX5_UIDX_TABLE_MASK] = rsc; | 
|  | ret = uidx; | 
|  |  | 
|  | out: | 
|  | pthread_mutex_unlock(&ctx->uidx_table_mutex); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | void mlx5_clear_uidx(struct mlx5_context *ctx, uint32_t uidx) | 
|  | { | 
|  | int tind = uidx >> MLX5_UIDX_TABLE_SHIFT; | 
|  |  | 
|  | pthread_mutex_lock(&ctx->uidx_table_mutex); | 
|  |  | 
|  | if (!--ctx->uidx_table[tind].refcnt) | 
|  | free(ctx->uidx_table[tind].table); | 
|  | else | 
|  | ctx->uidx_table[tind].table[uidx & MLX5_UIDX_TABLE_MASK] = NULL; | 
|  |  | 
|  | pthread_mutex_unlock(&ctx->uidx_table_mutex); | 
|  | } | 
|  |  | 
|  | struct mlx5_mkey *mlx5_find_mkey(struct mlx5_context *ctx, uint32_t mkey) | 
|  | { | 
|  | int tind = mkey >> MLX5_MKEY_TABLE_SHIFT; | 
|  |  | 
|  | if (ctx->mkey_table[tind].refcnt) | 
|  | return ctx->mkey_table[tind].table[mkey & MLX5_MKEY_TABLE_MASK]; | 
|  | else | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | int mlx5_store_mkey(struct mlx5_context *ctx, uint32_t mkey, | 
|  | struct mlx5_mkey *mlx5_mkey) | 
|  | { | 
|  | int tind = mkey >> MLX5_MKEY_TABLE_SHIFT; | 
|  | int ret = 0; | 
|  |  | 
|  | pthread_mutex_lock(&ctx->mkey_table_mutex); | 
|  |  | 
|  | if (!ctx->mkey_table[tind].refcnt) { | 
|  | ctx->mkey_table[tind].table = calloc(MLX5_MKEY_TABLE_MASK + 1, | 
|  | sizeof(struct mlx5_mkey *)); | 
|  | if (!ctx->mkey_table[tind].table) { | 
|  | ret = -1; | 
|  | goto out; | 
|  | } | 
|  | } | 
|  |  | 
|  | ++ctx->mkey_table[tind].refcnt; | 
|  | ctx->mkey_table[tind].table[mkey & MLX5_MKEY_TABLE_MASK] = mlx5_mkey; | 
|  |  | 
|  | out: | 
|  | pthread_mutex_unlock(&ctx->mkey_table_mutex); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | void mlx5_clear_mkey(struct mlx5_context *ctx, uint32_t mkey) | 
|  | { | 
|  | int tind = mkey >> MLX5_MKEY_TABLE_SHIFT; | 
|  |  | 
|  | pthread_mutex_lock(&ctx->mkey_table_mutex); | 
|  |  | 
|  | if (!--ctx->mkey_table[tind].refcnt) | 
|  | free(ctx->mkey_table[tind].table); | 
|  | else | 
|  | ctx->mkey_table[tind].table[mkey & MLX5_MKEY_TABLE_MASK] = NULL; | 
|  |  | 
|  | pthread_mutex_unlock(&ctx->mkey_table_mutex); | 
|  | } | 
|  |  | 
|  | struct mlx5_psv *mlx5_create_psv(struct ibv_pd *pd) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(create_psv_out)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(create_psv_in)] = {}; | 
|  | struct mlx5_psv *psv; | 
|  |  | 
|  | psv = calloc(1, sizeof(*psv)); | 
|  | if (!psv) { | 
|  | errno = ENOMEM; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | DEVX_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); | 
|  | DEVX_SET(create_psv_in, in, pd, to_mpd(pd)->pdn); | 
|  | DEVX_SET(create_psv_in, in, num_psv, 1); | 
|  |  | 
|  | psv->devx_obj = mlx5dv_devx_obj_create(pd->context, in, sizeof(in), | 
|  | out, sizeof(out)); | 
|  | if (!psv->devx_obj) { | 
|  | errno = mlx5_get_cmd_status_err(errno, out); | 
|  | goto err_free_psv; | 
|  | } | 
|  |  | 
|  | psv->index = DEVX_GET(create_psv_out, out, psv0_index); | 
|  |  | 
|  | return psv; | 
|  | err_free_psv: | 
|  | free(psv); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | int mlx5_destroy_psv(struct mlx5_psv *psv) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | ret = mlx5dv_devx_obj_destroy(psv->devx_obj); | 
|  | if (!ret) | 
|  | free(psv); | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
/*
 * Scan /proc/cpuinfo for a CPU with family 6 and model 0x2A or 0x2D.
 *
 * @num_cores: set to the number of "processor" lines seen. It is written
 *             only when /proc/cpuinfo could be opened.
 *
 * Returns 1 if such a CPU was found, 0 otherwise (including when the file
 * cannot be opened).
 */
static int mlx5_is_sandy_bridge(int *num_cores)
{
	int family = -1;
	int model = -1;
	int found = 0;
	char buf[128];
	FILE *cpuinfo;

	cpuinfo = fopen("/proc/cpuinfo", "r");
	if (cpuinfo == NULL)
		return 0;

	*num_cores = 0;

	while (fgets(buf, 128, cpuinfo) != NULL) {
		int parsed;

		if (strncmp(buf, "processor", 9) == 0) {
			/* A new processor record starts: reset what we know. */
			(*num_cores)++;
			family = -1;
			model = -1;
		} else if (strncmp(buf, "cpu family", 10) == 0) {
			if (family < 0 &&
			    read_number_from_line(buf, &parsed) == 0)
				family = parsed;
		} else if (strncmp(buf, "model", 5) == 0) {
			/* Only the first "model..." line of a record counts. */
			if (model < 0 &&
			    read_number_from_line(buf, &parsed) == 0)
				model = parsed;
		}

		if (family == 6 && (model == 0x2A || model == 0x2D))
			found = 1;
	}

	fclose(cpuinfo);
	return found;
}
|  |  | 
|  | /* | 
|  | man cpuset | 
|  |  | 
|  | This format displays each 32-bit word in hexadecimal (using ASCII characters "0" - "9" and "a" - "f"); words | 
|  | are filled with leading zeros, if required. For masks longer than one word, a comma separator is used between | 
|  | words. Words are displayed in big-endian order, which has the most significant bit first. The hex digits | 
|  | within a word are also in big-endian order. | 
|  |  | 
|  | The number of 32-bit words displayed is the minimum number needed to display all bits of the bitmask, based on | 
|  | the size of the bitmask. | 
|  |  | 
|  | Examples of the Mask Format: | 
|  |  | 
|  | 00000001                        # just bit 0 set | 
|  | 40000000,00000000,00000000      # just bit 94 set | 
|  | 000000ff,00000000               # bits 32-39 set | 
|  | 00000000,000E3862               # 1,5,6,11-13,17-19 set | 
|  |  | 
|  | A mask with bits 0, 1, 2, 4, 8, 16, 32, and 64 set displays as: | 
|  |  | 
|  | 00000001,00000001,00010117 | 
|  |  | 
|  | The first "1" is for bit 64, the second for bit 32, the third for bit 16, the fourth for bit 8, the fifth for | 
|  | bit 4, and the "7" is for bits 2, 1, and 0. | 
|  | */ | 
|  | static void mlx5_local_cpu_set(struct ibv_device *ibdev, struct mlx5_context *mctx, | 
|  | cpu_set_t *cpu_set) | 
|  | { | 
|  | char *p, buf[1024] = {}; | 
|  | char *env_value; | 
|  | uint32_t word; | 
|  | int i, k; | 
|  |  | 
|  | env_value = getenv("MLX5_LOCAL_CPUS"); | 
|  | if (env_value) | 
|  | strncpy(buf, env_value, sizeof(buf) - 1); | 
|  | else { | 
|  | char fname[MAXPATHLEN]; | 
|  | FILE *fp; | 
|  |  | 
|  | snprintf(fname, MAXPATHLEN, "/sys/class/infiniband/%s/device/local_cpus", | 
|  | ibv_get_device_name(ibdev)); | 
|  |  | 
|  | fp = fopen(fname, "r"); | 
|  | if (!fp) { | 
|  | mlx5_err(mctx->dbg_fp, PFX "Warning: can not get local cpu set: failed to open %s\n", fname); | 
|  | return; | 
|  | } | 
|  | if (!fgets(buf, sizeof(buf), fp)) { | 
|  | mlx5_err(mctx->dbg_fp, PFX "Warning: can not get local cpu set: failed to read cpu mask\n"); | 
|  | fclose(fp); | 
|  | return; | 
|  | } | 
|  | fclose(fp); | 
|  | } | 
|  |  | 
|  | p = strrchr(buf, ','); | 
|  | if (!p) | 
|  | p = buf; | 
|  |  | 
|  | i = 0; | 
|  | do { | 
|  | if (*p == ',') { | 
|  | *p = 0; | 
|  | p ++; | 
|  | } | 
|  |  | 
|  | word = strtoul(p, NULL, 16); | 
|  |  | 
|  | for (k = 0; word; ++k, word >>= 1) | 
|  | if (word & 1) | 
|  | CPU_SET(k+i, cpu_set); | 
|  |  | 
|  | if (p == buf) | 
|  | break; | 
|  |  | 
|  | p = strrchr(buf, ','); | 
|  | if (!p) | 
|  | p = buf; | 
|  |  | 
|  | i += 32; | 
|  | } while (i < CPU_SETSIZE); | 
|  | } | 
|  |  | 
|  | static int mlx5_enable_sandy_bridge_fix(struct ibv_device *ibdev, struct mlx5_context *mctx) | 
|  | { | 
|  | cpu_set_t my_cpus, dev_local_cpus, result_set; | 
|  | int stall_enable; | 
|  | int ret; | 
|  | int num_cores; | 
|  |  | 
|  | if (!mlx5_is_sandy_bridge(&num_cores)) | 
|  | return 0; | 
|  |  | 
|  | /* by default enable stall on sandy bridge arch */ | 
|  | stall_enable = 1; | 
|  |  | 
|  | /* | 
|  | * check if app is bound to cpu set that is inside | 
|  | * of device local cpu set. Disable stalling if true | 
|  | */ | 
|  |  | 
|  | /* use static cpu set - up to CPU_SETSIZE (1024) cpus/node */ | 
|  | CPU_ZERO(&my_cpus); | 
|  | CPU_ZERO(&dev_local_cpus); | 
|  | CPU_ZERO(&result_set); | 
|  | ret = sched_getaffinity(0, sizeof(my_cpus), &my_cpus); | 
|  | if (ret == -1) { | 
|  | if (errno == EINVAL) | 
|  | mlx5_err(mctx->dbg_fp, PFX "Warning: my cpu set is too small\n"); | 
|  | else | 
|  | mlx5_err(mctx->dbg_fp, PFX "Warning: failed to get my cpu set\n"); | 
|  | goto out; | 
|  | } | 
|  |  | 
|  | /* get device local cpu set */ | 
|  | mlx5_local_cpu_set(ibdev, mctx, &dev_local_cpus); | 
|  |  | 
|  | /* check if my cpu set is in dev cpu */ | 
|  | CPU_OR(&result_set, &my_cpus, &dev_local_cpus); | 
|  | stall_enable = CPU_EQUAL(&result_set, &dev_local_cpus) ? 0 : 1; | 
|  |  | 
|  | out: | 
|  | return stall_enable; | 
|  | } | 
|  |  | 
|  | static void mlx5_read_env(struct ibv_device *ibdev, struct mlx5_context *ctx) | 
|  | { | 
|  | char *env_value; | 
|  |  | 
|  | env_value = getenv("MLX5_STALL_CQ_POLL"); | 
|  | if (env_value) | 
|  | /* check if cq stall is enforced by user */ | 
|  | ctx->stall_enable = (strcmp(env_value, "0")) ? 1 : 0; | 
|  | else | 
|  | /* autodetect if we need to do cq polling */ | 
|  | ctx->stall_enable = mlx5_enable_sandy_bridge_fix(ibdev, ctx); | 
|  |  | 
|  | env_value = getenv("MLX5_STALL_NUM_LOOP"); | 
|  | if (env_value) | 
|  | mlx5_stall_num_loop = atoi(env_value); | 
|  |  | 
|  | env_value = getenv("MLX5_STALL_CQ_POLL_MIN"); | 
|  | if (env_value) | 
|  | mlx5_stall_cq_poll_min = atoi(env_value); | 
|  |  | 
|  | env_value = getenv("MLX5_STALL_CQ_POLL_MAX"); | 
|  | if (env_value) | 
|  | mlx5_stall_cq_poll_max = atoi(env_value); | 
|  |  | 
|  | env_value = getenv("MLX5_STALL_CQ_INC_STEP"); | 
|  | if (env_value) | 
|  | mlx5_stall_cq_inc_step = atoi(env_value); | 
|  |  | 
|  | env_value = getenv("MLX5_STALL_CQ_DEC_STEP"); | 
|  | if (env_value) | 
|  | mlx5_stall_cq_dec_step = atoi(env_value); | 
|  |  | 
|  | ctx->stall_adaptive_enable = 0; | 
|  | ctx->stall_cycles = 0; | 
|  |  | 
|  | if (mlx5_stall_num_loop < 0) { | 
|  | ctx->stall_adaptive_enable = 1; | 
|  | ctx->stall_cycles = mlx5_stall_cq_poll_min; | 
|  | } | 
|  |  | 
|  | } | 
|  |  | 
|  | static int get_total_uuars(int page_size) | 
|  | { | 
|  | int size = MLX5_DEF_TOT_UUARS; | 
|  | int uuars_in_page; | 
|  | char *env; | 
|  |  | 
|  | env = getenv("MLX5_TOTAL_UUARS"); | 
|  | if (env) | 
|  | size = atoi(env); | 
|  |  | 
|  | if (size < 1) | 
|  | return -EINVAL; | 
|  |  | 
|  | uuars_in_page = page_size / MLX5_ADAPTER_PAGE_SIZE * MLX5_NUM_NON_FP_BFREGS_PER_UAR; | 
|  | size = max(uuars_in_page, size); | 
|  | size = align(size, MLX5_NUM_NON_FP_BFREGS_PER_UAR); | 
|  | if (size > MLX5_MAX_BFREGS) | 
|  | return -ENOMEM; | 
|  |  | 
|  | return size; | 
|  | } | 
|  |  | 
/*
 * Select the stream used for debug/error output.
 *
 * @dbg_fp: receives the stream. It may be set to NULL.
 *
 * The default stream is stderr when built with MLX5_DEBUG and NULL
 * otherwise. If MLX5_DEBUG_FILE is set, that file is opened for appending
 * (created if missing); if the open fails, the default stream is used and
 * the failure is reported through mlx5_err() on that default stream.
 */
void mlx5_open_debug_file(FILE **dbg_fp)
{
	FILE *default_dbg_fp = NULL;
	char *env;

#ifdef MLX5_DEBUG
	default_dbg_fp = stderr;
#endif

	env = getenv("MLX5_DEBUG_FILE");
	if (!env) {
		*dbg_fp = default_dbg_fp;
		return;
	}

	/*
	 * "a+" is the standard spelling of append+read. The former "aw+"
	 * is not a mode string defined by ISO C or POSIX.
	 */
	*dbg_fp = fopen(env, "a+");
	if (!*dbg_fp) {
		*dbg_fp = default_dbg_fp;
		mlx5_err(*dbg_fp, "Failed opening debug file %s\n", env);
	}
}
|  |  | 
/*
 * Close a debug stream obtained from mlx5_open_debug_file().
 * NULL and stderr are left alone.
 */
void mlx5_close_debug_file(FILE *dbg_fp)
{
	if (dbg_fp == NULL || dbg_fp == stderr)
		return;

	fclose(dbg_fp);
}
|  |  | 
|  | void mlx5_set_debug_mask(void) | 
|  | { | 
|  | char *env; | 
|  |  | 
|  | env = getenv("MLX5_DEBUG_MASK"); | 
|  | if (env) | 
|  | mlx5_debug_mask = strtol(env, NULL, 0); | 
|  | } | 
|  |  | 
|  | static void set_freeze_on_error(void) | 
|  | { | 
|  | char *env; | 
|  |  | 
|  | env = getenv("MLX5_FREEZE_ON_ERROR_CQE"); | 
|  | if (env) | 
|  | mlx5_freeze_on_error_cqe = strtol(env, NULL, 0); | 
|  | } | 
|  |  | 
/*
 * Read MLX5_POST_SEND_PREFER_BF.
 * Returns 1 when the variable is unset or set to anything other than "0",
 * and 0 only when it is exactly "0".
 */
static int get_always_bf(void)
{
	const char *val = getenv("MLX5_POST_SEND_PREFER_BF");

	if (val == NULL)
		return 1;

	return strcmp(val, "0") != 0;
}
|  |  | 
/*
 * Read MLX5_SHUT_UP_BF.
 * Returns 0 when the variable is unset or exactly "0", and 1 for any other
 * value.
 */
static int get_shut_up_bf(void)
{
	const char *val = getenv("MLX5_SHUT_UP_BF");

	return val != NULL && strcmp(val, "0") != 0;
}
|  |  | 
|  | static int get_num_low_lat_uuars(int tot_uuars) | 
|  | { | 
|  | char *env; | 
|  | int num = 4; | 
|  |  | 
|  | env = getenv("MLX5_NUM_LOW_LAT_UUARS"); | 
|  | if (env) | 
|  | num = atoi(env); | 
|  |  | 
|  | if (num < 0) | 
|  | return -EINVAL; | 
|  |  | 
|  | num = max(num, tot_uuars - MLX5_MED_BFREGS_TSHOLD); | 
|  | return num; | 
|  | } | 
|  |  | 
|  | /* The library allocates an array of uuar contexts. The one in index zero does | 
|  | * not to execersize odd/even policy so it can avoid a lock but it may not use | 
|  | * blue flame. The upper ones, low_lat_uuars can use blue flame with no lock | 
|  | * since they are assigned to one QP only. The rest can use blue flame but since | 
|  | * they are shared they need a lock | 
|  | */ | 
|  | static int need_uuar_lock(struct mlx5_context *ctx, int uuarn) | 
|  | { | 
|  | int i; | 
|  |  | 
|  | if (uuarn == 0 || mlx5_single_threaded) | 
|  | return 0; | 
|  |  | 
|  | i = (uuarn / 2) + (uuarn % 2); | 
|  | if (i >= ctx->tot_uuars - ctx->low_lat_uuars) | 
|  | return 0; | 
|  |  | 
|  | return 1; | 
|  | } | 
|  |  | 
/*
 * Read MLX5_SINGLE_THREADED.
 * Returns 1 only when the variable is set to exactly "1"; returns 0 when
 * it is unset or has any other value.
 */
static int single_threaded_app(void)
{
	const char *val = getenv("MLX5_SINGLE_THREADED");

	return val != NULL && strcmp(val, "1") == 0;
}
|  |  | 
|  | static int mlx5_cmd_get_context(struct mlx5_context *context, | 
|  | struct mlx5_alloc_ucontext *req, | 
|  | size_t req_len, | 
|  | struct mlx5_alloc_ucontext_resp *resp, | 
|  | size_t resp_len) | 
|  | { | 
|  | struct verbs_context *verbs_ctx = &context->ibv_ctx; | 
|  |  | 
|  | if (!ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, | 
|  | req_len, &resp->ibv_resp, resp_len)) | 
|  | return 0; | 
|  |  | 
|  | /* The ibv_cmd_get_context fails in older kernels when passing | 
|  | * a request length that the kernel doesn't know. | 
|  | * To avoid breaking compatibility of new libmlx5 and older | 
|  | * kernels, when ibv_cmd_get_context fails with the full | 
|  | * request length, we try once again with the legacy length. | 
|  | * We repeat this process while reducing requested size based | 
|  | * on the feature input size. To avoid this in the future, we | 
|  | * will remove the check in kernel that requires fields unknown | 
|  | * to the kernel to be cleared. This will require that any new | 
|  | * feature that involves extending struct mlx5_alloc_ucontext | 
|  | * will be accompanied by an indication in the form of one or | 
|  | * more fields in struct mlx5_alloc_ucontext_resp. If the | 
|  | * response value can be interpreted as feature not supported | 
|  | * when the returned value is zero, this will suffice to | 
|  | * indicate to the library that the request was ignored by the | 
|  | * kernel, either because it is unaware or because it decided | 
|  | * to do so. If zero is a valid response, we will add a new | 
|  | * field that indicates whether the request was handled. | 
|  | */ | 
|  | if (!ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, | 
|  | offsetof(struct mlx5_alloc_ucontext, lib_caps), | 
|  | &resp->ibv_resp, resp_len)) | 
|  | return 0; | 
|  |  | 
|  | return ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, | 
|  | offsetof(struct mlx5_alloc_ucontext, | 
|  | max_cqe_version), | 
|  | &resp->ibv_resp, resp_len); | 
|  | } | 
|  |  | 
|  | static int mlx5_map_internal_clock(struct mlx5_device *mdev, | 
|  | struct ibv_context *ibv_ctx) | 
|  | { | 
|  | struct mlx5_context *context = to_mctx(ibv_ctx); | 
|  | void *hca_clock_page; | 
|  | off_t offset = 0; | 
|  |  | 
|  | set_command(MLX5_IB_MMAP_CORE_CLOCK, &offset); | 
|  | hca_clock_page = mmap(NULL, mdev->page_size, | 
|  | PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd, | 
|  | mdev->page_size * offset); | 
|  |  | 
|  | if (hca_clock_page == MAP_FAILED) { | 
|  | mlx5_err(context->dbg_fp, PFX | 
|  | "Warning: Timestamp available,\n" | 
|  | "but failed to mmap() hca core clock page.\n"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | context->hca_core_clock = hca_clock_page + | 
|  | (context->core_clock.offset & (mdev->page_size - 1)); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void mlx5_map_clock_info(struct mlx5_device *mdev, | 
|  | struct ibv_context *ibv_ctx) | 
|  | { | 
|  | struct mlx5_context *context = to_mctx(ibv_ctx); | 
|  | void *clock_info_page; | 
|  | off_t offset = 0; | 
|  |  | 
|  | set_command(MLX5_IB_MMAP_CLOCK_INFO, &offset); | 
|  | set_index(MLX5_IB_CLOCK_INFO_V1, &offset); | 
|  | clock_info_page = mmap(NULL, mdev->page_size, | 
|  | PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd, | 
|  | offset * mdev->page_size); | 
|  |  | 
|  | if (clock_info_page != MAP_FAILED) | 
|  | context->clock_info_page = clock_info_page; | 
|  | } | 
|  |  | 
|  | static uint32_t get_dc_odp_caps(struct ibv_context *ctx) | 
|  | { | 
|  | uint32_t in[DEVX_ST_SZ_DW(query_hca_cap_in)] = {}; | 
|  | uint32_t out[DEVX_ST_SZ_DW(query_hca_cap_out)] = {}; | 
|  | uint16_t opmod = (MLX5_CAP_ODP << 1) | HCA_CAP_OPMOD_GET_CUR; | 
|  | uint32_t ret; | 
|  |  | 
|  | DEVX_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); | 
|  | DEVX_SET(query_hca_cap_in, in, op_mod, opmod); | 
|  |  | 
|  | ret = mlx5dv_devx_general_cmd(ctx, in, sizeof(in), out, sizeof(out)); | 
|  | if (ret) | 
|  | return 0; | 
|  |  | 
|  | if (DEVX_GET(query_hca_cap_out, out, | 
|  | capability.odp_cap.dc_odp_caps.send)) | 
|  | ret |= IBV_ODP_SUPPORT_SEND; | 
|  | if (DEVX_GET(query_hca_cap_out, out, | 
|  | capability.odp_cap.dc_odp_caps.receive)) | 
|  | ret |= IBV_ODP_SUPPORT_RECV; | 
|  | if (DEVX_GET(query_hca_cap_out, out, | 
|  | capability.odp_cap.dc_odp_caps.write)) | 
|  | ret |= IBV_ODP_SUPPORT_WRITE; | 
|  | if (DEVX_GET(query_hca_cap_out, out, | 
|  | capability.odp_cap.dc_odp_caps.read)) | 
|  | ret |= IBV_ODP_SUPPORT_READ; | 
|  | if (DEVX_GET(query_hca_cap_out, out, | 
|  | capability.odp_cap.dc_odp_caps.atomic)) | 
|  | ret |= IBV_ODP_SUPPORT_ATOMIC; | 
|  | if (DEVX_GET(query_hca_cap_out, out, | 
|  | capability.odp_cap.dc_odp_caps.srq_receive)) | 
|  | ret |= IBV_ODP_SUPPORT_SRQ_RECV; | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_query_device(struct ibv_context *ctx_in, | 
|  | struct mlx5dv_context *attrs_out) | 
|  | { | 
|  | struct mlx5_context *mctx = to_mctx(ctx_in); | 
|  | uint64_t comp_mask_out = 0; | 
|  |  | 
|  | attrs_out->version   = 0; | 
|  | attrs_out->flags     = 0; | 
|  |  | 
|  | if (mctx->cqe_version == MLX5_CQE_VERSION_V1) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_CQE_V1; | 
|  |  | 
|  | if (mctx->vendor_cap_flags & MLX5_VENDOR_CAP_FLAGS_MPW_ALLOWED) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED; | 
|  |  | 
|  | if (mctx->vendor_cap_flags & MLX5_VENDOR_CAP_FLAGS_CQE_128B_COMP) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP; | 
|  |  | 
|  | if (mctx->vendor_cap_flags & MLX5_VENDOR_CAP_FLAGS_CQE_128B_PAD) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD; | 
|  |  | 
|  | if (mctx->flags & MLX5_CTX_FLAGS_REAL_TIME_TS_SUPPORTED) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_REAL_TIME_TS; | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_CQE_COMPRESION) { | 
|  | attrs_out->cqe_comp_caps = mctx->cqe_comp_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_CQE_COMPRESION; | 
|  | } | 
|  |  | 
|  | if (mctx->vendor_cap_flags & MLX5_VENDOR_CAP_FLAGS_ENHANCED_MPW) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW; | 
|  |  | 
|  | if (mctx->vendor_cap_flags & | 
|  | MLX5_VENDOR_CAP_FLAGS_PACKET_BASED_CREDIT_MODE) | 
|  | attrs_out->flags |= MLX5DV_CONTEXT_FLAGS_PACKET_BASED_CREDIT_MODE; | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_SWP) { | 
|  | attrs_out->sw_parsing_caps = mctx->sw_parsing_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_SWP; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) { | 
|  | attrs_out->striding_rq_caps = mctx->striding_rq_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_STRIDING_RQ; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) { | 
|  | attrs_out->tunnel_offloads_caps = mctx->tunnel_offloads_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_DCI_STREAMS) { | 
|  | attrs_out->dci_streams_caps = mctx->dci_streams_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_DCI_STREAMS; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_DYN_BFREGS) { | 
|  | attrs_out->max_dynamic_bfregs = mctx->num_dyn_bfregs; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_DYN_BFREGS; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_CLOCK_INFO_UPDATE) { | 
|  | if (mctx->clock_info_page) { | 
|  | attrs_out->max_clock_info_update_nsec = | 
|  | mctx->clock_info_page->overflow_period; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_CLOCK_INFO_UPDATE; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_FLOW_ACTION_FLAGS) { | 
|  | attrs_out->flow_action_flags = mctx->flow_action_flags; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_FLOW_ACTION_FLAGS; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_DC_ODP_CAPS) { | 
|  | attrs_out->dc_odp_caps = get_dc_odp_caps(ctx_in); | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_DC_ODP_CAPS; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_HCA_CORE_CLOCK) { | 
|  | if (mctx->hca_core_clock) { | 
|  | attrs_out->hca_core_clock = mctx->hca_core_clock; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_HCA_CORE_CLOCK; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_NUM_LAG_PORTS) { | 
|  | if (mctx->entropy_caps.num_lag_ports) { | 
|  | attrs_out->num_lag_ports = | 
|  | mctx->entropy_caps.num_lag_ports; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_NUM_LAG_PORTS; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_SIGNATURE_OFFLOAD) { | 
|  | attrs_out->sig_caps = mctx->sig_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_SIGNATURE_OFFLOAD; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_WR_MEMCPY_LENGTH) { | 
|  | attrs_out->max_wr_memcpy_length = | 
|  | mctx->dma_mmo_caps.dma_max_size; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_WR_MEMCPY_LENGTH; | 
|  | } | 
|  |  | 
|  | if (attrs_out->comp_mask & MLX5DV_CONTEXT_MASK_CRYPTO_OFFLOAD) { | 
|  | attrs_out->crypto_caps = mctx->crypto_caps; | 
|  | comp_mask_out |= MLX5DV_CONTEXT_MASK_CRYPTO_OFFLOAD; | 
|  | } | 
|  |  | 
|  | attrs_out->comp_mask = comp_mask_out; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_query_device(struct ibv_context *ctx_in, | 
|  | struct mlx5dv_context *attrs_out) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ctx_in); | 
|  |  | 
|  | if (!dvops || !dvops->query_device) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->query_device(ctx_in, attrs_out); | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_qp(struct ibv_qp *qp_in, | 
|  | struct mlx5dv_qp *qp_out) | 
|  | { | 
|  | struct mlx5_qp *mqp = to_mqp(qp_in); | 
|  | uint64_t mask_out = 0; | 
|  |  | 
|  | qp_out->dbrec     = mqp->db; | 
|  |  | 
|  | if (mqp->sq_buf_size) | 
|  | /* IBV_QPT_RAW_PACKET */ | 
|  | qp_out->sq.buf = (void *)((uintptr_t)mqp->sq_buf.buf); | 
|  | else | 
|  | qp_out->sq.buf = (void *)((uintptr_t)mqp->buf.buf + mqp->sq.offset); | 
|  | qp_out->sq.wqe_cnt = mqp->sq.wqe_cnt; | 
|  | qp_out->sq.stride  = 1 << mqp->sq.wqe_shift; | 
|  |  | 
|  | qp_out->rq.buf     = (void *)((uintptr_t)mqp->buf.buf + mqp->rq.offset); | 
|  | qp_out->rq.wqe_cnt = mqp->rq.wqe_cnt; | 
|  | qp_out->rq.stride  = 1 << mqp->rq.wqe_shift; | 
|  |  | 
|  | qp_out->bf.reg	   = mqp->bf->reg; | 
|  |  | 
|  | if (qp_out->comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { | 
|  | qp_out->uar_mmap_offset = mqp->bf->uar_mmap_offset; | 
|  | mask_out |= MLX5DV_QP_MASK_UAR_MMAP_OFFSET; | 
|  | } | 
|  |  | 
|  | if (qp_out->comp_mask & MLX5DV_QP_MASK_RAW_QP_HANDLES) { | 
|  | qp_out->tirn = mqp->tirn; | 
|  | qp_out->tisn = mqp->tisn; | 
|  | qp_out->rqn = mqp->rqn; | 
|  | qp_out->sqn = mqp->sqn; | 
|  | mask_out |= MLX5DV_QP_MASK_RAW_QP_HANDLES; | 
|  | } | 
|  |  | 
|  | if (qp_out->comp_mask & MLX5DV_QP_MASK_RAW_QP_TIR_ADDR) { | 
|  | qp_out->tir_icm_addr = mqp->tir_icm_addr; | 
|  | mask_out |= MLX5DV_QP_MASK_RAW_QP_TIR_ADDR; | 
|  | } | 
|  |  | 
|  | if (mqp->bf->uuarn > 0) | 
|  | qp_out->bf.size = mqp->bf->buf_size; | 
|  | else | 
|  | qp_out->bf.size = 0; | 
|  |  | 
|  | qp_out->comp_mask = mask_out; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_cq(struct ibv_cq *cq_in, | 
|  | struct mlx5dv_cq *cq_out) | 
|  | { | 
|  | struct mlx5_cq *mcq = to_mcq(cq_in); | 
|  | struct mlx5_context *mctx = to_mctx(cq_in->context); | 
|  |  | 
|  | cq_out->comp_mask = 0; | 
|  | cq_out->cqn       = mcq->cqn; | 
|  | cq_out->cqe_cnt   = mcq->verbs_cq.cq.cqe + 1; | 
|  | cq_out->cqe_size  = mcq->cqe_sz; | 
|  | cq_out->buf       = mcq->active_buf->buf; | 
|  | cq_out->dbrec     = mcq->dbrec; | 
|  | cq_out->cq_uar	  = mctx->cq_uar_reg; | 
|  |  | 
|  | mcq->flags	 |= MLX5_CQ_FLAGS_DV_OWNED; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_rwq(struct ibv_wq *wq_in, | 
|  | struct mlx5dv_rwq *rwq_out) | 
|  | { | 
|  | struct mlx5_rwq *mrwq = to_mrwq(wq_in); | 
|  |  | 
|  | rwq_out->comp_mask = 0; | 
|  | rwq_out->buf       = mrwq->pbuff; | 
|  | rwq_out->dbrec     = mrwq->recv_db; | 
|  | rwq_out->wqe_cnt   = mrwq->rq.wqe_cnt; | 
|  | rwq_out->stride    = 1 << mrwq->rq.wqe_shift; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_srq(struct ibv_srq *srq_in, | 
|  | struct mlx5dv_srq *srq_out) | 
|  | { | 
|  | struct mlx5_srq *msrq; | 
|  | uint64_t mask_out = 0; | 
|  |  | 
|  | msrq = container_of(srq_in, struct mlx5_srq, vsrq.srq); | 
|  |  | 
|  | srq_out->buf       = msrq->buf.buf; | 
|  | srq_out->dbrec     = msrq->db; | 
|  | srq_out->stride    = 1 << msrq->wqe_shift; | 
|  | srq_out->head      = msrq->head; | 
|  | srq_out->tail      = msrq->tail; | 
|  |  | 
|  | if (srq_out->comp_mask & MLX5DV_SRQ_MASK_SRQN) { | 
|  | srq_out->srqn = msrq->srqn; | 
|  | mask_out |= MLX5DV_SRQ_MASK_SRQN; | 
|  | } | 
|  |  | 
|  | srq_out->comp_mask = mask_out; | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_dm(struct ibv_dm *dm_in, | 
|  | struct mlx5dv_dm *dm_out) | 
|  | { | 
|  | struct mlx5_dm *mdm = to_mdm(dm_in); | 
|  | uint64_t mask_out = 0; | 
|  |  | 
|  | dm_out->buf       = mdm->start_va; | 
|  | dm_out->length    = mdm->length; | 
|  |  | 
|  | if (dm_out->comp_mask & MLX5DV_DM_MASK_REMOTE_VA) { | 
|  | dm_out->remote_va = mdm->remote_va; | 
|  | mask_out |= MLX5DV_DM_MASK_REMOTE_VA; | 
|  | } | 
|  |  | 
|  | dm_out->comp_mask = mask_out; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_av(struct ibv_ah *ah_in, | 
|  | struct mlx5dv_ah *ah_out) | 
|  | { | 
|  | struct mlx5_ah *mah = to_mah(ah_in); | 
|  |  | 
|  | ah_out->comp_mask = 0; | 
|  | ah_out->av	  = &mah->av; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int mlx5dv_get_pd(struct ibv_pd *pd_in, | 
|  | struct mlx5dv_pd *pd_out) | 
|  | { | 
|  | struct mlx5_pd *mpd = to_mpd(pd_in); | 
|  |  | 
|  | pd_out->comp_mask = 0; | 
|  | pd_out->pdn = mpd->pdn; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int query_lag(struct ibv_context *ctx, uint8_t *lag_state, | 
|  | uint8_t *tx_remap_affinity_1, | 
|  | uint8_t *tx_remap_affinity_2) | 
|  | { | 
|  | uint32_t out_lag[DEVX_ST_SZ_DW(query_lag_out)] = {}; | 
|  | uint32_t in_lag[DEVX_ST_SZ_DW(query_lag_in)] = {}; | 
|  | int ret; | 
|  |  | 
|  | DEVX_SET(query_lag_in, in_lag, opcode, MLX5_CMD_OP_QUERY_LAG); | 
|  | ret = mlx5dv_devx_general_cmd(ctx, in_lag, sizeof(in_lag), out_lag, | 
|  | sizeof(out_lag)); | 
|  | if (ret) | 
|  | return mlx5_get_cmd_status_err(ret, out_lag); | 
|  |  | 
|  | *lag_state = DEVX_GET(query_lag_out, out_lag, ctx.lag_state); | 
|  | if (tx_remap_affinity_1) | 
|  | *tx_remap_affinity_1 = DEVX_GET(query_lag_out, out_lag, | 
|  | ctx.tx_remap_affinity_1); | 
|  | if (tx_remap_affinity_2) | 
|  | *tx_remap_affinity_2 = DEVX_GET(query_lag_out, out_lag, | 
|  | ctx.tx_remap_affinity_2); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static bool lag_operation_supported(struct ibv_qp *qp) | 
|  | { | 
|  | struct mlx5_context *mctx = to_mctx(qp->context); | 
|  | struct mlx5_qp *mqp = to_mqp(qp); | 
|  |  | 
|  | if (mctx->entropy_caps.num_lag_ports <= 1) | 
|  | return false; | 
|  |  | 
|  | if ((qp->qp_type == IBV_QPT_RC) || | 
|  | (qp->qp_type == IBV_QPT_UD) || | 
|  | (qp->qp_type == IBV_QPT_UC) || | 
|  | (qp->qp_type == IBV_QPT_RAW_PACKET) || | 
|  | (qp->qp_type == IBV_QPT_XRC_SEND) || | 
|  | ((qp->qp_type == IBV_QPT_DRIVER) && | 
|  | (mqp->dc_type == MLX5DV_DCTYPE_DCI))) | 
|  | return true; | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  |  | 
|  | static int _mlx5dv_query_qp_lag_port(struct ibv_qp *qp, uint8_t *port_num, | 
|  | uint8_t *active_port_num) | 
|  | { | 
|  | uint8_t lag_state = 0, tx_remap_affinity_1 = 0, tx_remap_affinity_2 = 0; | 
|  | uint32_t in_tis[DEVX_ST_SZ_DW(query_tis_in)] = {}; | 
|  | uint32_t out_tis[DEVX_ST_SZ_DW(query_tis_out)] = {}; | 
|  | uint32_t in_qp[DEVX_ST_SZ_DW(query_qp_in)] = {}; | 
|  | uint32_t out_qp[DEVX_ST_SZ_DW(query_qp_out)] = {}; | 
|  | struct mlx5_context *mctx = to_mctx(qp->context); | 
|  | struct mlx5_qp *mqp = to_mqp(qp); | 
|  | int ret; | 
|  |  | 
|  | if (!lag_operation_supported(qp)) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | ret = query_lag(qp->context, &lag_state, | 
|  | &tx_remap_affinity_1, &tx_remap_affinity_2); | 
|  | if (ret) | 
|  | return ret; | 
|  |  | 
|  | if (!lag_state && !mctx->entropy_caps.lag_tx_port_affinity) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | switch (qp->qp_type) { | 
|  | case IBV_QPT_RAW_PACKET: | 
|  | DEVX_SET(query_tis_in, in_tis, opcode, MLX5_CMD_OP_QUERY_TIS); | 
|  | DEVX_SET(query_tis_in, in_tis, tisn, mqp->tisn); | 
|  | ret = mlx5dv_devx_qp_query(qp, in_tis, sizeof(in_tis), out_tis, | 
|  | sizeof(out_tis)); | 
|  | if (ret) | 
|  | return mlx5_get_cmd_status_err(ret, out_tis); | 
|  |  | 
|  | *port_num = DEVX_GET(query_tis_out, out_tis, | 
|  | tis_context.lag_tx_port_affinity); | 
|  | break; | 
|  |  | 
|  | default: | 
|  | DEVX_SET(query_qp_in, in_qp, opcode, MLX5_CMD_OP_QUERY_QP); | 
|  | DEVX_SET(query_qp_in, in_qp, qpn, qp->qp_num); | 
|  | ret = mlx5dv_devx_qp_query(qp, in_qp, sizeof(in_qp), out_qp, | 
|  | sizeof(out_qp)); | 
|  | if (ret) | 
|  | return mlx5_get_cmd_status_err(ret, out_qp); | 
|  |  | 
|  | *port_num = DEVX_GET(query_qp_out, out_qp, | 
|  | qpc.lag_tx_port_affinity); | 
|  | break; | 
|  | } | 
|  |  | 
|  | switch (*port_num) { | 
|  | case 1: | 
|  | *active_port_num = tx_remap_affinity_1; | 
|  | break; | 
|  |  | 
|  | case 2: | 
|  | *active_port_num = tx_remap_affinity_2; | 
|  | break; | 
|  |  | 
|  | default: | 
|  | return EOPNOTSUPP; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_query_qp_lag_port(struct ibv_qp *qp, uint8_t *port_num, | 
|  | uint8_t *active_port_num) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(qp->context); | 
|  |  | 
|  | if (!dvops || !dvops->query_qp_lag_port) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->query_qp_lag_port(qp, port_num, | 
|  | active_port_num); | 
|  | } | 
|  |  | 
|  | static int modify_tis_lag_port(struct ibv_qp *qp, uint8_t port_num) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(modify_tis_out)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(modify_tis_in)] = {}; | 
|  | struct mlx5_qp *mqp = to_mqp(qp); | 
|  | int ret; | 
|  |  | 
|  | DEVX_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); | 
|  | DEVX_SET(modify_tis_in, in, tisn, mqp->tisn); | 
|  | DEVX_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); | 
|  | DEVX_SET(modify_tis_in, in, ctx.lag_tx_port_affinity, port_num); | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | static int modify_qp_lag_port(struct ibv_qp *qp, uint8_t port_num) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(rts2rts_qp_out)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(rts2rts_qp_in)] = {}; | 
|  | struct mlx5_context *mctx = to_mctx(qp->context); | 
|  | int ret; | 
|  |  | 
|  | if (!mctx->entropy_caps.rts2rts_lag_tx_port_affinity || | 
|  | qp->state != IBV_QPS_RTS) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | DEVX_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpn, qp->qp_num); | 
|  | DEVX_SET(rts2rts_qp_in, in, opt_param_mask, | 
|  | MLX5_QPC_OPT_MASK_RTS2RTS_LAG_TX_PORT_AFFINITY); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpc.lag_tx_port_affinity, port_num); | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_modify_qp_lag_port(struct ibv_qp *qp, uint8_t port_num) | 
|  | { | 
|  | uint8_t curr_configured, curr_active; | 
|  | struct mlx5_qp *mqp = to_mqp(qp); | 
|  | int ret; | 
|  |  | 
|  | /* Query lag port to see if we are at all in lag mode, otherwise FW | 
|  | * might return success and ignore the modification. | 
|  | */ | 
|  | ret = mlx5dv_query_qp_lag_port(qp, &curr_configured, &curr_active); | 
|  | if (ret) | 
|  | return ret; | 
|  |  | 
|  | switch (qp->qp_type) { | 
|  | case IBV_QPT_RAW_PACKET: | 
|  | return modify_tis_lag_port(qp, port_num); | 
|  |  | 
|  | case IBV_QPT_DRIVER: | 
|  | if (mqp->dc_type != MLX5DV_DCTYPE_DCI) | 
|  | return EOPNOTSUPP; | 
|  | SWITCH_FALLTHROUGH; | 
|  | case IBV_QPT_RC: | 
|  | case IBV_QPT_UD: | 
|  | case IBV_QPT_UC: | 
|  | return modify_qp_lag_port(qp, port_num); | 
|  |  | 
|  | default: | 
|  | return EOPNOTSUPP; | 
|  | } | 
|  | } | 
|  |  | 
|  | int mlx5dv_modify_qp_lag_port(struct ibv_qp *qp, uint8_t port_num) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(qp->context); | 
|  |  | 
|  | if (!dvops || !dvops->modify_qp_lag_port) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->modify_qp_lag_port(qp, port_num); | 
|  |  | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_modify_qp_udp_sport(struct ibv_qp *qp, uint16_t udp_sport) | 
|  | { | 
|  | uint32_t in[DEVX_ST_SZ_DW(rts2rts_qp_in)] = {}; | 
|  | uint32_t out[DEVX_ST_SZ_DW(rts2rts_qp_out)] = {}; | 
|  | struct mlx5_context *mctx = to_mctx(qp->context); | 
|  | int ret; | 
|  |  | 
|  | switch (qp->qp_type) { | 
|  | case IBV_QPT_RC: | 
|  | case IBV_QPT_UC: | 
|  | if (qp->state != IBV_QPS_RTS || | 
|  | !mctx->entropy_caps.rts2rts_qp_udp_sport) | 
|  | return EOPNOTSUPP; | 
|  | break; | 
|  | default: | 
|  | return EOPNOTSUPP; | 
|  | } | 
|  | DEVX_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpn, qp->qp_num); | 
|  | DEVX_SET64(rts2rts_qp_in, in, opt_param_mask_95_32, | 
|  | MLX5_QPC_OPT_MASK_32_UDP_SPORT); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpc.primary_address_path.udp_sport, | 
|  | udp_sport); | 
|  |  | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_modify_qp_udp_sport(struct ibv_qp *qp, uint16_t udp_sport) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(qp->context); | 
|  |  | 
|  | if (!dvops || !dvops->modify_qp_udp_sport) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->modify_qp_udp_sport(qp, udp_sport); | 
|  | } | 
|  |  | 
|  | int mlx5dv_dci_stream_id_reset(struct ibv_qp *qp, uint16_t stream_id) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(rts2rts_qp_out)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(rts2rts_qp_in)] = {}; | 
|  | struct mlx5_context *mctx = to_mctx(qp->context); | 
|  | struct mlx5_qp *mqp = to_mqp(qp); | 
|  | void *qpce = DEVX_ADDR_OF(rts2rts_qp_in, in, qpc_data_ext); | 
|  | int ret; | 
|  |  | 
|  | if (!is_mlx5_dev(qp->context->device) || | 
|  | !mctx->dci_streams_caps.max_log_num_errored || | 
|  | !mctx->qpc_extension_cap || | 
|  | qp->state != IBV_QPS_RTS) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | if ((mqp->dc_type != MLX5DV_DCTYPE_DCI) || (qp->qp_type != IBV_QPT_DRIVER)) | 
|  | return EINVAL; | 
|  |  | 
|  | DEVX_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpn, qp->qp_num); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpc_ext, 1); | 
|  | DEVX_SET64(rts2rts_qp_in, in, opt_param_mask_95_32, | 
|  | MLX5_QPC_OPT_MASK_32_DCI_STREAM_CHANNEL_ID); | 
|  |  | 
|  | DEVX_SET(qpc_ext, qpce, dci_stream_channel_id, stream_id); | 
|  |  | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | static bool sched_supported(struct ibv_context *ctx) | 
|  | { | 
|  | struct mlx5_qos_caps *qc = &to_mctx(ctx)->qos_caps; | 
|  |  | 
|  | return (qc->qos && | 
|  | (qc->nic_element_type & ELEMENT_TYPE_CAP_MASK_TASR) && | 
|  | (qc->nic_element_type & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP) && | 
|  | (qc->nic_tsar_type & TSAR_TYPE_CAP_MASK_DWRR)); | 
|  | } | 
|  |  | 
|  | static struct mlx5dv_devx_obj * | 
|  | mlx5dv_sched_nic_create(struct ibv_context *ctx, | 
|  | const struct mlx5dv_sched_attr *sched_attr, | 
|  | int elem_type) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(create_sched_elem_in)] = {}; | 
|  | struct mlx5dv_devx_obj *obj; | 
|  | uint32_t parent_id; | 
|  | void *attr; | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, hdr); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, obj_type, MLX5_OBJ_TYPE_SCHEDULING_ELEMENT); | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, sched_elem); | 
|  | DEVX_SET64(sched_elem, attr, modify_field_select, sched_attr->flags); | 
|  | DEVX_SET(sched_elem, attr, | 
|  | scheduling_hierarchy, MLX5_SCHED_HIERARCHY_NIC); | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, sched_elem.sched_context); | 
|  | DEVX_SET(sched_context, attr, element_type, elem_type); | 
|  |  | 
|  | parent_id = sched_attr->parent ? sched_attr->parent->obj->object_id : 0; | 
|  | DEVX_SET(sched_context, attr, parent_element_id, parent_id); | 
|  | if (sched_attr->flags & MLX5DV_SCHED_ELEM_ATTR_FLAGS_BW_SHARE) | 
|  | DEVX_SET(sched_context, attr, bw_share, sched_attr->bw_share); | 
|  | if (sched_attr->flags & MLX5DV_SCHED_ELEM_ATTR_FLAGS_MAX_AVG_BW) | 
|  | DEVX_SET(sched_context, attr, | 
|  | max_average_bw, sched_attr->max_avg_bw); | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, | 
|  | sched_elem.sched_context.sched_elem_attr); | 
|  | DEVX_SET(sched_elem_attr_tsar, attr, tsar_type, | 
|  | MLX5_SCHED_TSAR_TYPE_DWRR); | 
|  |  | 
|  | obj = mlx5dv_devx_obj_create(ctx, in, sizeof(in), out, sizeof(out)); | 
|  | if (!obj) | 
|  | errno = mlx5_get_cmd_status_err(errno, out); | 
|  | return obj; | 
|  | } | 
|  |  | 
|  | static int | 
|  | mlx5dv_sched_nic_modify(struct mlx5dv_devx_obj *obj, | 
|  | const struct mlx5dv_sched_attr *sched_attr, | 
|  | int elem_type) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(create_sched_elem_in)] = {}; | 
|  | void *attr; | 
|  | int ret; | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, hdr); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, obj_type, MLX5_OBJ_TYPE_SCHEDULING_ELEMENT); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, in, obj_id, obj->object_id); | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, sched_elem); | 
|  | DEVX_SET64(sched_elem, attr, modify_field_select, sched_attr->flags); | 
|  | DEVX_SET(sched_elem, attr, | 
|  | scheduling_hierarchy, MLX5_SCHED_HIERARCHY_NIC); | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, sched_elem.sched_context); | 
|  | DEVX_SET(sched_context, attr, element_type, elem_type); | 
|  | if (sched_attr->flags & MLX5DV_SCHED_ELEM_ATTR_FLAGS_BW_SHARE) | 
|  | DEVX_SET(sched_context, attr, bw_share, sched_attr->bw_share); | 
|  | if (sched_attr->flags & MLX5DV_SCHED_ELEM_ATTR_FLAGS_MAX_AVG_BW) | 
|  | DEVX_SET(sched_context, attr, | 
|  | max_average_bw, sched_attr->max_avg_bw); | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_sched_elem_in, in, | 
|  | sched_elem.sched_context.sched_elem_attr); | 
|  | DEVX_SET(sched_elem_attr_tsar, attr, tsar_type, | 
|  | MLX5_SCHED_TSAR_TYPE_DWRR); | 
|  |  | 
|  | ret = mlx5dv_devx_obj_modify(obj, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
/* Union of the scheduling-element attribute flag bits known to this file. */
#define MLX5DV_SCHED_ELEM_ATTR_ALL_FLAGS		\
	(MLX5DV_SCHED_ELEM_ATTR_FLAGS_MAX_AVG_BW |	\
	 MLX5DV_SCHED_ELEM_ATTR_FLAGS_BW_SHARE)
|  |  | 
|  | static bool attr_supported(struct ibv_context *ctx, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5_qos_caps *qc = &to_mctx(ctx)->qos_caps; | 
|  |  | 
|  | if ((attr->flags & MLX5DV_SCHED_ELEM_ATTR_FLAGS_BW_SHARE) && | 
|  | !qc->nic_bw_share) | 
|  | return false; | 
|  | if ((attr->flags & MLX5DV_SCHED_ELEM_ATTR_FLAGS_MAX_AVG_BW) && | 
|  | !qc->nic_rate_limit) | 
|  | return false; | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static bool sched_attr_valid(const struct mlx5dv_sched_attr *attr, bool node) | 
|  | { | 
|  | if (!attr || attr->comp_mask || | 
|  | !check_comp_mask(attr->flags, MLX5DV_SCHED_ELEM_ATTR_ALL_FLAGS)) | 
|  | return false; | 
|  | if (node && (!attr->parent && attr->flags)) | 
|  | return false; | 
|  | if (!node && !attr->parent) | 
|  | return false; | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | static struct mlx5dv_sched_node * | 
|  | _mlx5dv_sched_node_create(struct ibv_context *ctx, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5dv_sched_node *node; | 
|  | struct mlx5dv_devx_obj *obj; | 
|  |  | 
|  | if (!sched_attr_valid(attr, true)) { | 
|  | errno = EINVAL; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | if (!sched_supported(ctx) || !attr_supported(ctx, attr)) { | 
|  | errno = EOPNOTSUPP; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | node = calloc(1, sizeof(*node)); | 
|  | if (!node) { | 
|  | errno = ENOMEM; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | obj = mlx5dv_sched_nic_create(ctx, attr, MLX5_SCHED_ELEM_TYPE_TSAR); | 
|  | if (!obj) | 
|  | goto err_sched_nic_create; | 
|  |  | 
|  | node->obj = obj; | 
|  | node->parent = attr->parent; | 
|  | return node; | 
|  |  | 
|  | err_sched_nic_create: | 
|  | free(node); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | struct mlx5dv_sched_node * | 
|  | mlx5dv_sched_node_create(struct ibv_context *ctx, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ctx); | 
|  |  | 
|  | if (!dvops || !dvops->sched_node_create) { | 
|  | errno = EOPNOTSUPP; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | return dvops->sched_node_create(ctx, attr); | 
|  | } | 
|  |  | 
|  | static struct mlx5dv_sched_leaf * | 
|  | _mlx5dv_sched_leaf_create(struct ibv_context *ctx, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5dv_sched_leaf *leaf; | 
|  | struct mlx5dv_devx_obj *obj; | 
|  |  | 
|  | if (!sched_attr_valid(attr, false)) { | 
|  | errno = EINVAL; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | if (!attr_supported(ctx, attr)) { | 
|  | errno = EOPNOTSUPP; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | leaf = calloc(1, sizeof(*leaf)); | 
|  | if (!leaf) { | 
|  | errno = ENOMEM; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | obj = mlx5dv_sched_nic_create(ctx, attr, | 
|  | MLX5_SCHED_ELEM_TYPE_QUEUE_GROUP); | 
|  | if (!obj) | 
|  | goto err_sched_nic_create; | 
|  |  | 
|  | leaf->obj = obj; | 
|  | leaf->parent = attr->parent; | 
|  | return leaf; | 
|  |  | 
|  | err_sched_nic_create: | 
|  | free(leaf); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | struct mlx5dv_sched_leaf * | 
|  | mlx5dv_sched_leaf_create(struct ibv_context *ctx, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ctx); | 
|  |  | 
|  | if (!dvops || !dvops->sched_leaf_create) { | 
|  | errno = EOPNOTSUPP; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | return dvops->sched_leaf_create(ctx, attr); | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_sched_node_modify(struct mlx5dv_sched_node *node, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | if (!node || !sched_attr_valid(attr, true)) { | 
|  | errno = EINVAL; | 
|  | return errno; | 
|  | } | 
|  |  | 
|  | if (!attr_supported(node->obj->context, attr)) { | 
|  | errno = EOPNOTSUPP; | 
|  | return errno; | 
|  | } | 
|  |  | 
|  | return mlx5dv_sched_nic_modify(node->obj, attr, | 
|  | MLX5_SCHED_ELEM_TYPE_TSAR); | 
|  | } | 
|  |  | 
|  | int mlx5dv_sched_node_modify(struct mlx5dv_sched_node *node, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(node->obj->context); | 
|  |  | 
|  | if (!dvops || !dvops->sched_node_modify) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->sched_node_modify(node, attr); | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_sched_leaf_modify(struct mlx5dv_sched_leaf *leaf, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | if (!leaf || !sched_attr_valid(attr, false)) { | 
|  | errno = EINVAL; | 
|  | return errno; | 
|  | } | 
|  |  | 
|  | if (!attr_supported(leaf->obj->context, attr)) { | 
|  | errno = EOPNOTSUPP; | 
|  | return errno; | 
|  | } | 
|  |  | 
|  | return mlx5dv_sched_nic_modify(leaf->obj, attr, | 
|  | MLX5_SCHED_ELEM_TYPE_QUEUE_GROUP); | 
|  | } | 
|  |  | 
|  | int mlx5dv_sched_leaf_modify(struct mlx5dv_sched_leaf *leaf, | 
|  | const struct mlx5dv_sched_attr *attr) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(leaf->obj->context); | 
|  |  | 
|  | if (!dvops || !dvops->sched_leaf_modify) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->sched_leaf_modify(leaf, attr); | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_sched_node_destroy(struct mlx5dv_sched_node *node) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | ret = mlx5dv_devx_obj_destroy(node->obj); | 
|  | if (ret) | 
|  | return ret; | 
|  |  | 
|  | free(node); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_sched_node_destroy(struct mlx5dv_sched_node *node) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(node->obj->context); | 
|  |  | 
|  | if (!dvops || !dvops->sched_node_destroy) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->sched_node_destroy(node); | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_sched_leaf_destroy(struct mlx5dv_sched_leaf *leaf) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | ret = mlx5dv_devx_obj_destroy(leaf->obj); | 
|  | if (ret) | 
|  | return ret; | 
|  |  | 
|  | free(leaf); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_sched_leaf_destroy(struct mlx5dv_sched_leaf *leaf) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(leaf->obj->context); | 
|  |  | 
|  | if (!dvops || !dvops->sched_leaf_destroy) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->sched_leaf_destroy(leaf); | 
|  | } | 
|  |  | 
|  | static int modify_ib_qp_sched_elem_init(struct ibv_qp *qp, | 
|  | uint32_t req_id, uint32_t resp_id) | 
|  | { | 
|  | uint64_t mask = MLX5_QPC_OPT_MASK_32_QOS_QUEUE_GROUP_ID; | 
|  | uint32_t in[DEVX_ST_SZ_DW(init2init_qp_in)] = {}; | 
|  | uint32_t out[DEVX_ST_SZ_DW(init2init_qp_out)] = {}; | 
|  | void *qpce = DEVX_ADDR_OF(init2init_qp_in, in, qpc_data_ext); | 
|  | int ret; | 
|  |  | 
|  | DEVX_SET(init2init_qp_in, in, opcode, MLX5_CMD_OP_INIT2INIT_QP); | 
|  | DEVX_SET(init2init_qp_in, in, qpc_ext, 1); | 
|  | DEVX_SET(init2init_qp_in, in, qpn, qp->qp_num); | 
|  | DEVX_SET64(init2init_qp_in, in, opt_param_mask_95_32, mask); | 
|  |  | 
|  | DEVX_SET(qpc_ext, qpce, qos_queue_group_id_requester, req_id); | 
|  | DEVX_SET(qpc_ext, qpce, qos_queue_group_id_responder, resp_id); | 
|  |  | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | static int modify_ib_qp_sched_elem_rts(struct ibv_qp *qp, | 
|  | uint32_t req_id, uint32_t resp_id) | 
|  | { | 
|  | uint64_t mask = MLX5_QPC_OPT_MASK_32_QOS_QUEUE_GROUP_ID; | 
|  | uint32_t in[DEVX_ST_SZ_DW(rts2rts_qp_in)] = {}; | 
|  | uint32_t out[DEVX_ST_SZ_DW(rts2rts_qp_out)] = {}; | 
|  | void *qpce = DEVX_ADDR_OF(rts2rts_qp_in, in, qpc_data_ext); | 
|  | int ret; | 
|  |  | 
|  | DEVX_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpc_ext, 1); | 
|  | DEVX_SET(rts2rts_qp_in, in, qpn, qp->qp_num); | 
|  | DEVX_SET64(rts2rts_qp_in, in, opt_param_mask_95_32, mask); | 
|  |  | 
|  | DEVX_SET(qpc_ext, qpce, qos_queue_group_id_requester, req_id); | 
|  | DEVX_SET(qpc_ext, qpce, qos_queue_group_id_responder, resp_id); | 
|  |  | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | static int modify_ib_qp_sched_elem(struct ibv_qp *qp, | 
|  | uint32_t req_id, uint32_t resp_id) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | switch (qp->state) { | 
|  | case IBV_QPS_INIT: | 
|  | ret = modify_ib_qp_sched_elem_init(qp, req_id, resp_id); | 
|  | break; | 
|  |  | 
|  | case IBV_QPS_RTS: | 
|  | ret = modify_ib_qp_sched_elem_rts(qp, req_id, resp_id); | 
|  | break; | 
|  |  | 
|  | default: | 
|  | return EOPNOTSUPP; | 
|  | }; | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static int modify_raw_qp_sched_elem(struct ibv_qp *qp, uint32_t qos_id) | 
|  | { | 
|  | struct mlx5_qos_caps *qc = &to_mctx(qp->context)->qos_caps; | 
|  | uint32_t mout[DEVX_ST_SZ_DW(modify_sq_out)] = {}; | 
|  | uint32_t min[DEVX_ST_SZ_DW(modify_sq_in)] = {}; | 
|  | struct mlx5_qp *mqp = to_mqp(qp); | 
|  | void *sqc; | 
|  | int ret; | 
|  |  | 
|  | if (qp->state != IBV_QPS_RTS || !qc->nic_sq_scheduling) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | DEVX_SET(modify_sq_in, min, opcode, MLX5_CMD_OP_MODIFY_SQ); | 
|  | DEVX_SET(modify_sq_in, min, sq_state, MLX5_SQC_STATE_RDY); | 
|  | DEVX_SET(modify_sq_in, min, sqn, mqp->sqn); | 
|  | DEVX_SET64(modify_sq_in, min, modify_bitmask, | 
|  | MLX5_MODIFY_SQ_BITMASK_QOS_QUEUE_GROUP_ID); | 
|  | sqc = DEVX_ADDR_OF(modify_sq_in, min, sq_context); | 
|  | DEVX_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); | 
|  | DEVX_SET(sqc, sqc, qos_queue_group_id, qos_id); | 
|  |  | 
|  | ret = mlx5dv_devx_qp_modify(qp, min, sizeof(min), mout, sizeof(mout)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, mout) : 0; | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_modify_qp_sched_elem(struct ibv_qp *qp, | 
|  | const struct mlx5dv_sched_leaf *requestor, | 
|  | const struct mlx5dv_sched_leaf *responder) | 
|  | { | 
|  | struct mlx5_qos_caps *qc = &to_mctx(qp->context)->qos_caps; | 
|  |  | 
|  | switch (qp->qp_type) { | 
|  | case IBV_QPT_UC: | 
|  | case IBV_QPT_UD: | 
|  | if (responder) | 
|  | return EINVAL; | 
|  | SWITCH_FALLTHROUGH; | 
|  | case IBV_QPT_RC: | 
|  | if ((!to_mctx(qp->context)->qpc_extension_cap) || | 
|  | !(qc->nic_qp_scheduling)) | 
|  | return EOPNOTSUPP; | 
|  | return modify_ib_qp_sched_elem(qp, | 
|  | requestor ? requestor->obj->object_id : 0, | 
|  | responder ? responder->obj->object_id : 0); | 
|  | case IBV_QPT_RAW_PACKET: | 
|  | if (responder) | 
|  | return EINVAL; | 
|  | return modify_raw_qp_sched_elem(qp, | 
|  | requestor ? requestor->obj->object_id : 0); | 
|  | default: | 
|  | return EOPNOTSUPP; | 
|  | } | 
|  | } | 
|  |  | 
|  | int mlx5dv_modify_qp_sched_elem(struct ibv_qp *qp, | 
|  | const struct mlx5dv_sched_leaf *requestor, | 
|  | const struct mlx5dv_sched_leaf *responder) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(qp->context); | 
|  |  | 
|  | if (!dvops || !dvops->modify_qp_sched_elem) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->modify_qp_sched_elem(qp, requestor, responder); | 
|  | } | 
|  |  | 
|  | int mlx5_modify_qp_drain_sigerr(struct ibv_qp *qp) | 
|  | { | 
|  | uint64_t mask = MLX5_QPC_OPT_MASK_INIT2INIT_DRAIN_SIGERR; | 
|  | uint32_t in[DEVX_ST_SZ_DW(init2init_qp_in)] = {}; | 
|  | uint32_t out[DEVX_ST_SZ_DW(init2init_qp_out)] = {}; | 
|  | void *qpc = DEVX_ADDR_OF(init2init_qp_in, in, qpc); | 
|  | int ret; | 
|  |  | 
|  | DEVX_SET(init2init_qp_in, in, opcode, MLX5_CMD_OP_INIT2INIT_QP); | 
|  | DEVX_SET(init2init_qp_in, in, qpn, qp->qp_num); | 
|  | DEVX_SET(init2init_qp_in, in, opt_param_mask, mask); | 
|  |  | 
|  | DEVX_SET(qpc, qpc, drain_sigerr, 1); | 
|  |  | 
|  | ret = mlx5dv_devx_qp_modify(qp, in, sizeof(in), out, sizeof(out)); | 
|  | return ret ? mlx5_get_cmd_status_err(ret, out) : 0; | 
|  | } | 
|  |  | 
|  | static struct reserved_qpn_blk *reserved_qpn_blk_alloc(struct mlx5_context *mctx) | 
|  | { | 
|  | uint32_t out[DEVX_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; | 
|  | uint32_t in[DEVX_ST_SZ_DW(create_reserved_qpn_in)] = {}; | 
|  | struct reserved_qpn_blk *blk; | 
|  | void *attr; | 
|  |  | 
|  | blk = calloc(1, sizeof(*blk)); | 
|  | if (!blk) { | 
|  | errno = ENOMEM; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | blk->bmp = bitmap_alloc0(1 << mctx->hca_cap_2_caps.log_reserved_qpns_per_obj); | 
|  | if (!blk->bmp) { | 
|  | errno = ENOMEM; | 
|  | goto bmp_alloc_fail; | 
|  | } | 
|  |  | 
|  | attr = DEVX_ADDR_OF(create_reserved_qpn_in, in, hdr); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, obj_type, MLX5_OBJ_TYPE_RESERVED_QPN); | 
|  | DEVX_SET(general_obj_in_cmd_hdr, | 
|  | attr, log_obj_range, mctx->hca_cap_2_caps.log_reserved_qpns_per_obj); | 
|  |  | 
|  | blk->obj = mlx5dv_devx_obj_create(&mctx->ibv_ctx.context, | 
|  | in, sizeof(in), out, sizeof(out)); | 
|  | if (!blk->obj) { | 
|  | errno = mlx5_get_cmd_status_err(errno, out); | 
|  | goto obj_alloc_fail; | 
|  | } | 
|  |  | 
|  | blk->first_qpn = blk->obj->object_id; | 
|  | blk->next_avail_slot = 0; | 
|  |  | 
|  | return blk; | 
|  |  | 
|  | obj_alloc_fail: | 
|  | free(blk->bmp); | 
|  |  | 
|  | bmp_alloc_fail: | 
|  | free(blk); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static void reserved_qpn_blk_dealloc(struct reserved_qpn_blk *blk) | 
|  | { | 
|  | if (mlx5dv_devx_obj_destroy(blk->obj)) | 
|  | assert(false); | 
|  |  | 
|  | free(blk->bmp); | 
|  | free(blk); | 
|  | } | 
|  |  | 
|  | static void reserved_qpn_blks_free(struct mlx5_context *mctx) | 
|  | { | 
|  | struct reserved_qpn_blk *blk, *tmp; | 
|  |  | 
|  | pthread_mutex_lock(&mctx->reserved_qpns.mutex); | 
|  |  | 
|  | list_for_each_safe(&mctx->reserved_qpns.blk_list, | 
|  | blk, tmp, entry) { | 
|  | list_del(&blk->entry); | 
|  | reserved_qpn_blk_dealloc(blk); | 
|  | } | 
|  |  | 
|  | pthread_mutex_unlock(&mctx->reserved_qpns.mutex); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Allocate a reserved QPN either from the last FW object allocated, | 
|  | * or by allocating a new one. When find a free QPN in an object, it | 
|  | * always starts from last allocation position, to make sure the QPN | 
|  | * always move forward to prevent stale QPN. | 
|  | */ | 
|  | static int _mlx5dv_reserved_qpn_alloc(struct ibv_context *ctx, uint32_t *qpn) | 
|  | { | 
|  | struct mlx5_context *mctx = to_mctx(ctx); | 
|  | struct reserved_qpn_blk *blk; | 
|  | uint32_t qpns_per_obj; | 
|  | int ret = 0; | 
|  |  | 
|  | if (!(mctx->general_obj_types_caps & (1ULL << MLX5_OBJ_TYPE_RESERVED_QPN))) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | qpns_per_obj = 1 << mctx->hca_cap_2_caps.log_reserved_qpns_per_obj; | 
|  |  | 
|  | pthread_mutex_lock(&mctx->reserved_qpns.mutex); | 
|  |  | 
|  | blk = list_tail(&mctx->reserved_qpns.blk_list, | 
|  | struct reserved_qpn_blk, entry); | 
|  | if (!blk || | 
|  | (blk->next_avail_slot >= qpns_per_obj)) { | 
|  | blk = reserved_qpn_blk_alloc(mctx); | 
|  | if (!blk) { | 
|  | ret = errno; | 
|  | goto end; | 
|  | } | 
|  | list_add_tail(&mctx->reserved_qpns.blk_list, &blk->entry); | 
|  | } | 
|  |  | 
|  | *qpn = blk->first_qpn + blk->next_avail_slot; | 
|  | bitmap_set_bit(blk->bmp, blk->next_avail_slot); | 
|  | blk->next_avail_slot++; | 
|  |  | 
|  | end: | 
|  | pthread_mutex_unlock(&mctx->reserved_qpns.mutex); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int mlx5dv_reserved_qpn_alloc(struct ibv_context *ctx, uint32_t *qpn) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ctx); | 
|  |  | 
|  | if (!dvops || !dvops->reserved_qpn_alloc) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->reserved_qpn_alloc(ctx, qpn); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Deallocate a reserved QPN. The FW object is destroyed only when all QPNs | 
|  | * in this object were used and freed. | 
|  | */ | 
|  | static int _mlx5dv_reserved_qpn_dealloc(struct ibv_context *ctx, uint32_t qpn) | 
|  | { | 
|  | struct mlx5_context *mctx = to_mctx(ctx); | 
|  | struct reserved_qpn_blk *blk, *tmp; | 
|  | uint32_t qpns_per_obj; | 
|  | bool found = false; | 
|  | int ret = 0; | 
|  |  | 
|  | qpns_per_obj = 1 << mctx->hca_cap_2_caps.log_reserved_qpns_per_obj; | 
|  |  | 
|  | pthread_mutex_lock(&mctx->reserved_qpns.mutex); | 
|  |  | 
|  | list_for_each_safe(&mctx->reserved_qpns.blk_list, | 
|  | blk, tmp, entry) { | 
|  | if ((qpn >= blk->first_qpn) && | 
|  | (qpn < blk->first_qpn + qpns_per_obj)) { | 
|  | found = true; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (!found || !bitmap_test_bit(blk->bmp, qpn - blk->first_qpn)) { | 
|  | errno = EINVAL; | 
|  | ret = errno; | 
|  | goto end; | 
|  | } | 
|  |  | 
|  | bitmap_clear_bit(blk->bmp, qpn - blk->first_qpn); | 
|  | if ((blk->next_avail_slot >= qpns_per_obj) && | 
|  | (bitmap_empty(blk->bmp, qpns_per_obj))) { | 
|  | list_del(&blk->entry); | 
|  | reserved_qpn_blk_dealloc(blk); | 
|  | } | 
|  |  | 
|  | end: | 
|  | pthread_mutex_unlock(&mctx->reserved_qpns.mutex); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | int mlx5dv_reserved_qpn_dealloc(struct ibv_context *ctx, uint32_t qpn) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ctx); | 
|  |  | 
|  | if (!dvops || !dvops->reserved_qpn_dealloc) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->reserved_qpn_dealloc(ctx, qpn); | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type) | 
|  | { | 
|  | int ret = 0; | 
|  |  | 
|  | if (obj_type & MLX5DV_OBJ_QP) | 
|  | ret = mlx5dv_get_qp(obj->qp.in, obj->qp.out); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_CQ)) | 
|  | ret = mlx5dv_get_cq(obj->cq.in, obj->cq.out); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_SRQ)) | 
|  | ret = mlx5dv_get_srq(obj->srq.in, obj->srq.out); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_RWQ)) | 
|  | ret = mlx5dv_get_rwq(obj->rwq.in, obj->rwq.out); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_DM)) | 
|  | ret = mlx5dv_get_dm(obj->dm.in, obj->dm.out); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_AH)) | 
|  | ret = mlx5dv_get_av(obj->ah.in, obj->ah.out); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_PD)) | 
|  | ret = mlx5dv_get_pd(obj->pd.in, obj->pd.out); | 
|  |  | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static struct ibv_context * | 
|  | get_context_from_obj(struct mlx5dv_obj *obj, uint64_t obj_type) | 
|  | { | 
|  | if (obj_type & MLX5DV_OBJ_QP) | 
|  | return obj->qp.in->context; | 
|  | if (obj_type & MLX5DV_OBJ_CQ) | 
|  | return obj->cq.in->context; | 
|  | if (obj_type & MLX5DV_OBJ_SRQ) | 
|  | return obj->srq.in->context; | 
|  | if (obj_type & MLX5DV_OBJ_RWQ) | 
|  | return obj->rwq.in->context; | 
|  | if (obj_type & MLX5DV_OBJ_DM) | 
|  | return obj->dm.in->context; | 
|  | if (obj_type & MLX5DV_OBJ_AH) | 
|  | return obj->ah.in->context; | 
|  | if (obj_type & MLX5DV_OBJ_PD) | 
|  | return obj->pd.in->context; | 
|  |  | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | LATEST_SYMVER_FUNC(mlx5dv_init_obj, 1_2, "MLX5_1.2", | 
|  | int, | 
|  | struct mlx5dv_obj *obj, uint64_t obj_type) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops; | 
|  | struct ibv_context *ctx; | 
|  |  | 
|  | ctx = get_context_from_obj(obj, obj_type); | 
|  | if (!ctx) | 
|  | return EINVAL; | 
|  |  | 
|  | dvops = mlx5_get_dv_ops(ctx); | 
|  |  | 
|  | if (!dvops || !dvops->init_obj) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->init_obj(obj, obj_type); | 
|  | } | 
|  |  | 
|  | COMPAT_SYMVER_FUNC(mlx5dv_init_obj, 1_0, "MLX5_1.0", | 
|  | int, | 
|  | struct mlx5dv_obj *obj, uint64_t obj_type) | 
|  | { | 
|  | int ret = 0; | 
|  |  | 
|  | ret = __mlx5dv_init_obj_1_2(obj, obj_type); | 
|  | if (!ret && (obj_type & MLX5DV_OBJ_CQ)) { | 
|  | /* ABI version 1.0 returns the void ** in this memory | 
|  | * location | 
|  | */ | 
|  | obj->cq.out->cq_uar = &(to_mctx(obj->cq.in->context)->cq_uar_reg); | 
|  | } | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | off_t get_uar_mmap_offset(int idx, int page_size, int command) | 
|  | { | 
|  | off_t offset = 0; | 
|  |  | 
|  | set_command(command, &offset); | 
|  |  | 
|  | if (command == MLX5_IB_MMAP_ALLOC_WC && | 
|  | idx >= (1 << MLX5_IB_MMAP_CMD_SHIFT)) | 
|  | set_extended_index(idx, &offset); | 
|  | else | 
|  | set_index(idx, &offset); | 
|  |  | 
|  | return offset * page_size; | 
|  | } | 
|  |  | 
|  | static off_t uar_type_to_cmd(int uar_type) | 
|  | { | 
|  | return (uar_type == MLX5_UAR_TYPE_NC) ? MLX5_MMAP_GET_NC_PAGES_CMD : | 
|  | MLX5_MMAP_GET_REGULAR_PAGES_CMD; | 
|  | } | 
|  |  | 
|  | void *mlx5_mmap(struct mlx5_uar_info *uar, int index, int cmd_fd, int page_size, | 
|  | int uar_type) | 
|  | { | 
|  | off_t offset; | 
|  |  | 
|  | if (uar_type == MLX5_UAR_TYPE_NC) { | 
|  | offset = get_uar_mmap_offset(index, page_size, | 
|  | MLX5_MMAP_GET_NC_PAGES_CMD); | 
|  | uar->reg = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, | 
|  | cmd_fd, offset); | 
|  | if (uar->reg != MAP_FAILED) { | 
|  | uar->type = MLX5_UAR_TYPE_NC; | 
|  | goto out; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* Backward compatibility for legacy kernels that don't support | 
|  | * MLX5_MMAP_GET_NC_PAGES_CMD mmap command. | 
|  | */ | 
|  | offset = get_uar_mmap_offset(index, page_size, | 
|  | (uar_type == MLX5_UAR_TYPE_REGULAR_DYN) ? | 
|  | MLX5_IB_MMAP_ALLOC_WC : | 
|  | MLX5_MMAP_GET_REGULAR_PAGES_CMD); | 
|  | uar->reg = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, | 
|  | cmd_fd, offset); | 
|  | if (uar->reg != MAP_FAILED) | 
|  | uar->type = MLX5_UAR_TYPE_REGULAR; | 
|  |  | 
|  | out: | 
|  | return uar->reg; | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_set_context_attr(struct ibv_context *ibv_ctx, | 
|  | enum mlx5dv_set_ctx_attr_type type, | 
|  | void *attr) | 
|  | { | 
|  | struct mlx5_context *ctx = to_mctx(ibv_ctx); | 
|  |  | 
|  | switch (type) { | 
|  | case MLX5DV_CTX_ATTR_BUF_ALLOCATORS: | 
|  | ctx->extern_alloc = *((struct mlx5dv_ctx_allocators *)attr); | 
|  | break; | 
|  | default: | 
|  | return ENOTSUP; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_set_context_attr(struct ibv_context *ibv_ctx, | 
|  | enum mlx5dv_set_ctx_attr_type type, void *attr) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ibv_ctx); | 
|  |  | 
|  | if (!dvops || !dvops->set_context_attr) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->set_context_attr(ibv_ctx, type, attr); | 
|  | } | 
|  |  | 
|  | static int _mlx5dv_get_clock_info(struct ibv_context *ctx_in, | 
|  | struct mlx5dv_clock_info *clock_info) | 
|  | { | 
|  | struct mlx5_context *ctx = to_mctx(ctx_in); | 
|  | const struct mlx5_ib_clock_info *ci; | 
|  | uint32_t retry, tmp_sig; | 
|  | atomic_uint32_t *sig; | 
|  |  | 
|  | if (!is_mlx5_dev(ctx_in->device)) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | ci = ctx->clock_info_page; | 
|  |  | 
|  | if (!ci) | 
|  | return EINVAL; | 
|  |  | 
|  | sig = (atomic_uint32_t *)&ci->sign; | 
|  |  | 
|  | do { | 
|  | retry = 10; | 
|  | repeat: | 
|  | tmp_sig = atomic_load(sig); | 
|  | if (unlikely(tmp_sig & | 
|  | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING)) { | 
|  | if (--retry) | 
|  | goto repeat; | 
|  | return EBUSY; | 
|  | } | 
|  | clock_info->nsec   = ci->nsec; | 
|  | clock_info->last_cycles = ci->cycles; | 
|  | clock_info->frac   = ci->frac; | 
|  | clock_info->mult   = ci->mult; | 
|  | clock_info->shift  = ci->shift; | 
|  | clock_info->mask   = ci->mask; | 
|  | } while (unlikely(tmp_sig != atomic_load(sig))); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int mlx5dv_get_clock_info(struct ibv_context *ctx_in, | 
|  | struct mlx5dv_clock_info *clock_info) | 
|  | { | 
|  | struct mlx5_dv_context_ops *dvops = mlx5_get_dv_ops(ctx_in); | 
|  |  | 
|  | if (!dvops || !dvops->get_clock_info) | 
|  | return EOPNOTSUPP; | 
|  |  | 
|  | return dvops->get_clock_info(ctx_in, clock_info); | 
|  | } | 
|  |  | 
|  | static struct mlx5_dv_context_ops mlx5_dv_ctx_ops = { | 
|  | .query_device = _mlx5dv_query_device, | 
|  |  | 
|  | .query_qp_lag_port = _mlx5dv_query_qp_lag_port, | 
|  | .modify_qp_lag_port = _mlx5dv_modify_qp_lag_port, | 
|  |  | 
|  | .modify_qp_udp_sport = _mlx5dv_modify_qp_udp_sport, | 
|  |  | 
|  | .sched_node_create = _mlx5dv_sched_node_create, | 
|  | .sched_leaf_create = _mlx5dv_sched_leaf_create, | 
|  | .sched_node_modify = _mlx5dv_sched_node_modify, | 
|  | .sched_leaf_modify = _mlx5dv_sched_leaf_modify, | 
|  | .sched_node_destroy = _mlx5dv_sched_node_destroy, | 
|  | .sched_leaf_destroy = _mlx5dv_sched_leaf_destroy, | 
|  | .modify_qp_sched_elem = _mlx5dv_modify_qp_sched_elem, | 
|  |  | 
|  | .reserved_qpn_alloc = _mlx5dv_reserved_qpn_alloc, | 
|  | .reserved_qpn_dealloc = _mlx5dv_reserved_qpn_dealloc, | 
|  |  | 
|  | .set_context_attr = _mlx5dv_set_context_attr, | 
|  | .get_clock_info = _mlx5dv_get_clock_info, | 
|  | .init_obj = _mlx5dv_init_obj, | 
|  | }; | 
|  |  | 
|  | static void adjust_uar_info(struct mlx5_device *mdev, | 
|  | struct mlx5_context *context, | 
|  | struct mlx5_ib_alloc_ucontext_resp *resp) | 
|  | { | 
|  | if (!resp->log_uar_size && !resp->num_uars_per_page) { | 
|  | /* old kernel */ | 
|  | context->uar_size = mdev->page_size; | 
|  | context->num_uars_per_page = 1; | 
|  | return; | 
|  | } | 
|  |  | 
|  | context->uar_size = 1 << resp->log_uar_size; | 
|  | context->num_uars_per_page = resp->num_uars_per_page; | 
|  | } | 
|  |  | 
/* Report whether @device is driven by this mlx5 provider. */
bool mlx5dv_is_supported(struct ibv_device *device)
{
	bool is_mlx5 = is_mlx5_dev(device);

	return is_mlx5;
}
|  |  | 
/*
 * Open @device through verbs_open_device(), passing @attr as the provider
 * private data.  Fails with NULL and errno = EOPNOTSUPP when @device is not
 * an mlx5 device.
 */
struct ibv_context *
mlx5dv_open_device(struct ibv_device *device, struct mlx5dv_context_attr *attr)
{
	if (is_mlx5_dev(device))
		return verbs_open_device(device, attr);

	errno = EOPNOTSUPP;
	return NULL;
}
|  |  | 
|  | static int get_uar_info(struct mlx5_device *mdev, | 
|  | int *tot_uuars, int *low_lat_uuars) | 
|  | { | 
|  | *tot_uuars = get_total_uuars(mdev->page_size); | 
|  | if (*tot_uuars < 0) { | 
|  | errno = -*tot_uuars; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | *low_lat_uuars = get_num_low_lat_uuars(*tot_uuars); | 
|  | if (*low_lat_uuars < 0) { | 
|  | errno = -*low_lat_uuars; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | if (*low_lat_uuars > *tot_uuars - 1) { | 
|  | errno = ENOMEM; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void mlx5_uninit_context(struct mlx5_context *context) | 
|  | { | 
|  | mlx5_close_debug_file(context->dbg_fp); | 
|  |  | 
|  | verbs_uninit_context(&context->ibv_ctx); | 
|  | free(context); | 
|  | } | 
|  |  | 
|  | static struct mlx5_context *mlx5_init_context(struct ibv_device *ibdev, | 
|  | int cmd_fd) | 
|  | { | 
|  | struct mlx5_device *mdev = to_mdev(ibdev); | 
|  | struct mlx5_context *context; | 
|  | int low_lat_uuars; | 
|  | int tot_uuars; | 
|  | int ret; | 
|  |  | 
|  | context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx, | 
|  | RDMA_DRIVER_MLX5); | 
|  | if (!context) | 
|  | return NULL; | 
|  |  | 
|  | mlx5_open_debug_file(&context->dbg_fp); | 
|  | mlx5_set_debug_mask(); | 
|  | set_freeze_on_error(); | 
|  | if (gethostname(context->hostname, sizeof(context->hostname))) | 
|  | strcpy(context->hostname, "host_unknown"); | 
|  |  | 
|  | mlx5_single_threaded = single_threaded_app(); | 
|  |  | 
|  | ret = get_uar_info(mdev, &tot_uuars, &low_lat_uuars); | 
|  | if (ret) { | 
|  | mlx5_uninit_context(context); | 
|  | return NULL; | 
|  | } | 
|  | context->tot_uuars = tot_uuars; | 
|  | context->low_lat_uuars = low_lat_uuars; | 
|  |  | 
|  | return context; | 
|  | } | 
|  |  | 
|  | static int mlx5_set_context(struct mlx5_context *context, | 
|  | struct mlx5_ib_alloc_ucontext_resp *resp, | 
|  | bool is_import) | 
|  | { | 
|  | struct verbs_context *v_ctx = &context->ibv_ctx; | 
|  | struct ibv_port_attr port_attr = {}; | 
|  | int cmd_fd = v_ctx->context.cmd_fd; | 
|  | struct mlx5_device *mdev = to_mdev(v_ctx->context.device); | 
|  | struct ibv_device *ibdev = v_ctx->context.device; | 
|  | int page_size = mdev->page_size; | 
|  | int num_sys_page_map; | 
|  | int gross_uuars; | 
|  | int bfi; | 
|  | int i, k, j; | 
|  |  | 
|  | context->max_num_qps = resp->qp_tab_size; | 
|  | context->bf_reg_size = resp->bf_reg_size; | 
|  | context->cache_line_size = resp->cache_line_size; | 
|  | context->max_sq_desc_sz = resp->max_sq_desc_sz; | 
|  | context->max_rq_desc_sz = resp->max_rq_desc_sz; | 
|  | context->max_send_wqebb	= resp->max_send_wqebb; | 
|  | context->num_ports = resp->num_ports; | 
|  | context->max_recv_wr = resp->max_recv_wr; | 
|  | context->max_srq_recv_wr = resp->max_srq_recv_wr; | 
|  | context->num_dyn_bfregs = resp->num_dyn_bfregs; | 
|  |  | 
|  | if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE) | 
|  | context->flags |= MLX5_CTX_FLAGS_ECE_SUPPORTED; | 
|  |  | 
|  | if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS) | 
|  | context->flags |= MLX5_CTX_FLAGS_SQD2RTS_SUPPORTED; | 
|  |  | 
|  | if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS) | 
|  | context->flags |= MLX5_CTX_FLAGS_REAL_TIME_TS_SUPPORTED; | 
|  |  | 
|  | if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY) { | 
|  | context->dump_fill_mkey = resp->dump_fill_mkey; | 
|  | /* Have the BE value ready to be used in data path */ | 
|  | context->dump_fill_mkey_be = htobe32(resp->dump_fill_mkey); | 
|  | } else { | 
|  | /* kernel driver will never return MLX5_INVALID_LKEY for | 
|  | * dump_fill_mkey | 
|  | */ | 
|  | context->dump_fill_mkey = MLX5_INVALID_LKEY; | 
|  | context->dump_fill_mkey_be = htobe32(MLX5_INVALID_LKEY); | 
|  | } | 
|  |  | 
|  | context->cqe_version = resp->cqe_version; | 
|  | adjust_uar_info(mdev, context, resp); | 
|  |  | 
|  | context->cmds_supp_uhw = resp->cmds_supp_uhw; | 
|  | context->vendor_cap_flags = 0; | 
|  | list_head_init(&context->dyn_uar_bf_list); | 
|  | list_head_init(&context->dyn_uar_qp_shared_list); | 
|  | list_head_init(&context->dyn_uar_qp_dedicated_list); | 
|  |  | 
|  | if (resp->eth_min_inline) | 
|  | context->eth_min_inline_size = (resp->eth_min_inline == MLX5_USER_INLINE_MODE_NONE) ? | 
|  | 0 : MLX5_ETH_L2_INLINE_HEADER_SIZE; | 
|  | else | 
|  | context->eth_min_inline_size = MLX5_ETH_L2_INLINE_HEADER_SIZE; | 
|  |  | 
|  | pthread_mutex_init(&context->qp_table_mutex, NULL); | 
|  | pthread_mutex_init(&context->srq_table_mutex, NULL); | 
|  | pthread_mutex_init(&context->uidx_table_mutex, NULL); | 
|  | pthread_mutex_init(&context->mkey_table_mutex, NULL); | 
|  | pthread_mutex_init(&context->dyn_bfregs_mutex, NULL); | 
|  | pthread_mutex_init(&context->crypto_login_mutex, NULL); | 
|  | for (i = 0; i < MLX5_QP_TABLE_SIZE; ++i) | 
|  | context->qp_table[i].refcnt = 0; | 
|  |  | 
|  | for (i = 0; i < MLX5_QP_TABLE_SIZE; ++i) | 
|  | context->uidx_table[i].refcnt = 0; | 
|  |  | 
|  | for (i = 0; i < MLX5_MKEY_TABLE_SIZE; ++i) | 
|  | context->mkey_table[i].refcnt = 0; | 
|  |  | 
|  | list_head_init(&context->dbr_available_pages); | 
|  | cl_qmap_init(&context->dbr_map); | 
|  |  | 
|  | pthread_mutex_init(&context->dbr_map_mutex, NULL); | 
|  |  | 
|  | context->prefer_bf = get_always_bf(); | 
|  | context->shut_up_bf = get_shut_up_bf(); | 
|  |  | 
|  | if (resp->tot_bfregs) { | 
|  | if (is_import) { | 
|  | errno = EINVAL; | 
|  | return EINVAL; | 
|  | } | 
|  | context->tot_uuars = resp->tot_bfregs; | 
|  | gross_uuars = context->tot_uuars / MLX5_NUM_NON_FP_BFREGS_PER_UAR * NUM_BFREGS_PER_UAR; | 
|  | context->bfs = calloc(gross_uuars, sizeof(*context->bfs)); | 
|  | if (!context->bfs) { | 
|  | errno = ENOMEM; | 
|  | goto err_free; | 
|  | } | 
|  | context->flags |= MLX5_CTX_FLAGS_NO_KERN_DYN_UAR; | 
|  | } else { | 
|  | context->qp_max_dedicated_uuars = context->low_lat_uuars; | 
|  | context->qp_max_shared_uuars = context->tot_uuars - context->low_lat_uuars; | 
|  | goto bf_done; | 
|  | } | 
|  |  | 
|  | context->max_num_legacy_dyn_uar_sys_page = context->num_dyn_bfregs / | 
|  | (context->num_uars_per_page * MLX5_NUM_NON_FP_BFREGS_PER_UAR); | 
|  | num_sys_page_map = context->tot_uuars / (context->num_uars_per_page * MLX5_NUM_NON_FP_BFREGS_PER_UAR); | 
|  | for (i = 0; i < num_sys_page_map; ++i) { | 
|  | if (mlx5_mmap(&context->uar[i], i, cmd_fd, page_size, | 
|  | context->shut_up_bf ? MLX5_UAR_TYPE_NC : | 
|  | MLX5_UAR_TYPE_REGULAR) == MAP_FAILED) { | 
|  | context->uar[i].reg = NULL; | 
|  | goto err_free_bf; | 
|  | } | 
|  | } | 
|  |  | 
|  | for (i = 0; i < num_sys_page_map; i++) { | 
|  | for (j = 0; j < context->num_uars_per_page; j++) { | 
|  | for (k = 0; k < NUM_BFREGS_PER_UAR; k++) { | 
|  | bfi = (i * context->num_uars_per_page + j) * NUM_BFREGS_PER_UAR + k; | 
|  | context->bfs[bfi].reg = context->uar[i].reg + MLX5_ADAPTER_PAGE_SIZE * j + | 
|  | MLX5_BF_OFFSET + k * context->bf_reg_size; | 
|  | context->bfs[bfi].need_lock = need_uuar_lock(context, bfi); | 
|  | mlx5_spinlock_init(&context->bfs[bfi].lock, context->bfs[bfi].need_lock); | 
|  | context->bfs[bfi].offset = 0; | 
|  | if (bfi) | 
|  | context->bfs[bfi].buf_size = context->bf_reg_size / 2; | 
|  | context->bfs[bfi].uuarn = bfi; | 
|  | context->bfs[bfi].uar_mmap_offset = | 
|  | get_uar_mmap_offset(i, page_size, | 
|  | uar_type_to_cmd(context->uar[i].type)); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bf_done: | 
|  |  | 
|  | context->hca_core_clock = NULL; | 
|  | if (resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET) { | 
|  | context->core_clock.offset = resp->hca_core_clock_offset; | 
|  | mlx5_map_internal_clock(mdev, &v_ctx->context); | 
|  | } | 
|  |  | 
|  | context->clock_info_page = NULL; | 
|  | if ((resp->clock_info_versions & (1 << MLX5_IB_CLOCK_INFO_V1))) | 
|  | mlx5_map_clock_info(mdev, &v_ctx->context); | 
|  |  | 
|  | context->flow_action_flags = resp->flow_action_flags; | 
|  |  | 
|  | mlx5_read_env(ibdev, context); | 
|  |  | 
|  | mlx5_spinlock_init(&context->hugetlb_lock, !mlx5_single_threaded); | 
|  | list_head_init(&context->hugetlb_list); | 
|  |  | 
|  | verbs_set_ops(v_ctx, &mlx5_ctx_common_ops); | 
|  | if (context->cqe_version) { | 
|  | if (context->cqe_version == MLX5_CQE_VERSION_V1) | 
|  | verbs_set_ops(v_ctx, &mlx5_ctx_cqev1_ops); | 
|  | else | 
|  | goto err_free; | 
|  | } | 
|  | context->dv_ctx_ops = &mlx5_dv_ctx_ops; | 
|  |  | 
|  | mlx5_query_device_ctx(context); | 
|  |  | 
|  | for (j = 0; j < min(MLX5_MAX_PORTS_NUM, context->num_ports); ++j) { | 
|  | memset(&port_attr, 0, sizeof(port_attr)); | 
|  | if (!mlx5_query_port(&v_ctx->context, j + 1, &port_attr)) { | 
|  | context->cached_link_layer[j] = port_attr.link_layer; | 
|  | context->cached_port_flags[j] = port_attr.flags; | 
|  | } | 
|  | } | 
|  |  | 
|  | mlx5_set_singleton_nc_uar(&v_ctx->context); | 
|  | context->cq_uar_reg = context->nc_uar ? context->nc_uar->uar : context->uar[0].reg; | 
|  |  | 
|  | pthread_mutex_init(&context->reserved_qpns.mutex, NULL); | 
|  | list_head_init(&context->reserved_qpns.blk_list); | 
|  |  | 
|  | return 0; | 
|  |  | 
|  | err_free_bf: | 
|  | free(context->bfs); | 
|  |  | 
|  | err_free: | 
|  | for (i = 0; i < MLX5_MAX_UARS; ++i) { | 
|  | if (context->uar[i].reg) | 
|  | munmap(context->uar[i].reg, page_size); | 
|  | } | 
|  |  | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static struct verbs_context *mlx5_alloc_context(struct ibv_device *ibdev, | 
|  | int cmd_fd, | 
|  | void *private_data) | 
|  | { | 
|  | struct mlx5_context	       *context; | 
|  | struct mlx5_alloc_ucontext	req = {}; | 
|  | struct mlx5_alloc_ucontext_resp resp = {}; | 
|  | struct mlx5dv_context_attr      *ctx_attr = private_data; | 
|  | bool				always_devx = false; | 
|  | int ret; | 
|  |  | 
|  | context = mlx5_init_context(ibdev, cmd_fd); | 
|  | if (!context) | 
|  | return NULL; | 
|  |  | 
|  | if (ctx_attr && ctx_attr->comp_mask) { | 
|  | errno = EINVAL; | 
|  | goto err; | 
|  | } | 
|  |  | 
|  | req.total_num_bfregs = context->tot_uuars; | 
|  | req.num_low_latency_bfregs = context->low_lat_uuars; | 
|  | req.max_cqe_version = MLX5_CQE_VERSION_V1; | 
|  | req.lib_caps |= (MLX5_LIB_CAP_4K_UAR | MLX5_LIB_CAP_DYN_UAR); | 
|  | if (ctx_attr && ctx_attr->flags) { | 
|  |  | 
|  | if (!check_comp_mask(ctx_attr->flags, | 
|  | MLX5DV_CONTEXT_FLAGS_DEVX)) { | 
|  | errno = EINVAL; | 
|  | goto err; | 
|  | } | 
|  |  | 
|  | req.flags = MLX5_IB_ALLOC_UCTX_DEVX; | 
|  | } else { | 
|  | req.flags = MLX5_IB_ALLOC_UCTX_DEVX; | 
|  | always_devx = true; | 
|  | } | 
|  |  | 
|  | retry_open: | 
|  | if (mlx5_cmd_get_context(context, &req, sizeof(req), &resp, | 
|  | sizeof(resp))) { | 
|  | if (always_devx) { | 
|  | req.flags &= ~MLX5_IB_ALLOC_UCTX_DEVX; | 
|  | always_devx = false; | 
|  | memset(&resp, 0, sizeof(resp)); | 
|  | goto retry_open; | 
|  | } else { | 
|  | goto err; | 
|  | } | 
|  | } | 
|  |  | 
|  | ret = mlx5_set_context(context, &resp.drv_payload, false); | 
|  | if (ret) | 
|  | goto err; | 
|  |  | 
|  | return &context->ibv_ctx; | 
|  |  | 
|  | err: | 
|  | mlx5_uninit_context(context); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static struct verbs_context *mlx5_import_context(struct ibv_device *ibdev, | 
|  | int cmd_fd) | 
|  |  | 
|  | { | 
|  | struct mlx5_ib_alloc_ucontext_resp resp = {}; | 
|  | DECLARE_COMMAND_BUFFER_LINK(driver_attr, UVERBS_OBJECT_DEVICE, | 
|  | UVERBS_METHOD_QUERY_CONTEXT, 1, | 
|  | NULL); | 
|  | struct ibv_context *context; | 
|  | struct mlx5_context *mctx; | 
|  | int ret; | 
|  |  | 
|  | mctx = mlx5_init_context(ibdev, cmd_fd); | 
|  | if (!mctx) | 
|  | return NULL; | 
|  |  | 
|  | context = &mctx->ibv_ctx.context; | 
|  |  | 
|  | fill_attr_out_ptr(driver_attr, MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, &resp); | 
|  | ret = ibv_cmd_query_context(context, driver_attr); | 
|  | if (ret) | 
|  | goto err; | 
|  |  | 
|  | ret = mlx5_set_context(mctx, &resp, true); | 
|  | if (ret) | 
|  | goto err; | 
|  |  | 
|  | return &mctx->ibv_ctx; | 
|  |  | 
|  | err: | 
|  | mlx5_uninit_context(mctx); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static void mlx5_free_context(struct ibv_context *ibctx) | 
|  | { | 
|  | struct mlx5_context *context = to_mctx(ibctx); | 
|  | int page_size = to_mdev(ibctx->device)->page_size; | 
|  | int i; | 
|  |  | 
|  | free(context->bfs); | 
|  | for (i = 0; i < MLX5_MAX_UARS; ++i) { | 
|  | if (context->uar[i].reg) | 
|  | munmap(context->uar[i].reg, page_size); | 
|  | } | 
|  | if (context->hca_core_clock) | 
|  | munmap(context->hca_core_clock - context->core_clock.offset, | 
|  | page_size); | 
|  | if (context->clock_info_page) | 
|  | munmap((void *)context->clock_info_page, page_size); | 
|  | mlx5_close_debug_file(context->dbg_fp); | 
|  | clean_dyn_uars(ibctx); | 
|  | reserved_qpn_blks_free(context); | 
|  |  | 
|  | verbs_uninit_context(&context->ibv_ctx); | 
|  | free(context); | 
|  | } | 
|  |  | 
|  | static void mlx5_uninit_device(struct verbs_device *verbs_device) | 
|  | { | 
|  | struct mlx5_device *dev = to_mdev(&verbs_device->device); | 
|  |  | 
|  | free(dev); | 
|  | } | 
|  |  | 
|  | static struct verbs_device *mlx5_device_alloc(struct verbs_sysfs_dev *sysfs_dev) | 
|  | { | 
|  | struct mlx5_device *dev; | 
|  |  | 
|  | dev = calloc(1, sizeof *dev); | 
|  | if (!dev) | 
|  | return NULL; | 
|  |  | 
|  | dev->page_size   = sysconf(_SC_PAGESIZE); | 
|  | dev->driver_abi_ver = sysfs_dev->abi_ver; | 
|  |  | 
|  | mlx5_set_dv_ctx_ops(&mlx5_dv_ctx_ops); | 
|  | return &dev->verbs_dev; | 
|  | } | 
|  |  | 
|  | static const struct verbs_device_ops mlx5_dev_ops = { | 
|  | .name = "mlx5", | 
|  | .match_min_abi_version = MLX5_UVERBS_MIN_ABI_VERSION, | 
|  | .match_max_abi_version = MLX5_UVERBS_MAX_ABI_VERSION, | 
|  | .match_table = mlx5_hca_table, | 
|  | .alloc_device = mlx5_device_alloc, | 
|  | .uninit_device = mlx5_uninit_device, | 
|  | .alloc_context = mlx5_alloc_context, | 
|  | .import_context = mlx5_import_context, | 
|  | }; | 
|  |  | 
|  | static bool is_mlx5_dev(struct ibv_device *device) | 
|  | { | 
|  | struct verbs_device *verbs_device = verbs_get_device(device); | 
|  |  | 
|  | return verbs_device->ops == &mlx5_dev_ops; | 
|  | } | 
|  |  | 
|  | struct mlx5_dv_context_ops *mlx5_get_dv_ops(struct ibv_context *ibctx) | 
|  | { | 
|  | if (is_mlx5_dev(ibctx->device)) | 
|  | return to_mctx(ibctx)->dv_ctx_ops; | 
|  | else if (is_mlx5_vfio_dev(ibctx->device)) | 
|  | return to_mvfio_ctx(ibctx)->dv_ctx_ops; | 
|  | else | 
|  | return NULL; | 
|  | } | 
/* Register the mlx5 provider, described by mlx5_dev_ops, via PROVIDER_DRIVER. */
PROVIDER_DRIVER(mlx5, mlx5_dev_ops);