| // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause |
| |
| // Authors: Bernard Metzler <bmt@zurich.ibm.com> |
| // Copyright (c) 2008-2019, IBM Corporation |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <errno.h> |
| #include <sys/mman.h> |
| #include <net/if.h> |
| #include <pthread.h> |
| #include <stdatomic.h> |
| #include <assert.h> |
| |
| #include "siw_abi.h" |
| #include "siw.h" |
| |
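| /* |
| * siw is the userspace provider for the software iWARP (siw) kernel |
| * driver. Work queues are not driven through per-WQE system calls: the |
| * kernel exports SQ, RQ, SRQ and CQ arrays which are mmap()ed below, and |
| * ownership of each entry is handed back and forth via its per-entry |
| * VALID flag using C11 atomics. SQ processing is kicked with a |
| * lightweight doorbell call (siw_db()). |
| */ |
| |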
| static void siw_free_context(struct ibv_context *ibv_ctx); |
| |
| static int siw_query_device(struct ibv_context *context, |
| const struct ibv_query_device_ex_input *input, |
| struct ibv_device_attr_ex *attr, size_t attr_size) |
| { |
| struct ib_uverbs_ex_query_device_resp resp; |
| size_t resp_size = sizeof(resp); |
| uint64_t raw_fw_ver; |
| unsigned int major, minor, sub_minor; |
| int rv; |
| |
| rv = ibv_cmd_query_device_any(context, input, attr, attr_size, &resp, |
| &resp_size); |
| if (rv) |
| return rv; |
| |
| raw_fw_ver = resp.base.fw_ver; |
| major = (raw_fw_ver >> 32) & 0xffff; |
| minor = (raw_fw_ver >> 16) & 0xffff; |
| sub_minor = raw_fw_ver & 0xffff; |
| |
| snprintf(attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver), |
| "%d.%d.%d", major, minor, sub_minor); |
| |
| return 0; |
| } |
| |
| static int siw_query_port(struct ibv_context *ctx, uint8_t port, |
| struct ibv_port_attr *attr) |
| { |
| struct ibv_query_port cmd; |
| |
| memset(&cmd, 0, sizeof(cmd)); |
| |
| return ibv_cmd_query_port(ctx, port, attr, &cmd, sizeof(cmd)); |
| } |
| |
| static int siw_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, |
| int attr_mask, struct ibv_qp_init_attr *init_attr) |
| { |
| struct ibv_query_qp cmd; |
| |
| memset(&cmd, 0, sizeof(cmd)); |
| |
| return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, |
| sizeof(cmd)); |
| } |
| |
| static struct ibv_pd *siw_alloc_pd(struct ibv_context *ctx) |
| { |
| struct ibv_alloc_pd cmd; |
| struct ib_uverbs_alloc_pd_resp resp; |
| struct ibv_pd *pd; |
| |
| memset(&cmd, 0, sizeof(cmd)); |
| |
| pd = calloc(1, sizeof(*pd)); |
| if (!pd) |
| return NULL; |
| |
| if (ibv_cmd_alloc_pd(ctx, pd, &cmd, sizeof(cmd), &resp, sizeof(resp))) { |
| free(pd); |
| return NULL; |
| } |
| return pd; |
| } |
| |
| static int siw_free_pd(struct ibv_pd *pd) |
| { |
| int rv; |
| |
| rv = ibv_cmd_dealloc_pd(pd); |
| if (rv) |
| return rv; |
| |
| free(pd); |
| return 0; |
| } |
| |
| static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len, |
| uint64_t hca_va, int access) |
| { |
| struct siw_cmd_reg_mr cmd = {}; |
| struct siw_cmd_reg_mr_resp resp = {}; |
| struct siw_mr *mr; |
| int rv; |
| |
| mr = calloc(1, sizeof(*mr)); |
| if (!mr) |
| return NULL; |
| |
| rv = ibv_cmd_reg_mr(pd, addr, len, hca_va, access, |
| &mr->base_mr, &cmd.ibv_cmd, sizeof(cmd), |
| &resp.ibv_resp, sizeof(resp)); |
| if (rv) { |
| free(mr); |
| return NULL; |
| } |
| return &mr->base_mr.ibv_mr; |
| } |
| |
| static int siw_dereg_mr(struct verbs_mr *base_mr) |
| { |
| struct siw_mr *mr = mr_base2siw(base_mr); |
| int rv; |
| |
| rv = ibv_cmd_dereg_mr(base_mr); |
| if (rv) |
| return rv; |
| |
| free(mr); |
| return 0; |
| } |
| |
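| /* |
| * A CQ is a kernel-allocated array of 'num_cqe' struct siw_cqe entries |
| * followed by a struct siw_cq_ctrl used for notification arming. The |
| * whole region is mapped into user space at the cq_key offset returned |
| * by the kernel, so no CQE is ever copied through a system call. |
| */ |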
| static struct ibv_cq *siw_create_cq(struct ibv_context *ctx, int num_cqe, |
| struct ibv_comp_channel *channel, |
| int comp_vector) |
| { |
| struct siw_cmd_create_cq cmd = {}; |
| struct siw_cmd_create_cq_resp resp = {}; |
| struct siw_cq *cq; |
| int cq_size, rv; |
| |
| cq = calloc(1, sizeof(*cq)); |
| if (!cq) |
| return NULL; |
| |
| rv = ibv_cmd_create_cq(ctx, num_cqe, channel, comp_vector, &cq->base_cq, |
| &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, |
| sizeof(resp)); |
| if (rv) { |
| verbs_err(verbs_get_ctx(ctx), |
| "libsiw: CQ creation failed: %d\n", rv); |
| free(cq); |
| return NULL; |
| } |
| if (resp.cq_key == SIW_INVAL_UOBJ_KEY) { |
| verbs_err(verbs_get_ctx(ctx), |
| "libsiw: prepare CQ mapping failed\n"); |
| goto fail; |
| } |
| pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); |
| cq->id = resp.cq_id; |
| cq->num_cqe = resp.num_cqe; |
| |
| cq_size = resp.num_cqe * sizeof(struct siw_cqe) + |
| sizeof(struct siw_cq_ctrl); |
| |
| cq->queue = mmap(NULL, cq_size, PROT_READ | PROT_WRITE, |
| MAP_SHARED, ctx->cmd_fd, resp.cq_key); |
| |
| if (cq->queue == MAP_FAILED) { |
| verbs_err(verbs_get_ctx(ctx), "libsiw: CQ mapping failed: %d\n", |
| errno); |
| goto fail; |
| } |
| cq->ctrl = (struct siw_cq_ctrl *)&cq->queue[cq->num_cqe]; |
| cq->ctrl->flags = SIW_NOTIFY_NOT; |
| |
| return &cq->base_cq; |
| fail: |
| ibv_cmd_destroy_cq(&cq->base_cq); |
| free(cq); |
| |
| return NULL; |
| } |
| |
| static int siw_destroy_cq(struct ibv_cq *base_cq) |
| { |
| struct siw_cq *cq = cq_base2siw(base_cq); |
| int rv; |
| |
| pthread_spin_lock(&cq->lock); |
| |
| if (cq->queue) |
| munmap(cq->queue, cq->num_cqe * sizeof(struct siw_cqe) + |
| sizeof(struct siw_cq_ctrl)); |
| |
| rv = ibv_cmd_destroy_cq(base_cq); |
| if (rv) { |
| pthread_spin_unlock(&cq->lock); |
| return rv; |
| } |
| pthread_spin_destroy(&cq->lock); |
| |
| free(cq); |
| |
| return 0; |
| } |
| |
| static struct ibv_srq *siw_create_srq(struct ibv_pd *pd, |
| struct ibv_srq_init_attr *attr) |
| { |
| struct siw_cmd_create_srq cmd = {}; |
| struct siw_cmd_create_srq_resp resp = {}; |
| struct ibv_context *ctx = pd->context; |
| struct siw_srq *srq; |
| int rv, rq_size; |
| |
| srq = calloc(1, sizeof(*srq)); |
| if (!srq) |
| return NULL; |
| |
| rv = ibv_cmd_create_srq(pd, &srq->base_srq, attr, &cmd.ibv_cmd, |
| sizeof(cmd), &resp.ibv_resp, sizeof(resp)); |
| if (rv) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: creating SRQ failed\n"); |
| free(srq); |
| return NULL; |
| } |
| if (resp.srq_key == SIW_INVAL_UOBJ_KEY) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: prepare SRQ mapping failed\n"); |
| goto fail; |
| } |
| pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); |
| rq_size = resp.num_rqe * sizeof(struct siw_rqe); |
| srq->num_rqe = resp.num_rqe; |
| |
| srq->recvq = mmap(NULL, rq_size, PROT_READ | PROT_WRITE, |
| MAP_SHARED, ctx->cmd_fd, resp.srq_key); |
| |
| if (srq->recvq == MAP_FAILED) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: SRQ mapping failed: %d", errno); |
| goto fail; |
| } |
| return &srq->base_srq; |
| fail: |
| ibv_cmd_destroy_srq(&srq->base_srq); |
| free(srq); |
| |
| return NULL; |
| } |
| |
| static int siw_modify_srq(struct ibv_srq *base_srq, struct ibv_srq_attr *attr, |
| int attr_mask) |
| { |
| struct ibv_modify_srq cmd = {}; |
| struct siw_srq *srq = srq_base2siw(base_srq); |
| int rv; |
| |
| pthread_spin_lock(&srq->lock); |
| rv = ibv_cmd_modify_srq(base_srq, attr, attr_mask, &cmd, sizeof(cmd)); |
| pthread_spin_unlock(&srq->lock); |
| |
| return rv; |
| } |
| |
| static int siw_destroy_srq(struct ibv_srq *base_srq) |
| { |
| struct siw_srq *srq = srq_base2siw(base_srq); |
| int rv; |
| |
| pthread_spin_lock(&srq->lock); |
| |
| rv = ibv_cmd_destroy_srq(base_srq); |
| if (rv) { |
| pthread_spin_unlock(&srq->lock); |
| return rv; |
| } |
| if (srq->recvq) |
| munmap(srq->recvq, srq->num_rqe * sizeof(struct siw_rqe)); |
| |
| pthread_spin_destroy(&srq->lock); |
| |
| free(srq); |
| |
| return 0; |
| } |
| |
| static struct ibv_qp *siw_create_qp(struct ibv_pd *pd, |
| struct ibv_qp_init_attr *attr) |
| { |
| struct siw_cmd_create_qp cmd = {}; |
| struct siw_cmd_create_qp_resp resp = {}; |
| struct siw_qp *qp; |
| struct ibv_context *base_ctx = pd->context; |
| int sq_size, rq_size, rv; |
| |
| qp = calloc(1, sizeof(*qp)); |
| if (!qp) |
| return NULL; |
| |
| rv = ibv_cmd_create_qp(pd, &qp->base_qp, attr, &cmd.ibv_cmd, |
| sizeof(cmd), &resp.ibv_resp, sizeof(resp)); |
| |
| if (rv) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: QP creation failed\n"); |
| free(qp); |
| return NULL; |
| } |
| if (resp.sq_key == SIW_INVAL_UOBJ_KEY || |
| resp.rq_key == SIW_INVAL_UOBJ_KEY) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: prepare QP mapping failed\n"); |
| goto fail; |
| } |
| qp->id = resp.qp_id; |
| qp->num_sqe = resp.num_sqe; |
| qp->num_rqe = resp.num_rqe; |
| qp->sq_sig_all = attr->sq_sig_all; |
| |
| /* Init doorbell request structure */ |
| qp->db_req.hdr.command = IB_USER_VERBS_CMD_POST_SEND; |
| qp->db_req.hdr.in_words = sizeof(qp->db_req) / 4; |
| qp->db_req.hdr.out_words = sizeof(qp->db_resp) / 4; |
| qp->db_req.response = (uintptr_t)&qp->db_resp; |
| qp->db_req.wr_count = 0; |
| qp->db_req.sge_count = 0; |
| qp->db_req.wqe_size = sizeof(struct ibv_send_wr); |
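| /* |
| * The request above carries no work requests (wr_count == 0). It only |
| * serves as a pre-formatted doorbell: siw_db(), declared in siw.h, is |
| * expected to pass it to the kernel to trigger processing of WQEs |
| * already published in the mmap()ed send queue (an assumption about |
| * code outside this file). |
| */ |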
| |
| pthread_spin_init(&qp->sq_lock, PTHREAD_PROCESS_PRIVATE); |
| pthread_spin_init(&qp->rq_lock, PTHREAD_PROCESS_PRIVATE); |
| |
| sq_size = resp.num_sqe * sizeof(struct siw_sqe); |
| |
| qp->sendq = mmap(NULL, sq_size, PROT_READ | PROT_WRITE, |
| MAP_SHARED, base_ctx->cmd_fd, resp.sq_key); |
| |
| if (qp->sendq == MAP_FAILED) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: SQ mapping failed: %d", errno); |
| |
| qp->sendq = NULL; |
| goto fail; |
| } |
| if (attr->srq) { |
| qp->srq = srq_base2siw(attr->srq); |
| } else { |
| rq_size = resp.num_rqe * sizeof(struct siw_rqe); |
| |
| qp->recvq = mmap(NULL, rq_size, PROT_READ | PROT_WRITE, |
| MAP_SHARED, base_ctx->cmd_fd, resp.rq_key); |
| |
| if (qp->recvq == MAP_FAILED) { |
| verbs_err(verbs_get_ctx(pd->context), |
| "libsiw: RQ mapping failed: %d\n", |
| resp.num_rqe); |
| qp->recvq = NULL; |
| goto fail; |
| } |
| } |
| qp->db_req.qp_handle = qp->base_qp.handle; |
| |
| return &qp->base_qp; |
| fail: |
| ibv_cmd_destroy_qp(&qp->base_qp); |
| |
| if (qp->sendq) |
| munmap(qp->sendq, qp->num_sqe * sizeof(struct siw_sqe)); |
| if (qp->recvq) |
| munmap(qp->recvq, qp->num_rqe * sizeof(struct siw_rqe)); |
| |
| free(qp); |
| |
| return NULL; |
| } |
| |
| static int siw_modify_qp(struct ibv_qp *base_qp, struct ibv_qp_attr *attr, |
| int attr_mask) |
| { |
| struct ibv_modify_qp cmd; |
| struct siw_qp *qp = qp_base2siw(base_qp); |
| int rv; |
| |
| memset(&cmd, 0, sizeof(cmd)); |
| |
| pthread_spin_lock(&qp->sq_lock); |
| pthread_spin_lock(&qp->rq_lock); |
| |
| rv = ibv_cmd_modify_qp(base_qp, attr, attr_mask, &cmd, sizeof(cmd)); |
| |
| pthread_spin_unlock(&qp->rq_lock); |
| pthread_spin_unlock(&qp->sq_lock); |
| |
| return rv; |
| } |
| |
| static int siw_destroy_qp(struct ibv_qp *base_qp) |
| { |
| struct siw_qp *qp = qp_base2siw(base_qp); |
| int rv; |
| |
| pthread_spin_lock(&qp->sq_lock); |
| pthread_spin_lock(&qp->rq_lock); |
| |
| if (qp->sendq) |
| munmap(qp->sendq, qp->num_sqe * sizeof(struct siw_sqe)); |
| if (qp->recvq) |
| munmap(qp->recvq, qp->num_rqe * sizeof(struct siw_rqe)); |
| |
| rv = ibv_cmd_destroy_qp(base_qp); |
| if (rv) { |
| pthread_spin_unlock(&qp->rq_lock); |
| pthread_spin_unlock(&qp->sq_lock); |
| return rv; |
| } |
| pthread_spin_destroy(&qp->rq_lock); |
| pthread_spin_destroy(&qp->sq_lock); |
| |
| free(qp); |
| |
| return 0; |
| } |
| |
| static void siw_async_event(struct ibv_context *ctx, |
| struct ibv_async_event *event) |
| { |
| struct ibv_qp *base_qp = event->element.qp; |
| struct ibv_cq *base_cq = event->element.cq; |
| |
| switch (event->event_type) { |
| case IBV_EVENT_CQ_ERR: |
| verbs_err(verbs_get_ctx(ctx), "libsiw: CQ[%d] event: error\n", |
| cq_base2siw(base_cq)->id); |
| break; |
| |
| case IBV_EVENT_QP_FATAL: |
| verbs_err(verbs_get_ctx(ctx), |
| "libsiw: QP[%d] event: fatal error\n", |
| qp_base2siw(base_qp)->id); |
| break; |
| |
| case IBV_EVENT_QP_REQ_ERR: |
| verbs_err(verbs_get_ctx(ctx), |
| "libsiw: QP[%d] event: request error\n", |
| qp_base2siw(base_qp)->id); |
| break; |
| |
| case IBV_EVENT_QP_ACCESS_ERR: |
| verbs_err(verbs_get_ctx(ctx), |
| "libsiw: QP[%d] event: access error\n", |
| qp_base2siw(base_qp)->id); |
| break; |
| |
| case IBV_EVENT_SQ_DRAINED: |
| case IBV_EVENT_COMM_EST: |
| case IBV_EVENT_QP_LAST_WQE_REACHED: |
| break; |
| |
| default: |
| break; |
| } |
| } |
| |
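| /* |
| * CQ arming only flips the flag word in the shared siw_cq_ctrl mapping: |
| * with 'solicited' set, the kernel is expected to notify on solicited |
| * completions only, otherwise on the next completion of any kind. No |
| * system call is needed to (re-)arm the CQ. |
| */ |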
| static int siw_notify_cq(struct ibv_cq *ibcq, int solicited) |
| { |
| struct siw_cq *cq = cq_base2siw(ibcq); |
| int rv = 0; |
| |
| if (solicited) |
| atomic_store((_Atomic(uint32_t) *)&cq->ctrl->flags, |
| SIW_NOTIFY_SOLICITED); |
| else |
| atomic_store((_Atomic(uint32_t) *)&cq->ctrl->flags, |
| SIW_NOTIFY_SOLICITED | SIW_NOTIFY_NEXT_COMPLETION); |
| return rv; |
| } |
| |
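| /* |
| * Maps verbs send opcodes to siw opcodes. The table relies on positional |
| * initialization and is indexed directly with base_wr->opcode, so entry |
| * order must match enum ibv_wr_opcode. Unsupported verbs map to |
| * SIW_NUM_OPCODES + 1 and are rejected in push_send_wqe(). |
| */ |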
| static const struct { |
| enum ibv_wr_opcode base; |
| enum siw_opcode siw; |
| } map_send_opcode[IBV_WR_DRIVER1 + 1] = { |
| { IBV_WR_RDMA_WRITE, SIW_OP_WRITE}, |
| { IBV_WR_RDMA_WRITE_WITH_IMM, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_SEND, SIW_OP_SEND }, |
| { IBV_WR_SEND_WITH_IMM, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_RDMA_READ, SIW_OP_READ }, |
| { IBV_WR_ATOMIC_CMP_AND_SWP, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_ATOMIC_FETCH_AND_ADD, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_LOCAL_INV, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_BIND_MW, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_SEND_WITH_INV, SIW_OP_SEND_REMOTE_INV }, |
| { IBV_WR_TSO, SIW_NUM_OPCODES + 1 }, |
| { IBV_WR_DRIVER1, SIW_NUM_OPCODES + 1 } |
| }; |
| |
| static inline uint16_t map_send_flags(int ibv_flags) |
| { |
| uint16_t flags = SIW_WQE_VALID; |
| |
| if (ibv_flags & IBV_SEND_SIGNALED) |
| flags |= SIW_WQE_SIGNALLED; |
| if (ibv_flags & IBV_SEND_SOLICITED) |
| flags |= SIW_WQE_SOLICITED; |
| if (ibv_flags & IBV_SEND_INLINE) |
| flags |= SIW_WQE_INLINE; |
| if (ibv_flags & IBV_SEND_FENCE) |
| flags |= SIW_WQE_READ_FENCE; |
| |
| return flags; |
| } |
| |
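| /* |
| * Fills one free SQE in the mmap()ed send queue. The slot is owned by |
| * user space as long as SIW_WQE_VALID is clear; the final atomic store |
| * of the flags publishes the complete WQE and returns ownership to the |
| * kernel. |
| */ |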
| static inline int push_send_wqe(struct ibv_qp *base_qp, |
| struct ibv_send_wr *base_wr, |
| struct siw_sqe *siw_sqe, int sig_all) |
| { |
| uint32_t flags = map_send_flags(base_wr->send_flags); |
| atomic_ushort *fp = (atomic_ushort *)&siw_sqe->flags; |
| |
| siw_sqe->id = base_wr->wr_id; |
| siw_sqe->num_sge = base_wr->num_sge; |
| siw_sqe->raddr = base_wr->wr.rdma.remote_addr; |
| siw_sqe->rkey = base_wr->wr.rdma.rkey; |
| |
| siw_sqe->opcode = map_send_opcode[base_wr->opcode].siw; |
| if (siw_sqe->opcode > SIW_NUM_OPCODES) { |
| verbs_err(verbs_get_ctx(base_qp->context), |
| "libsiw: opcode %d unsupported\n", base_wr->opcode); |
| return -EINVAL; |
| } |
| if (sig_all) |
| flags |= SIW_WQE_SIGNALLED; |
| |
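| /* |
| * Inline data is copied into the SQE itself: the payload is stored in |
| * the space starting at sge[1] and the total byte count is kept in |
| * sge[0].length. Total inline payload is limited to SIW_MAX_INLINE. |
| */ |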
| if (flags & SIW_WQE_INLINE) { |
| char *data = (char *)&siw_sqe->sge[1]; |
| int bytes = 0, i = 0; |
| |
| /* Allow more than SIW_MAX_SGE, since content copied here */ |
| while (i < base_wr->num_sge) { |
| bytes += base_wr->sg_list[i].length; |
| if (bytes > (int)SIW_MAX_INLINE) { |
| verbs_err(verbs_get_ctx(base_qp->context), |
| "libsiw: inline data: %d:%d\n", bytes, |
| (int)SIW_MAX_INLINE); |
| return -EINVAL; |
| } |
| memcpy(data, |
| (void *)(uintptr_t)base_wr->sg_list[i].addr, |
| base_wr->sg_list[i].length); |
| data += base_wr->sg_list[i++].length; |
| } |
| siw_sqe->sge[0].length = bytes; |
| |
| } else { |
| if (siw_sqe->num_sge > SIW_MAX_SGE) |
| return -EINVAL; |
| |
| /* this assumes same layout of siw and base SGE */ |
| memcpy(siw_sqe->sge, base_wr->sg_list, |
| siw_sqe->num_sge * sizeof(struct ibv_sge)); |
| } |
| atomic_store(fp, flags); |
| |
| return 0; |
| } |
| |
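| /* |
| * Posts a chain of work requests by claiming free SQE slots starting at |
| * sq_put and ringing the doorbell only if the SQ looks idle (see the |
| * heuristic below). From the application's point of view this is the |
| * plain verbs API; a minimal sketch (buffer 'buf' of 'len' bytes, |
| * registered as 'mr' - all illustrative names): |
| * |
| * struct ibv_sge sge = { .addr = (uintptr_t)buf, .length = len, |
| * .lkey = mr->lkey }; |
| * struct ibv_send_wr wr = { .wr_id = 1, .sg_list = &sge, .num_sge = 1, |
| * .opcode = IBV_WR_SEND, |
| * .send_flags = IBV_SEND_SIGNALED }; |
| * struct ibv_send_wr *bad_wr; |
| * int ret = ibv_post_send(qp, &wr, &bad_wr); |
| */ |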
| static int siw_post_send(struct ibv_qp *base_qp, struct ibv_send_wr *wr, |
| struct ibv_send_wr **bad_wr) |
| { |
| struct siw_qp *qp = qp_base2siw(base_qp); |
| uint32_t sq_put; |
| atomic_ushort *fp; |
| int new_sqe = 0, rv = 0; |
| |
| *bad_wr = NULL; |
| |
| pthread_spin_lock(&qp->sq_lock); |
| |
| sq_put = qp->sq_put; |
| |
| /* |
| * Push all current work requests into mmapped SQ |
| */ |
| while (wr) { |
| uint32_t idx = sq_put % qp->num_sqe; |
| struct siw_sqe *sqe = &qp->sendq[idx]; |
| uint16_t sqe_flags; |
| |
| fp = (atomic_ushort *)&sqe->flags; |
| sqe_flags = atomic_load(fp); |
| |
| if (!(sqe_flags & SIW_WQE_VALID)) { |
| rv = push_send_wqe(base_qp, wr, sqe, qp->sq_sig_all); |
| if (rv) { |
| *bad_wr = wr; |
| break; |
| } |
| new_sqe++; |
| } else { |
| verbs_err(verbs_get_ctx(base_qp->context), |
| "libsiw: QP[%d]: SQ overflow, idx %d\n", |
| qp->id, idx); |
| rv = -ENOMEM; |
| *bad_wr = wr; |
| break; |
| } |
| sq_put++; |
| wr = wr->next; |
| } |
| if (new_sqe) { |
| /* |
| * If the last WQE pushed before the position where the current |
| * post_send started is idle, we assume the SQ is not being actively |
| * processed and only then issue the doorbell call. This may |
| * significantly reduce unnecessary doorbell calls on a busy SQ. |
| * The doorbell is always rung if the complete SQ was re-written |
| * during the current post_send. |
| */ |
| if (new_sqe < qp->num_sqe) { |
| uint32_t old_idx = (qp->sq_put - 1) % qp->num_sqe; |
| struct siw_sqe *old_sqe = &qp->sendq[old_idx]; |
| |
| fp = (atomic_ushort *)&old_sqe->flags; |
| if (!(atomic_load(fp) & SIW_WQE_VALID)) |
| rv = siw_db(qp); |
| } else { |
| rv = siw_db(qp); |
| } |
| if (rv) |
| *bad_wr = wr; |
| |
| qp->sq_put = sq_put; |
| } |
| pthread_spin_unlock(&qp->sq_lock); |
| |
| return rv; |
| } |
| |
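| /* |
| * Receive queue entries use the same ownership handoff as SQEs: the RQE |
| * is filled while its flags are clear, then the atomic store of |
| * SIW_WQE_VALID publishes it to the kernel. |
| */ |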
| static inline int push_recv_wqe(struct ibv_recv_wr *base_wr, |
| struct siw_rqe *siw_rqe) |
| { |
| atomic_ushort *fp = (atomic_ushort *)&siw_rqe->flags; |
| |
| siw_rqe->id = base_wr->wr_id; |
| siw_rqe->num_sge = base_wr->num_sge; |
| |
| if (base_wr->num_sge == 1) { |
| siw_rqe->sge[0].laddr = base_wr->sg_list[0].addr; |
| siw_rqe->sge[0].length = base_wr->sg_list[0].length; |
| siw_rqe->sge[0].lkey = base_wr->sg_list[0].lkey; |
| } else if (base_wr->num_sge && base_wr->num_sge <= SIW_MAX_SGE) |
| /* this assumes same layout of siw and base SGE */ |
| memcpy(siw_rqe->sge, base_wr->sg_list, |
| sizeof(struct ibv_sge) * base_wr->num_sge); |
| else |
| return -EINVAL; |
| |
| atomic_store(fp, SIW_WQE_VALID); |
| |
| return 0; |
| } |
| |
| static int siw_post_recv(struct ibv_qp *base_qp, struct ibv_recv_wr *wr, |
| struct ibv_recv_wr **bad_wr) |
| { |
| struct siw_qp *qp = qp_base2siw(base_qp); |
| uint32_t rq_put; |
| int rv = 0; |
| |
| pthread_spin_lock(&qp->rq_lock); |
| |
| rq_put = qp->rq_put; |
| |
| while (wr) { |
| int idx = rq_put % qp->num_rqe; |
| struct siw_rqe *rqe = &qp->recvq[idx]; |
| atomic_ushort *fp = (atomic_ushort *)&rqe->flags; |
| uint16_t rqe_flags = atomic_load(fp); |
| |
| if (!(rqe_flags & SIW_WQE_VALID)) { |
| if (push_recv_wqe(wr, rqe)) { |
| *bad_wr = wr; |
| rv = -EINVAL; |
| break; |
| } |
| } else { |
| verbs_err(verbs_get_ctx(base_qp->context), |
| "libsiw: QP[%d]: RQ overflow, idx %d\n", |
| qp->id, idx); |
| rv = -ENOMEM; |
| *bad_wr = wr; |
| break; |
| } |
| rq_put++; |
| wr = wr->next; |
| } |
| qp->rq_put = rq_put; |
| |
| pthread_spin_unlock(&qp->rq_lock); |
| |
| return rv; |
| } |
| |
| static int siw_post_srq_recv(struct ibv_srq *base_srq, struct ibv_recv_wr *wr, |
| struct ibv_recv_wr **bad_wr) |
| { |
| struct siw_srq *srq = srq_base2siw(base_srq); |
| uint32_t srq_put; |
| int rv = 0; |
| |
| pthread_spin_lock(&srq->lock); |
| |
| srq_put = srq->rq_put; |
| |
| while (wr) { |
| int idx = srq_put % srq->num_rqe; |
| struct siw_rqe *rqe = &srq->recvq[idx]; |
| atomic_ushort *fp = (atomic_ushort *)&rqe->flags; |
| uint16_t rqe_flags = atomic_load(fp); |
| |
| if (!(rqe_flags & SIW_WQE_VALID)) { |
| if (push_recv_wqe(wr, rqe)) { |
| *bad_wr = wr; |
| rv = -EINVAL; |
| break; |
| } |
| } else { |
| verbs_err(verbs_get_ctx(base_srq->context), |
| "libsiw: SRQ[%p]: SRQ overflow\n", srq); |
| rv = -ENOMEM; |
| *bad_wr = wr; |
| break; |
| } |
| srq_put++; |
| wr = wr->next; |
| } |
| srq->rq_put = srq_put; |
| |
| pthread_spin_unlock(&srq->lock); |
| |
| return rv; |
| } |
| |
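| /* |
| * Translation of siw completion opcodes and status codes into their |
| * verbs counterparts. Both tables rely on positional initialization and |
| * are indexed directly with the values reported in the CQE, so entry |
| * order must match the numeric order of the corresponding siw enums. |
| */ |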
| static const struct { |
| enum siw_opcode siw; |
| enum ibv_wc_opcode base; |
| } map_cqe_opcode[SIW_NUM_OPCODES] = { |
| { SIW_OP_WRITE, IBV_WC_RDMA_WRITE }, |
| { SIW_OP_READ, IBV_WC_RDMA_READ }, |
| { SIW_OP_READ_LOCAL_INV, IBV_WC_RDMA_READ }, |
| { SIW_OP_SEND, IBV_WC_SEND }, |
| { SIW_OP_SEND_WITH_IMM, IBV_WC_SEND }, |
| { SIW_OP_SEND_REMOTE_INV, IBV_WC_SEND }, |
| { SIW_OP_FETCH_AND_ADD, IBV_WC_FETCH_ADD }, |
| { SIW_OP_COMP_AND_SWAP, IBV_WC_COMP_SWAP }, |
| { SIW_OP_RECEIVE, IBV_WC_RECV } |
| }; |
| |
| static const struct { |
| enum siw_wc_status siw; |
| enum ibv_wc_status base; |
| } map_cqe_status[SIW_NUM_WC_STATUS] = { |
| { SIW_WC_SUCCESS, IBV_WC_SUCCESS }, |
| { SIW_WC_LOC_LEN_ERR, IBV_WC_LOC_LEN_ERR }, |
| { SIW_WC_LOC_PROT_ERR, IBV_WC_LOC_PROT_ERR }, |
| { SIW_WC_LOC_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR }, |
| { SIW_WC_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR }, |
| { SIW_WC_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR }, |
| { SIW_WC_LOC_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR }, |
| { SIW_WC_REM_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR }, |
| { SIW_WC_REM_INV_REQ_ERR, IBV_WC_REM_INV_REQ_ERR }, |
| { SIW_WC_GENERAL_ERR, IBV_WC_GENERAL_ERR } |
| }; |
| |
| static inline void copy_cqe(struct siw_cqe *cqe, struct ibv_wc *wc) |
| { |
| wc->wr_id = cqe->id; |
| wc->byte_len = cqe->bytes; |
| |
| /* No immediate data supported yet */ |
| wc->wc_flags = 0; |
| wc->imm_data = 0; |
| |
| wc->vendor_err = 0; |
| wc->opcode = map_cqe_opcode[cqe->opcode].base; |
| wc->status = map_cqe_status[cqe->status].base; |
| wc->qp_num = (uint32_t)cqe->qp_id; |
| } |
| |
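| /* |
| * Consumer side of the shared CQ: a CQE belongs to user space while its |
| * VALID flag is set; after copying it out, clearing the flags returns |
| * the slot to the kernel. Polling never enters the kernel. |
| */ |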
| static int siw_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) |
| { |
| struct siw_cq *cq = cq_base2siw(ibcq); |
| int new = 0; |
| |
| pthread_spin_lock(&cq->lock); |
| |
| for (; num_entries--; wc++) { |
| struct siw_cqe *cqe = &cq->queue[cq->cq_get % cq->num_cqe]; |
| atomic_uchar *fp = (atomic_uchar *)&cqe->flags; |
| |
| if (atomic_load(fp) & SIW_WQE_VALID) { |
| copy_cqe(cqe, wc); |
| atomic_store(fp, 0); |
| cq->cq_get++; |
| new++; |
| } else |
| break; |
| } |
| pthread_spin_unlock(&cq->lock); |
| |
| return new; |
| } |
| |
| static const struct verbs_context_ops siw_context_ops = { |
| .alloc_pd = siw_alloc_pd, |
| .async_event = siw_async_event, |
| .create_cq = siw_create_cq, |
| .create_qp = siw_create_qp, |
| .create_srq = siw_create_srq, |
| .dealloc_pd = siw_free_pd, |
| .dereg_mr = siw_dereg_mr, |
| .destroy_cq = siw_destroy_cq, |
| .destroy_qp = siw_destroy_qp, |
| .destroy_srq = siw_destroy_srq, |
| .free_context = siw_free_context, |
| .modify_qp = siw_modify_qp, |
| .modify_srq = siw_modify_srq, |
| .poll_cq = siw_poll_cq, |
| .post_recv = siw_post_recv, |
| .post_send = siw_post_send, |
| .post_srq_recv = siw_post_srq_recv, |
| .query_device_ex = siw_query_device, |
| .query_port = siw_query_port, |
| .query_qp = siw_query_qp, |
| .reg_mr = siw_reg_mr, |
| .req_notify_cq = siw_notify_cq, |
| }; |
| |
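| /* |
| * verbs_init_and_alloc_context() allocates the provider context with the |
| * generic verbs_context embedded as 'base_ctx'; once the get_context |
| * command succeeds, verbs_set_ops() installs the provider entry points |
| * defined above. |
| */ |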
| static struct verbs_context *siw_alloc_context(struct ibv_device *base_dev, |
| int fd, void *pdata) |
| { |
| struct siw_context *ctx; |
| struct ibv_get_context cmd = {}; |
| struct siw_cmd_alloc_context_resp resp = {}; |
| |
| ctx = verbs_init_and_alloc_context(base_dev, fd, ctx, base_ctx, |
| RDMA_DRIVER_SIW); |
| if (!ctx) |
| return NULL; |
| |
| if (ibv_cmd_get_context(&ctx->base_ctx, &cmd, sizeof(cmd), |
| NULL, &resp.ibv_resp, sizeof(resp))) { |
| verbs_uninit_context(&ctx->base_ctx); |
| free(ctx); |
| |
| return NULL; |
| } |
| verbs_set_ops(&ctx->base_ctx, &siw_context_ops); |
| ctx->dev_id = resp.dev_id; |
| |
| return &ctx->base_ctx; |
| } |
| |
| static void siw_free_context(struct ibv_context *ibv_ctx) |
| { |
| struct siw_context *ctx = ctx_ibv2siw(ibv_ctx); |
| |
| verbs_uninit_context(&ctx->base_ctx); |
| free(ctx); |
| } |
| |
| static struct verbs_device *siw_device_alloc(struct verbs_sysfs_dev *unused) |
| { |
| struct siw_device *dev; |
| |
| dev = calloc(1, sizeof(*dev)); |
| if (!dev) |
| return NULL; |
| |
| return &dev->base_dev; |
| } |
| |
| static void siw_device_free(struct verbs_device *vdev) |
| { |
| struct siw_device *dev = |
| container_of(vdev, struct siw_device, base_dev); |
| free(dev); |
| } |
| |
| static const struct verbs_match_ent rnic_table[] = { |
| VERBS_DRIVER_ID(RDMA_DRIVER_SIW), |
| {}, |
| }; |
| |
| static const struct verbs_device_ops siw_dev_ops = { |
| .name = "siw", |
| .match_min_abi_version = SIW_ABI_VERSION, |
| .match_max_abi_version = SIW_ABI_VERSION, |
| .match_table = rnic_table, |
| .alloc_device = siw_device_alloc, |
| .uninit_device = siw_device_free, |
| .alloc_context = siw_alloc_context, |
| }; |
| |
| PROVIDER_DRIVER(siw, siw_dev_ops); |