/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>

#include <util/compiler.h>
#include <util/mmio.h>
#include <infiniband/opcode.h>

#include "mlx4.h"

enum {
	CQ_OK					=  0,
	CQ_EMPTY				= -1,
	CQ_POLL_ERR				= -2
};

static struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry)
{
	return cq->buf.buf + entry * cq->cqe_size;
}
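
/*
 * A CQE belongs to software when its ownership bit matches the parity of
 * the consumer index's wrap count (the bit just above the index mask,
 * which hardware flips on every pass around the ring).  With a 64-byte
 * CQE stride the reported CQE contents, including the ownership bit, sit
 * in the second 32-byte half of the entry, hence the "cqe + 1" below.
 */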
static void *get_sw_cqe(struct mlx4_cq *cq, int n)
{
	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->verbs_cq.cq.cqe);
	struct mlx4_cqe *tcqe = cq->cqe_size == 64 ? cqe + 1 : cqe;

	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
		!!(n & (cq->verbs_cq.cq.cqe + 1))) ? NULL : cqe;
}

static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq)
{
	return get_sw_cqe(cq, cq->cons_index);
}

static enum ibv_wc_status mlx4_handle_error_cqe(struct mlx4_err_cqe *cqe)
{
	if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR)
		printf(PFX "local QP operation err "
		       "(QPN %06x, WQE index %x, vendor syndrome %02x, "
		       "opcode = %02x)\n",
		       htobe32(cqe->vlan_my_qpn), htobe32(cqe->wqe_index),
		       cqe->vendor_err,
		       cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);

	switch (cqe->syndrome) {
	case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		return IBV_WC_LOC_LEN_ERR;
	case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		return IBV_WC_LOC_QP_OP_ERR;
	case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
		return IBV_WC_LOC_PROT_ERR;
	case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
		return IBV_WC_WR_FLUSH_ERR;
	case MLX4_CQE_SYNDROME_MW_BIND_ERR:
		return IBV_WC_MW_BIND_ERR;
	case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
		return IBV_WC_BAD_RESP_ERR;
	case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		return IBV_WC_LOC_ACCESS_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		return IBV_WC_REM_INV_REQ_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		return IBV_WC_REM_ACCESS_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
		return IBV_WC_REM_OP_ERR;
	case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		return IBV_WC_RETRY_EXC_ERR;
	case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		return IBV_WC_RNR_RETRY_EXC_ERR;
	case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		return IBV_WC_REM_ABORT_ERR;
	default:
		return IBV_WC_GENERAL_ERR;
	}
}

static inline void handle_good_req(struct ibv_wc *wc, struct mlx4_cqe *cqe)
{
	wc->wc_flags = 0;
	switch (mlx4dv_get_cqe_opcode(cqe)) {
	case MLX4_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IBV_WC_WITH_IMM;
		SWITCH_FALLTHROUGH;
	case MLX4_OPCODE_RDMA_WRITE:
		wc->opcode    = IBV_WC_RDMA_WRITE;
		break;
	case MLX4_OPCODE_SEND_IMM:
		wc->wc_flags |= IBV_WC_WITH_IMM;
		SWITCH_FALLTHROUGH;
	case MLX4_OPCODE_SEND:
	case MLX4_OPCODE_SEND_INVAL:
		wc->opcode    = IBV_WC_SEND;
		break;
	case MLX4_OPCODE_RDMA_READ:
		wc->opcode    = IBV_WC_RDMA_READ;
		wc->byte_len  = be32toh(cqe->byte_cnt);
		break;
	case MLX4_OPCODE_ATOMIC_CS:
		wc->opcode    = IBV_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX4_OPCODE_ATOMIC_FA:
		wc->opcode    = IBV_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX4_OPCODE_LOCAL_INVAL:
		wc->opcode    = IBV_WC_LOCAL_INV;
		break;
	case MLX4_OPCODE_BIND_MW:
		wc->opcode    = IBV_WC_BIND_MW;
		break;
	default:
		/* assume it's a send completion */
		wc->opcode    = IBV_WC_SEND;
		break;
	}
}

static inline int mlx4_get_next_cqe(struct mlx4_cq *cq,
				    struct mlx4_cqe **pcqe)
				    ALWAYS_INLINE;
static inline int mlx4_get_next_cqe(struct mlx4_cq *cq,
				    struct mlx4_cqe **pcqe)
{
	struct mlx4_cqe *cqe;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return CQ_EMPTY;

	if (cq->cqe_size == 64)
		++cqe;

	++cq->cons_index;

	VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe);

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	udma_from_device_barrier();

	*pcqe = cqe;

	return CQ_OK;
}
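
/*
 * Translate one hardware CQE into completion information.  In the regular
 * path the result is written into *wc; in the lazy (extended CQ) path only
 * the wr_id and status are resolved here and the raw CQE is stashed in
 * cq->cqe so the ibv_cq_ex read_*() callbacks below can decode the
 * remaining fields on demand.
 */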
static inline int mlx4_parse_cqe(struct mlx4_cq *cq,
				 struct mlx4_cqe *cqe,
				 struct mlx4_qp **cur_qp,
				 struct ibv_wc *wc, int lazy)
				 ALWAYS_INLINE;
static inline int mlx4_parse_cqe(struct mlx4_cq *cq,
				 struct mlx4_cqe *cqe,
				 struct mlx4_qp **cur_qp,
				 struct ibv_wc *wc, int lazy)
{
	struct mlx4_wq *wq;
	struct mlx4_srq *srq;
	uint32_t qpn;
	uint32_t g_mlpath_rqpn;
	uint64_t *pwr_id;
	uint16_t wqe_index;
	struct mlx4_err_cqe *ecqe;
	struct mlx4_context *mctx;
	int is_error;
	int is_send;
	enum ibv_wc_status *pstatus;

	mctx = to_mctx(cq->verbs_cq.cq.context);
	qpn = be32toh(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
	if (lazy) {
		cq->cqe = cqe;
		cq->flags &= (~MLX4_CQ_FLAGS_RX_CSUM_VALID);
	} else
		wc->qp_num = qpn;

	is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	is_error = (mlx4dv_get_cqe_opcode(cqe)) ==
		MLX4_CQE_OPCODE_ERROR;

	if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
		/*
		 * We do not have to take the XSRQ table lock here,
		 * because CQs will be locked while SRQs are removed
		 * from the table.
		 */
		srq = mlx4_find_xsrq(&mctx->xsrq_table,
				     be32toh(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
		if (!srq)
			return CQ_POLL_ERR;
	} else {
		if (!*cur_qp || (qpn != (*cur_qp)->qpn_cache)) {
			/*
			 * We do not have to take the QP table lock here,
			 * because CQs will be locked while QPs are removed
			 * from the table.
			 */
			*cur_qp = mlx4_find_qp(mctx, qpn);
			if (!*cur_qp)
				return CQ_POLL_ERR;
		}
		srq = ((*cur_qp)->type == MLX4_RSC_TYPE_SRQ) ?
		       to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
	}

	pwr_id = lazy ? &cq->verbs_cq.cq_ex.wr_id : &wc->wr_id;
	if (is_send) {
		wq = &(*cur_qp)->sq;
		wqe_index = be16toh(cqe->wqe_index);
		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
		*pwr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	} else if (srq) {
		wqe_index = be16toh(cqe->wqe_index);
		*pwr_id = srq->wrid[wqe_index];
		mlx4_free_srq_wqe(srq, wqe_index);
	} else {
		wq = &(*cur_qp)->rq;
		*pwr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}

	pstatus = lazy ? &cq->verbs_cq.cq_ex.status : &wc->status;
	if (is_error) {
		ecqe = (struct mlx4_err_cqe *)cqe;
		*pstatus = mlx4_handle_error_cqe(ecqe);
		if (!lazy)
			wc->vendor_err = ecqe->vendor_err;
		return CQ_OK;
	}

	*pstatus = IBV_WC_SUCCESS;
	if (lazy) {
		if (!is_send)
			if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & MLX4_RX_CSUM_VALID))
				cq->flags |= MLX4_CQ_FLAGS_RX_CSUM_VALID;
	} else if (is_send) {
		handle_good_req(wc, cqe);
	} else {
		wc->byte_len = be32toh(cqe->byte_cnt);

		switch (mlx4dv_get_cqe_opcode(cqe)) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			wc->opcode   = IBV_WC_RECV_RDMA_WITH_IMM;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		case MLX4_RECV_OPCODE_SEND_INVAL:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags |= IBV_WC_WITH_INV;
			wc->invalidated_rkey = be32toh(cqe->immed_rss_invalid);
			break;
		case MLX4_RECV_OPCODE_SEND:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags = 0;
			break;
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc->opcode   = IBV_WC_RECV;
			wc->wc_flags = IBV_WC_WITH_IMM;
			wc->imm_data = cqe->immed_rss_invalid;
			break;
		}

		wc->slid	   = be16toh(cqe->rlid);
		g_mlpath_rqpn	   = be32toh(cqe->g_mlpath_rqpn);
		wc->src_qp	   = g_mlpath_rqpn & 0xffffff;
		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
		wc->pkey_index     = be32toh(cqe->immed_rss_invalid) & 0x7f;
		/*
		 * With XRC SRQs there may be no QP to check the link layer
		 * against, so fall back to the IB SL format; RoCE handling
		 * here is still TBD.
		 */
		if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
			wc->sl	   = be16toh(cqe->sl_vid) >> 13;
		else
			wc->sl	   = be16toh(cqe->sl_vid) >> 12;

		if ((*cur_qp) && ((*cur_qp)->qp_cap_cache & MLX4_RX_CSUM_VALID)) {
			wc->wc_flags |= ((cqe->status & htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) ==
					 htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) <<
					IBV_WC_IP_CSUM_OK_SHIFT;
		}
	}

	return CQ_OK;
}

static inline int mlx4_parse_lazy_cqe(struct mlx4_cq *cq,
				      struct mlx4_cqe *cqe)
				      ALWAYS_INLINE;
static inline int mlx4_parse_lazy_cqe(struct mlx4_cq *cq,
				      struct mlx4_cqe *cqe)
{
	return mlx4_parse_cqe(cq, cqe, &cq->cur_qp, NULL, 1);
}

static inline int mlx4_poll_one(struct mlx4_cq *cq,
				struct mlx4_qp **cur_qp,
				struct ibv_wc *wc)
				ALWAYS_INLINE;
static inline int mlx4_poll_one(struct mlx4_cq *cq,
				struct mlx4_qp **cur_qp,
				struct ibv_wc *wc)
{
	struct mlx4_cqe *cqe;
	int err;

	err = mlx4_get_next_cqe(cq, &cqe);
	if (err == CQ_EMPTY)
		return err;

	return mlx4_parse_cqe(cq, cqe, cur_qp, wc, 0);
}

int mlx4_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	struct mlx4_cq *cq = to_mcq(ibcq);
	struct mlx4_qp *qp = NULL;
	int npolled;
	int err = CQ_OK;

	pthread_spin_lock(&cq->lock);

	for (npolled = 0; npolled < ne; ++npolled) {
		err = mlx4_poll_one(cq, &qp, wc + npolled);
		if (err != CQ_OK)
			break;
	}

	if (npolled || err == CQ_POLL_ERR)
		mlx4_update_cons_index(cq);

	pthread_spin_unlock(&cq->lock);

	return err == CQ_POLL_ERR ? err : npolled;
}
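
/*
 * Example (not part of the driver): applications reach mlx4_poll_cq()
 * through the generic ibv_poll_cq() verb.  A minimal sketch, assuming
 * "cq" was created with ibv_create_cq() and batches of up to 16
 * completions are wanted per call (a negative return indicates an error):
 *
 *	struct ibv_wc wc[16];
 *	int i, n = ibv_poll_cq(cq, 16, wc);
 *
 *	for (i = 0; i < n; i++) {
 *		if (wc[i].status != IBV_WC_SUCCESS)
 *			fprintf(stderr, "wr_id %llu failed: %s\n",
 *				(unsigned long long)wc[i].wr_id,
 *				ibv_wc_status_str(wc[i].status));
 *	}
 */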

static inline void _mlx4_end_poll(struct ibv_cq_ex *ibcq, int lock)
	ALWAYS_INLINE;
static inline void _mlx4_end_poll(struct ibv_cq_ex *ibcq, int lock)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	mlx4_update_cons_index(cq);

	if (lock)
		pthread_spin_unlock(&cq->lock);
}

static inline int _mlx4_start_poll(struct ibv_cq_ex *ibcq,
				   struct ibv_poll_cq_attr *attr,
				   int lock)
				   ALWAYS_INLINE;
static inline int _mlx4_start_poll(struct ibv_cq_ex *ibcq,
				   struct ibv_poll_cq_attr *attr,
				   int lock)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx4_cqe *cqe;
	int err;

	if (unlikely(attr->comp_mask))
		return EINVAL;

	if (lock)
		pthread_spin_lock(&cq->lock);

	cq->cur_qp = NULL;

	err = mlx4_get_next_cqe(cq, &cqe);
	if (err == CQ_EMPTY) {
		if (lock)
			pthread_spin_unlock(&cq->lock);
		return ENOENT;
	}

	err = mlx4_parse_lazy_cqe(cq, cqe);
	if (lock && err)
		pthread_spin_unlock(&cq->lock);

	return err;
}

static int mlx4_next_poll(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx4_cqe *cqe;
	int err;

	err = mlx4_get_next_cqe(cq, &cqe);
	if (err == CQ_EMPTY)
		return ENOENT;

	return mlx4_parse_lazy_cqe(cq, cqe);
}

static void mlx4_end_poll(struct ibv_cq_ex *ibcq)
{
	_mlx4_end_poll(ibcq, 0);
}

static void mlx4_end_poll_lock(struct ibv_cq_ex *ibcq)
{
	_mlx4_end_poll(ibcq, 1);
}

static int mlx4_start_poll(struct ibv_cq_ex *ibcq,
			   struct ibv_poll_cq_attr *attr)
{
	return _mlx4_start_poll(ibcq, attr, 0);
}

static int mlx4_start_poll_lock(struct ibv_cq_ex *ibcq,
				struct ibv_poll_cq_attr *attr)
{
	return _mlx4_start_poll(ibcq, attr, 1);
}

static enum ibv_wc_opcode mlx4_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	if (cq->cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK) {
		switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
		case MLX4_OPCODE_RDMA_WRITE:
			return IBV_WC_RDMA_WRITE;
		case MLX4_OPCODE_SEND_INVAL:
		case MLX4_OPCODE_SEND_IMM:
		case MLX4_OPCODE_SEND:
			return IBV_WC_SEND;
		case MLX4_OPCODE_RDMA_READ:
			return IBV_WC_RDMA_READ;
		case MLX4_OPCODE_ATOMIC_CS:
			return IBV_WC_COMP_SWAP;
		case MLX4_OPCODE_ATOMIC_FA:
			return IBV_WC_FETCH_ADD;
		case MLX4_OPCODE_LOCAL_INVAL:
			return IBV_WC_LOCAL_INV;
		case MLX4_OPCODE_BIND_MW:
			return IBV_WC_BIND_MW;
		}
	} else {
		switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
			return IBV_WC_RECV_RDMA_WITH_IMM;
		case MLX4_RECV_OPCODE_SEND_INVAL:
		case MLX4_RECV_OPCODE_SEND_IMM:
		case MLX4_RECV_OPCODE_SEND:
			return IBV_WC_RECV;
		}
	}

	return 0;
}

static uint32_t mlx4_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
}

static unsigned int mlx4_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	int is_send  = cq->cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
	int wc_flags = 0;

	if (is_send) {
		switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
		case MLX4_OPCODE_RDMA_WRITE_IMM:
		case MLX4_OPCODE_SEND_IMM:
			wc_flags |= IBV_WC_WITH_IMM;
			break;
		}
	} else {
		if (cq->flags & MLX4_CQ_FLAGS_RX_CSUM_VALID)
			wc_flags |= ((cq->cqe->status &
				      htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) ==
				     htobe32(MLX4_CQE_STATUS_IPV4_CSUM_OK)) <<
				    IBV_WC_IP_CSUM_OK_SHIFT;

		switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
		case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
		case MLX4_RECV_OPCODE_SEND_IMM:
			wc_flags |= IBV_WC_WITH_IMM;
			break;
		case MLX4_RECV_OPCODE_SEND_INVAL:
			wc_flags |= IBV_WC_WITH_INV;
			break;
		}
		wc_flags |= (be32toh(cq->cqe->g_mlpath_rqpn) & 0x80000000) ? IBV_WC_GRH : 0;
	}

	return wc_flags;
}

static uint32_t mlx4_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe->byte_cnt);
}

static uint32_t mlx4_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx4_err_cqe *ecqe = (struct mlx4_err_cqe *)cq->cqe;

	return ecqe->vendor_err;
}

static __be32 mlx4_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	switch (mlx4dv_get_cqe_opcode(cq->cqe)) {
	case MLX4_RECV_OPCODE_SEND_INVAL:
		/* This returns the invalidated rkey, which is in host order;
		 * see ibv_wc_read_invalidated_rkey().
		 */
		return (__force __be32)be32toh(cq->cqe->immed_rss_invalid);
	default:
		return cq->cqe->immed_rss_invalid;
	}
}

static uint32_t mlx4_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return (uint32_t)be16toh(cq->cqe->rlid);
}

static uint8_t mlx4_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	if ((cq->cur_qp) && (cq->cur_qp->link_layer == IBV_LINK_LAYER_ETHERNET))
		return be16toh(cq->cqe->sl_vid) >> 13;
	else
		return be16toh(cq->cqe->sl_vid) >> 12;
}

static uint32_t mlx4_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe->g_mlpath_rqpn) & 0xffffff;
}

static uint8_t mlx4_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return (be32toh(cq->cqe->g_mlpath_rqpn) >> 24) & 0x7f;
}

static uint64_t mlx4_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
{
	struct mlx4_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return ((uint64_t)be32toh(cq->cqe->ts_47_16) << 16) |
			   (cq->cqe->ts_15_8   <<  8) |
			   (cq->cqe->ts_7_0);
}

void mlx4_cq_fill_pfns(struct mlx4_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr)
{

	if (cq->flags & MLX4_CQ_FLAGS_SINGLE_THREADED) {
		cq->verbs_cq.cq_ex.start_poll = mlx4_start_poll;
		cq->verbs_cq.cq_ex.end_poll = mlx4_end_poll;
	} else {
		cq->verbs_cq.cq_ex.start_poll = mlx4_start_poll_lock;
		cq->verbs_cq.cq_ex.end_poll = mlx4_end_poll_lock;
	}
	cq->verbs_cq.cq_ex.next_poll = mlx4_next_poll;

	cq->verbs_cq.cq_ex.read_opcode = mlx4_cq_read_wc_opcode;
	cq->verbs_cq.cq_ex.read_vendor_err = mlx4_cq_read_wc_vendor_err;
	cq->verbs_cq.cq_ex.read_wc_flags = mlx4_cq_read_wc_flags;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
		cq->verbs_cq.cq_ex.read_byte_len = mlx4_cq_read_wc_byte_len;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
		cq->verbs_cq.cq_ex.read_imm_data = mlx4_cq_read_wc_imm_data;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
		cq->verbs_cq.cq_ex.read_qp_num = mlx4_cq_read_wc_qp_num;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
		cq->verbs_cq.cq_ex.read_src_qp = mlx4_cq_read_wc_src_qp;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
		cq->verbs_cq.cq_ex.read_slid = mlx4_cq_read_wc_slid;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
		cq->verbs_cq.cq_ex.read_sl = mlx4_cq_read_wc_sl;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
		cq->verbs_cq.cq_ex.read_dlid_path_bits = mlx4_cq_read_wc_dlid_path_bits;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
		cq->verbs_cq.cq_ex.read_completion_ts = mlx4_cq_read_wc_completion_ts;
}
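
/*
 * Example (not part of the driver): the function pointers installed above
 * back the extended ibv_cq_ex polling API.  A minimal sketch, assuming
 * "cq_ex" was created with ibv_create_cq_ex() and wc_flags that include
 * IBV_WC_EX_WITH_BYTE_LEN:
 *
 *	struct ibv_poll_cq_attr attr = {};
 *	int ret = ibv_start_poll(cq_ex, &attr);        // -> mlx4_start_poll*()
 *
 *	if (ret == 0) {
 *		do {
 *			if (cq_ex->status == IBV_WC_SUCCESS)
 *				printf("wr_id %llu, %u bytes\n",
 *				       (unsigned long long)cq_ex->wr_id,
 *				       ibv_wc_read_byte_len(cq_ex));
 *		} while ((ret = ibv_next_poll(cq_ex)) == 0); // -> mlx4_next_poll()
 *		ibv_end_poll(cq_ex);                   // -> mlx4_end_poll*()
 *	}
 *
 * Note that ibv_end_poll() is only called when ibv_start_poll() succeeded,
 * since the ENOENT path above already drops the lock.
 */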

int mlx4_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
	struct mlx4_cq *cq = to_mcq(ibvcq);
	uint64_t doorbell;
	uint32_t sn;
	uint32_t ci;
	uint32_t cmd;

	sn  = cq->arm_sn & 3;
	ci  = cq->cons_index & 0xffffff;
	cmd = solicited ? MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT;

	doorbell = sn << 28 | cmd | cq->cqn;
	doorbell <<= 32;
	doorbell |= ci;

	*cq->arm_db = htobe32(sn << 28 | cmd | ci);

	/*
	 * Make sure that the doorbell record in host memory is
	 * written before ringing the doorbell via PCI MMIO.
	 */
	udma_to_device_barrier();

	mmio_write64_be(to_mctx(ibvcq->context)->uar + MLX4_CQ_DOORBELL,
			htobe64(doorbell));

	return 0;
}
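
/*
 * Example (not part of the driver): mlx4_arm_cq() is reached through
 * ibv_req_notify_cq().  A minimal event-driven sketch, assuming "cq" was
 * created on the completion channel "chan":
 *
 *	struct ibv_cq *ev_cq;
 *	void *ev_ctx;
 *
 *	ibv_req_notify_cq(cq, 0);                // arm for any completion
 *	ibv_get_cq_event(chan, &ev_cq, &ev_ctx); // blocks until the CQ fires
 *	ibv_ack_cq_events(ev_cq, 1);
 *	ibv_req_notify_cq(ev_cq, 0);             // re-arm before polling
 *	// ...then drain the CQ with ibv_poll_cq() as shown earlier...
 *
 * When the event is read, libibverbs invokes mlx4_cq_event() below, which
 * advances arm_sn so the next arm request uses a fresh sequence number.
 */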

void mlx4_cq_event(struct ibv_cq *cq)
{
	to_mcq(cq)->arm_sn++;
}

void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
{
	struct mlx4_cqe *cqe, *dest;
	uint32_t prod_index;
	uint8_t owner_bit;
	int nfreed = 0;
	int cqe_inc;

	/* Validate the CQ pointer before dereferencing it for cqe_size. */
	if (!cq || cq->flags & MLX4_CQ_FLAGS_DV_OWNED)
		return;

	cqe_inc = cq->cqe_size == 64 ? 1 : 0;

	/*
	 * First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
		if (prod_index == cq->cons_index + cq->verbs_cq.cq.cqe)
			break;

	/*
	 * Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->verbs_cq.cq.cqe);
		cqe += cqe_inc;
		if (srq && srq->ext_srq &&
		    (be32toh(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
			mlx4_free_srq_wqe(srq, be16toh(cqe->wqe_index));
			++nfreed;
		} else if ((be32toh(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
				mlx4_free_srq_wqe(srq, be16toh(cqe->wqe_index));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->verbs_cq.cq.cqe);
			dest += cqe_inc;
			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
			memcpy(dest, cqe, sizeof *cqe);
			dest->owner_sr_opcode = owner_bit |
				(dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->cons_index += nfreed;
		/*
		 * Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		udma_to_device_barrier();
		mlx4_update_cons_index(cq);
	}
}

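/*
 * Locked wrapper around __mlx4_cq_clean().  Typically used on the QP (or
 * SRQ) destroy path so stale completions for the destroyed object are
 * discarded before its resources can be reused.
 */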
void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
{
	pthread_spin_lock(&cq->lock);
	__mlx4_cq_clean(cq, qpn, srq);
	pthread_spin_unlock(&cq->lock);
}

int mlx4_get_outstanding_cqes(struct mlx4_cq *cq)
{
	uint32_t i;

	for (i = cq->cons_index; get_sw_cqe(cq, i); ++i)
		;

	return i - cq->cons_index;
}

void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int old_cqe)
{
	struct mlx4_cqe *cqe;
	int i;
	int cqe_inc = cq->cqe_size == 64 ? 1 : 0;

	i = cq->cons_index;
	cqe = get_cqe(cq, (i & old_cqe));
	cqe += cqe_inc;

	while ((mlx4dv_get_cqe_opcode(cqe)) != MLX4_CQE_OPCODE_RESIZE) {
		cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
			(((i + 1) & (cq->verbs_cq.cq.cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
		memcpy(buf + ((i + 1) & cq->verbs_cq.cq.cqe) * cq->cqe_size,
		       cqe - cqe_inc, cq->cqe_size);
		++i;
		cqe = get_cqe(cq, (i & old_cqe));
		cqe += cqe_inc;
	}

	++cq->cons_index;
}

int mlx4_alloc_cq_buf(struct mlx4_device *dev, struct mlx4_context *ctx,
		      struct mlx4_buf *buf, int nent, int entry_size)
{
	if (mlx4_alloc_buf(ctx, buf, align(nent * entry_size, dev->page_size),
			   dev->page_size))
		return -1;
	memset(buf->buf, 0, nent * entry_size);

	return 0;
}