| /* |
| * Copyright (c) 2006-2016 Chelsio, Inc. All rights reserved. |
| * |
| * This software is available to you under a choice of one of two |
| * licenses. You may choose to be licensed under the terms of the GNU |
| * General Public License (GPL) Version 2, available from the file |
| * COPYING in the main directory of this source tree, or the |
| * OpenIB.org BSD license below: |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials |
| * provided with the distribution. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #include <config.h> |
| |
| #include <assert.h> |
| #include <stdlib.h> |
| #include <pthread.h> |
| #include <string.h> |
| #include <stdio.h> |
| #include <util/compiler.h> |
| #include "libcxgb4.h" |
| |
| #ifdef STATS |
| struct c4iw_stats c4iw_stats; |
| #endif |
| |
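/*
 * Copy a WQE into the SQ queue memory in 16-byte units, wrapping back to
 * the start of the queue when the end is reached.  For on-chip queues the
 * destination is write-combining memory, so the copy is bracketed by
 * mmio_wc_start()/mmio_flush_writes() and len16 is rounded up to a
 * multiple of 4 (whole 64B slots).
 */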
| static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16) |
| { |
| u64 *src, *dst; |
| |
| src = (u64 *)wqe; |
| dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE); |
| if (t4_sq_onchip(wq)) { |
| len16 = align(len16, 4); |
| |
		/* In onchip mode the copy below will be made to WC memory
		 * and could trigger DMA.  In offchip mode the copy below
		 * only queues the WQE; DMA cannot start until
		 * t4_ring_sq_db() happens.
		 */
| mmio_wc_start(); |
| } |
| while (len16) { |
| *dst++ = *src++; |
| if (dst == (u64 *)&wq->sq.queue[wq->sq.size]) |
| dst = (u64 *)wq->sq.queue; |
| *dst++ = *src++; |
| if (dst == (u64 *)&wq->sq.queue[wq->sq.size]) |
| dst = (u64 *)wq->sq.queue; |
| len16--; |
| |
		/*
		 * NOTE: len16 cannot be large enough to write to the
		 * same sq.queue memory twice in this loop.
		 */
| } |
| |
| if (t4_sq_onchip(wq)) |
| mmio_flush_writes(); |
| } |
| |
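/*
 * Copy a receive WQE into the RQ queue memory in 16-byte units, wrapping
 * back to the start of the queue when the end is reached.
 */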
| static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16) |
| { |
| u64 *src, *dst; |
| |
| src = (u64 *)wqe; |
| dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE); |
| while (len16) { |
| *dst++ = *src++; |
| if (dst >= (u64 *)&wq->rq.queue[wq->rq.size]) |
| dst = (u64 *)wq->rq.queue; |
| *dst++ = *src++; |
| if (dst >= (u64 *)&wq->rq.queue[wq->rq.size]) |
| dst = (u64 *)wq->rq.queue; |
| len16--; |
| } |
| } |
| |
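/*
 * Copy a receive WQE into the SRQ queue memory in 16-byte units, wrapping
 * back to the start of the queue when the end is reached.
 */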
| void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) |
| { |
| u64 *src, *dst; |
| |
| src = (u64 *)wqe; |
| dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); |
| while (len16) { |
| *dst++ = *src++; |
| if (dst >= (u64 *)&srq->queue[srq->size]) |
| dst = (u64 *)srq->queue; |
| *dst++ = *src++; |
| if (dst >= (u64 *)&srq->queue[srq->size]) |
| dst = (u64 *)srq->queue; |
| len16--; |
| } |
| } |
| |
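/*
 * Gather the SGEs of an inline WR into the immediate-data segment of the
 * WQE.  Returns -EMSGSIZE if the total payload exceeds 'max'; otherwise
 * the data is zero-padded so that the 8-byte immediate header plus the
 * payload ends on a 16-byte boundary, and the payload length is returned
 * via plenp.
 */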
| static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, |
| struct ibv_send_wr *wr, int max, u32 *plenp) |
| { |
| u8 *dstp, *srcp; |
| u32 plen = 0; |
| int i; |
| int len; |
| |
| dstp = (u8 *)immdp->data; |
| for (i = 0; i < wr->num_sge; i++) { |
| if ((plen + wr->sg_list[i].length) > max) |
| return -EMSGSIZE; |
| srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; |
| plen += wr->sg_list[i].length; |
| len = wr->sg_list[i].length; |
| memcpy(dstp, srcp, len); |
| dstp += len; |
| srcp += len; |
| } |
| len = ROUND_UP(plen + 8, 16) - (plen + 8); |
| if (len) |
| memset(dstp, 0, len); |
| immdp->op = FW_RI_DATA_IMMD; |
| immdp->r1 = 0; |
| immdp->r2 = 0; |
| immdp->immdlen = htobe32(plen); |
| *plenp = plen; |
| return 0; |
| } |
| |
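/*
 * Build a firmware ISGL from an ibv_sge array, wrapping any writes that
 * run past queue_end back to queue_start.  Returns -EMSGSIZE if the
 * summed SGE lengths overflow, and optionally returns the total payload
 * length via plenp.
 */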
| static int build_isgl(__be64 *queue_start, __be64 *queue_end, |
| struct fw_ri_isgl *isglp, struct ibv_sge *sg_list, |
| int num_sge, u32 *plenp) |
| { |
| int i; |
| u32 plen = 0; |
| __be64 *flitp; |
| |
| if ((__be64 *)isglp == queue_end) |
| isglp = (struct fw_ri_isgl *)queue_start; |
| |
| flitp = (__be64 *)isglp->sge; |
| for (i = 0; i < num_sge; i++) { |
| if ((plen + sg_list[i].length) < plen) |
| return -EMSGSIZE; |
| plen += sg_list[i].length; |
| *flitp = htobe64(((u64)sg_list[i].lkey << 32) | |
| sg_list[i].length); |
| if (++flitp == queue_end) |
| flitp = queue_start; |
| *flitp = htobe64(sg_list[i].addr); |
| if (++flitp == queue_end) |
| flitp = queue_start; |
| } |
| *flitp = 0; |
| isglp->op = FW_RI_DATA_ISGL; |
| isglp->r1 = 0; |
| isglp->nsge = htobe16(num_sge); |
| isglp->r2 = 0; |
| if (plenp) |
| *plenp = plen; |
| return 0; |
| } |
| |
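/*
 * Build the body of a FW_RI_SEND_WR for IBV_WR_SEND and
 * IBV_WR_SEND_WITH_INV, using immediate data for inline sends and an
 * ISGL otherwise.  The WQE length in 16-byte units is returned via len16.
 */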
| static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, |
| struct ibv_send_wr *wr, u8 *len16) |
| { |
| u32 plen; |
| int size; |
| int ret; |
| |
| if (wr->num_sge > T4_MAX_SEND_SGE) |
| return -EINVAL; |
| switch (wr->opcode) { |
	case IBV_WR_SEND:
		if (wr->send_flags & IBV_SEND_SOLICITED)
			wqe->send.sendop_pkd = htobe32(
				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
		else
			wqe->send.sendop_pkd = htobe32(
				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
		wqe->send.stag_inv = 0;
		break;
	case IBV_WR_SEND_WITH_INV:
		if (wr->send_flags & IBV_SEND_SOLICITED)
			wqe->send.sendop_pkd = htobe32(
				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE_INV));
		else
			wqe->send.sendop_pkd = htobe32(
				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_INV));
		wqe->send.stag_inv = htobe32(wr->invalidate_rkey);
		break;
| default: |
| return -EINVAL; |
| } |
| wqe->send.r3 = 0; |
| wqe->send.r4 = 0; |
| |
| plen = 0; |
| if (wr->num_sge) { |
| if (wr->send_flags & IBV_SEND_INLINE) { |
| ret = build_immd(sq, wqe->send.u.immd_src, wr, |
| T4_MAX_SEND_INLINE, &plen); |
| if (ret) |
| return ret; |
| size = sizeof wqe->send + sizeof(struct fw_ri_immd) + |
| plen; |
| } else { |
| ret = build_isgl((__be64 *)sq->queue, |
| (__be64 *)&sq->queue[sq->size], |
| wqe->send.u.isgl_src, |
| wr->sg_list, wr->num_sge, &plen); |
| if (ret) |
| return ret; |
| size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + |
| wr->num_sge * sizeof (struct fw_ri_sge); |
| } |
| } else { |
| wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; |
| wqe->send.u.immd_src[0].r1 = 0; |
| wqe->send.u.immd_src[0].r2 = 0; |
| wqe->send.u.immd_src[0].immdlen = 0; |
| size = sizeof wqe->send + sizeof(struct fw_ri_immd); |
| plen = 0; |
| } |
| *len16 = DIV_ROUND_UP(size, 16); |
| wqe->send.plen = htobe32(plen); |
| return 0; |
| } |
| |
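/*
 * Build the body of a FW_RI_RDMA_WRITE_WR.  The immediate-data field
 * carries wr->imm_data for IBV_WR_RDMA_WRITE_WITH_IMM, and the source
 * buffer is described either inline or with an ISGL.  The WQE length in
 * 16-byte units is returned via len16.
 */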
| static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, |
| struct ibv_send_wr *wr, u8 *len16) |
| { |
| u32 plen; |
| int size; |
| int ret; |
| |
| if (wr->num_sge > T4_MAX_SEND_SGE) |
| return -EINVAL; |
| if (wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) |
| wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->imm_data; |
| else |
| wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; |
| wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey); |
| wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr); |
| if (wr->num_sge) { |
| if (wr->send_flags & IBV_SEND_INLINE) { |
| ret = build_immd(sq, wqe->write.u.immd_src, wr, |
| T4_MAX_WRITE_INLINE, &plen); |
| if (ret) |
| return ret; |
| size = sizeof wqe->write + sizeof(struct fw_ri_immd) + |
| plen; |
| } else { |
| ret = build_isgl((__be64 *)sq->queue, |
| (__be64 *)&sq->queue[sq->size], |
| wqe->write.u.isgl_src, |
| wr->sg_list, wr->num_sge, &plen); |
| if (ret) |
| return ret; |
| size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + |
| wr->num_sge * sizeof (struct fw_ri_sge); |
| } |
| } else { |
| wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; |
| wqe->write.u.immd_src[0].r1 = 0; |
| wqe->write.u.immd_src[0].r2 = 0; |
| wqe->write.u.immd_src[0].immdlen = 0; |
| size = sizeof wqe->write + sizeof(struct fw_ri_immd); |
| plen = 0; |
| } |
| *len16 = DIV_ROUND_UP(size, 16); |
| wqe->write.plen = htobe32(plen); |
| return 0; |
| } |
| |
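/*
 * Copy the fixed 16 bytes of inline payload for the completion SEND of a
 * write_cmpl WR into the WQE's immediate segment.
 */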
| static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp, |
| struct ibv_send_wr *wr) |
| { |
| memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16); |
| memset(immdp->r1, 0, 6); |
| immdp->op = FW_RI_DATA_IMMD; |
| immdp->immdlen = 16; |
| } |
| |
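/*
 * Build the combined WRITE + SEND (write_cmpl) WQE used by the
 * post_write_cmpl() fastpath: the WRITE payload ISGL plus either the
 * completion SEND's single SGE or its 16 bytes of inline data.
 */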
| static void build_rdma_write_cmpl(struct t4_sq *sq, |
| struct fw_ri_rdma_write_cmpl_wr *wcwr, |
| struct ibv_send_wr *wr, u8 *len16) |
| { |
| u32 plen; |
| int size; |
| |
| /* |
| * This code assumes the struct fields preceding the write isgl fit |
| * in one 64B WR slot. This is because the WQE is built directly in |
	 * the DMA queue, and wrapping is only handled by the code building
	 * SGLs.  I.e. the "fixed part" of the WR structs must all fit in 64B.
| * The WQE build code should probably be redesigned to avoid this |
| * restriction, but for now just add a static_assert() to catch if |
| * this WQE struct gets too big. |
| */ |
| static_assert(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) <= 64, |
| "WQE structure too BIG!"); |
| |
| wcwr->stag_sink = htobe32(wr->wr.rdma.rkey); |
| wcwr->to_sink = htobe64(wr->wr.rdma.remote_addr); |
| if (wr->next->opcode == IBV_WR_SEND) |
| wcwr->stag_inv = 0; |
| else |
| wcwr->stag_inv = htobe32(wr->next->invalidate_rkey); |
| wcwr->r2 = 0; |
| wcwr->r3 = 0; |
| |
| /* SEND_INV SGL */ |
| if (wr->next->send_flags & IBV_SEND_INLINE) |
| build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next); |
| else |
| build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], |
| &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL); |
| |
| /* WRITE SGL */ |
| build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], |
| wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen); |
| |
| size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) + |
| wr->num_sge * sizeof(struct fw_ri_sge); |
| wcwr->plen = htobe32(plen); |
| *len16 = DIV_ROUND_UP(size, 16); |
| } |
| |
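/*
 * Build a FW_RI_RDMA_READ_WR.  With no SGE a zero-length read is built,
 * using 2 as a placeholder stag for both source and sink.
 */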
| static int build_rdma_read(union t4_wr *wqe, struct ibv_send_wr *wr, u8 *len16) |
| { |
| if (wr->num_sge > 1) |
| return -EINVAL; |
| if (wr->num_sge) { |
| wqe->read.stag_src = htobe32(wr->wr.rdma.rkey); |
		wqe->read.to_src_hi = htobe32((u32)(wr->wr.rdma.remote_addr >> 32));
| wqe->read.to_src_lo = htobe32((u32)wr->wr.rdma.remote_addr); |
| wqe->read.stag_sink = htobe32(wr->sg_list[0].lkey); |
| wqe->read.plen = htobe32(wr->sg_list[0].length); |
| wqe->read.to_sink_hi = htobe32((u32)(wr->sg_list[0].addr >> 32)); |
| wqe->read.to_sink_lo = htobe32((u32)(wr->sg_list[0].addr)); |
| } else { |
| wqe->read.stag_src = htobe32(2); |
| wqe->read.to_src_hi = 0; |
| wqe->read.to_src_lo = 0; |
| wqe->read.stag_sink = htobe32(2); |
| wqe->read.plen = 0; |
| wqe->read.to_sink_hi = 0; |
| wqe->read.to_sink_lo = 0; |
| } |
| wqe->read.r2 = 0; |
| wqe->read.r5 = 0; |
| *len16 = DIV_ROUND_UP(sizeof wqe->read, 16); |
| return 0; |
| } |
| |
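/*
 * Build the ISGL for an RQ receive WQE.  len16 covers the recv header
 * plus one fw_ri_sge per SGE.
 */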
| static int build_rdma_recv(struct t4_rq *rq, union t4_recv_wr *wqe, |
| struct ibv_recv_wr *wr, u8 *len16) |
| { |
| int ret; |
| |
| ret = build_isgl((__be64 *)rq->queue, (__be64 *)&rq->queue[rq->size], |
| &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); |
| if (ret) |
| return ret; |
| *len16 = DIV_ROUND_UP(sizeof wqe->recv + |
| wr->num_sge * sizeof(struct fw_ri_sge), 16); |
| return 0; |
| } |
| |
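/*
 * Build the ISGL for an SRQ receive WQE.  The WQE is staged in a local
 * buffer, so the queue bounds passed to build_isgl() are simply the start
 * and end of that WQE.
 */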
| static int build_srq_recv(union t4_recv_wr *wqe, struct ibv_recv_wr *wr, |
| u8 *len16) |
| { |
| int ret; |
| |
| ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1), |
| &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); |
| if (ret) |
| return ret; |
| *len16 = DIV_ROUND_UP(sizeof(wqe->recv) + |
| wr->num_sge * sizeof(struct fw_ri_sge), 16); |
| return 0; |
| } |
| |
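/*
 * Ring the doorbell via the kernel: the new producer index is passed
 * through a modify_qp command (in the sq_psn/rq_psn attribute) so the
 * kernel can ring the doorbell on behalf of user space when user
 * doorbells are disabled.
 */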
| static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx) |
| { |
| struct ibv_modify_qp cmd = {}; |
| struct ibv_qp_attr attr; |
| int mask; |
| int __attribute__((unused)) ret; |
| |
	/* FIXME: Why do we need this barrier if the kernel is going to
	 * trigger the DMA?
	 */
| udma_to_device_barrier(); |
| if (qid == qhp->wq.sq.qid) { |
| attr.sq_psn = idx; |
| mask = IBV_QP_SQ_PSN; |
| } else { |
| attr.rq_psn = idx; |
| mask = IBV_QP_RQ_PSN; |
| } |
| ret = ibv_cmd_modify_qp(&qhp->ibv_qp, &attr, mask, &cmd, sizeof cmd); |
| assert(!ret); |
| } |
| |
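/*
 * Post a WRITE + SEND[_WITH_INV] chain as a single
 * FW_RI_RDMA_WRITE_CMPL_WR.  Two sw_sq entries are still consumed so
 * that completion processing sees the usual WRITE and SEND entries.
 */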
| static void post_write_cmpl(struct c4iw_qp *qhp, struct ibv_send_wr *wr) |
| { |
| bool send_signaled = (wr->next->send_flags & IBV_SEND_SIGNALED) || |
| qhp->sq_sig_all; |
| bool write_signaled = (wr->send_flags & IBV_SEND_SIGNALED) || |
| qhp->sq_sig_all; |
| struct t4_swsqe *swsqe; |
| union t4_wr *wqe; |
| u16 write_wrid; |
| u8 len16; |
| u16 idx; |
| |
| /* |
| * The sw_sq entries still look like a WRITE and a SEND and consume |
| * 2 slots. The FW WR, however, will be a single uber-WR. |
| */ |
| wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + |
| qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); |
| build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16); |
| |
| /* WRITE swsqe */ |
| swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; |
| swsqe->opcode = FW_RI_RDMA_WRITE; |
| swsqe->idx = qhp->wq.sq.pidx; |
| swsqe->complete = 0; |
| swsqe->signaled = write_signaled; |
| swsqe->flushed = 0; |
| swsqe->wr_id = wr->wr_id; |
| |
| write_wrid = qhp->wq.sq.pidx; |
| |
| /* just bump the sw_sq */ |
| qhp->wq.sq.in_use++; |
| if (++qhp->wq.sq.pidx == qhp->wq.sq.size) |
| qhp->wq.sq.pidx = 0; |
| |
| /* SEND swsqe */ |
| swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; |
| if (wr->next->opcode == IBV_WR_SEND) |
| swsqe->opcode = FW_RI_SEND; |
| else |
| swsqe->opcode = FW_RI_SEND_WITH_INV; |
| swsqe->idx = qhp->wq.sq.pidx; |
| swsqe->complete = 0; |
| swsqe->signaled = send_signaled; |
| swsqe->flushed = 0; |
| swsqe->wr_id = wr->next->wr_id; |
| |
| wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0; |
| wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx; |
| |
| init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR, |
| write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16); |
| t4_sq_produce(&qhp->wq, len16); |
| idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); |
| |
| t4_ring_sq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp), |
| len16, wqe); |
| } |
| |
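/*
 * Post a chain of send work requests.  Each WQE is built in a local
 * buffer and then copied into the SQ ring; the doorbell is rung once for
 * the whole chain, either directly or via the kernel when user doorbells
 * are disabled.
 */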
| int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, |
| struct ibv_send_wr **bad_wr) |
| { |
| int err = 0; |
| u8 uninitialized_var(len16); |
| enum fw_wr_opcodes fw_opcode; |
| enum fw_ri_wr_flags fw_flags; |
| struct c4iw_qp *qhp; |
| union t4_wr *wqe, lwqe; |
| u32 num_wrs; |
| struct t4_swsqe *swsqe; |
| u16 idx = 0; |
| |
| qhp = to_c4iw_qp(ibqp); |
| pthread_spin_lock(&qhp->lock); |
| if (t4_wq_in_error(&qhp->wq)) { |
| pthread_spin_unlock(&qhp->lock); |
| *bad_wr = wr; |
| return -EINVAL; |
| } |
| num_wrs = t4_sq_avail(&qhp->wq); |
| if (num_wrs == 0) { |
| pthread_spin_unlock(&qhp->lock); |
| *bad_wr = wr; |
| return -ENOMEM; |
| } |
| |
| /* |
	 * Fastpath for the NVMe-oF target WRITE + SEND_WITH_INV WR chain
	 * which is the response for small NVMe-oF READ requests.  If the
	 * chain is exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the
	 * sgl depths and lengths meet the requirements of the
	 * fw_ri_write_cmpl_wr work request, then build and post the
	 * write_cmpl WR.  If any of the tests below are not true, then we
	 * continue on with the traditional WRITE and SEND WRs.
| */ |
| if (qhp->rhp->write_cmpl_supported && |
| qhp->rhp->chip_version >= CHELSIO_T5 && |
| wr && wr->next && !wr->next->next && |
| wr->opcode == IBV_WR_RDMA_WRITE && wr->sg_list[0].length && |
| wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && |
| (wr->next->opcode == IBV_WR_SEND_WITH_INV || |
| wr->next->opcode == IBV_WR_SEND) && |
| wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && |
| wr->next->num_sge == 1 && num_wrs >= 2) { |
| post_write_cmpl(qhp, wr); |
| pthread_spin_unlock(&qhp->lock); |
| return 0; |
| } |
| |
| while (wr) { |
| if (num_wrs == 0) { |
| err = -ENOMEM; |
| *bad_wr = wr; |
| break; |
| } |
| |
| wqe = &lwqe; |
| fw_flags = 0; |
| if (wr->send_flags & IBV_SEND_SOLICITED) |
| fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; |
		if ((wr->send_flags & IBV_SEND_SIGNALED) || qhp->sq_sig_all)
| fw_flags |= FW_RI_COMPLETION_FLAG; |
| swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; |
| switch (wr->opcode) { |
| case IBV_WR_SEND_WITH_INV: |
| case IBV_WR_SEND: |
| INC_STAT(send); |
| if (wr->send_flags & IBV_SEND_FENCE) |
| fw_flags |= FW_RI_READ_FENCE_FLAG; |
| fw_opcode = FW_RI_SEND_WR; |
| if (wr->opcode == IBV_WR_SEND) |
| swsqe->opcode = FW_RI_SEND; |
| else |
| swsqe->opcode = FW_RI_SEND_WITH_INV; |
| err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); |
| break; |
| case IBV_WR_RDMA_WRITE_WITH_IMM: |
| if (unlikely(!(qhp->wq.sq.flags & T4_SQ_WRITE_W_IMM))) { |
| err = -EINVAL; |
| break; |
| } |
| fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; |
| /*FALLTHROUGH*/ |
| case IBV_WR_RDMA_WRITE: |
| INC_STAT(write); |
| fw_opcode = FW_RI_RDMA_WRITE_WR; |
| swsqe->opcode = FW_RI_RDMA_WRITE; |
| err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); |
| break; |
| case IBV_WR_RDMA_READ: |
| INC_STAT(read); |
| fw_opcode = FW_RI_RDMA_READ_WR; |
| swsqe->opcode = FW_RI_READ_REQ; |
| fw_flags = 0; |
| err = build_rdma_read(wqe, wr, &len16); |
| if (err) |
| break; |
| swsqe->read_len = wr->sg_list ? wr->sg_list[0].length : |
| 0; |
| if (!qhp->wq.sq.oldest_read) |
| qhp->wq.sq.oldest_read = swsqe; |
| break; |
| default: |
| PDBG("%s post of type=%d TBD!\n", __func__, |
| wr->opcode); |
| err = -EINVAL; |
| } |
| if (err) { |
| *bad_wr = wr; |
| break; |
| } |
| swsqe->idx = qhp->wq.sq.pidx; |
| swsqe->complete = 0; |
| swsqe->signaled = (wr->send_flags & IBV_SEND_SIGNALED) || |
| qhp->sq_sig_all; |
| swsqe->flushed = 0; |
| swsqe->wr_id = wr->wr_id; |
| |
| init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); |
| PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x\n", |
| __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, |
| swsqe->opcode); |
| wr = wr->next; |
| num_wrs--; |
| copy_wr_to_sq(&qhp->wq, wqe, len16); |
| t4_sq_produce(&qhp->wq, len16); |
| idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); |
| } |
	if (t4_wq_db_enabled(&qhp->wq)) {
		t4_ring_sq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp),
			      len16, wqe);
	} else {
		ring_kernel_db(qhp, qhp->wq.sq.qid, idx);
	}
	/* This write is only for debugging; the value does not matter for
	 * DMA.
	 */
	qhp->wq.sq.queue[qhp->wq.sq.size].status.host_wq_pidx =
		qhp->wq.sq.wq_pidx;
| |
| pthread_spin_unlock(&qhp->lock); |
| return err; |
| } |
| |
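/*
 * Queue a receive WQE on the SRQ's pending-WR ring instead of writing it
 * to the hardware queue immediately.
 */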
| static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, |
| uint64_t wr_id, u8 len16) |
| { |
| struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; |
| |
| PDBG("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n", |
| __func__, srq->cidx, srq->pidx, srq->wq_pidx, |
| srq->in_use, srq->ooo_count, (unsigned long long)wr_id, |
| srq->pending_cidx, srq->pending_pidx, srq->pending_in_use); |
| pwr->wr_id = wr_id; |
| pwr->len16 = len16; |
| memcpy(&pwr->wqe, wqe, len16*16); |
| t4_srq_produce_pending_wr(srq); |
| } |
| |
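/*
 * Post receive work requests to an SRQ.  If the SRQ has out-of-order
 * completions, pending WRs, or a still-valid slot at the producer index,
 * new WRs are deferred rather than written to the queue directly.
 */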
| int c4iw_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, |
| struct ibv_recv_wr **bad_wr) |
| { |
| int err = 0; |
| struct c4iw_srq *srq; |
| union t4_recv_wr *wqe, lwqe; |
| u32 num_wrs; |
| u8 len16 = 0; |
| u16 idx = 0; |
| |
| srq = to_c4iw_srq(ibsrq); |
| pthread_spin_lock(&srq->lock); |
| INC_STAT(srq_recv); |
| num_wrs = t4_srq_avail(&srq->wq); |
	if (num_wrs == 0) {
		pthread_spin_unlock(&srq->lock);
		*bad_wr = wr;
		return -ENOMEM;
	}
| while (wr) { |
| if (wr->num_sge > T4_MAX_RECV_SGE) { |
| err = -EINVAL; |
| *bad_wr = wr; |
| break; |
| } |
| wqe = &lwqe; |
| if (num_wrs) |
| err = build_srq_recv(wqe, wr, &len16); |
| else |
| err = -ENOMEM; |
| if (err) { |
| *bad_wr = wr; |
| break; |
| } |
| |
| wqe->recv.opcode = FW_RI_RECV_WR; |
| wqe->recv.r1 = 0; |
| wqe->recv.wrid = srq->wq.pidx; |
| wqe->recv.r2[0] = 0; |
| wqe->recv.r2[1] = 0; |
| wqe->recv.r2[2] = 0; |
| wqe->recv.len16 = len16; |
| |
		if (srq->wq.ooo_count || srq->wq.pending_in_use ||
		    srq->wq.sw_rq[srq->wq.pidx].valid) {
			defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
		} else {
| srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; |
| srq->wq.sw_rq[srq->wq.pidx].valid = 1; |
| c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); |
| PDBG("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n", |
| __func__, srq->wq.cidx, srq->wq.pidx, |
| srq->wq.wq_pidx, srq->wq.in_use, |
| (unsigned long long)wr->wr_id); |
| t4_srq_produce(&srq->wq, len16); |
| idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); |
| } |
| wr = wr->next; |
| num_wrs--; |
| } |
| |
| if (idx) { |
| t4_ring_srq_db(&srq->wq, idx, len16, wqe); |
| srq->wq.queue[srq->wq.size].status.host_wq_pidx = |
| srq->wq.wq_pidx; |
| } |
| pthread_spin_unlock(&srq->lock); |
| return err; |
| } |
| |
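/* Post a chain of receive work requests to the QP's RQ. */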
| int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, |
| struct ibv_recv_wr **bad_wr) |
| { |
| int err = 0; |
| struct c4iw_qp *qhp; |
| union t4_recv_wr *wqe, lwqe; |
| u32 num_wrs; |
| u8 len16 = 0; |
| u16 idx = 0; |
| |
| qhp = to_c4iw_qp(ibqp); |
| pthread_spin_lock(&qhp->lock); |
| if (t4_wq_in_error(&qhp->wq)) { |
| pthread_spin_unlock(&qhp->lock); |
| *bad_wr = wr; |
| return -EINVAL; |
| } |
| INC_STAT(recv); |
| num_wrs = t4_rq_avail(&qhp->wq); |
| if (num_wrs == 0) { |
| pthread_spin_unlock(&qhp->lock); |
| *bad_wr = wr; |
| return -ENOMEM; |
| } |
| while (wr) { |
| if (wr->num_sge > T4_MAX_RECV_SGE) { |
| err = -EINVAL; |
| *bad_wr = wr; |
| break; |
| } |
| wqe = &lwqe; |
| if (num_wrs) |
| err = build_rdma_recv(&qhp->wq.rq, wqe, wr, &len16); |
| else |
| err = -ENOMEM; |
| if (err) { |
| *bad_wr = wr; |
| break; |
| } |
| |
| qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; |
| |
| wqe->recv.opcode = FW_RI_RECV_WR; |
| wqe->recv.r1 = 0; |
| wqe->recv.wrid = qhp->wq.rq.pidx; |
| wqe->recv.r2[0] = 0; |
| wqe->recv.r2[1] = 0; |
| wqe->recv.r2[2] = 0; |
| wqe->recv.len16 = len16; |
| PDBG("%s cookie 0x%llx pidx %u\n", __func__, |
| (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); |
| copy_wr_to_rq(&qhp->wq, wqe, len16); |
| t4_rq_produce(&qhp->wq, len16); |
| idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); |
| wr = wr->next; |
| num_wrs--; |
| } |
| if (t4_wq_db_enabled(&qhp->wq)) |
| t4_ring_rq_db(&qhp->wq, idx, dev_is_t4(qhp->rhp), |
| len16, wqe); |
| else |
| ring_kernel_db(qhp, qhp->wq.rq.qid, idx); |
	qhp->wq.rq.queue[qhp->wq.rq.size].status.host_wq_pidx =
		qhp->wq.rq.wq_pidx;
| pthread_spin_unlock(&qhp->lock); |
| return err; |
| } |
| |
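/*
 * Move the QP to the ERROR state and flush outstanding work on the SQ,
 * RQ (or SRQ) and the associated CQs.  CQ locks are taken before the QP
 * lock, per the locking hierarchy noted below.
 */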
| void c4iw_flush_qp(struct c4iw_qp *qhp) |
| { |
| struct c4iw_cq *rchp, *schp; |
| u32 srqidx; |
| int count; |
| |
| srqidx = t4_wq_srqidx(&qhp->wq); |
| rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq); |
| schp = to_c4iw_cq(qhp->ibv_qp.send_cq); |
| |
| PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); |
| |
	/* locking hierarchy: cq lock first, then qp lock. */
| pthread_spin_lock(&rchp->lock); |
| if (schp != rchp) |
| pthread_spin_lock(&schp->lock); |
| pthread_spin_lock(&qhp->lock); |
| |
| if (qhp->wq.flushed) { |
| pthread_spin_unlock(&qhp->lock); |
| if (rchp != schp) |
| pthread_spin_unlock(&schp->lock); |
| pthread_spin_unlock(&rchp->lock); |
| return; |
| } |
| |
| qhp->wq.flushed = 1; |
| t4_set_wq_in_error(&qhp->wq); |
| |
| if (qhp->srq) |
| pthread_spin_lock(&qhp->srq->lock); |
| |
| if (srqidx) |
| c4iw_flush_srqidx(qhp, srqidx); |
| |
| qhp->ibv_qp.state = IBV_QPS_ERR; |
| |
| c4iw_flush_hw_cq(rchp, qhp); |
| if (!qhp->srq) { |
| c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); |
| c4iw_flush_rq(&qhp->wq, &rchp->cq, count); |
| } |
| |
| if (schp != rchp) |
| c4iw_flush_hw_cq(schp, qhp); |
| |
| c4iw_flush_sq(qhp); |
| if (qhp->srq) |
| pthread_spin_unlock(&qhp->srq->lock); |
| |
| pthread_spin_unlock(&qhp->lock); |
| if (schp != rchp) |
| pthread_spin_unlock(&schp->lock); |
	pthread_spin_unlock(&rchp->lock);
}
| |
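/* Flush every QP on the device that is in error and not yet flushed. */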
| void c4iw_flush_qps(struct c4iw_dev *dev) |
| { |
| int i; |
| |
| pthread_spin_lock(&dev->lock); |
	for (i = 0; i < dev->max_qp; i++) {
		struct c4iw_qp *qhp = dev->qpid2ptr[i];

		if (qhp && !qhp->wq.flushed && t4_wq_in_error(&qhp->wq))
			c4iw_flush_qp(qhp);
	}
| pthread_spin_unlock(&dev->lock); |
| } |