author     Linus Torvalds <torvalds@linux-foundation.org>  2024-01-12 13:52:21 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-01-12 13:52:21 -0800
commit     bf9ca811bbadd7d853469d58284ed87906cc9321 (patch)
tree       ee7803b1615dbf0f9af6df402af7a63ed983ad37
parent     141d9c6e003b806d8faeddeec7053ee2691ea61a (diff)
parent     d24b923f1d696ddacb09f0f2d1b1f4f045cfe65e (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Small cycle, with some typical driver updates:
- General code tidying in siw, hfi1, irdma, usnic, hns, rtrs and
bnxt_re
- Many small siw cleanups without an overarching theme
- Debugfs stats for hns
- Fix a TX queue timeout in IPoIB and missed locking of the mcast
list
- Support more features of P7 devices in bnxt_re including a new work
submission protocol
- CQ interrupts for MANA
- netlink stats for erdma
- EFA multipath PCI support
- Fix incorrect MR invalidation in iser"
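
The "new work submission protocol" and broader Gen P7 enablement mentioned above rework how bnxt_re walks its hardware queues: the diff below drops the old "raw consumer index vs. queue size" validity test in favour of an epoch (toggle) bit that software flips on every wrap and compares against the toggle bit the hardware writes into each entry (see bnxt_qplib_hwq_incr_cons() and the reworked CQE_CMP_VALID()/NQE_CMP_VALID()/CREQ_CMP_VALID() macros). A minimal user-space sketch of that pattern, with made-up names and none of the driver's locking or doorbell handling:

#include <stdbool.h>
#include <stdint.h>

/* One software-tracked completion/notification queue. */
struct sw_queue {
    uint32_t cons;          /* consumer index, kept below max_elements */
    uint32_t max_elements;  /* queue depth in entries */
    uint32_t epoch;         /* flips 0 <-> 1 on every wrap-around */
};

/* Advance the consumer index; flip the epoch bit when the index wraps. */
static void queue_incr_cons(struct sw_queue *q, uint32_t cnt)
{
    q->cons += cnt;
    if (q->cons >= q->max_elements) {
        q->cons %= q->max_elements;
        q->epoch ^= 1;
    }
}

/* Hardware writes a toggle bit into each entry it produces. In this sketch an
 * entry is new when that bit disagrees with the lap software has finished:
 * while epoch == 0 only toggle == 1 entries are valid, and vice versa.
 */
static bool entry_valid(uint32_t entry_toggle_bit, const struct sw_queue *q)
{
    return !!entry_toggle_bit == !q->epoch;
}

The doorbell changes in qplib_res.h below carry the same epoch/toggle state out to the hardware (BNXT_QPLIB_DBR_EPOCH_SHIFT, BNXT_QPLIB_DBR_TOGGLE_SHIFT), which is why every bnxt_qplib_db_info now tracks a flags word.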
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (66 commits)
RDMA/bnxt_re: Fix error code in bnxt_re_create_cq()
RDMA/efa: Add EFA query MR support
IB/iser: Prevent invalidating wrong MR
RDMA/erdma: Add hardware statistics support
RDMA/erdma: Introduce dma pool for hardware responses of CMDQ requests
IB/iser: iscsi_iser.h: fix kernel-doc warning and spellos
RDMA/mana_ib: Add CQ interrupt support for RAW QP
RDMA/mana_ib: query device capabilities
RDMA/mana_ib: register RDMA device with GDMA
RDMA/bnxt_re: Fix the sparse warnings
RDMA/bnxt_re: Fix the offset for GenP7 adapters for user applications
RDMA/bnxt_re: Share a page to expose per CQ info with userspace
RDMA/bnxt_re: Add UAPI to share a page with user space
IB/ipoib: Fix mcast list locking
RDMA/mlx5: Expose register c0 for RDMA device
net/mlx5: E-Switch, expose eswitch manager vport
net/mlx5: Manage ICM type of SW encap
RDMA/mlx5: Support handling of SW encap ICM area
net/mlx5: Introduce indirect-sw-encap ICM properties
RDMA/bnxt_re: Adds MSN table capability for Gen P7 adapters
...
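
The MSN-table entry near the end of this list ("Adds MSN table capability for Gen P7 adapters") shows up in the qplib_fp.c/roce_hsi.h hunks below: when the firmware reports hardware requester retransmission (BNXT_RE_HW_RETX()), the SQ's PSN-search rows are replaced by packed 64-bit MSN-search rows. A stand-alone sketch of the packing, reusing the SQ_MSN_SEARCH_* masks the diff adds; the helper name here is illustrative (the driver's version, bnxt_re_update_msn_tbl(), returns a little-endian __le64):

#include <stdint.h>

#define SQ_MSN_SEARCH_START_PSN_MASK   0xffffffULL
#define SQ_MSN_SEARCH_START_PSN_SFT    0
#define SQ_MSN_SEARCH_NEXT_PSN_MASK    0xffffff000000ULL
#define SQ_MSN_SEARCH_NEXT_PSN_SFT     24
#define SQ_MSN_SEARCH_START_IDX_MASK   0xffff000000000000ULL
#define SQ_MSN_SEARCH_START_IDX_SFT    48

/* Pack one MSN-table row: 16-bit SQ slot index, 24-bit next PSN and
 * 24-bit start PSN of the work request (CPU endianness only here).
 */
static uint64_t msn_table_entry(uint32_t start_idx, uint32_t next_psn,
                                uint32_t start_psn)
{
    return (((uint64_t)start_idx << SQ_MSN_SEARCH_START_IDX_SFT) &
            SQ_MSN_SEARCH_START_IDX_MASK) |
           (((uint64_t)next_psn << SQ_MSN_SEARCH_NEXT_PSN_SFT) &
            SQ_MSN_SEARCH_NEXT_PSN_MASK) |
           (((uint64_t)start_psn << SQ_MSN_SEARCH_START_PSN_SFT) &
            SQ_MSN_SEARCH_START_PSN_MASK);
}

In the diff, bnxt_qplib_fill_msn_search() advances qp->msn modulo qp->msn_tbl_sz, with the table depth rounded up to a power of two of the SQ size in bnxt_qplib_create_qp().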
81 files changed, 1921 insertions, 638 deletions
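
Several hunks below also replace roundup_pow_of_two() on queue depths with bnxt_re_init_depth(), which honours an opt-out negotiated at ucontext allocation (BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT in the request, BNXT_RE_UCNTX_CMASK_POW2_DISABLED echoed in the response and cached in uctx->cmask). A hedged user-space restatement of that helper follows; the flag value and the roundup routine are stand-ins, not the real ABI constants:

#include <stdint.h>

#define UCNTX_CMASK_POW2_DISABLED 0x1ULL  /* illustrative value, not the ABI one */

struct ucontext_caps {
    uint64_t cmask;  /* capability mask negotiated with the driver */
};

/* Stand-in for the kernel's roundup_pow_of_two(). */
static uint32_t roundup_pow2(uint32_t n)
{
    uint32_t p = 1;

    while (p < n)
        p <<= 1;
    return p;
}

/* Mirrors bnxt_re_init_depth(): without a user context the requested depth is
 * used as-is; with one, it is rounded up to a power of two unless the context
 * negotiated the opt-out.
 */
static uint32_t init_depth(uint32_t ent, const struct ucontext_caps *uctx)
{
    if (!uctx)
        return ent;
    return (uctx->cmask & UCNTX_CMASK_POW2_DISABLED) ? ent : roundup_pow2(ent);
}

The udata->inlen check in bnxt_re_alloc_ucontext() below keeps older user libraries, which pass a smaller request structure, on the power-of-two path.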
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 9fd9849ebdd1..9dca451ed522 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -41,6 +41,7 @@ #define __BNXT_RE_H__ #include <rdma/uverbs_ioctl.h> #include "hw_counters.h" +#include <linux/hashtable.h> #define ROCE_DRV_MODULE_NAME "bnxt_re" #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" @@ -135,6 +136,7 @@ struct bnxt_re_pacing { #define BNXT_RE_DB_FIFO_ROOM_SHIFT 15 #define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 +#define MAX_CQ_HASH_BITS (16) struct bnxt_re_dev { struct ib_device ibdev; struct list_head list; @@ -189,6 +191,7 @@ struct bnxt_re_dev { struct bnxt_re_pacing pacing; struct work_struct dbq_fifo_check_work; struct delayed_work dbq_pacing_work; + DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS); }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 93572405d6fa..128651c01595 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -371,7 +371,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, } done: - return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS; } @@ -381,7 +381,7 @@ struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); int num_counters = 0; - if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) num_counters = BNXT_RE_NUM_EXT_COUNTERS; else num_counters = BNXT_RE_NUM_STD_COUNTERS; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index faa88d12ee86..824349659d69 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -50,6 +50,7 @@ #include <rdma/ib_mad.h> #include <rdma/ib_cache.h> #include <rdma/uverbs_ioctl.h> +#include <linux/hashtable.h> #include "bnxt_ulp.h" @@ -567,6 +568,7 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset, case BNXT_RE_MMAP_WC_DB: case BNXT_RE_MMAP_DBR_BAR: case BNXT_RE_MMAP_DBR_PAGE: + case BNXT_RE_MMAP_TOGGLE_PAGE: ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx, &entry->rdma_entry, PAGE_SIZE); break; @@ -1023,7 +1025,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? @@ -1184,7 +1186,8 @@ fail: } static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct bnxt_re_ucontext *uctx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; @@ -1213,7 +1216,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, /* Allocate 1 more than what's provided so posting max doesn't * mean empty. 
*/ - entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1); + entries = bnxt_re_init_depth(init_attr->cap.max_recv_wr + 1, uctx); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); rq->q_full_delta = 0; rq->sg_info.pgsize = PAGE_SIZE; @@ -1233,7 +1236,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; if (qplqp->rq.max_sge > dev_attr->max_qp_sges) qplqp->rq.max_sge = dev_attr->max_qp_sges; @@ -1243,7 +1246,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) + struct bnxt_re_ucontext *uctx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; @@ -1272,7 +1275,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, /* Allocate 128 + 1 more than what's provided */ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ? 0 : BNXT_QPLIB_RESERVED_QP_WRS; - entries = roundup_pow_of_two(entries + diff + 1); + entries = bnxt_re_init_depth(entries + diff + 1, uctx); sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); sq->q_full_delta = diff + 1; /* @@ -1288,7 +1291,8 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct bnxt_re_ucontext *uctx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; @@ -1299,8 +1303,8 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { - entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1); + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { + entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qplqp->sq.q_full_delta = qplqp->sq.max_wqe - @@ -1326,7 +1330,7 @@ static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, goto out; } - if (bnxt_qplib_is_chip_gen_p5(chip_ctx) && + if (bnxt_qplib_is_chip_gen_p5_p7(chip_ctx) && init_attr->qp_type == IB_QPT_GSI) qptype = CMDQ_CREATE_QP_TYPE_GSI; out: @@ -1338,6 +1342,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; + struct bnxt_re_ucontext *uctx; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_re_cq *cq; @@ -1347,6 +1352,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, qplqp = &qp->qplib_qp; dev_attr = &rdev->dev_attr; + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); /* Setup misc params */ ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr); qplqp->pd = &pd->qplib_pd; @@ -1388,18 +1394,18 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, } /* Setup RQ/SRQ */ - rc = bnxt_re_init_rq_attr(qp, init_attr); + rc = bnxt_re_init_rq_attr(qp, init_attr, uctx); if (rc) goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ - rc = bnxt_re_init_sq_attr(qp, init_attr, udata); + rc = bnxt_re_init_sq_attr(qp, init_attr, uctx); if (rc) goto out; if (init_attr->qp_type == IB_QPT_GSI) - bnxt_re_adjust_gsi_sq_attr(qp, init_attr); + 
bnxt_re_adjust_gsi_sq_attr(qp, init_attr, uctx); if (udata) /* This will update DPI and qp_handle */ rc = bnxt_re_init_user_qp(rdev, pd, qp, udata); @@ -1523,7 +1529,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, goto fail; if (qp_init_attr->qp_type == IB_QPT_GSI && - !(bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))) { + !(bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))) { rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr); if (rc == -ENODEV) goto qp_destroy; @@ -1715,6 +1721,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_nq *nq = NULL; + struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; struct bnxt_re_pd *pd; @@ -1739,13 +1746,14 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto exit; } + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); srq->rdev = rdev; srq->qplib_srq.pd = &pd->qplib_pd; srq->qplib_srq.dpi = &rdev->dpi_privileged; /* Allocate 1 more than what's provided so posting max doesn't * mean empty */ - entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); + entries = bnxt_re_init_depth(srq_init_attr->attr.max_wr + 1, uctx); if (entries > dev_attr->max_srq_wqes + 1) entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.max_wqe = entries; @@ -2103,6 +2111,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic; } if (qp_attr_mask & IB_QP_CAP) { + struct bnxt_re_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); + qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE | CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE | @@ -2119,7 +2130,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, "Create QP failed - max exceeded"); return -EINVAL; } - entries = roundup_pow_of_two(qp_attr->cap.max_send_wr); + entries = bnxt_re_init_depth(qp_attr->cap.max_send_wr, uctx); qp->qplib_qp.sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe - @@ -2132,7 +2143,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp->qplib_qp.sq.q_full_delta -= 1; qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge; if (qp->qplib_qp.rq.max_wqe) { - entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr); + entries = bnxt_re_init_depth(qp_attr->cap.max_recv_wr, uctx); qp->qplib_qp.rq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe - @@ -2900,14 +2911,20 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, /* Completion Queues */ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { - struct bnxt_re_cq *cq; + struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_nq *nq; struct bnxt_re_dev *rdev; + struct bnxt_re_cq *cq; cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); rdev = cq->rdev; nq = cq->qplib_cq.nq; + cctx = rdev->chip_ctx; + if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) { + free_page((unsigned long)cq->uctx_cq_page); + hash_del(&cq->hash_entry); + } bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); ib_umem_release(cq->umem); @@ -2920,13 +2937,16 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { + struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); struct bnxt_re_dev *rdev = 
to_bnxt_re_dev(ibcq->device, ibdev); + struct bnxt_re_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; - struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); - int rc, entries; - int cqe = attr->cqe; + struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + int cqe = attr->cqe; + int rc, entries; u32 active_cqs; if (attr->flags) @@ -2939,9 +2959,10 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->rdev = rdev; + cctx = rdev->chip_ctx; cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq); - entries = roundup_pow_of_two(cqe + 1); + entries = bnxt_re_init_depth(cqe + 1, uctx); if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; @@ -2949,8 +2970,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT; if (udata) { struct bnxt_re_cq_req req; - struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context( - udata, struct bnxt_re_ucontext, ib_uctx); if (ib_copy_from_udata(&req, udata, sizeof(req))) { rc = -EFAULT; goto fail; @@ -3002,22 +3021,34 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, spin_lock_init(&cq->cq_lock); if (udata) { - struct bnxt_re_cq_resp resp; + struct bnxt_re_cq_resp resp = {}; + if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) { + hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id); + /* Allocate a page */ + cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!cq->uctx_cq_page) { + rc = -ENOMEM; + goto c2fail; + } + resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT; + } resp.cqid = cq->qplib_cq.id; resp.tail = cq->qplib_cq.hwq.cons; resp.phase = cq->qplib_cq.period; resp.rsvd = 0; - rc = ib_copy_to_udata(udata, &resp, sizeof(resp)); + rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to copy CQ udata"); bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); - goto c2fail; + goto free_mem; } } return 0; +free_mem: + free_page((unsigned long)cq->uctx_cq_page); c2fail: ib_umem_release(cq->umem); fail: @@ -3072,12 +3103,11 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) return -EINVAL; } - entries = roundup_pow_of_two(cqe + 1); + uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); + entries = bnxt_re_init_depth(cqe + 1, uctx); if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; - uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, - ib_uctx); /* uverbs consumer */ if (ib_copy_from_udata(&req, udata, sizeof(req))) { rc = -EFAULT; @@ -4108,6 +4138,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_user_mmap_entry *entry; struct bnxt_re_uctx_resp resp = {}; + struct bnxt_re_uctx_req ureq = {}; u32 chip_met_rev_num = 0; int rc; @@ -4157,6 +4188,16 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) if (rdev->pacing.dbr_pacing) resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED; + if (udata->inlen >= sizeof(ureq)) { + rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); + if (rc) + goto cfail; + if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT) { + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED; + uctx->cmask |= 
BNXT_RE_UCNTX_CMASK_POW2_DISABLED; + } + } + rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { ibdev_err(ibdev, "Failed to copy user context"); @@ -4194,6 +4235,19 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) } } +static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id) +{ + struct bnxt_re_cq *cq = NULL, *tmp_cq; + + hash_for_each_possible(rdev->cq_hash, tmp_cq, hash_entry, cq_id) { + if (tmp_cq->qplib_cq.id == cq_id) { + cq = tmp_cq; + break; + } + } + return cq; +} + /* Helper function to mmap the virtual memory from user app */ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) { @@ -4235,6 +4289,7 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) rdma_entry); break; case BNXT_RE_MMAP_DBR_PAGE: + case BNXT_RE_MMAP_TOGGLE_PAGE: /* Driver doesn't expect write access for user space */ if (vma->vm_flags & VM_WRITE) return -EFAULT; @@ -4411,8 +4466,126 @@ DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV); DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV, &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV)); +/* Toggle MEM */ +static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_TOGGLE_MEM_HANDLE); + enum bnxt_re_mmap_flag mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE; + enum bnxt_re_get_toggle_mem_type res_type; + struct bnxt_re_user_mmap_entry *entry; + struct bnxt_re_ucontext *uctx; + struct ib_ucontext *ib_uctx; + struct bnxt_re_dev *rdev; + struct bnxt_re_cq *cq; + u64 mem_offset; + u64 addr = 0; + u32 length; + u32 offset; + u32 cq_id; + int err; + + ib_uctx = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ib_uctx)) + return PTR_ERR(ib_uctx); + + err = uverbs_get_const(&res_type, attrs, BNXT_RE_TOGGLE_MEM_TYPE); + if (err) + return err; + + uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx); + rdev = uctx->rdev; + + switch (res_type) { + case BNXT_RE_CQ_TOGGLE_MEM: + err = uverbs_copy_from(&cq_id, attrs, BNXT_RE_TOGGLE_MEM_RES_ID); + if (err) + return err; + + cq = bnxt_re_search_for_cq(rdev, cq_id); + if (!cq) + return -EINVAL; + + length = PAGE_SIZE; + addr = (u64)cq->uctx_cq_page; + mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE; + offset = 0; + break; + case BNXT_RE_SRQ_TOGGLE_MEM: + break; + + default: + return -EOPNOTSUPP; + } + + entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mem_offset); + if (!entry) + return -ENOMEM; + + uobj->object = entry; + uverbs_finalize_uobj_create(attrs, BNXT_RE_TOGGLE_MEM_HANDLE); + err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_PAGE, + &mem_offset, sizeof(mem_offset)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, + &offset, sizeof(length)); + if (err) + return err; + + return 0; +} + +static int get_toggle_mem_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct bnxt_re_user_mmap_entry *entry = uobject->object; + + rdma_user_mmap_entry_remove(&entry->rdma_entry); + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM, + UVERBS_ATTR_IDR(BNXT_RE_TOGGLE_MEM_HANDLE, + BNXT_RE_OBJECT_GET_TOGGLE_MEM, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(BNXT_RE_TOGGLE_MEM_TYPE, + enum bnxt_re_get_toggle_mem_type, + UA_MANDATORY), + 
UVERBS_ATTR_PTR_IN(BNXT_RE_TOGGLE_MEM_RES_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_PAGE, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM, + UVERBS_ATTR_IDR(BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE, + BNXT_RE_OBJECT_GET_TOGGLE_MEM, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_GET_TOGGLE_MEM, + UVERBS_TYPE_ALLOC_IDR(get_toggle_mem_obj_cleanup), + &UVERBS_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM), + &UVERBS_METHOD(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM)); + const struct uapi_definition bnxt_re_uapi_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE), UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_GET_TOGGLE_MEM), {} }; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 84715b7e7a4e..b267d6d5975f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -108,6 +108,8 @@ struct bnxt_re_cq { struct ib_umem *umem; struct ib_umem *resize_umem; int resize_cqe; + void *uctx_cq_page; + struct hlist_node hash_entry; }; struct bnxt_re_mr { @@ -140,6 +142,7 @@ struct bnxt_re_ucontext { void *shpg; spinlock_t sh_lock; /* protect shpg */ struct rdma_user_mmap_entry *shpage_mmap; + u64 cmask; }; enum bnxt_re_mmap_flag { @@ -148,6 +151,7 @@ enum bnxt_re_mmap_flag { BNXT_RE_MMAP_WC_DB, BNXT_RE_MMAP_DBR_PAGE, BNXT_RE_MMAP_DBR_BAR, + BNXT_RE_MMAP_TOGGLE_PAGE, }; struct bnxt_re_user_mmap_entry { @@ -167,6 +171,12 @@ static inline u16 bnxt_re_get_rwqe_size(int nsge) return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } +static inline u32 bnxt_re_init_depth(u32 ent, struct bnxt_re_ucontext *uctx) +{ + return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CMASK_POW2_DISABLED) ? + ent : roundup_pow_of_two(ent) : ent; +} + int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index a99c68247af0..f022c922fae5 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -54,6 +54,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_umem.h> #include <rdma/ib_addr.h> +#include <linux/hashtable.h> #include "bnxt_ulp.h" #include "roce_hsi.h" @@ -107,9 +108,14 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) dev_info(rdev_to_dev(rdev), "Couldn't get DB bar size, Low latency framework is disabled\n"); /* set register offsets for both UC and WC */ - res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : - BNXT_QPLIB_DBR_PF_DB_OFFSET; - res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + if (bnxt_qplib_is_chip_gen_p7(cctx)) { + res->dpi_tbl.ucreg.offset = offset; + res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; + } else { + res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : + BNXT_QPLIB_DBR_PF_DB_OFFSET; + res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; + } /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size * is equal to the DB-Bar actual size. 
This indicates that L2 @@ -128,11 +134,13 @@ static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) struct bnxt_qplib_chip_ctx *cctx; cctx = rdev->chip_ctx; - cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? mode : BNXT_QPLIB_WQE_MODE_STATIC; if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), "Failed to query hwrm qcaps\n"); + if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) + cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT; } static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) @@ -215,7 +223,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, attr->max_srq); ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) rdev->qplib_ctx.tqm_ctx.qcount[i] = rdev->dev_attr.tqm_alloc_reqs[i]; @@ -264,7 +272,7 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); bnxt_re_limit_pf_res(rdev); - num_vfs = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? + num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ? BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; if (num_vfs) bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); @@ -276,7 +284,7 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return; rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { bnxt_re_set_resource_limits(rdev); bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, &rdev->qplib_ctx); @@ -1203,25 +1211,19 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, { struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq, qplib_cq); + u32 *cq_ptr; if (cq->ib_cq.comp_handler) { - /* Lock comp_handler? */ + if (cq->uctx_cq_page) { + cq_ptr = (u32 *)cq->uctx_cq_page; + *cq_ptr = cq->qplib_cq.toggle; + } (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context); } return 0; } -#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 -#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 -static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) -{ - return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ? - (rdev->is_virtfn ? 
BNXT_RE_GEN_P5_VF_NQ_DB : - BNXT_RE_GEN_P5_PF_NQ_DB) : - rdev->en_dev->msix_entries[indx].db_offset; -} - static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; @@ -1242,7 +1244,7 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) bnxt_qplib_init_res(&rdev->qplib_res); for (i = 1; i < rdev->num_msix ; i++) { - db_offt = bnxt_re_get_nqdb_offset(rdev, i); + db_offt = rdev->en_dev->msix_entries[i].db_offset; rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], i - 1, rdev->en_dev->msix_entries[i].vector, db_offt, &bnxt_re_cqn_handler, @@ -1653,7 +1655,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX); + db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vid, db_offt, @@ -1681,7 +1683,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0, - bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)); + bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate QPLIB context: %#x\n", rc); @@ -1737,6 +1739,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) */ bnxt_re_vf_res_config(rdev); } + hash_init(rdev->cq_hash); return 0; free_sctx: @@ -1804,7 +1807,7 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) return; /* Currently enabling only for GenP5 adapters */ - if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) return; if (enable) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index abbabea7f5fa..c98e04fe2ddd 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -237,18 +237,15 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq) struct bnxt_qplib_hwq *hwq = &nq->hwq; struct nq_base *nqe, **nq_ptr; int budget = nq->budget; - u32 sw_cons, raw_cons; uintptr_t q_handle; u16 type; spin_lock_bh(&hwq->lock); /* Service the NQ until empty */ - raw_cons = hwq->cons; while (budget--) { - sw_cons = HWQ_CMP(raw_cons, hwq); nq_ptr = (struct nq_base **)hwq->pbl_ptr; - nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]; - if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) + nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)]; + if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags)) break; /* @@ -276,7 +273,8 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq) default: break; } - raw_cons++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &nq->nq_db.dbinfo.flags); } spin_unlock_bh(&hwq->lock); } @@ -302,18 +300,16 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) struct bnxt_qplib_hwq *hwq = &nq->hwq; struct bnxt_qplib_cq *cq; int budget = nq->budget; - u32 sw_cons, raw_cons; struct nq_base *nqe; uintptr_t q_handle; + u32 hw_polled = 0; u16 type; spin_lock_bh(&hwq->lock); /* Service the NQ until empty */ - raw_cons = hwq->cons; while (budget--) { - sw_cons = HWQ_CMP(raw_cons, hwq); - nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); - if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements)) + nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL); + if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags)) break; /* @@ -334,6 
+330,9 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle; if (!cq) break; + cq->toggle = (le16_to_cpu(nqe->info10_type) & + NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT; + cq->dbinfo.toggle = cq->toggle; bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); spin_lock_bh(&cq->compl_lock); @@ -372,12 +371,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t) "nqe with type = 0x%x not handled\n", type); break; } - raw_cons++; + hw_polled++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &nq->nq_db.dbinfo.flags); } - if (hwq->cons != raw_cons) { - hwq->cons = raw_cons; + if (hw_polled) bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); - } spin_unlock_bh(&hwq->lock); } @@ -505,6 +504,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt) pdev = nq->pdev; nq_db = &nq->nq_db; + nq_db->dbinfo.flags = 0; nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION; nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id); if (!nq_db->reg.bar_base) { @@ -649,7 +649,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res, rc = -ENOMEM; goto fail; } - + srq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_SRQ, sizeof(req)); @@ -703,13 +703,9 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_srq *srq) { struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; - u32 sw_prod, sw_cons, count = 0; - - sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq); + u32 count; - count = sw_prod > sw_cons ? sw_prod - sw_cons : - srq_hwq->max_elements - sw_cons + sw_prod; + count = __bnxt_qplib_get_avail(srq_hwq); if (count > srq->threshold) { srq->arm_req = false; bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold); @@ -761,7 +757,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; struct rq_wqe *srqe; struct sq_sge *hw_sge; - u32 sw_prod, sw_cons, count = 0; + u32 count = 0; int i, next; spin_lock(&srq_hwq->lock); @@ -775,8 +771,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srq->start_idx = srq->swq[next].next_idx; spin_unlock(&srq_hwq->lock); - sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL); + srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL); memset(srqe, 0, srq->wqe_size); /* Calculate wqe_size16 and data_len */ for (i = 0, hw_sge = (struct sq_sge *)srqe->data; @@ -792,17 +787,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq, srqe->wr_id[0] = cpu_to_le32((u32)next); srq->swq[next].wr_id = wqe->wr_id; - srq_hwq->prod++; + bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot); spin_lock(&srq_hwq->lock); - sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq); - /* retaining srq_hwq->cons for this logic - * actually the lock is only required to - * read srq_hwq->cons. - */ - sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq); - count = sw_prod > sw_cons ? 
sw_prod - sw_cons : - srq_hwq->max_elements - sw_cons + sw_prod; + count = __bnxt_qplib_get_avail(srq_hwq); spin_unlock(&srq_hwq->lock); /* Ring DB */ bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ); @@ -849,6 +837,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; int rc; + sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP1, sizeof(req)); @@ -885,6 +874,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (rq->max_wqe) { + rq->dbinfo.flags = 0; hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); @@ -992,6 +982,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) u32 tbl_indx; u16 nsge; + if (res->dattr) + qp->dev_cap_flags = res->dattr->dev_cap_flags; + + sq->dbinfo.flags = 0; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_CREATE_QP, sizeof(req)); @@ -1003,9 +997,14 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* SQ */ if (qp->type == CMDQ_CREATE_QP_TYPE_RC) { - psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ? + psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); + + if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) { + psn_sz = sizeof(struct sq_msn_search); + qp->msn = 0; + } } hwq_attr.res = res; @@ -1014,6 +1013,13 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) hwq_attr.depth = bnxt_qplib_get_depth(sq); hwq_attr.aux_stride = psn_sz; hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode); + /* Update msn tbl size */ + if (BNXT_RE_HW_RETX(qp->dev_cap_flags) && psn_sz) { + hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)); + qp->msn_tbl_sz = hwq_attr.aux_depth; + qp->msn = 0; + } + hwq_attr.type = HWQ_TYPE_QUEUE; rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr); if (rc) @@ -1040,6 +1046,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) /* RQ */ if (!qp->srq) { + rq->dbinfo.flags = 0; hwq_attr.res = res; hwq_attr.sginfo = &rq->sg_info; hwq_attr.stride = sizeof(struct sq_sge); @@ -1454,12 +1461,15 @@ bail: static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) { struct bnxt_qplib_hwq *cq_hwq = &cq->hwq; + u32 peek_flags, peek_cons; struct cq_base *hw_cqe; int i; + peek_flags = cq->dbinfo.flags; + peek_cons = cq_hwq->cons; for (i = 0; i < cq_hwq->max_elements; i++) { - hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL); - if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements)) + hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL); + if (!CQE_CMP_VALID(hw_cqe, peek_flags)) continue; /* * The valid test of the entry must be done first before @@ -1489,6 +1499,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp) default: break; } + bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons, + 1, &peek_flags); } } @@ -1590,6 +1602,27 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp, return NULL; } +/* Fil the MSN table into the next psn row */ +static void bnxt_qplib_fill_msn_search(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_swqe *wqe, + struct bnxt_qplib_swq *swq) +{ + struct sq_msn_search *msns; + u32 start_psn, next_psn; + u16 start_idx; + + msns = (struct sq_msn_search *)swq->psn_search; + msns->start_idx_next_psn_start_psn = 0; + + start_psn = swq->start_psn; + next_psn = swq->next_psn; + start_idx = swq->slot_idx; + 
msns->start_idx_next_psn_start_psn |= + bnxt_re_update_msn_tbl(start_idx, next_psn, start_psn); + qp->msn++; + qp->msn %= qp->msn_tbl_sz; +} + static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swqe *wqe, struct bnxt_qplib_swq *swq) @@ -1601,6 +1634,12 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, if (!swq->psn_search) return; + /* Handle MSN differently on cap flags */ + if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) { + bnxt_qplib_fill_msn_search(qp, wqe, swq); + return; + } + psns = (struct sq_psn_search *)swq->psn_search; psns = swq->psn_search; psns_ext = swq->psn_ext; @@ -1611,7 +1650,7 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp, flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & SQ_PSN_SEARCH_NEXT_PSN_MASK); - if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) { + if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) { psns_ext->opcode_start_psn = cpu_to_le32(op_spsn); psns_ext->flags_next_psn = cpu_to_le32(flg_npsn); psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx); @@ -1709,8 +1748,8 @@ static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp, return slot; } -static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, - struct bnxt_qplib_swq *swq) +static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *sq, + struct bnxt_qplib_swq *swq, bool hw_retx) { struct bnxt_qplib_hwq *hwq; u32 pg_num, pg_indx; @@ -1721,6 +1760,11 @@ static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq, if (!hwq->pad_pg) return; tail = swq->slot_idx / sq->dbinfo.max_slot; + if (hw_retx) { + /* For HW retx use qp msn index */ + tail = qp->msn; + tail %= qp->msn_tbl_sz; + } pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride); pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride); buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride); @@ -1745,6 +1789,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swq *swq; bool sch_handler = false; u16 wqe_sz, qdf = 0; + bool msn_update; void *base_hdr; void *ext_hdr; __le32 temp32; @@ -1772,7 +1817,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, } swq = bnxt_qplib_get_swqe(sq, &wqe_idx); - bnxt_qplib_pull_psn_buff(sq, swq); + bnxt_qplib_pull_psn_buff(qp, sq, swq, BNXT_RE_HW_RETX(qp->dev_cap_flags)); idx = 0; swq->slot_idx = hwq->prod; @@ -1804,6 +1849,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, &idx); if (data_len < 0) goto queue_err; + /* Make sure we update MSN table only for wired wqes */ + msn_update = true; /* Specifics */ switch (wqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: @@ -1844,6 +1891,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, SQ_SEND_DST_QP_MASK); ext_sqe->avid = cpu_to_le32(wqe->send.avid & SQ_SEND_AVID_MASK); + msn_update = false; } else { sqe->length = cpu_to_le32(data_len); if (qp->mtu) @@ -1901,7 +1949,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, sqe->wqe_type = wqe->type; sqe->flags = wqe->flags; sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key); - + msn_update = false; break; } case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR: @@ -1933,6 +1981,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, PTU_PTE_VALID); ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr); ext_sqe->va = cpu_to_le64(wqe->frmr.va); + msn_update = false; break; } @@ -1950,6 +1999,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, sqe->l_key = cpu_to_le32(wqe->bind.r_key); ext_sqe->va = cpu_to_le64(wqe->bind.va); ext_sqe->length_lo = cpu_to_le32(wqe->bind.length); + 
msn_update = false; break; } default: @@ -1957,11 +2007,13 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, rc = -EINVAL; goto done; } - swq->next_psn = sq->psn & BTH_PSN_MASK; - bnxt_qplib_fill_psn_search(qp, wqe, swq); + if (!BNXT_RE_HW_RETX(qp->dev_cap_flags) || msn_update) { + swq->next_psn = sq->psn & BTH_PSN_MASK; + bnxt_qplib_fill_psn_search(qp, wqe, swq); + } queue_err: bnxt_qplib_swq_mod_start(sq, wqe_idx); - bnxt_qplib_hwq_incr_prod(hwq, swq->slots); + bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots); qp->wqe_cnt++; done: if (sch_handler) { @@ -2049,7 +2101,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, base_hdr->wr_id[0] = cpu_to_le32(wqe_idx); queue_err: bnxt_qplib_swq_mod_start(rq, wqe_idx); - bnxt_qplib_hwq_incr_prod(hwq, swq->slots); + bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots); done: if (sch_handler) { nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); @@ -2086,6 +2138,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) return -EINVAL; } + cq->dbinfo.flags = 0; hwq_attr.res = res; hwq_attr.depth = cq->max_wqe; hwq_attr.stride = sizeof(struct cq_base); @@ -2101,7 +2154,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) req.dpi = cpu_to_le32(cq->dpi->dpi); req.cq_handle = cpu_to_le64(cq->cq_handle); - req.cq_size = cpu_to_le32(cq->hwq.max_elements); + req.cq_size = cpu_to_le32(cq->max_wqe); pbl = &cq->hwq.pbl[PBL_LVL_0]; pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) << CMDQ_CREATE_CQ_PG_SIZE_SFT); @@ -2129,6 +2182,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) cq->dbinfo.xid = cq->id; cq->dbinfo.db = cq->dpi->dbr; cq->dbinfo.priv_db = res->dpi_tbl.priv_db; + cq->dbinfo.flags = 0; + cq->dbinfo.toggle = 0; bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA); @@ -2144,6 +2199,8 @@ void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res, { bnxt_qplib_free_hwq(res, &cq->hwq); memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq)); + /* Reset only the cons bit in the flags */ + cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT); } int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq, @@ -2240,7 +2297,8 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe++; (*budget)--; skip_compl: - bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots); + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + sq->swq[last].slots, &sq->dbinfo.flags); sq->swq_last = sq->swq[last].next_idx; } *pcqe = cqe; @@ -2287,7 +2345,8 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, cqe->wr_id = rq->swq[last].wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + rq->swq[last].slots, &rq->dbinfo.flags); rq->swq_last = rq->swq[last].next_idx; } *pcqe = cqe; @@ -2316,7 +2375,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle) static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, u32 cq_cons, u32 swq_last, u32 cqe_sq_cons) { - u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx; + u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags; struct bnxt_qplib_q *sq = &qp->sq; struct cq_req *peek_req_hwcqe; struct bnxt_qplib_qp *peek_qp; @@ -2347,16 +2406,14 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, } if (sq->condition) { /* Peek at the completions */ - peek_raw_cq_cons = cq->hwq.cons; + peek_flags = cq->dbinfo.flags; peek_sw_cq_cons = 
cq_cons; i = cq->hwq.max_elements; while (i--) { - peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq); peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq, peek_sw_cq_cons, NULL); /* If the next hwcqe is VALID */ - if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons, - cq->hwq.max_elements)) { + if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) { /* * The valid test of the entry must be done first before * reading any further. @@ -2399,8 +2456,9 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq, rc = -EINVAL; goto out; } - peek_sw_cq_cons++; - peek_raw_cq_cons++; + bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, + &peek_sw_cq_cons, + 1, &peek_flags); } dev_err(&cq->hwq.pdev->dev, "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n", @@ -2487,7 +2545,8 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, } } skip: - bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + swq->slots, &sq->dbinfo.flags); sq->swq_last = swq->next_idx; if (sq->single) break; @@ -2514,7 +2573,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag) srq->swq[srq->last_idx].next_idx = (int)tag; srq->last_idx = (int)tag; srq->swq[srq->last_idx].next_idx = -1; - srq->hwq.cons++; /* Support for SRQE counter */ + bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons, + srq->dbinfo.max_slot, &srq->dbinfo.flags); spin_unlock(&srq->hwq.lock); } @@ -2583,7 +2643,8 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); rq->swq_last = swq->next_idx; *pcqe = cqe; @@ -2669,7 +2730,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); rq->swq_last = swq->next_idx; *pcqe = cqe; @@ -2686,14 +2748,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq) { struct cq_base *hw_cqe; - u32 sw_cons, raw_cons; bool rc = true; - raw_cons = cq->hwq.cons; - sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL); /* Check for Valid bit. 
If the CQE is valid, return false */ - rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements); + rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags); return rc; } @@ -2775,7 +2834,8 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, cqe->wr_id = swq->wr_id; cqe++; (*budget)--; - bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots); + bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons, + swq->slots, &rq->dbinfo.flags); rq->swq_last = swq->next_idx; *pcqe = cqe; @@ -2848,7 +2908,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe++; (*budget)--; } - bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots); + bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons, + sq->swq[swq_last].slots, &sq->dbinfo.flags); sq->swq_last = sq->swq[swq_last].next_idx; } *pcqe = cqe; @@ -2933,19 +2994,17 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { struct cq_base *hw_cqe; - u32 sw_cons, raw_cons; int budget, rc = 0; + u32 hw_polled = 0; u8 type; - raw_cons = cq->hwq.cons; budget = num_cqes; while (budget) { - sw_cons = HWQ_CMP(raw_cons, &cq->hwq); - hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL); + hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL); /* Check for Valid bit */ - if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements)) + if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags)) break; /* @@ -2960,7 +3019,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, rc = bnxt_qplib_cq_process_req(cq, (struct cq_req *)hw_cqe, &cqe, &budget, - sw_cons, lib_qp); + cq->hwq.cons, lib_qp); break; case CQ_BASE_CQE_TYPE_RES_RC: rc = bnxt_qplib_cq_process_res_rc(cq, @@ -3006,18 +3065,20 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, dev_err(&cq->hwq.pdev->dev, "process_cqe error rc = 0x%x\n", rc); } - raw_cons++; + hw_polled++; + bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons, + 1, &cq->dbinfo.flags); + } - if (cq->hwq.cons != raw_cons) { - cq->hwq.cons = raw_cons; + if (hw_polled) bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ); - } exit: return num_cqes - budget; } void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) { + cq->dbinfo.toggle = cq->toggle; if (arm_type) bnxt_qplib_ring_db(&cq->dbinfo, arm_type); /* Using cq->arm_state variable to track whether to issue cq handler */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 404b851091ca..7fd4506b3584 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -338,6 +338,9 @@ struct bnxt_qplib_qp { dma_addr_t rq_hdr_buf_map; struct list_head sq_flush; struct list_head rq_flush; + u32 msn; + u32 msn_tbl_sz; + u16 dev_cap_flags; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) @@ -348,9 +351,21 @@ struct bnxt_qplib_qp { #define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG) #define ROCE_CQE_CMP_V 0 -#define CQE_CMP_VALID(hdr, raw_cons, cp_bit) \ +#define CQE_CMP_VALID(hdr, pass) \ (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \ - !((raw_cons) & (cp_bit))) + !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK)) + +static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq) +{ + int cons, prod, avail; + + cons = hwq->cons; + prod = hwq->prod; + avail = cons - prod; + if (cons <= prod) + avail += hwq->depth; + return avail; +} static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que, u8 slots) @@ -406,6 +421,7 @@ 
struct bnxt_qplib_cq { bool resize_in_progress; struct bnxt_qplib_sg_info sg_info; u64 cq_handle; + u8 toggle; #define CQ_RESIZE_WAIT_TIME_MS 500 unsigned long flags; @@ -443,9 +459,9 @@ struct bnxt_qplib_cq { #define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG) #define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG) -#define NQE_CMP_VALID(hdr, raw_cons, cp_bit) \ +#define NQE_CMP_VALID(hdr, pass) \ (!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \ - !((raw_cons) & (cp_bit))) + !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK)) #define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024) @@ -614,4 +630,15 @@ static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max) return size; } + +/* MSN table update inlin */ +static inline __le64 bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn) +{ + return cpu_to_le64((((u64)(st_idx) << SQ_MSN_SEARCH_START_IDX_SFT) & + SQ_MSN_SEARCH_START_IDX_MASK) | + (((u64)(npsn) << SQ_MSN_SEARCH_NEXT_PSN_SFT) & + SQ_MSN_SEARCH_NEXT_PSN_MASK) | + (((start_psn) << SQ_MSN_SEARCH_START_PSN_SFT) & + SQ_MSN_SEARCH_START_PSN_MASK)); +} #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index e47b4ca64d33..3ffaef0c2651 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -734,17 +734,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; struct creq_base *creqe; - u32 sw_cons, raw_cons; unsigned long flags; u32 num_wakeup = 0; + u32 hw_polled = 0; /* Service the CREQ until budget is over */ spin_lock_irqsave(&hwq->lock, flags); - raw_cons = hwq->cons; while (budget > 0) { - sw_cons = HWQ_CMP(raw_cons, hwq); - creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL); - if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements)) + creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL); + if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags)) break; /* The valid test of the entry must be done first before * reading any further. 
@@ -775,15 +773,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) type); break; } - raw_cons++; budget--; + hw_polled++; + bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons, + 1, &creq->creq_db.dbinfo.flags); } - if (hwq->cons != raw_cons) { - hwq->cons = raw_cons; + if (hw_polled) bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true); - } spin_unlock_irqrestore(&hwq->lock, flags); if (num_wakeup) wake_up_nr(&rcfw->cmdq.waitq, num_wakeup); @@ -854,7 +852,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, */ if (is_virtfn) goto skip_ctx_setup; - if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) goto config_vf_res; lvl = ctx->qpc_tbl.level; @@ -907,6 +905,8 @@ config_vf_res: req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf); skip_ctx_setup: + if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags)) + req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED); req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id); bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); @@ -1113,6 +1113,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt) pdev = rcfw->pdev; creq_db = &rcfw->creq.creq_db; + creq_db->dbinfo.flags = 0; creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION; creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id); if (!creq_db->reg.bar_id) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 7b31bee3e000..45996e60a0d0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -141,9 +141,9 @@ struct bnxt_qplib_crsbe { /* Allocate 1 per QP for async error notification for now */ #define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024) #define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */ -#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \ +#define CREQ_CMP_VALID(hdr, pass) \ (!!((hdr)->v & CREQ_BASE_V) == \ - !((raw_cons) & (cp_bit))) + !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK)) #define CREQ_ENTRY_POLL_BUDGET 0x100 /* HWQ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 157db6b7e119..dfc943fab87b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -343,7 +343,7 @@ done: hwq->cons = 0; hwq->pdev = pdev; hwq->depth = hwq_attr->depth; - hwq->max_elements = depth; + hwq->max_elements = hwq->depth; hwq->element_size = stride; hwq->qe_ppg = pg_size / stride; /* For direct access to the elements */ @@ -805,7 +805,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res, dpit = &res->dpi_tbl; reg = &dpit->wcreg; - if (!bnxt_qplib_is_chip_gen_p5(res->cctx)) { + if (!bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { /* Offest should come from L2 driver */ dbr_offset = dev_attr->l2_db_size; dpit->ucreg.offset = dbr_offset; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 5949f004f785..61628f7f1253 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -44,11 +44,23 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; #define CHIP_NUM_57508 0x1750 #define CHIP_NUM_57504 0x1751 #define CHIP_NUM_57502 0x1752 +#define CHIP_NUM_58818 0xd818 +#define CHIP_NUM_57608 0x1760 + +#define BNXT_QPLIB_DBR_VALID (0x1UL << 26) +#define 
BNXT_QPLIB_DBR_EPOCH_SHIFT 24 +#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25 struct bnxt_qplib_drv_modes { u8 wqe_mode; bool db_push; bool dbr_pacing; + u32 toggle_bits; +}; + +enum bnxt_re_toggle_modes { + BNXT_QPLIB_CQ_TOGGLE_BIT = 0x1, + BNXT_QPLIB_SRQ_TOGGLE_BIT = 0x2, }; struct bnxt_qplib_chip_ctx { @@ -186,6 +198,20 @@ struct bnxt_qplib_db_info { struct bnxt_qplib_hwq *hwq; u32 xid; u32 max_slot; + u32 flags; + u8 toggle; +}; + +enum bnxt_qplib_db_info_flags_mask { + BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT = 0x0UL, + BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT = 0x1UL, + BNXT_QPLIB_FLAG_EPOCH_CONS_MASK = 0x1UL, + BNXT_QPLIB_FLAG_EPOCH_PROD_MASK = 0x2UL, +}; + +enum bnxt_qplib_db_epoch_flag_shift { + BNXT_QPLIB_DB_EPOCH_CONS_SHIFT = BNXT_QPLIB_DBR_EPOCH_SHIFT, + BNXT_QPLIB_DB_EPOCH_PROD_SHIFT = (BNXT_QPLIB_DBR_EPOCH_SHIFT - 1), }; /* Tables */ @@ -288,6 +314,12 @@ struct bnxt_qplib_res { struct bnxt_qplib_db_pacing_data *pacing_data; }; +static inline bool bnxt_qplib_is_chip_gen_p7(struct bnxt_qplib_chip_ctx *cctx) +{ + return (cctx->chip_num == CHIP_NUM_58818 || + cctx->chip_num == CHIP_NUM_57608); +} + static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { return (cctx->chip_num == CHIP_NUM_57508 || @@ -295,15 +327,20 @@ static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) cctx->chip_num == CHIP_NUM_57502); } +static inline bool bnxt_qplib_is_chip_gen_p5_p7(struct bnxt_qplib_chip_ctx *cctx) +{ + return bnxt_qplib_is_chip_gen_p5(cctx) || bnxt_qplib_is_chip_gen_p7(cctx); +} + static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) { - return bnxt_qplib_is_chip_gen_p5(res->cctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL; } static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx) { - return bnxt_qplib_is_chip_gen_p5(cctx) ? + return bnxt_qplib_is_chip_gen_p5_p7(cctx) ? 
RING_ALLOC_REQ_RING_TYPE_NQ : RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL; } @@ -396,39 +433,61 @@ void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res); int bnxt_qplib_determine_atomics(struct pci_dev *dev); -static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt) +static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo, + struct bnxt_qplib_hwq *hwq, u32 cnt) { - hwq->prod = (hwq->prod + cnt) % hwq->depth; + /* move prod and update toggle/epoch if wrap around */ + hwq->prod += cnt; + if (hwq->prod >= hwq->depth) { + hwq->prod %= hwq->depth; + dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT; + } } -static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq, - u32 cnt) +static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt, + u32 *dbinfo_flags) { - hwq->cons = (hwq->cons + cnt) % hwq->depth; + /* move cons and update toggle/epoch if wrap around */ + *cons += cnt; + if (*cons >= max_elements) { + *cons %= max_elements; + *dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT; + } } static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info, bool arm) { - u32 key; + u32 key = 0; - key = info->hwq->cons & (info->hwq->max_elements - 1); - key |= (CMPL_DOORBELL_IDX_VALID | + key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID | (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK)); if (!arm) key |= CMPL_DOORBELL_MASK; writel(key, info->db); } +#define BNXT_QPLIB_INIT_DBHDR(xid, type, indx, toggle) \ + (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \ + (type) | BNXT_QPLIB_DBR_VALID) << 32) | (indx) | \ + (((u32)(toggle)) << (BNXT_QPLIB_DBR_TOGGLE_SHIFT))) + static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info, u32 type) { u64 key = 0; + u32 indx; + u8 toggle = 0; + + if (type == DBC_DBC_TYPE_CQ_ARMALL || + type == DBC_DBC_TYPE_CQ_ARMSE) + toggle = info->toggle; + + indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) | + ((info->flags & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK) << + BNXT_QPLIB_DB_EPOCH_CONS_SHIFT); - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; - key <<= 32; - key |= (info->hwq->cons & (info->hwq->max_elements - 1)) & - DBC_DBC_INDEX_MASK; + key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, toggle); writeq(key, info->db); } @@ -436,10 +495,12 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info, u32 type) { u64 key = 0; + u32 indx; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; - key <<= 32; - key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK; + indx = (((info->hwq->prod / info->max_slot) & DBC_DBC_INDEX_MASK) | + ((info->flags & BNXT_QPLIB_FLAG_EPOCH_PROD_MASK) << + BNXT_QPLIB_DB_EPOCH_PROD_SHIFT)); + key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, 0); writeq(key, info->db); } @@ -447,9 +508,12 @@ static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info, u32 type) { u64 key = 0; + u8 toggle = 0; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type; - key <<= 32; + if (type == DBC_DBC_TYPE_CQ_ARMENA || type == DBC_DBC_TYPE_SRQ_ARMENA) + toggle = info->toggle; + /* Index always at 0 */ + key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, 0, toggle); writeq(key, info->priv_db); } @@ -458,9 +522,7 @@ static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info, { u64 key = 0; - key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | th; - key <<= 32; - key |= th & DBC_DBC_INDEX_MASK; + key = BNXT_QPLIB_INIT_DBHDR(info->xid, DBC_DBC_TYPE_SRQ_ARM, th, 
info->toggle); writeq(key, info->priv_db); } @@ -471,7 +533,7 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info, u32 type; type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ; - if (bnxt_qplib_is_chip_gen_p5(cctx)) + if (bnxt_qplib_is_chip_gen_p5_p7(cctx)) bnxt_qplib_ring_db(info, type); else bnxt_qplib_ring_db32(info, arm); @@ -483,6 +545,15 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } +static inline bool _is_hw_retx_supported(u16 dev_cap_flags) +{ + return dev_cap_flags & + (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED | + CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED); +} + +#define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a)) + static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) { return cctx->modes.dbr_pacing; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index a27b68515164..8beeedd15061 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -59,7 +59,7 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw) { u16 pcie_ctl2 = 0; - if (!bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) return false; pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2); @@ -133,7 +133,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, * reporting the max number */ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1; - attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ? + attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx) ? 6 : sb->max_sge; attr->max_cq = le32_to_cpu(sb->max_cq); attr->max_cq_wqes = le32_to_cpu(sb->max_cqe); @@ -151,8 +151,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->max_srq_sges = sb->max_srq_sge; attr->max_pkey = 1; attr->max_inline_data = le32_to_cpu(sb->max_inline_data); - attr->l2_db_size = (sb->l2_db_space_size + 1) * - (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); + if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx)) + attr->l2_db_size = (sb->l2_db_space_size + 1) * + (0x01 << RCFW_DBR_BASE_PAGE_SHIFT); attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED; attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); @@ -934,7 +935,7 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, req->inactivity_th = cpu_to_le16(cc_param->inact_th); /* For chip gen P5 onwards fill extended cmd and header */ - if (bnxt_qplib_is_chip_gen_p5(res->cctx)) { + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { struct roce_tlv *hdr; u32 payload; u32 chunks; diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 84b5acd7f7a2..605c9463c408 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -555,7 +555,12 @@ struct cmdq_modify_qp { __le16 flags; __le16 cookie; u8 resp_size; - u8 reserved8; + u8 qp_type; + #define CMDQ_MODIFY_QP_QP_TYPE_RC 0x2UL + #define CMDQ_MODIFY_QP_QP_TYPE_UD 0x4UL + #define CMDQ_MODIFY_QP_QP_TYPE_RAW_ETHERTYPE 0x6UL + #define CMDQ_MODIFY_QP_QP_TYPE_GSI 0x7UL + #define CMDQ_MODIFY_QP_QP_TYPE_LAST CMDQ_MODIFY_QP_QP_TYPE_GSI __le64 resp_addr; __le32 modify_mask; #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL @@ -611,14 +616,12 @@ struct cmdq_modify_qp { #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6) #define CMDQ_MODIFY_QP_NETWORK_TYPE_LAST CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 u8 access; - #define 
CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK \ - 0xffUL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT \ - 0 - #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL - #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK 0xffUL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT 0 + #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL + #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL __le16 pkey; __le32 qkey; __le32 dgid[4]; @@ -673,6 +676,13 @@ struct cmdq_modify_qp { #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13 __le64 irrq_addr; __le64 orrq_addr; + __le32 ext_modify_mask; + #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_EXT_STATS_CTX 0x1UL + #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_SCHQ_ID_VALID 0x2UL + __le32 ext_stats_ctx_id; + __le16 schq_id; + __le16 unused_0; + __le32 reserved32; }; /* creq_modify_qp_resp (size:128b/16B) */ @@ -3075,6 +3085,17 @@ struct sq_psn_search_ext { __le32 reserved32; }; +/* sq_msn_search (size:64b/8B) */ +struct sq_msn_search { + __le64 start_idx_next_psn_start_psn; + #define SQ_MSN_SEARCH_START_PSN_MASK 0xffffffUL + #define SQ_MSN_SEARCH_START_PSN_SFT 0 + #define SQ_MSN_SEARCH_NEXT_PSN_MASK 0xffffff000000ULL + #define SQ_MSN_SEARCH_NEXT_PSN_SFT 24 + #define SQ_MSN_SEARCH_START_IDX_MASK 0xffff000000000000ULL + #define SQ_MSN_SEARCH_START_IDX_SFT 48 +}; + /* sq_send (size:1024b/128B) */ struct sq_send { u8 wqe_type; @@ -3763,13 +3784,35 @@ struct cq_base { #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1) #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1) #define CQ_BASE_CQE_TYPE_RES_UD_CFA (0x4UL << 1) + #define CQ_BASE_CQE_TYPE_REQ_V3 (0x8UL << 1) + #define CQ_BASE_CQE_TYPE_RES_RC_V3 (0x9UL << 1) + #define CQ_BASE_CQE_TYPE_RES_UD_V3 (0xaUL << 1) + #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1_V3 (0xbUL << 1) + #define CQ_BASE_CQE_TYPE_RES_UD_CFA_V3 (0xcUL << 1) #define CQ_BASE_CQE_TYPE_NO_OP (0xdUL << 1) #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1) #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1) #define CQ_BASE_CQE_TYPE_LAST CQ_BASE_CQE_TYPE_CUT_OFF u8 status; + #define CQ_BASE_STATUS_OK 0x0UL + #define CQ_BASE_STATUS_BAD_RESPONSE_ERR 0x1UL + #define CQ_BASE_STATUS_LOCAL_LENGTH_ERR 0x2UL + #define CQ_BASE_STATUS_HW_LOCAL_LENGTH_ERR 0x3UL + #define CQ_BASE_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL + #define CQ_BASE_STATUS_LOCAL_PROTECTION_ERR 0x5UL + #define CQ_BASE_STATUS_LOCAL_ACCESS_ERROR 0x6UL + #define CQ_BASE_STATUS_MEMORY_MGT_OPERATION_ERR 0x7UL + #define CQ_BASE_STATUS_REMOTE_INVALID_REQUEST_ERR 0x8UL + #define CQ_BASE_STATUS_REMOTE_ACCESS_ERR 0x9UL + #define CQ_BASE_STATUS_REMOTE_OPERATION_ERR 0xaUL + #define CQ_BASE_STATUS_RNR_NAK_RETRY_CNT_ERR 0xbUL + #define CQ_BASE_STATUS_TRANSPORT_RETRY_CNT_ERR 0xcUL + #define CQ_BASE_STATUS_WORK_REQUEST_FLUSHED_ERR 0xdUL + #define CQ_BASE_STATUS_HW_FLUSH_ERR 0xeUL + #define CQ_BASE_STATUS_OVERFLOW_ERR 0xfUL + #define CQ_BASE_STATUS_LAST CQ_BASE_STATUS_OVERFLOW_ERR __le16 reserved16; - __le32 reserved32; + __le32 opaque; }; /* cq_req (size:256b/32B) */ @@ -4384,6 +4427,8 @@ struct cq_cutoff { #define CQ_CUTOFF_CQE_TYPE_SFT 1 #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1) #define CQ_CUTOFF_CQE_TYPE_LAST CQ_CUTOFF_CQE_TYPE_CUT_OFF + #define 
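/*
 * The new sq_msn_search entry above packs three fields into a single
 * little-endian 64-bit word: start_psn in bits 0-23, next_psn in bits
 * 24-47 and start_idx in bits 48-63.  A standalone sketch of packing and
 * unpacking with the masks/shifts copied from the patch; the cpu_to_le64
 * conversion the driver applies when writing the table is omitted here:
 */
#include <stdint.h>
#include <stdio.h>

#define MSN_START_PSN_MASK	0x0000000000ffffffULL
#define MSN_START_PSN_SFT	0
#define MSN_NEXT_PSN_MASK	0x0000ffffff000000ULL
#define MSN_NEXT_PSN_SFT	24
#define MSN_START_IDX_MASK	0xffff000000000000ULL
#define MSN_START_IDX_SFT	48

static uint64_t msn_pack(uint32_t start_psn, uint32_t next_psn, uint16_t start_idx)
{
	return (((uint64_t)start_psn << MSN_START_PSN_SFT) & MSN_START_PSN_MASK) |
	       (((uint64_t)next_psn  << MSN_NEXT_PSN_SFT)  & MSN_NEXT_PSN_MASK)  |
	       (((uint64_t)start_idx << MSN_START_IDX_SFT) & MSN_START_IDX_MASK);
}

int main(void)
{
	uint64_t e = msn_pack(0x100, 0x104, 7);

	printf("entry=0x%016llx start_psn=0x%llx next_psn=0x%llx idx=%llu\n",
	       (unsigned long long)e,
	       (unsigned long long)((e & MSN_START_PSN_MASK) >> MSN_START_PSN_SFT),
	       (unsigned long long)((e & MSN_NEXT_PSN_MASK)  >> MSN_NEXT_PSN_SFT),
	       (unsigned long long)((e & MSN_START_IDX_MASK) >> MSN_START_IDX_SFT));
	return 0;
}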
CQ_CUTOFF_RESIZE_TOGGLE_MASK 0x60UL + #define CQ_CUTOFF_RESIZE_TOGGLE_SFT 5 u8 status; #define CQ_CUTOFF_STATUS_OK 0x0UL #define CQ_CUTOFF_STATUS_LAST CQ_CUTOFF_STATUS_OK @@ -4435,6 +4480,8 @@ struct nq_srq_event { #define NQ_SRQ_EVENT_TYPE_SFT 0 #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL #define NQ_SRQ_EVENT_TYPE_LAST NQ_SRQ_EVENT_TYPE_SRQ_EVENT + #define NQ_SRQ_EVENT_TOGGLE_MASK 0xc0UL + #define NQ_SRQ_EVENT_TOGGLE_SFT 6 u8 event; #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL #define NQ_SRQ_EVENT_EVENT_LAST NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 7352a1f5d811..e2bdec32ae80 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -80,9 +80,19 @@ struct efa_pd { u16 pdn; }; +struct efa_mr_interconnect_info { + u16 recv_ic_id; + u16 rdma_read_ic_id; + u16 rdma_recv_ic_id; + u8 recv_ic_id_valid : 1; + u8 rdma_read_ic_id_valid : 1; + u8 rdma_recv_ic_id_valid : 1; +}; + struct efa_mr { struct ib_mr ibmr; struct ib_umem *umem; + struct efa_mr_interconnect_info ic_info; }; struct efa_cq { diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 9c65bd27bae0..7377c8a9f4d5 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -415,6 +415,32 @@ struct efa_admin_reg_mr_resp { * memory region */ u32 r_key; + + /* + * Mask indicating which fields have valid values + * 0 : recv_ic_id + * 1 : rdma_read_ic_id + * 2 : rdma_recv_ic_id + */ + u8 validity; + + /* + * Physical interconnect used by the device to reach the MR for receive + * operation + */ + u8 recv_ic_id; + + /* + * Physical interconnect used by the device to reach the MR for RDMA + * read operation + */ + u8 rdma_read_ic_id; + + /* + * Physical interconnect used by the device to reach the MR for RDMA + * write receive + */ + u8 rdma_recv_ic_id; }; struct efa_admin_dereg_mr_cmd { @@ -999,6 +1025,11 @@ struct efa_admin_host_info { #define EFA_ADMIN_REG_MR_CMD_REMOTE_WRITE_ENABLE_MASK BIT(1) #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) +/* reg_mr_resp */ +#define EFA_ADMIN_REG_MR_RESP_RECV_IC_ID_MASK BIT(0) +#define EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID_MASK BIT(1) +#define EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID_MASK BIT(2) + /* create_cq_cmd */ #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 576811885d59..d3398c7b0bd0 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. 
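/*
 * The EFA reg_mr response above returns three interconnect IDs plus a
 * validity byte whose bits 0/1/2 say which of the IDs are meaningful
 * (the EFA_ADMIN_REG_MR_RESP_*_MASK defines).  A standalone sketch of the
 * decode step; struct mr_ic_info and decode_ic_info() are hypothetical
 * names, only the bit positions come from the patch:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RECV_IC_ID_VALID	(1u << 0)
#define RDMA_READ_IC_ID_VALID	(1u << 1)
#define RDMA_RECV_IC_ID_VALID	(1u << 2)

struct mr_ic_info {
	uint16_t recv_ic_id, rdma_read_ic_id, rdma_recv_ic_id;
	bool recv_valid, rdma_read_valid, rdma_recv_valid;
};

static void decode_ic_info(uint8_t validity, uint8_t recv, uint8_t rd,
			   uint8_t rrecv, struct mr_ic_info *out)
{
	out->recv_ic_id = recv;
	out->rdma_read_ic_id = rd;
	out->rdma_recv_ic_id = rrecv;
	out->recv_valid      = validity & RECV_IC_ID_VALID;
	out->rdma_read_valid = validity & RDMA_READ_IC_ID_VALID;
	out->rdma_recv_valid = validity & RDMA_RECV_IC_ID_VALID;
}

int main(void)
{
	struct mr_ic_info info;

	decode_ic_info(0x5 /* recv + rdma_recv valid */, 1, 2, 3, &info);
	printf("recv %u(%d) read %u(%d) rrecv %u(%d)\n",
	       info.recv_ic_id, info.recv_valid,
	       info.rdma_read_ic_id, info.rdma_read_valid,
	       info.rdma_recv_ic_id, info.rdma_recv_valid);
	return 0;
}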
*/ #include "efa_com.h" @@ -270,6 +270,15 @@ int efa_com_register_mr(struct efa_com_dev *edev, result->l_key = cmd_completion.l_key; result->r_key = cmd_completion.r_key; + result->ic_info.recv_ic_id = cmd_completion.recv_ic_id; + result->ic_info.rdma_read_ic_id = cmd_completion.rdma_read_ic_id; + result->ic_info.rdma_recv_ic_id = cmd_completion.rdma_recv_ic_id; + result->ic_info.recv_ic_id_valid = EFA_GET(&cmd_completion.validity, + EFA_ADMIN_REG_MR_RESP_RECV_IC_ID); + result->ic_info.rdma_read_ic_id_valid = EFA_GET(&cmd_completion.validity, + EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID); + result->ic_info.rdma_recv_ic_id_valid = EFA_GET(&cmd_completion.validity, + EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index fc97f37bb39b..720a99ba0f7d 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_CMD_H_ @@ -199,6 +199,15 @@ struct efa_com_reg_mr_params { u8 indirect; }; +struct efa_com_mr_interconnect_info { + u16 recv_ic_id; + u16 rdma_read_ic_id; + u16 rdma_recv_ic_id; + u8 recv_ic_id_valid : 1; + u8 rdma_read_ic_id_valid : 1; + u8 rdma_recv_ic_id_valid : 1; +}; + struct efa_com_reg_mr_result { /* * To be used in conjunction with local buffers references in SQ and @@ -210,6 +219,7 @@ struct efa_com_reg_mr_result { * accessed memory region */ u32 r_key; + struct efa_com_mr_interconnect_info ic_info; }; struct efa_com_dereg_mr_params { diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 15ee92081118..7b1910a86216 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/module.h> @@ -9,6 +9,7 @@ #include <linux/version.h> #include <rdma/ib_user_verbs.h> +#include <rdma/uverbs_ioctl.h> #include "efa.h" @@ -36,6 +37,8 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) +extern const struct uapi_definition efa_uapi_defs[]; + /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -432,6 +435,8 @@ static int efa_ib_device_add(struct efa_dev *dev) ib_set_device_ops(&dev->ibdev, &efa_dev_ops); + dev->ibdev.driver_def = efa_uapi_defs; + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) goto err_destroy_eqs; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 0f8ca99d0827..2f412db2edcd 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #include <linux/dma-buf.h> @@ -13,6 +13,9 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_verbs.h> #include <rdma/uverbs_ioctl.h> +#define UVERBS_MODULE_NAME efa_ib +#include <rdma/uverbs_named_ioctl.h> +#include <rdma/ib_user_ioctl_cmds.h> #include "efa.h" #include "efa_io_defs.h" @@ -1653,6 +1656,12 @@ static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start, mr->ibmr.lkey = result.l_key; mr->ibmr.rkey = result.r_key; mr->ibmr.length = length; + mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id; + mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id; + mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id; + mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid; + mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid; + mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid; ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey); return 0; @@ -1735,6 +1744,39 @@ err_out: return ERR_PTR(err); } +static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs) +{ + struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE); + struct efa_mr *mr = to_emr(ibmr); + u16 ic_id_validity = 0; + int ret; + + ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID, + &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID, + &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id)); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID, + &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id)); + if (ret) + return ret; + + if (mr->ic_info.recv_ic_id_valid) + ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID; + if (mr->ic_info.rdma_read_ic_id_valid) + ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID; + if (mr->ic_info.rdma_recv_ic_id_valid) + ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID; + + return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY, + &ic_id_validity, sizeof(ic_id_validity)); +} + int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibmr->device); @@ -2157,3 +2199,30 @@ enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, return IB_LINK_LAYER_UNSPECIFIED; } +DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY, + UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE, + UVERBS_OBJECT_MR, + UVERBS_ACCESS_READ, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID, + UVERBS_ATTR_TYPE(u16), + UA_MANDATORY)); + +ADD_UVERBS_METHODS(efa_mr, + UVERBS_OBJECT_MR, + &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY)); + +const struct uapi_definition efa_uapi_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, + &efa_mr), + {}, +}; diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index f190111840e9..5df401a30cb9 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -212,6 +212,8 @@ struct erdma_dev { atomic_t num_ctx; struct list_head cep_list; + + struct dma_pool *resp_pool; }; static inline void *get_queue_entry(void 
*qbuf, u32 idx, u32 depth, u32 shift) diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index a155519a862f..3212a1222760 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -144,6 +144,7 @@ enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, CMDQ_OPCODE_CONF_MTU = 3, + CMDQ_OPCODE_GET_STATS = 4, CMDQ_OPCODE_CONF_DEVICE = 5, CMDQ_OPCODE_ALLOC_DB = 8, CMDQ_OPCODE_FREE_DB = 9, @@ -355,6 +356,44 @@ struct erdma_cmdq_reflush_req { u32 rq_pi; }; +#define ERDMA_HW_RESP_SIZE 256 + +struct erdma_cmdq_query_req { + u64 hdr; + u32 rsvd; + u32 index; + + u64 target_addr; + u32 target_length; +}; + +#define ERDMA_HW_RESP_MAGIC 0x5566 + +struct erdma_cmdq_query_resp_hdr { + u16 magic; + u8 ver; + u8 length; + + u32 index; + u32 rsvd[2]; +}; + +struct erdma_cmdq_query_stats_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u64 tx_req_cnt; + u64 tx_packets_cnt; + u64 tx_bytes_cnt; + u64 tx_drop_packets_cnt; + u64 tx_bps_meter_drop_packets_cnt; + u64 tx_pps_meter_drop_packets_cnt; + u64 rx_packets_cnt; + u64 rx_bytes_cnt; + u64 rx_drop_packets_cnt; + u64 rx_bps_meter_drop_packets_cnt; + u64 rx_pps_meter_drop_packets_cnt; +}; + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 0880c79a978c..472939172f0c 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -172,14 +172,30 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; + dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev, + ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, + 0); + if (!dev->resp_pool) + return -ENOMEM; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ERDMA_PCI_WIDTH)); if (ret) - return ret; + goto destroy_pool; dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; + +destroy_pool: + dma_pool_destroy(dev->resp_pool); + + return ret; +} + +static void erdma_device_uninit(struct erdma_dev *dev) +{ + dma_pool_destroy(dev->resp_pool); } static void erdma_hw_reset(struct erdma_dev *dev) @@ -273,7 +289,7 @@ static int erdma_probe_dev(struct pci_dev *pdev) err = erdma_request_vectors(dev); if (err) - goto err_iounmap_func_bar; + goto err_uninit_device; err = erdma_comm_irq_init(dev); if (err) @@ -314,6 +330,9 @@ err_uninit_comm_irq: err_free_vectors: pci_free_irq_vectors(dev->pdev); +err_uninit_device: + erdma_device_uninit(dev); + err_iounmap_func_bar: devm_iounmap(&pdev->dev, dev->func_bar); @@ -339,6 +358,7 @@ static void erdma_remove_dev(struct pci_dev *pdev) erdma_aeq_destroy(dev); erdma_comm_irq_uninit(dev); pci_free_irq_vectors(dev->pdev); + erdma_device_uninit(dev); devm_iounmap(&pdev->dev, dev->func_bar); pci_release_selected_regions(pdev, ERDMA_BAR_MASK); @@ -448,6 +468,7 @@ static const struct ib_device_ops erdma_device_ops = { .driver_id = RDMA_DRIVER_ERDMA, .uverbs_abi_ver = ERDMA_ABI_VERSION, + .alloc_hw_port_stats = erdma_alloc_hw_port_stats, .alloc_mr = erdma_ib_alloc_mr, .alloc_pd = erdma_alloc_pd, .alloc_ucontext = erdma_alloc_ucontext, @@ -459,6 +480,7 @@ static const struct ib_device_ops erdma_device_ops = { .destroy_cq = erdma_destroy_cq, .destroy_qp = erdma_destroy_qp, .get_dma_mr = erdma_get_dma_mr, + .get_hw_stats = erdma_get_hw_stats, .get_port_immutable = erdma_get_port_immutable, .iw_accept = 
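/*
 * For the GET_STATS command above, the device DMAs its answer into a
 * 256-byte buffer taken from the new resp_pool and the driver only trusts
 * the payload after checking the header magic (0x5566).  A standalone
 * sketch of that validation; the struct mirrors erdma_cmdq_query_resp_hdr
 * with fixed-width userspace types and no endianness handling:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HW_RESP_MAGIC 0x5566	/* ERDMA_HW_RESP_MAGIC */

struct resp_hdr {
	uint16_t magic;
	uint8_t  ver;
	uint8_t  length;
	uint32_t index;
	uint32_t rsvd[2];
};

static bool resp_hdr_valid(const struct resp_hdr *hdr)
{
	/* a missing or torn DMA write shows up as a magic mismatch */
	return hdr->magic == HW_RESP_MAGIC;
}

int main(void)
{
	struct resp_hdr good = { .magic = HW_RESP_MAGIC }, bad;

	memset(&bad, 0, sizeof(bad));
	printf("good=%d bad=%d\n", resp_hdr_valid(&good), resp_hdr_valid(&bad));
	return 0;
}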
erdma_accept, .iw_add_ref = erdma_qp_get_ref, diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index c317947563fb..23dfc01603f8 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -1708,3 +1708,93 @@ void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) ib_dispatch_event(&event); } + +enum counters { + ERDMA_STATS_TX_REQS_CNT, + ERDMA_STATS_TX_PACKETS_CNT, + ERDMA_STATS_TX_BYTES_CNT, + ERDMA_STATS_TX_DISABLE_DROP_CNT, + ERDMA_STATS_TX_BPS_METER_DROP_CNT, + ERDMA_STATS_TX_PPS_METER_DROP_CNT, + + ERDMA_STATS_RX_PACKETS_CNT, + ERDMA_STATS_RX_BYTES_CNT, + ERDMA_STATS_RX_DISABLE_DROP_CNT, + ERDMA_STATS_RX_BPS_METER_DROP_CNT, + ERDMA_STATS_RX_PPS_METER_DROP_CNT, + + ERDMA_STATS_MAX +}; + +static const struct rdma_stat_desc erdma_descs[] = { + [ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt", + [ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt", + [ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt", + [ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt", + [ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt", + [ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt", + [ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt", + [ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt", + [ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt", + [ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt", + [ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt", +}; + +struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device, + u32 port_num) +{ + return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX, + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int erdma_query_hw_stats(struct erdma_dev *dev, + struct rdma_hw_stats *stats) +{ + struct erdma_cmdq_query_stats_resp *resp; + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_GET_STATS); + + resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!resp) + return -ENOMEM; + + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) { + err = -EINVAL; + goto out; + } + + memcpy(&stats->value[0], &resp->tx_req_cnt, + sizeof(u64) * stats->num_counters); + +out: + dma_pool_free(dev->resp_pool, resp, dma_addr); + + return err; +} + +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port, int index) +{ + struct erdma_dev *dev = to_edev(ibdev); + int ret; + + if (port == 0) + return 0; + + ret = erdma_query_hw_stats(dev, stats); + if (ret) + return ret; + + return stats->num_counters; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index eb9c0f92fb6f..db6018529ccc 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -361,5 +361,9 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); +struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device, + u32 port_num); +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port, int index); #endif diff --git 
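/*
 * erdma_query_hw_stats() above memcpy()s num_counters consecutive u64s
 * starting at resp->tx_req_cnt into stats->value[], which silently assumes
 * the field order in erdma_cmdq_query_stats_resp matches the enum order
 * behind erdma_descs.  The guard below is not in the patch; it is a hedged
 * sketch of how that assumption could be expressed at compile time:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct stats_resp {		/* payload part of the query response */
	uint64_t tx_req_cnt;
	uint64_t tx_packets_cnt;
	uint64_t tx_bytes_cnt;
	/* ... remaining counters in descriptor order ... */
};

enum { TX_REQS, TX_PACKETS, TX_BYTES /* , ... */ };

/* the memcpy is only correct if enum order == struct order */
static_assert(offsetof(struct stats_resp, tx_packets_cnt) ==
	      TX_PACKETS * sizeof(uint64_t), "counter order mismatch");
static_assert(offsetof(struct stats_resp, tx_bytes_cnt) ==
	      TX_BYTES * sizeof(uint64_t), "counter order mismatch");

int main(void) { return 0; }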
a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6419872f95cf..cf2d29098406 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -491,8 +491,8 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, if (unlikely(tinfo->tidcnt > fd->tid_used)) return -EINVAL; - tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist), - sizeof(tidinfo[0]) * tinfo->tidcnt); + tidinfo = memdup_array_user(u64_to_user_ptr(tinfo->tidlist), + tinfo->tidcnt, sizeof(tidinfo[0])); if (IS_ERR(tidinfo)) return PTR_ERR(tidinfo); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6a4aa59c0cbc..b72625283fcf 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -494,8 +494,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * equal to the pkt count. However, there is no way to * tell at this point. */ - tmp = memdup_user(iovec[idx].iov_base, - ntids * sizeof(*req->tids)); + tmp = memdup_array_user(iovec[idx].iov_base, + ntids, sizeof(*req->tids)); if (IS_ERR(tmp)) { ret = PTR_ERR(tmp); SDMA_DBG(req, "Failed to copy %d TIDs (%d)", diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index a7d259238305..be1e1cdbcfa8 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ + hns_roce_debugfs.o ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 3df032ddda18..b4209b6aed8d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -57,6 +57,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct rdma_ah_attr *ah_attr = init_attr->ah_attr; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); + struct hns_roce_ib_create_ah_resp resp = {}; struct hns_roce_ah *ah = to_hr_ah(ibah); int ret = 0; u32 max_sl; @@ -92,11 +93,21 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, &ah->av.vlan_id, NULL); if (ret) - return ret; + goto err_out; ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID; } + if (udata) { + memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); + ret = ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp))); + } + +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]); + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 864413607571..873e8a69a1b9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -41,7 +41,15 @@ static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, struct hns_roce_mbox_msg *mbox_msg) { - return hr_dev->hw->post_mbox(hr_dev, mbox_msg); + int ret; + + ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg); + if (ret) + return ret; + + 
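/*
 * The hfi1 hunks above replace memdup_user(ptr, n * size) with
 * memdup_array_user(ptr, n, size).  The difference is that the helper
 * checks the n * size multiplication for overflow instead of letting a
 * user-controlled count wrap and shrink the allocation.  A standalone
 * sketch of the failure mode being closed (checked_bytes() is a
 * hypothetical stand-in, using the GCC/Clang overflow builtin):
 */
#include <stdint.h>
#include <stdio.h>

static int checked_bytes(size_t n, size_t size, size_t *bytes)
{
	if (__builtin_mul_overflow(n, size, bytes))
		return -1;	/* the kernel helper rejects the copy here */
	return 0;
}

int main(void)
{
	size_t bytes;
	size_t huge = SIZE_MAX / 4 + 1;

	printf("sane: %d\n", checked_bytes(16, 32, &bytes));	/* ok, 512 bytes */
	printf("wrap: %d\n", checked_bytes(huge, 8, &bytes));	/* overflows */
	return 0;
}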
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]); + + return 0; } /* this should be called with "poll_sem" */ @@ -58,7 +66,13 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, return ret; } - return hr_dev->hw->poll_mbox_done(hr_dev); + ret = hr_dev->hw->poll_mbox_done(hr_dev); + if (ret) + return ret; + + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]); + + return 0; } static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, @@ -89,6 +103,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO); context->out_param = out_param; complete(&context->done); + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]); } static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 736dc2f993b4..1b6d16af8c12 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -363,29 +363,31 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct hns_roce_ib_create_cq ucmd = {}; int ret; - if (attr->flags) - return -EOPNOTSUPP; + if (attr->flags) { + ret = -EOPNOTSUPP; + goto err_out; + } ret = verify_cq_create_attr(hr_dev, attr); if (ret) - return ret; + goto err_out; if (udata) { ret = get_cq_ucmd(hr_cq, udata, &ucmd); if (ret) - return ret; + goto err_out; } set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd); ret = set_cqe_size(hr_cq, udata, &ucmd); if (ret) - return ret; + goto err_out; ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); - return ret; + goto err_out; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); @@ -430,6 +432,9 @@ err_cq_db: free_cq_db(hr_dev, hr_cq, udata); err_cq_buf: free_cq_buf(hr_dev, hr_cq); +err_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]); + return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c new file mode 100644 index 000000000000..e8febb40f645 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2023 Hisilicon Limited. 
+ */ + +#include <linux/debugfs.h> +#include <linux/device.h> + +#include "hns_roce_device.h" + +static struct dentry *hns_roce_dbgfs_root; + +static int hns_debugfs_seqfile_open(struct inode *inode, struct file *f) +{ + struct hns_debugfs_seqfile *seqfile = inode->i_private; + + return single_open(f, seqfile->read, seqfile->data); +} + +static const struct file_operations hns_debugfs_seqfile_fops = { + .owner = THIS_MODULE, + .open = hns_debugfs_seqfile_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static void init_debugfs_seqfile(struct hns_debugfs_seqfile *seq, + const char *name, struct dentry *parent, + int (*read_fn)(struct seq_file *, void *), + void *data) +{ + debugfs_create_file(name, 0400, parent, seq, &hns_debugfs_seqfile_fops); + + seq->read = read_fn; + seq->data = data; +} + +static const char * const sw_stat_info[] = { + [HNS_ROCE_DFX_AEQE_CNT] = "aeqe", + [HNS_ROCE_DFX_CEQE_CNT] = "ceqe", + [HNS_ROCE_DFX_CMDS_CNT] = "cmds", + [HNS_ROCE_DFX_CMDS_ERR_CNT] = "cmds_err", + [HNS_ROCE_DFX_MBX_POSTED_CNT] = "posted_mbx", + [HNS_ROCE_DFX_MBX_POLLED_CNT] = "polled_mbx", + [HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event", + [HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err", + [HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err", + [HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err", + [HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err", + [HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err", + [HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err", + [HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT] = "xrcd_alloc_err", + [HNS_ROCE_DFX_MR_REG_ERR_CNT] = "mr_reg_err", + [HNS_ROCE_DFX_MR_REREG_ERR_CNT] = "mr_rereg_err", + [HNS_ROCE_DFX_AH_CREATE_ERR_CNT] = "ah_create_err", + [HNS_ROCE_DFX_MMAP_ERR_CNT] = "mmap_err", + [HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT] = "uctx_alloc_err", +}; + +static int sw_stat_debugfs_show(struct seq_file *file, void *offset) +{ + struct hns_roce_dev *hr_dev = file->private; + int i; + + for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++) + seq_printf(file, "%-20s --- %lld\n", sw_stat_info[i], + atomic64_read(&hr_dev->dfx_cnt[i])); + + return 0; +} + +static void create_sw_stat_debugfs(struct hns_roce_dev *hr_dev, + struct dentry *parent) +{ + struct hns_sw_stat_debugfs *dbgfs = &hr_dev->dbgfs.sw_stat_root; + + dbgfs->root = debugfs_create_dir("sw_stat", parent); + + init_debugfs_seqfile(&dbgfs->sw_stat, "sw_stat", dbgfs->root, + sw_stat_debugfs_show, hr_dev); +} + +/* debugfs for device */ +void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs; + + dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev), + hns_roce_dbgfs_root); + + create_sw_stat_debugfs(hr_dev, dbgfs->root); +} + +void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev) +{ + debugfs_remove_recursive(hr_dev->dbgfs.root); +} + +/* debugfs for hns module */ +void hns_roce_init_debugfs(void) +{ + hns_roce_dbgfs_root = debugfs_create_dir("hns_roce", NULL); +} + +void hns_roce_cleanup_debugfs(void) +{ + debugfs_remove_recursive(hns_roce_dbgfs_root); + hns_roce_dbgfs_root = NULL; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h new file mode 100644 index 000000000000..98e87bd3161e --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2023 Hisilicon Limited. 
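/*
 * The new hns_roce_debugfs.c above exposes the dfx_cnt counters through a
 * single_open() seq_file, one "name --- value" line per counter.  A small
 * userspace sketch of reading them back; the path is hypothetical and
 * depends on where debugfs is mounted and on the registered device name:
 */
#include <stdio.h>

#define SW_STAT_PATH "/sys/kernel/debug/hns_roce/hns_0/sw_stat/sw_stat"

int main(void)
{
	char name[32];
	long long val;
	FILE *f = fopen(SW_STAT_PATH, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* each line is emitted by the driver as "%-20s --- %lld\n" */
	while (fscanf(f, "%31s --- %lld", name, &val) == 2)
		printf("%s = %lld\n", name, val);
	fclose(f);
	return 0;
}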
+ */ + +#ifndef __HNS_ROCE_DEBUGFS_H +#define __HNS_ROCE_DEBUGFS_H + +/* debugfs seqfile */ +struct hns_debugfs_seqfile { + int (*read)(struct seq_file *seq, void *data); + void *data; +}; + +struct hns_sw_stat_debugfs { + struct dentry *root; + struct hns_debugfs_seqfile sw_stat; +}; + +/* Debugfs for device */ +struct hns_roce_dev_debugfs { + struct dentry *root; + struct hns_sw_stat_debugfs sw_stat_root; +}; + +struct hns_roce_dev; + +void hns_roce_init_debugfs(void); +void hns_roce_cleanup_debugfs(void); +void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev); +void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1627f3b0ef28..b1fce5ddf631 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -35,6 +35,7 @@ #include <rdma/ib_verbs.h> #include <rdma/hns-abi.h> +#include "hns_roce_debugfs.h" #define PCI_REVISION_ID_HIP08 0x21 #define PCI_REVISION_ID_HIP09 0x30 @@ -869,6 +870,29 @@ enum hns_roce_hw_pkt_stat_index { HNS_ROCE_HW_CNT_TOTAL }; +enum hns_roce_sw_dfx_stat_index { + HNS_ROCE_DFX_AEQE_CNT, + HNS_ROCE_DFX_CEQE_CNT, + HNS_ROCE_DFX_CMDS_CNT, + HNS_ROCE_DFX_CMDS_ERR_CNT, + HNS_ROCE_DFX_MBX_POSTED_CNT, + HNS_ROCE_DFX_MBX_POLLED_CNT, + HNS_ROCE_DFX_MBX_EVENT_CNT, + HNS_ROCE_DFX_QP_CREATE_ERR_CNT, + HNS_ROCE_DFX_QP_MODIFY_ERR_CNT, + HNS_ROCE_DFX_CQ_CREATE_ERR_CNT, + HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT, + HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT, + HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT, + HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT, + HNS_ROCE_DFX_MR_REG_ERR_CNT, + HNS_ROCE_DFX_MR_REREG_ERR_CNT, + HNS_ROCE_DFX_AH_CREATE_ERR_CNT, + HNS_ROCE_DFX_MMAP_ERR_CNT, + HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT, + HNS_ROCE_DFX_CNT_TOTAL +}; + struct hns_roce_hw { int (*cmq_init)(struct hns_roce_dev *hr_dev); void (*cmq_exit)(struct hns_roce_dev *hr_dev); @@ -979,6 +1003,8 @@ struct hns_roce_dev { u32 is_vf; u32 cong_algo_tmpl_id; u64 dwqe_page; + struct hns_roce_dev_debugfs dbgfs; + atomic64_t *dfx_cnt; }; static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2bca9560f32d..8206daea6767 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1297,6 +1297,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, /* Write to hardware */ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head); + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); + do { if (hns_roce_cmq_csq_done(hr_dev)) break; @@ -1334,6 +1336,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, spin_unlock_bh(&csq->lock); + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); + return ret; } @@ -2055,6 +2060,7 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev) /* Apply all loaded caps before setting to hardware */ static void apply_func_caps(struct hns_roce_dev *hr_dev) { +#define MAX_GID_TBL_LEN 256 struct hns_roce_caps *caps = &hr_dev->caps; struct hns_roce_v2_priv *priv = hr_dev->priv; @@ -2090,8 +2096,14 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ; caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0; - caps->gid_table_len[0] = caps->gmv_bt_num * - (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz); + + /* It's meaningless to support excessively large gid_table_len, + * as the type of sgid_index in kernel struct ib_global_route + 
* and userspace struct ibv_global_route are u8/uint8_t (0-255). + */ + caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN, + caps->gmv_bt_num * + (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz)); caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE / caps->gmv_entry_sz); @@ -2698,6 +2710,10 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) return 0; create_failed_qp: + for (i--; i >= 0; i--) { + hns_roce_v2_destroy_qp(&free_mr->rsv_qp[i]->ibqp, NULL); + kfree(free_mr->rsv_qp[i]); + } hns_roce_destroy_cq(cq, NULL); kfree(cq); @@ -5667,19 +5683,25 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, struct hns_roce_srq_context *srq_context; struct hns_roce_srq_context *srqc_mask; struct hns_roce_cmd_mailbox *mailbox; - int ret; + int ret = 0; /* Resizing SRQs is not supported yet */ - if (srq_attr_mask & IB_SRQ_MAX_WR) - return -EINVAL; + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EOPNOTSUPP; + goto out; + } if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit > srq->wqe_cnt) - return -EINVAL; + if (srq_attr->srq_limit > srq->wqe_cnt) { + ret = -EINVAL; + goto out; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto out; + } srq_context = mailbox->buf; srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; @@ -5692,15 +5714,17 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "failed to handle cmd of modifying SRQ, ret = %d.\n", ret); - return ret; - } } - return 0; +out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT]); + + return ret; } static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -5744,8 +5768,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + ret = PTR_ERR_OR_ZERO(mailbox); + if (ret) + goto err_out; cq_context = mailbox->buf; cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1; @@ -5775,6 +5800,10 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) "failed to process cmd when modifying CQ, ret = %d.\n", ret); +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT]); + return ret; } @@ -6014,6 +6043,8 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = IRQ_HANDLED; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]); + hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); @@ -6055,6 +6086,7 @@ static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = IRQ_HANDLED; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); ceqe = next_ceqe_sw_v2(eq); } @@ -6457,15 +6489,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, /* irq contains: abnormal + AEQ + CEQ */ for (j = 0; j < other_num; j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-abn-%d", j); + "hns-%s-abn-%d", pci_name(hr_dev->pci_dev), j); for (j = other_num; j < (other_num + aeq_num); j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-aeq-%d", j - other_num); + "hns-%s-aeq-%d", pci_name(hr_dev->pci_dev), j - other_num); for (j = (other_num + 
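/*
 * The free_mr_alloc_res() fix above adds the classic partial-failure
 * rollback: when creating the i-th reserved QP fails, everything created
 * before it is destroyed in reverse order before the CQ is torn down.
 * A standalone sketch of the same pattern with hypothetical resources:
 */
#include <stdio.h>
#include <stdlib.h>

#define N 4

static void *create_res(int i) { return (i == 2) ? NULL : malloc(16); }
static void destroy_res(void *r) { free(r); }

int main(void)
{
	void *res[N] = { 0 };
	int i;

	for (i = 0; i < N; i++) {
		res[i] = create_res(i);
		if (!res[i])
			goto unwind;
	}
	puts("all created");
	return 0;

unwind:
	/* roll back only what was created, newest first */
	for (i--; i >= 0; i--)
		destroy_res(res[i]);
	puts("creation failed, earlier resources rolled back");
	return 1;
}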
aeq_num); j < irq_num; j++) snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN, - "hns-ceq-%d", j - other_num - aeq_num); + "hns-%s-ceq-%d", pci_name(hr_dev->pci_dev), + j - other_num - aeq_num); for (j = 0; j < irq_num; j++) { if (j < other_num) @@ -6963,12 +6996,14 @@ static struct hnae3_client hns_roce_hw_v2_client = { static int __init hns_roce_hw_v2_init(void) { + hns_roce_init_debugfs(); return hnae3_register_client(&hns_roce_hw_v2_client); } static void __exit hns_roce_hw_v2_exit(void) { hnae3_unregister_client(&hns_roce_hw_v2_client); + hns_roce_cleanup_debugfs(); } module_init(hns_roce_hw_v2_init); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index a4a10a4e1aab..b55fe6911f9f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -361,10 +361,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_ib_alloc_ucontext ucmd = {}; - int ret; + int ret = -EAGAIN; if (!hr_dev->active) - return -EAGAIN; + goto error_out; resp.qp_tab_size = hr_dev->caps.num_qps; resp.srq_tab_size = hr_dev->caps.num_srqs; @@ -372,7 +372,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) - return ret; + goto error_out; if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS; @@ -396,7 +396,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, ret = hns_roce_uar_alloc(hr_dev, &context->uar); if (ret) - goto error_fail_uar_alloc; + goto error_out; ret = hns_roce_alloc_uar_entry(uctx); if (ret) @@ -423,7 +423,9 @@ error_fail_copy_to_udata: error_fail_uar_entry: ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx); -error_fail_uar_alloc: +error_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]); + return ret; } @@ -439,6 +441,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) { + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); struct rdma_user_mmap_entry *rdma_entry; struct hns_user_mmap_entry *entry; phys_addr_t pfn; @@ -446,8 +449,10 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) int ret; rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff); - if (!rdma_entry) + if (!rdma_entry) { + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); return -EINVAL; + } entry = to_hns_mmap(rdma_entry); pfn = entry->address >> PAGE_SHIFT; @@ -467,6 +472,9 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma) out: rdma_user_mmap_entry_put(rdma_entry); + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]); + return ret; } @@ -1009,6 +1017,21 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags); } +static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev) +{ + hr_dev->dfx_cnt = kvcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t), + GFP_KERNEL); + if (!hr_dev->dfx_cnt) + return -ENOMEM; + + return 0; +} + +static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev) +{ + kvfree(hr_dev->dfx_cnt); +} + int hns_roce_init(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; @@ -1016,11 +1039,15 @@ int 
hns_roce_init(struct hns_roce_dev *hr_dev) hr_dev->is_reset = false; + ret = hns_roce_alloc_dfx_cnt(hr_dev); + if (ret) + return ret; + if (hr_dev->hw->cmq_init) { ret = hr_dev->hw->cmq_init(hr_dev); if (ret) { dev_err(dev, "init RoCE Command Queue failed!\n"); - return ret; + goto error_failed_alloc_dfx_cnt; } } @@ -1079,6 +1106,8 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (ret) goto error_failed_register_device; + hns_roce_register_debugfs(hr_dev); + return 0; error_failed_register_device: @@ -1103,11 +1132,15 @@ error_failed_cmd_init: if (hr_dev->hw->cmq_exit) hr_dev->hw->cmq_exit(hr_dev); +error_failed_alloc_dfx_cnt: + hns_roce_dealloc_dfx_cnt(hr_dev); + return ret; } void hns_roce_exit(struct hns_roce_dev *hr_dev) { + hns_roce_unregister_debugfs(hr_dev); hns_roce_unregister_device(hr_dev); if (hr_dev->hw->hw_exit) @@ -1122,6 +1155,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev) hns_roce_cmd_cleanup(hr_dev); if (hr_dev->hw->cmq_exit) hr_dev->hw->cmq_exit(hr_dev); + hns_roce_dealloc_dfx_cnt(hr_dev); } MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 14376490ac22..91cd580480fe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -228,8 +228,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int ret; mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); + if (!mr) { + ret = -ENOMEM; + goto err_out; + } mr->iova = virt_addr; mr->size = length; @@ -259,6 +261,9 @@ err_alloc_key: free_mr_key(hr_dev, mr); err_alloc_mr: kfree(mr); +err_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]); + return ERR_PTR(ret); } @@ -274,12 +279,15 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, unsigned long mtpt_idx; int ret; - if (!mr->enabled) - return ERR_PTR(-EINVAL); + if (!mr->enabled) { + ret = -EINVAL; + goto err_out; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return ERR_CAST(mailbox); + ret = PTR_ERR_OR_ZERO(mailbox); + if (ret) + goto err_out; mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); @@ -331,8 +339,12 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) +err_out: + if (ret) { + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]); return ERR_PTR(ret); + } + return NULL; } @@ -674,7 +686,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); - if (IS_ERR_OR_NULL(mtr->umem)) { + if (IS_ERR(mtr->umem)) { ibdev_err(ibdev, "failed to get umem, ret = %ld.\n", PTR_ERR(mtr->umem)); return -ENOMEM; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 783e71852c50..d35cf59d0f43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -149,14 +149,18 @@ int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata) struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd); int ret; - if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) - return -EINVAL; + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) { + ret = -EOPNOTSUPP; + goto err_out; + } ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn); + +err_out: if (ret) - return ret; + 
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT]); - return 0; + return ret; } int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 828b58534aa9..31b147210688 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1216,7 +1216,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata); if (ret) - return ret; + goto err_out; if (init_attr->qp_type == IB_QPT_XRC_TGT) hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn; @@ -1231,6 +1231,10 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); +err_out: + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]); + return ret; } @@ -1366,6 +1370,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, out: mutex_unlock(&hr_qp->mutex); + if (ret) + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 4e2d1c8e164a..4abae9477854 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -475,11 +475,11 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = set_srq_param(srq, init_attr, udata); if (ret) - return ret; + goto err_out; ret = alloc_srq_buf(hr_dev, srq, udata); if (ret) - return ret; + goto err_out; ret = alloc_srq_db(hr_dev, srq, udata, &resp); if (ret) @@ -517,6 +517,8 @@ err_srq_db: free_srq_db(hr_dev, srq, udata); err_srq_buf: free_srq_buf(hr_dev, srq); +err_out: + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]); return ret; } diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 916bfe2a91eb..0422787592d8 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -1393,17 +1393,12 @@ int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val) { u32 crc = 0; - int ret; - int ret_code = 0; - crypto_shash_init(desc); - ret = crypto_shash_update(desc, addr, len); - if (!ret) - crypto_shash_final(desc, (u8 *)&crc); + crypto_shash_digest(desc, addr, len, (u8 *)&crc); if (crc != val) - ret_code = -EINVAL; + return -EINVAL; - return ret_code; + return 0; } /** diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index d141cab8a1e6..83ebd070535a 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_device *ibdev = ibcq->device; struct mana_ib_create_cq ucmd = {}; struct mana_ib_dev *mdev; + struct gdma_context *gc; int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; if (udata->inlen < sizeof(ucmd)) return -EINVAL; + if (attr->comp_vector > gc->max_num_queues) + return -EINVAL; + + cq->comp_vector = attr->comp_vector; + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); if (err) { ibdev_dbg(ibdev, @@ -26,7 +33,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return err; } - if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) { + if (attr->cqe > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(ibdev, "CQE %d exceeding 
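/*
 * The irdma hunk above collapses crypto_shash_init/update/final into a
 * single crypto_shash_digest() call and returns -EINVAL directly on a CRC
 * mismatch.  The toy additive checksum below is not the real CRC32; it is
 * only a standalone illustration that a one-shot digest must equal the
 * streamed init/update/final result it replaces:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t toy_update(uint32_t state, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--)
		state += *p++;
	return state;
}

static uint32_t toy_digest(const void *buf, size_t len)
{
	return toy_update(0, buf, len);	/* one-shot form */
}

int main(void)
{
	const char msg[] = "mpa frame";
	uint32_t streamed = toy_update(toy_update(0, msg, 3), msg + 3, sizeof(msg) - 3);
	uint32_t oneshot  = toy_digest(msg, sizeof(msg));

	printf("%u %u -> %s\n", streamed, oneshot,
	       streamed == oneshot ? "match" : "mismatch");
	return 0;
}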
limit\n", attr->cqe); return -EINVAL; } @@ -56,6 +63,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, /* * The CQ ID is not known at this time. The ID is generated at create_qp */ + cq->id = INVALID_QUEUE_ID; return 0; @@ -69,11 +77,33 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); struct ib_device *ibdev = ibcq->device; struct mana_ib_dev *mdev; + struct gdma_context *gc; + int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; + + err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); + if (err) { + ibdev_dbg(ibdev, + "Failed to destroy dma region, %d\n", err); + return err; + } + + if (cq->id != INVALID_QUEUE_ID) { + kfree(gc->cq_table[cq->id]); + gc->cq_table[cq->id] = NULL; + } - mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); ib_umem_release(cq->umem); return 0; } + +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) +{ + struct mana_ib_cq *cq = ctx; + + if (cq->ibcq.comp_handler) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index d4541b8707e4..6fa902ee80a6 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -68,7 +68,6 @@ static int mana_ib_probe(struct auxiliary_device *adev, ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev, mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); - dev->gdma_dev = mdev; dev->ib_dev.node_type = RDMA_NODE_IB_CA; /* @@ -78,16 +77,35 @@ static int mana_ib_probe(struct auxiliary_device *adev, dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = mdev->gdma_context->dev; - ret = ib_register_device(&dev->ib_dev, "mana_%d", - mdev->gdma_context->dev); + ret = mana_gd_register_device(&mdev->gdma_context->mana_ib); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register device, ret %d", + ret); + goto free_ib_device; + } + dev->gdma_dev = &mdev->gdma_context->mana_ib; + + ret = mana_ib_gd_query_adapter_caps(dev); if (ret) { - ib_dealloc_device(&dev->ib_dev); - return ret; + ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", + ret); + goto deregister_device; } + ret = ib_register_device(&dev->ib_dev, "mana_%d", + mdev->gdma_context->dev); + if (ret) + goto deregister_device; + dev_set_drvdata(&adev->dev, dev); return 0; + +deregister_device: + mana_gd_deregister_device(dev->gdma_dev); +free_ib_device: + ib_dealloc_device(&dev->ib_dev); + return ret; } static void mana_ib_remove(struct auxiliary_device *adev) @@ -95,6 +113,9 @@ static void mana_ib_remove(struct auxiliary_device *adev) struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); ib_unregister_device(&dev->ib_dev); + + mana_gd_deregister_device(dev->gdma_dev); + ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 7be4c3adb4e2..faca092456fa 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -8,7 +8,7 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, u32 port) { - struct gdma_dev *gd = dev->gdma_dev; + struct gdma_dev *gd = &dev->gdma_dev->gdma_context->mana; struct mana_port_context *mpc; struct net_device *ndev; struct mana_context *mc; @@ -31,7 +31,7 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd, 
u32 doorbell_id) { - struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_dev *mdev = &dev->gdma_dev->gdma_context->mana; struct mana_port_context *mpc; struct mana_context *mc; struct net_device *ndev; @@ -486,20 +486,17 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { - props->max_qp = MANA_MAX_NUM_QUEUES; - props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE; - - /* - * max_cqe could be potentially much bigger. - * As this version of driver only support RAW QP, set it to the same - * value as max_qp_wr - */ - props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE; - + struct mana_ib_dev *dev = container_of(ibdev, + struct mana_ib_dev, ib_dev); + + props->max_qp = dev->adapter_caps.max_qp_count; + props->max_qp_wr = dev->adapter_caps.max_qp_wr; + props->max_cq = dev->adapter_caps.max_cq_count; + props->max_cqe = dev->adapter_caps.max_qp_wr; + props->max_mr = dev->adapter_caps.max_mr_count; props->max_mr_size = MANA_IB_MAX_MR_SIZE; - props->max_mr = MANA_IB_MAX_MR; - props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES; - props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES; + props->max_send_sge = dev->adapter_caps.max_send_sge_count; + props->max_recv_sge = dev->adapter_caps.max_recv_sge_count; return 0; } @@ -521,3 +518,45 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { } + +int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) +{ + struct mana_ib_adapter_caps *caps = &dev->adapter_caps; + struct mana_ib_query_adapter_caps_resp resp = {}; + struct mana_ib_query_adapter_caps_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req), + sizeof(resp)); + req.hdr.resp.msg_version = GDMA_MESSAGE_V3; + req.hdr.dev_id = dev->gdma_dev->dev_id; + + err = mana_gd_send_request(dev->gdma_dev->gdma_context, sizeof(req), + &req, sizeof(resp), &resp); + + if (err) { + ibdev_err(&dev->ib_dev, + "Failed to query adapter caps err %d", err); + return err; + } + + caps->max_sq_id = resp.max_sq_id; + caps->max_rq_id = resp.max_rq_id; + caps->max_cq_id = resp.max_cq_id; + caps->max_qp_count = resp.max_qp_count; + caps->max_cq_count = resp.max_cq_count; + caps->max_mr_count = resp.max_mr_count; + caps->max_pd_count = resp.max_pd_count; + caps->max_inbound_read_limit = resp.max_inbound_read_limit; + caps->max_outbound_read_limit = resp.max_outbound_read_limit; + caps->mw_count = resp.mw_count; + caps->max_srq_count = resp.max_srq_count; + caps->max_qp_wr = min_t(u32, + resp.max_requester_sq_size / GDMA_MAX_SQE_SIZE, + resp.max_requester_rq_size / GDMA_MAX_RQE_SIZE); + caps->max_inline_data_size = resp.max_inline_data_size; + caps->max_send_sge_count = resp.max_send_sge_count; + caps->max_recv_sge_count = resp.max_recv_sge_count; + + return 0; +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index 502cc8672eef..6bdc0f5498d5 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -27,9 +27,28 @@ */ #define MANA_IB_MAX_MR 0xFFFFFFu +struct mana_ib_adapter_caps { + u32 max_sq_id; + u32 max_rq_id; + u32 max_cq_id; + u32 max_qp_count; + u32 max_cq_count; + u32 max_mr_count; + u32 max_pd_count; + u32 max_inbound_read_limit; + u32 max_outbound_read_limit; + u32 mw_count; + u32 max_srq_count; + u32 max_qp_wr; + u32 max_send_sge_count; + u32 max_recv_sge_count; + u32 max_inline_data_size; +}; + struct mana_ib_dev 
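/*
 * mana_ib_gd_query_adapter_caps() above derives max_qp_wr as the smaller
 * of how many maximum-sized SQ and RQ WQEs fit into the requester queue
 * sizes reported by the device.  A worked standalone example; the WQE and
 * queue sizes below are illustrative placeholders, not the real
 * GDMA_MAX_SQE_SIZE/GDMA_MAX_RQE_SIZE values:
 */
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

#define SQE_SIZE 512u		/* placeholder */
#define RQE_SIZE 256u		/* placeholder */

int main(void)
{
	uint32_t max_requester_sq_size = 1u << 20;	/* 1 MiB of SQ space */
	uint32_t max_requester_rq_size = 1u << 19;	/* 512 KiB of RQ space */

	uint32_t max_qp_wr = MIN(max_requester_sq_size / SQE_SIZE,
				 max_requester_rq_size / RQE_SIZE);

	printf("max_qp_wr = %u\n", max_qp_wr);	/* min(2048, 2048) = 2048 */
	return 0;
}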
{ struct ib_device ib_dev; struct gdma_dev *gdma_dev; + struct mana_ib_adapter_caps adapter_caps; }; struct mana_ib_wq { @@ -67,6 +86,7 @@ struct mana_ib_cq { int cqe; u64 gdma_region; u64 id; + u32 comp_vector; }; struct mana_ib_qp { @@ -92,6 +112,36 @@ struct mana_ib_rwq_ind_table { struct ib_rwq_ind_table ib_ind_table; }; +enum mana_ib_command_code { + MANA_IB_GET_ADAPTER_CAP = 0x30001, +}; + +struct mana_ib_query_adapter_caps_req { + struct gdma_req_hdr hdr; +}; /*HW Data */ + +struct mana_ib_query_adapter_caps_resp { + struct gdma_resp_hdr hdr; + u32 max_sq_id; + u32 max_rq_id; + u32 max_cq_id; + u32 max_qp_count; + u32 max_cq_count; + u32 max_mr_count; + u32 max_pd_count; + u32 max_inbound_read_limit; + u32 max_outbound_read_limit; + u32 mw_count; + u32 max_srq_count; + u32 max_requester_sq_size; + u32 max_responder_sq_size; + u32 max_requester_rq_size; + u32 max_responder_rq_size; + u32 max_send_sge_count; + u32 max_recv_sge_count; + u32 max_inline_data_size; +}; /* HW Data */ + int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, mana_handle_t *gdma_region); @@ -159,4 +209,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); +int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev); + +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq); #endif diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 4b3b5b274e84..21ac9fcadf3f 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,8 +21,8 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, u32 req_buf_size; int i, err; - mdev = dev->gdma_dev; - gc = mdev->gdma_context; + gc = dev->gdma_dev->gdma_context; + mdev = &gc->mana; req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE; @@ -102,20 +102,26 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; struct mana_ib_create_qp_rss_resp resp = {}; struct mana_ib_create_qp_rss ucmd = {}; - struct gdma_dev *gd = mdev->gdma_dev; + struct gdma_queue **gdma_cq_allocated; mana_handle_t *mana_ind_table; struct mana_port_context *mpc; + struct gdma_queue *gdma_cq; + unsigned int ind_tbl_size; struct mana_context *mc; struct net_device *ndev; + struct gdma_context *gc; struct mana_ib_cq *cq; struct mana_ib_wq *wq; - unsigned int ind_tbl_size; + struct gdma_dev *gd; + struct mana_eq *eq; struct ib_cq *ibcq; struct ib_wq *ibwq; int i = 0; u32 port; int ret; + gc = mdev->gdma_dev->gdma_context; + gd = &gc->mana; mc = gd->driver_data; if (!udata || udata->inlen < sizeof(ucmd)) @@ -129,7 +135,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, return ret; } - if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + if (attr->cap.max_recv_wr > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(&mdev->ib_dev, "Requested max_recv_wr %d exceeding limit\n", attr->cap.max_recv_wr); @@ -178,6 +184,13 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, goto fail; } + gdma_cq_allocated = kcalloc(ind_tbl_size, sizeof(*gdma_cq_allocated), + GFP_KERNEL); + if (!gdma_cq_allocated) { + ret = -ENOMEM; + goto fail; + } + qp->port = port; for (i = 0; i < ind_tbl_size; i++) { @@ -196,12 +209,16 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, cq_spec.gdma_region = cq->gdma_region; cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE; cq_spec.modr_ctx_id = 0; - 
cq_spec.attached_eq = GDMA_CQ_NO_EQ; + eq = &mc->eqs[cq->comp_vector % gc->max_num_queues]; + cq_spec.attached_eq = eq->eq->id; ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, &wq_spec, &cq_spec, &wq->rx_object); - if (ret) + if (ret) { + /* Do cleanup starting with index i-1 */ + i--; goto fail; + } /* The GDMA regions are now owned by the WQ object */ wq->gdma_region = GDMA_INVALID_DMA_REGION; @@ -218,6 +235,21 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, resp.entries[i].wqid = wq->id; mana_ind_table[i] = wq->rx_object; + + /* Create CQ table entry */ + WARN_ON(gc->cq_table[cq->id]); + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); + if (!gdma_cq) { + ret = -ENOMEM; + goto fail; + } + gdma_cq_allocated[i] = gdma_cq; + + gdma_cq->cq.context = cq; + gdma_cq->type = GDMA_CQ; + gdma_cq->cq.callback = mana_ib_cq_handler; + gdma_cq->id = cq->id; + gc->cq_table[cq->id] = gdma_cq; } resp.num_entries = i; @@ -237,6 +269,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, goto fail; } + kfree(gdma_cq_allocated); kfree(mana_ind_table); return 0; @@ -244,10 +277,17 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, fail: while (i-- > 0) { ibwq = ind_tbl->ind_tbl[i]; + ibcq = ibwq->cq; wq = container_of(ibwq, struct mana_ib_wq, ibwq); + cq = container_of(ibcq, struct mana_ib_cq, ibcq); + + gc->cq_table[cq->id] = NULL; + kfree(gdma_cq_allocated[i]); + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); } + kfree(gdma_cq_allocated); kfree(mana_ind_table); return ret; @@ -266,17 +306,20 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, struct mana_ib_ucontext *mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext); + struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct mana_ib_create_qp_resp resp = {}; - struct gdma_dev *gd = mdev->gdma_dev; struct mana_ib_create_qp ucmd = {}; + struct gdma_queue *gdma_cq = NULL; struct mana_obj_spec wq_spec = {}; struct mana_obj_spec cq_spec = {}; struct mana_port_context *mpc; struct mana_context *mc; struct net_device *ndev; struct ib_umem *umem; - int err; + struct mana_eq *eq; + int eq_vec; u32 port; + int err; mc = gd->driver_data; @@ -295,7 +338,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, if (port < 1 || port > mc->num_ports) return -EINVAL; - if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + if (attr->cap.max_send_wr > mdev->adapter_caps.max_qp_wr) { ibdev_dbg(&mdev->ib_dev, "Requested max_send_wr %d exceeding limit\n", attr->cap.max_send_wr); @@ -353,7 +396,9 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, cq_spec.gdma_region = send_cq->gdma_region; cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE; cq_spec.modr_ctx_id = 0; - cq_spec.attached_eq = GDMA_CQ_NO_EQ; + eq_vec = send_cq->comp_vector % gd->gdma_context->max_num_queues; + eq = &mc->eqs[eq_vec]; + cq_spec.attached_eq = eq->eq->id; err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec, &cq_spec, &qp->tx_object); @@ -371,6 +416,20 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, qp->sq_id = wq_spec.queue_index; send_cq->id = cq_spec.queue_index; + /* Create CQ table entry */ + WARN_ON(gd->gdma_context->cq_table[send_cq->id]); + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); + if (!gdma_cq) { + err = -ENOMEM; + goto err_destroy_wq_obj; + } + + gdma_cq->cq.context = send_cq; + gdma_cq->type = GDMA_CQ; + gdma_cq->cq.callback = 
mana_ib_cq_handler; + gdma_cq->id = send_cq->id; + gd->gdma_context->cq_table[send_cq->id] = gdma_cq; + ibdev_dbg(&mdev->ib_dev, "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err, qp->tx_object, qp->sq_id, send_cq->id); @@ -384,11 +443,15 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, ibdev_dbg(&mdev->ib_dev, "Failed copy udata for create qp-raw, %d\n", err); - goto err_destroy_wq_obj; + goto err_release_gdma_cq; } return 0; +err_release_gdma_cq: + kfree(gdma_cq); + gd->gdma_context->cq_table[send_cq->id] = NULL; + err_destroy_wq_obj: mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); @@ -437,7 +500,7 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp, { struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); - struct gdma_dev *gd = mdev->gdma_dev; + struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct mana_port_context *mpc; struct mana_context *mc; struct net_device *ndev; @@ -464,7 +527,7 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) { struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); - struct gdma_dev *gd = mdev->gdma_dev; + struct gdma_dev *gd = &mdev->gdma_dev->gdma_context->mana; struct ib_pd *ibpd = qp->ibqp.pd; struct mana_port_context *mpc; struct mana_context *mc; diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index 3669c90b2dad..b4c97fb62abf 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -341,6 +341,8 @@ static enum mlx5_sw_icm_type get_icm_type(int uapi_type) return MLX5_SW_ICM_TYPE_HEADER_MODIFY; case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN; + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: + return MLX5_SW_ICM_TYPE_SW_ENCAP; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: default: return MLX5_SW_ICM_TYPE_STEERING; @@ -364,6 +366,7 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, switch (type) { case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) || MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) || @@ -438,6 +441,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: return handle_alloc_dm_sw_icm(context, attr, attrs, type); default: return ERR_PTR(-EOPNOTSUPP); @@ -491,6 +495,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm)); default: return -EOPNOTSUPP; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 650a15b6cfbc..c2b557e64290 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -818,6 +818,17 @@ static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) MLX5_REG_NODE_DESC, 0, 0); } +static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev, + struct mlx5_ib_query_device_resp *resp) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + u16 vport = mlx5_eswitch_manager_vport(mdev); + + 
resp->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(esw, + vport); + resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); +} + static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) @@ -1209,6 +1220,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_GEN(mdev, log_max_dci_errored_streams); } + if (offsetofend(typeof(resp), reserved) <= uhw_outlen) + resp.response_length += sizeof(resp.reserved); + + if (offsetofend(typeof(resp), reg_c0) <= uhw_outlen) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + resp.response_length += sizeof(resp.reg_c0); + + if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS && + mlx5_eswitch_vport_match_metadata_enabled(esw)) + fill_esw_mgr_reg_c0(mdev, &resp); + } + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 18e459b55746..a8ee2ca1f4a1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1347,6 +1347,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM: if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index f330ce895d88..8fe0cef7e2be 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -635,7 +635,7 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) int mthca_SYS_EN(struct mthca_dev *dev) { - u64 out; + u64 out = 0; int ret; ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D); @@ -1955,7 +1955,7 @@ int mthca_WRITE_MGM(struct mthca_dev *dev, int index, int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, u16 *hash) { - u64 imm; + u64 imm = 0; int err; err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index b54bc8865dae..1ab268b77096 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -382,7 +382,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, struct mthca_init_hca_param *init_hca, u64 icm_size) { - u64 aux_pages; + u64 aux_pages = 0; int err; err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages); diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index cec5cccd2e75..75253f2b3e3d 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -121,11 +121,10 @@ struct siw_page_chunk { }; struct siw_umem { + struct ib_umem *base_mem; struct siw_page_chunk *page_chunk; int num_pages; - bool writable; u64 fp_addr; /* First page base address */ - struct mm_struct *owning_mm; }; struct siw_pble { @@ -289,10 +288,11 @@ struct siw_rx_stream { int skb_offset; /* offset in skb */ int skb_copied; /* processed bytes in skb */ + enum siw_rx_state state; + union iwarp_hdr hdr; struct mpa_trailer trailer; - - enum siw_rx_state state; + struct shash_desc *mpa_crc_hd; /* * For each FPDU, main RX loop runs through 3 stages: @@ -314,7 +314,6 @@ struct siw_rx_stream { u64 ddp_to; u32 inval_stag; /* Stag to be invalidated */ - struct shash_desc *mpa_crc_hd; u8 rx_suspend : 1; 
u8 pad : 2; /* # of pad bytes expected */ u8 rdmap_op : 4; /* opcode of current frame */ @@ -418,10 +417,10 @@ struct siw_iwarp_tx { struct siw_qp { struct ib_qp base_qp; struct siw_device *sdev; + int tx_cpu; struct kref ref; struct completion qp_free; struct list_head devq; - int tx_cpu; struct siw_qp_attrs attrs; struct siw_cep *cep; @@ -466,7 +465,6 @@ struct siw_qp { } term_info; struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */ struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */ - struct rcu_head rcu; }; /* helper macros */ @@ -659,7 +657,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) static inline int siw_orq_empty(struct siw_qp *qp) { - return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 1 : 0; + return orq_get_current(qp)->flags == 0 ? 1 : 0; } static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7de651cb44e8..86323918a570 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -41,16 +41,6 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, static void siw_sk_assign_cm_upcalls(struct sock *sk) { - write_lock_bh(&sk->sk_callback_lock); - sk->sk_state_change = siw_cm_llp_state_change; - sk->sk_data_ready = siw_cm_llp_data_ready; - sk->sk_write_space = siw_cm_llp_write_space; - sk->sk_error_report = siw_cm_llp_error_report; - write_unlock_bh(&sk->sk_callback_lock); -} - -static void siw_sk_save_upcalls(struct sock *sk) -{ struct siw_cep *cep = sk_to_cep(sk); write_lock_bh(&sk->sk_callback_lock); @@ -58,6 +48,11 @@ static void siw_sk_save_upcalls(struct sock *sk) cep->sk_data_ready = sk->sk_data_ready; cep->sk_write_space = sk->sk_write_space; cep->sk_error_report = sk->sk_error_report; + + sk->sk_state_change = siw_cm_llp_state_change; + sk->sk_data_ready = siw_cm_llp_data_ready; + sk->sk_write_space = siw_cm_llp_write_space; + sk->sk_error_report = siw_cm_llp_error_report; write_unlock_bh(&sk->sk_callback_lock); } @@ -156,7 +151,6 @@ static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s) siw_cep_get(cep); s->sk->sk_user_data = cep; - siw_sk_save_upcalls(s->sk); siw_sk_assign_cm_upcalls(s->sk); } @@ -364,6 +358,24 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, return id->event_handler(id, &event); } +static void siw_free_cm_id(struct siw_cep *cep) +{ + if (!cep->cm_id) + return; + + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; +} + +static void siw_destroy_cep_sock(struct siw_cep *cep) +{ + if (cep->sock) { + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } +} + /* * siw_qp_cm_drop() * @@ -393,8 +405,7 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule) } siw_dbg_cep(cep, "immediate close, state %d\n", cep->state); - if (qp->term_info.valid) - siw_send_terminate(qp); + siw_send_terminate(qp); if (cep->cm_id) { switch (cep->state) { @@ -416,20 +427,12 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule) default: break; } - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; + siw_free_cm_id(cep); siw_cep_put(cep); } cep->state = SIW_EPSTATE_CLOSED; - if (cep->sock) { - siw_socket_disassoc(cep->sock); - /* - * Immediately close socket - */ - sock_release(cep->sock); - cep->sock = NULL; - } + siw_destroy_cep_sock(cep); if (cep->qp) { cep->qp = NULL; siw_qp_put(qp); @@ -445,6 +448,12 @@ void siw_cep_put(struct siw_cep *cep) kref_put(&cep->ref, 
__siw_cep_dealloc); } +static void siw_cep_set_free_and_put(struct siw_cep *cep) +{ + siw_cep_set_free(cep); + siw_cep_put(cep); +} + void siw_cep_get(struct siw_cep *cep) { kref_get(&cep->ref); @@ -1061,7 +1070,7 @@ static void siw_cm_work_handler(struct work_struct *w) /* * QP scheduled LLP close */ - if (cep->qp && cep->qp->term_info.valid) + if (cep->qp) siw_send_terminate(cep->qp); if (cep->cm_id) @@ -1175,8 +1184,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->sock = NULL; } if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; + siw_free_cm_id(cep); siw_cep_put(cep); } } @@ -1515,9 +1523,7 @@ error: cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } else if (s) { sock_release(s); @@ -1548,7 +1554,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) struct siw_cep *cep = (struct siw_cep *)id->provider_data; struct siw_qp *qp; struct siw_qp_attrs qp_attrs; - int rv, max_priv_data = MPA_MAX_PRIVDATA; + int rv = -EINVAL, max_priv_data = MPA_MAX_PRIVDATA; bool wait_for_peer_rts = false; siw_cep_set_inuse(cep); @@ -1564,26 +1570,17 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - - siw_cep_set_free(cep); - siw_cep_put(cep); - - return -ECONNRESET; + rv = -ECONNRESET; + goto free_cep; } qp = siw_qp_id2obj(sdev, params->qpn); if (!qp) { WARN(1, "[QP %d] does not exist\n", params->qpn); - siw_cep_set_free(cep); - siw_cep_put(cep); - - return -EINVAL; + goto free_cep; } down_write(&qp->state_lock); - if (qp->attrs.state > SIW_QP_STATE_RTR) { - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; - } + if (qp->attrs.state > SIW_QP_STATE_RTR) + goto error_unlock; siw_dbg_cep(cep, "[QP %d]\n", params->qpn); if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { @@ -1597,9 +1594,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) "[QP %u]: ord %d (max %d), ird %d (max %d)\n", qp_id(qp), params->ord, sdev->attrs.max_ord, params->ird, sdev->attrs.max_ird); - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } if (cep->enhanced_rdma_conn_est) max_priv_data -= sizeof(struct mpa_v2_data); @@ -1609,9 +1604,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep, "[QP %u]: private data length: %d (max %d)\n", qp_id(qp), params->private_data_len, max_priv_data); - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } if (cep->enhanced_rdma_conn_est) { if (params->ord > cep->ord) { @@ -1620,9 +1613,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) } else { cep->ird = params->ird; cep->ord = params->ord; - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } } if (params->ird < cep->ird) { @@ -1631,8 +1622,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->ird = cep->ird; else { rv = -ENOMEM; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } } if (cep->mpa.v2_ctrl.ord & @@ -1679,7 +1669,6 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA); up_write(&qp->state_lock); - if (rv) goto error; @@ -1702,27 +1691,23 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_cep_set_free(cep); return 0; + +error_unlock: + up_write(&qp->state_lock); error: - siw_socket_disassoc(cep->sock); - 
sock_release(cep->sock); - cep->sock = NULL; + siw_destroy_cep_sock(cep); cep->state = SIW_EPSTATE_CLOSED; - if (cep->cm_id) { - cep->cm_id->rem_ref(id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); if (qp->cep) { siw_cep_put(cep); qp->cep = NULL; } cep->qp = NULL; siw_qp_put(qp); - - siw_cep_set_free(cep); - siw_cep_put(cep); - +free_cep: + siw_cep_set_free_and_put(cep); return rv; } @@ -1744,8 +1729,7 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - siw_cep_set_free(cep); - siw_cep_put(cep); /* put last reference */ + siw_cep_set_free_and_put(cep); /* put last reference */ return -ECONNRESET; } @@ -1756,14 +1740,11 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ siw_send_mpareqrep(cep, pdata, pd_len); } - siw_socket_disassoc(cep->sock); - sock_release(cep->sock); - cep->sock = NULL; + siw_destroy_cep_sock(cep); cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); return 0; } @@ -1890,16 +1871,12 @@ error: if (cep) { siw_cep_set_inuse(cep); - if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); cep->sock = NULL; siw_socket_disassoc(s); cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } sock_release(s); @@ -1923,18 +1900,14 @@ static void siw_drop_listeners(struct iw_cm_id *id) siw_cep_set_inuse(cep); - if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); if (cep->sock) { siw_socket_disassoc(cep->sock); sock_release(cep->sock); cep->sock = NULL; } cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } } diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 1ab62982df74..61ad8ca3d1a2 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -109,6 +109,17 @@ static struct { int num_nodes; } siw_cpu_info; +static void siw_destroy_cpulist(int number) +{ + int i = 0; + + while (i < number) + kfree(siw_cpu_info.tx_valid_cpus[i++]); + + kfree(siw_cpu_info.tx_valid_cpus); + siw_cpu_info.tx_valid_cpus = NULL; +} + static int siw_init_cpulist(void) { int i, num_nodes = nr_node_ids; @@ -138,24 +149,11 @@ static int siw_init_cpulist(void) out_err: siw_cpu_info.num_nodes = 0; - while (--i >= 0) - kfree(siw_cpu_info.tx_valid_cpus[i]); - kfree(siw_cpu_info.tx_valid_cpus); - siw_cpu_info.tx_valid_cpus = NULL; + siw_destroy_cpulist(i); return -ENOMEM; } -static void siw_destroy_cpulist(void) -{ - int i = 0; - - while (i < siw_cpu_info.num_nodes) - kfree(siw_cpu_info.tx_valid_cpus[i++]); - - kfree(siw_cpu_info.tx_valid_cpus); -} - /* * Choose CPU with least number of active QP's from NUMA node of * TX interface. @@ -558,7 +556,7 @@ out_error: pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); siw_cm_exit(); - siw_destroy_cpulist(); + siw_destroy_cpulist(siw_cpu_info.num_nodes); return rv; } @@ -573,7 +571,7 @@ static void __exit siw_exit_module(void) siw_cm_exit(); - siw_destroy_cpulist(); + siw_destroy_cpulist(siw_cpu_info.num_nodes); if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index c5f7f1669d09..dcb963607c8b 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -5,6 +5,7 @@ #include <linux/gfp.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/sched/mm.h> @@ -13,18 +14,20 @@ #include "siw.h" #include "siw_mem.h" +/* Stag lookup is based on its index part only (24 bits). */ +#define SIW_STAG_MAX_INDEX 0x00ffffff + /* - * Stag lookup is based on its index part only (24 bits). * The code avoids special Stag of zero and tries to randomize * STag values between 1 and SIW_STAG_MAX_INDEX. */ int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) { - struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); u32 id, next; get_random_bytes(&next, 4); - next &= 0x00ffffff; + next &= SIW_STAG_MAX_INDEX; if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next, GFP_KERNEL) < 0) @@ -60,28 +63,17 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) return NULL; } -static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages, - bool dirty) -{ - unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty); -} - -void siw_umem_release(struct siw_umem *umem, bool dirty) +void siw_umem_release(struct siw_umem *umem) { - struct mm_struct *mm_s = umem->owning_mm; int i, num_pages = umem->num_pages; - for (i = 0; num_pages; i++) { - int to_free = min_t(int, PAGES_PER_CHUNK, num_pages); + if (umem->base_mem) + ib_umem_release(umem->base_mem); - siw_free_plist(&umem->page_chunk[i], to_free, - umem->writable && dirty); + for (i = 0; num_pages > 0; i++) { kfree(umem->page_chunk[i].plist); - num_pages -= to_free; + num_pages -= PAGES_PER_CHUNK; } - atomic64_sub(umem->num_pages, &mm_s->pinned_vm); - - mmdrop(mm_s); kfree(umem->page_chunk); kfree(umem); } @@ -91,7 +83,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL); - struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); u32 id, next; if (!mem) @@ -107,7 +99,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, kref_init(&mem->ref); get_random_bytes(&next, 4); - next &= 0x00ffffff; + next &= SIW_STAG_MAX_INDEX; if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next, GFP_KERNEL) < 0) { @@ -145,7 +137,7 @@ void siw_free_mem(struct kref *ref) if (!mem->is_mw && mem->mem_obj) { if (mem->is_pbl == 0) - siw_umem_release(mem->umem, true); + siw_umem_release(mem->umem); else kfree(mem->pbl); } @@ -362,18 +354,16 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf) return pbl; } -struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) +struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start, + u64 len, int rights) { struct siw_umem *umem; - struct mm_struct *mm_s; + struct ib_umem *base_mem; + struct sg_page_iter sg_iter; + struct sg_table *sgt; u64 first_page_va; - unsigned long mlock_limit; - unsigned int foll_flags = 
FOLL_LONGTERM; int num_pages, num_chunks, i, rv = 0; - if (!can_do_mlock()) - return ERR_PTR(-EPERM); - if (!len) return ERR_PTR(-EINVAL); @@ -385,65 +375,50 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) if (!umem) return ERR_PTR(-ENOMEM); - mm_s = current->mm; - umem->owning_mm = mm_s; - umem->writable = writable; - - mmgrab(mm_s); - - if (writable) - foll_flags |= FOLL_WRITE; - - mmap_read_lock(mm_s); - - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) { - rv = -ENOMEM; - goto out_sem_up; - } - umem->fp_addr = first_page_va; - umem->page_chunk = kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL); if (!umem->page_chunk) { rv = -ENOMEM; - goto out_sem_up; + goto err_out; } - for (i = 0; num_pages; i++) { + base_mem = ib_umem_get(base_dev, start, len, rights); + if (IS_ERR(base_mem)) { + rv = PTR_ERR(base_mem); + siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv); + goto err_out; + } + umem->fp_addr = first_page_va; + umem->base_mem = base_mem; + + sgt = &base_mem->sgt_append.sgt; + __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0); + + if (!__sg_page_iter_next(&sg_iter)) { + rv = -EINVAL; + goto err_out; + } + for (i = 0; num_pages > 0; i++) { int nents = min_t(int, num_pages, PAGES_PER_CHUNK); struct page **plist = kcalloc(nents, sizeof(struct page *), GFP_KERNEL); if (!plist) { rv = -ENOMEM; - goto out_sem_up; + goto err_out; } umem->page_chunk[i].plist = plist; - while (nents) { - rv = pin_user_pages(first_page_va, nents, foll_flags, - plist); - if (rv < 0) - goto out_sem_up; - - umem->num_pages += rv; - first_page_va += rv * PAGE_SIZE; - plist += rv; - nents -= rv; - num_pages -= rv; + while (nents--) { + *plist = sg_page_iter_page(&sg_iter); + umem->num_pages++; + num_pages--; + plist++; + if (!__sg_page_iter_next(&sg_iter)) + break; } } -out_sem_up: - mmap_read_unlock(mm_s); - - if (rv > 0) - return umem; - - /* Adjust accounting for pages not pinned */ - if (num_pages) - atomic64_sub(num_pages, &mm_s->pinned_vm); - - siw_umem_release(umem, false); + return umem; +err_out: + siw_umem_release(umem); return ERR_PTR(rv); } diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index a2835284fe5b..e74cfcd6dbc1 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -6,8 +6,9 @@ #ifndef _SIW_MEM_H #define _SIW_MEM_H -struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); -void siw_umem_release(struct siw_umem *umem, bool dirty); +struct siw_umem *siw_umem_get(struct ib_device *base_dave, u64 start, + u64 len, int rights); +void siw_umem_release(struct siw_umem *umem); struct siw_pbl *siw_pbl_alloc(u32 num_buf); dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 26e3904d2f41..da92cfa2073d 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1183,7 +1183,7 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, /* * siw_sq_flush() * - * Flush SQ and ORRQ entries to CQ. + * Flush SQ and ORQ entries to CQ. * * Must be called with QP state write lock held. * Therefore, SQ and ORQ lock must not be taken. 
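The siw_umem_get() rework above replaces siw's private pin_user_pages() bookkeeping with ib_umem_get() followed by a scatter-gather page walk. Below is a minimal sketch of that walk, assuming a base_mem already pinned via ib_umem_get(); the helper name count_umem_pages() is hypothetical and this is not the upstream function:

#include <rdma/ib_umem.h>
#include <linux/scatterlist.h>

/* Count the pages backing an already-pinned ib_umem by walking its
 * append-table scatterlist -- the same iterator pattern the reworked
 * siw_umem_get() uses to fill its per-chunk page lists.
 */
static int count_umem_pages(struct ib_umem *base_mem)
{
	struct sg_table *sgt = &base_mem->sgt_append.sgt;
	struct sg_page_iter sg_iter;
	int num_pages = 0;

	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
	while (__sg_page_iter_next(&sg_iter)) {
		/* Each iteration yields one PAGE_SIZE page of the mapping. */
		if (!sg_page_iter_page(&sg_iter))
			return -EINVAL;
		num_pages++;
	}
	return num_pages;
}

Pinning, memlock accounting and release are now owned by ib_umem_get()/ib_umem_release(), which is why the explicit pinned_vm accounting and the mmgrab()/mmdrop() pair disappear from the driver in the hunks above.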
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 33e0fdb362ff..ed4fc39718b4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -405,6 +405,20 @@ out: return wqe; } +static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx, + unsigned int *pbl_idx, u64 addr, int bytes) +{ + int rv; + + if (mem_p->mem_obj == NULL) + rv = siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes); + else if (!mem_p->is_pbl) + rv = siw_rx_umem(srx, mem_p->umem, addr, bytes); + else + rv = siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes); + return rv; +} + /* * siw_proc_send: * @@ -485,17 +499,8 @@ int siw_proc_send(struct siw_qp *qp) break; } mem_p = *mem; - if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, - ib_virt_dma_to_ptr(sge->laddr + frx->sge_off), - sge_bytes); - else if (!mem_p->is_pbl) - rv = siw_rx_umem(srx, mem_p->umem, - sge->laddr + frx->sge_off, sge_bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, - sge->laddr + frx->sge_off, sge_bytes); - + rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, + sge->laddr + frx->sge_off, sge_bytes); if (unlikely(rv != sge_bytes)) { wqe->processed += rcvd_bytes; @@ -598,17 +603,8 @@ int siw_proc_write(struct siw_qp *qp) return -EINVAL; } - if (mem->mem_obj == NULL) - rv = siw_rx_kva(srx, - (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd), - bytes); - else if (!mem->is_pbl) - rv = siw_rx_umem(srx, mem->umem, - srx->ddp_to + srx->fpdu_part_rcvd, bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem, - srx->ddp_to + srx->fpdu_part_rcvd, bytes); - + rv = siw_rx_data(mem, srx, &frx->pbl_idx, + srx->ddp_to + srx->fpdu_part_rcvd, bytes); if (unlikely(rv != bytes)) { siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC, @@ -849,17 +845,8 @@ int siw_proc_rresp(struct siw_qp *qp) mem_p = *mem; bytes = min(srx->fpdu_part_rem, srx->skb_new); - - if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, - ib_virt_dma_to_ptr(sge->laddr + wqe->processed), - bytes); - else if (!mem_p->is_pbl) - rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, - bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, - sge->laddr + wqe->processed, bytes); + rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, + sge->laddr + wqe->processed, bytes); if (rv != bytes) { wqe->wc_status = SIW_WC_GENERAL_ERR; rv = -EINVAL; @@ -881,6 +868,13 @@ error_term: return rv; } +static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length) +{ + srx->skb_offset += length; + srx->skb_new -= length; + srx->skb_copied += length; +} + int siw_proc_terminate(struct siw_qp *qp) { struct siw_rx_stream *srx = &qp->rx_stream; @@ -925,9 +919,7 @@ int siw_proc_terminate(struct siw_qp *qp) goto out; infop += to_copy; - srx->skb_offset += to_copy; - srx->skb_new -= to_copy; - srx->skb_copied += to_copy; + siw_update_skb_rcvd(srx, to_copy); srx->fpdu_part_rcvd += to_copy; srx->fpdu_part_rem -= to_copy; @@ -949,9 +941,7 @@ int siw_proc_terminate(struct siw_qp *qp) term->flag_m ? 
"valid" : "invalid"); } out: - srx->skb_new -= to_copy; - srx->skb_offset += to_copy; - srx->skb_copied += to_copy; + siw_update_skb_rcvd(srx, to_copy); srx->fpdu_part_rcvd += to_copy; srx->fpdu_part_rem -= to_copy; @@ -970,9 +960,7 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, tbuf, avail); - srx->skb_new -= avail; - srx->skb_offset += avail; - srx->skb_copied += avail; + siw_update_skb_rcvd(srx, avail); srx->fpdu_part_rem -= avail; if (srx->fpdu_part_rem) @@ -1023,12 +1011,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + siw_update_skb_rcvd(srx, bytes); srx->fpdu_part_rcvd += bytes; - - srx->skb_new -= bytes; - srx->skb_offset += bytes; - srx->skb_copied += bytes; - if (srx->fpdu_part_rcvd < MIN_DDP_HDR) return -EAGAIN; @@ -1091,12 +1075,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + siw_update_skb_rcvd(srx, bytes); srx->fpdu_part_rcvd += bytes; - - srx->skb_new -= bytes; - srx->skb_offset += bytes; - srx->skb_copied += bytes; - if (srx->fpdu_part_rcvd < hdrlen) return -EAGAIN; } diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index b2c06100cf01..64ad9e0895bd 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -34,6 +34,15 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) return NULL; } +static struct page *siw_get_page(struct siw_mem *mem, struct siw_sge *sge, + unsigned long offset, int *pbl_idx) +{ + if (!mem->is_pbl) + return siw_get_upage(mem->umem, sge->laddr + offset); + else + return siw_get_pblpage(mem, sge->laddr + offset, pbl_idx); +} + /* * Copy short payload at provided destination payload address */ @@ -67,11 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) char *buffer; int pbl_idx = 0; - if (!mem->is_pbl) - p = siw_get_upage(mem->umem, sge->laddr); - else - p = siw_get_pblpage(mem, sge->laddr, &pbl_idx); - + p = siw_get_page(mem, sge, 0, &pbl_idx); if (unlikely(!p)) return -EFAULT; @@ -85,13 +90,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) memcpy(paddr, buffer + off, part); kunmap_local(buffer); - if (!mem->is_pbl) - p = siw_get_upage(mem->umem, - sge->laddr + part); - else - p = siw_get_pblpage(mem, - sge->laddr + part, - &pbl_idx); + p = siw_get_page(mem, sge, part, &pbl_idx); if (unlikely(!p)) return -EFAULT; @@ -249,14 +248,10 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) /* * Do complete CRC if enabled and short packet */ - if (c_tx->mpa_crc_hd) { - crypto_shash_init(c_tx->mpa_crc_hd); - if (crypto_shash_update(c_tx->mpa_crc_hd, - (u8 *)&c_tx->pkt, - c_tx->ctrl_len)) - return -EINVAL; - crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc); - } + if (c_tx->mpa_crc_hd && + crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, + c_tx->ctrl_len, (u8 *)crc) != 0) + return -EINVAL; c_tx->ctrl_len += MPA_CRC_SIZE; return PKT_COMPLETE; @@ -297,8 +292,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s, (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent, .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent }; - int rv = kernel_sendmsg(s, &msg, &iov, 1, - c_tx->ctrl_len - c_tx->ctrl_sent); + int rv = kernel_sendmsg(s, &msg, &iov, 1, iov.iov_len); if (rv >= 0) { c_tx->ctrl_sent += rv; @@ -502,13 +496,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct 
socket *s) if (!is_kva) { struct page *p; - if (mem->is_pbl) - p = siw_get_pblpage( - mem, sge->laddr + sge_off, - &pbl_idx); - else - p = siw_get_upage(mem->umem, - sge->laddr + sge_off); + p = siw_get_page(mem, sge, sge_off, &pbl_idx); if (unlikely(!p)) { siw_unmap_pages(iov, kmap_mask, seg); wqe->processed -= c_tx->bytes_unsent; @@ -1009,13 +997,12 @@ static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe) * MPA FPDUs, each containing a DDP segment. * * SQ processing may occur in user context as a result of posting - * new WQE's or from siw_sq_work_handler() context. Processing in + * new WQE's or from siw_tx_thread context. Processing in * user context is limited to non-kernel verbs users. * * SQ processing may get paused anytime, possibly in the middle of a WR * or FPDU, if insufficient send space is available. SQ processing - * gets resumed from siw_sq_work_handler(), if send space becomes - * available again. + * gets resumed from siw_tx_thread, if send space becomes available again. * * Must be called with the QP state read-locked. * diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index c5c27db9c2fe..ecf0444666b4 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -19,6 +19,15 @@ #include "siw_verbs.h" #include "siw_mem.h" +static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = { + [SIW_QP_STATE_IDLE] = IB_QPS_INIT, + [SIW_QP_STATE_RTR] = IB_QPS_RTR, + [SIW_QP_STATE_RTS] = IB_QPS_RTS, + [SIW_QP_STATE_CLOSING] = IB_QPS_SQD, + [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE, + [SIW_QP_STATE_ERROR] = IB_QPS_ERR +}; + static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = SIW_QP_STATE_IDLE, [IB_QPS_INIT] = SIW_QP_STATE_IDLE, @@ -66,12 +75,9 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) entry = to_siw_mmap_entry(rdma_entry); rv = remap_vmalloc_range(vma, entry->address, 0); - if (rv) { + if (rv) pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, size); - goto out; - } -out: rdma_user_mmap_entry_put(rdma_entry); return rv; @@ -336,11 +342,10 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, goto err_atomic; } /* - * NOTE: we allow for zero element SQ and RQ WQE's SGL's - * but not for a QP unable to hold any WQE (SQ + RQ) + * NOTE: we don't allow for a QP unable to hold any SQ WQE */ - if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) { - siw_dbg(base_dev, "QP must have send or receive queue\n"); + if (attrs->cap.max_send_wr == 0) { + siw_dbg(base_dev, "QP must have send queue\n"); rv = -EINVAL; goto err_atomic; } @@ -360,21 +365,14 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (rv) goto err_atomic; - num_sqe = attrs->cap.max_send_wr; - num_rqe = attrs->cap.max_recv_wr; /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. 
*/ - if (num_sqe) - num_sqe = roundup_pow_of_two(num_sqe); - else { - /* Zero sized SQ is not supported */ - rv = -EINVAL; - goto err_out_xa; - } + num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); + num_rqe = attrs->cap.max_recv_wr; if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); @@ -515,6 +513,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, } else { return -EINVAL; } + qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; qp_attr->cap.max_inline_data = SIW_MAX_INLINE; qp_attr->cap.max_send_wr = qp->attrs.sq_size; qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; @@ -1321,8 +1320,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, struct siw_umem *umem = NULL; struct siw_ureq_reg_mr ureq; struct siw_device *sdev = to_siw_dev(pd->device); - - unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); int rv; siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", @@ -1338,20 +1335,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, rv = -EINVAL; goto err_out; } - if (mem_limit != RLIM_INFINITY) { - unsigned long num_pages = - (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT; - mem_limit >>= PAGE_SHIFT; - - if (num_pages > mem_limit - current->mm->locked_vm) { - siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n", - num_pages, mem_limit, - current->mm->locked_vm); - rv = -ENOMEM; - goto err_out; - } - } - umem = siw_umem_get(start, len, ib_access_writable(rights)); + umem = siw_umem_get(pd->device, start, len, rights); if (IS_ERR(umem)) { rv = PTR_ERR(umem); siw_dbg_pd(pd, "getting user memory failed: %d\n", rv); @@ -1404,7 +1388,7 @@ err_out: kfree_rcu(mr, rcu); } else { if (umem) - siw_umem_release(umem, false); + siw_umem_release(umem); } return ERR_PTR(rv); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 35e9c8a330e2..963e936da5e3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -351,10 +351,12 @@ struct ipoib_dev_priv { struct workqueue_struct *wq; struct delayed_work mcast_task; struct work_struct carrier_on_task; + struct work_struct reschedule_napi_work; struct work_struct flush_light; struct work_struct flush_normal; struct work_struct flush_heavy; struct work_struct restart_task; + struct work_struct tx_timeout_work; struct delayed_work ah_reap_task; struct delayed_work neigh_reap_task; struct ib_device *ca; @@ -499,6 +501,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ib_ah *address, u32 dqpn); void ipoib_reap_ah(struct work_struct *work); +void ipoib_napi_schedule_work(struct work_struct *work); struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); @@ -510,6 +513,7 @@ void ipoib_ib_tx_timer_func(struct timer_list *t); void ipoib_ib_dev_flush_light(struct work_struct *work); void ipoib_ib_dev_flush_normal(struct work_struct *work); void ipoib_ib_dev_flush_heavy(struct work_struct *work); +void ipoib_ib_tx_timeout_work(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 7f84d9866cef..5cde275daa94 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -531,11 +531,35 @@ void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr) 
napi_schedule(&priv->recv_napi); } +/* The function will force napi_schedule */ +void ipoib_napi_schedule_work(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, reschedule_napi_work); + bool ret; + + do { + ret = napi_schedule(&priv->send_napi); + if (!ret) + msleep(3); + } while (!ret && netif_queue_stopped(priv->dev) && + test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)); +} + void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr) { struct ipoib_dev_priv *priv = ctx_ptr; + bool ret; - napi_schedule(&priv->send_napi); + ret = napi_schedule(&priv->send_napi); + /* + * if the queue is closed the driver must be able to schedule napi, + * otherwise we can end with closed queue forever, because no new + * packets to send and napi callback might not get new event after + * its re-arm of the napi. + */ + if (!ret && netif_queue_stopped(priv->dev)) + schedule_work(&priv->reschedule_napi_work); } static inline int post_send(struct ipoib_dev_priv *priv, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 967004ccad98..7a5be705d718 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1200,7 +1200,34 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) netif_queue_stopped(dev), priv->tx_head, priv->tx_tail, priv->global_tx_head, priv->global_tx_tail); - /* XXX reset QP, etc. */ + + schedule_work(&priv->tx_timeout_work); +} + +void ipoib_ib_tx_timeout_work(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, + struct ipoib_dev_priv, + tx_timeout_work); + int err; + + rtnl_lock(); + + if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + goto unlock; + + ipoib_stop(priv->dev); + err = ipoib_open(priv->dev); + if (err) { + ipoib_warn(priv, "ipoib_open failed recovering from a tx_timeout, err(%d).\n", + err); + goto unlock; + } + + netif_tx_wake_all_queues(priv->dev); +unlock: + rtnl_unlock(); + } static int ipoib_hard_header(struct sk_buff *skb, @@ -2112,7 +2139,7 @@ void ipoib_setup_common(struct net_device *dev) ipoib_set_ethtool_ops(dev); - dev->watchdog_timeo = HZ; + dev->watchdog_timeo = 10 * HZ; dev->flags |= IFF_BROADCAST | IFF_MULTICAST; @@ -2150,10 +2177,12 @@ static void ipoib_build_priv(struct net_device *dev) INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); + INIT_WORK(&priv->reschedule_napi_work, ipoib_napi_schedule_work); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); + INIT_WORK(&priv->tx_timeout_work, ipoib_ib_tx_timeout_work); INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 5b3154503bf4..319d4288eddd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -531,21 +531,18 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = SENDONLY_FULLMEMBER_JOIN; } - spin_unlock_irq(&priv->lock); multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, - &rec, comp_mask, 
GFP_KERNEL, + &rec, comp_mask, GFP_ATOMIC, ipoib_mcast_join_complete, mcast); - spin_lock_irq(&priv->lock); if (IS_ERR(multicast)) { ret = PTR_ERR(multicast); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); /* Requeue this join task with a backoff delay */ __ipoib_mcast_schedule_join_thread(priv, mcast, 1); clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - spin_unlock_irq(&priv->lock); complete(&mcast->done); - spin_lock_irq(&priv->lock); + return ret; } return 0; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index dee8c97ff056..68429a5f796d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -182,7 +182,7 @@ enum iser_data_dir { * * @sg: pointer to the sg list * @size: num entries of this sg - * @data_len: total beffer byte len + * @data_len: total buffer byte len * @dma_nents: returned by dma_map_sg */ struct iser_data_buf { @@ -299,7 +299,6 @@ struct ib_conn; * * @ib_device: RDMA device * @pd: Protection Domain for this device - * @mr: Global DMA memory region * @event_handler: IB events handle routine * @ig_list: entry in devices list * @refcount: Reference counter, dominated by open iser connections @@ -317,12 +316,10 @@ struct iser_device { * * @mr: memory region * @sig_mr: signature memory region - * @mr_valid: is mr valid indicator */ struct iser_reg_resources { struct ib_mr *mr; struct ib_mr *sig_mr; - u8 mr_valid:1; }; /** @@ -389,7 +386,7 @@ struct ib_conn { * to max number of post recvs * @max_cmds: maximum cmds allowed for this connection * @name: connection peer portal - * @release_work: deffered work for release job + * @release_work: deferred work for release job * @state_mutex: protects iser onnection state * @stop_completion: conn_stop completion * @ib_completion: RDMA cleanup completion diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 39ea73f69016..f5f090dc4f1e 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -581,7 +581,10 @@ static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) return -EINVAL; } - desc->rsc.mr_valid = 0; + if (desc->sig_protected) + desc->rsc.sig_mr->need_inval = false; + else + desc->rsc.mr->need_inval = false; return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 29ae2c6a250a..6efcb79c8efe 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -264,7 +264,7 @@ static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (rsc->mr_valid) + if (rsc->sig_mr->need_inval) iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -288,7 +288,7 @@ static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, wr->access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - rsc->mr_valid = 1; + rsc->sig_mr->need_inval = true; sig_reg->sge.lkey = mr->lkey; sig_reg->rkey = mr->rkey; @@ -313,7 +313,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_reg_wr *wr = &tx_desc->reg_wr; int n; - if (rsc->mr_valid) + if (rsc->mr->need_inval) iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -336,7 +336,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, 
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; - rsc->mr_valid = 1; + rsc->mr->need_inval = true; reg->sge.lkey = mr->lkey; reg->rkey = mr->rkey; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 95b8eebf7e04..6801b70dc9e0 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -129,7 +129,6 @@ iser_create_fastreg_desc(struct iser_device *device, goto err_alloc_mr_integrity; } } - desc->rsc.mr_valid = 0; return desc; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 7f3167ce2972..88106cf5ce55 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1391,9 +1391,9 @@ static int alloc_path_reqs(struct rtrs_clt_path *clt_path) clt_path->max_pages_per_mr); if (IS_ERR(req->mr)) { err = PTR_ERR(req->mr); + pr_err("Failed to alloc clt_path->max_pages_per_mr %d: %pe\n", + clt_path->max_pages_per_mr, req->mr); req->mr = NULL; - pr_err("Failed to alloc clt_path->max_pages_per_mr %d\n", - clt_path->max_pages_per_mr); goto out; } @@ -2025,6 +2025,8 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, /* * Device removal is a special case. Queue close and return 0. */ + rtrs_wrn_rl(s, "CM event: %s, status: %d\n", rdma_event_msg(ev->event), + ev->status); rtrs_clt_close_conns(clt_path, false); return 0; default: @@ -2058,10 +2060,8 @@ static int create_cm(struct rtrs_clt_con *con) clt_path->s.dst_addr.ss_family == AF_IB ? RDMA_PS_IB : RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { - err = PTR_ERR(cm_id); - rtrs_err(s, "Failed to create CM ID, err: %d\n", err); - - return err; + rtrs_err(s, "Failed to create CM ID, err: %pe\n", cm_id); + return PTR_ERR(cm_id); } con->c.cm_id = cm_id; con->cm_err = 0; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index d80edfffd2e4..4e17d546d4cc 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -242,8 +242,8 @@ static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe, cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx); if (IS_ERR(cq)) { - rtrs_err(con->path, "Creating completion queue failed, errno: %ld\n", - PTR_ERR(cq)); + rtrs_err(con->path, "Creating completion queue failed, errno: %pe\n", + cq); return PTR_ERR(cq); } con->cq = cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b4eb17141edf..349e28a6dd8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -618,13 +618,6 @@ static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw) return esw && MLX5_ESWITCH_MANAGER(esw->dev); } -/* The returned number is valid only when the dev is eswitch manager. */ -static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) -{ - return mlx5_core_is_ecpf_esw_manager(dev) ? 
- MLX5_VPORT_ECPF : MLX5_VPORT_PF; -} - static inline bool mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 9482e51ac82a..7c5516b0a844 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -13,11 +13,13 @@ struct mlx5_dm { unsigned long *steering_sw_icm_alloc_blocks; unsigned long *header_modify_sw_icm_alloc_blocks; unsigned long *header_modify_pattern_sw_icm_alloc_blocks; + unsigned long *header_encap_sw_icm_alloc_blocks; }; struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) { u64 header_modify_pattern_icm_blocks = 0; + u64 header_sw_encap_icm_blocks = 0; u64 header_modify_icm_blocks = 0; u64 steering_icm_blocks = 0; struct mlx5_dm *dm; @@ -54,6 +56,17 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) goto err_modify_hdr; } + if (MLX5_CAP_DEV_MEM(dev, log_indirect_encap_sw_icm_size)) { + header_sw_encap_icm_blocks = + BIT(MLX5_CAP_DEV_MEM(dev, log_indirect_encap_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); + + dm->header_encap_sw_icm_alloc_blocks = + bitmap_zalloc(header_sw_encap_icm_blocks, GFP_KERNEL); + if (!dm->header_encap_sw_icm_alloc_blocks) + goto err_pattern; + } + support_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) && MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2) && MLX5_CAP64_DEV_MEM(dev, header_modify_pattern_sw_icm_start_address); @@ -66,11 +79,14 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) dm->header_modify_pattern_sw_icm_alloc_blocks = bitmap_zalloc(header_modify_pattern_icm_blocks, GFP_KERNEL); if (!dm->header_modify_pattern_sw_icm_alloc_blocks) - goto err_pattern; + goto err_sw_encap; } return dm; +err_sw_encap: + bitmap_free(dm->header_encap_sw_icm_alloc_blocks); + err_pattern: bitmap_free(dm->header_modify_sw_icm_alloc_blocks); @@ -105,6 +121,14 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev) bitmap_free(dm->header_modify_sw_icm_alloc_blocks); } + if (dm->header_encap_sw_icm_alloc_blocks) { + WARN_ON(!bitmap_empty(dm->header_encap_sw_icm_alloc_blocks, + BIT(MLX5_CAP_DEV_MEM(dev, + log_indirect_encap_sw_icm_size) - + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)))); + bitmap_free(dm->header_encap_sw_icm_alloc_blocks); + } + if (dm->header_modify_pattern_sw_icm_alloc_blocks) { WARN_ON(!bitmap_empty(dm->header_modify_pattern_sw_icm_alloc_blocks, BIT(MLX5_CAP_DEV_MEM(dev, @@ -164,6 +188,13 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, log_header_modify_pattern_sw_icm_size); block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_SW_ENCAP: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + indirect_encap_sw_icm_start_address); + log_icm_size = MLX5_CAP_DEV_MEM(dev, + log_indirect_encap_sw_icm_size); + block_map = dm->header_encap_sw_icm_alloc_blocks; + break; default: return -EINVAL; } @@ -242,6 +273,11 @@ int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type header_modify_pattern_sw_icm_start_address); block_map = dm->header_modify_pattern_sw_icm_alloc_blocks; break; + case MLX5_SW_ICM_TYPE_SW_ENCAP: + icm_start_addr = MLX5_CAP64_DEV_MEM(dev, + indirect_encap_sw_icm_start_address); + block_map = dm->header_encap_sw_icm_alloc_blocks; + break; default: return -EINVAL; } diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index a6863011d682..d33b27214539 100644 --- 
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index a6863011d682..d33b27214539 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -158,6 +158,9 @@ static int mana_gd_detect_devices(struct pci_dev *pdev)
		if (dev_type == GDMA_DEVICE_MANA) {
			gc->mana.gdma_context = gc;
			gc->mana.dev_id = dev;
+		} else if (dev_type == GDMA_DEVICE_MANA_IB) {
+			gc->mana_ib.dev_id = dev;
+			gc->mana_ib.gdma_context = gc;
		}
	}
@@ -967,6 +970,7 @@ int mana_gd_register_device(struct gdma_dev *gd)
	return 0;
}
+EXPORT_SYMBOL_NS(mana_gd_register_device, NET_MANA);
int mana_gd_deregister_device(struct gdma_dev *gd)
{
@@ -997,6 +1001,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
	return err;
}
+EXPORT_SYMBOL_NS(mana_gd_deregister_device, NET_MANA);
u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
{
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7ee5b79ff3d6..8c55ff351e5f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -691,6 +691,7 @@ enum mlx5_sw_icm_type {
	MLX5_SW_ICM_TYPE_STEERING,
	MLX5_SW_ICM_TYPE_HEADER_MODIFY,
	MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN,
+	MLX5_SW_ICM_TYPE_SW_ENCAP,
};
#define MLX5_MAX_RESERVED_GIDS 8
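Note: mana_gd_register_device() and mana_gd_deregister_device() are now exported in the NET_MANA symbol namespace so the new MANA RDMA driver can attach to the GDMA_DEVICE_MANA_IB device detected above. A consumer module must import that namespace explicitly; the sketch below shows only that requirement, with the module body and the way the gdma_dev pointer is obtained invented for illustration (in practice it would be gc->mana_ib as filled in by mana_gd_detect_devices()).

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <net/mana/gdma.h>

MODULE_IMPORT_NS(NET_MANA);	/* required to link against EXPORT_SYMBOL_NS(..., NET_MANA) */

/* Hedged sketch, not the real mana_ib probe path. */
static int demo_attach(struct gdma_dev *demo_gd)
{
	return mana_gd_register_device(demo_gd);
}

static void demo_detach(struct gdma_dev *demo_gd)
{
	mana_gd_deregister_device(demo_gd);
}

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("NET_MANA namespace import sketch");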
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 950d2431a53c..df73a2ccc9af 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -7,6 +7,7 @@
#define _MLX5_ESWITCH_
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include <net/devlink.h>
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
@@ -210,4 +211,11 @@ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
	return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;
}
+/* The returned number is valid only when the dev is eswitch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+	return mlx5_core_is_ecpf_esw_manager(dev) ?
+			MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
#endif
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fee20fc010c2..7235f3eaed8b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1193,7 +1193,8 @@ struct mlx5_ifc_device_mem_cap_bits {
	u8 log_sw_icm_alloc_granularity[0x6];
	u8 log_steering_sw_icm_size[0x8];
-	u8 reserved_at_120[0x18];
+	u8 log_indirect_encap_sw_icm_size[0x8];
+	u8 reserved_at_128[0x10];
	u8 log_header_modify_pattern_sw_icm_size[0x8];
	u8 header_modify_sw_icm_start_address[0x40];
@@ -1204,7 +1205,11 @@ struct mlx5_ifc_device_mem_cap_bits {
	u8 memic_operations[0x20];
-	u8 reserved_at_220[0x5e0];
+	u8 reserved_at_220[0x20];
+
+	u8 indirect_encap_sw_icm_start_address[0x40];
+
+	u8 reserved_at_280[0x580];
};
struct mlx5_ifc_device_event_cap_bits {
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 76f2fd2645b7..27684135bb4d 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -66,6 +66,7 @@ enum {
	GDMA_DEVICE_NONE = 0,
	GDMA_DEVICE_HWC = 1,
	GDMA_DEVICE_MANA = 2,
+	GDMA_DEVICE_MANA_IB = 3,
};
struct gdma_resource {
@@ -149,6 +150,7 @@ struct gdma_general_req {
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
+#define GDMA_MESSAGE_V3 3
struct gdma_general_resp {
	struct gdma_resp_hdr hdr;
@@ -390,6 +392,9 @@ struct gdma_context {
	/* Azure network adapter */
	struct gdma_dev mana;
+
+	/* Azure RDMA adapter */
+	struct gdma_dev mana_ib;
};
#define MAX_NUM_GDMA_DEVICES 4
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index 6e7c67a0cca3..c0c34aca90ec 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -54,6 +54,8 @@ enum {
	BNXT_RE_UCNTX_CMASK_HAVE_MODE = 0x02ULL,
	BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
	BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL,
+	BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL,
+	BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40,
};
enum bnxt_re_wqe_mode {
@@ -62,6 +64,14 @@ enum bnxt_re_wqe_mode {
	BNXT_QPLIB_WQE_MODE_INVALID = 0x02,
};
+enum {
+	BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT = 0x01,
+};
+
+struct bnxt_re_uctx_req {
+	__aligned_u64 comp_mask;
+};
+
struct bnxt_re_uctx_resp {
	__u32 dev_id;
	__u32 max_qp;
@@ -92,11 +102,16 @@ struct bnxt_re_cq_req {
	__aligned_u64 cq_handle;
};
+enum bnxt_re_cq_mask {
+	BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT = 0x1,
+};
+
struct bnxt_re_cq_resp {
	__u32 cqid;
	__u32 tail;
	__u32 phase;
	__u32 rsvd;
+	__aligned_u64 comp_mask;
};
struct bnxt_re_resize_cq_req {
@@ -133,6 +148,7 @@ enum bnxt_re_shpg_offt {
enum bnxt_re_objects {
	BNXT_RE_OBJECT_ALLOC_PAGE = (1U << UVERBS_ID_NS_SHIFT),
	BNXT_RE_OBJECT_NOTIFY_DRV,
+	BNXT_RE_OBJECT_GET_TOGGLE_MEM,
};
enum bnxt_re_alloc_page_type {
@@ -161,4 +177,29 @@ enum bnxt_re_alloc_page_methods {
enum bnxt_re_notify_drv_methods {
	BNXT_RE_METHOD_NOTIFY_DRV = (1U << UVERBS_ID_NS_SHIFT),
};
+
+/* Toggle mem */
+
+enum bnxt_re_get_toggle_mem_type {
+	BNXT_RE_CQ_TOGGLE_MEM = 0,
+	BNXT_RE_SRQ_TOGGLE_MEM,
+};
+
+enum bnxt_re_var_toggle_mem_attrs {
+	BNXT_RE_TOGGLE_MEM_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	BNXT_RE_TOGGLE_MEM_TYPE,
+	BNXT_RE_TOGGLE_MEM_RES_ID,
+	BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+	BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+	BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+};
+
+enum bnxt_re_toggle_mem_attrs {
+	BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum bnxt_re_toggle_mem_methods {
+	BNXT_RE_METHOD_GET_TOGGLE_MEM = (1U << UVERBS_ID_NS_SHIFT),
+	BNXT_RE_METHOD_RELEASE_TOGGLE_MEM,
+};
#endif /* __BNXT_RE_UVERBS_ABI_H__*/
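Note: the bnxt_re ABI changes above report optional features back to userspace through new comp_mask fields; a userspace provider is expected to test the relevant bit before using the CQ toggle page exposed via the new GET_TOGGLE_MEM method. The snippet below is a hedged sketch of that check only; "resp" is assumed to be the bnxt_re_cq_resp returned for a create-CQ request, and the helper name is invented.

#include <stdbool.h>
#include <rdma/bnxt_re-abi.h>

/* Returns true if the kernel advertised a toggle page for this CQ. */
static bool bnxt_re_cq_has_toggle_page(const struct bnxt_re_cq_resp *resp)
{
	return resp->comp_mask & BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT;
}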
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index d94c32f28804..701e2d567e41 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -1,12 +1,13 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
- * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
 */
#ifndef EFA_ABI_USER_H
#define EFA_ABI_USER_H
#include <linux/types.h>
+#include <rdma/ib_user_ioctl_cmds.h>
/*
 * Increment this value if any changes that break userspace ABI
@@ -134,4 +135,22 @@ struct efa_ibv_ex_query_device_resp {
	__u32 device_caps;
};
+enum {
+	EFA_QUERY_MR_VALIDITY_RECV_IC_ID = 1 << 0,
+	EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID = 1 << 1,
+	EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID = 1 << 2,
+};
+
+enum efa_query_mr_attrs {
+	EFA_IB_ATTR_QUERY_MR_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+	EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+	EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+	EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+};
+
+enum efa_mr_methods {
+	EFA_IB_METHOD_MR_QUERY = (1U << UVERBS_ID_NS_SHIFT),
+};
+
#endif /* EFA_ABI_USER_H */
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index ce0f37f83416..c996e151081e 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -125,4 +125,9 @@ struct hns_roce_ib_alloc_pd_resp {
	__u32 pdn;
};
+struct hns_roce_ib_create_ah_resp {
+	__u8 dmac[6];
+	__u8 reserved[2];
+};
+
#endif /* HNS_ABI_USER_H */
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index a96b7d2770e1..d4f6a36dffb0 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -37,6 +37,7 @@
#include <linux/types.h>
#include <linux/if_ether.h>	/* For ETH_ALEN. */
#include <rdma/ib_user_ioctl_verbs.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
enum {
	MLX5_QP_FLAG_SIGNATURE = 1 << 0,
@@ -275,6 +276,7 @@ struct mlx5_ib_query_device_resp {
	__u32 tunnel_offloads_caps; /* enum mlx5_ib_tunnel_offloads */
	struct mlx5_ib_dci_streams_caps dci_streams_caps;
	__u16 reserved;
+	struct mlx5_ib_uapi_reg reg_c0;
};
enum mlx5_ib_create_cq_flags {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 7af9e09ea556..3189c7f08d17 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -64,6 +64,7 @@ enum mlx5_ib_uapi_dm_type {
	MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
	MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
	MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM,
+	MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM,
};
enum mlx5_ib_uapi_devx_create_event_channel_flags {
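Note: the EFA query-MR additions above return per-MR interconnect IDs together with a validity bitmap, so a caller should trust a returned ID only when its validity bit is set. The snippet below is a hedged sketch of that check; the two values are assumed to have been filled in by the EFA_IB_METHOD_MR_QUERY response attributes, and the helper name is invented.

#include <stdint.h>
#include <stdio.h>
#include <rdma/efa-abi.h>

/* Print the receive-side interconnect ID only if the device reported it as valid. */
static void efa_print_recv_ic_id(uint16_t ic_id_validity, uint16_t recv_ic_id)
{
	if (ic_id_validity & EFA_QUERY_MR_VALIDITY_RECV_IC_ID)
		printf("recv interconnect id: %u\n", recv_ic_id);
	else
		printf("recv interconnect id not reported\n");
}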