diff options
Diffstat (limited to 'drivers/infiniband/hw')
78 files changed, 2749 insertions, 1221 deletions
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index fa40b685831b..9edd92023e18 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -3,4 +3,5 @@ ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o -iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o \ + restrack.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4cf17c650c36..0912fa026327 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3210,6 +3210,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.cm_id = cm_id; ref_cm_id(&ep->com); + cm_id->provider_data = ep; ep->com.dev = dev; ep->com.qp = get_qhp(dev, conn_param->qpn); if (!ep->com.qp) { diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6f2b26126c64..2be2e1ac1b5f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -315,7 +315,7 @@ static void advance_oldest_read(struct t4_wq *wq) * Deal with out-of-order and/or completions that complete * prior unsignalled WRs. */ -void c4iw_flush_hw_cq(struct c4iw_cq *chp) +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp) { struct t4_cqe *hw_cqe, *swcqe, read_cqe; struct c4iw_qp *qhp; @@ -339,6 +339,13 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (qhp == NULL) goto next_cqe; + if (flush_qhp != qhp) { + spin_lock(&qhp->lock); + + if (qhp->wq.flushed == 1) + goto next_cqe; + } + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) goto next_cqe; @@ -390,6 +397,8 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) next_cqe: t4_hwcq_consume(&chp->cq); ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + if (qhp && flush_qhp != qhp) + spin_unlock(&qhp->lock); } } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index feeb8ee6f4a2..44161ca4d2a8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -875,6 +875,11 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->db_off = 0; + init_completion(&rdev->rqt_compl); + init_completion(&rdev->pbl_compl); + kref_init(&rdev->rqt_kref); + kref_init(&rdev->pbl_kref); + return 0; err_free_status_page_and_wr_log: if (c4iw_wr_log && rdev->wr_log) @@ -893,13 +898,15 @@ destroy_resource: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { - destroy_workqueue(rdev->free_workq); kfree(rdev->wr_log); c4iw_release_dev_ucontext(rdev, &rdev->uctx); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); + wait_for_completion(&rdev->pbl_compl); + wait_for_completion(&rdev->rqt_compl); c4iw_ocqp_pool_destroy(rdev); + destroy_workqueue(rdev->free_workq); c4iw_destroy_resource(&rdev->resource); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index cc929002c05e..870649ff049c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -55,6 +55,7 @@ #include <rdma/iw_cm.h> #include <rdma/rdma_netlink.h> #include <rdma/iw_portmap.h> +#include <rdma/restrack.h> #include "cxgb4.h" #include "cxgb4_uld.h" @@ -185,6 +186,10 @@ struct c4iw_rdev { struct wr_log_entry *wr_log; int wr_log_size; struct workqueue_struct *free_workq; + struct completion rqt_compl; + struct completion pbl_compl; + struct kref rqt_kref; + struct kref pbl_kref; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -1049,7 +1054,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); -void c4iw_flush_hw_cq(struct c4iw_cq *chp); +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); @@ -1078,4 +1083,8 @@ extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); +typedef int c4iw_restrack_func(struct sk_buff *msg, + struct rdma_restrack_entry *res); +extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; + #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index e90f2fd8dc16..1445918e3239 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc) err_dereg_mem: dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp); -err_free_wr_wait: - c4iw_put_wr_wait(mhp->wr_waitp); err_free_skb: kfree_skb(mhp->dereg_skb); +err_free_wr_wait: + c4iw_put_wr_wait(mhp->wr_waitp); err_free_mhp: kfree(mhp); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0b9cc73c3ded..1feade8bb4b3 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -551,6 +551,13 @@ static struct net_device *get_netdev(struct ib_device *dev, u8 port) return ndev; } +static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) +{ + return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && + c4iw_restrack_funcs[res->type]) ? + c4iw_restrack_funcs[res->type](msg, res) : 0; +} + void c4iw_register_device(struct work_struct *work) { int ret; @@ -645,6 +652,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = c4iw_get_qp; + dev->ibdev.res.fill_res_entry = fill_res_entry; memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index de77b6027d69..4106eed1b8fb 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1297,8 +1297,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); - wqe = __skb_put(skb, sizeof(*wqe)); - memset(wqe, 0, sizeof *wqe); + wqe = __skb_put_zero(skb, sizeof(*wqe)); wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR)); wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID_V(qhp->ep->hwtid) | @@ -1343,12 +1342,12 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq); - c4iw_flush_hw_cq(rchp); + c4iw_flush_hw_cq(rchp, qhp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); if (schp != rchp) - c4iw_flush_hw_cq(schp); + c4iw_flush_hw_cq(schp, qhp); sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); @@ -1421,8 +1420,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - wqe = __skb_put(skb, sizeof(*wqe)); - memset(wqe, 0, sizeof *wqe); + wqe = __skb_put_zero(skb, sizeof(*wqe)); wqe->op_compl = cpu_to_be32( FW_WR_OP_V(FW_RI_INIT_WR) | FW_WR_COMPL_F); @@ -1487,8 +1485,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) } set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); - wqe = __skb_put(skb, sizeof(*wqe)); - memset(wqe, 0, sizeof *wqe); + wqe = __skb_put_zero(skb, sizeof(*wqe)); wqe->op_compl = cpu_to_be32( FW_WR_OP_V(FW_RI_INIT_WR) | FW_WR_COMPL_F); diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 3cf25997ed2b..0ef25ae05e6f 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -260,12 +260,22 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); if (rdev->stats.pbl.cur > rdev->stats.pbl.max) rdev->stats.pbl.max = rdev->stats.pbl.cur; + kref_get(&rdev->pbl_kref); } else rdev->stats.pbl.fail++; mutex_unlock(&rdev->stats.lock); return (u32)addr; } +static void destroy_pblpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, pbl_kref); + gen_pool_destroy(rdev->pbl_pool); + complete(&rdev->pbl_compl); +} + void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { pr_debug("addr 0x%x size %d\n", addr, size); @@ -273,6 +283,7 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); + kref_put(&rdev->pbl_kref, destroy_pblpool); } int c4iw_pblpool_create(struct c4iw_rdev *rdev) @@ -310,7 +321,7 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev) void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) { - gen_pool_destroy(rdev->pbl_pool); + kref_put(&rdev->pbl_kref, destroy_pblpool); } /* @@ -331,12 +342,22 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); if (rdev->stats.rqt.cur > rdev->stats.rqt.max) rdev->stats.rqt.max = rdev->stats.rqt.cur; + kref_get(&rdev->rqt_kref); } else rdev->stats.rqt.fail++; mutex_unlock(&rdev->stats.lock); return (u32)addr; } +static void destroy_rqtpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, rqt_kref); + gen_pool_destroy(rdev->rqt_pool); + complete(&rdev->rqt_compl); +} + void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { pr_debug("addr 0x%x size %d\n", addr, size << 6); @@ -344,6 +365,7 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); + kref_put(&rdev->rqt_kref, destroy_rqtpool); } int c4iw_rqtpool_create(struct c4iw_rdev *rdev) @@ -380,7 +402,7 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev) void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) { - gen_pool_destroy(rdev->rqt_pool); + kref_put(&rdev->rqt_kref, destroy_rqtpool); } /* diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c new file mode 100644 index 000000000000..9a7520ee41e0 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2018 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/rdma_cm.h> + +#include "iw_cxgb4.h" +#include <rdma/restrack.h> +#include <uapi/rdma/rdma_netlink.h> + +static int fill_sq(struct sk_buff *msg, struct t4_wq *wq) +{ + /* WQ+SQ */ + if (rdma_nl_put_driver_u32(msg, "sqid", wq->sq.qid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flushed", wq->flushed)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", wq->sq.memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", wq->sq.cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "pidx", wq->sq.pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->sq.wq_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flush_cidx", wq->sq.flush_cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "in_use", wq->sq.in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", wq->sq.size)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "flags", wq->sq.flags)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +static int fill_rq(struct sk_buff *msg, struct t4_wq *wq) +{ + /* RQ */ + if (rdma_nl_put_driver_u32(msg, "rqid", wq->rq.qid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", wq->rq.memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", wq->rq.cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "pidx", wq->rq.pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->rq.wq_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "msn", wq->rq.msn)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "rqt_hwaddr", wq->rq.rqt_hwaddr)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rqt_size", wq->rq.rqt_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "in_use", wq->rq.in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", wq->rq.size)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +static int fill_swsqe(struct sk_buff *msg, struct t4_sq *sq, u16 idx, + struct t4_swsqe *sqe) +{ + if (rdma_nl_put_driver_u32(msg, "idx", idx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "opcode", sqe->opcode)) + goto err; + if (rdma_nl_put_driver_u32(msg, "complete", sqe->complete)) + goto err; + if (sqe->complete && + rdma_nl_put_driver_u32(msg, "cqe_status", CQE_STATUS(&sqe->cqe))) + goto err; + if (rdma_nl_put_driver_u32(msg, "signaled", sqe->signaled)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flushed", sqe->flushed)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +/* + * Dump the first and last pending sqes. + */ +static int fill_swsqes(struct sk_buff *msg, struct t4_sq *sq, + u16 first_idx, struct t4_swsqe *first_sqe, + u16 last_idx, struct t4_swsqe *last_sqe) +{ + if (!first_sqe) + goto out; + if (fill_swsqe(msg, sq, first_idx, first_sqe)) + goto err; + if (!last_sqe) + goto out; + if (fill_swsqe(msg, sq, last_idx, last_sqe)) + goto err; +out: + return 0; +err: + return -EMSGSIZE; +} + +static int fill_res_qp_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_qp *ibqp = container_of(res, struct ib_qp, res); + struct t4_swsqe *fsp = NULL, *lsp = NULL; + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + u16 first_sq_idx = 0, last_sq_idx = 0; + struct t4_swsqe first_sqe, last_sqe; + struct nlattr *table_attr; + struct t4_wq wq; + + /* User qp state is not available, so don't dump user qps */ + if (qhp->ucontext) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + /* Get a consistent snapshot */ + spin_lock_irq(&qhp->lock); + wq = qhp->wq; + + /* If there are any pending sqes, copy the first and last */ + if (wq.sq.cidx != wq.sq.pidx) { + first_sq_idx = wq.sq.cidx; + first_sqe = qhp->wq.sq.sw_sq[first_sq_idx]; + fsp = &first_sqe; + last_sq_idx = wq.sq.pidx; + if (last_sq_idx-- == 0) + last_sq_idx = wq.sq.size - 1; + if (last_sq_idx != first_sq_idx) { + last_sqe = qhp->wq.sq.sw_sq[last_sq_idx]; + lsp = &last_sqe; + } + } + spin_unlock_irq(&qhp->lock); + + if (fill_sq(msg, &wq)) + goto err_cancel_table; + + if (fill_swsqes(msg, &wq.sq, first_sq_idx, fsp, last_sq_idx, lsp)) + goto err_cancel_table; + + if (fill_rq(msg, &wq)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +union union_ep { + struct c4iw_listen_ep lep; + struct c4iw_ep ep; +}; + +static int fill_res_ep_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct rdma_cm_id *cm_id = rdma_res_to_id(res); + struct nlattr *table_attr; + struct c4iw_ep_common *epcp; + struct c4iw_listen_ep *listen_ep = NULL; + struct c4iw_ep *ep = NULL; + struct iw_cm_id *iw_cm_id; + union union_ep *uep; + + iw_cm_id = rdma_iw_cm_id(cm_id); + if (!iw_cm_id) + return 0; + epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; + if (!epcp) + return 0; + uep = kcalloc(1, sizeof(*uep), GFP_KERNEL); + if (!uep) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err_free_uep; + + /* Get a consistent snapshot */ + mutex_lock(&epcp->mutex); + if (epcp->state == LISTEN) { + uep->lep = *(struct c4iw_listen_ep *)epcp; + mutex_unlock(&epcp->mutex); + listen_ep = &uep->lep; + epcp = &listen_ep->com; + } else { + uep->ep = *(struct c4iw_ep *)epcp; + mutex_unlock(&epcp->mutex); + ep = &uep->ep; + epcp = &ep->com; + } + + if (rdma_nl_put_driver_u32(msg, "state", epcp->state)) + goto err_cancel_table; + if (rdma_nl_put_driver_u64_hex(msg, "flags", epcp->flags)) + goto err_cancel_table; + if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history)) + goto err_cancel_table; + + if (epcp->state == LISTEN) { + if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog)) + goto err_cancel_table; + } else { + if (rdma_nl_put_driver_u32(msg, "hwtid", ep->hwtid)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ord", ep->ord)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ird", ep->ird)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "emss", ep->emss)) + goto err_cancel_table; + + if (!ep->parent_ep && rdma_nl_put_driver_u32(msg, "atid", + ep->atid)) + goto err_cancel_table; + } + nla_nest_end(msg, table_attr); + kfree(uep); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err_free_uep: + kfree(uep); + return -EMSGSIZE; +} + +static int fill_cq(struct sk_buff *msg, struct t4_cq *cq) +{ + if (rdma_nl_put_driver_u32(msg, "cqid", cq->cqid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", cq->memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", cq->size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", cq->cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx_inc", cq->cidx_inc)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_cidx", cq->sw_cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_pidx", cq->sw_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_in_use", cq->sw_in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "vector", cq->vector)) + goto err; + if (rdma_nl_put_driver_u32(msg, "gen", cq->gen)) + goto err; + if (rdma_nl_put_driver_u32(msg, "error", cq->error)) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts", + be64_to_cpu(cq->bits_type_ts))) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "flags", cq->flags)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_cqe(struct sk_buff *msg, struct t4_cqe *cqe, u16 idx, + const char *qstr) +{ + if (rdma_nl_put_driver_u32(msg, qstr, idx)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "header", + be32_to_cpu(cqe->header))) + goto err; + if (rdma_nl_put_driver_u32(msg, "len", be32_to_cpu(cqe->len))) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "wrid_hi", + be32_to_cpu(cqe->u.gen.wrid_hi))) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "wrid_low", + be32_to_cpu(cqe->u.gen.wrid_low))) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts", + be64_to_cpu(cqe->bits_type_ts))) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_hwcqes(struct sk_buff *msg, struct t4_cq *cq, + struct t4_cqe *cqes) +{ + u16 idx; + + idx = (cq->cidx > 0) ? cq->cidx - 1 : cq->size - 1; + if (fill_cqe(msg, cqes, idx, "hwcq_idx")) + goto err; + idx = cq->cidx; + if (fill_cqe(msg, cqes + 1, idx, "hwcq_idx")) + goto err; + + return 0; +err: + return -EMSGSIZE; +} + +static int fill_swcqes(struct sk_buff *msg, struct t4_cq *cq, + struct t4_cqe *cqes) +{ + u16 idx; + + if (!cq->sw_in_use) + return 0; + + idx = cq->sw_cidx; + if (fill_cqe(msg, cqes, idx, "swcq_idx")) + goto err; + if (cq->sw_in_use == 1) + goto out; + idx = (cq->sw_pidx > 0) ? cq->sw_pidx - 1 : cq->size - 1; + if (fill_cqe(msg, cqes + 1, idx, "swcq_idx")) + goto err; +out: + return 0; +err: + return -EMSGSIZE; +} + +static int fill_res_cq_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_cq *ibcq = container_of(res, struct ib_cq, res); + struct c4iw_cq *chp = to_c4iw_cq(ibcq); + struct nlattr *table_attr; + struct t4_cqe hwcqes[2]; + struct t4_cqe swcqes[2]; + struct t4_cq cq; + u16 idx; + + /* User cq state is not available, so don't dump user cqs */ + if (ibcq->uobject) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + /* Get a consistent snapshot */ + spin_lock_irq(&chp->lock); + + /* t4_cq struct */ + cq = chp->cq; + + /* get 2 hw cqes: cidx-1, and cidx */ + idx = (cq.cidx > 0) ? cq.cidx - 1 : cq.size - 1; + hwcqes[0] = chp->cq.queue[idx]; + + idx = cq.cidx; + hwcqes[1] = chp->cq.queue[idx]; + + /* get first and last sw cqes */ + if (cq.sw_in_use) { + swcqes[0] = chp->cq.sw_queue[cq.sw_cidx]; + if (cq.sw_in_use > 1) { + idx = (cq.sw_pidx > 0) ? cq.sw_pidx - 1 : cq.size - 1; + swcqes[1] = chp->cq.sw_queue[idx]; + } + } + + spin_unlock_irq(&chp->lock); + + if (fill_cq(msg, &cq)) + goto err_cancel_table; + + if (fill_swcqes(msg, &cq, swcqes)) + goto err_cancel_table; + + if (fill_hwcqes(msg, &cq, hwcqes)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + struct c4iw_dev *dev = mhp->rhp; + u32 stag = mhp->attr.stag; + struct nlattr *table_attr; + struct fw_ri_tpte tpte; + int ret; + + if (!stag) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + ret = cxgb4_read_tpte(dev->rdev.lldi.ports[0], stag, (__be32 *)&tpte); + if (ret) { + dev_err(&dev->rdev.lldi.pdev->dev, + "%s cxgb4_read_tpte err %d\n", __func__, ret); + return 0; + } + + if (rdma_nl_put_driver_u32_hex(msg, "idx", stag >> 8)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "valid", + FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "key", stag & 0xff)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "state", + FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "pdid", + FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "perm", + FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ps", + FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u64(msg, "len", + ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "pbl_addr", + FW_RI_TPTE_PBLADDR_G(ntohl(tpte.nosnoop_pbladdr)))) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { + [RDMA_RESTRACK_QP] = fill_res_qp_entry, + [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, + [RDMA_RESTRACK_CQ] = fill_res_cq_entry, + [RDMA_RESTRACK_MR] = fill_res_mr_entry, +}; diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ce4010bad982..f451ba912f47 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -14,7 +14,15 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ verbs_txreq.o vnic_main.o vnic_sdma.o -hfi1-$(CONFIG_DEBUG_FS) += debugfs.o + +ifdef CONFIG_DEBUG_FS +hfi1-y += debugfs.o +ifdef CONFIG_FAULT_INJECTION +ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +hfi1-y += fault.o +endif +endif +endif CFLAGS_trace.o = -I$(src) ifdef MVERSION diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a97055dd4fbd..fbe7198a715a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -77,6 +77,58 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set) set->gen = 0; } +/* Increment generation of CPU set if needed */ +static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set) +{ + if (cpumask_equal(&set->mask, &set->used)) { + /* + * We've used up all the CPUs, bump up the generation + * and reset the 'used' map + */ + set->gen++; + cpumask_clear(&set->used); + } +} + +static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set) +{ + if (cpumask_empty(&set->used) && set->gen) { + set->gen--; + cpumask_copy(&set->used, &set->mask); + } +} + +/* Get the first CPU from the list of unused CPUs in a CPU set data structure */ +static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff) +{ + int cpu; + + if (!diff || !set) + return -EINVAL; + + _cpu_mask_set_gen_inc(set); + + /* Find out CPUs left in CPU mask */ + cpumask_andnot(diff, &set->mask, &set->used); + + cpu = cpumask_first(diff); + if (cpu >= nr_cpu_ids) /* empty */ + cpu = -EINVAL; + else + cpumask_set_cpu(cpu, &set->used); + + return cpu; +} + +static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu) +{ + if (!set) + return; + + cpumask_clear_cpu(cpu, &set->used); + _cpu_mask_set_gen_dec(set); +} + /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void) { @@ -156,7 +208,13 @@ int node_affinity_init(void) return 0; } -void node_affinity_destroy(void) +static void node_affinity_destroy(struct hfi1_affinity_node *entry) +{ + free_percpu(entry->comp_vect_affinity); + kfree(entry); +} + +void node_affinity_destroy_all(void) { struct list_head *pos, *q; struct hfi1_affinity_node *entry; @@ -166,7 +224,7 @@ void node_affinity_destroy(void) entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); - kfree(entry); + node_affinity_destroy(entry); } mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); @@ -180,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node) if (!entry) return NULL; entry->node = node; + entry->comp_vect_affinity = alloc_percpu(u16); INIT_LIST_HEAD(&entry->list); return entry; @@ -209,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node) return NULL; } +static int per_cpu_affinity_get(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + u16 cntr; + u16 prev_cntr; + int ret_cpu; + + if (!possible_cpumask) { + ret_cpu = -EINVAL; + goto fail; + } + + if (!comp_vect_affinity) { + ret_cpu = -EINVAL; + goto fail; + } + + ret_cpu = cpumask_first(possible_cpumask); + if (ret_cpu >= nr_cpu_ids) { + ret_cpu = -EINVAL; + goto fail; + } + + prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu); + for_each_cpu(curr_cpu, possible_cpumask) { + cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); + + if (cntr < prev_cntr) { + ret_cpu = curr_cpu; + prev_cntr = cntr; + } + } + + *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1; + +fail: + return ret_cpu; +} + +static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + int max_cpu; + u16 cntr; + u16 prev_cntr; + + if (!possible_cpumask) + return -EINVAL; + + if (!comp_vect_affinity) + return -EINVAL; + + max_cpu = cpumask_first(possible_cpumask); + if (max_cpu >= nr_cpu_ids) + return -EINVAL; + + prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu); + for_each_cpu(curr_cpu, possible_cpumask) { + cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); + + if (cntr > prev_cntr) { + max_cpu = curr_cpu; + prev_cntr = cntr; + } + } + + *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1; + + return max_cpu; +} + +/* + * Non-interrupt CPUs are used first, then interrupt CPUs. + * Two already allocated cpu masks must be passed. + */ +static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry, + cpumask_var_t non_intr_cpus, + cpumask_var_t available_cpus) + __must_hold(&node_affinity.lock) +{ + int cpu; + struct cpu_mask_set *set = dd->comp_vect; + + lockdep_assert_held(&node_affinity.lock); + if (!non_intr_cpus) { + cpu = -1; + goto fail; + } + + if (!available_cpus) { + cpu = -1; + goto fail; + } + + /* Available CPUs for pinning completion vectors */ + _cpu_mask_set_gen_inc(set); + cpumask_andnot(available_cpus, &set->mask, &set->used); + + /* Available CPUs without SDMA engine interrupts */ + cpumask_andnot(non_intr_cpus, available_cpus, + &entry->def_intr.used); + + /* If there are non-interrupt CPUs available, use them first */ + if (!cpumask_empty(non_intr_cpus)) + cpu = cpumask_first(non_intr_cpus); + else /* Otherwise, use interrupt CPUs */ + cpu = cpumask_first(available_cpus); + + if (cpu >= nr_cpu_ids) { /* empty */ + cpu = -1; + goto fail; + } + cpumask_set_cpu(cpu, &set->used); + +fail: + return cpu; +} + +static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu) +{ + struct cpu_mask_set *set = dd->comp_vect; + + if (cpu < 0) + return; + + cpu_mask_set_put(set, cpu); +} + +/* _dev_comp_vect_mappings_destroy() is reentrant */ +static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd) +{ + int i, cpu; + + if (!dd->comp_vect_mappings) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = dd->comp_vect_mappings[i]; + _dev_comp_vect_cpu_put(dd, cpu); + dd->comp_vect_mappings[i] = -1; + hfi1_cdbg(AFFINITY, + "[%s] Release CPU %d from completion vector %d", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i); + } + + kfree(dd->comp_vect_mappings); + dd->comp_vect_mappings = NULL; +} + +/* + * This function creates the table for looking up CPUs for completion vectors. + * num_comp_vectors needs to have been initilized before calling this function. + */ +static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu, ret; + cpumask_var_t non_intr_cpus; + cpumask_var_t available_cpus; + + lockdep_assert_held(&node_affinity.lock); + + if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL)) + return -ENOMEM; + + if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) { + free_cpumask_var(non_intr_cpus); + return -ENOMEM; + } + + dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus, + sizeof(*dd->comp_vect_mappings), + GFP_KERNEL); + if (!dd->comp_vect_mappings) { + ret = -ENOMEM; + goto fail; + } + for (i = 0; i < dd->comp_vect_possible_cpus; i++) + dd->comp_vect_mappings[i] = -1; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus, + available_cpus); + if (cpu < 0) { + ret = -EINVAL; + goto fail; + } + + dd->comp_vect_mappings[i] = cpu; + hfi1_cdbg(AFFINITY, + "[%s] Completion Vector %d -> CPU %d", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); + } + + return 0; + +fail: + free_cpumask_var(available_cpus); + free_cpumask_var(non_intr_cpus); + _dev_comp_vect_mappings_destroy(dd); + + return ret; +} + +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd) +{ + int ret; + struct hfi1_affinity_node *entry; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) { + ret = -EINVAL; + goto unlock; + } + ret = _dev_comp_vect_mappings_create(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + + return ret; +} + +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd) +{ + _dev_comp_vect_mappings_destroy(dd); +} + +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect) +{ + struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); + struct hfi1_devdata *dd = dd_from_dev(verbs_dev); + + if (!dd->comp_vect_mappings) + return -EINVAL; + if (comp_vect >= dd->comp_vect_possible_cpus) + return -EINVAL; + + return dd->comp_vect_mappings[comp_vect]; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry, + bool first_dev_init) + __must_hold(&node_affinity.lock) +{ + int i, j, curr_cpu; + int possible_cpus_comp_vect = 0; + struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask; + + lockdep_assert_held(&node_affinity.lock); + /* + * If there's only one CPU available for completion vectors, then + * there will only be one completion vector available. Othewise, + * the number of completion vector available will be the number of + * available CPUs divide it by the number of devices in the + * local NUMA node. + */ + if (cpumask_weight(&entry->comp_vect_mask) == 1) { + possible_cpus_comp_vect = 1; + dd_dev_warn(dd, + "Number of kernel receive queues is too large for completion vector affinity to be effective\n"); + } else { + possible_cpus_comp_vect += + cpumask_weight(&entry->comp_vect_mask) / + hfi1_per_node_cntr[dd->node]; + + /* + * If the completion vector CPUs available doesn't divide + * evenly among devices, then the first device device to be + * initialized gets an extra CPU. + */ + if (first_dev_init && + cpumask_weight(&entry->comp_vect_mask) % + hfi1_per_node_cntr[dd->node] != 0) + possible_cpus_comp_vect++; + } + + dd->comp_vect_possible_cpus = possible_cpus_comp_vect; + + /* Reserving CPUs for device completion vector */ + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask, + entry->comp_vect_affinity); + if (curr_cpu < 0) + goto fail; + + cpumask_set_cpu(curr_cpu, dev_comp_vect_mask); + } + + hfi1_cdbg(AFFINITY, + "[%s] Completion vector affinity CPU set(s) %*pbl", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), + cpumask_pr_args(dev_comp_vect_mask)); + + return 0; + +fail: + for (j = 0; j < i; j++) + per_cpu_affinity_put_max(&entry->comp_vect_mask, + entry->comp_vect_affinity); + + return curr_cpu; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu; + + lockdep_assert_held(&node_affinity.lock); + if (!dd->comp_vect_possible_cpus) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask, + entry->comp_vect_affinity); + /* Clearing CPU in device completion vector cpu mask */ + if (cpu >= 0) + cpumask_clear_cpu(cpu, &dd->comp_vect->mask); + } + + dd->comp_vect_possible_cpus = 0; +} + /* * Interrupt affinity. * @@ -225,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) int node = pcibus_to_node(dd->pcidev->bus); struct hfi1_affinity_node *entry; const struct cpumask *local_mask; - int curr_cpu, possible, i; + int curr_cpu, possible, i, ret; + bool new_entry = false; if (node < 0) node = numa_node_id(); @@ -247,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); - mutex_unlock(&node_affinity.lock); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } + new_entry = true; + init_cpu_mask_set(&entry->def_intr); init_cpu_mask_set(&entry->rcv_intr); + cpumask_clear(&entry->comp_vect_mask); cpumask_clear(&entry->general_intr_mask); /* Use the "real" cpu mask of this node as the default */ cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, @@ -304,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - node_affinity_add_tail(entry); + /* Determine completion vector CPUs for the entire node */ + cpumask_and(&entry->comp_vect_mask, + &node_affinity.real_cpu_mask, local_mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->rcv_intr.mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->general_intr_mask); + + /* + * If there ends up being 0 CPU cores leftover for completion + * vectors, use the same CPU core as the general/control + * context. + */ + if (cpumask_weight(&entry->comp_vect_mask) == 0) + cpumask_copy(&entry->comp_vect_mask, + &entry->general_intr_mask); } + + ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry); + if (ret < 0) + goto fail; + + if (new_entry) + node_affinity_add_tail(entry); + mutex_unlock(&node_affinity.lock); + return 0; + +fail: + if (new_entry) + node_affinity_destroy(entry); + mutex_unlock(&node_affinity.lock); + return ret; +} + +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) +{ + struct hfi1_affinity_node *entry; + + if (dd->node < 0) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + /* + * Free device completion vector CPUs to be used by future + * completion vectors + */ + _dev_comp_vect_cpu_mask_clean_up(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + dd->node = -1; } /* @@ -412,7 +864,6 @@ static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) static int get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { - int ret; cpumask_var_t diff; struct hfi1_affinity_node *entry; struct cpu_mask_set *set = NULL; @@ -424,10 +875,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd, extra[0] = '\0'; cpumask_clear(&msix->mask); - ret = zalloc_cpumask_var(&diff, GFP_KERNEL); - if (!ret) - return -ENOMEM; - entry = node_affinity_lookup(dd->node); switch (msix->type) { @@ -458,17 +905,17 @@ static int get_irq_affinity(struct hfi1_devdata *dd, * finds its CPU here. */ if (cpu == -1 && set) { - if (cpumask_equal(&set->mask, &set->used)) { - /* - * We've used up all the CPUs, bump up the generation - * and reset the 'used' map - */ - set->gen++; - cpumask_clear(&set->used); + if (!zalloc_cpumask_var(&diff, GFP_KERNEL)) + return -ENOMEM; + + cpu = cpu_mask_set_get_first(set, diff); + if (cpu < 0) { + free_cpumask_var(diff); + dd_dev_err(dd, "Failure to obtain CPU for IRQ\n"); + return cpu; } - cpumask_andnot(diff, &set->mask, &set->used); - cpu = cpumask_first(diff); - cpumask_set_cpu(cpu, &set->used); + + free_cpumask_var(diff); } cpumask_set_cpu(cpu, &msix->mask); @@ -482,7 +929,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd, hfi1_setup_sdma_notifier(msix); } - free_cpumask_var(diff); return 0; } @@ -527,10 +973,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, if (set) { cpumask_andnot(&set->used, &set->used, &msix->mask); - if (cpumask_empty(&set->used) && set->gen) { - set->gen--; - cpumask_copy(&set->used, &set->mask); - } + _cpu_mask_set_gen_dec(set); } irq_set_affinity_hint(msix->irq, NULL); @@ -641,10 +1084,7 @@ int hfi1_get_proc_affinity(int node) * If we've used all available HW threads, clear the mask and start * overloading. */ - if (cpumask_equal(&set->mask, &set->used)) { - set->gen++; - cpumask_clear(&set->used); - } + _cpu_mask_set_gen_inc(set); /* * If NUMA node has CPUs used by interrupt handlers, include them in the @@ -768,11 +1208,7 @@ void hfi1_put_proc_affinity(int cpu) return; mutex_lock(&affinity->lock); - cpumask_clear_cpu(cpu, &set->used); + cpu_mask_set_put(set, cpu); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); - if (cpumask_empty(&set->used) && set->gen) { - set->gen--; - cpumask_copy(&set->used, &set->mask); - } mutex_unlock(&affinity->lock); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 2a1e374169c0..6a7e6ea4e426 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; + u16 __percpu *comp_vect_affinity; struct cpu_mask_set def_intr; struct cpu_mask_set rcv_intr; struct cpumask general_intr_mask; + struct cpumask comp_vect_mask; struct list_head list; }; @@ -116,7 +118,11 @@ struct hfi1_affinity_node_list { }; int node_affinity_init(void); -void node_affinity_destroy(void); +void node_affinity_destroy_all(void); extern struct hfi1_affinity_node_list node_affinity; +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd); +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect); +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd); +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd); #endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e6a60fa59f2b..68580cb2ae1e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -65,6 +65,7 @@ #include "aspm.h" #include "affinity.h" #include "debugfs.h" +#include "fault.h" #define NUM_IB_PORTS 1 @@ -1032,8 +1033,8 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z, u8 *vcu, u16 *vl15buf, u8 *crc_sizes); static void read_vc_remote_link_width(struct hfi1_devdata *dd, u8 *remote_tx_rate, u16 *link_widths); -static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, - u8 *flag_bits, u16 *link_widths); +static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits, + u8 *flag_bits, u16 *link_widths); static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev); static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx); @@ -5944,6 +5945,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, u64 status; u32 sw_index; int i = 0; + unsigned long irq_flags; sw_index = dd->hw_to_sw[hw_context]; if (sw_index >= dd->num_send_contexts) { @@ -5953,10 +5955,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, return; } sci = &dd->send_contexts[sw_index]; + spin_lock_irqsave(&dd->sc_lock, irq_flags); sc = sci->sc; if (!sc) { dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__, sw_index, hw_context); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); return; } @@ -5978,6 +5982,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd, */ if (sc->type != SC_USER) queue_work(dd->pport->hfi1_wq, &sc->halt_work); + spin_unlock_irqrestore(&dd->sc_lock, irq_flags); /* * Update the counters for the corresponding status bits. @@ -6351,6 +6356,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd) type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; + case HREQ_LCB_RESET: + /* Put the LCB, RX FPE and TX FPE into reset */ + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET); + /* Make sure the write completed */ + (void)read_csr(dd, DCC_CFG_RESET); + /* Hold the reset long enough to take effect */ + udelay(1); + /* Take the LCB, RX FPE and TX FPE out of reset */ + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET); + hreq_response(dd, HREQ_SUCCESS, 0); + + break; case HREQ_CONFIG_DONE: hreq_response(dd, HREQ_SUCCESS, 0); break; @@ -6461,8 +6478,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN); reg = read_csr(dd, DCC_CFG_RESET); write_csr(dd, DCC_CFG_RESET, reg | - (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) | - (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT)); + DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE); (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */ if (!abort) { udelay(1); /* must hold for the longer of 16cclks or 20ns */ @@ -6527,7 +6543,7 @@ static void _dc_start(struct hfi1_devdata *dd) __func__); /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ - write_csr(dd, DCC_CFG_RESET, 0x10); + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET); /* lcb_shutdown() with abort=1 does not restore these */ write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); dd->dc_shutdown = 0; @@ -7348,7 +7364,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width, u8 misc_bits, local_flags; u16 active_tx, active_rx; - read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths); + read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths); tx = widths >> 12; rx = (widths >> 8) & 0xf; @@ -8820,29 +8836,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu, GENERAL_CONFIG, frame); } -static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, - u8 *flag_bits, u16 *link_widths) +static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits, + u8 *flag_bits, u16 *link_widths) { u32 frame; - read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, + read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG, &frame); *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK; *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK; *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK; } -static int write_vc_local_link_width(struct hfi1_devdata *dd, - u8 misc_bits, - u8 flag_bits, - u16 link_widths) +static int write_vc_local_link_mode(struct hfi1_devdata *dd, + u8 misc_bits, + u8 flag_bits, + u16 link_widths) { u32 frame; frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT | (u32)link_widths << LINK_WIDTH_SHIFT; - return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, + return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG, frame); } @@ -9312,8 +9328,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (loopback == LOOPBACK_SERDES) misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT; - ret = write_vc_local_link_width(dd, misc_bits, 0, - opa_to_vc_link_widths( + /* + * An external device configuration request is used to reset the LCB + * to retry to obtain operational lanes when the first attempt is + * unsuccesful. + */ + if (dd->dc8051_ver >= dc8051_ver(1, 25, 0)) + misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT; + + ret = write_vc_local_link_mode(dd, misc_bits, 0, + opa_to_vc_link_widths( ppd->link_width_enabled)); if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; @@ -10491,9 +10515,9 @@ u32 driver_pstate(struct hfi1_pportdata *ppd) case HLS_DN_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case HLS_VERIFY_CAP: - return IB_PORTPHYSSTATE_POLLING; + return IB_PORTPHYSSTATE_TRAINING; case HLS_GOING_UP: - return IB_PORTPHYSSTATE_POLLING; + return IB_PORTPHYSSTATE_TRAINING; case HLS_GOING_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case HLS_LINK_COOLDOWN: @@ -15018,13 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret < 0) goto bail_cleanup; - /* verify that reads actually work, save revision for reset check */ - dd->revision = read_csr(dd, CCE_REVISION); - if (dd->revision == ~(u64)0) { - dd_dev_err(dd, "cannot read chip CSRs\n"); - ret = -EINVAL; - goto bail_cleanup; - } dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT) & CCE_REVISION_CHIP_REV_MAJOR_MASK; dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT) @@ -15220,6 +15237,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + ret = hfi1_comp_vectors_set_up(dd); + if (ret) + goto bail_clear_intr; + /* set up LCB access - must be after set_up_interrupts() */ init_lcb_access(dd); @@ -15262,6 +15283,7 @@ bail_free_rcverr: bail_free_cntrs: free_cntrs(dd); bail_clear_intr: + hfi1_comp_vectors_clean_up(dd); hfi1_clean_up_interrupts(dd); bail_cleanup: hfi1_pcie_ddcleanup(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index c0d70f255050..fdf389e46e19 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -196,6 +196,15 @@ #define LSTATE_ARMED 0x3 #define LSTATE_ACTIVE 0x4 +/* DCC_CFG_RESET reset states */ +#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \ + DCC_CFG_RESET_RESET_TX_FPE | \ + DCC_CFG_RESET_RESET_RX_FPE | \ + DCC_CFG_RESET_ENABLE_CCLK_BCC) + /* 0x17 */ + +#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */ + /* DC8051_STS_CUR_STATE port values (physical link states) */ #define PLS_DISABLED 0x30 #define PLS_OFFLINE 0x90 @@ -283,6 +292,7 @@ #define HREQ_SET_TX_EQ_ABS 0x04 #define HREQ_SET_TX_EQ_REL 0x05 #define HREQ_ENABLE 0x06 +#define HREQ_LCB_RESET 0x07 #define HREQ_CONFIG_DONE 0xfe #define HREQ_INTERFACE_TEST 0xff @@ -383,7 +393,7 @@ #define TX_SETTINGS 0x06 #define VERIFY_CAP_LOCAL_PHY 0x07 #define VERIFY_CAP_LOCAL_FABRIC 0x08 -#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09 +#define VERIFY_CAP_LOCAL_LINK_MODE 0x09 #define LOCAL_DEVICE_ID 0x0a #define RESERVED_REGISTERS 0x0b #define LOCAL_LNI_INFO 0x0c @@ -584,8 +594,9 @@ enum { #define LOOPBACK_LCB 2 #define LOOPBACK_CABLE 3 /* external cable */ -/* set up serdes bit in MISC_CONFIG_BITS */ +/* set up bits in MISC_CONFIG_BITS */ #define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0 +#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3 /* read and write hardware registers */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset); diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 793514f1d15f..da598b5fe8f6 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -97,8 +97,11 @@ #define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32 #define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull #define DCC_CFG_RESET (DCC_CSRS + 0x000000000000) -#define DCC_CFG_RESET_RESET_LCB_SHIFT 0 -#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2 +#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0) +#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1) +#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2) +#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3) +#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4) #define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028) #define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0 #define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40 diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 852173bf05d0..9f992ae36c89 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -60,15 +60,13 @@ #include "device.h" #include "qp.h" #include "sdma.h" +#include "fault.h" static struct dentry *hfi1_dbg_root; /* wrappers to enforce srcu in seq file */ -static ssize_t hfi1_seq_read( - struct file *file, - char __user *buf, - size_t size, - loff_t *ppos) +ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) { struct dentry *d = file->f_path.dentry; ssize_t r; @@ -81,10 +79,7 @@ static ssize_t hfi1_seq_read( return r; } -static loff_t hfi1_seq_lseek( - struct file *file, - loff_t offset, - int whence) +loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence) { struct dentry *d = file->f_path.dentry; loff_t r; @@ -100,48 +95,6 @@ static loff_t hfi1_seq_lseek( #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) -#define DEBUGFS_SEQ_FILE_OPS(name) \ -static const struct seq_operations _##name##_seq_ops = { \ - .start = _##name##_seq_start, \ - .next = _##name##_seq_next, \ - .stop = _##name##_seq_stop, \ - .show = _##name##_seq_show \ -} - -#define DEBUGFS_SEQ_FILE_OPEN(name) \ -static int _##name##_open(struct inode *inode, struct file *s) \ -{ \ - struct seq_file *seq; \ - int ret; \ - ret = seq_open(s, &_##name##_seq_ops); \ - if (ret) \ - return ret; \ - seq = s->private_data; \ - seq->private = inode->i_private; \ - return 0; \ -} - -#define DEBUGFS_FILE_OPS(name) \ -static const struct file_operations _##name##_file_ops = { \ - .owner = THIS_MODULE, \ - .open = _##name##_open, \ - .read = hfi1_seq_read, \ - .llseek = hfi1_seq_lseek, \ - .release = seq_release \ -} - -#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ -do { \ - struct dentry *ent; \ - ent = debugfs_create_file(name, mode, parent, \ - data, ops); \ - if (!ent) \ - pr_warn("create of %s failed\n", name); \ -} while (0) - -#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ - DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) - static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) { struct hfi1_opcode_stats_perctx *opstats; @@ -1160,232 +1113,6 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_FILE_OPS(sdma_cpu_list); -#ifdef CONFIG_FAULT_INJECTION -static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) -{ - struct hfi1_opcode_stats_perctx *opstats; - - if (*pos >= ARRAY_SIZE(opstats->stats)) - return NULL; - return pos; -} - -static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct hfi1_opcode_stats_perctx *opstats; - - ++*pos; - if (*pos >= ARRAY_SIZE(opstats->stats)) - return NULL; - return pos; -} - -static void _fault_stats_seq_stop(struct seq_file *s, void *v) -{ -} - -static int _fault_stats_seq_show(struct seq_file *s, void *v) -{ - loff_t *spos = v; - loff_t i = *spos, j; - u64 n_packets = 0, n_bytes = 0; - struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; - struct hfi1_devdata *dd = dd_from_dev(ibd); - struct hfi1_ctxtdata *rcd; - - for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - rcd = hfi1_rcd_get_by_index(dd, j); - if (rcd) { - n_packets += rcd->opstats->stats[i].n_packets; - n_bytes += rcd->opstats->stats[i].n_bytes; - } - hfi1_rcd_put(rcd); - } - for_each_possible_cpu(j) { - struct hfi1_opcode_stats_perctx *sp = - per_cpu_ptr(dd->tx_opstats, j); - - n_packets += sp->stats[i].n_packets; - n_bytes += sp->stats[i].n_bytes; - } - if (!n_packets && !n_bytes) - return SEQ_SKIP; - if (!ibd->fault_opcode->n_rxfaults[i] && - !ibd->fault_opcode->n_txfaults[i]) - return SEQ_SKIP; - seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, - (unsigned long long)n_packets, - (unsigned long long)n_bytes, - (unsigned long long)ibd->fault_opcode->n_rxfaults[i], - (unsigned long long)ibd->fault_opcode->n_txfaults[i]); - return 0; -} - -DEBUGFS_SEQ_FILE_OPS(fault_stats); -DEBUGFS_SEQ_FILE_OPEN(fault_stats); -DEBUGFS_FILE_OPS(fault_stats); - -static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd) -{ - debugfs_remove_recursive(ibd->fault_opcode->dir); - kfree(ibd->fault_opcode); - ibd->fault_opcode = NULL; -} - -static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd) -{ - struct dentry *parent = ibd->hfi1_ibdev_dbg; - - ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL); - if (!ibd->fault_opcode) - return -ENOMEM; - - ibd->fault_opcode->attr.interval = 1; - ibd->fault_opcode->attr.require_end = ULONG_MAX; - ibd->fault_opcode->attr.stacktrace_depth = 32; - ibd->fault_opcode->attr.dname = NULL; - ibd->fault_opcode->attr.verbose = 0; - ibd->fault_opcode->fault_by_opcode = false; - ibd->fault_opcode->opcode = 0; - ibd->fault_opcode->mask = 0xff; - - ibd->fault_opcode->dir = - fault_create_debugfs_attr("fault_opcode", - parent, - &ibd->fault_opcode->attr); - if (IS_ERR(ibd->fault_opcode->dir)) { - kfree(ibd->fault_opcode); - return -ENOENT; - } - - DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd); - if (!debugfs_create_bool("fault_by_opcode", 0600, - ibd->fault_opcode->dir, - &ibd->fault_opcode->fault_by_opcode)) - goto fail; - if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir, - &ibd->fault_opcode->opcode)) - goto fail; - if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir, - &ibd->fault_opcode->mask)) - goto fail; - - return 0; -fail: - fault_exit_opcode_debugfs(ibd); - return -ENOMEM; -} - -static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd) -{ - debugfs_remove_recursive(ibd->fault_packet->dir); - kfree(ibd->fault_packet); - ibd->fault_packet = NULL; -} - -static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd) -{ - struct dentry *parent = ibd->hfi1_ibdev_dbg; - - ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL); - if (!ibd->fault_packet) - return -ENOMEM; - - ibd->fault_packet->attr.interval = 1; - ibd->fault_packet->attr.require_end = ULONG_MAX; - ibd->fault_packet->attr.stacktrace_depth = 32; - ibd->fault_packet->attr.dname = NULL; - ibd->fault_packet->attr.verbose = 0; - ibd->fault_packet->fault_by_packet = false; - - ibd->fault_packet->dir = - fault_create_debugfs_attr("fault_packet", - parent, - &ibd->fault_opcode->attr); - if (IS_ERR(ibd->fault_packet->dir)) { - kfree(ibd->fault_packet); - return -ENOENT; - } - - if (!debugfs_create_bool("fault_by_packet", 0600, - ibd->fault_packet->dir, - &ibd->fault_packet->fault_by_packet)) - goto fail; - if (!debugfs_create_u64("fault_stats", 0400, - ibd->fault_packet->dir, - &ibd->fault_packet->n_faults)) - goto fail; - - return 0; -fail: - fault_exit_packet_debugfs(ibd); - return -ENOMEM; -} - -static void fault_exit_debugfs(struct hfi1_ibdev *ibd) -{ - fault_exit_opcode_debugfs(ibd); - fault_exit_packet_debugfs(ibd); -} - -static int fault_init_debugfs(struct hfi1_ibdev *ibd) -{ - int ret = 0; - - ret = fault_init_opcode_debugfs(ibd); - if (ret) - return ret; - - ret = fault_init_packet_debugfs(ibd); - if (ret) - fault_exit_opcode_debugfs(ibd); - - return ret; -} - -bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return ibd->fault_suppress_err; -} - -bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) -{ - bool ret = false; - struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); - - if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode) - return false; - if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask)) - return false; - ret = should_fail(&ibd->fault_opcode->attr, 1); - if (ret) { - trace_hfi1_fault_opcode(qp, opcode); - if (rx) - ibd->fault_opcode->n_rxfaults[opcode]++; - else - ibd->fault_opcode->n_txfaults[opcode]++; - } - return ret; -} - -bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi; - struct hfi1_ibdev *ibd = dev_from_rdi(rdi); - bool ret = false; - - if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet) - return false; - - ret = should_fail(&ibd->fault_packet->attr, 1); - if (ret) { - ++ibd->fault_packet->n_faults; - trace_hfi1_fault_packet(packet); - } - return ret; -} -#endif - void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -1438,21 +1165,14 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) S_IRUGO : S_IRUGO | S_IWUSR); } -#ifdef CONFIG_FAULT_INJECTION - debugfs_create_bool("fault_suppress_err", 0600, - ibd->hfi1_ibdev_dbg, - &ibd->fault_suppress_err); - fault_init_debugfs(ibd); -#endif + hfi1_fault_init_debugfs(ibd); } void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) { if (!hfi1_dbg_root) goto out; -#ifdef CONFIG_FAULT_INJECTION - fault_exit_debugfs(ibd); -#endif + hfi1_fault_exit_debugfs(ibd); debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index 38c38a98156d..d5d824459fcc 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -1,7 +1,7 @@ #ifndef _HFI1_DEBUGFS_H #define _HFI1_DEBUGFS_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015, 2016, 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -48,51 +48,59 @@ */ struct hfi1_ibdev; -#ifdef CONFIG_DEBUG_FS -void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); -void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); -void hfi1_dbg_init(void); -void hfi1_dbg_exit(void); - -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> -struct fault_opcode { - struct fault_attr attr; - struct dentry *dir; - bool fault_by_opcode; - u64 n_rxfaults[256]; - u64 n_txfaults[256]; - u8 opcode; - u8 mask; -}; -struct fault_packet { - struct fault_attr attr; - struct dentry *dir; - bool fault_by_packet; - u64 n_faults; -}; +#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ +do { \ + struct dentry *ent; \ + const char *__name = name; \ + ent = debugfs_create_file(__name, mode, parent, \ + data, ops); \ + if (!ent) \ + pr_warn("create of %s failed\n", __name); \ +} while (0) -bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); -bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); -bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); -#else -static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - return false; +#define DEBUGFS_SEQ_FILE_OPS(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ } -static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, - u32 opcode, bool rx) -{ - return false; +#define DEBUGFS_SEQ_FILE_OPEN(name) \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ } -static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return false; +#define DEBUGFS_FILE_OPS(name) \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ + .release = seq_release \ } -#endif + +#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ + DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444) + +ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos); +loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence); + +#ifdef CONFIG_DEBUG_FS +void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); +void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); +void hfi1_dbg_init(void); +void hfi1_dbg_exit(void); #else static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) @@ -110,22 +118,6 @@ static inline void hfi1_dbg_init(void) static inline void hfi1_dbg_exit(void) { } - -static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - return false; -} - -static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, - u32 opcode, bool rx) -{ - return false; -} - -static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return false; -} #endif #endif /* _HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 46d1475b2154..94dca95db04f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -61,6 +61,7 @@ #include "sdma.h" #include "debugfs.h" #include "vnic.h" +#include "fault.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -433,31 +434,43 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; - u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr); + u16 pkey; + u32 rlid, slid, dlid = 0; u8 hdr_type, sc, svc_type; bool is_mcast = false; + /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); + dlid = hfi1_16B_get_dlid(pkt->hdr); + slid = hfi1_16B_get_slid(pkt->hdr); hdr_type = HFI1_PKT_TYPE_16B; } else { is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); + dlid = ib_get_dlid(pkt->hdr); + slid = ib_get_slid(pkt->hdr); hdr_type = HFI1_PKT_TYPE_9B; } switch (qp->ibqp.qp_type) { + case IB_QPT_UD: + dlid = ppd->lid; + rlid = slid; + rqpn = ib_get_sqpn(pkt->ohdr); + svc_type = IB_CC_SVCTYPE_UD; + break; case IB_QPT_SMI: case IB_QPT_GSI: - case IB_QPT_UD: - rlid = ib_get_slid(pkt->hdr); + rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; break; @@ -482,7 +495,6 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, dlid, rlid, sc, grh); if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; @@ -1471,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; u8 l4; - u8 grh_len; packet->hdr = (struct hfi1_16b_header *) hfi1_get_16B_header(packet->rcd->dd, packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; - l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { - grh_len = 0; packet->ohdr = packet->ebuf; packet->grh = NULL; + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES); + packet->migrated = opa_bth_is_migration(packet->ohdr); } else if (l4 == OPA_16B_L4_IB_GLOBAL) { u32 vtf; + u8 grh_len = sizeof(struct ib_grh); - grh_len = sizeof(struct ib_grh); packet->ohdr = packet->ebuf + grh_len; packet->grh = packet->ebuf; + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; + packet->migrated = opa_bth_is_migration(packet->ohdr); + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; vtf = be32_to_cpu(packet->grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; + } else if (l4 == OPA_16B_L4_FM) { + packet->mgmt = packet->ebuf; + packet->ohdr = NULL; + packet->grh = NULL; + packet->opcode = IB_OPCODE_UD_SEND_ONLY; + packet->pad = OPA_16B_L4_FM_PAD; + packet->hlen = OPA_16B_L4_FM_HLEN; + packet->migrated = false; } else { goto drop; } /* Query commonly used fields from packet header */ - packet->opcode = ib_bth_get_opcode(packet->ohdr); - /* hdr_len_by_opcode already has an IB LRH factored in */ - packet->hlen = hdr_len_by_opcode[packet->opcode] + - (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES; packet->slid = hfi1_16B_get_slid(packet->hdr); packet->dlid = hfi1_16B_get_dlid(packet->hdr); @@ -1512,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) 16B); packet->sc = hfi1_16B_get_sc(packet->hdr); packet->sl = ibp->sc_to_sl[packet->sc]; - packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); packet->extra_byte = SIZE_OF_LT; packet->pkey = hfi1_16B_get_pkey(packet->hdr); - packet->migrated = opa_bth_is_migration(packet->ohdr); if (hfi1_bypass_ingress_pkt_check(packet)) goto drop; @@ -1554,10 +1577,10 @@ void handle_eflags(struct hfi1_packet *packet) */ int process_receive_ib(struct hfi1_packet *packet) { - if (unlikely(hfi1_dbg_fault_packet(packet))) + if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; - if (hfi1_setup_9B_packet(packet)) + if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; trace_hfi1_rcvhdr(packet); @@ -1631,7 +1654,8 @@ int process_receive_error(struct hfi1_packet *packet) /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && - rhf_rcv_type_err(packet->rhf) == 3)) + (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR || + packet->rhf & RHF_DC_ERR))) return RHF_RCV_CONTINUE; hfi1_setup_ib_header(packet); @@ -1646,10 +1670,10 @@ int process_receive_error(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet) { - if (unlikely(hfi1_dbg_fault_packet(packet))) + hfi1_setup_9B_packet(packet); + if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; - hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1660,11 +1684,11 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { - hfi1_setup_ib_header(packet); + hfi1_setup_9B_packet(packet); + if (unlikely(hfi1_dbg_should_fault_rx(packet))) + return RHF_RCV_CONTINUE; if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); - if (unlikely(hfi1_dbg_fault_packet(packet))) - return RHF_RCV_CONTINUE; dd_dev_err(packet->rcd->dd, "Unhandled eager packet received. Dropping.\n"); diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 0af91675acc6..1be49a0d9c11 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -52,13 +52,24 @@ * exp_tid_group_init - initialize exp_tid_set * @set - the set */ -void hfi1_exp_tid_group_init(struct exp_tid_set *set) +static void hfi1_exp_tid_set_init(struct exp_tid_set *set) { INIT_LIST_HEAD(&set->list); set->count = 0; } /** + * hfi1_exp_tid_group_init - initialize rcd expected receive + * @rcd - the rcd + */ +void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) +{ + hfi1_exp_tid_set_init(&rcd->tid_group_list); + hfi1_exp_tid_set_init(&rcd->tid_used_list); + hfi1_exp_tid_set_init(&rcd->tid_full_list); +} + +/** * alloc_ctxt_rcv_groups - initialize expected receive groups * @rcd - the context to add the groupings to */ @@ -68,13 +79,17 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) u32 tidbase; struct tid_group *grp; int i; + u32 ngroups; + ngroups = rcd->expected_count / dd->rcv_entries.group_size; + rcd->groups = + kcalloc_node(ngroups, sizeof(*rcd->groups), + GFP_KERNEL, rcd->numa_id); + if (!rcd->groups) + return -ENOMEM; tidbase = rcd->expected_base; - for (i = 0; i < rcd->expected_count / - dd->rcv_entries.group_size; i++) { - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) - goto bail; + for (i = 0; i < ngroups; i++) { + grp = &rcd->groups[i]; grp->size = dd->rcv_entries.group_size; grp->base = tidbase; tid_group_add_tail(grp, &rcd->tid_group_list); @@ -82,9 +97,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) } return 0; -bail: - hfi1_free_ctxt_rcv_groups(rcd); - return -ENOMEM; } /** @@ -100,15 +112,12 @@ bail: */ void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { - struct tid_group *grp, *gptr; - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); - list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) { - tid_group_remove(grp, &rcd->tid_group_list); - kfree(grp); - } + kfree(rcd->groups); + rcd->groups = NULL; + hfi1_exp_tid_group_init(rcd); hfi1_clear_tids(rcd); } diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h index 08719047628a..f25362015095 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -183,8 +183,30 @@ static inline u32 rcventry2tidinfo(u32 rcventry) EXP_TID_SET(CTRL, 1 << (rcventry - pair)); } +/** + * hfi1_tid_group_to_idx - convert an index to a group + * @rcd - the receive context + * @grp - the group pointer + */ +static inline u16 +hfi1_tid_group_to_idx(struct hfi1_ctxtdata *rcd, struct tid_group *grp) +{ + return grp - &rcd->groups[0]; +} + +/** + * hfi1_idx_to_tid_group - convert a group to an index + * @rcd - the receive context + * @idx - the index + */ +static inline struct tid_group * +hfi1_idx_to_tid_group(struct hfi1_ctxtdata *rcd, u16 idx) +{ + return &rcd->groups[idx]; +} + int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); -void hfi1_exp_tid_group_init(struct exp_tid_set *set); +void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd); #endif /* _HFI1_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c new file mode 100644 index 000000000000..e2290f32c8d9 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -0,0 +1,375 @@ +/* + * Copyright(c) 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/bitmap.h> + +#include "debugfs.h" +#include "fault.h" +#include "trace.h" + +#define HFI1_FAULT_DIR_TX BIT(0) +#define HFI1_FAULT_DIR_RX BIT(1) +#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX) + +static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void _fault_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _fault_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; + + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); + } + for_each_possible_cpu(j) { + struct hfi1_opcode_stats_perctx *sp = + per_cpu_ptr(dd->tx_opstats, j); + + n_packets += sp->stats[i].n_packets; + n_bytes += sp->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i]) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, + (unsigned long long)n_packets, + (unsigned long long)n_bytes, + (unsigned long long)ibd->fault->n_rxfaults[i], + (unsigned long long)ibd->fault->n_txfaults[i]); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(fault_stats); +DEBUGFS_SEQ_FILE_OPEN(fault_stats); +DEBUGFS_FILE_OPS(fault_stats); + +static int fault_opcodes_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return nonseekable_open(inode, file); +} + +static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + ssize_t ret = 0; + /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */ + size_t copy, datalen = 1280; + char *data, *token, *ptr, *end; + struct fault *fault = file->private_data; + + data = kcalloc(datalen, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + copy = min(len, datalen - 1); + if (copy_from_user(data, buf, copy)) + return -EFAULT; + + ret = debugfs_file_get(file->f_path.dentry); + if (unlikely(ret)) + return ret; + ptr = data; + token = ptr; + for (ptr = data; *ptr; ptr = end + 1, token = ptr) { + char *dash; + unsigned long range_start, range_end, i; + bool remove = false; + + end = strchr(ptr, ','); + if (end) + *end = '\0'; + if (token[0] == '-') { + remove = true; + token++; + } + dash = strchr(token, '-'); + if (dash) + *dash = '\0'; + if (kstrtoul(token, 0, &range_start)) + break; + if (dash) { + token = dash + 1; + if (kstrtoul(token, 0, &range_end)) + break; + } else { + range_end = range_start; + } + if (range_start == range_end && range_start == -1UL) { + bitmap_zero(fault->opcodes, sizeof(fault->opcodes) * + BITS_PER_BYTE); + break; + } + for (i = range_start; i <= range_end; i++) { + if (remove) + clear_bit(i, fault->opcodes); + else + set_bit(i, fault->opcodes); + } + if (!end) + break; + } + ret = len; + + debugfs_file_put(file->f_path.dentry); + kfree(data); + return ret; +} + +static ssize_t fault_opcodes_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + ssize_t ret = 0; + char *data; + size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */ + unsigned long bit = 0, zero = 0; + struct fault *fault = file->private_data; + size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE; + + data = kcalloc(datalen, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + ret = debugfs_file_get(file->f_path.dentry); + if (unlikely(ret)) + return ret; + bit = find_first_bit(fault->opcodes, bitsize); + while (bit < bitsize) { + zero = find_next_zero_bit(fault->opcodes, bitsize, bit); + if (zero - 1 != bit) + size += snprintf(data + size, + datalen - size - 1, + "0x%lx-0x%lx,", bit, zero - 1); + else + size += snprintf(data + size, + datalen - size - 1, "0x%lx,", + bit); + bit = find_next_bit(fault->opcodes, bitsize, zero); + } + debugfs_file_put(file->f_path.dentry); + data[size - 1] = '\n'; + data[size] = '\0'; + ret = simple_read_from_buffer(buf, len, pos, data, size); + kfree(data); + return ret; +} + +static const struct file_operations __fault_opcodes_fops = { + .owner = THIS_MODULE, + .open = fault_opcodes_open, + .read = fault_opcodes_read, + .write = fault_opcodes_write, + .llseek = no_llseek +}; + +void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ + if (ibd->fault) + debugfs_remove_recursive(ibd->fault->dir); + kfree(ibd->fault); + ibd->fault = NULL; +} + +int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL); + if (!ibd->fault) + return -ENOMEM; + + ibd->fault->attr.interval = 1; + ibd->fault->attr.require_end = ULONG_MAX; + ibd->fault->attr.stacktrace_depth = 32; + ibd->fault->attr.dname = NULL; + ibd->fault->attr.verbose = 0; + ibd->fault->enable = false; + ibd->fault->opcode = false; + ibd->fault->fault_skip = 0; + ibd->fault->skip = 0; + ibd->fault->direction = HFI1_FAULT_DIR_TXRX; + ibd->fault->suppress_err = false; + bitmap_zero(ibd->fault->opcodes, + sizeof(ibd->fault->opcodes) * BITS_PER_BYTE); + + ibd->fault->dir = + fault_create_debugfs_attr("fault", parent, + &ibd->fault->attr); + if (IS_ERR(ibd->fault->dir)) { + kfree(ibd->fault); + ibd->fault = NULL; + return -ENOENT; + } + + DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd); + if (!debugfs_create_bool("enable", 0600, ibd->fault->dir, + &ibd->fault->enable)) + goto fail; + if (!debugfs_create_bool("suppress_err", 0600, + ibd->fault->dir, + &ibd->fault->suppress_err)) + goto fail; + if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir, + &ibd->fault->opcode)) + goto fail; + if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir, + ibd->fault, &__fault_opcodes_fops)) + goto fail; + if (!debugfs_create_u64("skip_pkts", 0600, + ibd->fault->dir, + &ibd->fault->fault_skip)) + goto fail; + if (!debugfs_create_u64("skip_usec", 0600, + ibd->fault->dir, + &ibd->fault->fault_skip_usec)) + goto fail; + if (!debugfs_create_u8("direction", 0600, ibd->fault->dir, + &ibd->fault->direction)) + goto fail; + + return 0; +fail: + hfi1_fault_exit_debugfs(ibd); + return -ENOMEM; +} + +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + if (ibd->fault) + return ibd->fault->suppress_err; + return false; +} + +static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode, + u8 direction) +{ + bool ret = false; + + if (!ibd->fault || !ibd->fault->enable) + return false; + if (!(ibd->fault->direction & direction)) + return false; + if (ibd->fault->opcode) { + if (bitmap_empty(ibd->fault->opcodes, + (sizeof(ibd->fault->opcodes) * + BITS_PER_BYTE))) + return false; + if (!(test_bit(opcode, ibd->fault->opcodes))) + return false; + } + if (ibd->fault->fault_skip_usec && + time_before(jiffies, ibd->fault->skip_usec)) + return false; + if (ibd->fault->fault_skip && ibd->fault->skip) { + ibd->fault->skip--; + return false; + } + ret = should_fail(&ibd->fault->attr, 1); + if (ret) { + ibd->fault->skip = ibd->fault->fault_skip; + ibd->fault->skip_usec = jiffies + + usecs_to_jiffies(ibd->fault->fault_skip_usec); + } + return ret; +} + +bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode) +{ + struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); + + if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) { + trace_hfi1_fault_opcode(qp, opcode); + ibd->fault->n_txfaults[opcode]++; + return true; + } + return false; +} + +bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet) +{ + struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev; + + if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) { + trace_hfi1_fault_packet(packet); + ibd->fault->n_rxfaults[packet->opcode]++; + return true; + } + return false; +} diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h new file mode 100644 index 000000000000..a83382700a7c --- /dev/null +++ b/drivers/infiniband/hw/hfi1/fault.h @@ -0,0 +1,109 @@ +#ifndef _HFI1_FAULT_H +#define _HFI1_FAULT_H +/* + * Copyright(c) 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/fault-inject.h> +#include <linux/dcache.h> +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <rdma/rdma_vt.h> + +#include "hfi.h" + +struct hfi1_ibdev; + +#if defined(CONFIG_FAULT_INJECTION) && defined(CONFIG_FAULT_INJECTION_DEBUG_FS) +struct fault { + struct fault_attr attr; + struct dentry *dir; + u64 n_rxfaults[(1U << BITS_PER_BYTE)]; + u64 n_txfaults[(1U << BITS_PER_BYTE)]; + u64 fault_skip; + u64 skip; + u64 fault_skip_usec; + unsigned long skip_usec; + unsigned long opcodes[(1U << BITS_PER_BYTE) / BITS_PER_LONG]; + bool enable; + bool suppress_err; + bool opcode; + u8 direction; +}; + +int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd); +bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode); +bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet); +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); +void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd); + +#else + +static inline int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + return 0; +} + +static inline bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet) +{ + return false; +} + +static inline bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, + u32 opcode) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} + +static inline void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ +} +#endif +#endif /* _HFI1_FAULT_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index da4aa1a95b11..c9d23c37a371 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -110,7 +110,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); static int ctxt_reset(struct hfi1_ctxtdata *uctxt); static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long arg); -static int vma_fault(struct vm_fault *vmf); +static vm_fault_t vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -591,7 +591,7 @@ done: * Local (non-chip) user memory is not mapped right away but as it is * accessed by the user-level code. */ -static int vma_fault(struct vm_fault *vmf) +static vm_fault_t vma_fault(struct vm_fault *vmf) { struct page *page; @@ -689,8 +689,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * checks to default and disable the send context. */ if (uctxt->sc) { - set_pio_integrity(uctxt->sc); sc_disable(uctxt->sc); + set_pio_integrity(uctxt->sc); } hfi1_free_ctxt_rcv_groups(uctxt); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 32c48265405e..5eb3bf0849c7 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -231,13 +231,15 @@ struct hfi1_ctxtdata { /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ - u32 rcv_array_groups; + u16 rcv_array_groups; /* index of first eager TID entry. */ - u32 eager_base; + u16 eager_base; /* number of expected TID entries */ - u32 expected_count; + u16 expected_count; /* index of first expected TID entry. */ - u32 expected_base; + u16 expected_base; + /* array of tid_groups */ + struct tid_group *groups; struct exp_tid_set tid_group_list; struct exp_tid_set tid_used_list; @@ -282,7 +284,7 @@ struct hfi1_ctxtdata { /* interrupt handling */ u64 imask; /* clear interrupt mask */ int ireg; /* clear interrupt register */ - unsigned numa_id; /* numa node of this context */ + int numa_id; /* numa node of this context */ /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; @@ -333,6 +335,7 @@ struct hfi1_packet { struct rvt_qp *qp; struct ib_other_headers *ohdr; struct ib_grh *grh; + struct opa_16b_mgmt *mgmt; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -392,10 +395,17 @@ struct hfi1_packet { */ #define OPA_16B_L4_9B 0x00 #define OPA_16B_L2_TYPE 0x02 +#define OPA_16B_L4_FM 0x08 #define OPA_16B_L4_IB_LOCAL 0x09 #define OPA_16B_L4_IB_GLOBAL 0x0A #define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR +/* + * OPA 16B Management + */ +#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */ +#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */ + static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr) { return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK); @@ -472,6 +482,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr) OPA_16B_BTH_PAD_MASK); } +/* + * 16B Management + */ +#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF +static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt) +{ + return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK; +} + +static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt) +{ + return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK; +} + +static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt, + u32 dest_qp, u32 src_qp) +{ + mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK); + mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK); +} + struct rvt_sge_state; /* @@ -880,9 +911,9 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp, #define RHF_RCV_REPROCESS 2 /* stop. retain this packet */ struct rcv_array_data { - u8 group_size; u16 ngroups; u16 nctxt_extra; + u8 group_size; }; struct per_vl_data { @@ -1263,6 +1294,9 @@ struct hfi1_devdata { /* Save the enabled LCB error bits */ u64 lcb_err_en; + struct cpu_mask_set *comp_vect; + int *comp_vect_mappings; + u32 comp_vect_possible_cpus; /* * Capability to have different send engines simply by changing a @@ -1537,13 +1571,13 @@ void set_link_ipg(struct hfi1_pportdata *ppd); void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type); void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, - u32 pkey, u32 slid, u32 dlid, u8 sc5, + u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); #define PKEY_CHECK_INVALID -1 @@ -1856,6 +1890,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_HAS_SDMA_TIMEOUT 0x8 #define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */ #define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */ +#define HFI1_SHUTDOWN 0x100 /* device is shutting down */ /* IB dword length mask in PBC (lower 11 bits); same for all chips */ #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) @@ -2048,7 +2083,9 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK +#ifndef CONFIG_FAULT_INJECTION | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK +#endif | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK @@ -2061,7 +2098,11 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; if (ctxt_type == SC_USER) - base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY; + base_sc_integrity |= +#ifndef CONFIG_FAULT_INJECTION + SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK | +#endif + HFI1_PKT_USER_SC_INTEGRITY; else base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; @@ -2437,7 +2478,7 @@ static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr, ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT); lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) | ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT); - lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT); lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4; hdr->lrh[0] = lrh0; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 33eba2356742..3feecf926322 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -88,9 +88,9 @@ * pio buffers per ctxt, etc.) Zero means use one user context per CPU. */ int num_user_contexts = -1; -module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO); +module_param_named(num_user_contexts, num_user_contexts, int, 0444); MODULE_PARM_DESC( - num_user_contexts, "Set max number of user contexts to use"); + num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)"); uint krcvqs[RXE_NUM_DATA_VL]; int krcvqsset; @@ -361,9 +361,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, } INIT_LIST_HEAD(&rcd->qp_wait_list); - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); + hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); @@ -1058,6 +1056,10 @@ static void shutdown_device(struct hfi1_devdata *dd) unsigned pidx; int i; + if (dd->flags & HFI1_SHUTDOWN) + return; + dd->flags |= HFI1_SHUTDOWN; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1209,30 +1211,51 @@ static void finalize_asic_data(struct hfi1_devdata *dd, kfree(ad); } -static void __hfi1_free_devdata(struct kobject *kobj) +/** + * hfi1_clean_devdata - cleans up per-unit data structure + * @dd: pointer to a valid devdata structure + * + * It cleans up all data structures set up by + * by hfi1_alloc_devdata(). + */ +static void hfi1_clean_devdata(struct hfi1_devdata *dd) { - struct hfi1_devdata *dd = - container_of(kobj, struct hfi1_devdata, kobj); struct hfi1_asic_data *ad; unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); - idr_remove(&hfi1_unit_table, dd->unit); - list_del(&dd->list); + if (!list_empty(&dd->list)) { + idr_remove(&hfi1_unit_table, dd->unit); + list_del_init(&dd->list); + } ad = release_asic_data(dd); spin_unlock_irqrestore(&hfi1_devs_lock, flags); - if (ad) - finalize_asic_data(dd, ad); + + finalize_asic_data(dd, ad); free_platform_config(dd); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); free_percpu(dd->send_schedule); free_percpu(dd->tx_opstats); + dd->int_counter = NULL; + dd->rcv_limit = NULL; + dd->send_schedule = NULL; + dd->tx_opstats = NULL; + kfree(dd->comp_vect); + dd->comp_vect = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } +static void __hfi1_free_devdata(struct kobject *kobj) +{ + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); + + hfi1_clean_devdata(dd); +} + static struct kobj_type hfi1_devdata_type = { .release = __hfi1_free_devdata, }; @@ -1265,6 +1288,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) return ERR_PTR(-ENOMEM); dd->num_pports = nports; dd->pport = (struct hfi1_pportdata *)(dd + 1); + dd->pcidev = pdev; + pci_set_drvdata(pdev, dd); INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); @@ -1275,6 +1300,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->unit = ret; list_add(&dd->list, &hfi1_dev_list); } + dd->node = -1; spin_unlock_irqrestore(&hfi1_devs_lock, flags); idr_preload_end(); @@ -1327,13 +1353,17 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } + dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL); + if (!dd->comp_vect) { + ret = -ENOMEM; + goto bail; + } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; bail: - if (!list_empty(&dd->list)) - list_del_init(&dd->list); - rvt_dealloc_device(&dd->verbs_dev.rdi); + hfi1_clean_devdata(dd); return ERR_PTR(ret); } @@ -1372,6 +1402,7 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd) static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); +static void shutdown_one(struct pci_dev *); #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " @@ -1388,6 +1419,7 @@ static struct pci_driver hfi1_pci_driver = { .name = DRIVER_NAME, .probe = init_one, .remove = remove_one, + .shutdown = shutdown_one, .id_table = hfi1_pci_tbl, .err_handler = &hfi1_pci_err_handler, }; @@ -1496,7 +1528,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); - node_affinity_destroy(); + node_affinity_destroy_all(); hfi1_wss_exit(); hfi1_dbg_exit(); @@ -1580,6 +1612,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd) static void postinit_cleanup(struct hfi1_devdata *dd) { hfi1_start_cleanup(dd); + hfi1_comp_vectors_clean_up(dd); + hfi1_dev_affinity_clean_up(dd); hfi1_pcie_ddcleanup(dd); hfi1_pcie_cleanup(dd->pcidev); @@ -1797,6 +1831,13 @@ static void remove_one(struct pci_dev *pdev) postinit_cleanup(dd); } +static void shutdown_one(struct pci_dev *pdev) +{ + struct hfi1_devdata *dd = pci_get_drvdata(pdev); + + shutdown_device(dd); +} + /** * hfi1_create_rcvhdrq - create a receive header queue * @dd: the hfi1_ib device diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index e9962c65c68f..0307405491e0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1238,7 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, } static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, - u32 logical_state, u32 phys_state) + u32 logical_state, u32 phys_state, int local_mad) { struct hfi1_devdata *dd = ppd->dd; u32 link_state; @@ -1314,7 +1314,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * Don't send a reply if the response would be sent * through the disabled port. */ - if (link_state == HLS_DN_DISABLE && smp->hop_cnt) + if (link_state == HLS_DN_DISABLE && !local_mad) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; break; case IB_PORT_ARMED: @@ -1350,7 +1350,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1634,7 +1634,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, */ if (!invalid) { - ret = set_port_states(ppd, smp, ls_new, ps_new); + ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad); if (ret) return ret; } @@ -2085,7 +2085,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -2122,7 +2122,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, } if (!invalid) { - ret = set_port_states(ppd, smp, ls_new, ps_new); + ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad); if (ret) return ret; } @@ -3424,6 +3424,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } + rsp->port_number = port; /* PortRcvErrorInfo */ rsp->port_rcv_ei.status_and_code = @@ -4190,7 +4191,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4198,7 +4199,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len, max_len); + resp_len, max_len, local_mad); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, @@ -4222,7 +4223,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len, max_len); + resp_len, max_len, local_mad); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, @@ -4314,7 +4315,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, static int subn_set_opa_aggregate(struct opa_smp *smp, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, int local_mad) { int i; u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff; @@ -4344,7 +4345,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL, (u32)agg_data_len); + ibdev, port, NULL, (u32)agg_data_len, + local_mad); + if (smp->status & IB_SMP_INVALID_FIELD) break; if (smp->status & ~IB_SMP_DIRECTION) { @@ -4519,7 +4522,7 @@ static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp, static int process_subn_opa(struct ib_device *ibdev, int mad_flags, u8 port, const struct opa_mad *in_mad, struct opa_mad *out_mad, - u32 *resp_len) + u32 *resp_len, int local_mad) { struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4588,11 +4591,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: ret = subn_set_opa_sma(attr_id, smp, am, data, ibdev, port, resp_len, - data_size); + data_size, local_mad); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, - resp_len); + resp_len, local_mad); break; } break; @@ -4832,6 +4835,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, { int ret; int pkey_idx; + int local_mad = 0; u32 resp_len = 0; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4846,13 +4850,14 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - if (is_local_mad(ibp, in_mad, in_wc)) { + local_mad = is_local_mad(ibp, in_mad, in_wc); + if (local_mad) { ret = opa_local_smp_check(ibp, in_wc); if (ret) return IB_MAD_RESULT_FAILURE; } ret = process_subn_opa(ibdev, mad_flags, port, in_mad, - out_mad, &resp_len); + out_mad, &resp_len, local_mad); goto bail; case IB_MGMT_CLASS_PERF_MGMT: ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 83d66e862207..87bd6b60cb53 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -163,9 +163,6 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) resource_size_t addr; int ret = 0; - dd->pcidev = pdev; - pci_set_drvdata(pdev, dd); - addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); @@ -186,6 +183,14 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -ENOMEM; } dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY); + + /* verify that reads actually work, save revision for reset check */ + dd->revision = readq(dd->kregbase1 + CCE_REVISION); + if (dd->revision == ~(u64)0) { + dd_dev_err(dd, "Cannot read chip CSRs\n"); + goto nomem; + } + dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 40dac4d16eb8..9cac15d10c4f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -50,8 +50,6 @@ #include "qp.h" #include "trace.h" -#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */ - #define SC(name) SEND_CTXT_##name /* * Send Context functions @@ -961,15 +959,40 @@ void sc_disable(struct send_context *sc) } /* return SendEgressCtxtStatus.PacketOccupancy */ -#define packet_occupancy(r) \ - (((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\ - >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT) +static u64 packet_occupancy(u64 reg) +{ + return (reg & + SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK) + >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT; +} /* is egress halted on the context? */ -#define egress_halted(r) \ - ((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK) +static bool egress_halted(u64 reg) +{ + return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK); +} -/* wait for packet egress, optionally pause for credit return */ +/* is the send context halted? */ +static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) +{ + return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & + SC(STATUS_CTXT_HALTED_SMASK)); +} + +/** + * sc_wait_for_packet_egress + * @sc: valid send context + * @pause: wait for credit return + * + * Wait for packet egress, optionally pause for credit return + * + * Egress halt and Context halt are not necessarily the same thing, so + * check for both. + * + * NOTE: The context halt bit may not be set immediately. Because of this, + * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW + * context bit to determine if the context is halted. + */ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) { struct hfi1_devdata *dd = sc->dd; @@ -981,8 +1004,9 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) reg_prev = reg; reg = read_csr(dd, sc->hw_context * 8 + SEND_EGRESS_CTXT_STATUS); - /* done if egress is stopped */ - if (egress_halted(reg)) + /* done if any halt bits, SW or HW are set */ + if (sc->flags & SCF_HALTED || + is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) break; reg = packet_occupancy(reg); if (reg == 0) diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index d486355880cb..cbf7faa5038c 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -199,6 +199,7 @@ void free_platform_config(struct hfi1_devdata *dd) { /* Release memory allocated for eprom or fallback file read. */ kfree(dd->platform_config.data); + dd->platform_config.data = NULL; } void get_port_type(struct hfi1_pportdata *ppd) diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 1869f639c3ae..b5966991d647 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -204,6 +204,8 @@ static void clean_i2c_bus(struct hfi1_i2c_bus *bus) void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) { + if (!ad) + return; clean_i2c_bus(ad->i2c_bus0); ad->i2c_bus0 = NULL; clean_i2c_bus(ad->i2c_bus1); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index da58046a02ea..1a1a47ac53c6 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2012,7 +2012,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, unsigned long nsec = 1024 * ccti_timer; hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec), - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_PINNED); } spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); @@ -2123,7 +2123,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -2149,7 +2149,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) @@ -2159,7 +2159,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): case OP(SEND_ONLY_WITH_INVALIDATE): - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -2271,7 +2271,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3daa94bdae3a..ef4c566e206f 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -53,156 +53,6 @@ #include "verbs_txreq.h" #include "trace.h" -/* - * Validate a RWQE and fill in the SGE state. - * Return 1 if OK. - */ -static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) -{ - int i, j, ret; - struct ib_wc wc; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - struct rvt_sge_state *ss; - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); - ss = &qp->r_sge; - ss->sg_list = qp->r_sg_list; - qp->r_len = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], - IB_ACCESS_LOCAL_WRITE); - if (unlikely(ret <= 0)) - goto bad_lkey; - qp->r_len += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ss->total_len = qp->r_len; - ret = 1; - goto bail; - -bad_lkey: - while (j) { - struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - - rvt_put_mr(sge->mr); - } - ss->num_sge = 0; - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return -1 if there is a local error, 0 if no RWQE is available, - * otherwise return 1. - * - * Can be called from interrupt level. - */ -int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct rvt_rq *rq; - struct rvt_rwq *wq; - struct rvt_srq *srq; - struct rvt_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = ibsrq_to_rvtsrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. */ - smp_rmb(); - wqe = rvt_get_rwqe_ptr(rq, tail); - /* - * Even though we update the tail index in memory, the verbs - * consumer is not supposed to post more entries until a - * completion is generated. - */ - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - if (!wr_id_only && !init_sge(qp, wqe)) { - ret = -1; - goto unlock; - } - qp->r_wr_id = wqe->wr_id; - - ret = 1; - set_bit(RVT_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * Validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -423,7 +273,7 @@ again: /* FALLTHROUGH */ case IB_WR_SEND: send: - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -435,7 +285,7 @@ send: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) @@ -733,6 +583,20 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, ohdr->bth[2] = cpu_to_be32(bth2); } +/** + * hfi1_make_ruc_header_16B - build a 16B header + * @qp: the queue pair + * @ohdr: a pointer to the destination header memory + * @bth0: bth0 passed in from the RC/UC builder + * @bth2: bth2 passed in from the RC/UC builder + * @middle: non zero implies indicates ahg "could" be used + * @ps: the current packet state + * + * This routine may disarm ahg under these situations: + * - packet needs a GRH + * - BECN needed + * - migration state not IB_MIG_MIGRATED + */ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -777,6 +641,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, else middle = 0; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + becn = true; + middle = 0; + } if (middle) build_ahg(qp, bth2); else @@ -784,11 +654,6 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, bth0 |= pkey; bth0 |= extra_bytes << 20; - if (qp->s_flags & RVT_S_ECN) { - qp->s_flags &= ~RVT_S_ECN; - /* we recently received a FECN, so return a BECN */ - becn = true; - } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); if (!ppd->lid) @@ -806,6 +671,20 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, pkey, becn, 0, l4, priv->s_sc); } +/** + * hfi1_make_ruc_header_9B - build a 9B header + * @qp: the queue pair + * @ohdr: a pointer to the destination header memory + * @bth0: bth0 passed in from the RC/UC builder + * @bth2: bth2 passed in from the RC/UC builder + * @middle: non zero implies indicates ahg "could" be used + * @ps: the current packet state + * + * This routine may disarm ahg under these situations: + * - packet needs a GRH + * - BECN needed + * - migration state not IB_MIG_MIGRATED + */ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -839,6 +718,12 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, else middle = 0; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); + middle = 0; + } if (middle) build_ahg(qp, bth2); else @@ -846,11 +731,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, bth0 |= pkey; bth0 |= extra_bytes << 20; - if (qp->s_flags & RVT_S_ECN) { - qp->s_flags &= ~RVT_S_ECN; - /* we recently received a FECN, so return a BECN */ - bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); - } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, lrh0, diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 89bd9851065b..7c8aed0ffc07 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -63,13 +63,20 @@ static u8 __get_ib_hdr_len(struct ib_header *hdr) static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr) { - struct ib_other_headers *ohdr; + struct ib_other_headers *ohdr = NULL; u8 opcode; + u8 l4 = hfi1_16B_get_l4(hdr); + + if (l4 == OPA_16B_L4_FM) { + opcode = IB_OPCODE_UD_SEND_ONLY; + return (8 + 8); /* No BTH */ + } - if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL) + if (l4 == OPA_16B_L4_IB_LOCAL) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; + opcode = ib_bth_get_opcode(ohdr); return hdr_len_by_opcode[opcode] == 0 ? 0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8); @@ -234,17 +241,24 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, #define BTH_16B_PRN \ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \ "qpn:0x%.6x a:%d psn:0x%.8x" -const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, bool becn, bool fecn, u8 mig, - u8 se, u8 pad, u8 opcode, const char *opname, - u8 tver, u16 pkey, u32 psn, u32 qpn) +#define L4_FM_16B_PRN \ + "op:0x%.2x,%s dest_qpn:0x%.6x src_qpn:0x%.6x" +const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4, + u8 ack, bool becn, bool fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn, + u32 dest_qpn, u32 src_qpn) { const char *ret = trace_seq_buffer_ptr(p); if (bypass) - trace_seq_printf(p, BTH_16B_PRN, - opcode, opname, - se, mig, pad, tver, qpn, ack, psn); + if (l4 == OPA_16B_L4_FM) + trace_seq_printf(p, L4_FM_16B_PRN, + opcode, opname, dest_qpn, src_qpn); + else + trace_seq_printf(p, BTH_16B_PRN, + opcode, opname, + se, mig, pad, tver, qpn, ack, psn); else trace_seq_printf(p, BTH_9B_PRN, @@ -258,12 +272,17 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, const char *parse_everbs_hdrs( struct trace_seq *p, - u8 opcode, + u8 opcode, u8 l4, u32 dest_qpn, u32 src_qpn, void *ehdrs) { union ib_ehdrs *eh = ehdrs; const char *ret = trace_seq_buffer_ptr(p); + if (l4 == OPA_16B_L4_FM) { + trace_seq_printf(p, "mgmt pkt"); + goto out; + } + switch (opcode) { /* imm */ case OP(RC, SEND_LAST_WITH_IMMEDIATE): @@ -334,6 +353,7 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ieth)); break; } +out: trace_seq_putc(p, 0); return ret; } @@ -374,6 +394,7 @@ const char *print_u32_array( return ret; } +__hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 0e7d929530c5..e62171fb7379 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ * hfi1_cdbg(LVL, fmt, ...); as well as take care of all * the debugfs stuff. */ +__hfi1_trace_def(AFFINITY); __hfi1_trace_def(PKT); __hfi1_trace_def(PROC); __hfi1_trace_def(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 2847626d3819..1dc2c28fc96e 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -96,7 +96,9 @@ __print_symbolic(opcode, \ ib_opcode_name(CNP)) u8 ibhdr_exhdr_len(struct ib_header *hdr); -const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, + u8 l4, u32 dest_qpn, u32 src_qpn, + void *ehdrs); u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); const char *hfi1_trace_get_packet_l4_str(u8 l4); @@ -123,14 +125,16 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, u8 rc, u8 sc, u8 sl, u16 entropy, u16 len, u16 pkey, u32 dlid, u32 slid); -const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, bool becn, bool fecn, u8 mig, - u8 se, u8 pad, u8 opcode, const char *opname, - u8 tver, u16 pkey, u32 psn, u32 qpn); +const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4, + u8 ack, bool becn, bool fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn, + u32 dest_qpn, u32 src_qpn); const char *hfi1_trace_get_packet_l2_str(u8 l2); -#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) +#define __parse_ib_ehdrs(op, l4, dest_qpn, src_qpn, ehdrs) \ + parse_everbs_hdrs(p, op, l4, dest_qpn, src_qpn, ehdrs) #define lrh_name(lrh) { HFI1_##lrh, #lrh } #define show_lnh(lrh) \ @@ -169,6 +173,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __field(u32, psn) __field(u32, qpn) __field(u32, slid) + __field(u32, dest_qpn) + __field(u32, src_qpn) /* extended headers */ __dynamic_array(u8, ehdrs, hfi1_trace_packet_hdr_len(packet)) @@ -178,6 +184,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->etype = packet->etype; __entry->l2 = hfi1_16B_get_l2(packet->hdr); + __entry->dest_qpn = 0; + __entry->src_qpn = 0; if (__entry->etype == RHF_RCV_TYPE_BYPASS) { hfi1_trace_parse_16b_hdr(packet->hdr, &__entry->age, @@ -192,16 +200,23 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->dlid, &__entry->slid); - hfi1_trace_parse_16b_bth(packet->ohdr, - &__entry->ack, - &__entry->mig, - &__entry->opcode, - &__entry->pad, - &__entry->se, - &__entry->tver, - &__entry->psn, - &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_FM) { + __entry->opcode = IB_OPCODE_UD_SEND_ONLY; + __entry->dest_qpn = hfi1_16B_get_dest_qpn(packet->mgmt); + __entry->src_qpn = hfi1_16B_get_src_qpn(packet->mgmt); + } else { + hfi1_trace_parse_16b_bth(packet->ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } } else { + __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(packet->hdr, sc5, &__entry->lnh, &__entry->lver, @@ -223,8 +238,9 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->pkey, &__entry->psn, &__entry->qpn); - } - /* extended headers */ + } + /* extended headers */ + if (__entry->l4 != OPA_16B_L4_FM) memcpy(__get_dynamic_array(ehdrs), &packet->ohdr->u, __get_dynamic_array_len(ehdrs)); @@ -253,25 +269,31 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->pkey, __entry->dlid, __entry->slid), - hfi1_trace_fmt_bth(p, - __entry->etype == + hfi1_trace_fmt_rest(p, + __entry->etype == RHF_RCV_TYPE_BYPASS, - __entry->ack, - __entry->becn, - __entry->fecn, - __entry->mig, - __entry->se, - __entry->pad, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->tver, - __entry->pkey, - __entry->psn, - __entry->qpn), + __entry->l4, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn, + __entry->dest_qpn, + __entry->src_qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( __entry->opcode, + __entry->l4, + __entry->dest_qpn, + __entry->src_qpn, (void *)__get_dynamic_array(ehdrs)) ) ); @@ -310,6 +332,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __field(u32, psn) __field(u32, qpn) __field(u32, slid) + __field(u32, dest_qpn) + __field(u32, src_qpn) /* extended headers */ __dynamic_array(u8, ehdrs, hfi1_trace_opa_hdr_len(opah)) @@ -320,6 +344,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DD_DEV_ASSIGN(dd); __entry->hdr_type = opah->hdr_type; + __entry->dest_qpn = 0; + __entry->src_qpn = 0; if (__entry->hdr_type) { hfi1_trace_parse_16b_hdr(&opah->opah, &__entry->age, @@ -334,19 +360,26 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->dlid, &__entry->slid); - if (__entry->l4 == OPA_16B_L4_IB_LOCAL) - ohdr = &opah->opah.u.oth; - else - ohdr = &opah->opah.u.l.oth; - hfi1_trace_parse_16b_bth(ohdr, - &__entry->ack, - &__entry->mig, - &__entry->opcode, - &__entry->pad, - &__entry->se, - &__entry->tver, - &__entry->psn, - &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_FM) { + ohdr = NULL; + __entry->opcode = IB_OPCODE_UD_SEND_ONLY; + __entry->dest_qpn = hfi1_16B_get_dest_qpn(&opah->opah.u.mgmt); + __entry->src_qpn = hfi1_16B_get_src_qpn(&opah->opah.u.mgmt); + } else { + if (__entry->l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &opah->opah.u.oth; + else + ohdr = &opah->opah.u.l.oth; + hfi1_trace_parse_16b_bth(ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } } else { __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, @@ -376,8 +409,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, } /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), - &ohdr->u, __get_dynamic_array_len(ehdrs)); + if (__entry->l4 != OPA_16B_L4_FM) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), @@ -399,24 +433,30 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __entry->pkey, __entry->dlid, __entry->slid), - hfi1_trace_fmt_bth(p, - !!__entry->hdr_type, - __entry->ack, - __entry->becn, - __entry->fecn, - __entry->mig, - __entry->se, - __entry->pad, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->tver, - __entry->pkey, - __entry->psn, - __entry->qpn), + hfi1_trace_fmt_rest(p, + !!__entry->hdr_type, + __entry->l4, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn, + __entry->dest_qpn, + __entry->src_qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( __entry->opcode, + __entry->l4, + __entry->dest_qpn, + __entry->src_qpn, (void *)__get_dynamic_array(ehdrs)) ) ); diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 9d7a3110c14c..b7b671017e59 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -397,7 +397,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { qp->r_sge = qp->s_rdma_read_sge; } else { - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -542,7 +542,7 @@ rdma_last_imm: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { rvt_put_ss(&qp->s_rdma_read_sge); } else { - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index bcf3b0bebac8..1ab332f1866e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -163,7 +163,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { int ret; - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; u32 dlid, slid, nwords, extra_bytes; + u32 dest_qp = wqe->ud_wr.remote_qpn; + u32 src_qp = qp->ibqp.qp_num; u16 len, pkey; u8 l4, sc5; + bool is_mgmt = false; ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ - ps->s_txreq->hdr_dwords = 9; - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - ps->s_txreq->hdr_dwords++; + + /* + * Build 16B Management Packet if either the destination + * or source queue pair number is 0 or 1. + */ + if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) { + /* header size in dwords 16B LRH+L4_FM = (16+8)/4. */ + ps->s_txreq->hdr_dwords = 6; + is_mgmt = true; + } else { + /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ + ps->s_txreq->hdr_dwords = 9; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + ps->s_txreq->hdr_dwords++; + } /* SW provides space for CRC and LT for bypass packets. */ extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2), @@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); - hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + if (is_mgmt) { + l4 = OPA_16B_L4_FM; + pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt, + dest_qp, src_qp); + } else { + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + } /* Convert dwords to flits */ len = (ps->s_txreq->hdr_dwords + nwords) >> 1; @@ -628,7 +649,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey) } void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { u64 pbc, pbc_flags = 0; @@ -687,7 +708,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, } void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, - u32 pkey, u32 slid, u32 dlid, u8 sc5, + u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { u64 pbc, pbc_flags = 0; @@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct ib_other_headers *ohdr = packet->ohdr; u32 hdrsize = packet->hlen; struct ib_wc wc; - u32 qkey; u32 src_qp; u16 pkey; int mgmt_pkey_idx = -1; @@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 dlid = packet->dlid; u32 slid = packet->slid; u8 extra_bytes; + u8 l4 = 0; bool dlid_is_permissive; bool slid_is_permissive; + bool solicited = false; extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2); - qkey = ib_get_qkey(ohdr); - src_qp = ib_get_sqpn(ohdr); if (packet->etype == RHF_RCV_TYPE_BYPASS) { u32 permissive_lid = opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B); + l4 = hfi1_16B_get_l4(packet->hdr); pkey = hfi1_16B_get_pkey(packet->hdr); dlid_is_permissive = (dlid == permissive_lid); slid_is_permissive = (slid == permissive_lid); } else { - pkey = ib_bth_get_pkey(ohdr); + pkey = ib_bth_get_pkey(packet->ohdr); dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); } sl_from_sc = ibp->sc_to_sl[sc5]; + if (likely(l4 != OPA_16B_L4_FM)) { + src_qp = ib_get_sqpn(packet->ohdr); + solicited = ib_bth_is_solicited(packet->ohdr); + } else { + src_qp = hfi1_16B_get_src_qpn(packet->mgmt); + } + process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by @@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) /* Silent drop */ - return; + if (unlikely(l4 != OPA_16B_L4_FM && + ib_get_qkey(packet->ohdr) != qp->qkey)) + return; /* Silent drop */ /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && @@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - wc.ex.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { @@ -974,7 +1002,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else { int ret; - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; @@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited); return; drop: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0d5330b7353d..6a4c5142515a 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -437,7 +437,6 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, hfi1_cdbg(TID, "Failed to program RcvArray entries %d", ret); - ret = -EFAULT; goto unlock; } else if (ret > 0) { if (grp->used == grp->size) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c8cf4d4984d3..08991874c0e2 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -63,6 +63,8 @@ #include "verbs_txreq.h" #include "debugfs.h" #include "vnic.h" +#include "fault.h" +#include "affinity.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -615,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, wake_up(&mcast->wait); } else { /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); + if (packet->etype == RHF_RCV_TYPE_BYPASS && + hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM) + qp_num = hfi1_16B_get_dest_qpn(packet->mgmt); + else + qp_num = ib_bth_get_qpn(packet->ohdr); + rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) @@ -624,10 +631,6 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, if (hfi1_do_pkey_check(packet)) goto unlock_drop; - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, - true))) - goto unlock_drop; - spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -934,8 +937,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, - false))) + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, @@ -1088,7 +1090,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false))) + + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } @@ -1310,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct ib_other_headers *ohdr; + struct ib_other_headers *ohdr = NULL; send_routine sr; int ret; u16 pkey; u32 slid; + u8 l4 = 0; /* locate the pkey within the headers */ if (ps->s_txreq->phdr.hdr.hdr_type) { struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; - u8 l4 = hfi1_16B_get_l4(hdr); - if (l4 == OPA_16B_L4_IB_GLOBAL) - ohdr = &hdr->u.l.oth; - else + l4 = hfi1_16B_get_l4(hdr); + if (l4 == OPA_16B_L4_IB_LOCAL) ohdr = &hdr->u.oth; + else if (l4 == OPA_16B_L4_IB_GLOBAL) + ohdr = &hdr->u.l.oth; + slid = hfi1_16B_get_slid(hdr); pkey = hfi1_16B_get_pkey(hdr); } else { @@ -1339,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) pkey = ib_bth_get_pkey(ohdr); } - ps->opcode = ib_bth_get_opcode(ohdr); + if (likely(l4 != OPA_16B_L4_FM)) + ps->opcode = ib_bth_get_opcode(ohdr); + else + ps->opcode = IB_OPCODE_UD_SEND_ONLY; + sr = get_send_routine(qp, ps); ret = egress_pkey_check(dd->pport, slid, pkey, priv->s_sc, qp->s_pkey_index); @@ -1937,11 +1946,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; + dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = + hfi1_comp_vect_mappings_lookup; /* completeion queue */ - snprintf(dd->verbs_dev.rdi.dparms.cq_name, - sizeof(dd->verbs_dev.rdi.dparms.cq_name), - "hfi1_cq%d", dd->unit); + dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus; dd->verbs_dev.rdi.dparms.node = dd->node; /* misc settings */ diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 2d787b8346ca..a4d06502f06d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -110,6 +110,12 @@ enum { #define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh)) #define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32)) +/* 24Bits for qpn, upper 8Bits reserved */ +struct opa_16b_mgmt { + __be32 dest_qpn; + __be32 src_qpn; +}; + struct hfi1_16b_header { u32 lrh[4]; union { @@ -118,6 +124,7 @@ struct hfi1_16b_header { struct ib_other_headers oth; } l; struct ib_other_headers oth; + struct opa_16b_mgmt mgmt; } u; } __packed; @@ -227,9 +234,7 @@ struct hfi1_ibdev { /* per HFI symlinks to above */ struct dentry *hfi1_ibdev_link; #ifdef CONFIG_FAULT_INJECTION - struct fault_opcode *fault_opcode; - struct fault_packet *fault_packet; - bool fault_suppress_err; + struct fault *fault; #endif #endif }; @@ -330,8 +335,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet); int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey); -int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only); - void hfi1_migrate_qp(struct rvt_qp *qp); int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 14734d0d0b76..3a485f50fede 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, hr_cq->set_ci_db = hr_cq->db.db_record; *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; } /* Init mmt table and write buff address to mtt table */ diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index fb305b7f99a8..412297d4b86c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -100,6 +100,9 @@ #define SERV_TYPE_UC 2 #define SERV_TYPE_UD 3 +/* Configure to HW for PAGE_SIZE larger than 4KB */ +#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12) + #define PAGES_SHIFT_8 8 #define PAGES_SHIFT_16 16 #define PAGES_SHIFT_24 24 @@ -211,6 +214,7 @@ enum { struct hns_roce_uar { u64 pfn; unsigned long index; + unsigned long logic_idx; }; struct hns_roce_ucontext { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 0eeabfbee192..63b5b3edabcb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -912,7 +912,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, obj_per_chunk = buf_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; bt_chunk_num = bt_chunk_size / 8; - if (table->type >= HEM_TYPE_MTT) + if (type >= HEM_TYPE_MTT) num_bt_l0 = bt_chunk_num; table->hem = kcalloc(num_hem, sizeof(*table->hem), @@ -920,7 +920,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, if (!table->hem) goto err_kcalloc_hem_buf; - if (check_whether_bt_num_3(table->type, hop_num)) { + if (check_whether_bt_num_3(type, hop_num)) { unsigned long num_bt_l1; num_bt_l1 = (num_hem + bt_chunk_num - 1) / @@ -939,8 +939,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, goto err_kcalloc_l1_dma; } - if (check_whether_bt_num_2(table->type, hop_num) || - check_whether_bt_num_3(table->type, hop_num)) { + if (check_whether_bt_num_2(type, hop_num) || + check_whether_bt_num_3(type, hop_num)) { table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0), GFP_KERNEL); if (!table->bt_l0) @@ -1039,14 +1039,14 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_cqe_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 47e1b6ac1e1a..8013d69c5ac4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) free_mr->mr_free_pd = to_hr_pd(pd); free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev; free_mr->mr_free_pd->ibpd.uobject = NULL; + free_mr->mr_free_pd->ibpd.__internal_mr = NULL; atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0); attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE; @@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) do { ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); - if (ret < 0) { + if (ret < 0 && hr_qp) { dev_err(dev, "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n", hr_qp->qpn, ret, hr_mr->key, ne); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8b84ab7800d8..dd716ed60661 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -71,6 +71,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, return -EINVAL; } + if (wr->opcode == IB_WR_RDMA_READ) { + dev_err(hr_dev->dev, "Not support inline data!\n"); + return -EINVAL; + } + for (i = 0; i < wr->num_sge; i++) { memcpy(wqe, ((void *)wr->sg_list[i].addr), wr->sg_list[i].length); @@ -137,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; unsigned int ind; void *wqe = NULL; - u32 tmp_len = 0; bool loopback; + u32 tmp_len; int ret = 0; u8 *smac; int nreq; @@ -148,7 +153,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_UD)) { dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); - *bad_wr = NULL; + *bad_wr = wr; return -EOPNOTSUPP; } @@ -182,7 +187,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; - owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1; + owner_bit = + ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + tmp_len = 0; /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -456,6 +463,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } else { dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); spin_unlock_irqrestore(&qp->sq.lock, flags); + *bad_wr = wr; return -EOPNOTSUPP; } } @@ -473,8 +481,8 @@ out: V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); - roce_set_field(sq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M, - V2_DB_PARAMETER_CONS_IDX_S, + roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, + V2_DB_PARAMETER_IDX_S, qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, V2_DB_PARAMETER_SL_S, qp->sl); @@ -540,16 +548,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, } if (i < hr_qp->rq.max_gs) { - dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg[i].addr = 0; + dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg->addr = 0; } /* rq support inline data */ - sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; - hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge; - for (i = 0; i < wr->num_sge; i++) { - sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = wr->sg_list[i].length; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; + hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = + (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; i++) { + sge_list[i].addr = + (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } } hr_qp->rq.wrid[ind] = wr->wr_id; @@ -606,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev, dma_unmap_single(hr_dev->dev, ring->desc_dma_addr, ring->desc_num * sizeof(struct hns_roce_cmq_desc), DMA_BIDIRECTIONAL); + + ring->desc_dma_addr = 0; kfree(ring->desc); } @@ -1017,40 +1031,40 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S, - hr_dev->caps.qpc_ba_pg_sz); + hr_dev->caps.qpc_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S, - hr_dev->caps.qpc_buf_pg_sz); + hr_dev->caps.qpc_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S, qpc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : qpc_hop_num); roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S, - hr_dev->caps.srqc_ba_pg_sz); + hr_dev->caps.srqc_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S, - hr_dev->caps.srqc_buf_pg_sz); + hr_dev->caps.srqc_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S, srqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : srqc_hop_num); roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S, - hr_dev->caps.cqc_ba_pg_sz); + hr_dev->caps.cqc_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S, - hr_dev->caps.cqc_buf_pg_sz); + hr_dev->caps.cqc_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S, cqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : cqc_hop_num); roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S, - hr_dev->caps.mpt_ba_pg_sz); + hr_dev->caps.mpt_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S, - hr_dev->caps.mpt_buf_pg_sz); + hr_dev->caps.mpt_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S, mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num); @@ -1074,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) if (ret) { dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n", ret); + return ret; } /* Get pf resource owned by every pf */ @@ -1344,7 +1359,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, mr->pbl_ba_pg_sz); + V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, + mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mr->pd); mpt_entry->byte_4_pd_hop_st = cpu_to_le32(mpt_entry->byte_4_pd_hop_st); @@ -1365,6 +1381,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, mr->type == MR_TYPE_MR ? 0 : 1); + roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, + 1); mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); @@ -1418,7 +1436,8 @@ found: roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, mr->pbl_buf_pg_sz); + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, + mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); mpt_entry->byte_64_buf_pa1 = cpu_to_le32(mpt_entry->byte_64_buf_pa1); return 0; @@ -1599,11 +1618,11 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, - hr_dev->caps.cqe_ba_pg_sz); + hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, - hr_dev->caps.cqe_buf_pg_sz); + hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET); cq_context->cqe_ba = (u32)(dma_handle >> 3); @@ -2162,6 +2181,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); /* @@ -2274,7 +2294,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, context->rq_db_record_addr = hr_qp->rdb.dma >> 32; qpc_mask->rq_db_record_addr = 0; - roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1); + roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, + (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0); roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, @@ -2592,10 +2613,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, 0); - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, 0); + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, @@ -2650,8 +2673,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; } - if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) { + if (attr_mask & IB_QP_ALT_PATH) { dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask); return -EINVAL; } @@ -2699,7 +2721,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_dev->caps.mtt_ba_pg_sz); + hr_dev->caps.mtt_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -2707,7 +2729,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz); + hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); @@ -2800,10 +2822,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_140_RR_MAX_S, 0); } - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, 0); + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } /* Configure GID index */ port_num = rdma_ah_get_port_num(&attr->ah_attr); @@ -2845,7 +2869,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); - else + else if (attr_mask & IB_QP_PATH_MTU) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, attr->path_mtu); @@ -2922,11 +2946,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; } - /* If exist optional param, return error */ - if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_QKEY) || (attr_mask & IB_QP_PATH_MIG_STATE) || - (attr_mask & IB_QP_CUR_STATE) || - (attr_mask & IB_QP_MIN_RNR_TIMER)) { + /* Not support alternate path and path migration */ + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_PATH_MIG_STATE)) { dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); return -EINVAL; } @@ -3161,7 +3183,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR)) { + (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { /* Nothing */ ; } else { @@ -4140,12 +4163,14 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set eqe_ba_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M, - HNS_ROCE_EQC_BA_PG_SZ_S, eq->eqe_ba_pg_sz); + HNS_ROCE_EQC_BA_PG_SZ_S, + eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET); /* set eqe_buf_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M, - HNS_ROCE_EQC_BUF_PG_SZ_S, eq->eqe_buf_pg_sz); + HNS_ROCE_EQC_BUF_PG_SZ_S, + eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET); /* set eq_producer_idx */ roce_set_field(eqc->byte_8, @@ -4478,7 +4503,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) { - dev_err(dev, "[mailbox cmd] creat eqc failed.\n"); + dev_err(dev, "[mailbox cmd] create eqc failed.\n"); goto err_cmd_mbox; } @@ -4694,6 +4719,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { {0, } }; +MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); + static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 182b6726f783..2caeb4cdad5c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -76,7 +76,7 @@ #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 -#define HNS_ROCE_CMQ_TX_TIMEOUT 200 +#define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 @@ -897,8 +897,8 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) -#define V2_DB_PARAMETER_CONS_IDX_S 0 -#define V2_DB_PARAMETER_CONS_IDX_M GENMASK(15, 0) +#define V2_DB_PARAMETER_IDX_S 0 +#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0) #define V2_DB_PARAMETER_SL_S 16 #define V2_DB_PARAMETER_SL_M GENMASK(18, 16) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 9d48bc07a9e6..c614f9182b1a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -99,7 +99,6 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); struct ib_gid_attr zattr = { }; - union ib_gid zgid = { {0} }; u8 port = attr->port_num - 1; unsigned long flags; int ret; @@ -199,7 +198,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, memset(props, 0, sizeof(*props)); - props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid); + props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid); props->max_mr_size = (u64)(~(0ULL)); props->page_size_cap = hr_dev->caps.page_size_cap; props->vendor_id = hr_dev->vendor_id; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index f7256d88d38f..d1fe0e7957e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1007,12 +1007,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } n = ib_umem_page_count(mr->umem); - if (mr->umem->page_shift != HNS_ROCE_HEM_PAGE_SHIFT) { - dev_err(dev, "Just support 4K page size but is 0x%lx now!\n", - BIT(mr->umem->page_shift)); - ret = -EINVAL; - goto err_umem; - } if (!hr_dev->caps.pbl_hop_num) { if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 4b41e041799c..b9f2c871ff9a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -107,13 +107,15 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) int ret = 0; /* Using bitmap to manager UAR index */ - ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->index); + ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); if (ret == -1) return -ENOMEM; - if (uar->index > 0) - uar->index = (uar->index - 1) % + if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1) + uar->index = (uar->logic_idx - 1) % (hr_dev->caps.phy_num_uars - 1) + 1; + else + uar->index = 0; if (!dev_is_pci(hr_dev->dev)) { res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); @@ -132,7 +134,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { - hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index, + hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->logic_idx, BITMAP_NO_RR); } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e289a924e789..baaf906f7c2e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -620,7 +620,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, to_hr_ucontext(ib_pd->uobject->context), ucmd.db_addr, &hr_qp->rdb); if (ret) { - dev_err(dev, "rp record doorbell map failed!\n"); + dev_err(dev, "rq record doorbell map failed!\n"); goto err_mtt; } } @@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_rq_sge_list; } *hr_qp->rdb.db_record = 0; + hr_qp->rdb_en = 1; } /* Allocate QP buf */ @@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (cur_state == new_state && cur_state == IB_QPS_RESET) { - ret = 0; + if (hr_dev->caps.min_wqes) { + ret = -EPERM; + dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, + new_state); + } else { + ret = 0; + } + goto out; } diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index d5d8c1be345a..2f2b4426ded7 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -207,6 +207,7 @@ struct i40iw_msix_vector { u32 irq; u32 cpu_affinity; u32 ceq_id; + cpumask_t mask; }; struct l2params_work { diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4cfa8f4647e2..7b2655128b9f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1519,18 +1519,13 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, /** * i40iw_find_port - find port that matches reference port - * @port: port number + * @hte: ptr to accelerated or non-accelerated list * @accelerated_list: flag for accelerated vs non-accelerated list */ -static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port, - bool accelerated_list) +static bool i40iw_find_port(struct list_head *hte, u16 port) { - struct list_head *hte; struct i40iw_cm_node *cm_node; - hte = accelerated_list ? - &cm_core->accelerated_list : &cm_core->non_accelerated_list; - list_for_each_entry(cm_node, hte, list) { if (cm_node->loc_port == port) return true; @@ -1540,35 +1535,32 @@ static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port, /** * i40iw_port_in_use - determine if port is in use + * @cm_core: cm's core * @port: port number - * @active_side: flag for listener side vs active side */ -static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side) +bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port) { struct i40iw_cm_listener *listen_node; unsigned long flags; - bool ret = false; - if (active_side) { - spin_lock_irqsave(&cm_core->ht_lock, flags); - ret = i40iw_find_port(cm_core, port, true); - if (!ret) - ret = i40iw_find_port(cm_core, port, false); - if (!ret) - clear_bit(port, cm_core->active_side_ports); + spin_lock_irqsave(&cm_core->ht_lock, flags); + if (i40iw_find_port(&cm_core->accelerated_list, port) || + i40iw_find_port(&cm_core->non_accelerated_list, port)) { spin_unlock_irqrestore(&cm_core->ht_lock, flags); - } else { - spin_lock_irqsave(&cm_core->listen_list_lock, flags); - list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { - if (listen_node->loc_port == port) { - ret = true; - break; - } + return true; + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { + if (listen_node->loc_port == port) { + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + return true; } - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - return ret; + return false; } /** @@ -1788,7 +1780,7 @@ static enum i40iw_status_code i40iw_add_mqh_4( &ifa->ifa_address, rdma_vlan_dev_vlan_id(dev), dev->dev_addr); - child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC); + child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL); cm_parent_listen_node->cm_core->stats_listen_nodes_created++; i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, @@ -1917,7 +1909,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core, spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (listener->iwdev) { - if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false)) + if (apbvt_del) i40iw_manage_apbvt(listener->iwdev, listener->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2093,7 +2085,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, if (netif_is_bond_slave(netdev)) netdev = netdev_master_upper_dev_get(netdev); - neigh = dst_neigh_lookup(dst, &dst_addr); + neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32); rcu_read_lock(); if (neigh) { @@ -2298,7 +2290,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) if (cm_node->listener) { i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { - if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) { + if (cm_node->apbvt_set) { i40iw_manage_apbvt(cm_node->iwdev, cm_node->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2872,7 +2864,7 @@ static struct i40iw_cm_listener *i40iw_make_listen_node( if (!listener) { /* create a CM listen node (1/2 node to compare incoming traffic to) */ - listener = kzalloc(sizeof(*listener), GFP_ATOMIC); + listener = kzalloc(sizeof(*listener), GFP_KERNEL); if (!listener) return NULL; cm_core->stats_listen_nodes_created++; @@ -3244,6 +3236,7 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); + spin_lock_init(&cm_core->apbvt_lock); cm_core->event_wq = alloc_ordered_workqueue("iwewq", WQ_MEM_RECLAIM); @@ -3811,7 +3804,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; int ret = 0; - unsigned long flags; ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -3882,15 +3874,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->qhash_set = true; } - spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); - if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) { - spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); - if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) { - ret = -EINVAL; - goto err; - } - } else { - spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); + if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, + I40IW_MANAGE_APBVT_ADD)) { + ret = -EINVAL; + goto err; } cm_node->apbvt_set = true; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 78ba36ae2bbe..66dc1ba03389 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -413,8 +413,9 @@ struct i40iw_cm_core { spinlock_t ht_lock; /* manage hash table */ spinlock_t listen_list_lock; /* listen list */ + spinlock_t apbvt_lock; /*manage apbvt entries*/ - unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)]; + unsigned long ports_in_use[BITS_TO_LONGS(MAX_PORTS)]; u64 stats_nodes_created; u64 stats_nodes_destroyed; @@ -457,4 +458,5 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, struct i40iw_cm_info *nfo, bool disconnect_all); +bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port); #endif /* I40IW_CM_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 6139836fb533..2836c5420d60 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) switch (info->ae_id) { case I40IW_AE_LLP_FIN_RECEIVED: if (qp->term_flags) - continue; + break; if (atomic_inc_return(&iwqp->close_timer_started) == 1) { iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT; if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) && @@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; case I40IW_AE_LLP_CONNECTION_RESET: if (atomic_read(&iwqp->close_timer_started)) - continue; + break; i40iw_cm_disconn(iwqp); break; case I40IW_AE_QP_SUSPEND_COMPLETE: @@ -443,13 +443,37 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) { struct i40iw_apbvt_info *info; - enum i40iw_status_code status; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; + unsigned long flags; + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + enum i40iw_status_code status = 0; + bool in_use; + + /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to + * protect against race where add APBVT CQP can race ahead of the delete + * APBVT for same port. + */ + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + + if (!add_port) { + in_use = i40iw_port_in_use(cm_core, accel_local_port); + if (in_use) + goto exit; + clear_bit(accel_local_port, cm_core->ports_in_use); + } else { + in_use = test_and_set_bit(accel_local_port, + cm_core->ports_in_use); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + if (in_use) + return 0; + } cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); - if (!cqp_request) - return -ENOMEM; + if (!cqp_request) { + status = -ENOMEM; + goto exit; + } cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -465,6 +489,10 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad status = i40iw_handle_cqp_op(iwdev, cqp_request); if (status) i40iw_pr_err("CQP-OP Manage APBVT entry fail"); +exit: + if (!add_port) + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return status; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 9cd0d3ef9057..68095f00d08f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw struct i40iw_msix_vector *msix_vec) { enum i40iw_status_code status; - cpumask_t mask; if (iwdev->msix_shared && !ceq_id) { tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); @@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } - cpumask_clear(&mask); - cpumask_set_cpu(msix_vec->cpu_affinity, &mask); - irq_set_affinity_hint(msix_vec->irq, &mask); + cpumask_clear(&msix_vec->mask); + cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask); + irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask); if (status) { i40iw_pr_err("ceq irq config fail\n"); @@ -1758,7 +1757,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli return; - work = kzalloc(sizeof(*work), GFP_ATOMIC); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) return; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 40e4f5ab2b46..68679ad4c6da 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, list_for_each_entry(iwpbl, pbl_list, list) { if (iwpbl->user_base == va) { + iwpbl->on_list = false; list_del(&iwpbl->list); return iwpbl; } @@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return ERR_PTR(-ENOMEM); iwqp = (struct i40iw_qp *)mem; + iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; @@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->allocated_buffer = mem; iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; @@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, goto error; spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_CQ: @@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list); + iwpbl->on_list = true; spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_MEM: @@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, switch (iwmr->type) { case IW_MEMREG_TYPE_CQ: spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->cq_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags); break; case IW_MEMREG_TYPE_QP: spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - if (!list_empty(&ucontext->qp_reg_mem_list)) + if (iwpbl->on_list) { + iwpbl->on_list = false; list_del(&iwpbl->list); + } spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); break; default: diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 9067443cd311..76cf173377ab 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -78,6 +78,7 @@ struct i40iw_pbl { }; bool pbl_allocated; + bool on_list; u64 user_base; struct i40iw_pble_alloc pble_alloc; struct i40iw_mr *iwmr; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5b70744f414a..bf12394c13c1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -276,7 +276,7 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, found = i; break; } - if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid))) + if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid)) free = i; /* HW has space */ } @@ -345,7 +345,8 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) if (!ctx->refcount) { unsigned int real_index = ctx->real_index; - memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid)); + memset(&port_gid_table->gids[real_index].gid, 0, + sizeof(port_gid_table->gids[real_index].gid)); kfree(port_gid_table->gids[real_index].ctx); port_gid_table->gids[real_index].ctx = NULL; hw_update = 1; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 17f4f151a97f..ed1f253faf97 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -346,7 +346,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, /* Add to the first block the misalignment that it suffers from. */ total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); last_block_end = current_block_start + current_block_len; - last_block_aligned_end = round_up(last_block_end, 1 << block_shift); + last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift); total_len += (last_block_aligned_end - last_block_end); if (total_len & ((1ULL << block_shift) - 1ULL)) @@ -367,6 +367,40 @@ end: return block_shift; } +static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start, + u64 length, u64 virt_addr, + int access_flags) +{ + /* + * Force registering the memory as writable if the underlying pages + * are writable. This is so rereg can change the access permissions + * from readable to writable without having to run through ib_umem_get + * again + */ + if (!ib_access_writable(access_flags)) { + struct vm_area_struct *vma; + + down_read(¤t->mm->mmap_sem); + /* + * FIXME: Ideally this would iterate over all the vmas that + * cover the memory, but for now it requires a single vma to + * entirely cover the MR to support RO mappings. + */ + vma = find_vma(current->mm, start); + if (vma && vma->vm_end >= start + length && + vma->vm_start <= start) { + if (vma->vm_flags & VM_WRITE) + access_flags |= IB_ACCESS_LOCAL_WRITE; + } else { + access_flags |= IB_ACCESS_LOCAL_WRITE; + } + + up_read(¤t->mm->mmap_sem); + } + + return ib_umem_get(context, start, length, access_flags, 0); +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -381,10 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - /* Force registering the memory as writable. */ - /* Used for memory re-registeration. HCA protects the access */ - mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags | IB_ACCESS_LOCAL_WRITE, 0); + mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length, + virt_addr, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -454,6 +486,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, } if (flags & IB_MR_REREG_ACCESS) { + if (ib_access_writable(mr_access_flags) && !mmr->umem->writable) + return -EPERM; + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, convert_access(mr_access_flags)); @@ -467,10 +502,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); ib_umem_release(mmr->umem); - mmr->umem = ib_umem_get(mr->uobject->context, start, length, - mr_access_flags | - IB_ACCESS_LOCAL_WRITE, - 0); + mmr->umem = + mlx4_get_umem_mr(mr->uobject->context, start, length, + virt_addr, mr_access_flags); if (IS_ERR(mmr->umem)) { err = PTR_ERR(mmr->umem); /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 50af8915e7ec..cd2c08c45334 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -673,7 +673,8 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, MLX4_IB_RX_HASH_SRC_PORT_TCP | MLX4_IB_RX_HASH_DST_PORT_TCP | MLX4_IB_RX_HASH_SRC_PORT_UDP | - MLX4_IB_RX_HASH_DST_PORT_UDP)) { + MLX4_IB_RX_HASH_DST_PORT_UDP | + MLX4_IB_RX_HASH_INNER)) { pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n", ucmd->rx_hash_fields_mask); return (-EOPNOTSUPP); @@ -3077,7 +3078,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid)); *gid_type = port_gid_table->gids[index].gid_type; spin_unlock_irqrestore(&iboe->lock, flags); - if (!memcmp(gid, &zgid, sizeof(*gid))) + if (rdma_is_zero_gid(gid)) return -ENOENT; return 0; diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index bce263b92821..fb4d77be019b 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,6 +1,7 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 68775e12d721..ad39d64b8108 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -637,7 +637,7 @@ repoll: } static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, - struct ib_wc *wc) + struct ib_wc *wc, bool is_fatal_err) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_ib_wc *soft_wc, *next; @@ -650,6 +650,10 @@ static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", cq->mcq.cqn); + if (unlikely(is_fatal_err)) { + soft_wc->wc.status = IB_WC_WR_FLUSH_ERR; + soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + } wc[npolled++] = soft_wc->wc; list_del(&soft_wc->list); kfree(soft_wc); @@ -670,12 +674,17 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + /* make sure no soft wqe's are waiting */ + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc, true); + + mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled, + wc + soft_polled, &npolled); goto out; } if (unlikely(!list_empty(&cq->wc_list))) - soft_polled = poll_soft_wc(cq, num_entries, wc); + soft_polled = poll_soft_wc(cq, num_entries, wc, false); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e0894b203d59..92879d2d3026 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -52,7 +52,6 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> -#include <linux/mlx5/fs_helpers.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -180,7 +179,7 @@ static int mlx5_netdev_event(struct notifier_block *this, if (rep_ndev == ndev) roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; - } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) { + } else if (ndev->dev.parent == &mdev->pdev->dev) { roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; } @@ -1093,6 +1092,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_GRE) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_UDP) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } if (uhw->outlen) { @@ -2060,10 +2067,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (err) return err; - mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); - return mlx5_ib_set_vma_data(vma, context); } @@ -2158,15 +2161,14 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, err = io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot); if (err) { - mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", - err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); + mlx5_ib_err(dev, + "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", + err, mmap_cmd2str(cmd)); err = -EAGAIN; goto err; } pa = pfn << PAGE_SHIFT; - mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), - vma->vm_start, &pa); err = mlx5_ib_set_vma_data(vma, context); if (err) @@ -2252,10 +2254,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - - mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); break; case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); @@ -2395,7 +2393,8 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) enum { MATCH_CRITERIA_ENABLE_OUTER_BIT, MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT + MATCH_CRITERIA_ENABLE_INNER_BIT, + MATCH_CRITERIA_ENABLE_MISC2_BIT }; #define HEADER_IS_ZERO(match_criteria, headers) \ @@ -2415,6 +2414,9 @@ static u8 get_match_criteria_enable(u32 *match_criteria) match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, inner_headers)) << MATCH_CRITERIA_ENABLE_INNER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << + MATCH_CRITERIA_ENABLE_MISC2_BIT; return match_criteria_enable; } @@ -2425,7 +2427,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } -static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, +static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val, bool inner) { if (inner) { @@ -2449,6 +2451,27 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); } +static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) +{ + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) + return -EOPNOTSUPP; + + return 0; +} + #define LAST_ETH_FIELD vlan_tag #define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos @@ -2457,6 +2480,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_TUNNEL_FIELD tunnel_id #define LAST_FLOW_TAG_FIELD tag_id #define LAST_DROP_FIELD size +#define LAST_DROP_FIELD size /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -2488,12 +2512,16 @@ static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec, const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action) + struct mlx5_flow_act *action, u32 prev_type) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters_2); + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters_2); void *headers_c; void *headers_v; int match_ipv; @@ -2698,6 +2726,93 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; + case IB_FLOW_SPEC_GRE: + if (ib_spec->gre.mask.c_ks_res0_ver) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_GRE); + + MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, + 0xffff); + MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, + ntohs(ib_spec->gre.val.protocol)); + + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + gre_key_h), + &ib_spec->gre.mask.key, + sizeof(ib_spec->gre.mask.key)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, + gre_key_h), + &ib_spec->gre.val.key, + sizeof(ib_spec->gre.val.key)); + break; + case IB_FLOW_SPEC_MPLS: + switch (prev_type) { + case IB_FLOW_SPEC_UDP: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_udp), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + case IB_FLOW_SPEC_GRE: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_gre), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + default: + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + inner_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + inner_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } else { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } + } + break; case IB_FLOW_SPEC_VXLAN_TUNNEL: if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, LAST_TUNNEL_FIELD)) @@ -3029,6 +3144,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_flow_destination *rule_dst = dst; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; + u32 prev_type = 0; int err = 0; int dest_num = 1; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; @@ -3048,10 +3164,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, spec->match_value, - ib_flow, flow_attr, &flow_act); + ib_flow, flow_attr, &flow_act, + prev_type); if (err < 0) goto free; + prev_type = ((union ib_flow_spec *)ib_flow)->type; ib_flow += ((union ib_flow_spec *)ib_flow)->size; } @@ -5435,9 +5553,7 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev) { dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); - if (!dev->mdev->priv.uar) - return -ENOMEM; - return 0; + return PTR_ERR_OR_ZERO(dev->mdev->priv.uar); } static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1520a2f20f98..90a9c461cedc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -866,25 +866,28 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *u; int err; - *umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); - err = PTR_ERR_OR_ZERO(*umem); + *umem = NULL; + + u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); + err = PTR_ERR_OR_ZERO(u); if (err) { - *umem = NULL; - mlx5_ib_err(dev, "umem get failed (%d)\n", err); + mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); return err; } - mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(*umem); + ib_umem_release(u); return -EINVAL; } + *umem = u; + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); @@ -1458,13 +1461,12 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int access_flags = flags & IB_MR_REREG_ACCESS ? new_access_flags : mr->access_flags; - u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; - u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; int page_shift = 0; int upd_flags = 0; int npages = 0; int ncont = 0; int order = 0; + u64 addr, len; int err; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", @@ -1472,6 +1474,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + if (!mr->umem) + return -EINVAL; + + if (flags & IB_MR_REREG_TRANS) { + addr = virt_addr; + len = length; + } else { + addr = mr->umem->address; + len = mr->umem->length; + } + if (flags != IB_MR_REREG_PD) { /* * Replace umem. This needs to be done whether or not UMR is @@ -1479,6 +1492,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); + mr->umem = NULL; err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); if (err) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7ed4b70f6447..a4f1f638509f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -54,6 +54,7 @@ enum { enum { MLX5_IB_SQ_STRIDE = 6, + MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, }; static const u32 mlx5_ib_opcode[] = { @@ -259,7 +260,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; + if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift)) + return -EINVAL; qp->rq.wqe_shift = ucmd->rq_wqe_shift; + if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig) + return -EINVAL; qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } else { @@ -298,7 +303,9 @@ static int sq_overhead(struct ib_qp_init_attr *attr) max(sizeof(struct mlx5_wqe_atomic_seg) + sizeof(struct mlx5_wqe_raddr_seg), sizeof(struct mlx5_wqe_umr_ctrl_seg) + - sizeof(struct mlx5_mkey_seg)); + sizeof(struct mlx5_mkey_seg) + + MLX5_IB_SQ_UMR_INLINE_THRESHOLD / + MLX5_IB_UMR_OCTOWORD); break; case IB_QPT_XRC_TGT: @@ -480,11 +487,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return 1; } -static int first_med_bfreg(void) -{ - return 1; -} - enum { /* this is the first blue flame register in the array of bfregs assigned * to a processes. Since we do not use it for blue flame but rather @@ -510,6 +512,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev, return n >= 0 ? n : 0; } +static int first_med_bfreg(struct mlx5_ib_dev *dev, + struct mlx5_bfreg_info *bfregi) +{ + return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM; +} + static int first_hi_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { @@ -537,10 +545,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev, static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { - int minidx = first_med_bfreg(); + int minidx = first_med_bfreg(dev, bfregi); int i; - for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) { + if (minidx < 0) + return minidx; + + for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) { if (bfregi->count[i] < bfregi->count[minidx]) minidx = i; if (!bfregi->count[minidx]) @@ -2451,18 +2462,18 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { - if (rate == IB_RATE_PORT_CURRENT) { + if (rate == IB_RATE_PORT_CURRENT) return 0; - } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { + + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) return -EINVAL; - } else { - while (rate != IB_RATE_2_5_GBPS && - !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - MLX5_CAP_GEN(dev->mdev, stat_rate_support))) - --rate; - } - return rate + MLX5_STAT_RATE_OFFSET; + while (rate != IB_RATE_PORT_CURRENT && + !(1 << (rate + MLX5_STAT_RATE_OFFSET) & + MLX5_CAP_GEN(dev->mdev, stat_rate_support))) + --rate; + + return rate ? rate + MLX5_STAT_RATE_OFFSET : rate; } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, @@ -3633,13 +3644,15 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr) + struct mlx5_ib_mr *mr, bool umr_inline) { int size = mr->ndescs * mr->desc_size; memset(umr, 0, sizeof(*umr)); umr->flags = MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + umr->flags |= MLX5_UMR_INLINE; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(); } @@ -3823,6 +3836,24 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } +static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, + struct mlx5_ib_mr *mr, int mr_list_size) +{ + void *qend = qp->sq.qend; + void *addr = mr->descs; + int copy; + + if (unlikely(seg + mr_list_size > qend)) { + copy = qend - seg; + memcpy(seg, addr, copy); + addr += copy; + mr_list_size -= copy; + seg = mlx5_get_send_wqe(qp, 0); + } + memcpy(seg, addr, mr_list_size); + seg += mr_list_size; +} + static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4217,6 +4248,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + int mr_list_size = mr->ndescs * mr->desc_size; + bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), @@ -4224,7 +4257,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return -EINVAL; } - set_reg_umr_seg(*seg, mr); + set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) @@ -4236,10 +4269,14 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - set_reg_data_seg(*seg, mr, pd); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - + if (umr_inline) { + set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); + *size += get_xlt_octo(mr_list_size); + } else { + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } return 0; } diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 0a75164cedea..007d5e8a0121 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -461,7 +461,7 @@ static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev) /** * nes_netdev_start_xmit */ -static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7d3763b2e01c..988aace89430 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { struct qedr_ucontext *ucontext = get_qedr_ucontext(context); struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; - u64 unmapped_db = dev->db_phys_addr; + unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; unsigned long len = (vma->vm_end - vma->vm_start); - int rc = 0; - bool found; + unsigned long dpi_start; + + dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); DP_DEBUG(dev, QEDR_MSG_INIT, - "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", - vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len); - if (vma->vm_start & (PAGE_SIZE - 1)) { - DP_ERR(dev, "Vma_start not page aligned = %ld\n", - vma->vm_start); + "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", + (void *)vma->vm_start, (void *)vma->vm_end, + (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + + if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { + DP_ERR(dev, + "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n", + (void *)vma->vm_start, (void *)vma->vm_end); return -EINVAL; } - found = qedr_search_mmap(ucontext, vm_page, len); - if (!found) { - DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n", + if (!qedr_search_mmap(ucontext, phys_addr, len)) { + DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n", vma->vm_pgoff); return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - - if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + - dev->db_size))) { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "Trying to map doorbell bar for read\n"); - return -EPERM; - } - - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (phys_addr < dpi_start || + ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { + DP_ERR(dev, + "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", + (void *)phys_addr, (void *)dpi_start, + ucontext->dpi_size); + return -EINVAL; + } - rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - PAGE_SIZE, vma->vm_page_prot); - } else { - DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n"); - rc = remap_pfn_range(vma, vma->vm_start, - vma->vm_pgoff, len, vma->vm_page_prot); + if (vma->vm_flags & VM_READ) { + DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); + return -EINVAL; } - DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc); - return rc; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, + vma->vm_page_prot); } struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, @@ -2579,7 +2577,7 @@ static int qedr_set_page(struct ib_mr *ibmr, u64 addr) u32 pbes_in_page; if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { - DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages); + DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages); return -ENOMEM; } diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 46072455130c..3461df002f81 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1228,6 +1228,7 @@ static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port) #define QIB_BADINTR 0x8000 /* severe interrupt problems */ #define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */ #define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */ +#define QIB_SHUTDOWN 0x40000 /* device is shutting down */ /* * values for ppd->lflags (_ib_port_ related flags) @@ -1423,8 +1424,7 @@ u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw */ -dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long, - size_t, int); +int qib_map_page(struct pci_dev *d, struct page *p, dma_addr_t *daddr); struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi); /* diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 6a8800b65047..98e1ce14fa2a 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -364,6 +364,8 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, goto done; } for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { + dma_addr_t daddr; + for (; ntids--; tid++) { if (tid == tidcnt) tid = 0; @@ -380,12 +382,14 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, ret = -ENOMEM; break; } + ret = qib_map_page(dd->pcidev, pagep[i], &daddr); + if (ret) + break; + tidlist[i] = tid + tidoff; /* we "know" system pages and TID pages are same size */ dd->pageshadow[ctxttid + tid] = pagep[i]; - dd->physshadow[ctxttid + tid] = - qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + dd->physshadow[ctxttid + tid] = daddr; /* * don't need atomic or it's overhead */ @@ -868,7 +872,7 @@ bail: /* * qib_file_vma_fault - handle a VMA page fault. */ -static int qib_file_vma_fault(struct vm_fault *vmf) +static vm_fault_t qib_file_vma_fault(struct vm_fault *vmf) { struct page *page; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 6c68f8a97018..015520289735 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -841,6 +841,10 @@ static void qib_shutdown_device(struct qib_devdata *dd) struct qib_pportdata *ppd; unsigned pidx; + if (dd->flags & QIB_SHUTDOWN) + return; + dd->flags |= QIB_SHUTDOWN; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1182,6 +1186,7 @@ void qib_disable_after_error(struct qib_devdata *dd) static void qib_remove_one(struct pci_dev *); static int qib_init_one(struct pci_dev *, const struct pci_device_id *); +static void qib_shutdown_one(struct pci_dev *); #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " @@ -1199,6 +1204,7 @@ static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, .remove = qib_remove_one, + .shutdown = qib_shutdown_one, .id_table = qib_pci_tbl, .err_handler = &qib_pci_err_handler, }; @@ -1549,6 +1555,13 @@ static void qib_remove_one(struct pci_dev *pdev) qib_postinit_cleanup(dd); } +static void qib_shutdown_one(struct pci_dev *pdev) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + + qib_shutdown_device(dd); +} + /** * qib_create_rcvhdrq - create a receive header queue * @dd: the qlogic_ib device diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index c9955d48c50f..f35fdeb14347 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1828,7 +1828,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -1849,7 +1849,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) @@ -1858,7 +1858,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -1949,7 +1949,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) { diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 4662cc7bde92..f8a7de795beb 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -38,156 +38,6 @@ #include "qib_mad.h" /* - * Validate a RWQE and fill in the SGE state. - * Return 1 if OK. - */ -static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) -{ - int i, j, ret; - struct ib_wc wc; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - struct rvt_sge_state *ss; - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); - ss = &qp->r_sge; - ss->sg_list = qp->r_sg_list; - qp->r_len = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], - IB_ACCESS_LOCAL_WRITE); - if (unlikely(ret <= 0)) - goto bad_lkey; - qp->r_len += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ss->total_len = qp->r_len; - ret = 1; - goto bail; - -bad_lkey: - while (j) { - struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - - rvt_put_mr(sge->mr); - } - ss->num_sge = 0; - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * qib_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return -1 if there is a local error, 0 if no RWQE is available, - * otherwise return 1. - * - * Can be called from interrupt level. - */ -int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct rvt_rq *rq; - struct rvt_rwq *wq; - struct rvt_srq *srq; - struct rvt_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = ibsrq_to_rvtsrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. */ - smp_rmb(); - wqe = rvt_get_rwqe_ptr(rq, tail); - /* - * Even though we update the tail index in memory, the verbs - * consumer is not supposed to post more entries until a - * completion is generated. - */ - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - if (!wr_id_only && !qib_init_sge(qp, wqe)) { - ret = -1; - goto unlock; - } - qp->r_wr_id = wqe->wr_id; - - ret = 1; - set_bit(RVT_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * Validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - -/* * Switch to alternate path. * The QP s_lock should be held and interrupts disabled. */ @@ -419,7 +269,7 @@ again: wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -431,7 +281,7 @@ again: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 840eec6ebc33..3e54bc11e0ae 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -335,7 +335,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qp->r_sge = qp->s_rdma_read_sge; else { - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -471,7 +471,7 @@ rdma_last_imm: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) rvt_put_ss(&qp->s_rdma_read_sge); else { - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 3e4ff77260c2..f8d029a2390f 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -139,7 +139,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) else { int ret; - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -534,7 +534,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, else { int ret; - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index ce83ba9a12ef..16543d5e80c3 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -99,23 +99,27 @@ bail: * * I'm sure we won't be so lucky with other iommu's, so FIXME. */ -dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) +int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr) { dma_addr_t phys; - phys = pci_map_page(hwdev, page, offset, size, direction); + phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(hwdev, phys)) + return -ENOMEM; - if (phys == 0) { - pci_unmap_page(hwdev, phys, size, direction); - phys = pci_map_page(hwdev, page, offset, size, direction); + if (!phys) { + pci_unmap_page(hwdev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE); + phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(hwdev, phys)) + return -ENOMEM; /* * FIXME: If we get 0 again, we should keep this page, * map another, then free the 0 page. */ } - - return phys; + *daddr = phys; + return 0; } /** diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3977abbc83ad..14b4057a2b8f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -1631,10 +1631,6 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB; dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE; - snprintf(dd->verbs_dev.rdi.dparms.cq_name, - sizeof(dd->verbs_dev.rdi.dparms.cq_name), - "qib_cq%d", dd->unit); - qib_fill_device_attr(dd); ppd = dd->pport; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f887737ac142..f9a46768a19a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -321,8 +321,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, void mr_rcu_callback(struct rcu_head *list); -int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); - void qib_migrate_qp(struct rvt_qp *qp); int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, |