summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2023-06-27 14:06:29 -0300
committerJason Gunthorpe <jgg@nvidia.com>2023-06-27 14:06:29 -0300
commit5f004bcaee4cb552cf1b46a505f18f08777db7e5 (patch)
treec81bdd8e01ba8de9862c8bdb1b210559d4bcf154 /drivers/infiniband
parent4251f631fdfba0b38e4634510c5950ee157cc069 (diff)
parent6995e2de6891c724bfeb2db33d7b87775f913ad1 (diff)
Merge tag 'v6.4' into rdma.git for-next
Linux 6.4 Resolve conflicts between rdma rc and next in rxe_cq matching linux-next: drivers/infiniband/sw/rxe/rxe_cq.c: https://lore.kernel.org/r/20230622115246.365d30ad@canb.auug.org.au Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/cma.c4
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c7
-rw-r--r--drivers/infiniband/core/uverbs_main.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h2
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c6
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c11
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c7
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c25
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c43
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c12
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c89
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c276
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h16
-rw-r--r--drivers/infiniband/hw/mlx5/main.c3
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h14
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c26
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c44
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c25
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c16
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c55
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c4
31 files changed, 608 insertions, 192 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5146ef2dbfd9..1ee87c3aaeab 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
- route->path_rec->rate = iboe_get_rate(ndev);
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
/* In case ACK timeout is set, use this value to calculate
@@ -4963,7 +4963,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (!ndev)
return -ENODEV;
- ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4796f6a8828c..e836c9c477f6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
attr->path_mtu = cmd->base.path_mtu;
if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
attr->path_mig_state = cmd->base.path_mig_state;
- if (cmd->base.attr_mask & IB_QP_QKEY)
+ if (cmd->base.attr_mask & IB_QP_QKEY) {
+ if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ goto release_qp;
+ }
attr->qkey = cmd->base.qkey;
+ }
if (cmd->base.attr_mask & IB_QP_RQ_PSN)
attr->rq_psn = cmd->base.rq_psn;
if (cmd->base.attr_mask & IB_QP_SQ_PSN)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fbace69672ca..7c9c79c13941 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
while (list_empty(&ev_queue->event_list)) {
- spin_unlock_irq(&ev_queue->lock);
+ if (ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
return -ERESTARTSYS;
spin_lock_irq(&ev_queue->lock);
-
- /* If device was disassociated and no event exists set an error */
- if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
- spin_unlock_irq(&ev_queue->lock);
- return -EIO;
- }
}
event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 9e278d23eb82..ea81b2497511 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -136,8 +136,6 @@ struct bnxt_re_dev {
struct delayed_work worker;
u8 cur_prio_map;
- u16 active_speed;
- u8 active_width;
/* FP Notification Queue (CQ & SRQ) */
struct tasklet_struct nq_task;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index ef47c32a53cb..abef0b8baa7c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -208,6 +208,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ int rc;
memset(port_attr, 0, sizeof(*port_attr));
@@ -237,10 +238,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
- port_attr->active_speed = rdev->active_speed;
- port_attr->active_width = rdev->active_width;
+ rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+ &port_attr->active_width);
- return 0;
+ return rc;
}
int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
@@ -3413,9 +3414,7 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
/* post data received in the send queue */
- rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
-
- return 0;
+ return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
}
static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 729a2f5318cc..b42166fe7454 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1178,8 +1178,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
- ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
- &rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
@@ -1437,6 +1435,10 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
{
struct bnxt_qplib_cc_param cc_param = {};
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
+
/* Currently enabling only for GenP5 adapters */
if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
return;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index d5d418a8b003..91aed77ce40d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2057,6 +2057,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
u32 pg_sz_lvl;
int rc;
+ if (!cq->dpi) {
+ dev_err(&rcfw->pdev->dev,
+ "FP: CREATE_CQ failed due to NULL DPI\n");
+ return -EINVAL;
+ }
+
hwq_attr.res = res;
hwq_attr.depth = cq->max_wqe;
hwq_attr.stride = sizeof(struct cq_base);
@@ -2070,11 +2076,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
CMDQ_BASE_OPCODE_CREATE_CQ,
sizeof(req));
- if (!cq->dpi) {
- dev_err(&rcfw->pdev->dev,
- "FP: CREATE_CQ failed due to NULL DPI\n");
- return -EINVAL;
- }
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
req.cq_size = cpu_to_le32(cq->hwq.max_elements);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 7674136c08b2..5fd8f7c90bb0 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -215,17 +215,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
return -EINVAL;
hwq_attr->sginfo->npages = npages;
} else {
- unsigned long sginfo_num_pages = ib_umem_num_dma_blocks(
- hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize);
-
+ npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem,
+ hwq_attr->sginfo->pgsize);
hwq->is_user = true;
- npages = sginfo_num_pages;
- npages = (npages * PAGE_SIZE) /
- BIT_ULL(hwq_attr->sginfo->pgshft);
- if ((sginfo_num_pages * PAGE_SIZE) %
- BIT_ULL(hwq_attr->sginfo->pgshft))
- if (!npages)
- npages++;
}
if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index d5ad0861c537..ab45f9d4bb02 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -614,16 +614,15 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
/* Free the hwq if it already exist, must be a rereg */
if (mr->hwq.max_elements)
bnxt_qplib_free_hwq(res, &mr->hwq);
- /* Use system PAGE_SIZE */
hwq_attr.res = res;
hwq_attr.depth = pages;
- hwq_attr.stride = buf_pg_size;
+ hwq_attr.stride = sizeof(dma_addr_t);
hwq_attr.type = HWQ_TYPE_MR;
hwq_attr.sginfo = &sginfo;
hwq_attr.sginfo->umem = umem;
hwq_attr.sginfo->npages = pages;
- hwq_attr.sginfo->pgsize = PAGE_SIZE;
- hwq_attr.sginfo->pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo->pgsize = buf_pg_size;
+ hwq_attr.sginfo->pgshft = ilog2(buf_pg_size);
rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
if (rc) {
dev_err(&res->pdev->dev,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 8eca6c14d0cf..2a195c4b0f17 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1403,7 +1403,7 @@ static int pbl_continuous_initialize(struct efa_dev *dev,
*/
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
- u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
+ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
struct scatterlist *sgl;
int sg_dma_cnt, err;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index eba5fdc10703..8f7eb11066b4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4565,11 +4565,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
mtu = ib_mtu_enum_to_int(ib_mtu);
if (WARN_ON(mtu <= 0))
return -EINVAL;
-#define MAX_LP_MSG_LEN 16384
- /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */
- lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu);
- if (WARN_ON(lp_pktn_ini >= 0xF))
- return -EINVAL;
+#define MIN_LP_MSG_LEN 1024
+ /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
+ lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
if (attr_mask & IB_QP_PATH_MTU) {
hr_reg_write(context, QPC_MTU, ib_mtu);
@@ -4994,7 +4992,6 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
{
#define QP_ACK_TIMEOUT_MAX_HIP08 20
-#define QP_ACK_TIMEOUT_OFFSET 10
#define QP_ACK_TIMEOUT_MAX 31
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
@@ -5003,7 +5000,7 @@ static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
"local ACK timeout shall be 0 to 20.\n");
return false;
}
- *timeout += QP_ACK_TIMEOUT_OFFSET;
+ *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
} else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
if (*timeout > QP_ACK_TIMEOUT_MAX) {
ibdev_warn(&hr_dev->ib_dev,
@@ -5289,6 +5286,18 @@ out:
return ret;
}
+static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context)
+{
+ u8 timeout;
+
+ timeout = (u8)hr_reg_read(context, QPC_AT);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+
+ return timeout;
+}
+
static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
@@ -5366,7 +5375,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX);
qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME);
- qp_attr->timeout = (u8)hr_reg_read(&context, QPC_AT);
+ qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context);
qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 1b44d2434ab4..7033eae2407c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -44,6 +44,8 @@
#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
#define HNS_ROCE_V2_RSV_XRCD_NUM 0
+#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
+
#define HNS_ROCE_V3_SCCC_SZ 64
#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 37a5cf62f88b..14376490ac22 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -33,6 +33,7 @@
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
+#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
@@ -909,6 +910,44 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
return page_cnt;
}
+static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
+{
+ return int_pow(ba_per_bt, hopnum - 1);
+}
+
+static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ unsigned int pg_shift)
+{
+ unsigned long cap = hr_dev->caps.page_size_cap;
+ struct hns_roce_buf_region *re;
+ unsigned int pgs_per_l1ba;
+ unsigned int ba_per_bt;
+ unsigned int ba_num;
+ int i;
+
+ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
+ if (!(BIT(pg_shift) & cap))
+ continue;
+
+ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
+ ba_num = 0;
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ re = &mtr->hem_cfg.region[i];
+ if (re->hopnum == 0)
+ continue;
+
+ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
+ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
+ }
+
+ if (ba_num <= ba_per_bt)
+ return pg_shift;
+ }
+
+ return 0;
+}
+
static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned int ba_page_shift)
{
@@ -917,6 +956,10 @@ static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
hns_roce_hem_list_init(&mtr->hem_list);
if (!cfg->is_direct) {
+ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
+ if (!ba_page_shift)
+ return -ERANGE;
+
ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
cfg->region, cfg->region_count,
ba_page_shift);
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 6242ab6af77f..9c4fe4fa9001 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -522,11 +522,6 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (!iwqp->user_mode)
cancel_delayed_work_sync(&iwqp->dwork_flush);
- irdma_qp_rem_ref(&iwqp->ibqp);
- wait_for_completion(&iwqp->free_qp);
- irdma_free_lsmm_rsrc(iwqp);
- irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
-
if (!iwqp->user_mode) {
if (iwqp->iwscq) {
irdma_clean_cqes(iwqp, iwqp->iwscq);
@@ -534,6 +529,12 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
irdma_clean_cqes(iwqp, iwqp->iwrcq);
}
}
+
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
+
irdma_remove_push_mmap_entries(iwqp);
irdma_free_qp_rsrc(iwqp);
@@ -3291,6 +3292,7 @@ static int irdma_post_send(struct ib_qp *ibqp,
break;
case IB_WR_LOCAL_INV:
info.op_type = IRDMA_OP_TYPE_INV_STAG;
+ info.local_fence = info.read_fence;
info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
break;
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 1c06920505d2..93257fa5aae8 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
!vport_qcounters_supported(dev)) || !port_num)
return &dev->port[0].cnts;
- return &dev->port[port_num - 1].cnts;
+ return is_mdev_switchdev_mode(dev->mdev) ?
+ &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
}
/**
@@ -262,7 +263,7 @@ static struct rdma_hw_stats *
mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
return do_alloc_stats(cnts);
}
@@ -329,6 +330,7 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
{
u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ struct mlx5_core_dev *mdev;
__be32 val;
int ret, i;
@@ -336,12 +338,16 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
return 0;
+ mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+ if (!mdev)
+ return -EOPNOTSUPP;
+
MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
MLX5_SET(query_q_counter_in, in, other_vport, 1);
MLX5_SET(query_q_counter_in, in, vport_number,
dev->port[port_num].rep->vport);
MLX5_SET(query_q_counter_in, in, aggregate, 1);
- ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
if (ret)
return ret;
@@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
port_num != MLX5_VPORT_PF;
const struct mlx5_ib_counter *names;
- int j = 0, i;
+ int j = 0, i, size;
names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = basic_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = extended_err_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ for (i = 0; i < size; i++, j++) {
descs[j].name = names[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
+ offsets[j] = names[i].offset;
}
}
@@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
struct mlx5_ib_counters *cnts, u32 port_num)
{
- u32 num_counters, num_op_counters = 0;
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ u32 num_counters, num_op_counters = 0, size;
- num_counters = ARRAY_SIZE(basic_q_cnts);
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ num_counters = size;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
+ num_counters += size;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
+ num_counters += size;
cnts->num_q_counters = num_counters;
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF)
+ if (is_vport)
goto skip_non_qcounters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -725,11 +753,11 @@ err:
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
+ int num_cnt_ports = dev->num_ports;
int i, j;
- num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
- vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
@@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
+ int num_cnt_ports = dev->num_ports;
int err = 0;
int i;
bool is_shared;
MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
- vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+
+ /*
+ * In switchdev we need to allocate two ports, one that is used for
+ * the device Q_counters and it is essentially the real Q_counters of
+ * this device, while the other is used as a helper for PF to be able to
+ * query all other vports.
+ */
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
for (i = 0; i < num_cnt_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 3008632a6c20..1e419e080b53 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
- if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
- ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
@@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
return 0;
}
+static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+
+ if (ft_prio->anchor.ft)
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
+ ft_attr.prio = 0;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ ft_prio->anchor.ft = ft;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.ft) {
+ mlx5_destroy_flow_table(ft_prio->anchor.ft);
+ ft_prio->anchor.ft = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_drop)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+
+ ft_prio->anchor.fg_drop = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_drop) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
+ ft_prio->anchor.fg_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_goto_table)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+ ft_prio->anchor.fg_goto_table = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_goto_table) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
+ ft_prio->anchor.fg_goto_table = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_drop)
+ return 0;
+
+ flow_act.fg = ft_prio->anchor.fg_drop;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ NULL, 0);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_drop = handle;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_drop) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
+ ft_prio->anchor.rule_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_goto_table)
+ return 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.fg = ft_prio->anchor.fg_goto_table;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = ft_prio->flow_table;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_goto_table = handle;
+
+ return 0;
+}
+
+static void
+steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_goto_table) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
+ ft_prio->anchor.rule_goto_table = NULL;
+ }
+}
+
+static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ int err;
+
+ err = steering_anchor_create_ft(dev, ft_prio, ns_type);
+ if (err)
+ return err;
+
+ err = steering_anchor_create_fg_drop(ft_prio);
+ if (err)
+ goto destroy_ft;
+
+ err = steering_anchor_create_fg_goto_table(ft_prio);
+ if (err)
+ goto destroy_fg_drop;
+
+ err = steering_anchor_create_rule_drop(ft_prio);
+ if (err)
+ goto destroy_fg_goto_table;
+
+ err = steering_anchor_create_rule_goto_table(ft_prio);
+ if (err)
+ goto destroy_rule_drop;
+
+ return 0;
+
+destroy_rule_drop:
+ steering_anchor_destroy_rule_drop(ft_prio);
+destroy_fg_goto_table:
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+destroy_fg_drop:
+ steering_anchor_destroy_fg_drop(ft_prio);
+destroy_ft:
+ steering_anchor_destroy_ft(ft_prio);
+
+ return err;
+}
+
+static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
+{
+ steering_anchor_destroy_rule_goto_table(ft_prio);
+ steering_anchor_destroy_rule_drop(ft_prio);
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+ steering_anchor_destroy_fg_drop(ft_prio);
+ steering_anchor_destroy_ft(ft_prio);
+}
+
static int steering_anchor_cleanup(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
@@ -2035,6 +2264,9 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return -EBUSY;
mutex_lock(&obj->dev->flow_db->lock);
+ if (!--obj->ft_prio->anchor.rule_goto_table_ref)
+ steering_anchor_destroy_rule_goto_table(obj->ft_prio);
+
put_flow_table(obj->dev, obj->ft_prio, true);
mutex_unlock(&obj->dev->flow_db->lock);
@@ -2042,6 +2274,24 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject,
return 0;
}
+static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
+ int count)
+{
+ while (count--)
+ mlx5_steering_anchor_destroy_res(&prio[count]);
+}
+
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
+{
+ fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
+ fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
+ fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
+ fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
+}
+
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
struct mlx5_ib_flow_matcher *obj)
{
@@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return -ENOMEM;
mutex_lock(&dev->flow_db->lock);
+
ft_prio = _get_flow_table(dev, priority, ns_type, 0);
if (IS_ERR(ft_prio)) {
- mutex_unlock(&dev->flow_db->lock);
err = PTR_ERR(ft_prio);
goto free_obj;
}
ft_prio->refcount++;
- ft_id = mlx5_flow_table_id(ft_prio->flow_table);
- mutex_unlock(&dev->flow_db->lock);
+
+ if (!ft_prio->anchor.rule_goto_table_ref) {
+ err = steering_anchor_create_res(dev, ft_prio, ns_type);
+ if (err)
+ goto put_flow_table;
+ }
+
+ ft_prio->anchor.rule_goto_table_ref++;
+
+ ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
&ft_id, sizeof(ft_id));
if (err)
- goto put_flow_table;
+ goto destroy_res;
+
+ mutex_unlock(&dev->flow_db->lock);
uobj->object = obj;
obj->dev = dev;
@@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
return 0;
+destroy_res:
+ --ft_prio->anchor.rule_goto_table_ref;
+ mlx5_steering_anchor_destroy_res(ft_prio);
put_flow_table:
- mutex_lock(&dev->flow_db->lock);
put_flow_table(dev, ft_prio, true);
mutex_unlock(&dev->flow_db->lock);
free_obj:
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index ad320adaf321..b9734904f5f0 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
#else
static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
{
@@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->flow_db->lock);
return 0;
}
+
+inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
#endif
+
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
+ /* When a steering anchor is created, a special flow table is also
+ * created for the user to reference. Since the user can reference it,
+ * the kernel cannot trust that when the user destroys the steering
+ * anchor, they no longer reference the flow table.
+ *
+ * To address this issue, when a user destroys a steering anchor, only
+ * the flow steering rule in the table is destroyed, but the table
+ * itself is kept to deal with the above scenario. The remaining
+ * resources are only removed when the RDMA device is destroyed, which
+ * is a safe assumption that all references are gone.
+ */
+ mlx5_ib_fs_cleanup_anchor(dev);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5d45de223c43..f0b394ed7452 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4275,6 +4275,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
mlx5_ib_restrack_init,
NULL),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4bceef878c43..9c33d960af3c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -238,8 +238,19 @@ enum {
#define MLX5_IB_NUM_SNIFFER_FTS 2
#define MLX5_IB_NUM_EGRESS_FTS 1
#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
+
+struct mlx5_ib_anchor {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg_goto_table;
+ struct mlx5_flow_group *fg_drop;
+ struct mlx5_flow_handle *rule_goto_table;
+ struct mlx5_flow_handle *rule_drop;
+ unsigned int rule_goto_table_ref;
+};
+
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
+ struct mlx5_ib_anchor anchor;
unsigned int refcount;
};
@@ -1588,6 +1599,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
return 0;
+ if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active)
+ return 0;
+
return dev->lag_active ||
(MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 70ca8ffa9256..78b96bfb4e6a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
+ if (!mlx5_ib_lag_should_assign_affinity(dev) &&
+ mlx5_lag_is_lacp_owner(dev->mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 0c0ae214c3a9..5111735aafae 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -115,15 +115,16 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
void retransmit_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
+ unsigned long flags;
rxe_dbg_qp(qp, "retransmit timer fired\n");
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->comp.task);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
@@ -481,11 +482,13 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
qp->attr.sq_draining = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -499,7 +502,7 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp)
return;
}
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static inline enum comp_state complete_ack(struct rxe_qp *qp,
@@ -625,13 +628,15 @@ static void free_pkt(struct rxe_pkt_info *pkt)
*/
static void reset_retry_timer(struct rxe_qp *qp)
{
+ unsigned long flags;
+
if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) >= IB_QPS_RTS &&
psn_compare(qp->req.psn, qp->comp.psn) > 0)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
}
@@ -643,18 +648,19 @@ int rxe_completer(struct rxe_qp *qp)
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
int ret;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_resp_pkts(qp);
flush_send_queue(qp, notify);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 31a25aaa44a0..d5486cbb3f10 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -113,14 +113,14 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
if ((cq->notify & IB_CQ_NEXT_COMP) ||
(cq->notify & IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 2bc7361152ea..cd59666158b1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -159,6 +159,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
pkt->mask = RXE_GRH_MASK;
pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
+ /* remove udp header */
+ skb_pull(skb, sizeof(struct udphdr));
+
rxe_rcv(skb);
return 0;
@@ -401,6 +404,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
return -EIO;
}
+ /* remove udp header */
+ skb_pull(skb, sizeof(struct udphdr));
+
rxe_rcv(skb);
return 0;
@@ -412,15 +418,16 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
(!is_request && (qp_state(qp) < IB_QPS_RTR))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_icrc_generate(skb, pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 95d4a6760c33..a569b111a9d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -176,6 +176,9 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
spin_lock_init(&qp->rq.producer_lock);
spin_lock_init(&qp->rq.consumer_lock);
+ skb_queue_head_init(&qp->req_pkts);
+ skb_queue_head_init(&qp->resp_pkts);
+
atomic_set(&qp->ssn, 0);
atomic_set(&qp->skb_out, 0);
}
@@ -234,8 +237,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
- skb_queue_head_init(&qp->req_pkts);
-
rxe_init_task(&qp->req.task, qp, rxe_requester);
rxe_init_task(&qp->comp.task, qp, rxe_completer);
@@ -279,8 +280,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
}
}
- skb_queue_head_init(&qp->resp_pkts);
-
rxe_init_task(&qp->resp.task, qp, rxe_responder);
qp->resp.opcode = OPCODE_NONE;
@@ -300,6 +299,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
+ unsigned long flags;
rxe_get(pd);
rxe_get(rcq);
@@ -325,10 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (err)
goto err2;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_RESET;
qp->valid = 1;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return 0;
@@ -499,24 +499,28 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* move the qp to the error state */
void rxe_qp_error(struct rxe_qp *qp)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_sched_task(&qp->resp.task);
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
int mask)
{
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
/* caller should hold qp->state_lock */
@@ -562,14 +566,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.cur_qp_state = attr->qp_state;
if (mask & IB_QP_STATE) {
- spin_lock_bh(&qp->state_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->state_lock, flags);
err = __qp_chk_state(qp, attr, mask);
if (!err) {
qp->attr.qp_state = attr->qp_state;
rxe_dbg_qp(qp, "state -> %s\n",
qps2str[attr->qp_state]);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (err)
return err;
@@ -695,6 +701,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
/* called by the query qp verb */
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
{
+ unsigned long flags;
+
*attr = qp->attr;
attr->rq_psn = qp->resp.psn;
@@ -715,12 +723,13 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
/* Applications that get this state typically spin on it.
* Yield the processor
*/
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->attr.sq_draining) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
cond_resched();
+ } else {
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
- spin_unlock_bh(&qp->state_lock);
return 0;
}
@@ -743,10 +752,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
qp->valid = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;
if (qp_type(qp) == IB_QPT_RC) {
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 2f953cc74256..5861e4244049 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -14,6 +14,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_qp *qp)
{
unsigned int pkt_type;
+ unsigned long flags;
if (unlikely(!qp->valid))
return -EINVAL;
@@ -38,19 +39,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EINVAL;
}
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (pkt->mask & RXE_REQ_MASK) {
if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
} else {
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return -EINVAL;
}
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 400840c913a9..2171f19494bc 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -99,17 +99,18 @@ static void req_retry(struct rxe_qp *qp)
void rnr_nak_timer(struct timer_list *t)
{
struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
+ unsigned long flags;
rxe_dbg_qp(qp, "nak timer fired\n");
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
rxe_sched_task(&qp->req.task);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static void req_check_sq_drain_done(struct rxe_qp *qp)
@@ -118,8 +119,9 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
unsigned int index;
unsigned int cons;
struct rxe_send_wqe *wqe;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_SQD) {
q = qp->sq.queue;
index = qp->req.wqe_index;
@@ -140,7 +142,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
break;
qp->attr.sq_draining = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->ibqp.event_handler) {
struct ib_event ev;
@@ -154,7 +156,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp)
return;
} while (0);
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
}
static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
@@ -173,6 +175,7 @@ static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
{
struct rxe_send_wqe *wqe;
+ unsigned long flags;
req_check_sq_drain_done(qp);
@@ -180,13 +183,13 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
if (wqe == NULL)
return NULL;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
(wqe->state != wqe_state_processing))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return NULL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
@@ -676,16 +679,17 @@ int rxe_requester(struct rxe_qp *qp)
struct rxe_queue *q = qp->sq.queue;
struct rxe_ah *ah;
struct rxe_av *av;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
wqe = __req_next_wqe(qp);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (wqe)
goto err;
else
@@ -700,10 +704,10 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
/* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 8a3c9c2c5a2d..64c64f5f36a8 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -497,8 +497,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
if (mw->access & IB_ZERO_BASED)
qp->resp.offset = mw->addr;
- rxe_put(mw);
rxe_get(mr);
+ rxe_put(mw);
+ mw = NULL;
} else {
mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
if (!mr) {
@@ -1055,6 +1056,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ unsigned long flags;
if (!wqe)
goto finish;
@@ -1145,12 +1147,12 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERR_CQ_OVERFLOW;
finish:
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return RESPST_CHK_RESOURCE;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (unlikely(!pkt))
return RESPST_DONE;
@@ -1485,18 +1487,19 @@ int rxe_responder(struct rxe_qp *qp)
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
qp_state(qp) == IB_QPS_RESET) {
bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
drain_req_pkts(qp);
flush_recv_queue(qp, notify);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
goto exit;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 515f9ff72d18..f4321a172000 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -904,10 +904,10 @@ static int rxe_post_send_kernel(struct rxe_qp *qp,
if (!err)
rxe_sched_task(&qp->req.task);
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->comp.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
}
@@ -917,22 +917,23 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
{
struct rxe_qp *qp = to_rqp(ibqp);
int err;
+ unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
*bad_wr = wr;
rxe_err_qp(qp, "qp not ready to send");
return -EINVAL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (qp->is_user) {
/* Utilize process context to do protocol processing */
@@ -1008,22 +1009,22 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
struct rxe_rq *rq = &qp->rq;
unsigned long flags;
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
/* caller has already called destroy_qp */
if (WARN_ON_ONCE(!qp->valid)) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
/* see C10-97.2.1 */
if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
*bad_wr = wr;
rxe_dbg_qp(qp, "qp not ready to post recv");
return -EINVAL;
}
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
if (unlikely(qp->srq)) {
*bad_wr = wr;
@@ -1044,10 +1045,10 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
- spin_lock_bh(&qp->state_lock);
+ spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->resp.task);
- spin_unlock_bh(&qp->state_lock);
+ spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f290cd49698e..92e1e7587af8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -657,9 +657,13 @@ static int
isert_connect_error(struct rdma_cm_id *cma_id)
{
struct isert_conn *isert_conn = cma_id->qp->qp_context;
+ struct isert_np *isert_np = cma_id->context;
ib_drain_qp(isert_conn->qp);
+
+ mutex_lock(&isert_np->mutex);
list_del_init(&isert_conn->node);
+ mutex_unlock(&isert_np->mutex);
isert_conn->cm_id = NULL;
isert_put_conn(isert_conn);
@@ -2431,6 +2435,7 @@ isert_free_np(struct iscsi_np *np)
{
struct isert_np *isert_np = np->np_context;
struct isert_conn *isert_conn, *n;
+ LIST_HEAD(drop_conn_list);
if (isert_np->cm_id)
rdma_destroy_id(isert_np->cm_id);
@@ -2450,7 +2455,7 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
- isert_connect_release(isert_conn);
+ list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
@@ -2461,11 +2466,16 @@ isert_free_np(struct iscsi_np *np)
node) {
isert_info("cleaning isert_conn %p state (%d)\n",
isert_conn, isert_conn->state);
- isert_connect_release(isert_conn);
+ list_move_tail(&isert_conn->node, &drop_conn_list);
}
}
mutex_unlock(&isert_np->mutex);
+ list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) {
+ list_del_init(&isert_conn->node);
+ isert_connect_release(isert_conn);
+ }
+
np->np_context = NULL;
kfree(isert_np);
}
@@ -2560,8 +2570,6 @@ static void isert_wait_conn(struct iscsit_conn *conn)
isert_put_unsol_pending_cmds(conn);
isert_wait4cmds(conn);
isert_wait4logout(isert_conn);
-
- queue_work(isert_release_wq, &isert_conn->release_work);
}
static void isert_free_conn(struct iscsit_conn *conn)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 3738aaefbe13..b32941dd67cb 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -2039,6 +2039,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id,
return 0;
}
+/* The caller should do the cleanup in case of error */
static int create_cm(struct rtrs_clt_con *con)
{
struct rtrs_path *s = con->c.path;
@@ -2061,14 +2062,14 @@ static int create_cm(struct rtrs_clt_con *con)
err = rdma_set_reuseaddr(cm_id, 1);
if (err != 0) {
rtrs_err(s, "Set address reuse failed, err: %d\n", err);
- goto destroy_cm;
+ return err;
}
err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr,
(struct sockaddr *)&clt_path->s.dst_addr,
RTRS_CONNECT_TIMEOUT_MS);
if (err) {
rtrs_err(s, "Failed to resolve address, err: %d\n", err);
- goto destroy_cm;
+ return err;
}
/*
* Combine connection status and session events. This is needed
@@ -2083,29 +2084,15 @@ static int create_cm(struct rtrs_clt_con *con)
if (err == 0)
err = -ETIMEDOUT;
/* Timedout or interrupted */
- goto errr;
- }
- if (con->cm_err < 0) {
- err = con->cm_err;
- goto errr;
+ return err;
}
- if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) {
+ if (con->cm_err < 0)
+ return con->cm_err;
+ if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING)
/* Device removal */
- err = -ECONNABORTED;
- goto errr;
- }
+ return -ECONNABORTED;
return 0;
-
-errr:
- stop_cm(con);
- mutex_lock(&con->con_mutex);
- destroy_con_cq_qp(con);
- mutex_unlock(&con->con_mutex);
-destroy_cm:
- destroy_cm(con);
-
- return err;
}
static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path)
@@ -2333,7 +2320,7 @@ static void rtrs_clt_close_work(struct work_struct *work)
static int init_conns(struct rtrs_clt_path *clt_path)
{
unsigned int cid;
- int err;
+ int err, i;
/*
* On every new session connections increase reconnect counter
@@ -2349,10 +2336,8 @@ static int init_conns(struct rtrs_clt_path *clt_path)
goto destroy;
err = create_cm(to_clt_con(clt_path->s.con[cid]));
- if (err) {
- destroy_con(to_clt_con(clt_path->s.con[cid]));
+ if (err)
goto destroy;
- }
}
err = alloc_path_reqs(clt_path);
if (err)
@@ -2363,15 +2348,21 @@ static int init_conns(struct rtrs_clt_path *clt_path)
return 0;
destroy:
- while (cid--) {
- struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]);
+ /* Make sure we do the cleanup in the order they are created */
+ for (i = 0; i <= cid; i++) {
+ struct rtrs_clt_con *con;
- stop_cm(con);
+ if (!clt_path->s.con[i])
+ break;
- mutex_lock(&con->con_mutex);
- destroy_con_cq_qp(con);
- mutex_unlock(&con->con_mutex);
- destroy_cm(con);
+ con = to_clt_con(clt_path->s.con[i]);
+ if (con->c.cm_id) {
+ stop_cm(con);
+ mutex_lock(&con->con_mutex);
+ destroy_con_cq_qp(con);
+ mutex_unlock(&con->con_mutex);
+ destroy_cm(con);
+ }
destroy_con(con);
}
/*
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index 4bf9d868cc52..3696f367ff51 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
goto err;
iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
- if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
+ if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) {
+ kfree(iu->buf);
goto err;
+ }
iu->cqe.done = done;
iu->size = size;