author     Linus Torvalds <torvalds@linux-foundation.org>  2020-08-06 16:43:36 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-08-06 16:43:36 -0700
commit     d7806bbd22cabc3e3b0a985cfcffa29cf156bb30 (patch)
tree       ef24f40c658c2f015b7f96f429e47dd16ab6e5b4 /drivers
parent     d6efb3ac3e6c19ab722b28bdb9252bae0b9676b6 (diff)
parent     23fcc7dee2c6aba1060558683988263851e74bab (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "A quiet cycle after the larger 5.8 effort. Substantially cleanup and
  driver work with a few smaller features this time.

   - Driver updates for hfi1, rxe, mlx5, hns, qedr, usnic, bnxt_re

   - Removal of dead or redundant code across the drivers

   - RAW resource tracker dumps to include a device specific data blob
     for device objects to aide device debugging

   - Further advance the IOCTL interface, remove the ability to turn it
     off. Add QUERY_CONTEXT, QUERY_MR, and QUERY_PD commands

   - Remove stubs related to devices with no pkey table

   - A shared CQ scheme to allow multiple ULPs to share the CQ rings of
     a device to give higher performance

   - Several more static checker, syzkaller and rare crashers fixed"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (121 commits)
  RDMA/mlx5: Fix flow destination setting for RDMA TX flow table
  RDMA/rxe: Remove pkey table
  RDMA/umem: Add a schedule point in ib_umem_get()
  RDMA/hns: Fix the unneeded process when getting a general type of CQE error
  RDMA/hns: Fix error during modify qp RTS2RTS
  RDMA/hns: Delete unnecessary memset when allocating VF resource
  RDMA/hns: Remove redundant parameters in set_rc_wqe()
  RDMA/hns: Remove support for HIP08_A
  RDMA/hns: Refactor hns_roce_v2_set_hem()
  RDMA/hns: Remove redundant hardware opcode definitions
  RDMA/netlink: Remove CAP_NET_RAW check when dump a raw QP
  RDMA/include: Replace license text with SPDX tags
  RDMA/rtrs: remove WQ_MEM_RECLAIM for rtrs_wq
  RDMA/rtrs-clt: add an additional random 8 seconds before reconnecting
  RDMA/cma: Execute rdma_cm destruction from a handler properly
  RDMA/cma: Remove unneeded locking for req paths
  RDMA/cma: Using the standard locking pattern when delivering the removal event
  RDMA/cma: Simplify DEVICE_REMOVAL for internal_id
  RDMA/efa: Add EFA 0xefa1 PCI ID
  RDMA/efa: User/kernel compatibility handshake mechanism
  ...
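One of the items above is the shared CQ scheme: instead of every ULP creating its
own completion queues, the core hands out CQs from a per-device pool. The following
is a minimal consumer-side sketch; it assumes the pool helpers ib_cq_pool_get() and
ib_cq_pool_put() introduced by this work, so treat the exact signatures and the
example function names as approximations rather than authoritative.

#include <rdma/ib_verbs.h>

/* Borrow a CQ with room for nr_cqe completions from the device's pool. */
static struct ib_cq *example_get_shared_cq(struct ib_device *dev,
					   unsigned int nr_cqe)
{
	/* comp_vector_hint 0; callers can use different hints to spread load */
	return ib_cq_pool_get(dev, nr_cqe, 0, IB_POLL_SOFTIRQ);
}

/* Return the CQEs to the pool; the CQ itself stays alive for other ULPs. */
static void example_put_shared_cq(struct ib_cq *cq, unsigned int nr_cqe)
{
	ib_cq_pool_put(cq, nr_cqe);
}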
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/infiniband/Kconfig | 8
-rw-r--r--  drivers/infiniband/core/cache.c | 45
-rw-r--r--  drivers/infiniband/core/cma.c | 257
-rw-r--r--  drivers/infiniband/core/counters.c | 24
-rw-r--r--  drivers/infiniband/core/device.c | 28
-rw-r--r--  drivers/infiniband/core/mad.c | 30
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 2
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c | 27
-rw-r--r--  drivers/infiniband/core/nldev.c | 223
-rw-r--r--  drivers/infiniband/core/sysfs.c | 61
-rw-r--r--  drivers/infiniband/core/trace.c | 2
-rw-r--r--  drivers/infiniband/core/umem.c | 1
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 2
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 321
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 1
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 4
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_counters.c | 17
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_cq.c | 3
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_device.c | 48
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_mr.c | 54
-rw-r--r--  drivers/infiniband/core/verbs.c | 185
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 170
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 10
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 23
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 751
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 127
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 58
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 1
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 9
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 3
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 22
-rw-r--r--  drivers/infiniband/hw/cxgb4/restrack.c | 24
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 15
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.c | 2
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.h | 2
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c | 6
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 42
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 27
-rw-r--r--  drivers/infiniband/hw/hfi1/firmware.c | 16
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 9
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 22
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/pio_copy.c | 12
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.c | 10
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.h | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 25
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 9
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/uc.c | 8
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 31
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 253
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 19
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 210
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 10
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c | 14
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 22
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 37
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 1
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 709
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.h | 17
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 102
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.h | 45
-rw-r--r--  drivers/infiniband/hw/mlx5/flow.c | 765
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 2516
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.h | 29
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 3254
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 109
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 28
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 71
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.h | 1
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c | 121
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.h | 13
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/std_types.c | 45
-rw-r--r--  drivers/infiniband/hw/mlx5/wr.c | 68
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/qedr/main.c | 3
-rw-r--r--  drivers/infiniband/hw/qedr/qedr.h | 5
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 45
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_fwd.c | 4
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.c | 3
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.h | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 41
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 8
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 50
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 35
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 48
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 1
-rw-r--r--  drivers/infiniband/sw/siw/siw_main.c | 1
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 11
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.h | 3
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 67
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 13
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 25
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 112
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 175
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h | 21
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h | 23
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-clt.c | 16
-rw-r--r--  drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 20
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 7
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 3
125 files changed, 6097 insertions, 5994 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index a83f9eb86bfe..91b023341b77 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -37,14 +37,6 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
-config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
- bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
- depends on INFINIBAND_USER_ACCESS
- help
- IOCTL based uAPI support for Infiniband is enabled by default for
- new verbs only. This allows userspace to invoke the IOCTL based uAPI
- for current legacy verbs too.
-
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index a670209bbce6..ffad73bb40ff 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1054,7 +1054,7 @@ int ib_get_cached_pkey(struct ib_device *device,
cache = device->port_data[port_num].cache.pkey;
- if (index < 0 || index >= cache->table_len)
+ if (!cache || index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
@@ -1099,6 +1099,10 @@ int ib_find_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1117,6 +1121,7 @@ int ib_find_cached_pkey(struct ib_device *device,
ret = 0;
}
+err:
read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
@@ -1139,6 +1144,10 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1149,6 +1158,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
break;
}
+err:
read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
@@ -1425,23 +1435,26 @@ ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
goto err;
}
- pkey_cache = kmalloc(struct_size(pkey_cache, table,
- tprops->pkey_tbl_len),
- GFP_KERNEL);
- if (!pkey_cache) {
- ret = -ENOMEM;
- goto err;
- }
+ if (tprops->pkey_tbl_len) {
+ pkey_cache = kmalloc(struct_size(pkey_cache, table,
+ tprops->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!pkey_cache) {
+ ret = -ENOMEM;
+ goto err;
+ }
- pkey_cache->table_len = tprops->pkey_tbl_len;
+ pkey_cache->table_len = tprops->pkey_tbl_len;
- for (i = 0; i < pkey_cache->table_len; ++i) {
- ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
- if (ret) {
- dev_warn(&device->dev,
- "ib_query_pkey failed (%d) for index %d\n",
- ret, i);
- goto err;
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i,
+ pkey_cache->table + i);
+ if (ret) {
+ dev_warn(&device->dev,
+ "ib_query_pkey failed (%d) for index %d\n",
+ ret, i);
+ goto err;
+ }
}
}
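The cache.c hunk above makes the pkey cache allocation conditional on a non-zero
table length while keeping the kmalloc(struct_size(...)) idiom for a structure that
ends in a flexible array. A stand-alone sketch of that idiom follows; the struct and
helper names here are invented for illustration.

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_pkey_cache {
	int table_len;
	u16 table[];			/* flexible array member */
};

static struct example_pkey_cache *example_alloc_pkey_cache(int len)
{
	struct example_pkey_cache *cache;

	/* Ports without a pkey table (len == 0) simply get no cache. */
	if (!len)
		return NULL;

	/*
	 * struct_size() computes sizeof(*cache) + len * sizeof(cache->table[0])
	 * with overflow checking.
	 */
	cache = kmalloc(struct_size(cache, table, len), GFP_KERNEL);
	if (cache)
		cache->table_len = len;
	return cache;
}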
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index c30cf5307ce3..26de0dab60bb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -428,19 +428,6 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
- enum rdma_cm_state exch)
-{
- unsigned long flags;
- enum rdma_cm_state old;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- old = id_priv->state;
- id_priv->state = exch;
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return old;
-}
-
static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
return hdr->ip_version >> 4;
@@ -1829,23 +1816,11 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
}
}
-void rdma_destroy_id(struct rdma_cm_id *id)
+static void _destroy_id(struct rdma_id_private *id_priv,
+ enum rdma_cm_state state)
{
- struct rdma_id_private *id_priv;
- enum rdma_cm_state state;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- trace_cm_id_destroy(id_priv);
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- /*
- * Wait for any active callback to finish. New callbacks will find
- * the id_priv state set to destroying and abort.
- */
- mutex_lock(&id_priv->handler_mutex);
- mutex_unlock(&id_priv->handler_mutex);
-
rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
@@ -1874,6 +1849,42 @@ void rdma_destroy_id(struct rdma_cm_id *id)
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
+
+/*
+ * destroy an ID from within the handler_mutex. This ensures that no other
+ * handlers can start running concurrently.
+ */
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
+ __releases(&idprv->handler_mutex)
+{
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ trace_cm_id_destroy(id_priv);
+
+ /*
+ * Setting the state to destroyed under the handler mutex provides a
+ * fence against calling handler callbacks. If this is invoked due to
+ * the failure of a handler callback then it guarentees that no future
+ * handlers will be called.
+ */
+ lockdep_assert_held(&id_priv->handler_mutex);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ id_priv->state = RDMA_CM_DESTROYING;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ _destroy_id(id_priv, state);
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+ destroy_id_handler_unlock(id_priv);
+}
EXPORT_SYMBOL(rdma_destroy_id);
static int cma_rep_recv(struct rdma_id_private *id_priv)
@@ -1925,6 +1936,8 @@ static int cma_cm_event_handler(struct rdma_id_private *id_priv,
{
int ret;
+ lockdep_assert_held(&id_priv->handler_mutex);
+
trace_cm_event_handler(id_priv, event);
ret = id_priv->id.event_handler(&id_priv->id, event);
trace_cm_event_done(id_priv, event, ret);
@@ -1936,7 +1949,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event = {};
- int ret = 0;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
@@ -2005,14 +2018,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static struct rdma_id_private *
@@ -2174,7 +2185,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
mutex_lock(&listen_id->handler_mutex);
if (listen_id->state != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
- goto err1;
+ goto err_unlock;
}
offset = cma_user_data_offset(listen_id);
@@ -2191,55 +2202,38 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
if (!conn_id) {
ret = -ENOMEM;
- goto err1;
+ goto err_unlock;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
- if (ret)
- goto err2;
+ if (ret) {
+ destroy_id_handler_unlock(conn_id);
+ goto err_unlock;
+ }
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- cma_id_get(conn_id);
ret = cma_cm_event_handler(conn_id, &event);
- if (ret)
- goto err3;
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.ib = NULL;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ goto net_dev_put;
+ }
+
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
(conn_id->id.qp_type != IB_QPT_UD)) {
trace_cm_send_mra(cm_id->context);
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
}
- mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
- mutex_unlock(&listen_id->handler_mutex);
- cma_id_put(conn_id);
- if (net_dev)
- dev_put(net_dev);
- return 0;
-err3:
- cma_id_put(conn_id);
- /* Destroy the CM ID by returning a non-zero value. */
- conn_id->cm_id.ib = NULL;
-err2:
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
-err1:
+err_unlock:
mutex_unlock(&listen_id->handler_mutex);
- if (conn_id)
- rdma_destroy_id(&conn_id->id);
net_dev_put:
if (net_dev)
@@ -2339,9 +2333,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
@@ -2388,16 +2380,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
conn_id->cm_id.iw = cm_id;
@@ -2407,25 +2399,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- cma_id_get(conn_id);
ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
mutex_unlock(&listen_id->handler_mutex);
- cma_id_put(conn_id);
- rdma_destroy_id(&conn_id->id);
+ destroy_id_handler_unlock(conn_id);
return ret;
}
mutex_unlock(&conn_id->handler_mutex);
- cma_id_put(conn_id);
out:
mutex_unlock(&listen_id->handler_mutex);
@@ -2482,6 +2465,10 @@ static int cma_listen_handler(struct rdma_cm_id *id,
{
struct rdma_id_private *id_priv = id->context;
+ /* Listening IDs are always destroyed on removal */
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ return -1;
+
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
trace_cm_event_handler(id_priv, event);
@@ -2657,21 +2644,21 @@ static void cma_work_handler(struct work_struct *_work)
{
struct cma_work *work = container_of(_work, struct cma_work, work);
struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
- goto out;
+ goto out_unlock;
if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ goto out_free;
}
-out:
+
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
cma_id_put(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
+out_free:
kfree(work);
}
@@ -2679,23 +2666,22 @@ static void cma_ndev_work_handler(struct work_struct *_work)
{
struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state == RDMA_CM_DESTROYING ||
id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out;
+ goto out_unlock;
if (cma_cm_event_handler(id_priv, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ goto out_free;
}
-out:
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
cma_id_put(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
+out_free:
kfree(work);
}
@@ -3171,9 +3157,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
if (cma_cm_event_handler(id_priv, &event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return;
}
out:
@@ -3790,7 +3774,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct rdma_cm_event event = {};
const struct ib_cm_sidr_rep_event_param *rep =
&ib_event->param.sidr_rep_rcvd;
- int ret = 0;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
if (id_priv->state != RDMA_CM_CONNECT)
@@ -3840,14 +3824,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
@@ -4372,9 +4354,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return 0;
}
@@ -4789,50 +4769,59 @@ free_cma_dev:
return ret;
}
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
- struct rdma_cm_event event = {};
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
enum rdma_cm_state state;
- int ret = 0;
-
- /* Record that we want to remove the device */
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
- if (state == RDMA_CM_DESTROYING)
- return 0;
+ unsigned long flags;
- cma_cancel_operation(id_priv, state);
mutex_lock(&id_priv->handler_mutex);
+ /* Record that we want to remove the device */
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+ return;
+ }
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
- /* Check for destruction from another callback. */
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
- goto out;
-
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = cma_cm_event_handler(id_priv, &event);
-out:
+ if (cma_cm_event_handler(id_priv, &event)) {
+ /*
+ * At this point the ULP promises it won't call
+ * rdma_destroy_id() concurrently
+ */
+ cma_id_put(id_priv);
+ mutex_unlock(&id_priv->handler_mutex);
+ trace_cm_id_destroy(id_priv);
+ _destroy_id(id_priv, state);
+ return;
+ }
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+
+ /*
+ * If this races with destroy then the thread that first assigns state
+ * to a destroying does the cancel.
+ */
+ cma_cancel_operation(id_priv, state);
+ cma_id_put(id_priv);
}
static void cma_process_remove(struct cma_device *cma_dev)
{
- struct rdma_id_private *id_priv;
- int ret;
-
mutex_lock(&lock);
while (!list_empty(&cma_dev->id_list)) {
- id_priv = list_entry(cma_dev->id_list.next,
- struct rdma_id_private, list);
+ struct rdma_id_private *id_priv = list_first_entry(
+ &cma_dev->id_list, struct rdma_id_private, list);
list_del(&id_priv->listen_list);
list_del_init(&id_priv->list);
cma_id_get(id_priv);
mutex_unlock(&lock);
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
- cma_id_put(id_priv);
- if (ret)
- rdma_destroy_id(&id_priv->id);
+ cma_send_device_removal_put(id_priv);
mutex_lock(&lock);
}
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 738d1faf4bba..636166880442 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -8,7 +8,7 @@
#include "core_priv.h"
#include "restrack.h"
-#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE)
+#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
static int __counter_set_mode(struct rdma_counter_mode *curr,
enum rdma_nl_counter_mode new_mode,
@@ -149,23 +149,13 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
struct auto_mode_param *param = &counter->mode.param;
bool match = true;
- /*
- * Ensure that counter belongs to the right PID. This operation can
- * race with user space which kills the process and leaves QP and
- * counters orphans.
- *
- * It is not a big deal because exitted task will leave both QP and
- * counter in the same bucket of zombie process. Just ensure that
- * process is still alive before procedding.
- *
- */
- if (task_pid_nr(counter->res.task) != task_pid_nr(qp->res.task) ||
- !task_pid_nr(qp->res.task))
- return false;
-
if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
match &= (param->qp_type == qp->qp_type);
+ if (auto_mask & RDMA_COUNTER_MASK_PID)
+ match &= (task_pid_nr(counter->res.task) ==
+ task_pid_nr(qp->res.task));
+
return match;
}
@@ -288,7 +278,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
struct rdma_counter *counter;
int ret;
- if (!qp->res.valid)
+ if (!qp->res.valid || rdma_is_kernel_res(&qp->res))
return 0;
if (!rdma_is_port_valid(dev, port))
@@ -483,7 +473,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
goto err;
}
- if (counter->res.task != qp->res.task) {
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
ret = -EINVAL;
goto err_task;
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 2927a9d16eaa..ef0cd2998671 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -272,7 +272,6 @@ static void ib_device_check_mandatory(struct ib_device *device)
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
- IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_qp),
@@ -1343,6 +1342,10 @@ out:
return ret;
}
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
/**
* ib_register_device - Register an IB device with IB core
* @device: Device to register
@@ -1413,11 +1416,11 @@ int ib_register_device(struct ib_device *device, const char *name)
* possibility for a parallel unregistration along with this
* error flow. Since we have a refcount here we know any
* parallel flow is stopped in disable_device and will see the
- * NULL pointers, causing the responsibility to
+ * special dealloc_driver pointer, causing the responsibility to
* ib_dealloc_device() to revert back to this thread.
*/
dealloc_fn = device->ops.dealloc_driver;
- device->ops.dealloc_driver = NULL;
+ device->ops.dealloc_driver = prevent_dealloc_device;
ib_device_put(device);
__ib_unregister_device(device);
device->ops.dealloc_driver = dealloc_fn;
@@ -1466,7 +1469,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
* Drivers using the new flow may not call ib_dealloc_device except
* in error unwind prior to registration success.
*/
- if (ib_dev->ops.dealloc_driver) {
+ if (ib_dev->ops.dealloc_driver &&
+ ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
ib_dealloc_device(ib_dev);
}
@@ -2361,6 +2365,9 @@ int ib_query_pkey(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ if (!device->ops.query_pkey)
+ return -EOPNOTSUPP;
+
return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
@@ -2621,8 +2628,14 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, drain_rq);
SET_DEVICE_OP(dev_ops, drain_sq);
SET_DEVICE_OP(dev_ops, enable_driver);
- SET_DEVICE_OP(dev_ops, fill_res_entry);
- SET_DEVICE_OP(dev_ops, fill_stat_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
SET_DEVICE_OP(dev_ops, get_dev_fw_str);
SET_DEVICE_OP(dev_ops, get_dma_mr);
SET_DEVICE_OP(dev_ops, get_hw_stats);
@@ -2667,6 +2680,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, query_port);
SET_DEVICE_OP(dev_ops, query_qp);
SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, query_ucontext);
SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
SET_DEVICE_OP(dev_ops, read_counters);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
@@ -2679,10 +2693,12 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, set_vf_link_state);
SET_OBJ_SIZE(dev_ops, ib_ah);
+ SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
SET_OBJ_SIZE(dev_ops, ib_pd);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
+ SET_OBJ_SIZE(dev_ops, ib_xrcd);
}
EXPORT_SYMBOL(ib_set_device_ops);
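The device.c change above swaps the old "temporarily NULL out dealloc_driver" trick
for a dedicated no-op sentinel (prevent_dealloc_device), so the unregister path can
tell "suppressed for this error unwind" apart from "driver never provided a
callback". A self-contained illustration of that sentinel pattern in plain C
follows; all names here are made up for the example.

#include <stdio.h>

struct example_ops {
	void (*teardown)(void);
};

static void real_teardown(void)
{
	puts("driver teardown");
}

/* No-op sentinel: distinct from NULL, so "temporarily disabled" and
 * "never provided" remain distinguishable. */
static void prevent_teardown(void)
{
}

static void example_unregister(struct example_ops *ops)
{
	/* Run the callback only if it exists and is not the sentinel. */
	if (ops->teardown && ops->teardown != prevent_teardown)
		ops->teardown();
}

int main(void)
{
	struct example_ops ops = { .teardown = real_teardown };

	ops.teardown = prevent_teardown;	/* suppress during error unwind */
	example_unregister(&ops);		/* prints nothing */

	ops.teardown = real_teardown;		/* restored by the caller */
	example_unregister(&ops);		/* prints "driver teardown" */
	return 0;
}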
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a09f8e3c7f3f..9355e521d9f4 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -402,7 +402,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
- atomic_set(&mad_agent_priv->refcount, 1);
+ refcount_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
@@ -484,7 +484,7 @@ EXPORT_SYMBOL(ib_register_mad_agent);
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
- if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ if (refcount_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
@@ -718,7 +718,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* Reference MAD agent until receive
* side of local completion handled
*/
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
} else
kfree(mad_priv);
break;
@@ -758,7 +758,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
@@ -916,7 +916,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
mad_send_wr->send_buf.mad_agent = mad_agent;
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
@@ -1131,7 +1131,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->status = IB_WC_SUCCESS;
/* Reference MAD agent until send completes */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&mad_send_wr->agent_list,
&mad_agent_priv->send_list);
@@ -1148,7 +1148,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_del(&mad_send_wr->agent_list);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
goto error;
}
}
@@ -1554,7 +1554,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
rcu_read_lock();
mad_agent = xa_load(&ib_mad_clients, hi_tid);
- if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+ if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
mad_agent = NULL;
rcu_read_unlock();
} else {
@@ -1606,7 +1606,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
}
}
if (mad_agent)
- atomic_inc(&mad_agent->refcount);
+ refcount_inc(&mad_agent->refcount);
out:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
@@ -1831,7 +1831,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent, NULL,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
} else {
/* not user rmpp, revert to normal behavior and
* drop the mad */
@@ -1848,7 +1848,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
&mad_agent_priv->agent,
&mad_send_wr->send_buf,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
@@ -2438,7 +2438,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
list_del(&mad_send_wr->agent_list);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
}
}
@@ -2572,7 +2572,7 @@ static void local_completions(struct work_struct *work)
&local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
- atomic_dec(&recv_mad_agent->refcount);
+ deref_mad_agent(recv_mad_agent);
spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
}
@@ -2585,7 +2585,7 @@ local_send_completion:
&mad_send_wc);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
if (free_mad)
kfree(local->mad_priv);
kfree(local);
@@ -2671,7 +2671,7 @@ static void timeout_sends(struct work_struct *work)
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 403d8673a2f9..4aa16b35dad0 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -103,7 +103,7 @@ struct ib_mad_agent_private {
struct work_struct local_work;
struct list_head rmpp_list;
- atomic_t refcount;
+ refcount_t refcount;
union {
struct completion comp;
struct rcu_head rcu;
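The mad.c hunks above converge on a single idiom: a refcount_t paired with a
completion, where the final reference drop signals the completion that the
unregister path waits on. A compact kernel-style sketch of that idiom follows;
example_agent and its helpers are invented for illustration.

#include <linux/refcount.h>
#include <linux/completion.h>
#include <linux/slab.h>

struct example_agent {
	refcount_t refcount;
	struct completion comp;
};

static void example_agent_init(struct example_agent *agent)
{
	refcount_set(&agent->refcount, 1);	/* the "registration" reference */
	init_completion(&agent->comp);
}

static void example_agent_get(struct example_agent *agent)
{
	refcount_inc(&agent->refcount);		/* warns and saturates on misuse */
}

static void example_agent_put(struct example_agent *agent)
{
	if (refcount_dec_and_test(&agent->refcount))
		complete(&agent->comp);		/* last user is gone */
}

static void example_agent_unregister(struct example_agent *agent)
{
	example_agent_put(agent);		/* drop the registration reference */
	wait_for_completion(&agent->comp);	/* wait out in-flight users */
	kfree(agent);
}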
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 5ec57abc0849..e0573e4d0404 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -40,8 +40,7 @@
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
- RMPP_STATE_COMPLETE,
- RMPP_STATE_CANCELING
+ RMPP_STATE_COMPLETE
};
struct mad_rmpp_recv {
@@ -52,7 +51,7 @@ struct mad_rmpp_recv {
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
@@ -73,7 +72,7 @@ struct mad_rmpp_recv {
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
- if (atomic_dec_and_test(&rmpp_recv->refcount))
+ if (refcount_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
@@ -92,22 +91,18 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
- if (rmpp_recv->state != RMPP_STATE_COMPLETE)
- ib_free_recv_mad(rmpp_recv->rmpp_wc);
- rmpp_recv->state = RMPP_STATE_CANCELING;
- }
- spin_unlock_irqrestore(&agent->lock, flags);
-
- list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
+ spin_unlock_irqrestore(&agent->lock, flags);
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
destroy_rmpp_recv(rmpp_recv);
}
}
@@ -272,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
- if (rmpp_recv->state == RMPP_STATE_CANCELING) {
- spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
- return;
- }
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
@@ -305,7 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
- atomic_set(&rmpp_recv->refcount, 1);
+ refcount_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
@@ -357,7 +348,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent,
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
@@ -553,7 +544,7 @@ start_rmpp(struct ib_mad_agent_private *agent,
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e16105be2eb2..12d29d54a081 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -114,6 +114,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
@@ -446,27 +447,11 @@ static int fill_res_name_pid(struct sk_buff *msg,
return err ? -EMSGSIZE : 0;
}
-static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_qp_entry_query(struct sk_buff *msg,
+ struct rdma_restrack_entry *res,
+ struct ib_device *dev,
+ struct ib_qp *qp)
{
- if (!dev->ops.fill_res_entry)
- return false;
- return dev->ops.fill_res_entry(msg, res);
-}
-
-static bool fill_stat_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (!dev->ops.fill_stat_entry)
- return false;
- return dev->ops.fill_stat_entry(msg, res);
-}
-
-static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
- struct rdma_restrack_entry *res, uint32_t port)
-{
- struct ib_qp *qp = container_of(res, struct ib_qp, res);
- struct ib_device *dev = qp->device;
struct ib_qp_init_attr qp_init_attr;
struct ib_qp_attr qp_attr;
int ret;
@@ -475,16 +460,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (ret)
return ret;
- if (port && port != qp_attr.port_num)
- return -EAGAIN;
-
- /* In create_qp() port is not set yet */
- if (qp_attr.port_num &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
- goto err;
-
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
- goto err;
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
qp_attr.dest_qp_num))
@@ -508,19 +483,53 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
+ if (dev->ops.fill_res_qp_entry)
+ return dev->ops.fill_res_qp_entry(msg, qp);
+ return 0;
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
+ int ret;
+
+ if (port && port != qp->port)
+ return -EAGAIN;
+
+ /* In create_qp() port is not set yet */
+ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+ return -EINVAL;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+ if (ret)
+ return -EMSGSIZE;
+
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_name_pid(msg, res))
- goto err;
+ ret = fill_res_name_pid(msg, res);
+ if (ret)
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return fill_res_qp_entry_query(msg, res, dev, qp);
+}
- return 0;
+static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
-err: return -EMSGSIZE;
+ if (port && port != qp->port)
+ return -EAGAIN;
+ if (!dev->ops.fill_res_qp_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_qp_entry_raw(msg, qp);
}
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -568,9 +577,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (fill_res_name_pid(msg, res))
goto err;
- if (fill_res_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_res_cm_id_entry)
+ return dev->ops.fill_res_cm_id_entry(msg, cm_id);
return 0;
err: return -EMSGSIZE;
@@ -583,35 +591,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct ib_device *dev = cq->device;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
/* Poll context is only valid for kernel CQs */
if (rdma_is_kernel_res(res) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
cq->uobject->uevent.uobject.context->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_cq_entry) ?
+ dev->ops.fill_res_cq_entry(msg, cq) : 0;
+}
- return 0;
+static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct ib_device *dev = cq->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_cq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_cq_entry_raw(msg, cq);
}
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -622,38 +637,45 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
- goto err;
+ return -EMSGSIZE;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_mr_entry) ?
+ dev->ops.fill_res_mr_entry(msg, mr) :
+ 0;
+}
- return 0;
+static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_mr_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_mr_entry_raw(msg, mr);
}
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
- struct ib_device *dev = pd->device;
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
@@ -676,13 +698,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
pd->uobject->context->res.id))
goto err;
- if (fill_res_name_pid(msg, res))
- goto err;
-
- if (fill_res_entry(dev, msg, res))
- goto err;
-
- return 0;
+ return fill_res_name_pid(msg, res);
err: return -EMSGSIZE;
}
@@ -695,11 +711,16 @@ static int fill_stat_counter_mode(struct sk_buff *msg,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
return -EMSGSIZE;
- if (m->mode == RDMA_COUNTER_MODE_AUTO)
+ if (m->mode == RDMA_COUNTER_MODE_AUTO) {
if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
return -EMSGSIZE;
+ if ((m->mask & RDMA_COUNTER_MASK_PID) &&
+ fill_res_name_pid(msg, &counter->res))
+ return -EMSGSIZE;
+ }
+
return 0;
}
@@ -738,9 +759,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
xa_lock(&rt->xa);
xa_for_each(&rt->xa, id, res) {
qp = container_of(res, struct ib_qp, res);
- if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- continue;
-
if (!qp->counter || (qp->counter->id != counter->id))
continue;
@@ -793,9 +811,8 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
goto err;
- if (fill_stat_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_stat_mr_entry)
+ return dev->ops.fill_stat_mr_entry(msg, mr);
return 0;
err:
@@ -840,7 +857,6 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
- fill_res_name_pid(msg, &counter->res) ||
fill_stat_counter_mode(msg, counter) ||
fill_stat_counter_qps(msg, counter) ||
fill_stat_counter_hwcounters(msg, counter))
@@ -1177,7 +1193,6 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
struct nldev_fill_res_entry {
enum rdma_nldev_attr nldev_attr;
- enum rdma_nldev_command nldev_cmd;
u8 flags;
u32 entry;
u32 id;
@@ -1189,40 +1204,34 @@ enum nldev_res_flags {
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
- .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
[RDMA_RESTRACK_COUNTER] = {
- .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
@@ -1281,7 +1290,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);
if (fill_nldev_handle(msg, device)) {
@@ -1359,7 +1369,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);
if (fill_nldev_handle(skb, device)) {
@@ -1441,26 +1452,29 @@ err_index:
return ret;
}
-#define RES_GET_FUNCS(name, type) \
- static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
+#define RES_GET_FUNCS(name, type) \
+ static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
- { \
- return res_get_common_dumpit(skb, cb, type, \
- fill_res_##name##_entry); \
- } \
- static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
- struct nlmsghdr *nlh, \
+ { \
+ return res_get_common_dumpit(skb, cb, type, \
+ fill_res_##name##_entry); \
+ } \
+ static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
+ struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
- { \
- return res_get_common_doit(skb, nlh, extack, type, \
- fill_res_##name##_entry); \
+ { \
+ return res_get_common_doit(skb, nlh, extack, type, \
+ fill_res_##name##_entry); \
}
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
static LIST_HEAD(link_ops);
@@ -2145,6 +2159,21 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_stat_del_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
+ [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
+ .doit = nldev_res_get_qp_raw_doit,
+ .dump = nldev_res_get_qp_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
+ .doit = nldev_res_get_cq_raw_doit,
+ .dump = nldev_res_get_cq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
+ .doit = nldev_res_get_mr_raw_doit,
+ .dump = nldev_res_get_mr_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
};
void __init nldev_init(void)
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index defe9cd4c5ee..c11e50510e49 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -58,7 +58,7 @@ struct ib_port {
struct ib_device *ibdev;
struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
- struct attribute_group pkey_group;
+ struct attribute_group *pkey_group;
struct attribute_group *pma_table;
struct attribute_group *hw_stats_ag;
struct rdma_hw_stats *hw_stats;
@@ -681,11 +681,16 @@ static void ib_port_release(struct kobject *kobj)
kfree(p->gid_group.attrs);
}
- if (p->pkey_group.attrs) {
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
+ if (p->pkey_group) {
+ if (p->pkey_group->attrs) {
+ for (i = 0; (a = p->pkey_group->attrs[i]); ++i)
+ kfree(a);
+
+ kfree(p->pkey_group->attrs);
+ }
- kfree(p->pkey_group.attrs);
+ kfree(p->pkey_group);
+ p->pkey_group = NULL;
}
kfree(p);
@@ -1118,17 +1123,26 @@ static int add_port(struct ib_core_device *coredev, int port_num)
if (ret)
goto err_free_gid_type;
- p->pkey_group.name = "pkeys";
- p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
- attr.pkey_tbl_len);
- if (!p->pkey_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_type;
+ if (attr.pkey_tbl_len) {
+ p->pkey_group = kzalloc(sizeof(*p->pkey_group), GFP_KERNEL);
+ if (!p->pkey_group) {
+ ret = -ENOMEM;
+ goto err_remove_gid_type;
+ }
+
+ p->pkey_group->name = "pkeys";
+ p->pkey_group->attrs = alloc_group_attrs(show_port_pkey,
+ attr.pkey_tbl_len);
+ if (!p->pkey_group->attrs) {
+ ret = -ENOMEM;
+ goto err_free_pkey_group;
+ }
+
+ ret = sysfs_create_group(&p->kobj, p->pkey_group);
+ if (ret)
+ goto err_free_pkey;
}
- ret = sysfs_create_group(&p->kobj, &p->pkey_group);
- if (ret)
- goto err_free_pkey;
if (device->ops.init_port && is_full_dev) {
ret = device->ops.init_port(device, port_num, &p->kobj);
@@ -1150,14 +1164,20 @@ static int add_port(struct ib_core_device *coredev, int port_num)
return 0;
err_remove_pkey:
- sysfs_remove_group(&p->kobj, &p->pkey_group);
+ if (p->pkey_group)
+ sysfs_remove_group(&p->kobj, p->pkey_group);
err_free_pkey:
- for (i = 0; i < attr.pkey_tbl_len; ++i)
- kfree(p->pkey_group.attrs[i]);
+ if (p->pkey_group) {
+ for (i = 0; i < attr.pkey_tbl_len; ++i)
+ kfree(p->pkey_group->attrs[i]);
+
+ kfree(p->pkey_group->attrs);
+ p->pkey_group->attrs = NULL;
+ }
- kfree(p->pkey_group.attrs);
- p->pkey_group.attrs = NULL;
+err_free_pkey_group:
+ kfree(p->pkey_group);
err_remove_gid_type:
sysfs_remove_group(&p->gid_attr_group->kobj,
@@ -1317,7 +1337,8 @@ void ib_free_port_attrs(struct ib_core_device *coredev)
if (port->pma_table)
sysfs_remove_group(p, port->pma_table);
- sysfs_remove_group(p, &port->pkey_group);
+ if (port->pkey_group)
+ sysfs_remove_group(p, port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
sysfs_remove_group(&port->gid_attr_group->kobj,
&port->gid_attr_group->ndev);
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
index 6c3514beac4d..31e7860d35bf 100644
--- a/drivers/infiniband/core/trace.c
+++ b/drivers/infiniband/core/trace.c
@@ -9,6 +9,4 @@
#define CREATE_TRACE_POINTS
-#include <rdma/ib_verbs.h>
-
#include <trace/events/rdma_core.h>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 82455a1392f1..831bff8d52e5 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -261,6 +261,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
sg = umem->sg_head.sgl;
while (npages) {
+ cond_resched();
ret = pin_user_pages_fast(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE /
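The umem.c change above adds a schedule point to the page-pinning loop so that huge
memory registrations cannot monopolize a CPU. A simplified sketch of that loop shape
follows; example_pin_many() is a made-up stand-in for the real ib_umem_get()
internals, so treat the chunk size and flag choices as approximate.

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>

static int example_pin_many(unsigned long start, unsigned long npages,
			    struct page **pages)
{
	unsigned long done = 0;

	while (npages) {
		int ret;

		cond_resched();	/* schedule point once per chunk */
		ret = pin_user_pages_fast(start,
					  min_t(unsigned long, npages, 512),
					  FOLL_WRITE | FOLL_LONGTERM,
					  pages + done);
		if (ret <= 0)
			return ret ? ret : -EFAULT;
		done += ret;
		npages -= ret;
		start += (unsigned long)ret << PAGE_SHIFT;
	}
	return 0;
}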
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index ccd28405451c..5e32f61a2fe4 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -152,6 +152,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
* ib_alloc_implicit_odp_umem()
* @addr: The starting userspace VA
* @size: The length of the userspace VA
+ * @ops: MMU interval ops, currently only @invalidate
*/
struct ib_umem_odp *
ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
@@ -213,6 +214,7 @@ EXPORT_SYMBOL(ib_umem_odp_alloc_child);
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
+ * @ops: MMU interval ops, currently only @invalidate
*
* The driver should use when the access flags indicate ODP memory. It avoids
* pinning, instead, stores the mm for future page fault handling in
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 76e7ec0f0775..2fbc583d5bdd 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -415,8 +415,8 @@ static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_alloc_pd_resp resp = {};
struct ib_uverbs_alloc_pd cmd;
- struct ib_uverbs_alloc_pd_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
@@ -438,29 +438,20 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
pd->device = ib_dev;
pd->uobject = uobj;
- pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
pd->res.type = RDMA_RESTRACK_PD;
ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
if (ret)
goto err_alloc;
-
- uobj->object = pd;
- memset(&resp, 0, sizeof resp);
- resp.pd_handle = uobj->id;
rdma_restrack_uadd(&pd->res);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
+ uobj->object = pd;
+ uobj_finalize_uobj_create(uobj, attrs);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
+ resp.pd_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs));
- pd = NULL;
err_alloc:
kfree(pd);
err:
@@ -568,15 +559,15 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_device *ibudev = attrs->ufile->device;
+ struct ib_uverbs_open_xrcd_resp resp = {};
struct ib_uverbs_open_xrcd cmd;
- struct ib_uverbs_open_xrcd_resp resp;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
- struct fd f = {NULL, 0};
struct inode *inode = NULL;
- int ret = 0;
int new_xrcd = 0;
struct ib_device *ib_dev;
+ struct fd f = {};
+ int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -614,24 +605,16 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
}
if (!xrcd) {
- xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata);
+ xrcd = ib_alloc_xrcd_user(ib_dev, inode, &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
}
-
- xrcd->inode = inode;
- xrcd->device = ib_dev;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
new_xrcd = 1;
}
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- memset(&resp, 0, sizeof resp);
- resp.xrcd_handle = obj->uobject.id;
if (inode) {
if (new_xrcd) {
@@ -643,27 +626,17 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
atomic_inc(&xrcd->usecnt);
}
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
if (f.file)
fdput(f);
mutex_unlock(&ibudev->xrcd_tree_mutex);
+ uobj_finalize_uobj_create(&obj->uobject, attrs);
- rdma_alloc_commit_uobject(&obj->uobject, attrs);
- return 0;
-
-err_copy:
- if (inode) {
- if (new_xrcd)
- xrcd_table_delete(ibudev, inode);
- atomic_dec(&xrcd->usecnt);
- }
+ resp.xrcd_handle = obj->uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_dealloc_xrcd:
- ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs));
+ ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs));
err:
uobj_alloc_abort(&obj->uobject, attrs);
@@ -701,7 +674,7 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
return 0;
- ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata);
+ ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata);
if (ib_is_destroy_retryable(ret, why, uobject)) {
atomic_inc(&xrcd->usecnt);
@@ -716,8 +689,8 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_reg_mr_resp resp = {};
struct ib_uverbs_reg_mr cmd;
- struct ib_uverbs_reg_mr_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
@@ -770,30 +743,20 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->res.type = RDMA_RESTRACK_MR;
+ mr->iova = cmd.hca_va;
rdma_restrack_uadd(&mr->res);
uobj->object = mr;
-
- memset(&resp, 0, sizeof resp);
- resp.lkey = mr->lkey;
- resp.rkey = mr->rkey;
- resp.mr_handle = uobj->id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-
-err_copy:
- ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+ resp.mr_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
uobj_put_obj_read(pd);
-
err_free:
uobj_alloc_abort(uobj, attrs);
return ret;
@@ -861,6 +824,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
atomic_dec(&old_pd->usecnt);
}
+ if (cmd.flags & IB_MR_REREG_TRANS)
+ mr->iova = cmd.hca_va;
+
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
@@ -930,21 +896,13 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
atomic_inc(&pd->usecnt);
uobj->object = mw;
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- memset(&resp, 0, sizeof(resp));
- resp.rkey = mw->rkey;
+ resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
- uobj_put_obj_read(pd);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-
-err_copy:
- uverbs_dealloc_mw(mw);
err_put:
uobj_put_obj_read(pd);
err_free:
@@ -981,40 +939,33 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- resp.fd = uobj->id;
-
ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
+ uobj_finalize_uobj_create(uobj, attrs);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret) {
- uobj_alloc_abort(uobj, attrs);
- return ret;
- }
-
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
+ resp.fd = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
- struct ib_uverbs_ex_create_cq *cmd)
+static int create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
- struct ib_uverbs_ex_create_cq_resp resp;
+ struct ib_uverbs_ex_create_cq_resp resp = {};
struct ib_cq_init_attr attr = {};
struct ib_device *ib_dev;
if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
&ib_dev);
if (IS_ERR(obj))
- return obj;
+ return PTR_ERR(obj);
if (cmd->comp_channel >= 0) {
ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
@@ -1043,53 +994,38 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
+ cq->res.type = RDMA_RESTRACK_CQ;
ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata);
if (ret)
goto err_free;
+ rdma_restrack_uadd(&cq->res);
obj->uevent.uobject.object = cq;
obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
if (obj->uevent.event_file)
uverbs_uobject_get(&obj->uevent.event_file->uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uevent.uobject.id;
- resp.base.cqe = cq->cqe;
+ resp.base.cqe = cq->cqe;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_uadd(&cq->res);
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_cb;
-
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return obj;
-
-err_cb:
- if (obj->uevent.event_file)
- uverbs_uobject_put(&obj->uevent.event_file->uobj);
- ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
- cq = NULL;
err_free:
kfree(cq);
err_file:
if (ev_file)
ib_uverbs_release_ucq(ev_file, obj);
-
err:
uobj_alloc_abort(&obj->uevent.uobject, attrs);
-
- return ERR_PTR(ret);
+ return ret;
}
static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_ucq_object *obj;
int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1102,14 +1038,12 @@ static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(attrs, &cmd_ex);
- return PTR_ERR_OR_ZERO(obj);
+ return create_cq(attrs, &cmd_ex);
}
static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_ex_create_cq cmd;
- struct ib_ucq_object *obj;
int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1122,8 +1056,7 @@ static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
if (cmd.reserved)
return -EINVAL;
- obj = create_cq(attrs, &cmd);
- return PTR_ERR_OR_ZERO(obj);
+ return create_cq(attrs, &cmd);
}
static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
@@ -1131,7 +1064,7 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp = {};
struct ib_cq *cq;
- int ret = -EINVAL;
+ int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -1298,7 +1231,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
struct ib_srq *srq = NULL;
struct ib_qp *qp;
struct ib_qp_init_attr attr = {};
- struct ib_uverbs_ex_create_qp_resp resp;
+ struct ib_uverbs_ex_create_qp_resp resp = {};
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
@@ -1468,20 +1401,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (obj->uevent.event_file)
uverbs_uobject_get(&obj->uevent.event_file->uobj);
- memset(&resp, 0, sizeof resp);
- resp.base.qpn = qp->qp_num;
- resp.base.qp_handle = obj->uevent.uobject.id;
- resp.base.max_recv_sge = attr.cap.max_recv_sge;
- resp.base.max_send_sge = attr.cap.max_send_sge;
- resp.base.max_recv_wr = attr.cap.max_recv_wr;
- resp.base.max_send_wr = attr.cap.max_send_wr;
- resp.base.max_inline_data = attr.cap.max_inline_data;
- resp.response_length = uverbs_response_length(attrs, sizeof(resp));
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_uevent;
-
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
@@ -1502,12 +1421,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
UVERBS_LOOKUP_READ);
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
+
+ resp.base.qpn = qp->qp_num;
+ resp.base.qp_handle = obj->uevent.uobject.id;
+ resp.base.max_recv_sge = attr.cap.max_recv_sge;
+ resp.base.max_send_sge = attr.cap.max_send_sge;
+ resp.base.max_recv_wr = attr.cap.max_recv_wr;
+ resp.base.max_send_wr = attr.cap.max_send_wr;
+ resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
-err_uevent:
- if (obj->uevent.event_file)
- uverbs_uobject_put(&obj->uevent.event_file->uobj);
err_cb:
ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
@@ -1580,14 +1505,14 @@ static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_create_qp_resp resp = {};
struct ib_uverbs_open_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
- struct ib_uobject *xrcd_uobj;
struct ib_qp *qp;
struct ib_qp_open_attr attr = {};
int ret;
+ struct ib_uobject *xrcd_uobj;
struct ib_device *ib_dev;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1627,24 +1552,16 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
obj->uevent.uobject.object = qp;
obj->uevent.uobject.user_handle = cmd.user_handle;
- memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_destroy;
-
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
qp->uobject = obj;
uobj_put_read(xrcd_uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_destroy:
- ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
err_xrcd:
uobj_put_read(xrcd_uobj);
err_put:
@@ -1980,7 +1897,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
- BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1ULL << 31));
if (cmd.base.attr_mask &
~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
@@ -2480,24 +2397,14 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
ah->uobject = uobj;
uobj->user_handle = cmd.user_handle;
uobj->object = ah;
-
- resp.ah_handle = uobj->id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
uobj_put_obj_read(pd);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
+ uobj_finalize_uobj_create(uobj, attrs);
-err_copy:
- rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE,
- uverbs_get_cleared_udata(attrs));
+ resp.ah_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
uobj_put_obj_read(pd);
-
err:
uobj_alloc_abort(uobj, attrs);
return ret;
@@ -2989,26 +2896,18 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
if (obj->uevent.event_file)
uverbs_uobject_get(&obj->uevent.event_file->uobj);
- memset(&resp, 0, sizeof(resp));
+ uobj_put_obj_read(pd);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
+
resp.wq_handle = obj->uevent.uobject.id;
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- uobj_put_obj_read(pd);
- rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
- UVERBS_LOOKUP_READ);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- if (obj->uevent.event_file)
- uverbs_uobject_put(&obj->uevent.event_file->uobj);
- ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs));
err_put_cq:
rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
@@ -3093,7 +2992,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
- int i, j, num_read_wqs;
+ int i, num_read_wqs;
u32 num_wq_handles;
struct uverbs_req_iter iter;
struct ib_device *ib_dev;
@@ -3139,6 +3038,7 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
}
wqs[num_read_wqs] = wq;
+ atomic_inc(&wqs[num_read_wqs]->usecnt);
}
uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
@@ -3166,33 +3066,24 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
atomic_set(&rwq_ind_tbl->usecnt, 0);
for (i = 0; i < num_wq_handles; i++)
- atomic_inc(&wqs[i]->usecnt);
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ kfree(wqs_handles);
+ uobj_finalize_uobj_create(uobj, attrs);
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- kfree(wqs_handles);
-
- for (j = 0; j < num_read_wqs; j++)
- rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
- UVERBS_LOOKUP_READ);
-
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-
-err_copy:
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
err_uobj:
uobj_alloc_abort(uobj, attrs);
put_wqs:
- for (j = 0; j < num_read_wqs; j++)
- rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject,
+ for (i = 0; i < num_read_wqs; i++) {
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
+ atomic_dec(&wqs[i]->usecnt);
+ }
err_free:
kfree(wqs_handles);
kfree(wqs);
@@ -3218,7 +3109,7 @@ static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
- struct ib_uverbs_create_flow_resp resp;
+ struct ib_uverbs_create_flow_resp resp = {};
struct ib_uobject *uobj;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
@@ -3351,23 +3242,17 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
- memset(&resp, 0, sizeof(resp));
- resp.flow_handle = uobj->id;
-
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
kfree(flow_attr);
+
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- rdma_alloc_commit_uobject(uobj, attrs);
- return 0;
-err_copy:
- if (!qp->device->ops.destroy_flow(flow_id))
- atomic_dec(&qp->usecnt);
+ uobj_finalize_uobj_create(uobj, attrs);
+
+ resp.flow_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+
err_free:
ib_uverbs_flow_resources_free(uflow_res);
err_free_flow_attr:
@@ -3402,13 +3287,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
- struct ib_uverbs_create_srq_resp resp;
+ struct ib_uverbs_create_srq_resp resp = {};
struct ib_usrq_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
- struct ib_uobject *xrcd_uobj;
struct ib_srq_init_attr attr;
int ret;
+ struct ib_uobject *xrcd_uobj;
struct ib_device *ib_dev;
obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
@@ -3473,17 +3358,9 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
if (obj->uevent.event_file)
uverbs_uobject_get(&obj->uevent.event_file->uobj);
- memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uevent.uobject.id;
- resp.max_wr = attr.attr.max_wr;
- resp.max_sge = attr.attr.max_sge;
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
@@ -3492,13 +3369,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
UVERBS_LOOKUP_READ);
uobj_put_obj_read(pd);
- rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
- return 0;
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
+
+ resp.srq_handle = obj->uevent.uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- if (obj->uevent.event_file)
- uverbs_uobject_put(&obj->uevent.event_file->uobj);
- ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
err_put_pd:
uobj_put_obj_read(pd);
err_put_cq:
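All of the uverbs_cmd.c conversions above follow one shape: the handler calls uobj_finalize_uobj_create() once the object is fully constructed, writes the response as its last action, and drops the old err_copy unwind, because the dispatchers (see the uverbs_main.c and uverbs_std_types_device.c hunks below) now commit or destroy the new uobject based on the handler's return code. A condensed sketch of that shape with a hypothetical "foo" object; it assumes the usual uverbs_cmd.c includes, and only the uobj_*/uverbs_* helpers and the control flow mirror the hunks above.

/*
 * Condensed shape of the converted create handlers.  "foo", its uapi
 * structs, UVERBS_OBJECT_FOO and foo_create_hw_object() are hypothetical.
 */
static int ib_uverbs_create_foo(struct uverbs_attr_bundle *attrs)
{
	struct ib_uverbs_create_foo_resp resp = {};	/* hypothetical uapi struct */
	struct ib_uverbs_create_foo cmd;		/* hypothetical uapi struct */
	struct ib_uobject *uobj;
	struct ib_device *ib_dev;
	struct ib_foo *foo;				/* hypothetical object */
	int ret;

	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	uobj = uobj_alloc(UVERBS_OBJECT_FOO, attrs, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	foo = foo_create_hw_object(ib_dev, &attrs->driver_udata);	/* stand-in */
	if (IS_ERR(foo)) {
		ret = PTR_ERR(foo);
		goto err_abort;
	}
	uobj->object = foo;

	/*
	 * After this call the handler must not tear the object down itself;
	 * if uverbs_response() fails, the dispatcher sees the non-zero return
	 * and destroys the half-created uobject for us.
	 */
	uobj_finalize_uobj_create(uobj, attrs);

	resp.foo_handle = uobj->id;
	return uverbs_response(attrs, &resp, sizeof(resp));

err_abort:
	uobj_alloc_abort(uobj, attrs);
	return ret;
}
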
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 2d882c02387c..ef04a261097f 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -790,6 +790,7 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
}
return uverbs_copy_to(bundle, idx, from, size);
}
+EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero);
/* Once called an abort will call through to the type's destroy_hw() */
void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 69e4755cc04b..37794d88b1f3 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -601,6 +601,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
bundle.ufile = file;
bundle.context = NULL; /* only valid if bundle has uobject */
+ bundle.uobject = NULL;
if (!method_elm->is_ex) {
size_t in_len = hdr.in_words * 4 - sizeof(hdr);
size_t out_len = hdr.out_words * 4;
@@ -664,6 +665,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
}
ret = method_elm->handler(&bundle);
+ if (bundle.uobject)
+ uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
+ !ret, &bundle);
out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return (ret) ? : count;
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 9f013304e677..c7e7438752bc 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -46,7 +46,9 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
if (ret)
return ret;
- return counters->device->ops.destroy_counters(counters);
+ counters->device->ops.destroy_counters(counters);
+ kfree(counters);
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
@@ -66,20 +68,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
if (!ib_dev->ops.create_counters)
return -EOPNOTSUPP;
- counters = ib_dev->ops.create_counters(ib_dev, attrs);
- if (IS_ERR(counters)) {
- ret = PTR_ERR(counters);
- goto err_create_counters;
- }
+ counters = rdma_zalloc_drv_obj(ib_dev, ib_counters);
+ if (!counters)
+ return -ENOMEM;
counters->device = ib_dev;
counters->uobject = uobj;
uobj->object = counters;
atomic_set(&counters->usecnt, 0);
- return 0;
+ ret = ib_dev->ops.create_counters(counters, attrs);
+ if (ret)
+ kfree(counters);
-err_create_counters:
return ret;
}
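The counters hunk above switches to the core-allocates/driver-initializes scheme: uverbs sizes and zero-allocates the ib_counters through rdma_zalloc_drv_obj(), calls the driver's create op on it, and kfree()s it on failure or in uverbs_free_counters(). A hypothetical driver-side create op under that scheme (struct and field names are assumptions, not from the patch):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

/*
 * Hypothetical driver counters object.  Keeping the ib_counters member at
 * offset 0 is what lets the core's rdma_zalloc_drv_obj() allocation cover
 * the driver's private part as well.
 */
struct demo_counters {
	struct ib_counters ibcntrs;
	u32 hw_id;			/* made-up private state */
};

static int demo_create_counters(struct ib_counters *counters,
				struct uverbs_attr_bundle *attrs)
{
	struct demo_counters *dc =
		container_of(counters, struct demo_counters, ibcntrs);

	dc->hw_id = 0;			/* would come from a firmware/hw counter allocation */
	return 0;			/* on failure the core kfree()s the object */
}
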
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 5dce2c7cc323..b1c7dacc02de 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -207,11 +207,8 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_CQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
-
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
&UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
-#endif
);
const struct uapi_definition uverbs_def_obj_cq[] = {
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index ae4a59d6f9b1..75df2094a010 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -38,7 +38,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
attrs->ucore.outlen < method_elm->resp_size)
return -ENOSPC;
- return method_elm->handler(attrs);
+ attrs->uobject = NULL;
+ rc = method_elm->handler(attrs);
+ if (attrs->uobject)
+ uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true,
+ !rc, attrs);
+ return rc;
}
DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
@@ -229,6 +234,37 @@ static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
return 0;
}
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ u32 num_comp;
+ int ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!ib_dev->ops.query_ucontext)
+ return -EOPNOTSUPP;
+
+ num_comp = attrs->ufile->device->num_comp_vectors;
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ return ucontext->device->ops.query_ucontext(ucontext, attrs);
+}
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_GET_CONTEXT,
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
@@ -238,6 +274,13 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_UHW());
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_INFO_HANDLES,
/* Also includes any device specific object ids */
UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID,
@@ -260,7 +303,8 @@ DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
&UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
- &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT));
const struct uapi_definition uverbs_def_obj_device[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index a2722ef8496e..9b22bb553e8b 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -69,7 +69,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
num_sge = uverbs_attr_ptr_get_array_size(
attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
- if (num_sge < 0)
+ if (num_sge <= 0)
return num_sge;
sg_list = uverbs_attr_get_alloced_ptr(attrs,
@@ -148,6 +148,36 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
return ret;
}
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *mr =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE);
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ &mr->length, sizeof(mr->length));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ &mr->iova, sizeof(mr->iova));
+
+ return IS_UVERBS_COPY_ERR(ret) ? ret : 0;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_ADVISE_MR,
UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
@@ -166,6 +196,25 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_ALLOC_AND_COPY));
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_DM_MR_REG,
UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
UVERBS_OBJECT_MR,
@@ -206,7 +255,8 @@ DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
&UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR));
const struct uapi_definition uverbs_def_obj_mr[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 53d6505c0c7b..3096e73797b7 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -988,8 +988,8 @@ EXPORT_SYMBOL(rdma_destroy_ah_user);
* @srq_init_attr: A list of initial attributes required to create the
* SRQ. If SRQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created SRQ.
- * @uobject - uobject pointer if this is not a kernel SRQ
- * @udata - udata pointer if this is not a kernel SRQ
+ * @uobject: uobject pointer if this is not a kernel SRQ
+ * @udata: udata pointer if this is not a kernel SRQ
*
* srq_attr->max_wr and srq_attr->max_sge are read the determine the
* requested size of the SRQ, and set to the actual values allocated
@@ -1090,13 +1090,6 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
}
-static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
-{
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
- mutex_unlock(&xrcd->tgt_qp_mutex);
-}
-
static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
void (*event_handler)(struct ib_event *, void *),
void *qp_context)
@@ -1139,16 +1132,15 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
return ERR_PTR(-EINVAL);
- qp = ERR_PTR(-EINVAL);
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
- if (real_qp->qp_num == qp_open_attr->qp_num) {
- qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
- qp_open_attr->qp_context);
- break;
- }
+ down_read(&xrcd->tgt_qps_rwsem);
+ real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
+ if (!real_qp) {
+ up_read(&xrcd->tgt_qps_rwsem);
+ return ERR_PTR(-EINVAL);
}
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+ qp_open_attr->qp_context);
+ up_read(&xrcd->tgt_qps_rwsem);
return qp;
}
EXPORT_SYMBOL(ib_open_qp);
@@ -1157,6 +1149,7 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *real_qp = qp;
+ int err;
qp->event_handler = __ib_shared_qp_event_handler;
qp->qp_context = qp;
@@ -1172,7 +1165,12 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
if (IS_ERR(qp))
return qp;
- __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num,
+ real_qp, GFP_KERNEL));
+ if (err) {
+ ib_close_qp(qp);
+ return ERR_PTR(err);
+ }
return qp;
}
@@ -1712,7 +1710,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (!(rdma_protocol_ib(qp->device,
attr->alt_ah_attr.port_num) &&
rdma_protocol_ib(qp->device, port))) {
- ret = EINVAL;
+ ret = -EINVAL;
goto out;
}
}
@@ -1887,21 +1885,18 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
real_qp = qp->real_qp;
xrcd = real_qp->xrcd;
-
- mutex_lock(&xrcd->tgt_qp_mutex);
+ down_write(&xrcd->tgt_qps_rwsem);
ib_close_qp(qp);
if (atomic_read(&real_qp->usecnt) == 0)
- list_del(&real_qp->xrcd_list);
+ xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
else
real_qp = NULL;
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ up_write(&xrcd->tgt_qps_rwsem);
if (real_qp) {
ret = ib_destroy_qp(real_qp);
if (!ret)
atomic_dec(&xrcd->usecnt);
- else
- __ib_insert_xrcd_qp(xrcd, real_qp);
}
return 0;
@@ -2077,6 +2072,9 @@ int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
if (!pd->device->ops.advise_mr)
return -EOPNOTSUPP;
+ if (!num_sge)
+ return 0;
+
return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
NULL);
}
@@ -2104,11 +2102,10 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
EXPORT_SYMBOL(ib_dereg_mr_user);
/**
- * ib_alloc_mr_user() - Allocates a memory region
+ * ib_alloc_mr() - Allocates a memory region
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
- * @udata: user data or null for kernel objects
*
* Notes:
* Memory registeration page/sg lists must not exceed max_num_sg.
@@ -2116,8 +2113,8 @@ EXPORT_SYMBOL(ib_dereg_mr_user);
* max_num_sg * used_page_size.
*
*/
-struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
{
struct ib_mr *mr;
@@ -2132,25 +2129,26 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
goto out;
}
- mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
- if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
- mr->dm = NULL;
- mr->uobject = NULL;
- atomic_inc(&pd->usecnt);
- mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
- mr->type = mr_type;
- mr->sig_attrs = NULL;
- }
+ mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
+ if (IS_ERR(mr))
+ goto out;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->res.type = RDMA_RESTRACK_MR;
+ rdma_restrack_kadd(&mr->res);
+ mr->type = mr_type;
+ mr->sig_attrs = NULL;
out:
trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
}
-EXPORT_SYMBOL(ib_alloc_mr_user);
+EXPORT_SYMBOL(ib_alloc_mr);
/**
* ib_alloc_mr_integrity() - Allocates an integrity memory region
@@ -2288,45 +2286,57 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
+/**
+ * ib_alloc_xrcd_user - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ * @inode: inode to connect XRCD
+ * @udata: Valid user data or NULL for kernel object
+ */
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+ struct inode *inode, struct ib_udata *udata)
{
struct ib_xrcd *xrcd;
+ int ret;
if (!device->ops.alloc_xrcd)
return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->ops.alloc_xrcd(device, NULL);
- if (!IS_ERR(xrcd)) {
- xrcd->device = device;
- xrcd->inode = NULL;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
- }
+ xrcd = rdma_zalloc_drv_obj(device, ib_xrcd);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+ xrcd->device = device;
+ xrcd->inode = inode;
+ atomic_set(&xrcd->usecnt, 0);
+ init_rwsem(&xrcd->tgt_qps_rwsem);
+ xa_init(&xrcd->tgt_qps);
+
+ ret = device->ops.alloc_xrcd(xrcd, udata);
+ if (ret)
+ goto err;
return xrcd;
+err:
+ kfree(xrcd);
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_xrcd);
+EXPORT_SYMBOL(ib_alloc_xrcd_user);
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+/**
+ * ib_dealloc_xrcd_user - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
+ */
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
- struct ib_qp *qp;
- int ret;
-
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
- while (!list_empty(&xrcd->tgt_qp_list)) {
- qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
- ret = ib_destroy_qp(qp);
- if (ret)
- return ret;
- }
- mutex_destroy(&xrcd->tgt_qp_mutex);
-
- return xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ WARN_ON(!xa_empty(&xrcd->tgt_qps));
+ xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ kfree(xrcd);
+ return 0;
}
-EXPORT_SYMBOL(ib_dealloc_xrcd);
+EXPORT_SYMBOL(ib_dealloc_xrcd_user);
/**
* ib_create_wq - Creates a WQ associated with the specified protection
@@ -2410,45 +2420,6 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
EXPORT_SYMBOL(ib_modify_wq);
/*
- * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
- * @device: The device on which to create the rwq indirection table.
- * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
- * create the Indirection Table.
- *
- * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less
- * than the created ib_rwq_ind_table object and the caller is responsible
- * for its memory allocation/free.
- */
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr)
-{
- struct ib_rwq_ind_table *rwq_ind_table;
- int i;
- u32 table_size;
-
- if (!device->ops.create_rwq_ind_table)
- return ERR_PTR(-EOPNOTSUPP);
-
- table_size = (1 << init_attr->log_ind_tbl_size);
- rwq_ind_table = device->ops.create_rwq_ind_table(device,
- init_attr, NULL);
- if (IS_ERR(rwq_ind_table))
- return rwq_ind_table;
-
- rwq_ind_table->ind_tbl = init_attr->ind_tbl;
- rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
- rwq_ind_table->device = device;
- rwq_ind_table->uobject = NULL;
- atomic_set(&rwq_ind_table->usecnt, 0);
-
- for (i = 0; i < table_size; i++)
- atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
-
- return rwq_ind_table;
-}
-EXPORT_SYMBOL(ib_create_rwq_ind_table);
-
-/*
* ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
* @wq_ind_table: The Indirection Table to destroy.
*/
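The verbs.c hunks above replace the XRCD's tgt_qp_list/tgt_qp_mutex pair with an xarray keyed by QP number plus an rwsem, so ib_open_qp() becomes a direct lookup under a read lock. A self-contained sketch of that data-structure swap follows; the registry struct and helpers are stand-ins, while the xarray/rwsem calls are the same ones the patch uses.

/*
 * Stand-in registry: an xarray keyed by QP number replaces the linked
 * list, and an rwsem replaces the mutex so lookups only take a read lock.
 */
#include <linux/gfp.h>
#include <linux/rwsem.h>
#include <linux/types.h>
#include <linux/xarray.h>

struct tgt_qp_registry {
	struct xarray qps;		/* qp_num -> struct ib_qp * */
	struct rw_semaphore rwsem;	/* keeps a looked-up QP stable while it is opened */
};

static void tgt_qp_registry_init(struct tgt_qp_registry *reg)
{
	xa_init(&reg->qps);
	init_rwsem(&reg->rwsem);
}

static int tgt_qp_registry_add(struct tgt_qp_registry *reg, u32 qp_num, void *qp)
{
	return xa_err(xa_store(&reg->qps, qp_num, qp, GFP_KERNEL));
}

static void *tgt_qp_registry_open(struct tgt_qp_registry *reg, u32 qp_num)
{
	void *qp;

	down_read(&reg->rwsem);
	qp = xa_load(&reg->qps, qp_num);
	/* ...take a reference on the QP here, as __ib_open_qp() does... */
	up_read(&reg->rwsem);
	return qp;
}

static void tgt_qp_registry_remove(struct tgt_qp_registry *reg, u32 qp_num)
{
	down_write(&reg->rwsem);
	xa_erase(&reg->qps, qp_num);
	up_write(&reg->rwsem);
}
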
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 8b6ad5cddfce..3f18efc0c297 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -842,16 +842,79 @@ static u8 __from_ib_qp_type(enum ib_qp_type type)
}
}
+static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp,
+ int rsge, int max)
+{
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ rsge = max;
+ return bnxt_re_get_rwqe_size(rsge);
+}
+
+static u16 bnxt_re_get_wqe_size(int ilsize, int nsge)
+{
+ u16 wqe_size, calc_ils;
+
+ wqe_size = bnxt_re_get_swqe_size(nsge);
+ if (ilsize) {
+ calc_ils = sizeof(struct sq_send_hdr) + ilsize;
+ wqe_size = max_t(u16, calc_ils, wqe_size);
+ wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr));
+ }
+ return wqe_size;
+}
+
+static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
+ int align, ilsize;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
+ dev_attr = &rdev->dev_attr;
+
+ align = sizeof(struct sq_send_hdr);
+ ilsize = ALIGN(init_attr->cap.max_inline_data, align);
+
+ sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge);
+ if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges))
+ return -EINVAL;
+ /* For gen p4 and gen p5 backward compatibility mode
+ * wqe size is fixed to 128 bytes
+ */
+ if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) &&
+ qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges);
+
+ if (init_attr->cap.max_inline_data) {
+ qplqp->max_inline_data = sq->wqe_size -
+ sizeof(struct sq_send_hdr);
+ init_attr->cap.max_inline_data = qplqp->max_inline_data;
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ sq->max_sge = qplqp->max_inline_data /
+ sizeof(struct sq_sge);
+ }
+
+ return 0;
+}
+
static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
struct bnxt_re_qp *qp, struct ib_udata *udata)
{
+ struct bnxt_qplib_qp *qplib_qp;
+ struct bnxt_re_ucontext *cntx;
struct bnxt_re_qp_req ureq;
- struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
- struct ib_umem *umem;
int bytes = 0, psn_sz;
- struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
- udata, struct bnxt_re_ucontext, ib_uctx);
+ struct ib_umem *umem;
+ int psn_nume;
+ qplib_qp = &qp->qplib_qp;
+ cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
+ ib_uctx);
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
@@ -859,10 +922,15 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
- sizeof(struct sq_psn_search_ext) :
- sizeof(struct sq_psn_search);
- bytes += (qplib_qp->sq.max_wqe * psn_sz);
+ sizeof(struct sq_psn_search_ext) :
+ sizeof(struct sq_psn_search);
+ psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ qplib_qp->sq.max_wqe :
+ ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
+ sizeof(struct bnxt_qplib_sge));
+ bytes += (psn_nume * psn_sz);
}
+
bytes = PAGE_ALIGN(bytes);
umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes,
IB_ACCESS_LOCAL_WRITE);
@@ -975,7 +1043,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.sig_type = true;
/* Shadow QP SQ depth should be same as QP1 RQ depth */
- qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size();
+ qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6);
qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.sq.max_sge = 2;
/* Q full delta can be 1 since it is internal QP */
@@ -986,7 +1054,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.scq = qp1_qp->scq;
qp->qplib_qp.rcq = qp1_qp->rcq;
- qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size();
+ qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6);
qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
/* Q full delta can be 1 since it is internal QP */
@@ -1041,19 +1109,21 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
qplqp->srq = &srq->qplib_srq;
rq->max_wqe = 0;
} else {
- rq->wqe_size = bnxt_re_get_rwqe_size();
+ rq->max_sge = init_attr->cap.max_recv_sge;
+ if (rq->max_sge > dev_attr->max_qp_sges)
+ rq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_recv_sge = rq->max_sge;
+ rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge,
+ dev_attr->max_qp_sges);
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty.
*/
entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1);
rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
- rq->q_full_delta = rq->max_wqe - init_attr->cap.max_recv_wr;
- rq->max_sge = init_attr->cap.max_recv_sge;
- if (rq->max_sge > dev_attr->max_qp_sges)
- rq->max_sge = dev_attr->max_qp_sges;
+ rq->q_full_delta = 0;
+ rq->sg_info.pgsize = PAGE_SIZE;
+ rq->sg_info.pgshft = PAGE_SHIFT;
}
- rq->sg_info.pgsize = PAGE_SIZE;
- rq->sg_info.pgshft = PAGE_SHIFT;
return 0;
}
@@ -1068,41 +1138,48 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
qplqp = &qp->qplib_qp;
dev_attr = &rdev->dev_attr;
- qplqp->rq.max_sge = dev_attr->max_qp_sges;
- if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+ if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
qplqp->rq.max_sge = dev_attr->max_qp_sges;
- qplqp->rq.max_sge = 6;
+ if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+ qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ qplqp->rq.max_sge = 6;
+ }
}
-static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
struct bnxt_re_dev *rdev;
struct bnxt_qplib_q *sq;
int entries;
+ int diff;
+ int rc;
rdev = qp->rdev;
qplqp = &qp->qplib_qp;
sq = &qplqp->sq;
dev_attr = &rdev->dev_attr;
- sq->wqe_size = bnxt_re_get_swqe_size();
sq->max_sge = init_attr->cap.max_send_sge;
- if (sq->max_sge > dev_attr->max_qp_sges)
+ if (sq->max_sge > dev_attr->max_qp_sges) {
sq->max_sge = dev_attr->max_qp_sges;
- /*
- * Change the SQ depth if user has requested minimum using
- * configfs. Only supported for kernel consumers
- */
+ init_attr->cap.max_send_sge = sq->max_sge;
+ }
+
+ rc = bnxt_re_setup_swqe_size(qp, init_attr);
+ if (rc)
+ return rc;
+
entries = init_attr->cap.max_send_wr;
/* Allocate 128 + 1 more than what's provided */
- entries = roundup_pow_of_two(entries + BNXT_QPLIB_RESERVED_QP_WRS + 1);
- sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes +
- BNXT_QPLIB_RESERVED_QP_WRS + 1);
- sq->q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
+ 0 : BNXT_QPLIB_RESERVED_QP_WRS;
+ entries = roundup_pow_of_two(entries + diff + 1);
+ sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+ sq->q_full_delta = diff + 1;
/*
* Reserving one slot for Phantom WQE. Application can
* post one extra entry in this case. But allowing this to avoid
@@ -1111,6 +1188,8 @@ static void bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
qplqp->sq.q_full_delta -= 1;
qplqp->sq.sg_info.pgsize = PAGE_SIZE;
qplqp->sq.sg_info.pgshft = PAGE_SHIFT;
+
+ return 0;
}
static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
@@ -1125,13 +1204,16 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
qplqp = &qp->qplib_qp;
dev_attr = &rdev->dev_attr;
- entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1);
- qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
- qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
- init_attr->cap.max_send_wr;
- qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
- if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
- qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1);
+ qplqp->sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
+ init_attr->cap.max_send_wr;
+ qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
+ if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
+ qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ }
}
static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev,
@@ -1183,6 +1265,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
goto out;
}
qplqp->type = (u8)qptype;
+ qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode;
if (init_attr->qp_type == IB_QPT_RC) {
qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
@@ -1226,7 +1309,9 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
bnxt_re_adjust_gsi_rq_attr(qp);
/* Setup SQ */
- bnxt_re_init_sq_attr(qp, init_attr, udata);
+ rc = bnxt_re_init_sq_attr(qp, init_attr, udata);
+ if (rc)
+ goto out;
if (init_attr->qp_type == IB_QPT_GSI)
bnxt_re_adjust_gsi_sq_attr(qp, init_attr);
@@ -1574,8 +1659,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
entries = dev_attr->max_srq_wqes + 1;
srq->qplib_srq.max_wqe = entries;
- srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size();
srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
+ srq->qplib_srq.wqe_size =
+ bnxt_re_get_rwqe_size(srq->qplib_srq.max_sge);
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
@@ -3569,7 +3655,7 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
}
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index e5fbbeba6d28..1daeb30e06fd 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -136,14 +136,14 @@ struct bnxt_re_ucontext {
spinlock_t sh_lock; /* protect shpg */
};
-static inline u16 bnxt_re_get_swqe_size(void)
+static inline u16 bnxt_re_get_swqe_size(int nsge)
{
- return sizeof(struct sq_send);
+ return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
}
-static inline u16 bnxt_re_get_rwqe_size(void)
+static inline u16 bnxt_re_get_rwqe_size(int nsge)
{
- return sizeof(struct rq_wqe);
+ return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge));
}
int bnxt_re_query_device(struct ib_device *ibdev,
@@ -201,7 +201,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b12fbc857f94..dad0df8a2467 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -82,6 +82,15 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev);
static void bnxt_re_dealloc_driver(struct ib_device *ib_dev);
static void bnxt_re_stop_irq(void *handle);
+static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+
+ cctx = rdev->chip_ctx;
+ cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+ mode : BNXT_QPLIB_WQE_MODE_STATIC;
+}
+
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *chip_ctx;
@@ -97,7 +106,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
kfree(chip_ctx);
}
-static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
+static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
{
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
@@ -117,6 +126,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
rdev->qplib_res.cctx = rdev->chip_ctx;
rdev->rcfw.res = &rdev->qplib_res;
+ bnxt_re_set_drv_mode(rdev, wqe_mode);
return 0;
}
@@ -1386,7 +1396,7 @@ static void bnxt_re_worker(struct work_struct *work)
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
}
-static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
{
struct bnxt_qplib_creq_ctx *creq;
struct bnxt_re_ring_attr rattr;
@@ -1406,7 +1416,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
- rc = bnxt_re_setup_chip_ctx(rdev);
+ rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
return -EINVAL;
@@ -1585,7 +1595,7 @@ static void bnxt_re_remove_device(struct bnxt_re_dev *rdev)
}
static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
- struct net_device *netdev)
+ struct net_device *netdev, u8 wqe_mode)
{
int rc;
@@ -1599,7 +1609,7 @@ static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
}
pci_dev_get((*rdev)->en_dev->pdev);
- rc = bnxt_re_dev_init(*rdev);
+ rc = bnxt_re_dev_init(*rdev, wqe_mode);
if (rc) {
pci_dev_put((*rdev)->en_dev->pdev);
bnxt_re_dev_unreg(*rdev);
@@ -1711,7 +1721,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
case NETDEV_REGISTER:
if (rdev)
break;
- rc = bnxt_re_add_device(&rdev, real_dev);
+ rc = bnxt_re_add_device(&rdev, real_dev,
+ BNXT_QPLIB_WQE_MODE_STATIC);
if (!rc)
sch_work = true;
release = false;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index c5e29577cd43..117b42349a28 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -178,11 +178,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
if (qp->rq_hdr_buf)
dma_free_coherent(&res->pdev->dev,
- rq->hwq.max_elements * qp->rq_hdr_buf_size,
+ rq->max_wqe * qp->rq_hdr_buf_size,
qp->rq_hdr_buf, qp->rq_hdr_buf_map);
if (qp->sq_hdr_buf)
dma_free_coherent(&res->pdev->dev,
- sq->hwq.max_elements * qp->sq_hdr_buf_size,
+ sq->max_wqe * qp->sq_hdr_buf_size,
qp->sq_hdr_buf, qp->sq_hdr_buf_map);
qp->rq_hdr_buf = NULL;
qp->sq_hdr_buf = NULL;
@@ -199,10 +199,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
struct bnxt_qplib_q *sq = &qp->sq;
int rc = 0;
- if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
+ if (qp->sq_hdr_buf_size && sq->max_wqe) {
qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
- sq->hwq.max_elements *
- qp->sq_hdr_buf_size,
+ sq->max_wqe * qp->sq_hdr_buf_size,
&qp->sq_hdr_buf_map, GFP_KERNEL);
if (!qp->sq_hdr_buf) {
rc = -ENOMEM;
@@ -212,9 +211,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
}
}
- if (qp->rq_hdr_buf_size && rq->hwq.max_elements) {
+ if (qp->rq_hdr_buf_size && rq->max_wqe) {
qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
- rq->hwq.max_elements *
+ rq->max_wqe *
qp->rq_hdr_buf_size,
&qp->rq_hdr_buf_map,
GFP_KERNEL);
@@ -661,6 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
srq->dbinfo.hwq = &srq->hwq;
srq->dbinfo.xid = srq->id;
srq->dbinfo.db = srq->dpi->dbr;
+ srq->dbinfo.max_slot = 1;
srq->dbinfo.priv_db = res->dpi_tbl.dbr_bar_reg_iomem;
if (srq->threshold)
bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA);
@@ -784,6 +784,28 @@ done:
}
/* QP */
+
+static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
+{
+ int rc = 0;
+ int indx;
+
+ que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL);
+ if (!que->swq) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ que->swq_start = 0;
+ que->swq_last = que->max_wqe - 1;
+ for (indx = 0; indx < que->max_wqe; indx++)
+ que->swq[indx].next_idx = indx + 1;
+ que->swq[que->swq_last].next_idx = 0; /* Make it circular */
+ que->swq_last = 0;
+out:
+ return rc;
+}
+
int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
@@ -808,71 +830,63 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* SQ */
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
- hwq_attr.depth = sq->max_wqe;
- hwq_attr.stride = sq->wqe_size;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
goto exit;
- sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
- if (!sq->swq) {
- rc = -ENOMEM;
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
goto fail_sq;
- }
+
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT);
pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK);
req.sq_pg_size_sq_lvl = pg_sz_lvl;
+ req.sq_fwo_sq_sge =
+ cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_SQ_SGE_SFT);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
- if (qp->scq)
- req.scq_cid = cpu_to_le32(qp->scq->id);
/* RQ */
if (rq->max_wqe) {
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
- hwq_attr.stride = rq->wqe_size;
- hwq_attr.depth = qp->rq.max_wqe;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
if (rc)
- goto fail_sq;
-
- rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
- GFP_KERNEL);
- if (!rq->swq) {
- rc = -ENOMEM;
+ goto sq_swq;
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
goto fail_rq;
- }
+ req.rq_size = cpu_to_le32(rq->max_wqe);
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT);
pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK);
req.rq_pg_size_rq_lvl = pg_sz_lvl;
- if (qp->rcq)
- req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ req.rq_fwo_rq_sge =
+ cpu_to_le16((rq->max_sge &
+ CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_RQ_SGE_SFT);
}
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
/* Header buffer - allow hdr_buf pass in */
rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
if (rc) {
rc = -ENOMEM;
- goto fail;
+ goto rq_rwq;
}
qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
req.qp_flags = cpu_to_le32(qp_flags);
- req.sq_size = cpu_to_le32(sq->hwq.max_elements);
- req.rq_size = cpu_to_le32(rq->hwq.max_elements);
-
- req.sq_fwo_sq_sge =
- cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
- CMDQ_CREATE_QP1_SQ_SGE_SFT);
- req.rq_fwo_rq_sge =
- cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
- CMDQ_CREATE_QP1_RQ_SGE_SFT);
-
req.pd_id = cpu_to_le32(qp->pd->id);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
@@ -886,10 +900,12 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
sq->dbinfo.hwq = &sq->hwq;
sq->dbinfo.xid = qp->id;
sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
if (rq->max_wqe) {
rq->dbinfo.hwq = &rq->hwq;
rq->dbinfo.xid = qp->id;
rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
}
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
@@ -898,12 +914,14 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
fail:
bnxt_qplib_free_qp_hdr_buf(res, qp);
+rq_rwq:
+ kfree(rq->swq);
fail_rq:
bnxt_qplib_free_hwq(res, &rq->hwq);
- kfree(rq->swq);
+sq_swq:
+ kfree(sq->swq);
fail_sq:
bnxt_qplib_free_hwq(res, &sq->hwq);
- kfree(sq->swq);
exit:
return rc;
}
@@ -912,26 +930,18 @@ static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size)
{
struct bnxt_qplib_hwq *hwq;
struct bnxt_qplib_q *sq;
- u64 fpsne, psne, psn_pg;
- u16 indx_pad = 0, indx;
- u16 pg_num, pg_indx;
- u64 *page;
+ u64 fpsne, psn_pg;
+ u16 indx_pad = 0;
sq = &qp->sq;
hwq = &sq->hwq;
-
- fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->max_elements, &psn_pg);
+ fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg);
if (!IS_ALIGNED(fpsne, PAGE_SIZE))
indx_pad = ALIGN(fpsne, PAGE_SIZE) / size;
- page = (u64 *)psn_pg;
- for (indx = 0; indx < hwq->max_elements; indx++) {
- pg_num = (indx + indx_pad) / (PAGE_SIZE / size);
- pg_indx = (indx + indx_pad) % (PAGE_SIZE / size);
- psne = page[pg_num] + pg_indx * size;
- sq->swq[indx].psn_ext = (struct sq_psn_search_ext *)psne;
- sq->swq[indx].psn_search = (struct sq_psn_search *)psne;
- }
+ hwq->pad_pgofft = indx_pad;
+ hwq->pad_pg = (u64 *)psn_pg;
+ hwq->pad_stride = size;
}
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
@@ -944,12 +954,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
struct creq_create_qp_resp resp;
int rc, req_size, psn_sz = 0;
struct bnxt_qplib_hwq *xrrq;
- u16 cmd_flags = 0, max_ssge;
struct bnxt_qplib_pbl *pbl;
struct cmdq_create_qp req;
+ u16 cmd_flags = 0;
u32 qp_flags = 0;
u8 pg_sz_lvl;
- u16 max_rsge;
+ u16 nsge;
RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
@@ -967,97 +977,78 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
- hwq_attr.stride = sq->wqe_size;
- hwq_attr.depth = sq->max_wqe;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq);
hwq_attr.aux_stride = psn_sz;
- hwq_attr.aux_depth = hwq_attr.depth;
+ hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
goto exit;
- sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
- if (!sq->swq) {
- rc = -ENOMEM;
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
goto fail_sq;
- }
if (psn_sz)
bnxt_qplib_init_psn_ptr(qp, psn_sz);
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
CMDQ_CREATE_QP_SQ_PG_SIZE_SFT);
pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK);
req.sq_pg_size_sq_lvl = pg_sz_lvl;
-
- if (qp->scq)
- req.scq_cid = cpu_to_le32(qp->scq->id);
+ req.sq_fwo_sq_sge =
+ cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
/* RQ */
- if (rq->max_wqe) {
+ if (!qp->srq) {
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
- hwq_attr.stride = rq->wqe_size;
- hwq_attr.depth = rq->max_wqe;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq);
hwq_attr.aux_stride = 0;
hwq_attr.aux_depth = 0;
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
if (rc)
- goto fail_sq;
-
- rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
- GFP_KERNEL);
- if (!rq->swq) {
- rc = -ENOMEM;
+ goto sq_swq;
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
goto fail_rq;
- }
+
+ req.rq_size = cpu_to_le32(rq->max_wqe);
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
CMDQ_CREATE_QP_RQ_PG_SIZE_SFT);
pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK);
req.rq_pg_size_rq_lvl = pg_sz_lvl;
+ nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ 6 : rq->max_sge;
+ req.rq_fwo_rq_sge =
+ cpu_to_le16(((nsge &
+ CMDQ_CREATE_QP_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
} else {
/* SRQ */
- if (qp->srq) {
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
- req.srq_cid = cpu_to_le32(qp->srq->id);
- }
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
+ req.srq_cid = cpu_to_le32(qp->srq->id);
}
-
- if (qp->rcq)
- req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
if (qp->sig_type)
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+ if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
req.qp_flags = cpu_to_le32(qp_flags);
- req.sq_size = cpu_to_le32(sq->hwq.max_elements);
- req.rq_size = cpu_to_le32(rq->hwq.max_elements);
- qp->sq_hdr_buf = NULL;
- qp->rq_hdr_buf = NULL;
-
- rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
- if (rc)
- goto fail_rq;
-
- /* CTRL-22434: Irrespective of the requested SGE count on the SQ
- * always create the QP with max send sges possible if the requested
- * inline size is greater than 0.
- */
- max_ssge = qp->max_inline_data ? 6 : sq->max_sge;
- req.sq_fwo_sq_sge = cpu_to_le16(
- ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
- << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
- max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge;
- req.rq_fwo_rq_sge = cpu_to_le16(
- ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK)
- << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
/* ORRQ and IRRQ */
if (psn_sz) {
xrrq = &qp->orrq;
@@ -1078,7 +1069,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.type = HWQ_TYPE_CTX;
rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
if (rc)
- goto fail_buf_free;
+ goto rq_swq;
pbl = &xrrq->pbl[PBL_LVL_0];
req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
@@ -1113,30 +1104,29 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
sq->dbinfo.hwq = &sq->hwq;
sq->dbinfo.xid = qp->id;
sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
if (rq->max_wqe) {
rq->dbinfo.hwq = &rq->hwq;
rq->dbinfo.xid = qp->id;
rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
}
rcfw->qp_tbl[qp->id].qp_id = qp->id;
rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
return 0;
-
fail:
- if (qp->irrq.max_elements)
- bnxt_qplib_free_hwq(res, &qp->irrq);
+ bnxt_qplib_free_hwq(res, &qp->irrq);
fail_orrq:
- if (qp->orrq.max_elements)
- bnxt_qplib_free_hwq(res, &qp->orrq);
-fail_buf_free:
- bnxt_qplib_free_qp_hdr_buf(res, qp);
+ bnxt_qplib_free_hwq(res, &qp->orrq);
+rq_swq:
+ kfree(rq->swq);
fail_rq:
bnxt_qplib_free_hwq(res, &rq->hwq);
- kfree(rq->swq);
+sq_swq:
+ kfree(sq->swq);
fail_sq:
bnxt_qplib_free_hwq(res, &sq->hwq);
- kfree(sq->swq);
exit:
return rc;
}
@@ -1512,7 +1502,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
memset(sge, 0, sizeof(*sge));
if (qp->sq_hdr_buf) {
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ sw_prod = sq->swq_start;
sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
sw_prod * qp->sq_hdr_buf_size);
sge->lkey = 0xFFFFFFFF;
@@ -1526,7 +1516,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
- return HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ return rq->swq_start;
}
dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
@@ -1543,7 +1533,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
memset(sge, 0, sizeof(*sge));
if (qp->rq_hdr_buf) {
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ sw_prod = rq->swq_start;
sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
sw_prod * qp->rq_hdr_buf_size);
sge->lkey = 0xFFFFFFFF;
@@ -1562,6 +1552,8 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
u32 flg_npsn;
u32 op_spsn;
+ if (!swq->psn_search)
+ return;
psns = swq->psn_search;
psns_ext = swq->psn_ext;
@@ -1575,12 +1567,122 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
psns_ext->opcode_start_psn = cpu_to_le32(op_spsn);
psns_ext->flags_next_psn = cpu_to_le32(flg_npsn);
+ psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx);
} else {
psns->opcode_start_psn = cpu_to_le32(op_spsn);
psns->flags_next_psn = cpu_to_le32(flg_npsn);
}
}
+static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u16 *idx)
+{
+ struct bnxt_qplib_hwq *hwq;
+ int len, t_len, offt;
+ bool pull_dst = true;
+ void *il_dst = NULL;
+ void *il_src = NULL;
+ int t_cplen, cplen;
+ int indx;
+
+ hwq = &qp->sq.hwq;
+ t_len = 0;
+ for (indx = 0; indx < wqe->num_sge; indx++) {
+ len = wqe->sg_list[indx].size;
+ il_src = (void *)wqe->sg_list[indx].addr;
+ t_len += len;
+ if (t_len > qp->max_inline_data)
+ goto bad;
+ while (len) {
+ if (pull_dst) {
+ pull_dst = false;
+ il_dst = bnxt_qplib_get_prod_qe(hwq, *idx);
+ (*idx)++;
+ t_cplen = 0;
+ offt = 0;
+ }
+ cplen = min_t(int, len, sizeof(struct sq_sge));
+ cplen = min_t(int, cplen,
+ (sizeof(struct sq_sge) - offt));
+ memcpy(il_dst, il_src, cplen);
+ t_cplen += cplen;
+ il_src += cplen;
+ il_dst += cplen;
+ offt += cplen;
+ len -= cplen;
+ if (t_cplen == sizeof(struct sq_sge))
+ pull_dst = true;
+ }
+ }
+
+ return t_len;
+bad:
+ return -ENOMEM;
+}
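
The loop above copies inline payload into consecutive 16-byte sq_sge slots rather than into one fixed data array. A minimal user-space sketch of the same chunking, assuming only the 16-byte slot size; pack_inline and its single contiguous source are illustrative simplifications, not driver code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOT_SZ 16	/* sizeof(struct sq_sge) on this hardware */

/* Pack a payload into consecutive SLOT_SZ-byte slots, pulling a fresh
 * slot whenever the current one fills up.
 */
static int pack_inline(uint8_t slots[][SLOT_SZ], int nslots,
		       const uint8_t *src, int len)
{
	int slot = 0, off = 0, copied = 0;

	while (copied < len) {
		int chunk = len - copied;

		if (chunk > SLOT_SZ - off)
			chunk = SLOT_SZ - off;
		if (slot >= nslots)
			return -1;	/* queue would overflow */
		memcpy(&slots[slot][off], src + copied, chunk);
		off += chunk;
		copied += chunk;
		if (off == SLOT_SZ) {	/* slot full, move to the next */
			slot++;
			off = 0;
		}
	}
	return slot + (off ? 1 : 0);	/* slots consumed */
}

int main(void)
{
	uint8_t q[8][SLOT_SZ] = { { 0 } };
	uint8_t payload[40];

	memset(payload, 0xab, sizeof(payload));
	/* 40 inline bytes span ceil(40 / 16) = 3 slots */
	printf("slots used: %d\n", pack_inline(q, 8, payload, sizeof(payload)));
	return 0;
}
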
+
+static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_sge *ssge,
+ u16 nsge, u16 *idx)
+{
+ struct sq_sge *dsge;
+ int indx, len = 0;
+
+ for (indx = 0; indx < nsge; indx++, (*idx)++) {
+ dsge = bnxt_qplib_get_prod_qe(hwq, *idx);
+ dsge->va_or_pa = cpu_to_le64(ssge[indx].addr);
+ dsge->l_key = cpu_to_le32(ssge[indx].lkey);
+ dsge->size = cpu_to_le32(ssge[indx].size);
+ len += ssge[indx].size;
+ }
+
+ return len;
+}
+
+static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u16 *wqe_sz, u16 *qdf, u8 mode)
+{
+ u32 ilsize, bytes;
+ u16 nsge;
+ u16 slot;
+
+ nsge = wqe->num_sge;
+	/* sq_send_hdr is a slight misnomer here; the RQ header has the same size. */
+ bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data);
+ bytes = ALIGN(ilsize, sizeof(struct sq_sge));
+ bytes += sizeof(struct sq_send_hdr);
+ }
+
+ *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes);
+ slot = bytes >> 4;
+ *wqe_sz = slot;
+ if (mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ slot = 8;
+ return slot;
+}
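
Worked numbers for the sizing above, assuming the 32-byte sq_send_hdr and 16-byte sq_sge of this HSI: a two-SGE, non-inline send occupies 64 bytes (four slots), while static mode always charges the fixed 128-byte, eight-slot WQE.

#include <stdio.h>

#define HDR_BYTES	32	/* assumed sizeof(struct sq_send_hdr) */
#define SGE_BYTES	16	/* assumed sizeof(struct sq_sge) */

int main(void)
{
	unsigned int nsge = 2;
	unsigned int bytes = HDR_BYTES + nsge * SGE_BYTES;	/* 64 */
	unsigned int wqe_sz = bytes >> 4;			/* 4 slots */

	printf("variable mode: %u slots, static mode: 8 slots\n", wqe_sz);
	return 0;
}
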
+
+static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq,
+ struct bnxt_qplib_swq *swq)
+{
+ struct bnxt_qplib_hwq *hwq;
+ u32 pg_num, pg_indx;
+ void *buff;
+ u32 tail;
+
+ hwq = &sq->hwq;
+ if (!hwq->pad_pg)
+ return;
+ tail = swq->slot_idx / sq->dbinfo.max_slot;
+ pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride);
+ pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride);
+ buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride);
+ swq->psn_ext = buff;
+ swq->psn_search = buff;
+}
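
A worked pass through the index math above, assuming a 4 KiB page, an 8-byte sq_psn_search entry and a static-mode SQ with eight slots per WQE; the concrete values are only an example.

#include <stdio.h>

int main(void)
{
	unsigned int page_size = 4096, stride = 8, max_slot = 8;
	unsigned int per_page = page_size / stride;	/* 512 entries/page */
	unsigned int slot_idx = 40, pad_pgofft = 0;
	unsigned int tail = slot_idx / max_slot;		/* 5th WQE */
	unsigned int pg_num = (tail + pad_pgofft) / per_page;	/* page 0 */
	unsigned int pg_indx = (tail + pad_pgofft) % per_page;	/* entry 5 */

	printf("psn entry: page %u, byte offset %u\n", pg_num, pg_indx * stride);
	return 0;
}
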
+
void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *sq = &qp->sq;
@@ -1594,88 +1696,84 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_nq_work *nq_work = NULL;
int i, rc = 0, data_len = 0, pkt_num = 0;
struct bnxt_qplib_q *sq = &qp->sq;
- struct sq_send *hw_sq_send_hdr;
+ struct bnxt_qplib_hwq *hwq;
struct bnxt_qplib_swq *swq;
bool sch_handler = false;
- struct sq_sge *hw_sge;
- u8 wqe_size16;
+ u16 wqe_sz, qdf = 0;
+ void *base_hdr;
+ void *ext_hdr;
__le32 temp32;
- u32 sw_prod;
+ u32 wqe_idx;
+ u32 slots;
+ u16 idx;
- if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
- if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- sch_handler = true;
- dev_dbg(&sq->hwq.pdev->dev,
- "%s Error QP. Scheduling for poll_cq\n",
- __func__);
- goto queue_err;
- }
+ hwq = &sq->hwq;
+ if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS &&
+ qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
}
- if (bnxt_qplib_queue_full(sq)) {
- dev_err(&sq->hwq.pdev->dev,
+ slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode);
+ if (bnxt_qplib_queue_full(sq, slots + qdf)) {
+ dev_err(&hwq->pdev->dev,
"prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
- sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
- sq->q_full_delta);
+ hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta);
rc = -ENOMEM;
goto done;
}
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- swq = &sq->swq[sw_prod];
+
+ swq = bnxt_qplib_get_swqe(sq, &wqe_idx);
+ bnxt_qplib_pull_psn_buff(sq, swq);
+
+ idx = 0;
+ swq->slot_idx = hwq->prod;
+ swq->slots = slots;
swq->wr_id = wqe->wr_id;
swq->type = wqe->type;
swq->flags = wqe->flags;
+ swq->start_psn = sq->psn & BTH_PSN_MASK;
if (qp->sig_type)
swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
- swq->start_psn = sq->psn & BTH_PSN_MASK;
-
- hw_sq_send_hdr = bnxt_qplib_get_qe(&sq->hwq, sw_prod, NULL);
- memset(hw_sq_send_hdr, 0, sq->wqe_size);
- if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
- /* Copy the inline data */
- if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
- dev_warn(&sq->hwq.pdev->dev,
- "Inline data length > 96 detected\n");
- data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
- } else {
- data_len = wqe->inline_len;
- }
- memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len);
- wqe_size16 = (data_len + 15) >> 4;
- } else {
- for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data;
- i < wqe->num_sge; i++, hw_sge++) {
- hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
- hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
- hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
- data_len += wqe->sg_list[i].size;
- }
- /* Each SGE entry = 1 WQE size16 */
- wqe_size16 = wqe->num_sge;
- /* HW requires wqe size has room for atleast one SGE even if
- * none was supplied by ULP
- */
- if (!wqe->num_sge)
- wqe_size16++;
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
}
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE)
+ /* Copy the inline data */
+ data_len = bnxt_qplib_put_inline(qp, wqe, &idx);
+ else
+ data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge,
+ &idx);
+ if (data_len < 0)
+ goto queue_err;
/* Specifics */
switch (wqe->type) {
case BNXT_QPLIB_SWQE_TYPE_SEND:
if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ struct sq_send_raweth_qp1_hdr *sqe = base_hdr;
+ struct sq_raw_ext_hdr *ext_sqe = ext_hdr;
/* Assemble info for Raw Ethertype QPs */
- struct sq_send_raweth_qp1 *sqe =
- (struct sq_send_raweth_qp1 *)hw_sq_send_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->wqe_size = wqe_sz;
sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
sqe->length = cpu_to_le32(data_len);
- sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+ ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
@@ -1685,27 +1783,24 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
{
- struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr;
+ struct sq_ud_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_send_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
- sqe->inv_key_or_imm_data = cpu_to_le32(
- wqe->send.inv_key);
+ sqe->wqe_size = wqe_sz;
+ sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key);
if (qp->type == CMDQ_CREATE_QP_TYPE_UD ||
qp->type == CMDQ_CREATE_QP_TYPE_GSI) {
sqe->q_key = cpu_to_le32(wqe->send.q_key);
- sqe->dst_qp = cpu_to_le32(
- wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
sqe->length = cpu_to_le32(data_len);
- sqe->avid = cpu_to_le32(wqe->send.avid &
- SQ_SEND_AVID_MASK);
sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+ ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp &
+ SQ_SEND_DST_QP_MASK);
+ ext_sqe->avid = cpu_to_le32(wqe->send.avid &
+ SQ_SEND_AVID_MASK);
} else {
sqe->length = cpu_to_le32(data_len);
- sqe->dst_qp = 0;
- sqe->avid = 0;
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1718,16 +1813,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
{
- struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr;
+ struct sq_rdma_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_rdma_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->wqe_size = wqe_sz;
sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
sqe->length = cpu_to_le32((u32)data_len);
- sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
- sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+ ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+ ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1738,14 +1833,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
{
- struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr;
+ struct sq_atomic_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_atomic_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
- sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
- sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+ ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+ ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1755,8 +1851,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
{
- struct sq_localinvalidate *sqe =
- (struct sq_localinvalidate *)hw_sq_send_hdr;
+ struct sq_localinvalidate *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1766,7 +1861,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
{
- struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr;
+ struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_fr_pmr_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1790,14 +1886,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
wqe->frmr.pbl_ptr[i] = cpu_to_le64(
wqe->frmr.page_list[i] |
PTU_PTE_VALID);
- sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
- sqe->va = cpu_to_le64(wqe->frmr.va);
+ ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+ ext_sqe->va = cpu_to_le64(wqe->frmr.va);
break;
}
case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
{
- struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr;
+ struct sq_bind_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_bind_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1806,9 +1903,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
(wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
sqe->l_key = cpu_to_le32(wqe->bind.r_key);
- sqe->va = cpu_to_le64(wqe->bind.va);
- temp32 = cpu_to_le32(wqe->bind.length);
- memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length));
+ ext_sqe->va = cpu_to_le64(wqe->bind.va);
+ ext_sqe->length_lo = cpu_to_le32(wqe->bind.length);
break;
}
default:
@@ -1817,23 +1913,11 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
goto done;
}
swq->next_psn = sq->psn & BTH_PSN_MASK;
- if (qp->type == CMDQ_CREATE_QP_TYPE_RC)
- bnxt_qplib_fill_psn_search(qp, wqe, swq);
+ bnxt_qplib_fill_psn_search(qp, wqe, swq);
queue_err:
- if (sch_handler) {
- /* Store the ULP info in the software structures */
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- swq = &sq->swq[sw_prod];
- swq->wr_id = wqe->wr_id;
- swq->type = wqe->type;
- swq->flags = wqe->flags;
- if (qp->sig_type)
- swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
- swq->start_psn = sq->psn & BTH_PSN_MASK;
- }
- sq->hwq.prod++;
+ bnxt_qplib_swq_mod_start(sq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
qp->wqe_cnt++;
-
done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -1843,7 +1927,7 @@ done:
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
- dev_err(&sq->hwq.pdev->dev,
+ dev_err(&hwq->pdev->dev,
"FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM;
}
@@ -1863,58 +1947,65 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
{
struct bnxt_qplib_nq_work *nq_work = NULL;
struct bnxt_qplib_q *rq = &qp->rq;
+ struct rq_wqe_hdr *base_hdr;
+ struct rq_ext_hdr *ext_hdr;
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_swq *swq;
bool sch_handler = false;
- struct sq_sge *hw_sge;
- struct rq_wqe *rqe;
- int i, rc = 0;
- u32 sw_prod;
+ u16 wqe_sz, idx;
+ u32 wqe_idx;
+ int rc = 0;
- if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- sch_handler = true;
- dev_dbg(&rq->hwq.pdev->dev,
- "%s: Error QP. Scheduling for poll_cq\n", __func__);
- goto queue_err;
+ hwq = &rq->hwq;
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
}
- if (bnxt_qplib_queue_full(rq)) {
- dev_err(&rq->hwq.pdev->dev,
+
+ if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) {
+ dev_err(&hwq->pdev->dev,
"FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL;
goto done;
}
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- rq->swq[sw_prod].wr_id = wqe->wr_id;
- rqe = bnxt_qplib_get_qe(&rq->hwq, sw_prod, NULL);
- memset(rqe, 0, rq->wqe_size);
+ swq = bnxt_qplib_get_swqe(rq, &wqe_idx);
+ swq->wr_id = wqe->wr_id;
+ swq->slots = rq->dbinfo.max_slot;
- /* Calculate wqe_size16 and data_len */
- for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
- i < wqe->num_sge; i++, hw_sge++) {
- hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
- hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
- hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s: Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
}
- rqe->wqe_type = wqe->type;
- rqe->flags = wqe->flags;
- rqe->wqe_size = wqe->num_sge +
- ((offsetof(typeof(*rqe), data) + 15) >> 4);
- /* HW requires wqe size has room for atleast one SGE even if none
- * was supplied by ULP
- */
- if (!wqe->num_sge)
- rqe->wqe_size++;
-
- /* Supply the rqe->wr_id index to the wr_id_tbl for now */
- rqe->wr_id[0] = cpu_to_le32(sw_prod);
+ idx = 0;
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+ wqe_sz = (sizeof(struct rq_wqe_hdr) +
+ wqe->num_sge * sizeof(struct sq_sge)) >> 4;
+ bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx);
+ if (!wqe->num_sge) {
+ struct sq_sge *sge;
+
+ sge = bnxt_qplib_get_prod_qe(hwq, idx++);
+ sge->size = 0;
+ wqe_sz++;
+ }
+ base_hdr->wqe_type = wqe->type;
+ base_hdr->flags = wqe->flags;
+ base_hdr->wqe_size = wqe_sz;
+ base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
queue_err:
- if (sch_handler) {
- /* Store the ULP info in the software structures */
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- rq->swq[sw_prod].wr_id = wqe->wr_id;
- }
-
- rq->hwq.prod++;
+ bnxt_qplib_swq_mod_start(rq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
if (nq_work) {
@@ -1923,12 +2014,12 @@ queue_err:
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
- dev_err(&rq->hwq.pdev->dev,
+ dev_err(&hwq->pdev->dev,
"FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM;
}
}
-done:
+
return rc;
}
@@ -2026,20 +2117,19 @@ int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
struct bnxt_qplib_cqe **pcqe, int *budget)
{
- u32 sw_prod, sw_cons;
struct bnxt_qplib_cqe *cqe;
+ u32 start, last;
int rc = 0;
/* Now complete all outstanding SQEs with FLUSHED_ERR */
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ start = sq->swq_start;
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == sw_prod) {
+ last = sq->swq_last;
+ if (start == last)
break;
- }
/* Skip the FENCE WQE completions */
- if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) {
+ if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) {
bnxt_qplib_cancel_phantom_processing(qp);
goto skip_compl;
}
@@ -2047,16 +2137,17 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->wr_id = sq->swq[last].wr_id;
cqe->src_qp = qp->id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->type = sq->swq[last].type;
cqe++;
(*budget)--;
skip_compl:
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots);
+ sq->swq_last = sq->swq[last].next_idx;
}
*pcqe = cqe;
- if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod)
+ if (!(*budget) && sq->swq_last != start)
/* Out of budget */
rc = -EAGAIN;
@@ -2067,9 +2158,9 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
struct bnxt_qplib_cqe **pcqe, int *budget)
{
struct bnxt_qplib_cqe *cqe;
- u32 sw_prod, sw_cons;
- int rc = 0;
+ u32 start, last;
int opcode = 0;
+ int rc = 0;
switch (qp->type) {
case CMDQ_CREATE_QP1_TYPE_GSI:
@@ -2085,24 +2176,25 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
}
/* Flush the rest of the RQ */
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ start = rq->swq_start;
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq);
- if (sw_cons == sw_prod)
+ last = rq->swq_last;
+ if (last == start)
break;
memset(cqe, 0, sizeof(*cqe));
cqe->status =
CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = opcode;
cqe->qp_handle = (unsigned long)qp;
- cqe->wr_id = rq->swq[sw_cons].wr_id;
+ cqe->wr_id = rq->swq[last].wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots);
+ rq->swq_last = rq->swq[last].next_idx;
}
*pcqe = cqe;
- if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod)
+ if (!*budget && rq->swq_last != start)
/* Out of budget */
rc = -EAGAIN;
@@ -2125,7 +2217,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
* CQE is track from sw_cq_cons to max_element but valid only if VALID=1
*/
static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
- u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+ u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
{
u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
struct bnxt_qplib_q *sq = &qp->sq;
@@ -2138,7 +2230,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
/* Normal mode */
/* Check for the psn_search marking before completing */
- swq = &sq->swq[sw_sq_cons];
+ swq = &sq->swq[swq_last];
if (swq->psn_search &&
le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
/* Unmark */
@@ -2147,7 +2239,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
& ~0x80000000);
dev_dbg(&cq->hwq.pdev->dev,
"FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
- cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
sq->condition = true;
sq->send_phantom = true;
@@ -2184,9 +2276,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
le64_to_cpu
(peek_req_hwcqe->qp_handle));
peek_sq = &peek_qp->sq;
- peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
- peek_req_hwcqe->sq_cons_idx) - 1
- , &sq->hwq);
+ peek_sq_cons_idx =
+ ((le16_to_cpu(
+ peek_req_hwcqe->sq_cons_idx)
+ - 1) % sq->max_wqe);
/* If the hwcqe's sq's wr_id matches */
if (peek_sq == sq &&
sq->swq[peek_sq_cons_idx].wr_id ==
@@ -2214,7 +2307,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
}
dev_err(&cq->hwq.pdev->dev,
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
- cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
rc = -EINVAL;
}
out:
@@ -2226,11 +2319,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe, int *budget,
u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
{
- u32 sw_sq_cons, cqe_sq_cons;
struct bnxt_qplib_swq *swq;
struct bnxt_qplib_cqe *cqe;
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq;
+ u32 cqe_sq_cons;
int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -2242,14 +2335,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
sq = &qp->sq;
- cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
- if (cqe_sq_cons > sq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
- cqe_sq_cons, sq->hwq.max_elements);
- return -EINVAL;
- }
-
+ cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
@@ -2261,12 +2347,11 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
*/
cqe = *pcqe;
while (*budget) {
- sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_sq_cons == cqe_sq_cons)
+ if (sq->swq_last == cqe_sq_cons)
/* Done */
break;
- swq = &sq->swq[sw_sq_cons];
+ swq = &sq->swq[sq->swq_last];
memset(cqe, 0, sizeof(*cqe));
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
@@ -2280,12 +2365,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
* of the request being signaled or not, it must complete with
* the hwcqe error status
*/
- if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
+ if (swq->next_idx == cqe_sq_cons &&
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
- sw_sq_cons, cqe->wr_id, cqe->status);
+ sq->swq_last, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
bnxt_qplib_mark_qp_error(qp);
@@ -2293,7 +2378,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
bnxt_qplib_add_flush_qp(qp);
} else {
/* Before we complete, do WA 9060 */
- if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ if (do_wa9060(qp, cq, cq_cons, sq->swq_last,
cqe_sq_cons)) {
*lib_qp = qp;
goto out;
@@ -2305,13 +2390,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
}
skip:
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots);
+ sq->swq_last = swq->next_idx;
if (sq->single)
break;
}
out:
*pcqe = cqe;
- if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
+ if (sq->swq_last != cqe_sq_cons) {
/* Out of budget */
rc = -EAGAIN;
goto done;
@@ -2386,17 +2472,23 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (wr_id_idx != rq->swq_last)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2467,18 +2559,24 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2569,17 +2667,23 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2601,7 +2705,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq, *rq;
struct bnxt_qplib_cqe *cqe;
- u32 sw_cons = 0, cqe_cons;
+ u32 swq_last = 0, cqe_cons;
int rc = 0;
/* Check the Status */
@@ -2627,13 +2731,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
if (cqe_cons == 0xFFFF)
goto do_rq;
-
- if (cqe_cons > sq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
- cqe_cons, sq->hwq.max_elements);
- goto do_rq;
- }
+ cqe_cons %= sq->max_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
@@ -2647,24 +2745,25 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
*/
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == cqe_cons)
+ swq_last = sq->swq_last;
+ if (swq_last == cqe_cons)
break;
- if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
memset(cqe, 0, sizeof(*cqe));
cqe->status = CQ_REQ_STATUS_OK;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
cqe->src_qp = qp->id;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->wr_id = sq->swq[swq_last].wr_id;
+ cqe->type = sq->swq[swq_last].type;
cqe++;
(*budget)--;
}
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots);
+ sq->swq_last = sq->swq[swq_last].next_idx;
}
*pcqe = cqe;
- if (!(*budget) && sw_cons != cqe_cons) {
+ if (!(*budget) && swq_last != cqe_cons) {
/* Out of budget */
rc = -EAGAIN;
goto sq_done;
@@ -2676,10 +2775,10 @@ do_rq:
cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx);
if (cqe_cons == 0xFFFF) {
goto done;
- } else if (cqe_cons > rq->hwq.max_elements) {
+ } else if (cqe_cons > rq->max_wqe - 1) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
- cqe_cons, rq->hwq.max_elements);
+ cqe_cons, rq->max_wqe);
goto done;
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 568ca390322c..f50784405e27 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -39,6 +39,51 @@
#ifndef __BNXT_QPLIB_FP_H__
#define __BNXT_QPLIB_FP_H__
+/* A few helper structures are temporarily defined here;
+ * get rid of them once roce_hsi.h is updated in the
+ * original code base.
+ */
+struct sq_ud_ext_hdr {
+ __le32 dst_qp;
+ __le32 avid;
+ __le64 rsvd;
+};
+
+struct sq_raw_ext_hdr {
+ __le32 cfa_meta;
+ __le32 rsvd0;
+ __le64 rsvd1;
+};
+
+struct sq_rdma_ext_hdr {
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 rsvd;
+};
+
+struct sq_atomic_ext_hdr {
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
+struct sq_fr_pmr_ext_hdr {
+ __le64 pblptr;
+ __le64 va;
+};
+
+struct sq_bind_ext_hdr {
+ __le64 va;
+ __le32 length_lo;
+ __le32 length_hi;
+};
+
+struct rq_ext_hdr {
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
+
+/* Helper structures end */
+
struct bnxt_qplib_srq {
struct bnxt_qplib_pd *pd;
struct bnxt_qplib_dpi *dpi;
@@ -74,6 +119,8 @@ struct bnxt_qplib_swq {
u8 flags;
u32 start_psn;
u32 next_psn;
+ u32 slot_idx;
+ u8 slots;
struct sq_psn_search *psn_search;
struct sq_psn_search_ext *psn_ext;
};
@@ -213,6 +260,8 @@ struct bnxt_qplib_q {
u32 phantom_cqe_cnt;
u32 next_cq_cons;
bool flushed;
+ u32 swq_start;
+ u32 swq_last;
};
struct bnxt_qplib_qp {
@@ -224,9 +273,10 @@ struct bnxt_qplib_qp {
u32 id;
u8 type;
u8 sig_type;
- u32 modify_flags;
+ u8 wqe_mode;
u8 state;
u8 cur_qp_state;
+ u64 modify_flags;
u32 max_inline_data;
u32 mtu;
u8 path_mtu;
@@ -300,11 +350,18 @@ struct bnxt_qplib_qp {
(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
!((raw_cons) & (cp_bit)))
-static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
+ u8 slots)
{
- return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
- &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
- &qplib_q->hwq);
+ struct bnxt_qplib_hwq *hwq;
+ int avail;
+
+ hwq = &que->hwq;
+ /* False full is possible, retrying post-send makes sense */
+ avail = hwq->cons - hwq->prod;
+ if (hwq->cons <= hwq->prod)
+ avail += hwq->depth;
+ return avail <= slots;
}
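
A stand-alone model of the slot-based occupancy rule above, with prod, cons and depth as plain integers instead of a real hwq; the numbers are only an example.

#include <stdbool.h>
#include <stdio.h>

/* Same availability rule as bnxt_qplib_queue_full(), on plain integers. */
static bool ring_full(int prod, int cons, int depth, int need)
{
	int avail = cons - prod;

	if (cons <= prod)
		avail += depth;
	return avail <= need;
}

int main(void)
{
	/* depth 64, producer at 60, consumer at 10: 10 - 60 + 64 = 14 free */
	printf("16 slots: %s\n", ring_full(60, 10, 64, 16) ? "full" : "fits");
	printf(" 8 slots: %s\n", ring_full(60, 10, 64, 8) ? "full" : "fits");
	return 0;
}
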
struct bnxt_qplib_cqe {
@@ -489,4 +546,64 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe *cqe,
int num_cqes);
void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
+
+static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
+{
+ u32 idx;
+
+ idx = que->swq_start;
+ if (swq_idx)
+ *swq_idx = idx;
+ return &que->swq[idx];
+}
+
+static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx)
+{
+ que->swq_start = que->swq[idx].next_idx;
+}
+
+static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que)
+{
+ return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge);
+}
+
+static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ que->max_wqe : bnxt_qplib_get_depth(que);
+}
+
+static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ sizeof(struct sq_send) / sizeof(struct sq_sge) : 1;
+}
+
+static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size)
+{
+ return (wqe_size / sizeof(struct sq_sge));
+}
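
Worked numbers for the three sizing helpers above, assuming a queue created with 128-byte WQEs and 256 entries, and the 16-byte sq_sge / 128-byte sq_send of this HSI.

#include <stdio.h>

int main(void)
{
	unsigned int wqe_size = 128, max_wqe = 256;
	unsigned int sge_sz = 16, send_sz = 128;
	unsigned int depth = (wqe_size * max_wqe) / sge_sz;	/* 2048 slots */
	unsigned int sq_static = max_wqe;			/* 256 WQEs */
	unsigned int sq_variable = depth;			/* 2048 slots */
	unsigned int slot_static = send_sz / sge_sz;		/* 8 */
	unsigned int slot_variable = 1;

	printf("depth=%u sq_size: static=%u variable=%u\n",
	       depth, sq_static, sq_variable);
	printf("max_slot: static=%u variable=%u\n",
	       slot_static, slot_variable);
	return 0;
}
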
+
+static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes)
+{
+	/* For Cu/Wh: delta = 128, stride = 16, wqe_bytes = 128.
+	 * For Gen-p5 B/C mode: delta = 0, stride = 16, wqe_bytes = 128.
+	 * For Gen-p5 when 8916 is disabled: delta = 0, stride = 16,
+	 * 32 <= wqe_bytes <= 512.
+	 */
+ return (delta * wqe_bytes) / sizeof(struct sq_sge);
+}
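
The Cu/Wh case from the comment, worked out: a q_full_delta of 128 WQEs at 128 bytes each translates into 1024 reserved 16-byte slots.

#include <stdio.h>

int main(void)
{
	unsigned int delta = 128, wqe_bytes = 128, sge_sz = 16;

	printf("qdf = %u slots\n", (delta * wqe_bytes) / sge_sz);	/* 1024 */
	return 0;
}
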
+
+static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max)
+{
+ u16 size = 0;
+ int indx;
+
+ for (indx = 0; indx < wqe->num_sge; indx++)
+ size += wqe->sg_list[indx].size;
+ if (size > max)
+ size = max;
+
+ return size;
+}
#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index c29cbd3a2d7b..9da470d1e4a3 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -41,6 +41,28 @@
extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+#define CHIP_NUM_57508 0x1750
+#define CHIP_NUM_57504 0x1751
+#define CHIP_NUM_57502 0x1752
+
+enum bnxt_qplib_wqe_mode {
+ BNXT_QPLIB_WQE_MODE_STATIC = 0x00,
+ BNXT_QPLIB_WQE_MODE_VARIABLE = 0x01,
+ BNXT_QPLIB_WQE_MODE_INVALID = 0x02
+};
+
+struct bnxt_qplib_drv_modes {
+ u8 wqe_mode;
+ /* Other modes to follow here */
+};
+
+struct bnxt_qplib_chip_ctx {
+ u16 chip_num;
+ u8 chip_rev;
+ u8 chip_metal;
+ struct bnxt_qplib_drv_modes modes;
+};
+
#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
@@ -141,6 +163,9 @@ struct bnxt_qplib_hwq {
u32 cons; /* raw */
u8 cp_bit;
u8 is_user;
+ u64 *pad_pg;
+ u32 pad_stride;
+ u32 pad_pgofft;
};
struct bnxt_qplib_db_info {
@@ -148,6 +173,7 @@ struct bnxt_qplib_db_info {
void __iomem *priv_db;
struct bnxt_qplib_hwq *hwq;
u32 xid;
+ u32 max_slot;
};
/* Tables */
@@ -230,16 +256,6 @@ struct bnxt_qplib_ctx {
u64 hwrm_intf_ver;
};
-struct bnxt_qplib_chip_ctx {
- u16 chip_num;
- u8 chip_rev;
- u8 chip_metal;
-};
-
-#define CHIP_NUM_57508 0x1750
-#define CHIP_NUM_57504 0x1751
-#define CHIP_NUM_57502 0x1752
-
struct bnxt_qplib_res {
struct pci_dev *pdev;
struct bnxt_qplib_chip_ctx *cctx;
@@ -317,6 +333,14 @@ static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq,
return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
}
+static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx)
+{
+ idx += hwq->prod;
+ if (idx >= hwq->depth)
+ idx -= hwq->depth;
+ return bnxt_qplib_get_qe(hwq, idx, NULL);
+}
+
#define to_bnxt_qplib(ptr, type, member) \
container_of(ptr, type, member)
@@ -351,6 +375,17 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
struct bnxt_qplib_ctx *ctx,
bool virt_fn, bool is_p5);
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
+{
+ hwq->prod = (hwq->prod + cnt) % hwq->depth;
+}
+
+static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq,
+ u32 cnt)
+{
+ hwq->cons = (hwq->cons + cnt) % hwq->depth;
+}
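
Unlike the old HWQ_CMP masking, the modulo form above does not require the depth to be a power of two. A trivial illustration with example values:

#include <stdio.h>

int main(void)
{
	unsigned int depth = 1536, prod = 1530, cons = 1534;

	prod = (prod + 16) % depth;	/* 1530 + 16 wraps to 10 */
	cons = (cons + 8) % depth;	/* 1534 + 8 wraps to 6 */
	printf("prod=%u cons=%u\n", prod, cons);
	return 0;
}
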
+
static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
bool arm)
{
@@ -383,8 +418,7 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info,
key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
key <<= 32;
- key |= (info->hwq->prod & (info->hwq->max_elements - 1)) &
- DBC_DBC_INDEX_MASK;
+ key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK;
writeq(key, info->db);
}
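
With hwq->prod now counted in 16-byte slots, the doorbell index is recovered by dividing by max_slot. An illustrative calculation, assuming a static-mode SQ with eight slots per WQE:

#include <stdio.h>

int main(void)
{
	unsigned int prod_slots = 24, max_slot = 8;

	/* three fixed 128-byte WQEs posted = 24 slots -> doorbell index 3 */
	printf("doorbell index = %u\n", prod_slots / max_slot);
	return 0;
}
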
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 6f00f07420b7..3e40e0d76efd 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -1126,6 +1126,7 @@ struct cmdq_create_qp {
#define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
#define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
#define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL
u8 type;
#define CMDQ_CREATE_QP_TYPE_RC 0x2UL
#define CMDQ_CREATE_QP_TYPE_UD 0x4UL
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index e8e11bd95e42..2b2b009b371a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -980,7 +980,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int c4iw_dealloc_mw(struct ib_mw *mw);
@@ -1053,8 +1053,9 @@ int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
-typedef int c4iw_restrack_func(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
-extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX];
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr);
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq);
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp);
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id);
#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 962dc97a8ff2..73936c3341b7 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
mhp->ibmr.length = mhp->attr.len;
- mhp->ibmr.iova = mhp->attr.va_fbo;
mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
@@ -691,7 +690,7 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
}
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index ba83d942997c..6c579d2d3997 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -236,14 +236,6 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
return 0;
}
-static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- pr_debug("ibdev %p\n", ibdev);
- *pkey = 0;
- return 0;
-}
-
static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
@@ -317,7 +309,6 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
props->max_msg_sz = -1;
return ret;
@@ -439,7 +430,6 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
@@ -458,13 +448,6 @@ static void get_dev_fw_str(struct ib_device *dev, char *str)
FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
}
-static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
-{
- return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) &&
- c4iw_restrack_funcs[res->type]) ?
- c4iw_restrack_funcs[res->type](msg, res) : 0;
-}
-
static const struct ib_device_ops c4iw_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_CXGB4,
@@ -485,7 +468,9 @@ static const struct ib_device_ops c4iw_dev_ops = {
.destroy_cq = c4iw_destroy_cq,
.destroy_qp = c4iw_destroy_qp,
.destroy_srq = c4iw_destroy_srq,
- .fill_res_entry = fill_res_entry,
+ .fill_res_cq_entry = c4iw_fill_res_cq_entry,
+ .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry,
+ .fill_res_mr_entry = c4iw_fill_res_mr_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = c4iw_get_dma_mr,
.get_hw_stats = c4iw_get_mib,
@@ -508,7 +493,6 @@ static const struct ib_device_ops c4iw_dev_ops = {
.post_srq_recv = c4iw_post_srq_recv,
.query_device = c4iw_query_device,
.query_gid = c4iw_query_gid,
- .query_pkey = c4iw_query_pkey,
.query_port = c4iw_query_port,
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index f82d46ed969d..b32e6516d65f 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -134,10 +134,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_qp_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
{
- struct ib_qp *ibqp = container_of(res, struct ib_qp, res);
struct t4_swsqe *fsp = NULL, *lsp = NULL;
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
u16 first_sq_idx = 0, last_sq_idx = 0;
@@ -195,10 +193,9 @@ union union_ep {
struct c4iw_ep ep;
};
-static int fill_res_ep_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
+ struct rdma_cm_id *cm_id)
{
- struct rdma_cm_id *cm_id = rdma_res_to_id(res);
struct nlattr *table_attr;
struct c4iw_ep_common *epcp;
struct c4iw_listen_ep *listen_ep = NULL;
@@ -372,10 +369,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_cq_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq)
{
- struct ib_cq *ibcq = container_of(res, struct ib_cq, res);
struct c4iw_cq *chp = to_c4iw_cq(ibcq);
struct nlattr *table_attr;
struct t4_cqe hwcqes[2];
@@ -433,10 +428,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
struct c4iw_dev *dev = mhp->rhp;
u32 stag = mhp->attr.stag;
@@ -492,10 +485,3 @@ err_cancel_table:
err:
return -EMSGSIZE;
}
-
-c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = {
- [RDMA_RESTRACK_QP] = fill_res_qp_entry,
- [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry,
- [RDMA_RESTRACK_CQ] = fill_res_cq_entry,
- [RDMA_RESTRACK_MR] = fill_res_mr_entry,
-};
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index bef2bd291054..5484b08bbc5d 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -606,8 +606,8 @@ struct efa_admin_feature_queue_attr_desc {
/* Number of sub-CQs to be created for each CQ */
u16 sub_cqs_per_cq;
- /* MBZ */
- u16 reserved;
+ /* Minimum number of WQEs per SQ */
+ u16 min_sq_depth;
/* Maximum number of SGEs (buffers) allowed for a single send WQE */
u16 max_wr_send_sges;
@@ -632,6 +632,17 @@ struct efa_admin_feature_queue_attr_desc {
/* Maximum number of SGEs for a single RDMA read WQE */
u16 max_wr_rdma_sges;
+
+ /*
+ * Maximum number of bytes that can be written to SQ between two
+ * consecutive doorbells (in units of 64B). Driver must ensure that only
+ * complete WQEs are written to queue before issuing a doorbell.
+ * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can
+ * be written to SQ between two consecutive doorbells. max_tx_batch=11
+ * and WQE size = 128B, means up to 5 WQEs can be written to SQ between
+ * two consecutive doorbells. Zero means unlimited.
+ */
+ u16 max_tx_batch;
};
struct efa_admin_feature_aenq_desc {
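
Restating the comment's batching examples as a small sketch; wqes_per_batch is an illustrative helper, not part of the EFA driver.

#include <stdio.h>

/* WQEs per doorbell batch = the 64B-unit budget divided by the WQE size. */
static unsigned int wqes_per_batch(unsigned int max_tx_batch,
				   unsigned int wqe_size)
{
	if (!max_tx_batch)
		return 0;	/* zero means unlimited */
	return (max_tx_batch * 64) / wqe_size;
}

int main(void)
{
	printf("%u\n", wqes_per_batch(16, 64));		/* 16 WQEs */
	printf("%u\n", wqes_per_batch(11, 128));	/* 5 WQEs */
	return 0;
}
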
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index fabd8df2e78f..6ac23627f65a 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -480,6 +480,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->max_llq_size = resp.u.queue_attr.max_llq_size;
result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges;
+ result->max_tx_batch = resp.u.queue_attr.max_tx_batch;
+ result->min_sq_depth = resp.u.queue_attr.min_sq_depth;
err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR);
if (err) {
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 41ce4a476ee6..190bac23f585 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -127,6 +127,8 @@ struct efa_com_get_device_attr_result {
u16 max_sq_sge;
u16 max_rq_sge;
u16 max_wr_rdma_sge;
+ u16 max_tx_batch;
+ u16 min_sq_depth;
u8 db_bar;
};
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 82145574c928..92d701146320 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -12,10 +12,12 @@
#include "efa.h"
-#define PCI_DEV_ID_EFA_VF 0xefa0
+#define PCI_DEV_ID_EFA0_VF 0xefa0
+#define PCI_DEV_ID_EFA1_VF 0xefa1
static const struct pci_device_id efa_pci_tbl[] = {
- { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
{ }
};
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 7dd082441333..9e201f169289 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1502,11 +1502,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
return efa_com_dealloc_uar(&dev->edev, &params);
}
+#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
+ (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
+ NULL : #_attr)
+
+static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
+ const struct efa_ibv_alloc_ucontext_cmd *cmd)
+{
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ char *attr_str;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
+ goto err;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
+ attr_str))
+ goto err;
+
+ return 0;
+
+err:
+ ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
+ attr_str);
+ return -EOPNOTSUPP;
+}
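
A stand-alone restatement of the rule EFA_CHECK_USER_COMP encodes above: a non-zero device attribute may only be used if userspace acknowledged the matching comp_mask bit. check_attr and the bit value are hypothetical, for illustration only.

#include <stdio.h>

static const char *check_attr(unsigned int dev_val, unsigned int comp_mask,
			      unsigned int bit, const char *name)
{
	/* NULL means compatible: attribute unused, or acked by userspace */
	return (!dev_val || (comp_mask & bit)) ? NULL : name;
}

int main(void)
{
	unsigned int comp_mask = 0;	/* an old provider acks nothing */
	const char *bad;

	bad = check_attr(16 /* device max_tx_batch */, comp_mask, 1u << 0,
			 "max_tx_batch");
	if (bad)
		printf("handshake failed for %s\n", bad);
	return 0;
}
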
+
int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
struct efa_ibv_alloc_ucontext_resp resp = {};
+ struct efa_ibv_alloc_ucontext_cmd cmd = {};
struct efa_com_alloc_uar_result result;
int err;
@@ -1515,6 +1543,18 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
* we will ack input fields in our response.
*/
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Cannot copy udata for alloc_ucontext\n");
+ goto err_out;
+ }
+
+ err = efa_user_comp_handshake(ibucontext, &cmd);
+ if (err)
+ goto err_out;
+
err = efa_com_alloc_uar(&dev->edev, &result);
if (err)
goto err_out;
@@ -1526,6 +1566,8 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
resp.inline_buf_size = dev->dev_attr.inline_buf_size;
resp.max_llq_size = dev->dev_attr.max_llq_size;
+ resp.max_tx_batch = dev->dev_attr.max_tx_batch;
+ resp.min_sq_wr = dev->dev_attr.min_sq_depth;
if (udata && udata->outlen) {
err = ib_copy_to_udata(udata, &resp,
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 15f9c635f292..7eaf99538216 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -7317,11 +7317,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
case 1: return OPA_LINK_WIDTH_1X;
case 2: return OPA_LINK_WIDTH_2X;
case 3: return OPA_LINK_WIDTH_3X;
+ case 4: return OPA_LINK_WIDTH_4X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
__func__, width);
- /* fall through */
- case 4: return OPA_LINK_WIDTH_4X;
+ return OPA_LINK_WIDTH_4X;
}
}
@@ -7376,12 +7376,13 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
case 0:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
break;
+ case 1:
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
+ break;
default:
dd_dev_err(dd,
"%s: unexpected max rate %d, using 25Gb\n",
__func__, (int)max_rate);
- /* fall through */
- case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
break;
}
@@ -12878,11 +12879,6 @@ bail:
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
- default:
- dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
- /* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
case LSTATE_INIT:
@@ -12891,6 +12887,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
return IB_PORT_ARMED;
case LSTATE_ACTIVE:
return IB_PORT_ACTIVE;
+ default:
+ dd_dev_err(dd,
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
+ return IB_PORT_DOWN;
}
}
@@ -12898,10 +12899,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
{
/* look at the HFI meta-states only */
switch (chip_pstate & 0xf0) {
- default:
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
- /* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
case PLS_OFFLINE:
@@ -12914,6 +12911,10 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
return IB_PORTPHYSSTATE_LINKUP;
case PLS_PHYTEST:
return IB_PORTPHYSSTATE_PHY_TEST;
+ default:
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
+ chip_pstate);
+ return IB_PORTPHYSSTATE_DISABLED;
}
}
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 2b57ba70ddd6..0e83d4b61e46 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1868,11 +1868,8 @@ int parse_platform_config(struct hfi1_devdata *dd)
2;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
pcfgcache->config_tables[table_type].num_table =
table_length_dwords;
@@ -1890,15 +1887,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* metadata table */
switch (table_type) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
@@ -2027,15 +2019,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
switch (table) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
if (field && field < platform_config_table_limits[table])
src_ptr =
@@ -2138,11 +2125,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
pcfgcache->config_tables[table_type].table;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
src_ptr = pcfgcache->config_tables[table_type].table;
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 7073f237a949..3222e3acb79c 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -721,7 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
- /* fall through */
+ fallthrough;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -1272,7 +1272,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
case IB_PORT_NOP:
if (phys_state == IB_PORTPHYSSTATE_NOP)
break;
- /* FALLTHROUGH */
+ fallthrough;
case IB_PORT_DOWN:
if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
@@ -2300,7 +2300,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
* can be changed from the default values
*/
case OPA_VLARB_PREEMPT_ELEMENTS:
- /* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
break;
@@ -4170,7 +4169,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4240,7 +4239,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
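The mad.c hunks above, and most of the hfi1 changes that follow, replace the free-form /* fall through */ and /* FALLTHROUGH */ comments with the fallthrough; pseudo-keyword from linux/compiler_attributes.h. It expands to __attribute__((fallthrough)) on compilers that support the attribute, so -Wimplicit-fallthrough can verify the intent instead of trusting a comment. A minimal sketch:

#include <linux/compiler_attributes.h>	/* defines fallthrough */
#include <linux/errno.h>

static int demo_classify(int op)
{
	int flags = 0;

	switch (op) {
	case 1:
		flags |= 0x1;
		fallthrough;	/* op 1 deliberately also gets op 2's handling */
	case 2:
		flags |= 0x2;
		break;
	default:
		return -EINVAL;
	}

	return flags;
}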
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 1a6268d61977..18d32f053d26 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -306,7 +306,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
@@ -334,10 +334,14 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/* restore command and BARs after a reset has wiped them out */
+/**
+ * restore_pci_variables() - restore command and BARs after a reset has wiped them out
+ * @dd: the hfi1 devdata
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int restore_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
if (ret)
@@ -386,13 +390,17 @@ int restore_pci_variables(struct hfi1_devdata *dd)
error:
dd_dev_err(dd, "Unable to write to PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
-/* Save BARs and command to rewrite after device reset */
+/**
+ * save_pci_variables() - save BARs and command to rewrite after device reset
+ * @dd: the hfi1 devdata
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int save_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
&dd->pcibar0);
@@ -441,7 +449,7 @@ int save_pci_variables(struct hfi1_devdata *dd)
error:
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
/*
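The pcie.c hunks matter because pci_read_config_*() and pci_write_config_*() return positive PCIBIOS_* codes rather than negative errnos; returning them unchanged from a function whose callers test for -Exxx silently breaks the error contract. Routing the result through pcibios_err_to_errno() keeps the convention, roughly:

#include <linux/pci.h>

/* Sketch: translate a PCIBIOS_* result into a negative errno for callers. */
static int demo_read_vendor_id(struct pci_dev *pdev, u16 *vendor)
{
	int ret = pci_read_config_word(pdev, PCI_VENDOR_ID, vendor);

	return ret ? pcibios_err_to_errno(ret) : 0;
}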
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 79126b2b14ab..ff864f6f0266 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -86,7 +86,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
switch (op) {
case PSC_GLOBAL_ENABLE:
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
- /* Fall through */
+ fallthrough;
case PSC_DATA_VL_ENABLE:
mask = 0;
for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 03024cec78dd..b12e4665c9ab 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -191,22 +191,22 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 6:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 5:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 4:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 3:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 2:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 1:
*dest++ = *src++;
/* fall through */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 36593f2efe26..4642d6ceb890 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -668,8 +668,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
/* active optical cables only */
switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
/* active AOC */
power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
@@ -899,8 +899,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_PASSIVE_TUNING;
break;
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
ptr_total_atten);
@@ -909,7 +909,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_ACTIVE_TUNING;
break;
- case 0xD: /* fallthrough */
+ case 0xD: fallthrough;
case 0xF:
default:
dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index be62284e42d9..356518e17fa6 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -312,7 +312,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
- /* fall through */
+ fallthrough;
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b670321365d3..b0d053d12129 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -113,20 +113,6 @@ static inline void clear_ahg(struct rvt_qp *qp)
}
/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-
-/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index b5966991d647..8386c84c2d92 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -231,7 +231,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -279,7 +279,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index f1734e5e9ac4..1bb5f57152d3 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -141,7 +141,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
release_rdma_sge_mr(e);
- /* FALLTHROUGH */
+ fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
@@ -160,7 +160,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
@@ -267,7 +267,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
@@ -881,8 +881,7 @@ no_flow_control:
goto bail;
}
qp->s_num_rd_atomic++;
-
- /* FALLTHROUGH */
+ fallthrough;
case IB_WR_OPFN:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
@@ -946,10 +945,10 @@ no_flow_control:
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -991,10 +990,10 @@ no_flow_control:
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -2901,7 +2900,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
@@ -2941,7 +2940,7 @@ send_middle:
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
+ fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
@@ -2957,7 +2956,7 @@ send_last_inv:
goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
@@ -3010,7 +3009,7 @@ send_last:
case OP(RDMA_WRITE_ONLY):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index c93ea021cf49..04575c9afd61 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -2584,7 +2584,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through -- and start dma engine */
+ fallthrough; /* and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -2726,7 +2726,6 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e70_go_idle:
break;
case sdma_event_e85_link_down:
- /* fall through */
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3007,7 +3006,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
- /* fall through */
+ fallthrough;
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
@@ -3021,7 +3020,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
break;
case sdma_event_e85_link_down:
ss->go_s99_running = 0;
- /* fall through */
+ fallthrough;
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3252,7 +3251,7 @@ void _sdma_txreq_ahgadd(
tx->num_desc++;
tx->descs[2].qw[0] = 0;
tx->descs[2].qw[1] = 0;
- /* FALLTHROUGH */
+ fallthrough;
case SDMA_AHG_APPLY_UPDATE2:
tx->num_desc++;
tx->descs[1].qw[0] = 0;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index facff133139a..9af82ff933d7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -3227,7 +3227,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_RDMA_READ:
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
- /* fall through */
+ fallthrough;
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
@@ -5067,7 +5067,7 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (priv->s_state == TID_OP(WRITE_REQ))
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
priv->s_state = TID_OP(WRITE_DATA);
- /* fall through */
+ fallthrough;
case TID_OP(WRITE_DATA):
/*
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 0c77f18120ed..1fb918399da0 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -216,7 +216,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -241,7 +241,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -414,7 +414,7 @@ send_first:
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
/*
@@ -515,7 +515,7 @@ rdma_first:
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 479fa557993e..da9888deff8c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,9 +37,8 @@
#define DRV_NAME "hns_roce"
-/* hip08 is a pci device, it includes two version according pci version id */
-#define PCI_REVISION_ID_HIP08_A 0x20
-#define PCI_REVISION_ID_HIP08_B 0x21
+/* hip08 is a pci device */
+#define PCI_REVISION_ID_HIP08 0x21
#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
@@ -348,20 +347,22 @@ struct hns_roce_buf_attr {
bool mtt_only; /* only alloc buffer-required MTT memory */
};
+struct hns_roce_hem_cfg {
+ dma_addr_t root_ba; /* root BA table's address */
+ bool is_direct; /* addressing without BA table */
+ unsigned int ba_pg_shift; /* BA table page shift */
+ unsigned int buf_pg_shift; /* buffer page shift */
+ unsigned int buf_pg_count; /* buffer page count */
+ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
+ int region_count;
+};
+
/* memory translate region */
struct hns_roce_mtr {
struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */
struct ib_umem *umem; /* user space buffer */
struct hns_roce_buf *kmem; /* kernel space buffer */
- struct {
- dma_addr_t root_ba; /* root BA table's address */
- bool is_direct; /* addressing without BA table */
- unsigned int ba_pg_shift; /* BA table page shift */
- unsigned int buf_pg_shift; /* buffer page shift */
- int buf_pg_count; /* buffer page count */
- struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
- unsigned int region_count;
- } hem_cfg; /* config for hardware addressing */
+ struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */
};
struct hns_roce_mw {
@@ -1192,7 +1193,7 @@ int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
@@ -1267,6 +1268,6 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
-int hns_roce_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
+ struct ib_cq *ib_cq);
#endif /* _HNS_ROCE_DEVICE_H */
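Promoting the anonymous hem_cfg aggregate to a named struct hns_roce_hem_cfg is what lets the later hns_roce_mr.c changes pass the configuration to helpers such as mtr_init_buf_cfg() by pointer instead of dragging the whole mtr around. The same move in miniature, with made-up names:

/* Before: the config was an anonymous struct nested inside demo_mtr, so
 * helpers had to take a demo_mtr just to read a couple of fields.
 */
struct demo_hem_cfg {
	unsigned int buf_pg_shift;	/* buffer page shift */
	unsigned int buf_pg_count;	/* buffer page count */
};

struct demo_mtr {
	struct demo_hem_cfg hem_cfg;	/* after: a named, passable type */
};

/* After: helpers can operate on the configuration alone. */
static void demo_cfg_init(struct demo_hem_cfg *cfg, unsigned int shift,
			  unsigned int count)
{
	cfg->buf_pg_shift = shift;
	cfg->buf_pg_count = count;
}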
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index cf39f560b800..07b4c85d341d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -2483,7 +2483,6 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- int rq_pa_start;
int count;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba);
@@ -2491,9 +2490,9 @@ static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev, "Failed to find SQ ba\n");
return -ENOBUFS;
}
- rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift;
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1,
- NULL);
+
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba,
+ 1, NULL);
if (!count) {
ibdev_err(ibdev, "Failed to find RQ ba\n");
return -ENOBUFS;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0618ced45bf8..d2968594664b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -91,10 +91,11 @@ static u32 to_hr_opcode(u32 ib_opcode)
}
static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
- void *wqe, const struct ib_reg_wr *wr)
+ const struct ib_reg_wr *wr)
{
+ struct hns_roce_wqe_frmr_seg *fseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
- struct hns_roce_wqe_frmr_seg *fseg = wqe;
u64 pbl_ba;
/* use ib_access_flags */
@@ -128,14 +129,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
-static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
+static void set_atomic_seg(const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
unsigned int valid_num_sge)
{
- struct hns_roce_wqe_atomic_seg *aseg;
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_wqe_atomic_seg *aseg =
+ (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg);
- set_data_seg_v2(wqe, wr->sg_list);
- aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg);
+ set_data_seg_v2(dseg, wr->sg_list);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
@@ -143,7 +146,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
} else {
aseg->fetchadd_swap_data =
cpu_to_le64(atomic_wr(wr)->compare_add);
- aseg->cmp_data = 0;
+ aseg->cmp_data = 0;
}
roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
@@ -176,13 +179,15 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
- void *wqe, unsigned int *sge_ind,
+ unsigned int *sge_ind,
unsigned int valid_num_sge)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ void *wqe = dseg;
int j = 0;
int i;
@@ -438,7 +443,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
- wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) {
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
@@ -451,7 +455,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
break;
case IB_WR_REG_MR:
- set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -468,10 +472,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
- set_atomic_seg(wr, wqe, rc_sq_wqe, valid_num_sge);
+ set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
else if (wr->opcode != IB_WR_REG_MR)
ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
- wqe, &curr_idx, valid_num_sge);
+ &curr_idx, valid_num_sge);
*sge_idx = curr_idx;
@@ -1510,8 +1514,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
req_a = (struct hns_roce_vf_res_a *)desc[0].data;
req_b = (struct hns_roce_vf_res_b *)desc[1].data;
- memset(req_a, 0, sizeof(*req_a));
- memset(req_b, 0, sizeof(*req_b));
for (i = 0; i < 2; i++) {
hns_roce_cmq_setup_basic_desc(&desc[i],
HNS_ROCE_OPC_ALLOC_VF_RES, false);
@@ -1744,27 +1746,25 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
- HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
- HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
+ HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
+ HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
- caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
- caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
- caps->qpc_timer_ba_pg_sz = 0;
- caps->qpc_timer_buf_pg_sz = 0;
- caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
- caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
- caps->cqc_timer_ba_pg_sz = 0;
- caps->cqc_timer_buf_pg_sz = 0;
- caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
+ caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+ caps->qpc_timer_ba_pg_sz = 0;
+ caps->qpc_timer_buf_pg_sz = 0;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
+ caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+ caps->cqc_timer_ba_pg_sz = 0;
+ caps->cqc_timer_buf_pg_sz = 0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
- caps->sccc_ba_pg_sz = 0;
- caps->sccc_buf_pg_sz = 0;
- caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
- }
+ caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
+ caps->sccc_ba_pg_sz = 0;
+ caps->sccc_buf_pg_sz = 0;
+ caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
}
static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num,
@@ -1995,20 +1995,18 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->srqc_bt_num, &caps->srqc_buf_pg_sz,
&caps->srqc_ba_pg_sz, HEM_TYPE_SRQC);
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
- caps->sccc_hop_num = ctx_hop_num;
- caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->sccc_hop_num = ctx_hop_num;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
- caps->sccc_hop_num, caps->sccc_bt_num,
- &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
- HEM_TYPE_SCCC);
- calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
- caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
- &caps->cqc_timer_buf_pg_sz,
- &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
- }
+ calc_pg_sz(caps->num_qps, caps->sccc_entry_sz,
+ caps->sccc_hop_num, caps->sccc_bt_num,
+ &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz,
+ HEM_TYPE_SCCC);
+ calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
+ caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
+ &caps->cqc_timer_buf_pg_sz,
+ &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
calc_pg_sz(caps->num_cqe_segs, caps->mtt_entry_sz, caps->cqe_hop_num,
1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
@@ -2055,22 +2053,19 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B) {
- ret = hns_roce_query_pf_timer_resource(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev,
- "Query pf timer resource fail, ret = %d.\n",
- ret);
- return ret;
- }
+ ret = hns_roce_query_pf_timer_resource(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to query pf timer resource, ret = %d.\n", ret);
+ return ret;
+ }
- ret = hns_roce_set_vf_switch_param(hr_dev, 0);
- if (ret) {
- dev_err(hr_dev->dev,
- "Set function switch param fail, ret = %d.\n",
- ret);
- return ret;
- }
+ ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to set function switch param, ret = %d.\n",
+ ret);
+ return ret;
}
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
@@ -2336,8 +2331,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B)
- hns_roce_function_clear(hr_dev);
+ hns_roce_function_clear(hr_dev);
hns_roce_free_link_table(hr_dev, &priv->tpq);
hns_roce_free_link_table(hr_dev, &priv->tsq);
@@ -3053,6 +3047,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
IB_WC_RETRY_EXC_ERR },
{ HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR },
{ HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR },
+ { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR}
};
u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
@@ -3075,6 +3070,14 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
sizeof(*cqe), false);
/*
+ * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
+ * the standard protocol, the driver must ignore it and need not set
+ * the QP to an error state.
+ */
+ if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR)
+ return;
+
+ /*
* Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets
* into errored mode. Hence, as a workaround to this hardware
* limitation, driver needs to assist in flushing. But the flushing
@@ -3170,51 +3173,51 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
/* SQ corresponding to CQE */
switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M,
V2_CQE_BYTE_4_OPCODE_S) & 0x1f) {
- case HNS_ROCE_SQ_OPCODE_SEND:
+ case HNS_ROCE_V2_WQE_OP_SEND:
wc->opcode = IB_WC_SEND;
break;
- case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV:
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_INV:
wc->opcode = IB_WC_SEND;
break;
- case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM:
wc->opcode = IB_WC_SEND;
wc->wc_flags |= IB_WC_WITH_IMM;
break;
- case HNS_ROCE_SQ_OPCODE_RDMA_READ:
+ case HNS_ROCE_V2_WQE_OP_RDMA_READ:
wc->opcode = IB_WC_RDMA_READ;
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
break;
- case HNS_ROCE_SQ_OPCODE_RDMA_WRITE:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
- case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
wc->opcode = IB_WC_RDMA_WRITE;
wc->wc_flags |= IB_WC_WITH_IMM;
break;
- case HNS_ROCE_SQ_OPCODE_LOCAL_INV:
+ case HNS_ROCE_V2_WQE_OP_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
wc->opcode = IB_WC_COMP_SWAP;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
wc->opcode = IB_WC_FETCH_ADD;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
wc->opcode = IB_WC_MASKED_COMP_SWAP;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
+ case HNS_ROCE_V2_WQE_OP_FAST_REG_PMR:
wc->opcode = IB_WC_REG_MR;
break;
- case HNS_ROCE_SQ_OPCODE_BIND_MW:
+ case HNS_ROCE_V2_WQE_OP_BIND_MW:
wc->opcode = IB_WC_REG_MR;
break;
default:
@@ -3374,11 +3377,33 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
return op + step_idx;
}
+static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, u64 bt_ba,
+ u32 hem_type, int step_idx)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+ int op;
+
+ op = get_op_for_set_hem(hr_dev, hem_type, step_idx);
+ if (op < 0)
+ return 0;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj,
+ 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
int step_idx)
{
- struct hns_roce_cmd_mailbox *mailbox;
struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
@@ -3390,7 +3415,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
u64 bt_ba = 0;
u32 chunk_ba_num;
u32 hop_num;
- int op;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
return 0;
@@ -3412,14 +3436,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
hem_idx = i;
}
- op = get_op_for_set_hem(hr_dev, table->type, step_idx);
- if (op == -EINVAL)
- return 0;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
if (table->type == HEM_TYPE_SCCC)
obj = mhop.l0_idx;
@@ -3428,11 +3444,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
for (hns_roce_hem_first(hem, &iter);
!hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
bt_ba = hns_roce_hem_addr(&iter);
-
- /* configure the ba, tag, and op */
- ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma,
- obj, 0, op,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type,
+ step_idx);
}
} else {
if (step_idx == 0)
@@ -3440,12 +3453,9 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
else if (step_idx == 1 && hop_num == 2)
bt_ba = table->bt_l1_dma_addr[l1_idx];
- /* configure the ba, tag, and op */
- ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj,
- 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx);
}
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
}
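The hns_roce_v2_set_hem() refactor above moves the whole mailbox allocate/command/free sequence into set_hem_to_hw(), so every exit path releases the mailbox and both call sites shrink to a single line. The general shape of the extraction, with a hypothetical issue_command() standing in for the firmware call:

#include <linux/device.h>
#include <linux/slab.h>

int issue_command(struct device *dev, void *buf, u64 arg);	/* hypothetical */

/* The helper owns the temporary buffer, so callers cannot leak it on an
 * early return.
 */
static int demo_do_one_command(struct device *dev, u64 arg)
{
	void *tmp = kzalloc(64, GFP_KERNEL);	/* stands in for the mailbox */
	int ret;

	if (!tmp)
		return -ENOMEM;

	ret = issue_command(dev, tmp, arg);
	kfree(tmp);
	return ret;
}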
@@ -3745,51 +3755,23 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
}
}
-static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp, int mtt_cnt,
- u32 page_size)
-{
- struct ib_device *ibdev = &hr_dev->ib_dev;
-
- if (hr_qp->rq.wqe_cnt < 1)
- return true;
-
- if (mtt_cnt < 1) {
- ibdev_err(ibdev, "failed to find RQWQE buf ba of QP(0x%lx)\n",
- hr_qp->qpn);
- return false;
- }
-
- if (mtt_cnt < MTT_MIN_COUNT &&
- (hr_qp->rq.offset + page_size) < hr_qp->buff_size) {
- ibdev_err(ibdev,
- "failed to find next RQWQE buf ba of QP(0x%lx)\n",
- hr_qp->qpn);
- return false;
- }
-
- return true;
-}
-
static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
- struct ib_qp *ibqp = &hr_qp->ibqp;
u64 mtts[MTT_MIN_COUNT] = { 0 };
u64 wqe_sge_ba;
- u32 page_size;
int count;
/* Search qp buf's mtts */
- page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->rq.offset / page_size, mtts,
+ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
MTT_MIN_COUNT, &wqe_sge_ba);
- if (!ibqp->srq)
- if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size))
- return -EINVAL;
+ if (hr_qp->rq.wqe_cnt && count < 1) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn);
+ return -EINVAL;
+ }
context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
@@ -3891,7 +3873,6 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 sge_cur_blk = 0;
u64 sq_cur_blk = 0;
- u32 page_size;
int count;
/* search qp buf's mtts */
@@ -3902,9 +3883,8 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
if (hr_qp->sge.sge_cnt > 0) {
- page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift;
count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->sge.offset / page_size,
+ hr_qp->sge.offset,
&sge_cur_blk, 1, NULL);
if (count < 1) {
ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n",
@@ -4265,12 +4245,13 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP08_B && is_udp)
+ if (is_udp)
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2);
else
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0);
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -4301,7 +4282,9 @@ static bool check_qp_state(enum ib_qp_state cur_state,
[IB_QPS_RTR] = { [IB_QPS_RESET] = true,
[IB_QPS_RTS] = true,
[IB_QPS_ERR] = true },
- [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
[IB_QPS_SQD] = {},
[IB_QPS_SQE] = {},
[IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index e176b0aaa4ac..1fb1c583d0f8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -179,27 +179,11 @@ enum {
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb,
- HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc,
+ HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
};
enum {
- HNS_ROCE_SQ_OPCODE_SEND = 0x0,
- HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1,
- HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2,
- HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3,
- HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4,
- HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5,
- HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6,
- HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7,
- HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
- HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
- HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa,
- HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb,
- HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc,
-};
-
-enum {
/* rq operations */
HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0,
HNS_ROCE_V2_OPCODE_SEND = 0x1,
@@ -230,6 +214,7 @@ enum {
HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15,
HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16,
HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22,
+ HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23,
HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
};
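The header hunk above deletes the HNS_ROCE_SQ_OPCODE_* enum because its values duplicated HNS_ROCE_V2_WQE_OP_* one for one (BIND_MW_TYPE is simply renamed BIND_MW), which is why the poll-one switch earlier in this diff could move to the surviving names with no behavioural change. A hypothetical table-driven alternative to that switch, using only opcodes that appear in the hunks:

/* Hypothetical lookup table; unlisted opcodes would still need the switch's
 * explicit handling (byte_len, wc_flags and the default case).
 */
static const enum ib_wc_opcode demo_wqe_op_to_wc[] = {
	[HNS_ROCE_V2_WQE_OP_SEND]	  = IB_WC_SEND,
	[HNS_ROCE_V2_WQE_OP_RDMA_WRITE]	  = IB_WC_RDMA_WRITE,
	[HNS_ROCE_V2_WQE_OP_RDMA_READ]	  = IB_WC_RDMA_READ,
	[HNS_ROCE_V2_WQE_OP_LOCAL_INV]	  = IB_WC_LOCAL_INV,
	[HNS_ROCE_V2_WQE_OP_FAST_REG_PMR] = IB_WC_REG_MR,
};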
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 50763cf4fa3d..5907cfd878a6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -428,7 +428,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.destroy_ah = hns_roce_destroy_ah,
.destroy_cq = hns_roce_destroy_cq,
.disassociate_ucontext = hns_roce_disassociate_ucontext,
- .fill_res_entry = hns_roce_fill_res_entry,
+ .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
.get_dma_mr = hns_roce_get_dma_mr,
.get_link_layer = hns_roce_get_link_layer,
.get_port_immutable = hns_roce_port_immutable,
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 6b226a5eb7db..e5df3884b41d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -415,7 +415,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
@@ -871,6 +871,15 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int err;
int i;
+ /*
+ * Only use the first page address as root ba when hopnum is 0, since
+ * the addresses of all pages are consecutive in this case.
+ */
+ if (mtr->hem_cfg.is_direct) {
+ mtr->hem_cfg.root_ba = pages[0];
+ return 0;
+ }
+
for (i = 0; i < mtr->hem_cfg.region_count; i++) {
r = &mtr->hem_cfg.region[i];
if (r->offset + r->count > page_cnt) {
@@ -896,6 +905,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int start_index;
int mtt_count;
int total = 0;
__le64 *mtts;
@@ -907,26 +918,32 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
goto done;
/* no mtt memory in direct mode, so just return the buffer address */
- if (mtr->hem_cfg.is_direct) {
- npage = offset;
- for (total = 0; total < mtt_max; total++, npage++) {
- addr = mtr->hem_cfg.root_ba +
- (npage << mtr->hem_cfg.buf_pg_shift);
-
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ for (mtt_count = 0; mtt_count < cfg->region_count &&
+ total < mtt_max; mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
+
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
mtt_buf[total] = to_hr_hw_page_addr(addr);
else
mtt_buf[total] = addr;
+
+ total++;
}
goto done;
}
+ start_index = offset >> cfg->buf_pg_shift;
left = mtt_max;
while (left > 0) {
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
- offset + total,
+ start_index + total,
&mtt_count, NULL);
if (!mtts || !mtt_count)
goto done;
@@ -939,104 +956,136 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
done:
if (base_addr)
- *base_addr = mtr->hem_cfg.root_ba;
+ *base_addr = cfg->root_ba;
return total;
}
-/* convert buffer size to page index and page count */
-static unsigned int mtr_init_region(struct hns_roce_buf_attr *attr,
- int page_cnt,
- struct hns_roce_buf_region *regions,
- int region_cnt, unsigned int page_shift)
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr,
+ struct hns_roce_hem_cfg *cfg,
+ unsigned int *buf_page_shift)
{
- unsigned int page_size = 1 << page_shift;
- int max_region = attr->region_count;
struct hns_roce_buf_region *r;
- unsigned int i = 0;
- int page_idx = 0;
-
- for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) {
- r = &regions[i];
- r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ?
- 0 : attr->region[i].hopnum;
- r->offset = page_idx;
- r->count = DIV_ROUND_UP(attr->region[i].size, page_size);
- page_idx += r->count;
+ unsigned int page_shift = 0;
+ int page_cnt = 0;
+ size_t buf_size;
+ int region_cnt;
+
+ if (cfg->is_direct) {
+ buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
+ page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
+ /*
+ * When the HEM buffer uses level-0 addressing, the page size equals
+ * the buffer size, and the page size is 4K * 2^N.
+ */
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
+ if (attr->region_count > 1) {
+ cfg->buf_pg_count = page_cnt;
+ page_shift = HNS_HW_PAGE_SHIFT;
+ } else {
+ cfg->buf_pg_count = 1;
+ page_shift = cfg->buf_pg_shift;
+ if (buf_size != 1 << page_shift) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to check direct size %zu shift %d.\n",
+ buf_size, page_shift);
+ return -EINVAL;
+ }
+ }
+ } else {
+ page_shift = cfg->buf_pg_shift;
+ }
+
+ /* convert buffer size to page index and page count */
+ for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
+ region_cnt < attr->region_count &&
+ region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
+ r = &cfg->region[region_cnt];
+ r->offset = page_cnt;
+ buf_size = hr_hw_page_align(attr->region[region_cnt].size);
+ r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+ page_cnt += r->count;
+ r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
+ r->count);
+ }
+
+ if (region_cnt < 1) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to check mtr region count, pages = %d.\n",
+ cfg->buf_pg_count);
+ return -ENOBUFS;
}
- return i;
+ cfg->region_count = region_cnt;
+ *buf_page_shift = page_shift;
+
+ return page_cnt;
}
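mtr_init_buf_cfg() above converts each region's byte size into a page count at the chosen page shift and records its starting page index, while the earlier hw_v1.c/hw_v2.c hunks now hand hns_roce_mtr_find() raw byte offsets (hr_qp->rq.offset, hr_qp->sge.offset) that it shifts down itself. The arithmetic in isolation:

#include <linux/kernel.h>	/* DIV_ROUND_UP */

/* e.g. with page_shift = 12 (4K pages): 16384 bytes -> 4 pages,
 * byte offset 8192 -> page index 2.
 */
static unsigned int demo_bytes_to_page_count(size_t bytes,
					     unsigned int page_shift)
{
	return DIV_ROUND_UP(bytes, 1UL << page_shift);
}

static unsigned int demo_offset_to_page_index(size_t offset,
					      unsigned int page_shift)
{
	return offset >> page_shift;
}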
/**
* hns_roce_mtr_create - Create hns memory translate region.
*
* @mtr: memory translate region
- * @init_attr: init attribute for creating mtr
- * @page_shift: page shift for multi-hop base address table
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
* @udata: user space context, if it's NULL, means kernel space
* @user_addr: userspace virtual address to start at
- * @buf_alloced: mtr has private buffer, true means need to alloc
*/
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
- unsigned int page_shift, struct ib_udata *udata,
+ unsigned int ba_page_shift, struct ib_udata *udata,
unsigned long user_addr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int buf_page_shift = 0;
dma_addr_t *pages = NULL;
- int region_cnt = 0;
int all_pg_cnt;
int get_pg_cnt;
- bool has_mtt;
- int err = 0;
+ int ret = 0;
+
+ /* if mtt is disabled, all pages must be in a contiguous address range */
+ cfg->is_direct = !mtr_has_mtt(buf_attr);
- has_mtt = mtr_has_mtt(buf_attr);
/* if buffer only need mtt, just init the hem cfg */
if (buf_attr->mtt_only) {
- mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift;
- mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >>
- buf_attr->page_shift;
+ cfg->buf_pg_shift = buf_attr->page_shift;
+ cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
+ buf_attr->page_shift;
mtr->umem = NULL;
mtr->kmem = NULL;
} else {
- err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata,
- user_addr);
- if (err) {
- ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n",
- err);
- return err;
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
+ udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
}
}
- /* alloc mtt memory */
- all_pg_cnt = mtr->hem_cfg.buf_pg_count;
- hns_roce_hem_list_init(&mtr->hem_list);
- mtr->hem_cfg.is_direct = !has_mtt;
- mtr->hem_cfg.ba_pg_shift = page_shift;
- mtr->hem_cfg.region_count = 0;
- region_cnt = mtr_init_region(buf_attr, all_pg_cnt,
- mtr->hem_cfg.region,
- ARRAY_SIZE(mtr->hem_cfg.region),
- mtr->hem_cfg.buf_pg_shift);
- if (region_cnt < 1) {
- err = -ENOBUFS;
- ibdev_err(ibdev, "failed to init mtr region %d\n", region_cnt);
+ all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
+ if (all_pg_cnt < 1) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
goto err_alloc_bufs;
}
- mtr->hem_cfg.region_count = region_cnt;
-
- if (has_mtt) {
- err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
- mtr->hem_cfg.region, region_cnt,
- page_shift);
- if (err) {
- ibdev_err(ibdev, "Failed to request mtr hem, err %d\n",
- err);
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
+ ret);
goto err_alloc_bufs;
}
- mtr->hem_cfg.root_ba = mtr->hem_list.root_ba;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
}
/* no buffer to map */
@@ -1046,31 +1095,26 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
/* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
if (!pages) {
- err = -ENOMEM;
- ibdev_err(ibdev, "Failed to alloc mtr page list %d\n",
+ ret = -ENOMEM;
+ ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
all_pg_cnt);
goto err_alloc_hem_list;
}
get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
- mtr->hem_cfg.buf_pg_shift);
+ buf_page_shift);
if (get_pg_cnt != all_pg_cnt) {
- ibdev_err(ibdev, "Failed to get mtr page %d != %d\n",
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
get_pg_cnt, all_pg_cnt);
- err = -ENOBUFS;
+ ret = -ENOBUFS;
goto err_alloc_page_list;
}
- if (!has_mtt) {
- mtr->hem_cfg.root_ba = pages[0];
- } else {
- /* write buffer's dma address to BA table */
- err = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
- if (err) {
- ibdev_err(ibdev, "Failed to map mtr pages, err %d\n",
- err);
- goto err_alloc_page_list;
- }
+ /* write buffer's dma address to BA table */
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+ goto err_alloc_page_list;
}
/* drop tmp array */
@@ -1082,7 +1126,7 @@ err_alloc_hem_list:
hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
err_alloc_bufs:
mtr_free_bufs(hr_dev, mtr);
- return err;
+ return ret;
}
void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index a0a47bd66975..e94ca130ff5e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -411,7 +411,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
struct hns_roce_qp *hr_qp,
struct ib_qp_cap *cap)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
u32 cnt;
cnt = max(1U, cap->max_send_sge);
@@ -431,15 +430,6 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
} else if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) {
cnt = roundup_pow_of_two(sq_wqe_cnt *
(hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE));
-
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) {
- if (cnt > hr_dev->caps.max_extend_sg) {
- ibdev_err(ibdev,
- "failed to check exSGE num, exSGE num = %d.\n",
- cnt);
- return -EINVAL;
- }
- }
} else {
cnt = 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 06871731ac43..259444c0a630 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -76,10 +76,9 @@ err:
return -EMSGSIZE;
}
-static int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
+ struct ib_cq *ib_cq)
{
- struct ib_cq *ib_cq = container_of(res, struct ib_cq, res);
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct hns_roce_v2_cq_context *context;
@@ -119,12 +118,3 @@ err:
kfree(context);
return ret;
}
-
-int hns_roce_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (res->type == RDMA_RESTRACK_CQ)
- return hns_roce_fill_res_cq_entry(msg, res);
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 19af29a48c55..6957e4f3404b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -101,7 +101,6 @@ static int i40iw_query_port(struct ib_device *ibdev,
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
props->active_width = IB_WIDTH_4X;
props->active_speed = 1;
props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
@@ -1543,10 +1542,9 @@ static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr
* @pd: ibpd pointer
* @mr_type: memory for stag registrion
* @max_num_sg: man number of pages
- * @udata: user data or NULL for kernel objects
*/
static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct i40iw_pd *iwpd = to_iwpd(pd);
struct i40iw_device *iwdev = to_iwdev(pd->device);
@@ -2460,7 +2458,6 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
@@ -2616,22 +2613,6 @@ static int i40iw_query_gid(struct ib_device *ibdev,
return 0;
}
-/**
- * i40iw_query_pkey - Query partition key
- * @ibdev: device pointer from stack
- * @port: port number
- * @index: index of pkey
- * @pkey: pointer to store the pkey
- */
-static int i40iw_query_pkey(struct ib_device *ibdev,
- u8 port,
- u16 index,
- u16 *pkey)
-{
- *pkey = 0;
- return 0;
-}
-
static const struct ib_device_ops i40iw_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_I40IW,
@@ -2671,7 +2652,6 @@ static const struct ib_device_ops i40iw_dev_ops = {
.post_send = i40iw_post_send,
.query_device = i40iw_query_device,
.query_gid = i40iw_query_gid,
- .query_pkey = i40iw_query_pkey,
.query_port = i40iw_query_port,
.query_qp = i40iw_query_qp,
.reg_user_mr = i40iw_reg_user_mr,
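With the stubs removed, i40iw (an iWARP device) no longer advertises a pkey table at all; the deleted i40iw_query_pkey() only ever returned a fixed 0. Code that must work across link layers can gate pkey handling on the transport instead, for example:

#include <rdma/ib_verbs.h>

/* Sketch: only consult the pkey table on ports that actually have one;
 * 0xffff is the IB default (full-membership) pkey. An error from
 * ib_query_pkey() simply leaves the default in place here.
 */
static u16 demo_default_pkey(struct ib_device *ibdev, u8 port)
{
	u16 pkey = 0xffff;

	if (!rdma_protocol_iwarp(ibdev, port))
		ib_query_pkey(ibdev, port, 0, &pkey);

	return pkey;
}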
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 816d28854a8e..5e7910a517da 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1219,56 +1219,47 @@ static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
}
-static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx4_ib_xrcd *xrcd;
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
struct ib_cq_init_attr cq_attr = {};
int err;
- if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -EOPNOTSUPP;
- err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
if (err)
- goto err1;
+ return err;
- xrcd->pd = ib_alloc_pd(ibdev, 0);
+ xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
if (IS_ERR(xrcd->pd)) {
err = PTR_ERR(xrcd->pd);
goto err2;
}
cq_attr.cqe = 1;
- xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
+ xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
}
- return &xrcd->ibxrcd;
+ return 0;
err3:
ib_dealloc_pd(xrcd->pd);
err2:
- mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
-err1:
- kfree(xrcd);
- return ERR_PTR(err);
+ mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
+ return err;
}
-static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
- kfree(xrcd);
-
- return 0;
}
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
@@ -2607,6 +2598,8 @@ static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
.alloc_xrcd = mlx4_ib_alloc_xrcd,
.dealloc_xrcd = mlx4_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
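The mlx4 hunk converts XRC domains to the core-allocated object model: .alloc_xrcd now receives an ib_xrcd that the core embedded inside the driver structure (sized via INIT_RDMA_OBJ_SIZE), returns int, and .dealloc_xrcd returns void. The embedded-object half of that pattern, sketched with demo names:

#include <linux/kernel.h>	/* container_of */
#include <rdma/ib_verbs.h>

/* The core allocates sizeof(struct demo_xrcd) and hands the driver the
 * embedded ibxrcd member; the driver recovers its private part by offset.
 */
struct demo_xrcd {
	struct ib_xrcd ibxrcd;	/* must match the INIT_RDMA_OBJ_SIZE() member */
	u32 xrcdn;		/* driver-private state */
};

static inline struct demo_xrcd *to_demo_xrcd(struct ib_xrcd *ibxrcd)
{
	return container_of(ibxrcd, struct demo_xrcd, ibxrcd);
}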
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6f4ea1067095..38e87a700a2a 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -729,7 +729,7 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 7e0b205c05eb..1d5ef0de12c9 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->ibmr.length = length;
- mr->ibmr.iova = virt_addr;
mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
@@ -655,7 +654,7 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
}
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 8cca61c671f8..b4c009bb0db6 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := ah.o \
cmd.o \
cong.o \
+ counters.o \
cq.o \
doorbell.o \
gsi.o \
@@ -22,5 +23,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
- flow.o \
- qos.o
+ fs.o \
+ qos.o \
+ std_types.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index cc24c711e92a..ebb2f108b64f 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -148,18 +148,6 @@ void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
spin_unlock(&dm->lock);
}
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
-{
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
-
- MLX5_SET(ppcnt_reg, in, local_port, 1);
-
- MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
- return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
- 0, 0);
-}
-
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index f4d8558db434..1d192a8ca87d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,7 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out);
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
new file mode 100644
index 000000000000..145f3cb40ccb
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include "mlx5_ib.h"
+#include <linux/mlx5/eswitch.h>
+#include "counters.h"
+#include "ib_rep.h"
+#include "qp.h"
+
+struct mlx5_ib_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+ INIT_Q_COUNTER(roce_adp_retrans),
+ INIT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_Q_COUNTER(roce_slow_restart),
+ INIT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+#define INIT_EXT_PPCNT_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+ INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+ struct ib_counters_read_attr *read_attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct mlx5_read_counters_attr mread_attr = {};
+ struct mlx5_ib_flow_counters_desc *desc;
+ int ret, i;
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ if (mcounters->cntrs_max_index > read_attr->ncounters) {
+ ret = -EINVAL;
+ goto err_bound;
+ }
+
+ mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+ GFP_KERNEL);
+ if (!mread_attr.out) {
+ ret = -ENOMEM;
+ goto err_bound;
+ }
+
+ mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+ mread_attr.flags = read_attr->flags;
+ ret = mcounters->read_counters(counters->device, &mread_attr);
+ if (ret)
+ goto err_read;
+
+	/* Walk the counters data array and assign each value according to
+	 * its description/index pair.
+	 */
+ desc = mcounters->counters_data;
+ for (i = 0; i < mcounters->ncounters; i++)
+ read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+ kfree(mread_attr.out);
+err_bound:
+ mutex_unlock(&mcounters->mcntrs_mutex);
+ return ret;
+}
+
+static void mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mlx5_ib_counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+}
+
+static int mlx5_ib_create_counters(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mutex_init(&mcounters->mcntrs_mutex);
+ return 0;
+}
+
+static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
+{
+ return MLX5_ESWITCH_MANAGER(mdev) &&
+ mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
+ MLX5_ESWITCH_OFFLOADS;
+}
+
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u8 port_num)
+{
+ return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
+ &dev->port[port_num].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() returns the counter set id to use for the
+ * given device/port combination, in both the switchdev and non-switchdev
+ * modes of the parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return cnts->set_id;
+}
+
+static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
+
+ if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
+ return NULL;
+
+ cnts = get_counters(dev, port_num - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats,
+ u16 set_id)
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ __be32 val;
+ int ret, i;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+{
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int ret, i;
+ void *out;
+
+ out = kvzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
+ 0, 0);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ cnts->offsets[i + offset]));
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u8 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
+ struct mlx5_core_dev *mdev;
+ int ret, num_counters;
+ u8 mdev_port_num;
+
+ if (!stats)
+ return -EINVAL;
+
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ /* q_counters are per IB device, query the master mdev */
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
+ if (ret)
+ return ret;
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
+ &mdev_port_num);
+ if (!mdev) {
+			/* If the port is not affiliated yet, it is in the down
+			 * state and has no counters, so the values would be
+			 * zero; there is no need to read from the HCA.
+ */
+ goto done;
+ }
+ ret = mlx5_lag_query_cong_counters(dev->mdev,
+ stats->value +
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ if (ret)
+ return ret;
+ }
+
+done:
+ return num_counters;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return rdma_alloc_hw_stats_struct(cnts->names,
+ cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts =
+ get_counters(dev, counter->port - 1);
+
+ return mlx5_ib_query_q_counters(dev->mdev, cnts,
+ counter->stats, counter->id);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ if (!counter->id)
+ return 0;
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ int err;
+
+ if (!counter->id) {
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+
+ MLX5_SET(alloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err)
+ return err;
+ counter->id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5_ib_counter_dealloc(counter);
+ counter->id = 0;
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+{
+ return mlx5_ib_qp_set_counter(qp, NULL);
+}
+
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+ for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+ names[j] = roce_accl_cnts[i].name;
+ offsets[j] = roce_accl_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ names[j] = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+ names[j] = ext_ppcnt_cnts[i].name;
+ offsets[j] = ext_ppcnt_cnts[i].offset;
+ }
+ }
+}
+
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+ num_counters += ARRAY_SIZE(roce_accl_cnts);
+
+ cnts->num_q_counters = num_counters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+ num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+ }
+ cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
+ if (!cnts->names)
+ return -ENOMEM;
+
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
+ goto err_names;
+
+ return 0;
+
+err_names:
+ kfree(cnts->names);
+ cnts->names = NULL;
+ return -ENOMEM;
+}
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+ int num_cnt_ports;
+ int i;
+
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ if (dev->port[i].cnts.set_id) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ dev->port[i].cnts.set_id);
+ mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+ }
+ kfree(dev->port[i].cnts.names);
+ kfree(dev->port[i].cnts.offsets);
+ }
+}
+
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+ int num_cnt_ports;
+ int err = 0;
+ int i;
+ bool is_shared;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+ num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
+ if (err)
+ goto err_alloc;
+
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
+ dev->port[i].cnts.offsets);
+
+ MLX5_SET(alloc_q_counter_in, in, uid,
+ is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, err);
+ goto err_alloc;
+ }
+
+ dev->port[i].cnts.set_id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+ return 0;
+
+err_alloc:
+ mlx5_ib_dealloc_counters(dev);
+ return err;
+}
+
+static int read_flow_counters(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr)
+{
+ struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_fc_query(dev->mdev, fc,
+ &read_attr->out[IB_COUNTER_PACKETS],
+ &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters: packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(
+ struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
+ struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ u32 cntrs_max_index = 0;
+ int i;
+
+ if (counters_type != MLX5_IB_COUNTERS_FLOW)
+ return -EINVAL;
+
+ /* init the fields for the object */
+ mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
+ mcounters->ncounters = ncounters;
+	/* each counter entry has both a description and an index pair */
+ for (i = 0; i < ncounters; i++) {
+ if (desc_data[i].description > IB_COUNTER_BYTES)
+ return -EINVAL;
+
+ if (cntrs_max_index <= desc_data[i].index)
+ cntrs_max_index = desc_data[i].index + 1;
+ }
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ mcounters->counters_data = desc_data;
+ mcounters->cntrs_max_index = cntrs_max_index;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+
+ return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+ struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+ struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+ bool hw_hndl = false;
+ int ret = 0;
+
+ if (ucmd && ucmd->ncounters_data != 0) {
+ cntrs_data = ucmd->data;
+ if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+ return -EINVAL;
+
+ desc_data = kcalloc(cntrs_data->ncounters,
+ sizeof(*desc_data),
+ GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ if (copy_from_user(desc_data,
+ u64_to_user_ptr(cntrs_data->counters_data),
+ sizeof(*desc_data) * cntrs_data->ncounters)) {
+ ret = -EFAULT;
+ goto free;
+ }
+ }
+
+ if (!mcounters->hw_cntrs_hndl) {
+ mcounters->hw_cntrs_hndl = mlx5_fc_create(
+ to_mdev(ibcounters->device)->mdev, false);
+ if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+ ret = PTR_ERR(mcounters->hw_cntrs_hndl);
+ goto free;
+ }
+ hw_hndl = true;
+ }
+
+ if (desc_data) {
+ /* counters already bound to at least one flow */
+ if (mcounters->cntrs_max_index) {
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ ret = counters_set_description(ibcounters,
+ MLX5_IB_COUNTERS_FLOW,
+ desc_data,
+ cntrs_data->ncounters);
+ if (ret)
+ goto free_hndl;
+
+ } else if (!mcounters->cntrs_max_index) {
+ /* counters not bound yet, must have udata passed */
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ return 0;
+
+free_hndl:
+ if (hw_hndl) {
+ mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ mcounters->hw_cntrs_hndl = NULL;
+ }
+free:
+ kfree(desc_data);
+ return ret;
+}
+
+void mlx5_ib_counters_clear_description(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters;
+
+ if (!counters || atomic_read(&counters->usecnt) != 1)
+ return;
+
+ mcounters = to_mcounters(counters);
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ kfree(mcounters->counters_data);
+ mcounters->counters_data = NULL;
+ mcounters->cntrs_max_index = 0;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
+static const struct ib_device_ops hw_stats_ops = {
+ .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+};
+
+static const struct ib_device_ops counters_ops = {
+ .create_counters = mlx5_ib_create_counters,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .read_counters = mlx5_ib_read_counters,
+
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+};
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &counters_ops);
+
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return 0;
+
+ ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
+ return mlx5_ib_alloc_counters(dev);
+}
+
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return;
+
+ mlx5_ib_dealloc_counters(dev);
+}
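[Editorial note, not part of the patch] counters.c is built around plain (name, byte-offset) tables: mlx5_ib_fill_counters() copies the entries allowed by the device capabilities into cnts->names/cnts->offsets, and the query paths then read big-endian values straight out of the firmware reply at those offsets. A reduced sketch of the decode step, with a hypothetical example_ helper:

static void example_decode_q_counters(void *out, const size_t *offsets,
				      int num, u64 *values)
{
	__be32 val;
	int i;

	for (i = 0; i < num; i++) {
		/* same extraction as mlx5_ib_query_q_counters() above */
		val = *(__be32 *)((void *)out + offsets[i]);
		values[i] = (u64)be32_to_cpu(val);
	}
}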
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
new file mode 100644
index 000000000000..1aa30c2f3f4d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_COUNTERS_H
+#define _MLX5_IB_COUNTERS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_clear_description(struct ib_counters *counters);
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
+#endif /* _MLX5_IB_COUNTERS_H */
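[Editorial note, not part of the patch] counters.h exposes only the init/cleanup pair plus the few helpers other files need; everything else in counters.c is static. A hypothetical caller-side sketch of how the pair is intended to be used (the real wiring lives in main.c, outside this hunk):

static int example_counters_stage_init(struct mlx5_ib_dev *dev)
{
	/* registers counters_ops and, if max_qp_cnt, hw_stats_ops too */
	return mlx5_ib_counters_init(dev);
}

static void example_counters_stage_cleanup(struct mlx5_ib_dev *dev)
{
	/* frees per-port names/offsets and deallocates q counter set ids */
	mlx5_ib_counters_cleanup(dev);
}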
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 655ea9c984e1..9e3d8b826498 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -14,6 +14,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
+#include "devx.h"
#include "qp.h"
#include <linux/xarray.h>
@@ -89,22 +90,6 @@ struct devx_async_event_file {
u8 is_destroyed:1;
};
-#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
-struct devx_obj {
- struct mlx5_ib_dev *ib_dev;
- u64 obj_id;
- u32 dinlen; /* destroy inbox length */
- u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
- u32 flags;
- union {
- struct mlx5_ib_devx_mr devx_mr;
- struct mlx5_core_dct core_dct;
- struct mlx5_core_cq core_cq;
- u32 flow_counter_bulk_size;
- };
- struct list_head event_sub; /* holds devx_event_subscription entries */
-};
-
struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
@@ -171,48 +156,6 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
-{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- switch (opcode) {
- case MLX5_CMD_OP_DESTROY_TIR:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
- obj_id);
- return true;
-
- case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
- table_id);
- return true;
- default:
- return false;
- }
-}
-
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
-{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
-
- if (offset && offset >= devx_obj->flow_counter_bulk_size)
- return false;
-
- *counter_id = MLX5_GET(dealloc_flow_counter_in,
- devx_obj->dinbox,
- flow_counter_id);
- *counter_id += offset;
- return true;
- }
-
- return false;
-}
-
static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
switch (event_num) {
@@ -2419,17 +2362,24 @@ static int devx_event_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ int uid;
- xa_init(&table->event_xa);
- mutex_init(&table->event_xa_lock);
- MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
- mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ uid = mlx5_ib_devx_create(dev, false);
+ if (uid > 0) {
+ dev->devx_whitelist_uid = uid;
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ }
+
+ return 0;
}
-void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_devx_event_table *table = &dev->devx_event_table;
struct devx_event_subscription *sub, *tmp;
@@ -2437,17 +2387,21 @@ void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
void *entry;
unsigned long id;
- mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
- mutex_lock(&dev->devx_event_table.event_xa_lock);
- xa_for_each(&table->event_xa, id, entry) {
- event = entry;
- list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
- xa_list)
- devx_cleanup_subscription(dev, sub);
- kfree(entry);
+ if (dev->devx_whitelist_uid) {
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(
+ sub, tmp, &event->unaffiliated_list, xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
}
- mutex_unlock(&dev->devx_event_table.event_xa_lock);
- xa_destroy(&table->event_xa);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
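[Editorial note, not part of the patch] mlx5_ib_devx_cleanup() keeps the usual xarray teardown shape, now gated on devx_whitelist_uid so nothing is unregistered when mlx5_ib_devx_init() never armed the event table. A generic sketch of that teardown pattern, with a hypothetical example_ name:

static void example_event_table_destroy(struct xarray *xa)
{
	unsigned long id;
	void *entry;

	xa_for_each(xa, id, entry)	/* visits every populated index */
		kfree(entry);		/* free whatever was stored there */
	xa_destroy(xa);			/* then release the xarray itself */
}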
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
new file mode 100644
index 000000000000..1f69866aed16
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DEVX_H
+#define _MLX5_IB_DEVX_H
+
+#include "mlx5_ib.h"
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_ib_dev *ib_dev;
+ u64 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ union {
+ struct mlx5_ib_devx_mr devx_mr;
+ struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
+ };
+ struct list_head event_sub; /* holds devx_event_subscription entries */
+};
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+{
+ return -EOPNOTSUPP;
+}
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+}
+#endif
+#endif /* _MLX5_IB_DEVX_H */
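[Editorial note, not part of the patch] The header uses the standard IS_ENABLED() stub pattern so callers compile unchanged whether or not CONFIG_INFINIBAND_USER_ACCESS is set. The same idea in miniature, with a hypothetical CONFIG_EXAMPLE_FEATURE symbol and example_dev type:

struct example_dev;

#if IS_ENABLED(CONFIG_EXAMPLE_FEATURE)
int example_feature_init(struct example_dev *dev);
void example_feature_cleanup(struct example_dev *dev);
#else
static inline int example_feature_init(struct example_dev *dev)
{
	return 0;	/* feature compiled out: succeed silently */
}
static inline void example_feature_cleanup(struct example_dev *dev) {}
#endif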
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
deleted file mode 100644
index 216a1108ad34..000000000000
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ /dev/null
@@ -1,765 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/*
- * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
- */
-
-#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/uverbs_types.h>
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/uverbs_std_types.h>
-#include <rdma/mlx5_user_ioctl_cmds.h>
-#include <rdma/mlx5_user_ioctl_verbs.h>
-#include <rdma/ib_umem.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs.h>
-#include "mlx5_ib.h"
-
-#define UVERBS_MODULE_NAME mlx5_ib
-#include <rdma/uverbs_named_ioctl.h>
-
-static int
-mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
- enum mlx5_flow_namespace_type *namespace)
-{
- switch (table_type) {
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
- *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
- *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
- *namespace = MLX5_FLOW_NAMESPACE_FDB;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
- *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
- *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
- [MLX5_IB_FLOW_TYPE_NORMAL] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- .u.ptr = {
- .len = sizeof(u16), /* data is priority */
- .min_len = sizeof(u16),
- }
- },
- [MLX5_IB_FLOW_TYPE_SNIFFER] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
-};
-
-static int get_dests(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
- int *dest_type, struct ib_qp **qp, u32 *flags)
-{
- bool dest_devx, dest_qp;
- void *devx_obj;
- int err;
-
- dest_devx = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
- dest_qp = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
-
- *flags = 0;
- err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
- MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
- MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
- if (err)
- return err;
-
- /* Both flags are not allowed */
- if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
- *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
- return -EINVAL;
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- if (dest_devx && (dest_qp || *flags))
- return -EINVAL;
- else if (dest_qp && *flags)
- return -EINVAL;
- }
-
- /* Allow only DEVX object, drop as dest for FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
- (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
- return -EINVAL;
-
- /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
- ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
- return -EINVAL;
-
- *qp = NULL;
- if (dest_devx) {
- devx_obj =
- uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
-
- /* Verify that the given DEVX object is a flow
- * steering destination.
- */
- if (!mlx5_ib_devx_is_flow_dest(devx_obj, dest_id, dest_type))
- return -EINVAL;
- /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
- *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
- return -EINVAL;
- } else if (dest_qp) {
- struct mlx5_ib_qp *mqp;
-
- *qp = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
- if (IS_ERR(*qp))
- return PTR_ERR(*qp);
-
- if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
-
- mqp = to_mqp(*qp);
- if (mqp->is_rss)
- *dest_id = mqp->rss_qp.tirn;
- else
- *dest_id = mqp->raw_packet_qp.rq.tirn;
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- }
-
- if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- return -EINVAL;
-
- return 0;
-}
-
-#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
-static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_flow_context flow_context = {.flow_tag =
- MLX5_FS_DEFAULT_FLOW_TAG};
- u32 *offset_attr, offset = 0, counter_id = 0;
- int dest_id, dest_type, inlen, len, ret, i;
- struct mlx5_ib_flow_handler *flow_handler;
- struct mlx5_ib_flow_matcher *fs_matcher;
- struct ib_uobject **arr_flow_actions;
- struct ib_uflow_resources *uflow_res;
- struct mlx5_flow_act flow_act = {};
- struct ib_qp *qp = NULL;
- void *devx_obj, *cmd_in;
- struct ib_uobject *uobj;
- struct mlx5_ib_dev *dev;
- u32 flags;
-
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
- fs_matcher = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
- uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
- dev = mlx5_udata_to_mdev(&attrs->driver_udata);
-
- if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
- return -EINVAL;
-
- if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
-
- if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
- if (len) {
- devx_obj = arr_flow_actions[0]->object;
-
- if (uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
-
- int num_offsets = uverbs_attr_ptr_get_array_size(
- attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
- sizeof(u32));
-
- if (num_offsets != 1)
- return -EINVAL;
-
- offset_attr = uverbs_attr_get_alloced_ptr(
- attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
- offset = *offset_attr;
- }
-
- if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset,
- &counter_id))
- return -EINVAL;
-
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- }
-
- cmd_in = uverbs_attr_get_alloced_ptr(
- attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
- inlen = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
-
- uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
- if (!uflow_res)
- return -ENOMEM;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
- for (i = 0; i < len; i++) {
- struct mlx5_ib_flow_action *maction =
- to_mflow_act(arr_flow_actions[i]->object);
-
- ret = parse_flow_flow_action(maction, false, &flow_act);
- if (ret)
- goto err_out;
- flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
- arr_flow_actions[i]->object);
- }
-
- ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
- MLX5_IB_ATTR_CREATE_FLOW_TAG);
- if (!ret) {
- if (flow_context.flow_tag >= BIT(24)) {
- ret = -EINVAL;
- goto err_out;
- }
- flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
- }
-
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
- &flow_context,
- &flow_act,
- counter_id,
- cmd_in, inlen,
- dest_id, dest_type);
- if (IS_ERR(flow_handler)) {
- ret = PTR_ERR(flow_handler);
- goto err_out;
- }
-
- ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
-
- return 0;
-err_out:
- ib_uverbs_flow_resources_free(uflow_res);
- return ret;
-}
-
-static int flow_matcher_cleanup(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_matcher *obj = uobject->object;
- int ret;
-
- ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
- if (ret)
- return ret;
-
- kfree(obj);
- return 0;
-}
-
-static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *obj)
-{
- enum mlx5_ib_uapi_flow_table_type ft_type =
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
- u32 flags;
- int err;
-
- /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
- * users should switch to it. We leave this to not break userspace
- */
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
- uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
- return -EINVAL;
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
- err = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
- if (err)
- return err;
-
- err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
- if (err)
- return err;
-
- return 0;
- }
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
- err = uverbs_get_flags32(&flags, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- IB_FLOW_ATTR_FLAGS_EGRESS);
- if (err)
- return err;
-
- if (flags) {
- mlx5_ib_ft_type_to_namespace(
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
- &obj->ns_type);
- return 0;
- }
- }
-
- obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
- struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- struct mlx5_ib_flow_matcher *obj;
- int err;
-
- obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- obj->mask_len = uverbs_attr_get_len(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- err = uverbs_copy_from(&obj->matcher_mask,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- if (err)
- goto end;
-
- obj->flow_type = uverbs_attr_get_enum_id(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
-
- if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
- err = uverbs_copy_from(&obj->priority,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
- if (err)
- goto end;
- }
-
- err = uverbs_copy_from(&obj->match_criteria_enable,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
- if (err)
- goto end;
-
- err = mlx5_ib_matcher_ns(attrs, obj);
- if (err)
- goto end;
-
- uobj->object = obj;
- obj->mdev = dev->mdev;
- atomic_set(&obj->usecnt, 0);
- return 0;
-
-end:
- kfree(obj);
- return err;
-}
-
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- switch (maction->flow_action_raw.sub_type) {
- case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
- mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.modify_hdr);
- break;
- case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
- mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.pkt_reformat);
- break;
- case MLX5_IB_FLOW_ACTION_DECAP:
- break;
- default:
- break;
- }
-}
-
-static struct ib_flow_action *
-mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
- enum mlx5_ib_uapi_flow_table_type ft_type,
- u8 num_actions, void *in)
-{
- enum mlx5_flow_namespace_type namespace;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ERR_PTR(-EINVAL);
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return ERR_PTR(-ENOMEM);
-
- maction->flow_action_raw.modify_hdr =
- mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
-
- if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
- ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
- kfree(maction);
- return ERR_PTR(ret);
- }
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
- maction->flow_action_raw.dev = dev;
-
- return &maction->ib_action;
-}
-
-static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
-{
- return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- max_modify_header_actions);
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct ib_flow_action *action;
- int num_actions;
- void *in;
- int ret;
-
- if (!mlx5_ib_modify_header_supported(mdev))
- return -EOPNOTSUPP;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
-
- num_actions = uverbs_attr_ptr_get_array_size(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
- if (num_actions < 0)
- return num_actions;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
- if (ret)
- return ret;
- action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
-
- return 0;
-}
-
-static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
- u8 packet_reformat_type,
- u8 ft_type)
-{
- switch (packet_reformat_type) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE(ibdev->mdev,
- encap_general_header);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
- reformat_l2_to_l3_tunnel);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
- reformat_l3_tunnel_to_l2);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
- break;
- default:
- break;
- }
-
- return false;
-}
-
-static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
-{
- switch (dv_prt) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int mlx5_ib_flow_action_create_packet_reformat_ctx(
- struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_action *maction,
- u8 ft_type, u8 dv_prt,
- void *in, size_t len)
-{
- enum mlx5_flow_namespace_type namespace;
- u8 prm_prt;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ret;
-
- ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
- if (ret)
- return ret;
-
- maction->flow_action_raw.pkt_reformat =
- mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
- in, namespace);
- if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
- ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
- return ret;
- }
-
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
- maction->flow_action_raw.dev = dev;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
- if (ret)
- return ret;
-
- ret = uverbs_get_const(&dv_prt, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
- if (ret)
- return ret;
-
- if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
- return -EOPNOTSUPP;
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return -ENOMEM;
-
- if (dv_prt ==
- MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_DECAP;
- maction->flow_action_raw.dev = mdev;
- } else {
- void *in;
- int len;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
- if (IS_ERR(in)) {
- ret = PTR_ERR(in);
- goto free_maction;
- }
-
- len = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
-
- ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
- maction, ft_type, dv_prt, in, len);
- if (ret)
- goto free_maction;
- }
-
- uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
- return 0;
-
-free_maction:
- kfree(maction);
- return ret;
-}
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_CREATE_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_READ,
- UA_MANDATORY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
- UVERBS_OBJECT_QP,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_READ, 1,
- MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
- UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ, 1, 1,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
- UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
- UA_OPTIONAL,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
- enum mlx5_ib_create_flow_flags,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_DESTROY_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(mlx5_ib_fs,
- UVERBS_OBJECT_FLOW,
- &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
- &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
- set_add_copy_action_in_auto)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
- UVERBS_ATTR_MIN_SIZE(1),
- UA_ALLOC_AND_COPY,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
- enum mlx5_ib_uapi_flow_action_packet_reformat_type,
- UA_MANDATORY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(
- mlx5_ib_flow_actions,
- UVERBS_OBJECT_FLOW_ACTION,
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
- mlx5_ib_flow_type,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
- UVERBS_ATTR_TYPE(u8),
- UA_MANDATORY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- enum ib_flow_flags,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-
-const struct uapi_definition mlx5_ib_flow_defs[] = {
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
- MLX5_IB_OBJECT_FLOW_MATCHER),
- UAPI_DEF_CHAIN_OBJ_TREE(
- UVERBS_OBJECT_FLOW,
- &mlx5_ib_fs),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_actions),
- {},
-};
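[Editorial note, not part of the patch] flow.c is removed here because its uverbs flow-steering code moves into the new fs.c below, together with the steering logic that previously lived in main.c. One helper worth calling out is FIELDS_NOT_SUPPORTED() in fs.c, which relies on memchr_inv() to reject any filter bytes set past the last field the driver understands. A reduced sketch with a hypothetical filter layout:

struct example_filter {			/* hypothetical filter layout */
	u8 supported_a;
	u8 supported_b;			/* last field the driver handles */
	u8 future_c;			/* newer fields must stay zero */
	u8 future_d;
};

static bool example_has_unsupported_bits(const struct example_filter *f)
{
	/* non-NULL means some byte after 'supported_b' is non-zero */
	return memchr_inv(&f->future_c, 0,
			  sizeof(*f) -
			  offsetof(struct example_filter, future_c));
}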
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
new file mode 100644
index 000000000000..e9cfb9a2ef41
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -0,0 +1,2516 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/accel.h>
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+#include "counters.h"
+#include "devx.h"
+#include "fs.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+enum {
+ MATCH_CRITERIA_ENABLE_OUTER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC_BIT,
+ MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
+#define HEADER_IS_ZERO(match_criteria, headers) \
+ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
+
+static u8 get_match_criteria_enable(u32 *match_criteria)
+{
+ u8 match_criteria_enable;
+
+ match_criteria_enable =
+ (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+ MATCH_CRITERIA_ENABLE_OUTER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+ MATCH_CRITERIA_ENABLE_MISC_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+ MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+ return match_criteria_enable;
+}
+
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
+}
+
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
+static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
+}
+
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+#define LAST_ETH_FIELD vlan_tag
+#define LAST_IB_FIELD sl
+#define LAST_IPV4_FIELD tos
+#define LAST_IPV6_FIELD traffic_class
+#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
+
+/* Field is the last supported field */
+#define FIELDS_NOT_SUPPORTED(filter, field)\
+ memchr_inv((void *)&filter.field +\
+ sizeof(filter.field), 0,\
+ sizeof(filter) -\
+ offsetof(typeof(filter), field) -\
+ sizeof(filter.field))
+
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
+{
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_ESP:
+ if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
+ return -EINVAL;
+ /* Currently only AES_GCM keymat is supported by the driver */
+ action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
+ action->action |= is_egress ?
+ MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
+ return 0;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
+ return 0;
+ }
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action, u32 prev_type)
+{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters);
+ void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters_2);
+ void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters_2);
+ void *headers_c;
+ void *headers_v;
+ int match_ipv;
+ int ret;
+
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ }
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
+ case IB_FLOW_SPEC_ETH:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+ return -EOPNOTSUPP;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv6.mask.traffic_class,
+ ib_spec->ipv6.val.traffic_class);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
+
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ if (ib_spec->esp.mask.seq)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
+ ntohl(ib_spec->esp.mask.spi));
+ MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
+ ntohl(ib_spec->esp.val.spi));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (ib_spec->gre.mask.c_ks_res0_ver)
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ IPPROTO_GRE);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+ ntohs(ib_spec->gre.mask.protocol));
+ MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+ ntohs(ib_spec->gre.val.protocol));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.mask.key,
+ sizeof(ib_spec->gre.mask.key));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.val.key,
+ sizeof(ib_spec->gre.val.key));
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ switch (prev_type) {
+ case IB_FLOW_SPEC_UDP:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ default:
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ inner_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ inner_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ } else {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ }
+ }
+ break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
+ if (ret)
+ return ret;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now, only one counters spec per flow is supported */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* If a flow could match both multicast and unicast packets,
+ * it won't fall into the multicast flow steering table, and such a rule
+ * could steal other multicast traffic.
+ */
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
+{
+ union ib_flow_spec *flow_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
+ return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
+}
+
+enum valid_spec {
+ VALID_SPEC_INVALID,
+ VALID_SPEC_VALID,
+ VALID_SPEC_NA,
+};
+
+static enum valid_spec
+is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ const u32 *match_c = spec->match_criteria;
+ bool is_crypto =
+ (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
+ MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
+ bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
+ bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ /*
+ * Currently only crypto is supported on egress. When regular egress
+ * rules become supported, keep returning VALID_SPEC_NA for non-crypto
+ * specs.
+ */
+ if (!is_crypto)
+ return VALID_SPEC_NA;
+
+ return is_crypto && is_ipsec &&
+ (!egress || (!is_drop &&
+ !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
+ VALID_SPEC_VALID : VALID_SPEC_INVALID;
+}
+
+static bool is_valid_spec(struct mlx5_core_dev *mdev,
+ const struct mlx5_flow_spec *spec,
+ const struct mlx5_flow_act *flow_act,
+ bool egress)
+{
+ /* We currently support only IPsec egress flows */
+ return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
+}
+
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
+ unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
+ ib_spec->eth.mask.ether_type) {
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+ struct mlx5_ib_dev *dev = handler->dev;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rules(iter->rule);
+ put_flow_table(dev, iter->prio, true);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rules(handler->rule);
+ put_flow_table(dev, handler->prio, true);
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ mutex_unlock(&dev->flow_db->lock);
+ if (handler->flow_matcher)
+ atomic_dec(&handler->flow_matcher->usecnt);
+ kfree(handler);
+
+ return 0;
+}
+
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
+enum flow_table_type {
+ MLX5_IB_FT_RX,
+ MLX5_IB_FT_TX
+};
+
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+ struct mlx5_ib_flow_prio *prio,
+ int priority,
+ int num_entries, int num_groups,
+ u32 flags)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_table *ft;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = num_entries;
+ ft_attr.flags = flags;
+ ft_attr.autogroup.max_num_groups = num_groups;
+ ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return ERR_CAST(ft);
+
+ prio->flow_table = ft;
+ prio->refcount = 0;
+ return prio;
+}
+
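+/*
+ * Pick the flow table priority slot that matches the attribute type
+ * (normal, leftovers or sniffer) and create the underlying flow table on
+ * first use.
+ */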
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr,
+ enum flow_table_type ft_type)
+{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int max_table_size;
+ int num_entries;
+ int num_groups;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ enum mlx5_flow_namespace_type fn_type;
+
+ if (flow_is_multicast_only(flow_attr) &&
+ !dont_trap)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority,
+ &num_entries,
+ &num_groups);
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ allow_sniffer_and_nic_rx_shared_tir))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+
+ prio = &dev->flow_db->sniffer[ft_type];
+ priority = 0;
+ num_entries = 1;
+ num_groups = 1;
+ }
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ max_table_size = min_t(int, num_entries, max_table_size);
+
+ ft = prio->flow_table;
+ if (!ft)
+ return _get_prio(ns, prio, priority, max_table_size, num_groups,
+ flags);
+
+ return prio;
+}
+
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
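+/*
+ * Build an mlx5_flow_spec from the ib_flow_attr spec list, validate it and
+ * install the rule in @ft_prio, optionally matching on @underlay_qpn and
+ * wiring up the counters described in @ucmd.
+ */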
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
+ const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
+ unsigned int spec_index;
+ u32 prev_type = 0;
+ int err = 0;
+ int dest_num = 0;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+
+ if (!is_valid_attr(dev->mdev, flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ if (dev->is_rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(dev->mdev, spec,
+ ib_flow, flow_attr, &flow_act,
+ prev_type);
+ if (err < 0)
+ goto free;
+
+ prev_type = ((union ib_flow_spec *)ib_flow)->type;
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
+ if (dev->is_rep) {
+ struct mlx5_eswitch_rep *rep;
+
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
+ }
+
+ spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (is_egress &&
+ !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
+ err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ mcounters = to_mcounters(flow_act.counters);
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter_id =
+ mlx5_fc_id(mcounters->hw_cntrs_hndl);
+ dest_num++;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ if (!dest_num)
+ rule_dst = NULL;
+ } else {
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ flow_act.action |=
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else if (dest_num)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ spec->flow_context.flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act,
+ rule_dst, dest_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err && handler) {
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ kfree(handler);
+ }
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
+}
+
+enum {
+ LEFTOVERS_MC,
+ LEFTOVERS_UC,
+};
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_attr flow_attr;
+ struct ib_flow_spec_eth eth_flow;
+ } leftovers_specs[] = {
+ [LEFTOVERS_MC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {0x1} }
+ }
+ },
+ [LEFTOVERS_UC] = {
+ .flow_attr = {
+ .num_of_specs = 1,
+ .size = sizeof(leftovers_specs[0])
+ },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = {.dst_mac = {0x1} },
+ .val = {.dst_mac = {} }
+ }
+ }
+ };
+
+ handler = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_MC].flow_attr,
+ dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_specs[LEFTOVERS_UC].flow_attr,
+ dst);
+ if (IS_ERR(handler_ucast)) {
+ mlx5_del_flow_rules(handler->rule);
+ ft_prio->refcount--;
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_rx,
+ struct mlx5_ib_flow_prio *ft_tx,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_rx;
+ struct mlx5_ib_flow_handler *handler_tx;
+ int err;
+ static const struct ib_flow_attr flow_attr = {
+ .num_of_specs = 0,
+ .size = sizeof(flow_attr)
+ };
+
+ handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
+ if (IS_ERR(handler_rx)) {
+ err = PTR_ERR(handler_rx);
+ goto err;
+ }
+
+ handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
+ if (IS_ERR(handler_tx)) {
+ err = PTR_ERR(handler_tx);
+ goto err_tx;
+ }
+
+ list_add(&handler_tx->list, &handler_rx->list);
+
+ return handler_rx;
+
+err_tx:
+ mlx5_del_flow_rules(handler_rx->rule);
+ ft_rx->refcount--;
+ kfree(handler_rx);
+err:
+ return ERR_PTR(err);
+}
+
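+/*
+ * Verbs flow creation entry point: parse the optional user command, pick
+ * the RX/TX flow table and dispatch to the normal, leftovers or sniffer
+ * rule helpers.
+ */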
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
+ int err;
+ int underlay_qpn;
+
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+ sizeof(ucmd_hdr.reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently only one counters data is supported */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err)
+ goto free_ucmd;
+ }
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ if (domain != IB_FLOW_DOMAIN_USER ||
+ flow_attr->port > dev->num_ports ||
+ (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
+ IB_FLOW_ATTR_FLAGS_EGRESS))) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+ if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
+ if (IS_ERR(ft_prio_tx)) {
+ err = PTR_ERR(ft_prio_tx);
+ ft_prio_tx = NULL;
+ goto destroy_ft;
+ }
+ }
+
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->is_rss)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
+
+ if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
+ underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ mqp->underlay_qpn :
+ 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
+ underlay_qpn, ucmd);
+ } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr,
+ dst);
+ } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
+ } else {
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+ kfree(ucmd);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+ if (ft_prio_tx)
+ put_flow_table(dev, ft_prio_tx, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+free_ucmd:
+ kfree(ucmd);
+ return ERR_PTR(err);
+}
+
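+/*
+ * Resolve the flow table for a matcher based (raw) rule according to the
+ * matcher namespace type, creating the table on first use.
+ */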
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ bool mcast)
+{
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio = NULL;
+ int max_table_size = 0;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ if (mcast)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(fs_matcher->priority, false);
+
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
+ max_table_size = BIT(
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ priority = FDB_BYPASS_PATH;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ log_max_ft_size));
+ priority = fs_matcher->priority;
+ }
+
+ max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
+ prio = &dev->flow_db->prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ prio = &dev->flow_db->egress_prios[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ prio = &dev->flow_db->fdb;
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+ prio = &dev->flow_db->rdma_rx[priority];
+ else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
+ prio = &dev->flow_db->rdma_tx[priority];
+
+ if (!prio)
+ return ERR_PTR(-EINVAL);
+
+ if (prio->flow_table)
+ return prio;
+
+ return _get_prio(ns, prio, priority, max_table_size,
+ MLX5_FS_MAX_TYPES, flags);
+}
+
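+/*
+ * Install a raw rule: the match value comes directly from user space
+ * (@cmd_in) while the criteria mask comes from the flow matcher.
+ */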
+static struct mlx5_ib_flow_handler *
+_create_raw_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct mlx5_flow_destination *dst,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ void *cmd_in, int inlen,
+ int dst_num)
+{
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ memcpy(spec->match_value, cmd_in, inlen);
+ memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
+ fs_matcher->mask_len);
+ spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
+
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ flow_act, dst, dst_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+ ft_prio->flow_table = ft;
+
+free:
+ if (err)
+ kfree(handler);
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
+ void *match_v)
+{
+ void *match_c;
+ void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
+ void *dmac, *dmac_mask;
+ void *ipv4, *ipv4_mask;
+
+ if (!(fs_matcher->match_criteria_enable &
+ (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
+ return false;
+
+ match_c = fs_matcher->matcher_mask.match_params;
+ match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+
+ dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dmac_47_16);
+ dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dmac_47_16);
+
+ if (is_multicast_ether_addr(dmac) &&
+ is_multicast_ether_addr(dmac_mask))
+ return true;
+
+ ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
+ ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
+ return true;
+
+ return false;
+}
+
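+/*
+ * Add a raw flow steering rule: build the destination list (TIR, flow
+ * table, port and/or counter) and install the rule in the matcher's
+ * flow table.
+ */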
+static struct mlx5_ib_flow_handler *raw_fs_rule_add(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
+ u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
+{
+ struct mlx5_flow_destination *dst;
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
+ bool mcast;
+ int err;
+
+ if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOMEM);
+
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, fs_matcher, mcast);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+ dst[dst_num].type = dest_type;
+ dst[dst_num++].tir_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num++].ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
+ dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ }
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
+ handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+ flow_context, flow_act,
+ cmd_in, inlen, dst_num);
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ atomic_inc(&fs_matcher->usecnt);
+ handler->flow_matcher = fs_matcher;
+
+ kfree(dst);
+
+ return handler;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+
+ return ERR_PTR(err);
+}
+
+static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
+{
+ u32 flags = 0;
+
+ if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
+ flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
+
+ return flags;
+}
+
+#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED \
+ MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
+static struct ib_flow_action *
+mlx5_ib_create_flow_action_esp(struct ib_device *device,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(device);
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
+ struct mlx5_ib_flow_action *action;
+ u64 action_flags;
+ u64 flags;
+ int err = 0;
+
+ err = uverbs_get_flags64(
+ &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
+ if (err)
+ return ERR_PTR(err);
+
+ flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
+
+ /* We currently support only a subset of the standard features: a
+ * keymat of type AES_GCM with icv_len == 16, iv_algo == SEQ and esn
+ * (with overlap). Full offload mode isn't supported.
+ */
+ if (!attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (attr->keymat->protocol !=
+ IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ aes_gcm = &attr->keymat->keymat.aes_gcm;
+
+ if (aes_gcm->icv_len != 16 ||
+ aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ action = kmalloc(sizeof(*action), GFP_KERNEL);
+ if (!action)
+ return ERR_PTR(-ENOMEM);
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+ memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
+ sizeof(accel_attrs.keymat.aes_gcm.aes_key));
+ accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
+ memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
+ sizeof(accel_attrs.keymat.aes_gcm.salt));
+ memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
+ sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
+ accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
+ accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
+ accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
+ accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
+
+ action->esp_aes_gcm.ctx =
+ mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
+ if (IS_ERR(action->esp_aes_gcm.ctx)) {
+ err = PTR_ERR(action->esp_aes_gcm.ctx);
+ goto err_parse;
+ }
+
+ action->esp_aes_gcm.ib_flags = attr->flags;
+
+ return &action->ib_action;
+
+err_parse:
+ kfree(action);
+ return ERR_PTR(err);
+}
+
+static int
+mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
+ const struct ib_flow_action_attrs_esp *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+ struct mlx5_accel_esp_xfrm_attrs accel_attrs;
+ int err = 0;
+
+ if (attr->keymat || attr->replay || attr->encap ||
+ attr->spi || attr->seq || attr->tfc_pad ||
+ attr->hard_limit_pkts ||
+ (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
+ return -EOPNOTSUPP;
+
+ /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
+ * be modified.
+ */
+ if (!(maction->esp_aes_gcm.ib_flags &
+ IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
+ attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
+ IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
+ return -EINVAL;
+
+ memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
+ sizeof(accel_attrs));
+
+ accel_attrs.esn = attr->esn;
+ if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
+ accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+ else
+ accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
+
+ err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
+ &accel_attrs);
+ if (err)
+ return err;
+
+ maction->esp_aes_gcm.ib_flags &=
+ ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+ maction->esp_aes_gcm.ib_flags |=
+ attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
+
+ return 0;
+}
+
+static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.modify_hdr);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.pkt_reformat);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_ESP:
+ /*
+ * We only support aes_gcm for now, so we implicitly know this is
+ * the underlying crypto.
+ */
+ mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
+ break;
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ destroy_flow_action_raw(maction);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
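+/* Map a uapi flow table type onto the corresponding mlx5 flow namespace. */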
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
+ *namespace = MLX5_FLOW_NAMESPACE_FDB;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+ [MLX5_IB_FLOW_TYPE_NORMAL] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ .u.ptr = {
+ .len = sizeof(u16), /* data is priority */
+ .min_len = sizeof(u16),
+ }
+ },
+ [MLX5_IB_FLOW_TYPE_SNIFFER] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_DESTROY_TIR:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+ obj_id);
+ return true;
+
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+ table_id);
+ return true;
+ default:
+ return false;
+ }
+}
+
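+/*
+ * Validate the destination related attributes of MLX5_IB_METHOD_CREATE_FLOW
+ * and return the resolved destination id/type, QP and flags.
+ */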
+static int get_dests(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
+ int *dest_type, struct ib_qp **qp, u32 *flags)
+{
+ bool dest_devx, dest_qp;
+ void *devx_obj;
+ int err;
+
+ dest_devx = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+ dest_qp = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+ *flags = 0;
+ err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
+ if (err)
+ return err;
+
+ /* The two flags are mutually exclusive */
+ if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
+ *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ if (dest_devx && (dest_qp || *flags))
+ return -EINVAL;
+ else if (dest_qp && *flags)
+ return -EINVAL;
+ }
+
+ /* For FDB, allow only a DEVX object or drop as the destination */
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !(dest_devx ||
+ (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
+ return -EINVAL;
+
+ /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+ return -EINVAL;
+
+ *qp = NULL;
+ if (dest_devx) {
+ devx_obj =
+ uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+
+ /* Verify that the given DEVX object is a flow
+ * steering destination.
+ */
+ if (!is_flow_dest(devx_obj, dest_id, dest_type))
+ return -EINVAL;
+ /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ return -EINVAL;
+ } else if (dest_qp) {
+ struct mlx5_ib_qp *mqp;
+
+ *qp = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+ if (IS_ERR(*qp))
+ return PTR_ERR(*qp);
+
+ if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ mqp = to_mqp(*qp);
+ if (mqp->is_rss)
+ *dest_id = mqp->rss_qp.tirn;
+ else
+ *dest_id = mqp->raw_packet_qp.rq.tirn;
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ }
+
+ if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ return -EINVAL;
+
+ return 0;
+}
+
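+/*
+ * Return true if the DEVX object is a flow counter and compute the counter
+ * id at @offset within the counter bulk.
+ */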
+static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ *counter_id += offset;
+ return true;
+ }
+
+ return false;
+}
+
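+/*
+ * MLX5_IB_METHOD_CREATE_FLOW handler: resolve destinations, counters and
+ * flow actions from the attribute bundle and add the raw rule.
+ */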
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_flow_context flow_context = {.flow_tag =
+ MLX5_FS_DEFAULT_FLOW_TAG};
+ u32 *offset_attr, offset = 0, counter_id = 0;
+ int dest_id, dest_type = -1, inlen, len, ret, i;
+ struct mlx5_ib_flow_handler *flow_handler;
+ struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
+ struct ib_qp *qp = NULL;
+ void *devx_obj, *cmd_in;
+ struct ib_uobject *uobj;
+ struct mlx5_ib_dev *dev;
+ u32 flags;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
+ return -EINVAL;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+ int num_offsets = uverbs_attr_ptr_get_array_size(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ sizeof(u32));
+
+ if (num_offsets != 1)
+ return -EINVAL;
+
+ offset_attr = uverbs_attr_get_alloced_ptr(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+ offset = *offset_attr;
+ }
+
+ if (!is_flow_counter(devx_obj, offset, &counter_id))
+ return -EINVAL;
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res)
+ return -ENOMEM;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_context.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow_handler =
+ raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
+ counter_id, cmd_in, inlen, dest_id, dest_type);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
+
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
+
+ return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+ return ret;
+}
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_matcher *obj = uobject->object;
+ int ret;
+
+ ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
+ if (ret)
+ return ret;
+
+ kfree(obj);
+ return 0;
+}
+
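+/*
+ * Derive the matcher namespace from either the FT_TYPE attribute or the
+ * legacy FLOW_FLAGS attribute; only one of the two may be given.
+ */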
+static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *obj)
+{
+ enum mlx5_ib_uapi_flow_table_type ft_type =
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
+ u32 flags;
+ int err;
+
+ /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
+ * users should switch to it. The old attribute is kept so as not to
+ * break existing userspace.
+ */
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
+ uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
+ return -EINVAL;
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
+ err = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
+ if (err)
+ return err;
+
+ return 0;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ return err;
+
+ if (flags) {
+ mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
+ &obj->ns_type);
+ return 0;
+ }
+ }
+
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ struct mlx5_ib_flow_matcher *obj;
+ int err;
+
+ obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->mask_len = uverbs_attr_get_len(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ err = uverbs_copy_from(&obj->matcher_mask,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ if (err)
+ goto end;
+
+ obj->flow_type = uverbs_attr_get_enum_id(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+ if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+ err = uverbs_copy_from(&obj->priority,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+ if (err)
+ goto end;
+ }
+
+ err = uverbs_copy_from(&obj->match_criteria_enable,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+ if (err)
+ goto end;
+
+ err = mlx5_ib_matcher_ns(attrs, obj);
+ if (err)
+ goto end;
+
+ uobj->object = obj;
+ obj->mdev = dev->mdev;
+ atomic_set(&obj->usecnt, 0);
+ return 0;
+
+end:
+ kfree(obj);
+ return err;
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
+
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ int num_actions;
+ void *in;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+ in, namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
+ return ret;
+ }
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_CREATE_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ enum mlx5_ib_create_flow_flags,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DESTROY_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+ UVERBS_OBJECT_FLOW,
+ &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_add_copy_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ mlx5_ib_flow_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ {},
+};
+
+static const struct ib_device_ops flow_ops = {
+ .create_flow = mlx5_ib_create_flow,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+};
+
+static const struct ib_device_ops flow_ipsec_ops = {
+ .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
+ .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
+};
+
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+
+ ib_set_device_ops(&dev->ib_dev, &flow_ops);
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ ib_set_device_ops(&dev->ib_dev, &flow_ipsec_ops);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
new file mode 100644
index 000000000000..ad320adaf321
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_FS_H
+#define _MLX5_IB_FS_H
+
+#include "mlx5_ib.h"
+
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ mutex_init(&dev->flow_db->lock);
+ return 0;
+}
+#endif
+static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
+{
+ kfree(dev->flow_db);
+}
+#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6f99ed03d88e..fbc45a5e76c5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -59,10 +32,13 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "devx.h"
+#include "fs.h"
#include "srq.h"
#include "qp.h"
#include "wr.h"
-#include <linux/mlx5/fs_helpers.h>
+#include "restrack.h"
+#include "counters.h"
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
*native_port_num = 1;
port = &ibdev->port[ib_port_num - 1];
- if (!port)
- return NULL;
-
spin_lock(&port->mp.mpi_lock);
mpi = ibdev->port[ib_port_num - 1].mp.mpi;
if (mpi && !mpi->unaffiliate) {
@@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
mlx5_ib_disable_lb(dev, true, false);
}
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+ struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
+ int err;
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ err = mlx5_cmd_dump_fill_mkey(dev->mdev,
+ &resp->dump_fill_mkey);
+ if (err)
+ return err;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+ if (dev->wc_support)
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_bf_reg_size);
+ resp->cache_line_size = cache_line_size();
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp->cqe_version = context->cqe_version;
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev,
+ num_of_uars_per_page) : 1;
+
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+ if (mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_EGRESS))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+ if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+ resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+ /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ }
+
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+ resp->num_ports = dev->num_ports;
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
+
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+ resp->eth_min_inline++;
+ }
+
+ if (dev->mdev->clock_info)
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
+ /*
+ * We don't want to expose information from the PCI bar that is located
+ * after 4096 bytes, so if the arch only supports larger pages, let's
+ * pretend we don't support reading the HCA's core clock. This is also
+ * forced by mmap function.
+ */
+ if (PAGE_SIZE <= 4096) {
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp->hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg,
+ internal_timer_h) % PAGE_SIZE;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+ return 0;
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
- u32 dump_fill_mkey;
bool lib_uar_4k;
bool lib_uar_dyn;
@@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return -EINVAL;
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (dev->wc_support)
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = cache_line_size();
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
- resp.cqe_version = min_t(__u8,
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
- req.max_cqe_version);
- resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
- resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
- }
-
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
@@ -1887,87 +1913,24 @@ uar_done:
if (err)
goto out_devx;
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
- if (err)
- goto out_mdev;
- }
-
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
- resp.num_ports = dev->num_ports;
-
- if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
- resp.response_length += sizeof(resp.cqe_version);
-
- if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
- MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
- resp.response_length += sizeof(resp.cmds_supp_uhw);
- }
-
- if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) {
- if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
- mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
- resp.eth_min_inline++;
- }
- resp.response_length += sizeof(resp.eth_min_inline);
- }
-
- if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) {
- if (mdev->clock_info)
- resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
- resp.response_length += sizeof(resp.clock_info_versions);
- }
-
- /*
- * We don't want to expose information from the PCI bar that is located
- * after 4096 bytes, so if the arch only supports larger pages, let's
- * pretend we don't support reading the HCA's core clock. This is also
- * forced by mmap function.
- */
- if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
- if (PAGE_SIZE <= 4096) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
- }
- resp.response_length += sizeof(resp.hca_core_clock_offset);
- }
-
- if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
- resp.response_length += sizeof(resp.log_uar_size);
-
- if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
- resp.response_length += sizeof(resp.num_uars_per_page);
-
- if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
- resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
- resp.response_length += sizeof(resp.num_dyn_bfregs);
- }
-
- if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) {
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- resp.dump_fill_mkey = dump_fill_mkey;
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
- }
- resp.response_length += sizeof(resp.dump_fill_mkey);
- }
+ context->cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
- if (MLX5_CAP_GEN(dev->mdev, ece_support))
- resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+ err = set_ucontext_resp(uctx, &resp);
+ if (err)
+ goto out_mdev;
+ resp.response_length = min(udata->outlen, sizeof(resp));
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
- context->cqe_version = resp.cqe_version;
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
@@ -2000,6 +1963,29 @@ out_ctx:
return err;
}
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+ int ret;
+
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
+ if (ret)
+ return ret;
+
+ uctx_resp.response_length =
+ min_t(size_t,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+ sizeof(uctx_resp));
+
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ &uctx_resp,
+ sizeof(uctx_resp));
+ return ret;
+}
+
static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
-enum {
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
- MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT,
- MATCH_CRITERIA_ENABLE_MISC2_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers) \
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
- u8 match_criteria_enable;
-
- match_criteria_enable =
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
- MATCH_CRITERIA_ENABLE_MISC_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
- MATCH_CRITERIA_ENABLE_INNER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
- MATCH_CRITERIA_ENABLE_MISC2_BIT;
-
- return match_criteria_enable;
-}
-
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- u8 entry_mask;
- u8 entry_val;
- int err = 0;
-
- if (!mask)
- goto out;
-
- entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
- ip_protocol);
- entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
- ip_protocol);
- if (!entry_mask) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
- goto out;
- }
- /* Don't override existing ip protocol */
- if (mask != entry_mask || val != entry_val)
- err = -EINVAL;
-out:
- return err;
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
- bool inner)
-{
- if (inner) {
- MLX5_SET(fte_match_set_misc,
- misc_c, inner_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, inner_ipv6_flow_label, val);
- } else {
- MLX5_SET(fte_match_set_misc,
- misc_c, outer_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, outer_ipv6_flow_label, val);
- }
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
-{
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-#define LAST_FLOW_TAG_FIELD tag_id
-#define LAST_DROP_FIELD size
-#define LAST_COUNTERS_FIELD counters
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
-
-int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
- bool is_egress,
- struct mlx5_flow_act *action)
-{
-
- switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
- case IB_FLOW_ACTION_UNSPECIFIED:
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_hdr =
- maction->flow_action_raw.modify_hdr;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_DECAP) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
- if (action->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- return -EINVAL;
- action->action |=
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->pkt_reformat =
- maction->flow_action_raw.pkt_reformat;
- return 0;
- }
- /* fall through */
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int parse_flow_attr(struct mlx5_core_dev *mdev,
- struct mlx5_flow_spec *spec,
- const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action, u32 prev_type)
-{
- struct mlx5_flow_context *flow_context = &spec->flow_context;
- u32 *match_c = spec->match_criteria;
- u32 *match_v = spec->match_value;
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters);
- void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters_2);
- void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters_2);
- void *headers_c;
- void *headers_v;
- int match_ipv;
- int ret;
-
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- inner_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version);
- } else {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- }
-
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
- case IB_FLOW_SPEC_ETH:
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -EOPNOTSUPP;
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dmac_47_16),
- ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16),
- ib_spec->eth.val.dst_mac);
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- smac_47_16),
- ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- smac_47_16),
- ib_spec->eth.val.src_mac);
-
- if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- cvlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- cvlan_tag, 1);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_cfi,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_cfi,
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_prio,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_prio,
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
- }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ntohs(ib_spec->eth.val.ether_type));
- break;
- case IB_FLOW_SPEC_IPV4:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV4_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.src_ip,
- sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.src_ip,
- sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.dst_ip,
- sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.dst_ip,
- sizeof(ib_spec->ipv4.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto,
- ib_spec->ipv4.val.proto))
- return -EINVAL;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV6_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.src_ip,
- sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.src_ip,
- sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.dst_ip,
- sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.dst_ip,
- sizeof(ib_spec->ipv6.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv6.mask.traffic_class,
- ib_spec->ipv6.val.traffic_class);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr))
- return -EINVAL;
-
- set_flow_label(misc_params_c, misc_params_v,
- ntohl(ib_spec->ipv6.mask.flow_label),
- ntohl(ib_spec->ipv6.val.flow_label),
- ib_spec->type & IB_FLOW_SPEC_INNER);
- break;
- case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
- case IB_FLOW_SPEC_TCP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_UDP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_GRE:
- if (ib_spec->gre.mask.c_ks_res0_ver)
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_GRE);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
- ntohs(ib_spec->gre.mask.protocol));
- MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
- ntohs(ib_spec->gre.val.protocol));
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key.nvgre.hi),
- &ib_spec->gre.mask.key,
- sizeof(ib_spec->gre.mask.key));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key.nvgre.hi),
- &ib_spec->gre.val.key,
- sizeof(ib_spec->gre.val.key));
- break;
- case IB_FLOW_SPEC_MPLS:
- switch (prev_type) {
- case IB_FLOW_SPEC_UDP:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- case IB_FLOW_SPEC_GRE:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- default:
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- inner_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- inner_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- } else {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- }
- }
- break;
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
- LAST_TUNNEL_FIELD))
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
- ntohl(ib_spec->tunnel.mask.tunnel_id));
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
- ntohl(ib_spec->tunnel.val.tunnel_id));
- break;
- case IB_FLOW_SPEC_ACTION_TAG:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
- LAST_FLOW_TAG_FIELD))
- return -EOPNOTSUPP;
- if (ib_spec->flow_tag.tag_id >= BIT(24))
- return -EINVAL;
-
- flow_context->flow_tag = ib_spec->flow_tag.tag_id;
- flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
- break;
- case IB_FLOW_SPEC_ACTION_DROP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
- LAST_DROP_FIELD))
- return -EOPNOTSUPP;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- break;
- case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
- flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
- if (ret)
- return ret;
- break;
- case IB_FLOW_SPEC_ACTION_COUNT:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
- LAST_COUNTERS_FIELD))
- return -EOPNOTSUPP;
-
- /* for now support only one counters spec per flow */
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- return -EINVAL;
-
- action->counters = ib_spec->flow_count.counters;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
-{
- union ib_flow_spec *flow_spec;
-
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->num_of_specs < 1)
- return false;
-
- flow_spec = (union ib_flow_spec *)(ib_attr + 1);
- if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
- struct ib_flow_spec_ipv4 *ipv4_spec;
-
- ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
- if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
- return true;
-
- return false;
- }
-
- if (flow_spec->type == IB_FLOW_SPEC_ETH) {
- struct ib_flow_spec_eth *eth_spec;
-
- eth_spec = (struct ib_flow_spec_eth *)flow_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
- }
-
- return false;
-}
-
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
-	 * Currently only crypto is supported in egress; once regular egress
-	 * rules are supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop &&
- !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
-	/* We currently only support ipsec egress flow */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr,
- bool check_inner)
-{
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- int match_ipv = check_inner ?
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version) :
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
- bool ipv4_spec_valid, ipv6_spec_valid;
- unsigned int ip_spec_type = 0;
- bool has_ethertype = false;
- unsigned int spec_index;
- bool mask_valid = true;
- u16 eth_type = 0;
- bool type_valid;
-
- /* Validate that ethertype is correct */
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
- ib_spec->eth.mask.ether_type) {
- mask_valid = (ib_spec->eth.mask.ether_type ==
- htons(0xffff));
- has_ethertype = true;
- eth_type = ntohs(ib_spec->eth.val.ether_type);
- } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
- (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
- ip_spec_type = ib_spec->type;
- }
- ib_spec = (void *)ib_spec + ib_spec->size;
- }
-
- type_valid = (!has_ethertype) || (!ip_spec_type);
- if (!type_valid && mask_valid) {
- ipv4_spec_valid = (eth_type == ETH_P_IP) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
- ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-
- type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
- (((eth_type == ETH_P_MPLS_UC) ||
- (eth_type == ETH_P_MPLS_MC)) && match_ipv);
- }
-
- return type_valid;
-}
-
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr)
-{
- return is_valid_ethertype(mdev, flow_attr, false) &&
- is_valid_ethertype(mdev, flow_attr, true);
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
- prio->refcount -= !!ft_added;
- if (!prio->refcount) {
- mlx5_destroy_flow_table(prio->flow_table);
- prio->flow_table = NULL;
- }
-}
-
-static void counters_clear_description(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- mutex_lock(&mcounters->mcntrs_mutex);
- kfree(mcounters->counters_data);
- mcounters->counters_data = NULL;
- mcounters->cntrs_max_index = 0;
- mutex_unlock(&mcounters->mcntrs_mutex);
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
- struct mlx5_ib_flow_handler,
- ibflow);
- struct mlx5_ib_flow_handler *iter, *tmp;
- struct mlx5_ib_dev *dev = handler->dev;
-
- mutex_lock(&dev->flow_db->lock);
-
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
- mlx5_del_flow_rules(iter->rule);
- put_flow_table(dev, iter->prio, true);
- list_del(&iter->list);
- kfree(iter);
- }
-
- mlx5_del_flow_rules(handler->rule);
- put_flow_table(dev, handler->prio, true);
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
-
- mutex_unlock(&dev->flow_db->lock);
- if (handler->flow_matcher)
- atomic_dec(&handler->flow_matcher->usecnt);
- kfree(handler);
-
- return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
- priority *= 2;
- if (!dont_trap)
- priority++;
- return priority;
-}
-
-enum flow_table_type {
- MLX5_IB_FT_RX,
- MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES 6
-#define MLX5_FS_MAX_ENTRIES BIT(16)
-
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
- struct mlx5_ib_flow_prio *prio,
- int priority,
- int num_entries, int num_groups,
- u32 flags)
-{
- struct mlx5_flow_table_attr ft_attr = {};
- struct mlx5_flow_table *ft;
-
- ft_attr.prio = priority;
- ft_attr.max_fte = num_entries;
- ft_attr.flags = flags;
- ft_attr.autogroup.max_num_groups = num_groups;
- ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
- if (IS_ERR(ft))
- return ERR_CAST(ft);
-
- prio->flow_table = ft;
- prio->refcount = 0;
- return prio;
-}
-
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
- struct ib_flow_attr *flow_attr,
- enum flow_table_type ft_type)
-{
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
- struct mlx5_flow_table *ft;
- int max_table_size;
- int num_entries;
- int num_groups;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(flow_attr->priority,
- dont_trap);
- if (ft_type == MLX5_IB_FT_RX) {
- fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
- prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
- fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
- prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep && !esw_encap &&
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- }
- ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
- num_entries = MLX5_FS_MAX_ENTRIES;
- num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
- prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
- allow_sniffer_and_nic_rx_shared_tir))
- return ERR_PTR(-ENOTSUPP);
-
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
- prio = &dev->flow_db->sniffer[ft_type];
- priority = 0;
- num_entries = 1;
- num_groups = 1;
- }
-
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- max_table_size = min_t(int, num_entries, max_table_size);
-
- ft = prio->flow_table;
- if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
-
- return prio;
-}
-
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- u32 underlay_qpn)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- if (underlay_qpn &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- ft_field_support.bth_dst_qp)) {
- MLX5_SET(fte_match_set_misc,
- misc_params_v, bth_dst_qp, underlay_qpn);
- MLX5_SET(fte_match_set_misc,
- misc_params_c, bth_dst_qp, 0xffffff);
- }
-}
-
-static int read_flow_counters(struct ib_device *ibdev,
- struct mlx5_read_counters_attr *read_attr)
-{
- struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_fc_query(dev->mdev, fc,
- &read_attr->out[IB_COUNTER_PACKETS],
- &read_attr->out[IB_COUNTER_BYTES]);
-}
-
-	/* flow counters currently expose two counters: packets and bytes */
-#define FLOW_COUNTERS_NUM 2
-static int counters_set_description(struct ib_counters *counters,
- enum mlx5_ib_counters_type counters_type,
- struct mlx5_ib_flow_counters_desc *desc_data,
- u32 ncounters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- u32 cntrs_max_index = 0;
- int i;
-
- if (counters_type != MLX5_IB_COUNTERS_FLOW)
- return -EINVAL;
-
- /* init the fields for the object */
- mcounters->type = counters_type;
- mcounters->read_counters = read_flow_counters;
- mcounters->counters_num = FLOW_COUNTERS_NUM;
- mcounters->ncounters = ncounters;
-	/* each counter entry has both a description and an index pair */
- for (i = 0; i < ncounters; i++) {
- if (desc_data[i].description > IB_COUNTER_BYTES)
- return -EINVAL;
-
- if (cntrs_max_index <= desc_data[i].index)
- cntrs_max_index = desc_data[i].index + 1;
- }
-
- mutex_lock(&mcounters->mcntrs_mutex);
- mcounters->counters_data = desc_data;
- mcounters->cntrs_max_index = cntrs_max_index;
- mutex_unlock(&mcounters->mcntrs_mutex);
-
- return 0;
-}
-
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
-static int flow_counters_set_data(struct ib_counters *ibcounters,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
- struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
- struct mlx5_ib_flow_counters_desc *desc_data = NULL;
- bool hw_hndl = false;
- int ret = 0;
-
- if (ucmd && ucmd->ncounters_data != 0) {
- cntrs_data = ucmd->data;
- if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
- return -EINVAL;
-
- desc_data = kcalloc(cntrs_data->ncounters,
- sizeof(*desc_data),
- GFP_KERNEL);
- if (!desc_data)
- return -ENOMEM;
-
- if (copy_from_user(desc_data,
- u64_to_user_ptr(cntrs_data->counters_data),
- sizeof(*desc_data) * cntrs_data->ncounters)) {
- ret = -EFAULT;
- goto free;
- }
- }
-
- if (!mcounters->hw_cntrs_hndl) {
- mcounters->hw_cntrs_hndl = mlx5_fc_create(
- to_mdev(ibcounters->device)->mdev, false);
- if (IS_ERR(mcounters->hw_cntrs_hndl)) {
- ret = PTR_ERR(mcounters->hw_cntrs_hndl);
- goto free;
- }
- hw_hndl = true;
- }
-
- if (desc_data) {
- /* counters already bound to at least one flow */
- if (mcounters->cntrs_max_index) {
- ret = -EINVAL;
- goto free_hndl;
- }
-
- ret = counters_set_description(ibcounters,
- MLX5_IB_COUNTERS_FLOW,
- desc_data,
- cntrs_data->ncounters);
- if (ret)
- goto free_hndl;
-
- } else if (!mcounters->cntrs_max_index) {
- /* counters not bound yet, must have udata passed */
- ret = -EINVAL;
- goto free_hndl;
- }
-
- return 0;
-
-free_hndl:
- if (hw_hndl) {
- mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
- mcounters->hw_cntrs_hndl);
- mcounters->hw_cntrs_hndl = NULL;
- }
-free:
- kfree(desc_data);
- return ret;
-}
-
-static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- struct mlx5_eswitch_rep *rep)
-{
- struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
- void *misc;
-
- if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_for_match(esw,
- rep->vport));
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
-
- MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
- mlx5_eswitch_get_vport_metadata_mask());
- } else {
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
-
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
-
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- }
-}
-
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst,
- u32 underlay_qpn,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {};
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination dest_arr[2] = {};
- struct mlx5_flow_destination *rule_dst = dest_arr;
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
- unsigned int spec_index;
- u32 prev_type = 0;
- int err = 0;
- int dest_num = 0;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-
- if (!is_valid_attr(dev->mdev, flow_attr))
- return ERR_PTR(-EINVAL);
-
- if (dev->is_rep && is_egress)
- return ERR_PTR(-EINVAL);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec,
- ib_flow, flow_attr, &flow_act,
- prev_type);
- if (err < 0)
- goto free;
-
- prev_type = ((union ib_flow_spec *)ib_flow)->type;
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
- }
-
- if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
-
- if (!flow_is_multicast_only(flow_attr))
- set_underlay_qp(dev, spec, underlay_qpn);
-
- if (dev->is_rep) {
- struct mlx5_eswitch_rep *rep;
-
- rep = dev->port[flow_attr->port - 1].rep;
- if (!rep) {
- err = -EINVAL;
- goto free;
- }
-
- mlx5_ib_set_rule_source_port(dev, spec, rep);
- }
-
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
- err = -EINVAL;
- goto free;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- struct mlx5_ib_mcounters *mcounters;
-
- err = flow_counters_set_data(flow_act.counters, ucmd);
- if (err)
- goto free;
-
- mcounters = to_mcounters(flow_act.counters);
- handler->ibcounters = flow_act.counters;
- dest_arr[dest_num].type =
- MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
- dest_num++;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!dest_num)
- rule_dst = NULL;
- } else {
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
- flow_act.action |=
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- if (is_egress)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- else if (dest_num)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- }
-
- if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
- spec->flow_context.flow_tag, flow_attr->type);
- err = -EINVAL;
- goto free;
- }
- handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act,
- rule_dst, dest_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
-
- ft_prio->flow_table = ft;
-free:
- if (err && handler) {
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
- kfree(handler);
- }
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
-}
-
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- static struct {
- struct ib_flow_attr flow_attr;
- struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
-
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
- if (!IS_ERR(handler) &&
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
- handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
- if (IS_ERR(handler_ucast)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_ucast;
- } else {
- list_add(&handler_ucast->list, &handler->list);
- }
- }
-
- return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_rx,
- struct mlx5_ib_flow_prio *ft_tx,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_rx;
- struct mlx5_ib_flow_handler *handler_tx;
- int err;
- static const struct ib_flow_attr flow_attr = {
- .num_of_specs = 0,
- .size = sizeof(flow_attr)
- };
-
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
- if (IS_ERR(handler_rx)) {
- err = PTR_ERR(handler_rx);
- goto err;
- }
-
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
- if (IS_ERR(handler_tx)) {
- err = PTR_ERR(handler_tx);
- goto err_tx;
- }
-
- list_add(&handler_tx->list, &handler_rx->list);
-
- return handler_rx;
-
-err_tx:
- mlx5_del_flow_rules(handler_rx->rule);
- ft_rx->refcount--;
- kfree(handler_rx);
-err:
- return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain,
- struct ib_udata *udata)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_qp *mqp = to_mqp(qp);
- struct mlx5_ib_flow_handler *handler = NULL;
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
- struct mlx5_ib_flow_prio *ft_prio;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
- struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
- size_t min_ucmd_sz, required_ucmd_sz;
- int err;
- int underlay_qpn;
-
- if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
- if (udata->inlen < min_ucmd_sz)
- return ERR_PTR(-EOPNOTSUPP);
-
- err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
- if (err)
- return ERR_PTR(err);
-
-	/* currently, only a single counters data entry is supported */
- if (ucmd_hdr.ncounters_data > 1)
- return ERR_PTR(-EINVAL);
-
- required_ucmd_sz = min_ucmd_sz +
- sizeof(struct mlx5_ib_flow_counters_data) *
- ucmd_hdr.ncounters_data;
- if (udata->inlen > required_ucmd_sz &&
- !ib_is_udata_cleared(udata, required_ucmd_sz,
- udata->inlen - required_ucmd_sz))
- return ERR_PTR(-EOPNOTSUPP);
-
- ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
- if (!ucmd)
- return ERR_PTR(-ENOMEM);
-
- err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
- if (err)
- goto free_ucmd;
- }
-
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- if (is_egress &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = get_flow_table(dev, flow_attr,
- is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
- if (IS_ERR(ft_prio_tx)) {
- err = PTR_ERR(ft_prio_tx);
- ft_prio_tx = NULL;
- goto destroy_ft;
- }
- }
-
- if (is_egress) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- } else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->is_rss)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
- }
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- mqp->underlay_qpn :
- 0;
- handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
- underlay_qpn, ucmd);
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
- err = -EINVAL;
- goto destroy_ft;
- }
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- handler = NULL;
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
- kfree(ucmd);
-
- return &handler->ibflow;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
- if (ft_prio_tx)
- put_flow_table(dev, ft_prio_tx, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-free_ucmd:
- kfree(ucmd);
- return ERR_PTR(err);
-}
-
-static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- bool mcast)
-{
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio = NULL;
- int max_table_size = 0;
- bool esw_encap;
- u32 flags = 0;
- int priority;
-
- if (mcast)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
-
- esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
- DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2) &&
- !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- max_table_size = BIT(
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
- max_table_size = BIT(
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) &&
- esw_encap)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- priority = FDB_BYPASS_PATH;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
- log_max_ft_size));
- priority = fs_matcher->priority;
- }
-
- max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
-
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
- prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- prio = &dev->flow_db->fdb;
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
- prio = &dev->flow_db->rdma_rx[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX)
- prio = &dev->flow_db->rdma_tx[priority];
-
- if (!prio)
- return ERR_PTR(-EINVAL);
-
- if (prio->flow_table)
- return prio;
-
- return _get_prio(ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
-}
-
-static struct mlx5_ib_flow_handler *
-_create_raw_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct mlx5_flow_destination *dst,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen,
- int dst_num)
-{
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- int err = 0;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- memcpy(spec->match_value, cmd_in, inlen);
- memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
- fs_matcher->mask_len);
- spec->match_criteria_enable = fs_matcher->match_criteria_enable;
- spec->flow_context = *flow_context;
-
- handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, dst_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
- ft_prio->flow_table = ft;
-
-free:
- if (err)
- kfree(handler);
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
- void *match_v)
-{
- void *match_c;
- void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
- void *dmac, *dmac_mask;
- void *ipv4, *ipv4_mask;
-
- if (!(fs_matcher->match_criteria_enable &
- (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
- return false;
-
- match_c = fs_matcher->matcher_mask.match_params;
- match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
-
- dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dmac_47_16);
- dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dmac_47_16);
-
- if (is_multicast_ether_addr(dmac) &&
- is_multicast_ether_addr(dmac_mask))
- return true;
-
- ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
- ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
- return true;
-
- return false;
-}
-
-struct mlx5_ib_flow_handler *
-mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act,
- u32 counter_id,
- void *cmd_in, int inlen, int dest_id,
- int dest_type)
-{
- struct mlx5_flow_destination *dst;
- struct mlx5_ib_flow_prio *ft_prio;
- struct mlx5_ib_flow_handler *handler;
- int dst_num = 0;
- bool mcast;
- int err;
-
- if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOMEM);
-
- dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
-
- mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
-
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst[dst_num].type = dest_type;
- dst[dst_num++].tir_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst[dst_num++].ft_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
- dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- }
-
-
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
- dst_num++;
- }
-
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
- flow_context, flow_act,
- cmd_in, inlen, dst_num);
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- atomic_inc(&fs_matcher->usecnt);
- handler->flow_matcher = fs_matcher;
-
- kfree(dst);
-
- return handler;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-
- return ERR_PTR(err);
-}
-
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
-	/* We currently support only a subset of the standard features: a
-	 * keymat of type AES_GCM with icv_len == 16, iv_algo == SEQ, and ESN
-	 * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-
- switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
-		 * For now we only support aes_gcm, so we implicitly know this is
-		 * the underlying crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
- case IB_FLOW_ACTION_UNSPECIFIED:
- mlx5_ib_destroy_flow_action_raw(maction);
- break;
- default:
- WARN_ON(true);
- break;
- }
-
- kfree(maction);
- return 0;
-}
-
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
return __get_port_caps(dev, port);
}
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
-{
- int err;
-
- err = mlx5_mr_cache_cleanup(dev);
- if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
- if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
- if (dev->umrc.cq)
- ib_free_cq(dev->umrc.cq);
- if (dev->umrc.pd)
- ib_dealloc_pd(dev->umrc.pd);
-}
-
-enum {
- MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
-{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
- int ret;
-
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
-
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- ret = PTR_ERR(pd);
- goto error_0;
- }
-
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
- if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
- ret = PTR_ERR(cq);
- goto error_2;
- }
-
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
- qp->send_cq = init_attr->send_cq;
- qp->recv_cq = init_attr->recv_cq;
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
- IB_QP_PORT, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
-
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
-
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
- }
-
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
-
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
- if (ret) {
- mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
- }
-
- kfree(attr);
- kfree(init_attr);
-
- return 0;
-
-error_4:
- mlx5_ib_destroy_qp(qp, NULL);
- dev->umrc.qp = NULL;
-
-error_3:
- ib_free_cq(cq);
- dev->umrc.cq = NULL;
-
-error_2:
- ib_dealloc_pd(pd);
- dev->umrc.pd = NULL;
-
-error_0:
- kfree(attr);
- kfree(init_attr);
- return ret;
-}
-
static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
switch (umr_fence_cap) {
@@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
}
}
-static int create_dev_resources(struct mlx5_ib_resources *devr)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
struct ib_srq_init_attr attr;
- struct mlx5_ib_dev *dev;
struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
- dev = container_of(devr, struct mlx5_ib_dev, devr);
ibdev = &dev->ib_dev;
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
+
mutex_init(&devr->mutex);
devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
@@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
if (ret)
goto err_create_cq;
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x0)) {
- ret = PTR_ERR(devr->x0);
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
goto error2;
- }
- devr->x0->device = &dev->ib_dev;
- devr->x0->inode = NULL;
- atomic_set(&devr->x0->usecnt, 0);
- mutex_init(&devr->x0->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x1)) {
- ret = PTR_ERR(devr->x1);
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret)
goto error3;
- }
- devr->x1->device = &dev->ib_dev;
- devr->x1->inode = NULL;
- atomic_set(&devr->x1->usecnt, 0);
- mutex_init(&devr->x1->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
attr.ext.cq = devr->c0;
- attr.ext.xrc.xrcd = devr->x0;
devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
if (!devr->s0) {
@@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
devr->s0->srq_type = IB_SRQT_XRC;
- devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.cq = devr->c0;
ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
if (ret)
goto err_create;
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
@@ -5116,9 +3142,9 @@ error5:
err_create:
kfree(devr->s0);
error4:
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
error3:
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
error2:
mlx5_ib_destroy_cq(devr->c0, NULL);
err_create_cq:
@@ -5130,16 +3156,17 @@ error0:
return ret;
}
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
int port;
mlx5_ib_destroy_srq(devr->s1, NULL);
kfree(devr->s1);
mlx5_ib_destroy_srq(devr->s0, NULL);
kfree(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
mlx5_ib_destroy_cq(devr->c0, NULL);
kfree(devr->c0);
mlx5_ib_dealloc_pd(devr->p0, NULL);
@@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_counter {
- const char *name;
- size_t offset;
-};
-
-#define INIT_Q_COUNTER(_name) \
- { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-
-static const struct mlx5_ib_counter basic_q_cnts[] = {
- INIT_Q_COUNTER(rx_write_requests),
- INIT_Q_COUNTER(rx_read_requests),
- INIT_Q_COUNTER(rx_atomic_requests),
- INIT_Q_COUNTER(out_of_buffer),
-};
-
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
- INIT_Q_COUNTER(out_of_sequence),
-};
-
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
- INIT_Q_COUNTER(duplicate_request),
- INIT_Q_COUNTER(rnr_nak_retry_err),
- INIT_Q_COUNTER(packet_seq_err),
- INIT_Q_COUNTER(implied_nak_seq_err),
- INIT_Q_COUNTER(local_ack_timeout_err),
-};
-
-#define INIT_CONG_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
-
-static const struct mlx5_ib_counter cong_cnts[] = {
- INIT_CONG_COUNTER(rp_cnp_ignored),
- INIT_CONG_COUNTER(rp_cnp_handled),
- INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
- INIT_CONG_COUNTER(np_cnp_sent),
-};
-
-static const struct mlx5_ib_counter extended_err_cnts[] = {
- INIT_Q_COUNTER(resp_local_length_error),
- INIT_Q_COUNTER(resp_cqe_error),
- INIT_Q_COUNTER(req_cqe_error),
- INIT_Q_COUNTER(req_remote_invalid_request),
- INIT_Q_COUNTER(req_remote_access_errors),
- INIT_Q_COUNTER(resp_remote_access_errors),
- INIT_Q_COUNTER(resp_cqe_flush_error),
- INIT_Q_COUNTER(req_cqe_flush_error),
-};
-
-static const struct mlx5_ib_counter roce_accl_cnts[] = {
- INIT_Q_COUNTER(roce_adp_retrans),
- INIT_Q_COUNTER(roce_adp_retrans_to),
- INIT_Q_COUNTER(roce_slow_restart),
- INIT_Q_COUNTER(roce_slow_restart_cnps),
- INIT_Q_COUNTER(roce_slow_restart_trans),
-};
-
-#define INIT_EXT_PPCNT_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(ppcnt_reg, \
- counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
-
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
- INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
-};
-
-static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
-{
- return MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
- MLX5_ESWITCH_OFFLOADS;
-}
-
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
- int num_cnt_ports;
- int i;
-
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-
- for (i = 0; i < num_cnt_ports; i++) {
- if (dev->port[i].cnts.set_id) {
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
- dev->port[i].cnts.set_id);
- mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
- }
- kfree(dev->port[i].cnts.names);
- kfree(dev->port[i].cnts.offsets);
- }
-}
-
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_counters *cnts)
-{
- u32 num_counters;
-
- num_counters = ARRAY_SIZE(basic_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl))
- num_counters += ARRAY_SIZE(roce_accl_cnts);
-
- cnts->num_q_counters = num_counters;
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
- num_counters += ARRAY_SIZE(cong_cnts);
- }
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
- num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
- }
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
- if (!cnts->names)
- return -ENOMEM;
-
- cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
- if (!cnts->offsets)
- goto err_names;
-
- return 0;
-
-err_names:
- kfree(cnts->names);
- cnts->names = NULL;
- return -ENOMEM;
-}
-
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
-{
- int i;
- int j = 0;
-
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
- names[j] = basic_q_cnts[i].name;
- offsets[j] = basic_q_cnts[i].offset;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
- names[j] = out_of_seq_q_cnts[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
- names[j] = retrans_q_cnts[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
- names[j] = extended_err_cnts[i].name;
- offsets[j] = extended_err_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
- for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
- names[j] = roce_accl_cnts[i].name;
- offsets[j] = roce_accl_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
- names[j] = cong_cnts[i].name;
- offsets[j] = cong_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
- names[j] = ext_ppcnt_cnts[i].name;
- offsets[j] = ext_ppcnt_cnts[i].offset;
- }
- }
-}
-
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
-{
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
- int num_cnt_ports;
- int err = 0;
- int i;
- bool is_shared;
-
- MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
- num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
- for (i = 0; i < num_cnt_ports; i++) {
- err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
- if (err)
- goto err_alloc;
-
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
- dev->port[i].cnts.offsets);
-
- MLX5_SET(alloc_q_counter_in, in, uid,
- is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
-
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err) {
- mlx5_ib_warn(dev,
- "couldn't allocate queue counter for port %d, err %d\n",
- i + 1, err);
- goto err_alloc;
- }
-
- dev->port[i].cnts.set_id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
- return 0;
-
-err_alloc:
- mlx5_ib_dealloc_counters(dev);
- return err;
-}
-
-static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
- u8 port_num)
-{
- return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
- &dev->port[port_num].cnts;
-}
-
-/**
- * mlx5_ib_get_counters_id - Returns counters id to use for device+port
- * @dev: Pointer to mlx5 IB device
- * @port_num: Zero based port number
- *
- * mlx5_ib_get_counters_id() returns the counter set id to use for the given
- * device and port combination, in both switchdev and non-switchdev modes of
- * the parent device.
- */
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
-{
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
-
- return cnts->set_id;
-}
-
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts;
- bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
-
- if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
- return NULL;
-
- cnts = get_counters(dev, port_num - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats,
- u16 set_id)
-{
- u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
- __be32 val;
- int ret, i;
-
- MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
- MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
- ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
- if (ret)
- return ret;
-
- for (i = 0; i < cnts->num_q_counters; i++) {
- val = *(__be32 *)((void *)out + cnts->offsets[i]);
- stats->value[i] = (u64)be32_to_cpu(val);
- }
-
- return 0;
-}
-
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_counters *cnts,
- struct rdma_hw_stats *stats)
-{
- int offset = cnts->num_q_counters + cnts->num_cong_counters;
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- int ret, i;
- void *out;
-
- out = kvzalloc(sz, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
- if (ret)
- goto free;
-
- for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
- stats->value[i + offset] =
- be64_to_cpup((__be64 *)(out +
- cnts->offsets[i + offset]));
-free:
- kvfree(out);
- return ret;
-}
-
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
- struct mlx5_core_dev *mdev;
- int ret, num_counters;
- u8 mdev_port_num;
-
- if (!stats)
- return -EINVAL;
-
- num_counters = cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters;
-
- /* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
- if (ret)
- return ret;
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
- if (ret)
- return ret;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
- &mdev_port_num);
- if (!mdev) {
-			/* If the port is not affiliated yet, it is in the down
-			 * state and has no counters yet, so the values would be
-			 * zero and there is no need to read from the HCA.
- */
- goto done;
- }
- ret = mlx5_lag_query_cong_counters(dev->mdev,
- stats->value +
- cnts->num_q_counters,
- cnts->num_cong_counters,
- cnts->offsets +
- cnts->num_q_counters);
-
- mlx5_ib_put_native_port_mdev(dev, port_num);
- if (ret)
- return ret;
- }
-
-done:
- return num_counters;
-}
-
-static struct rdma_hw_stats *
-mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return rdma_alloc_hw_stats_struct(cnts->names,
- cnts->num_q_counters +
- cnts->num_cong_counters +
- cnts->num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- const struct mlx5_ib_counters *cnts =
- get_counters(dev, counter->port - 1);
-
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
-}
-
-static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
-{
- struct mlx5_ib_dev *dev = to_mdev(counter->device);
- u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-
- if (!counter->id)
- return 0;
-
- MLX5_SET(dealloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_Q_COUNTER);
- MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
- return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
-}
-
-static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- int err;
-
- if (!counter->id) {
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-
- MLX5_SET(alloc_q_counter_in, in, opcode,
- MLX5_CMD_OP_ALLOC_Q_COUNTER);
- MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
- err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
- if (err)
- return err;
- counter->id =
- MLX5_GET(alloc_q_counter_out, out, counter_set_id);
- }
-
- err = mlx5_ib_qp_set_counter(qp, counter);
- if (err)
- goto fail_set_counter;
-
- return 0;
-
-fail_set_counter:
- mlx5_ib_counter_dealloc(counter);
- counter->id = 0;
-
- return err;
-}
-
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
-{
- return mlx5_ib_qp_set_counter(qp, NULL);
-}
-
static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
@@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- if (!dev->delay_drop.dir_debugfs)
- return;
- debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
- dev->delay_drop.dir_debugfs = NULL;
-}
-
-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- cancel_work_sync(&dev->delay_drop.delay_drop_work);
- delay_drop_debugfs_cleanup(dev);
-}
-
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = {
.read = delay_drop_timeout_read,
};
-static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
-{
- struct dentry *root;
-
- if (!mlx5_debugfs_root)
- return;
-
- root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
- dev->delay_drop.dir_debugfs = root;
-
- debugfs_create_atomic_t("num_timeout_events", 0400, root,
- &dev->delay_drop.events_cnt);
- debugfs_create_atomic_t("num_rqs", 0400, root,
- &dev->delay_drop.rqs_cnt);
- debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
- &fops_delay_drop_timeout);
-}
-
-static void init_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- mutex_init(&dev->delay_drop.lock);
- dev->delay_drop.dev = dev;
- dev->delay_drop.activate = false;
- dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
- INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
- atomic_set(&dev->delay_drop.rqs_cnt, 0);
- atomic_set(&dev->delay_drop.events_cnt, 0);
-
- delay_drop_debugfs_init(dev);
-}
-
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
@@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_query_context,
+ UVERBS_OBJECT_DEVICE,
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+ dump_fill_mkey),
+ UA_MANDATORY));
+
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_action),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
{}
};
-static int mlx5_ib_read_counters(struct ib_counters *counters,
- struct ib_counters_read_attr *read_attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- struct mlx5_read_counters_attr mread_attr = {};
- struct mlx5_ib_flow_counters_desc *desc;
- int ret, i;
-
- mutex_lock(&mcounters->mcntrs_mutex);
- if (mcounters->cntrs_max_index > read_attr->ncounters) {
- ret = -EINVAL;
- goto err_bound;
- }
-
- mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
- GFP_KERNEL);
- if (!mread_attr.out) {
- ret = -ENOMEM;
- goto err_bound;
- }
-
- mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
- mread_attr.flags = read_attr->flags;
- ret = mcounters->read_counters(counters->device, &mread_attr);
- if (ret)
- goto err_read;
-
-	/* Walk the counters data array and assign values according to the
-	 * description and index pairs
- */
- desc = mcounters->counters_data;
- for (i = 0; i < mcounters->ncounters; i++)
- read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
-
-err_read:
- kfree(mread_attr.out);
-err_bound:
- mutex_unlock(&mcounters->mcntrs_mutex);
- return ret;
-}
-
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- counters_clear_description(counters);
- if (mcounters->hw_cntrs_hndl)
- mlx5_fc_destroy(to_mdev(counters->device)->mdev,
- mcounters->hw_cntrs_hndl);
-
- kfree(mcounters);
-
- return 0;
-}
-
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_mcounters *mcounters;
-
- mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
- if (!mcounters)
- return ERR_PTR(-ENOMEM);
-
- mutex_init(&mcounters->mcntrs_mutex);
-
- return &mcounters->ibcntrs;
-}
-
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -6547,21 +4005,16 @@ err_mp:
return -ENOMEM;
}
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_enable_driver(struct ib_device *dev)
{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
+ struct mlx5_ib_dev *mdev = to_mdev(dev);
+ int ret;
- return 0;
-}
+ ret = mlx5_ib_test_wc(mdev);
+ mlx5_ib_dbg(mdev, "Write-Combining %s",
+ mdev->wc_support ? "supported" : "not supported");
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
-{
- kfree(dev->flow_db);
+ return ret;
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
@@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.attach_mcast = mlx5_ib_mcg_attach,
.check_mr_status = mlx5_ib_check_mr_status,
.create_ah = mlx5_ib_create_ah,
- .create_counters = mlx5_ib_create_counters,
.create_cq = mlx5_ib_create_cq,
- .create_flow = mlx5_ib_create_flow,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
.dealloc_pd = mlx5_ib_dealloc_pd,
@@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.del_gid = mlx5_ib_del_gid,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
- .destroy_counters = mlx5_ib_destroy_counters,
.destroy_cq = mlx5_ib_destroy_cq,
- .destroy_flow = mlx5_ib_destroy_flow,
- .destroy_flow_action = mlx5_ib_destroy_flow_action,
.destroy_qp = mlx5_ib_destroy_qp,
.destroy_srq = mlx5_ib_destroy_srq,
.detach_mcast = mlx5_ib_mcg_detach,
@@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.enable_driver = mlx5_ib_enable_driver,
- .fill_res_entry = mlx5_ib_fill_res_entry,
- .fill_stat_entry = mlx5_ib_fill_stat_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
@@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_pkey = mlx5_ib_query_pkey,
.query_qp = mlx5_ib_query_qp,
.query_srq = mlx5_ib_query_srq,
- .read_counters = mlx5_ib_read_counters,
+ .query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
};
-static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
.rdma_netdev_get_params = mlx5_ib_rn_get_params,
};
@@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
.alloc_xrcd = mlx5_ib_alloc_xrcd,
.dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
@@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.modify_wq = mlx5_ib_modify_wq,
};
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
- u8 port_num;
-
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
-
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- /* Register only for native ports */
- return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err = 0;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
- if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev);
-
- return err;
-}
-
-static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_stage_common_roce_cleanup(dev);
-}
-
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num = 0;
int err;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev);
- if (err)
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ /* Register only for native ports */
+ err = mlx5_add_netdev_notifier(dev, port_num);
+ if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
+ /*
+			 * We don't enable the ETH interface for:
+			 * 1. IB representors
+			 * 2. RoCE disabled by the user through the devlink interface
+ */
return err;
err = mlx5_enable_eth(dev);
@@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
- mlx5_ib_stage_common_roce_cleanup(dev);
-
+ mlx5_remove_netdev_notifier(dev, port_num);
return err;
}
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u8 port_num;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_ib_stage_common_roce_cleanup(dev);
- }
-}
-
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
- return create_dev_resources(&dev->devr);
-}
+ if (!dev->is_rep)
+ mlx5_disable_eth(dev);
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
- return mlx5_ib_odp_init_one(dev);
-}
-
-static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_odp_cleanup_one(dev);
-}
-
-static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
- .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
- .get_hw_stats = mlx5_ib_get_hw_stats,
- .counter_bind_qp = mlx5_ib_counter_bind_qp,
- .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
- .counter_dealloc = mlx5_ib_counter_dealloc,
- .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
- .counter_update_stats = mlx5_ib_counter_update_stats,
-};
-
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
-
- return mlx5_ib_alloc_counters(dev);
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ mlx5_remove_netdev_notifier(dev, port_num);
}
-
- return 0;
-}
-
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
}
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
@@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ int err;
+
+ err = mlx5_mr_cache_cleanup(dev);
+ if (err)
+ mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+ if (dev->umrc.qp)
+ mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+ if (dev->umrc.cq)
+ ib_free_cq(dev->umrc.cq);
+ if (dev->umrc.pd)
+ ib_dealloc_pd(dev->umrc.pd);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
+enum {
+ MAX_UMR_WR = 128,
+};
+
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ struct ib_qp_init_attr *init_attr = NULL;
+ struct ib_qp_attr *attr = NULL;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto error_0;
+ }
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ ret = PTR_ERR(pd);
+ goto error_0;
+ }
+
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto error_2;
+ }
+
+ init_attr->send_cq = cq;
+ init_attr->recv_cq = cq;
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->cap.max_send_wr = MAX_UMR_WR;
+ init_attr->cap.max_send_sge = 1;
+ init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr->port_num = 1;
+ qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto error_3;
+ }
+ qp->device = &dev->ib_dev;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = MLX5_IB_QPT_REG_UMR;
+ qp->send_cq = init_attr->send_cq;
+ qp->recv_cq = init_attr->recv_cq;
+
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_PORT, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTR;
+ attr->path_mtu = IB_MTU_256;
+
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTS;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ goto error_4;
+ }
+
+ dev->umrc.qp = qp;
+ dev->umrc.cq = cq;
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ ret = mlx5_mr_cache_init(dev);
+ if (ret) {
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ goto error_4;
+ }
+
+ kfree(attr);
+ kfree(init_attr);
+
+ return 0;
+
+error_4:
+ mlx5_ib_destroy_qp(qp, NULL);
+ dev->umrc.qp = NULL;
+
+error_3:
+ ib_free_cq(cq);
+ dev->umrc.cq = NULL;
+
+error_2:
+ ib_dealloc_pd(pd);
+ dev->umrc.pd = NULL;
+
+error_0:
+ kfree(attr);
+ kfree(init_attr);
+ return ret;
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
{
- init_delay_drop(dev);
+ struct dentry *root;
+
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return 0;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
+ dev->delay_drop.dir_debugfs = root;
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
return 0;
}
static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
{
- cancel_delay_drop(dev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
}
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
@@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}
-static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
-{
- int uid;
-
- uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0) {
- dev->devx_whitelist_uid = uid;
- mlx5_ib_devx_init_event_table(dev);
- }
-
- return 0;
-}
-static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
-{
- if (dev->devx_whitelist_uid) {
- mlx5_ib_devx_cleanup_event_table(dev);
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
- }
-}
-
-int mlx5_ib_enable_driver(struct ib_device *dev)
-{
- struct mlx5_ib_dev *mdev = to_mdev(dev);
- int ret;
-
- ret = mlx5_ib_test_wc(mdev);
- mlx5_ib_dbg(mdev, "Write-Combining %s",
- mdev->wc_support ? "supported" : "not supported");
-
- return ret;
-}
-
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
@@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_roce_init,
- mlx5_ib_stage_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
- mlx5_ib_stage_odp_init,
- mlx5_ib_stage_odp_cleanup),
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
mlx5_ib_stage_caps_cleanup),
@@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_raw_eth_roce_init,
- mlx5_ib_stage_raw_eth_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_QP,
mlx5_init_qp_table,
mlx5_cleanup_qp_table),
@@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = {
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
mlx5_ib_stage_dev_notifier_init,
mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5dbe3eb0d9cb..5287fc868662 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,33 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -730,8 +703,8 @@ struct mlx5_ib_port_resources {
struct mlx5_ib_resources {
struct ib_cq *c0;
- struct ib_xrcd *x0;
- struct ib_xrcd *x1;
+ u32 xrcdn0;
+ u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
@@ -832,7 +805,7 @@ struct mlx5_ib_delay_drop {
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
- MLX5_IB_STAGE_FLOW_DB,
+ MLX5_IB_STAGE_FS,
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
@@ -850,7 +823,7 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
- MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_RESTRACK,
MLX5_IB_STAGE_MAX,
};
@@ -1078,11 +1051,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
-{
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -1210,7 +1178,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
u32 max_num_sg,
u32 max_num_meta_sg);
@@ -1224,9 +1192,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index);
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata);
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
@@ -1375,46 +1342,12 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
u8 *native_port_num);
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
-int mlx5_ib_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
-int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
extern const struct uapi_definition mlx5_ib_qos_defs[];
+extern const struct uapi_definition mlx5_ib_std_types_defs[];
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
-void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev);
-void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev);
-struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
- struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_context *flow_context,
- struct mlx5_flow_act *flow_act, u32 counter_id,
- void *cmd_in, int inlen, int dest_id, int dest_type);
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id);
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
-#else
-static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- bool is_user) { return -EOPNOTSUPP; }
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev) {}
-static inline void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev) {}
-static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
- int *dest_type)
-{
- return false;
-}
-static inline void
-mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- return;
-};
-#endif
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -1423,15 +1356,6 @@ static inline void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}
-static inline u8 convert_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ;
-}
-
static inline int is_qp1(enum ib_qp_type qp_type)
{
return qp_type == MLX5_IB_QPT_HW_GSI;
@@ -1518,9 +1442,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
-int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
-
static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
bool do_modify_atomic, int access_flags)
{
@@ -1533,14 +1454,18 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev,
return false;
if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
- (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) ||
- MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)))
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return false;
+
+ if (access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
-int mlx5_ib_enable_driver(struct ib_device *dev);
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
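The reworked mlx5_ib_can_use_umr() hunk above tightens when a memory region may be updated through UMR: if the caller asked for IB_ACCESS_RELAXED_ORDERING and the device would actually enable relaxed-ordering reads or writes, UMR is only allowed when the firmware also permits UMR to modify those mkey bits (relaxed_ordering_write_umr / relaxed_ordering_read_umr). The snippet below is a minimal userspace model of that decision; the struct caps and can_use_umr() names are invented stand-ins for the MLX5_CAP_GEN() lookups, not driver code.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical capability snapshot; the driver reads these via MLX5_CAP_GEN(). */
struct caps {
	bool relaxed_ordering_write;	 /* device can enable RO writes */
	bool relaxed_ordering_read;	 /* device can enable RO reads */
	bool relaxed_ordering_write_umr; /* UMR may modify the RO write bit */
	bool relaxed_ordering_read_umr;	 /* UMR may modify the RO read bit */
};

/* Sketch of the idea behind mlx5_ib_can_use_umr(): when relaxed ordering is
 * requested and the device turns it on, UMR is only usable if UMR itself is
 * allowed to touch the corresponding mkey bits. */
static bool can_use_umr(const struct caps *c, bool relaxed_ordering)
{
	if (relaxed_ordering && c->relaxed_ordering_write &&
	    !c->relaxed_ordering_write_umr)
		return false;

	if (relaxed_ordering && c->relaxed_ordering_read &&
	    !c->relaxed_ordering_read_umr)
		return false;

	return true;
}

int main(void)
{
	struct caps c = { true, true, false, false };

	printf("%d\n", can_use_umr(&c, true));	/* 0: UMR update not allowed */
	printf("%d\n", can_use_umr(&c, false));	/* 1 */
	return 0;
}

When the predicate comes back false, callers are expected to fall back to a full re-registration of the mkey instead of a UMR update.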
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 44683073be0c..3e6f2f9c6655 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1961,7 +1961,7 @@ err_free:
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 77dca1e05bba..cfd7efab114e 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -816,6 +816,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ lockdep_assert_held(&mr->dev->odp_srcu);
if (unlikely(io_virt < mr->mmkey.iova))
return -EFAULT;
@@ -929,11 +930,6 @@ next_mr:
if (ret < 0)
goto srcu_unlock;
- /*
- * When prefetching a page, page fault is generated
- * in order to bring the page to the main memory.
- * In the current flow, page faults are being counted.
- */
mlx5_update_odp_stats(mr, faults, ret);
npages += ret;
@@ -1770,13 +1766,26 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
{
struct prefetch_mr_work *work =
container_of(w, struct prefetch_mr_work, work);
+ struct mlx5_ib_dev *dev;
u32 bytes_mapped = 0;
+ int srcu_key;
+ int ret;
u32 i;
- for (i = 0; i < work->num_sge; ++i)
- pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
- work->frags[i].length, &bytes_mapped,
- work->pf_flags);
+	/* We rely on IB/core that work is executed only when num_sge != 0. */
+ WARN_ON(!work->num_sge);
+ dev = work->frags[0].mr->dev;
+	/* SRCU should be held when calling mlx5_odp_populate_xlt() */
+ srcu_key = srcu_read_lock(&dev->odp_srcu);
+ for (i = 0; i < work->num_sge; ++i) {
+ ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+ work->frags[i].length, &bytes_mapped,
+ work->pf_flags);
+ if (ret <= 0)
+ continue;
+ mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
+ }
+ srcu_read_unlock(&dev->odp_srcu, srcu_key);
destroy_prefetch_work(work);
}
@@ -1832,6 +1841,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
&bytes_mapped, pf_flags);
if (ret < 0)
goto out;
+ mlx5_update_odp_stats(mr, prefetch, ret);
}
ret = 0;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 1225b8d77510..59fce5fac7a3 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -38,6 +38,7 @@
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
+#include "counters.h"
#include "cmd.h"
#include "qp.h"
#include "wr.h"
@@ -2031,15 +2032,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
switch (init_attr->qp_type) {
case IB_QPT_XRC_INI:
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
}
}
@@ -2178,11 +2179,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, no_sq, 1);
if (attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
to_msrq(devr->s1)->msrq.srqn);
}
@@ -3554,7 +3555,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
switch (raw_qp_param->operation) {
case MLX5_CMD_OP_RST2INIT_QP:
rq_state = MLX5_RQC_STATE_RDY;
- sq_state = MLX5_SQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RST;
break;
case MLX5_CMD_OP_2ERR_QP:
rq_state = MLX5_RQC_STATE_ERR;
@@ -3566,13 +3567,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
break;
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
- if (raw_qp_param->set_mask ==
- MLX5_RAW_QP_RATE_LIMIT) {
- modify_rq = 0;
- sq_state = sq->state;
- } else {
- return raw_qp_param->set_mask ? -EINVAL : 0;
- }
+ if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
+ return -EINVAL;
+
+ modify_rq = 0;
+ sq_state = MLX5_SQC_STATE_RDY;
break;
case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
@@ -4114,9 +4113,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
enum ib_qp_state cur_state, new_state;
- int err = 0;
int required = IB_QP_STATE;
void *dctc;
+ int err;
if (!(attr_mask & IB_QP_STATE))
return -EINVAL;
@@ -4208,11 +4207,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
return -EINVAL;
}
- if (err)
- qp->state = IB_QPS_ERR;
- else
- qp->state = new_state;
- return err;
+
+ qp->state = new_state;
+ return 0;
}
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -4450,7 +4447,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_RESET,
},
[MLX5_RQC_STATE_RDY] = {
- [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
@@ -4462,7 +4459,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_ERR,
},
[MLX5_RQ_STATE_NA] = {
- [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
@@ -4708,41 +4705,23 @@ out:
return err;
}
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_xrcd *xrcd;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(-ENOMEM);
- }
+ return -EOPNOTSUPP;
- return &xrcd->ibxrcd;
+ return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
- if (err)
- mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
-
- kfree(xrcd);
- return 0;
+ mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index 82ea2b94dfa6..ba899df44c5b 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -43,4 +43,5 @@ void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index 8f6c04f12531..887270dd3ce2 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -1,17 +1,85 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved.
*/
#include <uapi/rdma/rdma_netlink.h>
+#include <linux/mlx5/rsc_dump.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/restrack.h>
#include "mlx5_ib.h"
+#include "restrack.h"
-static int fill_stat_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+#define MAX_DUMP_SIZE 1024
+
+static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type,
+ int index, void *data, int *data_len)
+{
+ struct mlx5_core_dev *mdev = dev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int offset = 0;
+ int err = 0;
+ int cmd_err;
+ int size;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.size = PAGE_SIZE;
+ key.rsc = type;
+ key.index1 = index;
+ key.num_of_obj1 = 1;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+ memcpy(data + offset, page_address(page), size);
+ offset += size;
+ } while (cmd_err > 0);
+ *data_len = offset;
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+ return err;
+}
+
+static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev,
+ enum mlx5_sgmt_type type, u32 key)
+{
+ int len = 0;
+ void *data;
+ int err;
+
+ data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = dump_rsc(dev->mdev, type, key, data, &len);
+ if (err)
+ goto out;
+
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+out:
+ kfree(data);
+ return err;
+}
+
+static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct nlattr *table_attr;
@@ -31,6 +99,9 @@ static int fill_stat_mr_entry(struct sk_buff *msg,
msg, "page_invalidations",
atomic64_read(&mr->odp_stats.invalidations)))
goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
+ atomic64_read(&mr->odp_stats.prefetch)))
+ goto err_table;
nla_nest_end(msg, table_attr);
return 0;
@@ -41,10 +112,16 @@ err:
return -EMSGSIZE;
}
-static int fill_res_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+ return fill_res_raw(msg, mr->dev, MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ mlx5_mkey_to_idx(mr->mmkey.key));
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct nlattr *table_attr;
@@ -71,20 +148,32 @@ err:
return -EMSGSIZE;
}
-int mlx5_ib_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq)
{
- if (res->type == RDMA_RESTRACK_MR)
- return fill_res_mr_entry(msg, res);
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
- return 0;
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn);
}
-int mlx5_ib_fill_stat_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
- if (res->type == RDMA_RESTRACK_MR)
- return fill_stat_mr_entry(msg, res);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP,
+ ibqp->qp_num);
+}
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry_raw = fill_res_cq_entry_raw,
+ .fill_res_mr_entry = fill_res_mr_entry,
+ .fill_res_mr_entry_raw = fill_res_mr_entry_raw,
+ .fill_res_qp_entry_raw = fill_res_qp_entry_raw,
+ .fill_stat_mr_entry = fill_stat_mr_entry,
+};
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &restrack_ops);
return 0;
}
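The dump_rsc() helper added above pulls the firmware resource dump one page at a time and appends each chunk to a fixed 1 KiB buffer, bailing out if the dump command fails or the next chunk would push past MAX_DUMP_SIZE; fill_res_raw() then attaches the accumulated blob to the netlink message as RDMA_NLDEV_ATTR_RES_RAW. Below is a small self-contained model of that bounded accumulation loop, with a toy read_next_chunk() standing in for mlx5_rsc_dump_next(); the names and sizes are illustrative only, not the driver API.

#include <stdio.h>
#include <string.h>

#define MAX_DUMP_SIZE 1024
#define CHUNK 100

/* Toy chunk source standing in for the paged dump command: returns 1 while
 * more chunks follow, 0 on the final chunk, and reports the bytes copied. */
static char fw_blob[350];	/* pretend firmware object dump */
static int fw_pos;

static int read_next_chunk(char *page, int *size)
{
	int left = (int)sizeof(fw_blob) - fw_pos;
	int n = left > CHUNK ? CHUNK : left;

	memcpy(page, fw_blob + fw_pos, n);
	fw_pos += n;
	*size = n;
	return left > CHUNK;	/* 1: more pending, 0: done */
}

static int collect_dump(char *data, int *data_len)
{
	char page[CHUNK];
	int offset = 0;
	int more;
	int size;

	do {
		more = read_next_chunk(page, &size);
		if (size + offset > MAX_DUMP_SIZE)
			return -1;	/* would overflow the fixed cap */
		memcpy(data + offset, page, size);
		offset += size;
	} while (more > 0);

	*data_len = offset;
	return 0;
}

int main(void)
{
	char buf[MAX_DUMP_SIZE];
	int len = 0;

	if (!collect_dump(buf, &len))
		printf("collected %d bytes\n", len);
	return 0;
}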
diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h
new file mode 100644
index 000000000000..e8d81270f1b6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_RESTRACK_H
+#define _MLX5_IB_RESTRACK_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev);
+
+#endif /* _MLX5_IB_RESTRACK_H */
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6d1ff13d2283..7e10cbcb6d5c 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -274,10 +274,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC)
+ if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
else
- in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+ in.xrcd = dev->devr.xrcdn0;
if (init_attr->srq_type == IB_SRQT_TM) {
in.tm_log_list_size =
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
new file mode 100644
index 000000000000..16145fda68d0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE);
+ struct mlx5_ib_pd *mpd = to_mpd(pd);
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ &mpd->pdn, sizeof(mpd->pdn));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_PD_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_pd,
+ UVERBS_OBJECT_PD,
+ &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY));
+
+const struct uapi_definition mlx5_ib_std_types_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_PD,
+ &mlx5_ib_pd),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index bc35dbe4855b..43880973a512 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -263,7 +263,9 @@ static __be64 get_umr_update_translation_mask(void)
return cpu_to_be64(result);
}
-static __be64 get_umr_update_access_mask(int atomic)
+static __be64 get_umr_update_access_mask(int atomic,
+ int relaxed_ordering_write,
+ int relaxed_ordering_read)
{
u64 result;
@@ -275,6 +277,12 @@ static __be64 get_umr_update_access_mask(int atomic)
if (atomic)
result |= MLX5_MKEY_MASK_A;
+ if (relaxed_ordering_write)
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
+
+ if (relaxed_ordering_read)
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
+
return cpu_to_be64(result);
}
@@ -289,17 +297,28 @@ static __be64 get_umr_update_pd_mask(void)
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
- if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
- (mask & MLX5_MKEY_MASK_A &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
+ if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return -EPERM;
+
return 0;
}
static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
struct mlx5_wqe_umr_ctrl_seg *umr,
- const struct ib_send_wr *wr, int atomic)
+ const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -325,7 +344,10 @@ static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
- umr->mkey_mask |= get_umr_update_access_mask(atomic);
+ umr->mkey_mask |= get_umr_update_access_mask(
+ !!(MLX5_CAP_GEN(dev->mdev, atomic)),
+ !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
+ !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
umr->mkey_mask |= get_umr_update_pd_mask();
}
if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
@@ -383,20 +405,31 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- seg->status = MLX5_MKEY_STATUS_FREE;
+ MLX5_SET(mkc, seg, free, 1);
+
+ MLX5_SET(mkc, seg, a,
+ !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, seg, rw,
+ !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, seg, lr, 1);
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
+ MLX5_SET(mkc, seg, relaxed_ordering_read,
+ !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
- seg->flags = convert_access(umrwr->access_flags);
if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
!umrwr->length)
- seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
+ MLX5_SET(mkc, seg, length64, 1);
- seg->start_addr = cpu_to_be64(umrwr->virt_addr);
- seg->len = cpu_to_be64(umrwr->length);
- seg->log2_page_size = umrwr->page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(umrwr->mkey));
+ MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
+ MLX5_SET64(mkc, seg, len, umrwr->length);
+ MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
+ MLX5_SET(mkc, seg, qpn, 0xffffff);
+ MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
}
static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
@@ -1224,8 +1257,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
- err = set_reg_umr_segment(dev, *seg, wr,
- !!(MLX5_CAP_GEN(dev->mdev, atomic)));
+ err = set_reg_umr_segment(dev, *seg, wr);
if (unlikely(err))
goto out;
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index d11c74390a12..6cdbec13756a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -2901,7 +2901,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
}
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
int status;
struct ocrdma_mr *mr;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 3a5010881be5..df8e3b923a44 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -101,7 +101,7 @@ struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index b1de8d608e4d..d85f992bac29 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -110,7 +110,6 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = 1;
immutable->gid_tbl_len = 1;
immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
immutable->max_mad_size = 0;
@@ -179,6 +178,7 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
static const struct ib_device_ops qedr_roce_dev_ops = {
.get_port_immutable = qedr_roce_port_immutable,
+ .query_pkey = qedr_query_pkey,
};
static void qedr_roce_register_device(struct qedr_dev *dev)
@@ -221,7 +221,6 @@ static const struct ib_device_ops qedr_dev_ops = {
.post_srq_recv = qedr_post_srq_recv,
.process_mad = qedr_process_mad,
.query_device = qedr_query_device,
- .query_pkey = qedr_query_pkey,
.query_port = qedr_query_port,
.query_qp = qedr_query_qp,
.query_srq = qedr_query_srq,
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index fdf90ecb2699..460292179b32 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -235,6 +235,7 @@ struct qedr_ucontext {
u32 dpi_size;
u16 dpi;
bool db_rec;
+ u8 edpm_mode;
};
union db_prod32 {
@@ -344,10 +345,10 @@ struct qedr_srq_hwq_info {
u32 wqe_prod;
u32 sge_prod;
u32 wr_prod_cnt;
- u32 wr_cons_cnt;
+ atomic_t wr_cons_cnt;
u32 num_elems;
- u32 *virt_prod_pair_addr;
+ struct rdma_srq_producers *virt_prod_pair_addr;
dma_addr_t phy_prod_pair_addr;
};
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f5aa85a5377c..4ce4e2eef6cc 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -239,7 +239,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
attr->ip_gids = true;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
attr->gid_tbl_len = 1;
- attr->pkey_tbl_len = 1;
} else {
attr->gid_tbl_len = QEDR_MAX_SGID;
attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
@@ -275,7 +274,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
DP_ERR(dev, "Problem copying data from user space\n");
return -EFAULT;
}
-
+ ctx->edpm_mode = !!(ureq.context_flags &
+ QEDR_ALLOC_UCTX_EDPM_MODE);
ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
}
@@ -316,11 +316,15 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
else
uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
- QEDR_DPM_TYPE_ROCE_LEGACY;
+ QEDR_DPM_TYPE_ROCE_LEGACY |
+ QEDR_DPM_TYPE_ROCE_EDPM_MODE;
- uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
- uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
- uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
+ if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
+ uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
+ uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
+ uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
+ uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
+ }
uresp.wids_enabled = 1;
uresp.wid_count = oparams.wid_count;
@@ -1754,7 +1758,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
struct qed_rdma_create_qp_out_params out_params;
struct qedr_pd *pd = get_qedr_pd(ibpd);
struct qedr_create_qp_uresp uresp;
- struct qedr_ucontext *ctx = NULL;
+ struct qedr_ucontext *ctx = pd ? pd->uctx : NULL;
struct qedr_create_qp_ureq ureq;
int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
int rc = -EINVAL;
@@ -1792,6 +1796,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
}
+ if (ctx)
+ SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
+
qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
&in_params, &out_params);
@@ -3004,7 +3011,7 @@ err0:
}
struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct qedr_mr *mr;
@@ -3687,7 +3694,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
* count and consumer count and subtract it from max
* work request supported so that we get elements left.
*/
- used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
+ used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
return hw_srq->max_wr - used;
}
@@ -3702,7 +3709,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
unsigned long flags;
int status = 0;
u32 num_sge;
- u32 offset;
spin_lock_irqsave(&srq->lock, flags);
@@ -3715,7 +3721,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (!qedr_srq_elem_left(hw_srq) ||
wr->num_sge > srq->hw_srq.max_sges) {
DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
- hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
+ hw_srq->wr_prod_cnt,
+ atomic_read(&hw_srq->wr_cons_cnt),
wr->num_sge, srq->hw_srq.max_sges);
status = -ENOMEM;
*bad_wr = wr;
@@ -3749,22 +3756,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
hw_srq->sge_prod++;
}
- /* Flush WQE and SGE information before
+ /* Update WQE and SGE information before
* updating producer.
*/
- wmb();
+ dma_wmb();
/* SRQ producer is 8 bytes. Need to update SGE producer index
* in first 4 bytes and need to update WQE producer in
* next 4 bytes.
*/
- *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
- offset = offsetof(struct rdma_srq_producers, wqe_prod);
- *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
- hw_srq->wqe_prod;
+ srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod;
+ /* Make sure sge producer is updated first */
+ dma_wmb();
+ srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod;
- /* Flush producer after updating it. */
- wmb();
wr = wr->next;
}
@@ -4183,7 +4188,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
} else {
__process_resp_one(dev, qp, cq, wc, resp, wr_id);
}
- srq->hw_srq.wr_cons_cnt++;
+ atomic_inc(&srq->hw_srq.wr_cons_cnt);
return 1;
}
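The qedr_post_srq_recv() hunk above replaces the raw offsetof() poke into the producer page with typed stores through struct rdma_srq_producers and switches wmb() to the lighter dma_wmb(): the ring contents are made visible first, then the SGE producer, then the WQE producer, so the adapter can never observe a WQE producer that points at entries it cannot yet parse. The sketch below models that two-stage publication between CPU threads using C11 fences; publish_wqe() and the field layout are invented for illustration, and a CPU fence is only an analogue of dma_wmb(), which orders stores with respect to device access rather than another thread.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Producer indices shared with a consumer (the hardware, in the driver). */
struct srq_producers {
	_Atomic uint32_t sge_prod;
	_Atomic uint32_t wqe_prod;
};

static uint32_t ring[64];	/* stand-in for the receive ring */

static void publish_wqe(struct srq_producers *prod, uint32_t sge, uint32_t wqe)
{
	/* 1. Fill the ring entry with ordinary stores. */
	ring[wqe % 64] = 0xdeadbeef;

	/* 2. Make the ring contents visible before any producer update
	 *    (the driver uses dma_wmb() here). */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&prod->sge_prod, sge, memory_order_relaxed);

	/* 3. Make the SGE producer visible before the WQE producer, so the
	 *    consumer never sees a WQE whose SGEs are not yet published. */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&prod->wqe_prod, wqe, memory_order_relaxed);
}

int main(void)
{
	struct srq_producers prod = { 0 };

	publish_wqe(&prod, 3, 1);
	printf("sge_prod=%u wqe_prod=%u\n",
	       atomic_load(&prod.sge_prod), atomic_load(&prod.wqe_prod));
	return 0;
}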
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 5e02387e068d..39dd6286ba39 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -84,7 +84,7 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
int qedr_post_send(struct ib_qp *, const struct ib_send_wr *,
const struct ib_send_wr **bad_wr);
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
index 7875883621f4..398c4c00b932 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.c
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -214,7 +214,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
if (!flow)
return ERR_PTR(-ENOMEM);
- tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+ tlv = dma_alloc_coherent(&pdev->dev, tlv_size, &tlv_pa, GFP_ATOMIC);
if (!tlv) {
usnic_err("Failed to allocate memory\n");
status = -ENOMEM;
@@ -258,7 +258,7 @@ usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
out_free_tlv:
spin_unlock(&ufdev->lock);
- pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+ dma_free_coherent(&pdev->dev, tlv_size, tlv, tlv_pa);
if (!status)
return flow;
out_free_flow:
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index b039f1f00e05..77a010e68208 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -202,7 +202,7 @@ err_umem:
* @return: ib_mr pointer on success, otherwise returns an errno.
*/
struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
struct pvrdma_user_mr *mr;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 267702226f10..699b20849a7e 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -406,7 +406,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 40480add7dd3..75a04b1497c4 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -90,8 +90,7 @@ EXPORT_SYMBOL(rvt_check_ah);
/**
* rvt_create_ah - create an address handle
* @ibah: the IB address handle
- * @ah_attr: the attributes of the AH
- * @create_flags: create address handle flags (see enum rdma_create_ah_flags)
+ * @init_attr: the attributes of the AH
* @udata: pointer to user's input output buffer information.
*
* This may be called from interrupt context.
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 60864e5ca7cb..2f7c25fea44a 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -576,7 +576,7 @@ out:
* Return: the memory region on success, otherwise return an errno.
*/
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct rvt_mr *mr;
diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h
index 780fc63af98b..b3aba359401b 100644
--- a/drivers/infiniband/sw/rdmavt/mr.h
+++ b/drivers/infiniband/sw/rdmavt/mr.h
@@ -71,7 +71,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 5642eefb4ba1..907203afbd99 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -40,14 +40,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");
-/* free resources for all ports on a device */
-static void rxe_cleanup_ports(struct rxe_dev *rxe)
-{
- kfree(rxe->port.pkey_tbl);
- rxe->port.pkey_tbl = NULL;
-
-}
-
/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
@@ -66,8 +58,6 @@ void rxe_dealloc(struct ib_device *ib_dev)
rxe_pool_cleanup(&rxe->mc_grp_pool);
rxe_pool_cleanup(&rxe->mc_elem_pool);
- rxe_cleanup_ports(rxe);
-
if (rxe->tfm)
crypto_free_shash(rxe->tfm);
}
@@ -111,7 +101,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
}
/* initialize port attributes */
-static int rxe_init_port_param(struct rxe_port *port)
+static void rxe_init_port_param(struct rxe_port *port)
{
port->attr.state = IB_PORT_DOWN;
port->attr.max_mtu = IB_MTU_4096;
@@ -134,35 +124,19 @@ static int rxe_init_port_param(struct rxe_port *port)
port->attr.phys_state = RXE_PORT_PHYS_STATE;
port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256);
port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
-
- return 0;
}
/* initialize port state, note IB convention that HCA ports are always
* numbered from 1
*/
-static int rxe_init_ports(struct rxe_dev *rxe)
+static void rxe_init_ports(struct rxe_dev *rxe)
{
struct rxe_port *port = &rxe->port;
rxe_init_port_param(port);
-
- if (!port->attr.pkey_tbl_len || !port->attr.gid_tbl_len)
- return -EINVAL;
-
- port->pkey_tbl = kcalloc(port->attr.pkey_tbl_len,
- sizeof(*port->pkey_tbl), GFP_KERNEL);
-
- if (!port->pkey_tbl)
- return -ENOMEM;
-
- port->pkey_tbl[0] = 0xffff;
addrconf_addr_eui48((unsigned char *)&port->port_guid,
rxe->ndev->dev_addr);
-
spin_lock_init(&port->port_lock);
-
- return 0;
}
/* init pools of managed objects */
@@ -252,13 +226,11 @@ static int rxe_init(struct rxe_dev *rxe)
/* init default device parameters */
rxe_init_device_param(rxe);
- err = rxe_init_ports(rxe);
- if (err)
- goto err1;
+ rxe_init_ports(rxe);
err = rxe_init_pools(rxe);
if (err)
- goto err2;
+ return err;
/* init pending mmap list */
spin_lock_init(&rxe->mmap_offset_lock);
@@ -268,11 +240,6 @@ static int rxe_init(struct rxe_dev *rxe)
mutex_init(&rxe->usdev_lock);
return 0;
-
-err2:
- rxe_cleanup_ports(rxe);
-err1:
- return err;
}
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 775c23becaec..39dc3bfa5d5d 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -103,8 +103,8 @@ enum copy_direction {
from_mem_obj,
};
-int rxe_mem_init_dma(struct rxe_pd *pd,
- int access, struct rxe_mem *mem);
+void rxe_mem_init_dma(struct rxe_pd *pd,
+ int access, struct rxe_mem *mem);
int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
u64 length, u64 iova, int access, struct ib_udata *udata,
@@ -132,9 +132,6 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);
-int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
- u64 *page, int num_pages, u64 iova);
-
void rxe_mem_cleanup(struct rxe_pool_entry *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
@@ -145,7 +142,6 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc);
-enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
struct device *rxe_dma_device(struct rxe_dev *rxe);
int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index e83c7b518bfa..cdd811a45120 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -144,8 +144,8 @@ err1:
return -ENOMEM;
}
-int rxe_mem_init_dma(struct rxe_pd *pd,
- int access, struct rxe_mem *mem)
+void rxe_mem_init_dma(struct rxe_pd *pd,
+ int access, struct rxe_mem *mem)
{
rxe_mem_init(access, mem);
@@ -153,8 +153,6 @@ int rxe_mem_init_dma(struct rxe_pd *pd,
mem->access = access;
mem->state = RXE_MEM_STATE_VALID;
mem->type = RXE_MEM_TYPE_DMA;
-
- return 0;
}
int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
@@ -587,47 +585,3 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
return mem;
}
-
-int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
- u64 *page, int num_pages, u64 iova)
-{
- int i;
- int num_buf;
- int err;
- struct rxe_map **map;
- struct rxe_phys_buf *buf;
- int page_size;
-
- if (num_pages > mem->max_buf) {
- err = -EINVAL;
- goto err1;
- }
-
- num_buf = 0;
- page_size = 1 << mem->page_shift;
- map = mem->map;
- buf = map[0]->buf;
-
- for (i = 0; i < num_pages; i++) {
- buf->addr = *page++;
- buf->size = page_size;
- buf++;
- num_buf++;
-
- if (num_buf == RXE_BUF_PER_MAP) {
- map++;
- buf = map[0]->buf;
- num_buf = 0;
- }
- }
-
- mem->iova = iova;
- mem->va = iova;
- mem->length = num_pages << mem->page_shift;
- mem->state = RXE_MEM_STATE_VALID;
-
- return 0;
-
-err1:
- return err;
-}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 312c2fc961c0..0c3808611f95 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -520,11 +520,6 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num)
return rxe->ndev->name;
}
-enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
-{
- return IB_LINK_LAYER_ETHERNET;
-}
-
int rxe_net_add(const char *ibdev_name, struct net_device *ndev)
{
int err;
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 99e9d8ba9767..2f381aeafcb5 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -100,7 +100,7 @@ enum rxe_device_param {
RXE_MAX_SRQ_SGE = 27,
RXE_MIN_SRQ_SGE = 1,
RXE_MAX_FMR_PAGE_LIST_LEN = 512,
- RXE_MAX_PKEYS = 64,
+ RXE_MAX_PKEYS = 1,
RXE_LOCAL_CA_ACK_DELAY = 15,
RXE_MAX_UCONTEXT = 512,
@@ -148,7 +148,7 @@ enum rxe_port_param {
RXE_PORT_INIT_TYPE_REPLY = 0,
RXE_PORT_ACTIVE_WIDTH = IB_WIDTH_1X,
RXE_PORT_ACTIVE_SPEED = 1,
- RXE_PORT_PKEY_TBL_LEN = 64,
+ RXE_PORT_PKEY_TBL_LEN = 1,
RXE_PORT_PHYS_STATE = IB_PORT_PHYS_STATE_POLLING,
RXE_PORT_SUBNET_PREFIX = 0xfe80000000000000ULL,
};
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 831ad578a7b2..7e123d3c4d09 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -101,36 +101,15 @@ static void set_qkey_viol_cntr(struct rxe_port *port)
static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
u32 qpn, struct rxe_qp *qp)
{
- int i;
- int found_pkey = 0;
struct rxe_port *port = &rxe->port;
u16 pkey = bth_pkey(pkt);
pkt->pkey_index = 0;
- if (qpn == 1) {
- for (i = 0; i < port->attr.pkey_tbl_len; i++) {
- if (pkey_match(pkey, port->pkey_tbl[i])) {
- pkt->pkey_index = i;
- found_pkey = 1;
- break;
- }
- }
-
- if (!found_pkey) {
- pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
- set_bad_pkey_cntr(port);
- goto err1;
- }
- } else {
- if (unlikely(!pkey_match(pkey,
- port->pkey_tbl[qp->attr.pkey_index]
- ))) {
- pr_warn_ratelimited("bad pkey = 0x%0x\n", pkey);
- set_bad_pkey_cntr(port);
- goto err1;
- }
- pkt->pkey_index = qp->attr.pkey_index;
+ if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) {
+ pr_warn_ratelimited("bad pkey = 0x%x\n", pkey);
+ set_bad_pkey_cntr(port);
+ goto err1;
}
if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
@@ -330,10 +309,14 @@ err1:
static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
const struct ib_gid_attr *gid_attr;
union ib_gid dgid;
union ib_gid *pdgid;
+ if (pkt->mask & RXE_LOOPBACK_MASK)
+ return 0;
+
if (skb->protocol == htons(ETH_P_IP)) {
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
(struct in6_addr *)&dgid);
@@ -366,7 +349,7 @@ void rxe_rcv(struct sk_buff *skb)
if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES))
goto drop;
- if (unlikely(rxe_match_dgid(rxe, skb) < 0)) {
+ if (rxe_match_dgid(rxe, skb) < 0) {
pr_warn_ratelimited("failed matching dgid\n");
goto drop;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index e5031172c019..34df2b55e650 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -381,7 +381,6 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_port *port = &rxe->port;
struct sk_buff *skb;
struct rxe_send_wr *ibwr = &wqe->wr;
struct rxe_av *av;
@@ -419,9 +418,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
(pkt->mask & (RXE_WRITE_MASK | RXE_IMMDT_MASK)) ==
(RXE_WRITE_MASK | RXE_IMMDT_MASK));
- pkey = (qp_type(qp) == IB_QPT_GSI) ?
- port->pkey_tbl[ibwr->wr.ud.pkey_index] :
- port->pkey_tbl[qp->attr.pkey_index];
+ pkey = IB_DEFAULT_PKEY_FULL;
qp_num = (pkt->mask & RXE_DETH_MASK) ? ibwr->wr.ud.remote_qpn :
qp->attr.dest_qp_num;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index b8a22af724e8..bb61e534e468 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -83,22 +83,11 @@ static int rxe_query_port(struct ib_device *dev,
static int rxe_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
- struct rxe_dev *rxe = to_rdev(device);
- struct rxe_port *port;
-
- port = &rxe->port;
-
- if (unlikely(index >= port->attr.pkey_tbl_len)) {
- dev_warn(device->dev.parent, "invalid index = %d\n",
- index);
- goto err1;
- }
+ if (index > 0)
+ return -EINVAL;
- *pkey = port->pkey_tbl[index];
+ *pkey = IB_DEFAULT_PKEY_FULL;
return 0;
-
-err1:
- return -EINVAL;
}
static int rxe_modify_device(struct ib_device *dev,
@@ -141,9 +130,7 @@ static int rxe_modify_port(struct ib_device *dev,
static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
u8 port_num)
{
- struct rxe_dev *rxe = to_rdev(dev);
-
- return rxe_link_layer(rxe, port_num);
+ return IB_LINK_LAYER_ETHERNET;
}
static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
@@ -684,6 +671,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
unsigned int mask;
unsigned int length = 0;
int i;
+ struct ib_send_wr *next;
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
@@ -700,6 +688,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
break;
}
+ next = wr->next;
+
length = 0;
for (i = 0; i < wr->num_sge; i++)
length += wr->sg_list[i].length;
@@ -710,7 +700,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
*bad_wr = wr;
break;
}
- wr = wr->next;
+ wr = next;
}
rxe_run_task(&qp->req.task, 1);
@@ -901,30 +891,16 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_mem *mr;
- int err;
mr = rxe_alloc(&rxe->mr_pool);
- if (!mr) {
- err = -ENOMEM;
- goto err1;
- }
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
rxe_add_index(mr);
-
rxe_add_ref(pd);
-
- err = rxe_mem_init_dma(pd, access, mr);
- if (err)
- goto err2;
+ rxe_mem_init_dma(pd, access, mr);
return &mr->ibmr;
-
-err2:
- rxe_drop_ref(pd);
- rxe_drop_index(mr);
- rxe_drop_ref(mr);
-err1:
- return ERR_PTR(err);
}
static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
@@ -975,7 +951,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
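The rxe_post_send_kernel() change above caches wr->next before the work request is posted: once the WQE is queued and the requester task can run, the caller-owned ib_send_wr must not be dereferenced again, so the loop advances through the saved pointer. A generic illustration of the same traversal rule follows; the struct and consume() are hypothetical, and freeing the nodes outright is stronger than what the driver does, but the hazard of touching a node after handing it off is the same.

#include <stdio.h>
#include <stdlib.h>

struct wr {
	int id;
	struct wr *next;
};

/* Processing hands the node off (here it is freed), so the caller must not
 * touch it afterwards - which is why next is read before consuming. */
static void consume(struct wr *w)
{
	printf("posted wr %d\n", w->id);
	free(w);
}

static void post_all(struct wr *head)
{
	struct wr *w = head;

	while (w) {
		struct wr *next = w->next;	/* capture before consume() */

		consume(w);
		w = next;			/* safe even though w is gone */
	}
}

int main(void)
{
	struct wr *a = malloc(sizeof(*a));
	struct wr *b = malloc(sizeof(*b));

	a->id = 1; a->next = b;
	b->id = 2; b->next = NULL;
	post_all(a);
	return 0;
}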
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 92de39c4a7c1..c664c7f36ab5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -371,7 +371,6 @@ struct rxe_mc_elem {
struct rxe_port {
struct ib_port_attr attr;
- u16 *pkey_tbl;
__be64 port_guid;
__be64 subnet_prefix;
spinlock_t port_lock; /* guard port */
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index ed60c9e4643e..d862bec84376 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -289,7 +289,6 @@ static const struct ib_device_ops siw_device_ops = {
.post_srq_recv = siw_post_srq_recv,
.query_device = siw_query_device,
.query_gid = siw_query_gid,
- .query_pkey = siw_query_pkey,
.query_port = siw_query_port,
.query_qp = siw_query_qp,
.query_srq = siw_query_srq,
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 987e2ba05dbc..adafa1b8bebe 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -176,7 +176,6 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
- attr->pkey_tbl_len = 1;
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
attr->state = sdev->state;
/*
@@ -204,20 +203,12 @@ int siw_get_port_immutable(struct ib_device *base_dev, u8 port,
if (rv)
return rv;
- port_immutable->pkey_tbl_len = attr.pkey_tbl_len;
port_immutable->gid_tbl_len = attr.gid_tbl_len;
port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
-int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey)
-{
- /* Report the default pkey */
- *pkey = 0xffff;
- return 0;
-}
-
int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
union ib_gid *gid)
{
@@ -1373,7 +1364,7 @@ err_out:
}
struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_sge, struct ib_udata *udata)
+ u32 max_sge)
{
struct siw_device *sdev = to_siw_dev(pd->device);
struct siw_mr *mr = NULL;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index 1a731989fad6..d9572275a6b6 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -46,7 +46,6 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata);
int siw_query_port(struct ib_device *base_dev, u8 port,
struct ib_port_attr *attr);
-int siw_query_pkey(struct ib_device *base_dev, u8 port, u16 idx, u16 *pkey);
int siw_query_gid(struct ib_device *base_dev, u8 port, int idx,
union ib_gid *gid);
int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata);
@@ -69,7 +68,7 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags);
struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len,
u64 rnic_va, int rights, struct ib_udata *udata);
struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type,
- u32 max_sge, struct ib_udata *udata);
+ u32 max_sge);
struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights);
int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
unsigned int *sg_off);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 9a3379c49541..3440dc48d02c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -515,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open_default(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
-int ipoib_ib_dev_stop(struct net_device *dev);
+void ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_ib_dev_up(struct net_device *dev);
void ipoib_ib_dev_down(struct net_device *dev);
int ipoib_ib_dev_stop_default(struct net_device *dev);
@@ -527,7 +527,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
void ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev);
+void ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index da3c5315bbb5..494f413dc3c6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -670,13 +670,12 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
return rc;
}
-static void __ipoib_reap_ah(struct net_device *dev)
+static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_ah *ah, *tah;
unsigned long flags;
- netif_tx_lock_bh(dev);
+ netif_tx_lock_bh(priv->dev);
spin_lock_irqsave(&priv->lock, flags);
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
@@ -687,37 +686,37 @@ static void __ipoib_reap_ah(struct net_device *dev)
}
spin_unlock_irqrestore(&priv->lock, flags);
- netif_tx_unlock_bh(dev);
+ netif_tx_unlock_bh(priv->dev);
}
void ipoib_reap_ah(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, ah_reap_task.work);
- struct net_device *dev = priv->dev;
- __ipoib_reap_ah(dev);
+ ipoib_reap_dead_ahs(priv);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
-static void ipoib_flush_ah(struct net_device *dev)
+static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
- cancel_delayed_work(&priv->ah_reap_task);
- flush_workqueue(priv->wq);
- ipoib_reap_ah(&priv->ah_reap_task.work);
+ clear_bit(IPOIB_STOP_REAPER, &priv->flags);
+ queue_delayed_work(priv->wq, &priv->ah_reap_task,
+ round_jiffies_relative(HZ));
}
-static void ipoib_stop_ah(struct net_device *dev)
+static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
set_bit(IPOIB_STOP_REAPER, &priv->flags);
- ipoib_flush_ah(dev);
+ cancel_delayed_work(&priv->ah_reap_task);
+ /*
+ * After ipoib_stop_ah_reaper() we always go through
+ * ipoib_reap_dead_ahs() which ensures the work is really stopped and
+ * does a final flush out of the dead_ah's list
+ */
}
static int recvs_pending(struct net_device *dev)
@@ -846,18 +845,6 @@ timeout:
return 0;
}
-int ipoib_ib_dev_stop(struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
- priv->rn_ops->ndo_stop(dev);
-
- clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
- ipoib_flush_ah(dev);
-
- return 0;
-}
-
int ipoib_ib_dev_open_default(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -901,10 +888,7 @@ int ipoib_ib_dev_open(struct net_device *dev)
return -1;
}
- clear_bit(IPOIB_STOP_REAPER, &priv->flags);
- queue_delayed_work(priv->wq, &priv->ah_reap_task,
- round_jiffies_relative(HZ));
-
+ ipoib_start_ah_reaper(priv);
if (priv->rn_ops->ndo_open(dev)) {
pr_warn("%s: Failed to open dev\n", dev->name);
goto dev_stop;
@@ -915,13 +899,20 @@ int ipoib_ib_dev_open(struct net_device *dev)
return 0;
dev_stop:
- set_bit(IPOIB_STOP_REAPER, &priv->flags);
- cancel_delayed_work(&priv->ah_reap_task);
- set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
- ipoib_ib_dev_stop(dev);
+ ipoib_stop_ah_reaper(priv);
return -1;
}
+void ipoib_ib_dev_stop(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ priv->rn_ops->ndo_stop(dev);
+
+ clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ ipoib_stop_ah_reaper(priv);
+}
+
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -1232,7 +1223,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_mcast_dev_flush(dev);
if (oper_up)
set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
- ipoib_flush_ah(dev);
+ ipoib_reap_dead_ahs(priv);
}
if (level >= IPOIB_FLUSH_NORMAL)
@@ -1307,7 +1298,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
* the neighbor garbage collection is stopped and reaped.
* That should all be done now, so make a final ah flush.
*/
- ipoib_stop_ah(dev);
+ ipoib_reap_dead_ahs(priv);
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
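The comment added in ipoib_stop_ah_reaper() above amounts to a small ordering contract; a minimal illustrative sketch of that pairing, using the helpers introduced in this hunk (not additional kernel code):

/*
 * Illustrative only: stop the delayed reaper work first, then do one final
 * synchronous pass over priv->dead_ahs, exactly as ipoib_ib_dev_stop() and
 * ipoib_ib_dev_cleanup() do above.
 */
static void example_ipoib_ah_teardown(struct ipoib_dev_priv *priv)
{
	ipoib_stop_ah_reaper(priv);	/* sets IPOIB_STOP_REAPER, cancels the work */
	ipoib_reap_dead_ahs(priv);	/* final flush of the dead_ahs list */
}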
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3cfb682b91b0..752581a8627b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1892,8 +1892,15 @@ static void ipoib_child_init(struct net_device *ndev)
priv->max_ib_mtu = ppriv->max_ib_mtu;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
- memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
- memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
+ if (memchr_inv(priv->dev->dev_addr, 0, INFINIBAND_ALEN))
+ memcpy(&priv->local_gid, priv->dev->dev_addr + 4,
+ sizeof(priv->local_gid));
+ else {
+ memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr,
+ INFINIBAND_ALEN);
+ memcpy(&priv->local_gid, &ppriv->local_gid,
+ sizeof(priv->local_gid));
+ }
}
static int ipoib_ndo_init(struct net_device *ndev)
@@ -1976,6 +1983,8 @@ static void ipoib_ndo_uninit(struct net_device *dev)
/* no more works over the priv->wq */
if (priv->wq) {
+ /* See ipoib_mcast_carrier_on_task() */
+ WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags));
flush_workqueue(priv->wq);
destroy_workqueue(priv->wq);
priv->wq = NULL;
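The dev_addr + 4 copy in ipoib_child_init() above relies on the standard 20-byte IPoIB hardware-address layout; a short reminder of that assumption (illustrative struct, not a kernel type):

struct example_ipoib_hw_addr {		/* 4 + 16 == INFINIBAND_ALEN (20) */
	unsigned char qpn_flags[4];	/* flags + queue pair number */
	unsigned char gid[16];		/* port GID, the source of priv->local_gid */
};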
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 9bfa514473d5..86e4ed64e4e2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -680,15 +680,13 @@ void ipoib_mcast_start_thread(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags);
}
-int ipoib_mcast_stop_thread(struct net_device *dev)
+void ipoib_mcast_stop_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
ipoib_dbg_mcast(priv, "stopping multicast thread\n");
cancel_delayed_work_sync(&priv->mcast_task);
-
- return 0;
}
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 1d77c7f42e38..78ee9445f801 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -300,18 +300,6 @@ struct ib_conn;
struct iscsi_iser_task;
/**
- * struct iser_comp - iSER completion context
- *
- * @cq: completion queue
- * @active_qps: Number of active QPs attached
- * to completion context
- */
-struct iser_comp {
- struct ib_cq *cq;
- int active_qps;
-};
-
-/**
* struct iser_device - iSER device handle
*
* @ib_device: RDMA device
@@ -320,9 +308,6 @@ struct iser_comp {
* @event_handler: IB events handle routine
* @ig_list: entry in devices list
* @refcount: Reference counter, dominated by open iser connections
- * @comps_used: Number of completion contexts used, Min between online
- * cpus and device max completion vectors
- * @comps: Dinamically allocated array of completion handlers
*/
struct iser_device {
struct ib_device *ib_device;
@@ -330,8 +315,6 @@ struct iser_device {
struct ib_event_handler event_handler;
struct list_head ig_list;
int refcount;
- int comps_used;
- struct iser_comp *comps;
};
/**
@@ -353,6 +336,7 @@ struct iser_reg_resources {
* @list: entry in connection fastreg pool
* @rsc: data buffer registration resources
* @sig_protected: is region protected indicator
+ * @all_list: first and last list members
*/
struct iser_fr_desc {
struct list_head list;
@@ -367,6 +351,7 @@ struct iser_fr_desc {
* @list: list of fastreg descriptors
* @lock: protects fastreg pool
* @size: size of the pool
+ * @all_list: first and last list members
*/
struct iser_fr_pool {
struct list_head list;
@@ -380,11 +365,12 @@ struct iser_fr_pool {
*
* @cma_id: rdma_cm connection manager handle
* @qp: Connection Queue-pair
+ * @cq: Connection completion queue
+ * @cq_size: The number of max outstanding completions
* @post_recv_buf_count: post receive counter
* @sig_count: send work request signal count
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
- * @comp: iser completion context
* @fr_pool: connection fast registration pool
* @pi_support: Indicate device T10-PI support
* @reg_cqe: completion handler
@@ -392,11 +378,12 @@ struct iser_fr_pool {
struct ib_conn {
struct rdma_cm_id *cma_id;
struct ib_qp *qp;
+ struct ib_cq *cq;
+ u32 cq_size;
int post_recv_buf_count;
u8 sig_count;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
- struct iser_comp *comp;
struct iser_fr_pool fr_pool;
bool pi_support;
struct ib_cqe reg_cqe;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index c1f44c41f501..699e075ae1b3 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -68,59 +68,23 @@ static void iser_event_handler(struct ib_event_handler *handler,
static int iser_create_device_ib_res(struct iser_device *device)
{
struct ib_device *ib_dev = device->ib_device;
- int i, max_cqe;
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
iser_err("IB device does not support memory registrations\n");
return -1;
}
- device->comps_used = min_t(int, num_online_cpus(),
- ib_dev->num_comp_vectors);
-
- device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
- GFP_KERNEL);
- if (!device->comps)
- goto comps_err;
-
- max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
-
- iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
- device->comps_used, dev_name(&ib_dev->dev),
- ib_dev->num_comp_vectors, max_cqe);
-
device->pd = ib_alloc_pd(ib_dev,
iser_always_reg ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
if (IS_ERR(device->pd))
goto pd_err;
- for (i = 0; i < device->comps_used; i++) {
- struct iser_comp *comp = &device->comps[i];
-
- comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
- IB_POLL_SOFTIRQ);
- if (IS_ERR(comp->cq)) {
- comp->cq = NULL;
- goto cq_err;
- }
- }
-
INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
iser_event_handler);
ib_register_event_handler(&device->event_handler);
return 0;
-cq_err:
- for (i = 0; i < device->comps_used; i++) {
- struct iser_comp *comp = &device->comps[i];
-
- if (comp->cq)
- ib_free_cq(comp->cq);
- }
- ib_dealloc_pd(device->pd);
pd_err:
- kfree(device->comps);
-comps_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@@ -131,20 +95,9 @@ comps_err:
*/
static void iser_free_device_ib_res(struct iser_device *device)
{
- int i;
-
- for (i = 0; i < device->comps_used; i++) {
- struct iser_comp *comp = &device->comps[i];
-
- ib_free_cq(comp->cq);
- comp->cq = NULL;
- }
-
ib_unregister_event_handler(&device->event_handler);
ib_dealloc_pd(device->pd);
- kfree(device->comps);
- device->comps = NULL;
device->pd = NULL;
}
@@ -287,70 +240,57 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
struct ib_device *ib_dev;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
- int index, min_index = 0;
+ unsigned int max_send_wr, cq_size;
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
ib_dev = device->ib_device;
- memset(&init_attr, 0, sizeof init_attr);
+ if (ib_conn->pi_support)
+ max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
+ else
+ max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
+ max_send_wr = min_t(unsigned int, max_send_wr,
+ (unsigned int)ib_dev->attrs.max_qp_wr);
- mutex_lock(&ig.connlist_mutex);
- /* select the CQ with the minimal number of usages */
- for (index = 0; index < device->comps_used; index++) {
- if (device->comps[index].active_qps <
- device->comps[min_index].active_qps)
- min_index = index;
+ cq_size = max_send_wr + ISER_QP_MAX_RECV_DTOS;
+ ib_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_SOFTIRQ);
+ if (IS_ERR(ib_conn->cq)) {
+ ret = PTR_ERR(ib_conn->cq);
+ goto cq_err;
}
- ib_conn->comp = &device->comps[min_index];
- ib_conn->comp->active_qps++;
- mutex_unlock(&ig.connlist_mutex);
- iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);
+ ib_conn->cq_size = cq_size;
+
+ memset(&init_attr, 0, sizeof(init_attr));
init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn;
- init_attr.send_cq = ib_conn->comp->cq;
- init_attr.recv_cq = ib_conn->comp->cq;
+ init_attr.send_cq = ib_conn->cq;
+ init_attr.recv_cq = ib_conn->cq;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_RC;
- if (ib_conn->pi_support) {
- init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
+ init_attr.cap.max_send_wr = max_send_wr;
+ if (ib_conn->pi_support)
init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
- iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
- } else {
- if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
- init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
- iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
- } else {
- init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
- iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
- iser_dbg("device %s supports max_send_wr %d\n",
- dev_name(&device->ib_device->dev),
- ib_dev->attrs.max_qp_wr);
- }
- }
+ iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(max_send_wr - 1);
ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
if (ret)
goto out_err;
ib_conn->qp = ib_conn->cma_id->qp;
- iser_info("setting conn %p cma_id %p qp %p\n",
+ iser_info("setting conn %p cma_id %p qp %p max_send_wr %d\n",
ib_conn, ib_conn->cma_id,
- ib_conn->cma_id->qp);
+ ib_conn->cma_id->qp, max_send_wr);
return ret;
out_err:
- mutex_lock(&ig.connlist_mutex);
- ib_conn->comp->active_qps--;
- mutex_unlock(&ig.connlist_mutex);
+ ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size);
+cq_err:
iser_err("unable to alloc mem or create resource, err %d\n", ret);
return ret;
@@ -462,10 +402,8 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
iser_conn, ib_conn->cma_id, ib_conn->qp);
if (ib_conn->qp != NULL) {
- mutex_lock(&ig.connlist_mutex);
- ib_conn->comp->active_qps--;
- mutex_unlock(&ig.connlist_mutex);
rdma_destroy_qp(ib_conn->cma_id);
+ ib_cq_pool_put(ib_conn->cq, ib_conn->cq_size);
ib_conn->qp = NULL;
}
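The iser rework above is one instance of the shared CQ pool pattern used throughout this series; a minimal hypothetical ULP sketch of the get/put pairing (my_conn and its fields are illustrative, error handling trimmed):

#include <rdma/ib_verbs.h>

struct my_conn {			/* illustrative only */
	struct ib_cq *cq;
	u32 cq_size;
};

static int my_conn_alloc_cq(struct my_conn *conn, struct ib_device *ibdev,
			    u32 cq_size)
{
	/* a comp_vector hint of -1 lets the core spread load across vectors */
	conn->cq = ib_cq_pool_get(ibdev, cq_size, -1, IB_POLL_SOFTIRQ);
	if (IS_ERR(conn->cq))
		return PTR_ERR(conn->cq);
	conn->cq_size = cq_size;	/* remembered for ib_cq_pool_put() */
	return 0;			/* use conn->cq as send_cq/recv_cq */
}

static void my_conn_free_cq(struct my_conn *conn)
{
	/* return the CQEs with the same count that was requested */
	ib_cq_pool_put(conn->cq, conn->cq_size);
}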
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b7df38ee8ae0..61e2f7fc513d 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -24,13 +24,6 @@
#include "ib_isert.h"
-#define ISERT_MAX_CONN 8
-#define ISER_MAX_RX_CQ_LEN (ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
-#define ISER_MAX_TX_CQ_LEN \
- ((ISERT_QP_MAX_REQ_DTOS + ISCSI_DEF_XMIT_CMDS_MAX) * ISERT_MAX_CONN)
-#define ISER_MAX_CQ_LEN (ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
- ISERT_MAX_CONN)
-
static int isert_debug_level;
module_param_named(debug_level, isert_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)");
@@ -82,53 +75,34 @@ isert_qp_event_callback(struct ib_event *e, void *context)
}
}
-static struct isert_comp *
-isert_comp_get(struct isert_conn *isert_conn)
-{
- struct isert_device *device = isert_conn->device;
- struct isert_comp *comp;
- int i, min = 0;
-
- mutex_lock(&device_list_mutex);
- for (i = 0; i < device->comps_used; i++)
- if (device->comps[i].active_qps <
- device->comps[min].active_qps)
- min = i;
- comp = &device->comps[min];
- comp->active_qps++;
- mutex_unlock(&device_list_mutex);
-
- isert_info("conn %p, using comp %p min_index: %d\n",
- isert_conn, comp, min);
-
- return comp;
-}
-
-static void
-isert_comp_put(struct isert_comp *comp)
-{
- mutex_lock(&device_list_mutex);
- comp->active_qps--;
- mutex_unlock(&device_list_mutex);
-}
-
static struct ib_qp *
isert_create_qp(struct isert_conn *isert_conn,
- struct isert_comp *comp,
struct rdma_cm_id *cma_id)
{
+ u32 cq_size = ISERT_QP_MAX_REQ_DTOS + ISERT_QP_MAX_RECV_DTOS + 2;
struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
struct ib_qp_init_attr attr;
- int ret;
+ int ret, factor;
+
+ isert_conn->cq = ib_cq_pool_get(ib_dev, cq_size, -1, IB_POLL_WORKQUEUE);
+ if (IS_ERR(isert_conn->cq)) {
+ isert_err("Unable to allocate cq\n");
+ ret = PTR_ERR(isert_conn->cq);
+ return ERR_PTR(ret);
+ }
+ isert_conn->cq_size = cq_size;
memset(&attr, 0, sizeof(struct ib_qp_init_attr));
attr.event_handler = isert_qp_event_callback;
attr.qp_context = isert_conn;
- attr.send_cq = comp->cq;
- attr.recv_cq = comp->cq;
+ attr.send_cq = isert_conn->cq;
+ attr.recv_cq = isert_conn->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
- attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
+ factor = rdma_rw_mr_factor(device->ib_device, cma_id->port_num,
+ ISCSI_ISER_MAX_SG_TABLESIZE);
+ attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX * factor;
attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge;
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -139,6 +113,8 @@ isert_create_qp(struct isert_conn *isert_conn,
ret = rdma_create_qp(cma_id, device->pd, &attr);
if (ret) {
isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+ ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size);
+
return ERR_PTR(ret);
}
@@ -146,25 +122,6 @@ isert_create_qp(struct isert_conn *isert_conn,
}
static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
-{
- struct isert_comp *comp;
- int ret;
-
- comp = isert_comp_get(isert_conn);
- isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id);
- if (IS_ERR(isert_conn->qp)) {
- ret = PTR_ERR(isert_conn->qp);
- goto err;
- }
-
- return 0;
-err:
- isert_comp_put(comp);
- return ret;
-}
-
-static int
isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
{
struct isert_device *device = isert_conn->device;
@@ -231,61 +188,6 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn)
isert_conn->rx_descs = NULL;
}
-static void
-isert_free_comps(struct isert_device *device)
-{
- int i;
-
- for (i = 0; i < device->comps_used; i++) {
- struct isert_comp *comp = &device->comps[i];
-
- if (comp->cq)
- ib_free_cq(comp->cq);
- }
- kfree(device->comps);
-}
-
-static int
-isert_alloc_comps(struct isert_device *device)
-{
- int i, max_cqe, ret = 0;
-
- device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
- device->ib_device->num_comp_vectors));
-
- isert_info("Using %d CQs, %s supports %d vectors support "
- "pi_capable %d\n",
- device->comps_used, dev_name(&device->ib_device->dev),
- device->ib_device->num_comp_vectors,
- device->pi_capable);
-
- device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
- GFP_KERNEL);
- if (!device->comps)
- return -ENOMEM;
-
- max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
-
- for (i = 0; i < device->comps_used; i++) {
- struct isert_comp *comp = &device->comps[i];
-
- comp->device = device;
- comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i,
- IB_POLL_WORKQUEUE);
- if (IS_ERR(comp->cq)) {
- isert_err("Unable to allocate cq\n");
- ret = PTR_ERR(comp->cq);
- comp->cq = NULL;
- goto out_cq;
- }
- }
-
- return 0;
-out_cq:
- isert_free_comps(device);
- return ret;
-}
-
static int
isert_create_device_ib_res(struct isert_device *device)
{
@@ -296,16 +198,12 @@ isert_create_device_ib_res(struct isert_device *device)
ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge);
isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
- ret = isert_alloc_comps(device);
- if (ret)
- goto out;
-
device->pd = ib_alloc_pd(ib_dev, 0);
if (IS_ERR(device->pd)) {
ret = PTR_ERR(device->pd);
isert_err("failed to allocate pd, device %p, ret=%d\n",
device, ret);
- goto out_cq;
+ return ret;
}
/* Check signature cap */
@@ -313,13 +211,6 @@ isert_create_device_ib_res(struct isert_device *device)
IB_DEVICE_INTEGRITY_HANDOVER ? true : false;
return 0;
-
-out_cq:
- isert_free_comps(device);
-out:
- if (ret > 0)
- ret = -EINVAL;
- return ret;
}
static void
@@ -328,7 +219,6 @@ isert_free_device_ib_res(struct isert_device *device)
isert_info("device %p\n", device);
ib_dealloc_pd(device->pd);
- isert_free_comps(device);
}
static void
@@ -490,6 +380,13 @@ isert_set_nego_params(struct isert_conn *isert_conn,
}
}
+static void
+isert_destroy_qp(struct isert_conn *isert_conn)
+{
+ ib_destroy_qp(isert_conn->qp);
+ ib_cq_pool_put(isert_conn->cq, isert_conn->cq_size);
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
@@ -530,17 +427,19 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
isert_set_nego_params(isert_conn, &event->param.conn);
- ret = isert_conn_setup_qp(isert_conn, cma_id);
- if (ret)
+ isert_conn->qp = isert_create_qp(isert_conn, cma_id);
+ if (IS_ERR(isert_conn->qp)) {
+ ret = PTR_ERR(isert_conn->qp);
goto out_conn_dev;
+ }
ret = isert_login_post_recv(isert_conn);
if (ret)
- goto out_conn_dev;
+ goto out_destroy_qp;
ret = isert_rdma_accept(isert_conn);
if (ret)
- goto out_conn_dev;
+ goto out_destroy_qp;
mutex_lock(&isert_np->mutex);
list_add_tail(&isert_conn->node, &isert_np->accepted);
@@ -548,6 +447,8 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
return 0;
+out_destroy_qp:
+ isert_destroy_qp(isert_conn);
out_conn_dev:
isert_device_put(device);
out_rsp_dma_map:
@@ -572,12 +473,8 @@ isert_connect_release(struct isert_conn *isert_conn)
!isert_conn->dev_removed)
rdma_destroy_id(isert_conn->cm_id);
- if (isert_conn->qp) {
- struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context;
-
- isert_comp_put(comp);
- ib_destroy_qp(isert_conn->qp);
- }
+ if (isert_conn->qp)
+ isert_destroy_qp(isert_conn);
if (isert_conn->login_req_buf)
isert_free_login_buf(isert_conn);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 3b296bac4f60..c55f7d9bfced 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -63,7 +63,8 @@
(ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \
sizeof(struct ib_cqe) + sizeof(bool)))
-#define ISCSI_ISER_SG_TABLESIZE 256
+/* Maximum support is 16MB I/O size */
+#define ISCSI_ISER_MAX_SG_TABLESIZE 4096
enum isert_desc_type {
ISCSI_TX_CONTROL,
@@ -155,6 +156,8 @@ struct isert_conn {
struct iser_tx_desc login_tx_desc;
struct rdma_cm_id *cm_id;
struct ib_qp *qp;
+ struct ib_cq *cq;
+ u32 cq_size;
struct isert_device *device;
struct mutex mutex;
struct kref kref;
@@ -165,22 +168,6 @@ struct isert_conn {
bool dev_removed;
};
-#define ISERT_MAX_CQ 64
-
-/**
- * struct isert_comp - iSER completion context
- *
- * @device: pointer to device handle
- * @cq: completion queue
- * @active_qps: Number of active QPs attached
- * to completion context
- */
-struct isert_comp {
- struct isert_device *device;
- struct ib_cq *cq;
- int active_qps;
-};
-
struct isert_device {
bool pi_capable;
int refcount;
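The new ISCSI_ISER_MAX_SG_TABLESIZE value is consistent with the 16MB comment assuming 4 KiB pages: 4096 scatter-gather entries * 4096 bytes per entry = 16 MiB per I/O.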
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
index d324312a373c..f64519872297 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h
@@ -118,12 +118,17 @@
* struct opa_vesw_info - OPA vnic switch information
* @fabric_id: 10-bit fabric id
* @vesw_id: 12-bit virtual ethernet switch id
+ * @rsvd0: reserved bytes
* @def_port_mask: bitmask of default ports
+ * @rsvd1: reserved bytes
* @pkey: partition key
+ * @rsvd2: reserved bytes
* @u_mcast_dlid: unknown multicast dlid
* @u_ucast_dlid: array of unknown unicast dlids
+ * @rsvd3: reserved bytes
* @rc: routing control
* @eth_mtu: Ethernet MTU
+ * @rsvd4: reserved bytes
*/
struct opa_vesw_info {
__be16 fabric_id;
@@ -150,12 +155,14 @@ struct opa_vesw_info {
* struct opa_per_veswport_info - OPA vnic per port information
* @port_num: port number
* @eth_link_status: current ethernet link state
+ * @rsvd0: reserved bytes
* @base_mac_addr: base mac address
* @config_state: configured port state
* @oper_state: operational port state
* @max_mac_tbl_ent: max number of mac table entries
* @max_smac_ent: max smac entries in mac table
* @mac_tbl_digest: mac table digest
+ * @rsvd1: reserved bytes
* @encap_slid: base slid for the port
* @pcp_to_sc_uc: sc by pcp index for unicast ethernet packets
* @pcp_to_vl_uc: vl by pcp index for unicast ethernet packets
@@ -165,8 +172,10 @@ struct opa_vesw_info {
* @non_vlan_vl_uc: vl for non-vlan unicast ethernet packets
* @non_vlan_sc_mc: sc for non-vlan multicast ethernet packets
* @non_vlan_vl_mc: vl for non-vlan multicast ethernet packets
+ * @rsvd2: reserved bytes
* @uc_macs_gen_count: generation count for unicast macs list
* @mc_macs_gen_count: generation count for multicast macs list
+ * @rsvd3: reserved bytes
*/
struct opa_per_veswport_info {
__be32 port_num;
@@ -294,6 +303,7 @@ struct opa_veswport_mactable {
* @rx_512_1023: received packet length is >=512 and < 1023 bytes
* @rx_1024_1518: received packet length is >=1024 and < 1518 bytes
* @rx_1519_max: received packet length >= 1519 bytes
+ * @reserved: reserved bytes
*
* All the above are counters of corresponding conditions.
*/
@@ -347,16 +357,26 @@ struct opa_veswport_summary_counters {
* @veswport_num: virtual ethernet switch port number
* @tx_errors: transmit errors
* @rx_errors: receive errors
+ * @rsvd0: reserved bytes
* @tx_smac_filt: smac filter errors
+ * @rsvd1: reserved bytes
+ * @rsvd2: reserved bytes
+ * @rsvd3: reserved bytes
* @tx_dlid_zero: transmit packets with invalid dlid
+ * @rsvd4: reserved bytes
* @tx_logic: other transmit errors
+ * @rsvd5: reserved bytes
* @tx_drop_state: packet transmission in non-forwarding port state
* @rx_bad_veswid: received packet with invalid vesw id
+ * @rsvd6: reserved bytes
* @rx_runt: received ethernet packet with length < 64 bytes
* @rx_oversize: received ethernet packet with length > MTU size
+ * @rsvd7: reserved bytes
* @rx_eth_down: received packets when interface is down
* @rx_drop_state: received packets in non-forwarding port state
* @rx_logic: other receive errors
+ * @rsvd8: reserved bytes
+ * @rsvd9: reserved bytes
*
* All the above are counters of corresponding error conditions.
*/
@@ -447,6 +467,7 @@ struct opa_veswport_iface_macs {
* struct opa_vnic_vema_mad - Generic VEMA MAD
* @mad_hdr: Generic MAD header
* @rmpp_hdr: RMPP header for vendor specific MADs
+ * @reserved: reserved bytes
* @oui: Unique org identifier
* @data: MAD data
*/
@@ -467,6 +488,7 @@ struct opa_vnic_vema_mad {
* @trap_num: Trap number
* @toggle_count: Notice toggle bit and count value
* @issuer_lid: Trap issuer's lid
+ * @reserved: reserved bytes
* @issuer_gid: Issuer GID (only if Report method)
* @raw_data: Trap message body
*/
@@ -487,6 +509,7 @@ struct opa_vnic_notice_attr {
* struct opa_vnic_vema_mad_trap - Generic VEMA MAD Trap
* @mad_hdr: Generic MAD header
* @rmpp_hdr: RMPP header for vendor specific MADs
+ * @reserved: reserved bytes
* @oui: Unique org identifier
* @notice: Notice structure
*/
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 564388a85603..776e89231c52 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/rculist.h>
+#include <linux/random.h>
#include "rtrs-clt.h"
#include "rtrs-log.h"
@@ -23,6 +24,12 @@
* leads to "false positives" failed reconnect attempts
*/
#define RTRS_RECONNECT_BACKOFF 1000
+/*
+ * Wait for additional random time between 0 and 8 seconds
+ * before starting to reconnect to avoid clients reconnecting
+ * all at once in case of a major network outage
+ */
+#define RTRS_RECONNECT_SEED 8
MODULE_DESCRIPTION("RDMA Transport Client");
MODULE_LICENSE("GPL");
@@ -306,7 +313,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con)
*/
delay_ms = clt->reconnect_delay_sec * 1000;
queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
- msecs_to_jiffies(delay_ms));
+ msecs_to_jiffies(delay_ms +
+						    prandom_u32() % RTRS_RECONNECT_SEED * 1000));
} else {
/*
* Error can happen just on establishing new connection,
@@ -2503,7 +2511,9 @@ reconnect_again:
sess->stats->reconnects.fail_cnt++;
delay_ms = clt->reconnect_delay_sec * 1000;
queue_delayed_work(rtrs_wq, &sess->reconnect_dwork,
- msecs_to_jiffies(delay_ms));
+ msecs_to_jiffies(delay_ms +
+ prandom_u32() %
+						    RTRS_RECONNECT_SEED * 1000));
}
}
@@ -2972,7 +2982,7 @@ static int __init rtrs_client_init(void)
pr_err("Failed to create rtrs-client dev class\n");
return PTR_ERR(rtrs_clt_dev_class);
}
- rtrs_wq = alloc_workqueue("rtrs_client_wq", WQ_MEM_RECLAIM, 0);
+ rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0);
if (!rtrs_wq) {
class_destroy(rtrs_clt_dev_class);
return -ENOMEM;
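As a worked example of the new backoff: with reconnect_delay_sec = 30 the base delay is 30000 ms, and prandom_u32() % RTRS_RECONNECT_SEED * 1000 adds a uniform 0-7000 ms on top, so reconnect attempts spread out over roughly 30-37 seconds instead of every client firing at exactly 30 seconds.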
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 0d9241f5d9e6..a219bd1bdbc2 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -2150,7 +2150,7 @@ static int __init rtrs_server_init(void)
err = PTR_ERR(rtrs_dev_class);
goto out_chunk_pool;
}
- rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0);
+ rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0);
if (!rtrs_wq) {
err = -ENOMEM;
goto out_dev_class;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index ef7fcd3e8e15..0065eb17ae36 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -869,7 +869,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_rdma_ch *ch = wc->qp->qp_context;
pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num,
wc->status);
@@ -1322,7 +1322,7 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
*/
static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_rdma_ch *ch = wc->qp->qp_context;
struct srpt_send_ioctx *ioctx =
container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
@@ -1683,7 +1683,7 @@ push:
static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_rdma_ch *ch = wc->qp->qp_context;
struct srpt_recv_ioctx *ioctx =
container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
@@ -1744,7 +1744,7 @@ static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
*/
static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_rdma_ch *ch = wc->qp->qp_context;
struct srpt_send_ioctx *ioctx =
container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
enum srpt_command_state state;
@@ -1791,7 +1791,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
goto out;
retry:
- ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size,
+ ch->cq = ib_cq_pool_get(sdev->device, ch->rq_size + sq_size, -1,
IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
@@ -1799,6 +1799,7 @@ retry:
ch->rq_size + sq_size, ret);
goto out;
}
+ ch->cq_size = ch->rq_size + sq_size;
qp_init->qp_context = (void *)ch;
qp_init->event_handler
@@ -1843,7 +1844,7 @@ retry:
if (retry) {
pr_debug("failed to create queue pair with sq_size = %d (%d) - retrying\n",
sq_size, ret);
- ib_free_cq(ch->cq);
+ ib_cq_pool_put(ch->cq, ch->cq_size);
sq_size = max(sq_size / 2, MIN_SRPT_SQ_SIZE);
goto retry;
} else {
@@ -1869,14 +1870,14 @@ out:
err_destroy_cq:
ch->qp = NULL;
- ib_free_cq(ch->cq);
+ ib_cq_pool_put(ch->cq, ch->cq_size);
goto out;
}
static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
{
ib_destroy_qp(ch->qp);
- ib_free_cq(ch->cq);
+ ib_cq_pool_put(ch->cq, ch->cq_size);
}
/**
@@ -2156,9 +2157,6 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
WARN_ON_ONCE(irqs_disabled());
- if (WARN_ON(!sdev || !req))
- return -EINVAL;
-
it_iu_len = be32_to_cpu(req->req_it_iu_len);
pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n",
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index f31c349d07a1..41435a699b53 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -300,6 +300,7 @@ struct srpt_rdma_ch {
} rdma_cm;
};
struct ib_cq *cq;
+ u32 cq_size;
struct ib_cqe zw_cqe;
struct rcu_head rcu;
struct kref kref;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 42198e64a7f4..8db4b5f0f963 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -299,11 +299,18 @@ void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas)
}
EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
-void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm)
{
int i;
+ WARN_ON(perm & 0xfc);
for (i = 0; i < buf->npages; i++)
- pas[i] = cpu_to_be64(buf->frags[i].map);
+ pas[i] = cpu_to_be64(buf->frags[i].map | perm);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array_perm);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+ mlx5_fill_page_frag_array_perm(buf, pas, 0);
}
EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 7e70a8178a46..9ccec5f8b92a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1577,6 +1577,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
static bool counter_is_valid(u32 action)
{
return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_ALLOW |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 5763965d5ef3..97b5fcb1f406 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -226,13 +226,20 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
int mlx5i_create_underlay_qp(struct mlx5e_priv *priv)
{
+ unsigned char *dev_addr = priv->netdev->dev_addr;
u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {};
struct mlx5i_priv *ipriv = priv->ppriv;
void *addr_path;
+ int qpn = 0;
int ret = 0;
void *qpc;
+ if (MLX5_CAP_GEN(priv->mdev, mkey_by_name)) {
+ qpn = (dev_addr[1] << 16) + (dev_addr[2] << 8) + dev_addr[3];
+ MLX5_SET(create_qp_in, in, input_qpn, qpn);
+ }
+
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
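In the underlay-QP hunk above, bytes 1-3 of the netdev hardware address form a 24-bit requested QPN: for example dev_addr[1..3] = {0x00, 0x12, 0x34} yields input_qpn = 0x001234. The request is only made when the mkey_by_name capability is available, which the main.c hunk below turns on.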
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index e32d46c33701..ce43e3feccd9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -557,6 +557,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);
+ if (MLX5_CAP_GEN_MAX(dev, mkey_by_name))
+ MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);
+
return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
}