author     Linus Torvalds <torvalds@linux-foundation.org>  2020-06-05 14:05:57 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-06-05 14:05:57 -0700
commit     242b23319809e05170b3cc0d44d3b4bd202bb073 (patch)
tree       195e39fd02942ee0ef60ead7239859f2fe0c12a1 /drivers/infiniband/core
parent     3f7e82379fc91102d82ed89822bd4242c83e40d5 (diff)
parent     fba97dc7fc76b2c9a909fa0b3786d30a9899f5cf (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "A more active cycle than most of the recent past, with a few large,
  long discussed works this time.

  The RNBD block driver has been posted for nearly two years now, and
  is flowing through RDMA because it also introduces a new ULP. The
  removal of FMR has been a recurring discussion theme for a long time.
  And the usual smattering of features and bug fixes.

  Summary:

   - Various small driver bug fixes in rxe, mlx5, hfi1, and efa

   - Continuing driver cleanups in bnxt_re, hns

   - Big cleanup of mlx5 QP creation flows

   - More consistent use of src port and flow label when LAG is used,
     and an mlx5 implementation

   - Additional set of cleanups for IB CM

   - 'RNBD' network block driver and target. This is a network block
     RDMA device specific to ionos's cloud environment. It brings
     strong multipath and resiliency capabilities.

   - Accelerated IPoIB for HFI1

   - QP/WQ/SRQ ioctl migration for uverbs, and support for multiple
     async fds

   - Support for exchanging the new IBTA defined ECE data during RDMA
     CM exchanges

   - Removal of the very old and insecure FMR interface from all ULPs
     and drivers. FRWR should be preferred for at least a decade now"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (247 commits)
  RDMA/cm: Spurious WARNING triggered in cm_destroy_id()
  RDMA/mlx5: Return ECE DC support
  RDMA/mlx5: Don't rely on FW to set zeros in ECE response
  RDMA/mlx5: Return an error if copy_to_user fails
  IB/hfi1: Use free_netdev() in hfi1_netdev_free()
  RDMA/hns: Uninitialized variable in modify_qp_init_to_rtr()
  RDMA/core: Move and rename trace_cm_id_create()
  IB/hfi1: Fix hfi1_netdev_rx_init() error handling
  RDMA: Remove 'max_map_per_fmr'
  RDMA: Remove 'max_fmr'
  RDMA/core: Remove FMR device ops
  RDMA/rdmavt: Remove FMR memory registration
  RDMA/mthca: Remove FMR support for memory registration
  RDMA/mlx4: Remove FMR support for memory registration
  RDMA/i40iw: Remove FMR leftovers
  RDMA/bnxt_re: Remove FMR leftovers
  RDMA/mlx5: Remove FMR leftovers
  RDMA/core: Remove FMR pool API
  RDMA/rds: Remove FMR support for memory registration
  RDMA/srp: Remove support for FMR memory registration
  ...
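
The FMR removal noted above leaves FRWR as the supported in-kernel registration path. A minimal FRWR sketch, assuming the caller already holds a PD, an RC QP and a DMA-mapped scatterlist (sgl/nents); the function name and access flags are illustrative, and the matching local invalidate on teardown is omitted:

	#include <linux/err.h>
	#include <rdma/ib_verbs.h>

	static int example_frwr_register(struct ib_pd *pd, struct ib_qp *qp,
					 struct scatterlist *sgl, int nents)
	{
		struct ib_reg_wr reg_wr = {};
		struct ib_mr *mr;
		int n;

		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
		if (IS_ERR(mr))
			return PTR_ERR(mr);

		/* Collapse the sg list into the MR's page list. */
		n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
		if (n < nents) {
			ib_dereg_mr(mr);
			return n < 0 ? n : -EINVAL;
		}

		/* Registration is an ordinary send-queue work request. */
		reg_wr.wr.opcode = IB_WR_REG_MR;
		reg_wr.mr = mr;
		reg_wr.key = mr->rkey;
		reg_wr.access = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_READ |
				IB_ACCESS_REMOTE_WRITE;

		return ib_post_send(qp, &reg_wr.wr, NULL);
	}

The registration WR is posted on the same send queue ahead of the transfers that use mr->rkey, so no separate pool or unmap pass is needed.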
Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/Makefile | 9
-rw-r--r--  drivers/infiniband/core/addr.c | 4
-rw-r--r--  drivers/infiniband/core/cm.c | 306
-rw-r--r--  drivers/infiniband/core/cma.c | 114
-rw-r--r--  drivers/infiniband/core/cma_configfs.c | 13
-rw-r--r--  drivers/infiniband/core/cma_priv.h | 1
-rw-r--r--  drivers/infiniband/core/cma_trace.h | 20
-rw-r--r--  drivers/infiniband/core/core_priv.h | 3
-rw-r--r--  drivers/infiniband/core/cq.c | 173
-rw-r--r--  drivers/infiniband/core/device.c | 22
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 494
-rw-r--r--  drivers/infiniband/core/lag.c | 138
-rw-r--r--  drivers/infiniband/core/mad.c | 255
-rw-r--r--  drivers/infiniband/core/multicast.c | 12
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 25
-rw-r--r--  drivers/infiniband/core/rdma_core.h | 7
-rw-r--r--  drivers/infiniband/core/rw.c | 2
-rw-r--r--  drivers/infiniband/core/sa_query.c | 51
-rw-r--r--  drivers/infiniband/core/sysfs.c | 10
-rw-r--r--  drivers/infiniband/core/ucma.c | 65
-rw-r--r--  drivers/infiniband/core/ud_header.c | 2
-rw-r--r--  drivers/infiniband/core/user_mad.c | 22
-rw-r--r--  drivers/infiniband/core/uverbs.h | 21
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 76
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 24
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 40
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c | 95
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_cq.c | 17
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_mr.c | 12
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_qp.c | 401
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_srq.c | 234
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_wq.c | 194
-rw-r--r--  drivers/infiniband/core/uverbs_uapi.c | 3
-rw-r--r--  drivers/infiniband/core/verbs.c | 159
34 files changed, 1761 insertions(+), 1263 deletions(-)
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d1b14887960e..24cb71a16a28 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,11 +8,11 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
- device.o fmr_pool.o cache.o netlink.o \
+ device.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
nldev.o restrack.o counters.o ib_core_uverbs.o \
- trace.o
+ trace.o lag.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -36,6 +36,9 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o \
- uverbs_std_types_async_fd.o
+ uverbs_std_types_async_fd.o \
+ uverbs_std_types_srq.o \
+ uverbs_std_types_wq.o \
+ uverbs_std_types_qp.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1753a9801b70..3a98439bba83 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -371,6 +371,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
+ might_sleep();
+
/* If we have a gateway in IB mode then it must be an IB network */
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
@@ -727,6 +729,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
struct rdma_dev_addr dev_addr = {};
int ret;
+ might_sleep();
+
if (rec->roce.route_resolved)
return 0;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 17f14e0eafe4..9ce787e37e22 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -66,6 +66,8 @@ static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
[IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
[IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+ [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
+ "vendor option is not supported",
};
const char *__attribute_const__ ibcm_reject_msg(int reason)
@@ -81,8 +83,11 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
EXPORT_SYMBOL(ibcm_reject_msg);
struct cm_id_private;
-static void cm_add_one(struct ib_device *device);
+struct cm_work;
+static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work);
static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param);
static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
@@ -287,6 +292,8 @@ struct cm_id_private {
struct list_head work_list;
atomic_t work_count;
+
+ struct rdma_ucm_ece ece;
};
static void cm_work_handler(struct work_struct *work);
@@ -474,24 +481,19 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
-static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
- struct cm_av *av,
- struct cm_port *port)
+static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
+ struct cm_av *av, struct cm_port *port)
{
unsigned long flags;
- int ret = 0;
spin_lock_irqsave(&cm.lock, flags);
-
if (&cm_id_priv->av == av)
list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
else if (&cm_id_priv->alt_av == av)
list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
else
- ret = -EINVAL;
-
+ WARN_ON(true);
spin_unlock_irqrestore(&cm.lock, flags);
- return ret;
}
static struct cm_port *
@@ -572,12 +574,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
return ret;
av->timeout = path->packet_life_time + 1;
-
- ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret) {
- rdma_destroy_ah_attr(&new_ah_attr);
- return ret;
- }
+ add_cm_id_to_port_list(cm_id_priv, av, port);
rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
@@ -587,11 +584,6 @@ static u32 cm_local_id(__be32 local_id)
return (__force u32) (local_id ^ cm.random_id_operand);
}
-static void cm_free_id(__be32 local_id)
-{
- xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
-}
-
static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
struct cm_id_private *cm_id_priv;
@@ -698,9 +690,10 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
if ((cm_id_priv->id.service_mask & service_id) ==
cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device))
+ (cm_id_priv->id.device == device)) {
+ refcount_inc(&cm_id_priv->refcount);
return cm_id_priv;
-
+ }
if (device < cm_id_priv->id.device)
node = node->rb_left;
else if (device > cm_id_priv->id.device)
@@ -745,12 +738,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
return NULL;
}
-static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
- __be32 remote_id)
+static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
{
struct rb_node *node = cm.remote_id_table.rb_node;
struct cm_timewait_info *timewait_info;
+ struct cm_id_private *res = NULL;
+ spin_lock_irq(&cm.lock);
while (node) {
timewait_info = rb_entry(node, struct cm_timewait_info,
remote_id_node);
@@ -762,10 +757,14 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
node = node->rb_left;
else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
node = node->rb_right;
- else
- return timewait_info;
+ else {
+ res = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ break;
+ }
}
- return NULL;
+ spin_unlock_irq(&cm.lock);
+ return res;
}
static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
@@ -917,6 +916,35 @@ static void cm_free_work(struct cm_work *work)
kfree(work);
}
+static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+{
+ bool immediate;
+
+ /*
+ * To deliver the event to the user callback we have to drop the
+ * spinlock, however, we need to ensure that the user callback is single
+ * threaded and receives events in the temporal order. If there are
+ * already events being processed then thread new events onto a list,
+ * the thread currently processing will pick them up.
+ */
+ immediate = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!immediate) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ /*
+ * This routine always consumes incoming reference. Once queued
+ * to the work_list then a reference is held by the thread
+ * currently running cm_process_work() and this reference is not
+ * needed.
+ */
+ cm_deref_id(cm_id_priv);
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (immediate)
+ cm_process_work(cm_id_priv, work);
+}
+
static inline int cm_convert_to_ms(int iba_time)
{
/* approximate conversion to ms from 4.096us x 2^iba_time */
@@ -942,8 +970,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
return min(31, ack_timeout);
}
-static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
+ struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
+
if (timewait_info->inserted_remote_id) {
rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
timewait_info->inserted_remote_id = 0;
@@ -982,7 +1012,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
return;
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -1001,6 +1031,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
msecs_to_jiffies(wait_time));
spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * The timewait_info is converted into a work and gets freed during
+ * cm_free_work() in cm_timewait_handler().
+ */
+ BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
cm_id_priv->timewait_info = NULL;
}
@@ -1013,7 +1048,7 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
cm_id_priv->id.state = IB_CM_IDLE;
if (cm_id_priv->timewait_info) {
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
@@ -1076,7 +1111,9 @@ retest:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* Fall through */
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
+ goto retest;
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
@@ -1101,7 +1138,7 @@ retest:
case IB_CM_TIMEWAIT:
/*
* The cm_acquire_id in cm_timewait_handler will stop working
- * once we do cm_free_id() below, so just move to idle here for
+ * once we do xa_erase below, so just move to idle here for
* consistency.
*/
cm_id->state = IB_CM_IDLE;
@@ -1114,7 +1151,7 @@ retest:
spin_lock(&cm.lock);
/* Required for cleanup paths related cm_req_handler() */
if (cm_id_priv->timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
}
@@ -1131,7 +1168,7 @@ retest:
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_id(cm_id->local_id);
+ xa_erase_irq(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
@@ -1287,6 +1324,13 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
hdr->tid = tid;
}
+static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
+ __be64 tid, u32 attr_mod)
+{
+ cm_format_mad_hdr(hdr, attr_id, tid);
+ hdr->attr_mod = cpu_to_be32(attr_mod);
+}
+
static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
@@ -1299,8 +1343,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
pri_path->opa.slid);
- cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv));
+ cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv), param->ece.attr_mod);
IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
be32_to_cpu(cm_id_priv->id.local_id));
@@ -1423,6 +1467,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
alt_path->packet_life_time));
}
+ IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
if (param->private_data && param->private_data_len)
IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
@@ -1779,6 +1824,9 @@ static void cm_format_req_event(struct cm_work *work,
param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
+ param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
+ param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);
+
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}
@@ -1927,7 +1975,6 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
- struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1948,7 +1995,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Check for stale connections. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -1957,8 +2004,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
return NULL;
@@ -1969,14 +2015,13 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
cm_id_priv->id.device,
cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
NULL, 0);
return NULL;
}
- refcount_inc(&listen_cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
return listen_cm_id_priv;
}
@@ -2153,9 +2198,7 @@ static int cm_req_handler(struct cm_work *work)
/* Refcount belongs to the event, pairs with cm_process_work() */
refcount_inc(&cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->work_count);
- spin_unlock_irq(&cm_id_priv->lock);
- cm_process_work(cm_id_priv, work);
+ cm_queue_work_unlock(cm_id_priv, work);
/*
* Since this ID was just created and was not made visible to other MAD
* handlers until the cm_finalize_id() above we know that the
@@ -2176,7 +2219,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_rep_param *param)
{
- cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
+ cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid,
+ param->ece.attr_mod);
IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
be32_to_cpu(cm_id_priv->id.local_id));
IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
@@ -2203,6 +2247,10 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
}
+ IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id);
+ IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8);
+ IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16);
+
if (param->private_data && param->private_data_len)
IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
param->private_data_len);
@@ -2350,6 +2398,11 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+ param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg);
+ param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod);
+
work->cm_event.private_data =
IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
}
@@ -2404,7 +2457,6 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_rep_msg *rep_msg;
int ret;
struct cm_id_private *cur_cm_id_priv;
- struct ib_cm_id *cm_id;
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2454,9 +2506,7 @@ static int cm_rep_handler(struct cm_work *work)
/* Check for a stale connection. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- rb_erase(&cm_id_priv->timewait_info->remote_id_node,
- &cm.remote_id_table);
- cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cm_remove_remote(cm_id_priv);
cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
@@ -2472,8 +2522,7 @@ static int cm_rep_handler(struct cm_work *work)
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
@@ -2501,15 +2550,7 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv->alt_av.timeout - 1);
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
error:
@@ -2520,7 +2561,6 @@ error:
static int cm_establish_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
- int ret;
/* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
@@ -2534,15 +2574,7 @@ static int cm_establish_handler(struct cm_work *work)
}
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2553,7 +2585,6 @@ static int cm_rtu_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rtu_msg *rtu_msg;
- int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2576,15 +2607,7 @@ static int cm_rtu_handler(struct cm_work *work)
cm_id_priv->id.state = IB_CM_ESTABLISHED;
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2777,7 +2800,6 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2842,15 +2864,7 @@ static int cm_dreq_handler(struct cm_work *work)
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
cm_id_priv->tid = dreq_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2862,7 +2876,6 @@ static int cm_drep_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_drep_msg *drep_msg;
- int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(
@@ -2883,15 +2896,7 @@ static int cm_drep_handler(struct cm_work *work)
cm_enter_timewait(cm_id_priv);
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2987,24 +2992,15 @@ static void cm_format_rej_event(struct cm_work *work)
static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
{
- struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
__be32 remote_id;
remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
- spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id(
+ cm_id_priv = cm_find_remote_id(
*((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
remote_id);
- if (!timewait_info) {
- spin_unlock_irq(&cm.lock);
- return NULL;
- }
- cm_id_priv =
- cm_acquire_id(timewait_info->work.local_id, remote_id);
- spin_unlock_irq(&cm.lock);
} else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
CM_MSG_RESPONSE_REQ)
cm_id_priv = cm_acquire_id(
@@ -3022,7 +3018,6 @@ static int cm_rej_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rej_msg *rej_msg;
- int ret;
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_rejected_id(rej_msg);
@@ -3068,19 +3063,10 @@ static int cm_rej_handler(struct cm_work *work)
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
spin_unlock_irq(&cm_id_priv->lock);
- ret = -EINVAL;
goto out;
}
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3190,7 +3176,7 @@ static int cm_mra_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_mra_msg *mra_msg;
- int timeout, ret;
+ int timeout;
mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_mraed_id(mra_msg);
@@ -3250,15 +3236,7 @@ static int cm_mra_handler(struct cm_work *work)
cm_id_priv->msg->context[1] = (void *) (unsigned long)
cm_id_priv->id.state;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
spin_unlock_irq(&cm_id_priv->lock);
@@ -3393,15 +3371,7 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -3413,7 +3383,6 @@ static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_apr_msg *apr_msg;
- int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
@@ -3448,16 +3417,7 @@ static int cm_apr_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
cm_id_priv->msg = NULL;
-
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3468,7 +3428,6 @@ static int cm_timewait_handler(struct cm_work *work)
{
struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
- int ret;
timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
@@ -3487,15 +3446,7 @@ static int cm_timewait_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3642,7 +3593,6 @@ static int cm_sidr_req_handler(struct cm_work *work)
.status = IB_SIDR_UNSUPPORTED });
goto out; /* No match. */
}
- refcount_inc(&listen_cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -3674,8 +3624,8 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
- cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
- cm_id_priv->tid);
+ cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+ cm_id_priv->tid, param->ece.attr_mod);
IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
be32_to_cpu(cm_id_priv->id.remote_id));
IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
@@ -3683,6 +3633,10 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
be64_to_cpu(cm_id_priv->id.service_id));
IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg,
+ param->ece.vendor_id & 0xFF);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg,
+ (param->ece.vendor_id >> 8) & 0xFF);
if (param->info && param->info_length)
IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
@@ -4384,7 +4338,7 @@ static void cm_remove_port_fs(struct cm_port *port)
}
-static void cm_add_one(struct ib_device *ib_device)
+static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
@@ -4403,7 +4357,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
if (!cm_dev)
- return;
+ return -ENOMEM;
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
@@ -4415,8 +4369,10 @@ static void cm_add_one(struct ib_device *ib_device)
continue;
port = kzalloc(sizeof *port, GFP_KERNEL);
- if (!port)
+ if (!port) {
+ ret = -ENOMEM;
goto error1;
+ }
cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
@@ -4437,8 +4393,10 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
- if (IS_ERR(port->mad_agent))
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
goto error2;
+ }
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
@@ -4447,15 +4405,17 @@ static void cm_add_one(struct ib_device *ib_device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(ib_device, &cm_client, cm_dev);
write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
- return;
+ return 0;
error3:
ib_unregister_mad_agent(port->mad_agent);
@@ -4477,6 +4437,7 @@ error1:
}
free:
kfree(cm_dev);
+ return ret;
}
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
@@ -4491,9 +4452,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
unsigned long flags;
int i;
- if (!cm_dev)
- return;
-
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
write_unlock_irqrestore(&cm.device_lock, flags);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 26e6f7df247b..3d7cc9f0f3d4 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -91,7 +91,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_reject_msg);
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
if (rdma_ib_or_roce(id->device, id->port_num))
return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -102,7 +108,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
WARN_ON_ONCE(1);
return false;
}
-EXPORT_SYMBOL(rdma_is_consumer_reject);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len)
@@ -148,7 +153,7 @@ struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_res_to_id);
-static void cma_add_one(struct ib_device *device);
+static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
@@ -479,6 +484,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
rdma_restrack_kadd(&id_priv->res);
else
rdma_restrack_uadd(&id_priv->res);
+ trace_cm_id_attach(id_priv, cma_dev->device);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -883,7 +889,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
- trace_cm_id_create(id_priv);
return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);
@@ -1906,6 +1911,9 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
event->param.conn.srq = rep_data->srq;
event->param.conn.qp_num = rep_data->remote_qpn;
+
+ event->ece.vendor_id = rep_data->ece.vendor_id;
+ event->ece.attr_mod = rep_data->ece.attr_mod;
}
static int cma_cm_event_handler(struct rdma_id_private *id_priv,
@@ -2124,6 +2132,9 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
event->param.conn.srq = req_data->srq;
event->param.conn.qp_num = req_data->remote_qpn;
+
+ event->ece.vendor_id = req_data->ece.vendor_id;
+ event->ece.attr_mod = req_data->ece.attr_mod;
}
static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
@@ -2904,6 +2915,24 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
return 0;
}
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -2970,6 +2999,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+
cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
@@ -3919,6 +3953,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ req.ece.vendor_id = id_priv->ece.vendor_id;
+ req.ece.attr_mod = id_priv->ece.attr_mod;
trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
@@ -4008,6 +4044,27 @@ err:
}
EXPORT_SYMBOL(rdma_connect);
+/**
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ * @ece: ECE parameters
+ *
+ * See rdma_connect() explanation.
+ */
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_connect(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_connect_ece);
+
static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
@@ -4033,6 +4090,8 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
@@ -4080,7 +4139,11 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
return ret;
rep.qp_num = id_priv->qp_num;
rep.qkey = id_priv->qkey;
+
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
}
+
rep.private_data = private_data;
rep.private_data_len = private_data_len;
@@ -4133,11 +4196,24 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
return 0;
reject:
cma_modify_qp_err(id_priv);
- rdma_reject(id, NULL, 0);
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
EXPORT_SYMBOL(__rdma_accept);
+int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ const char *caller, struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return __rdma_accept(id, conn_param, caller);
+}
+EXPORT_SYMBOL(__rdma_accept_ece);
+
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
struct rdma_id_private *id_priv;
@@ -4160,7 +4236,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
EXPORT_SYMBOL(rdma_notify);
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
- u8 private_data_len)
+ u8 private_data_len, u8 reason)
{
struct rdma_id_private *id_priv;
int ret;
@@ -4175,9 +4251,8 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
private_data, private_data_len);
} else {
trace_cm_send_rej(id_priv);
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
- 0, private_data, private_data_len);
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
+ private_data, private_data_len);
}
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
@@ -4633,29 +4708,34 @@ static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
-static void cma_add_one(struct ib_device *device)
+static int cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
unsigned int i;
unsigned long supported_gids = 0;
+ int ret;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
- return;
+ return -ENOMEM;
cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type)
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
goto free_cma_dev;
+ }
cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_roce_tos),
GFP_KERNEL);
- if (!cma_dev->default_roce_tos)
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
goto free_gid_type;
+ }
rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
@@ -4681,15 +4761,14 @@ static void cma_add_one(struct ib_device *device)
mutex_unlock(&lock);
trace_cm_add_one(device);
- return;
+ return 0;
free_gid_type:
kfree(cma_dev->default_gid_type);
free_cma_dev:
kfree(cma_dev);
-
- return;
+ return ret;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4751,9 +4830,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
trace_cm_remove_one(device);
- if (!cma_dev)
- return;
-
mutex_lock(&lock);
list_del(&cma_dev->list);
mutex_unlock(&lock);
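
The rdma_connect_ece() / __rdma_accept_ece() entry points and the new reason argument to rdma_reject() added above combine roughly as in this sketch; the ulp_* names, the vendor_id/attr_mod values and the accept/reject policy are placeholders, not part of the patch:

	#include <rdma/rdma_cm.h>
	#include <rdma/ib_cm.h>

	/* Active side: attach ECE options to the outgoing REQ. */
	static int ulp_connect_with_ece(struct rdma_cm_id *id,
					struct rdma_conn_param *conn_param)
	{
		struct rdma_ucm_ece ece = {
			.vendor_id = 0x15b3,	/* placeholder vendor ID */
			.attr_mod = 0,		/* placeholder ECE options */
		};

		return rdma_connect_ece(id, conn_param, &ece);
	}

	/* Passive side: echo the peer's ECE on accept, or reject with an
	 * explicit reason now that rdma_reject() takes one.
	 */
	static int ulp_handle_req(struct rdma_cm_id *id,
				  struct rdma_cm_event *event,
				  struct rdma_conn_param *conn_param,
				  bool acceptable)
	{
		if (!acceptable)
			return rdma_reject(id, NULL, 0,
					   IB_CM_REJ_CONSUMER_DEFINED);

		return __rdma_accept_ece(id, conn_param, KBUILD_MODNAME,
					 &event->ece);
	}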
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index c672a4978bfd..3c1e2ca564fe 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -322,8 +322,21 @@ fail:
return ERR_PTR(err);
}
+static void drop_cma_dev(struct config_group *cgroup, struct config_item *item)
+{
+ struct config_group *group =
+ container_of(item, struct config_group, cg_item);
+ struct cma_dev_group *cma_dev_group =
+ container_of(group, struct cma_dev_group, device_group);
+
+ configfs_remove_default_groups(&cma_dev_group->ports_group);
+ configfs_remove_default_groups(&cma_dev_group->device_group);
+ config_item_put(item);
+}
+
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
+ .drop_item = drop_cma_dev,
};
static const struct config_item_type cma_subsys_type = {
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 5edcf44a9307..caece96ebcf5 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -95,6 +95,7 @@ struct rdma_id_private {
* Internal to RDMA/core, don't use in the drivers
*/
struct rdma_restrack_entry res;
+ struct rdma_ucm_ece ece;
};
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
index 81e36bf13159..e6e20c36c538 100644
--- a/drivers/infiniband/core/cma_trace.h
+++ b/drivers/infiniband/core/cma_trace.h
@@ -103,23 +103,33 @@ DEFINE_CMA_FSM_EVENT(sent_drep);
DEFINE_CMA_FSM_EVENT(sent_dreq);
DEFINE_CMA_FSM_EVENT(id_destroy);
-TRACE_EVENT(cm_id_create,
+TRACE_EVENT(cm_id_attach,
TP_PROTO(
- const struct rdma_id_private *id_priv
+ const struct rdma_id_private *id_priv,
+ const struct ib_device *device
),
- TP_ARGS(id_priv),
+ TP_ARGS(id_priv, device),
TP_STRUCT__entry(
__field(u32, cm_id)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ __string(devname, device->name)
),
TP_fast_assign(
__entry->cm_id = id_priv->res.id;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ __assign_str(devname, device->name);
),
- TP_printk("cm.id=%u",
- __entry->cm_id
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __get_str(devname)
)
);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index cf42acca4a3a..a1e6a67b2c4a 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -414,4 +414,7 @@ void rdma_umap_priv_init(struct rdma_umap_priv *priv,
struct vm_area_struct *vma,
struct rdma_user_mmap_entry *entry);
+void ib_cq_pool_init(struct ib_device *dev);
+void ib_cq_pool_destroy(struct ib_device *dev);
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 4f25b2400694..655795bfa0ee 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -7,7 +7,11 @@
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
#include <trace/events/rdma_core.h>
+/* Max size for shared CQ, may require tuning */
+#define IB_MAX_SHARED_CQ_SZ 4096U
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
@@ -218,6 +222,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
+ cq->comp_vector = comp_vector;
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
@@ -309,6 +314,8 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
+ if (WARN_ON_ONCE(cq->cqe_used))
+ return;
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -334,3 +341,169 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
kfree(cq);
}
EXPORT_SYMBOL(ib_free_cq_user);
+
+void ib_cq_pool_init(struct ib_device *dev)
+{
+ unsigned int i;
+
+ spin_lock_init(&dev->cq_pools_lock);
+ for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++)
+ INIT_LIST_HEAD(&dev->cq_pools[i]);
+}
+
+void ib_cq_pool_destroy(struct ib_device *dev)
+{
+ struct ib_cq *cq, *n;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
+ list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
+ pool_entry) {
+ WARN_ON(cq->cqe_used);
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ }
+}
+
+static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
+ enum ib_poll_context poll_ctx)
+{
+ LIST_HEAD(tmp_list);
+ unsigned int nr_cqs, i;
+ struct ib_cq *cq;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate at least as many CQEs as requested, and otherwise
+ * a reasonable batch size so that we can share CQs between
+ * multiple users instead of allocating a larger number of CQs.
+ */
+ nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
+ max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
+ nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ for (i = 0; i < nr_cqs; i++) {
+ cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto out_free_cqs;
+ }
+ cq->shared = true;
+ list_add_tail(&cq->pool_entry, &tmp_list);
+ }
+
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return 0;
+
+out_free_cqs:
+ list_for_each_entry(cq, &tmp_list, pool_entry) {
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ return ret;
+}
+
+/**
+ * ib_cq_pool_get() - Find the least used completion queue that matches
+ * a given cpu hint (or least used for wild card affinity) and fits
+ * nr_cqe.
+ * @dev: rdma device
+ * @nr_cqe: number of needed cqe entries
+ * @comp_vector_hint: completion vector hint (-1) for the driver to assign
+ * a comp vector based on internal counter
+ * @poll_ctx: cq polling context
+ *
+ * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and
+ * claims entries in it for us. In case there is no available cq, allocate
+ * a new cq with the requirements and add it to the device pool.
+ * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
+ * for @poll_ctx.
+ */
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx)
+{
+ static unsigned int default_comp_vector;
+ unsigned int vector, num_comp_vectors;
+ struct ib_cq *cq, *found = NULL;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return ERR_PTR(-EINVAL);
+ }
+
+ num_comp_vectors =
+ min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ /* Project the affinity to the device completion vector range */
+ if (comp_vector_hint < 0) {
+ comp_vector_hint =
+ (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
+ WRITE_ONCE(default_comp_vector, comp_vector_hint);
+ }
+ vector = comp_vector_hint % num_comp_vectors;
+
+ /*
+ * Find the least used CQ with correct affinity and
+ * enough free CQ entries
+ */
+ while (!found) {
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
+ pool_entry) {
+ /*
+ * Check to see if we have found a CQ with the
+ * correct completion vector
+ */
+ if (vector != cq->comp_vector)
+ continue;
+ if (cq->cqe_used + nr_cqe > cq->cqe)
+ continue;
+ found = cq;
+ break;
+ }
+
+ if (found) {
+ found->cqe_used += nr_cqe;
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return found;
+ }
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ /*
+ * Didn't find a match or ran out of CQs in the device
+ * pool, allocate a new array of CQs.
+ */
+ ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return found;
+}
+EXPORT_SYMBOL(ib_cq_pool_get);
+
+/**
+ * ib_cq_pool_put - Return a CQ taken from a shared pool.
+ * @cq: The CQ to return.
+ * @nr_cqe: The max number of cqes that the user had requested.
+ */
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
+{
+ if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
+ return;
+
+ spin_lock_irq(&cq->device->cq_pools_lock);
+ cq->cqe_used -= nr_cqe;
+ spin_unlock_irq(&cq->device->cq_pools_lock);
+}
+EXPORT_SYMBOL(ib_cq_pool_put);
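
A short sketch of how a ULP might consume the ib_cq_pool_get()/ib_cq_pool_put() pair added above; the helper name and qp_depth are placeholders, and per the kernel-doc IB_POLL_DIRECT is not a valid polling context for pool CQs:

	#include <linux/err.h>
	#include <rdma/ib_verbs.h>

	static struct ib_cq *example_get_shared_cq(struct ib_device *dev,
						   unsigned int qp_depth)
	{
		struct ib_cq *cq;

		/* -1 lets the core round-robin the completion vector. */
		cq = ib_cq_pool_get(dev, qp_depth, -1, IB_POLL_SOFTIRQ);
		if (IS_ERR(cq))
			return cq;

		/* ... use cq as the QP's send/recv CQ ... */
		return cq;
	}

	/* On teardown the claimed entries go back to the pool:
	 * ib_cq_pool_put(cq, qp_depth);
	 */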
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d0b3d35ad3e4..905a2beaf885 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -677,8 +677,20 @@ static int add_client_context(struct ib_device *device,
if (ret)
goto out;
downgrade_write(&device->client_data_rwsem);
- if (client->add)
- client->add(device);
+ if (client->add) {
+ if (client->add(device)) {
+ /*
+ * If a client fails to add then the error code is
+ * ignored, but we won't call any more ops on this
+ * client.
+ */
+ xa_erase(&device->client_data, client->client_id);
+ up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
+ return 0;
+ }
+ }
/* Readers shall not see a client until add has been completed */
xa_set_mark(&device->client_data, client->client_id,
@@ -1381,6 +1393,7 @@ int ib_register_device(struct ib_device *device, const char *name)
goto dev_cleanup;
}
+ ib_cq_pool_init(device);
ret = enable_device_and_get(device);
dev_set_uevent_suppress(&device->dev, false);
/* Mark for userspace that device is ready */
@@ -1435,6 +1448,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
goto out;
disable_device(ib_dev);
+ ib_cq_pool_destroy(ib_dev);
/* Expedite removing unregistered pointers from the hash table */
free_netdevs(ib_dev);
@@ -2557,7 +2571,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, add_gid);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
- SET_DEVICE_OP(dev_ops, alloc_fmr);
SET_DEVICE_OP(dev_ops, alloc_hw_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
@@ -2584,7 +2597,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
SET_DEVICE_OP(dev_ops, dealloc_driver);
- SET_DEVICE_OP(dev_ops, dealloc_fmr);
SET_DEVICE_OP(dev_ops, dealloc_mw);
SET_DEVICE_OP(dev_ops, dealloc_pd);
SET_DEVICE_OP(dev_ops, dealloc_ucontext);
@@ -2628,7 +2640,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, iw_rem_ref);
SET_DEVICE_OP(dev_ops, map_mr_sg);
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
- SET_DEVICE_OP(dev_ops, map_phys_fmr);
SET_DEVICE_OP(dev_ops, mmap);
SET_DEVICE_OP(dev_ops, mmap_free);
SET_DEVICE_OP(dev_ops, modify_ah);
@@ -2662,7 +2673,6 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, resize_cq);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
- SET_DEVICE_OP(dev_ops, unmap_fmr);
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_cq);
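
With the client ->add() callback now returning an errno (and a failure simply detaching the client from that device, as the device.c change above explains), a client built against the reworked interface looks roughly like this sketch; the example_* names are placeholders:

	#include <rdma/ib_verbs.h>

	static int example_add(struct ib_device *device)
	{
		/* Per-device setup; returning an error skips this device. */
		return 0;
	}

	static void example_remove(struct ib_device *device, void *client_data)
	{
		/* Undo whatever example_add() set up. */
	}

	static struct ib_client example_client = {
		.name	= "example",
		.add	= example_add,
		.remove	= example_remove,
	};

	/* Registered/unregistered as before with ib_register_client() and
	 * ib_unregister_client().
	 */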
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
deleted file mode 100644
index e08aec427027..000000000000
--- a/drivers/infiniband/core/fmr_pool.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/jhash.h>
-#include <linux/kthread.h>
-
-#include <rdma/ib_fmr_pool.h>
-
-#include "core_priv.h"
-
-#define PFX "fmr_pool: "
-
-enum {
- IB_FMR_MAX_REMAPS = 32,
-
- IB_FMR_HASH_BITS = 8,
- IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
- IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
-};
-
-/*
- * If an FMR is not in use, then the list member will point to either
- * its pool's free_list (if the FMR can be mapped again; that is,
- * remap_count < pool->max_remaps) or its pool's dirty_list (if the
- * FMR needs to be unmapped before being remapped). In either of
- * these cases it is a bug if the ref_count is not 0. In other words,
- * if ref_count is > 0, then the list member must not be linked into
- * either free_list or dirty_list.
- *
- * The cache_node member is used to link the FMR into a cache bucket
- * (if caching is enabled). This is independent of the reference
- * count of the FMR. When a valid FMR is released, its ref_count is
- * decremented, and if ref_count reaches 0, the FMR is placed in
- * either free_list or dirty_list as appropriate. However, it is not
- * removed from the cache and may be "revived" if a call to
- * ib_fmr_register_physical() occurs before the FMR is remapped. In
- * this case we just increment the ref_count and remove the FMR from
- * free_list/dirty_list.
- *
- * Before we remap an FMR from free_list, we remove it from the cache
- * (to prevent another user from obtaining a stale FMR). When an FMR
- * is released, we add it to the tail of the free list, so that our
- * cache eviction policy is "least recently used."
- *
- * All manipulation of ref_count, list and cache_node is protected by
- * pool_lock to maintain consistency.
- */
-
-struct ib_fmr_pool {
- spinlock_t pool_lock;
-
- int pool_size;
- int max_pages;
- int max_remaps;
- int dirty_watermark;
- int dirty_len;
- struct list_head free_list;
- struct list_head dirty_list;
- struct hlist_head *cache_bucket;
-
- void (*flush_function)(struct ib_fmr_pool *pool,
- void * arg);
- void *flush_arg;
-
- struct kthread_worker *worker;
- struct kthread_work work;
-
- atomic_t req_ser;
- atomic_t flush_ser;
-
- wait_queue_head_t force_wait;
-};
-
-static inline u32 ib_fmr_hash(u64 first_page)
-{
- return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
- (IB_FMR_HASH_SIZE - 1);
-}
-
-/* Caller must hold pool_lock */
-static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
- u64 *page_list,
- int page_list_len,
- u64 io_virtual_address)
-{
- struct hlist_head *bucket;
- struct ib_pool_fmr *fmr;
-
- if (!pool->cache_bucket)
- return NULL;
-
- bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
-
- hlist_for_each_entry(fmr, bucket, cache_node)
- if (io_virtual_address == fmr->io_virtual_address &&
- page_list_len == fmr->page_list_len &&
- !memcmp(page_list, fmr->page_list,
- page_list_len * sizeof *page_list))
- return fmr;
-
- return NULL;
-}
-
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
-{
- int ret;
- struct ib_pool_fmr *fmr;
- LIST_HEAD(unmap_list);
- LIST_HEAD(fmr_list);
-
- spin_lock_irq(&pool->pool_lock);
-
- list_for_each_entry(fmr, &pool->dirty_list, list) {
- hlist_del_init(&fmr->cache_node);
- fmr->remap_count = 0;
- list_add_tail(&fmr->fmr->list, &fmr_list);
- }
-
- list_splice_init(&pool->dirty_list, &unmap_list);
- pool->dirty_len = 0;
-
- spin_unlock_irq(&pool->pool_lock);
-
- if (list_empty(&unmap_list)) {
- return;
- }
-
- ret = ib_unmap_fmr(&fmr_list);
- if (ret)
- pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
-
- spin_lock_irq(&pool->pool_lock);
- list_splice(&unmap_list, &pool->free_list);
- spin_unlock_irq(&pool->pool_lock);
-}
-
-static void ib_fmr_cleanup_func(struct kthread_work *work)
-{
- struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
-
- ib_fmr_batch_release(pool);
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
-
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
- kthread_queue_work(pool->worker, &pool->work);
-}
-
-/**
- * ib_create_fmr_pool - Create an FMR pool
- * @pd:Protection domain for FMRs
- * @params:FMR pool parameters
- *
- * Create a pool of FMRs. Return value is pointer to new pool or
- * error code if creation failed.
- */
-struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
- struct ib_fmr_pool_param *params)
-{
- struct ib_device *device;
- struct ib_fmr_pool *pool;
- int i;
- int ret;
- int max_remaps;
-
- if (!params)
- return ERR_PTR(-EINVAL);
-
- device = pd->device;
- if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr ||
- !device->ops.map_phys_fmr || !device->ops.unmap_fmr) {
- dev_info(&device->dev, "Device does not support FMRs\n");
- return ERR_PTR(-ENOSYS);
- }
-
- if (!device->attrs.max_map_per_fmr)
- max_remaps = IB_FMR_MAX_REMAPS;
- else
- max_remaps = device->attrs.max_map_per_fmr;
-
- pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool)
- return ERR_PTR(-ENOMEM);
-
- pool->cache_bucket = NULL;
- pool->flush_function = params->flush_function;
- pool->flush_arg = params->flush_arg;
-
- INIT_LIST_HEAD(&pool->free_list);
- INIT_LIST_HEAD(&pool->dirty_list);
-
- if (params->cache) {
- pool->cache_bucket =
- kmalloc_array(IB_FMR_HASH_SIZE,
- sizeof(*pool->cache_bucket),
- GFP_KERNEL);
- if (!pool->cache_bucket) {
- ret = -ENOMEM;
- goto out_free_pool;
- }
-
- for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
- INIT_HLIST_HEAD(pool->cache_bucket + i);
- }
-
- pool->pool_size = 0;
- pool->max_pages = params->max_pages_per_fmr;
- pool->max_remaps = max_remaps;
- pool->dirty_watermark = params->dirty_watermark;
- pool->dirty_len = 0;
- spin_lock_init(&pool->pool_lock);
- atomic_set(&pool->req_ser, 0);
- atomic_set(&pool->flush_ser, 0);
- init_waitqueue_head(&pool->force_wait);
-
- pool->worker =
- kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
- if (IS_ERR(pool->worker)) {
- pr_warn(PFX "couldn't start cleanup kthread worker\n");
- ret = PTR_ERR(pool->worker);
- goto out_free_pool;
- }
- kthread_init_work(&pool->work, ib_fmr_cleanup_func);
-
- {
- struct ib_pool_fmr *fmr;
- struct ib_fmr_attr fmr_attr = {
- .max_pages = params->max_pages_per_fmr,
- .max_maps = pool->max_remaps,
- .page_shift = params->page_shift
- };
- int bytes_per_fmr = sizeof *fmr;
-
- if (pool->cache_bucket)
- bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
-
- for (i = 0; i < params->pool_size; ++i) {
- fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
- if (!fmr)
- goto out_fail;
-
- fmr->pool = pool;
- fmr->remap_count = 0;
- fmr->ref_count = 0;
- INIT_HLIST_NODE(&fmr->cache_node);
-
- fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
- if (IS_ERR(fmr->fmr)) {
- pr_warn(PFX "fmr_create failed for FMR %d\n",
- i);
- kfree(fmr);
- goto out_fail;
- }
-
- list_add_tail(&fmr->list, &pool->free_list);
- ++pool->pool_size;
- }
- }
-
- return pool;
-
- out_free_pool:
- kfree(pool->cache_bucket);
- kfree(pool);
-
- return ERR_PTR(ret);
-
- out_fail:
- ib_destroy_fmr_pool(pool);
-
- return ERR_PTR(-ENOMEM);
-}
-EXPORT_SYMBOL(ib_create_fmr_pool);
-
-/**
- * ib_destroy_fmr_pool - Free FMR pool
- * @pool:FMR pool to free
- *
- * Destroy an FMR pool and free all associated resources.
- */
-void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
-{
- struct ib_pool_fmr *fmr;
- struct ib_pool_fmr *tmp;
- LIST_HEAD(fmr_list);
- int i;
-
- kthread_destroy_worker(pool->worker);
- ib_fmr_batch_release(pool);
-
- i = 0;
- list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
- if (fmr->remap_count) {
- INIT_LIST_HEAD(&fmr_list);
- list_add_tail(&fmr->fmr->list, &fmr_list);
- ib_unmap_fmr(&fmr_list);
- }
- ib_dealloc_fmr(fmr->fmr);
- list_del(&fmr->list);
- kfree(fmr);
- ++i;
- }
-
- if (i < pool->pool_size)
- pr_warn(PFX "pool still has %d regions registered\n",
- pool->pool_size - i);
-
- kfree(pool->cache_bucket);
- kfree(pool);
-}
-EXPORT_SYMBOL(ib_destroy_fmr_pool);
-
-/**
- * ib_flush_fmr_pool - Invalidate all unmapped FMRs
- * @pool:FMR pool to flush
- *
- * Ensure that all unmapped FMRs are fully invalidated.
- */
-int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
-{
- int serial;
- struct ib_pool_fmr *fmr, *next;
-
- /*
- * The free_list holds FMRs that may have been used
- * but have not been remapped enough times to be dirty.
- * Put them on the dirty list now so that the cleanup
- * thread will reap them too.
- */
- spin_lock_irq(&pool->pool_lock);
- list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
- if (fmr->remap_count > 0)
- list_move(&fmr->list, &pool->dirty_list);
- }
- spin_unlock_irq(&pool->pool_lock);
-
- serial = atomic_inc_return(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
-
- if (wait_event_interruptible(pool->force_wait,
- atomic_read(&pool->flush_ser) - serial >= 0))
- return -EINTR;
-
- return 0;
-}
-EXPORT_SYMBOL(ib_flush_fmr_pool);
-
-/**
- * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
- * @pool_handle: FMR pool to allocate FMR from
- * @page_list: List of pages to map
- * @list_len: Number of pages in @page_list
- * @io_virtual_address: I/O virtual address for new FMR
- */
-struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
- u64 *page_list,
- int list_len,
- u64 io_virtual_address)
-{
- struct ib_fmr_pool *pool = pool_handle;
- struct ib_pool_fmr *fmr;
- unsigned long flags;
- int result;
-
- if (list_len < 1 || list_len > pool->max_pages)
- return ERR_PTR(-EINVAL);
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- fmr = ib_fmr_cache_lookup(pool,
- page_list,
- list_len,
- io_virtual_address);
- if (fmr) {
- /* found in cache */
- ++fmr->ref_count;
- if (fmr->ref_count == 1) {
- list_del(&fmr->list);
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return fmr;
- }
-
- if (list_empty(&pool->free_list)) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- return ERR_PTR(-EAGAIN);
- }
-
- fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
- list_del(&fmr->list);
- hlist_del_init(&fmr->cache_node);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
- io_virtual_address);
-
- if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- pr_warn(PFX "fmr_map returns %d\n", result);
-
- return ERR_PTR(result);
- }
-
- ++fmr->remap_count;
- fmr->ref_count = 1;
-
- if (pool->cache_bucket) {
- fmr->io_virtual_address = io_virtual_address;
- fmr->page_list_len = list_len;
- memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- hlist_add_head(&fmr->cache_node,
- pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_fmr_pool_map_phys);
-
-/**
- * ib_fmr_pool_unmap - Unmap FMR
- * @fmr:FMR to unmap
- *
- * Unmap an FMR. The FMR mapping may remain valid until the FMR is
- * reused (or until ib_flush_fmr_pool() is called).
- */
-void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
-{
- struct ib_fmr_pool *pool;
- unsigned long flags;
-
- pool = fmr->pool;
-
- spin_lock_irqsave(&pool->pool_lock, flags);
-
- --fmr->ref_count;
- if (!fmr->ref_count) {
- if (fmr->remap_count < pool->max_remaps) {
- list_add_tail(&fmr->list, &pool->free_list);
- } else {
- list_add_tail(&fmr->list, &pool->dirty_list);
- if (++pool->dirty_len >= pool->dirty_watermark) {
- atomic_inc(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
- }
- }
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-}
-EXPORT_SYMBOL(ib_fmr_pool_unmap);
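For reference, a minimal sketch of how a ULP consumed the pool API deleted above, built only from the parameters and kernel-doc visible in the removed code; the numeric values and the rkey remark are illustrative assumptions, and this interface no longer exists after this series (FRWR is the replacement):

#include <rdma/ib_fmr_pool.h>	/* header removed by this series */

static struct ib_fmr_pool *ulp_create_pool(struct ib_pd *pd)
{
	struct ib_fmr_pool_param params = {
		.max_pages_per_fmr = 64,		/* illustrative sizes */
		.page_shift        = PAGE_SHIFT,
		.access            = IB_ACCESS_LOCAL_WRITE |
				     IB_ACCESS_REMOTE_READ |
				     IB_ACCESS_REMOTE_WRITE,
		.pool_size         = 1024,
		.dirty_watermark   = 32,
		.cache             = 1,			/* enable the LRU cache */
	};

	return ib_create_fmr_pool(pd, &params);	/* ERR_PTR() on failure */
}

static int ulp_map_pages(struct ib_fmr_pool *pool, u64 *pages, int npages,
			 u64 iova, struct ib_pool_fmr **out)
{
	struct ib_pool_fmr *pfmr;

	pfmr = ib_fmr_pool_map_phys(pool, pages, npages, iova);
	if (IS_ERR(pfmr))
		return PTR_ERR(pfmr);	/* -EAGAIN when the free list is empty */

	/* typically pfmr->fmr->rkey was then handed to the remote side */
	*out = pfmr;
	return 0;
}

The ULP later called ib_fmr_pool_unmap(pfmr); as the kernel-doc above notes, the mapping could stay live until the FMR was reused or ib_flush_fmr_pool() ran, which is the invalidation weakness behind the removal.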
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..7063e41eaf26
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct ib_device *device,
+ struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, flags);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source =
+ htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct
+ iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(device, master, ah_attr, flags);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_read_lock();
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ if (slave)
+ dev_hold(slave);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave)
+{
+ if (xmit_slave)
+ dev_put(xmit_slave);
+}
+
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave = NULL;
+ struct net_device *master;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ ah_attr->grh.flow_label))
+ return NULL;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return master;
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master))
+ goto put;
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
+put:
+ dev_put(master);
+ return slave;
+}
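A hedged sketch of how a RoCE driver is expected to consume the two helpers exported above when building an address handle; hypothetical_set_port_affinity() is a made-up placeholder for the driver-specific part:

#include <rdma/lag.h>

static int hypothetical_create_ah(struct ib_device *ibdev,
				  struct rdma_ah_attr *ah_attr)
{
	struct net_device *slave;

	/*
	 * Returns NULL when LAG steering does not apply (no RoCEv2 GID, no
	 * flow label, or the GID netdev is not a bond master), an ERR_PTR()
	 * on failure, or a slave netdev with a reference held.
	 */
	slave = rdma_lag_get_ah_roce_slave(ibdev, ah_attr, GFP_KERNEL);
	if (IS_ERR(slave))
		return PTR_ERR(slave);

	if (slave) {
		hypothetical_set_port_affinity(ibdev, ah_attr, slave);
		rdma_lag_put_ah_roce_slave(slave);	/* drops the dev_hold() */
	}
	return 0;
}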
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index c54db13fa9b0..186e0d652e8b 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -85,7 +85,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
-/* Client ID 0 is used for snoop-only clients */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
@@ -483,141 +482,12 @@ error1:
}
EXPORT_SYMBOL(ib_register_mad_agent);
-static inline int is_snooping_sends(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (/*IB_MAD_SNOOP_POSTED_SENDS |
- IB_MAD_SNOOP_RMPP_SENDS |*/
- IB_MAD_SNOOP_SEND_COMPLETIONS /*|
- IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
-}
-
-static inline int is_snooping_recvs(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (IB_MAD_SNOOP_RECVS /*|
- IB_MAD_SNOOP_RMPP_RECVS*/));
-}
-
-static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
- struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_snoop_private **new_snoop_table;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- /* Check for empty slot in array. */
- for (i = 0; i < qp_info->snoop_table_size; i++)
- if (!qp_info->snoop_table[i])
- break;
-
- if (i == qp_info->snoop_table_size) {
- /* Grow table. */
- new_snoop_table = krealloc(qp_info->snoop_table,
- sizeof mad_snoop_priv *
- (qp_info->snoop_table_size + 1),
- GFP_ATOMIC);
- if (!new_snoop_table) {
- i = -ENOMEM;
- goto out;
- }
-
- qp_info->snoop_table = new_snoop_table;
- qp_info->snoop_table_size++;
- }
- qp_info->snoop_table[i] = mad_snoop_priv;
- atomic_inc(&qp_info->snoop_count);
-out:
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- return i;
-}
-
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_mad_agent *ret;
- struct ib_mad_snoop_private *mad_snoop_priv;
- int qpn;
- int err;
-
- /* Validate parameters */
- if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
- (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- qpn = get_spl_qp_index(qp_type);
- if (qpn == -1) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- port_priv = ib_get_mad_port(device, port_num);
- if (!port_priv) {
- ret = ERR_PTR(-ENODEV);
- goto error1;
- }
- /* Allocate structures */
- mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
- if (!mad_snoop_priv) {
- ret = ERR_PTR(-ENOMEM);
- goto error1;
- }
-
- /* Now, fill in the various structures */
- mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
- mad_snoop_priv->agent.device = device;
- mad_snoop_priv->agent.recv_handler = recv_handler;
- mad_snoop_priv->agent.snoop_handler = snoop_handler;
- mad_snoop_priv->agent.context = context;
- mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
- mad_snoop_priv->agent.port_num = port_num;
- mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
- init_completion(&mad_snoop_priv->comp);
-
- err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
- if (err) {
- ret = ERR_PTR(err);
- goto error2;
- }
-
- mad_snoop_priv->snoop_index = register_snoop_agent(
- &port_priv->qp_info[qpn],
- mad_snoop_priv);
- if (mad_snoop_priv->snoop_index < 0) {
- ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error3;
- }
-
- atomic_set(&mad_snoop_priv->refcount, 1);
- return &mad_snoop_priv->agent;
-error3:
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-error2:
- kfree(mad_snoop_priv);
-error1:
- return ret;
-}
-EXPORT_SYMBOL(ib_register_mad_snoop);
-
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
if (atomic_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
-static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- if (atomic_dec_and_test(&mad_snoop_priv->refcount))
- complete(&mad_snoop_priv->comp);
-}
-
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
@@ -650,25 +520,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
kfree_rcu(mad_agent_priv, rcu);
}
-static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_qp_info *qp_info;
- unsigned long flags;
-
- qp_info = mad_snoop_priv->qp_info;
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
- atomic_dec(&qp_info->snoop_count);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-
- deref_snoop_agent(mad_snoop_priv);
- wait_for_completion(&mad_snoop_priv->comp);
-
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-
- kfree(mad_snoop_priv);
-}
-
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*
@@ -677,20 +528,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_snoop_private *mad_snoop_priv;
-
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
- struct ib_mad_snoop_private,
- agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -706,57 +548,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
spin_unlock_irqrestore(&mad_queue->lock, flags);
}
-static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_buf, mad_send_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
-static void snoop_recv(struct ib_mad_qp_info *qp_info,
- struct ib_mad_recv_wc *mad_recv_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
- mad_recv_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
u16 pkey_index, u8 port_num, struct ib_wc *wc)
{
@@ -2289,9 +2080,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
recv->header.recv_wc.recv_buf.grh = &recv->grh;
- if (atomic_read(&qp_info->snoop_count))
- snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
-
/* Validate MAD */
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
@@ -2538,9 +2326,6 @@ retry:
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_send_wc.status = wc->status;
mad_send_wc.vendor_err = wc->vendor_err;
- if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
@@ -2782,10 +2567,6 @@ static void local_completions(struct work_struct *work)
local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
local->mad_priv->header.recv_wc.recv_buf.mad =
(struct ib_mad *)local->mad_priv->mad;
- if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
- snoop_recv(recv_mad_agent->qp_info,
- &local->mad_priv->header.recv_wc,
- IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
@@ -2800,10 +2581,6 @@ local_send_completion:
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
- if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info,
- &local->mad_send_wr->send_buf,
- &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
@@ -3119,10 +2896,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv,
init_mad_queue(qp_info, &qp_info->send_queue);
init_mad_queue(qp_info, &qp_info->recv_queue);
INIT_LIST_HEAD(&qp_info->overflow_list);
- spin_lock_init(&qp_info->snoop_lock);
- qp_info->snoop_table = NULL;
- qp_info->snoop_table_size = 0;
- atomic_set(&qp_info->snoop_count, 0);
}
static int create_mad_qp(struct ib_mad_qp_info *qp_info,
@@ -3166,7 +2939,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
return;
ib_destroy_qp(qp_info->qp);
- kfree(qp_info->snoop_table);
}
/*
@@ -3304,9 +3076,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}
-static void ib_mad_init_device(struct ib_device *device)
+static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
+ unsigned int count = 0;
+ int ret;
start = rdma_start_port(device);
@@ -3314,17 +3088,23 @@ static void ib_mad_init_device(struct ib_device *device)
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_mad_port_open(device, i)) {
+ ret = ib_mad_port_open(device, i);
+ if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
- if (ib_agent_port_open(device, i)) {
+ ret = ib_agent_port_open(device, i);
+ if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
+ count++;
}
- return;
+ if (!count)
+ return -EOPNOTSUPP;
+
+ return 0;
error_agent:
if (ib_mad_port_close(device, i))
@@ -3341,6 +3121,7 @@ error:
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
+ return ret;
}
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 9c2d8b7f1af9..740f03ecc05d 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"
-static void mcast_add_one(struct ib_device *device);
+static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
@@ -815,7 +815,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}
-static void mcast_add_one(struct ib_device *device)
+static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
@@ -825,7 +825,7 @@ static void mcast_add_one(struct ib_device *device)
dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
- return;
+ return -ENOMEM;
dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
@@ -845,7 +845,7 @@ static void mcast_add_one(struct ib_device *device)
if (!count) {
kfree(dev);
- return;
+ return -EOPNOTSUPP;
}
dev->device = device;
@@ -853,6 +853,7 @@ static void mcast_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
+ return 0;
}
static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -861,9 +862,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;
- if (!dev)
- return;
-
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
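The multicast conversion above, together with the matching mad.c, sa_query.c, user_mad.c and uverbs_main.c hunks elsewhere in this diff, implements the same new ib_client contract: add() returns an errno (-EOPNOTSUPP for devices the client does not serve) and remove() can rely on client_data being valid, so the old NULL checks disappear. A minimal sketch of the resulting shape; my_client, my_dev and the capability test are hypothetical names, not part of this series:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct my_dev {
	struct ib_device *device;	/* per-device client state */
};

static int my_add_one(struct ib_device *device);
static void my_remove_one(struct ib_device *device, void *client_data);

static struct ib_client my_client = {
	.name	= "my_client",
	.add	= my_add_one,
	.remove = my_remove_one,
};

static bool my_device_is_supported(struct ib_device *device)
{
	/* placeholder capability test */
	return rdma_start_port(device) <= rdma_end_port(device);
}

static int my_add_one(struct ib_device *device)
{
	struct my_dev *md;

	if (!my_device_is_supported(device))
		return -EOPNOTSUPP;		/* core skips this device */

	md = kzalloc(sizeof(*md), GFP_KERNEL);
	if (!md)
		return -ENOMEM;			/* registration fails cleanly */

	md->device = device;
	ib_set_client_data(device, &my_client, md);
	return 0;
}

static void my_remove_one(struct ib_device *device, void *client_data)
{
	/* add() can no longer "succeed" without data, so no NULL check here */
	kfree(client_data);
}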
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index e0a5e897e4b1..38de4942c682 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -130,6 +130,17 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
lockdep_assert_held(&ufile->hw_destroy_rwsem);
assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
+ if (reason == RDMA_REMOVE_ABORT_HWOBJ) {
+ reason = RDMA_REMOVE_ABORT;
+ ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
+ attrs);
+ /*
+ * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see
+ * ib_is_destroy_retryable, cleanup_retryable == false here.
+ */
+ WARN_ON(ret);
+ }
+
if (reason == RDMA_REMOVE_ABORT) {
WARN_ON(!list_empty(&uobj->list));
WARN_ON(!uobj->context);
@@ -653,11 +664,15 @@ void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
* object and anything else connected to uobj before calling this.
*/
void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
- struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs,
+ bool hw_obj_valid)
{
struct ib_uverbs_file *ufile = uobj->ufile;
- uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
+ uverbs_destroy_uobject(uobj,
+ hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ :
+ RDMA_REMOVE_ABORT,
+ attrs);
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
@@ -927,8 +942,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
}
void uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs)
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs)
{
/*
* refcounts should be handled at the object level and not at the
@@ -951,7 +966,7 @@ void uverbs_finalize_object(struct ib_uobject *uobj,
if (commit)
rdma_alloc_commit_uobject(uobj, attrs);
else
- rdma_alloc_abort_uobject(uobj, attrs);
+ rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid);
break;
default:
WARN_ON(true);
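The new RDMA_REMOVE_ABORT_HWOBJ handling above lets an abort tear down a driver object that was already created before the method failed: the core calls the type's destroy_hw() first and then falls through to the normal RDMA_REMOVE_ABORT bookkeeping. A rough sketch of the contract of the extra rdma_alloc_abort_uobject() argument; this shows the semantics only, is not a transcription of any in-tree caller, and the hypothetical_* helpers are placeholders:

static int hypothetical_create_flow(struct ib_uobject *uobj,
				    struct uverbs_attr_bundle *attrs)
{
	bool hw_created = false;
	int ret;

	ret = hypothetical_driver_create(uobj, attrs);
	if (ret)
		goto err;			/* nothing in HW yet */
	hw_created = true;

	ret = hypothetical_copy_response(attrs);
	if (ret)
		goto err;			/* HW object must be destroyed */

	rdma_alloc_commit_uobject(uobj, attrs);
	return 0;

err:
	/* hw_created == true selects RDMA_REMOVE_ABORT_HWOBJ internally */
	rdma_alloc_abort_uobject(uobj, attrs, hw_created);
	return ret;
}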
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 33978e0f1262..33706dad6c0f 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -64,8 +64,8 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
s64 id, struct uverbs_attr_bundle *attrs);
void uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs);
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs);
int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
@@ -159,6 +159,9 @@ extern const struct uapi_definition uverbs_def_obj_dm[];
extern const struct uapi_definition uverbs_def_obj_flow_action[];
extern const struct uapi_definition uverbs_def_obj_intf[];
extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_obj_qp[];
+extern const struct uapi_definition uverbs_def_obj_srq[];
+extern const struct uapi_definition uverbs_def_obj_wq[];
extern const struct uapi_definition uverbs_def_write_intf[];
static inline const struct uverbs_api_write_method *
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 557efbf29197..614cff89fc71 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -129,7 +129,7 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
qp->integrity_en);
int i, j, ret = 0, count = 0;
- ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 74e0058fcf9e..a2ed09a3c714 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -174,7 +174,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
};
-static void ib_sa_add_one(struct ib_device *device);
+static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);
static struct ib_client sa_client = {
@@ -190,7 +190,7 @@ static u32 tid;
#define PATH_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct sa_path_rec, field), \
- .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
@@ -292,7 +292,7 @@ static const struct ib_field path_rec_table[] = {
.struct_offset_bytes = \
offsetof(struct sa_path_rec, field), \
.struct_size_bytes = \
- sizeof((struct sa_path_rec *)0)->field, \
+ sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field opa_path_rec_table[] = {
@@ -420,7 +420,7 @@ static const struct ib_field opa_path_rec_table[] = {
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \
.field_name = "sa_mcmember_rec:" #field
static const struct ib_field mcmember_rec_table[] = {
@@ -504,7 +504,7 @@ static const struct ib_field mcmember_rec_table[] = {
#define SERVICE_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_service_rec, field), \
.field_name = "sa_service_rec:" #field
static const struct ib_field service_rec_table[] = {
@@ -552,7 +552,7 @@ static const struct ib_field service_rec_table[] = {
#define CLASSPORTINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
- .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \
.field_name = "ib_class_port_info:" #field
static const struct ib_field ib_classport_info_rec_table[] = {
@@ -630,7 +630,7 @@ static const struct ib_field ib_classport_info_rec_table[] = {
.struct_offset_bytes =\
offsetof(struct opa_class_port_info, field), \
.struct_size_bytes = \
- sizeof((struct opa_class_port_info *)0)->field, \
+ sizeof_field(struct opa_class_port_info, field), \
.field_name = "opa_class_port_info:" #field
static const struct ib_field opa_classport_info_rec_table[] = {
@@ -710,7 +710,7 @@ static const struct ib_field opa_classport_info_rec_table[] = {
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
- .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \
.field_name = "sa_guidinfo_rec:" #field
static const struct ib_field guidinfo_rec_table[] = {
@@ -1412,17 +1412,13 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
EXPORT_SYMBOL(ib_sa_pack_path);
static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
- struct ib_device *device,
+ struct ib_sa_device *sa_dev,
u8 port_num)
{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
unsigned long flags;
bool ret = false;
- if (!sa_dev)
- return ret;
-
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->classport_lock, flags);
if (!port->classport_info.valid)
@@ -1450,8 +1446,8 @@ enum opa_pr_supported {
* query is possible.
*/
static int opa_pr_query_possible(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num,
+ struct ib_sa_device *sa_dev,
+ struct ib_device *device, u8 port_num,
struct sa_path_rec *rec)
{
struct ib_port_attr port_attr;
@@ -1459,7 +1455,7 @@ static int opa_pr_query_possible(struct ib_sa_client *client,
if (ib_query_port(device, port_num, &port_attr))
return PR_NOT_SUPPORTED;
- if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
return PR_OPA_SUPPORTED;
if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
@@ -1574,7 +1570,8 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->sa_query.port = port;
if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
- status = opa_pr_query_possible(client, device, port_num, rec);
+ status = opa_pr_query_possible(client, sa_dev, device, port_num,
+ rec);
if (status == PR_NOT_SUPPORTED) {
ret = -EINVAL;
goto err1;
@@ -2325,18 +2322,19 @@ static void ib_sa_event(struct ib_event_handler *handler,
}
}
-static void ib_sa_add_one(struct ib_device *device)
+static int ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
if (!sa_dev)
- return;
+ return -ENOMEM;
sa_dev->start_port = s;
sa_dev->end_port = e;
@@ -2356,8 +2354,10 @@ static void ib_sa_add_one(struct ib_device *device)
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
NULL, 0, send_handler,
recv_handler, sa_dev, 0);
- if (IS_ERR(sa_dev->port[i].agent))
+ if (IS_ERR(sa_dev->port[i].agent)) {
+ ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
+ }
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
@@ -2366,8 +2366,10 @@ static void ib_sa_add_one(struct ib_device *device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &sa_client, sa_dev);
@@ -2386,7 +2388,7 @@ static void ib_sa_add_one(struct ib_device *device)
update_sm_ah(&sa_dev->port[i].update_task);
}
- return;
+ return 0;
err:
while (--i >= 0) {
@@ -2395,7 +2397,7 @@ err:
}
free:
kfree(sa_dev);
- return;
+ return ret;
}
static void ib_sa_remove_one(struct ib_device *device, void *client_data)
@@ -2403,9 +2405,6 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
struct ib_sa_device *sa_dev = client_data;
int i;
- if (!sa_dev)
- return;
-
ib_unregister_event_handler(&sa_dev->event_handler);
flush_workqueue(ib_wq);
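The repeated macro change in this file (and in ud_header.c and uverbs_cmd.c below) replaces the old fake-null-pointer spelling with sizeof_field(); both forms evaluate to the same compile-time constant. A tiny illustration, assuming the usual <linux/stddef.h> definition of sizeof_field():

#include <linux/stddef.h>	/* sizeof_field() */
#include <rdma/ib_sa.h>		/* struct sa_path_rec */

/* Both expressions yield the size of the dgid member (a 16-byte
 * union ib_gid); the new spelling is simply the cleaner idiom.
 */
static const size_t old_way = sizeof(((struct sa_path_rec *)0)->dgid);
static const size_t new_way = sizeof_field(struct sa_path_rec, dgid);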
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 087682e6969e..defe9cd4c5ee 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1058,8 +1058,7 @@ static int add_port(struct ib_core_device *coredev, int port_num)
coredev->ports_kobj,
"%d", port_num);
if (ret) {
- kfree(p);
- return ret;
+ goto err_put;
}
p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
@@ -1072,8 +1071,7 @@ static int add_port(struct ib_core_device *coredev, int port_num)
ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
&p->kobj, "gid_attrs");
if (ret) {
- kfree(p->gid_attr_group);
- goto err_put;
+ goto err_put_gid_attrs;
}
if (device->ops.process_mad && is_full_dev) {
@@ -1404,8 +1402,10 @@ int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s",
name);
- if (ret)
+ if (ret) {
+ kobject_put(kobj);
return ret;
+ }
}
return 0;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 16b6cf57fa85..5b87eee8ccc8 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -52,6 +52,7 @@
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
+#include <rdma/ib_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
@@ -360,6 +361,9 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
if (!ctx->backlog) {
ret = -ENOMEM;
@@ -404,7 +408,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
* Old 32 bit user space does not send the 4 byte padding in the
* reserved field. We don't care, allow it to keep working.
*/
- if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
+ sizeof(uevent->resp.ece))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -845,7 +850,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -869,6 +874,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
goto out;
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.ibdev_index = ctx->cm_id->device->index;
resp.port_num = ctx->cm_id->port_num;
if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
@@ -880,8 +886,8 @@ static ssize_t ucma_query_route(struct ucma_file *file,
out:
mutex_unlock(&ctx->mutex);
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
+ min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
ucma_put_ctx(ctx);
@@ -895,6 +901,7 @@ static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
return;
resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->ibdev_index = cm_id->device->index;
resp->port_num = cm_id->port_num;
resp->pkey = (__force __u16) cpu_to_be16(
ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
@@ -907,7 +914,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -922,7 +929,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
ucma_query_device_addr(ctx->cm_id, &resp);
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -974,7 +981,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
struct sockaddr_ib *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -1007,7 +1014,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
&ctx->cm_id->route.addr.dst_addr);
}
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -1070,12 +1077,15 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id,
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_connect cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
+ struct rdma_ucm_connect cmd;
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
if (!cmd.conn_param.valid)
@@ -1086,8 +1096,13 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
mutex_lock(&ctx->mutex);
- ret = rdma_connect(ctx->cm_id, &conn_param);
+ ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1121,28 +1136,36 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
{
struct rdma_ucm_accept cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
mutex_lock(&ctx->mutex);
- ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
+ ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece);
mutex_unlock(&ctx->mutex);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else {
mutex_lock(&ctx->mutex);
- ret = __rdma_accept(ctx->cm_id, NULL, NULL);
+ ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece);
mutex_unlock(&ctx->mutex);
}
ucma_put_ctx(ctx);
@@ -1159,12 +1182,24 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (!cmd.reason)
+ cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;
+
+ switch (cmd.reason) {
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
mutex_lock(&ctx->mutex);
- ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
+ cmd.reason);
mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
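The connect/accept/get_event changes above extend existing uapi structs with a trailing rdma_ucm_ece while keeping old binaries working: the kernel copies at most sizeof(cmd) bytes in and only reads the new field when the user buffer actually covers it, and on the read side it accepts the shorter response length. A stripped-down sketch of that offsetofend() idiom, using a simplified demo_cmd layout rather than the real rdma_ucm_connect:

#include <linux/types.h>
#include <linux/kernel.h>	/* min_t() */
#include <linux/stddef.h>	/* offsetofend() */
#include <linux/uaccess.h>
#include <rdma/rdma_user_cm.h>	/* struct rdma_ucm_ece */

struct demo_cmd {			/* simplified stand-in for the uapi struct */
	__u32 id;
	__u32 flags;
	struct rdma_ucm_ece ece;	/* new trailing member */
};

static int demo_parse(const char __user *inbuf, int in_len,
		      struct rdma_ucm_ece *ece)
{
	struct demo_cmd cmd = {};
	size_t in_size = min_t(size_t, in_len, sizeof(cmd));

	*ece = (struct rdma_ucm_ece){};
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	/* Old userspace stops before 'ece'; only trust it when present. */
	if (offsetofend(struct demo_cmd, ece) <= in_size)
		*ece = cmd.ece;

	return 0;
}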
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 29a45d2f8898..d65d541b9a25 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -41,7 +41,7 @@
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
- .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_unpacked_ ## header, field), \
.field_name = #header ":" #field
static const struct ib_field lrh_table[] = {
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index da229eab5903..b0d0b522cc76 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -142,7 +142,7 @@ static dev_t dynamic_issm_dev;
static DEFINE_IDA(umad_ida);
-static void ib_umad_add_one(struct ib_device *device);
+static int ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
static void ib_umad_dev_free(struct kref *kref)
@@ -1352,37 +1352,41 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
put_device(&port->dev);
}
-static void ib_umad_add_one(struct ib_device *device)
+static int ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL);
if (!umad_dev)
- return;
+ return -ENOMEM;
kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->ports[i - s]))
+ ret = ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->ports[i - s]);
+ if (ret)
goto err;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &umad_client, umad_dev);
- return;
+ return 0;
err:
while (--i >= s) {
@@ -1394,6 +1398,7 @@ err:
free:
/* balances kref_init */
ib_umad_dev_put(umad_dev);
+ return ret;
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
@@ -1401,9 +1406,6 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data)
struct ib_umad_device *umad_dev = client_data;
unsigned int i;
- if (!umad_dev)
- return;
-
rdma_for_each_port (device, i) {
if (rdma_cap_ib_mad(device, i))
ib_umad_kill_port(
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3d189c7ee59e..53a10479958b 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -142,7 +142,7 @@ struct ib_uverbs_file {
* ucontext_lock held
*/
struct ib_ucontext *ucontext;
- struct ib_uverbs_async_event_file *async_file;
+ struct ib_uverbs_async_event_file *default_async_file;
struct list_head list;
/*
@@ -180,6 +180,7 @@ struct ib_uverbs_mcast_entry {
struct ib_uevent_object {
struct ib_uobject uobject;
+ struct ib_uverbs_async_event_file *event_file;
/* List member for ib_uverbs_async_event_file list */
struct list_head event_list;
u32 events_reported;
@@ -296,6 +297,24 @@ static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
return res;
}
+static inline struct ib_uverbs_async_event_file *
+ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs,
+ u16 id)
+{
+ struct ib_uobject *async_ev_file_uobj;
+ struct ib_uverbs_async_event_file *async_ev_file;
+
+ async_ev_file_uobj = uverbs_attr_get_uobject(attrs, id);
+ if (IS_ERR(async_ev_file_uobj))
+ async_ev_file = READ_ONCE(attrs->ufile->default_async_file);
+ else
+ async_ev_file = container_of(async_ev_file_uobj,
+ struct ib_uverbs_async_event_file,
+ uobj);
+ if (async_ev_file)
+ uverbs_uobject_get(&async_ev_file->uobj);
+ return async_ev_file;
+}
void copy_port_attr_to_resp(struct ib_port_attr *attr,
struct ib_uverbs_query_port_resp *resp,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 060b4ebbd2ba..b48b3f6e632d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -311,7 +311,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
return 0;
err_uobj:
- rdma_alloc_abort_uobject(uobj, attrs);
+ rdma_alloc_abort_uobject(uobj, attrs, false);
err_ucontext:
kfree(attrs->context);
attrs->context = NULL;
@@ -356,8 +356,6 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
resp->max_ah = attr->max_ah;
- resp->max_fmr = attr->max_fmr;
- resp->max_map_per_fmr = attr->max_map_per_fmr;
resp->max_srq = attr->max_srq;
resp->max_srq_wr = attr->max_srq_wr;
resp->max_srq_sge = attr->max_srq_sge;
@@ -1051,6 +1049,10 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
goto err_free;
obj->uevent.uobject.object = cq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uevent.uobject.id;
resp.base.cqe = cq->cqe;
@@ -1067,6 +1069,8 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
return obj;
err_cb:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
cq = NULL;
err_free:
@@ -1460,6 +1464,9 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
}
obj->uevent.uobject.object = qp;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
memset(&resp, 0, sizeof resp);
resp.base.qpn = qp->qp_num;
@@ -1473,7 +1480,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
- goto err_cb;
+ goto err_uevent;
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1498,6 +1505,9 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs);
return 0;
+err_uevent:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
err_cb:
ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs));
@@ -2954,11 +2964,11 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
wq_init_attr.create_flags = cmd.create_flags;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd.user_handle;
wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
@@ -2972,12 +2982,12 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
wq->cq = cq;
wq->pd = pd;
wq->device = pd->device;
- wq->wq_context = wq_init_attr.wq_context;
atomic_set(&wq->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
- wq->uobject = obj;
- obj->uevent.uobject.object = wq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
@@ -2996,6 +3006,8 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
return 0;
err_copy:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs));
err_put_cq:
rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
@@ -3441,46 +3453,25 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
attr.attr.srq_limit = cmd->srq_limit;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd->user_handle;
- srq = rdma_zalloc_drv_obj(ib_dev, ib_srq);
- if (!srq) {
- ret = -ENOMEM;
- goto err_put;
- }
-
- srq->device = pd->device;
- srq->pd = pd;
- srq->srq_type = cmd->srq_type;
- srq->uobject = obj;
- srq->event_handler = attr.event_handler;
- srq->srq_context = attr.srq_context;
-
- ret = pd->device->ops.create_srq(srq, &attr, udata);
- if (ret)
- goto err_free;
-
- if (ib_srq_has_cq(cmd->srq_type)) {
- srq->ext.cq = attr.ext.cq;
- atomic_inc(&attr.ext.cq->usecnt);
- }
-
- if (cmd->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
- atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+ srq = ib_create_srq_user(pd, &attr, obj, udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err_put_pd;
}
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
-
obj->uevent.uobject.object = srq;
obj->uevent.uobject.user_handle = cmd->user_handle;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
memset(&resp, 0, sizeof resp);
resp.srq_handle = obj->uevent.uobject.id;
@@ -3505,14 +3496,11 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
return 0;
err_copy:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs));
- /* It was released in ib_destroy_srq_user */
- srq = NULL;
-err_free:
- kfree(srq);
-err_put:
+err_put_pd:
uobj_put_obj_read(pd);
-
err_put_cq:
if (ib_srq_has_cq(cmd->srq_type))
rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
@@ -3751,7 +3739,7 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
#define UAPI_DEF_WRITE_IO(req, resp) \
.write.has_resp = 1 + \
BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
- BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ BUILD_BUG_ON_ZERO(sizeof_field(req, response) != \
sizeof(u64)), \
.write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 538affbc517e..2d882c02387c 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -58,6 +58,7 @@ struct bundle_priv {
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN);
/*
* Must be last. bundle ends in a flex array which overlaps
@@ -136,7 +137,7 @@ EXPORT_SYMBOL(_uverbs_alloc);
static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
u16 len)
{
- if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ if (uattr->len > sizeof_field(struct ib_uverbs_attr, data))
return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
uattr->len - len);
@@ -230,7 +231,8 @@ static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
for (i = 0; i != attr->len; i++)
uverbs_finalize_object(attr->uobjects[i],
- spec->u2.objs_arr.access, commit, attrs);
+ spec->u2.objs_arr.access, false, commit,
+ attrs);
}
static int uverbs_process_attr(struct bundle_priv *pbundle,
@@ -502,7 +504,9 @@ static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
uverbs_finalize_object(
attr->obj_attr.uobject,
- attr->obj_attr.attr_elm->spec.u.obj.access, commit,
+ attr->obj_attr.attr_elm->spec.u.obj.access,
+ test_bit(i, pbundle->uobj_hw_obj_valid),
+ commit,
&pbundle->bundle);
}
@@ -590,6 +594,8 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
sizeof(pbundle->bundle.attr_present));
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
+ memset(pbundle->uobj_hw_obj_valid, 0,
+ sizeof(pbundle->uobj_hw_obj_valid));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
bundle_destroy(pbundle, ret == 0);
@@ -784,3 +790,15 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
}
return uverbs_copy_to(bundle, idx, from, size);
}
+
+/* Once called an abort will call through to the type's destroy_hw() */
+void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle,
+ u16 idx)
+{
+ struct bundle_priv *pbundle =
+ container_of(bundle, struct bundle_priv, bundle);
+
+ __set_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+ pbundle->uobj_hw_obj_valid);
+}
+EXPORT_SYMBOL(uverbs_finalize_uobj_create);
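uverbs_finalize_uobj_create() is the ioctl-path opt-in for the hw_obj_valid tracking introduced in the rdma_core.c hunk earlier in this diff: once a method has the driver object behind a new uobject, it marks the handle attribute so that any later failure in the same method still reaches destroy_hw() during the automatic abort. A hedged sketch of such a method handler; the method/attr names and the hypothetical_* calls are made up:

#include <rdma/uverbs_ioctl.h>
#include <rdma/uverbs_named_ioctl.h>

static int UVERBS_HANDLER(MY_METHOD_CREATE)(struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj =
		uverbs_attr_get_uobject(attrs, MY_ATTR_CREATE_HANDLE);
	int ret;

	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	ret = hypothetical_driver_create(uobj, attrs);
	if (ret)
		return ret;	/* abort happens without touching HW */

	/* The HW object exists: from now on an abort calls destroy_hw(). */
	uverbs_finalize_uobj_create(attrs, MY_ATTR_CREATE_HANDLE);

	/* Later failures (e.g. copying the response) are now safe too. */
	return hypothetical_copy_response(attrs);
}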
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 1bab8de14757..47794c85e9af 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -75,7 +75,7 @@ static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-static void ib_uverbs_add_one(struct ib_device *device);
+static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
/*
@@ -146,8 +146,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
- struct ib_uverbs_async_event_file *async_file =
- READ_ONCE(uobj->uobject.ufile->async_file);
+ struct ib_uverbs_async_event_file *async_file = uobj->event_file;
struct ib_uverbs_event *evt, *tmp;
if (!async_file)
@@ -159,6 +158,7 @@ void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
kfree(evt);
}
spin_unlock_irq(&async_file->ev_queue.lock);
+ uverbs_uobject_put(&async_file->uobj);
}
void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -197,8 +197,8 @@ void ib_uverbs_release_file(struct kref *ref)
if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
- if (file->async_file)
- uverbs_uobject_put(&file->async_file->uobj);
+ if (file->default_async_file)
+ uverbs_uobject_put(&file->default_async_file->uobj);
put_device(&file->device->dev);
if (file->disassociate_page)
@@ -296,6 +296,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
pollflags = EPOLLIN | EPOLLRDNORM;
+ else if (ev_queue->is_closed)
+ pollflags = EPOLLERR;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
@@ -425,7 +427,7 @@ void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
static void uverbs_uobj_event(struct ib_uevent_object *eobj,
struct ib_event *event)
{
- ib_uverbs_async_handler(READ_ONCE(eobj->uobject.ufile->async_file),
+ ib_uverbs_async_handler(eobj->event_file,
eobj->uobject.user_handle, event->event,
&eobj->event_list, &eobj->events_reported);
}
@@ -482,10 +484,10 @@ void ib_uverbs_init_async_event_file(
/* The first async_event_file becomes the default one for the file. */
mutex_lock(&uverbs_file->ucontext_lock);
- if (!uverbs_file->async_file) {
+ if (!uverbs_file->default_async_file) {
/* Pairs with the put in ib_uverbs_release_file */
uverbs_uobject_get(&async_file->uobj);
- smp_store_release(&uverbs_file->async_file, async_file);
+ smp_store_release(&uverbs_file->default_async_file, async_file);
}
mutex_unlock(&uverbs_file->ucontext_lock);
@@ -1092,7 +1094,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
return 0;
}
-static void ib_uverbs_add_one(struct ib_device *device)
+static int ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
dev_t base;
@@ -1100,16 +1102,16 @@ static void ib_uverbs_add_one(struct ib_device *device)
int ret;
if (!device->ops.alloc_ucontext)
- return;
+ return -EOPNOTSUPP;
uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
- return;
+ return -ENOMEM;
ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
if (ret) {
kfree(uverbs_dev);
- return;
+ return -ENOMEM;
}
device_initialize(&uverbs_dev->dev);
@@ -1129,15 +1131,18 @@ static void ib_uverbs_add_one(struct ib_device *device)
devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
GFP_KERNEL);
- if (devnum < 0)
+ if (devnum < 0) {
+ ret = -ENOMEM;
goto err;
+ }
uverbs_dev->devnum = devnum;
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else
base = IB_UVERBS_BASE_DEV + devnum;
- if (ib_uverbs_create_uapi(device, uverbs_dev))
+ ret = ib_uverbs_create_uapi(device, uverbs_dev);
+ if (ret)
goto err_uapi;
uverbs_dev->dev.devt = base;
@@ -1152,7 +1157,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
goto err_uapi;
ib_set_client_data(device, &uverbs_client, uverbs_dev);
- return;
+ return 0;
err_uapi:
ida_free(&uverbs_ida, devnum);
@@ -1161,7 +1166,7 @@ err:
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
put_device(&uverbs_dev->dev);
- return;
+ return ret;
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
@@ -1201,9 +1206,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
struct ib_uverbs_device *uverbs_dev = client_data;
int wait_clients = 1;
- if (!uverbs_dev)
- return;
-
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 3abfc63225cb..08c39cfb1bd9 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -75,40 +75,6 @@ static int uverbs_free_mw(struct ib_uobject *uobject,
return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
}
-static int uverbs_free_qp(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_qp *qp = uobject->object;
- struct ib_uqp_object *uqp =
- container_of(uobject, struct ib_uqp_object, uevent.uobject);
- int ret;
-
- /*
- * If this is a user triggered destroy then do not allow destruction
- * until the user cleans up all the mcast bindings. Unlike in other
- * places we forcibly clean up the mcast attachments for !DESTROY
- * because the mcast attaches are not ubojects and will not be
- * destroyed by anything else during cleanup processing.
- */
- if (why == RDMA_REMOVE_DESTROY) {
- if (!list_empty(&uqp->mcast_list))
- return -EBUSY;
- } else if (qp == qp->real_qp) {
- ib_uverbs_detach_umcast(qp, uqp);
- }
-
- ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- if (uqp->uxrcd)
- atomic_dec(&uqp->uxrcd->refcnt);
-
- ib_uverbs_release_uevent(&uqp->uevent);
- return ret;
-}
-
static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
@@ -125,48 +91,6 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
return ret;
}
-static int uverbs_free_wq(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_wq *wq = uobject->object;
- struct ib_uwq_object *uwq =
- container_of(uobject, struct ib_uwq_object, uevent.uobject);
- int ret;
-
- ret = ib_destroy_wq(wq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- ib_uverbs_release_uevent(&uwq->uevent);
- return ret;
-}
-
-static int uverbs_free_srq(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_srq *srq = uobject->object;
- struct ib_uevent_object *uevent =
- container_of(uobject, struct ib_uevent_object, uobject);
- enum ib_srq_type srq_type = srq->srq_type;
- int ret;
-
- ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- if (srq_type == IB_SRQT_XRC) {
- struct ib_usrq_object *us =
- container_of(uevent, struct ib_usrq_object, uevent);
-
- atomic_dec(&us->uxrcd->refcnt);
- }
-
- ib_uverbs_release_uevent(uevent);
- return ret;
-}
-
static int uverbs_free_xrcd(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
@@ -252,10 +176,6 @@ DECLARE_UVERBS_NAMED_OBJECT(
"[infinibandevent]",
O_RDONLY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_QP,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MW_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
@@ -267,11 +187,6 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
&UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_SRQ,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
- uverbs_free_srq));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_AH_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
@@ -296,10 +211,6 @@ DECLARE_UVERBS_NAMED_OBJECT(
uverbs_free_flow),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_WQ,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
@@ -340,18 +251,12 @@ const struct uapi_definition uverbs_def_obj_intf[] = {
UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
UVERBS_OBJECT_RWQ_IND_TBL,
UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index da4110a0eea2..5dce2c7cc323 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -100,6 +100,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
uverbs_uobject_get(ev_file_uobj);
}
+ obj->uevent.event_file = ib_uverbs_get_async_event(
+ attrs, UVERBS_ATTR_CREATE_CQ_EVENT_FD);
+
if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
@@ -129,19 +132,17 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
obj->uevent.uobject.object = cq;
obj->uevent.uobject.user_handle = user_handle;
rdma_restrack_uadd(&cq->res);
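+	/*
+	 * From here the uobject owns the CQ: if the response copy below
+	 * fails, the ioctl machinery aborts the uobject and the CQ is
+	 * released through its destroy_hw() path, so no explicit unwind
+	 * is needed.
+	 */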
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
sizeof(cq->cqe));
- if (ret)
- goto err_cq;
+ return ret;
- return 0;
-err_cq:
- ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs));
- cq = NULL;
err_free:
kfree(cq);
err_event_file:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
if (ev_file)
uverbs_uobject_put(ev_file_uobj);
return ret;
@@ -171,6 +172,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index c1286a52dc84..a2722ef8496e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -136,21 +136,15 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
uobj->object = mr;
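+	/* The uobject now owns the MR; failed response copies below are
+	 * unwound by the uobject abort path instead of an explicit dereg.
+	 */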
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+
ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
sizeof(mr->lkey));
if (ret)
- goto err_dereg;
+ return ret;
ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
&mr->rkey, sizeof(mr->rkey));
- if (ret)
- goto err_dereg;
-
- return 0;
-
-err_dereg:
- ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
-
return ret;
}
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
new file mode 100644
index 000000000000..3bf8dcdfe7eb
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include "core_priv.h"
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+	 * because the mcast attaches are not uobjects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(&uqp->uevent);
+ return ret;
+}
+
+static int check_creation_flags(enum ib_qp_type qp_type,
+ u32 create_flags)
+{
+ create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+
+ if (!create_flags || qp_type == IB_QPT_DRIVER)
+ return 0;
+
+ if (qp_type != IB_QPT_RAW_PACKET && qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ if ((create_flags & IB_UVERBS_QP_CREATE_SCATTER_FCS ||
+ create_flags & IB_UVERBS_QP_CREATE_CVLAN_STRIPPING) &&
+ qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ return 0;
+}
+
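+/*
+ * Transfer QP capabilities between the uverbs request and ib_qp_init_attr:
+ * req == true copies the user's requested limits into @attr, req == false
+ * copies the limits actually granted by the driver back for the response.
+ */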
+static void set_caps(struct ib_qp_init_attr *attr,
+ struct ib_uverbs_qp_cap *cap, bool req)
+{
+ if (req) {
+ attr->cap.max_send_wr = cap->max_send_wr;
+ attr->cap.max_recv_wr = cap->max_recv_wr;
+ attr->cap.max_send_sge = cap->max_send_sge;
+ attr->cap.max_recv_sge = cap->max_recv_sge;
+ attr->cap.max_inline_data = cap->max_inline_data;
+ } else {
+ cap->max_send_wr = attr->cap.max_send_wr;
+ cap->max_recv_wr = attr->cap.max_recv_wr;
+ cap->max_send_sge = attr->cap.max_send_sge;
+ cap->max_recv_sge = attr->cap.max_recv_sge;
+ cap->max_inline_data = attr->cap.max_inline_data;
+ }
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uqp_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_QP_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_qp_init_attr attr = {};
+ struct ib_uverbs_qp_cap cap = {};
+ struct ib_rwq_ind_table *rwq_ind_tbl = NULL;
+ struct ib_qp *qp;
+ struct ib_pd *pd = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_cq *recv_cq = NULL;
+ struct ib_cq *send_cq = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *xrcd_uobj = NULL;
+ struct ib_device *device;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from_or_zero(&cap, attrs,
+ UVERBS_ATTR_CREATE_QP_CAP);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_QP_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.qp_type, attrs,
+ UVERBS_ATTR_CREATE_QP_TYPE);
+ if (ret)
+ return ret;
+
+ switch (attr.qp_type) {
+ case IB_QPT_XRC_TGT:
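+		/*
+		 * An XRC target QP is created from an XRCD alone; PD, CQ
+		 * and RWQ indirection table handles must not be supplied.
+		 */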
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE))
+ return -EINVAL;
+
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!xrcd)
+ return -EINVAL;
+ device = xrcd->device;
+ break;
+ case IB_UVERBS_QPT_RAW_PACKET:
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+ fallthrough;
+ case IB_UVERBS_QPT_RC:
+ case IB_UVERBS_QPT_UC:
+ case IB_UVERBS_QPT_UD:
+ case IB_UVERBS_QPT_XRC_INI:
+ case IB_UVERBS_QPT_DRIVER:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE) ||
+ (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE) &&
+ attr.qp_type == IB_QPT_XRC_INI))
+ return -EINVAL;
+
+ pd = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE);
+ if (IS_ERR(pd))
+ return PTR_ERR(pd);
+
+ rwq_ind_tbl = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE);
+ if (!IS_ERR(rwq_ind_tbl)) {
+ if (cap.max_recv_wr || cap.max_recv_sge ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
+ return -EINVAL;
+
+			/* send_cq is optional */
+ if (cap.max_send_wr) {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+ }
+ attr.rwq_ind_tbl = rwq_ind_tbl;
+ } else {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+
+ if (attr.qp_type != IB_QPT_XRC_INI) {
+ recv_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE);
+ if (IS_ERR(recv_cq))
+ return PTR_ERR(recv_cq);
+ }
+ }
+
+ device = pd->device;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = uverbs_get_flags32(&attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_QP_FLAGS,
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_UVERBS_QP_CREATE_SCATTER_FCS |
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING |
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING |
+ IB_UVERBS_QP_CREATE_SQ_SIG_ALL);
+ if (ret)
+ return ret;
+
+ ret = check_creation_flags(attr.qp_type, attr.create_flags);
+ if (ret)
+ return ret;
+
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN)) {
+ ret = uverbs_copy_from(&attr.source_qpn, attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN);
+ if (ret)
+ return ret;
+ attr.create_flags |= IB_QP_CREATE_SOURCE_QPN;
+ }
+
+ srq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE);
+ if (!IS_ERR(srq)) {
+ if ((srq->srq_type == IB_SRQT_XRC &&
+ attr.qp_type != IB_QPT_XRC_TGT) ||
+ (srq->srq_type != IB_SRQT_XRC &&
+ attr.qp_type == IB_QPT_XRC_TGT))
+ return -EINVAL;
+ attr.srq = srq;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_QP_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+ obj->uevent.uobject.user_handle = user_handle;
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.send_cq = send_cq;
+ attr.recv_cq = recv_cq;
+ attr.xrcd = xrcd;
+ if (attr.create_flags & IB_UVERBS_QP_CREATE_SQ_SIG_ALL) {
+		/* This create flag is uverbs-only; mask it off before calling
+		 * the driver. It exists so that ioctl users do not need an
+		 * extra attribute just to request signaled completion on all
+		 * send WRs.
+		 */
+ attr.create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+ attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ } else {
+ attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ }
+
+ set_caps(&attr, &cap, true);
+ mutex_init(&obj->mcast_lock);
+
+ if (attr.qp_type == IB_QPT_XRC_TGT)
+ qp = ib_create_qp(pd, &attr);
+ else
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
+ obj);
+
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+
+ if (attr.qp_type != IB_QPT_XRC_TGT) {
+ atomic_inc(&pd->usecnt);
+ if (attr.send_cq)
+ atomic_inc(&attr.send_cq->usecnt);
+ if (attr.recv_cq)
+ atomic_inc(&attr.recv_cq->usecnt);
+ if (attr.srq)
+ atomic_inc(&attr.srq->usecnt);
+ if (attr.rwq_ind_tbl)
+ atomic_inc(&attr.rwq_ind_tbl->usecnt);
+ } else {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ /* It is done in _ib_create_qp for other QP types */
+ qp->uobject = obj;
+ }
+
+ obj->uevent.uobject.object = qp;
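+	/*
+	 * Past this point failures are returned as-is; aborting the
+	 * uobject tears the QP down through uverbs_free_qp().
+	 */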
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_QP_HANDLE);
+
+ if (attr.qp_type != IB_QPT_XRC_TGT) {
+ ret = ib_create_qp_security(qp, device);
+ if (ret)
+ return ret;
+ }
+
+ set_caps(&attr, &cap, false);
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ UVERBS_ATTR_CREATE_QP_RESP_CAP, &cap,
+ sizeof(cap));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ &qp->qp_num,
+ sizeof(qp->qp_num));
+
+ return ret;
+err_put:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_QP_TYPE,
+ enum ib_uverbs_qp_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_QP_FLAGS,
+ enum ib_uverbs_qp_create_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_SOURCE_QPN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_QP_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_QP_HANDLE);
+ struct ib_uqp_object *obj =
+ container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ struct ib_uverbs_destroy_qp_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_QP_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_QP_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_qp[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c
new file mode 100644
index 000000000000..c0ecbba26bf4
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_srq.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uobject, struct ib_usrq_object,
+ uevent.uobject);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uevent);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_usrq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_SRQ_PD_HANDLE);
+ struct ib_srq_init_attr attr = {};
+ struct ib_uobject *xrcd_uobj;
+ struct ib_srq *srq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from(&attr.attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.srq_limit, attrs,
+ UVERBS_ATTR_CREATE_SRQ_LIMIT);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_SRQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.srq_type, attrs,
+ UVERBS_ATTR_CREATE_SRQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (ib_srq_has_cq(attr.srq_type)) {
+ attr.ext.cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE);
+ if (IS_ERR(attr.ext.cq))
+ return PTR_ERR(attr.ext.cq);
+ }
+
+ switch (attr.srq_type) {
+ case IB_UVERBS_SRQT_XRC:
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd)
+ return -EINVAL;
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ break;
+ case IB_UVERBS_SRQT_TM:
+ ret = uverbs_copy_from(&attr.ext.tag_matching.max_num_tags,
+ attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS);
+ if (ret)
+ return ret;
+ break;
+ case IB_UVERBS_SRQT_BASIC:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_SRQ_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ attr.event_handler = ib_uverbs_srq_event_handler;
+ obj->uevent.uobject.user_handle = user_handle;
+
+ srq = ib_create_srq_user(pd, &attr, obj, &attrs->driver_udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = srq;
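+	/* Later failures are unwound by the uobject abort calling
+	 * uverbs_free_srq(), so only the response copies remain below.
+	 */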
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ &attr.attr.max_wr,
+ sizeof(attr.attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ &attr.attr.max_sge,
+ sizeof(attr.attr.max_sge));
+ if (ret)
+ return ret;
+
+ if (attr.srq_type == IB_SRQT_XRC) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ &srq->ext.xrc.srq_num,
+ sizeof(srq->ext.xrc.srq_num));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ if (attr.srq_type == IB_SRQT_XRC)
+ atomic_dec(&obj->uxrcd->refcnt);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_SRQ_TYPE,
+ enum ib_uverbs_srq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_LIMIT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_SRQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_SRQ_HANDLE);
+ struct ib_usrq_object *obj =
+ container_of(uobj, struct ib_usrq_object, uevent.uobject);
+ struct ib_uverbs_destroy_srq_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_SRQ_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_SRQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_srq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_srq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c
new file mode 100644
index 000000000000..cad842ede077
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_wq.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq(wq, &attrs->driver_udata);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_uevent(&uwq->uevent);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uwq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_PD_HANDLE);
+ struct ib_cq *cq =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_CQ_HANDLE);
+ struct ib_wq_init_attr wq_init_attr = {};
+ struct ib_wq *wq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_get_flags32(&wq_init_attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_WQ_FLAGS,
+ IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING |
+ IB_UVERBS_WQ_FLAGS_SCATTER_FCS |
+ IB_UVERBS_WQ_FLAGS_DELAY_DROP |
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_WQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&wq_init_attr.wq_type, attrs,
+ UVERBS_ATTR_CREATE_WQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (wq_init_attr.wq_type != IB_WQT_RQ)
+ return -EINVAL;
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_WQ_EVENT_FD);
+ obj->uevent.uobject.user_handle = user_handle;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ wq_init_attr.wq_context = attrs->ufile;
+ wq_init_attr.cq = cq;
+
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
+ if (IS_ERR(wq)) {
+ ret = PTR_ERR(wq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = wq;
+ wq->wq_type = wq_init_attr.wq_type;
+ wq->cq = cq;
+ wq->pd = pd;
+ wq->device = pd->device;
+ wq->wq_context = wq_init_attr.wq_context;
+ atomic_set(&wq->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&cq->usecnt);
+ wq->uobject = obj;
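+	/* As with QP/SRQ creation, errors after this point are unwound by
+	 * uverbs_free_wq() when the uobject is aborted.
+	 */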
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ &wq_init_attr.max_wr,
+ sizeof(wq_init_attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ &wq_init_attr.max_sge,
+ sizeof(wq_init_attr.max_sge));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ &wq->wq_num,
+ sizeof(wq->wq_num));
+ return ret;
+
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_WQ_TYPE,
+ enum ib_wq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_WQ_FLAGS,
+ enum ib_uverbs_wq_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_WQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_WQ_HANDLE);
+ struct ib_uwq_object *obj =
+ container_of(uobj, struct ib_uwq_object, uevent.uobject);
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_WQ_RESP,
+ &obj->uevent.events_reported,
+ sizeof(obj->uevent.events_reported));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_WQ_RESP,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_wq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 3f121ac31e0a..5addc8fae3f3 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -634,6 +634,9 @@ static const struct uapi_definition uverbs_core_api[] = {
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
UAPI_DEF_CHAIN(uverbs_def_obj_intf),
UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_obj_qp),
+ UAPI_DEF_CHAIN(uverbs_def_obj_srq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_wq),
UAPI_DEF_CHAIN(uverbs_def_write_intf),
{},
};
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 56a71337112c..53d6505c0c7b 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -50,6 +50,7 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include <rdma/rw.h>
+#include <rdma/lag.h>
#include "core_priv.h"
#include <trace/events/rdma_core.h>
@@ -500,8 +501,10 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ struct net_device *xmit_slave)
{
+ struct rdma_ah_init_attr init_attr = {};
struct ib_device *device = pd->device;
struct ib_ah *ah;
int ret;
@@ -521,8 +524,11 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
ah->pd = pd;
ah->type = ah_attr->type;
ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+ init_attr.ah_attr = ah_attr;
+ init_attr.flags = flags;
+ init_attr.xmit_slave = xmit_slave;
- ret = device->ops.create_ah(ah, ah_attr, flags, udata);
+ ret = device->ops.create_ah(ah, &init_attr, udata);
if (ret) {
kfree(ah);
return ERR_PTR(ret);
@@ -547,15 +553,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
u32 flags)
{
const struct ib_gid_attr *old_sgid_attr;
+ struct net_device *slave;
struct ib_ah *ah;
int ret;
ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
if (ret)
return ERR_PTR(ret);
-
- ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
-
+ slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ?
+ GFP_KERNEL : GFP_ATOMIC);
+ if (IS_ERR(slave)) {
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return (void *)slave;
+ }
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
+ rdma_lag_put_ah_roce_slave(slave);
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
}
@@ -594,7 +607,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
}
}
- ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
+ udata, NULL);
out:
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
@@ -967,15 +981,29 @@ EXPORT_SYMBOL(rdma_destroy_ah_user);
/* Shared receive queues */
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr)
+/**
+ * ib_create_srq_user - Creates a SRQ associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ * SRQ. If SRQ creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created SRQ.
+ * @uobject: uobject pointer if this is not a kernel SRQ
+ * @udata: udata pointer if this is not a kernel SRQ
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return. If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_usrq_object *uobject,
+ struct ib_udata *udata)
{
struct ib_srq *srq;
int ret;
- if (!pd->device->ops.create_srq)
- return ERR_PTR(-EOPNOTSUPP);
-
srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -985,6 +1013,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->srq_type = srq_init_attr->srq_type;
+ srq->uobject = uobject;
if (ib_srq_has_cq(srq->srq_type)) {
srq->ext.cq = srq_init_attr->ext.cq;
@@ -996,7 +1025,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
}
atomic_inc(&pd->usecnt);
- ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL);
+ ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
if (ret) {
atomic_dec(&srq->pd->usecnt);
if (srq->srq_type == IB_SRQT_XRC)
@@ -1009,7 +1038,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
return srq;
}
-EXPORT_SYMBOL(ib_create_srq);
+EXPORT_SYMBOL(ib_create_srq_user);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
@@ -1633,11 +1662,35 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
const struct ib_gid_attr *old_sgid_attr_alt_av;
int ret;
+ attr->xmit_slave = NULL;
if (attr_mask & IB_QP_AV) {
ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
&old_sgid_attr_av);
if (ret)
return ret;
+
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ struct net_device *slave;
+
+ /*
+ * If the user provided the qp_attr then we have to
+			 * resolve it. Kernel users have to provide already
+ * resolved rdma_ah_attr's.
+ */
+ if (udata) {
+ ret = ib_resolve_eth_dmac(qp->device,
+ &attr->ah_attr);
+ if (ret)
+ goto out_av;
+ }
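+			/* Under RoCE LAG, pick the bond slave netdev that
+			 * will transmit for this address handle.
+			 */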
+ slave = rdma_lag_get_ah_roce_slave(qp->device,
+ &attr->ah_attr,
+ GFP_KERNEL);
+ if (IS_ERR(slave))
+ goto out_av;
+ attr->xmit_slave = slave;
+ }
}
if (attr_mask & IB_QP_ALT_PATH) {
/*
@@ -1664,18 +1717,6 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
}
- /*
- * If the user provided the qp_attr then we have to resolve it. Kernel
- * users have to provide already resolved rdma_ah_attr's
- */
- if (udata && (attr_mask & IB_QP_AV) &&
- attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
- is_qp_type_connected(qp)) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto out;
- }
-
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
dev_warn(&qp->device->dev,
@@ -1717,8 +1758,10 @@ out:
if (attr_mask & IB_QP_ALT_PATH)
rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
out_av:
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
+ rdma_lag_put_ah_roce_slave(attr->xmit_slave);
rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
+ }
return ret;
}
@@ -1962,6 +2005,9 @@ EXPORT_SYMBOL(__ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
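+	/* Shared CQs from the CQ pool must not be modified by their
+	 * consumers.
+	 */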
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
return cq->device->ops.modify_cq ?
cq->device->ops.modify_cq(cq, cq_count,
cq_period) : -EOPNOTSUPP;
@@ -1970,6 +2016,9 @@ EXPORT_SYMBOL(rdma_set_cq_moderation);
int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
+ if (WARN_ON_ONCE(cq->shared))
+ return -EOPNOTSUPP;
+
if (atomic_read(&cq->usecnt))
return -EBUSY;
@@ -1982,6 +2031,9 @@ EXPORT_SYMBOL(ib_destroy_cq_user);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
return cq->device->ops.resize_cq ?
cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
@@ -2160,54 +2212,6 @@ out:
}
EXPORT_SYMBOL(ib_alloc_mr_integrity);
-/* "Fast" memory regions */
-
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
-{
- struct ib_fmr *fmr;
-
- if (!pd->device->ops.alloc_fmr)
- return ERR_PTR(-EOPNOTSUPP);
-
- fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr);
- if (!IS_ERR(fmr)) {
- fmr->device = pd->device;
- fmr->pd = pd;
- atomic_inc(&pd->usecnt);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_alloc_fmr);
-
-int ib_unmap_fmr(struct list_head *fmr_list)
-{
- struct ib_fmr *fmr;
-
- if (list_empty(fmr_list))
- return 0;
-
- fmr = list_entry(fmr_list->next, struct ib_fmr, list);
- return fmr->device->ops.unmap_fmr(fmr_list);
-}
-EXPORT_SYMBOL(ib_unmap_fmr);
-
-int ib_dealloc_fmr(struct ib_fmr *fmr)
-{
- struct ib_pd *pd;
- int ret;
-
- pd = fmr->pd;
- ret = fmr->device->ops.dealloc_fmr(fmr);
- if (!ret)
- atomic_dec(&pd->usecnt);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_fmr);
-
/* Multicast groups */
static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
@@ -2574,6 +2578,7 @@ EXPORT_SYMBOL(ib_map_mr_sg_pi);
* @page_size: page vector desired page size
*
* Constraints:
+ *
* - The first sg element is allowed to have an offset.
* - Each sg element must either be aligned to page_size or virtually
* contiguous to the previous element. In case an sg element has a
@@ -2607,10 +2612,12 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
- * @sg_offset_p: IN: start offset in bytes into sg
- * OUT: offset in bytes for element n of the sg of the first
+ * @sg_offset_p: ==== =======================================================
+ * IN start offset in bytes into sg
+ * OUT offset in bytes for element n of the sg of the first
* byte that has not been processed where n is the return
* value of this function.
+ * ==== =======================================================
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest