summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Margolin <mrgolin@amazon.com>2022-08-18 17:04:49 +0300
committerLeon Romanovsky <leonro@nvidia.com>2022-08-21 12:35:59 +0300
commitdc13fbf79ec8f983fc398cd200ed12973f390957 (patch)
treec3eb939641374caafc6d0b647a4f62fbace1956a
parentb722d3e63fcc95674bd4dd92bbbfb3bd9de12380 (diff)
RDMA/efa: Support CQ receive entries with source GID
Add a parameter for create CQ admin command to set source address on receive completion descriptors. Report capability for this feature through query device verb. Link: https://lore.kernel.org/r/20220818140449.414-1-mrgolin@amazon.com Reviewed-by: Firas Jahjah <firasj@amazon.com> Reviewed-by: Yossi Leybovich <sleybo@amazon.com> Signed-off-by: Daniel Kranzdorf <dkkranzd@amazon.com> Signed-off-by: Michael Margolin <mrgolin@amazon.com> Signed-off-by: Leon Romanovsky <leon@kernel.org>
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h6
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c5
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h3
-rw-r--r--drivers/infiniband/hw/efa/efa_io_defs.h289
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c11
-rw-r--r--include/uapi/rdma/efa-abi.h4
6 files changed, 312 insertions, 6 deletions
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 0b0b93b529f3..d4b9226088bd 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -444,7 +444,10 @@ struct efa_admin_create_cq_cmd {
/*
* 4:0 : cq_entry_size_words - size of CQ entry in
* 32-bit words, valid values: 4, 8.
- * 7:5 : reserved7 - MBZ
+ * 5 : set_src_addr - If set, source address will be
+ * filled on RX completions from unknown senders.
+ * Requires 8 words CQ entry size.
+ * 7:6 : reserved7 - MBZ
*/
u8 cq_caps_2;
@@ -980,6 +983,7 @@ struct efa_admin_host_info {
#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5)
/* create_cq_resp */
#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0)
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index fb405da4e1db..8f8885e002ba 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -168,7 +168,10 @@ int efa_com_create_cq(struct efa_com_dev *edev,
EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1);
create_cmd.eqn = params->eqn;
}
-
+ if (params->set_src_addr) {
+ EFA_SET(&create_cmd.cq_caps_2,
+ EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1);
+ }
efa_com_set_dma_addr(params->dma_addr,
&create_cmd.cq_ba.mem_addr_high,
&create_cmd.cq_ba.mem_addr_low);
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index c33010bbf9e8..0898ad5bc340 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -75,7 +75,8 @@ struct efa_com_create_cq_params {
u16 uarn;
u16 eqn;
u8 entry_size_in_bytes;
- bool interrupt_mode_enabled;
+ u8 interrupt_mode_enabled : 1;
+ u8 set_src_addr : 1;
};
struct efa_com_create_cq_result {
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
new file mode 100644
index 000000000000..17ba8984b11e
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -0,0 +1,289 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_IO_H_
+#define _EFA_IO_H_
+
+#define EFA_IO_TX_DESC_NUM_BUFS 2
+#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
+#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
+#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
+
+enum efa_io_queue_type {
+ /* send queue (of a QP) */
+ EFA_IO_SEND_QUEUE = 1,
+ /* recv queue (of a QP) */
+ EFA_IO_RECV_QUEUE = 2,
+};
+
+enum efa_io_send_op_type {
+ /* send message */
+ EFA_IO_SEND = 0,
+ /* RDMA read */
+ EFA_IO_RDMA_READ = 1,
+};
+
+enum efa_io_comp_status {
+ /* Successful completion */
+ EFA_IO_COMP_STATUS_OK = 0,
+ /* Flushed during QP destroy */
+ EFA_IO_COMP_STATUS_FLUSHED = 1,
+ /* Internal QP error */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
+ /* Bad operation type */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3,
+ /* Bad AH */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
+ /* LKEY not registered or does not match IOVA */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
+ /* Message too long */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
+ /* Destination ENI is down or does not run EFA */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
+ /* Connection was reset by remote side */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
+ /* Bad dest QP number (QP does not exist or is in error state) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9,
+ /* Destination resource not ready (no WQEs posted on RQ) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10,
+ /* Receiver SGL too short */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
+ /* Unexpected status returned by responder */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
+ /* Unresponsive remote - detected locally */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
+};
+
+struct efa_io_tx_meta_desc {
+ /* Verbs-generated Request ID */
+ u16 req_id;
+
+ /*
+ * control flags
+ * 3:0 : op_type - operation type: send/rdma/fast mem
+ * ops/etc
+ * 4 : has_imm - immediate_data field carries valid
+ * data.
+ * 5 : inline_msg - inline mode - inline message data
+ * follows this descriptor (no buffer descriptors).
+ * Note that it is different from immediate data
+ * 6 : meta_extension - Extended metadata. MBZ
+ * 7 : meta_desc - Indicates metadata descriptor.
+ * Must be set.
+ */
+ u8 ctrl1;
+
+ /*
+ * control flags
+ * 0 : phase
+ * 1 : reserved25 - MBZ
+ * 2 : first - Indicates first descriptor in
+ * transaction. Must be set.
+ * 3 : last - Indicates last descriptor in
+ * transaction. Must be set.
+ * 4 : comp_req - Indicates whether completion should
+ * be posted, after packet is transmitted. Valid only
+ * for the first descriptor
+ * 7:5 : reserved29 - MBZ
+ */
+ u8 ctrl2;
+
+ u16 dest_qp_num;
+
+ /*
+ * If inline_msg bit is set, length of inline message in bytes,
+ * otherwise length of SGL (number of buffers).
+ */
+ u16 length;
+
+ /*
+ * immediate data: if has_imm is set, then this field is included
+ * within Tx message and reported in remote Rx completion.
+ */
+ u32 immediate_data;
+
+ u16 ah;
+
+ u16 reserved;
+
+ /* Queue key */
+ u32 qkey;
+
+ u8 reserved2[12];
+};
+
+/*
+ * Tx queue buffer descriptor, for any transport type. Preceded by metadata
+ * descriptor.
+ */
+struct efa_io_tx_buf_desc {
+ /* length in bytes */
+ u32 length;
+
+ /*
+ * 23:0 : lkey - local memory translation key
+ * 31:24 : reserved - MBZ
+ */
+ u32 lkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_remote_mem_addr {
+ /* length in bytes */
+ u32 length;
+
+ /* remote memory translation key */
+ u32 rkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_rdma_req {
+ /* Remote memory address */
+ struct efa_io_remote_mem_addr remote_mem;
+
+ /* Local memory address */
+ struct efa_io_tx_buf_desc local_mem[1];
+};
+
+/*
+ * Tx WQE, composed of tx meta descriptors followed by either tx buffer
+ * descriptors or inline data
+ */
+struct efa_io_tx_wqe {
+ /* TX meta */
+ struct efa_io_tx_meta_desc meta;
+
+ union {
+ /* Send buffer descriptors */
+ struct efa_io_tx_buf_desc sgl[2];
+
+ u8 inline_data[32];
+
+ /* RDMA local and remote memory addresses */
+ struct efa_io_rdma_req rdma_req;
+ } data;
+};
+
+/*
+ * Rx buffer descriptor; RX WQE is composed of one or more RX buffer
+ * descriptors.
+ */
+struct efa_io_rx_desc {
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer Pointer[63:32] */
+ u32 buf_addr_hi;
+
+ /* Verbs-generated request id. */
+ u16 req_id;
+
+ /* Length in bytes. */
+ u16 length;
+
+ /*
+ * LKey and control flags
+ * 23:0 : lkey
+ * 29:24 : reserved - MBZ
+ * 30 : first - Indicates first descriptor in WQE
+ * 31 : last - Indicates last descriptor in WQE
+ */
+ u32 lkey_ctrl;
+};
+
+/* Common IO completion descriptor */
+struct efa_io_cdesc_common {
+ /*
+ * verbs-generated request ID, as provided in the completed tx or rx
+ * descriptor.
+ */
+ u16 req_id;
+
+ u8 status;
+
+ /*
+ * flags
+ * 0 : phase - Phase bit
+ * 2:1 : q_type - enum efa_io_queue_type: send/recv
+ * 3 : has_imm - indicates that immediate data is
+ * present - for RX completions only
+ * 7:4 : reserved28 - MBZ
+ */
+ u8 flags;
+
+ /* local QP number */
+ u16 qp_num;
+
+ /* Transferred length */
+ u16 length;
+};
+
+/* Tx completion descriptor */
+struct efa_io_tx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+};
+
+/* Rx Completion Descriptor */
+struct efa_io_rx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+
+ /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */
+ u16 ah;
+
+ u16 src_qp_num;
+
+ /* Immediate data */
+ u32 imm;
+};
+
+/* Extended Rx Completion Descriptor */
+struct efa_io_rx_cdesc_ex {
+ /* Base RX completion info */
+ struct efa_io_rx_cdesc rx_cdesc_base;
+
+ /*
+ * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is
+ * enabled.
+ */
+ u8 src_addr[16];
+};
+
+/* tx_meta_desc */
+#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0)
+#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4)
+#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5)
+#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6)
+#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7)
+#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0)
+#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2)
+#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3)
+#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4)
+
+/* tx_buf_desc */
+#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+
+/* rx_desc */
+#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
+#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
+#define EFA_IO_RX_DESC_LAST_MASK BIT(31)
+
+/* cdesc_common */
+#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0)
+#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
+#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
+
+#endif /* _EFA_IO_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index ecfe70eb5efb..31454643f8c5 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/dma-buf.h>
@@ -15,6 +15,7 @@
#include <rdma/uverbs_ioctl.h>
#include "efa.h"
+#include "efa_io_defs.h"
enum {
EFA_MMAP_DMA_PAGE = 0,
@@ -242,6 +243,7 @@ int efa_query_device(struct ib_device *ibdev,
resp.max_rq_wr = dev_attr->max_rq_depth;
resp.max_rdma_size = dev_attr->max_rdma_size;
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
if (EFA_DEV_CAP(dev, RDMA_READ))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
@@ -1064,6 +1066,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct efa_ibv_create_cq cmd = {};
struct efa_cq *cq = to_ecq(ibcq);
int entries = attr->cqe;
+ bool set_src_addr;
int err;
ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
@@ -1109,7 +1112,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_out;
}
- if (!cmd.cq_entry_size) {
+ set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
+ if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
+ (set_src_addr ||
+ cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
ibdev_dbg(ibdev,
"Invalid entry size [%u]\n", cmd.cq_entry_size);
err = -EINVAL;
@@ -1138,6 +1144,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
params.dma_addr = cq->dma_addr;
params.entry_size_in_bytes = cmd.cq_entry_size;
params.num_sub_cqs = cmd.num_sub_cqs;
+ params.set_src_addr = set_src_addr;
if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
cq->eq = efa_vec2eq(dev, attr->comp_vector);
params.eqn = cq->eq->eeq.eqn;
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index 08035ccf1fff..163ac79556d6 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef EFA_ABI_USER_H
@@ -54,6 +54,7 @@ struct efa_ibv_alloc_pd_resp {
enum {
EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0,
+ EFA_CREATE_CQ_WITH_SGID = 1 << 1,
};
struct efa_ibv_create_cq {
@@ -118,6 +119,7 @@ enum {
EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0,
EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2,
+ EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID = 1 << 3,
};
struct efa_ibv_ex_query_device_resp {