summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-07-15 12:42:44 -0700
committerJakub Kicinski <kuba@kernel.org>2024-07-15 12:42:45 -0700
commitdd3cd3ca691df33292e73996f92d2a98906f69c5 (patch)
tree6e19bab443d59640b4a49e33b9c62aea28b6b028
parente7afb958fa90c4c9bd1d375e5292d8ea5f8f11b5 (diff)
parent0477d5168bbb8767275822830b47112519a8446d (diff)
Merge tag 'aux-sysfs-irqs' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Saeed Mahameed says: ==================== aux-sysfs-irqs Shay Says: ========== Introduce auxiliary bus IRQs sysfs Today, PCI PFs and VFs, which are anchored on the PCI bus, display their IRQ information in the <pci_device>/msi_irqs/<irq_num> sysfs files. PCI subfunctions (SFs) are similar to PFs and VFs and these SFs are anchored on the auxiliary bus. However, these PCI SFs lack such IRQ information on the auxiliary bus, leaving users without visibility into which IRQs are used by the SFs. This absence makes it impossible to debug situations and to understand the source of interrupts/SFs for performance tuning and debug. Additionally, the SFs are multifunctional devices supporting RDMA, network devices, clocks, and more, similar to their peer PCI PFs and VFs. Therefore, it is desirable to have SFs' IRQ information available at the bus/device level. To overcome the above limitations, this short series extends the auxiliary bus to display IRQ information in sysfs, similar to that of PFs and VFs. It adds an 'irqs' directory under the auxiliary device and includes an <irq_num> sysfs file within it. For example: $ ls /sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs/ 50 51 52 53 54 55 56 57 58 Patch summary: patch-1 adds auxiliary bus to support irqs used by auxiliary device patch-2 mlx5 driver using exposing irqs for PCI SF devices via auxiliary bus ========== * tag 'aux-sysfs-irqs' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux: net/mlx5: Expose SFs IRQs driver core: auxiliary bus: show auxiliary device IRQs RDMA/mlx5: Add Qcounters req_transport_retries_exceeded/req_rnr_retries_exceeded net/mlx5: Reimplement write combining test ==================== Link: https://patch.msgid.link/20240711213140.256997-1-saeed@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--Documentation/ABI/testing/sysfs-bus-auxiliary9
-rw-r--r--drivers/base/Makefile1
-rw-r--r--drivers/base/auxiliary.c1
-rw-r--r--drivers/base/auxiliary_sysfs.c113
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c19
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c198
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wc.c434
-rw-r--r--include/linux/auxiliary_bus.h24
-rw-r--r--include/linux/mlx5/driver.h11
-rw-r--r--include/linux/mlx5/mlx5_ifc.h6
20 files changed, 651 insertions, 246 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-auxiliary b/Documentation/ABI/testing/sysfs-bus-auxiliary
new file mode 100644
index 000000000000..cc856079690f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-auxiliary
@@ -0,0 +1,9 @@
+What: /sys/bus/auxiliary/devices/.../irqs/
+Date: April, 2024
+Contact: Shay Drory <shayd@nvidia.com>
+Description:
+ The /sys/devices/.../irqs directory contains a variable set of
+ files, with each file is named as irq number similar to PCI PF
+ or VF's irq number located in msi_irqs directory.
+ These irq files are added and removed dynamically when an IRQ
+ is requested and freed respectively for the PCI SF.
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 3079bfe53d04..7fb21768ca36 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_NUMA) += node.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o
ifeq ($(CONFIG_SYSFS),y)
obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_AUXILIARY_BUS) += auxiliary_sysfs.o
endif
obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
obj-$(CONFIG_REGMAP) += regmap/
diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c
index d3a2c40c2f12..3f01f4ec69e5 100644
--- a/drivers/base/auxiliary.c
+++ b/drivers/base/auxiliary.c
@@ -287,6 +287,7 @@ int auxiliary_device_init(struct auxiliary_device *auxdev)
dev->bus = &auxiliary_bus_type;
device_initialize(&auxdev->dev);
+ mutex_init(&auxdev->sysfs.lock);
return 0;
}
EXPORT_SYMBOL_GPL(auxiliary_device_init);
diff --git a/drivers/base/auxiliary_sysfs.c b/drivers/base/auxiliary_sysfs.c
new file mode 100644
index 000000000000..754f21730afd
--- /dev/null
+++ b/drivers/base/auxiliary_sysfs.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include <linux/auxiliary_bus.h>
+#include <linux/slab.h>
+
+#define AUXILIARY_MAX_IRQ_NAME 11
+
+struct auxiliary_irq_info {
+ struct device_attribute sysfs_attr;
+ char name[AUXILIARY_MAX_IRQ_NAME];
+};
+
+static struct attribute *auxiliary_irq_attrs[] = {
+ NULL
+};
+
+static const struct attribute_group auxiliary_irqs_group = {
+ .name = "irqs",
+ .attrs = auxiliary_irq_attrs,
+};
+
+static int auxiliary_irq_dir_prepare(struct auxiliary_device *auxdev)
+{
+ int ret = 0;
+
+ guard(mutex)(&auxdev->sysfs.lock);
+ if (auxdev->sysfs.irq_dir_exists)
+ return 0;
+
+ ret = devm_device_add_group(&auxdev->dev, &auxiliary_irqs_group);
+ if (ret)
+ return ret;
+
+ auxdev->sysfs.irq_dir_exists = true;
+ xa_init(&auxdev->sysfs.irqs);
+ return 0;
+}
+
+/**
+ * auxiliary_device_sysfs_irq_add - add a sysfs entry for the given IRQ
+ * @auxdev: auxiliary bus device to add the sysfs entry.
+ * @irq: The associated interrupt number.
+ *
+ * This function should be called after auxiliary device have successfully
+ * received the irq.
+ * The driver is responsible to add a unique irq for the auxiliary device. The
+ * driver can invoke this function from multiple thread context safely for
+ * unique irqs of the auxiliary devices. The driver must not invoke this API
+ * multiple times if the irq is already added previously.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq)
+{
+ struct auxiliary_irq_info *info __free(kfree) = NULL;
+ struct device *dev = &auxdev->dev;
+ int ret;
+
+ ret = auxiliary_irq_dir_prepare(auxdev);
+ if (ret)
+ return ret;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ sysfs_attr_init(&info->sysfs_attr.attr);
+ snprintf(info->name, AUXILIARY_MAX_IRQ_NAME, "%d", irq);
+
+ ret = xa_insert(&auxdev->sysfs.irqs, irq, info, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ info->sysfs_attr.attr.name = info->name;
+ ret = sysfs_add_file_to_group(&dev->kobj, &info->sysfs_attr.attr,
+ auxiliary_irqs_group.name);
+ if (ret)
+ goto sysfs_add_err;
+
+ xa_store(&auxdev->sysfs.irqs, irq, no_free_ptr(info), GFP_KERNEL);
+ return 0;
+
+sysfs_add_err:
+ xa_erase(&auxdev->sysfs.irqs, irq);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_add);
+
+/**
+ * auxiliary_device_sysfs_irq_remove - remove a sysfs entry for the given IRQ
+ * @auxdev: auxiliary bus device to add the sysfs entry.
+ * @irq: the IRQ to remove.
+ *
+ * This function should be called to remove an IRQ sysfs entry.
+ * The driver must invoke this API when IRQ is released by the device.
+ */
+void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq)
+{
+ struct auxiliary_irq_info *info __free(kfree) = xa_load(&auxdev->sysfs.irqs, irq);
+ struct device *dev = &auxdev->dev;
+
+ if (!info) {
+ dev_err(&auxdev->dev, "IRQ %d doesn't exist\n", irq);
+ return;
+ }
+ sysfs_remove_file_from_group(&dev->kobj, &info->sysfs_attr.attr,
+ auxiliary_irqs_group.name);
+ xa_erase(&auxdev->sysfs.irqs, irq);
+}
+EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_remove);
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 8300ce622835..4f6c1968a2ee 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -83,6 +83,8 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(resp_remote_access_errors),
INIT_Q_COUNTER(resp_cqe_flush_error),
INIT_Q_COUNTER(req_cqe_flush_error),
+ INIT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter roce_accl_cnts[] = {
@@ -102,6 +104,8 @@ static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
+ INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 43660c831b22..086de6a022f9 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
}
resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (dev->wc_support)
+ if (mlx5_wc_support_get(dev->mdev))
resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
log_bf_reg_size);
resp->cache_line_size = cache_line_size();
@@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_ALLOC_WC:
- if (!dev->wc_support)
+ if (!mlx5_wc_support_get(dev->mdev))
return -EPERM;
fallthrough;
case MLX5_IB_MMAP_NC_PAGE:
@@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
return -EOPNOTSUPP;
- if (!to_mdev(c->ibucontext.device)->wc_support &&
+ if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
return -EOPNOTSUPP;
@@ -3766,18 +3766,6 @@ err:
return err;
}
-static int mlx5_ib_enable_driver(struct ib_device *dev)
-{
- struct mlx5_ib_dev *mdev = to_mdev(dev);
- int ret;
-
- ret = mlx5_ib_test_wc(mdev);
- mlx5_ib_dbg(mdev, "Write-Combining %s",
- mdev->wc_support ? "supported" : "not supported");
-
- return ret;
-}
-
static const struct ib_device_ops mlx5_ib_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_MLX5,
@@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.device_group = &mlx5_attr_group,
- .enable_driver = mlx5_ib_enable_driver,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 5a22be14d958..af321f6ef7f5 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -30,10 +30,8 @@
* SOFTWARE.
*/
-#include <linux/io.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
-#include <linux/jiffies.h>
/*
* Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
@@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
return 0;
return page_size;
}
-
-#define WR_ID_BF 0xBF
-#define WR_ID_END 0xBAD
-#define TEST_WC_NUM_WQES 255
-#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
-static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
- bool signaled)
-{
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
- struct mlx5_wqe_ctrl_seg *ctrl;
- struct mlx5_bf *bf = &qp->bf;
- __be32 mmio_wqe[16] = {};
- unsigned long flags;
- unsigned int idx;
-
- if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
- return -EIO;
-
- spin_lock_irqsave(&qp->sq.lock, flags);
-
- idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
-
- memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
- ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
- ctrl->opmod_idx_opcode =
- cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
- ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
- (qp->trans_qp.base.mqp.qpn << 8));
-
- qp->sq.wrid[idx] = wr_id;
- qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
- qp->sq.wqe_head[idx] = qp->sq.head + 1;
- qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
- MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
- qp->sq.head++;
-
- memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
- ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
- MLX5_WQE_CTRL_CQ_UPDATE;
-
- /* Make sure that descriptors are written before
- * updating doorbell record and ringing the doorbell
- */
- wmb();
-
- qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
- /* Make sure doorbell record is visible to the HCA before
- * we hit doorbell
- */
- wmb();
- __iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe,
- sizeof(mmio_wqe) / 8);
-
- bf->offset ^= bf->buf_size;
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return 0;
-}
-
-static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
-{
- int ret;
- struct ib_wc wc = {};
- unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
-
- do {
- ret = ib_poll_cq(cq, 1, &wc);
- if (ret < 0 || wc.status)
- return ret < 0 ? ret : -EINVAL;
- if (ret)
- break;
- } while (!time_after(jiffies, end));
-
- if (!ret)
- return -ETIMEDOUT;
-
- if (wc.wr_id != WR_ID_BF)
- ret = 0;
-
- return ret;
-}
-
-static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
-{
- int err, i;
-
- for (i = 0; i < TEST_WC_NUM_WQES; i++) {
- err = post_send_nop(dev, qp, WR_ID_BF, false);
- if (err)
- return err;
- }
-
- return post_send_nop(dev, qp, WR_ID_END, true);
-}
-
-int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
-{
- struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
- int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
- struct ib_qp_init_attr qp_init_attr = {
- .cap = { .max_send_wr = TEST_WC_NUM_WQES },
- .qp_type = IB_QPT_UD,
- .sq_sig_type = IB_SIGNAL_REQ_WR,
- .create_flags = MLX5_IB_QP_CREATE_WC_TEST,
- };
- struct ib_qp_attr qp_attr = { .port_num = 1 };
- struct ib_device *ibdev = &dev->ib_dev;
- struct ib_qp *qp;
- struct ib_cq *cq;
- struct ib_pd *pd;
- int ret;
-
- if (!MLX5_CAP_GEN(dev->mdev, bf))
- return 0;
-
- if (!dev->mdev->roce.roce_en &&
- port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
- if (mlx5_core_is_pf(dev->mdev))
- dev->wc_support = arch_can_pci_mmap_wc();
- return 0;
- }
-
- ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
- if (ret)
- goto print_err;
-
- if (!dev->wc_bfreg.wc)
- goto out1;
-
- pd = ib_alloc_pd(ibdev, 0);
- if (IS_ERR(pd)) {
- ret = PTR_ERR(pd);
- goto out1;
- }
-
- cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
- goto out2;
- }
-
- qp_init_attr.recv_cq = cq;
- qp_init_attr.send_cq = cq;
- qp = ib_create_qp(pd, &qp_init_attr);
- if (IS_ERR(qp)) {
- ret = PTR_ERR(qp);
- goto out3;
- }
-
- qp_attr.qp_state = IB_QPS_INIT;
- ret = ib_modify_qp(qp, &qp_attr,
- IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
- IB_QP_QKEY);
- if (ret)
- goto out4;
-
- qp_attr.qp_state = IB_QPS_RTR;
- ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
- if (ret)
- goto out4;
-
- qp_attr.qp_state = IB_QPS_RTS;
- ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
- if (ret)
- goto out4;
-
- ret = test_wc_do_send(dev, qp);
- if (ret < 0)
- goto out4;
-
- ret = test_wc_poll_cq_result(dev, cq);
- if (ret > 0) {
- dev->wc_support = true;
- ret = 0;
- }
-
-out4:
- ib_destroy_qp(qp);
-out3:
- ib_destroy_cq(cq);
-out2:
- ib_dealloc_pd(pd);
-out1:
- mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
-print_err:
- if (ret)
- mlx5_ib_err(
- dev,
- "Error %d while trying to test write-combining support\n",
- ret);
- return ret;
-}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index f255a12e26a0..b68779e9d86c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -341,7 +341,6 @@ struct mlx5_ib_flow_db {
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
-#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1)
struct wr_list {
u16 opcode;
@@ -1123,7 +1122,6 @@ struct mlx5_ib_dev {
u8 ib_active:1;
u8 is_rep:1;
u8 lag_active:1;
- u8 wc_support:1;
u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
@@ -1149,7 +1147,6 @@ struct mlx5_ib_dev {
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
- struct mlx5_sq_bfreg wc_bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e2164f813607..e8c0fead4062 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
- else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
- qp->bf.bfreg = &dev->wc_bfreg;
else
qp->bf.bfreg = &dev->bfreg;
@@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
return;
}
- if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
- /*
- * Special case, if condition didn't meet, it won't be error,
- * just different in-kernel flow.
- */
- *flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
- return;
- }
mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
}
@@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
IB_QP_CREATE_PCI_WRITE_END_PADDING,
MLX5_CAP_GEN(mdev, end_pad), qp);
- process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
- qp_type != MLX5_IB_QPT_REG_UMR, qp);
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
true, qp);
@@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
return true;
- /* Internal QP used for wc testing, with NOPs in wq */
- if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
- return true;
-
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 76dc5a9b9648..1289475e7be7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
- fw_reset.o qos.o lib/tout.o lib/aso.o
+ fw_reset.o qos.o lib/tout.o lib/aso.o wc.o
#
# Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 4326aa42bf2d..cb7e7e4104af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -714,7 +714,7 @@ err2:
err1:
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
- mlx5_ctrl_irq_release(table->ctrl_irq);
+ mlx5_ctrl_irq_release(dev, table->ctrl_irq);
return err;
}
@@ -730,7 +730,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
- mlx5_ctrl_irq_release(table->ctrl_irq);
+ mlx5_ctrl_irq_release(dev, table->ctrl_irq);
}
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
@@ -918,7 +918,7 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
af_desc.is_managed = 1;
cpumask_copy(&af_desc.mask, cpu_online_mask);
cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
- irq = mlx5_irq_affinity_request(pool, &af_desc);
+ irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
if (IS_ERR(irq))
return PTR_ERR(irq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
index 612e666ec263..f7b01b3f0cba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -112,15 +112,18 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req
/**
* mlx5_irq_affinity_request - request an IRQ according to the given mask.
+ * @dev: mlx5 core device which is requesting the IRQ.
* @pool: IRQ pool to request from.
* @af_desc: affinity descriptor for this IRQ.
*
* This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
+mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
+ struct irq_affinity_desc *af_desc)
{
struct mlx5_irq *least_loaded_irq, *new_irq;
+ int ret;
mutex_lock(&pool->lock);
least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask);
@@ -153,6 +156,16 @@ out:
mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
unlock:
mutex_unlock(&pool->lock);
+ if (mlx5_irq_pool_is_sf_pool(pool)) {
+ ret = auxiliary_device_sysfs_irq_add(mlx5_sf_coredev_to_adev(dev),
+ mlx5_irq_get_irq(least_loaded_irq));
+ if (ret) {
+ mlx5_core_err(dev, "Failed to create sysfs entry for irq %d, ret = %d\n",
+ mlx5_irq_get_irq(least_loaded_irq), ret);
+ mlx5_irq_put(least_loaded_irq);
+ least_loaded_irq = ERR_PTR(ret);
+ }
+ }
return least_loaded_irq;
}
@@ -164,6 +177,9 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i
cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
synchronize_irq(pci_irq_vector(pool->dev->pdev,
mlx5_irq_get_index(irq)));
+ if (mlx5_irq_pool_is_sf_pool(pool))
+ auxiliary_device_sysfs_irq_remove(mlx5_sf_coredev_to_adev(dev),
+ mlx5_irq_get_irq(irq));
if (mlx5_irq_put(irq))
if (pool->irqs_per_cpu)
cpu_put(pool, cpu);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 459a836a5d9c..527da58c7953 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_init(&dev->intf_state_mutex);
lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&dev->mlx5e_res.uplink_netdev_lock);
+ mutex_init(&dev->wc_state_lock);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mutex_destroy(&priv->alloc_mutex);
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
+ mutex_destroy(&dev->wc_state_lock);
mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock);
mutex_destroy(&dev->intf_state_mutex);
lockdep_unregister_key(&dev->lock_key);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a7fd18888b6e..62c770b0eaa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -320,6 +320,12 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev)
return dev->coredev_type == MLX5_COREDEV_SF;
}
+static inline struct auxiliary_device *
+mlx5_sf_coredev_to_adev(struct mlx5_core_dev *mdev)
+{
+ return container_of(mdev->device, struct auxiliary_device, dev);
+}
+
int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
int mlx5_init_one(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 1088114e905d..0881e961d8b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -25,7 +25,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
-void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
+void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct irq_affinity_desc *af_desc,
struct cpu_rmap **rmap);
@@ -36,13 +36,15 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
int mlx5_irq_get_index(struct mlx5_irq *irq);
+int mlx5_irq_get_irq(const struct mlx5_irq *irq);
struct mlx5_irq_pool;
#ifdef CONFIG_MLX5_SF
struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
struct cpumask *used_cpus, u16 vecidx);
-struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
- struct irq_affinity_desc *af_desc);
+struct mlx5_irq *
+mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
+ struct irq_affinity_desc *af_desc);
void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq);
#else
static inline
@@ -53,7 +55,8 @@ struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
}
static inline struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
+mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
+ struct irq_affinity_desc *af_desc)
{
return ERR_PTR(-EOPNOTSUPP);
}
@@ -61,6 +64,7 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *
static inline
void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq)
{
+ mlx5_irq_release_vector(irq);
}
#endif
#endif /* __MLX5_IRQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 86208b86eea8..81a9232a03e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -368,6 +368,11 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
return irq->mask;
}
+int mlx5_irq_get_irq(const struct mlx5_irq *irq)
+{
+ return irq->map.virq;
+}
+
int mlx5_irq_get_index(struct mlx5_irq *irq)
{
return irq->map.index;
@@ -441,11 +446,12 @@ static void _mlx5_irq_release(struct mlx5_irq *irq)
/**
* mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
+ * @dev: mlx5 device that releasing the IRQ.
* @ctrl_irq: ctrl IRQ to be released.
*/
-void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
+void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq)
{
- _mlx5_irq_release(ctrl_irq);
+ mlx5_irq_affinity_irq_release(dev, ctrl_irq);
}
/**
@@ -474,7 +480,7 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
/* Allocate the IRQ in index 0. The vector was already allocated */
irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
} else {
- irq = mlx5_irq_affinity_request(pool, &af_desc);
+ irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
}
return irq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
new file mode 100644
index 000000000000..1bed75eca97d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/io.h>
+#include <linux/mlx5/transobj.h>
+#include "lib/clock.h"
+#include "mlx5_core.h"
+#include "wq.h"
+
+#define TEST_WC_NUM_WQES 255
+#define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES))
+#define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ
+#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
+
+struct mlx5_wc_cq {
+ /* data path - accessed per cqe */
+ struct mlx5_cqwq wq;
+
+ /* data path - accessed per napi poll */
+ struct mlx5_core_cq mcq;
+
+ /* control */
+ struct mlx5_core_dev *mdev;
+ struct mlx5_wq_ctrl wq_ctrl;
+};
+
+struct mlx5_wc_sq {
+ /* data path */
+ u16 cc;
+ u16 pc;
+
+ /* read only */
+ struct mlx5_wq_cyc wq;
+ u32 sqn;
+
+ /* control path */
+ struct mlx5_wq_ctrl wq_ctrl;
+
+ struct mlx5_wc_cq cq;
+ struct mlx5_sq_bfreg bfreg;
+};
+
+static int mlx5_wc_create_cqwq(struct mlx5_core_dev *mdev, void *cqc,
+ struct mlx5_wc_cq *cq)
+{
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ struct mlx5_wq_param param = {};
+ int err;
+ u32 i;
+
+ err = mlx5_cqwq_create(mdev, &param, cqc, &cq->wq, &cq->wq_ctrl);
+ if (err)
+ return err;
+
+ mcq->cqe_sz = 64;
+ mcq->set_ci_db = cq->wq_ctrl.db.db;
+ mcq->arm_db = cq->wq_ctrl.db.db + 1;
+
+ for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
+ struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
+
+ cqe->op_own = 0xf1;
+ }
+
+ cq->mdev = mdev;
+
+ return 0;
+}
+
+static int create_wc_cq(struct mlx5_wc_cq *cq, void *cqc_data)
+{
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_core_dev *mdev = cq->mdev;
+ struct mlx5_core_cq *mcq = &cq->mcq;
+ int err, inlen, eqn;
+ void *in, *cqc;
+
+ err = mlx5_comp_eqn_get(mdev, 0, &eqn);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ sizeof(u64) * cq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+
+ memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc));
+
+ mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+
+ MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
+
+ err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_wc_create_cq(struct mlx5_core_dev *mdev, struct mlx5_wc_cq *cq)
+{
+ void *cqc;
+ int err;
+
+ cqc = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL);
+ if (!cqc)
+ return -ENOMEM;
+
+ MLX5_SET(cqc, cqc, log_cq_size, TEST_WC_LOG_CQ_SZ);
+ MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
+
+ err = mlx5_wc_create_cqwq(mdev, cqc, cq);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create wc cq wq, err=%d\n", err);
+ goto err_create_cqwq;
+ }
+
+ err = create_wc_cq(cq, cqc);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create wc cq, err=%d\n", err);
+ goto err_create_cq;
+ }
+
+ kvfree(cqc);
+ return 0;
+
+err_create_cq:
+ mlx5_wq_destroy(&cq->wq_ctrl);
+err_create_cqwq:
+ kvfree(cqc);
+ return err;
+}
+
+static void mlx5_wc_destroy_cq(struct mlx5_wc_cq *cq)
+{
+ mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
+ mlx5_wq_destroy(&cq->wq_ctrl);
+}
+
+static int create_wc_sq(struct mlx5_core_dev *mdev, void *sqc_data,
+ struct mlx5_wc_sq *sq)
+{
+ void *in, *sqc, *wq;
+ int inlen, err;
+ u8 ts_format;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) * sq->wq_ctrl.buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc));
+ MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
+
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+
+ ts_format = mlx5_is_real_time_sq(mdev) ?
+ MLX5_TIMESTAMP_FORMAT_REAL_TIME :
+ MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
+
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, uar_page, sq->bfreg.index);
+ MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
+
+ mlx5_fill_page_frag_array(&sq->wq_ctrl.buf,
+ (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+
+ err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create wc sq, err=%d\n", err);
+ goto err_create_sq;
+ }
+
+ memset(in, 0, MLX5_ST_SZ_BYTES(modify_sq_in));
+ MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST);
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
+
+ err = mlx5_core_modify_sq(mdev, sq->sqn, in);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to set wc sq(sqn=0x%x) ready, err=%d\n",
+ sq->sqn, err);
+ goto err_modify_sq;
+ }
+
+ kvfree(in);
+ return 0;
+
+err_modify_sq:
+ mlx5_core_destroy_sq(mdev, sq->sqn);
+err_create_sq:
+ kvfree(in);
+ return err;
+}
+
+static int mlx5_wc_create_sq(struct mlx5_core_dev *mdev, struct mlx5_wc_sq *sq)
+{
+ struct mlx5_wq_param param = {};
+ void *sqc_data, *wq;
+ int err;
+
+ sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL);
+ if (!sqc_data)
+ return -ENOMEM;
+
+ wq = MLX5_ADDR_OF(sqc, sqc_data, wq);
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
+ MLX5_SET(wq, wq, log_wq_sz, TEST_WC_SQ_LOG_WQ_SZ);
+
+ err = mlx5_wq_cyc_create(mdev, &param, wq, &sq->wq, &sq->wq_ctrl);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create wc sq wq, err=%d\n", err);
+ goto err_create_wq_cyc;
+ }
+
+ err = create_wc_sq(mdev, sqc_data, sq);
+ if (err)
+ goto err_create_sq;
+
+ mlx5_core_dbg(mdev, "wc sq->sqn = 0x%x created\n", sq->sqn);
+
+ kvfree(sqc_data);
+ return 0;
+
+err_create_sq:
+ mlx5_wq_destroy(&sq->wq_ctrl);
+err_create_wq_cyc:
+ kvfree(sqc_data);
+ return err;
+}
+
+static void mlx5_wc_destroy_sq(struct mlx5_wc_sq *sq)
+{
+ mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn);
+ mlx5_wq_destroy(&sq->wq_ctrl);
+}
+
+static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, bool signaled)
+{
+ int buf_size = (1 << MLX5_CAP_GEN(sq->cq.mdev, log_bf_reg_size)) / 2;
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ __be32 mmio_wqe[16] = {};
+ u16 pi;
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
+ memset(ctrl, 0, sizeof(*ctrl));
+ ctrl->opmod_idx_opcode =
+ cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_NOP);
+ ctrl->qpn_ds =
+ cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), MLX5_SEND_WQE_DS));
+ if (signaled)
+ ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
+
+ memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
+ ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
+ MLX5_WQE_CTRL_CQ_UPDATE;
+
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ sq->pc++;
+ sq->wq.db[MLX5_SND_DBR] = cpu_to_be32(sq->pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ __iowrite64_copy(sq->bfreg.map + sq->bfreg.offset, mmio_wqe,
+ sizeof(mmio_wqe) / 8);
+
+ sq->bfreg.offset ^= buf_size;
+}
+
+static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq)
+{
+ struct mlx5_wc_cq *cq = &sq->cq;
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return -ETIMEDOUT;
+
+ /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+ * otherwise a cq overrun may occur
+ */
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (get_cqe_opcode(cqe) == MLX5_CQE_REQ) {
+ int wqe_counter = be16_to_cpu(cqe->wqe_counter);
+ struct mlx5_core_dev *mdev = cq->mdev;
+
+ if (wqe_counter == TEST_WC_NUM_WQES - 1)
+ mdev->wc_state = MLX5_WC_STATE_UNSUPPORTED;
+ else
+ mdev->wc_state = MLX5_WC_STATE_SUPPORTED;
+
+ mlx5_core_dbg(mdev, "wc wqe_counter = 0x%x\n", wqe_counter);
+ }
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+
+ /* ensure cq space is freed before enabling more cqes */
+ wmb();
+
+ sq->cc++;
+
+ return 0;
+}
+
+static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)
+{
+ unsigned long expires;
+ struct mlx5_wc_sq *sq;
+ int i, err;
+
+ if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED)
+ return;
+
+ sq = kzalloc(sizeof(*sq), GFP_KERNEL);
+ if (!sq)
+ return;
+
+ err = mlx5_alloc_bfreg(mdev, &sq->bfreg, true, false);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc bfreg for wc, err=%d\n", err);
+ goto err_alloc_bfreg;
+ }
+
+ err = mlx5_wc_create_cq(mdev, &sq->cq);
+ if (err)
+ goto err_create_cq;
+
+ err = mlx5_wc_create_sq(mdev, sq);
+ if (err)
+ goto err_create_sq;
+
+ for (i = 0; i < TEST_WC_NUM_WQES - 1; i++)
+ mlx5_wc_post_nop(sq, false);
+
+ mlx5_wc_post_nop(sq, true);
+
+ expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
+ do {
+ err = mlx5_wc_poll_cq(sq);
+ if (err)
+ usleep_range(2, 10);
+ } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED &&
+ time_is_after_jiffies(expires));
+
+ mlx5_wc_destroy_sq(sq);
+
+err_create_sq:
+ mlx5_wc_destroy_cq(&sq->cq);
+err_create_cq:
+ mlx5_free_bfreg(mdev, &sq->bfreg);
+err_alloc_bfreg:
+ kfree(sq);
+}
+
+bool mlx5_wc_support_get(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *parent = NULL;
+
+ if (!MLX5_CAP_GEN(mdev, bf)) {
+ mlx5_core_dbg(mdev, "BlueFlame not supported\n");
+ goto out;
+ }
+
+ if (!MLX5_CAP_GEN(mdev, log_max_sq)) {
+ mlx5_core_dbg(mdev, "SQ not supported\n");
+ goto out;
+ }
+
+ if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED)
+ /* No need to lock anything as we perform WC test only
+ * once for whole device and was already done.
+ */
+ goto out;
+
+ mutex_lock(&mdev->wc_state_lock);
+
+ if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED)
+ goto unlock;
+
+#ifdef CONFIG_MLX5_SF
+ if (mlx5_core_is_sf(mdev))
+ parent = mdev->priv.parent_mdev;
+#endif
+
+ if (parent) {
+ mutex_lock(&parent->wc_state_lock);
+
+ mlx5_core_test_wc(parent);
+
+ mlx5_core_dbg(mdev, "parent set wc_state=%d\n",
+ parent->wc_state);
+ mdev->wc_state = parent->wc_state;
+
+ mutex_unlock(&parent->wc_state_lock);
+ }
+
+ mlx5_core_test_wc(mdev);
+
+unlock:
+ mutex_unlock(&mdev->wc_state_lock);
+out:
+ mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state);
+
+ return mdev->wc_state == MLX5_WC_STATE_SUPPORTED;
+}
+EXPORT_SYMBOL(mlx5_wc_support_get);
diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index de21d9d24a95..3ba4487c9cd9 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -58,6 +58,9 @@
* in
* @name: Match name found by the auxiliary device driver,
* @id: unique identitier if multiple devices of the same name are exported,
+ * @irqs: irqs xarray contains irq indices which are used by the device,
+ * @lock: Synchronize irq sysfs creation,
+ * @irq_dir_exists: whether "irqs" directory exists,
*
* An auxiliary_device represents a part of its parent device's functionality.
* It is given a name that, combined with the registering drivers
@@ -139,6 +142,11 @@ struct auxiliary_device {
struct device dev;
const char *name;
u32 id;
+ struct {
+ struct xarray irqs;
+ struct mutex lock; /* Synchronize irq sysfs creation */
+ bool irq_dir_exists;
+ } sysfs;
};
/**
@@ -212,8 +220,24 @@ int auxiliary_device_init(struct auxiliary_device *auxdev);
int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname);
#define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME)
+#ifdef CONFIG_SYSFS
+int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq);
+void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev,
+ int irq);
+#else /* CONFIG_SYSFS */
+static inline int
+auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq)
+{
+ return 0;
+}
+
+static inline void
+auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) {}
+#endif
+
static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev)
{
+ mutex_destroy(&auxdev->sysfs.lock);
put_device(&auxdev->dev);
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 779cfdf2e9d6..0d31f77396fc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -766,6 +766,12 @@ struct mlx5_hca_cap {
u32 max[MLX5_UN_SZ_DW(hca_cap_union)];
};
+enum mlx5_wc_state {
+ MLX5_WC_STATE_UNINITIALIZED,
+ MLX5_WC_STATE_UNSUPPORTED,
+ MLX5_WC_STATE_SUPPORTED,
+};
+
struct mlx5_core_dev {
struct device *device;
enum mlx5_coredev_type coredev_type;
@@ -824,6 +830,9 @@ struct mlx5_core_dev {
#endif
u64 num_ipsec_offloads;
struct mlx5_sd *sd;
+ enum mlx5_wc_state wc_state;
+ /* sync write combining state */
+ struct mutex wc_state_lock;
};
struct mlx5_db {
@@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev)
enum {
MLX5_OCTWORD = 16,
};
+
+bool mlx5_wc_support_get(struct mlx5_core_dev *mdev);
#endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 360d42f041b0..c176953edcf8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5642,7 +5642,11 @@ struct mlx5_ifc_query_q_counter_out_bits {
u8 local_ack_timeout_err[0x20];
- u8 reserved_at_320[0xa0];
+ u8 reserved_at_320[0x60];
+
+ u8 req_rnr_retries_exceeded[0x20];
+
+ u8 reserved_at_3a0[0x20];
u8 resp_local_length_error[0x20];