From d98995b4bf981519dde4af0a081c393d62474039 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 3 Jun 2024 13:26:37 +0300 Subject: net/mlx5: Reimplement write combining test A write combining test was previously added to the mlx5_ib driver. It opens a UD QP, posts NOP WQEs, and uses the BlueFlame doorbell. When BlueFlame is used, WQEs get written directly to a PCI BAR of the device (in addition to memory) so that the device handles them without having to access memory. In this test, the WQEs written to memory are different from the ones written to BlueFlame, which request a CQE update. By checking the completion reports posted on the CQ, we can tell whether BlueFlame succeeded or not. Write combining must be supported if BlueFlame succeeds, as its register is written using write combining. This patch reimplements the test in the same way, but using only a pair of SQ and CQ. It is moved to mlx5_core as a general feature used by both mlx5_core and mlx5_ib. In addition, save the write combining test result of the PCI function, so that its thousands of child functions, such as SFs, can query it without each paying the time and resource penalty of running the test themselves. The test function is called only after failing to get the cached result. With this enhancement, all the thousands of SFs of the PF attached to the same driver no longer need to perform the WC check explicitly, as it has already been done in the system. This saves several commands per SF, thereby speeding up SF creation, and also saves completion EQ creation. 
Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 779cfdf2e9d6..0d31f77396fc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -766,6 +766,12 @@ struct mlx5_hca_cap { u32 max[MLX5_UN_SZ_DW(hca_cap_union)]; }; +enum mlx5_wc_state { + MLX5_WC_STATE_UNINITIALIZED, + MLX5_WC_STATE_UNSUPPORTED, + MLX5_WC_STATE_SUPPORTED, +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -824,6 +830,9 @@ struct mlx5_core_dev { #endif u64 num_ipsec_offloads; struct mlx5_sd *sd; + enum mlx5_wc_state wc_state; + /* sync write combining state */ + struct mutex wc_state_lock; }; struct mlx5_db { @@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) enum { MLX5_OCTWORD = 16, }; + +bool mlx5_wc_support_get(struct mlx5_core_dev *mdev); #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3-58-ga151 From b339e0a39dc37726712b9f0485d78fe4306d1667 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 13 Jun 2024 21:00:04 +0300 Subject: RDMA/mlx5: Add Qcounters req_transport_retries_exceeded/req_rnr_retries_exceeded The req_transport_retries_exceeded counter shows the number of times the requester detected a transport retries exceeded error. The req_rnr_retries_exceeded counter shows the number of times the requester detected an RNR NAK retries exceeded error. 
Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/250466af94f4989d638fab168e246035530e912f.1718301543.git.leon@kernel.org Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 8300ce622835..4f6c1968a2ee 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -83,6 +83,8 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(resp_remote_access_errors), INIT_Q_COUNTER(resp_cqe_flush_error), INIT_Q_COUNTER(req_cqe_flush_error), + INIT_Q_COUNTER(req_transport_retries_exceeded), + INIT_Q_COUNTER(req_rnr_retries_exceeded), }; static const struct mlx5_ib_counter roce_accl_cnts[] = { @@ -102,6 +104,8 @@ static const struct mlx5_ib_counter vport_extended_err_cnts[] = { INIT_VPORT_Q_COUNTER(resp_remote_access_errors), INIT_VPORT_Q_COUNTER(resp_cqe_flush_error), INIT_VPORT_Q_COUNTER(req_cqe_flush_error), + INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded), + INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded), }; static const struct mlx5_ib_counter vport_roce_accl_cnts[] = { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..09d9d87d62c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5629,7 +5629,11 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0xa0]; + u8 reserved_at_320[0x60]; + + u8 req_rnr_retries_exceeded[0x20]; + + u8 reserved_at_3a0[0x20]; u8 resp_local_length_error[0x20]; -- cgit v1.2.3-58-ga151 From a808878308a8041ae10a151d69e2d22f94cae9f4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 14 Apr 2024 11:05:25 +0300 Subject: driver core: auxiliary bus: show auxiliary device IRQs PCI subfunctions (SF) 
are anchored on the auxiliary bus. PCI physical and virtual functions are anchored on the PCI bus. The irq information of each such function is visible to users via the sysfs directory "msi_irqs", which contains a file for each irq entry. However, for PCI SFs such information is unavailable. Due to this, users have no visibility into the IRQs used by the SFs. Secondly, an SF can be a multi-function device supporting rdma, netdevice and more. Without irq information at the bus level, the user is unable to view or use the affinity of the SF IRQs. Hence, to match the equivalent PCI PFs and VFs, add an "irqs" directory, for supporting auxiliary devices, containing a file for each irq entry. For example: $ ls /sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs/ 50 51 52 53 54 55 56 57 58 Cc: Simon Horman Reviewed-by: Przemek Kitszel Reviewed-by: Parav Pandit Reviewed-by: Greg Kroah-Hartman Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- v9-v10: - remove Przemek RB - add name field to auxiliary_irq_info (Greg and Przemek) - handle bogus IRQ in auxiliary_device_sysfs_irq_remove (Greg) v8-v9: - add Przemek RB - use guard() in auxiliary_irq_dir_prepare (Paolo) v7-v8: - use cleanup.h for info and name fields (Greg) - correct error flow in auxiliary_irq_dir_prepare (Przemek) - add documentation for new fields of auxiliary_device (Simon) v6-v7: - dynamically creating irqs directory when first irq file created (Greg) - removed irqs flag and simplified the dev_add() API (Greg) - move sysfs related new code to a new auxiliary_sysfs.c file (Greg) v5-v6: - removed concept of shared and exclusive and hence global xarray (Greg) v4-v5: - restore global mutex and replace refcount_t with simple integer (Greg) v3->4: - remove global mutex (Przemek) v2->v3: - fix function declaration in case SYSFS isn't defined v1->v2: - move #ifdefs from drivers/base/auxiliary.c to include/linux/auxiliary_bus.h (Greg) - use EXPORT_SYMBOL_GPL instead of EXPORT_SYMBOL (Greg) - Fix kzalloc(ref) to kzalloc(*ref) (Simon) - 
Add return description in auxiliary_device_sysfs_irq_add() kdoc (Simon) - Fix auxiliary_irq_mode_show doc (kernel test boot) --- Documentation/ABI/testing/sysfs-bus-auxiliary | 9 ++ drivers/base/Makefile | 1 + drivers/base/auxiliary.c | 1 + drivers/base/auxiliary_sysfs.c | 113 ++++++++++++++++++++++++++ include/linux/auxiliary_bus.h | 24 ++++++ 5 files changed, 148 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-auxiliary create mode 100644 drivers/base/auxiliary_sysfs.c (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-bus-auxiliary b/Documentation/ABI/testing/sysfs-bus-auxiliary new file mode 100644 index 000000000000..cc856079690f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-auxiliary @@ -0,0 +1,9 @@ +What: /sys/bus/auxiliary/devices/.../irqs/ +Date: April, 2024 +Contact: Shay Drory +Description: + The /sys/devices/.../irqs directory contains a variable set of + files, with each file is named as irq number similar to PCI PF + or VF's irq number located in msi_irqs directory. + These irq files are added and removed dynamically when an IRQ + is requested and freed respectively for the PCI SF. 
diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 3079bfe53d04..7fb21768ca36 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o ifeq ($(CONFIG_SYSFS),y) obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_AUXILIARY_BUS) += auxiliary_sysfs.o endif obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o obj-$(CONFIG_REGMAP) += regmap/ diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index d3a2c40c2f12..3f01f4ec69e5 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -287,6 +287,7 @@ int auxiliary_device_init(struct auxiliary_device *auxdev) dev->bus = &auxiliary_bus_type; device_initialize(&auxdev->dev); + mutex_init(&auxdev->sysfs.lock); return 0; } EXPORT_SYMBOL_GPL(auxiliary_device_init); diff --git a/drivers/base/auxiliary_sysfs.c b/drivers/base/auxiliary_sysfs.c new file mode 100644 index 000000000000..754f21730afd --- /dev/null +++ b/drivers/base/auxiliary_sysfs.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ + +#include +#include + +#define AUXILIARY_MAX_IRQ_NAME 11 + +struct auxiliary_irq_info { + struct device_attribute sysfs_attr; + char name[AUXILIARY_MAX_IRQ_NAME]; +}; + +static struct attribute *auxiliary_irq_attrs[] = { + NULL +}; + +static const struct attribute_group auxiliary_irqs_group = { + .name = "irqs", + .attrs = auxiliary_irq_attrs, +}; + +static int auxiliary_irq_dir_prepare(struct auxiliary_device *auxdev) +{ + int ret = 0; + + guard(mutex)(&auxdev->sysfs.lock); + if (auxdev->sysfs.irq_dir_exists) + return 0; + + ret = devm_device_add_group(&auxdev->dev, &auxiliary_irqs_group); + if (ret) + return ret; + + auxdev->sysfs.irq_dir_exists = true; + xa_init(&auxdev->sysfs.irqs); + return 0; +} + +/** + * auxiliary_device_sysfs_irq_add - add a sysfs entry for the given IRQ + * @auxdev: auxiliary bus device to add the sysfs entry. 
+ * @irq: The associated interrupt number. + * + * This function should be called after auxiliary device have successfully + * received the irq. + * The driver is responsible to add a unique irq for the auxiliary device. The + * driver can invoke this function from multiple thread context safely for + * unique irqs of the auxiliary devices. The driver must not invoke this API + * multiple times if the irq is already added previously. + * + * Return: zero on success or an error code on failure. + */ +int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq) +{ + struct auxiliary_irq_info *info __free(kfree) = NULL; + struct device *dev = &auxdev->dev; + int ret; + + ret = auxiliary_irq_dir_prepare(auxdev); + if (ret) + return ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + sysfs_attr_init(&info->sysfs_attr.attr); + snprintf(info->name, AUXILIARY_MAX_IRQ_NAME, "%d", irq); + + ret = xa_insert(&auxdev->sysfs.irqs, irq, info, GFP_KERNEL); + if (ret) + return ret; + + info->sysfs_attr.attr.name = info->name; + ret = sysfs_add_file_to_group(&dev->kobj, &info->sysfs_attr.attr, + auxiliary_irqs_group.name); + if (ret) + goto sysfs_add_err; + + xa_store(&auxdev->sysfs.irqs, irq, no_free_ptr(info), GFP_KERNEL); + return 0; + +sysfs_add_err: + xa_erase(&auxdev->sysfs.irqs, irq); + return ret; +} +EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_add); + +/** + * auxiliary_device_sysfs_irq_remove - remove a sysfs entry for the given IRQ + * @auxdev: auxiliary bus device to add the sysfs entry. + * @irq: the IRQ to remove. + * + * This function should be called to remove an IRQ sysfs entry. + * The driver must invoke this API when IRQ is released by the device. 
+ */ +void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) +{ + struct auxiliary_irq_info *info __free(kfree) = xa_load(&auxdev->sysfs.irqs, irq); + struct device *dev = &auxdev->dev; + + if (!info) { + dev_err(&auxdev->dev, "IRQ %d doesn't exist\n", irq); + return; + } + sysfs_remove_file_from_group(&dev->kobj, &info->sysfs_attr.attr, + auxiliary_irqs_group.name); + xa_erase(&auxdev->sysfs.irqs, irq); +} +EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_remove); diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index de21d9d24a95..3ba4487c9cd9 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -58,6 +58,9 @@ * in * @name: Match name found by the auxiliary device driver, * @id: unique identitier if multiple devices of the same name are exported, + * @irqs: irqs xarray contains irq indices which are used by the device, + * @lock: Synchronize irq sysfs creation, + * @irq_dir_exists: whether "irqs" directory exists, * * An auxiliary_device represents a part of its parent device's functionality. 
* It is given a name that, combined with the registering drivers @@ -139,6 +142,11 @@ struct auxiliary_device { struct device dev; const char *name; u32 id; + struct { + struct xarray irqs; + struct mutex lock; /* Synchronize irq sysfs creation */ + bool irq_dir_exists; + } sysfs; }; /** @@ -212,8 +220,24 @@ int auxiliary_device_init(struct auxiliary_device *auxdev); int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname); #define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME) +#ifdef CONFIG_SYSFS +int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq); +void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, + int irq); +#else /* CONFIG_SYSFS */ +static inline int +auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq) +{ + return 0; +} + +static inline void +auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) {} +#endif + static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev) { + mutex_destroy(&auxdev->sysfs.lock); put_device(&auxdev->dev); } -- cgit v1.2.3-58-ga151