From b67483b3c44eaef2f771fa4c712e13f452675a67 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:45 +0100 Subject: iommu/dma: Centralise iommu_setup_dma_ops() It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only ever call iommu_setup_dma_ops() after a successful iommu_probe_device(), which means there should be no harm in achieving the same order of operations by running it off the back of iommu_probe_device() itself. This then puts it in line with the x86 and s390 .probe_finalize bodges, letting us pull it all into the main flow properly. As a bonus this lets us fold in and de-scope the PCI workaround setup as well. At this point we can also then pull the call up inside the group mutex, and avoid having to think about whether iommu_group_store_type() could theoretically race and free the domain if iommu_setup_dma_ops() ran just *before* iommu_device_use_default_domain() claims it... Furthermore we replace one .probe_finalize call completely, since the only remaining implementations are now one which only needs to run once for the initial boot-time probe, and two which themselves render that path unreachable. This leaves us a big step closer to realistically being able to unpick the variety of different things that iommu_setup_dma_ops() has been muddling together, and further streamline iommu-dma into core API flows in future. Reviewed-by: Lu Baolu # For Intel IOMMU Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/bebea331c1d688b34d9862eefd5ede47503961b8.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 61886e43e3a1..313d8938a2f0 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -58,8 +58,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, ARCH_DMA_MINALIGN, cls); dev->dma_coherent = coherent; - if (device_iommu_mapped(dev)) - iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1); xen_setup_dma_ops(dev); } -- cgit v1.2.3-58-ga151 From f091e93306e0429ebb7589b9874590b6a9705e64 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 19 Apr 2024 17:54:46 +0100 Subject: dma-mapping: Simplify arch_setup_dma_ops() The dma_base, size and iommu arguments are only used by ARM, and can now easily be deduced from the device itself, so there's no need to pass them through the callchain as well. Acked-by: Rob Herring Reviewed-by: Christoph Hellwig Reviewed-by: Michael Kelley # For Hyper-V Reviewed-by: Jason Gunthorpe Tested-by: Hanjun Guo Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/5291c2326eab405b1aa7693aa964e8d3cb7193de.1713523152.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arc/mm/dma.c | 3 +-- arch/arm/mm/dma-mapping-nommu.c | 3 +-- arch/arm/mm/dma-mapping.c | 16 +++++++++------- arch/arm64/mm/dma-mapping.c | 3 +-- arch/mips/mm/dma-noncoherent.c | 3 +-- arch/riscv/mm/dma-noncoherent.c | 3 +-- drivers/acpi/scan.c | 7 +------ drivers/hv/hv_common.c | 6 +----- drivers/of/device.c | 4 +--- include/linux/dma-map-ops.h | 6 ++---- 10 files changed, 19 insertions(+), 35 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 197707bc7658..6b85e94f3275 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -90,8 +90,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, /* * Plug in direct dma map ops. */ -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { /* * IOC hardware snoops all DMA traffic keeping the caches consistent diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index b94850b57995..97db5397c320 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -33,8 +33,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } } -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { if (IS_ENABLED(CONFIG_CPU_V7M)) { /* diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f68db05eba29..5adf1769eee4 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1709,11 +1709,15 @@ void arm_iommu_detach_device(struct device *dev) } EXPORT_SYMBOL_GPL(arm_iommu_detach_device); -static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +static void arm_setup_iommu_dma_ops(struct device *dev) { struct dma_iommu_mapping *mapping; + u64 dma_base = 0, size = 1ULL << 32; + if (dev->dma_range_map) { + dma_base = dma_range_map_min(dev->dma_range_map); + size = dma_range_map_max(dev->dma_range_map) - dma_base; + } mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); if (IS_ERR(mapping)) { pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", @@ -1744,8 +1748,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) #else -static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +static void arm_setup_iommu_dma_ops(struct device *dev) { } @@ -1753,8 +1756,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { } #endif /* CONFIG_ARM_DMA_USE_IOMMU */ -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { /* * Due to legacy code that sets the ->dma_coherent flag from a bus @@ -1774,7 +1776,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, return; if (device_iommu_mapped(dev)) - arm_setup_iommu_dma_ops(dev, dma_base, size, coherent); + arm_setup_iommu_dma_ops(dev); xen_setup_dma_ops(dev); dev->archdata.dma_ops_setup = true; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 313d8938a2f0..0b320a25a471 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -46,8 +46,7 @@ void arch_teardown_dma_ops(struct device *dev) } #endif -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { int cls = cache_line_size_of_cpu(); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 0f3cec663a12..ab4f2a75a7d0 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -137,8 +137,7 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { dev->dma_coherent = coherent; } diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 843107f834b2..cb89d7e0ba88 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -128,8 +128,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size); } -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 7c157bf92695..b1a88992c1a9 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1675,12 +1675,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; - /* - * Historically this routine doesn't fail driver probing due to errors - * in acpi_iommu_configure_id() - */ - - arch_setup_dma_ops(dev, 0, U64_MAX, attr == DEV_DMA_COHERENT); + arch_setup_dma_ops(dev, attr == DEV_DMA_COHERENT); return 0; } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index dde3f9b6871a..9c452bfbd571 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -561,11 +561,7 @@ EXPORT_SYMBOL_GPL(hv_query_ext_cap); void hv_setup_dma_ops(struct device *dev, bool coherent) { - /* - * Hyper-V does not offer a vIOMMU in the guest - * VM, so pass 0/NULL for the IOMMU settings - */ - arch_setup_dma_ops(dev, 0, 0, coherent); + arch_setup_dma_ops(dev, coherent); } EXPORT_SYMBOL_GPL(hv_setup_dma_ops); diff --git a/drivers/of/device.c b/drivers/of/device.c index 9e7963972fa7..312c63361211 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -95,7 +95,6 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, { const struct bus_dma_region *map = NULL; struct device_node *bus_np; - u64 dma_start = 0; u64 mask, end = 0; bool coherent; int iommu_ret; @@ -118,7 +117,6 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, return ret == -ENODEV ? 0 : ret; } else { /* Determine the overall bounds of all DMA regions */ - dma_start = dma_range_map_min(map); end = dma_range_map_max(map); } @@ -175,7 +173,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, } else dev_dbg(dev, "device is behind an iommu\n"); - arch_setup_dma_ops(dev, dma_start, end - dma_start + 1, coherent); + arch_setup_dma_ops(dev, coherent); if (iommu_ret) of_dma_set_restricted_buffer(dev, np); diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4abc60f04209..ed89e1ce0114 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -426,11 +426,9 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent); +void arch_setup_dma_ops(struct device *dev, bool coherent); #else -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, bool coherent) +static inline void arch_setup_dma_ops(struct device *dev, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ -- cgit v1.2.3-58-ga151 From 8b80549f1bc692cf9130af8555b6c89cec24e1a6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 30 Apr 2024 11:22:53 +0100 Subject: arm64: Properly clean up iommu-dma remnants Thanks to the somewhat asymmetrical nature, while removing iommu_setup_dma_ops() from the arch_setup_dma_ops() flow, I managed to forget that arm64's teardown path was also specific to iommu-dma. Clean that up to match, otherwise probe deferral will lead to the arch code erroneously removing DMA ops set elsewhere. Reported-by: Dmitry Baryshkov Link: https://lore.kernel.org/linux-iommu/Zi_LV28TR-P-PzXi@eriador.lumag.spb.ru/ Fixes: b67483b3c44e ("iommu/dma: Centralise iommu_setup_dma_ops()") Signed-off-by: Robin Murphy Tested-by: Dmitry Baryshkov Acked-by: Catalin Marinas Reviewed-by: Konrad Dybcio Acked-by: Will Deacon Tested-by: Nicolin Chen Link: https://lore.kernel.org/r/d4cc20cbb0c45175e98dd76bf187e2ad6421296d.1714472573.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arm64/Kconfig | 1 - arch/arm64/mm/dma-mapping.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b11c98b3e84..8fe59fb9cb35 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -46,7 +46,6 @@ config ARM64 select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYSCALL_WRAPPER - select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_ZONE_DMA_SET if EXPERT select ARCH_HAVE_ELF_PROT diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0b320a25a471..b2b5792b2caa 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -39,13 +38,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) dcache_clean_poc(start, start + size); } -#ifdef CONFIG_IOMMU_DMA -void arch_teardown_dma_ops(struct device *dev) -{ - dev->dma_ops = NULL; -} -#endif - void arch_setup_dma_ops(struct device *dev, bool coherent) { int cls = cache_line_size_of_cpu(); -- cgit v1.2.3-58-ga151