From 9ba26f5cecd86c85551a5120529c2f548099100f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 3 Feb 2022 09:36:33 +0100 Subject: ARM: sa1100/assabet: move dmabounce hack to ohci driver The sa1111 platform is one of the two remaining users of the old Arm specific "dmabounce" code, which is an earlier implementation of the generic swiotlb. Linus Walleij submitted a patch that removes dmabounce support from the ixp4xx, and I had a look at the other user, which is the sa1111 companion chip. Looking at how dmabounce is used, I could narrow it down to one driver one three machines: - dmabounce is only initialized on assabet/neponset, jornada720 and badge4, which are the platforms that have an sa1111 and support DMA on it. - All three of these suffer from "erratum #7" that requires only doing DMA to half the memory sections based on one of the address lines, in addition, the neponset also can't DMA to the RAM that is connected to sa1111 itself. - the pxa lubbock machine also has sa1111, but does not support DMA on it and does not set dmabounce. - only the OHCI and audio devices on sa1111 support DMA, but as there is no audio driver for this hardware, only OHCI remains. In the OHCI code, I noticed that two other platforms already have a local bounce buffer support in the form of the "local_mem" allocator. Specifically, TMIO and SM501 use this on a few other ARM boards with 16KB or 128KB of local SRAM that can be accessed from the OHCI and from the CPU. While this is not the same problem as on sa1111, I could not find a reason why we can't re-use the existing implementation but replace the physical SRAM address mapping with a locally allocated DMA buffer. There are two main downsides: - rather than using a dynamically sized pool, this buffer needs to be allocated at probe time using a fixed size. Without having any idea of what it should be, I picked a size of 64KB, which is between what the other two OHCI front-ends use in their SRAM. If anyone has a better idea what that size is reasonable, this can be trivially changed. - Previously, only USB transfers to unaddressable memory needed to go through the bounce buffer, now all of them do, which may impact runtime performance for USB endpoints that do a lot of transfers. On the upside, the local_mem support uses write-combining buffers, which should be a bit faster for transfers to the device compared to normal uncached coherent memory as used in dmabounce. Cc: Linus Walleij Cc: Russell King Cc: Christoph Hellwig Cc: Laurentiu Tudor Cc: linux-usb@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Greg Kroah-Hartman Acked-by: Alan Stern Signed-off-by: Christoph Hellwig --- drivers/usb/core/hcd.c | 17 ++++++++++++++--- drivers/usb/host/ohci-sa1111.c | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 06eea8848ccc..4db3add28b44 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1251,7 +1251,8 @@ void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb) EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); /* - * Some usb host controllers can only perform dma using a small SRAM area. + * Some usb host controllers can only perform dma using a small SRAM area, + * or have restrictions on addressable DRAM. * The usb core itself is however optimized for host controllers that can dma * using regular system memory - like pci devices doing bus mastering. * @@ -3117,8 +3118,18 @@ int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, if (IS_ERR(hcd->localmem_pool)) return PTR_ERR(hcd->localmem_pool); - local_mem = devm_memremap(hcd->self.sysdev, phys_addr, - size, MEMREMAP_WC); + /* + * if a physical SRAM address was passed, map it, otherwise + * allocate system memory as a buffer. + */ + if (phys_addr) + local_mem = devm_memremap(hcd->self.sysdev, phys_addr, + size, MEMREMAP_WC); + else + local_mem = dmam_alloc_attrs(hcd->self.sysdev, size, &dma, + GFP_KERNEL, + DMA_ATTR_WRITE_COMBINE); + if (IS_ERR(local_mem)) return PTR_ERR(local_mem); diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c index feca826d3f6a..75c2b28b3379 100644 --- a/drivers/usb/host/ohci-sa1111.c +++ b/drivers/usb/host/ohci-sa1111.c @@ -203,6 +203,31 @@ static int ohci_hcd_sa1111_probe(struct sa1111_dev *dev) goto err1; } + /* + * According to the "Intel StrongARM SA-1111 Microprocessor Companion + * Chip Specification Update" (June 2000), erratum #7, there is a + * significant bug in the SA1111 SDRAM shared memory controller. If + * an access to a region of memory above 1MB relative to the bank base, + * it is important that address bit 10 _NOT_ be asserted. Depending + * on the configuration of the RAM, bit 10 may correspond to one + * of several different (processor-relative) address bits. + * + * Section 4.6 of the "Intel StrongARM SA-1111 Development Module + * User's Guide" mentions that jumpers R51 and R52 control the + * target of SA-1111 DMA (either SDRAM bank 0 on Assabet, or + * SDRAM bank 1 on Neponset). The default configuration selects + * Assabet, so any address in bank 1 is necessarily invalid. + * + * As a workaround, use a bounce buffer in addressable memory + * as local_mem, relying on ZONE_DMA to provide an area that + * fits within the above constraints. + * + * SZ_64K is an estimate for what size this might need. + */ + ret = usb_hcd_setup_local_mem(hcd, 0, 0, SZ_64K); + if (ret) + goto err1; + if (!request_mem_region(hcd->rsrc_start, hcd->rsrc_len, hcd_name)) { dev_dbg(&dev->dev, "request_mem_region failed\n"); ret = -EBUSY; -- cgit v1.2.3-58-ga151 From 6d9870b7e5def2450e21316515b9efc0529204dd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:25 +0800 Subject: dma-iommu: add iommu_dma_opt_mapping_size() Add the IOMMU callback for DMA mapping API dma_opt_mapping_size(), which allows the drivers to know the optimal mapping limit and thus limit the requested IOVA lengths. This value is based on the IOVA rcache range limit, as IOVAs allocated above this limit must always be newly allocated, which may be quite slow. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Acked-by: Robin Murphy Acked-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/iommu/dma-iommu.c | 6 ++++++ drivers/iommu/iova.c | 5 +++++ include/linux/iova.h | 2 ++ 3 files changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f90251572a5d..9e1586447ee8 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1459,6 +1459,11 @@ static unsigned long iommu_dma_get_merge_boundary(struct device *dev) return (1UL << __ffs(domain->pgsize_bitmap)) - 1; } +static size_t iommu_dma_opt_mapping_size(void) +{ + return iova_rcache_range(); +} + static const struct dma_map_ops iommu_dma_ops = { .alloc = iommu_dma_alloc, .free = iommu_dma_free, @@ -1479,6 +1484,7 @@ static const struct dma_map_ops iommu_dma_ops = { .map_resource = iommu_dma_map_resource, .unmap_resource = iommu_dma_unmap_resource, .get_merge_boundary = iommu_dma_get_merge_boundary, + .opt_mapping_size = iommu_dma_opt_mapping_size, }; /* diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index db77aa675145..9f00b58d546e 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -26,6 +26,11 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); +unsigned long iova_rcache_range(void) +{ + return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); +} + static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) { struct iova_domain *iovad; diff --git a/include/linux/iova.h b/include/linux/iova.h index 320a70e40233..c6ba6d95d79c 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -79,6 +79,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) int iova_cache_get(void); void iova_cache_put(void); +unsigned long iova_rcache_range(void); + void free_iova(struct iova_domain *iovad, unsigned long pfn); void __free_iova(struct iova_domain *iovad, struct iova *iova); struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, -- cgit v1.2.3-58-ga151 From bb7d1283e6280acdfe0faf10e8b6ebd1f5970fed Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:26 +0800 Subject: scsi: core: cap shost max_sectors according to DMA limits only once The shost->max_sectors is repeatedly capped according to the host DMA mapping limit for each sdev in __scsi_init_queue(). This is unnecessary, so set only once when adding the host. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Acked-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/scsi/hosts.c | 5 +++++ drivers/scsi/scsi_lib.c | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8352f90d997d..d04bd2c7c9f1 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -236,6 +236,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, shost->dma_dev = dma_dev; + if (dma_dev->dma_mask) { + shost->max_sectors = min_t(unsigned int, shost->max_sectors, + dma_max_mapping_size(dma_dev) >> SECTOR_SHIFT); + } + error = scsi_mq_setup_tags(shost); if (error) goto fail; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6ffc9e4258a8..6ce8acea322a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1884,10 +1884,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); } - if (dev->dma_mask) { - shost->max_sectors = min_t(unsigned int, shost->max_sectors, - dma_max_mapping_size(dev) >> SECTOR_SHIFT); - } blk_queue_max_hw_sectors(q, shost->max_sectors); blk_queue_segment_boundary(q, shost->dma_boundary); dma_set_seg_boundary(dev, shost->dma_boundary); -- cgit v1.2.3-58-ga151 From 608128d391fa5c95d515d2c198f40239016e1fcd Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:27 +0800 Subject: scsi: sd: allow max_sectors be capped at DMA optimal size limit Streaming DMA mappings may be considerably slower when mappings go through an IOMMU and the total mapping length is somewhat long. This is because the IOMMU IOVA code allocates and free an IOVA for each mapping, which may affect performance. New member Scsi_Host.opt_sectors is added, which is the optimal host max_sectors, and use this value to cap the request queue max_sectors when set. It could be considered to have request queues io_opt value initially set at Scsi_Host.opt_sectors in __scsi_init_queue(), but that is not really the purpose of io_opt. Finally, even though Scsi_Host.opt_sectors value should never be greater than the request queue max_hw_sectors value, continue to limit to this value for safety. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Acked-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/scsi/sd.c | 2 ++ include/scsi/scsi_host.h | 1 + 2 files changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a1a2ac09066f..3eaee1f7aaca 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3296,6 +3296,8 @@ static int sd_revalidate_disk(struct gendisk *disk) (sector_t)BLK_DEF_MAX_SECTORS); } + rw_max = min_not_zero(rw_max, sdp->host->opt_sectors); + /* Do not exceed controller limit */ rw_max = min(rw_max, queue_max_hw_sectors(q)); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 667d889b92b5..d32a84b2bb40 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -607,6 +607,7 @@ struct Scsi_Host { short unsigned int sg_tablesize; short unsigned int sg_prot_tablesize; unsigned int max_sectors; + unsigned int opt_sectors; unsigned int max_segment_size; unsigned long dma_boundary; unsigned long virt_boundary_mask; -- cgit v1.2.3-58-ga151 From 4cbfca5f7750520ff993a0d6eb79cc4a6f6cee97 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:28 +0800 Subject: scsi: scsi_transport_sas: cap shost opt_sectors according to DMA optimal limit Streaming DMA mappings may be considerably slower when mappings go through an IOMMU and the total mapping length is somewhat long. This is because the IOMMU IOVA code allocates and free an IOVA for each mapping, which may affect performance. For performance reasons set the request queue max_sectors from dma_opt_mapping_size(), which knows this mapping limit. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Signed-off-by: Christoph Hellwig --- drivers/scsi/scsi_transport_sas.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 12bff64dade6..2f88c61216ee 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -225,6 +225,7 @@ static int sas_host_setup(struct transport_container *tc, struct device *dev, { struct Scsi_Host *shost = dev_to_shost(dev); struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); + struct device *dma_dev = shost->dma_dev; INIT_LIST_HEAD(&sas_host->rphy_list); mutex_init(&sas_host->lock); @@ -236,6 +237,11 @@ static int sas_host_setup(struct transport_container *tc, struct device *dev, dev_printk(KERN_ERR, dev, "fail to a bsg device %d\n", shost->host_no); + if (dma_dev->dma_mask) { + shost->opt_sectors = min_t(unsigned int, shost->max_sectors, + dma_opt_mapping_size(dma_dev) >> SECTOR_SHIFT); + } + return 0; } -- cgit v1.2.3-58-ga151 From 0568e6122574dcc1aded2979cd0245038efe22b6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 14 Jul 2022 19:15:29 +0800 Subject: ata: libata-scsi: cap ata_device->max_sectors according to shost->max_sectors ATA devices (struct ata_device) have a max_sectors field which is configured internally in libata. This is then used to (re)configure the associated sdev request queue max_sectors value from how it is earlier set in __scsi_init_queue(). In __scsi_init_queue() the max_sectors value is set according to shost limits, which includes host DMA mapping limits. Cap the ata_device max_sectors according to shost->max_sectors to respect this shost limit. Signed-off-by: John Garry Acked-by: Damien Le Moal Acked-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/ata/libata-scsi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 86dbb1cdfabd..24a43d540d9f 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1060,6 +1060,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) dev->flags |= ATA_DFLAG_NO_UNLOAD; /* configure max sectors */ + dev->max_sectors = min(dev->max_sectors, sdev->host->max_sectors); blk_queue_max_hw_sectors(q, dev->max_sectors); if (dev->class == ATA_DEV_ATAPI) { -- cgit v1.2.3-58-ga151 From c9337ad4390cdff8877e64824c2a2827d815743d Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 19 Jul 2022 17:51:04 +0800 Subject: scsi: sd: Add a comment about limiting max_sectors to shost optimal limit Add a comment about limiting the default the SCSI disk request_queue max_sectors initial value to that of the SCSI host optimal sectors limit. Suggested-by: Damien Le Moal Signed-off-by: John Garry Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/scsi/sd.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3eaee1f7aaca..ed9f43f9512e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3296,6 +3296,11 @@ static int sd_revalidate_disk(struct gendisk *disk) (sector_t)BLK_DEF_MAX_SECTORS); } + /* + * Limit default to SCSI host optimal sector limit if set. There may be + * an impact on performance for when the size of a request exceeds this + * host limit. + */ rw_max = min_not_zero(rw_max, sdp->host->opt_sectors); /* Do not exceed controller limit */ -- cgit v1.2.3-58-ga151 From 42399301203e3cddef36cde457228f9247618313 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:52 -0600 Subject: lib/scatterlist: add flag for indicating P2PDMA segments in an SGL Introduce a dma_flags field in struct scatterlist. These flags will be used by dma_[un]map_sg_p2pdma() to determine when a given SGL segments dma_address points to a PCI bus address. dma_unmap_sg_p2pdma() will need to perform different cleanup when a segment is marked as a bus address. The dma_flags field will fit in the existing padding on 64BIT systems (assuming CONFIG_NEED_SG_DMA_LENGTH is also set). The new bit will only be used when CONFIG_PCI_P2PDMA is set; this means PCI P2PDMA will require CONFIG_64BIT. This should be acceptable as the majority of P2PDMA use cases are restricted to newer root complexes and roughly require the extra address space for memory BARs used in the transactions. Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- drivers/pci/Kconfig | 5 ++++ include/linux/scatterlist.h | 69 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) (limited to 'drivers') diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 133c73207782..5cc7cba1941f 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -164,6 +164,11 @@ config PCI_PASID config PCI_P2PDMA bool "PCI peer-to-peer transfer support" depends on ZONE_DEVICE + # + # The need for the scatterlist DMA bus address flag means PCI P2PDMA + # requires 64bit + # + depends on 64BIT select GENERIC_ALLOCATOR help Enableѕ drivers to do PCI peer-to-peer transactions to and from diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 7ff9d6386c12..375a5e90d86a 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -16,6 +16,9 @@ struct scatterlist { #ifdef CONFIG_NEED_SG_DMA_LENGTH unsigned int dma_length; #endif +#ifdef CONFIG_PCI_P2PDMA + unsigned int dma_flags; +#endif }; /* @@ -245,6 +248,72 @@ static inline void sg_unmark_end(struct scatterlist *sg) sg->page_link &= ~SG_END; } +/* + * CONFGI_PCI_P2PDMA depends on CONFIG_64BIT which means there is 4 bytes + * in struct scatterlist (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). + * Use this padding for DMA flags bits to indicate when a specific + * dma address is a bus address. + */ +#ifdef CONFIG_PCI_P2PDMA + +#define SG_DMA_BUS_ADDRESS (1 << 0) + +/** + * sg_dma_is_bus address - Return whether a given segment was marked + * as a bus address + * @sg: SG entry + * + * Description: + * Returns true if sg_dma_mark_bus_address() has been called on + * this segment. + **/ +static inline bool sg_is_dma_bus_address(struct scatterlist *sg) +{ + return sg->dma_flags & SG_DMA_BUS_ADDRESS; +} + +/** + * sg_dma_mark_bus address - Mark the scatterlist entry as a bus address + * @sg: SG entry + * + * Description: + * Marks the passed in sg entry to indicate that the dma_address is + * a bus address and doesn't need to be unmapped. This should only be + * used by dma_map_sg() implementations to mark bus addresses + * so they can be properly cleaned up in dma_unmap_sg(). + **/ +static inline void sg_dma_mark_bus_address(struct scatterlist *sg) +{ + sg->dma_flags |= SG_DMA_BUS_ADDRESS; +} + +/** + * sg_unmark_bus_address - Unmark the scatterlist entry as a bus address + * @sg: SG entry + * + * Description: + * Clears the bus address mark. + **/ +static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) +{ + sg->dma_flags &= ~SG_DMA_BUS_ADDRESS; +} + +#else + +static inline bool sg_is_dma_bus_address(struct scatterlist *sg) +{ + return false; +} +static inline void sg_dma_mark_bus_address(struct scatterlist *sg) +{ +} +static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) +{ +} + +#endif + /** * sg_phys - Return physical address of an sg entry * @sg: SG entry -- cgit v1.2.3-58-ga151 From 719c98658058f4aca0f7663ff11f2606e99cce4f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:53 -0600 Subject: PCI/P2PDMA: Attempt to set map_type if it has not been set Attempt to find the mapping type for P2PDMA pages on the first DMA map attempt if it has not been done ahead of time. Previously, the mapping type was expected to be calculated ahead of time, but if pages are to come from userspace then there's no way to ensure the path was checked ahead of time. This change will calculate the mapping type if it hasn't pre-calculated so it is no longer invalid to call pci_p2pdma_map_sg() before the mapping type is calculated, so drop the WARN_ON when that is the case. Signed-off-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/pci/p2pdma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 462b429ad243..4e8bc457e29a 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -854,6 +854,7 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; struct pci_dev *client; struct pci_p2pdma *p2pdma; + int dist; if (!provider->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; @@ -870,6 +871,10 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, type = xa_to_value(xa_load(&p2pdma->map_types, map_types_idx(client))); rcu_read_unlock(); + + if (type == PCI_P2PDMA_MAP_UNKNOWN) + return calc_map_type_and_dist(provider, client, &dist, true); + return type; } @@ -912,7 +917,7 @@ int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, case PCI_P2PDMA_MAP_BUS_ADDR: return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents); default: - WARN_ON_ONCE(1); + /* Mapping is not Supported */ return 0; } } -- cgit v1.2.3-58-ga151 From 5e180ff326b43bd3fb72892e1083b2707159aa6e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:54 -0600 Subject: PCI/P2PDMA: Introduce helpers for dma_map_sg implementations Add pci_p2pdma_map_segment() as a helper for dma_map_sg() implementations. It takes an scatterlist segment that must point to a pci_p2pdma struct page and will map it if the mapping requires a bus address. The return value indicates whether the mapping required a bus address or whether the caller still needs to map the segment normally. If the segment should not be mapped, -EREMOTEIO is returned. This helper uses a state structure to track the changes to the pgmap across calls and avoid needing to lookup into the xarray for every page. The prototype for the helper is added to dma-map-ops.h as it is only useful to dma map implementations and don't need to pollute the public pci-p2pdma header. Signed-off-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/pci/p2pdma.c | 44 +++++++++++++++++++++++++++++++------ include/linux/dma-map-ops.h | 53 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 4e8bc457e29a..5d2538aa0778 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "pci-p2pdma: " fmt #include +#include #include #include #include @@ -20,13 +21,6 @@ #include #include -enum pci_p2pdma_map_type { - PCI_P2PDMA_MAP_UNKNOWN = 0, - PCI_P2PDMA_MAP_NOT_SUPPORTED, - PCI_P2PDMA_MAP_BUS_ADDR, - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, -}; - struct pci_p2pdma { struct gen_pool *pool; bool p2pmem_published; @@ -944,6 +938,42 @@ void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs); +/** + * pci_p2pdma_map_segment - map an sg segment determining the mapping type + * @state: State structure that should be declared outside of the for_each_sg() + * loop and initialized to zero. + * @dev: DMA device that's doing the mapping operation + * @sg: scatterlist segment to map + * + * This is a helper to be used by non-IOMMU dma_map_sg() implementations where + * the sg segment is the same for the page_link and the dma_address. + * + * Attempt to map a single segment in an SGL with the PCI bus address. + * The segment must point to a PCI P2PDMA page and thus must be + * wrapped in a is_pci_p2pdma_page(sg_page(sg)) check. + * + * Returns the type of mapping used and maps the page if the type is + * PCI_P2PDMA_MAP_BUS_ADDR. + */ +enum pci_p2pdma_map_type +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, + struct scatterlist *sg) +{ + if (state->pgmap != sg_page(sg)->pgmap) { + state->pgmap = sg_page(sg)->pgmap; + state->map = pci_p2pdma_map_type(state->pgmap, dev); + state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; + } + + if (state->map == PCI_P2PDMA_MAP_BUS_ADDR) { + sg->dma_address = sg_phys(sg) + state->bus_off; + sg_dma_len(sg) = sg->length; + sg_dma_mark_bus_address(sg); + } + + return state->map; +} + /** * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store * to enable p2pdma diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 98ceba6fa848..99cec59dbfcb 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -380,4 +380,57 @@ static inline void debug_dma_dump_mappings(struct device *dev) extern const struct dma_map_ops dma_dummy_ops; +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping + * type hasn't been calculated yet. Functions that return this enum + * never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. + */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + +struct pci_p2pdma_map_state { + struct dev_pagemap *pgmap; + int map; + u64 bus_off; +}; + +#ifdef CONFIG_PCI_P2PDMA +enum pci_p2pdma_map_type +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, + struct scatterlist *sg); +#else /* CONFIG_PCI_P2PDMA */ +static inline enum pci_p2pdma_map_type +pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, + struct scatterlist *sg) +{ + return PCI_P2PDMA_MAP_NOT_SUPPORTED; +} +#endif /* CONFIG_PCI_P2PDMA */ + #endif /* _LINUX_DMA_MAP_OPS_H */ -- cgit v1.2.3-58-ga151 From c96321834b2f7aa442cf4c076f73d8273e622725 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:58 -0600 Subject: iommu: Explicitly skip bus address marked segments in __iommu_map_sg() In order to support PCI P2PDMA mappings with dma-iommu, explicitly skip any segments marked with sg_dma_mark_bus_address() in __iommu_map_sg(). These segments should not be mapped into the IOVA and will be handled separately in as subsequent patch for dma-iommu. Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- drivers/iommu/iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 847ad47a2dfd..2844a3e02a89 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2457,6 +2457,9 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, len = 0; } + if (sg_is_dma_bus_address(sg)) + goto next; + if (len) { len += sg->length; } else { @@ -2464,6 +2467,7 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, start = s_phys; } +next: if (++i < nents) sg = sg_next(sg); } -- cgit v1.2.3-58-ga151 From 30280eee2db10ed6f9f2e1b4ec9197465a96b996 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:50:59 -0600 Subject: iommu/dma: support PCI P2PDMA pages in dma-iommu map_sg Call pci_p2pdma_map_segment() when a PCI P2PDMA page is seen so the bus address is set in the dma address and the segment is marked with sg_dma_mark_bus_address(). iommu_map_sg() will then skip these segments. Then, in __finalise_sg(), copy the dma address from the input segment to the output segment. __invalidate_sg() must also learn to skip these segments. A P2PDMA page may have three possible outcomes when being mapped: 1) If the data path between the two devices doesn't go through the root port, then it should be mapped with a PCI bus address 2) If the data path goes through the host bridge, it should be mapped normally with an IOMMU IOVA. 3) It is not possible for the two devices to communicate and thus the mapping operation should fail (and it will return -EREMOTEIO). Similar to dma-direct, the sg_dma_mark_pci_p2pdma() flag is used to indicate bus address segments. On unmap, P2PDMA segments are skipped over when determining the start and end IOVA addresses. With this change, the flags variable in the dma_map_ops is set to DMA_F_PCI_P2PDMA_SUPPORTED to indicate support for P2PDMA pages. Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- drivers/iommu/dma-iommu.c | 99 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9e1586447ee8..8a0d16d320ef 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1053,15 +1054,30 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, for_each_sg(sg, s, nents, i) { /* Restore this segment's original unaligned fields first */ + dma_addr_t s_dma_addr = sg_dma_address(s); unsigned int s_iova_off = sg_dma_address(s); unsigned int s_length = sg_dma_len(s); unsigned int s_iova_len = s->length; - s->offset += s_iova_off; - s->length = s_length; sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; + if (sg_is_dma_bus_address(s)) { + if (i > 0) + cur = sg_next(cur); + + sg_dma_unmark_bus_address(s); + sg_dma_address(cur) = s_dma_addr; + sg_dma_len(cur) = s_length; + sg_dma_mark_bus_address(cur); + count++; + cur_len = 0; + continue; + } + + s->offset += s_iova_off; + s->length = s_length; + /* * Now fill in the real DMA data. If... * - there is a valid output segment to append to @@ -1102,10 +1118,14 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) int i; for_each_sg(sg, s, nents, i) { - if (sg_dma_address(s) != DMA_MAPPING_ERROR) - s->offset += sg_dma_address(s); - if (sg_dma_len(s)) - s->length = sg_dma_len(s); + if (sg_is_dma_bus_address(s)) { + sg_dma_unmark_bus_address(s); + } else { + if (sg_dma_address(s) != DMA_MAPPING_ERROR) + s->offset += sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + } sg_dma_address(s) = DMA_MAPPING_ERROR; sg_dma_len(s) = 0; } @@ -1158,6 +1178,8 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); + struct pci_p2pdma_map_state p2pdma_state = {}; + enum pci_p2pdma_map_type map; dma_addr_t iova; size_t iova_len = 0; unsigned long mask = dma_get_seg_boundary(dev); @@ -1187,6 +1209,30 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, size_t s_length = s->length; size_t pad_len = (mask - iova_len + 1) & mask; + if (is_pci_p2pdma_page(sg_page(s))) { + map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); + switch (map) { + case PCI_P2PDMA_MAP_BUS_ADDR: + /* + * iommu_map_sg() will skip this segment as + * it is marked as a bus address, + * __finalise_sg() will copy the dma address + * into the output segment. + */ + continue; + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Mapping through host bridge should be + * mapped with regular IOVAs, thus we + * do nothing here and continue below. + */ + break; + default: + ret = -EREMOTEIO; + goto out_restore_sg; + } + } + sg_dma_address(s) = s_iova_off; sg_dma_len(s) = s_length; s->offset -= s_iova_off; @@ -1215,6 +1261,9 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, prev = s; } + if (!iova_len) + return __finalise_sg(dev, sg, nents, 0); + iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); if (!iova) { ret = -ENOMEM; @@ -1236,7 +1285,7 @@ out_free_iova: out_restore_sg: __invalidate_sg(sg, nents); out: - if (ret != -ENOMEM) + if (ret != -ENOMEM && ret != -EREMOTEIO) return -EINVAL; return ret; } @@ -1244,7 +1293,7 @@ out: static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t start, end; + dma_addr_t end = 0, start; struct scatterlist *tmp; int i; @@ -1258,16 +1307,37 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, /* * The scatterlist segments are mapped into a single - * contiguous IOVA allocation, so this is incredibly easy. + * contiguous IOVA allocation, the start and end points + * just have to be determined. */ - start = sg_dma_address(sg); - for_each_sg(sg_next(sg), tmp, nents - 1, i) { + for_each_sg(sg, tmp, nents, i) { + if (sg_is_dma_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; + + start = sg_dma_address(tmp); + break; + } + + nents -= i; + for_each_sg(tmp, tmp, nents, i) { + if (sg_is_dma_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + if (sg_dma_len(tmp) == 0) break; - sg = tmp; + + end = sg_dma_address(tmp) + sg_dma_len(tmp); } - end = sg_dma_address(sg) + sg_dma_len(sg); - __iommu_dma_unmap(dev, start, end - start); + + if (end) + __iommu_dma_unmap(dev, start, end - start); } static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, @@ -1465,6 +1535,7 @@ static size_t iommu_dma_opt_mapping_size(void) } static const struct dma_map_ops iommu_dma_ops = { + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, .alloc = iommu_dma_alloc, .free = iommu_dma_free, .alloc_pages = dma_common_alloc_pages, -- cgit v1.2.3-58-ga151 From 2f8594412b4b21023a1f147d881ca05ddffc1e36 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:51:00 -0600 Subject: nvme-pci: check DMA ops when indicating support for PCI P2PDMA Introduce a supports_pci_p2pdma() operation in nvme_ctrl_ops to replace the fixed NVME_F_PCI_P2PDMA flag such that the dma_map_ops flags can be checked for PCI P2PDMA support. Signed-off-by: Logan Gunthorpe Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/pci.c | 12 +++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3ab2cfd254a4..4669045a76f8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3982,7 +3982,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); - if (ctrl->ops->flags & NVME_F_PCI_P2PDMA) + if (ctrl->ops->supports_pci_p2pdma && + ctrl->ops->supports_pci_p2pdma(ctrl)) blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue); ns->ctrl = ctrl; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 0da94b233fed..544b4ec81b2b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -495,7 +495,6 @@ struct nvme_ctrl_ops { unsigned int flags; #define NVME_F_FABRICS (1 << 0) #define NVME_F_METADATA_SUPPORTED (1 << 1) -#define NVME_F_PCI_P2PDMA (1 << 2) int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); @@ -504,6 +503,7 @@ struct nvme_ctrl_ops { void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); void (*print_device_info)(struct nvme_ctrl *ctrl); + bool (*supports_pci_p2pdma)(struct nvme_ctrl *ctrl); }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c7012e85d035..0ca42ae68f58 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2984,7 +2984,6 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) return snprintf(buf, size, "%s\n", dev_name(&pdev->dev)); } - static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl) { struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); @@ -2999,11 +2998,17 @@ static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl) subsys->firmware_rev); } +static bool nvme_pci_supports_pci_p2pdma(struct nvme_ctrl *ctrl) +{ + struct nvme_dev *dev = to_nvme_dev(ctrl); + + return dma_pci_p2pdma_supported(dev->dev); +} + static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, - .flags = NVME_F_METADATA_SUPPORTED | - NVME_F_PCI_P2PDMA, + .flags = NVME_F_METADATA_SUPPORTED, .reg_read32 = nvme_pci_reg_read32, .reg_write32 = nvme_pci_reg_write32, .reg_read64 = nvme_pci_reg_read64, @@ -3011,6 +3016,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .submit_async_event = nvme_pci_submit_async_event, .get_address = nvme_pci_get_address, .print_device_info = nvme_pci_print_device_info, + .supports_pci_p2pdma = nvme_pci_supports_pci_p2pdma, }; static int nvme_dev_map(struct nvme_dev *dev) -- cgit v1.2.3-58-ga151 From 91fb2b6052f75a2f9375db5d5924860ab2313a1d Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:51:01 -0600 Subject: nvme-pci: convert to using dma_map_sgtable() The dma_map operations now support P2PDMA pages directly. So remove the calls to pci_p2pdma_[un]map_sg_attrs() and replace them with calls to dma_map_sgtable(). dma_map_sgtable() returns more complete error codes than dma_map_sg() and allows differentiating EREMOTEIO errors in case an unsupported P2PDMA transfer is requested. When this happens, return BLK_STS_TARGET so the request isn't retried. Signed-off-by: Logan Gunthorpe Reviewed-by: Max Gurtovoy Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 69 +++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0ca42ae68f58..c6bf83d74560 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -230,11 +230,10 @@ struct nvme_iod { bool use_sgl; int aborted; int npages; /* In the PRP list. 0 means small pool in use */ - int nents; /* Used in scatterlist */ dma_addr_t first_dma; unsigned int dma_len; /* length of single DMA segment mapping */ dma_addr_t meta_dma; - struct scatterlist *sg; + struct sg_table sgt; }; static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) @@ -524,7 +523,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) static void **nvme_pci_iod_list(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (void **)(iod->sg + blk_rq_nr_phys_segments(req)); + return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req)); } static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) @@ -576,17 +575,6 @@ static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) } } -static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - - if (is_pci_p2pdma_page(sg_page(iod->sg))) - pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, - rq_dma_dir(req)); - else - dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); -} - static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -597,9 +585,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) return; } - WARN_ON_ONCE(!iod->nents); + WARN_ON_ONCE(!iod->sgt.nents); + + dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); - nvme_unmap_sg(dev, req); if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], iod->first_dma); @@ -607,7 +596,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) nvme_free_sgls(dev, req); else nvme_free_prps(dev, req); - mempool_free(iod->sg, dev->iod_mempool); + mempool_free(iod->sgt.sgl, dev->iod_mempool); } static void nvme_print_sgl(struct scatterlist *sgl, int nents) @@ -630,7 +619,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct dma_pool *pool; int length = blk_rq_payload_bytes(req); - struct scatterlist *sg = iod->sg; + struct scatterlist *sg = iod->sgt.sgl; int dma_len = sg_dma_len(sg); u64 dma_addr = sg_dma_address(sg); int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1); @@ -703,16 +692,16 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, dma_len = sg_dma_len(sg); } done: - cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); + cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sgt.sgl)); cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); return BLK_STS_OK; free_prps: nvme_free_prps(dev, req); return BLK_STS_RESOURCE; bad_sgl: - WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents), + WARN(DO_ONCE(nvme_print_sgl, iod->sgt.sgl, iod->sgt.nents), "Invalid SGL for payload:%d nents:%d\n", - blk_rq_payload_bytes(req), iod->nents); + blk_rq_payload_bytes(req), iod->sgt.nents); return BLK_STS_IOERR; } @@ -738,12 +727,13 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, } static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, - struct request *req, struct nvme_rw_command *cmd, int entries) + struct request *req, struct nvme_rw_command *cmd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct dma_pool *pool; struct nvme_sgl_desc *sg_list; - struct scatterlist *sg = iod->sg; + struct scatterlist *sg = iod->sgt.sgl; + unsigned int entries = iod->sgt.nents; dma_addr_t sgl_dma; int i = 0; @@ -841,7 +831,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); blk_status_t ret = BLK_STS_RESOURCE; - int nr_mapped; + int rc; if (blk_rq_nr_phys_segments(req) == 1) { struct bio_vec bv = req_bvec(req); @@ -859,26 +849,25 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, } iod->dma_len = 0; - iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); - if (!iod->sg) + iod->sgt.sgl = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); + if (!iod->sgt.sgl) return BLK_STS_RESOURCE; - sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); - iod->nents = blk_rq_map_sg(req->q, req, iod->sg); - if (!iod->nents) + sg_init_table(iod->sgt.sgl, blk_rq_nr_phys_segments(req)); + iod->sgt.orig_nents = blk_rq_map_sg(req->q, req, iod->sgt.sgl); + if (!iod->sgt.orig_nents) goto out_free_sg; - if (is_pci_p2pdma_page(sg_page(iod->sg))) - nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, - iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); - else - nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, - rq_dma_dir(req), DMA_ATTR_NO_WARN); - if (!nr_mapped) + rc = dma_map_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), + DMA_ATTR_NO_WARN); + if (rc) { + if (rc == -EREMOTEIO) + ret = BLK_STS_TARGET; goto out_free_sg; + } iod->use_sgl = nvme_pci_use_sgls(dev, req); if (iod->use_sgl) - ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); + ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); if (ret != BLK_STS_OK) @@ -886,9 +875,9 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, return BLK_STS_OK; out_unmap_sg: - nvme_unmap_sg(dev, req); + dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0); out_free_sg: - mempool_free(iod->sg, dev->iod_mempool); + mempool_free(iod->sgt.sgl, dev->iod_mempool); return ret; } @@ -912,7 +901,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) iod->aborted = 0; iod->npages = -1; - iod->nents = 0; + iod->sgt.nents = 0; ret = nvme_setup_cmd(req->q->queuedata, req); if (ret) -- cgit v1.2.3-58-ga151 From 495758bb1a72316094ee854dc51f667ad3db29b5 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:51:02 -0600 Subject: RDMA/core: introduce ib_dma_pci_p2p_dma_supported() Introduce the helper function ib_dma_pci_p2p_dma_supported() to check if a given ib_device can be used in P2PDMA transfers. This ensures the ib_device is not using virt_dma and also that the underlying dma_device supports P2PDMA. Use the new helper in nvme-rdma to replace the existing check for ib_uses_virt_dma(). Adding the dma_pci_p2pdma_supported() check allows switching away from pci_p2pdma_[un]map_sg(). Signed-off-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- include/rdma/ib_verbs.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 09fdcac87d17..4597bca43a6d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -415,7 +415,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; - if (!ib_uses_virt_dma(ndev->device)) + if (ib_dma_pci_p2p_dma_supported(ndev->device)) r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9c6317cf80d5..523843d9ed6c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4013,6 +4013,17 @@ static inline bool ib_uses_virt_dma(struct ib_device *dev) return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device; } +/* + * Check if a IB device's underlying DMA mapping supports P2PDMA transfers. + */ +static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev) +{ + if (ib_uses_virt_dma(dev)) + return false; + + return dma_pci_p2pdma_supported(dev->dma_device); +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created -- cgit v1.2.3-58-ga151 From 1e97af7f2f0ed487e5abbb6b755d1df156cf3d7e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:51:03 -0600 Subject: RDMA/rw: drop pci_p2pdma_[un]map_sg() dma_map_sg() now supports the use of P2PDMA pages so pci_p2pdma_map_sg() is no longer necessary and may be dropped. This means the rdma_rw_[un]map_sg() helpers are no longer necessary. Remove it all. Signed-off-by: Logan Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/infiniband/core/rw.c | 45 +++++++++----------------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 4d98f931a13d..8367974b7998 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -274,33 +274,6 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } -static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - u32 sg_cnt, enum dma_data_direction dir) -{ - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); - else - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); -} - -static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt, - enum dma_data_direction dir) -{ - int nents; - - if (is_pci_p2pdma_page(sg_page(sgt->sgl))) { - if (WARN_ON_ONCE(ib_uses_virt_dma(dev))) - return 0; - nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl, - sgt->orig_nents, dir); - if (!nents) - return -EIO; - sgt->nents = nents; - return 0; - } - return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0); -} - /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -327,7 +300,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, }; int ret; - ret = rdma_rw_map_sgtable(dev, &sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); if (ret) return ret; sg_cnt = sgt.nents; @@ -366,7 +339,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num, return ret; out_unmap_sg: - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -414,12 +387,12 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -EINVAL; } - ret = rdma_rw_map_sgtable(dev, &sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0); if (ret) return ret; if (prot_sg_cnt) { - ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir); + ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0); if (ret) goto out_unmap_sg; } @@ -486,9 +459,9 @@ out_free_ctx: kfree(ctx->reg); out_unmap_prot_sg: if (prot_sgt.nents) - rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0); out_unmap_sg: - rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir); + ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_signature_init); @@ -621,7 +594,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, break; } - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); @@ -649,8 +622,8 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, kfree(ctx->reg); if (prot_sg_cnt) - rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); - rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); -- cgit v1.2.3-58-ga151 From 0d06132fc84bd91379300768c75a19474e06d0c5 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 8 Jul 2022 10:51:04 -0600 Subject: PCI/P2PDMA: Remove pci_p2pdma_[un]map_sg() This interface is superseded by support in dma_map_sg() which now supports heterogeneous scatterlists. There are no longer any users, so remove it. Signed-off-by: Logan Gunthorpe Acked-by: Bjorn Helgaas Reviewed-by: Jason Gunthorpe Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig --- drivers/pci/p2pdma.c | 66 ---------------------------------------------- include/linux/pci-p2pdma.h | 27 ------------------- 2 files changed, 93 deletions(-) (limited to 'drivers') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 5d2538aa0778..4496a7c5c478 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -872,72 +872,6 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, return type; } -static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, - struct device *dev, struct scatterlist *sg, int nents) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - s->dma_address = sg_phys(s) + p2p_pgmap->bus_offset; - sg_dma_len(s) = s->length; - } - - return nents; -} - -/** - * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA - * @dev: device doing the DMA request - * @sg: scatter list to map - * @nents: elements in the scatterlist - * @dir: DMA direction - * @attrs: DMA attributes passed to dma_map_sg() (if called) - * - * Scatterlists mapped with this function should be unmapped using - * pci_p2pdma_unmap_sg_attrs(). - * - * Returns the number of SG entries mapped or 0 on error. - */ -int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct pci_p2pdma_pagemap *p2p_pgmap = - to_p2p_pgmap(sg_page(sg)->pgmap); - - switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) { - case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: - return dma_map_sg_attrs(dev, sg, nents, dir, attrs); - case PCI_P2PDMA_MAP_BUS_ADDR: - return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents); - default: - /* Mapping is not Supported */ - return 0; - } -} -EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs); - -/** - * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was - * mapped with pci_p2pdma_map_sg() - * @dev: device doing the DMA request - * @sg: scatter list to map - * @nents: number of elements returned by pci_p2pdma_map_sg() - * @dir: DMA direction - * @attrs: DMA attributes passed to dma_unmap_sg() (if called) - */ -void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - enum pci_p2pdma_map_type map_type; - - map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev); - - if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) - dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); -} -EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs); - /** * pci_p2pdma_map_segment - map an sg segment determining the mapping type * @state: State structure that should be declared outside of the for_each_sg() diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 8318a97c9c61..2c07aa6b7665 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -30,10 +30,6 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev, unsigned int *nents, u32 length); void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl); void pci_p2pmem_publish(struct pci_dev *pdev, bool publish); -int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); -void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs); int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, bool *use_p2pdma); ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev, @@ -83,17 +79,6 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev, static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) { } -static inline int pci_p2pdma_map_sg_attrs(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - return 0; -} -static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} static inline int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev, bool *use_p2pdma) { @@ -119,16 +104,4 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } -static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0); -} - -static inline void pci_p2pdma_unmap_sg(struct device *dev, - struct scatterlist *sg, int nents, enum dma_data_direction dir) -{ - pci_p2pdma_unmap_sg_attrs(dev, sg, nents, dir, 0); -} - #endif /* _LINUX_PCI_P2P_H */ -- cgit v1.2.3-58-ga151