author		Linus Torvalds <torvalds@linux-foundation.org>	2019-07-12 15:13:55 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-12 15:13:55 -0700
commit		9e3a25dc992dd9f3170fb643bdd95da5ca9c5576 (patch)
tree		f636ae59fa83c83e837a6668b2693175a6e39f3a
parent		9787aed57dd33ba5c15a713c2c50e78baeb5052d (diff)
parent		15ffe5e1acf5fe1512e98b20702e46ce9f25e2f7 (diff)
Merge tag 'dma-mapping-5.3' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:
- move the USB special case that bounced DMA through a device BAR into
  the USB code instead of handling it in the common DMA code (Laurentiu
  Tudor and Fredrik Noring); a usage sketch of the new interface precedes
  the diff below
- don't dip into the global CMA pool for single page allocations
  (Nicolin Chen); the allocation policy is sketched after this list
- fix a crash when allocating memory for the atomic pool failed during
boot (Florian Fainelli)
- move support for MIPS-style uncached segments to the common code and
use that for MIPS and nios2 (me)
- make support for DMA_ATTR_NON_CONSISTENT and
DMA_ATTR_NO_KERNEL_MAPPING generic (me)
- convert nds32 to the generic remapping allocator (me)
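The CMA change above comes down to a small allocation policy: a per-device
CMA area is always used when present, but the shared global area is now
skipped for requests that fit in one page, since a single page is trivially
contiguous and CMA space is better saved for larger buffers. Below is a
minimal userspace sketch of that decision logic with the kernel allocators
stubbed out; everything suffixed _stub is hypothetical, not a kernel API.

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define PAGE_SIZE 4096UL

	/* Stand-ins for cma_alloc() and the buddy allocator. */
	static void *cma_alloc_stub(const char *area, size_t pages)
	{
		printf("CMA alloc from %s area: %zu page(s)\n", area, pages);
		return malloc(pages * PAGE_SIZE);
	}

	static void *alloc_pages_stub(size_t pages)
	{
		printf("normal page alloc: %zu page(s)\n", pages);
		return malloc(pages * PAGE_SIZE);
	}

	/*
	 * Mirrors the shape of the new dma_alloc_contiguous(): a device-
	 * specific CMA area always wins, the global area is consulted only
	 * for multi-page requests, and anything else falls back to the
	 * normal page allocator.
	 */
	static void *dma_alloc_contiguous_sketch(const char *dev_cma,
						 size_t size, bool may_block)
	{
		size_t pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
		const char *cma = NULL;
		void *page = NULL;

		if (dev_cma)
			cma = dev_cma;		/* per-device area */
		else if (pages > 1)
			cma = "global";		/* skip CMA for one page */

		/* CMA may only be used where sleeping is allowed */
		if (cma && may_block)
			page = cma_alloc_stub(cma, pages);
		if (!page)
			page = alloc_pages_stub(pages);
		return page;
	}

	int main(void)
	{
		free(dma_alloc_contiguous_sketch(NULL, PAGE_SIZE, true));
		free(dma_alloc_contiguous_sketch(NULL, 4 * PAGE_SIZE, true));
		free(dma_alloc_contiguous_sketch("dev", PAGE_SIZE, true));
		return 0;
	}

The real dma_alloc_contiguous() in kernel/dma/contiguous.c below
additionally clamps the CMA alignment to CONFIG_CMA_ALIGNMENT and leaves
the GFP-zone retry logic to its caller in kernel/dma/direct.c.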
* tag 'dma-mapping-5.3' of git://git.infradead.org/users/hch/dma-mapping: (29 commits)
dma-mapping: mark dma_alloc_need_uncached as __always_inline
MIPS: only select ARCH_HAS_UNCACHED_SEGMENT for non-coherent platforms
usb: host: Fix excessive alignment restriction for local memory allocations
lib/genalloc.c: Add algorithm, align and zeroed family of DMA allocators
nios2: use the generic uncached segment support in dma-direct
nds32: use the generic remapping allocator for coherent DMA allocations
arc: use the generic remapping allocator for coherent DMA allocations
dma-direct: handle DMA_ATTR_NO_KERNEL_MAPPING in common code
dma-direct: handle DMA_ATTR_NON_CONSISTENT in common code
dma-mapping: add a dma_alloc_need_uncached helper
openrisc: remove the partial DMA_ATTR_NON_CONSISTENT support
arc: remove the partial DMA_ATTR_NON_CONSISTENT support
arm-nommu: remove the partial DMA_ATTR_NON_CONSISTENT support
ARM: dma-mapping: allow larger DMA mask than supported
dma-mapping: truncate dma masks to what dma_addr_t can hold
iommu/dma: Apply dma_{alloc,free}_contiguous functions
dma-remap: Avoid de-referencing NULL atomic_pool
MIPS: use the generic uncached segment support in dma-direct
dma-direct: provide generic support for uncached kernel segments
au1100fb: fix DMA API abuse
...
41 files changed, 515 insertions, 654 deletions
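Before the diff itself, here is a short sketch of how a host-controller
driver is expected to use the new local-memory path, distilled from the
ohci-sm501 and ohci-tmio conversions below. usb_hcd_setup_local_mem(),
gen_pool_dma_alloc_align() and gen_pool_free() are the interfaces added by
this merge; the probe fragment my_hcd_setup_sram() and the assumption that
the SRAM's bus address equals its physical address are illustrative only.

	#include <linux/genalloc.h>
	#include <linux/ioport.h>
	#include <linux/usb/hcd.h>

	/* Hypothetical probe fragment for an HC with on-chip SRAM. */
	static int my_hcd_setup_sram(struct usb_hcd *hcd,
				     struct resource *sram)
	{
		void *buf;
		dma_addr_t dma;
		int ret;

		/*
		 * Replaces dma_declare_coherent_memory() + HCD_LOCAL_MEM:
		 * back a gen_pool with the device-local memory. The bus
		 * address of the first SRAM byte is assumed here to equal
		 * its physical address.
		 */
		ret = usb_hcd_setup_local_mem(hcd, sram->start, sram->start,
					      resource_size(sram));
		if (ret < 0)
			return ret;

		/* usb core and HCDs now allocate DMA-able buffers ... */
		buf = gen_pool_dma_alloc_align(hcd->localmem_pool, 256,
					       &dma, 256);
		if (!buf)
			return -ENOMEM;

		/* ... and free them through the same pool. */
		gen_pool_free(hcd->localmem_pool, (unsigned long)buf, 256);
		return 0;
	}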
diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..e8d19c3cb91f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -260,6 +260,14 @@ config ARCH_HAS_SET_MEMORY config ARCH_HAS_SET_DIRECT_MAP bool +# +# Select if arch has an uncached kernel segment and provides the +# uncached_kernel_address / cached_kernel_address symbols to use it +# +config ARCH_HAS_UNCACHED_SEGMENT + select ARCH_HAS_DMA_PREP_COHERENT + bool + # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 1c8137e7247b..8383155c8c82 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -7,6 +7,7 @@ config ARC def_bool y select ARC_TIMERS select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SYNC_DMA_FOR_CPU @@ -16,6 +17,7 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 0bf1468c35a3..62c210e7ee4c 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -8,51 +8,15 @@ #include <asm/cacheflush.h> /* - * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c) + * ARCH specific callbacks for generic noncoherent DMA ops * - hardware IOC not available (or "dma-coherent" not set for device in DT) * - But still handle both coherent and non-coherent requests from caller * * For DMA coherent hardware (IOC) generic code suffices */ -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned long order = get_order(size); - struct page *page; - phys_addr_t paddr; - void *kvaddr; - bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT); - - /* - * __GFP_HIGHMEM flag is cleared by upper layer functions - * (in include/linux/dma-mapping.h) so we should never get a - * __GFP_HIGHMEM here. - */ - BUG_ON(gfp & __GFP_HIGHMEM); - - page = alloc_pages(gfp | __GFP_ZERO, order); - if (!page) - return NULL; - - /* This is linear addr (0x8000_0000 based) */ - paddr = page_to_phys(page); - - *dma_handle = paddr; - - /* - * A coherent buffer needs MMU mapping to enforce non-cachability. - * kvaddr is kernel Virtual address (0x7000_0000 based). - */ - if (need_coh) { - kvaddr = ioremap_nocache(paddr, size); - if (kvaddr == NULL) { - __free_pages(page, order); - return NULL; - } - } else { - kvaddr = (void *)(u32)paddr; - } +void arch_dma_prep_coherent(struct page *page, size_t size) +{ /* * Evict any existing L1 and/or L2 lines for the backing page * in case it was used earlier as a normal "cached" page. 
@@ -63,28 +27,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, * Currently flush_cache_vmap nukes the L1 cache completely which * will be optimized as a separate commit */ - if (need_coh) - dma_cache_wback_inv(paddr, size); - - return kvaddr; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - phys_addr_t paddr = dma_handle; - struct page *page = virt_to_page(paddr); - - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - iounmap((void __force __iomem *)vaddr); - - __free_pages(page, get_order(size)); -} - -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr) -{ - return __phys_to_pfn(dma_addr); + dma_cache_wback_inv(page_to_phys(page), size); } /* @@ -161,3 +104,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev_info(dev, "use %sncoherent DMA ops\n", dev->dma_coherent ? "" : "non"); } + +static int __init atomic_pool_init(void) +{ + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); +} +postcore_initcall(atomic_pool_init); diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 1aea01ba1262..52b82559d99b 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -35,18 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { - void *ret; - - /* - * Try generic allocator first if we are advertised that - * consistency is not required. - */ - - if (attrs & DMA_ATTR_NON_CONSISTENT) - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, - attrs); - - ret = dma_alloc_from_global_coherent(size, dma_handle); + void *ret = dma_alloc_from_global_coherent(size, dma_handle); /* * dma_alloc_from_global_coherent() may fail because: @@ -66,16 +55,9 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (attrs & DMA_ATTR_NON_CONSISTENT) { - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); - } else { - int ret = dma_release_from_global_coherent(get_order(size), - cpu_addr); - - WARN_ON_ONCE(ret == 0); - } + int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); - return; + WARN_ON_ONCE(ret == 0); } static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1fb5c0ca1ed8..4789c60a86e3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -216,25 +216,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn; - - /* - * If the mask allows for more memory than we can address, - * and we actually have that much memory, then we must - * indicate that DMA to this device is not supported. - */ - if (sizeof(mask) != sizeof(dma_addr_t) && - mask > (dma_addr_t)~0 && - dma_to_pfn(dev, ~0) < max_pfn - 1) { - if (warn) { - dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", - mask); - dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); - } - return 0; - } - - max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. 
This diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 7957d3457156..d50fafd7bf3a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1121,6 +1121,7 @@ config DMA_NONCOHERENT bool select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_UNCACHED_SEGMENT select NEED_DMA_MAP_STATE select ARCH_HAS_DMA_COHERENT_TO_PFN select DMA_NONCOHERENT_CACHE_SYNC diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index a25643d258cb..0ba4ce6e2bf3 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -258,9 +258,6 @@ extern bool __virt_addr_valid(const volatile void *kaddr); ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define UNCAC_ADDR(addr) (UNCAC_BASE + __pa(addr)) -#define CAC_ADDR(addr) ((unsigned long)__va((addr) - UNCAC_BASE)) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index bedb5047aff3..1804dc9d8136 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -575,10 +575,6 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, return NULL; } - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) { - dma_cache_wback_inv((unsigned long)ret, size); - ret = (void *)UNCAC_ADDR(ret); - } return ret; } @@ -586,8 +582,6 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { vdma_free(dma_handle); - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - vaddr = (void *)CAC_ADDR((unsigned long)vaddr); dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); } diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 3da216988672..33b409391ddb 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -62,8 +62,6 @@ void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); void (*_dma_cache_wback)(unsigned long start, unsigned long size); void (*_dma_cache_inv)(unsigned long start, unsigned long size); -EXPORT_SYMBOL(_dma_cache_wback_inv); - #endif /* CONFIG_DMA_NONCOHERENT */ /* diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index f9549d2fbea3..ed56c6fa7be2 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -44,33 +44,25 @@ static inline bool cpu_needs_post_dma_flush(struct device *dev) } } -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - void *ret; - - ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { - dma_cache_wback_inv((unsigned long) ret, size); - ret = (void *)UNCAC_ADDR(ret); - } + dma_cache_wback_inv((unsigned long)page_address(page), size); +} - return ret; +void *uncached_kernel_address(void *addr) +{ + return (void *)(__pa(addr) + UNCAC_BASE); } -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) +void *cached_kernel_address(void *addr) { - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr); - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); + return __va(addr) - UNCAC_BASE; } long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, dma_addr_t dma_addr) { - unsigned long addr = CAC_ADDR((unsigned long)cpu_addr); - return page_to_pfn(virt_to_page((void *)addr)); + return 
page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr))); } pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index fd0d0639454f..fbd68329737f 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -7,12 +7,14 @@ config NDS32 def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_WANT_FRAME_POINTERS if FTRACE select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK + select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index d0dbd4fe9645..490e3720d694 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -3,327 +3,13 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/string.h> #include <linux/dma-noncoherent.h> -#include <linux/io.h> #include <linux/cache.h> #include <linux/highmem.h> -#include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/proc-fns.h> -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_RAW_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct arch_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; - struct page *vm_pages; -}; - -static struct arch_vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; - -static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head, - size_t size, int gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct arch_vm_region *c, *new; - - new = kmalloc(sizeof(struct arch_vm_region), gfp); - if (!new) - goto out; - - raw_spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - -found: - /* - * Insert this entry _before_ the one we found. 
- */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - - raw_spin_unlock_irqrestore(&consistent_lock, flags); - return new; - -nospc: - raw_spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); -out: - return NULL; -} - -static struct arch_vm_region *vm_region_find(struct arch_vm_region *head, - unsigned long addr) -{ - struct arch_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_start == addr) - goto out; - } - c = NULL; -out: - return c; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, unsigned long attrs) -{ - struct page *page; - struct arch_vm_region *c; - unsigned long order; - u64 mask = ~0ULL, limit; - pgprot_t prot = pgprot_noncached(PAGE_KERNEL); - - if (!consistent_pte) { - pr_err("%s: not initialized\n", __func__); - dump_stack(); - return NULL; - } - - if (dev) { - mask = dev->coherent_dma_mask; - - /* - * Sanity check the DMA mask - it must be non-zero, and - * must be able to be satisfied by a DMA allocation. - */ - if (mask == 0) { - dev_warn(dev, "coherent DMA mask is unset\n"); - goto no_page; - } - - } - - /* - * Sanity check the allocation size. - */ - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || - size >= (CONSISTENT_END - CONSISTENT_BASE)) { - pr_warn("coherent allocation too big " - "(requested %#x mask %#llx)\n", size, mask); - goto no_page; - } - - order = get_order(size); - - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - cpu_dma_wbinval_range(kaddr, kaddr + size); - } - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = vm_region_alloc(&consistent_head, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - struct page *end = page + (1 << order); - - c->vm_pages = page; - - /* - * Set the "dma handle" - */ - *handle = page_to_phys(page); - - do { - BUG_ON(!pte_none(*pte)); - - /* - * x86 does not mark the pages reserved... - */ - SetPageReserved(page); - set_pte(pte, mk_pte(page, prot)); - page++; - pte++; - } while (size -= PAGE_SIZE); - - /* - * Free the otherwise unused pages. 
- */ - while (page < end) { - __free_page(page); - page++; - } - - return (void *)c->vm_start; - } - - if (page) - __free_pages(page, order); -no_page: - *handle = ~0; - return NULL; -} - -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) -{ - struct arch_vm_region *c; - unsigned long flags, addr; - pte_t *ptep; - - size = PAGE_ALIGN(size); - - raw_spin_lock_irqsave(&consistent_lock, flags); - - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - pr_err("%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - /* - * x86 does not mark the pages reserved... - */ - ClearPageReserved(page); - - __free_page(page); - continue; - } - } - - pr_crit("%s: bad page in kernel page table\n", __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - list_del(&c->vm_list); - - raw_spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; - -no_area: - raw_spin_unlock_irqrestore(&consistent_lock, flags); - pr_err("%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); -} - -/* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); - if (!pmd) { - pr_err("%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - /* The first level mapping may be created in somewhere. - * It's not necessary to warn here. 
*/ - /* WARN_ON(!pmd_none(*pmd)); */ - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(consistent_init); - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { @@ -389,3 +75,14 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, BUG(); } } + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + cache_op(page_to_phys(page), size, cpu_dma_wbinval_range); +} + +static int __init atomic_pool_init(void) +{ + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); +} +postcore_initcall(atomic_pool_init); diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 26a9c760a98b..44b5da37e8bd 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -4,6 +4,7 @@ config NIOS2 select ARCH_32BIT_OFF_T select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_UNCACHED_SEGMENT select ARCH_NO_SWAP select TIMER_OF select GENERIC_ATOMIC64 diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index f1fbdc47bdaf..79fcac61f6ef 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -101,12 +101,6 @@ static inline bool pfn_valid(unsigned long pfn) # define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -# define UNCAC_ADDR(addr) \ - ((void *)((unsigned)(addr) | CONFIG_NIOS2_IO_REGION_BASE)) -# define CAC_ADDR(addr) \ - ((void *)(((unsigned)(addr) & ~CONFIG_NIOS2_IO_REGION_BASE) | \ - CONFIG_NIOS2_KERNEL_REGION_BASE)) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c index 4af9e5b5ba1c..9cb238664584 100644 --- a/arch/nios2/mm/dma-mapping.c +++ b/arch/nios2/mm/dma-mapping.c @@ -60,32 +60,28 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, } } -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - void *ret; + unsigned long start = (unsigned long)page_address(page); - /* optimized page clearing */ - gfp |= __GFP_ZERO; + flush_dcache_range(start, start + size); +} - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; +void *uncached_kernel_address(void *ptr) +{ + unsigned long addr = (unsigned long)ptr; - ret = (void *) __get_free_pages(gfp, get_order(size)); - if (ret != NULL) { - *dma_handle = virt_to_phys(ret); - flush_dcache_range((unsigned long) ret, - (unsigned long) ret + size); - ret = UNCAC_ADDR(ret); - } + addr |= CONFIG_NIOS2_IO_REGION_BASE; - return ret; + return (void *)ptr; } -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) +void *cached_kernel_address(void *ptr) { - unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr); + unsigned long addr = (unsigned long)ptr; + + addr &= ~CONFIG_NIOS2_IO_REGION_BASE; + addr |= CONFIG_NIOS2_KERNEL_REGION_BASE; - free_pages(addr, get_order(size)); + return (void *)ptr; } diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 43e340c4cd9c..b41a79fcdbd9 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -94,15 +94,13 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, va = (unsigned long)page; - if ((attrs & 
DMA_ATTR_NON_CONSISTENT) == 0) { - /* - * We need to iterate through the pages, clearing the dcache for - * them and setting the cache-inhibit bit. - */ - if (walk_page_range(va, va + size, &walk)) { - free_pages_exact(page, size); - return NULL; - } + /* + * We need to iterate through the pages, clearing the dcache for + * them and setting the cache-inhibit bit. + */ + if (walk_page_range(va, va + size, &walk)) { + free_pages_exact(page, size); + return NULL; } return (void *)va; @@ -118,10 +116,8 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr, .mm = &init_mm }; - if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) { - /* walk_page_range shouldn't be able to fail here */ - WARN_ON(walk_page_range(va, va + size, &walk)); - } + /* walk_page_range shouldn't be able to fail here */ + WARN_ON(walk_page_range(va, va + size, &walk)); free_pages_exact(vaddr, size); } diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 239162355b58..ca35d9a76e50 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -394,17 +394,20 @@ pcxl_dma_init(void) __initcall(pcxl_dma_init); -static void *pcxl_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned long vaddr; unsigned long paddr; int order; + if (boot_cpu_data.cpu_type != pcxl2 && boot_cpu_data.cpu_type != pcxl) + return NULL; + order = get_order(size); size = 1 << (order + PAGE_SHIFT); vaddr = pcxl_alloc_range(size); - paddr = __get_free_pages(flag | __GFP_ZERO, order); + paddr = __get_free_pages(gfp | __GFP_ZERO, order); flush_kernel_dcache_range(paddr, size); paddr = __pa(paddr); map_uncached_pages(vaddr, size, paddr); @@ -421,44 +424,19 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size, return (void *)vaddr; } -static void *pcx_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) -{ - void *addr; - - if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) - return NULL; - - addr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); - if (addr) - *dma_handle = (dma_addr_t)virt_to_phys(addr); - - return addr; -} - -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) -{ - - if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) - return pcxl_dma_alloc(dev, size, dma_handle, gfp, attrs); - else - return pcx_dma_alloc(dev, size, dma_handle, gfp, attrs); -} - void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { int order = get_order(size); - if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) { - size = 1 << (order + PAGE_SHIFT); - unmap_uncached_pages((unsigned long)vaddr, size); - pcxl_free_range((unsigned long)vaddr, size); + WARN_ON_ONCE(boot_cpu_data.cpu_type != pcxl2 && + boot_cpu_data.cpu_type != pcxl); - vaddr = __va(dma_handle); - } - free_pages((unsigned long)vaddr, get_order(size)); + size = 1 << (order + PAGE_SHIFT); + unmap_uncached_pages((unsigned long)vaddr, size); + pcxl_free_range((unsigned long)vaddr, size); + + free_pages((unsigned long)__va(dma_handle), order); } void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index a87f8a308cc1..65f05776d827 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ 
b/arch/xtensa/kernel/pci-dma.c @@ -163,10 +163,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = phys_to_dma(dev, page_to_phys(page)); - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - return page; - } - #ifdef CONFIG_MMU if (PageHighMem(page)) { void *p; @@ -192,9 +188,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - page = vaddr; - } else if (platform_vaddr_uncached(vaddr)) { + if (platform_vaddr_uncached(vaddr)) { page = virt_to_page(platform_vaddr_to_cached(vaddr)); } else { #ifdef CONFIG_MMU diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f802255219d3..a7f9c3edbcb2 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -951,8 +951,8 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) if (pages) __iommu_dma_free_pages(pages, count); - if (page && !dma_release_from_contiguous(dev, page, count)) - __free_pages(page, get_order(alloc_size)); + if (page) + dma_free_contiguous(dev, page, alloc_size); } static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, @@ -970,12 +970,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, struct page *page = NULL; void *cpu_addr; - if (gfpflags_allow_blocking(gfp)) - page = dma_alloc_from_contiguous(dev, alloc_size >> PAGE_SHIFT, - get_order(alloc_size), - gfp & __GFP_NOWARN); - if (!page) - page = alloc_pages(gfp, get_order(alloc_size)); + page = dma_alloc_contiguous(dev, alloc_size, gfp); if (!page) return NULL; @@ -997,8 +992,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, memset(cpu_addr, 0, alloc_size); return cpu_addr; out_free_pages: - if (!dma_release_from_contiguous(dev, page, alloc_size >> PAGE_SHIFT)) - __free_pages(page, get_order(alloc_size)); + dma_free_contiguous(dev, page, alloc_size); return NULL; } diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index 94573fb68304..6e59d370ef81 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -45,6 +45,7 @@ config USB_ARCH_HAS_HCD config USB tristate "Support for Host-side USB" depends on USB_ARCH_HAS_HCD + select GENERIC_ALLOCATOR select USB_COMMON select NLS # for UTF-8 strings ---help--- diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index f641342cdec0..1359b78a624e 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/dma-mapping.h> #include <linux/dmapool.h> +#include <linux/genalloc.h> #include <linux/usb.h> #include <linux/usb/hcd.h> @@ -67,7 +68,7 @@ int hcd_buffer_create(struct usb_hcd *hcd) if (!IS_ENABLED(CONFIG_HAS_DMA) || (!is_device_dma_capable(hcd->self.sysdev) && - !(hcd->driver->flags & HCD_LOCAL_MEM))) + !hcd->localmem_pool)) return 0; for (i = 0; i < HCD_BUFFER_POOLS; i++) { @@ -124,10 +125,12 @@ void *hcd_buffer_alloc( if (size == 0) return NULL; + if (hcd->localmem_pool) + return gen_pool_dma_alloc(hcd->localmem_pool, size, dma); + /* some USB hosts just use PIO */ if (!IS_ENABLED(CONFIG_HAS_DMA) || - (!is_device_dma_capable(bus->sysdev) && - !(hcd->driver->flags & HCD_LOCAL_MEM))) { + !is_device_dma_capable(bus->sysdev)) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); } @@ -152,9 +155,13 @@ void hcd_buffer_free( if (!addr) return; + if (hcd->localmem_pool) { + gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); + return; + } + if 
(!IS_ENABLED(CONFIG_HAS_DMA) || - (!is_device_dma_capable(bus->sysdev) && - !(hcd->driver->flags & HCD_LOCAL_MEM))) { + !is_device_dma_capable(bus->sysdev)) { kfree(addr); return; } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 94d22551fc1b..88533938ce19 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -29,6 +29,8 @@ #include <linux/workqueue.h> #include <linux/pm_runtime.h> #include <linux/types.h> +#include <linux/genalloc.h> +#include <linux/io.h> #include <linux/phy/phy.h> #include <linux/usb.h> @@ -1345,14 +1347,14 @@ EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); * using regular system memory - like pci devices doing bus mastering. * * To support host controllers with limited dma capabilities we provide dma - * bounce buffers. This feature can be enabled using the HCD_LOCAL_MEM flag. + * bounce buffers. This feature can be enabled by initializing + * hcd->localmem_pool using usb_hcd_setup_local_mem(). * For this to work properly the host controller code must first use the * function dma_declare_coherent_memory() to point out which memory area * that should be used for dma allocations. * - * The HCD_LOCAL_MEM flag then tells the usb code to allocate all data for - * dma using dma_alloc_coherent() which in turn allocates from the memory - * area pointed out with dma_declare_coherent_memory(). + * The initialized hcd->localmem_pool then tells the usb code to allocate all + * data for dma using the genalloc API. * * So, to summarize... * @@ -1362,9 +1364,6 @@ EXPORT_SYMBOL_GPL(usb_hcd_unlink_urb_from_ep); * (a) "normal" kernel memory is no good, and * (b) there's not enough to share * - * - The only *portable* hook for such stuff in the - * DMA framework is dma_declare_coherent_memory() - * * - So we use that, even though the primary requirement * is that the memory be "local" (hence addressable * by that device), not "coherent". @@ -1531,7 +1530,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, urb->setup_dma)) return -EAGAIN; urb->transfer_flags |= URB_SETUP_MAP_SINGLE; - } else if (hcd->driver->flags & HCD_LOCAL_MEM) { + } else if (hcd->localmem_pool) { ret = hcd_alloc_coherent( urb->dev->bus, mem_flags, &urb->setup_dma, @@ -1601,7 +1600,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, else urb->transfer_flags |= URB_DMA_MAP_SINGLE; } - } else if (hcd->driver->flags & HCD_LOCAL_MEM) { + } else if (hcd->localmem_pool) { ret = hcd_alloc_coherent( urb->dev->bus, mem_flags, &urb->transfer_dma, @@ -3039,6 +3038,40 @@ usb_hcd_platform_shutdown(struct platform_device *dev) } EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown); +int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, + dma_addr_t dma, size_t size) +{ + int err; + void *local_mem; + + hcd->localmem_pool = devm_gen_pool_create(hcd->self.sysdev, 4, + dev_to_node(hcd->self.sysdev), + dev_name(hcd->self.sysdev)); + if (IS_ERR(hcd->localmem_pool)) + return PTR_ERR(hcd->localmem_pool); + + local_mem = devm_memremap(hcd->self.sysdev, phys_addr, + size, MEMREMAP_WC); + if (!local_mem) + return -ENOMEM; + + /* + * Here we pass a dma_addr_t but the arg type is a phys_addr_t. + * It's not backed by system memory and thus there's no kernel mapping + * for it. 
+ */ + err = gen_pool_add_virt(hcd->localmem_pool, (unsigned long)local_mem, + dma, size, dev_to_node(hcd->self.sysdev)); + if (err < 0) { + dev_err(hcd->self.sysdev, "gen_pool_add_virt failed with %d\n", + err); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(usb_hcd_setup_local_mem); + /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_MON) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index cdafa97f632d..9da7e22848c9 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -559,7 +559,7 @@ static int ehci_init(struct usb_hcd *hcd) ehci->command = temp; /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; /* Prepare for unlinking active QHs */ diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index e835a22b12af..77cc36efae95 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -4996,7 +4996,7 @@ static int hcd_fotg210_init(struct usb_hcd *hcd) fotg210->command = temp; /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; return 0; } diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 210181fd98d2..b457fdaff297 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -40,6 +40,7 @@ #include <linux/dmapool.h> #include <linux/workqueue.h> #include <linux/debugfs.h> +#include <linux/genalloc.h> #include <asm/io.h> #include <asm/irq.h> @@ -447,7 +448,7 @@ static int ohci_init (struct ohci_hcd *ohci) struct usb_hcd *hcd = ohci_to_hcd(ohci); /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; if (distrust_firmware) @@ -505,8 +506,15 @@ static int ohci_init (struct ohci_hcd *ohci) timer_setup(&ohci->io_watchdog, io_watchdog_func, 0); ohci->prev_frame_no = IO_WATCHDOG_OFF; - ohci->hcca = dma_alloc_coherent (hcd->self.controller, - sizeof(*ohci->hcca), &ohci->hcca_dma, GFP_KERNEL); + if (hcd->localmem_pool) + ohci->hcca = gen_pool_dma_alloc_align(hcd->localmem_pool, + sizeof(*ohci->hcca), + &ohci->hcca_dma, 256); + else + ohci->hcca = dma_alloc_coherent(hcd->self.controller, + sizeof(*ohci->hcca), + &ohci->hcca_dma, + GFP_KERNEL); if (!ohci->hcca) return -ENOMEM; @@ -990,9 +998,14 @@ static void ohci_stop (struct usb_hcd *hcd) remove_debug_files (ohci); ohci_mem_cleanup (ohci); if (ohci->hcca) { - dma_free_coherent (hcd->self.controller, - sizeof *ohci->hcca, - ohci->hcca, ohci->hcca_dma); + if (hcd->localmem_pool) + gen_pool_free(hcd->localmem_pool, + (unsigned long)ohci->hcca, + sizeof(*ohci->hcca)); + else + dma_free_coherent(hcd->self.controller, + sizeof(*ohci->hcca), + ohci->hcca, ohci->hcca_dma); ohci->hcca = NULL; ohci->hcca_dma = 0; } diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c index 3965ac0341eb..1425335c6baf 100644 --- a/drivers/usb/host/ohci-mem.c +++ b/drivers/usb/host/ohci-mem.c @@ -36,6 +36,13 @@ static void ohci_hcd_init (struct ohci_hcd *ohci) static int ohci_mem_init (struct ohci_hcd *ohci) { + /* + * HCs with local memory allocate from localmem_pool so there's + * no need to create the below dma pools. 
+ */ + if (ohci_to_hcd(ohci)->localmem_pool) + return 0; + ohci->td_cache = dma_pool_create ("ohci_td", ohci_to_hcd(ohci)->self.controller, sizeof (struct td), @@ -84,8 +91,13 @@ td_alloc (struct ohci_hcd *hc, gfp_t mem_flags) { dma_addr_t dma; struct td *td; + struct usb_hcd *hcd = ohci_to_hcd(hc); - td = dma_pool_zalloc (hc->td_cache, mem_flags, &dma); + if (hcd->localmem_pool) + td = gen_pool_dma_zalloc_align(hcd->localmem_pool, + sizeof(*td), &dma, 32); + else + td = dma_pool_zalloc(hc->td_cache, mem_flags, &dma); if (td) { /* in case hc fetches it, make it look dead */ td->hwNextTD = cpu_to_hc32 (hc, dma); @@ -99,6 +111,7 @@ static void td_free (struct ohci_hcd *hc, struct td *td) { struct td **prev = &hc->td_hash [TD_HASH_FUNC (td->td_dma)]; + struct usb_hcd *hcd = ohci_to_hcd(hc); while (*prev && *prev != td) prev = &(*prev)->td_hash; @@ -106,7 +119,12 @@ td_free (struct ohci_hcd *hc, struct td *td) *prev = td->td_hash; else if ((td->hwINFO & cpu_to_hc32(hc, TD_DONE)) != 0) ohci_dbg (hc, "no hash for td %p\n", td); - dma_pool_free (hc->td_cache, td, td->td_dma); + + if (hcd->localmem_pool) + gen_pool_free(hcd->localmem_pool, (unsigned long)td, + sizeof(*td)); + else + dma_pool_free(hc->td_cache, td, td->td_dma); } /*-------------------------------------------------------------------------*/ @@ -117,8 +135,13 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags) { dma_addr_t dma; struct ed *ed; + struct usb_hcd *hcd = ohci_to_hcd(hc); - ed = dma_pool_zalloc (hc->ed_cache, mem_flags, &dma); + if (hcd->localmem_pool) + ed = gen_pool_dma_zalloc_align(hcd->localmem_pool, + sizeof(*ed), &dma, 16); + else + ed = dma_pool_zalloc(hc->ed_cache, mem_flags, &dma); if (ed) { INIT_LIST_HEAD (&ed->td_list); ed->dma = dma; @@ -129,6 +152,12 @@ ed_alloc (struct ohci_hcd *hc, gfp_t mem_flags) static void ed_free (struct ohci_hcd *hc, struct ed *ed) { - dma_pool_free (hc->ed_cache, ed, ed->dma); + struct usb_hcd *hcd = ohci_to_hcd(hc); + + if (hcd->localmem_pool) + gen_pool_free(hcd->localmem_pool, (unsigned long)ed, + sizeof(*ed)); + else + dma_pool_free(hc->ed_cache, ed, ed->dma); } diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c index c26228c25f99..c158cda9e4b9 100644 --- a/drivers/usb/host/ohci-sm501.c +++ b/drivers/usb/host/ohci-sm501.c @@ -49,7 +49,7 @@ static const struct hc_driver ohci_sm501_hc_driver = { * generic hardware linkage */ .irq = ohci_irq, - .flags = HCD_USB11 | HCD_MEMORY | HCD_LOCAL_MEM, + .flags = HCD_USB11 | HCD_MEMORY, /* * basic lifecycle operations @@ -110,40 +110,18 @@ static int ohci_hcd_sm501_drv_probe(struct platform_device *pdev) goto err0; } - /* The sm501 chip is equipped with local memory that may be used - * by on-chip devices such as the video controller and the usb host. - * This driver uses dma_declare_coherent_memory() to make sure - * usb allocations with dma_alloc_coherent() allocate from - * this local memory. The dma_handle returned by dma_alloc_coherent() - * will be an offset starting from 0 for the first local memory byte. - * - * So as long as data is allocated using dma_alloc_coherent() all is - * fine. This is however not always the case - buffers may be allocated - * using kmalloc() - so the usb core needs to be told that it must copy - * data into our local memory if the buffers happen to be placed in - * regular memory. The HCD_LOCAL_MEM flag does just that. 
- */ - - retval = dma_declare_coherent_memory(dev, mem->start, - mem->start - mem->parent->start, - resource_size(mem)); - if (retval) { - dev_err(dev, "cannot declare coherent memory\n"); - goto err1; - } - /* allocate, reserve and remap resources for registers */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(dev, "no resource definition for registers\n"); retval = -ENOENT; - goto err2; + goto err1; } hcd = usb_create_hcd(driver, &pdev->dev, dev_name(&pdev->dev)); if (!hcd) { retval = -ENOMEM; - goto err2; + goto err1; } hcd->rsrc_start = res->start; @@ -164,6 +142,25 @@ static int ohci_hcd_sm501_drv_probe(struct platform_device *pdev) ohci_hcd_init(hcd_to_ohci(hcd)); + /* The sm501 chip is equipped with local memory that may be used + * by on-chip devices such as the video controller and the usb host. + * This driver uses genalloc so that usb allocations with + * gen_pool_dma_alloc() allocate from this local memory. The dma_handle + * returned by gen_pool_dma_alloc() will be an offset starting from 0 + * for the first local memory byte. + * + * So as long as data is allocated using gen_pool_dma_alloc() all is + * fine. This is however not always the case - buffers may be allocated + * using kmalloc() - so the usb core needs to be told that it must copy + * data into our local memory if the buffers happen to be placed in + * regular memory. A non-null hcd->localmem_pool initialized by the + * the call to usb_hcd_setup_local_mem() below does just that. + */ + + if (usb_hcd_setup_local_mem(hcd, mem->start, + mem->start - mem->parent->start, + resource_size(mem)) < 0) + goto err5; retval = usb_add_hcd(hcd, irq, IRQF_SHARED); if (retval) goto err5; @@ -181,8 +178,6 @@ err4: release_mem_region(hcd->rsrc_start, hcd->rsrc_len); err3: usb_put_hcd(hcd); -err2: - dma_release_declared_memory(dev); err1: release_mem_region(mem->start, resource_size(mem)); err0: @@ -197,7 +192,6 @@ static int ohci_hcd_sm501_drv_remove(struct platform_device *pdev) usb_remove_hcd(hcd); release_mem_region(hcd->rsrc_start, hcd->rsrc_len); usb_put_hcd(hcd); - dma_release_declared_memory(&pdev->dev); mem = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (mem) release_mem_region(mem->start, resource_size(mem)); diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c index f88a0370659f..d5a293a707b6 100644 --- a/drivers/usb/host/ohci-tmio.c +++ b/drivers/usb/host/ohci-tmio.c @@ -153,7 +153,7 @@ static const struct hc_driver ohci_tmio_hc_driver = { /* generic hardware linkage */ .irq = ohci_irq, - .flags = HCD_USB11 | HCD_MEMORY | HCD_LOCAL_MEM, + .flags = HCD_USB11 | HCD_MEMORY, /* basic lifecycle operations */ .start = ohci_tmio_start, @@ -224,11 +224,6 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev) goto err_ioremap_regs; } - ret = dma_declare_coherent_memory(&dev->dev, sram->start, sram->start, - resource_size(sram)); - if (ret) - goto err_dma_declare; - if (cell->enable) { ret = cell->enable(dev); if (ret) @@ -239,6 +234,11 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev) ohci = hcd_to_ohci(hcd); ohci_hcd_init(ohci); + ret = usb_hcd_setup_local_mem(hcd, sram->start, sram->start, + resource_size(sram)); + if (ret < 0) + goto err_enable; + ret = usb_add_hcd(hcd, irq, 0); if (ret) goto err_add_hcd; @@ -254,8 +254,6 @@ err_add_hcd: if (cell->disable) cell->disable(dev); err_enable: - dma_release_declared_memory(&dev->dev); -err_dma_declare: iounmap(hcd->regs); err_ioremap_regs: iounmap(tmio->ccr); @@ -276,7 +274,6 @@ static int 
ohci_hcd_tmio_drv_remove(struct platform_device *dev) tmio_stop_hc(dev); if (cell->disable) cell->disable(dev); - dma_release_declared_memory(&dev->dev); iounmap(hcd->regs); iounmap(tmio->ccr); usb_put_hcd(hcd); diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index ef4813bfc5bf..b015b00774b2 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -385,6 +385,8 @@ struct ohci_hcd { /* * memory management for queue data structures + * + * @td_cache and @ed_cache are %NULL if &usb_hcd.localmem_pool is used. */ struct dma_pool *td_cache; struct dma_pool *ed_cache; diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 98deb5f64268..03bc59755123 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -581,7 +581,7 @@ static int uhci_start(struct usb_hcd *hcd) hcd->uses_new_polling = 1; /* Accept arbitrarily long scatter-gather lists */ - if (!(hcd->driver->flags & HCD_LOCAL_MEM)) + if (!hcd->localmem_pool) hcd->self.sg_tablesize = ~0; spin_lock_init(&uhci->lock); diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index 0adf0683cf08..99941ae1f3a1 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c @@ -340,14 +340,12 @@ int au1100fb_fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *fbi) */ int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma) { - struct au1100fb_device *fbdev; - - fbdev = to_au1100fb_device(fbi); + struct au1100fb_device *fbdev = to_au1100fb_device(fbi); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6 - return vm_iomap_memory(vma, fbdev->fb_phys, fbdev->fb_len); + return dma_mmap_coherent(fbdev->dev, vma, fbdev->fb_mem, fbdev->fb_phys, + fbdev->fb_len); } static struct fb_ops au1100fb_ops = @@ -412,7 +410,6 @@ static int au1100fb_drv_probe(struct platform_device *dev) { struct au1100fb_device *fbdev; struct resource *regs_res; - unsigned long page; struct clk *c; /* Allocate new device private */ @@ -424,6 +421,7 @@ static int au1100fb_drv_probe(struct platform_device *dev) goto failed; platform_set_drvdata(dev, (void *)fbdev); + fbdev->dev = &dev->dev; /* Allocate region for our registers and map them */ regs_res = platform_get_resource(dev, IORESOURCE_MEM, 0); @@ -472,20 +470,6 @@ static int au1100fb_drv_probe(struct platform_device *dev) au1100fb_fix.smem_start = fbdev->fb_phys; au1100fb_fix.smem_len = fbdev->fb_len; - /* - * Set page reserved so that mmap will work. This is necessary - * since we'll be remapping normal memory. 
- */ - for (page = (unsigned long)fbdev->fb_mem; - page < PAGE_ALIGN((unsigned long)fbdev->fb_mem + fbdev->fb_len); - page += PAGE_SIZE) { -#ifdef CONFIG_DMA_NONCOHERENT - SetPageReserved(virt_to_page(CAC_ADDR((void *)page))); -#else - SetPageReserved(virt_to_page(page)); -#endif - } - print_dbg("Framebuffer memory map at %p", fbdev->fb_mem); print_dbg("phys=0x%08x, size=%dK", fbdev->fb_phys, fbdev->fb_len / 1024); diff --git a/drivers/video/fbdev/au1100fb.h b/drivers/video/fbdev/au1100fb.h index 9af19939a9c6..e7239bceefd3 100644 --- a/drivers/video/fbdev/au1100fb.h +++ b/drivers/video/fbdev/au1100fb.h @@ -110,6 +110,7 @@ struct au1100fb_device { dma_addr_t fb_phys; int panel_idx; struct clk *lcdclk; + struct device *dev; }; /********************************************************************/ diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 6665fa03c0d1..c05d4e661489 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -50,6 +50,7 @@ #ifdef __KERNEL__ #include <linux/device.h> +#include <linux/mm.h> struct cma; struct page; @@ -111,6 +112,8 @@ struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order, bool no_warn); bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); +void dma_free_contiguous(struct device *dev, struct page *page, size_t size); #else @@ -153,6 +156,22 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return false; } +/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ +static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, + gfp_t gfp) +{ + int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + size_t align = get_order(PAGE_ALIGN(size)); + + return alloc_pages_node(node, gfp, align); +} + +static inline void dma_free_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __free_pages(page, get_order(size)); +} + #endif #endif diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 9741767e400f..3813211a9aad 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -20,6 +20,22 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +/* + * Check if an allocation needs to be marked uncached to be coherent. 
+ */ +static __always_inline bool dma_alloc_need_uncached(struct device *dev, + unsigned long attrs) +{ + if (dev_is_dma_coherent(dev)) + return false; + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return false; + if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && + (attrs & DMA_ATTR_NON_CONSISTENT)) + return false; + return true; +} + void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, @@ -80,4 +96,7 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ +void *uncached_kernel_address(void *addr); +void *cached_kernel_address(void *addr); + #endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 205f62b8d291..4bd583bd6934 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -155,6 +155,15 @@ static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma); +extern void *gen_pool_dma_alloc_algo(struct gen_pool *pool, size_t size, + dma_addr_t *dma, genpool_algo_t algo, void *data); +extern void *gen_pool_dma_alloc_align(struct gen_pool *pool, size_t size, + dma_addr_t *dma, int align); +extern void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma); +extern void *gen_pool_dma_zalloc_algo(struct gen_pool *pool, size_t size, + dma_addr_t *dma, genpool_algo_t algo, void *data); +extern void *gen_pool_dma_zalloc_align(struct gen_pool *pool, size_t size, + dma_addr_t *dma, int align); extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size, void **owner); static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr, diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index bb57b5af4700..bab27ccc8ff5 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -216,6 +216,9 @@ struct usb_hcd { #define HC_IS_RUNNING(state) ((state) & __ACTIVE) #define HC_IS_SUSPENDED(state) ((state) & __SUSPEND) + /* memory pool for HCs having local memory, or %NULL */ + struct gen_pool *localmem_pool; + /* more shared queuing code would be good; it should support * smarter scheduling, handle transaction translators, etc; * input size of periodic table to an interrupt scheduler. 
@@ -253,7 +256,6 @@ struct hc_driver { int flags; #define HCD_MEMORY 0x0001 /* HC regs use memory (else I/O) */ -#define HCD_LOCAL_MEM 0x0002 /* HC needs local memory */ #define HCD_SHARED 0x0004 /* Two (or more) usb_hcds share HW */ #define HCD_USB11 0x0010 /* USB 1.1 */ #define HCD_USB2 0x0020 /* USB 2.0 */ @@ -461,6 +463,8 @@ extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); extern void usb_remove_hcd(struct usb_hcd *hcd); extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); +int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr, + dma_addr_t dma, size_t size); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index b2a87905846d..bfc0c17f2a3d 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -214,6 +214,62 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return cma_release(dev_get_cma_area(dev), pages, count); } +/** + * dma_alloc_contiguous() - allocate contiguous pages + * @dev: Pointer to device for which the allocation is performed. + * @size: Requested allocation size. + * @gfp: Allocation flags. + * + * This function allocates contiguous memory buffer for specified device. It + * first tries to use device specific contiguous memory area if available or + * the default global one, then tries a fallback allocation of normal pages. + * + * Note that it byapss one-page size of allocations from the global area as + * the addresses within one page are always contiguous, so there is no need + * to waste CMA pages for that kind; it also helps reduce fragmentations. + */ +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) +{ + int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; + size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; + size_t align = get_order(PAGE_ALIGN(size)); + struct page *page = NULL; + struct cma *cma = NULL; + + if (dev && dev->cma_area) + cma = dev->cma_area; + else if (count > 1) + cma = dma_contiguous_default_area; + + /* CMA can be used only in the context which permits sleeping */ + if (cma && gfpflags_allow_blocking(gfp)) { + align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); + page = cma_alloc(cma, count, align, gfp & __GFP_NOWARN); + } + + /* Fallback allocation of normal pages */ + if (!page) + page = alloc_pages_node(node, gfp, align); + return page; +} + +/** + * dma_free_contiguous() - release allocated pages + * @dev: Pointer to device for which the pages were allocated. + * @page: Pointer to the allocated pages. + * @size: Size of allocated pages. + * + * This function releases memory allocated by dma_alloc_contiguous(). As the + * cma_release returns false when provided pages do not belong to contiguous + * area and true otherwise, this function then does a fallback __free_pages() + * upon a false-return. 
+ */ +void dma_free_contiguous(struct device *dev, struct page *page, size_t size) +{ + if (!cma_release(dev_get_cma_area(dev), page, size >> PAGE_SHIFT)) + __free_pages(page, get_order(size)); +} + /* * Support for reserved memory regions defined in device tree */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 2c2772e9702a..b90e1aede743 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -96,8 +96,6 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - int page_order = get_order(size); struct page *page = NULL; u64 phys_mask; @@ -109,20 +107,9 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_mask); again: - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) { - page = dma_alloc_from_contiguous(dev, count, page_order, - gfp & __GFP_NOWARN); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - if (!page) - page = alloc_pages_node(dev_to_node(dev), gfp, page_order); - + page = dma_alloc_contiguous(dev, size, gfp); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - __free_pages(page, page_order); + dma_free_contiguous(dev, page, size); page = NULL; if (IS_ENABLED(CONFIG_ZONE_DMA32) && @@ -151,10 +138,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + /* remove any dirty cache lines on the kernel alias */ + if (!PageHighMem(page)) + arch_dma_prep_coherent(page, size); + /* return the page pointer as the opaque cookie */ + return page; + } + if (PageHighMem(page)) { /* * Depending on the cma= arguments and per-arch setup - * dma_alloc_from_contiguous could return highmem pages. + * dma_alloc_contiguous could return highmem pages. * Without remapping there is no way to return them here, * so log an error and fail. 
*/ @@ -171,15 +166,19 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, *dma_handle = phys_to_dma(dev, page_to_phys(page)); } memset(ret, 0, size); + + if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + dma_alloc_need_uncached(dev, attrs)) { + arch_dma_prep_coherent(page, size); + ret = uncached_kernel_address(ret); + } + return ret; } void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page) { - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - - if (!dma_release_from_contiguous(dev, page, count)) - __free_pages(page, get_order(size)); + dma_free_contiguous(dev, page, size); } void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, @@ -187,15 +186,26 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, { unsigned int page_order = get_order(size); + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + /* cpu_addr is a struct page cookie, not a kernel address */ + __dma_direct_free_pages(dev, size, cpu_addr); + return; + } + if (force_dma_unencrypted()) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + + if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + dma_alloc_need_uncached(dev, attrs)) + cpu_addr = cached_kernel_address(cpu_addr); __dma_direct_free_pages(dev, size, virt_to_page(cpu_addr)); } void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { - if (!dev_is_dma_coherent(dev)) + if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + dma_alloc_need_uncached(dev, attrs)) return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); } @@ -203,7 +213,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!dev_is_dma_coherent(dev)) + if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) && + dma_alloc_need_uncached(dev, attrs)) arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); else dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f7afdadb6770..1f628e7ac709 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -317,6 +317,12 @@ void arch_dma_set_mask(struct device *dev, u64 mask); int dma_set_mask(struct device *dev, u64 mask) { + /* + * Truncate the mask to the actually supported dma_addr_t width to + * avoid generating unsupportable addresses. + */ + mask = (dma_addr_t)mask; + if (!dev->dma_mask || !dma_supported(dev, mask)) return -EIO; @@ -330,6 +336,12 @@ EXPORT_SYMBOL(dma_set_mask); #ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK int dma_set_coherent_mask(struct device *dev, u64 mask) { + /* + * Truncate the mask to the actually supported dma_addr_t width to + * avoid generating unsupportable addresses. 
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 7a723194ecbe..a594aec07882 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -158,6 +158,9 @@ out:
 
 bool dma_in_atomic_pool(void *start, size_t size)
 {
+	if (unlikely(!atomic_pool))
+		return false;
+
 	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
 }
 
@@ -199,8 +202,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	size = PAGE_ALIGN(size);
 
-	if (!gfpflags_allow_blocking(flags) &&
-	    !(attrs & DMA_ATTR_NO_KERNEL_MAPPING)) {
+	if (!gfpflags_allow_blocking(flags)) {
 		ret = dma_alloc_from_pool(size, &page, flags);
 		if (!ret)
 			return NULL;
@@ -214,11 +216,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	/* remove any dirty cache lines on the kernel alias */
 	arch_dma_prep_coherent(page, size);
 
-	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
-		ret = page; /* opaque cookie */
-		goto done;
-	}
-
 	/* create a coherent mapping */
 	ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
 			arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
@@ -237,10 +234,7 @@ done:
 void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
-	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
-		/* vaddr is a struct page cookie, not a kernel address */
-		__dma_direct_free_pages(dev, size, vaddr);
-	} else if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
+	if (!dma_free_from_pool(vaddr, PAGE_ALIGN(size))) {
 		phys_addr_t phys = dma_to_phys(dev, dma_handle);
 		struct page *page = pfn_to_page(__phys_to_pfn(phys));
 
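
Seen from a driver, the remap-allocator path above means a non-blocking coherent allocation is served from the boot-time atomic pool, and the new NULL-pool check keeps lookups safe when that pool failed to come up; a rough sketch under those assumptions (the helper name is hypothetical):

#include <linux/dma-mapping.h>

/* GFP_ATOMIC forbids blocking, so on a DMA_DIRECT_REMAP architecture
 * this allocation is satisfied from the pre-allocated atomic pool. */
static void *example_alloc_atomic(struct device *dev, size_t size,
				  dma_addr_t *dma_handle)
{
	return dma_alloc_coherent(dev, size, dma_handle, GFP_ATOMIC);
}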
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 5257f74fccf3..9fc31292cfa1 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -327,21 +327,45 @@ EXPORT_SYMBOL(gen_pool_alloc_algo_owner);
  * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
  * @pool: pool to allocate from
  * @size: number of bytes to allocate from the pool
- * @dma: dma-view physical address return value. Use NULL if unneeded.
+ * @dma: dma-view physical address return value. Use %NULL if unneeded.
  *
  * Allocate the requested number of bytes from the specified pool.
  * Uses the pool allocation function (with first-fit algorithm by default).
  * Can not be used in NMI handler on architectures without
  * NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated memory, or %NULL on failure
  */
 void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
 {
+	return gen_pool_dma_alloc_algo(pool, size, dma, pool->algo, pool->data);
+}
+EXPORT_SYMBOL(gen_pool_dma_alloc);
+
+/**
+ * gen_pool_dma_alloc_algo - allocate special memory from the pool for DMA
+ * usage with the given pool algorithm
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @algo: algorithm passed from caller
+ * @data: data passed to algorithm
+ *
+ * Allocate the requested number of bytes from the specified pool. Uses the
+ * given pool allocation function. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated memory, or %NULL on failure
+ */
+void *gen_pool_dma_alloc_algo(struct gen_pool *pool, size_t size,
+		dma_addr_t *dma, genpool_algo_t algo, void *data)
+{
 	unsigned long vaddr;
 
 	if (!pool)
 		return NULL;
 
-	vaddr = gen_pool_alloc(pool, size);
+	vaddr = gen_pool_alloc_algo(pool, size, algo, data);
 	if (!vaddr)
 		return NULL;
 
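
A hypothetical caller of the new _algo entry point above, selecting best-fit instead of the pool's default algorithm (everything except the genalloc API itself is illustrative):

#include <linux/genalloc.h>

/* Allocate a DMA buffer from a device-local pool using best-fit. */
static void *example_bestfit_dma_alloc(struct gen_pool *pool, size_t size,
				       dma_addr_t *dma)
{
	return gen_pool_dma_alloc_algo(pool, size, dma,
				       gen_pool_best_fit, NULL);
}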
@@ -350,7 +374,102 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
 
 	return (void *)vaddr;
 }
-EXPORT_SYMBOL(gen_pool_dma_alloc);
+EXPORT_SYMBOL(gen_pool_dma_alloc_algo);
+
+/**
+ * gen_pool_dma_alloc_align - allocate special memory from the pool for DMA
+ * usage with the given alignment
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @align: alignment in bytes for starting address
+ *
+ * Allocate the requested number of bytes from the specified pool, with the
+ * given alignment restriction. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated memory, or %NULL on failure
+ */
+void *gen_pool_dma_alloc_align(struct gen_pool *pool, size_t size,
+		dma_addr_t *dma, int align)
+{
+	struct genpool_data_align data = { .align = align };
+
+	return gen_pool_dma_alloc_algo(pool, size, dma,
+			gen_pool_first_fit_align, &data);
+}
+EXPORT_SYMBOL(gen_pool_dma_alloc_align);
+
+/**
+ * gen_pool_dma_zalloc - allocate special zeroed memory from the pool for
+ * DMA usage
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: dma-view physical address return value. Use %NULL if unneeded.
+ *
+ * Allocate the requested number of zeroed bytes from the specified pool.
+ * Uses the pool allocation function (with first-fit algorithm by default).
+ * Can not be used in NMI handler on architectures without
+ * NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated zeroed memory, or %NULL on failure
+ */
+void *gen_pool_dma_zalloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
+{
+	return gen_pool_dma_zalloc_algo(pool, size, dma, pool->algo, pool->data);
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc);
+
+/**
+ * gen_pool_dma_zalloc_algo - allocate special zeroed memory from the pool for
+ * DMA usage with the given pool algorithm
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @algo: algorithm passed from caller
+ * @data: data passed to algorithm
+ *
+ * Allocate the requested number of zeroed bytes from the specified pool. Uses
+ * the given pool allocation function. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated zeroed memory, or %NULL on failure
+ */
+void *gen_pool_dma_zalloc_algo(struct gen_pool *pool, size_t size,
+		dma_addr_t *dma, genpool_algo_t algo, void *data)
+{
+	void *vaddr = gen_pool_dma_alloc_algo(pool, size, dma, algo, data);
+
+	if (vaddr)
+		memset(vaddr, 0, size);
+
+	return vaddr;
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc_algo);
+
+/**
+ * gen_pool_dma_zalloc_align - allocate special zeroed memory from the pool for
+ * DMA usage with the given alignment
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ * @dma: DMA-view physical address return value. Use %NULL if unneeded.
+ * @align: alignment in bytes for starting address
+ *
+ * Allocate the requested number of zeroed bytes from the specified pool,
+ * with the given alignment restriction. Can not be used in NMI handler on
+ * architectures without NMI-safe cmpxchg implementation.
+ *
+ * Return: virtual address of the allocated zeroed memory, or %NULL on failure
+ */
+void *gen_pool_dma_zalloc_align(struct gen_pool *pool, size_t size,
+		dma_addr_t *dma, int align)
+{
+	struct genpool_data_align data = { .align = align };
+
+	return gen_pool_dma_zalloc_algo(pool, size, dma,
+			gen_pool_first_fit_align, &data);
+}
+EXPORT_SYMBOL(gen_pool_dma_zalloc_align);
 
 /**
  * gen_pool_free - free allocated special memory back to the pool
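
Finally, a usage sketch for the zeroed, aligned variant together with its release path; the pool is assumed to have been created elsewhere (for instance by a host controller driver managing device-local memory), and the helper names are illustrative:

#include <linux/genalloc.h>

/* Zeroed allocation aligned to a 64-byte boundary. */
static void *example_zalloc_aligned(struct gen_pool *pool, size_t size,
				    dma_addr_t *dma)
{
	return gen_pool_dma_zalloc_align(pool, size, dma, 64);
}

/* Return the memory to the pool once the device is done with it. */
static void example_release(struct gen_pool *pool, void *vaddr, size_t size)
{
	gen_pool_free(pool, (unsigned long)vaddr, size);
}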