diff options
field | value | date |
---|---|---|
author | Ralf Baechle <ralf@linux-mips.org> | 2012-01-11 15:41:47 +0100 |
committer | Ralf Baechle <ralf@linux-mips.org> | 2012-01-11 15:41:47 +0100 |
commit | 39b741431af7f6f46b2e0e7f7f13ea2351fb4a5f (patch) | |
tree | 89355f4ae7bbb874537bb65f71ba0d19b3d468e1 /arch/mips/mm | |
parent | 5b0ec2efb7d373faa7b1a7632c459b93895d45cd (diff) | |
parent | d7a887a73dec6c387b02a966a71aac767bbd9ce6 (diff) | |
Merge branch 'next/generic' into mips-for-linux-next
Diffstat (limited to 'arch/mips/mm')
-rw-r--r-- | arch/mips/mm/Makefile | 4 |
-rw-r--r-- | arch/mips/mm/gup.c | 315 |
-rw-r--r-- | arch/mips/mm/init.c | 9 |
-rw-r--r-- | arch/mips/mm/tlb-r4k.c | 67 |
4 files changed, 338 insertions, 57 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 4d8c1623eee2..3ca2a065cf76 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -3,8 +3,8 @@ # obj-y += cache.o dma-default.o extable.o fault.o \ - init.o mmap.o tlbex.o tlbex-fault.o uasm.o \ - page.o + gup.o init.o mmap.o page.o tlbex.o \ + tlbex-fault.o uasm.o obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o obj-$(CONFIG_64BIT) += pgtable-64.o diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c new file mode 100644 index 000000000000..33aadbcf170b --- /dev/null +++ b/arch/mips/mm/gup.c @@ -0,0 +1,315 @@ +/* + * Lockless get_user_pages_fast for MIPS + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + * Copyright (C) 2011 Ralf Baechle + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/highmem.h> +#include <linux/swap.h> +#include <linux/hugetlb.h> + +#include <asm/pgtable.h> + +static inline pte_t gup_get_pte(pte_t *ptep) +{ +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) + pte_t pte; + +retry: + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + if (unlikely(pte.pte_low != ptep->pte_low)) + goto retry; + + return pte; +#else + return ACCESS_ONCE(*ptep); +#endif +} + +static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + pte_t *ptep = pte_offset_map(&pmd, addr); + do { + pte_t pte = gup_get_pte(ptep); + struct page *page; + + if (!pte_present(pte) || + pte_special(pte) || (write && !pte_write(pte))) { + pte_unmap(ptep); + return 0; + } + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + get_page(page); + SetPageReferenced(page); + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + pte_unmap(ptep - 1); + return 1; +} + +static inline void get_head_page_multiple(struct page *page, int nr) +{ + VM_BUG_ON(page != compound_head(page)); + 
VM_BUG_ON(page_count(page) == 0); + atomic_add(nr, &page->_count); + SetPageReferenced(page); +} + +static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + pte_t pte = *(pte_t *)&pmd; + struct page *head, *page; + int refs; + + if (write && !pte_write(pte)) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(pte_special(pte)); + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + get_head_page_multiple(head, refs); + return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + /* + * The pmd_trans_splitting() check below explains why + * pmdp_splitting_flush has to flush the tlb, to stop + * this gup-fast code from running while we set the + * splitting bit in the pmd. Returning zero will take + * the slow path that will call wait_split_huge_page() + * if the pmd is still in splitting state. gup-fast + * can't because it has irq disabled and + * wait_split_huge_page() would never return as the + * tlb flush IPI wouldn't run. 
+ */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + return 0; + if (unlikely(pmd_huge(pmd))) { + if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) + return 0; + } else { + if (!gup_pte_range(pmd, addr, next, write, pages,nr)) + return 0; + } + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + pte_t pte = *(pte_t *)&pud; + struct page *head, *page; + int refs; + + if (write && !pte_write(pte)) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(pte_special(pte)); + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + get_head_page_multiple(head, refs); + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (unlikely(pud_huge(pud))) { + if (!gup_huge_pud(pud, addr, next, write, pages,nr)) + return 0; + } else { + if (!gup_pmd_range(pud, addr, next, write, pages,nr)) + return 0; + } + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + unsigned long flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? 
VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch + * size will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the page and take a ref on it. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. 
+ */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int ret, nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + + end = start + len; + if (end < start) + goto slow_irqon; + + /* XXX: batch / limit 'nr' */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; +slow: + local_irq_enable(); + +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, + write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + return ret; +} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index b7ebc4fa89bc..3b3ffd439cd7 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -304,9 +304,14 @@ int page_is_ram(unsigned long pagenr) for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long addr, end; - if (boot_mem_map.map[i].type != BOOT_MEM_RAM) + switch (boot_mem_map.map[i].type) { + case BOOT_MEM_RAM: + case BOOT_MEM_INIT_RAM: + break; + default: /* not usable memory */ continue; + } addr = PFN_UP(boot_mem_map.map[i].addr); end = PFN_DOWN(boot_mem_map.map[i].addr + @@ -379,7 +384,7 @@ void __init mem_init(void) reservedpages = ram = 0; for (tmp = 0; tmp < max_low_pfn; tmp++) - if (page_is_ram(tmp)) { + if (page_is_ram(tmp) && pfn_valid(tmp)) { ram++; if (PageReserved(pfn_to_page(tmp))) reservedpages++; 
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index d163455552b0..2dc625346c40 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -121,22 +121,30 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, if (cpu_context(cpu, mm) != 0) { unsigned long size, flags; + int huge = is_vm_hugetlb_page(vma); ENTER_CRITICAL(flags); - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; + if (huge) { + start = round_down(start, HPAGE_SIZE); + end = round_up(end, HPAGE_SIZE); + size = (end - start) >> HPAGE_SHIFT; + } else { + start = round_down(start, PAGE_SIZE << 1); + end = round_up(end, PAGE_SIZE << 1); + size = (end - start) >> (PAGE_SHIFT + 1); + } if (size <= current_cpu_data.tlbsize/2) { int oldpid = read_c0_entryhi(); int newpid = cpu_asid(cpu, mm); - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); while (start < end) { int idx; write_c0_entryhi(start | newpid); - start += (PAGE_SIZE << 1); + if (huge) + start += HPAGE_SIZE; + else + start += (PAGE_SIZE << 1); mtc0_tlbw_hazard(); tlb_probe(); tlb_probe_hazard(); @@ -369,51 +377,6 @@ void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, EXIT_CRITICAL(flags); } -/* - * Used for loading TLB entries before trap_init() has started, when we - * don't actually want to add a wired entry which remains throughout the - * lifetime of the system - */ - -static int temp_tlb_entry __cpuinitdata; - -__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, - unsigned long entryhi, unsigned long pagemask) -{ - int ret = 0; - unsigned long flags; - unsigned long wired; - unsigned long old_pagemask; - unsigned long old_ctx; - - ENTER_CRITICAL(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi(); - old_pagemask = read_c0_pagemask(); - wired = read_c0_wired(); - if (--temp_tlb_entry < wired) { - printk(KERN_WARNING - "No TLB space left for 
add_temporary_entry\n"); - ret = -ENOSPC; - goto out; - } - - write_c0_index(temp_tlb_entry); - write_c0_pagemask(pagemask); - write_c0_entryhi(entryhi); - write_c0_entrylo0(entrylo0); - write_c0_entrylo1(entrylo1); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - tlbw_use_hazard(); - - write_c0_entryhi(old_ctx); - write_c0_pagemask(old_pagemask); -out: - EXIT_CRITICAL(flags); - return ret; -} - static int __cpuinitdata ntlb; static int __init set_ntlb(char *str) { @@ -451,8 +414,6 @@ void __cpuinit tlb_init(void) write_c0_pagegrain(pg); } - temp_tlb_entry = current_cpu_data.tlbsize - 1; - /* From this point on the ARC firmware is dead. */ local_flush_tlb_all(); |