Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/8xx_mmu.c              |   2
-rw-r--r-- | arch/powerpc/mm/dma-noncoherent.c      |   2
-rw-r--r-- | arch/powerpc/mm/dump_hashpagetable.c   |   2
-rw-r--r-- | arch/powerpc/mm/fault.c                |  17
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c       |  41
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c        |   2
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c          |   5
-rw-r--r-- | arch/powerpc/mm/init_64.c              |  82
-rw-r--r-- | arch/powerpc/mm/mem.c                  |  20
-rw-r--r-- | arch/powerpc/mm/mmu_context_book3s64.c |  15
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h             |   1
-rw-r--r-- | arch/powerpc/mm/pgtable-book3s64.c     |   4
-rw-r--r-- | arch/powerpc/mm/pgtable-hash64.c       | 115
-rw-r--r-- | arch/powerpc/mm/pgtable-radix.c        |  90
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c           |  15
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c           |  45
-rw-r--r-- | arch/powerpc/mm/slb.c                  |  10
-rw-r--r-- | arch/powerpc/mm/slb_low.S              |  30
-rw-r--r-- | arch/powerpc/mm/tlb-radix.c            |   9
-rw-r--r-- | arch/powerpc/mm/tlb_hash64.c           |   6
20 files changed, 411 insertions, 102 deletions
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 6c5025e81236..f4c6472f2fc4 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -88,7 +88,7 @@ static void mmu_mapin_immr(void) int offset; for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_page(v + offset, p + offset, f); + map_kernel_page(v + offset, p + offset, f); } /* Address of instructions to patch */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 2dc74e5c6458..382528475433 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -227,7 +227,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t do { SetPageReserved(page); - map_page(vaddr, page_to_phys(page), + map_kernel_page(vaddr, page_to_phys(page), pgprot_val(pgprot_noncached(PAGE_KERNEL))); page++; vaddr += PAGE_SIZE; diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index c6b900f54c07..b1c144b03fcf 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -335,7 +335,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) unsigned long rpn, lp_bits; int base_psize = 0, actual_psize = 0; - if (ea <= PAGE_OFFSET) + if (ea < PAGE_OFFSET) return -1; /* Look in primary table */ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 3a7d580fdc59..4c422632047b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -206,6 +206,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, int is_write = 0; int trap = TRAP(regs); int is_exec = trap == 0x400; + int is_user = user_mode(regs); int fault; int rc = 0, store_update_sp = 0; @@ -216,7 +217,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * bits we are interested in. But there are some bits which * indicate errors in DSISR but can validly be set in SRR1. */ - if (trap == 0x400) + if (is_exec) error_code &= 0x48200000; else is_write = error_code & DSISR_ISSTORE; @@ -247,13 +248,13 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * The kernel should never take an execute fault nor should it * take a page fault to a kernel address. */ - if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) { + if (!is_user && (is_exec || (address >= TASK_SIZE))) { rc = SIGSEGV; goto bail; } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ - defined(CONFIG_PPC_BOOK3S_64)) + defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx)) if (error_code & DSISR_DABRMATCH) { /* breakpoint match */ do_break(regs, address, error_code); @@ -266,7 +267,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, local_irq_enable(); if (faulthandler_disabled() || mm == NULL) { - if (!user_mode(regs)) { + if (!is_user) { rc = SIGSEGV; goto bail; } @@ -287,10 +288,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * can result in fault, which will cause a deadlock when called with * mmap_sem held */ - if (!is_exec && user_mode(regs)) + if (is_write && is_user) store_update_sp = store_updates_sp(regs); - if (user_mode(regs)) + if (is_user) flags |= FAULT_FLAG_USER; /* When running in the kernel we expect faults to occur only to @@ -309,7 +310,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * thus avoiding the deadlock. 
*/ if (!down_read_trylock(&mm->mmap_sem)) { - if (!user_mode(regs) && !search_exception_tables(regs->nip)) + if (!is_user && !search_exception_tables(regs->nip)) goto bad_area_nosemaphore; retry: @@ -509,7 +510,7 @@ bad_area: bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ - if (user_mode(regs)) { + if (is_user) { _exception(SIGSEGV, regs, code, address); goto bail; } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 65bb8f33b399..3848af167df9 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/bitops.h> #include <linux/of.h> +#include <linux/processor.h> #include <linux/threads.h> #include <linux/smp.h> @@ -23,6 +24,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> +#include <asm/trace.h> #include <asm/tlb.h> #include <asm/cputable.h> #include <asm/udbg.h> @@ -98,6 +100,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } + trace_tlbie(0, 0, va, 0, 0, 0, 0); } static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -147,6 +150,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } + trace_tlbie(0, 1, va, 0, 0, 0, 0); } @@ -181,8 +185,10 @@ static inline void native_lock_hpte(struct hash_pte *hptep) while (1) { if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) break; + spin_begin(); while(test_bit(HPTE_LOCK_BIT, word)) - cpu_relax(); + spin_cpu_relax(); + spin_end(); } } @@ -407,6 +413,38 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, tlbie(vpn, psize, psize, ssize, 0); } +/* + * Remove a bolted kernel entry. Memory hotplug uses this. + * + * No need to lock here because we should be the only user. 
+ */ +static int native_hpte_removebolted(unsigned long ea, int psize, int ssize) +{ + unsigned long vpn; + unsigned long vsid; + long slot; + struct hash_pte *hptep; + + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + + slot = native_hpte_find(vpn, psize, ssize); + if (slot == -1) + return -ENOENT; + + hptep = htab_address + slot; + + VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED)); + + /* Invalidate the hpte */ + hptep->v = 0; + + /* Invalidate the TLB */ + tlbie(vpn, psize, psize, ssize, 0); + return 0; +} + + static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, int bpsize, int apsize, int ssize, int local) { @@ -725,6 +763,7 @@ void __init hpte_init_native(void) mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; + mmu_hash_ops.hpte_removebolted = native_hpte_removebolted; mmu_hash_ops.hpte_insert = native_hpte_insert; mmu_hash_ops.hpte_remove = native_hpte_remove; mmu_hash_ops.hpte_clear_all = native_hpte_clear; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f2095ce9d4b0..7a20669c19e7 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -810,6 +810,8 @@ static void update_hid_for_hash(void) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); + trace_tlbie(0, 0, rb, 0, 2, 0, 0); + /* * now switch the HID */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index c41dc44472c5..e1bf5ca397fe 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -34,6 +34,7 @@ #define PAGE_SHIFT_16G 34 unsigned int HPAGE_SHIFT; +EXPORT_SYMBOL(HPAGE_SHIFT); /* * Tracks gpages after the device tree is scanned and before the @@ -79,7 +80,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, num_hugepd = 1; } - new = kmem_cache_zalloc(cachep, GFP_KERNEL); + new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -945,7 +946,7 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, if (pmd_none(pmd)) return NULL; - if (pmd_trans_huge(pmd)) { + if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { if (is_thp) *is_thp = true; ret_pte = (pte_t *) pmdp; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index ec84b31c6c86..5b4c25d12ff3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -44,6 +44,7 @@ #include <linux/slab.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <linux/memremap.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -110,8 +111,29 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size) return 0; } +/* + * vmemmap virtual address space management does not have a traditonal page + * table to track which virtual struct pages are backed by physical mapping. + * The virtual to physical mappings are tracked in a simple linked list + * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at + * all times where as the 'next' list maintains the available + * vmemmap_backing structures which have been deleted from the + * 'vmemmap_global' list during system runtime (memory hotplug remove + * operation). 
The freed 'vmemmap_backing' structures are reused later when + * new requests come in without allocating fresh memory. This pointer also + * tracks the allocated 'vmemmap_backing' structures as we allocate one + * full page memory at a time when we dont have any. + */ struct vmemmap_backing *vmemmap_list; static struct vmemmap_backing *next; + +/* + * The same pointer 'next' tracks individual chunks inside the allocated + * full page during the boot time and again tracks the freeed nodes during + * runtime. It is racy but it does not happen as they are separated by the + * boot process. Will create problem if some how we have memory hotplug + * operation during boot !! + */ static int num_left; static int num_freed; @@ -171,13 +193,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { + struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - p = vmemmap_alloc_block(page_size, node); + /* altmap lookups only work at section boundaries */ + altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); + + p = __vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) return -ENOMEM; @@ -234,13 +260,17 @@ static unsigned long vmemmap_list_free(unsigned long start) void __ref vmemmap_free(unsigned long start, unsigned long end) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; + unsigned long page_order = get_order(page_size); start = _ALIGN_DOWN(start, page_size); pr_debug("vmemmap_free %lx...%lx\n", start, end); for (; start < end; start += page_size) { - unsigned long addr; + unsigned long nr_pages, addr; + struct vmem_altmap *altmap; + struct page *section_base; + struct page *page; /* * the section has already be marked as invalid, so @@ -251,29 +281,33 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) continue; addr = vmemmap_list_free(start); - if (addr) { - struct page *page = pfn_to_page(addr >> PAGE_SHIFT); - - if (PageReserved(page)) { - /* allocated from bootmem */ - if (page_size < PAGE_SIZE) { - /* - * this shouldn't happen, but if it is - * the case, leave the memory there - */ - WARN_ON_ONCE(1); - } else { - unsigned int nr_pages = - 1 << get_order(page_size); - while (nr_pages--) - free_reserved_page(page++); - } - } else - free_pages((unsigned long)(__va(addr)), - get_order(page_size)); - - vmemmap_remove_mapping(start, page_size); + if (!addr) + continue; + + page = pfn_to_page(addr >> PAGE_SHIFT); + section_base = pfn_to_page(vmemmap_section_start(start)); + nr_pages = 1 << page_order; + + altmap = to_vmem_altmap((unsigned long) section_base); + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + } else if (PageReserved(page)) { + /* allocated from bootmem */ + if (page_size < PAGE_SIZE) { + /* + * this shouldn't happen, but if it is + * the case, leave the memory there + */ + WARN_ON_ONCE(1); + } else { + while (nr_pages--) + free_reserved_page(page++); + } + } else { + free_pages((unsigned long)(__va(addr)), page_order); } + + vmemmap_remove_mapping(start, page_size); } } #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index de5a90e1ceaa..8541f18694a4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -36,6 +36,7 @@ #include <linux/hugetlb.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/memremap.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -151,11 +152,20 @@ int arch_remove_memory(u64 start, 
u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; + struct vmem_altmap *altmap; + struct page *page; int ret; - zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + /* + * If we have an altmap then we need to skip over any reserved PFNs + * when querying the zone. + */ + page = pfn_to_page(start_pfn); + altmap = to_vmem_altmap((unsigned long) page); + if (altmap) + page += vmem_altmap_offset(altmap); + + ret = __remove_pages(page_zone(page), start_pfn, nr_pages); if (ret) return ret; @@ -305,11 +315,11 @@ void __init paging_init(void) unsigned long end = __fix_to_virt(FIX_HOLE); for (; v < end; v += PAGE_SIZE) - map_page(v, 0, 0); /* XXX gross */ + map_kernel_page(v, 0, 0); /* XXX gross */ #endif #ifdef CONFIG_HIGHMEM - map_page(PKMAP_BASE, 0, 0); /* XXX gross */ + map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index a3edf813d455..71de2c6d88f3 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -235,10 +235,15 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - asm volatile("isync": : :"memory"); - mtspr(SPRN_PID, next->context.id); - asm volatile("isync \n" - PPC_SLBIA(0x7) - : : :"memory"); + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + isync(); + mtspr(SPRN_PID, next->context.id); + isync(); + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); + } else { + mtspr(SPRN_PID, next->context.id); + isync(); + } } #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index f988db655e5b..d46128b22150 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -94,7 +94,6 @@ extern void _tlbia(void); #ifdef CONFIG_PPC32 extern void mapin_ram(void); -extern int map_page(unsigned long va, phys_addr_t pa, int flags); extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 5fcb3dd74c13..31eed8fa8e99 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -32,7 +32,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, { int changed; #ifdef CONFIG_DEBUG_VM - WARN_ON(!pmd_trans_huge(*pmdp)); + WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); assert_spin_locked(&vma->vm_mm->page_table_lock); #endif changed = !pmd_same(*(pmdp), entry); @@ -59,7 +59,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, #ifdef CONFIG_DEBUG_VM WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); assert_spin_locked(&mm->page_table_lock); - WARN_ON(!pmd_trans_huge(pmd)); + WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd))); #endif trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 8b85a14b08ea..188b4107584d 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -11,8 +11,12 @@ #include <linux/sched.h> #include <linux/mm_types.h> +#include <linux/mm.h> #include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> 
+#include <asm/mmu.h> #include <asm/tlb.h> #include "mmu_decl.h" @@ -22,6 +26,81 @@ #ifdef CONFIG_SPARSEMEM_VMEMMAP /* + * vmemmap is the starting address of the virtual address space where + * struct pages are allocated for all possible PFNs present on the system + * including holes and bad memory (hence sparse). These virtual struct + * pages are stored in sequence in this virtual address space irrespective + * of the fact whether the corresponding PFN is valid or not. This achieves + * constant relationship between address of struct page and its PFN. + * + * During boot or memory hotplug operation when a new memory section is + * added, physical memory allocation (including hash table bolting) will + * be performed for the set of struct pages which are part of the memory + * section. This saves memory by not allocating struct pages for PFNs + * which are not valid. + * + * ---------------------------------------------- + * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES| + * ---------------------------------------------- + * + * f000000000000000 c000000000000000 + * vmemmap +--------------+ +--------------+ + * + | page struct | +--------------> | page struct | + * | +--------------+ +--------------+ + * | | page struct | +--------------> | page struct | + * | +--------------+ | +--------------+ + * | | page struct | + +------> | page struct | + * | +--------------+ | +--------------+ + * | | page struct | | +--> | page struct | + * | +--------------+ | | +--------------+ + * | | page struct | | | + * | +--------------+ | | + * | | page struct | | | + * | +--------------+ | | + * | | page struct | | | + * | +--------------+ | | + * | | page struct | | | + * | +--------------+ | | + * | | page struct | +-------+ | + * | +--------------+ | + * | | page struct | +-----------+ + * | +--------------+ + * | | page struct | No mapping + * | +--------------+ + * | | page struct | No mapping + * v +--------------+ + * + * ----------------------------------------- + * | RELATION BETWEEN STRUCT PAGES AND PFNS| + * ----------------------------------------- + * + * vmemmap +--------------+ +---------------+ + * + | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | | + * | +--------------+ + * | | | + * | +--------------+ + * | | | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | | + * | +--------------+ + * | | | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * | +--------------+ +---------------+ + * | | page struct | +-------------> | PFN | + * v +--------------+ +---------------+ + */ +/* * On hash-based CPUs, the vmemmap is bolted in the hash table. 
* */ @@ -109,7 +188,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr unsigned long old; #ifdef CONFIG_DEBUG_VM - WARN_ON(!pmd_trans_huge(*pmdp)); + WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); assert_spin_locked(&mm->page_table_lock); #endif @@ -141,6 +220,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_devmap(*pmdp)); pmd = *pmdp; pmd_clear(pmdp); @@ -221,6 +301,7 @@ void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma, { VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); + VM_BUG_ON(pmd_devmap(*pmdp)); /* * We can't mark the pmd none here, because that will cause a race @@ -342,3 +423,35 @@ int hash__has_transparent_hugepage(void) return 1; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#ifdef CONFIG_STRICT_KERNEL_RWX +void hash__mark_rodata_ro(void) +{ + unsigned long start = (unsigned long)_stext; + unsigned long end = (unsigned long)__init_begin; + unsigned long idx; + unsigned int step, shift; + unsigned long newpp = PP_RXXX; + + shift = mmu_psize_defs[mmu_linear_psize].shift; + step = 1 << shift; + + start = ((start + step - 1) >> shift) << shift; + end = (end >> shift) << shift; + + pr_devel("marking ro start %lx, end %lx, step %x\n", + start, end, step); + + if (start == end) { + pr_warn("could not set rodata ro, relocate the start" + " of the kernel to a 0x%x boundary\n", step); + return; + } + + for (idx = start; idx < end; idx += step) + /* Not sure if we can do much with the return value */ + mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, + mmu_kernel_ssize); + +} +#endif diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c28165d8970b..8c13e4282308 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -11,6 +11,7 @@ #include <linux/sched/mm.h> #include <linux/memblock.h> #include <linux/of_fdt.h> +#include <linux/mm.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -19,6 +20,8 @@ #include <asm/mmu.h> #include <asm/firmware.h> #include <asm/powernv.h> +#include <asm/sections.h> +#include <asm/trace.h> #include <trace/events/thp.h> @@ -108,6 +111,49 @@ set_the_pte: return 0; } +#ifdef CONFIG_STRICT_KERNEL_RWX +void radix__mark_rodata_ro(void) +{ + unsigned long start = (unsigned long)_stext; + unsigned long end = (unsigned long)__init_begin; + unsigned long idx; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + start = ALIGN_DOWN(start, PAGE_SIZE); + end = PAGE_ALIGN(end); // aligns up + + pr_devel("marking ro start %lx, end %lx\n", start, end); + + for (idx = start; idx < end; idx += PAGE_SIZE) { + pgdp = pgd_offset_k(idx); + pudp = pud_alloc(&init_mm, pgdp, idx); + if (!pudp) + continue; + if (pud_huge(*pudp)) { + ptep = (pte_t *)pudp; + goto update_the_pte; + } + pmdp = pmd_alloc(&init_mm, pudp, idx); + if (!pmdp) + continue; + if (pmd_huge(*pmdp)) { + ptep = pmdp_ptep(pmdp); + goto update_the_pte; + } + ptep = pte_alloc_kernel(pmdp, idx); + if (!ptep) + continue; +update_the_pte: + radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0); + } + + radix__flush_tlb_kernel_range(start, end); +} +#endif /* CONFIG_STRICT_KERNEL_RWX */ + static inline void __meminit print_mapping(unsigned long start, unsigned long end, unsigned long size) @@ -121,7 +167,14 @@ static inline void __meminit print_mapping(unsigned long start, static int __meminit 
create_physical_mapping(unsigned long start, unsigned long end) { - unsigned long addr, mapping_size = 0; + unsigned long vaddr, addr, mapping_size = 0; + pgprot_t prot; + unsigned long max_mapping_size; +#ifdef CONFIG_STRICT_KERNEL_RWX + int split_text_mapping = 1; +#else + int split_text_mapping = 0; +#endif start = _ALIGN_UP(start, PAGE_SIZE); for (addr = start; addr < end; addr += mapping_size) { @@ -130,9 +183,12 @@ static int __meminit create_physical_mapping(unsigned long start, gap = end - addr; previous_size = mapping_size; + max_mapping_size = PUD_SIZE; +retry: if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && - mmu_psize_defs[MMU_PAGE_1G].shift) + mmu_psize_defs[MMU_PAGE_1G].shift && + PUD_SIZE <= max_mapping_size) mapping_size = PUD_SIZE; else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && mmu_psize_defs[MMU_PAGE_2M].shift) @@ -140,13 +196,32 @@ static int __meminit create_physical_mapping(unsigned long start, else mapping_size = PAGE_SIZE; + if (split_text_mapping && (mapping_size == PUD_SIZE) && + (addr <= __pa_symbol(__init_begin)) && + (addr + mapping_size) >= __pa_symbol(_stext)) { + max_mapping_size = PMD_SIZE; + goto retry; + } + + if (split_text_mapping && (mapping_size == PMD_SIZE) && + (addr <= __pa_symbol(__init_begin)) && + (addr + mapping_size) >= __pa_symbol(_stext)) + mapping_size = PAGE_SIZE; + if (mapping_size != previous_size) { print_mapping(start, addr, previous_size); start = addr; } - rc = radix__map_kernel_page((unsigned long)__va(addr), addr, - PAGE_KERNEL_X, mapping_size); + vaddr = (unsigned long)__va(addr); + + if (overlaps_kernel_text(vaddr, vaddr + mapping_size) || + overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) + prot = PAGE_KERNEL_X; + else + prot = PAGE_KERNEL; + + rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); if (rc) return rc; } @@ -190,6 +265,7 @@ static void __init radix_init_pgtable(void) asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); } static void __init radix_init_partition_table(void) @@ -316,6 +392,9 @@ static void update_hid_for_radix(void) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); + trace_tlbie(0, 0, rb, 0, 2, 0, 1); + trace_tlbie(0, 0, rb, 0, 2, 1, 1); + /* * now switch the HID */ @@ -683,7 +762,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add unsigned long old; #ifdef CONFIG_DEBUG_VM - WARN_ON(!radix__pmd_trans_huge(*pmdp)); + WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); assert_spin_locked(&mm->page_table_lock); #endif @@ -701,6 +780,7 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(radix__pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_devmap(*pmdp)); /* * khugepaged calls this for normal pmd */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a65c0b4c0669..a9e4bfc025bc 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -60,7 +60,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT; ptepage = alloc_pages(flags, 0); if (!ptepage) @@ -189,7 +189,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long 
size, unsigned long flags, err = 0; for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_page(v+i, p+i, flags); + err = map_kernel_page(v+i, p+i, flags); if (err) { if (slab_is_available()) vunmap((void *)v); @@ -215,7 +215,7 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int map_page(unsigned long va, phys_addr_t pa, int flags) +int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) { pmd_t *pd; pte_t *pg; @@ -255,7 +255,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL); - map_page(v, p, f); + map_kernel_page(v, p, f); #ifdef CONFIG_PPC_STD_MMU_32 if (ktext) hash_preload(&init_mm, v, 0, 0x300); @@ -387,11 +387,6 @@ void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) return; } - map_page(address, phys, pgprot_val(flags)); + map_kernel_page(address, phys, pgprot_val(flags)); fixmaps++; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index db93cf747a03..5c0b795d656c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -47,6 +47,7 @@ #include <asm/smp.h> #include <asm/machdep.h> #include <asm/tlb.h> +#include <asm/trace.h> #include <asm/processor.h> #include <asm/cputable.h> #include <asm/sections.h> @@ -323,7 +324,7 @@ struct page *pud_page(pud_t pud) */ struct page *pmd_page(pmd_t pmd) { - if (pmd_trans_huge(pmd) || pmd_huge(pmd)) + if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) return pte_page(pmd_pte(pmd)); return virt_to_page(pmd_page_vaddr(pmd)); } @@ -351,12 +352,20 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - if (!page) - return NULL; - if (!kernel && !pgtable_page_ctor(page)) { - __free_page(page); - return NULL; + struct page *page; + + if (!kernel) { + page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + } else { + page = alloc_page(PGALLOC_GFP); + if (!page) + return NULL; } ret = page_address(page); @@ -469,13 +478,31 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, * use of this partition ID was, not the new use. 
*/ asm volatile("ptesync" : : : "memory"); - if (old & PATB_HR) + if (old & PATB_HR) { asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - else + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); + } else { asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); + } asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); #endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_STRICT_KERNEL_RWX +void mark_rodata_ro(void) +{ + if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) { + pr_warn("Warning: Unable to mark rodata read only on this CPU.\n"); + return; + } + + if (radix_enabled()) + radix__mark_rodata_ro(); + else + hash__mark_rodata_ro(); +} +#endif diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 654a0d7ba0e7..13cfe413b40d 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -33,15 +33,7 @@ enum slb_index { KSTACK_INDEX = 2, /* Kernel stack map */ }; -extern void slb_allocate_realmode(unsigned long ea); - -static void slb_allocate(unsigned long ea) -{ - /* Currently, we do real mode for all SLBs including user, but - * that will change if we bring back dynamic VSIDs - */ - slb_allocate_realmode(ea); -} +extern void slb_allocate(unsigned long ea); #define slb_esid_mask(ssize) \ (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1519617aab36..bde378559d01 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -65,14 +65,15 @@ MMU_FTR_SECTION_ELSE \ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) -/* void slb_allocate_realmode(unsigned long ea); +/* void slb_allocate(unsigned long ea); * * Create an SLB entry for the given EA (user or kernel). * r3 = faulting address, r13 = PACA * r9, r10, r11 are clobbered by this function + * r3 is preserved. * No other registers are examined or changed. */ -_GLOBAL(slb_allocate_realmode) +_GLOBAL(slb_allocate) /* * check for bad kernel/user address * (ea & ~REGION_MASK) >= PGTABLE_RANGE @@ -235,6 +236,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) * dont have any LRU information to help us choose a slot. */ + mr r9,r3 + + /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ 7: ld r10,PACASTABRR(r13) addi r10,r10,1 /* This gets soft patched on boot. */ @@ -249,10 +253,10 @@ slb_compare_rr_to_size: std r10,PACASTABRR(r13) 3: - rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */ - oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */ + rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ + oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ - /* r3 = ESID data, r11 = VSID data */ + /* r9 = ESID data, r11 = VSID data */ /* * No need for an isync before or after this slbmte. 
The exception @@ -265,21 +269,21 @@ slb_compare_rr_to_size: bgelr cr7 /* Update the slb cache */ - lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ - cmpldi r3,SLB_CACHE_ENTRIES + lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r9,SLB_CACHE_ENTRIES bge 1f /* still room in the slb cache */ - sldi r11,r3,2 /* r11 = offset * sizeof(u32) */ + sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ srdi r10,r10,28 /* get the 36 bits of the ESID */ add r11,r11,r13 /* r11 = (u32 *)paca + offset */ stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ - addi r3,r3,1 /* offset++ */ + addi r9,r9,1 /* offset++ */ b 2f 1: /* offset >= SLB_CACHE_ENTRIES */ - li r3,SLB_CACHE_ENTRIES+1 + li r9,SLB_CACHE_ENTRIES+1 2: - sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ crclr 4*cr0+eq /* set result to "success" */ blr @@ -301,11 +305,11 @@ slb_compare_rr_to_size: rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ /* r3 = EA, r11 = VSID data */ - clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */ + clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ b 7b -_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode) +_ASM_NOKPROBE_SYMBOL(slb_allocate) _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) _ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 02e71402fdd3..744e0164ecf5 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -16,6 +16,7 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> +#include <asm/trace.h> #define RIC_FLUSH_TLB 0 @@ -35,6 +36,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); } /* @@ -87,6 +89,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); } static inline void _tlbiel_va(unsigned long va, unsigned long pid, @@ -104,6 +107,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); + trace_tlbie(0, 1, rb, rs, ric, prs, r); } static inline void _tlbie_va(unsigned long va, unsigned long pid, @@ -121,6 +125,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); } /* @@ -377,6 +382,7 @@ void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } EXPORT_SYMBOL(radix__flush_tlb_lpid_va); @@ -394,6 +400,7 @@ void radix__flush_tlb_lpid(unsigned long lpid) asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } 
EXPORT_SYMBOL(radix__flush_tlb_lpid); @@ -420,12 +427,14 @@ void radix__flush_tlb_all(void) */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); /* * now flush host entires by passing PRS = 0 and LPID == 0 */ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); + trace_tlbie(0, 0, rb, 0, ric, prs, r); } void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 4517aa43a8b1..b5b0fb97b9c0 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -93,12 +93,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, /* * Check if we have an active batch on this CPU. If not, just - * flush now and return. For now, we don global invalidates - * in that case, might be worth testing the mm cpu mask though - * and decide to use local invalidates instead... + * flush now and return. */ if (!batch->active) { - flush_hash_page(vpn, rpte, psize, ssize, 0); + flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm)); put_cpu_var(ppc64_tlb_batch); return; } |
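
The final tlb_hash64.c hunk stops forcing a global invalidate when there is no active batch: flush_hash_page() now receives mm_is_thread_local(mm) instead of 0, so an mm that has only ever run on the current CPU gets a cheaper local flush. The following stand-alone model (not kernel code) illustrates that decision; mm_is_thread_local() is re-implemented here purely for illustration, on the assumption that the real helper compares the mm's CPU mask against the current CPU.

```c
/* User-space sketch of the local-vs-global flush choice made in
 * hpte_need_flush() after this patch. */
#include <stdbool.h>
#include <stdio.h>

struct mm_model {
	unsigned long cpumask;	/* bit N set => mm has run on CPU N */
};

/* Hypothetical re-implementation for illustration only. */
static bool mm_is_thread_local(const struct mm_model *mm, int this_cpu)
{
	/* local iff the only CPU that ever ran this mm is the current one */
	return mm->cpumask == (1UL << this_cpu);
}

static void flush_hash_page(unsigned long vpn, bool local)
{
	printf("flush vpn=%#lx using %s invalidate\n",
	       vpn, local ? "local (tlbiel)" : "global (tlbie)");
}

int main(void)
{
	struct mm_model single = { .cpumask = 1UL << 2 };		/* CPU 2 only */
	struct mm_model shared = { .cpumask = (1UL << 2) | (1UL << 5) };
	int this_cpu = 2;

	flush_hash_page(0x1000, mm_is_thread_local(&single, this_cpu));
	flush_hash_page(0x2000, mm_is_thread_local(&shared, this_cpu));
	return 0;
}
```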
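
The new comments in init_64.c describe vmemmap_list as a linked list of vmemmap_backing entries, with the 'next' pointer serving double duty: it hands out chunks carved from a whole page at boot, and later tracks entries freed by memory hotplug remove so they can be reused. A compilable user-space sketch of that allocation policy follows; the structure layout and the aligned_alloc() stand-in for vmemmap_alloc_block() are illustrative assumptions, not the kernel definitions.

```c
#include <stdio.h>
#include <stdlib.h>

/* Modelled on the vmemmap_backing described above; layout is illustrative. */
struct vmemmap_backing {
	struct vmemmap_backing *list;
	unsigned long phys;
	unsigned long virt_addr;
};

#define PAGE_SIZE 4096UL

static struct vmemmap_backing *next;	/* freed node OR next unused chunk */
static int num_left;			/* unused chunks in the current page */
static int num_freed;			/* nodes returned by hotplug remove */

static struct vmemmap_backing *vmemmap_list_alloc(void)
{
	struct vmemmap_backing *vmem_back;

	/* Reuse a node freed by an earlier hotplug remove first. */
	if (num_freed) {
		num_freed--;
		vmem_back = next;
		next = next->list;
		return vmem_back;
	}

	/* Otherwise carve the next chunk out of a whole page, allocating a
	 * fresh page when the previous one is used up. */
	if (!num_left) {
		next = aligned_alloc(PAGE_SIZE, PAGE_SIZE); /* kernel: vmemmap_alloc_block() */
		if (!next)
			return NULL;
		num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
	}
	num_left--;
	return next++;
}

static void vmemmap_list_release(struct vmemmap_backing *vmem_back)
{
	/* Push the node onto the free list; a later alloc will return it. */
	vmem_back->list = next;
	next = vmem_back;
	num_freed++;
}

int main(void)
{
	struct vmemmap_backing *a = vmemmap_list_alloc();
	struct vmemmap_backing *b = vmemmap_list_alloc();

	vmemmap_list_release(a);
	printf("reused freed node: %s\n",
	       vmemmap_list_alloc() == a ? "yes" : "no");
	(void)b;
	return 0;
}
```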