From a6f197f889ce0a5fd583674d9fa39259f5c8f72f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 24 Sep 2019 09:54:40 +0530 Subject: powerpc/book3s64: Export has_transparent_hugepage() related functions. In later patch, we want to use hash_transparent_hugepage() in a kernel module. Export two related functions. Acked-by: Michael Ellerman Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20190924042440.27946-1-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/asm/book3s/64/radix.h | 8 +++++++- arch/powerpc/mm/book3s64/hash_pgtable.c | 2 ++ arch/powerpc/mm/book3s64/radix_pgtable.c | 7 ------- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index e04a839cb5b9..65a6966f1de4 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -254,7 +254,13 @@ extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); -extern int radix__has_transparent_hugepage(void); +static inline int radix__has_transparent_hugepage(void) +{ + /* For radix 2M at PMD level means thp */ + if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT) + return 1; + return 0; +} #endif extern int __meminit radix__vmemmap_create_mapping(unsigned long start, diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index d1f390ac9cdb..64733b9cb20a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -406,6 +406,8 @@ int hash__has_transparent_hugepage(void) return 1; } +EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage); + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index b4ca9e95e678..dc7a38f0a45b 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1057,13 +1057,6 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, return old_pmd; } -int radix__has_transparent_hugepage(void) -{ - /* For radix 2M at PMD level means thp */ - if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT) - return 1; - return 0; -} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, -- cgit v1.2.3-58-ga151 From cf387d9644d8c78721cf9b77af9f67bb5b04da16 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 10 Sep 2019 11:58:25 +0530 Subject: libnvdimm/altmap: Track namespace boundaries in altmap With PFN_MODE_PMEM namespace, the memmap area is allocated from the device area. Some architectures map the memmap area with large page size. On architectures like ppc64, 16MB page for memap mapping can map 262144 pfns. This maps a namespace size of 16G. When populating memmap region with 16MB page from the device area, make sure the allocated space is not used to map resources outside this namespace. Such usage of device area will prevent a namespace destroy. Add resource end pnf in altmap and use that to check if the memmap area allocation can map pfn outside the namespace. On ppc64 in such case we fallback to allocation from memory. This fix kernel crash reported below: [ 132.034989] WARNING: CPU: 13 PID: 13719 at mm/memremap.c:133 devm_memremap_pages_release+0x2d8/0x2e0 [ 133.464754] BUG: Unable to handle kernel data access at 0xc00c00010b204000 [ 133.464760] Faulting instruction address: 0xc00000000007580c [ 133.464766] Oops: Kernel access of bad area, sig: 11 [#1] [ 133.464771] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries ..... [ 133.464901] NIP [c00000000007580c] vmemmap_free+0x2ac/0x3d0 [ 133.464906] LR [c0000000000757f8] vmemmap_free+0x298/0x3d0 [ 133.464910] Call Trace: [ 133.464914] [c000007cbfd0f7b0] [c0000000000757f8] vmemmap_free+0x298/0x3d0 (unreliable) [ 133.464921] [c000007cbfd0f8d0] [c000000000370a44] section_deactivate+0x1a4/0x240 [ 133.464928] [c000007cbfd0f980] [c000000000386270] __remove_pages+0x3a0/0x590 [ 133.464935] [c000007cbfd0fa50] [c000000000074158] arch_remove_memory+0x88/0x160 [ 133.464942] [c000007cbfd0fae0] [c0000000003be8c0] devm_memremap_pages_release+0x150/0x2e0 [ 133.464949] [c000007cbfd0fb70] [c000000000738ea0] devm_action_release+0x30/0x50 [ 133.464955] [c000007cbfd0fb90] [c00000000073a5a4] release_nodes+0x344/0x400 [ 133.464961] [c000007cbfd0fc40] [c00000000073378c] device_release_driver_internal+0x15c/0x250 [ 133.464968] [c000007cbfd0fc80] [c00000000072fd14] unbind_store+0x104/0x110 [ 133.464973] [c000007cbfd0fcd0] [c00000000072ee24] drv_attr_store+0x44/0x70 [ 133.464981] [c000007cbfd0fcf0] [c0000000004a32bc] sysfs_kf_write+0x6c/0xa0 [ 133.464987] [c000007cbfd0fd10] [c0000000004a1dfc] kernfs_fop_write+0x17c/0x250 [ 133.464993] [c000007cbfd0fd60] [c0000000003c348c] __vfs_write+0x3c/0x70 [ 133.464999] [c000007cbfd0fd80] [c0000000003c75d0] vfs_write+0xd0/0x250 djbw: Aneesh notes that this crash can likely be triggered in any kernel that supports 'papr_scm', so flagging that commit for -stable consideration. Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions") Cc: Reported-by: Sachin Sant Signed-off-by: Aneesh Kumar K.V Reviewed-by: Pankaj Gupta Tested-by: Santosh Sivaraj Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20190910062826.10041-1-aneesh.kumar@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/mm/init_64.c | 17 ++++++++++++++++- drivers/nvdimm/pfn_devs.c | 2 ++ include/linux/memremap.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a44f6281ca3a..4e08246acd79 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -172,6 +172,21 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } +static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start, + unsigned long page_size) +{ + unsigned long nr_pfn = page_size / sizeof(struct page); + unsigned long start_pfn = page_to_pfn((struct page *)start); + + if ((start_pfn + nr_pfn) > altmap->end_pfn) + return true; + + if (start_pfn < altmap->base_pfn) + return true; + + return false; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { @@ -194,7 +209,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, * fail due to alignment issues when using 16MB hugepages, so * fall back to system memory if the altmap allocation fail. */ - if (altmap) { + if (altmap && !altmap_cross_boundary(altmap, start, page_size)) { p = altmap_alloc_block_buf(page_size, altmap); if (!p) pr_debug("altmap block allocation failed, falling back to system memory"); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 934cdcaaae97..80c7992bc538 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -672,9 +672,11 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; + resource_size_t end = nsio->res.end - end_trunc; struct vmem_altmap __altmap = { .base_pfn = init_altmap_base(base), .reserve = init_altmap_reserve(base), + .end_pfn = PHYS_PFN(end), }; memcpy(res, &nsio->res, sizeof(*res)); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index f8a5b2a19945..c70996fe48c8 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -17,6 +17,7 @@ struct device; */ struct vmem_altmap { const unsigned long base_pfn; + const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; -- cgit v1.2.3-58-ga151