Diffstat (limited to 'mm/sparse-vmemmap.c')
-rw-r--r-- | mm/sparse-vmemmap.c | 176
1 file changed, 154 insertions(+), 22 deletions(-)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 8aecd6b3896c..f4fa61dbbee3 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -34,7 +34,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 /**
  * struct vmemmap_remap_walk - walk vmemmap page table
  *
@@ -420,7 +420,7 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end,
 
 	return 0;
 }
-#endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
+#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
 
 /*
  * Allocate a block of memory to be used to back the virtual memory map
@@ -533,16 +533,31 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
 }
 
 pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
-				       struct vmem_altmap *altmap)
+				       struct vmem_altmap *altmap,
+				       struct page *reuse)
 {
 	pte_t *pte = pte_offset_kernel(pmd, addr);
 	if (pte_none(*pte)) {
 		pte_t entry;
 		void *p;
 
-		p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
-		if (!p)
-			return NULL;
+		if (!reuse) {
+			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
+			if (!p)
+				return NULL;
+		} else {
+			/*
+			 * When a PTE/PMD entry is freed from the init_mm
+			 * there's a free_pages() call to this page allocated
+			 * above. Thus this get_page() is paired with the
+			 * put_page_testzero() on the freeing path.
+			 * This can only be called by certain ZONE_DEVICE path,
+			 * and through vmemmap_populate_compound_pages() when
+			 * slab is available.
+			 */
+			get_page(reuse);
+			p = page_to_virt(reuse);
+		}
 		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
 		set_pte_at(&init_mm, addr, pte, entry);
 	}
@@ -608,49 +623,166 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
 	return pgd;
 }
 
-int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
-					 int node, struct vmem_altmap *altmap)
+static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
+						  struct vmem_altmap *altmap,
+						  struct page *reuse)
 {
-	unsigned long addr = start;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
+	pgd = vmemmap_pgd_populate(addr, node);
+	if (!pgd)
+		return NULL;
+	p4d = vmemmap_p4d_populate(pgd, addr, node);
+	if (!p4d)
+		return NULL;
+	pud = vmemmap_pud_populate(p4d, addr, node);
+	if (!pud)
+		return NULL;
+	pmd = vmemmap_pmd_populate(pud, addr, node);
+	if (!pmd)
+		return NULL;
+	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
+	if (!pte)
+		return NULL;
+	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+	return pte;
+}
+
+static int __meminit vmemmap_populate_range(unsigned long start,
+					    unsigned long end, int node,
+					    struct vmem_altmap *altmap,
+					    struct page *reuse)
+{
+	unsigned long addr = start;
+	pte_t *pte;
+
 	for (; addr < end; addr += PAGE_SIZE) {
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
-			return -ENOMEM;
-		p4d = vmemmap_p4d_populate(pgd, addr, node);
-		if (!p4d)
+		pte = vmemmap_populate_address(addr, node, altmap, reuse);
+		if (!pte)
 			return -ENOMEM;
-		pud = vmemmap_pud_populate(p4d, addr, node);
-		if (!pud)
+	}
+
+	return 0;
+}
+
+int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
+					 int node, struct vmem_altmap *altmap)
+{
+	return vmemmap_populate_range(start, end, node, altmap, NULL);
+}
+
+/*
+ * For compound pages bigger than section size (e.g. x86 1G compound
+ * pages with 2M subsection size) fill the rest of sections as tail
+ * pages.
+ *
+ * Note that memremap_pages() resets @nr_range value and will increment
+ * it after each range is successfully onlined. Thus the value of @nr_range
+ * at section memmap populate corresponds to the in-progress range
+ * being onlined here.
+ */
+static bool __meminit reuse_compound_section(unsigned long start_pfn,
+					     struct dev_pagemap *pgmap)
+{
+	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+	unsigned long offset = start_pfn -
+		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);
+
+	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
+}
+
+static pte_t * __meminit compound_section_tail_page(unsigned long addr)
+{
+	pte_t *pte;
+
+	addr -= PAGE_SIZE;
+
+	/*
+	 * Assuming sections are populated sequentially, the previous section's
+	 * page data can be reused.
+	 */
+	pte = pte_offset_kernel(pmd_off_k(addr), addr);
+	if (!pte)
+		return NULL;
+
+	return pte;
+}
+
+static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+						     unsigned long start,
+						     unsigned long end, int node,
+						     struct dev_pagemap *pgmap)
+{
+	unsigned long size, addr;
+	pte_t *pte;
+	int rc;
+
+	if (reuse_compound_section(start_pfn, pgmap)) {
+		pte = compound_section_tail_page(start);
+		if (!pte)
 			return -ENOMEM;
-		pmd = vmemmap_pmd_populate(pud, addr, node);
-		if (!pmd)
+
+		/*
+		 * Reuse the page that was populated in the prior iteration
+		 * with just tail struct pages.
+		 */
+		return vmemmap_populate_range(start, end, node, NULL,
+					      pte_page(*pte));
+	}
+
+	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
+	for (addr = start; addr < end; addr += size) {
+		unsigned long next = addr, last = addr + size;
+
+		/* Populate the head page vmemmap page */
+		pte = vmemmap_populate_address(addr, node, NULL, NULL);
+		if (!pte)
 			return -ENOMEM;
-		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
+
+		/* Populate the tail pages vmemmap page */
+		next = addr + PAGE_SIZE;
+		pte = vmemmap_populate_address(next, node, NULL, NULL);
 		if (!pte)
 			return -ENOMEM;
-		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+		/*
+		 * Reuse the previous page for the rest of tail pages.
+		 * See layout diagram in Documentation/vm/vmemmap_dedup.rst
+		 */
+		next += PAGE_SIZE;
+		rc = vmemmap_populate_range(next, last, node, NULL,
+					    pte_page(*pte));
+		if (rc)
+			return -ENOMEM;
 	}
 
 	return 0;
 }
 
 struct page * __meminit __populate_section_memmap(unsigned long pfn,
-		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
+		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
+		struct dev_pagemap *pgmap)
 {
 	unsigned long start = (unsigned long) pfn_to_page(pfn);
 	unsigned long end = start + nr_pages * sizeof(struct page);
+	int r;
 
 	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
 		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
 		return NULL;
 
-	if (vmemmap_populate(start, end, nid, altmap))
+	if (is_power_of_2(sizeof(struct page)) &&
+	    pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap)
+		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
+	else
+		r = vmemmap_populate(start, end, nid, altmap);
+
+	if (r < 0)
 		return NULL;
 
 	return pfn_to_page(pfn);
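The arithmetic behind the remap trick is easier to see outside the kernel: per compound page, only the head vmemmap page and one tail vmemmap page are allocated, and every remaining vmemmap page is remapped onto that shared tail page. Below is a minimal standalone userspace sketch of the resulting savings, assuming x86-64 constants (4 KiB base pages, a 64-byte struct page) and ignoring per-section boundary effects; the two helper functions are hypothetical, written only for this illustration, not kernel API.

/*
 * Back-of-the-envelope check of the vmemmap dedup savings.
 * Assumes x86-64: 4 KiB base pages, sizeof(struct page) == 64.
 * vmemmap_pages_allocated()/vmemmap_pages_saved() are illustrative
 * helpers, not kernel functions.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define STRUCT_PAGE_SZ	64UL	/* sizeof(struct page) on x86-64 */

/* vmemmap pages actually allocated per compound page: head + one tail */
static unsigned long vmemmap_pages_allocated(unsigned long nr_pages)
{
	unsigned long total = nr_pages * STRUCT_PAGE_SZ / PAGE_SIZE;

	return total > 2 ? 2 : total;
}

/* vmemmap pages remapped to the shared tail page instead of allocated */
static unsigned long vmemmap_pages_saved(unsigned long nr_pages)
{
	unsigned long total = nr_pages * STRUCT_PAGE_SZ / PAGE_SIZE;

	return total - vmemmap_pages_allocated(nr_pages);
}

int main(void)
{
	/* 2M and 1G device-dax compound page geometries */
	unsigned long geometries[] = { 512, 262144 };

	for (int i = 0; i < 2; i++) {
		unsigned long nr = geometries[i];

		printf("%8lu base pages/compound page: %lu vmemmap pages "
		       "allocated, %lu reused (%lu KiB saved)\n",
		       nr, vmemmap_pages_allocated(nr),
		       vmemmap_pages_saved(nr),
		       vmemmap_pages_saved(nr) * PAGE_SIZE / 1024);
	}
	return 0;
}

Worked through, a 2M compound page needs 512 * 64 B = 32 KiB of vmemmap (8 pages), of which 6 are reused; a 1G compound page needs 16 MiB (4096 pages), of which 4094 are reused. That near-total elimination of tail-page vmemmap for 1G device-dax pages is where the bulk of the patch's 154 inserted lines pays off.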