author		Linus Torvalds <torvalds@linux-foundation.org>	2024-07-18 14:48:11 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-18 14:48:11 -0700
commit		b2fc97c18614f99179700be263ecbc667c91a4e8 (patch)
tree		13914a5bb2fcef7691c84796ab5a36459b949873 /mm
parent		68b59730459e5d1fe4e0bbeb04ceb9df0f002270 (diff)
parent		9364a7e40d54e6858479f0a96e1a04aa1204be16 (diff)
Merge tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock
Pull memblock updates from Mike Rapoport:
- 'reserve_mem' command line parameter to allow creation of named memory
  reservations at boot time (a usage example follows this list).
The driving use-case is to improve the ability of pstore to retain
ramoops data across reboots.
- cleanups and small improvements in memblock and mm_init
- new test cases in the memblock test suite
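As an illustration of the new parameter (the 12M size, 4096 alignment, and the
"oops" label here are example values), the two new options can be paired on the
kernel command line so that ramoops finds its buffer by name rather than by a
hard-coded physical address:

	reserve_mem=12M:4096:oops ramoops.mem_name=oops

This reserves 12M of memory, aligned to 4096 bytes, under the name "oops";
pstore/ramoops then looks the region up by that name at boot. Retention across
reboots is best-effort: it relies on the allocation landing at the same
physical address on the next boot.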
* tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
memblock tests: fix implicit declaration of function 'numa_valid_node'
memblock: Move late alloc warning down to phys alloc
pstore/ramoops: Add ramoops.mem_name= command line option
mm/memblock: Add "reserve_mem" to reserved named memory at boot up
mm/mm_init.c: don't initialize page->lru again
mm/mm_init.c: not always search next deferred_init_pfn from very beginning
mm/mm_init.c: use deferred_init_mem_pfn_range_in_zone() to decide loop condition
mm/mm_init.c: get the highest zone directly
mm/mm_init.c: move nr_initialised reset down a bit
mm/memblock: fix a typo in description of for_each_mem_region()
mm/mm_init.c: use memblock_region_memory_base_pfn() to get startpfn
mm/memblock: use PAGE_ALIGN_DOWN to get pgend in free_memmap
mm/memblock: return true directly on finding overlap region
memblock tests: add memblock_overlaps_region_checks
mm/memblock: fix comment for memblock_isolate_range()
memblock tests: add memblock_reserve_many_may_conflict_check()
memblock tests: add memblock_reserve_all_locations_check()
mm/memblock: remove empty dummy entry
Diffstat (limited to 'mm')
-rw-r--r--	mm/memblock.c	151
-rw-r--r--	mm/mm_init.c	69
2 files changed, 166 insertions, 54 deletions
diff --git a/mm/memblock.c b/mm/memblock.c
index e81fb68f7f88..3b9dc2d89b8a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS
 struct memblock memblock __initdata_memblock = {
 	.memory.regions		= memblock_memory_init_regions,
-	.memory.cnt		= 1,	/* empty dummy entry */
 	.memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
 	.memory.name		= "memory",
 
 	.reserved.regions	= memblock_reserved_init_regions,
-	.reserved.cnt		= 1,	/* empty dummy entry */
 	.reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
 	.reserved.name		= "reserved",
@@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = {
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 struct memblock_type physmem = {
 	.regions		= memblock_physmem_init_regions,
-	.cnt			= 1,	/* empty dummy entry */
 	.max			= INIT_PHYSMEM_REGIONS,
 	.name			= "physmem",
 };
@@ -197,8 +194,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	for (i = 0; i < type->cnt; i++)
 		if (memblock_addrs_overlap(base, size, type->regions[i].base,
 					   type->regions[i].size))
-			break;
-	return i < type->cnt;
+			return true;
+	return false;
 }
 
 /**
@@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 	/* Special case for empty arrays */
 	if (type->cnt == 0) {
 		WARN_ON(type->total_size != 0);
-		type->cnt = 1;
 		type->regions[0].base = 0;
 		type->regions[0].size = 0;
 		type->regions[0].flags = 0;
@@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
 	/* special case for empty array */
 	if (type->regions[0].size == 0) {
-		WARN_ON(type->cnt != 1 || type->total_size);
+		WARN_ON(type->cnt != 0 || type->total_size);
 		type->regions[0].base = base;
 		type->regions[0].size = size;
 		type->regions[0].flags = flags;
 		memblock_set_region_node(&type->regions[0], nid);
 		type->total_size = size;
+		type->cnt = 1;
 		return 0;
 	}
@@ -780,7 +777,8 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
  * Walk @type and ensure that regions don't cross the boundaries defined by
  * [@base, @base + @size). Crossing regions are split at the boundaries,
  * which may create at most two more regions. The index of the first
- * region inside the range is returned in *@start_rgn and end in *@end_rgn.
+ * region inside the range is returned in *@start_rgn and the index of the
+ * first region after the range is returned in *@end_rgn.
  *
  * Return:
  * 0 on success, -errno on failure.
@@ -1441,6 +1439,17 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 	enum memblock_flags flags = choose_memblock_flags();
 	phys_addr_t found;
 
+	/*
+	 * Detect any accidental use of these APIs after slab is ready, as at
+	 * this moment memblock may be deinitialized already and its
+	 * internal data may be destroyed (after execution of memblock_free_all)
+	 */
+	if (WARN_ON_ONCE(slab_is_available())) {
+		void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
+
+		return vaddr ? virt_to_phys(vaddr) : 0;
+	}
+
 	if (!align) {
 		/* Can't use WARNs this early in boot on powerpc */
 		dump_stack();
@@ -1566,13 +1575,6 @@ static void * __init memblock_alloc_internal(
 {
 	phys_addr_t alloc;
 
-	/*
-	 * Detect any accidental use of these APIs after slab is ready, as at
-	 * this moment memblock may be deinitialized already and its
-	 * internal data may be destroyed (after execution of memblock_free_all)
-	 */
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, nid);
 
 	if (max_addr > memblock.current_limit)
 		max_addr = memblock.current_limit;
@@ -2031,7 +2033,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * downwards.
 	 */
 	pg = PAGE_ALIGN(__pa(start_pg));
-	pgend = __pa(end_pg) & PAGE_MASK;
+	pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
 
 	/*
 	 * If there are free pages between these, free the section of the
@@ -2234,6 +2236,123 @@ void __init memblock_free_all(void)
 	totalram_pages_add(pages);
 }
 
+/* Keep a table to reserve named memory */
+#define RESERVE_MEM_MAX_ENTRIES	8
+#define RESERVE_MEM_NAME_SIZE	16
+struct reserve_mem_table {
+	char			name[RESERVE_MEM_NAME_SIZE];
+	phys_addr_t		start;
+	phys_addr_t		size;
+};
+static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
+static int reserved_mem_count;
+
+/* Add wildcard region with a lookup name */
+static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
+				    const char *name)
+{
+	struct reserve_mem_table *map;
+
+	map = &reserved_mem_table[reserved_mem_count++];
+	map->start = start;
+	map->size = size;
+	strscpy(map->name, name);
+}
+
+/**
+ * reserve_mem_find_by_name - Find reserved memory region with a given name
+ * @name: The name that is attached to a reserved memory region
+ * @start: If found, holds the start address
+ * @size: If found, holds the size of the address.
+ *
+ * @start and @size are only updated if @name is found.
+ *
+ * Returns: 1 if found or 0 if not found.
+ */
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
+{
+	struct reserve_mem_table *map;
+	int i;
+
+	for (i = 0; i < reserved_mem_count; i++) {
+		map = &reserved_mem_table[i];
+		if (!map->size)
+			continue;
+		if (strcmp(name, map->name) == 0) {
+			*start = map->start;
+			*size = map->size;
+			return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
+
+/*
+ * Parse reserve_mem=nn:align:name
+ */
+static int __init reserve_mem(char *p)
+{
+	phys_addr_t start, size, align, tmp;
+	char *name;
+	char *oldp;
+	int len;
+
+	if (!p)
+		return -EINVAL;
+
+	/* Check if there's room for more reserved memory */
+	if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
+		return -EBUSY;
+
+	oldp = p;
+	size = memparse(p, &p);
+	if (!size || p == oldp)
+		return -EINVAL;
+
+	if (*p != ':')
+		return -EINVAL;
+
+	align = memparse(p+1, &p);
+	if (*p != ':')
+		return -EINVAL;
+
+	/*
+	 * memblock_phys_alloc() doesn't like a zero size align,
+	 * but it is OK for this command to have it.
+	 */
+	if (align < SMP_CACHE_BYTES)
+		align = SMP_CACHE_BYTES;
+
+	name = p + 1;
+	len = strlen(name);
+
+	/* name needs to have length but not too big */
+	if (!len || len >= RESERVE_MEM_NAME_SIZE)
+		return -EINVAL;
+
+	/* Make sure that name has text */
+	for (p = name; *p; p++) {
+		if (!isspace(*p))
+			break;
+	}
+	if (!*p)
+		return -EINVAL;
+
+	/* Make sure the name is not already used */
+	if (reserve_mem_find_by_name(name, &start, &tmp))
+		return -EBUSY;
+
+	start = memblock_phys_alloc(size, align);
+	if (!start)
+		return -ENOMEM;
+
+	reserved_mem_add(start, size, name);
+
+	return 1;
+}
+__setup("reserve_mem=", reserve_mem);
+
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
 static const char * const flagname[] = {
 	[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ec04933f7fd..804df0309257 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -363,7 +363,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 
 			nid = memblock_get_region_node(r);
 
-			usable_startpfn = PFN_DOWN(r->base);
+			usable_startpfn = memblock_region_memory_base_pfn(r);
 			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
 				min(usable_startpfn, zone_movable_pfn[nid]) :
 				usable_startpfn;
@@ -676,6 +676,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	if (early_page_ext_enabled())
 		return false;
+
+	/* Always populate low zones for address-constrained allocations */
+	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
+		return false;
+
+	if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+		return true;
+
 	/*
 	 * prev_end_pfn static that contains the end of previous zone
 	 * No need to protect because called very early in boot before smp_init.
@@ -685,12 +693,6 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 		nr_initialised = 0;
 	}
 
-	/* Always populate low zones for address-constrained allocations */
-	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
-		return false;
-
-	if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
-		return true;
-
 	/*
 	 * We start only with one section of pages, more pages are added as
 	 * needed until the rest of deferred pages are initialized.
@@ -758,9 +760,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
 
 			init_reserved_page(start_pfn, nid);
 
-			/* Avoid false-positive PageTail() */
-			INIT_LIST_HEAD(&page->lru);
-
 			/*
 			 * no need for atomic set_bit because the struct
 			 * page is not visible yet so nobody should
@@ -2019,24 +2018,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 }
 
 /*
- * This function is meant to pre-load the iterator for the zone init.
- * Specifically it walks through the ranges until we are caught up to the
- * first_init_pfn value and exits there. If we never encounter the value we
- * return false indicating there are no valid ranges left.
+ * This function is meant to pre-load the iterator for the zone init from
+ * a given point.
+ * Specifically it walks through the ranges starting with initial index
+ * passed to it until we are caught up to the first_init_pfn value and
+ * exits there. If we never encounter the value we return false indicating
+ * there are no valid ranges left.
  */
 static bool __init
 deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
 				    unsigned long *spfn, unsigned long *epfn,
 				    unsigned long first_init_pfn)
 {
-	u64 j;
+	u64 j = *i;
+
+	if (j == 0)
+		__next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
 
 	/*
	 * Start out by walking through the ranges in this zone that have
 	 * already been initialized. We don't need to do anything with them
 	 * so we just need to flush them out of the system.
 	 */
-	for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
+	for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
 		if (*epfn <= first_init_pfn)
 			continue;
 		if (*spfn < first_init_pfn)
@@ -2108,7 +2112,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
 {
 	unsigned long spfn, epfn;
 	struct zone *zone = arg;
-	u64 i;
+	u64 i = 0;
 
 	deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
@@ -2138,8 +2142,8 @@ static int __init deferred_init_memmap(void *data)
 	unsigned long first_init_pfn, flags;
 	unsigned long start = jiffies;
 	struct zone *zone;
-	int zid, max_threads;
-	u64 i;
+	int max_threads;
+	u64 i = 0;
 
 	/* Bind memory initialisation thread to a local node if possible */
 	if (!cpumask_empty(cpumask))
@@ -2165,27 +2169,18 @@
 	 */
 	pgdat_resize_unlock(pgdat, &flags);
 
-	/* Only the highest zone is deferred so find it */
-	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-		zone = pgdat->node_zones + zid;
-		if (first_init_pfn < zone_end_pfn(zone))
-			break;
-	}
-
-	/* If the zone is empty somebody else may have cleared out the zone */
-	if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
-						 first_init_pfn))
-		goto zone_empty;
+	/* Only the highest zone is deferred */
+	zone = pgdat->node_zones + pgdat->nr_zones - 1;
 
 	max_threads = deferred_page_init_max_threads(cpumask);
 
-	while (spfn < epfn) {
-		unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+	while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
+		first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
 		struct padata_mt_job job = {
 			.thread_fn	= deferred_init_memmap_chunk,
 			.fn_arg		= zone,
 			.start		= spfn,
-			.size		= epfn_align - spfn,
+			.size		= first_init_pfn - spfn,
 			.align		= PAGES_PER_SECTION,
 			.min_chunk	= PAGES_PER_SECTION,
 			.max_threads	= max_threads,
@@ -2193,12 +2188,10 @@
 		};
 
 		padata_do_multithreaded(&job);
-		deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
-						    epfn_align);
 	}
-zone_empty:
+
 	/* Sanity check that the next zone really is unpopulated */
-	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+	WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
 
 	pr_info("node %d deferred pages initialised in %ums\n",
 		pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -2225,7 +2218,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
 	unsigned long spfn, epfn, flags;
 	unsigned long nr_pages = 0;
-	u64 i;
+	u64 i = 0;
 
 	/* Only the last zone may have deferred pages */
 	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
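For context, below is a minimal sketch of how a boot-time consumer could use
the lookup API added above. The driver body and the "oops" label are
hypothetical (modelled on what pstore/ramoops does with ramoops.mem_name=);
only reserve_mem_find_by_name() comes from the diff, and the header providing
its declaration is an assumption here:

	#include <linux/init.h>
	#include <linux/io.h>
	#include <linux/mm.h>	/* declaration of reserve_mem_find_by_name() assumed */

	static int __init named_region_consumer_init(void)
	{
		phys_addr_t start, size;
		void *vaddr;

		/* Look up the region created by reserve_mem=12M:4096:oops */
		if (!reserve_mem_find_by_name("oops", &start, &size))
			return -ENODEV;	/* no such named reservation this boot */

		/* Map the physical range; it stays reserved for this boot */
		vaddr = memremap(start, size, MEMREMAP_WB);
		if (!vaddr)
			return -ENOMEM;

		/* ... use vaddr/size, e.g. as a persistent ram buffer ... */
		return 0;
	}
	late_initcall(named_region_consumer_init);

The design choice to key reservations by name rather than by address is what
makes this usable with KASLR and changing memory layouts: the consumer never
hard-codes a physical address, it only agrees with the command line on a label.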