summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-18 14:48:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-18 14:48:11 -0700
commitb2fc97c18614f99179700be263ecbc667c91a4e8 (patch)
tree13914a5bb2fcef7691c84796ab5a36459b949873 /mm
parent68b59730459e5d1fe4e0bbeb04ceb9df0f002270 (diff)
parent9364a7e40d54e6858479f0a96e1a04aa1204be16 (diff)
Merge tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock
Pull memblock updates from Mike Rapoport: - 'reserve_mem' command line parameter to allow creation of named memory reservation at boot time. The driving use-case is to improve the ability of pstore to retain ramoops data across reboots. - cleanups and small improvements in memblock and mm_init - new tests cases in memblock test suite * tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock: memblock tests: fix implicit declaration of function 'numa_valid_node' memblock: Move late alloc warning down to phys alloc pstore/ramoops: Add ramoops.mem_name= command line option mm/memblock: Add "reserve_mem" to reserved named memory at boot up mm/mm_init.c: don't initialize page->lru again mm/mm_init.c: not always search next deferred_init_pfn from very beginning mm/mm_init.c: use deferred_init_mem_pfn_range_in_zone() to decide loop condition mm/mm_init.c: get the highest zone directly mm/mm_init.c: move nr_initialised reset down a bit mm/memblock: fix a typo in description of for_each_mem_region() mm/mm_init.c: use memblock_region_memory_base_pfn() to get startpfn mm/memblock: use PAGE_ALIGN_DOWN to get pgend in free_memmap mm/memblock: return true directly on finding overlap region memblock tests: add memblock_overlaps_region_checks mm/memblock: fix comment for memblock_isolate_range() memblock tests: add memblock_reserve_many_may_conflict_check() memblock tests: add memblock_reserve_all_locations_check() mm/memblock: remove empty dummy entry
Diffstat (limited to 'mm')
-rw-r--r--mm/memblock.c151
-rw-r--r--mm/mm_init.c69
2 files changed, 166 insertions, 54 deletions
diff --git a/mm/memblock.c b/mm/memblock.c
index e81fb68f7f88..3b9dc2d89b8a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
- .memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
- .reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
@@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = {
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
.regions = memblock_physmem_init_regions,
- .cnt = 1, /* empty dummy entry */
.max = INIT_PHYSMEM_REGIONS,
.name = "physmem",
};
@@ -197,8 +194,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
for (i = 0; i < type->cnt; i++)
if (memblock_addrs_overlap(base, size, type->regions[i].base,
type->regions[i].size))
- break;
- return i < type->cnt;
+ return true;
+ return false;
}
/**
@@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
/* Special case for empty arrays */
if (type->cnt == 0) {
WARN_ON(type->total_size != 0);
- type->cnt = 1;
type->regions[0].base = 0;
type->regions[0].size = 0;
type->regions[0].flags = 0;
@@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
/* special case for empty array */
if (type->regions[0].size == 0) {
- WARN_ON(type->cnt != 1 || type->total_size);
+ WARN_ON(type->cnt != 0 || type->total_size);
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
+ type->cnt = 1;
return 0;
}
@@ -780,7 +777,8 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
* Walk @type and ensure that regions don't cross the boundaries defined by
* [@base, @base + @size). Crossing regions are split at the boundaries,
* which may create at most two more regions. The index of the first
- * region inside the range is returned in *@start_rgn and end in *@end_rgn.
+ * region inside the range is returned in *@start_rgn and the index of the
+ * first region after the range is returned in *@end_rgn.
*
* Return:
* 0 on success, -errno on failure.
@@ -1441,6 +1439,17 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
+ /*
+ * Detect any accidental use of these APIs after slab is ready, as at
+ * this moment memblock may be deinitialized already and its
+ * internal data may be destroyed (after execution of memblock_free_all)
+ */
+ if (WARN_ON_ONCE(slab_is_available())) {
+ void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
+
+ return vaddr ? virt_to_phys(vaddr) : 0;
+ }
+
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
@@ -1566,13 +1575,6 @@ static void * __init memblock_alloc_internal(
{
phys_addr_t alloc;
- /*
- * Detect any accidental use of these APIs after slab is ready, as at
- * this moment memblock may be deinitialized already and its
- * internal data may be destroyed (after execution of memblock_free_all)
- */
- if (WARN_ON_ONCE(slab_is_available()))
- return kzalloc_node(size, GFP_NOWAIT, nid);
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
@@ -2031,7 +2033,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
* downwards.
*/
pg = PAGE_ALIGN(__pa(start_pg));
- pgend = __pa(end_pg) & PAGE_MASK;
+ pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
/*
* If there are free pages between these, free the section of the
@@ -2234,6 +2236,123 @@ void __init memblock_free_all(void)
totalram_pages_add(pages);
}
+/* Keep a table to reserve named memory */
+#define RESERVE_MEM_MAX_ENTRIES 8
+#define RESERVE_MEM_NAME_SIZE 16
+struct reserve_mem_table {
+ char name[RESERVE_MEM_NAME_SIZE];
+ phys_addr_t start;
+ phys_addr_t size;
+};
+static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
+static int reserved_mem_count;
+
+/* Add wildcard region with a lookup name */
+static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
+ const char *name)
+{
+ struct reserve_mem_table *map;
+
+ map = &reserved_mem_table[reserved_mem_count++];
+ map->start = start;
+ map->size = size;
+ strscpy(map->name, name);
+}
+
+/**
+ * reserve_mem_find_by_name - Find reserved memory region with a given name
+ * @name: The name that is attached to a reserved memory region
+ * @start: If found, holds the start address
+ * @size: If found, holds the size of the address.
+ *
+ * @start and @size are only updated if @name is found.
+ *
+ * Returns: 1 if found or 0 if not found.
+ */
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
+{
+ struct reserve_mem_table *map;
+ int i;
+
+ for (i = 0; i < reserved_mem_count; i++) {
+ map = &reserved_mem_table[i];
+ if (!map->size)
+ continue;
+ if (strcmp(name, map->name) == 0) {
+ *start = map->start;
+ *size = map->size;
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
+
+/*
+ * Parse reserve_mem=nn:align:name
+ */
+static int __init reserve_mem(char *p)
+{
+ phys_addr_t start, size, align, tmp;
+ char *name;
+ char *oldp;
+ int len;
+
+ if (!p)
+ return -EINVAL;
+
+ /* Check if there's room for more reserved memory */
+ if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
+ return -EBUSY;
+
+ oldp = p;
+ size = memparse(p, &p);
+ if (!size || p == oldp)
+ return -EINVAL;
+
+ if (*p != ':')
+ return -EINVAL;
+
+ align = memparse(p+1, &p);
+ if (*p != ':')
+ return -EINVAL;
+
+ /*
+ * memblock_phys_alloc() doesn't like a zero size align,
+ * but it is OK for this command to have it.
+ */
+ if (align < SMP_CACHE_BYTES)
+ align = SMP_CACHE_BYTES;
+
+ name = p + 1;
+ len = strlen(name);
+
+ /* name needs to have length but not too big */
+ if (!len || len >= RESERVE_MEM_NAME_SIZE)
+ return -EINVAL;
+
+ /* Make sure that name has text */
+ for (p = name; *p; p++) {
+ if (!isspace(*p))
+ break;
+ }
+ if (!*p)
+ return -EINVAL;
+
+ /* Make sure the name is not already used */
+ if (reserve_mem_find_by_name(name, &start, &tmp))
+ return -EBUSY;
+
+ start = memblock_phys_alloc(size, align);
+ if (!start)
+ return -ENOMEM;
+
+ reserved_mem_add(start, size, name);
+
+ return 1;
+}
+__setup("reserve_mem=", reserve_mem);
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
static const char * const flagname[] = {
[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3ec04933f7fd..804df0309257 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -363,7 +363,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
nid = memblock_get_region_node(r);
- usable_startpfn = PFN_DOWN(r->base);
+ usable_startpfn = memblock_region_memory_base_pfn(r);
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
min(usable_startpfn, zone_movable_pfn[nid]) :
usable_startpfn;
@@ -676,6 +676,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
if (early_page_ext_enabled())
return false;
+
+ /* Always populate low zones for address-constrained allocations */
+ if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
+ return false;
+
+ if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+ return true;
+
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
@@ -685,12 +693,6 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
nr_initialised = 0;
}
- /* Always populate low zones for address-constrained allocations */
- if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
- return false;
-
- if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
- return true;
/*
* We start only with one section of pages, more pages are added as
* needed until the rest of deferred pages are initialized.
@@ -758,9 +760,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
init_reserved_page(start_pfn, nid);
- /* Avoid false-positive PageTail() */
- INIT_LIST_HEAD(&page->lru);
-
/*
* no need for atomic set_bit because the struct
* page is not visible yet so nobody should
@@ -2019,24 +2018,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}
/*
- * This function is meant to pre-load the iterator for the zone init.
- * Specifically it walks through the ranges until we are caught up to the
- * first_init_pfn value and exits there. If we never encounter the value we
- * return false indicating there are no valid ranges left.
+ * This function is meant to pre-load the iterator for the zone init from
+ * a given point.
+ * Specifically it walks through the ranges starting with initial index
+ * passed to it until we are caught up to the first_init_pfn value and
+ * exits there. If we never encounter the value we return false indicating
+ * there are no valid ranges left.
*/
static bool __init
deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
unsigned long *spfn, unsigned long *epfn,
unsigned long first_init_pfn)
{
- u64 j;
+ u64 j = *i;
+
+ if (j == 0)
+ __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
/*
* Start out by walking through the ranges in this zone that have
* already been initialized. We don't need to do anything with them
* so we just need to flush them out of the system.
*/
- for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
+ for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
if (*epfn <= first_init_pfn)
continue;
if (*spfn < first_init_pfn)
@@ -2108,7 +2112,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
{
unsigned long spfn, epfn;
struct zone *zone = arg;
- u64 i;
+ u64 i = 0;
deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
@@ -2138,8 +2142,8 @@ static int __init deferred_init_memmap(void *data)
unsigned long first_init_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
- int zid, max_threads;
- u64 i;
+ int max_threads;
+ u64 i = 0;
/* Bind memory initialisation thread to a local node if possible */
if (!cpumask_empty(cpumask))
@@ -2165,27 +2169,18 @@ static int __init deferred_init_memmap(void *data)
*/
pgdat_resize_unlock(pgdat, &flags);
- /* Only the highest zone is deferred so find it */
- for (zid = 0; zid < MAX_NR_ZONES; zid++) {
- zone = pgdat->node_zones + zid;
- if (first_init_pfn < zone_end_pfn(zone))
- break;
- }
-
- /* If the zone is empty somebody else may have cleared out the zone */
- if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- first_init_pfn))
- goto zone_empty;
+ /* Only the highest zone is deferred */
+ zone = pgdat->node_zones + pgdat->nr_zones - 1;
max_threads = deferred_page_init_max_threads(cpumask);
- while (spfn < epfn) {
- unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
+ while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
+ first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
struct padata_mt_job job = {
.thread_fn = deferred_init_memmap_chunk,
.fn_arg = zone,
.start = spfn,
- .size = epfn_align - spfn,
+ .size = first_init_pfn - spfn,
.align = PAGES_PER_SECTION,
.min_chunk = PAGES_PER_SECTION,
.max_threads = max_threads,
@@ -2193,12 +2188,10 @@ static int __init deferred_init_memmap(void *data)
};
padata_do_multithreaded(&job);
- deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- epfn_align);
}
-zone_empty:
+
/* Sanity check that the next zone really is unpopulated */
- WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+ WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
pr_info("node %d deferred pages initialised in %ums\n",
pgdat->node_id, jiffies_to_msecs(jiffies - start));
@@ -2225,7 +2218,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
unsigned long spfn, epfn, flags;
unsigned long nr_pages = 0;
- u64 i;
+ u64 i = 0;
/* Only the last zone may have deferred pages */
if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))