diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-22 16:11:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-22 16:11:53 -0700 |
commit | 3bf03b9a0839c9fb06927ae53ebd0f960b19d408 (patch) | |
tree | 06114247eb7760edca7b57cc0108a351ffe1971b /mm/memory_hotplug.c | |
parent | 3fe2f7446f1e029b220f7f650df6d138f91651f2 (diff) | |
parent | 15423a52cc84e23bc11e4a903cd775adc7c6ab00 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- A few misc subsystems: kthread, scripts, ntfs, ocfs2, block, and vfs
- Most the MM patches which precede the patches in Willy's tree: kasan,
pagecache, gup, swap, shmem, memcg, selftests, pagemap, mremap,
sparsemem, vmalloc, pagealloc, memory-failure, mlock, hugetlb,
userfaultfd, vmscan, compaction, mempolicy, oom-kill, migration, thp,
cma, autonuma, psi, ksm, page-poison, madvise, memory-hotplug, rmap,
zswap, uaccess, ioremap, highmem, cleanups, kfence, hmm, and damon.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (227 commits)
mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release()
Docs/ABI/testing: add DAMON sysfs interface ABI document
Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface
selftests/damon: add a test for DAMON sysfs interface
mm/damon/sysfs: support DAMOS stats
mm/damon/sysfs: support DAMOS watermarks
mm/damon/sysfs: support schemes prioritization
mm/damon/sysfs: support DAMOS quotas
mm/damon/sysfs: support DAMON-based Operation Schemes
mm/damon/sysfs: support the physical address space monitoring
mm/damon/sysfs: link DAMON for virtual address spaces monitoring
mm/damon: implement a minimal stub for sysfs-based DAMON interface
mm/damon/core: add number of each enum type values
mm/damon/core: allow non-exclusive DAMON start/stop
Docs/damon: update outdated term 'regions update interval'
Docs/vm/damon/design: update DAMON-Idle Page Tracking interference handling
Docs/vm/damon: call low level monitoring primitives the operations
mm/damon: remove unnecessary CONFIG_DAMON option
mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}()
mm/damon/dbgfs-test: fix is_target_id() change
...
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- | mm/memory_hotplug.c | 132 |
1 files changed, 30 insertions, 102 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2a9627dc784c..aee69281dad6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -295,12 +295,6 @@ struct page *pfn_to_online_page(unsigned long pfn) } EXPORT_SYMBOL_GPL(pfn_to_online_page); -/* - * Reasonably generic function for adding memory. It is - * expected that archs that support memory hotplug will - * call this function after deciding the zone to which to - * add the new pages. - */ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, struct mhp_params *params) { @@ -829,7 +823,7 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn struct pglist_data *pgdat = NODE_DATA(nid); int zid; - for (zid = 0; zid <= ZONE_NORMAL; zid++) { + for (zid = 0; zid < ZONE_NORMAL; zid++) { struct zone *zone = &pgdat->node_zones[zid]; if (zone_intersects(zone, start_pfn, nr_pages)) @@ -1162,43 +1156,20 @@ static void reset_node_present_pages(pg_data_t *pgdat) } /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -static pg_data_t __ref *hotadd_new_pgdat(int nid) +static pg_data_t __ref *hotadd_init_pgdat(int nid) { struct pglist_data *pgdat; + /* + * NODE_DATA is preallocated (free_area_init) but its internal + * state is not allocated completely. Add missing pieces. + * Completely offline nodes stay around and they just need + * reintialization. + */ pgdat = NODE_DATA(nid); - if (!pgdat) { - pgdat = arch_alloc_nodedata(nid); - if (!pgdat) - return NULL; - - pgdat->per_cpu_nodestats = - alloc_percpu(struct per_cpu_nodestat); - arch_refresh_nodedata(nid, pgdat); - } else { - int cpu; - /* - * Reset the nr_zones, order and highest_zoneidx before reuse. - * Note that kswapd will init kswapd_highest_zoneidx properly - * when it starts in the near future. - */ - pgdat->nr_zones = 0; - pgdat->kswapd_order = 0; - pgdat->kswapd_highest_zoneidx = 0; - for_each_online_cpu(cpu) { - struct per_cpu_nodestat *p; - - p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu); - memset(p, 0, sizeof(*p)); - } - } - - /* we can use NODE_DATA(nid) from here */ - pgdat->node_id = nid; - pgdat->node_start_pfn = 0; /* init node's zones as empty zones, we don't have any present pages.*/ - free_area_init_core_hotplug(nid); + free_area_init_core_hotplug(pgdat); /* * The node we allocated has no zone fallback lists. For avoiding @@ -1210,6 +1181,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid) * When memory is hot-added, all the memory is in offline state. So * clear all zones' present_pages because they will be updated in * online_pages() and offline_pages(). + * TODO: should be in free_area_init_core_hotplug? */ reset_node_managed_pages(pgdat); reset_node_present_pages(pgdat); @@ -1217,16 +1189,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid) return pgdat; } -static void rollback_node_hotadd(int nid) -{ - pg_data_t *pgdat = NODE_DATA(nid); - - arch_refresh_nodedata(nid, NULL); - free_percpu(pgdat->per_cpu_nodestats); - arch_free_nodedata(pgdat); -} - - /* * __try_online_node - online a node if offlined * @nid: the node ID @@ -1246,7 +1208,7 @@ static int __try_online_node(int nid, bool set_node_online) if (node_online(nid)) return 0; - pgdat = hotadd_new_pgdat(nid); + pgdat = hotadd_init_pgdat(nid); if (!pgdat) { pr_err("Cannot online node %d due to NULL pgdat\n", nid); ret = -ENOMEM; @@ -1327,7 +1289,7 @@ bool mhp_supports_memmap_on_memory(unsigned long size) * populate a single PMD. */ return memmap_on_memory && - !hugetlb_free_vmemmap_enabled && + !hugetlb_free_vmemmap_enabled() && IS_ENABLED(CONFIG_MHP_MEMMAP_ON_MEMORY) && size == memory_block_size_bytes() && IS_ALIGNED(vmemmap_size, PMD_SIZE) && @@ -1421,9 +1383,9 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) BUG_ON(ret); } - /* link memory sections under this node.*/ - link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1), - MEMINIT_HOTPLUG); + register_memory_blocks_under_node(nid, PFN_DOWN(start), + PFN_UP(start + size - 1), + MEMINIT_HOTPLUG); /* create new memmap entry */ if (!strcmp(res->name, "System RAM")) @@ -1445,9 +1407,6 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) return ret; error: - /* rollback pgdat allocation and others */ - if (new_node) - rollback_node_hotadd(nid); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); error_mem_hotplug_end: @@ -1590,38 +1549,6 @@ bool mhp_range_allowed(u64 start, u64 size, bool need_mapping) #ifdef CONFIG_MEMORY_HOTREMOVE /* - * Confirm all pages in a range [start, end) belong to the same zone (skipping - * memory holes). When true, return the zone. - */ -struct zone *test_pages_in_a_zone(unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long pfn, sec_end_pfn; - struct zone *zone = NULL; - struct page *page; - - for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1); - pfn < end_pfn; - pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) { - /* Make sure the memory section is present first */ - if (!present_section_nr(pfn_to_section_nr(pfn))) - continue; - for (; pfn < sec_end_pfn && pfn < end_pfn; - pfn += MAX_ORDER_NR_PAGES) { - /* Check if we got outside of the zone */ - if (zone && !zone_spans_pfn(zone, pfn)) - return NULL; - page = pfn_to_page(pfn); - if (zone && page_zone(page) != zone) - return NULL; - zone = page_zone(page); - } - } - - return zone; -} - -/* * Scan pfn range [start,end) to find movable/migratable pages (LRU pages, * non-lru movable pages and hugepages). Will skip over most unmovable * pages (esp., pages that can be skipped when offlining), but bail out on @@ -1844,15 +1771,15 @@ static int count_system_ram_pages_cb(unsigned long start_pfn, } int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, - struct memory_group *group) + struct zone *zone, struct memory_group *group) { const unsigned long end_pfn = start_pfn + nr_pages; unsigned long pfn, system_ram_pages = 0; + const int node = zone_to_nid(zone); unsigned long flags; - struct zone *zone; struct memory_notify arg; - int ret, node; char *reason; + int ret; /* * {on,off}lining is constrained to full memory sections (or more @@ -1884,15 +1811,17 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, goto failed_removal; } - /* This makes hotplug much easier...and readable. - we assume this for now. .*/ - zone = test_pages_in_a_zone(start_pfn, end_pfn); - if (!zone) { + /* + * We only support offlining of memory blocks managed by a single zone, + * checked by calling code. This is just a sanity check that we might + * want to remove in the future. + */ + if (WARN_ON_ONCE(page_zone(pfn_to_page(start_pfn)) != zone || + page_zone(pfn_to_page(end_pfn - 1)) != zone)) { ret = -EINVAL; reason = "multizone range"; goto failed_removal; } - node = zone_to_nid(zone); /* * Disable pcplists so that page isolation cannot race with freeing @@ -2004,6 +1933,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, return 0; failed_removal_isolated: + /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); memory_notify(MEM_CANCEL_OFFLINE, &arg); failed_removal_pcplists_disabled: @@ -2014,7 +1944,6 @@ failed_removal: (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); - /* pushback to free area */ mem_hotplug_done(); return ret; } @@ -2046,12 +1975,12 @@ static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg) return mem->nr_vmemmap_pages; } -static int check_cpu_on_node(pg_data_t *pgdat) +static int check_cpu_on_node(int nid) { int cpu; for_each_present_cpu(cpu) { - if (cpu_to_node(cpu) == pgdat->node_id) + if (cpu_to_node(cpu) == nid) /* * the cpu on this node isn't removed, and we can't * offline this node. @@ -2085,7 +2014,6 @@ static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg) */ void try_offline_node(int nid) { - pg_data_t *pgdat = NODE_DATA(nid); int rc; /* @@ -2093,7 +2021,7 @@ void try_offline_node(int nid) * offline it. A node spans memory after move_pfn_range_to_zone(), * e.g., after the memory block was onlined. */ - if (pgdat->node_spanned_pages) + if (node_spanned_pages(nid)) return; /* @@ -2105,7 +2033,7 @@ void try_offline_node(int nid) if (rc) return; - if (check_cpu_on_node(pgdat)) + if (check_cpu_on_node(nid)) return; /* |