summaryrefslogtreecommitdiff
path: root/mm/memory_hotplug.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-22 16:11:53 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-22 16:11:53 -0700
commit3bf03b9a0839c9fb06927ae53ebd0f960b19d408 (patch)
tree06114247eb7760edca7b57cc0108a351ffe1971b /mm/memory_hotplug.c
parent3fe2f7446f1e029b220f7f650df6d138f91651f2 (diff)
parent15423a52cc84e23bc11e4a903cd775adc7c6ab00 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton: - A few misc subsystems: kthread, scripts, ntfs, ocfs2, block, and vfs - Most the MM patches which precede the patches in Willy's tree: kasan, pagecache, gup, swap, shmem, memcg, selftests, pagemap, mremap, sparsemem, vmalloc, pagealloc, memory-failure, mlock, hugetlb, userfaultfd, vmscan, compaction, mempolicy, oom-kill, migration, thp, cma, autonuma, psi, ksm, page-poison, madvise, memory-hotplug, rmap, zswap, uaccess, ioremap, highmem, cleanups, kfence, hmm, and damon. * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (227 commits) mm/damon/sysfs: remove repeat container_of() in damon_sysfs_kdamond_release() Docs/ABI/testing: add DAMON sysfs interface ABI document Docs/admin-guide/mm/damon/usage: document DAMON sysfs interface selftests/damon: add a test for DAMON sysfs interface mm/damon/sysfs: support DAMOS stats mm/damon/sysfs: support DAMOS watermarks mm/damon/sysfs: support schemes prioritization mm/damon/sysfs: support DAMOS quotas mm/damon/sysfs: support DAMON-based Operation Schemes mm/damon/sysfs: support the physical address space monitoring mm/damon/sysfs: link DAMON for virtual address spaces monitoring mm/damon: implement a minimal stub for sysfs-based DAMON interface mm/damon/core: add number of each enum type values mm/damon/core: allow non-exclusive DAMON start/stop Docs/damon: update outdated term 'regions update interval' Docs/vm/damon/design: update DAMON-Idle Page Tracking interference handling Docs/vm/damon: call low level monitoring primitives the operations mm/damon: remove unnecessary CONFIG_DAMON option mm/damon/paddr,vaddr: remove damon_{p,v}a_{target_valid,set_operations}() mm/damon/dbgfs-test: fix is_target_id() change ...
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r--mm/memory_hotplug.c132
1 files changed, 30 insertions, 102 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2a9627dc784c..aee69281dad6 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -295,12 +295,6 @@ struct page *pfn_to_online_page(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(pfn_to_online_page);
-/*
- * Reasonably generic function for adding memory. It is
- * expected that archs that support memory hotplug will
- * call this function after deciding the zone to which to
- * add the new pages.
- */
int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
struct mhp_params *params)
{
@@ -829,7 +823,7 @@ static struct zone *default_kernel_zone_for_pfn(int nid, unsigned long start_pfn
struct pglist_data *pgdat = NODE_DATA(nid);
int zid;
- for (zid = 0; zid <= ZONE_NORMAL; zid++) {
+ for (zid = 0; zid < ZONE_NORMAL; zid++) {
struct zone *zone = &pgdat->node_zones[zid];
if (zone_intersects(zone, start_pfn, nr_pages))
@@ -1162,43 +1156,20 @@ static void reset_node_present_pages(pg_data_t *pgdat)
}
/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
-static pg_data_t __ref *hotadd_new_pgdat(int nid)
+static pg_data_t __ref *hotadd_init_pgdat(int nid)
{
struct pglist_data *pgdat;
+ /*
+ * NODE_DATA is preallocated (free_area_init) but its internal
+ * state is not allocated completely. Add missing pieces.
+ * Completely offline nodes stay around and they just need
+ * reintialization.
+ */
pgdat = NODE_DATA(nid);
- if (!pgdat) {
- pgdat = arch_alloc_nodedata(nid);
- if (!pgdat)
- return NULL;
-
- pgdat->per_cpu_nodestats =
- alloc_percpu(struct per_cpu_nodestat);
- arch_refresh_nodedata(nid, pgdat);
- } else {
- int cpu;
- /*
- * Reset the nr_zones, order and highest_zoneidx before reuse.
- * Note that kswapd will init kswapd_highest_zoneidx properly
- * when it starts in the near future.
- */
- pgdat->nr_zones = 0;
- pgdat->kswapd_order = 0;
- pgdat->kswapd_highest_zoneidx = 0;
- for_each_online_cpu(cpu) {
- struct per_cpu_nodestat *p;
-
- p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
- memset(p, 0, sizeof(*p));
- }
- }
-
- /* we can use NODE_DATA(nid) from here */
- pgdat->node_id = nid;
- pgdat->node_start_pfn = 0;
/* init node's zones as empty zones, we don't have any present pages.*/
- free_area_init_core_hotplug(nid);
+ free_area_init_core_hotplug(pgdat);
/*
* The node we allocated has no zone fallback lists. For avoiding
@@ -1210,6 +1181,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid)
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
+ * TODO: should be in free_area_init_core_hotplug?
*/
reset_node_managed_pages(pgdat);
reset_node_present_pages(pgdat);
@@ -1217,16 +1189,6 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid)
return pgdat;
}
-static void rollback_node_hotadd(int nid)
-{
- pg_data_t *pgdat = NODE_DATA(nid);
-
- arch_refresh_nodedata(nid, NULL);
- free_percpu(pgdat->per_cpu_nodestats);
- arch_free_nodedata(pgdat);
-}
-
-
/*
* __try_online_node - online a node if offlined
* @nid: the node ID
@@ -1246,7 +1208,7 @@ static int __try_online_node(int nid, bool set_node_online)
if (node_online(nid))
return 0;
- pgdat = hotadd_new_pgdat(nid);
+ pgdat = hotadd_init_pgdat(nid);
if (!pgdat) {
pr_err("Cannot online node %d due to NULL pgdat\n", nid);
ret = -ENOMEM;
@@ -1327,7 +1289,7 @@ bool mhp_supports_memmap_on_memory(unsigned long size)
* populate a single PMD.
*/
return memmap_on_memory &&
- !hugetlb_free_vmemmap_enabled &&
+ !hugetlb_free_vmemmap_enabled() &&
IS_ENABLED(CONFIG_MHP_MEMMAP_ON_MEMORY) &&
size == memory_block_size_bytes() &&
IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
@@ -1421,9 +1383,9 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
BUG_ON(ret);
}
- /* link memory sections under this node.*/
- link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
- MEMINIT_HOTPLUG);
+ register_memory_blocks_under_node(nid, PFN_DOWN(start),
+ PFN_UP(start + size - 1),
+ MEMINIT_HOTPLUG);
/* create new memmap entry */
if (!strcmp(res->name, "System RAM"))
@@ -1445,9 +1407,6 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
return ret;
error:
- /* rollback pgdat allocation and others */
- if (new_node)
- rollback_node_hotadd(nid);
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
memblock_remove(start, size);
error_mem_hotplug_end:
@@ -1590,38 +1549,6 @@ bool mhp_range_allowed(u64 start, u64 size, bool need_mapping)
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
- * Confirm all pages in a range [start, end) belong to the same zone (skipping
- * memory holes). When true, return the zone.
- */
-struct zone *test_pages_in_a_zone(unsigned long start_pfn,
- unsigned long end_pfn)
-{
- unsigned long pfn, sec_end_pfn;
- struct zone *zone = NULL;
- struct page *page;
-
- for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
- pfn < end_pfn;
- pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
- /* Make sure the memory section is present first */
- if (!present_section_nr(pfn_to_section_nr(pfn)))
- continue;
- for (; pfn < sec_end_pfn && pfn < end_pfn;
- pfn += MAX_ORDER_NR_PAGES) {
- /* Check if we got outside of the zone */
- if (zone && !zone_spans_pfn(zone, pfn))
- return NULL;
- page = pfn_to_page(pfn);
- if (zone && page_zone(page) != zone)
- return NULL;
- zone = page_zone(page);
- }
- }
-
- return zone;
-}
-
-/*
* Scan pfn range [start,end) to find movable/migratable pages (LRU pages,
* non-lru movable pages and hugepages). Will skip over most unmovable
* pages (esp., pages that can be skipped when offlining), but bail out on
@@ -1844,15 +1771,15 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
}
int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
- struct memory_group *group)
+ struct zone *zone, struct memory_group *group)
{
const unsigned long end_pfn = start_pfn + nr_pages;
unsigned long pfn, system_ram_pages = 0;
+ const int node = zone_to_nid(zone);
unsigned long flags;
- struct zone *zone;
struct memory_notify arg;
- int ret, node;
char *reason;
+ int ret;
/*
* {on,off}lining is constrained to full memory sections (or more
@@ -1884,15 +1811,17 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
goto failed_removal;
}
- /* This makes hotplug much easier...and readable.
- we assume this for now. .*/
- zone = test_pages_in_a_zone(start_pfn, end_pfn);
- if (!zone) {
+ /*
+ * We only support offlining of memory blocks managed by a single zone,
+ * checked by calling code. This is just a sanity check that we might
+ * want to remove in the future.
+ */
+ if (WARN_ON_ONCE(page_zone(pfn_to_page(start_pfn)) != zone ||
+ page_zone(pfn_to_page(end_pfn - 1)) != zone)) {
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
}
- node = zone_to_nid(zone);
/*
* Disable pcplists so that page isolation cannot race with freeing
@@ -2004,6 +1933,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
return 0;
failed_removal_isolated:
+ /* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
memory_notify(MEM_CANCEL_OFFLINE, &arg);
failed_removal_pcplists_disabled:
@@ -2014,7 +1944,6 @@ failed_removal:
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
- /* pushback to free area */
mem_hotplug_done();
return ret;
}
@@ -2046,12 +1975,12 @@ static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
return mem->nr_vmemmap_pages;
}
-static int check_cpu_on_node(pg_data_t *pgdat)
+static int check_cpu_on_node(int nid)
{
int cpu;
for_each_present_cpu(cpu) {
- if (cpu_to_node(cpu) == pgdat->node_id)
+ if (cpu_to_node(cpu) == nid)
/*
* the cpu on this node isn't removed, and we can't
* offline this node.
@@ -2085,7 +2014,6 @@ static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
*/
void try_offline_node(int nid)
{
- pg_data_t *pgdat = NODE_DATA(nid);
int rc;
/*
@@ -2093,7 +2021,7 @@ void try_offline_node(int nid)
* offline it. A node spans memory after move_pfn_range_to_zone(),
* e.g., after the memory block was onlined.
*/
- if (pgdat->node_spanned_pages)
+ if (node_spanned_pages(nid))
return;
/*
@@ -2105,7 +2033,7 @@ void try_offline_node(int nid)
if (rc)
return;
- if (check_cpu_on_node(pgdat))
+ if (check_cpu_on_node(nid))
return;
/*