summaryrefslogtreecommitdiff
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 17:53:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-10 17:53:04 -0700
commit27bc50fc90647bbf7b734c3fc306a5e61350da53 (patch)
tree75fc525fbfec8c07a97a7875a89592317bcad4ca /mm/memcontrol.c
parent70442fc54e6889a2a77f0e9554e8188a1557f00e (diff)
parentbbff39cc6cbcb86ccfacb2dcafc79912a9f9df69 (diff)
Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in linux-next for a couple of months without, to my knowledge, any negative reports (or any positive ones, come to that). - Also the Maple Tree from Liam Howlett. An overlapping range-based tree for vmas. It it apparently slightly more efficient in its own right, but is mainly targeted at enabling work to reduce mmap_lock contention. Liam has identified a number of other tree users in the kernel which could be beneficially onverted to mapletrees. Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat at [1]. This has yet to be addressed due to Liam's unfortunately timed vacation. He is now back and we'll get this fixed up. - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses clang-generated instrumentation to detect used-unintialized bugs down to the single bit level. KMSAN keeps finding bugs. New ones, as well as the legacy ones. - Yang Shi adds a userspace mechanism (madvise) to induce a collapse of memory into THPs. - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to support file/shmem-backed pages. - userfaultfd updates from Axel Rasmussen - zsmalloc cleanups from Alexey Romanov - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and memory-failure - Huang Ying adds enhancements to NUMA balancing memory tiering mode's page promotion, with a new way of detecting hot pages. - memcg updates from Shakeel Butt: charging optimizations and reduced memory consumption. - memcg cleanups from Kairui Song. - memcg fixes and cleanups from Johannes Weiner. - Vishal Moola provides more folio conversions - Zhang Yi removed ll_rw_block() :( - migration enhancements from Peter Xu - migration error-path bugfixes from Huang Ying - Aneesh Kumar added ability for a device driver to alter the memory tiering promotion paths. For optimizations by PMEM drivers, DRM drivers, etc. - vma merging improvements from Jakub Matěn. - NUMA hinting cleanups from David Hildenbrand. - xu xin added aditional userspace visibility into KSM merging activity. - THP & KSM code consolidation from Qi Zheng. - more folio work from Matthew Wilcox. - KASAN updates from Andrey Konovalov. - DAMON cleanups from Kaixu Xia. - DAMON work from SeongJae Park: fixes, cleanups. - hugetlb sysfs cleanups from Muchun Song. - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core. Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1] * tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits) hugetlb: allocate vma lock for all sharable vmas hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer hugetlb: fix vma lock handling during split vma and range unmapping mglru: mm/vmscan.c: fix imprecise comments mm/mglru: don't sync disk for each aging cycle mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol mm: memcontrol: use do_memsw_account() in a few more places mm: memcontrol: deprecate swapaccounting=0 mode mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled mm/secretmem: remove reduntant return value mm/hugetlb: add available_huge_pages() func mm: remove unused inline functions from include/linux/mm_inline.h selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd selftests/vm: add thp collapse shmem testing selftests/vm: add thp collapse file and tmpfs testing selftests/vm: modularize thp collapse memory operations selftests/vm: dedup THP helpers mm/khugepaged: add tracepoint to hpage_collapse_scan_file() mm/madvise: add file and shmem support to MADV_COLLAPSE ...
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c281
1 files changed, 184 insertions, 97 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bac2de4b9c42..2d8549ae1b30 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -88,13 +88,6 @@ static bool cgroup_memory_nosocket __ro_after_init;
/* Kernel memory accounting disabled? */
static bool cgroup_memory_nokmem __ro_after_init;
-/* Whether the swap controller is active */
-#ifdef CONFIG_MEMCG_SWAP
-static bool cgroup_memory_noswap __ro_after_init;
-#else
-#define cgroup_memory_noswap 1
-#endif
-
#ifdef CONFIG_CGROUP_WRITEBACK
static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
#endif
@@ -102,7 +95,7 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
/* Whether legacy memory+swap accounting is active */
static bool do_memsw_account(void)
{
- return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
+ return !cgroup_subsys_on_dfl(memory_cgrp_subsys);
}
#define THRESHOLDS_EVENTS_TARGET 128
@@ -662,6 +655,81 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
}
+/* Subset of vm_event_item to report for memcg event stats */
+static const unsigned int memcg_vm_event_stat[] = {
+ PGPGIN,
+ PGPGOUT,
+ PGSCAN_KSWAPD,
+ PGSCAN_DIRECT,
+ PGSTEAL_KSWAPD,
+ PGSTEAL_DIRECT,
+ PGFAULT,
+ PGMAJFAULT,
+ PGREFILL,
+ PGACTIVATE,
+ PGDEACTIVATE,
+ PGLAZYFREE,
+ PGLAZYFREED,
+#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
+ ZSWPIN,
+ ZSWPOUT,
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ THP_FAULT_ALLOC,
+ THP_COLLAPSE_ALLOC,
+#endif
+};
+
+#define NR_MEMCG_EVENTS ARRAY_SIZE(memcg_vm_event_stat)
+static int mem_cgroup_events_index[NR_VM_EVENT_ITEMS] __read_mostly;
+
+static void init_memcg_events(void)
+{
+ int i;
+
+ for (i = 0; i < NR_MEMCG_EVENTS; ++i)
+ mem_cgroup_events_index[memcg_vm_event_stat[i]] = i + 1;
+}
+
+static inline int memcg_events_index(enum vm_event_item idx)
+{
+ return mem_cgroup_events_index[idx] - 1;
+}
+
+struct memcg_vmstats_percpu {
+ /* Local (CPU and cgroup) page state & events */
+ long state[MEMCG_NR_STAT];
+ unsigned long events[NR_MEMCG_EVENTS];
+
+ /* Delta calculation for lockless upward propagation */
+ long state_prev[MEMCG_NR_STAT];
+ unsigned long events_prev[NR_MEMCG_EVENTS];
+
+ /* Cgroup1: threshold notifications & softlimit tree updates */
+ unsigned long nr_page_events;
+ unsigned long targets[MEM_CGROUP_NTARGETS];
+};
+
+struct memcg_vmstats {
+ /* Aggregated (CPU and subtree) page state & events */
+ long state[MEMCG_NR_STAT];
+ unsigned long events[NR_MEMCG_EVENTS];
+
+ /* Pending child counts during tree propagation */
+ long state_pending[MEMCG_NR_STAT];
+ unsigned long events_pending[NR_MEMCG_EVENTS];
+};
+
+unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+ long x = READ_ONCE(memcg->vmstats->state[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
/**
* __mod_memcg_state - update cgroup memory statistics
* @memcg: the memory cgroup
@@ -809,27 +877,37 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
unsigned long count)
{
- if (mem_cgroup_disabled())
+ int index = memcg_events_index(idx);
+
+ if (mem_cgroup_disabled() || index < 0)
return;
memcg_stats_lock();
- __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
+ __this_cpu_add(memcg->vmstats_percpu->events[index], count);
memcg_rstat_updated(memcg, count);
memcg_stats_unlock();
}
static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
{
- return READ_ONCE(memcg->vmstats.events[event]);
+ int index = memcg_events_index(event);
+
+ if (index < 0)
+ return 0;
+ return READ_ONCE(memcg->vmstats->events[index]);
}
static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
{
long x = 0;
int cpu;
+ int index = memcg_events_index(event);
+
+ if (index < 0)
+ return 0;
for_each_possible_cpu(cpu)
- x += per_cpu(memcg->vmstats_percpu->events[event], cpu);
+ x += per_cpu(memcg->vmstats_percpu->events[index], cpu);
return x;
}
@@ -1136,7 +1214,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
} while ((memcg = parent_mem_cgroup(memcg)));
/*
- * When cgruop1 non-hierarchy mode is used,
+ * When cgroup1 non-hierarchy mode is used,
* parent_mem_cgroup() does not walk all the way up to the
* cgroup root (root_mem_cgroup). So we have to handle
* dead_memcg from cgroup root separately.
@@ -1461,29 +1539,6 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
}
-/* Subset of vm_event_item to report for memcg event stats */
-static const unsigned int memcg_vm_event_stat[] = {
- PGSCAN_KSWAPD,
- PGSCAN_DIRECT,
- PGSTEAL_KSWAPD,
- PGSTEAL_DIRECT,
- PGFAULT,
- PGMAJFAULT,
- PGREFILL,
- PGACTIVATE,
- PGDEACTIVATE,
- PGLAZYFREE,
- PGLAZYFREED,
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
- ZSWPIN,
- ZSWPOUT,
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- THP_FAULT_ALLOC,
- THP_COLLAPSE_ALLOC,
-#endif
-};
-
static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
{
struct seq_buf s;
@@ -1524,10 +1579,15 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
memcg_events(memcg, PGSTEAL_KSWAPD) +
memcg_events(memcg, PGSTEAL_DIRECT));
- for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++)
+ for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
+ if (memcg_vm_event_stat[i] == PGPGIN ||
+ memcg_vm_event_stat[i] == PGPGOUT)
+ continue;
+
seq_buf_printf(&s, "%s %lu\n",
vm_event_name(memcg_vm_event_stat[i]),
memcg_events(memcg, memcg_vm_event_stat[i]));
+ }
/* The above should easily fit into one page */
WARN_ON_ONCE(seq_buf_has_overflowed(&s));
@@ -1601,17 +1661,17 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
{
unsigned long max = READ_ONCE(memcg->memory.max);
- if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
- if (mem_cgroup_swappiness(memcg))
- max += min(READ_ONCE(memcg->swap.max),
- (unsigned long)total_swap_pages);
- } else { /* v1 */
+ if (do_memsw_account()) {
if (mem_cgroup_swappiness(memcg)) {
/* Calculate swap excess capacity from memsw limit */
unsigned long swap = READ_ONCE(memcg->memsw.max) - max;
max += min(swap, (unsigned long)total_swap_pages);
}
+ } else {
+ if (mem_cgroup_swappiness(memcg))
+ max += min(READ_ONCE(memcg->swap.max),
+ (unsigned long)total_swap_pages);
}
return max;
}
@@ -2783,6 +2843,7 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
* - LRU isolation
* - lock_page_memcg()
* - exclusive reference
+ * - mem_cgroup_trylock_pages()
*/
folio->memcg_data = (unsigned long)memcg;
}
@@ -3356,7 +3417,7 @@ void split_page_memcg(struct page *head, unsigned int nr)
css_get_many(&memcg->css, nr - 1);
}
-#ifdef CONFIG_MEMCG_SWAP
+#ifdef CONFIG_SWAP
/**
* mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
* @entry: swap entry to be moved
@@ -3969,6 +4030,8 @@ static const unsigned int memcg1_stats[] = {
NR_FILE_MAPPED,
NR_FILE_DIRTY,
NR_WRITEBACK,
+ WORKINGSET_REFAULT_ANON,
+ WORKINGSET_REFAULT_FILE,
MEMCG_SWAP,
};
@@ -3982,6 +4045,8 @@ static const char *const memcg1_stat_names[] = {
"mapped_file",
"dirty",
"writeback",
+ "workingset_refault_anon",
+ "workingset_refault_file",
"swap",
};
@@ -4010,7 +4075,8 @@ static int memcg_stat_show(struct seq_file *m, void *v)
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
nr = memcg_page_state_local(memcg, memcg1_stats[i]);
- seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE);
+ seq_printf(m, "%s %lu\n", memcg1_stat_names[i],
+ nr * memcg_page_state_unit(memcg1_stats[i]));
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
@@ -4041,7 +4107,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
continue;
nr = memcg_page_state(memcg, memcg1_stats[i]);
seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)nr * PAGE_SIZE);
+ (u64)nr * memcg_page_state_unit(memcg1_stats[i]));
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
@@ -5158,12 +5224,14 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
+ kfree(memcg->vmstats);
free_percpu(memcg->vmstats_percpu);
kfree(memcg);
}
static void mem_cgroup_free(struct mem_cgroup *memcg)
{
+ lru_gen_exit_memcg(memcg);
memcg_wb_domain_exit(memcg);
__mem_cgroup_free(memcg);
}
@@ -5186,6 +5254,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
goto fail;
}
+ memcg->vmstats = kzalloc(sizeof(struct memcg_vmstats), GFP_KERNEL);
+ if (!memcg->vmstats)
+ goto fail;
+
memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu,
GFP_KERNEL_ACCOUNT);
if (!memcg->vmstats_percpu)
@@ -5222,6 +5294,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
memcg->deferred_split_queue.split_queue_len = 0;
#endif
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+ lru_gen_init_memcg(memcg);
return memcg;
fail:
mem_cgroup_id_remove(memcg);
@@ -5256,6 +5329,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_init(&memcg->kmem, &parent->kmem);
page_counter_init(&memcg->tcpmem, &parent->tcpmem);
} else {
+ init_memcg_events();
page_counter_init(&memcg->memory, NULL);
page_counter_init(&memcg->swap, NULL);
page_counter_init(&memcg->kmem, NULL);
@@ -5404,9 +5478,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
* below us. We're in a per-cpu loop here and this is
* a global counter, so the first cycle will get them.
*/
- delta = memcg->vmstats.state_pending[i];
+ delta = memcg->vmstats->state_pending[i];
if (delta)
- memcg->vmstats.state_pending[i] = 0;
+ memcg->vmstats->state_pending[i] = 0;
/* Add CPU changes on this level since the last flush */
v = READ_ONCE(statc->state[i]);
@@ -5419,15 +5493,15 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
continue;
/* Aggregate counts on this level and propagate upwards */
- memcg->vmstats.state[i] += delta;
+ memcg->vmstats->state[i] += delta;
if (parent)
- parent->vmstats.state_pending[i] += delta;
+ parent->vmstats->state_pending[i] += delta;
}
- for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
- delta = memcg->vmstats.events_pending[i];
+ for (i = 0; i < NR_MEMCG_EVENTS; i++) {
+ delta = memcg->vmstats->events_pending[i];
if (delta)
- memcg->vmstats.events_pending[i] = 0;
+ memcg->vmstats->events_pending[i] = 0;
v = READ_ONCE(statc->events[i]);
if (v != statc->events_prev[i]) {
@@ -5438,9 +5512,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (!delta)
continue;
- memcg->vmstats.events[i] += delta;
+ memcg->vmstats->events[i] += delta;
if (parent)
- parent->vmstats.events_pending[i] += delta;
+ parent->vmstats->events_pending[i] += delta;
}
for_each_node_state(nid, N_MEMORY) {
@@ -5555,7 +5629,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
return NULL;
/*
- * Because lookup_swap_cache() updates some statistics counter,
+ * Because swap_cache_get_folio() updates some statistics counter,
* we call find_get_page() with swapper_space directly.
*/
page = find_get_page(swap_address_space(ent), swp_offset(ent));
@@ -5865,7 +5939,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
unsigned long precharge;
mmap_read_lock(mm);
- walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL);
+ walk_page_range(mm, 0, ULONG_MAX, &precharge_walk_ops, NULL);
mmap_read_unlock(mm);
precharge = mc.precharge;
@@ -6163,9 +6237,7 @@ retry:
* When we have consumed all precharges and failed in doing
* additional charge, the page walk just aborts.
*/
- walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops,
- NULL);
-
+ walk_page_range(mc.mm, 0, ULONG_MAX, &charge_walk_ops, NULL);
mmap_read_unlock(mc.mm);
atomic_dec(&mc.from->moving_account);
}
@@ -6190,6 +6262,30 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_LRU_GEN
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ /* find the first leader if there is any */
+ cgroup_taskset_for_each_leader(task, css, tset)
+ break;
+
+ if (!task)
+ return;
+
+ task_lock(task);
+ if (task->mm && READ_ONCE(task->mm->owner) == task)
+ lru_gen_migrate_mm(task->mm);
+ task_unlock(task);
+}
+#else
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
+#endif /* CONFIG_LRU_GEN */
+
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
if (value == PAGE_COUNTER_MAX)
@@ -6595,6 +6691,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.css_rstat_flush = mem_cgroup_css_rstat_flush,
.can_attach = mem_cgroup_can_attach,
+ .attach = mem_cgroup_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
.dfl_cftypes = memory_files,
@@ -6807,21 +6904,20 @@ int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
}
/**
- * mem_cgroup_swapin_charge_page - charge a newly allocated page for swapin
- * @page: page to charge
+ * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
+ * @folio: folio to charge.
* @mm: mm context of the victim
* @gfp: reclaim mode
- * @entry: swap entry for which the page is allocated
+ * @entry: swap entry for which the folio is allocated
*
- * This function charges a page allocated for swapin. Please call this before
- * adding the page to the swapcache.
+ * This function charges a folio allocated for swapin. Please call this before
+ * adding the folio to the swapcache.
*
* Returns 0 on success. Otherwise, an error code is returned.
*/
-int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
+int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry)
{
- struct folio *folio = page_folio(page);
struct mem_cgroup *memcg;
unsigned short id;
int ret;
@@ -7194,7 +7290,7 @@ static int __init mem_cgroup_init(void)
}
subsys_initcall(mem_cgroup_init);
-#ifdef CONFIG_MEMCG_SWAP
+#ifdef CONFIG_SWAP
static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
{
while (!refcount_inc_not_zero(&memcg->id.ref)) {
@@ -7232,7 +7328,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
if (mem_cgroup_disabled())
return;
- if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ if (!do_memsw_account())
return;
memcg = folio_memcg(folio);
@@ -7261,7 +7357,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
if (!mem_cgroup_is_root(memcg))
page_counter_uncharge(&memcg->memory, nr_entries);
- if (!cgroup_memory_noswap && memcg != swap_memcg) {
+ if (memcg != swap_memcg) {
if (!mem_cgroup_is_root(swap_memcg))
page_counter_charge(&swap_memcg->memsw, nr_entries);
page_counter_uncharge(&memcg->memsw, nr_entries);
@@ -7297,7 +7393,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
struct mem_cgroup *memcg;
unsigned short oldid;
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ if (do_memsw_account())
return 0;
memcg = folio_memcg(folio);
@@ -7313,7 +7409,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
memcg = mem_cgroup_id_get_online(memcg);
- if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg) &&
+ if (!mem_cgroup_is_root(memcg) &&
!page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
memcg_memory_event(memcg, MEMCG_SWAP_MAX);
memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
@@ -7341,15 +7437,18 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
struct mem_cgroup *memcg;
unsigned short id;
+ if (mem_cgroup_disabled())
+ return;
+
id = swap_cgroup_record(entry, 0, nr_pages);
rcu_read_lock();
memcg = mem_cgroup_from_id(id);
if (memcg) {
- if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg)) {
- if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
- page_counter_uncharge(&memcg->swap, nr_pages);
- else
+ if (!mem_cgroup_is_root(memcg)) {
+ if (do_memsw_account())
page_counter_uncharge(&memcg->memsw, nr_pages);
+ else
+ page_counter_uncharge(&memcg->swap, nr_pages);
}
mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages);
mem_cgroup_id_put_many(memcg, nr_pages);
@@ -7361,7 +7460,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
{
long nr_swap_pages = get_nr_swap_pages();
- if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ if (mem_cgroup_disabled() || do_memsw_account())
return nr_swap_pages;
for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
nr_swap_pages = min_t(long, nr_swap_pages,
@@ -7370,18 +7469,18 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
return nr_swap_pages;
}
-bool mem_cgroup_swap_full(struct page *page)
+bool mem_cgroup_swap_full(struct folio *folio)
{
struct mem_cgroup *memcg;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
if (vm_swap_full())
return true;
- if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ if (do_memsw_account())
return false;
- memcg = page_memcg(page);
+ memcg = folio_memcg(folio);
if (!memcg)
return false;
@@ -7398,10 +7497,9 @@ bool mem_cgroup_swap_full(struct page *page)
static int __init setup_swap_account(char *s)
{
- if (!strcmp(s, "1"))
- cgroup_memory_noswap = false;
- else if (!strcmp(s, "0"))
- cgroup_memory_noswap = true;
+ pr_warn_once("The swapaccount= commandline option is deprecated. "
+ "Please report your usecase to linux-mm@kvack.org if you "
+ "depend on this functionality.\n");
return 1;
}
__setup("swapaccount=", setup_swap_account);
@@ -7670,20 +7768,9 @@ static struct cftype zswap_files[] = {
};
#endif /* CONFIG_MEMCG_KMEM && CONFIG_ZSWAP */
-/*
- * If mem_cgroup_swap_init() is implemented as a subsys_initcall()
- * instead of a core_initcall(), this could mean cgroup_memory_noswap still
- * remains set to false even when memcg is disabled via "cgroup_disable=memory"
- * boot parameter. This may result in premature OOPS inside
- * mem_cgroup_get_nr_swap_pages() function in corner cases.
- */
static int __init mem_cgroup_swap_init(void)
{
- /* No memory control -> no swap control */
if (mem_cgroup_disabled())
- cgroup_memory_noswap = true;
-
- if (cgroup_memory_noswap)
return 0;
WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files));
@@ -7693,6 +7780,6 @@ static int __init mem_cgroup_swap_init(void)
#endif
return 0;
}
-core_initcall(mem_cgroup_swap_init);
+subsys_initcall(mem_cgroup_swap_init);
-#endif /* CONFIG_MEMCG_SWAP */
+#endif /* CONFIG_SWAP */