author    Linus Torvalds <torvalds@linux-foundation.org>  2022-05-27 11:40:49 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2022-05-27 11:40:49 -0700
commit    8291eaafed36f575f23951f3ce18407f480e9ecf (patch)
tree      279b61422ba2df7b8579af8ccc81331de80affa8 /tools
parent    77fb622de1393b1d54f24f4f7ed98f84feeda502 (diff)
parent    fa020a2b87d24016723fff4a4237deb612478a32 (diff)
Merge tag 'mm-stable-2022-05-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull more MM updates from Andrew Morton:
- Two follow-on fixes for the post-5.19 series "Use pageblock_order for
cma and alloc_contig_range alignment", from Zi Yan.
- A series of z3fold cleanups and fixes from Miaohe Lin.
- Some memcg selftests work from Michal Koutný <mkoutny@suse.com>.
- Some swap fixes and cleanups from Miaohe Lin.
- Several individual minor fixups.
* tag 'mm-stable-2022-05-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (25 commits)
mm/shmem.c: suppress shift warning
mm: Kconfig: reorganize misplaced mm options
mm: kasan: fix input of vmalloc_to_page()
mm: fix is_pinnable_page against a cma page
mm: filter out swapin error entry in shmem mapping
mm/shmem: fix infinite loop when swap in shmem error at swapoff time
mm/madvise: free hwpoison and swapin error entry in madvise_free_pte_range
mm/swapfile: fix lost swap bits in unuse_pte()
mm/swapfile: unuse_pte can map random data if swap read fails
selftests: memcg: factor out common parts of memory.{low,min} tests
selftests: memcg: remove protection from top level memcg
selftests: memcg: adjust expected reclaim values of protected cgroups
selftests: memcg: expect no low events in unprotected sibling
selftests: memcg: fix compilation
mm/z3fold: fix z3fold_page_migrate races with z3fold_map
mm/z3fold: fix z3fold_reclaim_page races with z3fold_free
mm/z3fold: always clear PAGE_CLAIMED under z3fold page lock
mm/z3fold: put z3fold page back into unbuddied list when reclaim or migration fails
revert "mm/z3fold.c: allow __GFP_HIGHMEM in z3fold_alloc"
mm/z3fold: throw warning on failure of trylock_page in z3fold_alloc
...
Diffstat (limited to 'tools')
-rw-r--r--  tools/testing/selftests/cgroup/memcg_protection.m    89
-rw-r--r--  tools/testing/selftests/cgroup/test_memcontrol.c    247
2 files changed, 151 insertions, 185 deletions
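
The first file below adds a small GNU Octave model of how effective reclaim protection spreads among sibling memcgs; the second reworks the memory.min/memory.low selftests around a common test_memcg_protection() helper. As a quick orientation to the model's arithmetic, here is a minimal single-iteration sketch (not part of the patch; it reuses the testcase values and variable names from memcg_protection.m below):

% Sketch: one pass of the effective-protection step, using the
% script's own testcase values (GB). Illustrative only, not in the patch.
E = 50 / 1024;              % parent (A/B) effective protection
n = [75 25 0 500] / 1024;   % memory.low of children C, D, E, F
c = [50 50 50 0] / 1024;    % memory.current of children C, D, E, F

u = min(c, n);                       % low_usage: [50 25 0 0] / 1024
protected = min(n, c);               % nominal protection, capped by usage
e = protected * min(1, E / sum(u));  % scale down overcommitted protection
disp(e * 1024)                       % prints ~[33.3 16.7 0 0] (MB)

Iterating the full loop in the script (reclaim, then recompute) drifts this initial ~33M/17M split toward the ~29M/21M equilibrium that the updated test expectations below are based on.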
diff --git a/tools/testing/selftests/cgroup/memcg_protection.m b/tools/testing/selftests/cgroup/memcg_protection.m
new file mode 100644
index 000000000000..051daa3477b6
--- /dev/null
+++ b/tools/testing/selftests/cgroup/memcg_protection.m
@@ -0,0 +1,89 @@
+% SPDX-License-Identifier: GPL-2.0
+%
+% run as: octave-cli memcg_protection.m
+%
+% This script simulates reclaim protection behavior on a single level of memcg
+% hierarchy to illustrate how overcommitted protection spreads among siblings
+% (as it depends also on their current consumption).
+%
+% Simulation assumes siblings consumed the initial amount of memory (w/out
+% reclaim) and then the reclaim starts, all memory is reclaimable, i.e. treated
+% same. It simulates only non-low reclaim and assumes all memory.min = 0.
+%
+% Input configurations
+% --------------------
+% E number      parent effective protection
+% n vector      nominal protection of siblings set at the given level (memory.low)
+% c vector      current consumption -,,- (memory.current)
+
+% example from testcase (values in GB)
+E = 50 / 1024;
+n = [75 25 0 500 ] / 1024;
+c = [50 50 50 0] / 1024;
+
+% Reclaim parameters
+% ------------------
+
+% Minimal reclaim amount (GB)
+cluster = 32*4 / 2**20;
+
+% Reclaim coefficient (think as 0.5^sc->priority)
+alpha = .1
+
+% Simulation parameters
+% ---------------------
+epsilon = 1e-7;
+timeout = 1000;
+
+% Simulation loop
+% ---------------
+
+ch = [];
+eh = [];
+rh = [];
+
+for t = 1:timeout
+        % low_usage
+        u = min(c, n);
+        siblings = sum(u);
+
+        % effective_protection()
+        protected = min(n, c);                  % start with nominal
+        e = protected * min(1, E / siblings);   % normalize overcommit
+
+        % recursive protection
+        unclaimed = max(0, E - siblings);
+        parent_overuse = sum(c) - siblings;
+        if (unclaimed > 0 && parent_overuse > 0)
+                overuse = max(0, c - protected);
+                e += unclaimed * (overuse / parent_overuse);
+        endif
+
+        % get_scan_count()
+        r = alpha * c;          % assume all memory is in a single LRU list
+
+        % commit 1bc63fb1272b ("mm, memcg: make scan aggression always exclude protection")
+        sz = max(e, c);
+        r .*= (1 - (e+epsilon) ./ (sz+epsilon));
+
+        % uncomment to debug prints
+        % e, c, r
+
+        % nothing to reclaim, reached equilibrium
+        if max(r) < epsilon
+                break;
+        endif
+
+        % SWAP_CLUSTER_MAX roundup
+        r = max(r, (r > epsilon) .* cluster);
+        % XXX here I do parallel reclaim of all siblings
+        % in reality reclaim is serialized and each sibling recalculates own residual
+        c = max(c - r, 0);
+
+        ch = [ch ; c];
+        eh = [eh ; e];
+        rh = [rh ; r];
+endfor
+
+t
+c, e
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 44a974ec472c..8833359556f3 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -190,13 +190,6 @@ cleanup:
         return ret;
 }
 
-static int alloc_pagecache_50M(const char *cgroup, void *arg)
-{
-        int fd = (long)arg;
-
-        return alloc_pagecache(fd, MB(50));
-}
-
 static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
 {
         int fd = (long)arg;
@@ -247,33 +240,39 @@ static int cg_test_proc_killed(const char *cgroup)
 
 /*
  * First, this test creates the following hierarchy:
- * A       memory.min = 50M,  memory.max = 200M
- * A/B     memory.min = 50M,  memory.current = 50M
+ * A       memory.min = 0,    memory.max = 200M
+ * A/B     memory.min = 50M
  * A/B/C   memory.min = 75M,  memory.current = 50M
 * A/B/D   memory.min = 25M,  memory.current = 50M
 * A/B/E   memory.min = 0,    memory.current = 50M
 * A/B/F   memory.min = 500M, memory.current = 0
 *
- * Usages are pagecache, but the test keeps a running
+ * (or memory.low if we test soft protection)
+ *
+ * Usages are pagecache and the test keeps a running
 * process in every leaf cgroup.
 * Then it creates A/G and creates a significant
- * memory pressure in it.
+ * memory pressure in A.
 *
+ * Then it checks actual memory usages and expects that:
 * A/B    memory.current ~= 50M
- * A/B/C  memory.current ~= 33M
- * A/B/D  memory.current ~= 17M
- * A/B/F  memory.current ~= 0
+ * A/B/C  memory.current ~= 29M
+ * A/B/D  memory.current ~= 21M
+ * A/B/E  memory.current ~= 0
+ * A/B/F  memory.current  = 0
+ * (for origin of the numbers, see model in memcg_protection.m.)
 *
 * After that it tries to allocate more than there is
- * unprotected memory in A available, and checks
- * checks that memory.min protects pagecache even
- * in this case.
+ * unprotected memory in A available, and checks that:
+ * a) memory.min protects pagecache even in this case,
+ * b) memory.low allows reclaiming page cache with low events.
 */
-static int test_memcg_min(const char *root)
+static int test_memcg_protection(const char *root, bool min)
 {
-        int ret = KSFT_FAIL;
+        int ret = KSFT_FAIL, rc;
         char *parent[3] = {NULL};
         char *children[4] = {NULL};
+        const char *attribute = min ? "memory.min" : "memory.low";
         long c[4];
         int i, attempts;
         int fd;
@@ -297,8 +296,10 @@ static int test_memcg_min(const char *root)
         if (cg_create(parent[0]))
                 goto cleanup;
 
-        if (cg_read_long(parent[0], "memory.min")) {
-                ret = KSFT_SKIP;
+        if (cg_read_long(parent[0], attribute)) {
+                /* No memory.min on older kernels is fine */
+                if (min)
+                        ret = KSFT_SKIP;
                 goto cleanup;
         }
 
@@ -335,17 +336,15 @@ static int test_memcg_min(const char *root)
                                              (void *)(long)fd);
         }
 
-        if (cg_write(parent[0], "memory.min", "50M"))
+        if (cg_write(parent[1], attribute, "50M"))
                 goto cleanup;
-        if (cg_write(parent[1], "memory.min", "50M"))
+        if (cg_write(children[0], attribute, "75M"))
                 goto cleanup;
-        if (cg_write(children[0], "memory.min", "75M"))
+        if (cg_write(children[1], attribute, "25M"))
                 goto cleanup;
-        if (cg_write(children[1], "memory.min", "25M"))
+        if (cg_write(children[2], attribute, "0"))
                 goto cleanup;
-        if (cg_write(children[2], "memory.min", "0"))
-                goto cleanup;
-        if (cg_write(children[3], "memory.min", "500M"))
+        if (cg_write(children[3], attribute, "500M"))
                 goto cleanup;
 
         attempts = 0;
@@ -365,170 +364,35 @@ static int test_memcg_min(const char *root)
         for (i = 0; i < ARRAY_SIZE(children); i++)
                 c[i] = cg_read_long(children[i], "memory.current");
 
-        if (!values_close(c[0], MB(33), 10))
+        if (!values_close(c[0], MB(29), 10))
                 goto cleanup;
 
-        if (!values_close(c[1], MB(17), 10))
+        if (!values_close(c[1], MB(21), 10))
                 goto cleanup;
 
         if (c[3] != 0)
                 goto cleanup;
 
-        if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
-                goto cleanup;
-
-        if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
-                goto cleanup;
-
-        ret = KSFT_PASS;
-
-cleanup:
-        for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
-                if (!children[i])
-                        continue;
-
-                cg_destroy(children[i]);
-                free(children[i]);
-        }
-
-        for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
-                if (!parent[i])
-                        continue;
-
-                cg_destroy(parent[i]);
-                free(parent[i]);
-        }
-        close(fd);
-        return ret;
-}
-
-/*
- * First, this test creates the following hierarchy:
- * A       memory.low = 50M,  memory.max = 200M
- * A/B     memory.low = 50M,  memory.current = 50M
- * A/B/C   memory.low = 75M,  memory.current = 50M
- * A/B/D   memory.low = 25M,  memory.current = 50M
- * A/B/E   memory.low = 0,    memory.current = 50M
- * A/B/F   memory.low = 500M, memory.current = 0
- *
- * Usages are pagecache.
- * Then it creates A/G an creates a significant
- * memory pressure in it.
- *
- * Then it checks actual memory usages and expects that:
- * A/B    memory.current ~= 50M
- * A/B/   memory.current ~= 33M
- * A/B/D  memory.current ~= 17M
- * A/B/F  memory.current ~= 0
- *
- * After that it tries to allocate more than there is
- * unprotected memory in A available,
- * and checks low and oom events in memory.events.
- */
-static int test_memcg_low(const char *root)
-{
-        int ret = KSFT_FAIL;
-        char *parent[3] = {NULL};
-        char *children[4] = {NULL};
-        long low, oom;
-        long c[4];
-        int i;
-        int fd;
-
-        fd = get_temp_fd();
-        if (fd < 0)
-                goto cleanup;
-
-        parent[0] = cg_name(root, "memcg_test_0");
-        if (!parent[0])
-                goto cleanup;
-
-        parent[1] = cg_name(parent[0], "memcg_test_1");
-        if (!parent[1])
-                goto cleanup;
-
-        parent[2] = cg_name(parent[0], "memcg_test_2");
-        if (!parent[2])
-                goto cleanup;
-
-        if (cg_create(parent[0]))
-                goto cleanup;
-
-        if (cg_read_long(parent[0], "memory.low"))
-                goto cleanup;
-
-        if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
-                goto cleanup;
-
-        if (cg_write(parent[0], "memory.max", "200M"))
-                goto cleanup;
-
-        if (cg_write(parent[0], "memory.swap.max", "0"))
-                goto cleanup;
-
-        if (cg_create(parent[1]))
-                goto cleanup;
-
-        if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+        rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
+        if (min && !rc)
                 goto cleanup;
-
-        if (cg_create(parent[2]))
+        else if (!min && rc) {
+                fprintf(stderr,
+                        "memory.low prevents from allocating anon memory\n");
                 goto cleanup;
-
-        for (i = 0; i < ARRAY_SIZE(children); i++) {
-                children[i] = cg_name_indexed(parent[1], "child_memcg", i);
-                if (!children[i])
-                        goto cleanup;
-
-                if (cg_create(children[i]))
-                        goto cleanup;
-
-                if (i > 2)
-                        continue;
-
-                if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
-                        goto cleanup;
         }
 
-        if (cg_write(parent[0], "memory.low", "50M"))
-                goto cleanup;
-        if (cg_write(parent[1], "memory.low", "50M"))
-                goto cleanup;
-        if (cg_write(children[0], "memory.low", "75M"))
-                goto cleanup;
-        if (cg_write(children[1], "memory.low", "25M"))
-                goto cleanup;
-        if (cg_write(children[2], "memory.low", "0"))
-                goto cleanup;
-        if (cg_write(children[3], "memory.low", "500M"))
-                goto cleanup;
-
-        if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
-                goto cleanup;
-
         if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
                 goto cleanup;
 
-        for (i = 0; i < ARRAY_SIZE(children); i++)
-                c[i] = cg_read_long(children[i], "memory.current");
-
-        if (!values_close(c[0], MB(33), 10))
-                goto cleanup;
-
-        if (!values_close(c[1], MB(17), 10))
-                goto cleanup;
-
-        if (c[3] != 0)
-                goto cleanup;
-
-        if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
-                fprintf(stderr,
-                        "memory.low prevents from allocating anon memory\n");
+        if (min) {
+                ret = KSFT_PASS;
                 goto cleanup;
         }
 
         for (i = 0; i < ARRAY_SIZE(children); i++) {
-                int no_low_events_index = has_recursiveprot ? 2 : 1;
+                int no_low_events_index = 1;
+                long low, oom;
 
                 oom = cg_read_key_long(children[i], "memory.events", "oom ");
                 low = cg_read_key_long(children[i], "memory.events", "low ");
@@ -564,6 +428,16 @@ cleanup:
         return ret;
 }
 
+static int test_memcg_min(const char *root)
+{
+        return test_memcg_protection(root, true);
+}
+
+static int test_memcg_low(const char *root)
+{
+        return test_memcg_protection(root, false);
+}
+
 static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
 {
         size_t size = MB(50);
@@ -1241,7 +1115,16 @@ static int test_memcg_oom_group_leaf_events(const char *root)
         if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
                 goto cleanup;
 
-        if (cg_read_key_long(parent, "memory.events", "oom_kill ") <= 0)
+        parent_oom_events = cg_read_key_long(
+                        parent, "memory.events", "oom_kill ");
+        /*
+         * If memory_localevents is not enabled (the default), the parent should
+         * count OOM events in its children groups. Otherwise, it should not
+         * have observed any events.
+         */
+        if (has_localevents && parent_oom_events != 0)
+                goto cleanup;
+        else if (!has_localevents && parent_oom_events <= 0)
                 goto cleanup;
 
         ret = KSFT_PASS;
@@ -1349,20 +1232,14 @@ static int test_memcg_oom_group_score_events(const char *root)
         if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
                 goto cleanup;
 
-        parent_oom_events = cg_read_key_long(
-                        parent, "memory.events", "oom_kill ");
-        /*
-         * If memory_localevents is not enabled (the default), the parent should
-         * count OOM events in its children groups. Otherwise, it should not
-         * have observed any events.
-         */
-        if ((has_localevents && parent_oom_events == 0) ||
-            parent_oom_events > 0)
-                ret = KSFT_PASS;
+        if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+                goto cleanup;
 
         if (kill(safe_pid, SIGKILL))
                 goto cleanup;
 
+        ret = KSFT_PASS;
+
 cleanup:
         if (memcg)
                 cg_destroy(memcg);
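
To reproduce the expected numbers, the new model can be run standalone with octave-cli memcg_protection.m (as its header notes); because the final t and c, e statements carry no trailing semicolons, Octave prints the iteration count and the final consumption and effective-protection vectors on exit.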