diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-04-22 10:10:43 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-04-22 10:10:43 -0700 |
commit | 281b9d9a4b02229b602a14f7540206b0fbe4134f (patch) | |
tree | dd5c1aee3a7effe79c1bcc2fd035500d38b6d373 /mm/memory-failure.c | |
parent | 3b8000ae185cb068adbda5f966a3835053c85fd4 (diff) | |
parent | 319561669a59d8e9206ab311ae5433ef92fd79d1 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"13 patches.
Subsystems affected by this patch series: mm (memory-failure, memcg,
userfaultfd, hugetlbfs, mremap, oom-kill, kasan, hmm), and kcov"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
mm/mmu_notifier.c: fix race in mmu_interval_notifier_remove()
kcov: don't generate a warning on vm_insert_page()'s failure
MAINTAINERS: add Vincenzo Frascino to KASAN reviewers
oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup
selftest/vm: add skip support to mremap_test
selftest/vm: support xfail in mremap_test
selftest/vm: verify remap destination address in mremap_test
selftest/vm: verify mmap addr in mremap_test
mm, hugetlb: allow for "high" userspace addresses
userfaultfd: mark uffd_wp regardless of VM_WRITE flag
memcg: sync flush only if periodic flush is delayed
mm/memory-failure.c: skip huge_zero_page in memory_failure()
mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 158 |
1 files changed, 116 insertions, 42 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index dcb6bb9cf731..27760c19bad7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1498,50 +1498,113 @@ static int try_to_split_thp_page(struct page *page, const char *msg) return 0; } -static int memory_failure_hugetlb(unsigned long pfn, int flags) +/* + * Called from hugetlb code with hugetlb_lock held. + * + * Return values: + * 0 - free hugepage + * 1 - in-use hugepage + * 2 - not a hugepage + * -EBUSY - the hugepage is busy (try to retry) + * -EHWPOISON - the hugepage is already hwpoisoned + */ +int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) +{ + struct page *page = pfn_to_page(pfn); + struct page *head = compound_head(page); + int ret = 2; /* fallback to normal page handling */ + bool count_increased = false; + + if (!PageHeadHuge(head)) + goto out; + + if (flags & MF_COUNT_INCREASED) { + ret = 1; + count_increased = true; + } else if (HPageFreed(head) || HPageMigratable(head)) { + ret = get_page_unless_zero(head); + if (ret) + count_increased = true; + } else { + ret = -EBUSY; + goto out; + } + + if (TestSetPageHWPoison(head)) { + ret = -EHWPOISON; + goto out; + } + + return ret; +out: + if (count_increased) + put_page(head); + return ret; +} + +#ifdef CONFIG_HUGETLB_PAGE +/* + * Taking refcount of hugetlb pages needs extra care about race conditions + * with basic operations like hugepage allocation/free/demotion. + * So some of prechecks for hwpoison (pinning, and testing/setting + * PageHWPoison) should be done in single hugetlb_lock range. + */ +static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb) { - struct page *p = pfn_to_page(pfn); - struct page *head = compound_head(p); int res; + struct page *p = pfn_to_page(pfn); + struct page *head; unsigned long page_flags; + bool retry = true; - if (TestSetPageHWPoison(head)) { - pr_err("Memory failure: %#lx: already hardware poisoned\n", - pfn); - res = -EHWPOISON; - if (flags & MF_ACTION_REQUIRED) + *hugetlb = 1; +retry: + res = get_huge_page_for_hwpoison(pfn, flags); + if (res == 2) { /* fallback to normal page handling */ + *hugetlb = 0; + return 0; + } else if (res == -EHWPOISON) { + pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); + if (flags & MF_ACTION_REQUIRED) { + head = compound_head(p); res = kill_accessing_process(current, page_to_pfn(head), flags); + } + return res; + } else if (res == -EBUSY) { + if (retry) { + retry = false; + goto retry; + } + action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); return res; } + head = compound_head(p); + lock_page(head); + + if (hwpoison_filter(p)) { + ClearPageHWPoison(head); + res = -EOPNOTSUPP; + goto out; + } + num_poisoned_pages_inc(); - if (!(flags & MF_COUNT_INCREASED)) { - res = get_hwpoison_page(p, flags); - if (!res) { - lock_page(head); - if (hwpoison_filter(p)) { - if (TestClearPageHWPoison(head)) - num_poisoned_pages_dec(); - unlock_page(head); - return -EOPNOTSUPP; - } - unlock_page(head); - res = MF_FAILED; - if (__page_handle_poison(p)) { - page_ref_inc(p); - res = MF_RECOVERED; - } - action_result(pfn, MF_MSG_FREE_HUGE, res); - return res == MF_RECOVERED ? 0 : -EBUSY; - } else if (res < 0) { - action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); - return -EBUSY; + /* + * Handling free hugepage. The possible race with hugepage allocation + * or demotion can be prevented by PageHWPoison flag. + */ + if (res == 0) { + unlock_page(head); + res = MF_FAILED; + if (__page_handle_poison(p)) { + page_ref_inc(p); + res = MF_RECOVERED; } + action_result(pfn, MF_MSG_FREE_HUGE, res); + return res == MF_RECOVERED ? 0 : -EBUSY; } - lock_page(head); - /* * The page could have changed compound pages due to race window. * If this happens just bail out. @@ -1554,14 +1617,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) page_flags = head->flags; - if (hwpoison_filter(p)) { - if (TestClearPageHWPoison(head)) - num_poisoned_pages_dec(); - put_page(p); - res = -EOPNOTSUPP; - goto out; - } - /* * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so * simply disable it. In order to make it work properly, we need @@ -1588,6 +1643,12 @@ out: unlock_page(head); return res; } +#else +static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb) +{ + return 0; +} +#endif static int memory_failure_dev_pagemap(unsigned long pfn, int flags, struct dev_pagemap *pgmap) @@ -1712,6 +1773,7 @@ int memory_failure(unsigned long pfn, int flags) int res = 0; unsigned long page_flags; bool retry = true; + int hugetlb = 0; if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); @@ -1739,10 +1801,9 @@ int memory_failure(unsigned long pfn, int flags) } try_again: - if (PageHuge(p)) { - res = memory_failure_hugetlb(pfn, flags); + res = try_memory_failure_hugetlb(pfn, flags, &hugetlb); + if (hugetlb) goto unlock_mutex; - } if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", @@ -1800,6 +1861,19 @@ try_again: if (PageTransHuge(hpage)) { /* + * Bail out before SetPageHasHWPoisoned() if hpage is + * huge_zero_page, although PG_has_hwpoisoned is not + * checked in set_huge_zero_page(). + * + * TODO: Handle memory failure of huge_zero_page thoroughly. + */ + if (is_huge_zero_page(hpage)) { + action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); + res = -EBUSY; + goto unlock_mutex; + } + + /* * The flag must be set after the refcount is bumped * otherwise it may race with THP split. * And the flag can't be set in get_hwpoison_page() since |