summaryrefslogtreecommitdiff
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-04-22 10:10:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-04-22 10:10:43 -0700
commit281b9d9a4b02229b602a14f7540206b0fbe4134f (patch)
treedd5c1aee3a7effe79c1bcc2fd035500d38b6d373 /mm/memory-failure.c
parent3b8000ae185cb068adbda5f966a3835053c85fd4 (diff)
parent319561669a59d8e9206ab311ae5433ef92fd79d1 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "13 patches. Subsystems affected by this patch series: mm (memory-failure, memcg, userfaultfd, hugetlbfs, mremap, oom-kill, kasan, hmm), and kcov" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: mm/mmu_notifier.c: fix race in mmu_interval_notifier_remove() kcov: don't generate a warning on vm_insert_page()'s failure MAINTAINERS: add Vincenzo Frascino to KASAN reviewers oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup selftest/vm: add skip support to mremap_test selftest/vm: support xfail in mremap_test selftest/vm: verify remap destination address in mremap_test selftest/vm: verify mmap addr in mremap_test mm, hugetlb: allow for "high" userspace addresses userfaultfd: mark uffd_wp regardless of VM_WRITE flag memcg: sync flush only if periodic flush is delayed mm/memory-failure.c: skip huge_zero_page in memory_failure() mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c158
1 files changed, 116 insertions, 42 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dcb6bb9cf731..27760c19bad7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1498,50 +1498,113 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
return 0;
}
-static int memory_failure_hugetlb(unsigned long pfn, int flags)
+/*
+ * Called from hugetlb code with hugetlb_lock held.
+ *
+ * Return values:
+ * 0 - free hugepage
+ * 1 - in-use hugepage
+ * 2 - not a hugepage
+ * -EBUSY - the hugepage is busy (try to retry)
+ * -EHWPOISON - the hugepage is already hwpoisoned
+ */
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+{
+ struct page *page = pfn_to_page(pfn);
+ struct page *head = compound_head(page);
+ int ret = 2; /* fallback to normal page handling */
+ bool count_increased = false;
+
+ if (!PageHeadHuge(head))
+ goto out;
+
+ if (flags & MF_COUNT_INCREASED) {
+ ret = 1;
+ count_increased = true;
+ } else if (HPageFreed(head) || HPageMigratable(head)) {
+ ret = get_page_unless_zero(head);
+ if (ret)
+ count_increased = true;
+ } else {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ if (TestSetPageHWPoison(head)) {
+ ret = -EHWPOISON;
+ goto out;
+ }
+
+ return ret;
+out:
+ if (count_increased)
+ put_page(head);
+ return ret;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Taking refcount of hugetlb pages needs extra care about race conditions
+ * with basic operations like hugepage allocation/free/demotion.
+ * So some of prechecks for hwpoison (pinning, and testing/setting
+ * PageHWPoison) should be done in single hugetlb_lock range.
+ */
+static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
{
- struct page *p = pfn_to_page(pfn);
- struct page *head = compound_head(p);
int res;
+ struct page *p = pfn_to_page(pfn);
+ struct page *head;
unsigned long page_flags;
+ bool retry = true;
- if (TestSetPageHWPoison(head)) {
- pr_err("Memory failure: %#lx: already hardware poisoned\n",
- pfn);
- res = -EHWPOISON;
- if (flags & MF_ACTION_REQUIRED)
+ *hugetlb = 1;
+retry:
+ res = get_huge_page_for_hwpoison(pfn, flags);
+ if (res == 2) { /* fallback to normal page handling */
+ *hugetlb = 0;
+ return 0;
+ } else if (res == -EHWPOISON) {
+ pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
+ if (flags & MF_ACTION_REQUIRED) {
+ head = compound_head(p);
res = kill_accessing_process(current, page_to_pfn(head), flags);
+ }
+ return res;
+ } else if (res == -EBUSY) {
+ if (retry) {
+ retry = false;
+ goto retry;
+ }
+ action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
return res;
}
+ head = compound_head(p);
+ lock_page(head);
+
+ if (hwpoison_filter(p)) {
+ ClearPageHWPoison(head);
+ res = -EOPNOTSUPP;
+ goto out;
+ }
+
num_poisoned_pages_inc();
- if (!(flags & MF_COUNT_INCREASED)) {
- res = get_hwpoison_page(p, flags);
- if (!res) {
- lock_page(head);
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
- unlock_page(head);
- return -EOPNOTSUPP;
- }
- unlock_page(head);
- res = MF_FAILED;
- if (__page_handle_poison(p)) {
- page_ref_inc(p);
- res = MF_RECOVERED;
- }
- action_result(pfn, MF_MSG_FREE_HUGE, res);
- return res == MF_RECOVERED ? 0 : -EBUSY;
- } else if (res < 0) {
- action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
- return -EBUSY;
+ /*
+ * Handling free hugepage. The possible race with hugepage allocation
+ * or demotion can be prevented by PageHWPoison flag.
+ */
+ if (res == 0) {
+ unlock_page(head);
+ res = MF_FAILED;
+ if (__page_handle_poison(p)) {
+ page_ref_inc(p);
+ res = MF_RECOVERED;
}
+ action_result(pfn, MF_MSG_FREE_HUGE, res);
+ return res == MF_RECOVERED ? 0 : -EBUSY;
}
- lock_page(head);
-
/*
* The page could have changed compound pages due to race window.
* If this happens just bail out.
@@ -1554,14 +1617,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
page_flags = head->flags;
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(head))
- num_poisoned_pages_dec();
- put_page(p);
- res = -EOPNOTSUPP;
- goto out;
- }
-
/*
* TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
* simply disable it. In order to make it work properly, we need
@@ -1588,6 +1643,12 @@ out:
unlock_page(head);
return res;
}
+#else
+static inline int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
+{
+ return 0;
+}
+#endif
static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
struct dev_pagemap *pgmap)
@@ -1712,6 +1773,7 @@ int memory_failure(unsigned long pfn, int flags)
int res = 0;
unsigned long page_flags;
bool retry = true;
+ int hugetlb = 0;
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
@@ -1739,10 +1801,9 @@ int memory_failure(unsigned long pfn, int flags)
}
try_again:
- if (PageHuge(p)) {
- res = memory_failure_hugetlb(pfn, flags);
+ res = try_memory_failure_hugetlb(pfn, flags, &hugetlb);
+ if (hugetlb)
goto unlock_mutex;
- }
if (TestSetPageHWPoison(p)) {
pr_err("Memory failure: %#lx: already hardware poisoned\n",
@@ -1800,6 +1861,19 @@ try_again:
if (PageTransHuge(hpage)) {
/*
+ * Bail out before SetPageHasHWPoisoned() if hpage is
+ * huge_zero_page, although PG_has_hwpoisoned is not
+ * checked in set_huge_zero_page().
+ *
+ * TODO: Handle memory failure of huge_zero_page thoroughly.
+ */
+ if (is_huge_zero_page(hpage)) {
+ action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
+ res = -EBUSY;
+ goto unlock_mutex;
+ }
+
+ /*
* The flag must be set after the refcount is bumped
* otherwise it may race with THP split.
* And the flag can't be set in get_hwpoison_page() since