summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNaoya Horiguchi <naoya.horiguchi@nec.com>2021-06-15 18:23:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-16 09:24:42 -0700
commit25182f05ffed0b45602438693e4eed5d7f3ebadd (patch)
tree6ed39c894b33b6e84222639c60e018d4cc46b6c9
parent94f0b2d4a1d0c52035aef425da5e022bd2cb1c71 (diff)
mm,hwpoison: fix race with hugetlb page allocation
When hugetlb page fault (under overcommitting situation) and memory_failure() race, VM_BUG_ON_PAGE() is triggered by the following race: CPU0: CPU1: gather_surplus_pages() page = alloc_surplus_huge_page() memory_failure_hugetlb() get_hwpoison_page(page) __get_hwpoison_page(page) get_page_unless_zero(page) zero = put_page_testzero(page) VM_BUG_ON_PAGE(!zero, page) enqueue_huge_page(h, page) put_page(page) __get_hwpoison_page() only checks the page refcount before taking an additional one for memory error handling, which is not enough because there's a time window where compound pages have non-zero refcount during hugetlb page initialization. So make __get_hwpoison_page() check page status a bit more for hugetlb pages with get_hwpoison_huge_page(). Checking hugetlb-specific flags under hugetlb_lock makes sure that the hugetlb page is not transitive. It's notable that another new function, HWPoisonHandlable(), is helpful to prevent a race against other transitive page states (like a generic compound page just before PageHuge becomes true). Link: https://lkml.kernel.org/r/20210603233632.2964832-2-nao.horiguchi@gmail.com Fixes: ead07f6a867b ("mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling") Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> Reported-by: Muchun Song <songmuchun@bytedance.com> Acked-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Michal Hocko <mhocko@suse.com> Cc: Tony Luck <tony.luck@intel.com> Cc: <stable@vger.kernel.org> [5.12+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--mm/hugetlb.c15
-rw-r--r--mm/memory-failure.c29
3 files changed, 48 insertions, 2 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b92f25ccef58..790ae618548d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
void putback_active_hugepage(struct page *page);
void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
return false;
}
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+ return 0;
+}
+
static inline void putback_active_hugepage(struct page *page)
{
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5560b50876fb..85f42ec1a927 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5857,6 +5857,21 @@ unlock:
return ret;
}
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+ int ret = 0;
+
+ *hugetlb = false;
+ spin_lock_irq(&hugetlb_lock);
+ if (PageHeadHuge(page)) {
+ *hugetlb = true;
+ if (HPageFreed(page) || HPageMigratable(page))
+ ret = get_page_unless_zero(page);
+ }
+ spin_unlock_irq(&hugetlb_lock);
+ return ret;
+}
+
void putback_active_hugepage(struct page *page)
{
spin_lock_irq(&hugetlb_lock);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 85ad98c00fd9..29ab7b70d326 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p,
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
+/*
+ * Return true if a page type of a given page is supported by hwpoison
+ * mechanism (while handling could fail), otherwise false. This function
+ * does not return true for hugetlb or device memory pages, so it's assumed
+ * to be called only in the context where we never have such pages.
+ */
+static inline bool HWPoisonHandlable(struct page *page)
+{
+ return PageLRU(page) || __PageMovable(page);
+}
+
/**
* __get_hwpoison_page() - Get refcount for memory error handling:
* @page: raw error page (hit by memory error)
@@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p,
static int __get_hwpoison_page(struct page *page)
{
struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
+
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
+
+ /*
+ * This check prevents from calling get_hwpoison_unless_zero()
+ * for any unsupported type of page in order to reduce the risk of
+ * unexpected races caused by taking a page refcount.
+ */
+ if (!HWPoisonHandlable(head))
+ return 0;
- if (!PageHuge(head) && PageTransHuge(head)) {
+ if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
@@ -1017,7 +1042,7 @@ try_again:
ret = -EIO;
}
} else {
- if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+ if (PageHuge(p) || HWPoisonHandlable(p)) {
ret = 1;
} else {
/*