summaryrefslogtreecommitdiff
path: root/mm/ksm.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/ksm.c')
-rw-r--r--mm/ksm.c261
1 files changed, 85 insertions, 176 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index 34c4820e0d3d..df6bae3a5a2c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -488,21 +488,17 @@ static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
static DEFINE_SPINLOCK(ksm_mmlist_lock);
-#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
- sizeof(struct __struct), __alignof__(struct __struct),\
- (__flags), NULL)
-
static int __init ksm_slab_init(void)
{
- rmap_item_cache = KSM_KMEM_CACHE(ksm_rmap_item, 0);
+ rmap_item_cache = KMEM_CACHE(ksm_rmap_item, 0);
if (!rmap_item_cache)
goto out;
- stable_node_cache = KSM_KMEM_CACHE(ksm_stable_node, 0);
+ stable_node_cache = KMEM_CACHE(ksm_stable_node, 0);
if (!stable_node_cache)
goto out_free1;
- mm_slot_cache = KSM_KMEM_CACHE(ksm_mm_slot, 0);
+ mm_slot_cache = KMEM_CACHE(ksm_mm_slot, 0);
if (!mm_slot_cache)
goto out_free2;
@@ -1532,6 +1528,44 @@ out:
}
/*
+ * This function returns 0 if the pages were merged or if they are
+ * no longer merging candidates (e.g., VMA stale), -EFAULT otherwise.
+ */
+static int try_to_merge_with_zero_page(struct ksm_rmap_item *rmap_item,
+ struct page *page)
+{
+ struct mm_struct *mm = rmap_item->mm;
+ int err = -EFAULT;
+
+ /*
+ * Same checksum as an empty page. We attempt to merge it with the
+ * appropriate zero page if the user enabled this via sysfs.
+ */
+ if (ksm_use_zero_pages && (rmap_item->oldchecksum == zero_checksum)) {
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+ vma = find_mergeable_vma(mm, rmap_item->address);
+ if (vma) {
+ err = try_to_merge_one_page(vma, page,
+ ZERO_PAGE(rmap_item->address));
+ trace_ksm_merge_one_page(
+ page_to_pfn(ZERO_PAGE(rmap_item->address)),
+ rmap_item, mm, err);
+ } else {
+ /*
+ * If the vma is out of date, we do not need to
+ * continue.
+ */
+ err = 0;
+ }
+ mmap_read_unlock(mm);
+ }
+
+ return err;
+}
+
+/*
* try_to_merge_with_ksm_page - like try_to_merge_two_pages,
* but no new kernel page is allocated: kpage must already be a ksm page.
*
@@ -1625,7 +1659,6 @@ static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
struct ksm_stable_node *dup, *found = NULL, *stable_node = *_stable_node;
struct hlist_node *hlist_safe;
struct folio *folio, *tree_folio = NULL;
- int nr = 0;
int found_rmap_hlist_len;
if (!prune_stale_stable_nodes ||
@@ -1652,33 +1685,26 @@ static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
folio = ksm_get_folio(dup, KSM_GET_FOLIO_NOLOCK);
if (!folio)
continue;
- nr += 1;
- if (is_page_sharing_candidate(dup)) {
- if (!found ||
- dup->rmap_hlist_len > found_rmap_hlist_len) {
- if (found)
- folio_put(tree_folio);
- found = dup;
- found_rmap_hlist_len = found->rmap_hlist_len;
- tree_folio = folio;
-
- /* skip put_page for found dup */
- if (!prune_stale_stable_nodes)
- break;
- continue;
- }
+ /* Pick the best candidate if possible. */
+ if (!found || (is_page_sharing_candidate(dup) &&
+ (!is_page_sharing_candidate(found) ||
+ dup->rmap_hlist_len > found_rmap_hlist_len))) {
+ if (found)
+ folio_put(tree_folio);
+ found = dup;
+ found_rmap_hlist_len = found->rmap_hlist_len;
+ tree_folio = folio;
+ /* skip put_page for found candidate */
+ if (!prune_stale_stable_nodes &&
+ is_page_sharing_candidate(found))
+ break;
+ continue;
}
folio_put(folio);
}
if (found) {
- /*
- * nr is counting all dups in the chain only if
- * prune_stale_stable_nodes is true, otherwise we may
- * break the loop at nr == 1 even if there are
- * multiple entries.
- */
- if (prune_stale_stable_nodes && nr == 1) {
+ if (hlist_is_singular_node(&found->hlist_dup, &stable_node->hlist)) {
/*
* If there's not just one entry it would
* corrupt memory, better BUG_ON. In KSM
@@ -1730,25 +1756,15 @@ static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
hlist_add_head(&found->hlist_dup,
&stable_node->hlist);
}
+ } else {
+ /* Its hlist must be empty if no one found. */
+ free_stable_node_chain(stable_node, root);
}
*_stable_node_dup = found;
return tree_folio;
}
-static struct ksm_stable_node *stable_node_dup_any(struct ksm_stable_node *stable_node,
- struct rb_root *root)
-{
- if (!is_stable_node_chain(stable_node))
- return stable_node;
- if (hlist_empty(&stable_node->hlist)) {
- free_stable_node_chain(stable_node, root);
- return NULL;
- }
- return hlist_entry(stable_node->hlist.first,
- typeof(*stable_node), hlist_dup);
-}
-
/*
* Like for ksm_get_folio, this function can free the *_stable_node and
* *_stable_node_dup if the returned tree_page is NULL.
@@ -1769,17 +1785,10 @@ static struct folio *__stable_node_chain(struct ksm_stable_node **_stable_node_d
bool prune_stale_stable_nodes)
{
struct ksm_stable_node *stable_node = *_stable_node;
+
if (!is_stable_node_chain(stable_node)) {
- if (is_page_sharing_candidate(stable_node)) {
- *_stable_node_dup = stable_node;
- return ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK);
- }
- /*
- * _stable_node_dup set to NULL means the stable_node
- * reached the ksm_max_page_sharing limit.
- */
- *_stable_node_dup = NULL;
- return NULL;
+ *_stable_node_dup = stable_node;
+ return ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK);
}
return stable_node_dup(_stable_node_dup, _stable_node, root,
prune_stale_stable_nodes);
@@ -1793,16 +1802,10 @@ static __always_inline struct folio *chain_prune(struct ksm_stable_node **s_n_d,
}
static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d,
- struct ksm_stable_node *s_n,
+ struct ksm_stable_node **s_n,
struct rb_root *root)
{
- struct ksm_stable_node *old_stable_node = s_n;
- struct folio *tree_folio;
-
- tree_folio = __stable_node_chain(s_n_d, &s_n, root, false);
- /* not pruning dups so s_n cannot have changed */
- VM_BUG_ON(s_n != old_stable_node);
- return tree_folio;
+ return __stable_node_chain(s_n_d, s_n, root, false);
}
/*
@@ -1820,7 +1823,7 @@ static struct page *stable_tree_search(struct page *page)
struct rb_root *root;
struct rb_node **new;
struct rb_node *parent;
- struct ksm_stable_node *stable_node, *stable_node_dup, *stable_node_any;
+ struct ksm_stable_node *stable_node, *stable_node_dup;
struct ksm_stable_node *page_node;
struct folio *folio;
@@ -1844,45 +1847,7 @@ again:
cond_resched();
stable_node = rb_entry(*new, struct ksm_stable_node, node);
- stable_node_any = NULL;
tree_folio = chain_prune(&stable_node_dup, &stable_node, root);
- /*
- * NOTE: stable_node may have been freed by
- * chain_prune() if the returned stable_node_dup is
- * not NULL. stable_node_dup may have been inserted in
- * the rbtree instead as a regular stable_node (in
- * order to collapse the stable_node chain if a single
- * stable_node dup was found in it). In such case the
- * stable_node is overwritten by the callee to point
- * to the stable_node_dup that was collapsed in the
- * stable rbtree and stable_node will be equal to
- * stable_node_dup like if the chain never existed.
- */
- if (!stable_node_dup) {
- /*
- * Either all stable_node dups were full in
- * this stable_node chain, or this chain was
- * empty and should be rb_erased.
- */
- stable_node_any = stable_node_dup_any(stable_node,
- root);
- if (!stable_node_any) {
- /* rb_erase just run */
- goto again;
- }
- /*
- * Take any of the stable_node dups page of
- * this stable_node chain to let the tree walk
- * continue. All KSM pages belonging to the
- * stable_node dups in a stable_node chain
- * have the same content and they're
- * write protected at all times. Any will work
- * fine to continue the walk.
- */
- tree_folio = ksm_get_folio(stable_node_any,
- KSM_GET_FOLIO_NOLOCK);
- }
- VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
if (!tree_folio) {
/*
* If we walked over a stale stable_node,
@@ -1920,7 +1885,7 @@ again:
goto chain_append;
}
- if (!stable_node_dup) {
+ if (!is_page_sharing_candidate(stable_node_dup)) {
/*
* If the stable_node is a chain and
* we got a payload match in memcmp
@@ -2029,9 +1994,6 @@ replace:
return &folio->page;
chain_append:
- /* stable_node_dup could be null if it reached the limit */
- if (!stable_node_dup)
- stable_node_dup = stable_node_any;
/*
* If stable_node was a chain and chain_prune collapsed it,
* stable_node has been updated to be the new regular
@@ -2076,7 +2038,7 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
struct rb_root *root;
struct rb_node **new;
struct rb_node *parent;
- struct ksm_stable_node *stable_node, *stable_node_dup, *stable_node_any;
+ struct ksm_stable_node *stable_node, *stable_node_dup;
bool need_chain = false;
kpfn = folio_pfn(kfolio);
@@ -2092,33 +2054,7 @@ again:
cond_resched();
stable_node = rb_entry(*new, struct ksm_stable_node, node);
- stable_node_any = NULL;
- tree_folio = chain(&stable_node_dup, stable_node, root);
- if (!stable_node_dup) {
- /*
- * Either all stable_node dups were full in
- * this stable_node chain, or this chain was
- * empty and should be rb_erased.
- */
- stable_node_any = stable_node_dup_any(stable_node,
- root);
- if (!stable_node_any) {
- /* rb_erase just run */
- goto again;
- }
- /*
- * Take any of the stable_node dups page of
- * this stable_node chain to let the tree walk
- * continue. All KSM pages belonging to the
- * stable_node dups in a stable_node chain
- * have the same content and they're
- * write protected at all times. Any will work
- * fine to continue the walk.
- */
- tree_folio = ksm_get_folio(stable_node_any,
- KSM_GET_FOLIO_NOLOCK);
- }
- VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
+ tree_folio = chain(&stable_node_dup, &stable_node, root);
if (!tree_folio) {
/*
* If we walked over a stale stable_node,
@@ -2306,7 +2242,6 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item,
*/
static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
{
- struct mm_struct *mm = rmap_item->mm;
struct ksm_rmap_item *tree_rmap_item;
struct page *tree_page = NULL;
struct ksm_stable_node *stable_node;
@@ -2333,6 +2268,23 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
*/
if (!is_page_sharing_candidate(stable_node))
max_page_sharing_bypass = true;
+ } else {
+ remove_rmap_item_from_tree(rmap_item);
+
+ /*
+ * If the hash value of the page has changed from the last time
+ * we calculated it, this page is changing frequently: therefore we
+ * don't want to insert it in the unstable tree, and we don't want
+ * to waste our time searching for something identical to it there.
+ */
+ checksum = calc_checksum(page);
+ if (rmap_item->oldchecksum != checksum) {
+ rmap_item->oldchecksum = checksum;
+ return;
+ }
+
+ if (!try_to_merge_with_zero_page(rmap_item, page))
+ return;
}
/* We first start with searching the page inside the stable tree */
@@ -2363,48 +2315,6 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
return;
}
- /*
- * If the hash value of the page has changed from the last time
- * we calculated it, this page is changing frequently: therefore we
- * don't want to insert it in the unstable tree, and we don't want
- * to waste our time searching for something identical to it there.
- */
- checksum = calc_checksum(page);
- if (rmap_item->oldchecksum != checksum) {
- rmap_item->oldchecksum = checksum;
- return;
- }
-
- /*
- * Same checksum as an empty page. We attempt to merge it with the
- * appropriate zero page if the user enabled this via sysfs.
- */
- if (ksm_use_zero_pages && (checksum == zero_checksum)) {
- struct vm_area_struct *vma;
-
- mmap_read_lock(mm);
- vma = find_mergeable_vma(mm, rmap_item->address);
- if (vma) {
- err = try_to_merge_one_page(vma, page,
- ZERO_PAGE(rmap_item->address));
- trace_ksm_merge_one_page(
- page_to_pfn(ZERO_PAGE(rmap_item->address)),
- rmap_item, mm, err);
- } else {
- /*
- * If the vma is out of date, we do not need to
- * continue.
- */
- err = 0;
- }
- mmap_read_unlock(mm);
- /*
- * In case of failure, the page was not really empty, so we
- * need to continue. Otherwise we're done.
- */
- if (!err)
- return;
- }
tree_rmap_item =
unstable_tree_search_insert(rmap_item, page, &tree_page);
if (tree_rmap_item) {
@@ -3088,7 +2998,6 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
if (copy_mc_user_highpage(folio_page(new_folio, 0), page,
addr, vma)) {
folio_put(new_folio);
- memory_failure_queue(folio_pfn(folio), 0);
return ERR_PTR(-EHWPOISON);
}
folio_set_dirty(new_folio);