Diffstat (limited to 'mm/ksm.c')
-rw-r--r--  mm/ksm.c  261
1 file changed, 85 insertions(+), 176 deletions(-)
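The first hunk below replaces KSM's private KSM_KMEM_CACHE() wrapper with the generic KMEM_CACHE() macro, which does the same thing: it names the slab cache after the struct and sizes/aligns the cache to it, so the local macro was pure duplication. For reference, the generic macro in <linux/slab.h> expands to roughly the following (exact formatting varies by kernel version):

#define KMEM_CACHE(__struct, __flags)                                   \
        kmem_cache_create(#__struct, sizeof(struct __struct),           \
                          __alignof__(struct __struct), (__flags), NULL)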
@@ -488,21 +488,17 @@ static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
 static DEFINE_SPINLOCK(ksm_mmlist_lock);
 
-#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\
-        sizeof(struct __struct), __alignof__(struct __struct),\
-        (__flags), NULL)
-
 static int __init ksm_slab_init(void)
 {
-        rmap_item_cache = KSM_KMEM_CACHE(ksm_rmap_item, 0);
+        rmap_item_cache = KMEM_CACHE(ksm_rmap_item, 0);
         if (!rmap_item_cache)
                 goto out;
 
-        stable_node_cache = KSM_KMEM_CACHE(ksm_stable_node, 0);
+        stable_node_cache = KMEM_CACHE(ksm_stable_node, 0);
         if (!stable_node_cache)
                 goto out_free1;
 
-        mm_slot_cache = KSM_KMEM_CACHE(ksm_mm_slot, 0);
+        mm_slot_cache = KMEM_CACHE(ksm_mm_slot, 0);
         if (!mm_slot_cache)
                 goto out_free2;
 
@@ -1532,6 +1528,44 @@ out:
 }
 
 /*
+ * This function returns 0 if the pages were merged or if they are
+ * no longer merging candidates (e.g., VMA stale), -EFAULT otherwise.
+ */
+static int try_to_merge_with_zero_page(struct ksm_rmap_item *rmap_item,
+                                       struct page *page)
+{
+        struct mm_struct *mm = rmap_item->mm;
+        int err = -EFAULT;
+
+        /*
+         * Same checksum as an empty page. We attempt to merge it with the
+         * appropriate zero page if the user enabled this via sysfs.
+         */
+        if (ksm_use_zero_pages && (rmap_item->oldchecksum == zero_checksum)) {
+                struct vm_area_struct *vma;
+
+                mmap_read_lock(mm);
+                vma = find_mergeable_vma(mm, rmap_item->address);
+                if (vma) {
+                        err = try_to_merge_one_page(vma, page,
+                                        ZERO_PAGE(rmap_item->address));
+                        trace_ksm_merge_one_page(
+                                        page_to_pfn(ZERO_PAGE(rmap_item->address)),
+                                        rmap_item, mm, err);
+                } else {
+                        /*
+                         * If the vma is out of date, we do not need to
+                         * continue.
+                         */
+                        err = 0;
+                }
+                mmap_read_unlock(mm);
+        }
+
+        return err;
+}
+
+/*
  * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
  * but no new kernel page is allocated: kpage must already be a ksm page.
  *
@@ -1625,7 +1659,6 @@ static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
         struct ksm_stable_node *dup, *found = NULL, *stable_node = *_stable_node;
         struct hlist_node *hlist_safe;
         struct folio *folio, *tree_folio = NULL;
-        int nr = 0;
         int found_rmap_hlist_len;
 
         if (!prune_stale_stable_nodes ||
@@ -1652,33 +1685,26 @@ static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
                 folio = ksm_get_folio(dup, KSM_GET_FOLIO_NOLOCK);
                 if (!folio)
                         continue;
-                nr += 1;
-                if (is_page_sharing_candidate(dup)) {
-                        if (!found ||
-                            dup->rmap_hlist_len > found_rmap_hlist_len) {
-                                if (found)
-                                        folio_put(tree_folio);
-                                found = dup;
-                                found_rmap_hlist_len = found->rmap_hlist_len;
-                                tree_folio = folio;
-
-                                /* skip put_page for found dup */
-                                if (!prune_stale_stable_nodes)
-                                        break;
-                                continue;
-                        }
+                /* Pick the best candidate if possible. */
+                if (!found || (is_page_sharing_candidate(dup) &&
+                    (!is_page_sharing_candidate(found) ||
+                     dup->rmap_hlist_len > found_rmap_hlist_len))) {
+                        if (found)
+                                folio_put(tree_folio);
+                        found = dup;
+                        found_rmap_hlist_len = found->rmap_hlist_len;
+                        tree_folio = folio;
+                        /* skip put_page for found candidate */
+                        if (!prune_stale_stable_nodes &&
+                            is_page_sharing_candidate(found))
+                                break;
+                        continue;
                 }
                 folio_put(folio);
         }
 
         if (found) {
-                /*
-                 * nr is counting all dups in the chain only if
-                 * prune_stale_stable_nodes is true, otherwise we may
-                 * break the loop at nr == 1 even if there are
-                 * multiple entries.
-                 */
-                if (prune_stale_stable_nodes && nr == 1) {
+                if (hlist_is_singular_node(&found->hlist_dup, &stable_node->hlist)) {
                         /*
                          * If there's not just one entry it would
                          * corrupt memory, better BUG_ON. In KSM
@@ -1730,25 +1756,15 @@ static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
                         hlist_add_head(&found->hlist_dup,
                                        &stable_node->hlist);
                 }
+        } else {
+                /* Its hlist must be empty if no one found. */
+                free_stable_node_chain(stable_node, root);
         }
 
         *_stable_node_dup = found;
         return tree_folio;
 }
 
-static struct ksm_stable_node *stable_node_dup_any(struct ksm_stable_node *stable_node,
-                                        struct rb_root *root)
-{
-        if (!is_stable_node_chain(stable_node))
-                return stable_node;
-        if (hlist_empty(&stable_node->hlist)) {
-                free_stable_node_chain(stable_node, root);
-                return NULL;
-        }
-        return hlist_entry(stable_node->hlist.first,
-                           typeof(*stable_node), hlist_dup);
-}
-
 /*
  * Like for ksm_get_folio, this function can free the *_stable_node and
  * *_stable_node_dup if the returned tree_page is NULL.
@@ -1769,17 +1785,10 @@ static struct folio *__stable_node_chain(struct ksm_stable_node **_stable_node_dup,
                                          bool prune_stale_stable_nodes)
 {
         struct ksm_stable_node *stable_node = *_stable_node;
+
         if (!is_stable_node_chain(stable_node)) {
-                if (is_page_sharing_candidate(stable_node)) {
-                        *_stable_node_dup = stable_node;
-                        return ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK);
-                }
-                /*
-                 * _stable_node_dup set to NULL means the stable_node
-                 * reached the ksm_max_page_sharing limit.
-                 */
-                *_stable_node_dup = NULL;
-                return NULL;
+                *_stable_node_dup = stable_node;
+                return ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK);
         }
         return stable_node_dup(_stable_node_dup, _stable_node, root,
                                prune_stale_stable_nodes);
@@ -1793,16 +1802,10 @@ static __always_inline struct folio *chain_prune(struct ksm_stable_node **s_n_d,
 }
 
 static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d,
-                                           struct ksm_stable_node *s_n,
+                                           struct ksm_stable_node **s_n,
                                            struct rb_root *root)
 {
-        struct ksm_stable_node *old_stable_node = s_n;
-        struct folio *tree_folio;
-
-        tree_folio = __stable_node_chain(s_n_d, &s_n, root, false);
-        /* not pruning dups so s_n cannot have changed */
-        VM_BUG_ON(s_n != old_stable_node);
-        return tree_folio;
+        return __stable_node_chain(s_n_d, s_n, root, false);
 }
 
 /*
@@ -1820,7 +1823,7 @@ static struct page *stable_tree_search(struct page *page)
         struct rb_root *root;
         struct rb_node **new;
         struct rb_node *parent;
-        struct ksm_stable_node *stable_node, *stable_node_dup, *stable_node_any;
+        struct ksm_stable_node *stable_node, *stable_node_dup;
         struct ksm_stable_node *page_node;
         struct folio *folio;
 
@@ -1844,45 +1847,7 @@ again:
                 cond_resched();
                 stable_node = rb_entry(*new, struct ksm_stable_node, node);
-                stable_node_any = NULL;
                 tree_folio = chain_prune(&stable_node_dup, &stable_node, root);
-                /*
-                 * NOTE: stable_node may have been freed by
-                 * chain_prune() if the returned stable_node_dup is
-                 * not NULL. stable_node_dup may have been inserted in
-                 * the rbtree instead as a regular stable_node (in
-                 * order to collapse the stable_node chain if a single
-                 * stable_node dup was found in it). In such case the
-                 * stable_node is overwritten by the callee to point
-                 * to the stable_node_dup that was collapsed in the
-                 * stable rbtree and stable_node will be equal to
-                 * stable_node_dup like if the chain never existed.
-                 */
-                if (!stable_node_dup) {
-                        /*
-                         * Either all stable_node dups were full in
-                         * this stable_node chain, or this chain was
-                         * empty and should be rb_erased.
-                         */
-                        stable_node_any = stable_node_dup_any(stable_node,
-                                                              root);
-                        if (!stable_node_any) {
-                                /* rb_erase just run */
-                                goto again;
-                        }
-                        /*
-                         * Take any of the stable_node dups page of
-                         * this stable_node chain to let the tree walk
-                         * continue. All KSM pages belonging to the
-                         * stable_node dups in a stable_node chain
-                         * have the same content and they're
-                         * write protected at all times. Any will work
-                         * fine to continue the walk.
-                         */
-                        tree_folio = ksm_get_folio(stable_node_any,
-                                                   KSM_GET_FOLIO_NOLOCK);
-                }
-                VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
                 if (!tree_folio) {
                         /*
                          * If we walked over a stale stable_node,
@@ -1920,7 +1885,7 @@ again:
                         goto chain_append;
                 }
 
-                if (!stable_node_dup) {
+                if (!is_page_sharing_candidate(stable_node_dup)) {
                         /*
                          * If the stable_node is a chain and
                          * we got a payload match in memcmp
@@ -2029,9 +1994,6 @@ replace:
         return &folio->page;
 
 chain_append:
-        /* stable_node_dup could be null if it reached the limit */
-        if (!stable_node_dup)
-                stable_node_dup = stable_node_any;
         /*
         * If stable_node was a chain and chain_prune collapsed it,
         * stable_node has been updated to be the new regular
@@ -2076,7 +2038,7 @@ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
         struct rb_root *root;
         struct rb_node **new;
         struct rb_node *parent;
-        struct ksm_stable_node *stable_node, *stable_node_dup, *stable_node_any;
+        struct ksm_stable_node *stable_node, *stable_node_dup;
         bool need_chain = false;
 
         kpfn = folio_pfn(kfolio);
@@ -2092,33 +2054,7 @@ again:
                 cond_resched();
                 stable_node = rb_entry(*new, struct ksm_stable_node, node);
-                stable_node_any = NULL;
-                tree_folio = chain(&stable_node_dup, stable_node, root);
-                if (!stable_node_dup) {
-                        /*
-                         * Either all stable_node dups were full in
-                         * this stable_node chain, or this chain was
-                         * empty and should be rb_erased.
-                         */
-                        stable_node_any = stable_node_dup_any(stable_node,
-                                                              root);
-                        if (!stable_node_any) {
-                                /* rb_erase just run */
-                                goto again;
-                        }
-                        /*
-                         * Take any of the stable_node dups page of
-                         * this stable_node chain to let the tree walk
-                         * continue. All KSM pages belonging to the
-                         * stable_node dups in a stable_node chain
-                         * have the same content and they're
-                         * write protected at all times. Any will work
-                         * fine to continue the walk.
-                         */
-                        tree_folio = ksm_get_folio(stable_node_any,
-                                                   KSM_GET_FOLIO_NOLOCK);
-                }
-                VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
+                tree_folio = chain(&stable_node_dup, &stable_node, root);
                 if (!tree_folio) {
                         /*
                          * If we walked over a stale stable_node,
@@ -2306,7 +2242,6 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item,
  */
 static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
 {
-        struct mm_struct *mm = rmap_item->mm;
         struct ksm_rmap_item *tree_rmap_item;
         struct page *tree_page = NULL;
         struct ksm_stable_node *stable_node;
@@ -2333,6 +2268,23 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
                  */
                 if (!is_page_sharing_candidate(stable_node))
                         max_page_sharing_bypass = true;
+        } else {
+                remove_rmap_item_from_tree(rmap_item);
+
+                /*
+                 * If the hash value of the page has changed from the last time
+                 * we calculated it, this page is changing frequently: therefore we
+                 * don't want to insert it in the unstable tree, and we don't want
+                 * to waste our time searching for something identical to it there.
+                 */
+                checksum = calc_checksum(page);
+                if (rmap_item->oldchecksum != checksum) {
+                        rmap_item->oldchecksum = checksum;
+                        return;
+                }
+
+                if (!try_to_merge_with_zero_page(rmap_item, page))
+                        return;
         }
 
         /* We first start with searching the page inside the stable tree */
@@ -2363,48 +2315,6 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
                 return;
         }
 
-        /*
-         * If the hash value of the page has changed from the last time
-         * we calculated it, this page is changing frequently: therefore we
-         * don't want to insert it in the unstable tree, and we don't want
-         * to waste our time searching for something identical to it there.
-         */
-        checksum = calc_checksum(page);
-        if (rmap_item->oldchecksum != checksum) {
-                rmap_item->oldchecksum = checksum;
-                return;
-        }
-
-        /*
-         * Same checksum as an empty page. We attempt to merge it with the
-         * appropriate zero page if the user enabled this via sysfs.
-         */
-        if (ksm_use_zero_pages && (checksum == zero_checksum)) {
-                struct vm_area_struct *vma;
-
-                mmap_read_lock(mm);
-                vma = find_mergeable_vma(mm, rmap_item->address);
-                if (vma) {
-                        err = try_to_merge_one_page(vma, page,
-                                        ZERO_PAGE(rmap_item->address));
-                        trace_ksm_merge_one_page(
-                                page_to_pfn(ZERO_PAGE(rmap_item->address)),
-                                rmap_item, mm, err);
-                } else {
-                        /*
-                         * If the vma is out of date, we do not need to
-                         * continue.
-                         */
-                        err = 0;
-                }
-                mmap_read_unlock(mm);
-                /*
-                 * In case of failure, the page was not really empty, so we
-                 * need to continue. Otherwise we're done.
-                 */
-                if (!err)
-                        return;
-        }
         tree_rmap_item =
                 unstable_tree_search_insert(rmap_item, page, &tree_page);
         if (tree_rmap_item) {
@@ -3088,7 +2998,6 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
 
         if (copy_mc_user_highpage(folio_page(new_folio, 0), page, addr, vma)) {
                 folio_put(new_folio);
-                memory_failure_queue(folio_pfn(folio), 0);
                 return ERR_PTR(-EHWPOISON);
         }
         folio_set_dirty(new_folio);
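The rewritten loop in stable_node_dup() collapses the old nested candidate test into a single selection rule: keep any dup whose folio could be pinned, but upgrade to a sharing candidate over a non-candidate, and to a candidate with a longer rmap hlist over a shorter one. The standalone sketch below is not kernel code; the struct, its fields, and the sample values are illustrative stand-ins for the real ksm_stable_node state, and the folio pinning and early-break details are omitted:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct dup {
        const char *name;
        bool sharing_candidate;  /* stands in for is_page_sharing_candidate(dup) */
        int rmap_hlist_len;      /* number of rmap_items mapping the page */
};

/* Mirrors the new condition: replace `found` when we have nothing yet, or
 * when `dup` is a sharing candidate that beats a non-candidate or a
 * shorter-chained `found`. */
static bool better(const struct dup *found, const struct dup *dup)
{
        return !found || (dup->sharing_candidate &&
                          (!found->sharing_candidate ||
                           dup->rmap_hlist_len > found->rmap_hlist_len));
}

int main(void)
{
        struct dup dups[] = {
                { "dup-a", false, 9 },  /* over the sharing limit */
                { "dup-b", true,  3 },
                { "dup-c", true,  7 },  /* best: candidate with most sharers */
        };
        const struct dup *found = NULL;

        for (size_t i = 0; i < sizeof(dups) / sizeof(dups[0]); i++)
                if (better(found, &dups[i]))
                        found = &dups[i];

        printf("picked %s\n", found->name);  /* prints: picked dup-c */
        return 0;
}

The behavioral shift is in the first clause: a dup over the sharing limit can now be returned as `found` when no candidate exists, which is why stable_tree_search() now tests !is_page_sharing_candidate(stable_node_dup) where it previously tested !stable_node_dup, and why the stable_node_any fallback machinery could be deleted.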
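Similarly, the old `prune_stale_stable_nodes && nr == 1` test could only prove the chain held a single dup after counting every entry, and only when pruning (otherwise the loop may break early and undercount). hlist_is_singular_node() answers the same question in O(1) from the node's own links; it is defined in <linux/list.h> approximately as:

static inline bool
hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h)
{
        /* true iff n has no successor and the head points directly at n */
        return !n->next && n->pprev == &h->first;
}

With that, the collapse-a-singleton-chain path can fire whenever `found` really is the only dup, independent of prune_stale_stable_nodes and without the `nr` counter.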