author     Linus Torvalds <torvalds@linux-foundation.org>   2022-08-05 16:32:45 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2022-08-05 16:32:45 -0700
commit     6614a3c3164a5df2b54abb0b3559f51041cf705b (patch)
tree       1c25c23d9efed988705287fc2ccb78e0e76e311d /mm/vmalloc.c
parent     74cae210a335d159f2eb822e261adee905b6951a (diff)
parent     360614c01f81f48a89d8b13f8fa69c3ae0a1f5c7 (diff)
Merge tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
 "Most of the MM queue. A few things are still pending.

  Liam's maple tree rework didn't make it. This has resulted in a few
  other minor patch series being held over for next time.

  Multi-gen LRU still isn't merged as we were waiting for mapletree to
  stabilize. The current plan is to merge MGLRU into -mm soon and to
  later reintroduce mapletree, with a view to hopefully getting both
  into 6.1-rc1.

  Summary:

   - The usual batches of cleanups from Baoquan He, Muchun Song, Miaohe
     Lin, Yang Shi, Anshuman Khandual and Mike Rapoport

   - Some kmemleak fixes from Patrick Wang and Waiman Long

   - DAMON updates from SeongJae Park

   - memcg debug/visibility work from Roman Gushchin

   - vmalloc speedup from Uladzislau Rezki

   - more folio conversion work from Matthew Wilcox

   - enhancements for coherent device memory mapping from Alex Sierra

   - addition of shared pages tracking and CoW support for fsdax, from
     Shiyang Ruan

   - hugetlb optimizations from Mike Kravetz

   - Mel Gorman has contributed some pagealloc changes to improve
     latency and realtime behaviour.

   - mprotect soft-dirty checking has been improved by Peter Xu

   - Many other singleton patches all over the place"

[ XFS merge from hell as per Darrick Wong in
  https://lore.kernel.org/all/YshKnxb4VwXycPO8@magnolia/ ]

* tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (282 commits)
  tools/testing/selftests/vm/hmm-tests.c: fix build
  mm: Kconfig: fix typo
  mm: memory-failure: convert to pr_fmt()
  mm: use is_zone_movable_page() helper
  hugetlbfs: fix inaccurate comment in hugetlbfs_statfs()
  hugetlbfs: cleanup some comments in inode.c
  hugetlbfs: remove unneeded header file
  hugetlbfs: remove unneeded hugetlbfs_ops forward declaration
  hugetlbfs: use helper macro SZ_1{K,M}
  mm: cleanup is_highmem()
  mm/hmm: add a test for cross device private faults
  selftests: add soft-dirty into run_vmtests.sh
  selftests: soft-dirty: add test for mprotect
  mm/mprotect: fix soft-dirty check in can_change_pte_writable()
  mm: memcontrol: fix potential oom_lock recursion deadlock
  mm/gup.c: fix formatting in check_and_migrate_movable_page()
  xfs: fail dax mount if reflink is enabled on a partition
  mm/memcontrol.c: remove the redundant updating of stats_flush_threshold
  userfaultfd: don't fail on unrecognized features
  hugetlb_cgroup: fix wrong hugetlb cgroup numa stat
  ...
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--   mm/vmalloc.c   148
1 file changed, 88 insertions(+), 60 deletions(-)
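
Much of the vmalloc diff below applies one refactoring pattern: a helper gains a bool augment argument and the old names become thin wrappers, replacing the root == &free_vmap_area_root comparison so the same code can serve any caller-supplied tree. A minimal, hedged sketch of that pattern follows; the stand-in types and printf bodies are simplifications of my own, not the kernel implementation, and only the naming mirrors the patch.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins, not the kernel's rb-tree types (assumption). */
struct rb_root { void *rb_node; };
struct vmap_area { unsigned long va_start, va_end; };

/* One implementation, parameterized on whether the augmented
 * (subtree_max_size) bookkeeping of the free tree is needed. */
static void __link_va(struct vmap_area *va, struct rb_root *root, bool augment)
{
	/* ... rb_link_node()/list insertion would go here ... */
	printf("link %#lx-%#lx into the %s tree\n", va->va_start, va->va_end,
	       augment ? "augmented free" : "busy");
	(void)root;
}

/* The old entry point keeps serving the busy tree... */
static void link_va(struct vmap_area *va, struct rb_root *root)
{
	__link_va(va, root, false);
}

/* ...and an _augment variant makes the free-tree call sites explicit,
 * instead of the helper comparing 'root' against a global. */
static void link_va_augment(struct vmap_area *va, struct rb_root *root)
{
	__link_va(va, root, true);
}

int main(void)
{
	struct rb_root busy = { 0 }, free_tree = { 0 };
	struct vmap_area va = { 0x1000, 0x2000 };

	link_va(&va, &busy);
	link_va_augment(&va, &free_tree);
	return 0;
}

The hunks below apply the same split to unlink_va()/unlink_va_augment() and merge_or_add_vmap_area()/merge_or_add_vmap_area_augment().
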
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index effd1ff6a4b4..dd6cdb201195 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -790,6 +790,7 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}
+/* Look up the first VA which satisfies addr < va_end, NULL if none. */
static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
{
struct vmap_area *va = NULL;
@@ -814,9 +815,9 @@ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
return va;
}
-static struct vmap_area *__find_vmap_area(unsigned long addr)
+static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
{
- struct rb_node *n = vmap_area_root.rb_node;
+ struct rb_node *n = root->rb_node;
addr = (unsigned long)kasan_reset_tag((void *)addr);
@@ -874,11 +875,9 @@ find_va_links(struct vmap_area *va,
* Trigger the BUG() if there are sides(left/right)
* or full overlaps.
*/
- if (va->va_start < tmp_va->va_end &&
- va->va_end <= tmp_va->va_start)
+ if (va->va_end <= tmp_va->va_start)
link = &(*link)->rb_left;
- else if (va->va_end > tmp_va->va_start &&
- va->va_start >= tmp_va->va_end)
+ else if (va->va_start >= tmp_va->va_end)
link = &(*link)->rb_right;
else {
WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
@@ -911,8 +910,9 @@ get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
}
static __always_inline void
-link_va(struct vmap_area *va, struct rb_root *root,
- struct rb_node *parent, struct rb_node **link, struct list_head *head)
+__link_va(struct vmap_area *va, struct rb_root *root,
+ struct rb_node *parent, struct rb_node **link,
+ struct list_head *head, bool augment)
{
/*
* VA is still not in the list, but we can
@@ -926,12 +926,12 @@ link_va(struct vmap_area *va, struct rb_root *root,
/* Insert to the rb-tree */
rb_link_node(&va->rb_node, parent, link);
- if (root == &free_vmap_area_root) {
+ if (augment) {
/*
* Some explanation here. Just perform simple insertion
* to the tree. We do not set va->subtree_max_size to
* its current size before calling rb_insert_augmented().
- * It is because of we populate the tree from the bottom
+ * It is because we populate the tree from the bottom
* to parent levels when the node _is_ in the tree.
*
* Therefore we set subtree_max_size to zero after insertion,
@@ -950,21 +950,49 @@ link_va(struct vmap_area *va, struct rb_root *root,
}
static __always_inline void
-unlink_va(struct vmap_area *va, struct rb_root *root)
+link_va(struct vmap_area *va, struct rb_root *root,
+ struct rb_node *parent, struct rb_node **link,
+ struct list_head *head)
+{
+ __link_va(va, root, parent, link, head, false);
+}
+
+static __always_inline void
+link_va_augment(struct vmap_area *va, struct rb_root *root,
+ struct rb_node *parent, struct rb_node **link,
+ struct list_head *head)
+{
+ __link_va(va, root, parent, link, head, true);
+}
+
+static __always_inline void
+__unlink_va(struct vmap_area *va, struct rb_root *root, bool augment)
{
if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
return;
- if (root == &free_vmap_area_root)
+ if (augment)
rb_erase_augmented(&va->rb_node,
root, &free_vmap_area_rb_augment_cb);
else
rb_erase(&va->rb_node, root);
- list_del(&va->list);
+ list_del_init(&va->list);
RB_CLEAR_NODE(&va->rb_node);
}
+static __always_inline void
+unlink_va(struct vmap_area *va, struct rb_root *root)
+{
+ __unlink_va(va, root, false);
+}
+
+static __always_inline void
+unlink_va_augment(struct vmap_area *va, struct rb_root *root)
+{
+ __unlink_va(va, root, true);
+}
+
#if DEBUG_AUGMENT_PROPAGATE_CHECK
/*
* Gets called when remove the node and rotate.
@@ -1060,7 +1088,7 @@ insert_vmap_area_augment(struct vmap_area *va,
link = find_va_links(va, root, NULL, &parent);
if (link) {
- link_va(va, root, parent, link, head);
+ link_va_augment(va, root, parent, link, head);
augment_tree_propagate_from(va);
}
}
@@ -1077,8 +1105,8 @@ insert_vmap_area_augment(struct vmap_area *va,
* ongoing.
*/
static __always_inline struct vmap_area *
-merge_or_add_vmap_area(struct vmap_area *va,
- struct rb_root *root, struct list_head *head)
+__merge_or_add_vmap_area(struct vmap_area *va,
+ struct rb_root *root, struct list_head *head, bool augment)
{
struct vmap_area *sibling;
struct list_head *next;
@@ -1140,7 +1168,7 @@ merge_or_add_vmap_area(struct vmap_area *va,
* "normalized" because of rotation operations.
*/
if (merged)
- unlink_va(va, root);
+ __unlink_va(va, root, augment);
sibling->va_end = va->va_end;
@@ -1155,16 +1183,23 @@ merge_or_add_vmap_area(struct vmap_area *va,
insert:
if (!merged)
- link_va(va, root, parent, link, head);
+ __link_va(va, root, parent, link, head, augment);
return va;
}
static __always_inline struct vmap_area *
+merge_or_add_vmap_area(struct vmap_area *va,
+ struct rb_root *root, struct list_head *head)
+{
+ return __merge_or_add_vmap_area(va, root, head, false);
+}
+
+static __always_inline struct vmap_area *
merge_or_add_vmap_area_augment(struct vmap_area *va,
struct rb_root *root, struct list_head *head)
{
- va = merge_or_add_vmap_area(va, root, head);
+ va = __merge_or_add_vmap_area(va, root, head, true);
if (va)
augment_tree_propagate_from(va);
@@ -1198,15 +1233,15 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
* overhead.
*/
static __always_inline struct vmap_area *
-find_vmap_lowest_match(unsigned long size, unsigned long align,
- unsigned long vstart, bool adjust_search_size)
+find_vmap_lowest_match(struct rb_root *root, unsigned long size,
+ unsigned long align, unsigned long vstart, bool adjust_search_size)
{
struct vmap_area *va;
struct rb_node *node;
unsigned long length;
/* Start from the root. */
- node = free_vmap_area_root.rb_node;
+ node = root->rb_node;
/* Adjust the search size for alignment overhead. */
length = adjust_search_size ? size + align - 1 : size;
@@ -1334,11 +1369,12 @@ classify_va_fit_type(struct vmap_area *va,
}
static __always_inline int
-adjust_va_to_fit_type(struct vmap_area *va,
- unsigned long nva_start_addr, unsigned long size,
- enum fit_type type)
+adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
+ struct vmap_area *va, unsigned long nva_start_addr,
+ unsigned long size)
{
struct vmap_area *lva = NULL;
+ enum fit_type type = classify_va_fit_type(va, nva_start_addr, size);
if (type == FL_FIT_TYPE) {
/*
@@ -1348,7 +1384,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
* V NVA V
* |---------------|
*/
- unlink_va(va, &free_vmap_area_root);
+ unlink_va_augment(va, root);
kmem_cache_free(vmap_area_cachep, va);
} else if (type == LE_FIT_TYPE) {
/*
@@ -1426,8 +1462,7 @@ adjust_va_to_fit_type(struct vmap_area *va,
augment_tree_propagate_from(va);
if (lva) /* type == NE_FIT_TYPE */
- insert_vmap_area_augment(lva, &va->rb_node,
- &free_vmap_area_root, &free_vmap_area_list);
+ insert_vmap_area_augment(lva, &va->rb_node, root, head);
}
return 0;
@@ -1438,13 +1473,13 @@ adjust_va_to_fit_type(struct vmap_area *va,
* Otherwise a vend is returned that indicates failure.
*/
static __always_inline unsigned long
-__alloc_vmap_area(unsigned long size, unsigned long align,
+__alloc_vmap_area(struct rb_root *root, struct list_head *head,
+ unsigned long size, unsigned long align,
unsigned long vstart, unsigned long vend)
{
bool adjust_search_size = true;
unsigned long nva_start_addr;
struct vmap_area *va;
- enum fit_type type;
int ret;
/*
@@ -1459,7 +1494,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
adjust_search_size = false;
- va = find_vmap_lowest_match(size, align, vstart, adjust_search_size);
+ va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
if (unlikely(!va))
return vend;
@@ -1472,14 +1507,9 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
if (nva_start_addr + size > vend)
return vend;
- /* Classify what we have found. */
- type = classify_va_fit_type(va, nva_start_addr, size);
- if (WARN_ON_ONCE(type == NOTHING_FIT))
- return vend;
-
/* Update the free vmap_area. */
- ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
- if (ret)
+ ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
+ if (WARN_ON_ONCE(ret))
return vend;
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
@@ -1569,7 +1599,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
retry:
preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
- addr = __alloc_vmap_area(size, align, vstart, vend);
+ addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
+ size, align, vstart, vend);
spin_unlock(&free_vmap_area_lock);
/*
@@ -1663,7 +1694,7 @@ static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
/*
* Serialize vmap purging. There is no actual critical section protected
- * by this look, but we want to avoid concurrent calls for performance
+ * by this lock, but we want to avoid concurrent calls for performance
* reasons and to make the pcpu_get_vm_areas more deterministic.
*/
static DEFINE_MUTEX(vmap_purge_lock);
@@ -1677,32 +1708,32 @@ static void purge_fragmented_blocks_allcpus(void);
static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
{
unsigned long resched_threshold;
- struct list_head local_pure_list;
+ struct list_head local_purge_list;
struct vmap_area *va, *n_va;
lockdep_assert_held(&vmap_purge_lock);
spin_lock(&purge_vmap_area_lock);
purge_vmap_area_root = RB_ROOT;
- list_replace_init(&purge_vmap_area_list, &local_pure_list);
+ list_replace_init(&purge_vmap_area_list, &local_purge_list);
spin_unlock(&purge_vmap_area_lock);
- if (unlikely(list_empty(&local_pure_list)))
+ if (unlikely(list_empty(&local_purge_list)))
return false;
start = min(start,
- list_first_entry(&local_pure_list,
+ list_first_entry(&local_purge_list,
struct vmap_area, list)->va_start);
end = max(end,
- list_last_entry(&local_pure_list,
+ list_last_entry(&local_purge_list,
struct vmap_area, list)->va_end);
flush_tlb_kernel_range(start, end);
resched_threshold = lazy_max_pages() << 1;
spin_lock(&free_vmap_area_lock);
- list_for_each_entry_safe(va, n_va, &local_pure_list, list) {
+ list_for_each_entry_safe(va, n_va, &local_purge_list, list) {
unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
unsigned long orig_start = va->va_start;
unsigned long orig_end = va->va_end;
@@ -1803,7 +1834,7 @@ struct vmap_area *find_vmap_area(unsigned long addr)
struct vmap_area *va;
spin_lock(&vmap_area_lock);
- va = __find_vmap_area(addr);
+ va = __find_vmap_area(addr, &vmap_area_root);
spin_unlock(&vmap_area_lock);
return va;
@@ -2546,7 +2577,7 @@ struct vm_struct *remove_vm_area(const void *addr)
might_sleep();
spin_lock(&vmap_area_lock);
- va = __find_vmap_area((unsigned long)addr);
+ va = __find_vmap_area((unsigned long)addr, &vmap_area_root);
if (va && va->vm) {
struct vm_struct *vm = va->vm;
@@ -3168,15 +3199,15 @@ again:
/*
* Mark the pages as accessible, now that they are mapped.
- * The init condition should match the one in post_alloc_hook()
- * (except for the should_skip_init() check) to make sure that memory
- * is initialized under the same conditions regardless of the enabled
- * KASAN mode.
+ * The condition for setting KASAN_VMALLOC_INIT should complement the
+ * one in post_alloc_hook() with regards to the __GFP_SKIP_ZERO check
+ * to make sure that memory is initialized under the same conditions.
* Tag-based KASAN modes only assign tags to normal non-executable
* allocations, see __kasan_unpoison_vmalloc().
*/
kasan_flags |= KASAN_VMALLOC_VM_ALLOC;
- if (!want_init_on_free() && want_init_on_alloc(gfp_mask))
+ if (!want_init_on_free() && want_init_on_alloc(gfp_mask) &&
+ (gfp_mask & __GFP_SKIP_ZERO))
kasan_flags |= KASAN_VMALLOC_INIT;
/* KASAN_VMALLOC_PROT_NORMAL already set if required. */
area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags);
@@ -3735,7 +3766,6 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
int area, area2, last_area, term_area;
unsigned long base, start, size, end, last_end, orig_start, orig_end;
bool purged = false;
- enum fit_type type;
/* verify parameters and allocate data structures */
BUG_ON(offset_in_page(align) || !is_power_of_2(align));
@@ -3846,15 +3876,13 @@ retry:
/* It is a BUG(), but trigger recovery instead. */
goto recovery;
- type = classify_va_fit_type(va, start, size);
- if (WARN_ON_ONCE(type == NOTHING_FIT))
+ ret = adjust_va_to_fit_type(&free_vmap_area_root,
+ &free_vmap_area_list,
+ va, start, size);
+ if (WARN_ON_ONCE(unlikely(ret)))
/* It is a BUG(), but trigger recovery instead. */
goto recovery;
- ret = adjust_va_to_fit_type(va, start, size, type);
- if (unlikely(ret))
- goto recovery;
-
/* Allocated area. */
va = vas[area];
va->va_start = start;
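
Taken together, the hunks above thread an explicit rb-tree root and list head through the free-area path and move classify_va_fit_type() inside adjust_va_to_fit_type(), so callers pass the pair they operate on and check a single return code. A hedged, simplified sketch of that caller-side shape; the types and bodies are placeholders, not the kernel API, and only the parameter plumbing mirrors the patch.

#include <stddef.h>

/* Simplified placeholders, not the kernel's types (assumption). */
struct rb_root { void *rb_node; };
struct list_head { struct list_head *next, *prev; };
struct vmap_area { unsigned long va_start, va_end; };

/* Placeholder with the new shape: takes the tree/list to operate on,
 * classifies the fit internally, returns non-zero on failure (body elided). */
static int adjust_va_to_fit(struct rb_root *root, struct list_head *head,
			    struct vmap_area *va,
			    unsigned long nva_start_addr, unsigned long size)
{
	(void)root; (void)head; (void)va; (void)nva_start_addr; (void)size;
	return 0;
}

/* Caller-side flow shared by __alloc_vmap_area() and pcpu_get_vm_areas()
 * after the series: pass the pair explicitly, check only the return code. */
static unsigned long carve(struct rb_root *root, struct list_head *head,
			   struct vmap_area *va, unsigned long addr,
			   unsigned long size, unsigned long vend)
{
	if (adjust_va_to_fit(root, head, va, addr, size))
		return vend;	/* one failure path instead of a separate NOTHING_FIT check */

	return addr;
}

int main(void)
{
	struct rb_root root = { NULL };
	struct list_head head = { &head, &head };
	struct vmap_area va = { 0x1000, 0x9000 };

	/* Carve a 0x2000-byte range at 0x1000 out of the stub free area. */
	return carve(&root, &head, &va, 0x1000, 0x2000, 0x9000) == 0x1000 ? 0 : 1;
}
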