diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/gup.c | 4 | ||||
-rw-r--r-- | mm/highmem.c | 4 | ||||
-rw-r--r-- | mm/hugetlb.c | 41 | ||||
-rw-r--r-- | mm/hugetlb_cgroup.c | 10 | ||||
-rw-r--r-- | mm/internal.h | 20 | ||||
-rw-r--r-- | mm/kfence/core.c | 9 | ||||
-rw-r--r-- | mm/kmemleak.c | 3 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/mmu_notifier.c | 23 | ||||
-rw-r--r-- | mm/page-writeback.c | 16 | ||||
-rw-r--r-- | mm/page_poison.c | 4 | ||||
-rw-r--r-- | mm/percpu-internal.h | 2 | ||||
-rw-r--r-- | mm/percpu-stats.c | 9 | ||||
-rw-r--r-- | mm/percpu.c | 14 | ||||
-rw-r--r-- | mm/z3fold.c | 16 |
15 files changed, 155 insertions, 22 deletions
@@ -1535,6 +1535,10 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); + + if (ret == 1 && is_page_poisoned(page)) + return NULL; + return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ diff --git a/mm/highmem.c b/mm/highmem.c index 86f2b9495f9c..6ef8f5e05e7e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -618,7 +618,7 @@ void __kmap_local_sched_out(void) int idx; /* With debug all even slots are unmapped and act as guard */ - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(!pte_none(pteval)); continue; } @@ -654,7 +654,7 @@ void __kmap_local_sched_in(void) int idx; /* With debug all even slots are unmapped and act as guard */ - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(!pte_none(pteval)); continue; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5b1ab1f427c5..a86a58ef132d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -280,6 +280,17 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg, nrg->reservation_counter = &h_cg->rsvd_hugepage[hstate_index(h)]; nrg->css = &h_cg->css; + /* + * The caller will hold exactly one h_cg->css reference for the + * whole contiguous reservation region. But this area might be + * scattered when there are already some file_regions reside in + * it. As a result, many file_regions may share only one css + * reference. In order to ensure that one file_region must hold + * exactly one h_cg->css reference, we should do css_get for + * each file_region and leave the reference held by caller + * untouched. 
+ */ + css_get(&h_cg->css); if (!resv->pages_per_hpage) resv->pages_per_hpage = pages_per_huge_page(h); /* pages_per_hpage should be the same for all entries in @@ -293,6 +304,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg, #endif } +static void put_uncharge_info(struct file_region *rg) +{ +#ifdef CONFIG_CGROUP_HUGETLB + if (rg->css) + css_put(rg->css); +#endif +} + static bool has_same_uncharge_info(struct file_region *rg, struct file_region *org) { @@ -316,6 +335,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) prg->to = rg->to; list_del(&rg->link); + put_uncharge_info(rg); kfree(rg); rg = prg; @@ -327,6 +347,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) nrg->from = rg->from; list_del(&rg->link); + put_uncharge_info(rg); kfree(rg); } } @@ -662,7 +683,7 @@ retry: del += t - f; hugetlb_cgroup_uncharge_file_region( - resv, rg, t - f); + resv, rg, t - f, false); /* New entry for end of split region */ nrg->from = t; @@ -683,7 +704,7 @@ retry: if (f <= rg->from && t >= rg->to) { /* Remove entire region */ del += rg->to - rg->from; hugetlb_cgroup_uncharge_file_region(resv, rg, - rg->to - rg->from); + rg->to - rg->from, true); list_del(&rg->link); kfree(rg); continue; @@ -691,13 +712,13 @@ retry: if (f <= rg->from) { /* Trim beginning of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, - t - rg->from); + t - rg->from, false); del += t - rg->from; rg->from = t; } else { /* Trim end of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, - rg->to - f); + rg->to - f, false); del += rg->to - f; rg->to = f; @@ -5187,6 +5208,10 @@ bool hugetlb_reserve_pages(struct inode *inode, */ long rsv_adjust; + /* + * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the + * reference to h_cg->css. See comment below for detail. 
+ */ hugetlb_cgroup_uncharge_cgroup_rsvd( hstate_index(h), (chg - add) * pages_per_huge_page(h), h_cg); @@ -5194,6 +5219,14 @@ bool hugetlb_reserve_pages(struct inode *inode, rsv_adjust = hugepage_subpool_put_pages(spool, chg - add); hugetlb_acct_memory(h, -rsv_adjust); + } else if (h_cg) { + /* + * The file_regions will hold their own reference to + * h_cg->css. So we should release the reference held + * via hugetlb_cgroup_charge_cgroup_rsvd() when we are + * done. + */ + hugetlb_cgroup_put_rsvd_cgroup(h_cg); } } return true; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f68b51fcda3d..603a131e262d 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages) + unsigned long nr_pages, + bool region_del) { if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) return; @@ -400,7 +401,12 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, !resv->reservation_counter) { page_counter_uncharge(rg->reservation_counter, nr_pages * resv->pages_per_hpage); - css_put(rg->css); + /* + * Only do css_put(rg->css) when we delete the entire region + * because one file_region must hold exactly one css reference. 
+ */ + if (region_del) + css_put(rg->css); } } diff --git a/mm/internal.h b/mm/internal.h index 1432feec62df..cb3c5e0a7799 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -97,6 +97,26 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } +/* + * When the kernel touches a user page, the user page may have been marked + * poison but still mapped in user space, if without this page, the kernel + * can guarantee the data integrity and operation success, the kernel is + * better to check the poison status and avoid touching it, be good not to + * panic, coredump for process fatal signal is a sample case matching this + * scenario. Or if kernel can't guarantee the data integrity, it's better + * not to call this function, let kernel touch the poison page and get to + * panic. + */ +static inline bool is_page_poisoned(struct page *page) +{ + if (PageHWPoison(page)) + return true; + else if (PageHuge(page) && PageHWPoison(compound_head(page))) + return true; + + return false; +} + extern unsigned long highest_memmap_pfn; /* diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 3b8ec938470a..d53c91f881a4 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -12,6 +12,7 @@ #include <linux/debugfs.h> #include <linux/kcsan-checks.h> #include <linux/kfence.h> +#include <linux/kmemleak.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/memblock.h> @@ -480,6 +481,14 @@ static bool __init kfence_init_pool(void) addr += 2 * PAGE_SIZE; } + /* + * The pool is live and will never be deallocated from this point on. + * Remove the pool object from the kmemleak object tree, as it would + * otherwise overlap with allocations returned by kfence_alloc(), which + * are registered with kmemleak through the slab post-alloc hook. 
+ */ + kmemleak_free(__kfence_pool); + return true; err: diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c0014d3b91c1..fe6e3ae8e8c6 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -97,6 +97,7 @@ #include <linux/atomic.h> #include <linux/kasan.h> +#include <linux/kfence.h> #include <linux/kmemleak.h> #include <linux/memory_hotplug.h> @@ -589,7 +590,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, atomic_set(&object->use_count, 1); object->flags = OBJECT_ALLOCATED; object->pointer = ptr; - object->size = size; + object->size = kfence_ksize((void *)ptr) ?: size; object->excess_ref = 0; object->min_count = min_count; object->count = 0; /* white color initially */ diff --git a/mm/memory.c b/mm/memory.c index 5efa07fb6cdc..550405fc3b5e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -166,7 +166,7 @@ static int __init init_zero_pfn(void) zero_pfn = page_to_pfn(ZERO_PAGE(0)); return 0; } -core_initcall(init_zero_pfn); +early_initcall(init_zero_pfn); void mm_trace_rss_stat(struct mm_struct *mm, int member, long count) { diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 61ee40ed804e..459d195d2ff6 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -501,10 +501,33 @@ static int mn_hlist_invalidate_range_start( ""); WARN_ON(mmu_notifier_range_blockable(range) || _ret != -EAGAIN); + /* + * We call all the notifiers on any EAGAIN, + * there is no way for a notifier to know if + * its start method failed, thus a start that + * does EAGAIN can't also do end. + */ + WARN_ON(ops->invalidate_range_end); ret = _ret; } } } + + if (ret) { + /* + * Must be non-blocking to get here. If there are multiple + * notifiers and one or more failed start, any that succeeded + * start are expecting their end to be called. Do so now. 
+ */ + hlist_for_each_entry_rcu(subscription, &subscriptions->list, + hlist, srcu_read_lock_held(&srcu)) { + if (!subscription->ops->invalidate_range_end) + continue; + + subscription->ops->invalidate_range_end(subscription, + range); + } + } srcu_read_unlock(&srcu, id); return ret; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index eb34d204d4ee..9e35b636a393 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2833,6 +2833,22 @@ void wait_on_page_writeback(struct page *page) } EXPORT_SYMBOL_GPL(wait_on_page_writeback); +/* + * Wait for a page to complete writeback. Returns -EINTR if we get a + * fatal signal while waiting. + */ +int wait_on_page_writeback_killable(struct page *page) +{ + while (PageWriteback(page)) { + trace_wait_on_page_writeback(page, page_mapping(page)); + if (wait_on_page_bit_killable(page, PG_writeback)) + return -EINTR; + } + + return 0; +} +EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); + /** * wait_for_stable_page() - wait for writeback to finish, if necessary. * @page: The page to wait on. diff --git a/mm/page_poison.c b/mm/page_poison.c index 65cdf844c8ad..655dc5895604 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -77,12 +77,14 @@ static void unpoison_page(struct page *page) void *addr; addr = kmap_atomic(page); + kasan_disable_current(); /* * Page poisoning when enabled poisons each and every page * that is freed to buddy. Thus no extra check is done to * see if a page was poisoned. 
*/ - check_poison_mem(addr, PAGE_SIZE); + check_poison_mem(kasan_reset_tag(addr), PAGE_SIZE); + kasan_enable_current(); kunmap_atomic(addr); } diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 18b768ac7dca..095d7eaa0db4 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -87,7 +87,7 @@ extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; -extern int pcpu_nr_empty_pop_pages; +extern int pcpu_nr_empty_pop_pages[]; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index c8400a2adbc2..f6026dbcdf6b 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -145,6 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) int slot, max_nr_alloc; int *buffer; enum pcpu_chunk_type type; + int nr_empty_pop_pages; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -165,7 +166,11 @@ alloc_buffer: goto alloc_buffer; } -#define PL(X) \ + nr_empty_pop_pages = 0; + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type]; + +#define PL(X) \ seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X) seq_printf(m, @@ -196,7 +201,7 @@ alloc_buffer: PU(nr_max_chunks); PU(min_alloc_size); PU(max_alloc_size); - P("empty_pop_pages", pcpu_nr_empty_pop_pages); + P("empty_pop_pages", nr_empty_pop_pages); seq_putc(m, '\n'); #undef PU diff --git a/mm/percpu.c b/mm/percpu.c index 6596a0a4286e..23308113a5ff 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -173,10 +173,10 @@ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ static LIST_HEAD(pcpu_map_extend_chunks); /* - * The number of empty populated pages, protected by pcpu_lock. The - * reserved chunk doesn't contribute to the count. + * The number of empty populated pages by chunk type, protected by pcpu_lock. + * The reserved chunk doesn't contribute to the count. 
*/ -int pcpu_nr_empty_pop_pages; +int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES]; /* * The number of populated pages in use by the allocator, protected by @@ -556,7 +556,7 @@ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr) { chunk->nr_empty_pop_pages += nr; if (chunk != pcpu_reserved_chunk) - pcpu_nr_empty_pop_pages += nr; + pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr; } /* @@ -1832,7 +1832,7 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); } - if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) + if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); /* clear the areas and return address relative to base address */ @@ -2000,7 +2000,7 @@ retry_pop: pcpu_atomic_alloc_failed = false; } else { nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - - pcpu_nr_empty_pop_pages, + pcpu_nr_empty_pop_pages[type], 0, PCPU_EMPTY_POP_PAGES_HIGH); } @@ -2580,7 +2580,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* link the first chunk in */ pcpu_first_chunk = chunk; - pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; + pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages; pcpu_chunk_relocate(pcpu_first_chunk, -1); /* include all regions of the first chunk */ diff --git a/mm/z3fold.c b/mm/z3fold.c index b5dafa7e44e4..9d889ad2bb86 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -1346,8 +1346,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) page = list_entry(pos, struct page, lru); zhdr = page_address(page); - if (test_bit(PAGE_HEADLESS, &page->private)) + if (test_bit(PAGE_HEADLESS, &page->private)) { + /* + * For non-headless pages, we wait to do this + * until we have the page lock to avoid racing + * with __z3fold_alloc(). 
Headless pages don't + * have a lock (and __z3fold_alloc() will never + * see them), but we still need to test and set + * PAGE_CLAIMED to avoid racing with + * z3fold_free(), so just do it now before + * leaving the loop. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + break; + } if (kref_get_unless_zero(&zhdr->refcount) == 0) { zhdr = NULL; |