Diffstat (limited to 'mm')
-rw-r--r--  mm/gup.c              |  4
-rw-r--r--  mm/highmem.c          |  4
-rw-r--r--  mm/hugetlb.c          | 41
-rw-r--r--  mm/hugetlb_cgroup.c   | 10
-rw-r--r--  mm/internal.h         | 20
-rw-r--r--  mm/kfence/core.c      |  9
-rw-r--r--  mm/kmemleak.c         |  3
-rw-r--r--  mm/memory.c           |  2
-rw-r--r--  mm/mmu_notifier.c     | 23
-rw-r--r--  mm/page-writeback.c   | 16
-rw-r--r--  mm/page_poison.c      |  4
-rw-r--r--  mm/percpu-internal.h  |  2
-rw-r--r--  mm/percpu-stats.c     |  9
-rw-r--r--  mm/percpu.c           | 14
-rw-r--r--  mm/z3fold.c           | 16
15 files changed, 155 insertions, 22 deletions
diff --git a/mm/gup.c b/mm/gup.c
index e40579624f10..ef7d2da9f03f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1535,6 +1535,10 @@ struct page *get_dump_page(unsigned long addr)
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
if (locked)
mmap_read_unlock(mm);
+
+ if (ret == 1 && is_page_poisoned(page))
+ return NULL;
+
return (ret == 1) ? page : NULL;
}
#endif /* CONFIG_ELF_CORE */
diff --git a/mm/highmem.c b/mm/highmem.c
index 86f2b9495f9c..6ef8f5e05e7e 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -618,7 +618,7 @@ void __kmap_local_sched_out(void)
int idx;
/* With debug all even slots are unmapped and act as guard */
- if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
WARN_ON_ONCE(!pte_none(pteval));
continue;
}
@@ -654,7 +654,7 @@ void __kmap_local_sched_in(void)
int idx;
/* With debug all even slots are unmapped and act as guard */
- if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+ if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
WARN_ON_ONCE(!pte_none(pteval));
continue;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5b1ab1f427c5..a86a58ef132d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -280,6 +280,17 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
nrg->reservation_counter =
&h_cg->rsvd_hugepage[hstate_index(h)];
nrg->css = &h_cg->css;
+ /*
+ * The caller will hold exactly one h_cg->css reference for the
+ * whole contiguous reservation region. But this area might be
+ * scattered when some file_regions already reside in it. As a
+ * result, many file_regions may share only one css reference. To
+ * ensure that each file_region holds exactly one h_cg->css
+ * reference, do a css_get for each file_region and leave the
+ * reference held by the caller untouched.
+ */
+ css_get(&h_cg->css);
if (!resv->pages_per_hpage)
resv->pages_per_hpage = pages_per_huge_page(h);
/* pages_per_hpage should be the same for all entries in
@@ -293,6 +304,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
#endif
}
+static void put_uncharge_info(struct file_region *rg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+ if (rg->css)
+ css_put(rg->css);
+#endif
+}
+
static bool has_same_uncharge_info(struct file_region *rg,
struct file_region *org)
{
@@ -316,6 +335,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
prg->to = rg->to;
list_del(&rg->link);
+ put_uncharge_info(rg);
kfree(rg);
rg = prg;
@@ -327,6 +347,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
nrg->from = rg->from;
list_del(&rg->link);
+ put_uncharge_info(rg);
kfree(rg);
}
}
@@ -662,7 +683,7 @@ retry:
del += t - f;
hugetlb_cgroup_uncharge_file_region(
- resv, rg, t - f);
+ resv, rg, t - f, false);
/* New entry for end of split region */
nrg->from = t;
@@ -683,7 +704,7 @@ retry:
if (f <= rg->from && t >= rg->to) { /* Remove entire region */
del += rg->to - rg->from;
hugetlb_cgroup_uncharge_file_region(resv, rg,
- rg->to - rg->from);
+ rg->to - rg->from, true);
list_del(&rg->link);
kfree(rg);
continue;
@@ -691,13 +712,13 @@ retry:
if (f <= rg->from) { /* Trim beginning of region */
hugetlb_cgroup_uncharge_file_region(resv, rg,
- t - rg->from);
+ t - rg->from, false);
del += t - rg->from;
rg->from = t;
} else { /* Trim end of region */
hugetlb_cgroup_uncharge_file_region(resv, rg,
- rg->to - f);
+ rg->to - f, false);
del += rg->to - f;
rg->to = f;
@@ -5187,6 +5208,10 @@ bool hugetlb_reserve_pages(struct inode *inode,
*/
long rsv_adjust;
+ /*
+ * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
+ * reference to h_cg->css. See the comment below for details.
+ */
hugetlb_cgroup_uncharge_cgroup_rsvd(
hstate_index(h),
(chg - add) * pages_per_huge_page(h), h_cg);
@@ -5194,6 +5219,14 @@ bool hugetlb_reserve_pages(struct inode *inode,
rsv_adjust = hugepage_subpool_put_pages(spool,
chg - add);
hugetlb_acct_memory(h, -rsv_adjust);
+ } else if (h_cg) {
+ /*
+ * The file_regions will hold their own reference to
+ * h_cg->css. So we should release the reference held
+ * via hugetlb_cgroup_charge_cgroup_rsvd() when we are
+ * done.
+ */
+ hugetlb_cgroup_put_rsvd_cgroup(h_cg);
}
}
return true;
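
The reference scheme described in the comments above, one css reference per live file_region, taken when the region is recorded and dropped when it is coalesced away or deleted, can be sketched with an explicit counter. Hypothetical counter/region types below, not the hugetlb code itself:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the cgroup css reference counter. */
struct counter { int refs; };

static void get_ref(struct counter *c) { c->refs++; }
static void put_ref(struct counter *c) { assert(c->refs > 0); c->refs--; }

struct region {
	long from, to;
	struct counter *owner;   /* like rg->css */
};

static struct region *region_new(long from, long to, struct counter *owner)
{
	struct region *rg = malloc(sizeof(*rg));

	rg->from = from;
	rg->to = to;
	rg->owner = owner;
	get_ref(owner);          /* css_get(): one reference per file_region */
	return rg;
}

/* Merging adjacent regions frees one of them, so drop its reference too. */
static void region_merge(struct region *keep, struct region *absorb)
{
	assert(keep->to == absorb->from);
	keep->to = absorb->to;
	put_ref(absorb->owner);  /* matches put_uncharge_info() on coalesce */
	free(absorb);
}

static void region_free(struct region *rg)
{
	put_ref(rg->owner);
	free(rg);
}

int main(void)
{
	struct counter css = { 0 };
	struct region *a = region_new(0, 10, &css);
	struct region *b = region_new(10, 20, &css);

	assert(css.refs == 2);   /* one reference per live region */
	region_merge(a, b);      /* [0,10) + [10,20) -> [0,20) */
	assert(css.refs == 1);
	region_free(a);
	assert(css.refs == 0);   /* everything balanced */
	printf("ok\n");
	return 0;
}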
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index f68b51fcda3d..603a131e262d 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
- unsigned long nr_pages)
+ unsigned long nr_pages,
+ bool region_del)
{
if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
return;
@@ -400,7 +401,12 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
!resv->reservation_counter) {
page_counter_uncharge(rg->reservation_counter,
nr_pages * resv->pages_per_hpage);
- css_put(rg->css);
+ /*
+ * Only do css_put(rg->css) when we delete the entire region
+ * because one file_region must hold exactly one css reference.
+ */
+ if (region_del)
+ css_put(rg->css);
}
}
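
The new region_del argument keeps the two kinds of accounting separate: pages are uncharged for every trimmed range, but the single reference a file_region holds is dropped only when the whole region is deleted. A small sketch of that asymmetry with hypothetical counters (not the kernel API):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static long charged_pages = 100;  /* stands in for the page_counter */
static int  region_refs   = 1;    /* the one reference this region holds */

/* Uncharge pages for any trimmed range; drop the reference only on full delete. */
static void uncharge_region(long nr_pages, bool region_del)
{
	charged_pages -= nr_pages;
	if (region_del) {
		assert(region_refs == 1);
		region_refs--;            /* the css_put() happens exactly once */
	}
}

int main(void)
{
	uncharge_region(30, false);   /* trim the beginning: pages only */
	uncharge_region(30, false);   /* trim the end: pages only */
	uncharge_region(40, true);    /* remove what is left: pages + reference */
	printf("pages=%ld refs=%d\n", charged_pages, region_refs);  /* pages=0 refs=0 */
	return 0;
}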
diff --git a/mm/internal.h b/mm/internal.h
index 1432feec62df..cb3c5e0a7799 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -97,6 +97,26 @@ static inline void set_page_refcounted(struct page *page)
set_page_count(page, 1);
}
+/*
+ * When the kernel touches a user page, that page may already have been
+ * marked hardware-poisoned while still mapped in user space. If the
+ * kernel can guarantee data integrity and a successful operation without
+ * that page, it should check the poison status and avoid touching the
+ * page instead of panicking; dumping core for a process that received a
+ * fatal signal is one such case. If the kernel cannot guarantee data
+ * integrity, it is better not to call this function and to let the
+ * access to the poisoned page panic the machine.
+ */
+static inline bool is_page_poisoned(struct page *page)
+{
+ if (PageHWPoison(page))
+ return true;
+ else if (PageHuge(page) && PageHWPoison(compound_head(page)))
+ return true;
+
+ return false;
+}
+
extern unsigned long highest_memmap_pfn;
/*
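
The pattern the helper enables, check a poison flag and skip the page when the caller can tolerate missing data, as get_dump_page() now does, can be sketched in plain C with a hypothetical flags field standing in for PageHWPoison():

#include <stdbool.h>
#include <stdio.h>

#define FLAG_HWPOISON 0x1u

/* Hypothetical stand-in for struct page. */
struct fake_page {
	unsigned int flags;
	char data[16];
};

static bool page_is_poisoned(const struct fake_page *page)
{
	return page->flags & FLAG_HWPOISON;
}

/* Dump path: skip the page rather than touch known-bad memory. */
static const char *get_dump_data(const struct fake_page *page)
{
	if (page_is_poisoned(page))
		return NULL;   /* the coredump simply omits this page */
	return page->data;
}

int main(void)
{
	struct fake_page good = { 0, "payload" };
	struct fake_page bad  = { FLAG_HWPOISON, "garbage" };
	const char *d;

	printf("good: %s\n", get_dump_data(&good));
	d = get_dump_data(&bad);
	printf("bad:  %s\n", d ? d : "(skipped)");
	return 0;
}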
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 3b8ec938470a..d53c91f881a4 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -12,6 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
+#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
@@ -480,6 +481,14 @@ static bool __init kfence_init_pool(void)
addr += 2 * PAGE_SIZE;
}
+ /*
+ * The pool is live and will never be deallocated from this point on.
+ * Remove the pool object from the kmemleak object tree, as it would
+ * otherwise overlap with allocations returned by kfence_alloc(), which
+ * are registered with kmemleak through the slab post-alloc hook.
+ */
+ kmemleak_free(__kfence_pool);
+
return true;
err:
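
The overlap problem the comment describes can be modeled with a toy tracker that records (start, size) ranges: once individual objects are carved out of a registered pool, the pool's own record would overlap them, so it has to be dropped first. Hypothetical track()/untrack() helpers below, not the kmemleak API:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy leak tracker: remembers (start, size) ranges it was told about. */
struct tracked { const char *start; size_t size; bool live; };
static struct tracked objects[16];
static int nr_objects;

static bool overlaps(const struct tracked *a, const char *p, size_t n)
{
	return a->live && p < a->start + a->size && a->start < p + n;
}

static void track(const char *p, size_t n)
{
	for (int i = 0; i < nr_objects; i++)
		assert(!overlaps(&objects[i], p, n));   /* overlapping records are a bug */
	objects[nr_objects++] = (struct tracked){ p, n, true };
}

static void untrack(const char *p)
{
	for (int i = 0; i < nr_objects; i++)
		if (objects[i].start == p)
			objects[i].live = false;
}

static char pool[4096];   /* stands in for __kfence_pool */

int main(void)
{
	track(pool, sizeof(pool));   /* the pool itself was registered early on */
	untrack(pool);               /* like kmemleak_free(__kfence_pool) */
	track(pool + 0,   64);       /* later per-object allocations from the pool */
	track(pool + 256, 64);       /* ...no longer collide with a live pool record */
	printf("tracked %d objects\n", nr_objects);
	return 0;
}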
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index c0014d3b91c1..fe6e3ae8e8c6 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -97,6 +97,7 @@
#include <linux/atomic.h>
#include <linux/kasan.h>
+#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/memory_hotplug.h>
@@ -589,7 +590,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
atomic_set(&object->use_count, 1);
object->flags = OBJECT_ALLOCATED;
object->pointer = ptr;
- object->size = size;
+ object->size = kfence_ksize((void *)ptr) ?: size;
object->excess_ref = 0;
object->min_count = min_count;
object->count = 0; /* white color initially */
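
The changed line relies on the GNU "a ?: b" extension: a is evaluated once and used if nonzero, otherwise b is used, so the recorded size is overridden only when the pointer belongs to the special allocator. A tiny sketch with a hypothetical lookup in place of kfence_ksize():

#include <stdio.h>

/* Hypothetical: returns the real allocation size if ptr came from a special
 * allocator, or 0 if it did not (mirroring how kfence_ksize() behaves). */
static size_t special_alloc_size(const void *ptr)
{
	(void)ptr;
	return 0;   /* pretend this pointer is an ordinary allocation */
}

int main(void)
{
	const void *ptr = "x";
	size_t requested = 128;

	/* GNU C: "a ?: b" falls back to b only when a evaluates to zero. */
	size_t tracked = special_alloc_size(ptr) ?: requested;

	printf("tracked size: %zu\n", tracked);   /* 128 here */
	return 0;
}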
diff --git a/mm/memory.c b/mm/memory.c
index 5efa07fb6cdc..550405fc3b5e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -166,7 +166,7 @@ static int __init init_zero_pfn(void)
zero_pfn = page_to_pfn(ZERO_PAGE(0));
return 0;
}
-core_initcall(init_zero_pfn);
+early_initcall(init_zero_pfn);
void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
{
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 61ee40ed804e..459d195d2ff6 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -501,10 +501,33 @@ static int mn_hlist_invalidate_range_start(
"");
WARN_ON(mmu_notifier_range_blockable(range) ||
_ret != -EAGAIN);
+ /*
+ * We call all the notifiers on any EAGAIN;
+ * a notifier has no way to know whether its
+ * start method failed, so a start that can
+ * return EAGAIN must not also provide end.
+ */
+ WARN_ON(ops->invalidate_range_end);
ret = _ret;
}
}
}
+
+ if (ret) {
+ /*
+ * Must be non-blocking to get here. If there are multiple
+ * notifiers and one or more failed to start, any that started
+ * successfully expect their end to be called. Do so now.
+ */
+ hlist_for_each_entry_rcu(subscription, &subscriptions->list,
+ hlist, srcu_read_lock_held(&srcu)) {
+ if (!subscription->ops->invalidate_range_end)
+ continue;
+
+ subscription->ops->invalidate_range_end(subscription,
+ range);
+ }
+ }
srcu_read_unlock(&srcu, id);
return ret;
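
The recovery path added here follows a common pattern: run every start callback, and if any fails with a retryable error, invoke end on every subscriber that registered one, relying on the rule (enforced by the new WARN_ON) that a start which can fail must not register an end. A self-contained sketch with hypothetical subscriber callbacks:

#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct subscriber {
	const char *name;
	int  (*start)(struct subscriber *);
	void (*end)(struct subscriber *);
};

static int  ok_start(struct subscriber *s)   { printf("%s: start\n", s->name); return 0; }
static void ok_end(struct subscriber *s)     { printf("%s: end\n", s->name); }
static int  busy_start(struct subscriber *s) { printf("%s: busy\n", s->name); return -EAGAIN; }

static int range_start(struct subscriber *subs, size_t n)
{
	int ret = 0;

	for (size_t i = 0; i < n; i++) {
		int r = subs[i].start(&subs[i]);

		if (r) {
			/* A start that can fail must not also register an end,
			 * because it cannot tell later that its start failed. */
			assert(!subs[i].end);
			ret = r;
		}
	}

	if (ret) {
		/* Some starts may have succeeded; give them their end now. */
		for (size_t i = 0; i < n; i++)
			if (subs[i].end)
				subs[i].end(&subs[i]);
	}
	return ret;
}

int main(void)
{
	struct subscriber subs[] = {
		{ "a", ok_start,   ok_end },
		{ "b", busy_start, NULL   },
	};

	return range_start(subs, 2) == -EAGAIN ? 0 : 1;
}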
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index eb34d204d4ee..9e35b636a393 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2833,6 +2833,22 @@ void wait_on_page_writeback(struct page *page)
}
EXPORT_SYMBOL_GPL(wait_on_page_writeback);
+/*
+ * Wait for a page to complete writeback. Returns -EINTR if we get a
+ * fatal signal while waiting.
+ */
+int wait_on_page_writeback_killable(struct page *page)
+{
+ while (PageWriteback(page)) {
+ trace_wait_on_page_writeback(page, page_mapping(page));
+ if (wait_on_page_bit_killable(page, PG_writeback))
+ return -EINTR;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable);
+
/**
* wait_for_stable_page() - wait for writeback to finish, if necessary.
* @page: The page to wait on.
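
wait_on_page_writeback_killable() has the usual killable-wait shape: re-check the condition in a loop and return -EINTR as soon as the wait reports interruption, instead of sleeping through a fatal signal. A userspace sketch with stand-ins for the page bit and the killable wait (not the kernel primitives):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical page state and wait primitive standing in for the kernel ones. */
static bool writeback_pending = true;
static int  pending_fatal_signal;

static int wait_for_writeback_bit(void)
{
	if (pending_fatal_signal)
		return -1;              /* interrupted before the bit cleared */
	writeback_pending = false;      /* pretend the I/O completed */
	return 0;
}

/* Same shape as wait_on_page_writeback_killable(): loop until the condition
 * clears, but report -EINTR when the wait is interrupted. */
static int wait_writeback_killable(void)
{
	while (writeback_pending) {
		if (wait_for_writeback_bit())
			return -EINTR;
	}
	return 0;
}

int main(void)
{
	printf("first wait:  %d\n", wait_writeback_killable());  /* 0: completed */
	writeback_pending = true;
	pending_fatal_signal = 1;
	printf("second wait: %d\n", wait_writeback_killable());  /* -EINTR */
	return 0;
}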
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 65cdf844c8ad..655dc5895604 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -77,12 +77,14 @@ static void unpoison_page(struct page *page)
void *addr;
addr = kmap_atomic(page);
+ kasan_disable_current();
/*
* Page poisoning when enabled poisons each and every page
* that is freed to buddy. Thus no extra check is done to
* see if a page was poisoned.
*/
- check_poison_mem(addr, PAGE_SIZE);
+ check_poison_mem(kasan_reset_tag(addr), PAGE_SIZE);
+ kasan_enable_current();
kunmap_atomic(addr);
}
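
The hunk wraps the poison scan in kasan_disable_current()/kasan_enable_current() because the checker deliberately reads memory the sanitizer would otherwise complain about. A loose userspace analogue, assuming a build with AddressSanitizer, is to exempt just the scanning function:

#include <stdio.h>
#include <string.h>

#define POISON_BYTE 0xaa

/* A poison checker may need to read memory the sanitizer considers
 * off-limits (freed or redzoned). Exempting only the scan function is the
 * userspace analogue of wrapping check_poison_mem() in
 * kasan_disable_current()/kasan_enable_current(). The buffer below is
 * live, so this toy also works without the attribute. */
__attribute__((no_sanitize_address))
static int still_poisoned(const unsigned char *p, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if (p[i] != POISON_BYTE)
			return 0;
	return 1;
}

int main(void)
{
	unsigned char buf[64];

	memset(buf, POISON_BYTE, sizeof(buf));   /* "freeing" poisons the buffer */
	buf[10] = 0;                             /* simulate a corrupting write */
	printf("poison intact: %s\n", still_poisoned(buf, sizeof(buf)) ? "yes" : "no");
	return 0;
}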
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 18b768ac7dca..095d7eaa0db4 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -87,7 +87,7 @@ extern spinlock_t pcpu_lock;
extern struct list_head *pcpu_chunk_lists;
extern int pcpu_nr_slots;
-extern int pcpu_nr_empty_pop_pages;
+extern int pcpu_nr_empty_pop_pages[];
extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index c8400a2adbc2..f6026dbcdf6b 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -145,6 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
int slot, max_nr_alloc;
int *buffer;
enum pcpu_chunk_type type;
+ int nr_empty_pop_pages;
alloc_buffer:
spin_lock_irq(&pcpu_lock);
@@ -165,7 +166,11 @@ alloc_buffer:
goto alloc_buffer;
}
-#define PL(X) \
+ nr_empty_pop_pages = 0;
+ for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+ nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
+
+#define PL(X) \
seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
seq_printf(m,
@@ -196,7 +201,7 @@ alloc_buffer:
PU(nr_max_chunks);
PU(min_alloc_size);
PU(max_alloc_size);
- P("empty_pop_pages", pcpu_nr_empty_pop_pages);
+ P("empty_pop_pages", nr_empty_pop_pages);
seq_putc(m, '\n');
#undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index 6596a0a4286e..23308113a5ff 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -173,10 +173,10 @@ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */
static LIST_HEAD(pcpu_map_extend_chunks);
/*
- * The number of empty populated pages, protected by pcpu_lock. The
- * reserved chunk doesn't contribute to the count.
+ * The number of empty populated pages by chunk type, protected by pcpu_lock.
+ * The reserved chunk doesn't contribute to the count.
*/
-int pcpu_nr_empty_pop_pages;
+int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
/*
* The number of populated pages in use by the allocator, protected by
@@ -556,7 +556,7 @@ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
{
chunk->nr_empty_pop_pages += nr;
if (chunk != pcpu_reserved_chunk)
- pcpu_nr_empty_pop_pages += nr;
+ pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
}
/*
@@ -1832,7 +1832,7 @@ area_found:
mutex_unlock(&pcpu_alloc_mutex);
}
- if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
+ if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW)
pcpu_schedule_balance_work();
/* clear the areas and return address relative to base address */
@@ -2000,7 +2000,7 @@ retry_pop:
pcpu_atomic_alloc_failed = false;
} else {
nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
- pcpu_nr_empty_pop_pages,
+ pcpu_nr_empty_pop_pages[type],
0, PCPU_EMPTY_POP_PAGES_HIGH);
}
@@ -2580,7 +2580,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
/* link the first chunk in */
pcpu_first_chunk = chunk;
- pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
+ pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(pcpu_first_chunk, -1);
/* include all regions of the first chunk */
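
The single pcpu_nr_empty_pop_pages counter becomes an array indexed by chunk type: updates and watermark checks use the caller's own type, while reporting (the percpu-stats.c hunk above) sums over all types. A minimal sketch with hypothetical type names:

#include <stdio.h>

/* Hypothetical chunk types, mirroring the idea of PCPU_NR_CHUNK_TYPES. */
enum chunk_type { CHUNK_ROOT, CHUNK_MEMCG, NR_CHUNK_TYPES };

#define EMPTY_POP_PAGES_LOW 2

/* One counter per type instead of a single global. */
static int nr_empty_pop_pages[NR_CHUNK_TYPES];

static void update_empty_pages(enum chunk_type type, int nr)
{
	nr_empty_pop_pages[type] += nr;
}

static int need_balance(enum chunk_type type)
{
	/* Watermark checks consult only the caller's own type. */
	return nr_empty_pop_pages[type] < EMPTY_POP_PAGES_LOW;
}

static int total_empty_pages(void)
{
	/* Reporting (as in percpu-stats) sums across all types. */
	int total = 0;

	for (int t = 0; t < NR_CHUNK_TYPES; t++)
		total += nr_empty_pop_pages[t];
	return total;
}

int main(void)
{
	update_empty_pages(CHUNK_ROOT, 4);
	update_empty_pages(CHUNK_MEMCG, 1);
	printf("root needs balance:  %d\n", need_balance(CHUNK_ROOT));   /* 0 */
	printf("memcg needs balance: %d\n", need_balance(CHUNK_MEMCG));  /* 1 */
	printf("total empty pages:   %d\n", total_empty_pages());        /* 5 */
	return 0;
}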
diff --git a/mm/z3fold.c b/mm/z3fold.c
index b5dafa7e44e4..9d889ad2bb86 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -1346,8 +1346,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
page = list_entry(pos, struct page, lru);
zhdr = page_address(page);
- if (test_bit(PAGE_HEADLESS, &page->private))
+ if (test_bit(PAGE_HEADLESS, &page->private)) {
+ /*
+ * For non-headless pages, we wait to do this
+ * until we have the page lock to avoid racing
+ * with __z3fold_alloc(). Headless pages don't
+ * have a lock (and __z3fold_alloc() will never
+ * see them), but we still need to test and set
+ * PAGE_CLAIMED to avoid racing with
+ * z3fold_free(), so just do it now before
+ * leaving the loop.
+ */
+ if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+ continue;
+
break;
+ }
if (kref_get_unless_zero(&zhdr->refcount) == 0) {
zhdr = NULL;