diff options
Diffstat (limited to 'include')
32 files changed, 293 insertions, 104 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index e2e2bef07dd2..329b8c8ca703 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -10,6 +10,7 @@ #include <linux/mm_types.h> #include <linux/bug.h> #include <linux/errno.h> +#include <asm-generic/pgtable_uffd.h> #if 5 - defined(__PAGETABLE_P4D_FOLDED) - defined(__PAGETABLE_PUD_FOLDED) - \ defined(__PAGETABLE_PMD_FOLDED) != CONFIG_PGTABLE_LEVELS diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h new file mode 100644 index 000000000000..828966d4c281 --- /dev/null +++ b/include/asm-generic/pgtable_uffd.h @@ -0,0 +1,66 @@ +#ifndef _ASM_GENERIC_PGTABLE_UFFD_H +#define _ASM_GENERIC_PGTABLE_UFFD_H + +#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static __always_inline int pte_uffd_wp(pte_t pte) +{ + return 0; +} + +static __always_inline int pmd_uffd_wp(pmd_t pmd) +{ + return 0; +} + +static __always_inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte; +} + +static __always_inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pmd; +} + +static __always_inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return pte; +} + +static __always_inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pmd; +} + +static __always_inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return pte; +} + +static __always_inline int pte_swp_uffd_wp(pte_t pte) +{ + return 0; +} + +static __always_inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return pte; +} + +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_uffd_wp(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pmd; +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + +#endif /* _ASM_GENERIC_PGTABLE_UFFD_H */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f391f6b500b4..3f1649a8cf55 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -13,6 +13,7 @@ #include <linux/mmu_notifier.h> #include <linux/swap.h> +#include <linux/hugetlb_inline.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> @@ -398,7 +399,7 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) * We rely on tlb_end_vma() to issue a flush, such that when we reset * these values the batch is empty. */ - tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB); + tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 47f54b459c26..9acf654f0b19 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -162,7 +162,7 @@ static inline __u8 ror8(__u8 word, unsigned int shift) * * This is safe to use for 16- and 8-bit types as well. */ -static inline __s32 sign_extend32(__u32 value, int index) +static __always_inline __s32 sign_extend32(__u32 value, int index) { __u8 shift = 31 - index; return (__s32)(value << shift) >> shift; @@ -173,7 +173,7 @@ static inline __s32 sign_extend32(__u32 value, int index) * @value: value to sign extend * @index: 0 based bit index (0<=index<64) to sign bit */ -static inline __s64 sign_extend64(__u64 value, int index) +static __always_inline __s64 sign_extend64(__u64 value, int index) { __u8 shift = 63 - index; return (__s64)(value << shift) >> shift; diff --git a/include/linux/bits.h b/include/linux/bits.h index a740bbcf3cd2..4671fbf28842 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,12 +18,30 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#define GENMASK(h, l) \ +#if !defined(__ASSEMBLY__) && \ + (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#include <linux/build_bug.h> +#define GENMASK_INPUT_CHECK(h, l) \ + (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __builtin_constant_p((l) > (h)), (l) > (h), 0))) +#else +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK_INPUT_CHECK(h, l) 0 +#endif + +#define __GENMASK(h, l) \ (((~UL(0)) - (UL(1) << (l)) + 1) & \ (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define GENMASK(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ +#define __GENMASK_ULL(h, l) \ (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define GENMASK_ULL(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) #endif /* __LINUX_BITS_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5e88e7e33abe..034b0a644efc 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -347,7 +347,7 @@ static inline void *offset_to_ptr(const int *off) * compiler has support to do so. */ #define compiletime_assert(condition, msg) \ - _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) #define compiletime_assert_atomic_type(t) \ compiletime_assert(__native_word(t), \ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 72393a8c1a6c..e970f97a7fcb 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -129,22 +129,13 @@ struct ftrace_likely_data { #define __compiler_offsetof(a, b) __builtin_offsetof(a, b) /* - * Force always-inline if the user requests it so via the .config. * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. This makes extern inline behave as per gnu89 * semantics rather than c99. This prevents multiple symbol definition errors * of extern inline functions at link time. * A lot of inline functions can cause havoc with function tracing. - * Do not use __always_inline here, since currently it expands to inline again - * (which would break users of __always_inline). */ -#if !defined(CONFIG_OPTIMIZE_INLINING) -#define inline inline __attribute__((__always_inline__)) __gnu_inline \ - __inline_maybe_unused notrace -#else -#define inline inline __gnu_inline \ - __inline_maybe_unused notrace -#endif +#define inline inline __gnu_inline __inline_maybe_unused notrace /* * gcc provides both __inline__ and __inline as alternate spellings of diff --git a/include/linux/gfp.h b/include/linux/gfp.h index be2754841369..4aba4c86c626 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -124,6 +124,8 @@ struct vm_area_struct; * * Reclaim modifiers * ~~~~~~~~~~~~~~~~~ + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). * * %__GFP_IO can start physical IO. * diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f2df2247026a..cfbb0a87c5f0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -46,7 +46,7 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, pmd_t *old_pmd, pmd_t *new_pmd); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, - int prot_numa); + unsigned long cp_flags); vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, pgprot_t pgprot, bool write); diff --git a/include/linux/memory.h b/include/linux/memory.h index 0b8d791b6669..439a89e758d8 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -26,7 +26,6 @@ struct memory_block { unsigned long start_section_nr; unsigned long state; /* serialized by the dev->lock */ - int section_count; /* serialized by mem_sysfs_mutex */ int online_type; /* for passing data to online routine */ int phys_device; /* to which fru does this belong? */ struct device dev; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index f4d59155f3d4..ef55115320fb 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -47,9 +47,13 @@ enum { /* Types for control the zone type of onlined and offlined memory */ enum { - MMOP_OFFLINE = -1, - MMOP_ONLINE_KEEP, + /* Offline the memory. */ + MMOP_OFFLINE = 0, + /* Online the memory. Zone depends, see default_zone_for_pfn(). */ + MMOP_ONLINE, + /* Online the memory to ZONE_NORMAL. */ MMOP_ONLINE_KERNEL, + /* Online the memory to ZONE_MOVABLE. */ MMOP_ONLINE_MOVABLE, }; @@ -113,7 +117,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions); extern u64 max_mem_size; -extern bool memhp_auto_online; +extern int memhp_online_type_from_str(const char *str); + +/* Default online_type (MMOP_*) when new memory blocks are added. */ +extern int memhp_default_online_type; /* If movable_node boot option specified */ extern bool movable_node_enabled; static inline bool movable_node_is_enabled(void) diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 60d97e8fd3c0..8b37c4c9222c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -98,8 +98,6 @@ struct dev_pagemap_ops { * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref - * @dev: host device of the mapping for debug - * @data: private data pointer for page_free() * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table diff --git a/include/linux/mm.h b/include/linux/mm.h index 7dd5c4ccbf85..e2f938c5a9d8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -629,6 +629,12 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } + +static inline bool vma_is_accessible(struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); +} + #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow @@ -1765,9 +1771,26 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len, bool need_rmap_locks); + +/* + * Flags used by change_protection(). For now we make it a bitmap so + * that we can pass in multiple flags just like parameters. However + * for now all the callers are only use one of the flags at the same + * time. + */ +/* Whether we should allow dirty bit accounting */ +#define MM_CP_DIRTY_ACCT (1UL << 0) +/* Whether this protection change is for NUMA hints */ +#define MM_CP_PROT_NUMA (1UL << 1) +/* Whether this change is for write protecting */ +#define MM_CP_UFFD_WP (1UL << 2) /* do wp */ +#define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */ +#define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ + MM_CP_UFFD_WP_RESOLVE) + extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, - int dirty_accountable, int prot_numa); + unsigned long cp_flags); extern int mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 6f2fef7b0784..219bef41d87c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -6,19 +6,20 @@ #include <linux/swap.h> /** - * page_is_file_cache - should the page be on a file LRU or anon LRU? + * page_is_file_lru - should the page be on a file LRU or anon LRU? * @page: the page to test * - * Returns 1 if @page is page cache page backed by a regular filesystem, - * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. - * Used by functions that manipulate the LRU lists, to sort a page - * onto the right LRU list. + * Returns 1 if @page is a regular filesystem backed page cache page or a lazily + * freed anonymous page (e.g. via MADV_FREE). Returns 0 if @page is a normal + * anonymous page, a tmpfs page or otherwise ram or swap backed page. Used by + * functions that manipulate the LRU lists, to sort a page onto the right LRU + * list. * * We would like to get this info without a page flag, but the state * needs to survive until the page is last deleted from the LRU, which * could be as far down as __page_cache_release. */ -static inline int page_is_file_cache(struct page *page) +static inline int page_is_file_lru(struct page *page) { return !PageSwapBacked(page); } @@ -75,7 +76,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, */ static inline enum lru_list page_lru_base_type(struct page *page) { - if (page_is_file_cache(page)) + if (page_is_file_lru(page)) return LRU_INACTIVE_FILE; return LRU_INACTIVE_ANON; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index dd555e6d23f3..4aba6c0c2ba8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -289,8 +289,8 @@ struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ /* - * This struct defines a memory VMM memory area. There is one of these - * per VM-area/task. A VM area is any part of the process virtual memory + * This struct describes a virtual memory area. There is one of these + * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e84d448988b6..e9892bf9eba9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -100,41 +100,6 @@ struct free_area { unsigned long nr_free; }; -/* Used for pages not on another list */ -static inline void add_to_free_area(struct page *page, struct free_area *area, - int migratetype) -{ - list_add(&page->lru, &area->free_list[migratetype]); - area->nr_free++; -} - -/* Used for pages not on another list */ -static inline void add_to_free_area_tail(struct page *page, struct free_area *area, - int migratetype) -{ - list_add_tail(&page->lru, &area->free_list[migratetype]); - area->nr_free++; -} - -#ifdef CONFIG_SHUFFLE_PAGE_ALLOCATOR -/* Used to preserve page allocation order entropy */ -void add_to_free_area_random(struct page *page, struct free_area *area, - int migratetype); -#else -static inline void add_to_free_area_random(struct page *page, - struct free_area *area, int migratetype) -{ - add_to_free_area(page, area, migratetype); -} -#endif - -/* Used for pages which are on another list */ -static inline void move_to_free_area(struct page *page, struct free_area *area, - int migratetype) -{ - list_move(&page->lru, &area->free_list[migratetype]); -} - static inline struct page *get_page_from_free_area(struct free_area *area, int migratetype) { @@ -142,15 +107,6 @@ static inline struct page *get_page_from_free_area(struct free_area *area, struct page, lru); } -static inline void del_page_from_free_area(struct page *page, - struct free_area *area) -{ - list_del(&page->lru); - __ClearPageBuddy(page); - set_page_private(page, 0); - area->nr_free--; -} - static inline bool free_area_empty(struct free_area *area, int migratetype) { return list_empty(&area->free_list[migratetype]); @@ -708,7 +664,6 @@ struct deferred_split { * Memory statistics and page replacement data structures are maintained on a * per-zone basis. */ -struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; struct zonelist node_zonelists[MAX_ZONELISTS]; @@ -1187,7 +1142,9 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) struct mem_section_usage { +#ifdef CONFIG_SPARSEMEM_VMEMMAP DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); +#endif /* See declaration of similar field in struct zone */ unsigned long pageblock_flags[0]; }; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 77de28bfefb0..222f6f7b2bb3 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -63,6 +63,11 @@ * page_waitqueue(page) is a wait queue of all tasks waiting for the page * to become unlocked. * + * PG_swapbacked is set when a page uses swap as a backing storage. This are + * usually PageAnon or shmem pages but please note that even anonymous pages + * might lose their PG_swapbacked flag when they simply can be dropped (e.g. as + * a result of MADV_FREE). + * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * @@ -163,6 +168,9 @@ enum pageflags { /* non-lru isolated movable page */ PG_isolated = PG_reclaim, + + /* Only valid for buddy pages. Used to track pages that are reported */ + PG_reported = PG_uptodate, }; #ifndef __GENERATING_BOUNDS_H @@ -432,6 +440,14 @@ PAGEFLAG(Idle, idle, PF_ANY) #endif /* + * PageReported() is used to track reported free pages within the Buddy + * allocator. We can use the non-atomic version of the test and set + * operations as both should be shielded with the zone lock to prevent + * any possible races on the setting or clearing of the bit. + */ +__PAGEFLAG(Reported, reported, PF_NO_COMPOUND) + +/* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h new file mode 100644 index 000000000000..3b99e0ec24f2 --- /dev/null +++ b/include/linux/page_reporting.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PAGE_REPORTING_H +#define _LINUX_PAGE_REPORTING_H + +#include <linux/mmzone.h> +#include <linux/scatterlist.h> + +/* This value should always be a power of 2, see page_reporting_cycle() */ +#define PAGE_REPORTING_CAPACITY 32 + +struct page_reporting_dev_info { + /* function that alters pages to make them "reported" */ + int (*report)(struct page_reporting_dev_info *prdev, + struct scatterlist *sg, unsigned int nents); + + /* work struct for processing reports */ + struct delayed_work work; + + /* Current state of page reporting */ + atomic_t state; +}; + +/* Tear-down and bring-up for page reporting devices */ +void page_reporting_unregister(struct page_reporting_dev_info *prdev); +int page_reporting_register(struct page_reporting_dev_info *prdev); +#endif /*_LINUX_PAGE_REPORTING_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f56282491a48..a8f7bd8ea1c6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -341,9 +341,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) if (PageHuge(head)) return head; - VM_BUG_ON_PAGE(PageTail(head), head); - - return head + (index & (compound_nr(head) - 1)); + return head + (index & (hpage_nr_pages(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 4f052496cdfd..0a4f54dd4737 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -78,9 +78,9 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { - s64 ret = fbc->count; + /* Prevent reloads of fbc->count */ + s64 ret = READ_ONCE(fbc->count); - barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; return 0; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 40a7982b7285..45c05fd9c99d 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -5,6 +5,7 @@ #ifndef _LINUX_PROC_FS_H #define _LINUX_PROC_FS_H +#include <linux/compiler.h> #include <linux/types.h> #include <linux/fs.h> @@ -12,7 +13,21 @@ struct proc_dir_entry; struct seq_file; struct seq_operations; +enum { + /* + * All /proc entries using this ->proc_ops instance are never removed. + * + * If in doubt, ignore this flag. + */ +#ifdef MODULE + PROC_ENTRY_PERMANENT = 0U, +#else + PROC_ENTRY_PERMANENT = 1U << 0, +#endif +}; + struct proc_ops { + unsigned int proc_flags; int (*proc_open)(struct inode *, struct file *); ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); @@ -25,7 +40,7 @@ struct proc_ops { #endif int (*proc_mmap)(struct file *, struct vm_area_struct *); unsigned long (*proc_get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); -}; +} __randomize_layout; #ifdef CONFIG_PROC_FS diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 770c2bf3aa43..1672cf6f7614 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -21,7 +21,6 @@ struct seq_file { size_t pad_until; loff_t index; loff_t read_pos; - u64 version; struct mutex lock; const struct seq_operations *op; int poll_event; diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d56fefef8905..7a35a6901221 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -78,6 +78,7 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(unsigned int type, bool frontswap, unsigned long *fs_pages_to_unuse); +extern bool shmem_huge_enabled(struct vm_area_struct *vma); extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); @@ -114,15 +115,6 @@ static inline bool shmem_file(struct file *file) extern bool shmem_charge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages); -#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE -extern bool shmem_huge_enabled(struct vm_area_struct *vma); -#else -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) -{ - return false; -} -#endif - #ifdef CONFIG_SHMEM extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, struct vm_area_struct *dst_vma, diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 3efa97d482cb..24d49c732341 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -19,4 +19,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); + #endif diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 877fd239b6ff..d9b7c9132c2f 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte) if (pte_swp_soft_dirty(pte)) pte = pte_swp_clear_soft_dirty(pte); + if (pte_swp_uffd_wp(pte)) + pte = pte_swp_clear_uffd_wp(pte); arch_entry = __pte_to_swp_entry(pte); return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } @@ -348,7 +350,8 @@ static inline void num_poisoned_pages_inc(void) } #endif -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || \ + defined(CONFIG_DEVICE_PRIVATE) static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index ac9d71e24b81..a8e5f3ea9bb2 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -14,6 +14,8 @@ #include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */ #include <linux/fcntl.h> +#include <linux/mm.h> +#include <asm-generic/pgtable_uffd.h> /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -34,11 +36,14 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long src_start, unsigned long len, - bool *mmap_changing); + bool *mmap_changing, __u64 mode); extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start, unsigned long len, bool *mmap_changing); +extern int mwriteprotect_range(struct mm_struct *dst_mm, + unsigned long start, unsigned long len, + bool enable_wp, bool *mmap_changing); /* mm helpers */ static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, @@ -52,6 +57,23 @@ static inline bool userfaultfd_missing(struct vm_area_struct *vma) return vma->vm_flags & VM_UFFD_MISSING; } +static inline bool userfaultfd_wp(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_UFFD_WP; +} + +static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, + pte_t pte) +{ + return userfaultfd_wp(vma) && pte_uffd_wp(pte); +} + +static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, + pmd_t pmd) +{ + return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); +} + static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP); @@ -96,6 +118,24 @@ static inline bool userfaultfd_missing(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_wp(struct vm_area_struct *vma) +{ + return false; +} + +static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, + pte_t pte) +{ + return false; +} + +static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, + pmd_t pmd) +{ + return false; +} + + static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return false; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 47a3441cf4c4..ffef0f279747 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -73,9 +73,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_TRANSPARENT_HUGEPAGE THP_FAULT_ALLOC, THP_FAULT_FALLBACK, + THP_FAULT_FALLBACK_CHARGE, THP_COLLAPSE_ALLOC, THP_COLLAPSE_ALLOC_FAILED, THP_FILE_ALLOC, + THP_FILE_FALLBACK, + THP_FILE_FALLBACK_CHARGE, THP_FILE_MAPPED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, @@ -115,6 +118,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define THP_FILE_ALLOC ({ BUILD_BUG(); 0; }) +#define THP_FILE_FALLBACK ({ BUILD_BUG(); 0; }) +#define THP_FILE_FALLBACK_CHARGE ({ BUILD_BUG(); 0; }) #define THP_FILE_MAPPED ({ BUILD_BUG(); 0; }) #endif diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index d82a0f4e824d..70e32ff096ec 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -13,6 +13,7 @@ EM( SCAN_PMD_NULL, "pmd_null") \ EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ + EM( SCAN_PTE_UFFD_WP, "pte_uffd_wp") \ EM( SCAN_PAGE_RO, "no_writable_page") \ EM( SCAN_LACK_REFERENCED_PAGE, "lack_referenced_page") \ EM( SCAN_PAGE_NULL, "page_null") \ diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index a1675d43777e..5fb752034386 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -154,6 +154,7 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) {VM_ACCOUNT, "account" }, \ {VM_NORESERVE, "noreserve" }, \ {VM_HUGETLB, "hugetlb" }, \ + {VM_SYNC, "sync" }, \ __VM_ARCH_SPECIFIC_1 , \ {VM_WIPEONFORK, "wipeonfork" }, \ {VM_DONTDUMP, "dontdump" }, \ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index a5ab2973e8dc..74bb594ccb25 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -323,7 +323,7 @@ TRACE_EVENT(mm_vmscan_writepage, TP_fast_assign( __entry->pfn = page_to_pfn(page); __entry->reclaim_flags = trace_reclaim_flags( - page_is_file_cache(page)); + page_is_file_lru(page)); ), TP_printk("page=%p pfn=%lu flags=%s", diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 48f1a7c2f1f0..e7e98bde221f 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -19,7 +19,8 @@ * means the userland is reading). */ #define UFFD_API ((__u64)0xAA) -#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ +#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ @@ -34,7 +35,8 @@ #define UFFD_API_RANGE_IOCTLS \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ - (__u64)1 << _UFFDIO_ZEROPAGE) + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT) #define UFFD_API_RANGE_IOCTLS_BASIC \ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY) @@ -52,6 +54,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_API (0x3F) /* userfaultfd ioctl ids */ @@ -68,6 +71,8 @@ struct uffdio_copy) #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ + struct uffdio_writeprotect) /* read() structure */ struct uffd_msg { @@ -203,13 +208,14 @@ struct uffdio_copy { __u64 dst; __u64 src; __u64 len; +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) /* - * There will be a wrprotection flag later that allows to map - * pages wrprotected on the fly. And such a flag will be - * available if the wrprotection ioctl are implemented for the - * range according to the uffdio_register.ioctls. + * UFFDIO_COPY_MODE_WP will map the page write protected on + * the fly. UFFDIO_COPY_MODE_WP is available only if the + * write protected ioctl is implemented for the range + * according to the uffdio_register.ioctls. */ -#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_COPY_MODE_WP ((__u64)1<<1) __u64 mode; /* @@ -231,4 +237,24 @@ struct uffdio_zeropage { __s64 zeropage; }; +struct uffdio_writeprotect { + struct uffdio_range range; +/* + * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, + * unset the flag to undo protection of a range which was previously + * write protected. + * + * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up + * any wait thread after the operation succeeds. + * + * NOTE: Write protecting a region (WP=1) is unrelated to page faults, + * therefore DONTWAKE flag is meaningless with WP=1. Removing write + * protection (WP=0) in response to a page fault wakes the faulting + * task unless DONTWAKE is set. + */ +#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) +#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) + __u64 mode; +}; + #endif /* _LINUX_USERFAULTFD_H */ diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index a1966cd7b677..19974392d324 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -36,6 +36,7 @@ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ #define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ #define VIRTIO_BALLOON_F_PAGE_POISON 4 /* Guest is using page poisoning */ +#define VIRTIO_BALLOON_F_REPORTING 5 /* Page reporting virtqueue */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 |