diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 16:55:46 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 16:55:46 -0800 |
commit | f346b0becb1bc62e45495f9cdbae3eef35d0b635 (patch) | |
tree | ae79f3dfb8e031da51d38f0f095f89d7d23f3643 /include | |
parent | 00d59fde8532b2d42e80909d2e58678755e04da9 (diff) | |
parent | 0f4991e8fd48987ae476a92cdee6bfec4aff31b8 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
- large KASAN update to use arm's "software tag-based mode"
- a few misc things
- sh updates
- ocfs2 updates
- just about all of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (167 commits)
kernel/fork.c: mark 'stack_vm_area' with __maybe_unused
memcg, oom: notify on oom killer invocation from the charge path
mm, swap: fix swapoff with KSM pages
include/linux/gfp.h: fix typo
mm/hmm: fix memremap.h, move dev_page_fault_t callback to hmm
hugetlbfs: Use i_mmap_rwsem to fix page fault/truncate race
hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization
memory_hotplug: add missing newlines to debugging output
mm: remove __hugepage_set_anon_rmap()
include/linux/vmstat.h: remove unused page state adjustment macro
mm/page_alloc.c: allow error injection
mm: migrate: drop unused argument of migrate_page_move_mapping()
blkdev: avoid migration stalls for blkdev pages
mm: migrate: provide buffer_migrate_page_norefs()
mm: migrate: move migrate_page_lock_buffers()
mm: migrate: lock buffers before migrate_page_move_mapping()
mm: migration: factor out code to compute expected number of page references
mm, page_alloc: enable pcpu_drain with zone capability
kmemleak: add config to select auto scan
mm/page_alloc.c: don't call kasan_free_pages() at deferred mem init
...
Diffstat (limited to 'include')
31 files changed, 437 insertions, 188 deletions
diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index 296c65442f00..95a159a4137f 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -8,6 +8,7 @@ enum { EI_ETYPE_NULL, /* Return NULL if failure */ EI_ETYPE_ERRNO, /* Return -ERRNO if failure */ EI_ETYPE_ERRNO_NULL, /* Return -ERRNO or NULL if failure */ + EI_ETYPE_TRUE, /* Return true if failure */ }; struct error_injection_entry { diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a9cac82e9a7a..05e61e6c843f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1057,6 +1057,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -1084,6 +1085,10 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } +static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 9a6bc0951cfa..c31157135598 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -258,6 +258,14 @@ static inline void wb_get(struct bdi_writeback *wb) */ static inline void wb_put(struct bdi_writeback *wb) { + if (WARN_ON_ONCE(!wb->bdi)) { + /* + * A driver bug might cause a file to be removed before bdi was + * initialized. + */ + return; + } + if (wb != &wb->bdi->wb) percpu_ref_put(&wb->refcnt); } diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 3e7dafb3ea80..39f668d5066b 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -16,9 +16,13 @@ /* all clang versions usable with the kernel support KASAN ABI version 5 */ #define KASAN_ABI_VERSION 5 +#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) /* emulate gcc's __SANITIZE_ADDRESS__ flag */ -#if __has_feature(address_sanitizer) #define __SANITIZE_ADDRESS__ +#define __no_sanitize_address \ + __attribute__((no_sanitize("address", "hwaddress"))) +#else +#define __no_sanitize_address #endif /* diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2010493e1040..5776da43da97 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -143,6 +143,12 @@ #define KASAN_ABI_VERSION 3 #endif +#if __has_attribute(__no_sanitize_address__) +#define __no_sanitize_address __attribute__((no_sanitize_address)) +#else +#define __no_sanitize_address +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index fe07b680dd4a..19f32b0c29af 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -200,19 +200,6 @@ #define __noreturn __attribute__((__noreturn__)) /* - * Optional: only supported since gcc >= 4.8 - * Optional: not supported by icc - * - * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fsanitize_005faddress-function-attribute - * clang: https://clang.llvm.org/docs/AttributeReference.html#no-sanitize-address-no-address-safety-analysis - */ -#if __has_attribute(__no_sanitize_address__) -# define __no_sanitize_address __attribute__((__no_sanitize_address__)) -#else -# define __no_sanitize_address -#endif - -/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute * clang: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 6d52ce6af4ff..811c77743dad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3269,8 +3269,12 @@ extern int generic_check_addressable(unsigned, u64); extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); +extern int buffer_migrate_page_norefs(struct address_space *, + struct page *, struct page *, + enum migrate_mode); #else #define buffer_migrate_page NULL +#define buffer_migrate_page_norefs NULL #endif extern int setattr_prepare(struct dentry *, struct iattr *); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0705164f928c..5f5e25fd6149 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -81,7 +81,7 @@ struct vm_area_struct; * * %__GFP_HARDWALL enforces the cpuset memory allocation policy. * - * %__GFP_THISNODE forces the allocation to be satisified from the requested + * %__GFP_THISNODE forces the allocation to be satisfied from the requested * node with no fallbacks or placement policy enforcements. * * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 0690679832d4..ea5cdbd8c2c3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -36,7 +36,31 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); -extern unsigned long totalhigh_pages; +extern atomic_long_t _totalhigh_pages; +static inline unsigned long totalhigh_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalhigh_pages); +} + +static inline void totalhigh_pages_inc(void) +{ + atomic_long_inc(&_totalhigh_pages); +} + +static inline void totalhigh_pages_dec(void) +{ + atomic_long_dec(&_totalhigh_pages); +} + +static inline void totalhigh_pages_add(long count) +{ + atomic_long_add(count, &_totalhigh_pages); +} + +static inline void totalhigh_pages_set(long val) +{ + atomic_long_set(&_totalhigh_pages, val); +} void kmap_flush_unused(void); @@ -51,7 +75,7 @@ static inline struct page *kmap_to_page(void *addr) return virt_to_page(addr); } -#define totalhigh_pages 0UL +static inline unsigned long totalhigh_pages(void) { return 0UL; } #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) diff --git a/include/linux/hmm.h b/include/linux/hmm.h index c6fb869a81c0..66f9ebbb1df3 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -69,6 +69,7 @@ #define LINUX_HMM_H #include <linux/kconfig.h> +#include <asm/pgtable.h> #if IS_ENABLED(CONFIG_HMM) @@ -486,6 +487,7 @@ struct hmm_devmem_ops { * @device: device to bind resource to * @ops: memory operations callback * @ref: per CPU refcount + * @page_fault: callback when CPU fault on an unaddressable device page * * This an helper structure for device drivers that do not wish to implement * the gory details related to hotplugging new memoy and allocating struct @@ -493,7 +495,28 @@ struct hmm_devmem_ops { * * Device drivers can directly use ZONE_DEVICE memory on their own if they * wish to do so. + * + * The page_fault() callback must migrate page back, from device memory to + * system memory, so that the CPU can access it. This might fail for various + * reasons (device issues, device have been unplugged, ...). When such error + * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and + * set the CPU page table entry to "poisoned". + * + * Note that because memory cgroup charges are transferred to the device memory, + * this should never fail due to memory restrictions. However, allocation + * of a regular system page might still fail because we are out of memory. If + * that happens, the page_fault() callback must return VM_FAULT_OOM. + * + * The page_fault() callback can also try to migrate back multiple pages in one + * chunk, as an optimization. It must, however, prioritize the faulting address + * over all the others. */ +typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); + struct hmm_devmem { struct completion completion; unsigned long pfn_first; @@ -503,6 +526,7 @@ struct hmm_devmem { struct dev_pagemap pagemap; const struct hmm_devmem_ops *ops; struct percpu_ref ref; + dev_page_fault_t page_fault; }; /* @@ -512,8 +536,7 @@ struct hmm_devmem { * enough and allocate struct page for it. * * The device driver can wrap the hmm_devmem struct inside a private device - * driver struct. The device driver must call hmm_devmem_remove() before the - * device goes away and before freeing the hmm_devmem struct memory. + * driver struct. */ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, struct device *device, @@ -521,7 +544,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, struct device *device, struct resource *res); -void hmm_devmem_remove(struct hmm_devmem *devmem); /* * hmm_devmem_page_set_drvdata - set per-page driver data field diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4663ee96cf59..381e872bfde0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -93,7 +93,11 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); extern unsigned long transparent_hugepage_flags; -static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) +/* + * to be used on vmas which are known to support THP. + * Use transparent_hugepage_enabled otherwise + */ +static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { if (vma->vm_flags & VM_NOHUGEPAGE) return false; @@ -117,6 +121,8 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) return false; } +bool transparent_hugepage_enabled(struct vm_area_struct *vma); + #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) @@ -257,6 +263,11 @@ static inline bool thp_migration_supported(void) #define hpage_nr_pages(x) 1 +static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) +{ + return false; +} + static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma) { return false; diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 46aae129917c..b40ea104dd36 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -14,13 +14,13 @@ struct task_struct; #include <asm/kasan.h> #include <asm/pgtable.h> -extern unsigned char kasan_zero_page[PAGE_SIZE]; -extern pte_t kasan_zero_pte[PTRS_PER_PTE]; -extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; -extern pud_t kasan_zero_pud[PTRS_PER_PUD]; -extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D]; +extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; +extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; +extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; +extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; +extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; -int kasan_populate_zero_shadow(const void *shadow_start, +int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); static inline void *kasan_mem_to_shadow(const void *addr) @@ -45,22 +45,24 @@ void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); -void kasan_cache_shrink(struct kmem_cache *cache); -void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_poison_slab(struct page *page); void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); void kasan_poison_object_data(struct kmem_cache *cache, void *object); -void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); +void * __must_check kasan_init_slab_obj(struct kmem_cache *cache, + const void *object); -void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); +void * __must_check kasan_kmalloc_large(const void *ptr, size_t size, + gfp_t flags); void kasan_kfree_large(void *ptr, unsigned long ip); void kasan_poison_kfree(void *ptr, unsigned long ip); -void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, - gfp_t flags); -void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); +void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size, gfp_t flags); +void * __must_check kasan_krealloc(const void *object, size_t new_size, + gfp_t flags); -void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); +void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); struct kasan_cache { @@ -97,27 +99,40 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags) {} -static inline void kasan_cache_shrink(struct kmem_cache *cache) {} -static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct page *page) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} static inline void kasan_poison_object_data(struct kmem_cache *cache, void *object) {} -static inline void kasan_init_slab_obj(struct kmem_cache *cache, - const void *object) {} +static inline void *kasan_init_slab_obj(struct kmem_cache *cache, + const void *object) +{ + return (void *)object; +} -static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} +static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) +{ + return ptr; +} static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} -static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, - size_t size, gfp_t flags) {} -static inline void kasan_krealloc(const void *object, size_t new_size, - gfp_t flags) {} +static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size, gfp_t flags) +{ + return (void *)object; +} +static inline void *kasan_krealloc(const void *object, size_t new_size, + gfp_t flags) +{ + return (void *)object; +} -static inline void kasan_slab_alloc(struct kmem_cache *s, void *object, - gfp_t flags) {} +static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, + gfp_t flags) +{ + return object; +} static inline bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip) { @@ -140,4 +155,40 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } #endif /* CONFIG_KASAN */ +#ifdef CONFIG_KASAN_GENERIC + +#define KASAN_SHADOW_INIT 0 + +void kasan_cache_shrink(struct kmem_cache *cache); +void kasan_cache_shutdown(struct kmem_cache *cache); + +#else /* CONFIG_KASAN_GENERIC */ + +static inline void kasan_cache_shrink(struct kmem_cache *cache) {} +static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} + +#endif /* CONFIG_KASAN_GENERIC */ + +#ifdef CONFIG_KASAN_SW_TAGS + +#define KASAN_SHADOW_INIT 0xFF + +void kasan_init_tags(void); + +void *kasan_reset_tag(const void *addr); + +void kasan_report(unsigned long addr, size_t size, + bool is_write, unsigned long ip); + +#else /* CONFIG_KASAN_SW_TAGS */ + +static inline void kasan_init_tags(void) { } + +static inline void *kasan_reset_tag(const void *addr) +{ + return (void *)addr; +} + +#endif /* CONFIG_KASAN_SW_TAGS */ + #endif /* LINUX_KASAN_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index aee299a6aa76..64c41cf45590 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -154,7 +154,6 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, phys_addr_t *out_end); -void __memblock_free_early(phys_addr_t base, phys_addr_t size); void __memblock_free_late(phys_addr_t base, phys_addr_t size); /** @@ -320,6 +319,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 +#define MEMBLOCK_ALLOC_KASAN 1 /* We are using top down, so it is safe to use 0 here */ #define MEMBLOCK_LOW_LIMIT 0 @@ -414,13 +414,13 @@ static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, static inline void __init memblock_free_early(phys_addr_t base, phys_addr_t size) { - __memblock_free_early(base, size); + memblock_free(base, size); } static inline void __init memblock_free_early_nid(phys_addr_t base, phys_addr_t size, int nid) { - __memblock_free_early(base, size); + memblock_free(base, size); } static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7ab2120155a4..83ae11cbd12c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -526,9 +526,11 @@ void mem_cgroup_handle_over_high(void); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); -void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, +void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p); +void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg); + static inline void mem_cgroup_enter_user_fault(void) { WARN_ON(current->in_user_fault); @@ -970,7 +972,12 @@ static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg) } static inline void -mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) +mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p) +{ +} + +static inline void +mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ffd9cd10fcf3..07da5c6c5ba0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -107,8 +107,8 @@ static inline bool movable_node_is_enabled(void) } #ifdef CONFIG_MEMORY_HOTREMOVE -extern int arch_remove_memory(u64 start, u64 size, - struct vmem_altmap *altmap); +extern int arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ @@ -326,15 +326,14 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); -extern int add_memory_resource(int nid, struct resource *resource, bool online); +extern int add_memory_resource(int nid, struct resource *resource); extern int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); -extern int sparse_add_one_section(struct pglist_data *pgdat, - unsigned long start_pfn, struct vmem_altmap *altmap); +extern int sparse_add_one_section(int nid, unsigned long start_pfn, + struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 0ac69ddf5fc4..f0628660d541 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -4,8 +4,6 @@ #include <linux/ioport.h> #include <linux/percpu-refcount.h> -#include <asm/pgtable.h> - struct resource; struct device; @@ -66,62 +64,34 @@ enum memory_type { }; /* - * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two - * callbacks: - * page_fault() - * page_free() - * * Additional notes about MEMORY_DEVICE_PRIVATE may be found in * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief * explanation in include/linux/memory_hotplug.h. * - * The page_fault() callback must migrate page back, from device memory to - * system memory, so that the CPU can access it. This might fail for various - * reasons (device issues, device have been unplugged, ...). When such error - * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and - * set the CPU page table entry to "poisoned". - * - * Note that because memory cgroup charges are transferred to the device memory, - * this should never fail due to memory restrictions. However, allocation - * of a regular system page might still fail because we are out of memory. If - * that happens, the page_fault() callback must return VM_FAULT_OOM. - * - * The page_fault() callback can also try to migrate back multiple pages in one - * chunk, as an optimization. It must, however, prioritize the faulting address - * over all the others. - * - * * The page_free() callback is called once the page refcount reaches 1 * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. * This allows the device driver to implement its own memory management.) - * - * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. */ -typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, - unsigned long addr, - const struct page *page, - unsigned int flags, - pmd_t *pmdp); typedef void (*dev_page_free_t)(struct page *page, void *data); /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings - * @page_fault: callback when CPU fault on an unaddressable device page * @page_free: free page callback when page refcount reaches 1 * @altmap: pre-allocated/reserved memory for vmemmap allocations * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping + * @kill: callback to transition @ref to the dead state * @dev: host device of the mapping for debug * @data: private data pointer for page_free() * @type: memory type: see MEMORY_* in memory_hotplug.h */ struct dev_pagemap { - dev_page_fault_t page_fault; dev_page_free_t page_free; struct vmem_altmap altmap; bool altmap_valid; struct resource res; struct percpu_ref *ref; + void (*kill)(struct percpu_ref *ref); struct device *dev; void *data; enum memory_type type; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index f2b4abbca55e..e13d9bf2f9a5 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -29,7 +29,7 @@ enum migrate_reason { }; /* In mm/debug.c; also keep sync with include/trace/events/migrate.h */ -extern char *migrate_reason_names[MR_TYPES]; +extern const char *migrate_reason_names[MR_TYPES]; static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) @@ -77,8 +77,7 @@ extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, - struct buffer_head *head, enum migrate_mode mode, + struct page *newpage, struct page *page, enum migrate_mode mode, int extra_count); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 5411de93a363..ea1f12d15365 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -48,7 +48,32 @@ static inline void set_max_mapnr(unsigned long limit) static inline void set_max_mapnr(unsigned long limit) { } #endif -extern unsigned long totalram_pages; +extern atomic_long_t _totalram_pages; +static inline unsigned long totalram_pages(void) +{ + return (unsigned long)atomic_long_read(&_totalram_pages); +} + +static inline void totalram_pages_inc(void) +{ + atomic_long_inc(&_totalram_pages); +} + +static inline void totalram_pages_dec(void) +{ + atomic_long_dec(&_totalram_pages); +} + +static inline void totalram_pages_add(long count) +{ + atomic_long_add(count, &_totalram_pages); +} + +static inline void totalram_pages_set(long val) +{ + atomic_long_set(&_totalram_pages, val); +} + extern void * high_memory; extern int page_cluster; @@ -804,6 +829,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) +#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -814,6 +840,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) #define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0)) +#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0)) /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ #ifdef NODE_NOT_IN_PAGE_FLAGS @@ -836,6 +863,7 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) +#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) @@ -1101,6 +1129,32 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) } #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_KASAN_SW_TAGS +static inline u8 page_kasan_tag(const struct page *page) +{ + return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; +} + +static inline void page_kasan_tag_set(struct page *page, u8 tag) +{ + page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; +} + +static inline void page_kasan_tag_reset(struct page *page) +{ + page_kasan_tag_set(page, 0xff); +} +#else +static inline u8 page_kasan_tag(const struct page *page) +{ + return 0xff; +} + +static inline void page_kasan_tag_set(struct page *page, u8 tag) { } +static inline void page_kasan_tag_reset(struct page *page) { } +#endif + static inline struct zone *page_zone(const struct page *page) { return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; @@ -1397,6 +1451,8 @@ struct mm_walk { void *private; }; +struct mmu_notifier_range; + int walk_page_range(unsigned long addr, unsigned long end, struct mm_walk *walk); int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk); @@ -1405,8 +1461,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, - unsigned long *start, unsigned long *end, - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); + struct mmu_notifier_range *range, + pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); int follow_phys(struct vm_area_struct *vma, unsigned long address, @@ -1900,13 +1956,6 @@ static inline bool ptlock_init(struct page *page) return true; } -/* Reset page->mapping so free_pages_check won't complain. */ -static inline void pte_lock_deinit(struct page *page) -{ - page->mapping = NULL; - ptlock_free(page); -} - #else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. @@ -1917,7 +1966,7 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) } static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct page *page) { return true; } -static inline void pte_lock_deinit(struct page *page) {} +static inline void ptlock_free(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ static inline void pgtable_init(void) @@ -1937,7 +1986,7 @@ static inline bool pgtable_page_ctor(struct page *page) static inline void pgtable_page_dtor(struct page *page) { - pte_lock_deinit(page); + ptlock_free(page); __ClearPageTable(page); dec_zone_page_state(page, NR_PAGETABLE); } @@ -2054,7 +2103,7 @@ extern void free_initmem(void); * Return pages freed into the buddy system. */ extern unsigned long free_reserved_area(void *start, void *end, - int poison, char *s); + int poison, const char *s); #ifdef CONFIG_HIGHMEM /* @@ -2202,6 +2251,7 @@ extern void zone_pcp_reset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; +extern int watermark_boost_factor; extern int watermark_scale_factor; /* nommu.c */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 9893a6432adf..4050ec1c3b45 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -25,6 +25,13 @@ struct mmu_notifier_mm { spinlock_t lock; }; +struct mmu_notifier_range { + struct mm_struct *mm; + unsigned long start; + unsigned long end; + bool blockable; +}; + struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is @@ -146,12 +153,9 @@ struct mmu_notifier_ops { * */ int (*invalidate_range_start)(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end, - bool blockable); + const struct mmu_notifier_range *range); void (*invalidate_range_end)(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end); + const struct mmu_notifier_range *range); /* * invalidate_range() is either called between @@ -216,11 +220,8 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); -extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm, - unsigned long start, unsigned long end, - bool blockable); -extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, - unsigned long start, unsigned long end, +extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); +extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r, bool only_end); extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end); @@ -264,33 +265,37 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, __mmu_notifier_change_pte(mm, address, pte); } -static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void +mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { - if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range_start(mm, start, end, true); + if (mm_has_notifiers(range->mm)) { + range->blockable = true; + __mmu_notifier_invalidate_range_start(range); + } } -static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline int +mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { - if (mm_has_notifiers(mm)) - return __mmu_notifier_invalidate_range_start(mm, start, end, false); + if (mm_has_notifiers(range->mm)) { + range->blockable = false; + return __mmu_notifier_invalidate_range_start(range); + } return 0; } -static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void +mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { - if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range_end(mm, start, end, false); + if (mm_has_notifiers(range->mm)) + __mmu_notifier_invalidate_range_end(range, false); } -static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void +mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) { - if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range_end(mm, start, end, true); + if (mm_has_notifiers(range->mm)) + __mmu_notifier_invalidate_range_end(range, true); } static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, @@ -311,6 +316,17 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __mmu_notifier_mm_destroy(mm); } + +static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + range->mm = mm; + range->start = start; + range->end = end; +} + #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ @@ -420,10 +436,26 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) extern void mmu_notifier_call_srcu(struct rcu_head *rcu, void (*func)(struct rcu_head *rcu)); -extern void mmu_notifier_synchronize(void); #else /* CONFIG_MMU_NOTIFIER */ +struct mmu_notifier_range { + unsigned long start; + unsigned long end; +}; + +static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, + unsigned long start, + unsigned long end) +{ + range->start = start; + range->end = end; +} + +#define mmu_notifier_range_init(range, mm, start, end) \ + _mmu_notifier_range_init(range, start, end) + + static inline int mm_has_notifiers(struct mm_struct *mm) { return 0; @@ -451,24 +483,24 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm, { } -static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void +mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { } -static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline int +mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { return 0; } -static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline +void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { } -static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm, - unsigned long start, unsigned long end) +static inline void +mmu_notifier_invalidate_range_only_end(struct mmu_notifier_range *range) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 077d797d1f60..cc4a507d7ca4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -65,7 +65,7 @@ enum migratetype { }; /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */ -extern char * const migratetype_names[MIGRATE_TYPES]; +extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) @@ -269,9 +269,10 @@ enum zone_watermarks { NR_WMARK }; -#define min_wmark_pages(z) (z->watermark[WMARK_MIN]) -#define low_wmark_pages(z) (z->watermark[WMARK_LOW]) -#define high_wmark_pages(z) (z->watermark[WMARK_HIGH]) +#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) +#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) +#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) +#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) struct per_cpu_pages { int count; /* number of pages in the list */ @@ -362,7 +363,8 @@ struct zone { /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ - unsigned long watermark[NR_WMARK]; + unsigned long _watermark[NR_WMARK]; + unsigned long watermark_boost; unsigned long nr_reserved_highatomic; @@ -428,14 +430,8 @@ struct zone { * Write access to present_pages at runtime should be protected by * mem_hotplug_begin/end(). Any reader who can't tolerant drift of * present_pages should get_online_mems() to get a stable value. - * - * Read access to managed_pages should be safe because it's unsigned - * long. Write access to zone->managed_pages and totalram_pages are - * protected by managed_page_count_lock at runtime. Idealy only - * adjust_managed_page_count() should be used instead of directly - * touching zone->managed_pages and totalram_pages. */ - unsigned long managed_pages; + atomic_long_t managed_pages; unsigned long spanned_pages; unsigned long present_pages; @@ -524,6 +520,11 @@ enum pgdat_flags { PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ }; +static inline unsigned long zone_managed_pages(struct zone *zone) +{ + return (unsigned long)atomic_long_read(&zone->managed_pages); +} + static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; @@ -635,9 +636,8 @@ typedef struct pglist_data { #endif #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* - * Must be held any time you expect node_start_pfn, node_present_pages - * or node_spanned_pages stay constant. Holding this will also - * guarantee that any pfn_valid() stays that way. + * Must be held any time you expect node_start_pfn, + * node_present_pages, node_spanned_pages or nr_zones to stay constant. * * pgdat_resize_lock() and pgdat_resize_unlock() are provided to * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG @@ -691,8 +691,6 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; - /* Number of non-deferred pages */ - unsigned long static_init_pgcnt; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -820,7 +818,7 @@ static inline bool is_dev_zone(const struct zone *zone) */ static inline bool managed_zone(struct zone *zone) { - return zone->managed_pages; + return zone_managed_pages(zone); } /* Returns true if a zone has memory */ @@ -890,6 +888,8 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int watermark_boost_factor_sysctl_handler(struct ctl_table *, int, + void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; diff --git a/include/linux/oom.h b/include/linux/oom.h index 69864a547663..d07992009265 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -15,6 +15,13 @@ struct notifier_block; struct mem_cgroup; struct task_struct; +enum oom_constraint { + CONSTRAINT_NONE, + CONSTRAINT_CPUSET, + CONSTRAINT_MEMORY_POLICY, + CONSTRAINT_MEMCG, +}; + /* * Details of the page allocation that triggered the oom killer that are used to * determine what should be killed. @@ -42,6 +49,9 @@ struct oom_control { unsigned long totalpages; struct task_struct *chosen; unsigned long chosen_points; + + /* Used to print the constraint info. */ + enum oom_constraint constraint; }; extern struct mutex oom_lock; diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7ec86bf31ce4..1dda31825ec4 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -82,6 +82,16 @@ #define LAST_CPUPID_WIDTH 0 #endif +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_TAG_WIDTH 8 +#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ + > BITS_PER_LONG - NR_PAGEFLAGS +#error "KASAN: not enough bits in page flags for tag" +#endif +#else +#define KASAN_TAG_WIDTH 0 +#endif + /* * We are going to use the flags for the page to node mapping if its in * there. This includes the case where there is no node, so it is implicit. diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 50ce1bddaf56..39b4494e29f1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -669,6 +669,7 @@ PAGEFLAG_FALSE(DoubleMap) #define PAGE_TYPE_BASE 0xf0000000 /* Reserve 0x0000007f to catch underflows of page_mapcount */ +#define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_balloon 0x00000100 #define PG_kmemcg 0x00000200 @@ -677,6 +678,11 @@ PAGEFLAG_FALSE(DoubleMap) #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +static inline int page_has_type(struct page *page) +{ + return (int)page->page_type < PAGE_MAPCOUNT_RESERVE; +} + #define PAGE_TYPE_OPS(uname, lname) \ static __always_inline int Page##uname(struct page *page) \ { \ diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 4ae347cbc36d..4eb26d278046 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -30,8 +30,11 @@ static inline bool is_migrate_isolate(int migratetype) } #endif +#define SKIP_HWPOISON 0x1 +#define REPORT_FAILURE 0x2 + bool has_unmovable_pages(struct zone *zone, struct page *page, int count, - int migratetype, bool skip_hwpoisoned_pages); + int migratetype, int flags); void set_pageblock_migratetype(struct page *page, int migratetype); int move_freepages_block(struct zone *zone, struct page *page, int migratetype, int *num_movable); @@ -44,10 +47,14 @@ int move_freepages_block(struct zone *zone, struct page *page, * For isolating all pages in the range finally, the caller have to * free all pages in the range. test_page_isolated() can be used for * test it. + * + * The following flags are allowed (they can be combined in a bit mask) + * SKIP_HWPOISON - ignore hwpoison pages + * REPORT_FAILURE - report details about the failure to isolate the range */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype, bool skip_hwpoisoned_pages); + unsigned migratetype, int flags); /* * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 9132c5cb41f1..06a66327333d 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,10 +25,11 @@ #include <linux/types.h> +#define PB_migratetype_bits 3 /* Bit indices that affect a whole block of pages */ enum pageblock_bits { PB_migrate, - PB_migrate_end = PB_migrate + 3 - 1, + PB_migrate_end = PB_migrate + PB_migratetype_bits - 1, /* 3 bits required for migrate types */ PB_migrate_skip,/* If set the block is skipped by compaction */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 226f96f0dee0..e2d7039af6a3 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -537,6 +537,8 @@ static inline int wait_on_page_locked_killable(struct page *page) return wait_on_page_bit_killable(compound_head(page), PG_locked); } +extern void put_and_wait_on_page_locked(struct page *page); + /* * Wait for a page to complete writeback */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 918f374e7156..6d9bd6fc0c57 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -314,22 +314,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) { - int is_dma = 0; - int type_dma = 0; - int is_reclaimable; - #ifdef CONFIG_ZONE_DMA - is_dma = !!(flags & __GFP_DMA); - type_dma = is_dma * KMALLOC_DMA; -#endif - - is_reclaimable = !!(flags & __GFP_RECLAIMABLE); + /* + * The most common case is KMALLOC_NORMAL, so test for it + * with a single branch for both flags. + */ + if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0)) + return KMALLOC_NORMAL; /* - * If an allocation is both __GFP_DMA and __GFP_RECLAIMABLE, return - * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE + * At least one of the flags has to be set. If both are, __GFP_DMA + * is more important. */ - return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM; + return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM; +#else + return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL; +#endif } /* @@ -444,7 +444,7 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, { void *ret = kmem_cache_alloc(s, flags); - kasan_kmalloc(s, ret, size, flags); + ret = kasan_kmalloc(s, ret, size, flags); return ret; } @@ -455,7 +455,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, { void *ret = kmem_cache_alloc_node(s, gfpflags, node); - kasan_kmalloc(s, ret, size, gfpflags); + ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } #endif /* CONFIG_TRACING */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 3485c58cfd1c..9a5eafb7145b 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -104,4 +104,17 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, return object; } +/* + * We want to avoid an expensive divide : (offset / cache->size) + * Using the fact that size is a constant for a particular cache, + * we can replace (offset / cache->size) by + * reciprocal_divide(offset, cache->reciprocal_buffer_size) + */ +static inline unsigned int obj_to_index(const struct kmem_cache *cache, + const struct page *page, void *obj) +{ + u32 offset = (obj - page->s_mem); + return reciprocal_divide(offset, cache->reciprocal_buffer_size); +} + #endif /* _LINUX_SLAB_DEF_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index a8f6d5d89524..622025ac1461 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -235,7 +235,6 @@ struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ struct plist_node list; /* entry in swap_active_head */ - struct plist_node avail_lists[MAX_NUMNODES];/* entry in swap_avail_heads */ signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ @@ -276,6 +275,16 @@ struct swap_info_struct { */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct plist_node avail_lists[0]; /* + * entries in swap_avail_heads, one + * entry per node. + * Must be last as the number of the + * array is nr_node_ids, which is not + * a fixed value so have to allocate + * dynamically. + * And it has to be an array so that + * plist_for_each_* can work. + */ }; #ifdef CONFIG_64BIT @@ -310,7 +319,6 @@ void workingset_update_node(struct xa_node *node); } while (0) /* linux/mm/page_alloc.c */ -extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; extern unsigned long nr_free_buffer_pages(void); extern unsigned long nr_free_pagecache_pages(void); @@ -360,14 +368,8 @@ extern unsigned long vm_total_pages; extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; -extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); #else #define node_reclaim_mode 0 -static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, - unsigned int order) -{ - return 0; -} #endif extern int page_evictable(struct page *page); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index f25cef84b41d..2db8d60981fe 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -239,11 +239,6 @@ extern unsigned long node_page_state(struct pglist_data *pgdat, #define node_page_state(node, item) global_node_page_state(item) #endif /* CONFIG_NUMA */ -#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d) -#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) -#define add_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, __d) -#define sub_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, -(__d)) - #ifdef CONFIG_SMP void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); void __inc_zone_page_state(struct page *, enum zone_stat_item); diff --git a/include/linux/xxhash.h b/include/linux/xxhash.h index 9e1f42cb57e9..52b073fea17f 100644 --- a/include/linux/xxhash.h +++ b/include/linux/xxhash.h @@ -107,6 +107,29 @@ uint32_t xxh32(const void *input, size_t length, uint32_t seed); */ uint64_t xxh64(const void *input, size_t length, uint64_t seed); +/** + * xxhash() - calculate wordsize hash of the input with a given seed + * @input: The data to hash. + * @length: The length of the data to hash. + * @seed: The seed can be used to alter the result predictably. + * + * If the hash does not need to be comparable between machines with + * different word sizes, this function will call whichever of xxh32() + * or xxh64() is faster. + * + * Return: wordsize hash of the data. + */ + +static inline unsigned long xxhash(const void *input, size_t length, + uint64_t seed) +{ +#if BITS_PER_LONG == 64 + return xxh64(input, length, seed); +#else + return xxh32(input, length, seed); +#endif +} + /*-**************************** * Streaming Hash Functions *****************************/ |