From 6260618e09d375ee6aa19be16813dac40cc47513 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 2 Sep 2021 14:56:08 -0700 Subject: selftests/vm: use kselftest skip code for skipped tests There are several test cases in the vm directory are still using exit 0 when they need to be skipped. Use the kselftest framework to skip code instead so it can help us to distinguish the return status. Criterion to filter out what should be fixed in vm directory: grep -r "exit 0" -B1 | grep -i skip This change might cause some false-positives if people are running these test scripts directly and only checking their return codes, which will change from 0 to 4. However I think the impact should be small as most of our scripts here are already using this skip code. And there will be no such issue if running them with the kselftest framework. Link: https://lkml.kernel.org/r/20210823073433.37653-1-po-hsu.lin@canonical.com Signed-off-by: Po-Hsu Lin Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 5 ++++- tools/testing/selftests/vm/hugetlb_reparenting_test.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index 18d33684faad..fe8fcfb334e0 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -1,11 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + set -e if [[ $(id -u) -ne 0 ]]; then echo "This test must be run as root. Skipping..." - exit 0 + exit $ksft_skip fi fault_limit_file=limit_in_bytes diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh index d11d1febccc3..4a9a3afe9fd4 100644 --- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh @@ -1,11 +1,14 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + set -e if [[ $(id -u) -ne 0 ]]; then echo "This test must be run as root. Skipping..." - exit 0 + exit $ksft_skip fi usage_file=usage_in_bytes -- cgit v1.2.3-58-ga151 From 0c52ec9513b309b250046fdb2c4c6c3726fa5cfb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Sep 2021 14:56:11 -0700 Subject: selftests: Fix spelling mistake "cann't" -> "cannot" There is a spelling mistake in an error message. Fix it. Link: https://lkml.kernel.org/r/20210826121217.12885-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/mlock-random-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c index ff4d72eb74b9..782ea94dee2f 100644 --- a/tools/testing/selftests/vm/mlock-random-test.c +++ b/tools/testing/selftests/vm/mlock-random-test.c @@ -70,7 +70,7 @@ int get_proc_locked_vm_size(void) } } - perror("cann't parse VmLck in /proc/self/status\n"); + perror("cannot parse VmLck in /proc/self/status\n"); fclose(f); return -1; } -- cgit v1.2.3-58-ga151 From f358afc52c3066f4e8cd7b3a2d75b31e822519e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Sep 2021 14:56:36 -0700 Subject: mm: remove flush_kernel_dcache_page flush_kernel_dcache_page is a rather confusing interface that implements a subset of flush_dcache_page by not being able to properly handle page cache mapped pages. The only callers left are in the exec code as all other previous callers were incorrect as they could have dealt with page cache pages. Replace the calls to flush_kernel_dcache_page with calls to flush_dcache_page, which for all architectures does either exactly the same thing, can contains one or more of the following: 1) an optimization to defer the cache flush for page cache pages not mapped into userspace 2) additional flushing for mapped page cache pages if cache aliases are possible Link: https://lkml.kernel.org/r/20210712060928.4161649-7-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Reviewed-by: Ira Weiny Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/cachetlb.rst | 86 ++++++++++------------ .../translations/zh_CN/core-api/cachetlb.rst | 9 --- arch/arm/include/asm/cacheflush.h | 4 +- arch/arm/mm/flush.c | 33 --------- arch/arm/mm/nommu.c | 6 -- arch/csky/abiv1/cacheflush.c | 11 --- arch/csky/abiv1/inc/abi/cacheflush.h | 4 +- arch/mips/include/asm/cacheflush.h | 8 +- arch/nds32/include/asm/cacheflush.h | 3 +- arch/nds32/mm/cacheflush.c | 9 --- arch/parisc/include/asm/cacheflush.h | 8 +- arch/parisc/kernel/cache.c | 3 +- arch/sh/include/asm/cacheflush.h | 8 +- block/blk-map.c | 2 +- fs/exec.c | 6 +- include/linux/highmem.h | 5 +- tools/testing/scatterlist/linux/mm.h | 1 - 17 files changed, 51 insertions(+), 155 deletions(-) (limited to 'tools') diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index fe4290e26729..8aed9103e48a 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -271,10 +271,15 @@ maps this page at its virtual address. ``void flush_dcache_page(struct page *page)`` - Any time the kernel writes to a page cache page, _OR_ - the kernel is about to read from a page cache page and - user space shared/writable mappings of this page potentially - exist, this routine is called. + This routines must be called when: + + a) the kernel did write to a page that is in the page cache page + and / or in high memory + b) the kernel is about to read from a page cache page and user space + shared/writable mappings of this page potentially exist. Note + that {get,pin}_user_pages{_fast} already call flush_dcache_page + on any page found in the user address space and thus driver + code rarely needs to take this into account. .. note:: @@ -284,38 +289,34 @@ maps this page at its virtual address. handling vfs symlinks in the page cache need not call this interface at all. - The phrase "kernel writes to a page cache page" means, - specifically, that the kernel executes store instructions - that dirty data in that page at the page->virtual mapping - of that page. It is important to flush here to handle - D-cache aliasing, to make sure these kernel stores are - visible to user space mappings of that page. - - The corollary case is just as important, if there are users - which have shared+writable mappings of this file, we must make - sure that kernel reads of these pages will see the most recent - stores done by the user. - - If D-cache aliasing is not an issue, this routine may - simply be defined as a nop on that architecture. - - There is a bit set aside in page->flags (PG_arch_1) as - "architecture private". The kernel guarantees that, - for pagecache pages, it will clear this bit when such - a page first enters the pagecache. - - This allows these interfaces to be implemented much more - efficiently. It allows one to "defer" (perhaps indefinitely) - the actual flush if there are currently no user processes - mapping this page. See sparc64's flush_dcache_page and - update_mmu_cache implementations for an example of how to go - about doing this. - - The idea is, first at flush_dcache_page() time, if - page->mapping->i_mmap is an empty tree, just mark the architecture - private page flag bit. Later, in update_mmu_cache(), a check is - made of this flag bit, and if set the flush is done and the flag - bit is cleared. + The phrase "kernel writes to a page cache page" means, specifically, + that the kernel executes store instructions that dirty data in that + page at the page->virtual mapping of that page. It is important to + flush here to handle D-cache aliasing, to make sure these kernel stores + are visible to user space mappings of that page. + + The corollary case is just as important, if there are users which have + shared+writable mappings of this file, we must make sure that kernel + reads of these pages will see the most recent stores done by the user. + + If D-cache aliasing is not an issue, this routine may simply be defined + as a nop on that architecture. + + There is a bit set aside in page->flags (PG_arch_1) as "architecture + private". The kernel guarantees that, for pagecache pages, it will + clear this bit when such a page first enters the pagecache. + + This allows these interfaces to be implemented much more efficiently. + It allows one to "defer" (perhaps indefinitely) the actual flush if + there are currently no user processes mapping this page. See sparc64's + flush_dcache_page and update_mmu_cache implementations for an example + of how to go about doing this. + + The idea is, first at flush_dcache_page() time, if page_file_mapping() + returns a mapping, and mapping_mapped on that mapping returns %false, + just mark the architecture private page flag bit. Later, in + update_mmu_cache(), a check is made of this flag bit, and if set the + flush is done and the flag bit is cleared. .. important:: @@ -351,19 +352,6 @@ maps this page at its virtual address. architectures). For incoherent architectures, it should flush the cache of the page at vmaddr. - ``void flush_kernel_dcache_page(struct page *page)`` - - When the kernel needs to modify a user page is has obtained - with kmap, it calls this function after all modifications are - complete (but before kunmapping it) to bring the underlying - page up to date. It is assumed here that the user has no - incoherent cached copies (i.e. the original page was obtained - from a mechanism like get_user_pages()). The default - implementation is a nop and should remain so on all coherent - architectures. On incoherent architectures, this should flush - the kernel cache for page (using page_address(page)). - - ``void flush_icache_range(unsigned long start, unsigned long end)`` When the kernel stores into addresses that it will execute diff --git a/Documentation/translations/zh_CN/core-api/cachetlb.rst b/Documentation/translations/zh_CN/core-api/cachetlb.rst index 8376485a534d..55827b8a7c53 100644 --- a/Documentation/translations/zh_CN/core-api/cachetlb.rst +++ b/Documentation/translations/zh_CN/core-api/cachetlb.rst @@ -298,15 +298,6 @@ HyperSparc cpu就是这样一个具有这种属性的cpu。 用。默认的实现是nop(对于所有相干的架构应该保持这样)。对于不一致性 的架构,它应该刷新vmaddr处的页面缓存。 - ``void flush_kernel_dcache_page(struct page *page)`` - - 当内核需要修改一个用kmap获得的用户页时,它会在所有修改完成后(但在 - kunmapping之前)调用这个函数,以使底层页面达到最新状态。这里假定用 - 户没有不一致性的缓存副本(即原始页面是从类似get_user_pages()的机制 - 中获得的)。默认的实现是一个nop,在所有相干的架构上都应该如此。在不 - 一致性的架构上,这应该刷新内核缓存中的页面(使用page_address(page))。 - - ``void flush_icache_range(unsigned long start, unsigned long end)`` 当内核存储到它将执行的地址中时(例如在加载模块时),这个函数被调用。 diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2e24e765e6d3..5e56288e343b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -291,6 +291,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { if ((cache_is_vivt() || cache_is_vipt_aliasing())) @@ -312,9 +313,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma, __flush_anon_page(vma, page, vmaddr); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d89db7895d1..7ff9feea13a6 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -345,39 +345,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -/* - * Ensure cache coherency for the kernel mapping of this page. We can - * assume that the page is pinned via kmap. - * - * If the page only exists in the page cache and there are no user - * space mappings, this is a no-op since the page was already marked - * dirty at creation. Otherwise, we need to flush the dirty kernel - * cache lines directly. - */ -void flush_kernel_dcache_page(struct page *page) -{ - if (cache_is_vivt() || cache_is_vipt_aliasing()) { - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) { - void *addr; - - addr = page_address(page); - /* - * kmap_atomic() doesn't set the page virtual - * address for highmem pages, and - * kunmap_atomic() takes care of cache - * flushing already. - */ - if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - } - } -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - /* * Flush an anonymous page so that users of get_user_pages() * can safely access the data. The expected sequence is: diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 8b3d7191e2b8..2658f52903da 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -166,12 +166,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -void flush_kernel_dcache_page(struct page *page) -{ - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 07ff17ea33de..fb91b069dc69 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -56,17 +56,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, } } -void flush_kernel_dcache_page(struct page *page) -{ - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) - dcache_wbinv_all(); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 6cab7afae962..ed62e2066ba7 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -14,12 +14,10 @@ extern void flush_dcache_page(struct page *); #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { dcache_wbinv_all(); diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index d687b40b9fbb..b3dc9c589442 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -125,13 +125,7 @@ static inline void kunmap_noncoherent(void) kunmap_coherent(); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - BUG_ON(cpu_has_dc_aliases && PageHighMem(page)); - flush_dcache_page(page); -} - +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 /* * For now flush_kernel_vmap_range and invalidate_kernel_vmap_range both do a * cache writeback and invalidate operation. diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 7d6824f7c0e8..c2a222ebfa2a 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -36,8 +36,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -void flush_kernel_dcache_page(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *addr, int size); void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ad5344ef5d33..07aac65d1cab 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -318,15 +318,6 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_restore(flags); } -void flush_kernel_dcache_page(struct page *page) -{ - unsigned long flags; - local_irq_save(flags); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); - local_irq_restore(flags); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_kernel_vmap_range(void *addr, int size) { unsigned long flags; diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 99663fc1f997..eef0096db5f8 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -36,16 +36,12 @@ void flush_cache_all_local(void); void flush_cache_all(void); void flush_cache_mm(struct mm_struct *mm); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page_addr(void *addr); -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_kernel_dcache_page_addr(page_address(page)); -} #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); @@ -59,7 +55,7 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #define flush_icache_page(vma,page) do { \ - flush_kernel_dcache_page(page); \ + flush_kernel_dcache_page_addr(page_address(page)); \ flush_kernel_icache_page(page_address(page)); \ } while (0) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 86a1a63563fd..39e02227e231 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -334,7 +334,7 @@ void flush_dcache_page(struct page *page) return; } - flush_kernel_dcache_page(page); + flush_kernel_dcache_page_addr(page_address(page)); if (!mapping) return; @@ -375,7 +375,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_kernel_dcache_page_asm); EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 4486a865ff62..372afa82fee6 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -63,6 +63,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma, if (boot_cpu_data.dcache.n_aliases && PageAnon(page)) __flush_anon_page(page, vmaddr); } + +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { __flush_wback_region(addr, size); @@ -72,12 +74,6 @@ static inline void invalidate_kernel_vmap_range(void *addr, int size) __flush_invalidate_region(addr, size); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_dcache_page(page); -} - extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len); diff --git a/block/blk-map.c b/block/blk-map.c index 3743158ddaeb..4639bc6b5c62 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -309,7 +309,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, static void bio_invalidate_vmalloc_pages(struct bio *bio) { -#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE if (bio->bi_private && !op_is_write(bio_op(bio))) { unsigned long i, len = 0; diff --git a/fs/exec.c b/fs/exec.c index 38f63451b928..41a888d4edde 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -574,7 +574,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, } if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); + flush_dcache_page(kmapped_page); kunmap(kmapped_page); put_arg_page(kmapped_page); } @@ -592,7 +592,7 @@ static int copy_strings(int argc, struct user_arg_ptr argv, ret = 0; out: if (kmapped_page) { - flush_kernel_dcache_page(kmapped_page); + flush_dcache_page(kmapped_page); kunmap(kmapped_page); put_arg_page(kmapped_page); } @@ -634,7 +634,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) kaddr = kmap_atomic(page); flush_arg_page(bprm, pos & PAGE_MASK, page); memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy); - flush_kernel_dcache_page(page); + flush_dcache_page(page); kunmap_atomic(kaddr); put_arg_page(page); } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d9a606a9fc64..b4c49f9cc379 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -130,10 +130,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page } #endif -#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ -} +#ifndef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE static inline void flush_kernel_vmap_range(void *vaddr, int size) { } diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index f9a12005fcea..16ec895bbe5f 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -127,7 +127,6 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) -#define flush_kernel_dcache_page(p) #define MAX_ERRNO 4095 -- cgit v1.2.3-58-ga151 From 4410cbb5c9f9371556c4e928b5dd00226b073082 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 2 Sep 2021 14:59:02 -0700 Subject: selftests/vm/userfaultfd: wake after copy failure When userfaultfd copy-ioctl fails since the PTE already exists, an -EEXIST error is returned and the faulting thread is not woken. The current userfaultfd test does not wake the faulting thread in such case. The assumption is presumably that another thread set the PTE through copy/wp ioctl and would wake the faulting thread or that alternatively the fault handler would realize there is no need to "must_wait" and continue. This is not necessarily true. There is an assumption that the "must_wait" tests in handle_userfault() are sufficient to provide definitive answer whether the offending PTE is populated or not. However, userfaultfd_must_wait() test is lockless. Consequently, concurrent calls to ptep_modify_prot_start(), for instance, can clear the PTE and can cause userfaultfd_must_wait() to wrongly assume it is not populated and a wait is needed. There are therefore 3 options: (1) Change the tests to wake on copy failure. (2) Wake faulting thread unconditionally on zero/copy ioctls before returning -EEXIST. (3) Change the userfaultfd_must_wait() to hold locks. This patch took the first approach, but the others are valid solutions with different tradeoffs. Link: https://lkml.kernel.org/r/20210808020724.1022515-4-namit@vmware.com Signed-off-by: Nadav Amit Cc: Jens Axboe Cc: Andrea Arcangeli Cc: Peter Xu Cc: Alexander Viro Cc: Axel Rasmussen Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 2ea438e6b8b1..10ab56c2484a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -566,6 +566,18 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, } } +static void wake_range(int ufd, unsigned long addr, unsigned long len) +{ + struct uffdio_range uffdio_wake; + + uffdio_wake.start = addr; + uffdio_wake.len = len; + + if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake)) + fprintf(stderr, "error waking %lu\n", + addr), exit(1); +} + static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; @@ -585,6 +597,7 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) if (uffdio_copy.copy != -EEXIST) err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); + wake_range(ufd, uffdio_copy.dst, page_size); } else if (uffdio_copy.copy != page_size) { err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); } else { -- cgit v1.2.3-58-ga151 From 68d6289baa35c5e59b5359577d3dc01c00c437d2 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Thu, 2 Sep 2021 15:00:39 -0700 Subject: selftests: vm: add KSM merge test Patch series "add KSM selftests". Introduce selftests to validate the functionality of KSM. The tests are run on private anonymous pages. Since some KSM tunables are modified, their starting values are saved and restored after testing. At the start, run is set to 2 to ensure that only test pages will be merged (we assume that no applications make madvise syscalls in the background). If KSM config not enabled, all tests will be skipped. This patch (of 4): Add check_ksm_merge() function to check the basic merging feature of KSM. First, some number of identical pages are allocated and the MADV_MERGEABLE advice is given to merge these pages. Then, pages_shared and pages_sharing values are compared with the expected numbers using assert_ksm_pages_count() function. The number of pages can be changed using -p option. Link: https://lkml.kernel.org/r/cover.1626252248.git.zhansayabagdaulet@gmail.com Link: https://lkml.kernel.org/r/90287685c13300972ea84de93d1f3f900373f9fe.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Reviewed-by: Pavel Tatashin Reviewed-by: Tyler Hicks Cc: Shuah Khan Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/ksm_tests.c | 306 ++++++++++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests.sh | 16 ++ 4 files changed, 324 insertions(+) create mode 100644 tools/testing/selftests/vm/ksm_tests.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index f0fd80ef17df..b02eac613fdd 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -27,3 +27,4 @@ hmm-tests memfd_secret local_config.* split_huge_page_test +ksm_tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 521243770f26..e6f22a801b71 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -45,6 +45,7 @@ TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += split_huge_page_test +TEST_GEN_FILES += ksm_tests ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c new file mode 100644 index 000000000000..d74d5aa34b16 --- /dev/null +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include "../kselftest.h" + +#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/" +#define KSM_FP(s) (KSM_SYSFS_PATH s) +#define KSM_SCAN_LIMIT_SEC_DEFAULT 120 +#define KSM_PAGE_COUNT_DEFAULT 10l +#define KSM_PROT_STR_DEFAULT "rw" + +struct ksm_sysfs { + unsigned long max_page_sharing; + unsigned long merge_across_nodes; + unsigned long pages_to_scan; + unsigned long run; + unsigned long sleep_millisecs; + unsigned long stable_node_chains_prune_millisecs; + unsigned long use_zero_pages; +}; + +static int ksm_write_sysfs(const char *file_path, unsigned long val) +{ + FILE *f = fopen(file_path, "w"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fprintf(f, "%lu", val) < 0) { + perror("fprintf"); + return 1; + } + fclose(f); + + return 0; +} + +static int ksm_read_sysfs(const char *file_path, unsigned long *val) +{ + FILE *f = fopen(file_path, "r"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fscanf(f, "%lu", val) != 1) { + perror("fscanf"); + return 1; + } + fclose(f); + + return 0; +} + +static int str_to_prot(char *prot_str) +{ + int prot = 0; + + if ((strchr(prot_str, 'r')) != NULL) + prot |= PROT_READ; + if ((strchr(prot_str, 'w')) != NULL) + prot |= PROT_WRITE; + if ((strchr(prot_str, 'x')) != NULL) + prot |= PROT_EXEC; + + return prot; +} + +static void print_help(void) +{ + printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + printf(" -a: specify the access protections of pages.\n" + " must be of the form [rwx].\n" + " Default: %s\n", KSM_PROT_STR_DEFAULT); + printf(" -p: specify the number of pages to test.\n" + " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT); + printf(" -l: limit the maximum running time (in seconds) for a test.\n" + " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT); + + exit(0); +} + +static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size) +{ + void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0); + + if (!map_ptr) { + perror("mmap"); + return NULL; + } + memset(map_ptr, data, map_size); + if (mprotect(map_ptr, map_size, prot)) { + perror("mprotect"); + munmap(map_ptr, map_size); + return NULL; + } + + return map_ptr; +} + +static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout) +{ + struct timespec cur_time; + unsigned long cur_scan, init_scan; + + if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan)) + return 1; + cur_scan = init_scan; + + while (cur_scan < init_scan + scan_count) { + if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan)) + return 1; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) { + perror("clock_gettime"); + return 1; + } + if ((cur_time.tv_sec - start_time.tv_sec) > timeout) { + printf("Scan time limit exceeded\n"); + return 1; + } + } + + return 0; +} + +static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, int timeout) +{ + if (madvise(addr, size, MADV_MERGEABLE)) { + perror("madvise"); + return 1; + } + if (ksm_write_sysfs(KSM_FP("run"), 1)) + return 1; + + /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */ + if (ksm_do_scan(2, start_time, timeout)) + return 1; + + return 0; +} + +static bool assert_ksm_pages_count(long dupl_page_count) +{ + unsigned long max_page_sharing, pages_sharing, pages_shared; + + if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) || + ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) || + ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing)) + return false; + + /* + * Since there must be at least 2 pages for merging and 1 page can be + * shared with the limited number of pages (max_page_sharing), sometimes + * there are 'leftover' pages that cannot be merged. For example, if there + * are 11 pages and max_page_sharing = 10, then only 10 pages will be + * merged and the 11th page won't be affected. As a result, when the number + * of duplicate pages is divided by max_page_sharing and the remainder is 1, + * pages_shared and pages_sharing values will be equal between dupl_page_count + * and dupl_page_count - 1. + */ + if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) { + if (pages_shared == dupl_page_count / max_page_sharing && + pages_sharing == pages_shared * (max_page_sharing - 1)) + return true; + } else { + if (pages_shared == (dupl_page_count / max_page_sharing + 1) && + pages_sharing == dupl_page_count - pages_shared) + return true; + } + + return false; +} + +static int ksm_save_def(struct ksm_sysfs *ksm_sysfs) +{ + if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) || + ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) || + ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) || + ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) || + ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) || + ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"), + &ksm_sysfs->stable_node_chains_prune_millisecs) || + ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages)) + return 1; + + return 0; +} + +static int ksm_restore(struct ksm_sysfs *ksm_sysfs) +{ + if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) || + ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) || + ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) || + ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) || + ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) || + ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"), + ksm_sysfs->stable_node_chains_prune_millisecs) || + ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages)) + return 1; + + return 0; +} + +static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, size_t page_size) +{ + void *map_ptr; + struct timespec start_time; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + /* fill pages with the same data and merge them */ + map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + /* verify that the right number of pages are merged */ + if (assert_ksm_pages_count(page_count)) { + printf("OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + } + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + +int main(int argc, char *argv[]) +{ + int ret, opt; + int prot = 0; + int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT; + long page_count = KSM_PAGE_COUNT_DEFAULT; + size_t page_size = sysconf(_SC_PAGESIZE); + struct ksm_sysfs ksm_sysfs_old; + + while ((opt = getopt(argc, argv, "ha:p:l:")) != -1) { + switch (opt) { + case 'a': + prot = str_to_prot(optarg); + break; + case 'p': + page_count = atol(optarg); + if (page_count <= 0) { + printf("The number of pages must be greater than 0\n"); + return KSFT_FAIL; + } + break; + case 'l': + ksm_scan_limit_sec = atoi(optarg); + if (ksm_scan_limit_sec <= 0) { + printf("Timeout value must be greater than 0\n"); + return KSFT_FAIL; + } + break; + case 'h': + print_help(); + break; + default: + return KSFT_FAIL; + } + } + + if (prot == 0) + prot = str_to_prot(KSM_PROT_STR_DEFAULT); + + if (access(KSM_SYSFS_PATH, F_OK)) { + printf("Config KSM not enabled\n"); + return KSFT_SKIP; + } + + if (ksm_save_def(&ksm_sysfs_old)) { + printf("Cannot save default tunables\n"); + return KSFT_FAIL; + } + + if (ksm_write_sysfs(KSM_FP("run"), 2) || + ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) || + ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) || + ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count)) + return KSFT_FAIL; + + ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, ksm_scan_limit_sec, + page_size); + + if (ksm_restore(&ksm_sysfs_old)) { + printf("Cannot restore default tunables\n"); + return KSFT_FAIL; + } + + return ret; +} diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index d09a6b71f1e9..97b6f712134d 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -377,6 +377,22 @@ else exitcode=1 fi +echo "-------------------------------------------------------" +echo "running KSM MADV_MERGEABLE test with 10 identical pages" +echo "-------------------------------------------------------" +./ksm_tests -p 10 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode exit $exitcode -- cgit v1.2.3-58-ga151 From a40c80e348fac4ecff8abcc3fae31e2e84c055d6 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Thu, 2 Sep 2021 15:00:42 -0700 Subject: selftests: vm: add KSM unmerge test Add check_ksm_unmerge() function to verify that KSM is properly unmerging shared pages. For this, two duplicate pages are merged first and then their contents are modified. Since they are not identical anymore, the pages must be unmerged and the number of merged pages has to be 0. The test is run as follows: ./ksm_tests -U Link: https://lkml.kernel.org/r/c0f55420440d704d5b094275b4365aa1b2ad46b5.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Reviewed-by: Pavel Tatashin Reviewed-by: Tyler Hicks Cc: Hugh Dickins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/ksm_tests.c | 72 +++++++++++++++++++++++++++++-- tools/testing/selftests/vm/run_vmtests.sh | 18 +++++++- 2 files changed, 85 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index d74d5aa34b16..80302bb8f64c 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -23,6 +23,11 @@ struct ksm_sysfs { unsigned long use_zero_pages; }; +enum ksm_test_name { + CHECK_KSM_MERGE, + CHECK_KSM_UNMERGE +}; + static int ksm_write_sysfs(const char *file_path, unsigned long val) { FILE *f = fopen(file_path, "w"); @@ -75,7 +80,12 @@ static int str_to_prot(char *prot_str) static void print_help(void) { - printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + + printf("Supported :\n" + " -M (page merging)\n" + " -U (page unmerging)\n\n"); + printf(" -a: specify the access protections of pages.\n" " must be of the form [rwx].\n" " Default: %s\n", KSM_PROT_STR_DEFAULT); @@ -239,6 +249,46 @@ err_out: return KSFT_FAIL; } +static int check_ksm_unmerge(int mapping, int prot, int timeout, size_t page_size) +{ + void *map_ptr; + struct timespec start_time; + int page_count = 2; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + /* fill pages with the same data and merge them */ + map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */ + memset(map_ptr, '-', 1); + memset(map_ptr + page_size, '+', 1); + + /* get at least 1 scan, so KSM can detect that the pages were modified */ + if (ksm_do_scan(1, start_time, timeout)) + goto err_out; + + /* check that unmerging was successful and 0 pages are currently merged */ + if (assert_ksm_pages_count(0)) { + printf("OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + } + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -247,8 +297,9 @@ int main(int argc, char *argv[]) long page_count = KSM_PAGE_COUNT_DEFAULT; size_t page_size = sysconf(_SC_PAGESIZE); struct ksm_sysfs ksm_sysfs_old; + int test_name = CHECK_KSM_MERGE; - while ((opt = getopt(argc, argv, "ha:p:l:")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:MU")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -270,6 +321,11 @@ int main(int argc, char *argv[]) case 'h': print_help(); break; + case 'M': + break; + case 'U': + test_name = CHECK_KSM_UNMERGE; + break; default: return KSFT_FAIL; } @@ -294,8 +350,16 @@ int main(int argc, char *argv[]) ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count)) return KSFT_FAIL; - ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, ksm_scan_limit_sec, - page_size); + switch (test_name) { + case CHECK_KSM_MERGE: + ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, + ksm_scan_limit_sec, page_size); + break; + case CHECK_KSM_UNMERGE: + ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, + page_size); + break; + } if (ksm_restore(&ksm_sysfs_old)) { printf("Cannot restore default tunables\n"); diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 97b6f712134d..3a23c6b47da2 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -380,7 +380,23 @@ fi echo "-------------------------------------------------------" echo "running KSM MADV_MERGEABLE test with 10 identical pages" echo "-------------------------------------------------------" -./ksm_tests -p 10 +./ksm_tests -M -p 10 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "------------------------" +echo "running KSM unmerge test" +echo "------------------------" +./ksm_tests -U ret_val=$? if [ $ret_val -eq 0 ]; then -- cgit v1.2.3-58-ga151 From 39619982c5be6ed57390f17aabee6bc11e4af37e Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Thu, 2 Sep 2021 15:00:45 -0700 Subject: selftests: vm: add KSM zero page merging test Add check_ksm_zero_page_merge() function to test that empty pages are being handled properly. For this, several zero pages are allocated and merged using madvise. If use_zero_pages is enabled, the pages must be shared with the special kernel zero pages; otherwise, they are merged as usual duplicate pages. The test is run as follows: ./ksm_tests -Z Link: https://lkml.kernel.org/r/6d0caab00d4bdccf5e3791cb95cf6dfd5eb85e45.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Reviewed-by: Pavel Tatashin Reviewed-by: Tyler Hicks Cc: Hugh Dickins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/ksm_tests.c | 70 +++++++++++++++++++++++++++++-- tools/testing/selftests/vm/run_vmtests.sh | 32 ++++++++++++++ 2 files changed, 99 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 80302bb8f64c..5843526471e1 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -12,6 +12,7 @@ #define KSM_SCAN_LIMIT_SEC_DEFAULT 120 #define KSM_PAGE_COUNT_DEFAULT 10l #define KSM_PROT_STR_DEFAULT "rw" +#define KSM_USE_ZERO_PAGES_DEFAULT false struct ksm_sysfs { unsigned long max_page_sharing; @@ -25,7 +26,8 @@ struct ksm_sysfs { enum ksm_test_name { CHECK_KSM_MERGE, - CHECK_KSM_UNMERGE + CHECK_KSM_UNMERGE, + CHECK_KSM_ZERO_PAGE_MERGE }; static int ksm_write_sysfs(const char *file_path, unsigned long val) @@ -80,10 +82,12 @@ static int str_to_prot(char *prot_str) static void print_help(void) { - printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n"); + printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n" + "[-z use_zero_pages]\n"); printf("Supported :\n" " -M (page merging)\n" + " -Z (zero pages merging)\n" " -U (page unmerging)\n\n"); printf(" -a: specify the access protections of pages.\n" @@ -93,6 +97,8 @@ static void print_help(void) " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT); printf(" -l: limit the maximum running time (in seconds) for a test.\n" " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT); + printf(" -z: change use_zero_pages tunable\n" + " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT); exit(0); } @@ -289,6 +295,50 @@ err_out: return KSFT_FAIL; } +static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int timeout, + bool use_zero_pages, size_t page_size) +{ + void *map_ptr; + struct timespec start_time; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages)) + return KSFT_FAIL; + + /* fill pages with zero and try to merge them */ + map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + /* + * verify that the right number of pages are merged: + * 1) if use_zero_pages is set to 1, empty pages are merged + * with the kernel zero page instead of with each other; + * 2) if use_zero_pages is set to 0, empty pages are not treated specially + * and merged as usual. + */ + if (use_zero_pages && !assert_ksm_pages_count(0)) + goto err_out; + else if (!use_zero_pages && !assert_ksm_pages_count(page_count)) + goto err_out; + + printf("OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -298,8 +348,9 @@ int main(int argc, char *argv[]) size_t page_size = sysconf(_SC_PAGESIZE); struct ksm_sysfs ksm_sysfs_old; int test_name = CHECK_KSM_MERGE; + bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT; - while ((opt = getopt(argc, argv, "ha:p:l:MU")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:MUZ")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -321,11 +372,20 @@ int main(int argc, char *argv[]) case 'h': print_help(); break; + case 'z': + if (strcmp(optarg, "0") == 0) + use_zero_pages = 0; + else + use_zero_pages = 1; + break; case 'M': break; case 'U': test_name = CHECK_KSM_UNMERGE; break; + case 'Z': + test_name = CHECK_KSM_ZERO_PAGE_MERGE; + break; default: return KSFT_FAIL; } @@ -359,6 +419,10 @@ int main(int argc, char *argv[]) ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); break; + case CHECK_KSM_ZERO_PAGE_MERGE: + ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, + ksm_scan_limit_sec, use_zero_pages, page_size); + break; } if (ksm_restore(&ksm_sysfs_old)) { diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 3a23c6b47da2..9b4e444fc4ed 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -409,6 +409,38 @@ else exitcode=1 fi +echo "----------------------------------------------------------" +echo "running KSM test with 10 zero pages and use_zero_pages = 0" +echo "----------------------------------------------------------" +./ksm_tests -Z -p 10 -z 0 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "----------------------------------------------------------" +echo "running KSM test with 10 zero pages and use_zero_pages = 1" +echo "----------------------------------------------------------" +./ksm_tests -Z -p 10 -z 1 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode exit $exitcode -- cgit v1.2.3-58-ga151 From 82e717ad35018ce59ba1b66297dfd898b2a1a03f Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Thu, 2 Sep 2021 15:00:48 -0700 Subject: selftests: vm: add KSM merging across nodes test Add check_ksm_numa_merge() function to test that pages in different NUMA nodes are being handled properly. First, two duplicate pages are allocated in two separate NUMA nodes using the libnuma library. Since there is one unique page in each node, with merge_across_nodes = 0, there won't be any shared pages. If merge_across_nodes is set to 1, the pages will be treated as usual duplicate pages and will be merged. If NUMA config is not enabled or the number of NUMA nodes is less than two, then the test is skipped. The test is run as follows: ./ksm_tests -N Link: https://lkml.kernel.org/r/071c17b5b04ebb0dfeba137acc495e5dd9d2a719.1626252248.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Reviewed-by: Pavel Tatashin Reviewed-by: Tyler Hicks Cc: Hugh Dickins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 2 + tools/testing/selftests/vm/ksm_tests.c | 88 +++++++++++++++++++++++++++++-- tools/testing/selftests/vm/run_vmtests.sh | 32 +++++++++++ 3 files changed, 119 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e6f22a801b71..d9605bd10f2d 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -146,6 +146,8 @@ $(OUTPUT)/hmm-tests: local_config.h # HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. $(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS) +$(OUTPUT)/ksm_tests: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 5843526471e1..cdeb4a028538 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "../kselftest.h" @@ -13,6 +14,7 @@ #define KSM_PAGE_COUNT_DEFAULT 10l #define KSM_PROT_STR_DEFAULT "rw" #define KSM_USE_ZERO_PAGES_DEFAULT false +#define KSM_MERGE_ACROSS_NODES_DEFAULT true struct ksm_sysfs { unsigned long max_page_sharing; @@ -27,7 +29,8 @@ struct ksm_sysfs { enum ksm_test_name { CHECK_KSM_MERGE, CHECK_KSM_UNMERGE, - CHECK_KSM_ZERO_PAGE_MERGE + CHECK_KSM_ZERO_PAGE_MERGE, + CHECK_KSM_NUMA_MERGE }; static int ksm_write_sysfs(const char *file_path, unsigned long val) @@ -83,11 +86,12 @@ static int str_to_prot(char *prot_str) static void print_help(void) { printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n" - "[-z use_zero_pages]\n"); + "[-z use_zero_pages] [-m merge_across_nodes]\n"); printf("Supported :\n" " -M (page merging)\n" " -Z (zero pages merging)\n" + " -N (merging of pages in different NUMA nodes)\n" " -U (page unmerging)\n\n"); printf(" -a: specify the access protections of pages.\n" @@ -99,6 +103,8 @@ static void print_help(void) " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT); printf(" -z: change use_zero_pages tunable\n" " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT); + printf(" -m: change merge_across_nodes tunable\n" + " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT); exit(0); } @@ -339,6 +345,68 @@ err_out: return KSFT_FAIL; } +static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes, + size_t page_size) +{ + void *numa1_map_ptr, *numa2_map_ptr; + struct timespec start_time; + int page_count = 2; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + if (numa_available() < 0) { + perror("NUMA support not enabled"); + return KSFT_SKIP; + } + if (numa_max_node() < 1) { + printf("At least 2 NUMA nodes must be available\n"); + return KSFT_SKIP; + } + if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes)) + return KSFT_FAIL; + + /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */ + numa1_map_ptr = numa_alloc_onnode(page_size, 0); + numa2_map_ptr = numa_alloc_onnode(page_size, 1); + if (!numa1_map_ptr || !numa2_map_ptr) { + perror("numa_alloc_onnode"); + return KSFT_FAIL; + } + + memset(numa1_map_ptr, '*', page_size); + memset(numa2_map_ptr, '*', page_size); + + /* try to merge the pages */ + if (ksm_merge_pages(numa1_map_ptr, page_size, start_time, timeout) || + ksm_merge_pages(numa2_map_ptr, page_size, start_time, timeout)) + goto err_out; + + /* + * verify that the right number of pages are merged: + * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged; + * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is + * only 1 unique page in each node and they can't be shared. + */ + if (merge_across_nodes && !assert_ksm_pages_count(page_count)) + goto err_out; + else if (!merge_across_nodes && !assert_ksm_pages_count(0)) + goto err_out; + + numa_free(numa1_map_ptr, page_size); + numa_free(numa2_map_ptr, page_size); + printf("OK\n"); + return KSFT_PASS; + +err_out: + numa_free(numa1_map_ptr, page_size); + numa_free(numa2_map_ptr, page_size); + printf("Not OK\n"); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -349,8 +417,9 @@ int main(int argc, char *argv[]) struct ksm_sysfs ksm_sysfs_old; int test_name = CHECK_KSM_MERGE; bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT; + bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; - while ((opt = getopt(argc, argv, "ha:p:l:z:MUZ")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:MUZN")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -378,6 +447,12 @@ int main(int argc, char *argv[]) else use_zero_pages = 1; break; + case 'm': + if (strcmp(optarg, "0") == 0) + merge_across_nodes = 0; + else + merge_across_nodes = 1; + break; case 'M': break; case 'U': @@ -386,6 +461,9 @@ int main(int argc, char *argv[]) case 'Z': test_name = CHECK_KSM_ZERO_PAGE_MERGE; break; + case 'N': + test_name = CHECK_KSM_NUMA_MERGE; + break; default: return KSFT_FAIL; } @@ -423,6 +501,10 @@ int main(int argc, char *argv[]) ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count, ksm_scan_limit_sec, use_zero_pages, page_size); break; + case CHECK_KSM_NUMA_MERGE: + ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, + merge_across_nodes, page_size); + break; } if (ksm_restore(&ksm_sysfs_old)) { diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 9b4e444fc4ed..45e803af7c77 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -441,6 +441,38 @@ else exitcode=1 fi +echo "-------------------------------------------------------------" +echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1" +echo "-------------------------------------------------------------" +./ksm_tests -N -m 1 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + +echo "-------------------------------------------------------------" +echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0" +echo "-------------------------------------------------------------" +./ksm_tests -N -m 0 +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode exit $exitcode -- cgit v1.2.3-58-ga151 From 9e7cb94ca218816bfd51e07b50bcfeadd3166d42 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Thu, 2 Sep 2021 15:00:54 -0700 Subject: selftests: vm: add KSM merging time test Patch series "add KSM performance tests", v3. Extend KSM self tests with a performance benchmark. These tests are not part of regular regression testing, as they are mainly intended to be used by developers making changes to the memory management subsystem. This patch (of 2): Add ksm_merge_time() function to determine speed and time needed for merging. The total spent time is shown in seconds while speed is in MiB/s. User must specify the size of duplicated memory area (in MiB) before running the test. The test is run as follows: ./ksm_tests -P -s 100 The output: Total size: 100 MiB Total time: 0.201106786 s Average speed: 497.248 MiB/s Link: https://lkml.kernel.org/r/cover.1629386192.git.zhansayabagdaulet@gmail.com Link: https://lkml.kernel.org/r/318b946ac80cc9205c89d0962048378f7ce0705b.1629386192.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Reviewed-by: Tyler Hicks Reviewed-by: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/ksm_tests.c | 74 ++++++++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index cdeb4a028538..432dfe615e50 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -7,6 +7,7 @@ #include #include "../kselftest.h" +#include "../../../../include/vdso/time64.h" #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/" #define KSM_FP(s) (KSM_SYSFS_PATH s) @@ -15,6 +16,7 @@ #define KSM_PROT_STR_DEFAULT "rw" #define KSM_USE_ZERO_PAGES_DEFAULT false #define KSM_MERGE_ACROSS_NODES_DEFAULT true +#define MB (1ul << 20) struct ksm_sysfs { unsigned long max_page_sharing; @@ -30,7 +32,8 @@ enum ksm_test_name { CHECK_KSM_MERGE, CHECK_KSM_UNMERGE, CHECK_KSM_ZERO_PAGE_MERGE, - CHECK_KSM_NUMA_MERGE + CHECK_KSM_NUMA_MERGE, + KSM_MERGE_TIME }; static int ksm_write_sysfs(const char *file_path, unsigned long val) @@ -86,13 +89,16 @@ static int str_to_prot(char *prot_str) static void print_help(void) { printf("usage: ksm_tests [-h] [-a prot] [-p page_count] [-l timeout]\n" - "[-z use_zero_pages] [-m merge_across_nodes]\n"); + "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n"); printf("Supported :\n" " -M (page merging)\n" " -Z (zero pages merging)\n" " -N (merging of pages in different NUMA nodes)\n" - " -U (page unmerging)\n\n"); + " -U (page unmerging)\n" + " -P evaluate merging time and speed.\n" + " For this test, the size of duplicated memory area (in MiB)\n" + " must be provided using -s option\n\n"); printf(" -a: specify the access protections of pages.\n" " must be of the form [rwx].\n" @@ -105,6 +111,7 @@ static void print_help(void) " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT); printf(" -m: change merge_across_nodes tunable\n" " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT); + printf(" -s: the size of duplicated memory area (in MiB)\n"); exit(0); } @@ -407,6 +414,47 @@ err_out: return KSFT_FAIL; } +static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size) +{ + void *map_ptr; + struct timespec start_time, end_time; + unsigned long scan_time_ns; + + map_size *= MB; + + map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size); + if (!map_ptr) + return KSFT_FAIL; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n", map_size / MB); + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC, + scan_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", (map_size / MB) / + ((double)scan_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr, map_size); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, map_size); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -418,8 +466,9 @@ int main(int argc, char *argv[]) int test_name = CHECK_KSM_MERGE; bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT; bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; + long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:MUZN")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNP")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -453,6 +502,12 @@ int main(int argc, char *argv[]) else merge_across_nodes = 1; break; + case 's': + size_MB = atoi(optarg); + if (size_MB <= 0) { + printf("Size must be greater than 0\n"); + return KSFT_FAIL; + } case 'M': break; case 'U': @@ -464,6 +519,9 @@ int main(int argc, char *argv[]) case 'N': test_name = CHECK_KSM_NUMA_MERGE; break; + case 'P': + test_name = KSM_MERGE_TIME; + break; default: return KSFT_FAIL; } @@ -505,6 +563,14 @@ int main(int argc, char *argv[]) ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, merge_across_nodes, page_size); break; + case KSM_MERGE_TIME: + if (size_MB == 0) { + printf("Option '-s' is required.\n"); + return KSFT_FAIL; + } + ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, + size_MB); + break; } if (ksm_restore(&ksm_sysfs_old)) { -- cgit v1.2.3-58-ga151 From 924a11bd1623787c6604590202e0920eb572fa42 Mon Sep 17 00:00:00 2001 From: Zhansaya Bagdauletkyzy Date: Thu, 2 Sep 2021 15:00:57 -0700 Subject: selftests: vm: add COW time test for KSM pages Since merged pages are copied every time they need to be modified, the write access time is different between shared and non-shared pages. Add ksm_cow_time() function which evaluates latency of these COW breaks. First, 4000 pages are allocated and the time, required to modify 1 byte in every other page, is measured. After this, the pages are merged into 2000 pairs and in each pair, 1 page is modified (i.e. they are decoupled) to detect COW breaks. The time needed to break COW of merged pages is then compared with performance of non-shared pages. The test is run as follows: ./ksm_tests -C The output: Total size: 15 MiB Not merged pages: Total time: 0.002185489 s Average speed: 3202.945 MiB/s Merged pages: Total time: 0.004386872 s Average speed: 1595.670 MiB/s Link: https://lkml.kernel.org/r/1d03ee0d1b341959d4b61672c6401d498bff5652.1629386192.git.zhansayabagdaulet@gmail.com Signed-off-by: Zhansaya Bagdauletkyzy Reviewed-by: Tyler Hicks Reviewed-by: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/ksm_tests.c | 86 ++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 432dfe615e50..b61dcdb44c5b 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -33,7 +33,8 @@ enum ksm_test_name { CHECK_KSM_UNMERGE, CHECK_KSM_ZERO_PAGE_MERGE, CHECK_KSM_NUMA_MERGE, - KSM_MERGE_TIME + KSM_MERGE_TIME, + KSM_COW_TIME }; static int ksm_write_sysfs(const char *file_path, unsigned long val) @@ -98,7 +99,8 @@ static void print_help(void) " -U (page unmerging)\n" " -P evaluate merging time and speed.\n" " For this test, the size of duplicated memory area (in MiB)\n" - " must be provided using -s option\n\n"); + " must be provided using -s option\n" + " -C evaluate the time required to break COW of merged pages.\n\n"); printf(" -a: specify the access protections of pages.\n" " must be of the form [rwx].\n" @@ -455,6 +457,77 @@ err_out: return KSFT_FAIL; } +static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size) +{ + void *map_ptr; + struct timespec start_time, end_time; + unsigned long cow_time_ns; + + /* page_count must be less than 2*page_size */ + size_t page_count = 4000; + + map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count); + if (!map_ptr) + return KSFT_FAIL; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + for (size_t i = 0; i < page_count - 1; i = i + 2) + memset(map_ptr + page_size * i, '-', 1); + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + return KSFT_FAIL; + } + + cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB); + printf("Not merged pages:\n"); + printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC, + cow_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) / + ((double)cow_time_ns / NSEC_PER_SEC)); + + /* Create 2000 pairs of duplicate pages */ + for (size_t i = 0; i < page_count - 1; i = i + 2) { + memset(map_ptr + page_size * i, '+', i / 2 + 1); + memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1); + } + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout)) + goto err_out; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + for (size_t i = 0; i < page_count - 1; i = i + 2) + memset(map_ptr + page_size * i, '-', 1); + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Merged pages:\n"); + printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC, + cow_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) / + ((double)cow_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr, page_size * page_count); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr, page_size * page_count); + return KSFT_FAIL; +} + int main(int argc, char *argv[]) { int ret, opt; @@ -468,7 +541,7 @@ int main(int argc, char *argv[]) bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNP")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -522,6 +595,9 @@ int main(int argc, char *argv[]) case 'P': test_name = KSM_MERGE_TIME; break; + case 'C': + test_name = KSM_COW_TIME; + break; default: return KSFT_FAIL; } @@ -571,6 +647,10 @@ int main(int argc, char *argv[]) ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, size_MB); break; + case KSM_COW_TIME: + ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, + page_size); + break; } if (ksm_restore(&ksm_sysfs_old)) { -- cgit v1.2.3-58-ga151