From 22722799de869912fb541c2c85b51cec4226c614 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Wed, 14 Dec 2016 15:06:18 -0800 Subject: ktest.pl: fix english Ajdust spelling to more common "mandatory". Variant "mandidory" is certainly wrong. Link: http://lkml.kernel.org/r/20161011073003.GA19476@amd Signed-off-by: Pavel Machek Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/ktest/ktest.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index d08e214ec6e7..be93ab02b490 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -719,14 +719,14 @@ sub set_value { if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") { # Note if a test is something other than build, then we - # will need other manditory options. + # will need other mandatory options. if ($prvalue ne "install") { # for bisect, we need to check BISECT_TYPE if ($prvalue ne "bisect") { $buildonly = 0; } } else { - # install still limits some manditory options. + # install still limits some mandatory options. $buildonly = 2; } } @@ -735,7 +735,7 @@ sub set_value { if ($prvalue ne "install") { $buildonly = 0; } else { - # install still limits some manditory options. + # install still limits some mandatory options. $buildonly = 2; } } @@ -3989,7 +3989,7 @@ sub make_min_config { } } - # Save off all the current mandidory configs + # Save off all the current mandatory configs open (OUT, ">$temp_config") or die "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { -- cgit v1.2.3-58-ga151 From ebb9a9aedb95f697673a96a26ff0322a13676381 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:07:56 -0800 Subject: tools: add WARN_ON_ONCE Patch series "Radix tree patches for 4.10", v3. Mostly these are improvements; the only bug fixes in here relate to multiorder entries (which are unused in the 4.9 tree). This patch (of 32): The radix tree uses its own buggy WARN_ON_ONCE. Replace it with the definition from asm-generic/bug.h Link: http://lkml.kernel.org/r/1480369871-5271-37-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/asm/bug.h | 11 +++++++++++ tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/linux/bug.h | 2 +- tools/testing/radix-tree/linux/types.h | 2 -- 4 files changed, 13 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h index 9e5f4846967f..beda1a884b50 100644 --- a/tools/include/asm/bug.h +++ b/tools/include/asm/bug.h @@ -12,6 +12,17 @@ unlikely(__ret_warn_on); \ }) +#define WARN_ON_ONCE(condition) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) + #define WARN_ONCE(condition, format...) ({ \ static int __warned; \ int __ret_warn_once = !!(condition); \ diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index f2e07f2fd4b4..3c338dc3b162 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,5 +1,5 @@ -CFLAGS += -I. -g -O2 -Wall -D_LGPL_SOURCE +CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h index ccbe444977df..23b8ed52f8c8 100644 --- a/tools/testing/radix-tree/linux/bug.h +++ b/tools/testing/radix-tree/linux/bug.h @@ -1 +1 @@ -#define WARN_ON_ONCE(x) assert(x) +#include "asm/bug.h" diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h index faa0b6ff9ca8..8491d89873bb 100644 --- a/tools/testing/radix-tree/linux/types.h +++ b/tools/testing/radix-tree/linux/types.h @@ -6,8 +6,6 @@ #define __rcu #define __read_mostly -#define BITS_PER_LONG (sizeof(long) * 8) - static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list; -- cgit v1.2.3-58-ga151 From 31023cd66468359790beb046b6808fe0444672a2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:07:59 -0800 Subject: radix tree test suite: allow GFP_ATOMIC allocations to fail In order to test the preload code, it is necessary to fail GFP_ATOMIC allocations, which requires defining GFP_KERNEL and GFP_ATOMIC properly. Remove the obsolete __GFP_WAIT and copy the definitions of the __GFP flags which are used from the kernel include files. We also need the real definition of gfpflags_allow_blocking() to persuade the radix tree to actually use its preallocated nodes. Link: http://lkml.kernel.org/r/1480369871-5271-38-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux.c | 7 ++++++- tools/testing/radix-tree/linux/gfp.h | 22 +++++++++++++++++++--- tools/testing/radix-tree/linux/slab.h | 5 ----- 3 files changed, 25 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 154823737b20..3cfb04e98e2f 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -33,7 +33,12 @@ mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) { - void *ret = malloc(cachep->size); + void *ret; + + if (flags & __GFP_NOWARN) + return NULL; + + ret = malloc(cachep->size); if (cachep->ctor) cachep->ctor(ret); uatomic_inc(&nr_allocated); diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index 5201b915f631..5b09b2ce6c33 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -3,8 +3,24 @@ #define __GFP_BITS_SHIFT 26 #define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) -#define __GFP_WAIT 1 -#define __GFP_ACCOUNT 0 -#define __GFP_NOWARN 0 + +#define __GFP_HIGH 0x20u +#define __GFP_IO 0x40u +#define __GFP_FS 0x80u +#define __GFP_NOWARN 0x200u +#define __GFP_ATOMIC 0x80000u +#define __GFP_ACCOUNT 0x100000u +#define __GFP_DIRECT_RECLAIM 0x400000u +#define __GFP_KSWAPD_RECLAIM 0x2000000u + +#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM) + +#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) + +static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) +{ + return !!(gfp_flags & __GFP_DIRECT_RECLAIM); +} #endif diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 6d5a34770fd4..452e2bf502e3 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -7,11 +7,6 @@ #define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ -static inline int gfpflags_allow_blocking(gfp_t mask) -{ - return 1; -} - struct kmem_cache { int size; void (*ctor)(void *); -- cgit v1.2.3-58-ga151 From 847d357635ce4c63b8901ab81333586a0f115fa5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:02 -0800 Subject: radix tree test suite: track preempt_count Rather than simply NOP out preempt_enable() and preempt_disable(), keep track of preempt_count and display it regularly in case either the test suite or the code under test is forgetting to balance the enables & disables. Only found a test-case that was forgetting to re-enable preemption, but it's a possibility worth checking. Link: http://lkml.kernel.org/r/1480369871-5271-39-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux.c | 1 + tools/testing/radix-tree/linux/preempt.h | 6 +++--- tools/testing/radix-tree/main.c | 30 ++++++++++++++++++++---------- 3 files changed, 24 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 3cfb04e98e2f..1f32a16a3848 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -9,6 +9,7 @@ #include int nr_allocated; +int preempt_count; void *mempool_alloc(mempool_t *pool, int gfp_mask) { diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h index 6210672e3baa..65c04c226965 100644 --- a/tools/testing/radix-tree/linux/preempt.h +++ b/tools/testing/radix-tree/linux/preempt.h @@ -1,4 +1,4 @@ -/* */ +extern int preempt_count; -#define preempt_disable() do { } while (0) -#define preempt_enable() do { } while (0) +#define preempt_disable() uatomic_inc(&preempt_count) +#define preempt_enable() uatomic_dec(&preempt_count) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index daa9010693e8..64ffe67605d4 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -293,27 +293,36 @@ static void single_thread_tests(bool long_run) { int i; - printf("starting single_thread_tests: %d allocated\n", nr_allocated); + printf("starting single_thread_tests: %d allocated, preempt %d\n", + nr_allocated, preempt_count); multiorder_checks(); - printf("after multiorder_check: %d allocated\n", nr_allocated); + printf("after multiorder_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); locate_check(); - printf("after locate_check: %d allocated\n", nr_allocated); + printf("after locate_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); tag_check(); - printf("after tag_check: %d allocated\n", nr_allocated); + printf("after tag_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); gang_check(); - printf("after gang_check: %d allocated\n", nr_allocated); + printf("after gang_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); add_and_check(); - printf("after add_and_check: %d allocated\n", nr_allocated); + printf("after add_and_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); dynamic_height_check(); - printf("after dynamic_height_check: %d allocated\n", nr_allocated); + printf("after dynamic_height_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); big_gang_check(long_run); - printf("after big_gang_check: %d allocated\n", nr_allocated); + printf("after big_gang_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); for (i = 0; i < (long_run ? 2000 : 3); i++) { copy_tag_check(); printf("%d ", i); fflush(stdout); } - printf("after copy_tag_check: %d allocated\n", nr_allocated); + printf("after copy_tag_check: %d allocated, preempt %d\n", + nr_allocated, preempt_count); } int main(int argc, char **argv) @@ -336,7 +345,8 @@ int main(int argc, char **argv) single_thread_tests(long_run); sleep(1); - printf("after sleep(1): %d allocated\n", nr_allocated); + printf("after sleep(1): %d allocated, preempt %d\n", + nr_allocated, preempt_count); rcu_unregister_thread(); exit(0); -- cgit v1.2.3-58-ga151 From 6df5ee786786ddafdddc922344a0b789f5b25fa4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:05 -0800 Subject: radix tree test suite: free preallocated nodes It can be a source of mild concern when the test suite shows that we're leaking nodes. While poring over the source code looking for leaks can lead to some fascinating bugs being discovered, sometimes the leak is simply that these nodes were preallocated and are sitting on the per-CPU list. Free them by calling the CPU dead callback. Link: http://lkml.kernel.org/r/1480369871-5271-40-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/main.c | 3 +++ tools/testing/radix-tree/test.h | 1 + 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 64ffe67605d4..52ce1eab0fd3 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -344,6 +344,9 @@ int main(int argc, char **argv) iteration_test(); single_thread_tests(long_run); + /* Free any remaining preallocated nodes */ + radix_tree_cpu_dead(0); + sleep(1); printf("after sleep(1): %d allocated, preempt %d\n", nr_allocated, preempt_count); diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 217fb2403f09..5d2fad05b263 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -44,3 +44,4 @@ void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); +int radix_tree_cpu_dead(unsigned int cpu); -- cgit v1.2.3-58-ga151 From 061ef3936b16edc8f779d403d569392505665ed5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:08 -0800 Subject: radix tree test suite: make runs more reproducible Instead of reseeding the random number generator every time around the loop in big_gang_check(), seed it at the beginning of execution. Use rand_r() and an independent base seed for each thread in iteration_test() so they don't stomp all over each others state. Since this particular test depends on the kernel scheduler, the iteration test can't be reproduced based purely on the random seed, but at least it won't pollute the other tests. Print the seed, and allow the seed to be specified so that a run which hits a problem can be reproduced. Link: http://lkml.kernel.org/r/1480369871-5271-41-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/iteration_check.c | 11 +++++++---- tools/testing/radix-tree/main.c | 9 +++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index 9adb8e7415a6..11d570c3fc83 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -20,6 +20,7 @@ #define TAG 0 static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_t threads[NUM_THREADS]; +static unsigned int seeds[3]; RADIX_TREE(tree, GFP_KERNEL); bool test_complete; @@ -71,7 +72,7 @@ static void *tagged_iteration_fn(void *arg) continue; } - if (rand() % 50 == 0) + if (rand_r(&seeds[0]) % 50 == 0) slot = radix_tree_iter_next(&iter); } rcu_read_unlock(); @@ -111,7 +112,7 @@ static void *untagged_iteration_fn(void *arg) continue; } - if (rand() % 50 == 0) + if (rand_r(&seeds[1]) % 50 == 0) slot = radix_tree_iter_next(&iter); } rcu_read_unlock(); @@ -129,7 +130,7 @@ static void *remove_entries_fn(void *arg) while (!test_complete) { int pgoff; - pgoff = rand() % 100; + pgoff = rand_r(&seeds[2]) % 100; pthread_mutex_lock(&tree_lock); item_delete(&tree, pgoff); @@ -146,9 +147,11 @@ void iteration_test(void) printf("Running iteration tests for 10 seconds\n"); - srand(time(0)); test_complete = false; + for (i = 0; i < 3; i++) + seeds[i] = rand(); + if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) { perror("pthread_create"); exit(1); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 52ce1eab0fd3..2eb694994497 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -67,7 +67,6 @@ void big_gang_check(bool long_run) for (i = 0; i < (long_run ? 1000 : 3); i++) { __big_gang_check(); - srand(time(0)); printf("%d ", i); fflush(stdout); } @@ -329,12 +328,18 @@ int main(int argc, char **argv) { bool long_run = false; int opt; + unsigned int seed = time(NULL); - while ((opt = getopt(argc, argv, "l")) != -1) { + while ((opt = getopt(argc, argv, "ls:")) != -1) { if (opt == 'l') long_run = true; + else if (opt == 's') + seed = strtoul(optarg, NULL, 0); } + printf("random seed %u\n", seed); + srand(seed); + rcu_register_thread(); radix_tree_init(); -- cgit v1.2.3-58-ga151 From ba20cd60c97945f0de9fe313f869b3a5855e1503 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:11 -0800 Subject: radix tree test suite: iteration test misuses RCU Each thread needs to register itself with RCU, otherwise the reading thread's read lock has no effect and the freeing thread will free the memory in the tree without waiting for the read lock to be dropped. Link: http://lkml.kernel.org/r/1480369871-5271-42-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/iteration_check.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index 11d570c3fc83..df71cb841385 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -29,6 +29,8 @@ static void *add_entries_fn(void *arg) { int pgoff; + rcu_register_thread(); + while (!test_complete) { for (pgoff = 0; pgoff < 100; pgoff++) { pthread_mutex_lock(&tree_lock); @@ -38,6 +40,8 @@ static void *add_entries_fn(void *arg) } } + rcu_unregister_thread(); + return NULL; } @@ -53,6 +57,8 @@ static void *tagged_iteration_fn(void *arg) struct radix_tree_iter iter; void **slot; + rcu_register_thread(); + while (!test_complete) { rcu_read_lock(); radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) { @@ -72,12 +78,18 @@ static void *tagged_iteration_fn(void *arg) continue; } - if (rand_r(&seeds[0]) % 50 == 0) + if (rand_r(&seeds[0]) % 50 == 0) { slot = radix_tree_iter_next(&iter); + rcu_read_unlock(); + rcu_barrier(); + rcu_read_lock(); + } } rcu_read_unlock(); } + rcu_unregister_thread(); + return NULL; } @@ -93,6 +105,8 @@ static void *untagged_iteration_fn(void *arg) struct radix_tree_iter iter; void **slot; + rcu_register_thread(); + while (!test_complete) { rcu_read_lock(); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -112,12 +126,18 @@ static void *untagged_iteration_fn(void *arg) continue; } - if (rand_r(&seeds[1]) % 50 == 0) + if (rand_r(&seeds[1]) % 50 == 0) { slot = radix_tree_iter_next(&iter); + rcu_read_unlock(); + rcu_barrier(); + rcu_read_lock(); + } } rcu_read_unlock(); } + rcu_unregister_thread(); + return NULL; } @@ -127,6 +147,8 @@ static void *untagged_iteration_fn(void *arg) */ static void *remove_entries_fn(void *arg) { + rcu_register_thread(); + while (!test_complete) { int pgoff; @@ -137,6 +159,8 @@ static void *remove_entries_fn(void *arg) pthread_mutex_unlock(&tree_lock); } + rcu_unregister_thread(); + return NULL; } -- cgit v1.2.3-58-ga151 From cfa40bcfd6fed7010b1633bf127ed8571d3b607e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 14 Dec 2016 15:08:14 -0800 Subject: radix tree test suite: benchmark for iterator This adds simple benchmark for iterator similar to one I've used for commit 78c1d78488a3 ("radix-tree: introduce bit-optimized iterator") Building with make BENCHMARK=1 set radix tree order to 6, this allows to get performance comparable to in kernel performance. Link: http://lkml.kernel.org/r/1480369871-5271-43-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Konstantin Khlebnikov Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 6 +- tools/testing/radix-tree/benchmark.c | 98 +++++++++++++++++++++++++++++++++ tools/testing/radix-tree/linux/kernel.h | 4 ++ tools/testing/radix-tree/main.c | 2 + tools/testing/radix-tree/test.h | 1 + 5 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 tools/testing/radix-tree/benchmark.c (limited to 'tools') diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 3c338dc3b162..08283a841066 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -4,7 +4,11 @@ LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ regression1.o regression2.o regression3.o multiorder.o \ - iteration_check.o + iteration_check.o benchmark.o + +ifdef BENCHMARK + CFLAGS += -DBENCHMARK=1 +endif targets: $(TARGETS) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c new file mode 100644 index 000000000000..215ca86c7605 --- /dev/null +++ b/tools/testing/radix-tree/benchmark.c @@ -0,0 +1,98 @@ +/* + * benchmark.c: + * Author: Konstantin Khlebnikov + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include +#include +#include "test.h" + +#define NSEC_PER_SEC 1000000000L + +static long long benchmark_iter(struct radix_tree_root *root, bool tagged) +{ + volatile unsigned long sink = 0; + struct radix_tree_iter iter; + struct timespec start, finish; + long long nsec; + int l, loops = 1; + void **slot; + +#ifdef BENCHMARK +again: +#endif + clock_gettime(CLOCK_MONOTONIC, &start); + for (l = 0; l < loops; l++) { + if (tagged) { + radix_tree_for_each_tagged(slot, root, &iter, 0, 0) + sink ^= (unsigned long)slot; + } else { + radix_tree_for_each_slot(slot, root, &iter, 0) + sink ^= (unsigned long)slot; + } + } + clock_gettime(CLOCK_MONOTONIC, &finish); + + nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + + (finish.tv_nsec - start.tv_nsec); + +#ifdef BENCHMARK + if (loops == 1 && nsec * 5 < NSEC_PER_SEC) { + loops = NSEC_PER_SEC / nsec / 4 + 1; + goto again; + } +#endif + + nsec /= loops; + return nsec; +} + +static void benchmark_size(unsigned long size, unsigned long step, int order) +{ + RADIX_TREE(tree, GFP_KERNEL); + long long normal, tagged; + unsigned long index; + + for (index = 0 ; index < size ; index += step) { + item_insert_order(&tree, index, order); + radix_tree_tag_set(&tree, index, 0); + } + + tagged = benchmark_iter(&tree, true); + normal = benchmark_iter(&tree, false); + + printf("Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", + size, step, order, tagged, normal); + + item_kill_tree(&tree); + rcu_barrier(); +} + +void benchmark(void) +{ + unsigned long size[] = {1 << 10, 1 << 20, 0}; + unsigned long step[] = {1, 2, 7, 15, 63, 64, 65, + 128, 256, 512, 12345, 0}; + int c, s; + + printf("starting benchmarks\n"); + printf("RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_size(size[c], step[s], 0); + + for (c = 0; size[c]; c++) + for (s = 0; step[s]; s++) + benchmark_size(size[c], step[s] << 9, 9); +} diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index be98a47b4e1b..dbe4b92806b8 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -10,7 +10,11 @@ #include "../../include/linux/compiler.h" #include "../../../include/linux/kconfig.h" +#ifdef BENCHMARK +#define RADIX_TREE_MAP_SHIFT 6 +#else #define RADIX_TREE_MAP_SHIFT 3 +#endif #ifndef NULL #define NULL 0 diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 2eb694994497..f1d1e3bd9464 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -352,6 +352,8 @@ int main(int argc, char **argv) /* Free any remaining preallocated nodes */ radix_tree_cpu_dead(0); + benchmark(); + sleep(1); printf("after sleep(1): %d allocated, preempt %d\n", nr_allocated, preempt_count); diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 5d2fad05b263..215ab77a56e3 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -28,6 +28,7 @@ void item_kill_tree(struct radix_tree_root *root); void tag_check(void); void multiorder_checks(void); void iteration_test(void); +void benchmark(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); -- cgit v1.2.3-58-ga151 From af1c5cca9030f1bb935463ceb8274bfe82719128 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:17 -0800 Subject: radix tree test suite: use rcu_barrier Calling rcu_barrier() allows all of the rcu-freed memory to be actually returned to the pool, and allows nr_allocated to return to 0. As well as allowing diffs between runs to be more useful, it also lets us pinpoint leaks more effectively. Link: http://lkml.kernel.org/r/1480369871-5271-44-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/main.c | 12 ++++++++++-- tools/testing/radix-tree/tag_check.c | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index f1d1e3bd9464..76d9c95aa487 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -295,24 +295,31 @@ static void single_thread_tests(bool long_run) printf("starting single_thread_tests: %d allocated, preempt %d\n", nr_allocated, preempt_count); multiorder_checks(); + rcu_barrier(); printf("after multiorder_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); locate_check(); + rcu_barrier(); printf("after locate_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); tag_check(); + rcu_barrier(); printf("after tag_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); gang_check(); + rcu_barrier(); printf("after gang_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); add_and_check(); + rcu_barrier(); printf("after add_and_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); dynamic_height_check(); + rcu_barrier(); printf("after dynamic_height_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); big_gang_check(long_run); + rcu_barrier(); printf("after big_gang_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); for (i = 0; i < (long_run ? 2000 : 3); i++) { @@ -320,6 +327,7 @@ static void single_thread_tests(bool long_run) printf("%d ", i); fflush(stdout); } + rcu_barrier(); printf("after copy_tag_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); } @@ -354,8 +362,8 @@ int main(int argc, char **argv) benchmark(); - sleep(1); - printf("after sleep(1): %d allocated, preempt %d\n", + rcu_barrier(); + printf("after rcu_barrier: %d allocated, preempt %d\n", nr_allocated, preempt_count); rcu_unregister_thread(); diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index b0ac05741750..186f6e4914e4 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -51,6 +51,7 @@ void simple_checks(void) verify_tag_consistency(&tree, 1); printf("before item_kill_tree: %d allocated\n", nr_allocated); item_kill_tree(&tree); + rcu_barrier(); printf("after item_kill_tree: %d allocated\n", nr_allocated); } @@ -331,12 +332,16 @@ void tag_check(void) single_check(); extend_checks(); contract_checks(); + rcu_barrier(); printf("after extend_checks: %d allocated\n", nr_allocated); __leak_check(); leak_check(); + rcu_barrier(); printf("after leak_check: %d allocated\n", nr_allocated); simple_checks(); + rcu_barrier(); printf("after simple_checks: %d allocated\n", nr_allocated); thrash_tags(); + rcu_barrier(); printf("after thrash_tags: %d allocated\n", nr_allocated); } -- cgit v1.2.3-58-ga151 From 3ad75f8a1d9b047989059c67afc38d57161270e9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:20 -0800 Subject: radix tree test suite: handle exceptional entries item_kill_tree() assumes that everything in the tree is a pointer to a struct item, which is annoying when testing the behaviour of exceptional entries. Fix it to delete exceptional entries on the assumption they don't need to be freed. Link: http://lkml.kernel.org/r/1480369871-5271-45-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/test.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index a6e8099eaf4f..6f8dafc797ce 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -200,9 +200,16 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) void item_kill_tree(struct radix_tree_root *root) { + struct radix_tree_iter iter; + void **slot; struct item *items[32]; int nfound; + radix_tree_for_each_slot(slot, root, &iter, 0) { + if (radix_tree_exceptional_entry(*slot)) + radix_tree_delete(root, iter.index); + } + while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) { int i; -- cgit v1.2.3-58-ga151 From 101d9607fffefdfc9e3922f0ac9061a61edda1b0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:23 -0800 Subject: radix tree test suite: record order in each item This probably doubles the size of each item allocated by the test suite but it lets us check a few more things, and may be needed for upcoming API changes that require the caller pass in the order of the entry. Link: http://lkml.kernel.org/r/1480369871-5271-46-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/multiorder.c | 2 +- tools/testing/radix-tree/test.c | 29 +++++++++++++++++++---------- tools/testing/radix-tree/test.h | 6 +++--- 3 files changed, 23 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index d1be94667a30..8d5865c95664 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -125,7 +125,7 @@ static void multiorder_check(unsigned long index, int order) unsigned long min = index & ~((1UL << order) - 1); unsigned long max = min + (1UL << order); void **slot; - struct item *item2 = item_create(min); + struct item *item2 = item_create(min, order); RADIX_TREE(tree, GFP_KERNEL); printf("Multiorder index %ld, order %d\n", index, order); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 6f8dafc797ce..0de548939a2e 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -24,21 +24,29 @@ int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag) return radix_tree_tag_get(root, index, tag); } -int __item_insert(struct radix_tree_root *root, struct item *item, - unsigned order) +int __item_insert(struct radix_tree_root *root, struct item *item) { - return __radix_tree_insert(root, item->index, order, item); + return __radix_tree_insert(root, item->index, item->order, item); } int item_insert(struct radix_tree_root *root, unsigned long index) { - return __item_insert(root, item_create(index), 0); + return __item_insert(root, item_create(index, 0)); } int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order) { - return __item_insert(root, item_create(index), order); + return __item_insert(root, item_create(index, order)); +} + +void item_sanity(struct item *item, unsigned long index) +{ + unsigned long mask; + assert(!radix_tree_is_internal_node(item)); + assert(item->order < BITS_PER_LONG); + mask = (1UL << item->order) - 1; + assert((item->index | mask) == (index | mask)); } int item_delete(struct radix_tree_root *root, unsigned long index) @@ -46,18 +54,19 @@ int item_delete(struct radix_tree_root *root, unsigned long index) struct item *item = radix_tree_delete(root, index); if (item) { - assert(item->index == index); + item_sanity(item, index); free(item); return 1; } return 0; } -struct item *item_create(unsigned long index) +struct item *item_create(unsigned long index, unsigned int order) { struct item *ret = malloc(sizeof(*ret)); ret->index = index; + ret->order = order; return ret; } @@ -66,8 +75,8 @@ void item_check_present(struct radix_tree_root *root, unsigned long index) struct item *item; item = radix_tree_lookup(root, index); - assert(item != 0); - assert(item->index == index); + assert(item != NULL); + item_sanity(item, index); } struct item *item_lookup(struct radix_tree_root *root, unsigned long index) @@ -80,7 +89,7 @@ void item_check_absent(struct radix_tree_root *root, unsigned long index) struct item *item; item = radix_tree_lookup(root, index); - assert(item == 0); + assert(item == NULL); } /* diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 215ab77a56e3..423c528aaee9 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,11 +5,11 @@ struct item { unsigned long index; + unsigned int order; }; -struct item *item_create(unsigned long index); -int __item_insert(struct radix_tree_root *root, struct item *item, - unsigned order); +struct item *item_create(unsigned long index, unsigned int order); +int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); -- cgit v1.2.3-58-ga151 From b328daf3b7130098b105c18bdae694ddaad5b6e3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:26 -0800 Subject: tools: add more bitmap functions I need the following functions for the radix tree: bitmap_fill bitmap_empty bitmap_full Copy the implementations from include/linux/bitmap.h Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/linux/bitmap.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 43c1c5021e4b..eef41d500e9e 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -35,6 +35,32 @@ static inline void bitmap_zero(unsigned long *dst, int nbits) } } +static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) +{ + unsigned int nlongs = BITS_TO_LONGS(nbits); + if (!small_const_nbits(nbits)) { + unsigned int len = (nlongs - 1) * sizeof(unsigned long); + memset(dst, 0xff, len); + } + dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits); +} + +static inline int bitmap_empty(const unsigned long *src, unsigned nbits) +{ + if (small_const_nbits(nbits)) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + + return find_first_bit(src, nbits) == nbits; +} + +static inline int bitmap_full(const unsigned long *src, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + + return find_first_zero_bit(src, nbits) == nbits; +} + static inline int bitmap_weight(const unsigned long *src, int nbits) { if (small_const_nbits(nbits)) -- cgit v1.2.3-58-ga151 From 0629573e6bbd60f20ed2d7a91da1214a6274e751 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:29 -0800 Subject: radix tree test suite: use common find-bit code Remove the old find_next_bit code in favour of linking in the find_bit code from tools/lib. Link: http://lkml.kernel.org/r/1480369871-5271-48-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 7 ++- tools/testing/radix-tree/find_next_bit.c | 57 ---------------------- tools/testing/radix-tree/linux/bitops.h | 40 +++++++++------ tools/testing/radix-tree/linux/bitops/non-atomic.h | 13 +++-- tools/testing/radix-tree/linux/kernel.h | 11 +++++ 5 files changed, 48 insertions(+), 80 deletions(-) delete mode 100644 tools/testing/radix-tree/find_next_bit.c (limited to 'tools') diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 08283a841066..3635e4d3eca7 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -18,7 +18,12 @@ main: $(OFILES) clean: $(RM) -f $(TARGETS) *.o radix-tree.c -$(OFILES): *.h */*.h ../../../include/linux/radix-tree.h ../../include/linux/*.h +find_next_bit.o: ../../lib/find_bit.c + $(CC) $(CFLAGS) -c -o $@ $< + +$(OFILES): *.h */*.h \ + ../../include/linux/*.h \ + ../../../include/linux/radix-tree.h radix-tree.c: ../../../lib/radix-tree.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ diff --git a/tools/testing/radix-tree/find_next_bit.c b/tools/testing/radix-tree/find_next_bit.c deleted file mode 100644 index d1c2178bb2d4..000000000000 --- a/tools/testing/radix-tree/find_next_bit.c +++ /dev/null @@ -1,57 +0,0 @@ -/* find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -/* - * Find the next set bit in a memory region. - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); -} diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h index 71d58427ab60..a13e9bc76eec 100644 --- a/tools/testing/radix-tree/linux/bitops.h +++ b/tools/testing/radix-tree/linux/bitops.h @@ -2,9 +2,14 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include +#include +#include +#include -#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) /** * __set_bit - Set a bit in memory @@ -17,16 +22,16 @@ */ static inline void __set_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); *p |= mask; } static inline void __clear_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); *p &= ~mask; } @@ -42,8 +47,8 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) */ static inline void __change_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); *p ^= mask; } @@ -59,8 +64,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) */ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; *p = old | mask; @@ -78,8 +83,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) */ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; *p = old & ~mask; @@ -90,8 +95,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; *p = old ^ mask; @@ -105,7 +110,7 @@ static inline int __test_and_change_bit(int nr, */ static inline int test_bit(int nr, const volatile unsigned long *addr) { - return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } /** @@ -147,4 +152,9 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h index 46a825cf2ae1..6a1bcb9d2c4a 100644 --- a/tools/testing/radix-tree/linux/bitops/non-atomic.h +++ b/tools/testing/radix-tree/linux/bitops/non-atomic.h @@ -3,7 +3,6 @@ #include -#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) /** @@ -17,7 +16,7 @@ */ static inline void __set_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); + unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); *p |= mask; @@ -25,7 +24,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) static inline void __clear_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); + unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); *p &= ~mask; @@ -42,7 +41,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) */ static inline void __change_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); + unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); *p ^= mask; @@ -59,7 +58,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) */ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); + unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); unsigned long old = *p; @@ -78,7 +77,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) */ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); + unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); unsigned long old = *p; @@ -90,7 +89,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) { - unsigned long mask = BITOP_MASK(nr); + unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); unsigned long old = *p; diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index dbe4b92806b8..23e77f5673ce 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -47,4 +47,15 @@ static inline int in_interrupt(void) { return 0; } + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + #endif /* _KERNEL_H */ -- cgit v1.2.3-58-ga151 From 148deab223b23734069abcacb5c7118b0e7deadc Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:49 -0800 Subject: radix-tree: improve multiorder iterators This fixes several interlinked problems with the iterators in the presence of multiorder entries. 1. radix_tree_iter_next() would only advance by one slot, which would result in the iterators returning the same entry more than once if there were sibling entries. 2. radix_tree_next_slot() could return an internal pointer instead of a user pointer if a tagged multiorder entry was immediately followed by an entry of lower order. 3. radix_tree_next_slot() expanded to a lot more code than it used to when multiorder support was compiled in. And I wasn't comfortable with entry_to_node() being in a header file. Fixing radix_tree_iter_next() for the presence of sibling entries necessarily involves examining the contents of the radix tree, so we now need to pass 'slot' to radix_tree_iter_next(), and we need to change the calling convention so it is called *before* dropping the lock which protects the tree. Also rename it to radix_tree_iter_resume(), as some people thought it was necessary to call radix_tree_iter_next() each time around the loop. radix_tree_next_slot() becomes closer to how it looked before multiorder support was introduced. It only checks to see if the next entry in the chunk is a sibling entry or a pointer to a node; this should be rare enough that handling this case out of line is not a performance impact (and such impact is amortised by the fact that the entry we just processed was a multiorder entry). Also, radix_tree_next_slot() used to force a new chunk lookup for untagged entries, which is more expensive than the out of line sibling entry skipping. Link: http://lkml.kernel.org/r/1480369871-5271-55-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/tests/btrfs-tests.c | 2 +- include/linux/radix-tree.h | 71 +++++++-------- lib/radix-tree.c | 138 +++++++++++++++++++++++++---- mm/khugepaged.c | 7 +- mm/shmem.c | 6 +- tools/testing/radix-tree/iteration_check.c | 12 +-- tools/testing/radix-tree/multiorder.c | 28 ++++-- tools/testing/radix-tree/regression3.c | 8 +- tools/testing/radix-tree/test.h | 1 + 9 files changed, 188 insertions(+), 85 deletions(-) (limited to 'tools') diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 73076a0ea6a9..00ee006a8aa2 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -162,7 +162,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) slot = radix_tree_iter_retry(&iter); continue; } - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); spin_unlock(&fs_info->buffer_lock); free_extent_buffer_stale(eb); spin_lock(&fs_info->buffer_lock); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index d04073ac3a9f..289d007d487b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -403,20 +403,17 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) } /** - * radix_tree_iter_next - resume iterating when the chunk may be invalid - * @iter: iterator state + * radix_tree_iter_resume - resume iterating when the chunk may be invalid + * @slot: pointer to current slot + * @iter: iterator state + * Returns: New slot pointer * * If the iterator needs to release then reacquire a lock, the chunk may * have been invalidated by an insertion or deletion. Call this function - * to continue the iteration from the next index. + * before releasing the lock to continue the iteration from the next index. */ -static inline __must_check -void **radix_tree_iter_next(struct radix_tree_iter *iter) -{ - iter->next_index = __radix_tree_iter_add(iter, 1); - iter->tags = 0; - return NULL; -} +void **__must_check radix_tree_iter_resume(void **slot, + struct radix_tree_iter *iter); /** * radix_tree_chunk_size - get current chunk size @@ -430,10 +427,17 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) return (iter->next_index - iter->index) >> iter_shift(iter); } -static inline struct radix_tree_node *entry_to_node(void *ptr) +#ifdef CONFIG_RADIX_TREE_MULTIORDER +void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, + unsigned flags); +#else +/* Can't happen without sibling entries, but the compiler can't tell that */ +static inline void ** __radix_tree_next_slot(void **slot, + struct radix_tree_iter *iter, unsigned flags) { - return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); + return slot; } +#endif /** * radix_tree_next_slot - find next slot in chunk @@ -447,7 +451,7 @@ static inline struct radix_tree_node *entry_to_node(void *ptr) * For tagged lookup it also eats @iter->tags. * * There are several cases where 'slot' can be passed in as NULL to this - * function. These cases result from the use of radix_tree_iter_next() or + * function. These cases result from the use of radix_tree_iter_resume() or * radix_tree_iter_retry(). In these cases we don't end up dereferencing * 'slot' because either: * a) we are doing tagged iteration and iter->tags has been set to 0, or @@ -458,51 +462,31 @@ static __always_inline void ** radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { - void *canon = slot; - iter->tags >>= 1; if (unlikely(!iter->tags)) return NULL; - while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_internal_node(slot[1])) { - if (entry_to_node(slot[1]) == canon) { - iter->tags >>= 1; - iter->index = __radix_tree_iter_add(iter, 1); - slot++; - continue; - } - iter->next_index = __radix_tree_iter_add(iter, 1); - return NULL; - } if (likely(iter->tags & 1ul)) { iter->index = __radix_tree_iter_add(iter, 1); - return slot + 1; + slot++; + goto found; } if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); - iter->tags >>= offset; - iter->index = __radix_tree_iter_add(iter, offset + 1); - return slot + offset + 1; + iter->tags >>= offset++; + iter->index = __radix_tree_iter_add(iter, offset); + slot += offset; + goto found; } } else { long count = radix_tree_chunk_size(iter); - void *canon = slot; while (--count > 0) { slot++; iter->index = __radix_tree_iter_add(iter, 1); - if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_internal_node(*slot)) { - if (entry_to_node(*slot) == canon) - continue; - iter->next_index = iter->index; - break; - } - if (likely(*slot)) - return slot; + goto found; if (flags & RADIX_TREE_ITER_CONTIG) { /* forbid switching to the next chunk */ iter->next_index = 0; @@ -511,6 +495,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) } } return NULL; + + found: + if (unlikely(radix_tree_is_internal_node(*slot))) + return __radix_tree_next_slot(slot, iter, flags); + return slot; } /** @@ -561,6 +550,6 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) slot || (slot = radix_tree_next_chunk(root, iter, \ RADIX_TREE_ITER_TAGGED | tag)) ; \ slot = radix_tree_next_slot(slot, iter, \ - RADIX_TREE_ITER_TAGGED)) + RADIX_TREE_ITER_TAGGED | tag)) #endif /* _LINUX_RADIX_TREE_H */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index dbf4e8c8e100..0a7ac0fb4a65 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -69,6 +69,11 @@ struct radix_tree_preload { }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; +static inline struct radix_tree_node *entry_to_node(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); +} + static inline void *node_to_entry(void *ptr) { return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); @@ -1104,6 +1109,120 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter, #endif } +/* Construct iter->tags bit-mask from node->tags[tag] array */ +static void set_iter_tags(struct radix_tree_iter *iter, + struct radix_tree_node *node, unsigned offset, + unsigned tag) +{ + unsigned tag_long = offset / BITS_PER_LONG; + unsigned tag_bit = offset % BITS_PER_LONG; + + iter->tags = node->tags[tag][tag_long] >> tag_bit; + + /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ + if (tag_long < RADIX_TREE_TAG_LONGS - 1) { + /* Pick tags from next element */ + if (tag_bit) + iter->tags |= node->tags[tag][tag_long + 1] << + (BITS_PER_LONG - tag_bit); + /* Clip chunk size, here only BITS_PER_LONG tags */ + iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG); + } +} + +#ifdef CONFIG_RADIX_TREE_MULTIORDER +static void **skip_siblings(struct radix_tree_node **nodep, + void **slot, struct radix_tree_iter *iter) +{ + void *sib = node_to_entry(slot - 1); + + while (iter->index < iter->next_index) { + *nodep = rcu_dereference_raw(*slot); + if (*nodep && *nodep != sib) + return slot; + slot++; + iter->index = __radix_tree_iter_add(iter, 1); + iter->tags >>= 1; + } + + *nodep = NULL; + return NULL; +} + +void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, + unsigned flags) +{ + unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; + struct radix_tree_node *node = rcu_dereference_raw(*slot); + + slot = skip_siblings(&node, slot, iter); + + while (radix_tree_is_internal_node(node)) { + unsigned offset; + unsigned long next_index; + + if (node == RADIX_TREE_RETRY) + return slot; + node = entry_to_node(node); + iter->shift = node->shift; + + if (flags & RADIX_TREE_ITER_TAGGED) { + offset = radix_tree_find_next_bit(node, tag, 0); + if (offset == RADIX_TREE_MAP_SIZE) + return NULL; + slot = &node->slots[offset]; + iter->index = __radix_tree_iter_add(iter, offset); + set_iter_tags(iter, node, offset, tag); + node = rcu_dereference_raw(*slot); + } else { + offset = 0; + slot = &node->slots[0]; + for (;;) { + node = rcu_dereference_raw(*slot); + if (node) + break; + slot++; + offset++; + if (offset == RADIX_TREE_MAP_SIZE) + return NULL; + } + iter->index = __radix_tree_iter_add(iter, offset); + } + if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0)) + goto none; + next_index = (iter->index | shift_maxindex(iter->shift)) + 1; + if (next_index < iter->next_index) + iter->next_index = next_index; + } + + return slot; + none: + iter->next_index = 0; + return NULL; +} +EXPORT_SYMBOL(__radix_tree_next_slot); +#else +static void **skip_siblings(struct radix_tree_node **nodep, + void **slot, struct radix_tree_iter *iter) +{ + return slot; +} +#endif + +void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter) +{ + struct radix_tree_node *node; + + slot++; + iter->index = __radix_tree_iter_add(iter, 1); + node = rcu_dereference_raw(*slot); + skip_siblings(&node, slot, iter); + iter->next_index = iter->index; + iter->tags = 0; + return NULL; +} +EXPORT_SYMBOL(radix_tree_iter_resume); + /** * radix_tree_next_chunk - find next chunk of slots for iteration * @@ -1191,23 +1310,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, iter->next_index = (index | node_maxindex(node)) + 1; __set_iter_shift(iter, node->shift); - /* Construct iter->tags bit-mask from node->tags[tag] array */ - if (flags & RADIX_TREE_ITER_TAGGED) { - unsigned tag_long, tag_bit; - - tag_long = offset / BITS_PER_LONG; - tag_bit = offset % BITS_PER_LONG; - iter->tags = node->tags[tag][tag_long] >> tag_bit; - /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ - if (tag_long < RADIX_TREE_TAG_LONGS - 1) { - /* Pick tags from next element */ - if (tag_bit) - iter->tags |= node->tags[tag][tag_long + 1] << - (BITS_PER_LONG - tag_bit); - /* Clip chunk size, here only BITS_PER_LONG tags */ - iter->next_index = index + BITS_PER_LONG; - } - } + if (flags & RADIX_TREE_ITER_TAGGED) + set_iter_tags(iter, node, offset, tag); return node->slots + offset; } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 7434a63cac94..e32389a97030 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1446,7 +1446,7 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_replace_slot(&mapping->page_tree, slot, new_page + (index % HPAGE_PMD_NR)); - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); index++; continue; out_lru: @@ -1546,7 +1546,6 @@ tree_unlocked: /* Put holes back where they were */ radix_tree_delete(&mapping->page_tree, iter.index); - slot = radix_tree_iter_next(&iter); continue; } @@ -1557,11 +1556,11 @@ tree_unlocked: page_ref_unfreeze(page, 2); radix_tree_replace_slot(&mapping->page_tree, slot, page); + slot = radix_tree_iter_resume(slot, &iter); spin_unlock_irq(&mapping->tree_lock); putback_lru_page(page); unlock_page(page); spin_lock_irq(&mapping->tree_lock); - slot = radix_tree_iter_next(&iter); } VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); @@ -1641,8 +1640,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm, present++; if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); diff --git a/mm/shmem.c b/mm/shmem.c index abd7403aba41..be11c6dd414a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -661,8 +661,8 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping, swapped++; if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } @@ -2447,8 +2447,8 @@ static void shmem_tag_pins(struct address_space *mapping) } if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); @@ -2517,8 +2517,8 @@ static int shmem_wait_for_pins(struct address_space *mapping) spin_unlock_irq(&mapping->tree_lock); continue_resched: if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index df71cb841385..f328a66899b4 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -48,8 +48,8 @@ static void *add_entries_fn(void *arg) /* * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find * things that have been removed and randomly resetting our iteration to the - * next chunk with radix_tree_iter_next(). Both radix_tree_iter_retry() and - * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a + * next chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and + * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a * NULL 'slot' variable. */ static void *tagged_iteration_fn(void *arg) @@ -79,7 +79,7 @@ static void *tagged_iteration_fn(void *arg) } if (rand_r(&seeds[0]) % 50 == 0) { - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); rcu_read_unlock(); rcu_barrier(); rcu_read_lock(); @@ -96,8 +96,8 @@ static void *tagged_iteration_fn(void *arg) /* * Iterate over the entries, doing a radix_tree_iter_retry() as we find things * that have been removed and randomly resetting our iteration to the next - * chunk with radix_tree_iter_next(). Both radix_tree_iter_retry() and - * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a + * chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and + * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a * NULL 'slot' variable. */ static void *untagged_iteration_fn(void *arg) @@ -127,7 +127,7 @@ static void *untagged_iteration_fn(void *arg) } if (rand_r(&seeds[1]) % 50 == 0) { - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); rcu_read_unlock(); rcu_barrier(); rcu_read_lock(); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 8d5865c95664..b9be8856d652 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -231,11 +231,14 @@ void multiorder_iteration(void) radix_tree_for_each_slot(slot, &tree, &iter, j) { int height = order[i] / RADIX_TREE_MAP_SHIFT; int shift = height * RADIX_TREE_MAP_SHIFT; - int mask = (1 << order[i]) - 1; + unsigned long mask = (1UL << order[i]) - 1; + struct item *item = *slot; - assert(iter.index >= (index[i] &~ mask)); - assert(iter.index <= (index[i] | mask)); + assert((iter.index | mask) == (index[i] | mask)); assert(iter.shift == shift); + assert(!radix_tree_is_internal_node(item)); + assert((item->index | mask) == (index[i] | mask)); + assert(item->order == order[i]); i++; } } @@ -269,7 +272,7 @@ void multiorder_tagged_iteration(void) assert(radix_tree_tag_set(&tree, tag_index[i], 1)); for (j = 0; j < 256; j++) { - int mask, k; + int k; for (i = 0; i < TAG_ENTRIES; i++) { for (k = i; index[k] < tag_index[i]; k++) @@ -279,12 +282,16 @@ void multiorder_tagged_iteration(void) } radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) { + unsigned long mask; + struct item *item = *slot; for (k = i; index[k] < tag_index[i]; k++) ; - mask = (1 << order[k]) - 1; + mask = (1UL << order[k]) - 1; - assert(iter.index >= (tag_index[i] &~ mask)); - assert(iter.index <= (tag_index[i] | mask)); + assert((iter.index | mask) == (tag_index[i] | mask)); + assert(!radix_tree_is_internal_node(item)); + assert((item->index | mask) == (tag_index[i] | mask)); + assert(item->order == order[k]); i++; } } @@ -303,12 +310,15 @@ void multiorder_tagged_iteration(void) } radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) { + struct item *item = *slot; for (k = i; index[k] < tag_index[i]; k++) ; mask = (1 << order[k]) - 1; - assert(iter.index >= (tag_index[i] &~ mask)); - assert(iter.index <= (tag_index[i] | mask)); + assert((iter.index | mask) == (tag_index[i] | mask)); + assert(!radix_tree_is_internal_node(item)); + assert((item->index | mask) == (tag_index[i] | mask)); + assert(item->order == order[k]); i++; } } diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c index 1f06ed73d0a8..b594841fae85 100644 --- a/tools/testing/radix-tree/regression3.c +++ b/tools/testing/radix-tree/regression3.c @@ -5,7 +5,7 @@ * In following radix_tree_next_slot current chunk size becomes zero. * This isn't checked and it tries to dereference null pointer in slot. * - * Helper radix_tree_iter_next reset slot to NULL and next_index to index + 1, + * Helper radix_tree_iter_resume reset slot to NULL and next_index to index + 1, * for tagger iteraction it also must reset cached tags in iterator to abort * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk. * @@ -88,7 +88,7 @@ void regression3_test(void) printf("slot %ld %p\n", iter.index, *slot); if (!iter.index) { printf("next at %ld\n", iter.index); - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); } } @@ -96,7 +96,7 @@ void regression3_test(void) printf("contig %ld %p\n", iter.index, *slot); if (!iter.index) { printf("next at %ld\n", iter.index); - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); } } @@ -106,7 +106,7 @@ void regression3_test(void) printf("tagged %ld %p\n", iter.index, *slot); if (!iter.index) { printf("next at %ld\n", iter.index); - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); } } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 423c528aaee9..617416ec3c5e 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -41,6 +41,7 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag); extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ +struct radix_tree_node *entry_to_node(void *ptr); void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); -- cgit v1.2.3-58-ga151 From 478922e2b0f41567e4a530771bfb3f693f857d45 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:52 -0800 Subject: radix-tree: delete radix_tree_locate_item() This rather complicated function can be better implemented as an iterator. It has only one caller, so move the functionality to the only place that needs it. Update the test suite to follow the same pattern. Link: http://lkml.kernel.org/r/1480369871-5271-56-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Acked-by: Konstantin Khlebnikov Tested-by: Kirill A. Shutemov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 1 - lib/radix-tree.c | 99 ----------------------------------------- mm/shmem.c | 26 ++++++++++- tools/testing/radix-tree/main.c | 8 ++-- tools/testing/radix-tree/test.c | 22 +++++++++ tools/testing/radix-tree/test.h | 2 + 6 files changed, 53 insertions(+), 105 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 289d007d487b..a13d3f7c6c65 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -296,7 +296,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); -unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 0a7ac0fb4a65..5bdf1bdbca00 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1579,105 +1579,6 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); -#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) -#include /* for cond_resched() */ - -struct locate_info { - unsigned long found_index; - bool stop; -}; - -/* - * This linear search is at present only useful to shmem_unuse_inode(). - */ -static unsigned long __locate(struct radix_tree_node *slot, void *item, - unsigned long index, struct locate_info *info) -{ - unsigned long i; - - do { - unsigned int shift = slot->shift; - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK; - i < RADIX_TREE_MAP_SIZE; - i++, index += (1UL << shift)) { - struct radix_tree_node *node = - rcu_dereference_raw(slot->slots[i]); - if (node == RADIX_TREE_RETRY) - goto out; - if (!radix_tree_is_internal_node(node)) { - if (node == item) { - info->found_index = index; - info->stop = true; - goto out; - } - continue; - } - node = entry_to_node(node); - if (is_sibling_entry(slot, node)) - continue; - slot = node; - break; - } - } while (i < RADIX_TREE_MAP_SIZE); - -out: - if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) - info->stop = true; - return index; -} - -/** - * radix_tree_locate_item - search through radix tree for item - * @root: radix tree root - * @item: item to be found - * - * Returns index where item was found, or -1 if not found. - * Caller must hold no lock (since this time-consuming function needs - * to be preemptible), and must check afterwards if item is still there. - */ -unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) -{ - struct radix_tree_node *node; - unsigned long max_index; - unsigned long cur_index = 0; - struct locate_info info = { - .found_index = -1, - .stop = false, - }; - - do { - rcu_read_lock(); - node = rcu_dereference_raw(root->rnode); - if (!radix_tree_is_internal_node(node)) { - rcu_read_unlock(); - if (node == item) - info.found_index = 0; - break; - } - - node = entry_to_node(node); - - max_index = node_maxindex(node); - if (cur_index > max_index) { - rcu_read_unlock(); - break; - } - - cur_index = __locate(node, item, cur_index, &info); - rcu_read_unlock(); - cond_resched(); - } while (!info.stop && cur_index <= max_index); - - return info.found_index; -} -#else -unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) -{ - return -1; -} -#endif /* CONFIG_SHMEM && CONFIG_SWAP */ - /** * __radix_tree_delete_node - try to free node after clearing a slot * @root: radix tree root diff --git a/mm/shmem.c b/mm/shmem.c index be11c6dd414a..54287d443806 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1049,6 +1049,30 @@ static void shmem_evict_inode(struct inode *inode) clear_inode(inode); } +static unsigned long find_swap_entry(struct radix_tree_root *root, void *item) +{ + struct radix_tree_iter iter; + void **slot; + unsigned long found = -1; + unsigned int checked = 0; + + rcu_read_lock(); + radix_tree_for_each_slot(slot, root, &iter, 0) { + if (*slot == item) { + found = iter.index; + break; + } + checked++; + if ((checked % 4096) != 0) + continue; + slot = radix_tree_iter_resume(slot, &iter); + cond_resched_rcu(); + } + + rcu_read_unlock(); + return found; +} + /* * If swap found in inode, free it and move page from swapcache to filecache. */ @@ -1062,7 +1086,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, int error = 0; radswap = swp_to_radix_entry(swap); - index = radix_tree_locate_item(&mapping->page_tree, radswap); + index = find_swap_entry(&mapping->page_tree, radswap); if (index == -1) return -EAGAIN; /* tell shmem_unuse we found nothing */ diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 76d9c95aa487..a028dae8a043 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -239,7 +239,7 @@ static void __locate_check(struct radix_tree_root *tree, unsigned long index, item_insert_order(tree, index, order); item = item_lookup(tree, index); - index2 = radix_tree_locate_item(tree, item); + index2 = find_item(tree, item); if (index != index2) { printf("index %ld order %d inserted; found %ld\n", index, order, index2); @@ -273,17 +273,17 @@ static void locate_check(void) index += (1UL << order)) { __locate_check(&tree, index + offset, order); } - if (radix_tree_locate_item(&tree, &tree) != -1) + if (find_item(&tree, &tree) != -1) abort(); item_kill_tree(&tree); } } - if (radix_tree_locate_item(&tree, &tree) != -1) + if (find_item(&tree, &tree) != -1) abort(); __locate_check(&tree, -1, 0); - if (radix_tree_locate_item(&tree, &tree) != -1) + if (find_item(&tree, &tree) != -1) abort(); item_kill_tree(&tree); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 0de548939a2e..88bf57f7175e 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -151,6 +151,28 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, assert(nfound == 0); } +/* Use the same pattern as find_swap_entry() in mm/shmem.c */ +unsigned long find_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_iter iter; + void **slot; + unsigned long found = -1; + unsigned long checked = 0; + + radix_tree_for_each_slot(slot, root, &iter, 0) { + if (*slot == item) { + found = iter.index; + break; + } + checked++; + if ((checked % 4) != 0) + continue; + slot = radix_tree_iter_resume(slot, &iter); + } + + return found; +} + static int verify_node(struct radix_tree_node *slot, unsigned int tag, int tagged) { diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 617416ec3c5e..3d9d1d30da22 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -25,6 +25,8 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, unsigned long nr, int chunk); void item_kill_tree(struct radix_tree_root *root); +unsigned long find_item(struct radix_tree_root *, void *item); + void tag_check(void); void multiorder_checks(void); void iteration_test(void); -- cgit v1.2.3-58-ga151 From 268f42de718128cd0301293177e79c08c38e39a6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:55 -0800 Subject: radix-tree: delete radix_tree_range_tag_if_tagged() This is an exceptionally complicated function with just one caller (tag_pages_for_writeback). We devote a large portion of the runtime of the test suite to testing this one function which has one caller. By introducing the new function radix_tree_iter_tag_set(), we can eliminate all of the complexity while keeping the performance. The caller can now use a fairly standard radix_tree_for_each() loop, and it doesn't need to worry about tricksy things like 'start' wrapping. The test suite continues to spend a large amount of time investigating this function, but now it's testing the underlying primitives such as radix_tree_iter_resume() and the radix_tree_for_each_tagged() iterator which are also used by other parts of the kernel. Link: http://lkml.kernel.org/r/1480369871-5271-57-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 74 ++++++++++----------- lib/radix-tree.c | 117 ++++++--------------------------- mm/page-writeback.c | 28 +++++--- tools/testing/radix-tree/main.c | 12 ++-- tools/testing/radix-tree/multiorder.c | 13 ++-- tools/testing/radix-tree/regression2.c | 3 +- tools/testing/radix-tree/tag_check.c | 4 +- tools/testing/radix-tree/test.c | 34 ++++++++++ tools/testing/radix-tree/test.h | 3 + 9 files changed, 125 insertions(+), 163 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index a13d3f7c6c65..7a8d2516c73a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -120,6 +120,41 @@ static inline bool radix_tree_empty(struct radix_tree_root *root) return root->rnode == NULL; } +/** + * struct radix_tree_iter - radix tree iterator state + * + * @index: index of current slot + * @next_index: one beyond the last index for this chunk + * @tags: bit-mask for tag-iterating + * @node: node that contains current slot + * @shift: shift for the node that holds our slots + * + * This radix tree iterator works in terms of "chunks" of slots. A chunk is a + * subinterval of slots contained within one radix tree leaf node. It is + * described by a pointer to its first slot and a struct radix_tree_iter + * which holds the chunk's position in the tree and its size. For tagged + * iteration radix_tree_iter also holds the slots' bit-mask for one chosen + * radix tree tag. + */ +struct radix_tree_iter { + unsigned long index; + unsigned long next_index; + unsigned long tags; + struct radix_tree_node *node; +#ifdef CONFIG_RADIX_TREE_MULTIORDER + unsigned int shift; +#endif +}; + +static inline unsigned int iter_shift(const struct radix_tree_iter *iter) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + return iter->shift; +#else + return 0; +#endif +} + /** * Radix-tree synchronization * @@ -283,6 +318,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag); int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag); +void radix_tree_iter_tag_set(struct radix_tree_root *root, + const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, @@ -291,10 +328,6 @@ unsigned int radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, unsigned long first_index, unsigned int max_items, unsigned int tag); -unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, - unsigned long *first_indexp, unsigned long last_index, - unsigned long nr_to_tag, - unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); static inline void radix_tree_preload_end(void) @@ -302,39 +335,6 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } -/** - * struct radix_tree_iter - radix tree iterator state - * - * @index: index of current slot - * @next_index: one beyond the last index for this chunk - * @tags: bit-mask for tag-iterating - * @shift: shift for the node that holds our slots - * - * This radix tree iterator works in terms of "chunks" of slots. A chunk is a - * subinterval of slots contained within one radix tree leaf node. It is - * described by a pointer to its first slot and a struct radix_tree_iter - * which holds the chunk's position in the tree and its size. For tagged - * iteration radix_tree_iter also holds the slots' bit-mask for one chosen - * radix tree tag. - */ -struct radix_tree_iter { - unsigned long index; - unsigned long next_index; - unsigned long tags; -#ifdef CONFIG_RADIX_TREE_MULTIORDER - unsigned int shift; -#endif -}; - -static inline unsigned int iter_shift(struct radix_tree_iter *iter) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - return iter->shift; -#else - return 0; -#endif -} - #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 5bdf1bdbca00..d1f4ca5b7989 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -219,6 +219,11 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, return RADIX_TREE_MAP_SIZE; } +static unsigned int iter_offset(const struct radix_tree_iter *iter) +{ + return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; +} + /* * The maximum index which can be stored in a radix tree */ @@ -1014,6 +1019,18 @@ static void node_tag_set(struct radix_tree_root *root, root_tag_set(root, tag); } +/** + * radix_tree_iter_tag_set - set a tag on the current iterator entry + * @root: radix tree root + * @iter: iterator state + * @tag: tag to set + */ +void radix_tree_iter_tag_set(struct radix_tree_root *root, + const struct radix_tree_iter *iter, unsigned int tag) +{ + node_tag_set(root, iter->node, tag, iter_offset(iter)); +} + /** * radix_tree_tag_clear - clear a tag on a radix tree node * @root: radix tree root @@ -1164,6 +1181,7 @@ void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, if (node == RADIX_TREE_RETRY) return slot; node = entry_to_node(node); + iter->node = node; iter->shift = node->shift; if (flags & RADIX_TREE_ITER_TAGGED) { @@ -1266,6 +1284,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, iter->index = index; iter->next_index = maxindex + 1; iter->tags = 1; + iter->node = NULL; __set_iter_shift(iter, 0); return (void **)&root->rnode; } @@ -1308,6 +1327,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, /* Update the iterator state */ iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); iter->next_index = (index | node_maxindex(node)) + 1; + iter->node = node; __set_iter_shift(iter, node->shift); if (flags & RADIX_TREE_ITER_TAGGED) @@ -1317,103 +1337,6 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_next_chunk); -/** - * radix_tree_range_tag_if_tagged - for each item in given range set given - * tag if item has another tag set - * @root: radix tree root - * @first_indexp: pointer to a starting index of a range to scan - * @last_index: last index of a range to scan - * @nr_to_tag: maximum number items to tag - * @iftag: tag index to test - * @settag: tag index to set if tested tag is set - * - * This function scans range of radix tree from first_index to last_index - * (inclusive). For each item in the range if iftag is set, the function sets - * also settag. The function stops either after tagging nr_to_tag items or - * after reaching last_index. - * - * The tags must be set from the leaf level only and propagated back up the - * path to the root. We must do this so that we resolve the full path before - * setting any tags on intermediate nodes. If we set tags as we descend, then - * we can get to the leaf node and find that the index that has the iftag - * set is outside the range we are scanning. This reults in dangling tags and - * can lead to problems with later tag operations (e.g. livelocks on lookups). - * - * The function returns the number of leaves where the tag was set and sets - * *first_indexp to the first unscanned index. - * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must - * be prepared to handle that. - */ -unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, - unsigned long *first_indexp, unsigned long last_index, - unsigned long nr_to_tag, - unsigned int iftag, unsigned int settag) -{ - struct radix_tree_node *node, *child; - unsigned long maxindex; - unsigned long tagged = 0; - unsigned long index = *first_indexp; - - radix_tree_load_root(root, &child, &maxindex); - last_index = min(last_index, maxindex); - if (index > last_index) - return 0; - if (!nr_to_tag) - return 0; - if (!root_tag_get(root, iftag)) { - *first_indexp = last_index + 1; - return 0; - } - if (!radix_tree_is_internal_node(child)) { - *first_indexp = last_index + 1; - root_tag_set(root, settag); - return 1; - } - - node = entry_to_node(child); - - for (;;) { - unsigned offset = radix_tree_descend(node, &child, index); - if (!child) - goto next; - if (!tag_get(node, iftag, offset)) - goto next; - /* Sibling slots never have tags set on them */ - if (radix_tree_is_internal_node(child)) { - node = entry_to_node(child); - continue; - } - - tagged++; - node_tag_set(root, node, settag, offset); - next: - /* Go to next entry in node */ - index = ((index >> node->shift) + 1) << node->shift; - /* Overflow can happen when last_index is ~0UL... */ - if (index > last_index || !index) - break; - offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; - while (offset == 0) { - /* - * We've fully scanned this node. Go up. Because - * last_index is guaranteed to be in the tree, what - * we do below cannot wander astray. - */ - node = node->parent; - offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; - } - if (is_sibling_entry(node, node->slots[offset])) - goto next; - if (tagged >= nr_to_tag) - break; - } - - *first_indexp = index; - - return tagged; -} -EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); - /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 52e2f8e3b472..290e8b7d3181 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2106,18 +2106,26 @@ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end) { #define WRITEBACK_TAG_BATCH 4096 - unsigned long tagged; - - do { - spin_lock_irq(&mapping->tree_lock); - tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree, - &start, end, WRITEBACK_TAG_BATCH, - PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); + unsigned long tagged = 0; + struct radix_tree_iter iter; + void **slot; + + spin_lock_irq(&mapping->tree_lock); + radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start, + PAGECACHE_TAG_DIRTY) { + if (iter.index > end) + break; + radix_tree_iter_tag_set(&mapping->page_tree, &iter, + PAGECACHE_TAG_TOWRITE); + tagged++; + if ((tagged % WRITEBACK_TAG_BATCH) != 0) + continue; + slot = radix_tree_iter_resume(slot, &iter); spin_unlock_irq(&mapping->tree_lock); - WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH); cond_resched(); - /* We check 'start' to handle wrapping when end == ~0UL */ - } while (tagged >= WRITEBACK_TAG_BATCH && start); + spin_lock_irq(&mapping->tree_lock); + } + spin_unlock_irq(&mapping->tree_lock); } EXPORT_SYMBOL(tag_pages_for_writeback); diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index a028dae8a043..170175ca4105 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -205,8 +205,7 @@ void copy_tag_check(void) } // printf("\ncopying tags...\n"); - cur = start; - tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, ITEMS, 0, 1); + tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1); // printf("checking copied tags\n"); assert(tagged == count); @@ -214,16 +213,13 @@ void copy_tag_check(void) /* Copy tags in several rounds */ // printf("\ncopying tags...\n"); - cur = start; - do { - tmp = rand() % (count/10+2); - tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, tmp, 0, 2); - } while (tmp == tagged); + tmp = rand() % (count / 10 + 2); + tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2); + assert(tagged == count); // printf("%lu %lu %lu\n", tagged, tmp, count); // printf("checking copied tags\n"); check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2); - assert(tagged < tmp); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); verify_tag_consistency(&tree, 2); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index b9be8856d652..86daf23b3509 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -26,7 +26,6 @@ static void __multiorder_tag_test(int index, int order) { RADIX_TREE(tree, GFP_KERNEL); int base, err, i; - unsigned long first = 0; /* our canonical entry */ base = index & ~((1 << order) - 1); @@ -60,7 +59,7 @@ static void __multiorder_tag_test(int index, int order) assert(!radix_tree_tag_get(&tree, i, 1)); } - assert(radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, 10, 0, 1) == 1); + assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1); assert(radix_tree_tag_clear(&tree, index, 0)); for_each_index(i, base, order) { @@ -251,7 +250,6 @@ void multiorder_tagged_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; - unsigned long first = 0; int i, j; printf("Multiorder tagged iteration test\n"); @@ -296,8 +294,8 @@ void multiorder_tagged_iteration(void) } } - radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, - MT_NUM_ENTRIES, 1, 2); + assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) == + TAG_ENTRIES); for (j = 0; j < 256; j++) { int mask, k; @@ -323,9 +321,8 @@ void multiorder_tagged_iteration(void) } } - first = 1; - radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, - MT_NUM_ENTRIES, 1, 0); + assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0) + == TAG_ENTRIES); i = 0; radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) { assert(iter.index == tag_index[i]); diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index 63bf347aaf33..a41325d7a170 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -50,6 +50,7 @@ #include #include "regression.h" +#include "test.h" #define PAGECACHE_TAG_DIRTY 0 #define PAGECACHE_TAG_WRITEBACK 1 @@ -90,7 +91,7 @@ void regression2_test(void) /* 1. */ start = 0; end = max_slots - 2; - radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1, + tag_tagged_items(&mt_tree, NULL, start, end, 1, PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); /* 2. */ diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 186f6e4914e4..ed5f87d1dce2 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -23,7 +23,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) item_tag_set(tree, index, tag); ret = item_tag_get(tree, index, tag); assert(ret != 0); - ret = radix_tree_range_tag_if_tagged(tree, &first, ~0UL, 10, tag, !tag); + ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag); assert(ret == 1); ret = item_tag_get(tree, index, !tag); assert(ret != 0); @@ -320,7 +320,7 @@ static void single_check(void) assert(ret == 0); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); - ret = radix_tree_range_tag_if_tagged(&tree, &first, 10, 10, 0, 1); + ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1); assert(ret == 1); ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); assert(ret == 1); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 88bf57f7175e..e5726e373646 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -151,6 +151,40 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, assert(nfound == 0); } +/* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */ +int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock, + unsigned long start, unsigned long end, unsigned batch, + unsigned iftag, unsigned thentag) +{ + unsigned long tagged = 0; + struct radix_tree_iter iter; + void **slot; + + if (batch == 0) + batch = 1; + + if (lock) + pthread_mutex_lock(lock); + radix_tree_for_each_tagged(slot, root, &iter, start, iftag) { + if (iter.index > end) + break; + radix_tree_iter_tag_set(root, &iter, thentag); + tagged++; + if ((tagged % batch) != 0) + continue; + slot = radix_tree_iter_resume(slot, &iter); + if (lock) { + pthread_mutex_unlock(lock); + rcu_barrier(); + pthread_mutex_lock(lock); + } + } + if (lock) + pthread_mutex_unlock(lock); + + return tagged; +} + /* Use the same pattern as find_swap_entry() in mm/shmem.c */ unsigned long find_item(struct radix_tree_root *root, void *item) { diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 3d9d1d30da22..e11e4d260b4e 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -25,6 +25,9 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, unsigned long nr, int chunk); void item_kill_tree(struct radix_tree_root *root); +int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *, + unsigned long start, unsigned long end, unsigned batch, + unsigned iftag, unsigned thentag); unsigned long find_item(struct radix_tree_root *, void *item); void tag_check(void); -- cgit v1.2.3-58-ga151 From 175542f575723e43f897ddb09d0011c13f7cf0ec Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:08:58 -0800 Subject: radix-tree: add radix_tree_join This new function allows for the replacement of many smaller entries in the radix tree with one larger multiorder entry. From the point of view of an RCU walker, they may see a mixture of the smaller entries and the large entry during the same walk, but they will never see NULL for an index which was populated before the join. Link: http://lkml.kernel.org/r/1480369871-5271-58-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 3 + lib/radix-tree.c | 183 ++++++++++++++++++++++++++++------ tools/testing/radix-tree/multiorder.c | 58 +++++++++++ 3 files changed, 213 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 7a8d2516c73a..935293a24f7d 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -335,6 +335,9 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } +int radix_tree_join(struct radix_tree_root *, unsigned long index, + unsigned new_order, void *); + #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d1f4ca5b7989..962cfb3a7671 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -339,17 +339,14 @@ static void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); - int i; /* - * must only free zeroed nodes into the slab. radix_tree_shrink - * can leave us with a non-NULL entry in the first slot, so clear - * that here to make sure. + * Must only free zeroed nodes into the slab. We can be left with + * non-NULL entries by radix_tree_free_nodes, so clear the entries + * and tags here. */ - for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) - tag_clear(node, i, 0); - - node->slots[0] = NULL; + memset(node->slots, 0, sizeof(node->slots)); + memset(node->tags, 0, sizeof(node->tags)); INIT_LIST_HEAD(&node->private_list); kmem_cache_free(radix_tree_node_cachep, node); @@ -678,14 +675,14 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ + if (order > 0 && max == ((1UL << order) - 1)) + max++; if (max > maxindex) { int error = radix_tree_extend(root, max, shift); if (error < 0) return error; shift = error; child = root->rnode; - if (order == shift) - shift += RADIX_TREE_MAP_SHIFT; } while (shift > order) { @@ -697,6 +694,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, return -ENOMEM; child->shift = shift; child->offset = offset; + child->count = 0; + child->exceptional = 0; child->parent = node; rcu_assign_pointer(*slot, node_to_entry(child)); if (node) @@ -710,31 +709,121 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = &node->slots[offset]; } + if (nodep) + *nodep = node; + if (slotp) + *slotp = slot; + return 0; +} + #ifdef CONFIG_RADIX_TREE_MULTIORDER - /* Insert pointers to the canonical entry */ - if (order > shift) { - unsigned i, n = 1 << (order - shift); +/* + * Free any nodes below this node. The tree is presumed to not need + * shrinking, and any user data in the tree is presumed to not need a + * destructor called on it. If we need to add a destructor, we can + * add that functionality later. Note that we may not clear tags or + * slots from the tree as an RCU walker may still have a pointer into + * this subtree. We could replace the entries with RADIX_TREE_RETRY, + * but we'll still have to clear those in rcu_free. + */ +static void radix_tree_free_nodes(struct radix_tree_node *node) +{ + unsigned offset = 0; + struct radix_tree_node *child = entry_to_node(node); + + for (;;) { + void *entry = child->slots[offset]; + if (radix_tree_is_internal_node(entry) && + !is_sibling_entry(child, entry)) { + child = entry_to_node(entry); + offset = 0; + continue; + } + offset++; + while (offset == RADIX_TREE_MAP_SIZE) { + struct radix_tree_node *old = child; + offset = child->offset + 1; + child = child->parent; + radix_tree_node_free(old); + if (old == entry_to_node(node)) + return; + } + } +} + +static inline int insert_entries(struct radix_tree_node *node, void **slot, + void *item, unsigned order, bool replace) +{ + struct radix_tree_node *child; + unsigned i, n, tag, offset, tags = 0; + + if (node) { + n = 1 << (order - node->shift); + offset = get_slot_offset(node, slot); + } else { + n = 1; + offset = 0; + } + + if (n > 1) { offset = offset & ~(n - 1); slot = &node->slots[offset]; - child = node_to_entry(slot); - for (i = 0; i < n; i++) { - if (slot[i]) + } + child = node_to_entry(slot); + + for (i = 0; i < n; i++) { + if (slot[i]) { + if (replace) { + node->count--; + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tag_get(node, tag, offset + i)) + tags |= 1 << tag; + } else return -EEXIST; } + } - for (i = 1; i < n; i++) { + for (i = 0; i < n; i++) { + struct radix_tree_node *old = slot[i]; + if (i) { rcu_assign_pointer(slot[i], child); - node->count++; + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_clear(node, tag, offset + i); + } else { + rcu_assign_pointer(slot[i], item); + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_set(node, tag, offset); } + if (radix_tree_is_internal_node(old) && + !is_sibling_entry(node, old)) + radix_tree_free_nodes(old); + if (radix_tree_exceptional_entry(old)) + node->exceptional--; } -#endif - - if (nodep) - *nodep = node; - if (slotp) - *slotp = slot; - return 0; + if (node) { + node->count += n; + if (radix_tree_exceptional_entry(item)) + node->exceptional += n; + } + return n; } +#else +static inline int insert_entries(struct radix_tree_node *node, void **slot, + void *item, unsigned order, bool replace) +{ + if (*slot) + return -EEXIST; + rcu_assign_pointer(*slot, item); + if (node) { + node->count++; + if (radix_tree_exceptional_entry(item)) + node->exceptional++; + } + return 1; +} +#endif /** * __radix_tree_insert - insert into a radix tree @@ -757,15 +846,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, error = __radix_tree_create(root, index, order, &node, &slot); if (error) return error; - if (*slot != NULL) - return -EEXIST; - rcu_assign_pointer(*slot, item); + + error = insert_entries(node, slot, item, order, false); + if (error < 0) + return error; if (node) { unsigned offset = get_slot_offset(node, slot); - node->count++; - if (radix_tree_exceptional_entry(item)) - node->exceptional++; BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); BUG_ON(tag_get(node, 2, offset)); @@ -942,6 +1029,40 @@ void radix_tree_replace_slot(struct radix_tree_root *root, replace_slot(root, NULL, slot, item, true); } +#ifdef CONFIG_RADIX_TREE_MULTIORDER +/** + * radix_tree_join - replace multiple entries with one multiorder entry + * @root: radix tree root + * @index: an index inside the new entry + * @order: order of the new entry + * @item: new entry + * + * Call this function to replace several entries with one larger entry. + * The existing entries are presumed to not need freeing as a result of + * this call. + * + * The replacement entry will have all the tags set on it that were set + * on any of the entries it is replacing. + */ +int radix_tree_join(struct radix_tree_root *root, unsigned long index, + unsigned order, void *item) +{ + struct radix_tree_node *node; + void **slot; + int error; + + BUG_ON(radix_tree_is_internal_node(item)); + + error = __radix_tree_create(root, index, order, &node, &slot); + if (!error) + error = insert_entries(node, slot, item, order, true); + if (error > 0) + error = 0; + + return error; +} +#endif + /** * radix_tree_tag_set - set a tag on a radix tree node * @root: radix tree root diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 86daf23b3509..c9f656cf5f52 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -332,6 +332,63 @@ void multiorder_tagged_iteration(void) item_kill_tree(&tree); } +static void __multiorder_join(unsigned long index, + unsigned order1, unsigned order2) +{ + unsigned long loc; + void *item, *item2 = item_create(index + 1, order1); + RADIX_TREE(tree, GFP_KERNEL); + + item_insert_order(&tree, index, order2); + item = radix_tree_lookup(&tree, index); + radix_tree_join(&tree, index + 1, order1, item2); + loc = find_item(&tree, item); + if (loc == -1) + free(item); + item = radix_tree_lookup(&tree, index + 1); + assert(item == item2); + item_kill_tree(&tree); +} + +static void __multiorder_join2(unsigned order1, unsigned order2) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + void *item1 = item_create(0, order1); + void *item2; + + item_insert_order(&tree, 0, order2); + radix_tree_insert(&tree, 1 << order2, (void *)0x12UL); + item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); + assert(item2 == (void *)0x12UL); + assert(node->exceptional == 1); + + radix_tree_join(&tree, 0, order1, item1); + item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); + assert(item2 == item1); + assert(node->exceptional == 0); + item_kill_tree(&tree); +} + +static void multiorder_join(void) +{ + int i, j, idx; + + for (idx = 0; idx < 1024; idx = idx * 2 + 3) { + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + __multiorder_join(idx, i, j); + } + } + } + + for (i = 1; i < 15; i++) { + for (j = 0; j < i; j++) { + __multiorder_join2(i, j); + } + } +} + void multiorder_checks(void) { int i; @@ -349,4 +406,5 @@ void multiorder_checks(void) multiorder_tag_tests(); multiorder_iteration(); multiorder_tagged_iteration(); + multiorder_join(); } -- cgit v1.2.3-58-ga151 From e157b555945fb16ddc6cce605a1eb6b4135ea5f1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:01 -0800 Subject: radix-tree: add radix_tree_split This new function splits a larger multiorder entry into smaller entries (potentially multi-order entries). These entries are initialised to RADIX_TREE_RETRY to ensure that RCU walkers who see this state aren't confused. The caller should then call radix_tree_for_each_slot() and radix_tree_replace_slot() in order to turn these retry entries into the intended new entries. Tags are replicated from the original multiorder entry into each new entry. Link: http://lkml.kernel.org/r/1480369871-5271-59-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 12 +++ lib/radix-tree.c | 142 +++++++++++++++++++++++++++++++++- tools/testing/radix-tree/multiorder.c | 64 +++++++++++++++ 3 files changed, 214 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 935293a24f7d..1f4b56120de8 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -80,6 +80,14 @@ static inline bool radix_tree_is_internal_node(void *ptr) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) +/* + * @count is the count of every non-NULL element in the ->slots array + * whether that is an exceptional entry, a retry entry, a user pointer, + * a sibling entry or a pointer to the next level of the tree. + * @exceptional is the count of every element in ->slots which is + * either radix_tree_exceptional_entry() or is a sibling entry for an + * exceptional entry. + */ struct radix_tree_node { unsigned char shift; /* Bits remaining in each slot */ unsigned char offset; /* Slot offset in parent */ @@ -293,6 +301,8 @@ void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, void **slot, void *item, radix_tree_update_node_t update_node, void *private); +void radix_tree_iter_replace(struct radix_tree_root *, + const struct radix_tree_iter *, void **slot, void *item); void radix_tree_replace_slot(struct radix_tree_root *root, void **slot, void *item); void __radix_tree_delete_node(struct radix_tree_root *root, @@ -335,6 +345,8 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } +int radix_tree_split(struct radix_tree_root *, unsigned long index, + unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 962cfb3a7671..ade2ed3f5190 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -22,6 +22,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -758,7 +759,10 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, unsigned i, n, tag, offset, tags = 0; if (node) { - n = 1 << (order - node->shift); + if (order > node->shift) + n = 1 << (order - node->shift); + else + n = 1; offset = get_slot_offset(node, slot); } else { n = 1; @@ -797,7 +801,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, tag_set(node, tag, offset); } if (radix_tree_is_internal_node(old) && - !is_sibling_entry(node, old)) + !is_sibling_entry(node, old) && + (old != RADIX_TREE_RETRY)) radix_tree_free_nodes(old); if (radix_tree_exceptional_entry(old)) node->exceptional--; @@ -1021,7 +1026,8 @@ void __radix_tree_replace(struct radix_tree_root *root, * NOTE: This cannot be used to switch between non-entries (empty slots), * regular entries, and exceptional entries, as that requires accounting * inside the radix tree node. When switching from one type of entry or - * deleting, use __radix_tree_lookup() and __radix_tree_replace(). + * deleting, use __radix_tree_lookup() and __radix_tree_replace() or + * radix_tree_iter_replace(). */ void radix_tree_replace_slot(struct radix_tree_root *root, void **slot, void *item) @@ -1029,6 +1035,21 @@ void radix_tree_replace_slot(struct radix_tree_root *root, replace_slot(root, NULL, slot, item, true); } +/** + * radix_tree_iter_replace - replace item in a slot + * @root: radix tree root + * @slot: pointer to slot + * @item: new item to store in the slot. + * + * For use with radix_tree_split() and radix_tree_for_each_slot(). + * Caller must hold tree write locked across split and replacement. + */ +void radix_tree_iter_replace(struct radix_tree_root *root, + const struct radix_tree_iter *iter, void **slot, void *item) +{ + __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); +} + #ifdef CONFIG_RADIX_TREE_MULTIORDER /** * radix_tree_join - replace multiple entries with one multiorder entry @@ -1061,6 +1082,117 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index, return error; } + +/** + * radix_tree_split - Split an entry into smaller entries + * @root: radix tree root + * @index: An index within the large entry + * @order: Order of new entries + * + * Call this function as the first step in replacing a multiorder entry + * with several entries of lower order. After this function returns, + * loop over the relevant portion of the tree using radix_tree_for_each_slot() + * and call radix_tree_iter_replace() to set up each new entry. + * + * The tags from this entry are replicated to all the new entries. + * + * The radix tree should be locked against modification during the entire + * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which + * should prompt RCU walkers to restart the lookup from the root. + */ +int radix_tree_split(struct radix_tree_root *root, unsigned long index, + unsigned order) +{ + struct radix_tree_node *parent, *node, *child; + void **slot; + unsigned int offset, end; + unsigned n, tag, tags = 0; + + if (!__radix_tree_lookup(root, index, &parent, &slot)) + return -ENOENT; + if (!parent) + return -ENOENT; + + offset = get_slot_offset(parent, slot); + + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tag_get(parent, tag, offset)) + tags |= 1 << tag; + + for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { + if (!is_sibling_entry(parent, parent->slots[end])) + break; + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_set(parent, tag, end); + /* rcu_assign_pointer ensures tags are set before RETRY */ + rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); + } + rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); + parent->exceptional -= (end - offset); + + if (order == parent->shift) + return 0; + if (order > parent->shift) { + while (offset < end) + offset += insert_entries(parent, &parent->slots[offset], + RADIX_TREE_RETRY, order, true); + return 0; + } + + node = parent; + + for (;;) { + if (node->shift > order) { + child = radix_tree_node_alloc(root); + if (!child) + goto nomem; + child->shift = node->shift - RADIX_TREE_MAP_SHIFT; + child->offset = offset; + child->count = 0; + child->parent = node; + if (node != parent) { + node->count++; + node->slots[offset] = node_to_entry(child); + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_set(node, tag, offset); + } + + node = child; + offset = 0; + continue; + } + + n = insert_entries(node, &node->slots[offset], + RADIX_TREE_RETRY, order, false); + BUG_ON(n > RADIX_TREE_MAP_SIZE); + + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + if (tags & (1 << tag)) + tag_set(node, tag, offset); + offset += n; + + while (offset == RADIX_TREE_MAP_SIZE) { + if (node == parent) + break; + offset = node->offset; + child = node; + node = node->parent; + rcu_assign_pointer(node->slots[offset], + node_to_entry(child)); + offset++; + } + if ((node == parent) && (offset == end)) + return 0; + } + + nomem: + /* Shouldn't happen; did user forget to preload? */ + /* TODO: free all the allocated nodes */ + WARN_ON(1); + return -ENOMEM; +} #endif /** @@ -1441,8 +1573,10 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, child = rcu_dereference_raw(node->slots[offset]); } - if ((child == NULL) || (child == RADIX_TREE_RETRY)) + if (!child) goto restart; + if (child == RADIX_TREE_RETRY) + break; } while (radix_tree_is_internal_node(child)); /* Update the iterator state */ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index c9f656cf5f52..fa6effe997a3 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -389,6 +389,69 @@ static void multiorder_join(void) } } +static void __multiorder_split(int old_order, int new_order) +{ + RADIX_TREE(tree, GFP_KERNEL); + void **slot; + struct radix_tree_iter iter; + struct radix_tree_node *node; + void *item; + + item_insert_order(&tree, 0, old_order); + radix_tree_tag_set(&tree, 0, 2); + radix_tree_split(&tree, 0, new_order); + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_iter_replace(&tree, &iter, slot, + item_create(iter.index, new_order)); + } + + item_kill_tree(&tree); + + __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + + item = __radix_tree_lookup(&tree, 0, &node, NULL); + assert(item == (void *)0x12); + assert(node->exceptional > 0); + + radix_tree_split(&tree, 0, new_order); + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_iter_replace(&tree, &iter, slot, + item_create(iter.index, new_order)); + } + + item = __radix_tree_lookup(&tree, 0, &node, NULL); + assert(item != (void *)0x12); + assert(node->exceptional == 0); + + item_kill_tree(&tree); + + __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + + item = __radix_tree_lookup(&tree, 0, &node, NULL); + assert(item == (void *)0x12); + assert(node->exceptional > 0); + + radix_tree_split(&tree, 0, new_order); + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); + } + + item = __radix_tree_lookup(&tree, 0, &node, NULL); + assert(item == (void *)0x16); + assert(node->exceptional > 0); + + item_kill_tree(&tree); +} + +static void multiorder_split(void) +{ + int i, j; + + for (i = 9; i < 19; i++) + for (j = 0; j < i; j++) + __multiorder_split(i, j); +} + void multiorder_checks(void) { int i; @@ -407,4 +470,5 @@ void multiorder_checks(void) multiorder_iteration(); multiorder_tagged_iteration(); multiorder_join(); + multiorder_split(); } -- cgit v1.2.3-58-ga151 From 2791653a6814d170fa893344618563a7b1da95c6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:04 -0800 Subject: radix-tree: add radix_tree_split_preload() Calculate how many nodes we need to allocate to split an old_order entry into multiple entries, each of size new_order. The test suite checks that we allocated exactly the right number of nodes; neither too many (checked by rtp->nr == 0), nor too few (checked by comparing nr_allocated before and after the call to radix_tree_split()). Link: http://lkml.kernel.org/r/1480369871-5271-60-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 1 + lib/radix-tree.c | 24 +++++++++++++++++++- tools/testing/radix-tree/multiorder.c | 42 +++++++++++++++++++++++++++++++++-- tools/testing/radix-tree/test.h | 5 +++++ 4 files changed, 69 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 1f4b56120de8..5dea8f6440e4 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -345,6 +345,7 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } +int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t); int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ade2ed3f5190..be1183e62590 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -368,7 +368,7 @@ radix_tree_node_free(struct radix_tree_node *node) * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ -static int __radix_tree_preload(gfp_t gfp_mask, int nr) +static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) { struct radix_tree_preload *rtp; struct radix_tree_node *node; @@ -434,6 +434,28 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(radix_tree_maybe_preload); +#ifdef CONFIG_RADIX_TREE_MULTIORDER +/* + * Preload with enough objects to ensure that we can split a single entry + * of order @old_order into many entries of size @new_order + */ +int radix_tree_split_preload(unsigned int old_order, unsigned int new_order, + gfp_t gfp_mask) +{ + unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT); + unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) - + (new_order / RADIX_TREE_MAP_SHIFT); + unsigned nr = 0; + + WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); + BUG_ON(new_order >= old_order); + + while (layers--) + nr = nr * RADIX_TREE_MAP_SIZE + 1; + return __radix_tree_preload(gfp_mask, top * nr); +} +#endif + /* * The same as function above, but preload number of nodes required to insert * (1 << order) continuous naturally-aligned elements. diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index fa6effe997a3..5421f015f46c 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -389,35 +389,67 @@ static void multiorder_join(void) } } +static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc) +{ + struct radix_tree_preload *rtp = &radix_tree_preloads; + if (rtp->nr != 0) + printf("split(%u %u) remaining %u\n", old_order, new_order, + rtp->nr); + /* + * Can't check for equality here as some nodes may have been + * RCU-freed while we ran. But we should never finish with more + * nodes allocated since they should have all been preloaded. + */ + if (nr_allocated > alloc) + printf("split(%u %u) allocated %u %u\n", old_order, new_order, + alloc, nr_allocated); +} + static void __multiorder_split(int old_order, int new_order) { - RADIX_TREE(tree, GFP_KERNEL); + RADIX_TREE(tree, GFP_ATOMIC); void **slot; struct radix_tree_iter iter; struct radix_tree_node *node; void *item; + unsigned alloc; + + radix_tree_preload(GFP_KERNEL); + assert(item_insert_order(&tree, 0, old_order) == 0); + radix_tree_preload_end(); + + /* Wipe out the preloaded cache or it'll confuse check_mem() */ + radix_tree_cpu_dead(0); - item_insert_order(&tree, 0, old_order); radix_tree_tag_set(&tree, 0, 2); + + radix_tree_split_preload(old_order, new_order, GFP_KERNEL); + alloc = nr_allocated; radix_tree_split(&tree, 0, new_order); + check_mem(old_order, new_order, alloc); radix_tree_for_each_slot(slot, &tree, &iter, 0) { radix_tree_iter_replace(&tree, &iter, slot, item_create(iter.index, new_order)); } + radix_tree_preload_end(); item_kill_tree(&tree); + radix_tree_preload(GFP_KERNEL); __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == (void *)0x12); assert(node->exceptional > 0); + radix_tree_split_preload(old_order, new_order, GFP_KERNEL); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { radix_tree_iter_replace(&tree, &iter, slot, item_create(iter.index, new_order)); } + radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item != (void *)0x12); @@ -425,16 +457,20 @@ static void __multiorder_split(int old_order, int new_order) item_kill_tree(&tree); + radix_tree_preload(GFP_KERNEL); __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == (void *)0x12); assert(node->exceptional > 0); + radix_tree_split_preload(old_order, new_order, GFP_KERNEL); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); } + radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == (void *)0x16); @@ -471,4 +507,6 @@ void multiorder_checks(void) multiorder_tagged_iteration(); multiorder_join(); multiorder_split(); + + radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index e11e4d260b4e..7c2611caa6d2 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -52,3 +52,8 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); int radix_tree_cpu_dead(unsigned int cpu); +struct radix_tree_preload { + unsigned nr; + struct radix_tree_node *nodes; +}; +extern struct radix_tree_preload radix_tree_preloads; -- cgit v1.2.3-58-ga151 From a90eb3a2a405cf7e96093ed531a285067dfdbc9d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:07 -0800 Subject: radix-tree: fix replacement for multiorder entries When replacing an entry with NULL, we need to delete any sibling entries. Also account deleting exceptional entries properly. Also fix a bug with radix_tree_iter_replace() where we would fail to remove entirely freed nodes. Also fix accounting bug when switching between normal and exceptional entries with replace_slot. Also add testcases for all these bugs. Link: http://lkml.kernel.org/r/1480369871-5271-61-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 60 +++++++++++++++++------- tools/testing/radix-tree/multiorder.c | 87 ++++++++++++++++++++++++++++++----- 2 files changed, 119 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index be1183e62590..d09c17dd60ae 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -977,6 +977,24 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_lookup); +static inline int slot_count(struct radix_tree_node *node, + void **slot) +{ + int n = 1; +#ifdef CONFIG_RADIX_TREE_MULTIORDER + void *ptr = node_to_entry(slot); + unsigned offset = get_slot_offset(node, slot); + int i; + + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr) + break; + n++; + } +#endif + return n; +} + static void replace_slot(struct radix_tree_root *root, struct radix_tree_node *node, void **slot, void *item, @@ -995,12 +1013,35 @@ static void replace_slot(struct radix_tree_root *root, if (node) { node->count += count; - node->exceptional += exceptional; + if (exceptional) { + exceptional *= slot_count(node, slot); + node->exceptional += exceptional; + } } rcu_assign_pointer(*slot, item); } +static inline void delete_sibling_entries(struct radix_tree_node *node, + void **slot) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + bool exceptional = radix_tree_exceptional_entry(*slot); + void *ptr = node_to_entry(slot); + unsigned offset = get_slot_offset(node, slot); + int i; + + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr) + break; + node->slots[offset + i] = NULL; + node->count--; + if (exceptional) + node->exceptional--; + } +#endif +} + /** * __radix_tree_replace - replace item in a slot * @root: radix tree root @@ -1018,6 +1059,8 @@ void __radix_tree_replace(struct radix_tree_root *root, void **slot, void *item, radix_tree_update_node_t update_node, void *private) { + if (!item) + delete_sibling_entries(node, slot); /* * This function supports replacing exceptional entries and * deleting entries, but that needs accounting against the @@ -1794,20 +1837,6 @@ void __radix_tree_delete_node(struct radix_tree_root *root, delete_node(root, node, NULL, NULL); } -static inline void delete_sibling_entries(struct radix_tree_node *node, - void *ptr, unsigned offset) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - int i; - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr) - break; - node->slots[offset + i] = NULL; - node->count--; - } -#endif -} - /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root @@ -1847,7 +1876,6 @@ void *radix_tree_delete_item(struct radix_tree_root *root, for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); - delete_sibling_entries(node, node_to_entry(slot), offset); __radix_tree_replace(root, node, slot, NULL, NULL, NULL); return entry; diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 5421f015f46c..9757b8928bd4 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -410,8 +410,6 @@ static void __multiorder_split(int old_order, int new_order) RADIX_TREE(tree, GFP_ATOMIC); void **slot; struct radix_tree_iter iter; - struct radix_tree_node *node; - void *item; unsigned alloc; radix_tree_preload(GFP_KERNEL); @@ -434,58 +432,122 @@ static void __multiorder_split(int old_order, int new_order) radix_tree_preload_end(); item_kill_tree(&tree); +} + +static void __multiorder_split2(int old_order, int new_order) +{ + RADIX_TREE(tree, GFP_KERNEL); + void **slot; + struct radix_tree_iter iter; + struct radix_tree_node *node; + void *item; - radix_tree_preload(GFP_KERNEL); __radix_tree_insert(&tree, 0, old_order, (void *)0x12); - radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == (void *)0x12); assert(node->exceptional > 0); - radix_tree_split_preload(old_order, new_order, GFP_KERNEL); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { radix_tree_iter_replace(&tree, &iter, slot, item_create(iter.index, new_order)); } - radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item != (void *)0x12); assert(node->exceptional == 0); item_kill_tree(&tree); +} + +static void __multiorder_split3(int old_order, int new_order) +{ + RADIX_TREE(tree, GFP_KERNEL); + void **slot; + struct radix_tree_iter iter; + struct radix_tree_node *node; + void *item; - radix_tree_preload(GFP_KERNEL); __radix_tree_insert(&tree, 0, old_order, (void *)0x12); - radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == (void *)0x12); assert(node->exceptional > 0); - radix_tree_split_preload(old_order, new_order, GFP_KERNEL); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); } - radix_tree_preload_end(); item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == (void *)0x16); assert(node->exceptional > 0); item_kill_tree(&tree); + + __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + + item = __radix_tree_lookup(&tree, 0, &node, NULL); + assert(item == (void *)0x12); + assert(node->exceptional > 0); + + radix_tree_split(&tree, 0, new_order); + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + if (iter.index == (1 << new_order)) + radix_tree_iter_replace(&tree, &iter, slot, + (void *)0x16); + else + radix_tree_iter_replace(&tree, &iter, slot, NULL); + } + + item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); + assert(item == (void *)0x16); + assert(node->count == node->exceptional); + do { + node = node->parent; + if (!node) + break; + assert(node->count == 1); + assert(node->exceptional == 0); + } while (1); + + item_kill_tree(&tree); } static void multiorder_split(void) { int i, j; - for (i = 9; i < 19; i++) - for (j = 0; j < i; j++) + for (i = 3; i < 11; i++) + for (j = 0; j < i; j++) { __multiorder_split(i, j); + __multiorder_split2(i, j); + __multiorder_split3(i, j); + } +} + +static void multiorder_account(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + void **slot; + + item_insert_order(&tree, 0, 5); + + __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); + __radix_tree_lookup(&tree, 0, &node, NULL); + assert(node->count == node->exceptional * 2); + radix_tree_delete(&tree, 1 << 5); + assert(node->exceptional == 0); + + __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); + __radix_tree_lookup(&tree, 1 << 5, &node, &slot); + assert(node->count == node->exceptional * 2); + __radix_tree_replace(&tree, node, slot, NULL, NULL, NULL); + assert(node->exceptional == 0); + + item_kill_tree(&tree); } void multiorder_checks(void) @@ -507,6 +569,7 @@ void multiorder_checks(void) multiorder_tagged_iteration(); multiorder_join(); multiorder_split(); + multiorder_account(); radix_tree_cpu_dead(0); } -- cgit v1.2.3-58-ga151 From 3e3cdc68bede179a957fcd6be7b833a83df4e5de Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:10 -0800 Subject: radix tree test suite: check multiorder iteration The random iteration test only inserts order-0 entries currently. Update it to insert entries of order between 7 and 0. Also make the maximum index configurable, make some variables static, make the test duration variable, remove some useless spinning, and add a fifth thread which calls tag_tagged_items(). Link: http://lkml.kernel.org/r/1480369871-5271-62-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/iteration_check.c | 80 ++++++++++++++++++------------ tools/testing/radix-tree/main.c | 3 +- tools/testing/radix-tree/multiorder.c | 23 +++++++++ tools/testing/radix-tree/test.h | 2 +- 4 files changed, 73 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index f328a66899b4..7572b7ed930e 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -16,26 +16,36 @@ #include #include "test.h" -#define NUM_THREADS 4 -#define TAG 0 +#define NUM_THREADS 5 +#define MAX_IDX 100 +#define TAG 0 +#define NEW_TAG 1 + static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_t threads[NUM_THREADS]; static unsigned int seeds[3]; -RADIX_TREE(tree, GFP_KERNEL); -bool test_complete; +static RADIX_TREE(tree, GFP_KERNEL); +static bool test_complete; +static int max_order; /* relentlessly fill the tree with tagged entries */ static void *add_entries_fn(void *arg) { - int pgoff; - rcu_register_thread(); while (!test_complete) { - for (pgoff = 0; pgoff < 100; pgoff++) { + unsigned long pgoff; + int order; + + for (pgoff = 0; pgoff < MAX_IDX; pgoff++) { pthread_mutex_lock(&tree_lock); - if (item_insert(&tree, pgoff) == 0) - item_tag_set(&tree, pgoff, TAG); + for (order = max_order; order >= 0; order--) { + if (item_insert_order(&tree, pgoff, order) + == 0) { + item_tag_set(&tree, pgoff, TAG); + break; + } + } pthread_mutex_unlock(&tree_lock); } } @@ -62,14 +72,7 @@ static void *tagged_iteration_fn(void *arg) while (!test_complete) { rcu_read_lock(); radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) { - void *entry; - int i; - - /* busy wait to let removals happen */ - for (i = 0; i < 1000000; i++) - ; - - entry = radix_tree_deref_slot(slot); + void *entry = radix_tree_deref_slot(slot); if (unlikely(!entry)) continue; @@ -110,14 +113,7 @@ static void *untagged_iteration_fn(void *arg) while (!test_complete) { rcu_read_lock(); radix_tree_for_each_slot(slot, &tree, &iter, 0) { - void *entry; - int i; - - /* busy wait to let removals happen */ - for (i = 0; i < 1000000; i++) - ; - - entry = radix_tree_deref_slot(slot); + void *entry = radix_tree_deref_slot(slot); if (unlikely(!entry)) continue; @@ -152,7 +148,7 @@ static void *remove_entries_fn(void *arg) while (!test_complete) { int pgoff; - pgoff = rand_r(&seeds[2]) % 100; + pgoff = rand_r(&seeds[2]) % MAX_IDX; pthread_mutex_lock(&tree_lock); item_delete(&tree, pgoff); @@ -164,36 +160,54 @@ static void *remove_entries_fn(void *arg) return NULL; } +static void *tag_entries_fn(void *arg) +{ + rcu_register_thread(); + + while (!test_complete) { + tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG, + NEW_TAG); + } + rcu_unregister_thread(); + return NULL; +} + /* This is a unit test for a bug found by the syzkaller tester */ -void iteration_test(void) +void iteration_test(unsigned order, unsigned test_duration) { int i; - printf("Running iteration tests for 10 seconds\n"); + printf("Running %siteration tests for %d seconds\n", + order > 0 ? "multiorder " : "", test_duration); + max_order = order; test_complete = false; for (i = 0; i < 3; i++) seeds[i] = rand(); if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) { - perror("pthread_create"); + perror("create tagged iteration thread"); exit(1); } if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) { - perror("pthread_create"); + perror("create untagged iteration thread"); exit(1); } if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) { - perror("pthread_create"); + perror("create add entry thread"); exit(1); } if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) { - perror("pthread_create"); + perror("create remove entry thread"); + exit(1); + } + if (pthread_create(&threads[4], NULL, tag_entries_fn, NULL)) { + perror("create tag entry thread"); exit(1); } - sleep(10); + sleep(test_duration); test_complete = true; for (i = 0; i < NUM_THREADS; i++) { diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 170175ca4105..f7e9801a6754 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -350,7 +350,8 @@ int main(int argc, char **argv) regression1_test(); regression2_test(); regression3_test(); - iteration_test(); + iteration_test(0, 10); + iteration_test(7, 20); single_thread_tests(long_run); /* Free any remaining preallocated nodes */ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 9757b8928bd4..08b4e16dc86f 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -75,8 +75,27 @@ static void __multiorder_tag_test(int index, int order) item_kill_tree(&tree); } +static void __multiorder_tag_test2(unsigned order, unsigned long index2) +{ + RADIX_TREE(tree, GFP_KERNEL); + unsigned long index = (1 << order); + index2 += index; + + assert(item_insert_order(&tree, 0, order) == 0); + assert(item_insert(&tree, index2) == 0); + + assert(radix_tree_tag_set(&tree, 0, 0)); + assert(radix_tree_tag_set(&tree, index2, 0)); + + assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2); + + item_kill_tree(&tree); +} + static void multiorder_tag_tests(void) { + int i, j; + /* test multi-order entry for indices 0-7 with no sibling pointers */ __multiorder_tag_test(0, 3); __multiorder_tag_test(5, 3); @@ -116,6 +135,10 @@ static void multiorder_tag_tests(void) __multiorder_tag_test(300, 8); __multiorder_tag_test(0x12345678UL, 8); + + for (i = 1; i < 10; i++) + for (j = 0; j < (10 << i); j++) + __multiorder_tag_test2(i, j); } static void multiorder_check(unsigned long index, int order) diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 7c2611caa6d2..056a23b56467 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -32,7 +32,7 @@ unsigned long find_item(struct radix_tree_root *, void *item); void tag_check(void); void multiorder_checks(void); -void iteration_test(void); +void iteration_test(unsigned order, unsigned duration); void benchmark(void); struct item * -- cgit v1.2.3-58-ga151 From de1af8f62a78ea8abcc2dddb6de622e4069a368e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:25 -0800 Subject: radix tree test suite: add some more functionality IDR needs more functionality from the kernel: kmalloc()/kfree(), and xchg(). Link: http://lkml.kernel.org/r/1480369871-5271-67-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux.c | 15 +++++++++++++++ tools/testing/radix-tree/linux/kernel.h | 3 +++ tools/testing/radix-tree/linux/slab.h | 3 +++ 3 files changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 1f32a16a3848..ff0452e8a0c4 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -54,6 +54,21 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) free(objp); } +void *kmalloc(size_t size, gfp_t gfp) +{ + void *ret = malloc(size); + uatomic_inc(&nr_allocated); + return ret; +} + +void kfree(void *p) +{ + if (!p) + return; + uatomic_dec(&nr_allocated); + free(p); +} + struct kmem_cache * kmem_cache_create(const char *name, size_t size, size_t offset, unsigned long flags, void (*ctor)(void *)) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 23e77f5673ce..9b43b4975d83 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -8,6 +8,7 @@ #include #include "../../include/linux/compiler.h" +#include "../../include/linux/err.h" #include "../../../include/linux/kconfig.h" #ifdef BENCHMARK @@ -58,4 +59,6 @@ static inline int in_interrupt(void) #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) +#define xchg(ptr, x) uatomic_xchg(ptr, x) + #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 452e2bf502e3..446639f78fc1 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -7,6 +7,9 @@ #define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ +void *kmalloc(size_t size, gfp_t); +void kfree(void *); + struct kmem_cache { int size; void (*ctor)(void *); -- cgit v1.2.3-58-ga151 From bbe9d71f2c545398987a6fea5090a6ca76f4a8dc Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:28 -0800 Subject: radix tree test suite: cache recently freed objects The kmem_cache_alloc implementation simply allocates new memory from malloc() and calls the ctor, which zeroes out the entire object. This means it cannot spot bugs where the object isn't properly reinitialised before being freed. Add a small (11 objects) cache before freeing objects back to malloc. This is enough to let us write a test to catch it, although the memory allocator is now aware of the structure of the radix tree node, since it chains free objects through ->private_data (like the percpu cache does). Link: http://lkml.kernel.org/r/1481667692-14500-2-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux.c | 48 ++++++++++++++++++++++++++++++----- tools/testing/radix-tree/linux/slab.h | 5 ---- 2 files changed, 41 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index ff0452e8a0c4..d31ea7c9abec 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -1,16 +1,27 @@ #include #include #include +#include #include #include #include +#include #include +#include #include int nr_allocated; int preempt_count; +struct kmem_cache { + pthread_mutex_t lock; + int size; + int nr_objs; + void *objs; + void (*ctor)(void *); +}; + void *mempool_alloc(mempool_t *pool, int gfp_mask) { return pool->alloc(gfp_mask, pool->data); @@ -34,24 +45,44 @@ mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) { - void *ret; + struct radix_tree_node *node; if (flags & __GFP_NOWARN) return NULL; - ret = malloc(cachep->size); - if (cachep->ctor) - cachep->ctor(ret); + pthread_mutex_lock(&cachep->lock); + if (cachep->nr_objs) { + cachep->nr_objs--; + node = cachep->objs; + cachep->objs = node->private_data; + pthread_mutex_unlock(&cachep->lock); + node->private_data = NULL; + } else { + pthread_mutex_unlock(&cachep->lock); + node = malloc(cachep->size); + if (cachep->ctor) + cachep->ctor(node); + } + uatomic_inc(&nr_allocated); - return ret; + return node; } void kmem_cache_free(struct kmem_cache *cachep, void *objp) { assert(objp); uatomic_dec(&nr_allocated); - memset(objp, 0, cachep->size); - free(objp); + pthread_mutex_lock(&cachep->lock); + if (cachep->nr_objs > 10) { + memset(objp, POISON_FREE, cachep->size); + free(objp); + } else { + struct radix_tree_node *node = objp; + cachep->nr_objs++; + node->private_data = cachep->objs; + cachep->objs = node; + } + pthread_mutex_unlock(&cachep->lock); } void *kmalloc(size_t size, gfp_t gfp) @@ -75,7 +106,10 @@ kmem_cache_create(const char *name, size_t size, size_t offset, { struct kmem_cache *ret = malloc(sizeof(*ret)); + pthread_mutex_init(&ret->lock, NULL); ret->size = size; + ret->nr_objs = 0; + ret->objs = NULL; ret->ctor = ctor; return ret; } diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 446639f78fc1..e40337f41a38 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -10,11 +10,6 @@ void *kmalloc(size_t size, gfp_t); void kfree(void *); -struct kmem_cache { - int size; - void (*ctor)(void *); -}; - void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); void kmem_cache_free(struct kmem_cache *cachep, void *objp); -- cgit v1.2.3-58-ga151 From e8de4340767dd002978c285e3adddaeda8ac652c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:31 -0800 Subject: radix-tree: ensure counts are initialised radix_tree_join() was freeing nodes with a non-zero ->exceptional count, and radix_tree_split() wasn't zeroing ->exceptional when it allocated the new node. Fix this by making all callers of radix_tree_node_alloc() pass in the new counts (and some other always-initialised fields), which will prevent the problem recurring if in future we decide to do something similar. Link: http://lkml.kernel.org/r/1481667692-14500-3-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 41 ++++++++++++++++--------------- tools/testing/radix-tree/multiorder.c | 45 +++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d09c17dd60ae..0019aca0f328 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -288,7 +288,10 @@ static void radix_tree_dump(struct radix_tree_root *root) * that the caller has pinned this thread of control to the current CPU. */ static struct radix_tree_node * -radix_tree_node_alloc(struct radix_tree_root *root) +radix_tree_node_alloc(struct radix_tree_root *root, + struct radix_tree_node *parent, + unsigned int shift, unsigned int offset, + unsigned int count, unsigned int exceptional) { struct radix_tree_node *ret = NULL; gfp_t gfp_mask = root_gfp_mask(root); @@ -333,6 +336,13 @@ radix_tree_node_alloc(struct radix_tree_root *root) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); out: BUG_ON(radix_tree_is_internal_node(ret)); + if (ret) { + ret->parent = parent; + ret->shift = shift; + ret->offset = offset; + ret->count = count; + ret->exceptional = exceptional; + } return ret; } @@ -538,8 +548,8 @@ static int radix_tree_extend(struct radix_tree_root *root, goto out; do { - struct radix_tree_node *node = radix_tree_node_alloc(root); - + struct radix_tree_node *node = radix_tree_node_alloc(root, + NULL, shift, 0, 1, 0); if (!node) return -ENOMEM; @@ -550,16 +560,11 @@ static int radix_tree_extend(struct radix_tree_root *root, } BUG_ON(shift > BITS_PER_LONG); - node->shift = shift; - node->offset = 0; - node->count = 1; - node->parent = NULL; if (radix_tree_is_internal_node(slot)) { entry_to_node(slot)->parent = node; - } else { + } else if (radix_tree_exceptional_entry(slot)) { /* Moving an exceptional root->rnode to a node */ - if (radix_tree_exceptional_entry(slot)) - node->exceptional = 1; + node->exceptional = 1; } node->slots[0] = slot; slot = node_to_entry(node); @@ -712,14 +717,10 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ - child = radix_tree_node_alloc(root); + child = radix_tree_node_alloc(root, node, shift, + offset, 0, 0); if (!child) return -ENOMEM; - child->shift = shift; - child->offset = offset; - child->count = 0; - child->exceptional = 0; - child->parent = node; rcu_assign_pointer(*slot, node_to_entry(child)); if (node) node->count++; @@ -1209,13 +1210,11 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, for (;;) { if (node->shift > order) { - child = radix_tree_node_alloc(root); + child = radix_tree_node_alloc(root, node, + node->shift - RADIX_TREE_MAP_SHIFT, + offset, 0, 0); if (!child) goto nomem; - child->shift = node->shift - RADIX_TREE_MAP_SHIFT; - child->offset = offset; - child->count = 0; - child->parent = node; if (node != parent) { node->count++; node->slots[offset] = node_to_entry(child); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 08b4e16dc86f..f79812a5e070 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -355,7 +355,7 @@ void multiorder_tagged_iteration(void) item_kill_tree(&tree); } -static void __multiorder_join(unsigned long index, +static void multiorder_join1(unsigned long index, unsigned order1, unsigned order2) { unsigned long loc; @@ -373,7 +373,7 @@ static void __multiorder_join(unsigned long index, item_kill_tree(&tree); } -static void __multiorder_join2(unsigned order1, unsigned order2) +static void multiorder_join2(unsigned order1, unsigned order2) { RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_node *node; @@ -393,6 +393,39 @@ static void __multiorder_join2(unsigned order1, unsigned order2) item_kill_tree(&tree); } +/* + * This test revealed an accounting bug for exceptional entries at one point. + * Nodes were being freed back into the pool with an elevated exception count + * by radix_tree_join() and then radix_tree_split() was failing to zero the + * count of exceptional entries. + */ +static void multiorder_join3(unsigned int order) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + void **slot; + struct radix_tree_iter iter; + unsigned long i; + + for (i = 0; i < (1 << order); i++) { + radix_tree_insert(&tree, i, (void *)0x12UL); + } + + radix_tree_join(&tree, 0, order, (void *)0x16UL); + rcu_barrier(); + + radix_tree_split(&tree, 0, 0); + + radix_tree_for_each_slot(slot, &tree, &iter, 0) { + radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL); + } + + __radix_tree_lookup(&tree, 0, &node, NULL); + assert(node->exceptional == node->count); + + item_kill_tree(&tree); +} + static void multiorder_join(void) { int i, j, idx; @@ -400,16 +433,20 @@ static void multiorder_join(void) for (idx = 0; idx < 1024; idx = idx * 2 + 3) { for (i = 1; i < 15; i++) { for (j = 0; j < i; j++) { - __multiorder_join(idx, i, j); + multiorder_join1(idx, i, j); } } } for (i = 1; i < 15; i++) { for (j = 0; j < i; j++) { - __multiorder_join2(i, j); + multiorder_join2(i, j); } } + + for (i = 3; i < 10; i++) { + multiorder_join3(i); + } } static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc) -- cgit v1.2.3-58-ga151 From 092bc0b225a9bdb6dca0959ddd6826afb6d40747 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:34 -0800 Subject: radix tree test suite: add new tag check We have a check that setting a tag on a single entry at root succeeds, but we were missing a check that clearing a tag on that same entry also succeeds. Link: http://lkml.kernel.org/r/1481667692-14500-4-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/tag_check.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index ed5f87d1dce2..fd98c132207a 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -324,6 +324,9 @@ static void single_check(void) assert(ret == 1); ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); assert(ret == 1); + item_tag_clear(&tree, 0, 0); + ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0); + assert(ret == 0); item_kill_tree(&tree); } -- cgit v1.2.3-58-ga151 From e1e14ab8411df344a17687821f8f78f0a1e73cbb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 14 Dec 2016 15:09:36 -0800 Subject: radix tree test suite: delete unused rcupdate.c This file was used to implement call_rcu() before liburcu implemented that function. It hasn't even been compiled since before the test suite was added to the kernel. Remove it to reduce confusion. Link: http://lkml.kernel.org/r/1481667692-14500-5-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/rcupdate.c | 86 ------------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 tools/testing/radix-tree/rcupdate.c (limited to 'tools') diff --git a/tools/testing/radix-tree/rcupdate.c b/tools/testing/radix-tree/rcupdate.c deleted file mode 100644 index 31a2d14225d6..000000000000 --- a/tools/testing/radix-tree/rcupdate.c +++ /dev/null @@ -1,86 +0,0 @@ -#include -#include -#include -#include - -static pthread_mutex_t rculock = PTHREAD_MUTEX_INITIALIZER; -static struct rcu_head *rcuhead_global = NULL; -static __thread int nr_rcuhead = 0; -static __thread struct rcu_head *rcuhead = NULL; -static __thread struct rcu_head *rcutail = NULL; - -static pthread_cond_t rcu_worker_cond = PTHREAD_COND_INITIALIZER; - -/* switch to urcu implementation when it is merged. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)) -{ - head->func = func; - head->next = rcuhead; - rcuhead = head; - if (!rcutail) - rcutail = head; - nr_rcuhead++; - if (nr_rcuhead >= 1000) { - int signal = 0; - - pthread_mutex_lock(&rculock); - if (!rcuhead_global) - signal = 1; - rcutail->next = rcuhead_global; - rcuhead_global = head; - pthread_mutex_unlock(&rculock); - - nr_rcuhead = 0; - rcuhead = NULL; - rcutail = NULL; - - if (signal) { - pthread_cond_signal(&rcu_worker_cond); - } - } -} - -static void *rcu_worker(void *arg) -{ - struct rcu_head *r; - - rcupdate_thread_init(); - - while (1) { - pthread_mutex_lock(&rculock); - while (!rcuhead_global) { - pthread_cond_wait(&rcu_worker_cond, &rculock); - } - r = rcuhead_global; - rcuhead_global = NULL; - - pthread_mutex_unlock(&rculock); - - synchronize_rcu(); - - while (r) { - struct rcu_head *tmp = r->next; - r->func(r); - r = tmp; - } - } - - rcupdate_thread_exit(); - - return NULL; -} - -static pthread_t worker_thread; -void rcupdate_init(void) -{ - pthread_create(&worker_thread, NULL, rcu_worker, NULL); -} - -void rcupdate_thread_init(void) -{ - rcu_register_thread(); -} -void rcupdate_thread_exit(void) -{ - rcu_unregister_thread(); -} -- cgit v1.2.3-58-ga151