summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexei Starovoitov <ast@kernel.org> 2022-09-02 14:10:49 -0700
committer: Daniel Borkmann <daniel@iogearbox.net> 2022-09-05 15:33:06 +0200
commit: 0fd7c5d43339b783ee3301a05f925d1e52ac87c9 (patch)
tree: b19c7f9b83221183677889fd7644b11549453de3
parent: 86fe28f7692d96d20232af0fc6d7632d5cc89a01 (diff)
bpf: Optimize call_rcu in non-preallocated hash map.
Doing call_rcu() a million times a second becomes a bottleneck. Convert non-preallocated hash map from call_rcu to SLAB_TYPESAFE_BY_RCU. The RCU critical section is no longer observed for one htab element, which makes non-preallocated hash map behave just like preallocated hash map. The map elements are released back to kernel memory after observing the RCU critical section. This improves 'map_perf_test 4' performance from 100k events per second to 250k events per second. bpf_mem_alloc + percpu_counter + typesafe_by_rcu provide a 10x performance boost to non-preallocated hash map and make it within a few % of preallocated map while consuming a fraction of the memory. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20220902211058.60789-8-alexei.starovoitov@gmail.com
-rw-r--r-- kernel/bpf/hashtab.c 8
-rw-r--r-- kernel/bpf/memalloc.c 2
-rw-r--r-- tools/testing/selftests/bpf/progs/timer.c 11
3 files changed, 7 insertions, 14 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 36aa16dc43ad..0d888a90a805 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -953,8 +953,12 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
__pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
dec_elem_count(htab);
- l->htab = htab;
- call_rcu(&l->rcu, htab_elem_free_rcu);
+ if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+ l->htab = htab;
+ call_rcu(&l->rcu, htab_elem_free_rcu);
+ } else {
+ htab_elem_free(htab, l);
+ }
}
}
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1c46763d855e..da0721f8c28f 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -281,7 +281,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size)
return -ENOMEM;
size += LLIST_NODE_SZ; /* room for llist_node */
snprintf(buf, sizeof(buf), "bpf-%u", size);
- kmem_cache = kmem_cache_create(buf, size, 8, 0, NULL);
+ kmem_cache = kmem_cache_create(buf, size, 8, SLAB_TYPESAFE_BY_RCU, NULL);
if (!kmem_cache) {
free_percpu(pc);
return -ENOMEM;
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 5f5309791649..0053c5402173 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -208,17 +208,6 @@ static int timer_cb2(void *map, int *key, struct hmap_elem *val)
*/
bpf_map_delete_elem(map, key);
- /* in non-preallocated hashmap both 'key' and 'val' are RCU
- * protected and still valid though this element was deleted
- * from the map. Arm this timer for ~35 seconds. When callback
- * finishes the call_rcu will invoke:
- * htab_elem_free_rcu
- * check_and_free_timer
- * bpf_timer_cancel_and_free
- * to cancel this 35 second sleep and delete the timer for real.
- */
- if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
- err |= 256;
ok |= 4;
}
return 0;