summaryrefslogtreecommitdiff
path: root/include/linux/objpool.h
diff options
context:
space:
mode:
author: Andrii Nakryiko <andrii@kernel.org> 2024-04-24 14:52:14 -0700
committer: Masami Hiramatsu (Google) <mhiramat@kernel.org> 2024-05-01 23:18:48 +0900
commit: 78d0b16127daa26d016c215a089ae330878291f7 (patch)
tree: d9fef1a56441ea32fd2e5cae6d6fb39b71be0350 /include/linux/objpool.h
parent: a3b00f10da808bd4a354f890b551cba471082d0e (diff)
objpool: cache nr_possible_cpus() and avoid caching nr_cpu_ids
Profiling shows that calling num_possible_cpus() in objpool_pop() takes a noticeable amount of CPU (when profiled on an 80-core machine), as we need to recalculate the number of set bits in a CPU bit mask. This number can't change, so there is no point in paying the price for recalculating it. As such, cache this value in struct objpool_head (as nr_possible_cpus) and use it in objpool_pop().

On the other hand, the cached pool->nr_cpus isn't necessary, as it's not used in the hot path and is also a pretty trivial value to retrieve. So drop pool->nr_cpus in favor of using nr_cpu_ids everywhere. This way the size of struct objpool_head remains the same, which is a nice bonus.

The same BPF selftests benchmarks were used to evaluate the effect. Using the changes in the previous patch (inlining of objpool_pop/objpool_push) as the baseline, here are the differences:

BASELINE
========
kretprobe      :  9.937 ± 0.174M/s
kretprobe-multi: 10.440 ± 0.108M/s

AFTER
=====
kretprobe      : 10.106 ± 0.120M/s (+1.7%)
kretprobe-multi: 10.515 ± 0.180M/s (+0.7%)

Link: https://lore.kernel.org/all/20240424215214.3956041-3-andrii@kernel.org/
Cc: Matt (Qiang) Wu <wuqiang.matt@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Diffstat (limited to 'include/linux/objpool.h')
-rw-r--r-- include/linux/objpool.h 6
1 files changed, 3 insertions, 3 deletions
diff --git a/include/linux/objpool.h b/include/linux/objpool.h
index d8b1f7b91128..cb1758eaa2d3 100644
--- a/include/linux/objpool.h
+++ b/include/linux/objpool.h
@@ -73,7 +73,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
* struct objpool_head - object pooling metadata
* @obj_size: object size, aligned to sizeof(void *)
* @nr_objs: total objs (to be pre-allocated with objpool)
- * @nr_cpus: local copy of nr_cpu_ids
+ * @nr_possible_cpus: cached value of num_possible_cpus()
* @capacity: max objs can be managed by one objpool_slot
* @gfp: gfp flags for kmalloc & vmalloc
* @ref: refcount of objpool
@@ -85,7 +85,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context);
struct objpool_head {
int obj_size;
int nr_objs;
- int nr_cpus;
+ int nr_possible_cpus;
int capacity;
gfp_t gfp;
refcount_t ref;
@@ -176,7 +176,7 @@ static inline void *objpool_pop(struct objpool_head *pool)
raw_local_irq_save(flags);
cpu = raw_smp_processor_id();
- for (i = 0; i < num_possible_cpus(); i++) {
+ for (i = 0; i < pool->nr_possible_cpus; i++) {
obj = __objpool_try_get_slot(pool, cpu);
if (obj)
break;