summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pavel Begunkov <asml.silence@gmail.com> 2022-07-12 21:52:39 +0100
committer: Jens Axboe <axboe@kernel.dk> 2022-07-24 18:41:06 -0600
commit: eb4a299b2f95437af6183946c2a2e850621cefdb (patch)
tree: 16bd25d7e2ca9aab186eae6e517425c4da8e9b96
parent: eb42cebb2cf24c48f60c32856a4bba93d42659c8 (diff)
io_uring: cache struct io_notif
kmalloc'ing struct io_notif is too expensive when done frequently, so cache them like many other resources in io_uring. Keep two lists: the first one is where we get notifiers from, and it is protected by ->uring_lock. The second one, to which we queue released notifiers, is protected by ->completion_lock. We then splice the second list into the first when needed. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/9dec18f7fcbab9f4bd40b96e5ae158b119945230.1657643355.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--include/linux/io_uring_types.h7
-rw-r--r--io_uring/io_uring.c3
-rw-r--r--io_uring/notif.c57
-rw-r--r--io_uring/notif.h5
4 files changed, 65 insertions, 7 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index f7fab3758cb9..144493cbadb5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -249,6 +249,9 @@ struct io_ring_ctx {
struct xarray io_bl_xa;
struct list_head io_buffers_cache;
+ /* struct io_notif cache, protected by uring_lock */
+ struct list_head notif_list;
+
struct io_hash_table cancel_table_locked;
struct list_head cq_overflow_list;
struct io_alloc_cache apoll_cache;
@@ -259,6 +262,10 @@ struct io_ring_ctx {
struct io_wq_work_list locked_free_list;
unsigned int locked_free_nr;
+ /* struct io_notif cache protected by completion_lock */
+ struct list_head notif_list_locked;
+ unsigned int notif_locked_nr;
+
const struct cred *sq_creds; /* cred used for __io_sq_thread() */
struct io_sq_data *sq_data; /* if using sq thread polling */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 65ac407dd74e..20e65d45ca1c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -321,6 +321,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_WQ_LIST(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+ INIT_LIST_HEAD(&ctx->notif_list);
+ INIT_LIST_HEAD(&ctx->notif_list_locked);
return ctx;
err:
kfree(ctx->dummy_ubuf);
@@ -2493,6 +2495,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
+ io_notif_cache_purge(ctx);
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
diff --git a/io_uring/notif.c b/io_uring/notif.c
index 6ee948af6a49..b257db2120b4 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -15,10 +15,12 @@ static void __io_notif_complete_tw(struct callback_head *cb)
io_cq_lock(ctx);
io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq, true);
+
+ list_add(&notif->cache_node, &ctx->notif_list_locked);
+ ctx->notif_locked_nr++;
io_cq_unlock_post(ctx);
percpu_ref_put(&ctx->refs);
- kfree(notif);
}
static inline void io_notif_complete(struct io_notif *notif)
@@ -45,21 +47,62 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
queue_work(system_unbound_wq, &notif->commit_work);
}
+static void io_notif_splice_cached(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
+{
+ spin_lock(&ctx->completion_lock);
+ list_splice_init(&ctx->notif_list_locked, &ctx->notif_list);
+ ctx->notif_locked_nr = 0;
+ spin_unlock(&ctx->completion_lock);
+}
+
+void io_notif_cache_purge(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
+{
+ io_notif_splice_cached(ctx);
+
+ while (!list_empty(&ctx->notif_list)) {
+ struct io_notif *notif = list_first_entry(&ctx->notif_list,
+ struct io_notif, cache_node);
+
+ list_del(&notif->cache_node);
+ kfree(notif);
+ }
+}
+
+static inline bool io_notif_has_cached(struct io_ring_ctx *ctx)
+ __must_hold(&ctx->uring_lock)
+{
+ if (likely(!list_empty(&ctx->notif_list)))
+ return true;
+ if (data_race(READ_ONCE(ctx->notif_locked_nr) <= IO_NOTIF_SPLICE_BATCH))
+ return false;
+ io_notif_splice_cached(ctx);
+ return !list_empty(&ctx->notif_list);
+}
+
struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot)
__must_hold(&ctx->uring_lock)
{
struct io_notif *notif;
- notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
- if (!notif)
- return NULL;
+ if (likely(io_notif_has_cached(ctx))) {
+ notif = list_first_entry(&ctx->notif_list,
+ struct io_notif, cache_node);
+ list_del(&notif->cache_node);
+ } else {
+ notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
+ if (!notif)
+ return NULL;
+ /* pre-initialise some fields */
+ notif->ctx = ctx;
+ notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+ notif->uarg.callback = io_uring_tx_zerocopy_callback;
+ }
notif->seq = slot->seq++;
notif->tag = slot->tag;
- notif->ctx = ctx;
- notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
- notif->uarg.callback = io_uring_tx_zerocopy_callback;
/* master ref owned by io_notif_slot, will be dropped on flush */
refcount_set(&notif->uarg.refcnt, 1);
percpu_ref_get(&ctx->refs);
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 3d7a1d242e17..b23c9c0515bb 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -5,6 +5,8 @@
#include <net/sock.h>
#include <linux/nospec.h>
+#define IO_NOTIF_SPLICE_BATCH 32
+
struct io_notif {
struct ubuf_info uarg;
struct io_ring_ctx *ctx;
@@ -13,6 +15,8 @@ struct io_notif {
u64 tag;
/* see struct io_notif_slot::seq */
u32 seq;
+ /* hook into ctx->notif_list and ctx->notif_list_locked */
+ struct list_head cache_node;
union {
struct callback_head task_work;
@@ -41,6 +45,7 @@ struct io_notif_slot {
};
int io_notif_unregister(struct io_ring_ctx *ctx);
+void io_notif_cache_purge(struct io_ring_ctx *ctx);
struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot);