author    Jens Axboe <axboe@kernel.dk>  2021-06-17 10:19:54 -0600
committer Jens Axboe <axboe@kernel.dk>  2021-06-17 10:25:50 -0600
commit    fe76421d1da1dcdb3a2cd8428ac40106bff28bc0 (patch)
tree      dc590c0c33f23e65a0e11cceb7be870944d0360a /fs
parent    0e03496d1967abf1ebb151a24318c07d07f41f7f (diff)
io_uring: allow user configurable IO thread CPU affinity
io-wq defaults to per-node masks for IO workers. This works fine by
default, but isn't particularly handy for workloads that prefer more
specific affinities, for either performance or isolation reasons.

This adds IORING_REGISTER_IOWQ_AFF, which allows the user to pass in a
CPU mask that is then applied to IO thread workers, and
IORING_UNREGISTER_IOWQ_AFF, which simply resets the masks back to the
per-node default.

Note that no care is given to existing IO threads; if they are already
running or sleeping, they will need to go through a reschedule before
the new affinity takes effect.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
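As a usage illustration (not part of the commit): a minimal userspace sketch of
driving the two new opcodes through the raw io_uring_register(2) syscall. It
assumes a kernel and uapi headers that carry this patch, plus an already
set-up ring fd; the helper names pin_iowq_workers/unpin_iowq_workers are made
up for the example. As the patch shows, nr_args carries the mask length in
bytes for IORING_REGISTER_IOWQ_AFF, while IORING_UNREGISTER_IOWQ_AFF requires
a NULL arg and zero nr_args.

#define _GNU_SOURCE
#include <sched.h>          /* cpu_set_t, CPU_ZERO, CPU_SET */
#include <unistd.h>         /* syscall() */
#include <sys/syscall.h>    /* __NR_io_uring_register */
#include <linux/io_uring.h> /* IORING_REGISTER_IOWQ_AFF and friends */

/* Hypothetical helper: restrict this ring's io-wq workers to CPUs 0 and 1. */
static int pin_iowq_workers(int ring_fd)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	CPU_SET(1, &mask);
	/* nr_args is the size of the mask in bytes for this opcode */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_IOWQ_AFF, &mask, sizeof(mask));
}

/* Hypothetical helper: reset the workers to the default per-node masks. */
static int unpin_iowq_workers(int ring_fd)
{
	/* arg must be NULL and nr_args 0, or the kernel returns -EINVAL */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
}

As the commit message notes, workers that are already running or sleeping only
pick up the new mask after their next reschedule.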
Diffstat (limited to 'fs')
-rw-r--r--	fs/io-wq.c	17
-rw-r--r--	fs/io-wq.h	2
-rw-r--r--	fs/io_uring.c	51
3 files changed, 70 insertions, 0 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 2af8e1df4646..bb4d3ee9592e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return __io_wq_cpu_online(wq, cpu, false);
 }
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
+{
+	int i;
+
+	rcu_read_lock();
+	for_each_node(i) {
+		struct io_wqe *wqe = wq->wqes[i];
+
+		if (mask)
+			cpumask_copy(wqe->cpu_mask, mask);
+		else
+			cpumask_copy(wqe->cpu_mask, cpumask_of_node(i));
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 static __init int io_wq_init(void)
 {
 	int ret;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index af2df0680ee2..02299cdcf55c 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d916eb2cef09..46a25a7cb70a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	return -EINVAL;
 }
 
+static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
+				unsigned len)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	cpumask_var_t new_mask;
+	int ret;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	cpumask_clear(new_mask);
+	if (len > cpumask_size())
+		len = cpumask_size();
+
+	if (copy_from_user(new_mask, arg, len)) {
+		free_cpumask_var(new_mask);
+		return -EFAULT;
+	}
+
+	ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);
+	free_cpumask_var(new_mask);
+	return ret;
+}
+
+static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
+{
+	struct io_uring_task *tctx = current->io_uring;
+
+	if (!tctx || !tctx->io_wq)
+		return -EINVAL;
+
+	return io_wq_cpu_affinity(tctx->io_wq, NULL);
+}
+
 static bool io_register_op_must_quiesce(int op)
 {
 	switch (op) {
@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_FILES_UPDATE2:
 	case IORING_REGISTER_BUFFERS2:
 	case IORING_REGISTER_BUFFERS_UPDATE:
+	case IORING_REGISTER_IOWQ_AFF:
+	case IORING_UNREGISTER_IOWQ_AFF:
 		return false;
 	default:
 		return true;
@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 		ret = io_register_rsrc_update(ctx, arg, nr_args,
 					      IORING_RSRC_BUFFER);
 		break;
+	case IORING_REGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (!arg || !nr_args)
+			break;
+		ret = io_register_iowq_aff(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_IOWQ_AFF:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_unregister_iowq_aff(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;