diff options
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/register.c | 60 | ||||
-rw-r--r-- | io_uring/register.h | 1 | ||||
-rw-r--r-- | io_uring/rsrc.c | 96 | ||||
-rw-r--r-- | io_uring/rsrc.h | 2 |
4 files changed, 137 insertions, 22 deletions
diff --git a/io_uring/register.c b/io_uring/register.c index 57cb85c42526..dab0f8024ddf 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -542,6 +542,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_clock(ctx, arg); break; + case IORING_REGISTER_COPY_BUFFERS: + ret = -EINVAL; + if (!arg || nr_args != 1) + break; + ret = io_register_copy_buffers(ctx, arg); + break; default: ret = -EINVAL; break; @@ -550,21 +556,16 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return ret; } -SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, - void __user *, arg, unsigned int, nr_args) +/* + * Given an 'fd' value, return the ctx associated with if. If 'registered' is + * true, then the registered index is used. Otherwise, the normal fd table. + * Caller must call fput() on the returned file, unless it's an ERR_PTR. + */ +struct file *io_uring_register_get_file(int fd, bool registered) { - struct io_ring_ctx *ctx; - long ret = -EBADF; struct file *file; - bool use_registered_ring; - - use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING); - opcode &= ~IORING_REGISTER_USE_REGISTERED_RING; - - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; - if (use_registered_ring) { + if (registered) { /* * Ring fd has been registered via IORING_REGISTER_RING_FDS, we * need only dereference our task private array to find it. @@ -572,27 +573,44 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, struct io_uring_task *tctx = current->io_uring; if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX)) - return -EINVAL; + return ERR_PTR(-EINVAL); fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); file = tctx->registered_rings[fd]; - if (unlikely(!file)) - return -EBADF; } else { file = fget(fd); - if (unlikely(!file)) - return -EBADF; - ret = -EOPNOTSUPP; - if (!io_is_uring_fops(file)) - goto out_fput; } + if (unlikely(!file)) + return ERR_PTR(-EBADF); + if (io_is_uring_fops(file)) + return file; + fput(file); + return ERR_PTR(-EOPNOTSUPP); +} + +SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, + void __user *, arg, unsigned int, nr_args) +{ + struct io_ring_ctx *ctx; + long ret = -EBADF; + struct file *file; + bool use_registered_ring; + + use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING); + opcode &= ~IORING_REGISTER_USE_REGISTERED_RING; + + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + + file = io_uring_register_get_file(fd, use_registered_ring); + if (IS_ERR(file)) + return PTR_ERR(file); ctx = file->private_data; mutex_lock(&ctx->uring_lock); ret = __io_uring_register(ctx, opcode, arg, nr_args); mutex_unlock(&ctx->uring_lock); trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret); -out_fput: if (!use_registered_ring) fput(file); return ret; diff --git a/io_uring/register.h b/io_uring/register.h index c9da997d503c..cc69b88338fe 100644 --- a/io_uring/register.h +++ b/io_uring/register.h @@ -4,5 +4,6 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx); int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id); +struct file *io_uring_register_get_file(int fd, bool registered); #endif diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index e8639993b61e..a7164aa7d13e 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -17,6 +17,7 @@ #include "openclose.h" #include "rsrc.h" #include "memmap.h" +#include "register.h" struct io_rsrc_update { struct file *file; @@ -114,14 +115,16 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo struct io_mapped_ubuf *imu = *slot; unsigned int i; + *slot = NULL; if (imu != &dummy_ubuf) { + if (!refcount_dec_and_test(&imu->refs)) + return; for (i = 0; i < imu->nr_bvecs; i++) unpin_user_page(imu->bvec[i].bv_page); if (imu->acct_pages) io_unaccount_mem(ctx, imu->acct_pages); kvfree(imu); } - *slot = NULL; } static void io_rsrc_put_work(struct io_rsrc_node *node) @@ -996,6 +999,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, imu->folio_shift = data.folio_shift; imu->folio_mask = ~((1UL << data.folio_shift) - 1); } + refcount_set(&imu->refs, 1); off = (unsigned long) iov->iov_base & ~imu->folio_mask; *pimu = imu; ret = 0; @@ -1145,3 +1149,93 @@ int io_import_fixed(int ddir, struct iov_iter *iter, return 0; } + +static int io_copy_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx) +{ + struct io_mapped_ubuf **user_bufs; + struct io_rsrc_data *data; + int i, ret, nbufs; + + /* + * Drop our own lock here. We'll setup the data we need and reference + * the source buffers, then re-grab, check, and assign at the end. + */ + mutex_unlock(&ctx->uring_lock); + + mutex_lock(&src_ctx->uring_lock); + ret = -ENXIO; + nbufs = src_ctx->nr_user_bufs; + if (!nbufs) + goto out_unlock; + ret = io_rsrc_data_alloc(ctx, IORING_RSRC_BUFFER, NULL, nbufs, &data); + if (ret) + goto out_unlock; + + ret = -ENOMEM; + user_bufs = kcalloc(nbufs, sizeof(*ctx->user_bufs), GFP_KERNEL); + if (!user_bufs) + goto out_free_data; + + for (i = 0; i < nbufs; i++) { + struct io_mapped_ubuf *src = src_ctx->user_bufs[i]; + + refcount_inc(&src->refs); + user_bufs[i] = src; + } + + /* Have a ref on the bufs now, drop src lock and re-grab our own lock */ + mutex_unlock(&src_ctx->uring_lock); + mutex_lock(&ctx->uring_lock); + if (!ctx->user_bufs) { + ctx->user_bufs = user_bufs; + ctx->buf_data = data; + ctx->nr_user_bufs = nbufs; + return 0; + } + + /* someone raced setting up buffers, dump ours */ + for (i = 0; i < nbufs; i++) + io_buffer_unmap(ctx, &user_bufs[i]); + io_rsrc_data_free(data); + kfree(user_bufs); + return -EBUSY; +out_free_data: + io_rsrc_data_free(data); +out_unlock: + mutex_unlock(&src_ctx->uring_lock); + mutex_lock(&ctx->uring_lock); + return ret; +} + +/* + * Copy the registered buffers from the source ring whose file descriptor + * is given in the src_fd to the current ring. This is identical to registering + * the buffers with ctx, except faster as mappings already exist. + * + * Since the memory is already accounted once, don't account it again. + */ +int io_register_copy_buffers(struct io_ring_ctx *ctx, void __user *arg) +{ + struct io_uring_copy_buffers buf; + bool registered_src; + struct file *file; + int ret; + + if (ctx->user_bufs || ctx->nr_user_bufs) + return -EBUSY; + if (copy_from_user(&buf, arg, sizeof(buf))) + return -EFAULT; + if (buf.flags & ~IORING_REGISTER_SRC_REGISTERED) + return -EINVAL; + if (memchr_inv(buf.pad, 0, sizeof(buf.pad))) + return -EINVAL; + + registered_src = (buf.flags & IORING_REGISTER_SRC_REGISTERED) != 0; + file = io_uring_register_get_file(buf.src_fd, registered_src); + if (IS_ERR(file)) + return PTR_ERR(file); + ret = io_copy_buffers(ctx, file->private_data); + if (!registered_src) + fput(file); + return ret; +} diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 3d0dda3556e6..93546ab337a6 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -47,6 +47,7 @@ struct io_mapped_ubuf { unsigned int folio_shift; unsigned long acct_pages; unsigned long folio_mask; + refcount_t refs; struct bio_vec bvec[] __counted_by(nr_bvecs); }; @@ -67,6 +68,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, u64 buf_addr, size_t len); +int io_register_copy_buffers(struct io_ring_ctx *ctx, void __user *arg); void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx); int io_sqe_buffers_unregister(struct io_ring_ctx *ctx); int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, |