summaryrefslogtreecommitdiff
path: root/io_uring
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring')
-rw-r--r-- io_uring/register.c | 60
-rw-r--r-- io_uring/register.h | 1
-rw-r--r-- io_uring/rsrc.c | 96
-rw-r--r-- io_uring/rsrc.h | 2
4 files changed, 137 insertions, 22 deletions
diff --git a/io_uring/register.c b/io_uring/register.c
index 57cb85c42526..dab0f8024ddf 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -542,6 +542,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_clock(ctx, arg);
break;
+ case IORING_REGISTER_COPY_BUFFERS:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_copy_buffers(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
@@ -550,21 +556,16 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
return ret;
}
-SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
- void __user *, arg, unsigned int, nr_args)
+/*
+ * Given an 'fd' value, return the io_uring file associated with it. If
+ * 'registered' is true, then the registered index is used. Otherwise, the
+ * normal fd table. Caller must call fput() on the returned file only when
+ * 'registered' is false, and never when it's an ERR_PTR.
+ */
+struct file *io_uring_register_get_file(int fd, bool registered)
{
- struct io_ring_ctx *ctx;
- long ret = -EBADF;
struct file *file;
- bool use_registered_ring;
-
- use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
- opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;
-
- if (opcode >= IORING_REGISTER_LAST)
- return -EINVAL;
- if (use_registered_ring) {
+ if (registered) {
/*
* Ring fd has been registered via IORING_REGISTER_RING_FDS, we
* need only dereference our task private array to find it.
@@ -572,27 +573,44 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
struct io_uring_task *tctx = current->io_uring;
if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
file = tctx->registered_rings[fd];
- if (unlikely(!file))
- return -EBADF;
} else {
file = fget(fd);
- if (unlikely(!file))
- return -EBADF;
- ret = -EOPNOTSUPP;
- if (!io_is_uring_fops(file))
- goto out_fput;
}
+ if (unlikely(!file))
+ return ERR_PTR(-EBADF);
+ if (io_is_uring_fops(file))
+ return file;
+ fput(file);
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
+ void __user *, arg, unsigned int, nr_args)
+{
+ struct io_ring_ctx *ctx;
+ long ret = -EBADF;
+ struct file *file;
+ bool use_registered_ring;
+
+ use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
+ opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;
+
+ if (opcode >= IORING_REGISTER_LAST)
+ return -EINVAL;
+
+ file = io_uring_register_get_file(fd, use_registered_ring);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
ctx = file->private_data;
mutex_lock(&ctx->uring_lock);
ret = __io_uring_register(ctx, opcode, arg, nr_args);
mutex_unlock(&ctx->uring_lock);
trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
-out_fput:
if (!use_registered_ring)
fput(file);
return ret;
diff --git a/io_uring/register.h b/io_uring/register.h
index c9da997d503c..cc69b88338fe 100644
--- a/io_uring/register.h
+++ b/io_uring/register.h
@@ -4,5 +4,6 @@
int io_eventfd_unregister(struct io_ring_ctx *ctx);
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id);
+struct file *io_uring_register_get_file(int fd, bool registered);
#endif
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index e8639993b61e..a7164aa7d13e 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -17,6 +17,7 @@
#include "openclose.h"
#include "rsrc.h"
#include "memmap.h"
+#include "register.h"
struct io_rsrc_update {
struct file *file;
@@ -114,14 +115,16 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
struct io_mapped_ubuf *imu = *slot;
unsigned int i;
+ *slot = NULL;
if (imu != &dummy_ubuf) {
+ if (!refcount_dec_and_test(&imu->refs))
+ return;
for (i = 0; i < imu->nr_bvecs; i++)
unpin_user_page(imu->bvec[i].bv_page);
if (imu->acct_pages)
io_unaccount_mem(ctx, imu->acct_pages);
kvfree(imu);
}
- *slot = NULL;
}
static void io_rsrc_put_work(struct io_rsrc_node *node)
@@ -996,6 +999,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
imu->folio_shift = data.folio_shift;
imu->folio_mask = ~((1UL << data.folio_shift) - 1);
}
+ refcount_set(&imu->refs, 1);
off = (unsigned long) iov->iov_base & ~imu->folio_mask;
*pimu = imu;
ret = 0;
@@ -1145,3 +1149,93 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
return 0;
}
+
+/*
+ * Populate @ctx's registered buffer table with references to the buffers
+ * currently registered in @src_ctx.
+ *
+ * Entered with ctx->uring_lock held. The lock is dropped while the source
+ * ring is inspected under its own lock, and is re-acquired before returning
+ * on every path.
+ *
+ * Returns 0 on success, -ENXIO if the source ring has no registered buffers,
+ * -ENOMEM if the new table cannot be allocated, -EBUSY if @ctx gained a
+ * buffer table while its lock was dropped, or the error returned by
+ * io_rsrc_data_alloc().
+ */
+static int io_copy_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx)
+{
+	struct io_mapped_ubuf **user_bufs;
+	struct io_rsrc_data *data;
+	int i, ret, nbufs;
+
+	/*
+	 * Drop our own lock here. We'll setup the data we need and reference
+	 * the source buffers, then re-grab, check, and assign at the end.
+	 */
+	mutex_unlock(&ctx->uring_lock);
+
+	mutex_lock(&src_ctx->uring_lock);
+	ret = -ENXIO;
+	nbufs = src_ctx->nr_user_bufs;
+	if (!nbufs)
+		goto out_unlock;
+	ret = io_rsrc_data_alloc(ctx, IORING_RSRC_BUFFER, NULL, nbufs, &data);
+	if (ret)
+		goto out_unlock;
+
+	ret = -ENOMEM;
+	user_bufs = kcalloc(nbufs, sizeof(*ctx->user_bufs), GFP_KERNEL);
+	if (!user_bufs)
+		goto out_free_data;
+
+	for (i = 0; i < nbufs; i++) {
+		struct io_mapped_ubuf *src = src_ctx->user_bufs[i];
+
+		/*
+		 * io_buffer_unmap() skips the refcount drop for slots that
+		 * point at dummy_ubuf, so taking a reference on it here
+		 * would never be balanced. Only real mappings are counted.
+		 */
+		if (src != &dummy_ubuf)
+			refcount_inc(&src->refs);
+		user_bufs[i] = src;
+	}
+
+	/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
+	mutex_unlock(&src_ctx->uring_lock);
+	mutex_lock(&ctx->uring_lock);
+	if (!ctx->user_bufs) {
+		ctx->user_bufs = user_bufs;
+		ctx->buf_data = data;
+		ctx->nr_user_bufs = nbufs;
+		return 0;
+	}
+
+	/* someone raced setting up buffers, dump ours */
+	for (i = 0; i < nbufs; i++)
+		io_buffer_unmap(ctx, &user_bufs[i]);
+	io_rsrc_data_free(data);
+	kfree(user_bufs);
+	return -EBUSY;
+out_free_data:
+	io_rsrc_data_free(data);
+out_unlock:
+	/* restore the locking state the caller expects: src unlocked, ctx locked */
+	mutex_unlock(&src_ctx->uring_lock);
+	mutex_lock(&ctx->uring_lock);
+	return ret;
+}
+
+/*
+ * Make the buffers registered with another ring available in this one. The
+ * source ring is named by src_fd in the user-supplied io_uring_copy_buffers
+ * structure. The result matches registering the same buffers with ctx
+ * directly, only quicker because the existing mappings are reused.
+ *
+ * The memory was accounted when it was first registered, so it is not
+ * accounted a second time.
+ */
+int io_register_copy_buffers(struct io_ring_ctx *ctx, void __user *arg)
+{
+	struct io_uring_copy_buffers buf;
+	bool src_is_registered;
+	struct file *src_file;
+	int err;
+
+	/* the destination ring must not have a buffer table yet */
+	if (ctx->user_bufs || ctx->nr_user_bufs)
+		return -EBUSY;
+	if (copy_from_user(&buf, arg, sizeof(buf)))
+		return -EFAULT;
+	/* unknown flag bits and non-zero padding are both rejected */
+	if ((buf.flags & ~IORING_REGISTER_SRC_REGISTERED) ||
+	    memchr_inv(buf.pad, 0, sizeof(buf.pad)))
+		return -EINVAL;
+
+	src_is_registered = !!(buf.flags & IORING_REGISTER_SRC_REGISTERED);
+	src_file = io_uring_register_get_file(buf.src_fd, src_is_registered);
+	if (IS_ERR(src_file))
+		return PTR_ERR(src_file);
+
+	err = io_copy_buffers(ctx, src_file->private_data);
+	/* only a file obtained through the normal fd table is put back */
+	if (!src_is_registered)
+		fput(src_file);
+	return err;
+}
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 3d0dda3556e6..93546ab337a6 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -47,6 +47,7 @@ struct io_mapped_ubuf {
unsigned int folio_shift;
unsigned long acct_pages;
unsigned long folio_mask;
+ refcount_t refs;
struct bio_vec bvec[] __counted_by(nr_bvecs);
};
@@ -67,6 +68,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
struct io_mapped_ubuf *imu,
u64 buf_addr, size_t len);
+int io_register_copy_buffers(struct io_ring_ctx *ctx, void __user *arg);
void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,