author     Jens Axboe <axboe@kernel.dk>	2022-09-30 07:47:42 -0600
committer  Jens Axboe <axboe@kernel.dk>	2022-09-30 07:47:42 -0600
commit     5853a7b5512c3017f64ca26494bd7361a12d6992
tree       5777f2d130ad2d7ee127df3eaaafc23ec73721ae  /io_uring/net.c
parent     736feaa3a08124020afe6e51f50bae8598c99f55
parent     108893ddcc4d3aa0a4a02aeb02d478e997001227
Merge branch 'for-6.1/io_uring' into for-6.1/passthrough
* for-6.1/io_uring: (56 commits)
io_uring/net: fix notif cqe reordering
io_uring/net: don't update msg_name if not provided
io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL
io_uring/rw: defer fsnotify calls to task context
io_uring/net: fix fast_iov assignment in io_setup_async_msg()
io_uring/net: fix non-zc send with address
io_uring/net: don't skip notifs for failed requests
io_uring/rw: don't lose short results on io_setup_async_rw()
io_uring/rw: fix unexpected link breakage
io_uring/net: fix cleanup double free free_iov init
io_uring: fix CQE reordering
io_uring/net: fix UAF in io_sendrecv_fail()
selftest/net: adjust io_uring sendzc notif handling
io_uring: ensure local task_work marks task as running
io_uring/net: zerocopy sendmsg
io_uring/net: combine fail handlers
io_uring/net: rename io_sendzc()
io_uring/net: support non-zerocopy sendto
io_uring/net: refactor io_setup_async_addr
io_uring/net: don't lose partial send_zc on fail
...
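Several of the patches above settle the user-visible completion contract for zerocopy sends: the request CQE is posted first, flagged IORING_CQE_F_MORE when a notification will follow, and a separate IORING_CQE_F_NOTIF CQE later signals that the kernel no longer needs the buffer. As a rough userspace illustration of that contract, here is a minimal sketch (not part of this merge) assuming liburing 2.3+ for io_uring_prep_send_zc() and a connected stream socket:

/*
 * Minimal sketch, error handling reduced to the essentials.
 * Assumes liburing 2.3+ and a connected SOCK_STREAM fd.
 */
#include <errno.h>
#include <liburing.h>

static int send_zc_once(struct io_uring *ring, int sockfd,
			const void *buf, size_t len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	unsigned more;
	int ret, sent;

	if (!sqe)
		return -EBUSY;
	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
	io_uring_submit(ring);

	/*
	 * First CQE carries the send result; IORING_CQE_F_MORE means a
	 * notification CQE will follow.
	 */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;
	sent = cqe->res;
	more = cqe->flags & IORING_CQE_F_MORE;
	io_uring_cqe_seen(ring, cqe);

	/* the IORING_CQE_F_NOTIF CQE: only after it may buf be reused */
	if (more) {
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret < 0)
			return ret;
		io_uring_cqe_seen(ring, cqe);
	}
	return sent;
}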
Diffstat (limited to 'io_uring/net.c')
-rw-r--r--	io_uring/net.c	308
1 file changed, 222 insertions(+), 86 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 60e392f7f2dc..caa6a803cb72 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -55,21 +55,14 @@ struct io_sr_msg {
 		struct user_msghdr __user	*umsg;
 		void __user			*buf;
 	};
+	unsigned			len;
+	unsigned			done_io;
 	unsigned			msg_flags;
-	unsigned			flags;
-	size_t				len;
-	size_t				done_io;
-};
-
-struct io_sendzc {
-	struct file			*file;
-	void __user			*buf;
-	size_t				len;
-	unsigned			msg_flags;
-	unsigned			flags;
-	unsigned			addr_len;
+	u16				flags;
+	/* initialised and used only by !msg send variants */
+	u16				addr_len;
 	void __user			*addr;
-	size_t				done_io;
+	/* used only for send zerocopy */
 	struct io_kiocb			*notif;
 };
@@ -126,28 +119,36 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
 	}
 }
 
-static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
-						      unsigned int issue_flags)
+static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
+						  unsigned int issue_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_cache_entry *entry;
+	struct io_async_msghdr *hdr;
 
 	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
 	    (entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
-		struct io_async_msghdr *hdr;
-
 		hdr = container_of(entry, struct io_async_msghdr, cache);
+		hdr->free_iov = NULL;
 		req->flags |= REQ_F_ASYNC_DATA;
 		req->async_data = hdr;
 		return hdr;
 	}
 
-	if (!io_alloc_async_data(req))
-		return req->async_data;
-
+	if (!io_alloc_async_data(req)) {
+		hdr = req->async_data;
+		hdr->free_iov = NULL;
+		return hdr;
+	}
 	return NULL;
 }
 
+static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
+{
+	/* ->prep_async is always called from the submission context */
+	return io_msg_alloc_async(req, 0);
+}
+
 static int io_setup_async_msg(struct io_kiocb *req,
 			      struct io_async_msghdr *kmsg,
 			      unsigned int issue_flags)
@@ -156,17 +157,20 @@ static int io_setup_async_msg(struct io_kiocb *req,
 	if (req_has_async_data(req))
 		return -EAGAIN;
-	async_msg = io_recvmsg_alloc_async(req, issue_flags);
+	async_msg = io_msg_alloc_async(req, issue_flags);
 	if (!async_msg) {
 		kfree(kmsg->free_iov);
 		return -ENOMEM;
 	}
 	req->flags |= REQ_F_NEED_CLEANUP;
 	memcpy(async_msg, kmsg, sizeof(*kmsg));
-	async_msg->msg.msg_name = &async_msg->addr;
+	if (async_msg->msg.msg_name)
+		async_msg->msg.msg_name = &async_msg->addr;
 	/* if were using fast_iov, set it to the new one */
-	if (!async_msg->free_iov)
-		async_msg->msg.msg_iter.iov = async_msg->fast_iov;
+	if (!kmsg->free_iov) {
+		size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
+		async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
+	}
 
 	return -EAGAIN;
 }
@@ -182,34 +186,34 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
 					&iomsg->free_iov);
 }
 
-int io_sendzc_prep_async(struct io_kiocb *req)
+int io_send_prep_async(struct io_kiocb *req)
 {
-	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *io;
 	int ret;
 
 	if (!zc->addr || req_has_async_data(req))
 		return 0;
-	if (io_alloc_async_data(req))
+	io = io_msg_alloc_async_prep(req);
+	if (!io)
 		return -ENOMEM;
-
-	io = req->async_data;
 	ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
 	return ret;
 }
 
 static int io_setup_async_addr(struct io_kiocb *req,
-			       struct sockaddr_storage *addr,
+			       struct sockaddr_storage *addr_storage,
 			       unsigned int issue_flags)
 {
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *io;
 
-	if (!addr || req_has_async_data(req))
+	if (!sr->addr || req_has_async_data(req))
 		return -EAGAIN;
-	if (io_alloc_async_data(req))
+	io = io_msg_alloc_async(req, issue_flags);
+	if (!io)
 		return -ENOMEM;
 
-	io = req->async_data;
-	memcpy(&io->addr, addr, sizeof(io->addr));
+	memcpy(&io->addr, addr_storage, sizeof(io->addr));
 	return -EAGAIN;
 }
@@ -217,6 +221,8 @@ int io_sendmsg_prep_async(struct io_kiocb *req)
 {
 	int ret;
 
+	if (!io_msg_alloc_async_prep(req))
+		return -ENOMEM;
 	ret = io_sendmsg_copy_hdr(req, req->async_data);
 	if (!ret)
 		req->flags |= REQ_F_NEED_CLEANUP;
@@ -234,8 +240,14 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
-	if (unlikely(sqe->file_index || sqe->addr2))
+	if (req->opcode == IORING_OP_SEND) {
+		if (READ_ONCE(sqe->__pad3[0]))
+			return -EINVAL;
+		sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+		sr->addr_len = READ_ONCE(sqe->addr_len);
+	} else if (sqe->addr2 || sqe->file_index) {
 		return -EINVAL;
+	}
 
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
@@ -291,13 +303,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 	if (ret < min_ret) {
 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 			return io_setup_async_msg(req, kmsg, issue_flags);
-		if (ret == -ERESTARTSYS)
-			ret = -EINTR;
 		if (ret > 0 && io_net_retry(sock, flags)) {
 			sr->done_io += ret;
 			req->flags |= REQ_F_PARTIAL_IO;
 			return io_setup_async_msg(req, kmsg, issue_flags);
 		}
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
 		req_set_fail(req);
 	}
 	/* fast path, check for non-NULL to avoid function call */
@@ -315,6 +327,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 
 int io_send(struct io_kiocb *req, unsigned int issue_flags)
 {
+	struct sockaddr_storage __address;
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct msghdr msg;
 	struct iovec iov;
@@ -323,9 +336,29 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	int min_ret = 0;
 	int ret;
 
+	msg.msg_name = NULL;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
+	msg.msg_ubuf = NULL;
+
+	if (sr->addr) {
+		if (req_has_async_data(req)) {
+			struct io_async_msghdr *io = req->async_data;
+
+			msg.msg_name = &io->addr;
+		} else {
+			ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
+			if (unlikely(ret < 0))
+				return ret;
+			msg.msg_name = (struct sockaddr *)&__address;
+		}
+		msg.msg_namelen = sr->addr_len;
+	}
+
 	if (!(req->flags & REQ_F_POLLED) &&
 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
-		return -EAGAIN;
+		return io_setup_async_addr(req, &__address, issue_flags);
 
 	sock = sock_from_file(req->file);
 	if (unlikely(!sock))
@@ -335,12 +368,6 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	if (unlikely(ret))
 		return ret;
 
-	msg.msg_name = NULL;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_namelen = 0;
-	msg.msg_ubuf = NULL;
-
 	flags = sr->msg_flags;
 	if (issue_flags & IO_URING_F_NONBLOCK)
 		flags |= MSG_DONTWAIT;
@@ -351,16 +378,17 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	ret = sock_sendmsg(sock, &msg);
 	if (ret < min_ret) {
 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
-			return -EAGAIN;
-		if (ret == -ERESTARTSYS)
-			ret = -EINTR;
+			return io_setup_async_addr(req, &__address, issue_flags);
+
 		if (ret > 0 && io_net_retry(sock, flags)) {
 			sr->len -= ret;
 			sr->buf += ret;
 			sr->done_io += ret;
 			req->flags |= REQ_F_PARTIAL_IO;
-			return -EAGAIN;
+			return io_setup_async_addr(req, &__address, issue_flags);
 		}
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
 		req_set_fail(req);
 	}
 	if (ret >= 0)
@@ -454,7 +482,6 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 		if (msg.msg_iovlen == 0) {
 			sr->len = 0;
-			iomsg->free_iov = NULL;
 		} else if (msg.msg_iovlen > 1) {
 			return -EINVAL;
 		} else {
@@ -465,7 +492,6 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 			if (clen < 0)
 				return -EINVAL;
 			sr->len = clen;
-			iomsg->free_iov = NULL;
 		}
 
 	if (req->flags & REQ_F_APOLL_MULTISHOT) {
@@ -504,6 +530,8 @@ int io_recvmsg_prep_async(struct io_kiocb *req)
 {
 	int ret;
 
+	if (!io_msg_alloc_async_prep(req))
+		return -ENOMEM;
 	ret = io_recvmsg_copy_hdr(req, req->async_data);
 	if (!ret)
 		req->flags |= REQ_F_NEED_CLEANUP;
@@ -751,13 +779,13 @@ retry_multishot:
 			}
 			return ret;
 		}
-		if (ret == -ERESTARTSYS)
-			ret = -EINTR;
 		if (ret > 0 && io_net_retry(sock, flags)) {
 			sr->done_io += ret;
 			req->flags |= REQ_F_PARTIAL_IO;
 			return io_setup_async_msg(req, kmsg, issue_flags);
 		}
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
 		req_set_fail(req);
 	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 		req_set_fail(req);
@@ -847,8 +875,6 @@ retry_multishot:
 			return -EAGAIN;
 		}
 
-		if (ret == -ERESTARTSYS)
-			ret = -EINTR;
 		if (ret > 0 && io_net_retry(sock, flags)) {
 			sr->len -= ret;
 			sr->buf += ret;
@@ -856,6 +882,8 @@ retry_multishot:
 			req->flags |= REQ_F_PARTIAL_IO;
 			return -EAGAIN;
 		}
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
 		req_set_fail(req);
 	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
 out_free:
@@ -879,23 +907,30 @@ out_free:
 	return ret;
 }
 
-void io_sendzc_cleanup(struct io_kiocb *req)
+void io_send_zc_cleanup(struct io_kiocb *req)
 {
-	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
+	struct io_async_msghdr *io;
 
-	zc->notif->flags |= REQ_F_CQE_SKIP;
-	io_notif_flush(zc->notif);
-	zc->notif = NULL;
+	if (req_has_async_data(req)) {
+		io = req->async_data;
+		/* might be ->fast_iov if *msg_copy_hdr failed */
+		if (io->free_iov != io->fast_iov)
+			kfree(io->free_iov);
+	}
+	if (zc->notif) {
+		io_notif_flush(zc->notif);
+		zc->notif = NULL;
+	}
 }
 
-int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_kiocb *notif;
 
-	if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3) ||
-	    READ_ONCE(sqe->__pad3[0]))
+	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
 	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
 	if (req->flags & REQ_F_CQE_SKIP)
@@ -922,14 +957,24 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		io_req_set_rsrc_node(notif, ctx, 0);
 	}
 
+	if (req->opcode == IORING_OP_SEND_ZC) {
+		if (READ_ONCE(sqe->__pad3[0]))
+			return -EINVAL;
+		zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+		zc->addr_len = READ_ONCE(sqe->addr_len);
+	} else {
+		if (unlikely(sqe->addr2 || sqe->file_index))
+			return -EINVAL;
+		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
+			return -EINVAL;
+	}
+
 	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	zc->len = READ_ONCE(sqe->len);
 	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 	if (zc->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
 
-	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
-	zc->addr_len = READ_ONCE(sqe->addr_len);
 	zc->done_io = 0;
 
 #ifdef CONFIG_COMPAT
@@ -939,6 +984,13 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return 0;
 }
 
+static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
+				 struct iov_iter *from, size_t length)
+{
+	skb_zcopy_downgrade_managed(skb);
+	return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+}
+
 static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 			   struct iov_iter *from, size_t length)
 {
@@ -949,13 +1001,10 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 	ssize_t copied = 0;
 	unsigned long truesize = 0;
 
-	if (!shinfo->nr_frags)
+	if (!frag)
 		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
-
-	if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
-		skb_zcopy_downgrade_managed(skb);
+	else if (unlikely(!skb_zcopy_managed(skb)))
 		return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
-	}
 
 	bi.bi_size = min(from->count, length);
 	bi.bi_bvec_done = from->iov_offset;
@@ -993,14 +1042,14 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 	return ret;
 }
 
-int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
+int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 {
-	struct sockaddr_storage __address, *addr = NULL;
-	struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+	struct sockaddr_storage __address;
+	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct msghdr msg;
 	struct iovec iov;
 	struct socket *sock;
-	unsigned msg_flags, cflags;
+	unsigned msg_flags;
 	int ret, min_ret = 0;
 
 	sock = sock_from_file(req->file);
@@ -1016,26 +1065,26 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 		if (req_has_async_data(req)) {
 			struct io_async_msghdr *io = req->async_data;
 
-			msg.msg_name = addr = &io->addr;
+			msg.msg_name = &io->addr;
 		} else {
 			ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
 			if (unlikely(ret < 0))
 				return ret;
 			msg.msg_name = (struct sockaddr *)&__address;
-			addr = &__address;
 		}
 		msg.msg_namelen = zc->addr_len;
 	}
 
 	if (!(req->flags & REQ_F_POLLED) &&
 	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
-		return io_setup_async_addr(req, addr, issue_flags);
+		return io_setup_async_addr(req, &__address, issue_flags);
 
 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
 		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
					(u64)(uintptr_t)zc->buf, zc->len);
 		if (unlikely(ret))
 			return ret;
+		msg.sg_from_iter = io_sg_from_iter;
 	} else {
 		ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
 					  &msg.msg_iter);
@@ -1044,6 +1093,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 		ret = io_notif_account_mem(zc->notif, zc->len);
 		if (unlikely(ret))
 			return ret;
+		msg.sg_from_iter = io_sg_from_iter_iovec;
 	}
 
 	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
@@ -1054,22 +1104,19 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 	msg.msg_flags = msg_flags;
 	msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
-	msg.sg_from_iter = io_sg_from_iter;
 	ret = sock_sendmsg(sock, &msg);
 
 	if (unlikely(ret < min_ret)) {
 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
-			return io_setup_async_addr(req, addr, issue_flags);
+			return io_setup_async_addr(req, &__address, issue_flags);
 
 		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
 			zc->len -= ret;
 			zc->buf += ret;
 			zc->done_io += ret;
 			req->flags |= REQ_F_PARTIAL_IO;
-			return io_setup_async_addr(req, addr, issue_flags);
+			return io_setup_async_addr(req, &__address, issue_flags);
 		}
-		if (ret < 0 && !zc->done_io)
-			zc->notif->flags |= REQ_F_CQE_SKIP;
 		if (ret == -ERESTARTSYS)
 			ret = -EINTR;
 		req_set_fail(req);
@@ -1080,13 +1127,102 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 	else if (zc->done_io)
 		ret = zc->done_io;
 
-	io_notif_flush(zc->notif);
-	req->flags &= ~REQ_F_NEED_CLEANUP;
-	cflags = ret >= 0 ? IORING_CQE_F_MORE : 0;
-	io_req_set_res(req, ret, cflags);
+	/*
+	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
+	 * flushing notif to io_send_zc_cleanup()
+	 */
+	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+		io_notif_flush(zc->notif);
+		req->flags &= ~REQ_F_NEED_CLEANUP;
+	}
+	io_req_set_res(req, ret, IORING_CQE_F_MORE);
 	return IOU_OK;
 }
 
+int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+	struct io_async_msghdr iomsg, *kmsg;
+	struct socket *sock;
+	unsigned flags;
+	int ret, min_ret = 0;
+
+	sock = sock_from_file(req->file);
+	if (unlikely(!sock))
+		return -ENOTSOCK;
+
+	if (req_has_async_data(req)) {
+		kmsg = req->async_data;
+	} else {
+		ret = io_sendmsg_copy_hdr(req, &iomsg);
+		if (ret)
+			return ret;
+		kmsg = &iomsg;
+	}
+
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
+		return io_setup_async_msg(req, kmsg, issue_flags);
+
+	flags = sr->msg_flags | MSG_ZEROCOPY;
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		flags |= MSG_DONTWAIT;
+	if (flags & MSG_WAITALL)
+		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
+	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+
+	if (unlikely(ret < min_ret)) {
+		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+			return io_setup_async_msg(req, kmsg, issue_flags);
+
+		if (ret > 0 && io_net_retry(sock, flags)) {
+			sr->done_io += ret;
+			req->flags |= REQ_F_PARTIAL_IO;
+			return io_setup_async_msg(req, kmsg, issue_flags);
+		}
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
+		req_set_fail(req);
+	}
+	/* fast path, check for non-NULL to avoid function call */
+	if (kmsg->free_iov) {
+		kfree(kmsg->free_iov);
+		kmsg->free_iov = NULL;
+	}
+
+	io_netmsg_recycle(req, issue_flags);
+	if (ret >= 0)
+		ret += sr->done_io;
+	else if (sr->done_io)
+		ret = sr->done_io;
+
+	/*
+	 * If we're in io-wq we can't rely on tw ordering guarantees, defer
+	 * flushing notif to io_send_zc_cleanup()
+	 */
+	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+		io_notif_flush(sr->notif);
+		req->flags &= ~REQ_F_NEED_CLEANUP;
+	}
+	io_req_set_res(req, ret, IORING_CQE_F_MORE);
+	return IOU_OK;
+}
+
+void io_sendrecv_fail(struct io_kiocb *req)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+
+	if (req->flags & REQ_F_PARTIAL_IO)
+		req->cqe.res = sr->done_io;
+
+	if ((req->flags & REQ_F_NEED_CLEANUP) &&
+	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
+		req->cqe.flags |= IORING_CQE_F_MORE;
+}
+
 int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
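A subtle fix folded into the diff above ("io_uring/net: fix fast_iov assignment in io_setup_async_msg()") shows up in io_setup_async_msg(): after the memcpy() of the msghdr state, an iov pointer that aimed into the source's inline fast_iov array must be recomputed against the copy's own array at the same index, not carried over still aliasing the old storage. A standalone sketch of that idea follows, with hypothetical names (msg_state, clone_state) rather than kernel APIs:

#include <stdio.h>
#include <string.h>

/* stand-in for io_async_msghdr: inline storage plus a pointer into it */
struct msg_state {
	char fast_buf[8][16];	/* plays the role of fast_iov */
	char *cur;		/* plays the role of msg.msg_iter.iov */
};

static void clone_state(struct msg_state *dst, const struct msg_state *src)
{
	memcpy(dst, src, sizeof(*src));
	/* after the memcpy, dst->cur still points into src->fast_buf */
	if (src->cur >= src->fast_buf[0] &&
	    src->cur < src->fast_buf[0] + sizeof(src->fast_buf)) {
		size_t off = src->cur - src->fast_buf[0];

		dst->cur = dst->fast_buf[0] + off;	/* re-point into the copy */
	}
}

int main(void)
{
	struct msg_state a = {0}, b;

	strcpy(a.fast_buf[2], "hello");
	a.cur = a.fast_buf[2];
	clone_state(&b, &a);
	/* b.cur now addresses b's own storage, mirroring the kernel fix */
	printf("%s %s\n", a.cur, b.cur);
	return 0;
}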