From 7fbeb95d0f68e21e6ca61284f1ac681630976947 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 16 Feb 2020 01:01:18 +0300
Subject: io_uring: add missing io_req_cancelled()

fallocate_finish() is missing cancellation check. Add it.
It's safe to do that, as only flags setup and sqe fields copy are done
before it gets into __io_fallocate().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5a826017ebb8..29565d82291f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2517,6 +2517,9 @@ static void io_fallocate_finish(struct io_wq_work **workptr)
 	struct io_kiocb *nxt = NULL;
 	int ret;
 
+	if (io_req_cancelled(req))
+		return;
+
 	ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
 				req->sync.len);
 	if (ret < 0)
@@ -2904,6 +2907,7 @@ static void io_close_finish(struct io_wq_work **workptr)
 	struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
 	struct io_kiocb *nxt = NULL;
 
+	/* not cancellable, don't do io_req_cancelled() */
 	__io_close_finish(req, &nxt);
 	if (nxt)
 		io_wq_assign_next(workptr, nxt);
-- 
cgit v1.2.3-58-ga151


From 297a31e3e8318f533cff4fe33ffaefb74f72c6e2 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 Feb 2020 17:39:45 +0300
Subject: io_uring: remove unnecessary NULL checks

The "kmsg" pointer can't be NULL and we have already dereferenced it so
a check here would be useless.

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 29565d82291f..d35b45696c73 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3075,7 +3075,7 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 			if (req->io)
 				return -EAGAIN;
 			if (io_alloc_async_ctx(req)) {
-				if (kmsg && kmsg->iov != kmsg->fast_iov)
+				if (kmsg->iov != kmsg->fast_iov)
 					kfree(kmsg->iov);
 				return -ENOMEM;
 			}
@@ -3229,7 +3229,7 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
 			if (req->io)
 				return -EAGAIN;
 			if (io_alloc_async_ctx(req)) {
-				if (kmsg && kmsg->iov != kmsg->fast_iov)
+				if (kmsg->iov != kmsg->fast_iov)
 					kfree(kmsg->iov);
 				return -ENOMEM;
 			}
-- 
cgit v1.2.3-58-ga151


From 929a3af90f0f4bd7132d83552c1a98c83f60ef7e Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 19 Feb 2020 00:19:09 +0300
Subject: io_uring: fix use-after-free by io_cleanup_req()

io_cleanup_req() should be called before req->io is freed, and so
shouldn't be after __io_free_req() -> __io_req_aux_free(). Also,
it will be ignored for in io_free_req_many(), which use
__io_req_aux_free().

Place cleanup_req() into __io_req_aux_free().

Fixes: 99bc4c38537d774 ("io_uring: fix iovec leaks")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d35b45696c73..6e249aa97ba3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1260,6 +1260,9 @@ static void __io_req_aux_free(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
+	if (req->flags & REQ_F_NEED_CLEANUP)
+		io_cleanup_req(req);
+
 	kfree(req->io);
 	if (req->file) {
 		if (req->flags & REQ_F_FIXED_FILE)
@@ -1275,9 +1278,6 @@ static void __io_free_req(struct io_kiocb *req)
 {
 	__io_req_aux_free(req);
 
-	if (req->flags & REQ_F_NEED_CLEANUP)
-		io_cleanup_req(req);
-
 	if (req->flags & REQ_F_INFLIGHT) {
 		struct io_ring_ctx *ctx = req->ctx;
 		unsigned long flags;
-- 
cgit v1.2.3-58-ga151


From 7143b5ac5750f404ff3a594b34fdf3fc2f99f828 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 21 Feb 2020 16:42:16 +0100
Subject: io_uring: prevent sq_thread from spinning when it should stop

This patch drops 'cur_mm' before calling cond_resched(), to prevent
the sq_thread from spinning even when the user process is finished.

Before this patch, if the user process ended without closing the
io_uring fd, the sq_thread continues to spin until the
'sq_thread_idle' timeout ends.

In the worst case where the 'sq_thread_idle' parameter is bigger than
INT_MAX, the sq_thread will spin forever.

Fixes: 6c271ce2f1d5 ("io_uring: add submission polling")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6e249aa97ba3..b43467b3a8dc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5142,6 +5142,18 @@ static int io_sq_thread(void *data)
 		 * to enter the kernel to reap and flush events.
 		 */
 		if (!to_submit || ret == -EBUSY) {
+			/*
+			 * Drop cur_mm before scheduling, we can't hold it for
+			 * long periods (or over schedule()). Do this before
+			 * adding ourselves to the waitqueue, as the unuse/drop
+			 * may sleep.
+			 */
+			if (cur_mm) {
+				unuse_mm(cur_mm);
+				mmput(cur_mm);
+				cur_mm = NULL;
+			}
+
 			/*
 			 * We're polling. If we're within the defined idle
 			 * period, then let us spin without work before going
@@ -5156,18 +5168,6 @@ static int io_sq_thread(void *data)
 				continue;
 			}
 
-			/*
-			 * Drop cur_mm before scheduling, we can't hold it for
-			 * long periods (or over schedule()). Do this before
-			 * adding ourselves to the waitqueue, as the unuse/drop
-			 * may sleep.
-			 */
-			if (cur_mm) {
-				unuse_mm(cur_mm);
-				mmput(cur_mm);
-				cur_mm = NULL;
-			}
-
 			prepare_to_wait(&ctx->sqo_wait, &wait,
 						TASK_INTERRUPTIBLE);
 
-- 
cgit v1.2.3-58-ga151


From c7849be9cc2dd2754c48ddbaca27c2de6d80a95d Mon Sep 17 00:00:00 2001
From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Date: Sat, 22 Feb 2020 14:46:05 +0800
Subject: io_uring: fix __io_iopoll_check deadlock in io_sq_thread

Since commit a3a0e43fd770 ("io_uring: don't enter poll loop if we have
CQEs pending"), if we already events pending, we won't enter poll loop.
In case SETUP_IOPOLL and SETUP_SQPOLL are both enabled, if app has
been terminated and don't reap pending events which are already in cq
ring, and there are some reqs in poll_list, io_sq_thread will enter
__io_iopoll_check(), and find pending events, then return, this loop
will never have a chance to exit.

I have seen this issue in fio stress tests, to fix this issue, let
io_sq_thread call io_iopoll_getevents() with argument 'min' being zero,
and remove __io_iopoll_check().

Fixes: a3a0e43fd770 ("io_uring: don't enter poll loop if we have CQEs pending")
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index b43467b3a8dc..de650df9ac53 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1672,11 +1672,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
-			    long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+			   long min)
 {
 	int iters = 0, ret = 0;
 
+	/*
+	 * We disallow the app entering submit/complete with polling, but we
+	 * still need to lock the ring to prevent racing with polled issue
+	 * that got punted to a workqueue.
+	 */
+	mutex_lock(&ctx->uring_lock);
 	do {
 		int tmin = 0;
 
@@ -1712,21 +1718,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
 		ret = 0;
 	} while (min && !*nr_events && !need_resched());
 
-	return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
-			   long min)
-{
-	int ret;
-
-	/*
-	 * We disallow the app entering submit/complete with polling, but we
-	 * still need to lock the ring to prevent racing with polled issue
-	 * that got punted to a workqueue.
-	 */
-	mutex_lock(&ctx->uring_lock);
-	ret = __io_iopoll_check(ctx, nr_events, min);
 	mutex_unlock(&ctx->uring_lock);
 	return ret;
 }
@@ -5118,7 +5109,7 @@ static int io_sq_thread(void *data)
 				 */
 				mutex_lock(&ctx->uring_lock);
 				if (!list_empty(&ctx->poll_list))
-					__io_iopoll_check(ctx, &nr_events, 0);
+					io_iopoll_getevents(ctx, &nr_events, 0);
 				else
 					inflight = 0;
 				mutex_unlock(&ctx->uring_lock);
-- 
cgit v1.2.3-58-ga151