From 9b038d004ce95551cb35381c49fe896c5bc11ffe Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 21 May 2024 14:37:43 +0100
Subject: netfs: Fix io_uring based write-through

This can be triggered by mounting a cifs filesystem with a cache=strict
mount option and then, using the fsx program from xfstests, doing:

	ltp/fsx -A -d -N 1000 -S 11463 -P /tmp /cifs-mount/foo \
		--replay-ops=gen112-fsxops

Where gen112-fsxops holds:

	fallocate 0x6be7 0x8fc5 0x377d3
	copy_range 0x9c71 0x77e8 0x2edaf 0x377d3
	write 0x2776d 0x8f65 0x377d3

The problem is that netfs_io_request::len is being used for two purposes
and ends up getting set to the amount of data we transferred, not the
amount of data the caller asked to be transferred (for various reasons,
such as mmap'd writes, we might end up rounding out the data written to
the server to include the entire folio at each end).

Fix this by keeping the amount we were asked to write in ->len and using
->submitted to track what we issued ops for.  Then, when we come to
calling ->ki_complete(), ->len is the right size.

This also required netfs_cleanup_dio_write() to change since we're no
longer advancing wreq->len.  Use wreq->transferred instead as we might
have done a short read.

With this, the generic/112 xfstest passes if cifs is forced to put all
non-DIO opens into write-through mode.

Fixes: 288ace2f57c9 ("netfs: New writeback implementation")
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/295086.1716298663@warthog.procyon.org.uk
cc: Jeff Layton
cc: Steve French
cc: Enzo Matsumiya
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner
---
 fs/netfs/direct_write.c  | 2 +-
 fs/netfs/write_collect.c | 7 ++++---
 fs/netfs/write_issue.c   | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 608ba6416919..28163516bf03 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -12,7 +12,7 @@ static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
 {
 	struct inode *inode = wreq->inode;
-	unsigned long long end = wreq->start + wreq->len;
+	unsigned long long end = wreq->start + wreq->transferred;
 
 	if (!wreq->error &&
 	    i_size_read(inode) < end) {
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 60112e4b2c5e..426cf87aaf2e 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -510,7 +510,7 @@ reassess_streams:
 	 * stream has a gap that can be jumped.
 	 */
 	if (notes & SOME_EMPTY) {
-		unsigned long long jump_to = wreq->start + wreq->len;
+		unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
 
 		for (s = 0; s < NR_IO_STREAMS; s++) {
 			stream = &wreq->io_streams[s];
@@ -690,10 +690,11 @@ void netfs_write_collection_worker(struct work_struct *work)
 	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
 
 	if (wreq->iocb) {
-		wreq->iocb->ki_pos += wreq->transferred;
+		size_t written = min(wreq->transferred, wreq->len);
+		wreq->iocb->ki_pos += written;
 		if (wreq->iocb->ki_complete)
 			wreq->iocb->ki_complete(
-				wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
+				wreq->iocb, wreq->error ? wreq->error : written);
 		wreq->iocb = VFS_PTR_POISON;
 	}
 
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index e190043bc0da..86dad7e4202b 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -254,7 +254,7 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
 	stream->construct = NULL;
 
 	if (subreq->start + subreq->len > wreq->start + wreq->submitted)
-		wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
+		WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
	netfs_do_issue_write(stream, subreq);
 }
--
cgit v1.2.3-58-ga151
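To illustrate the completion-size rule this patch establishes, here is a
minimal userspace sketch (not kernel code; the folio size is assumed to be
4KB and the offsets are borrowed from the fsx reproducer above): the value
reported back through ->ki_complete() must be capped at what the caller
asked for, even when the server write was rounded out to whole folios.

    /* Sketch: why ki_complete() must report min(transferred, len). */
    #include <stdio.h>

    #define FOLIO_SIZE 4096ull

    static unsigned long long min_ull(unsigned long long a, unsigned long long b)
    {
            return a < b ? a : b;
    }

    int main(void)
    {
            unsigned long long start = 0x2776d;  /* from the fsx reproducer */
            unsigned long long len = 0x8f65;     /* what the caller asked to write */

            /* The server write is rounded out to folio boundaries at both ends. */
            unsigned long long lower = start & ~(FOLIO_SIZE - 1);
            unsigned long long upper = (start + len + FOLIO_SIZE - 1) & ~(FOLIO_SIZE - 1);
            unsigned long long transferred = upper - lower;

            /* Reporting 'transferred' would tell io_uring we wrote more than asked. */
            unsigned long long written = min_ull(transferred, len);

            printf("asked=%#llx transferred=%#llx reported=%#llx\n",
                   len, transferred, written);
            return 0;
    }

Keeping the requested size in ->len and the issued range in ->submitted is
what makes this cap computable at completion time.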
From 2c6b531020f0590db3b6b4950a41c692e9aa4f4a Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 21 May 2024 14:36:27 +0100
Subject: netfs: Fix AIO error handling when doing write-through

If an error occurs whilst we're doing an AIO write in write-through mode,
we may end up calling ->ki_complete() *and* returning an error from
->write_iter().  This can result in either a UAF (the ->ki_complete()
func pointer may get overwritten, for example) or a refcount underflow in
io_submit() as ->ki_complete is called twice.

Fix this by making netfs_end_writethrough() - and thus
netfs_perform_write() - unconditionally return -EIOCBQUEUED if we're
doing an AIO write and wait for completion if we're not.

Fixes: 288ace2f57c9 ("netfs: New writeback implementation")
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/295052.1716298587@warthog.procyon.org.uk
cc: Jeff Layton
cc: Enzo Matsumiya
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner
---
 fs/netfs/write_issue.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 86dad7e4202b..3aa86e268f40 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -636,7 +636,12 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
 
 	mutex_unlock(&ictx->wb_lock);
 
-	ret = wreq->error;
+	if (wreq->iocb) {
+		ret = -EIOCBQUEUED;
+	} else {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	}
 	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
 	return ret;
 }
--
cgit v1.2.3-58-ga151
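The invariant the patch restores is that exactly one path may report the
result of an async request.  A minimal sketch in plain C (the struct and
function names are illustrative stand-ins, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    #define EIOCBQUEUED 529  /* kernel-internal "queued, result comes later" */

    struct fake_request {
            bool has_iocb;   /* stands in for wreq->iocb != NULL (AIO) */
            int error;       /* stands in for wreq->error */
    };

    static int end_writethrough(struct fake_request *req)
    {
            if (req->has_iocb)
                    return -EIOCBQUEUED;  /* result delivered via ki_complete() only */
            /* synchronous caller: (pretend to) wait for IN_PROGRESS to clear */
            return req->error;
    }

    int main(void)
    {
            struct fake_request aio = { .has_iocb = true, .error = -5 };
            struct fake_request sync = { .has_iocb = false, .error = -5 };

            printf("aio:  %d (error reported later, exactly once)\n",
                   end_writethrough(&aio));
            printf("sync: %d (error reported here)\n",
                   end_writethrough(&sync));
            return 0;
    }

Returning the error *and* letting ->ki_complete() fire is what produced
the double completion that io_uring's refcounting cannot tolerate.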
From 4e527d5841e24623181edc7fd6f6598ffa810e10 Mon Sep 17 00:00:00 2001
From: Xu Yang
Date: Tue, 21 May 2024 19:49:39 +0800
Subject: iomap: fault in smaller chunks for non-large folio mappings

Since commit 5d8edfb900d5 ("iomap: Copy larger chunks from userspace"),
iomap will try to copy in larger chunks than PAGE_SIZE.  However, if the
mapping doesn't support large folios, only a single page of at most 4KB
will be created and 4KB of data will be written to the pagecache each
time.  The next 4KB will then be handled in the next iteration.  This
causes a potential write performance problem.  If the chunk is 2MB, a
total of 512 pages need to be handled in the end.  During this period,
fault_in_iov_iter_readable() is called to check that the iov_iter is
readable.  Since only 4KB will be handled each time, the address ranges
below will be checked over and over again:

	start		end
	buf,		buf+2MB
	buf+4KB,	buf+2MB
	buf+8KB,	buf+2MB
	...
	buf+2044KB,	buf+2MB

Obviously the checked size is wrong, since only 4KB will be handled each
time.  So pick a chunk size that matches what the mapping can actually
cache, letting iomap work well in the non-large folio case.

With this change, the write speed will be stable.  Tested on an ARM64
device.

Before:
 - dd if=/dev/zero of=/dev/sda bs=400K count=10485 (334 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=800K count=5242 (278 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=1600K count=2621 (204 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=2200K count=1906 (170 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=3000K count=1398 (150 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=4500K count=932 (139 MB/s)

After:
 - dd if=/dev/zero of=/dev/sda bs=400K count=10485 (339 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=800K count=5242 (330 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=1600K count=2621 (332 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=2200K count=1906 (333 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=3000K count=1398 (333 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=4500K count=932 (333 MB/s)

Fixes: 5d8edfb900d5 ("iomap: Copy larger chunks from userspace")
Cc: stable@vger.kernel.org
Reviewed-by: Darrick J. Wong
Signed-off-by: Xu Yang
Link: https://lore.kernel.org/r/20240521114939.2541461-2-xu.yang_2@nxp.com
Reviewed-by: Christoph Hellwig
Reviewed-by: Matthew Wilcox (Oracle)
Signed-off-by: Christian Brauner
---
 fs/iomap/buffered-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 41c8f0c68ef5..c5802a459334 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -898,11 +898,11 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
 	loff_t length = iomap_length(iter);
-	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iter->pos;
 	ssize_t total_written = 0;
 	long status = 0;
 	struct address_space *mapping = iter->inode->i_mapping;
+	size_t chunk = mapping_max_folio_size(mapping);
 	unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
 
 	do {
--
cgit v1.2.3-58-ga151
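The cost described above can be modelled in a few lines of userspace C.
This is a sketch, assuming a 2MB buffer and a mapping that only takes 4KB
per pass (the non-large-folio case); fault_in_cost() stands in for the
cumulative work done by fault_in_iov_iter_readable():

    #include <stdio.h>

    #define SZ_4K (4ull << 10)
    #define SZ_2M (2ull << 20)

    /* Total bytes fault-checked to copy a 2MB buffer at a given chunk size. */
    static unsigned long long fault_in_cost(unsigned long long chunk)
    {
            unsigned long long copied = 0, checked = 0;

            while (copied < SZ_2M) {
                    unsigned long long bytes = SZ_2M - copied;

                    if (bytes > chunk)
                            bytes = chunk;  /* size handed to the fault-in check */
                    checked += bytes;
                    copied += SZ_4K;        /* only one small folio lands per pass */
            }
            return checked;
    }

    int main(void)
    {
            /* old: chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER (2MB here) */
            printf("chunk=2MB: %llu MB checked\n", fault_in_cost(SZ_2M) >> 20);
            /* new: chunk = mapping_max_folio_size() (4KB, no large folios) */
            printf("chunk=4KB: %llu MB checked\n", fault_in_cost(SZ_4K) >> 20);
            return 0;
    }

With the old chunk this checks about 513MB of address space to copy 2MB;
with the corrected chunk it checks exactly 2MB.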
From f826bc9d6fc2f0e089fb8d104415d72e4d2e204c Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin
Date: Mon, 20 May 2024 12:08:18 +0300
Subject: signalfd: fix error return code

If anon_inode_getfile() fails, return the appropriate error code instead
of the previously reserved fd number.  This looks like a single typo:
the similar code changes in timerfd and userfaultfd are okay.

Found by Linux Verification Center (linuxtesting.org).

Fixes: fbe38120eb1d ("signalfd: convert to ->read_iter()")
Signed-off-by: Fedor Pchelkin
Link: https://lore.kernel.org/r/20240520090819.76342-1-pchelkin@ispras.ru
Reviewed-by: Jens Axboe
Signed-off-by: Christian Brauner
---
 fs/signalfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 4a5614442dbf..65fe5eed0be4 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -282,7 +282,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
 		if (IS_ERR(file)) {
 			put_unused_fd(ufd);
 			kfree(ctx);
-			return ufd;
+			return PTR_ERR(file);
 		}
 		file->f_mode |= FMODE_NOWAIT;
--
cgit v1.2.3-58-ga151

From 65bea9953715b19371164a8bec4f74fdd22c9e5a Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin
Date: Mon, 20 May 2024 12:08:19 +0300
Subject: signalfd: drop an obsolete comment

Commit fbe38120eb1d ("signalfd: convert to ->read_iter()") removed the
call to anon_inode_getfd() by splitting fd setup into two parts.  Drop
the comment referencing the internal details of that function.

Signed-off-by: Fedor Pchelkin
Link: https://lore.kernel.org/r/20240520090819.76342-2-pchelkin@ispras.ru
Reviewed-by: Jens Axboe
Signed-off-by: Christian Brauner
---
 fs/signalfd.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 65fe5eed0be4..ec7b2da2477a 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -286,10 +286,6 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
 		}
 		file->f_mode |= FMODE_NOWAIT;
 
-		/*
-		 * When we call this, the initialization must be complete, since
-		 * anon_inode_getfd() will install the fd.
-		 */
 		fd_install(ufd, file);
 	} else {
 		struct fd f = fdget(ufd);
--
cgit v1.2.3-58-ga151

From c596bea1452ddf172ec9b588e4597228e9a1f4d5 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 21 May 2024 16:49:46 +0100
Subject: netfs: Fix setting of BDP_ASYNC from iocb flags

Fix netfs_perform_write() to set BDP_ASYNC if IOCB_NOWAIT is set rather
than if IOCB_SYNC is not set.  The flag reflects asynchronicity in the
sense of not waiting rather than synchronicity in the sense of not
returning until the op is complete.

Without this, generic/590 fails on cifs in strict caching mode with a
complaint that one of the writes fails with EAGAIN.  The test can be
distilled down to:

	mount -t cifs /my/share /mnt -ostuff
	xfs_io -i -c 'falloc 0 8191M' -c fsync -f /mnt/file
	xfs_io -i -c 'pwrite -b 1M -W 0 8191M' /mnt/file

Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write")
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/316306.1716306586@warthog.procyon.org.uk
Reviewed-by: Jens Axboe
cc: Jeff Layton
cc: Enzo Matsumiya
cc: Jens Axboe
cc: Matthew Wilcox
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner
---
 fs/netfs/buffered_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 1121601536d1..07bc1fd43530 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -181,7 +181,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	struct folio *folio, *writethrough = NULL;
 	enum netfs_how_to_modify howto;
 	enum netfs_folio_trace trace;
-	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
 	ssize_t written = 0, ret, ret2;
 	loff_t i_size, pos = iocb->ki_pos, from, to;
 	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
--
cgit v1.2.3-58-ga151
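The inverted flag mapping is easy to see in isolation.  A sketch of the
translation the patch corrects, using illustrative flag values rather than
the kernel's real bit assignments:

    #include <stdio.h>

    /* Illustrative values only; the real kernel bit positions differ. */
    #define IOCB_SYNC   (1 << 0)
    #define IOCB_NOWAIT (1 << 1)
    #define BDP_ASYNC   (1 << 0)

    static unsigned int bdp_flags_old(int ki_flags)
    {
            return (ki_flags & IOCB_SYNC) ? 0 : BDP_ASYNC;    /* buggy mapping */
    }

    static unsigned int bdp_flags_new(int ki_flags)
    {
            return (ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;  /* fixed mapping */
    }

    int main(void)
    {
            /* A plain blocking buffered write: neither SYNC nor NOWAIT set. */
            int ki_flags = 0;

            /* Old code wrongly marked a blocking write as "cannot sleep"... */
            printf("old: %u (spurious BDP_ASYNC -> can surface EAGAIN)\n",
                   bdp_flags_old(ki_flags));
            /* ...new code sets BDP_ASYNC only when the caller asked not to wait. */
            printf("new: %u\n", bdp_flags_new(ki_flags));
            return 0;
    }

BDP_ASYNC means "this caller cannot sleep in dirty-page balancing", which
corresponds to IOCB_NOWAIT, not to the absence of IOCB_SYNC.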
From 29be9100aca2915fab54b5693309bc42956542e5 Mon Sep 17 00:00:00 2001
From: Marc Dionne
Date: Fri, 24 May 2024 17:17:55 +0100
Subject: afs: Don't cross .backup mountpoint from backup volume

Don't cross a mountpoint that explicitly specifies a backup volume
(target is .backup) when starting from a backup volume.

It is not uncommon to mount a volume's backup directly in the volume
itself.  This can cause tools that are not paying attention to get into
a loop mounting the volume onto itself as they attempt to traverse the
tree, leading to a variety of problems.

This doesn't prevent the general case of loops in a sequence of
mountpoints, but addresses a common special case in the same way as
other afs clients.

Reported-by: Jan Henrik Sylvester
Link: http://lists.infradead.org/pipermail/linux-afs/2024-May/008454.html
Reported-by: Markus Suvanto
Link: http://lists.infradead.org/pipermail/linux-afs/2024-February/008074.html
Signed-off-by: Marc Dionne
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/768760.1716567475@warthog.procyon.org.uk
Reviewed-by: Jeffrey Altman
cc: linux-afs@lists.infradead.org
Signed-off-by: Christian Brauner
---
 fs/afs/mntpt.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 97f50e9fd9eb..297487ee8323 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -140,6 +140,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
 		put_page(page);
 		if (ret < 0)
 			return ret;
+
+		/* Don't cross a backup volume mountpoint from a backup volume */
+		if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+		    ctx->type == AFSVL_BACKVOL)
+			return -ENODEV;
 	}
 
 	return 0;
--
cgit v1.2.3-58-ga151
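The guard itself is a two-condition check.  A toy model (the enum and
may_cross() are illustrative stand-ins; only the backup-to-backup refusal
and the -ENODEV convention mirror the patch):

    #include <stdio.h>

    enum volume_type { VOL_RW, VOL_RO, VOL_BACKUP };  /* AFSVL_BACKVOL analogue */

    static int may_cross(enum volume_type src, enum volume_type target)
    {
            if (src == VOL_BACKUP && target == VOL_BACKUP)
                    return -19;  /* -ENODEV: refuse the mount, breaking the loop */
            return 0;
    }

    int main(void)
    {
            printf("rw     -> backup: %d (allowed)\n",
                   may_cross(VOL_RW, VOL_BACKUP));
            printf("backup -> backup: %d (refused)\n",
                   may_cross(VOL_BACKUP, VOL_BACKUP));
            return 0;
    }

Refusing at mountpoint-traversal time is what stops a backup volume
mounted inside itself from being followed indefinitely.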
From f89ea63f1c65d3e93b255f14f9d9e05df87955fa Mon Sep 17 00:00:00 2001
From: David Howells
Date: Fri, 24 May 2024 15:26:11 +0100
Subject: netfs, 9p: Fix race between umount and async request completion

There's a problem in 9p's interaction with netfslib whereby a crash
occurs because the 9p_fid structs get forcibly destroyed during client
teardown (without paying attention to their refcounts) before netfslib
has finished with them.  However, it's not a simple case of deferring
the clunking that p9_fid_put() does as that requires the p9_client
record to still be present.

The problem is that netfslib has to unlock pages and clear the
IN_PROGRESS flag before destroying the objects involved - including the
fid - and, in any case, nothing checks to see if writeback completed
barring looking at the page flags.

Fix this by keeping a count of outstanding I/O requests (of any type)
and waiting for it to quiesce during inode eviction.

Reported-by: syzbot+df038d463cca332e8414@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/0000000000005be0aa061846f8d6@google.com/
Reported-by: syzbot+d7c7a495a5e466c031b6@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/000000000000b86c5e06130da9c6@google.com/
Reported-by: syzbot+1527696d41a634cc1819@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/000000000000041f960618206d7e@google.com/
Signed-off-by: David Howells
Link: https://lore.kernel.org/r/755891.1716560771@warthog.procyon.org.uk
Tested-by: syzbot+d7c7a495a5e466c031b6@syzkaller.appspotmail.com
Reviewed-by: Dominique Martinet
cc: Eric Van Hensbergen
cc: Latchesar Ionkov
cc: Christian Schoenebeck
cc: Jeff Layton
cc: Steve French
cc: Hillf Danton
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Reported-and-tested-by: syzbot+d7c7a495a5e466c031b6@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner
---
 fs/9p/vfs_inode.c      |  1 +
 fs/afs/inode.c         |  1 +
 fs/netfs/objects.c     |  5 +++++
 fs/smb/client/cifsfs.c |  1 +
 include/linux/netfs.h  | 18 ++++++++++++++++++
 5 files changed, 26 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7a3308d77606..fd72fc38c8f5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -348,6 +348,7 @@ void v9fs_evict_inode(struct inode *inode)
 	__le32 __maybe_unused version;
 
 	if (!is_bad_inode(inode)) {
+		netfs_wait_for_outstanding_io(inode);
 		truncate_inode_pages_final(&inode->i_data);
 
 		version = cpu_to_le32(v9inode->qid.version);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 94fc049aff58..15bb7989c387 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -648,6 +648,7 @@ void afs_evict_inode(struct inode *inode)
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
 
 	afs_set_cache_aux(vnode, &aux);
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index c90d482b1650..f4a642727479 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -72,6 +72,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 		}
 	}
 
+	atomic_inc(&ctx->io_count);
 	trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
 	netfs_proc_add_rreq(rreq);
 	netfs_stat(&netfs_n_rh_rreq);
@@ -124,6 +125,7 @@ static void netfs_free_request(struct work_struct *work)
 {
 	struct netfs_io_request *rreq =
 		container_of(work, struct netfs_io_request, work);
+	struct netfs_inode *ictx = netfs_inode(rreq->inode);
 	unsigned int i;
 
 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
@@ -142,6 +144,9 @@ static void netfs_free_request(struct work_struct *work)
 		}
 		kvfree(rreq->direct_bv);
 	}
+
+	if (atomic_dec_and_test(&ictx->io_count))
+		wake_up_var(&ictx->io_count);
 	call_rcu(&rreq->rcu, netfs_free_request_rcu);
 }
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index ec5b639f421a..14810ffd15c8 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -431,6 +431,7 @@ cifs_free_inode(struct inode *inode)
 static void
 cifs_evict_inode(struct inode *inode)
 {
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
 	if (inode->i_state & I_PINNING_NETFS_WB)
 		cifs_fscache_unuse_inode_cookie(inode, true);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ca56a4428043..3b22ce0d064c 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -68,6 +68,7 @@ struct netfs_inode {
 	loff_t			remote_i_size;	/* Size of the remote file */
 	loff_t			zero_point;	/* Size after which we assume there's no data
 						 * on the server */
+	atomic_t		io_count;	/* Number of outstanding reqs */
 	unsigned long		flags;
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
@@ -472,6 +473,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
 	ctx->remote_i_size = i_size_read(&ctx->inode);
 	ctx->zero_point = LLONG_MAX;
 	ctx->flags = 0;
+	atomic_set(&ctx->io_count, 0);
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
@@ -515,4 +517,20 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
 #endif
 }
 
+/**
+ * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
+ * @inode: The netfs inode to wait on
+ *
+ * Wait for outstanding I/O requests of any type to complete.  This is
+ * intended to be called from inode eviction routines.  This makes sure that
+ * any resources held by those requests are cleaned up before we let the
+ * inode get cleaned up.
+ */
+static inline void netfs_wait_for_outstanding_io(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
+}
+
 #endif /* _LINUX_NETFS_H */
--
cgit v1.2.3-58-ga151
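The quiescing scheme above follows a classic counted-reference pattern:
every request bumps a per-inode counter at allocation, drops it at free,
and eviction blocks until the counter hits zero.  A userspace sketch using
pthreads in place of wait_var_event()/wake_up_var() (all names here are
illustrative, not the kernel API):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t quiesced = PTHREAD_COND_INITIALIZER;
    static int io_count;  /* analogue of netfs_inode::io_count */

    static void request_get(void)
    {
            pthread_mutex_lock(&lock);
            io_count++;                     /* atomic_inc(&ctx->io_count) */
            pthread_mutex_unlock(&lock);
    }

    static void request_put(void)
    {
            pthread_mutex_lock(&lock);
            if (--io_count == 0)            /* atomic_dec_and_test() */
                    pthread_cond_broadcast(&quiesced);  /* wake_up_var() */
            pthread_mutex_unlock(&lock);
    }

    static void *async_io(void *arg)
    {
            usleep(100000);                 /* simulated in-flight write */
            request_put();
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            request_get();                  /* request allocated */
            pthread_create(&t, NULL, async_io, NULL);

            /* netfs_wait_for_outstanding_io(): block eviction until idle */
            pthread_mutex_lock(&lock);
            while (io_count != 0)
                    pthread_cond_wait(&quiesced, &lock);
            pthread_mutex_unlock(&lock);

            printf("quiesced; safe to evict inode\n");
            pthread_join(t, NULL);
            return 0;
    }

Counting requests rather than pages is what lets the wait cover I/O of any
type, which is exactly the gap the syzbot reproducers hit.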