btrfs: avoid double nocow check when doing nowait dio writes

When doing a NOWAIT direct IO write we are checking twice if we can COW into the target file range using can_nocow_extent() - once at the very beginning of the write path, at btrfs_write_check() via check_nocow_nolock(), and later again at btrfs_get_blocks_direct_write(). The can_nocow_extent() function does a lot of expensive things - searching for the file extent item in the inode's subvolume tree, searching for the extent item in the extent tree, checking delayed references, etc, so it isn't a very cheap call. We can remove the first check at btrfs_write_check(), and add there a quick check to verify if the inode has the NODATACOW or PREALLOC flags, and quickly bail out if it doesn't have neither of those flags, as that means we have to COW and therefore can't comply with the NOWAIT semantics. After this we do only one call to can_nocow_extent(), while we are at btrfs_get_blocks_direct_write(), where we have already locked the file range and we did a try lock on the range before, at btrfs_dio_iomap_begin() (since the previous patch in the series). Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
author: Filipe Manana <fdmanana@suse.com> 2022-03-23 16:19:25 +0000
committer: David Sterba <dsterba@suse.com> 2022-05-16 17:03:10 +0200
commit: d7a8ab4e9b889de6daddbb674460e42b86d5dccf (patch)
tree: cae42fc8bfd1046f46e369e8bb0a2fd29ca371d1 /fs/btrfs/file.c
parent: 59094403444089343dd21ac32b30a936518b7e1a (diff)
1 files changed, 33 insertions, 69 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bd329316945f..ceac806155b8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1460,8 +1460,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 	return ret;
 }
 
-static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
-			   size_t *write_bytes, bool nowait)
+/*
+ * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
+ *
+ * @pos:         File offset.
+ * @write_bytes: The length to write, will be updated to the nocow writeable
+ *               range.
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks.
+ *
+ * Return:
+ * > 0          If we can nocow, and updates @write_bytes.
+ *  0           If we can't do a nocow write.
+ * -EAGAIN      If we can't do a nocow write because snapshoting of the inode's
+ *              root is in progress.
+ * < 0          If an error happened.
+ *
+ * NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0.
+ */
+int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
+			   size_t *write_bytes)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_root *root = inode->root;
@@ -1472,7 +1491,7 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 	if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
 		return 0;
 
-	if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
+	if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
 		return -EAGAIN;
 
 	lockstart = round_down(pos, fs_info->sectorsize);
@@ -1480,71 +1499,21 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
 			   fs_info->sectorsize) - 1;
 	num_bytes = lockend - lockstart + 1;
 
-	if (nowait) {
-		struct btrfs_ordered_extent *ordered;
-
-		if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
-			return -EAGAIN;
-
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     num_bytes);
-		if (ordered) {
-			btrfs_put_ordered_extent(ordered);
-			ret = -EAGAIN;
-			goto out_unlock;
-		}
-	} else {
-		btrfs_lock_and_flush_ordered_range(inode, lockstart,
-						   lockend, NULL);
-	}
-
+	btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, NULL);
 	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
 			NULL, NULL, NULL, false);
 	if (ret <= 0) {
 		ret = 0;
-		if (!nowait)
-			btrfs_drew_write_unlock(&root->snapshot_lock);
+		btrfs_drew_write_unlock(&root->snapshot_lock);
 	} else {
 		*write_bytes = min_t(size_t, *write_bytes ,
 				     num_bytes - pos + lockstart);
 	}
-out_unlock:
 	unlock_extent(&inode->io_tree, lockstart, lockend);
 
 	return ret;
 }
 
-static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
-			      size_t *write_bytes)
-{
-	return check_can_nocow(inode, pos, write_bytes, true);
-}
-
-/*
- * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
- *
- * @pos:	 File offset
- * @write_bytes: The length to write, will be updated to the nocow writeable
- *		 range
- *
- * This function will flush ordered extents in the range to ensure proper
- * nocow checks.
- *
- * Return:
- * >0		and update @write_bytes if we can do nocow write
- *  0		if we can't do nocow write
- * -EAGAIN	if we can't get the needed lock or there are ordered extents
- * 		for * (nowait == true) case
- * <0		if other error happened
- *
- * NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
- */
-int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
-			   size_t *write_bytes)
-{
-	return check_can_nocow(inode, pos, write_bytes, false);
-}
-
 void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
 {
 	btrfs_drew_write_unlock(&inode->root->snapshot_lock);
@@ -1579,20 +1548,15 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
 	loff_t oldsize;
 	loff_t start_pos;
 
-	if (iocb->ki_flags & IOCB_NOWAIT) {
-		size_t nocow_bytes = count;
-
-		/* We will allocate space in case nodatacow is not set, so bail */
-		if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) <= 0)
-			return -EAGAIN;
-		/*
-		 * There are holes in the range or parts of the range that must
-		 * be COWed (shared extents, RO block groups, etc), so just bail
-		 * out.
-		 */
-		if (nocow_bytes < count)
-			return -EAGAIN;
-	}
+	/*
+	 * Quickly bail out on NOWAIT writes if we don't have the nodatacow or
+	 * prealloc flags, as without those flags we always have to COW. We will
+	 * later check if we can really COW into the target range (using
+	 * can_nocow_extent() at btrfs_get_blocks_direct_write()).
+	 */
+	if ((iocb->ki_flags & IOCB_NOWAIT) &&
+	    !(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
+		return -EAGAIN;
 
 	current->backing_dev_info = inode_to_bdi(inode);
 	ret = file_remove_privs(file);
author	Filipe Manana <fdmanana@suse.com>	2022-03-23 16:19:25 +0000
committer	David Sterba <dsterba@suse.com>	2022-05-16 17:03:10 +0200
commit	d7a8ab4e9b889de6daddbb674460e42b86d5dccf (patch)
tree	cae42fc8bfd1046f46e369e8bb0a2fd29ca371d1 /fs/btrfs/file.c
parent	59094403444089343dd21ac32b30a936518b7e1a (diff)