From 1383a7ed67490fb00d793e36c7a4d599ff88a64d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:40:31 +1100 Subject: vfs: check file ranges before cloning files Move the file range checks from vfs_clone_file_prep into a separate generic_remap_checks function so that all the checks are collected in a central location. This forms the basis for adding more checks from generic_write_checks that will make cloning's input checking more consistent with write input checking. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 42ea7bab9144..281d5f53f2ec 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, + ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, len, is_dedupe); if (ret <= 0) goto out_unlock; -- cgit v1.2.3-58-ga151 From a83ab01a62e61616ebb8b97f90f568c1214dc10d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:08 +1100 Subject: vfs: rename vfs_clone_file_prep to be more descriptive The vfs_clone_file_prep is a generic function to be called by filesystem implementations only. Rename the prefix to generic_ and make it more clear that it applies to remap operations, not just clones. Signed-off-by: Darrick J. 
Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- fs/ocfs2/refcounttree.c | 2 +- fs/read_write.c | 8 ++++---- fs/xfs/xfs_reflink.c | 2 +- include/linux/fs.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/xfs') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 19e03936c5e1..36c56dfbe485 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE)) goto out_unlock; - ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, &len, is_dedupe); if (ret <= 0) goto out_unlock; diff --git a/fs/read_write.c b/fs/read_write.c index f5395d8da741..aca75a97a695 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1745,9 +1745,9 @@ static int generic_remap_check_len(struct inode *inode_in, * Returns: 0 for "nothing to clone", 1 for "something to clone", or * the usual negative error code. 
*/ -int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 *len, bool is_dedupe) +int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1822,7 +1822,7 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, return 1; } -EXPORT_SYMBOL(vfs_clone_file_prep); +EXPORT_SYMBOL(generic_remap_file_range_prep); int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 281d5f53f2ec..a7757a128a78 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep( if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; - ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out, + ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, is_dedupe); if (ret <= 0) goto out_unlock; diff --git a/include/linux/fs.h b/include/linux/fs.h index ba93a6e7dac4..55729e1c2e75 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long, loff_t *, rwf_t); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); -extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); +extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 *count, bool is_dedupe); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3-58-ga151 From 
2e5dfc99f2e61c42083ba742395e7a7b353513d1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:21 +1100 Subject: vfs: combine the clone and dedupe into a single remap_file_range Combine the clone_file_range and dedupe_file_range operations into a single remap_file_range file operation dispatch since they're fundamentally the same operation. The differences between the two can be made in the prep functions. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- Documentation/filesystems/porting | 5 +++++ Documentation/filesystems/vfs.txt | 20 +++++++++++------ fs/btrfs/ctree.h | 8 +++---- fs/btrfs/file.c | 3 +-- fs/btrfs/ioctl.c | 45 ++++++++++++++++++++------------------- fs/cifs/cifsfs.c | 22 +++++++++++-------- fs/nfs/nfs4file.c | 10 ++++++--- fs/ocfs2/file.c | 24 +++++++-------------- fs/overlayfs/file.c | 30 +++++++++++++++----------- fs/read_write.c | 18 ++++++++-------- fs/xfs/xfs_file.c | 23 ++++++-------------- include/linux/fs.h | 25 ++++++++++++++++++---- 12 files changed, 127 insertions(+), 106 deletions(-) (limited to 'fs/xfs') diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 7b7b845c490a..e6d4466268dd 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -622,3 +622,8 @@ in your dentry operations instead. alloc_file_clone(file, flags, ops) does not affect any caller's references. On success you get a new struct file sharing the mount/dentry with the original, on failure - ERR_PTR(). +-- +[mandatory] + ->clone_file_range() and ->dedupe_file_range have been replaced with + ->remap_file_range(). See Documentation/filesystems/vfs.txt for more + information. 
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index a6c6a8af48a2..6f5babfee27b 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -883,8 +883,9 @@ struct file_operations { unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64); + int (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); }; @@ -960,11 +961,16 @@ otherwise noted. copy_file_range: called by the copy_file_range(2) system call. - clone_file_range: called by the ioctl(2) system call for FICLONERANGE and - FICLONE commands. - - dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE - command. + remap_file_range: called by the ioctl(2) system call for FICLONERANGE and + FICLONE and FIDEDUPERANGE commands to remap file ranges. An + implementation should remap len bytes at pos_in of the source file into + the dest file at pos_out. Implementations must handle callers passing + in len == 0; this means "remap to the end of the source file". The + return value should be zero if all bytes were remapped, or the usual + negative error code if the remapping did not succeed completely. + The remap_flags parameter accepts REMAP_FILE_* flags. If + REMAP_FILE_DEDUP is set then the implementation must only remap if the + requested file ranges have identical contents. fadvise: possibly called by the fadvise64() system call. 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2cddfe7806a4..124a05662fc2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3218,9 +3218,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_balance_args *bargs); -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, - struct file *dst_file, loff_t dst_loff, - u64 olen); /* file.c */ int __init btrfs_auto_defrag_init(void); @@ -3250,8 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); -int btrfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); +int btrfs_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len, + unsigned int remap_flags); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2be00e873e92..9a963f061393 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3269,8 +3269,7 @@ const struct file_operations btrfs_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_compat_ioctl, #endif - .clone_file_range = btrfs_clone_file_range, - .dedupe_file_range = btrfs_dedupe_file_range, + .remap_file_range = btrfs_remap_file_range, }; void __cold btrfs_auto_defrag_exit(void) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d60b6caf09e8..bfd99c66723e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3627,26 +3627,6 @@ out_unlock: return ret; } -int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff, - struct file *dst_file, loff_t dst_loff, - u64 olen) -{ - struct inode *src = file_inode(src_file); - struct inode *dst = file_inode(dst_file); - u64 bs = 
BTRFS_I(src)->root->fs_info->sb->s_blocksize; - - if (WARN_ON_ONCE(bs < PAGE_SIZE)) { - /* - * Btrfs does not support blocksize < page_size. As a - * result, btrfs_cmp_data() won't correctly handle - * this situation without an update. - */ - return -EINVAL; - } - - return btrfs_extent_same(src, src_loff, olen, dst, dst_loff); -} - static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, @@ -4348,9 +4328,30 @@ out_unlock: return ret; } -int btrfs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) +int btrfs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, u64 len, + unsigned int remap_flags) { + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) { + struct inode *src = file_inode(src_file); + struct inode *dst = file_inode(dst_file); + u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; + + if (WARN_ON_ONCE(bs < PAGE_SIZE)) { + /* + * Btrfs does not support blocksize < page_size. As a + * result, btrfs_cmp_data() won't correctly handle + * this situation without an update. 
+ */ + return -EINVAL; + } + + return btrfs_extent_same(src, off, len, dst, destoff); + } + return btrfs_clone_files(dst_file, src_file, off, len, destoff); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 7065426b3280..e8144d0dcde2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -975,8 +975,9 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; -static int cifs_clone_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len) +static int cifs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, u64 len, + unsigned int remap_flags) { struct inode *src_inode = file_inode(src_file); struct inode *target_inode = file_inode(dst_file); @@ -986,6 +987,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; + if (remap_flags & ~REMAP_FILE_ADVISORY) + return -EINVAL; + cifs_dbg(FYI, "clone range\n"); xid = get_xid(); @@ -1134,7 +1138,7 @@ const struct file_operations cifs_file_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1153,7 +1157,7 @@ const struct file_operations cifs_file_strict_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1172,7 +1176,7 @@ const struct file_operations cifs_file_direct_ops = { .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1191,7 
+1195,7 @@ const struct file_operations cifs_file_nobrl_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1209,7 +1213,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, }; @@ -1227,7 +1231,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1239,7 +1243,7 @@ const struct file_operations cifs_dir_ops = { .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, - .clone_file_range = cifs_clone_file_range, + .remap_file_range = cifs_remap_file_range, .llseek = generic_file_llseek, .fsync = cifs_dir_fsync, }; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4288a6ecaf75..ae5780ce41dc 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_allocate(filep, offset, len); } -static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, u64 count) +static int nfs42_remap_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, u64 count, + unsigned int remap_flags) { struct inode *dst_inode = file_inode(dst_file); struct nfs_server *server = NFS_SERVER(dst_inode); @@ -190,6 +191,9 @@ 
static int nfs42_clone_file_range(struct file *src_file, loff_t src_off, bool same_inode = false; int ret; + if (remap_flags & ~REMAP_FILE_ADVISORY) + return -EINVAL; + /* check alignment w.r.t. clone_blksize */ ret = -EINVAL; if (bs) { @@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = { .copy_file_range = nfs4_copy_file_range, .llseek = nfs4_file_llseek, .fallocate = nfs42_fallocate, - .clone_file_range = nfs42_clone_file_range, + .remap_file_range = nfs42_remap_file_range, #else .llseek = nfs_file_llseek, #endif diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9fa35cb6f6e0..0b757a24567c 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2527,24 +2527,18 @@ out: return offset; } -static int ocfs2_file_clone_range(struct file *file_in, +static int ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len) + u64 len, + unsigned int remap_flags) { - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, false); -} + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; -static int ocfs2_file_dedupe_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) -{ return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, true); + len, remap_flags & REMAP_FILE_DEDUP); } const struct inode_operations ocfs2_file_iops = { @@ -2586,8 +2580,7 @@ const struct file_operations ocfs2_fops = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, - .clone_file_range = ocfs2_file_clone_range, - .dedupe_file_range = ocfs2_file_dedupe_range, + .remap_file_range = ocfs2_remap_file_range, }; const struct file_operations ocfs2_dops = { @@ -2633,8 +2626,7 @@ const struct file_operations ocfs2_fops_no_plocks = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, - .clone_file_range = 
ocfs2_file_clone_range, - .dedupe_file_range = ocfs2_file_dedupe_range, + .remap_file_range = ocfs2_remap_file_range, }; const struct file_operations ocfs2_dops_no_plocks = { diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 986313da0c88..fffb36fd5920 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -489,26 +489,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, OVL_COPY); } -static int ovl_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +static int ovl_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags) { - return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - OVL_CLONE); -} + enum ovl_copyop op; + + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + if (remap_flags & REMAP_FILE_DEDUP) + op = OVL_DEDUPE; + else + op = OVL_CLONE; -static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) -{ /* * Don't copy up because of a dedupe request, this wouldn't make sense * most of the time (data would be duplicated instead of deduplicated). 
*/ - if (!ovl_inode_upper(file_inode(file_in)) || - !ovl_inode_upper(file_inode(file_out))) + if (op == OVL_DEDUPE && + (!ovl_inode_upper(file_inode(file_in)) || + !ovl_inode_upper(file_inode(file_out)))) return -EPERM; return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0, - OVL_DEDUPE); + op); } const struct file_operations ovl_file_operations = { @@ -525,6 +530,5 @@ const struct file_operations ovl_file_operations = { .compat_ioctl = ovl_compat_ioctl, .copy_file_range = ovl_copy_file_range, - .clone_file_range = ovl_clone_file_range, - .dedupe_file_range = ovl_dedupe_file_range, + .remap_file_range = ovl_remap_file_range, }; diff --git a/fs/read_write.c b/fs/read_write.c index 734c5661fb69..766bdcb381f3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1588,9 +1588,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * Try cloning first, this is supported by more file systems, and * more efficient if both clone and copy are supported (e.g. NFS). */ - if (file_in->f_op->clone_file_range) { - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); + if (file_in->f_op->remap_file_range) { + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, 0); if (ret == 0) { ret = len; goto done; @@ -1849,7 +1849,7 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in, (file_out->f_flags & O_APPEND)) return -EBADF; - if (!file_in->f_op->clone_file_range) + if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; ret = remap_verify_area(file_in, pos_in, len, false); @@ -1860,8 +1860,8 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in, if (ret) return ret; - ret = file_in->f_op->clone_file_range(file_in, pos_in, - file_out, pos_out, len); + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, 0); if (!ret) { fsnotify_access(file_in); fsnotify_modify(file_out); @@ -2006,7 +2006,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t 
src_pos, goto out_drop_write; ret = -EINVAL; - if (!dst_file->f_op->dedupe_file_range) + if (!dst_file->f_op->remap_file_range) goto out_drop_write; if (len == 0) { @@ -2014,8 +2014,8 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, goto out_drop_write; } - ret = dst_file->f_op->dedupe_file_range(src_file, src_pos, - dst_file, dst_pos, len); + ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file, + dst_pos, len, REMAP_FILE_DEDUP); out_drop_write: mnt_drop_write_file(dst_file); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 61a5ad2600e8..2ad94d508f80 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -920,27 +920,19 @@ out_unlock: } STATIC int -xfs_file_clone_range( +xfs_file_remap_range( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len) + u64 len, + unsigned int remap_flags) { - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, false); -} + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; -STATIC int -xfs_file_dedupe_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len) -{ return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, true); + len, remap_flags & REMAP_FILE_DEDUP); } STATIC int @@ -1175,8 +1167,7 @@ const struct file_operations xfs_file_operations = { .fsync = xfs_file_fsync, .get_unmapped_area = thp_get_unmapped_area, .fallocate = xfs_file_fallocate, - .clone_file_range = xfs_file_clone_range, - .dedupe_file_range = xfs_file_dedupe_range, + .remap_file_range = xfs_file_remap_range, }; const struct file_operations xfs_dir_file_operations = { diff --git a/include/linux/fs.h b/include/linux/fs.h index 55729e1c2e75..888cef35c7d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1721,6 +1721,24 @@ struct block_device_operations; #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) +/* + * These flags control the 
behavior of the remap_file_range function pointer. + * If it is called with len == 0 that means "remap to end of source file". + * See Documentation/filesystems/vfs.txt for more details about this call. + * + * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) + */ +#define REMAP_FILE_DEDUP (1 << 0) + +/* + * These flags signal that the caller is ok with altering various aspects of + * the behavior of the remap operation. The changes must be made by the + * implementation; the vfs remap helper functions can take advantage of them. + * Flags in this category exist to preserve the quirky behavior of the hoisted + * btrfs clone/dedupe ioctls. + * There are no flags yet, but subsequent commits will add some. + */ +#define REMAP_FILE_ADVISORY (0) struct iov_iter; @@ -1759,10 +1777,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); - int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, - u64); + int (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; -- cgit v1.2.3-58-ga151 From a91ae49bbaf43910edb09e03fedf26b23875bd52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:28 +1100 Subject: vfs: pass remap flags to generic_remap_file_range_prep Plumb the remap flags through the filesystem from the vfs function dispatcher all the way to the prep function to prepare for behavior changes in subsequent patches. Signed-off-by: Darrick J. 
Wong Reviewed-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/ocfs2/file.c | 2 +- fs/ocfs2/refcounttree.c | 4 ++-- fs/ocfs2/refcounttree.h | 2 +- fs/read_write.c | 14 +++++++------- fs/xfs/xfs_file.c | 2 +- fs/xfs/xfs_reflink.c | 21 +++++++++++---------- fs/xfs/xfs_reflink.h | 3 ++- include/linux/fs.h | 2 +- 8 files changed, 26 insertions(+), 24 deletions(-) (limited to 'fs/xfs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0b757a24567c..9809b0e5746f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2538,7 +2538,7 @@ static int ocfs2_remap_file_range(struct file *file_in, return -EINVAL; return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags & REMAP_FILE_DEDUP); + len, remap_flags); } const struct inode_operations ocfs2_file_iops = { diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 36c56dfbe485..df9781567ec0 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4825,7 +4825,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, struct file *file_out, loff_t pos_out, u64 len, - bool is_dedupe) + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -4851,7 +4851,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, goto out_unlock; ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); + &len, remap_flags); if (ret <= 0) goto out_unlock; diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 4af55bf4b35b..d2c5f526edff 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -120,6 +120,6 @@ int ocfs2_reflink_remap_range(struct file *file_in, struct file *file_out, loff_t pos_out, u64 len, - bool is_dedupe); + unsigned int remap_flags); #endif /* OCFS2_REFCOUNTTREE_H */ diff --git a/fs/read_write.c b/fs/read_write.c index 766bdcb381f3..201381689284 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1722,14 
+1722,14 @@ static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, loff_t pos_out, u64 *len, - bool is_dedupe) + unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; if ((*len & blkmask) == 0) return 0; - if (is_dedupe) + if (remap_flags & REMAP_FILE_DEDUP) *len &= ~blkmask; else if (pos_out + *len < i_size_read(inode_out)) return -EINVAL; @@ -1747,7 +1747,7 @@ static int generic_remap_check_len(struct inode *inode_in, */ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, bool is_dedupe) + u64 *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1771,7 +1771,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (*len == 0) { loff_t isize = i_size_read(inode_in); - if (is_dedupe || pos_in == isize) + if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize) return 0; if (pos_in > isize) return -EINVAL; @@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, /* Check that we don't violate system file offset limits. */ ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len, - is_dedupe); + (remap_flags & REMAP_FILE_DEDUP)); if (ret) return ret; @@ -1804,7 +1804,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, /* * Check that the extents are the same. 
*/ - if (is_dedupe) { + if (remap_flags & REMAP_FILE_DEDUP) { bool is_same = false; ret = vfs_dedupe_file_range_compare(inode_in, pos_in, @@ -1816,7 +1816,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, } ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, - is_dedupe); + remap_flags); if (ret) return ret; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2ad94d508f80..20314eb4677a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -932,7 +932,7 @@ xfs_file_remap_range( return -EINVAL; return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags & REMAP_FILE_DEDUP); + len, remap_flags); } STATIC int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index a7757a128a78..29aab196ce7e 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -921,13 +921,14 @@ xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, xfs_extlen_t cowextsize, - bool is_dedupe) + unsigned int remap_flags) { struct xfs_mount *mp = dest->i_mount; struct xfs_trans *tp; int error; - if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + if ((remap_flags & REMAP_FILE_DEDUP) && + newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -948,7 +949,7 @@ xfs_reflink_update_dest( dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; } - if (!is_dedupe) { + if (!(remap_flags & REMAP_FILE_DEDUP)) { xfs_trans_ichgtime(tp, dest, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } @@ -1296,7 +1297,7 @@ xfs_reflink_remap_prep( struct file *file_out, loff_t pos_out, u64 *len, - bool is_dedupe) + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); @@ -1327,7 +1328,7 @@ xfs_reflink_remap_prep( goto out_unlock; ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, - len, is_dedupe); + len, remap_flags); if (ret <= 0) goto out_unlock; @@ -1336,7 
+1337,7 @@ xfs_reflink_remap_prep( * from the source file so we don't try to dedupe the partial * EOF block. */ - if (is_dedupe) { + if (remap_flags & REMAP_FILE_DEDUP) { *len &= ~blkmask; } else if (*len & blkmask) { /* @@ -1372,7 +1373,7 @@ xfs_reflink_remap_prep( PAGE_ALIGN(pos_out + *len) - 1); /* If we're altering the file contents... */ - if (!is_dedupe) { + if (!(remap_flags & REMAP_FILE_DEDUP)) { /* * ...update the timestamps (which will grab the ilock again * from xfs_fs_dirty_inode, so we have to call it before we @@ -1410,7 +1411,7 @@ xfs_reflink_remap_range( struct file *file_out, loff_t pos_out, u64 len, - bool is_dedupe) + unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); @@ -1430,7 +1431,7 @@ xfs_reflink_remap_range( /* Prepare and then clone file data. */ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, - &len, is_dedupe); + &len, remap_flags); if (ret <= 0) return ret; @@ -1457,7 +1458,7 @@ xfs_reflink_remap_range( cowextsize = src->i_d.di_cowextsize; ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, - is_dedupe); + remap_flags); out_unlock: xfs_reflink_remap_unlock(file_in, file_out); diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index c585ad9552b2..6f82d628bf17 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -28,7 +28,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); + struct file *file_out, loff_t pos_out, u64 len, + unsigned int remap_flags); extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, struct xfs_inode *ip, bool *has_shared); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, diff --git a/include/linux/fs.h b/include/linux/fs.h index 
888cef35c7d7..631c28ce1436 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1844,7 +1844,7 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, bool is_dedupe); + u64 *count, unsigned int remap_flags); extern int do_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len); extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3-58-ga151 From 8dde90bca6fca3736ea20109654bcf6dcf2ecf1d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:41 +1100 Subject: vfs: remap helper should update destination inode metadata Extend generic_remap_file_range_prep to handle inode metadata updates when remapping into a file. If the operation can possibly alter the file contents, we must update the ctime and mtime and remove security privileges, just like we do for regular file writes. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- fs/read_write.c | 19 +++++++++++++++++++ fs/xfs/xfs_reflink.c | 23 ----------------------- 2 files changed, 19 insertions(+), 23 deletions(-) (limited to 'fs/xfs') diff --git a/fs/read_write.c b/fs/read_write.c index ebcbfc4f2907..b61bd3fc7154 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1820,6 +1820,25 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, if (ret) return ret; + /* If can't alter the file contents, we're done. */ + if (!(remap_flags & REMAP_FILE_DEDUP)) { + /* Update the timestamps, since we can alter file contents. */ + if (!(file_out->f_mode & FMODE_NOCMTIME)) { + ret = file_update_time(file_out); + if (ret) + return ret; + } + + /* + * Clear the security bits if the process is not being run by + * root. This keeps people from modifying setuid and setgid + * binaries. 
+ */ + ret = file_remove_privs(file_out); + if (ret) + return ret; + } + return 1; } EXPORT_SYMBOL(generic_remap_file_range_prep); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 29aab196ce7e..2d7dd8b28d7c 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1372,29 +1372,6 @@ xfs_reflink_remap_prep( truncate_inode_pages_range(&inode_out->i_data, pos_out, PAGE_ALIGN(pos_out + *len) - 1); - /* If we're altering the file contents... */ - if (!(remap_flags & REMAP_FILE_DEDUP)) { - /* - * ...update the timestamps (which will grab the ilock again - * from xfs_fs_dirty_inode, so we have to call it before we - * take the ilock). - */ - if (!(file_out->f_mode & FMODE_NOCMTIME)) { - ret = file_update_time(file_out); - if (ret) - goto out_unlock; - } - - /* - * ...clear the security bits if the process is not being run - * by root. This keeps people from modifying setuid and setgid - * binaries. - */ - ret = file_remove_privs(file_out); - if (ret) - goto out_unlock; - } - return 1; out_unlock: xfs_reflink_remap_unlock(file_in, file_out); -- cgit v1.2.3-58-ga151 From 42ec3d4c02187a18e27ff94b409ec27234bf2ffd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:41:49 +1100 Subject: vfs: make remap_file_range functions take and return bytes completed Change the remap_file_range functions to take a number of bytes to operate upon and return the number of bytes they operated on. This is a requirement for allowing fs implementations to return short clone/dedupe results to the user, which will enable us to obey resource limits in a graceful manner. A subsequent patch will enable copy_file_range to signal to the ->clone_file_range implementation that it can handle a short length, which will be returned in the function's return value. For now the short return is not implemented anywhere so the behavior won't change -- either copy_file_range manages to clone the entire range or it tries an alternative. 
Neither clone ioctl can take advantage of this, alas. Signed-off-by: Darrick J. Wong Reviewed-by: Amir Goldstein Signed-off-by: Dave Chinner --- Documentation/filesystems/vfs.txt | 10 ++++---- fs/btrfs/ctree.h | 6 ++--- fs/btrfs/ioctl.c | 13 +++++++---- fs/cifs/cifsfs.c | 6 ++--- fs/ioctl.c | 10 +++++++- fs/nfs/nfs4file.c | 6 ++--- fs/nfsd/vfs.c | 8 +++++-- fs/ocfs2/file.c | 16 ++++++------- fs/ocfs2/refcounttree.c | 2 +- fs/ocfs2/refcounttree.h | 2 +- fs/overlayfs/copy_up.c | 6 ++--- fs/overlayfs/file.c | 12 +++++----- fs/read_write.c | 49 +++++++++++++++++++++------------------ fs/xfs/xfs_file.c | 9 ++++--- fs/xfs/xfs_reflink.c | 4 ++-- fs/xfs/xfs_reflink.h | 2 +- include/linux/fs.h | 27 +++++++++++---------- mm/filemap.c | 2 +- 18 files changed, 108 insertions(+), 82 deletions(-) (limited to 'fs/xfs') diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 6f5babfee27b..1bd2919deaca 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -883,9 +883,9 @@ struct file_operations { unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*remap_file_range)(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); }; @@ -966,8 +966,8 @@ otherwise noted. implementation should remap len bytes at pos_in of the source file into the dest file at pos_out. Implementations must handle callers passing in len == 0; this means "remap to the end of the source file". The - return value should be zero if all bytes were remapped, or the usual - negative error code if the remapping did not succeed completely. 
+ return value should be the number of bytes remapped, or the usual + negative error code if errors occurred before any bytes were remapped. The remap_flags parameter accepts REMAP_FILE_* flags. If REMAP_FILE_DEDUP is set then the implementation must only remap if the requested file ranges have identical contents. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 124a05662fc2..771a961d77ad 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3247,9 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, struct extent_state **cached); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); -int btrfs_remap_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, - unsigned int remap_flags); +loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bfd99c66723e..b0c513e10977 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4328,10 +4328,12 @@ out_unlock: return ret; } -int btrfs_remap_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len, +loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) { + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; @@ -4349,10 +4351,11 @@ int btrfs_remap_file_range(struct file *src_file, loff_t off, return -EINVAL; } - return btrfs_extent_same(src, off, len, dst, destoff); + ret = btrfs_extent_same(src, off, len, dst, destoff); + } else { + ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); } - - return btrfs_clone_files(dst_file, src_file, off, len, destoff); + return ret < 0 ?
ret : len; } static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e8144d0dcde2..5ca71c6c8be2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -975,8 +975,8 @@ const struct inode_operations cifs_symlink_inode_ops = { .listxattr = cifs_listxattr, }; -static int cifs_remap_file_range(struct file *src_file, loff_t off, - struct file *dst_file, loff_t destoff, u64 len, +static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) { struct inode *src_inode = file_inode(src_file); @@ -1029,7 +1029,7 @@ static int cifs_remap_file_range(struct file *src_file, loff_t off, unlock_two_nondirectories(src_inode, target_inode); out: free_xid(xid); - return rc; + return rc < 0 ? rc : len; } ssize_t cifs_file_copychunk_range(unsigned int xid, diff --git a/fs/ioctl.c b/fs/ioctl.c index 2005529af560..72537b68c272 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { struct fd src_file = fdget(srcfd); + loff_t cloned; int ret; if (!src_file.file) @@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd, ret = -EXDEV; if (src_file.file->f_path.mnt != dst_file->f_path.mnt) goto fdput; - ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); + cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff, + olen); + if (cloned < 0) + ret = cloned; + else if (olen && cloned != olen) + ret = -EINVAL; + else + ret = 0; fdput: fdput(src_file); return ret; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index ae5780ce41dc..46d691ba04bc 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -180,8 +180,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_allocate(filep, offset, len); } -static 
int nfs42_remap_file_range(struct file *src_file, loff_t src_off, - struct file *dst_file, loff_t dst_off, u64 count, +static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, + struct file *dst_file, loff_t dst_off, loff_t count, unsigned int remap_flags) { struct inode *dst_inode = file_inode(dst_file); @@ -244,7 +244,7 @@ out_unlock: inode_unlock(src_inode); } out: - return ret; + return ret < 0 ? ret : count; } #endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b53e76391e52..ac6cb6101cbe 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { - return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, - count)); + loff_t cloned; + + cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count); + if (count && cloned != count) + cloned = -EINVAL; + return nfserrno(cloned < 0 ? cloned : 0); } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9809b0e5746f..fbaeafe44b5f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2527,18 +2527,18 @@ out: return offset; } -static int ocfs2_remap_file_range(struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - unsigned int remap_flags) +static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags); + ret = ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out, + len, remap_flags); + return ret < 0 ? 
ret : len; } const struct inode_operations ocfs2_file_iops = { diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index df9781567ec0..6a42c04ac0ab 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4824,7 +4824,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index d2c5f526edff..eb65c1d0843c 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -119,7 +119,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags); #endif /* OCFS2_REFCOUNTTREE_H */ diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 1cc797a08a5b..8750b7235516 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) struct file *new_file; loff_t old_pos = 0; loff_t new_pos = 0; + loff_t cloned; int error = 0; if (len == 0) @@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) } /* Try to use clone_file_range to clone up within the same fs */ - error = do_clone_file_range(old_file, 0, new_file, 0, len); - if (!error) + cloned = do_clone_file_range(old_file, 0, new_file, 0, len); + if (cloned == len) goto out; /* Couldn't clone, so now we try to copy the data */ - error = 0; /* FIXME: copy up sparse files efficiently */ while (len) { diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index fffb36fd5920..6c3fec6168e9 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -434,14 +434,14 @@ enum ovl_copyop { OVL_DEDUPE, }; -static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in, +static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, struct file 
*file_out, loff_t pos_out, - u64 len, unsigned int flags, enum ovl_copyop op) + loff_t len, unsigned int flags, enum ovl_copyop op) { struct inode *inode_out = file_inode(file_out); struct fd real_in, real_out; const struct cred *old_cred; - ssize_t ret; + loff_t ret; ret = ovl_real_fdget(file_out, &real_out); if (ret) @@ -489,9 +489,9 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in, OVL_COPY); } -static int ovl_remap_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags) +static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { enum ovl_copyop op; diff --git a/fs/read_write.c b/fs/read_write.c index b61bd3fc7154..356641afa487 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1589,10 +1589,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * more efficient if both clone and copy are supported (e.g. NFS). 
*/ if (file_in->f_op->remap_file_range) { - ret = file_in->f_op->remap_file_range(file_in, pos_in, - file_out, pos_out, len, 0); - if (ret == 0) { - ret = len; + loff_t cloned; + + cloned = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, + min_t(loff_t, MAX_RW_COUNT, len), 0); + if (cloned > 0) { + ret = cloned; goto done; } } @@ -1686,11 +1689,12 @@ out2: return ret; } -static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write) +static int remap_verify_area(struct file *file, loff_t pos, loff_t len, + bool write) { struct inode *inode = file_inode(file); - if (unlikely(pos < 0)) + if (unlikely(pos < 0 || len < 0)) return -EINVAL; if (unlikely((loff_t) (pos + len) < 0)) @@ -1721,7 +1725,7 @@ static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write) static int generic_remap_check_len(struct inode *inode_in, struct inode *inode_out, loff_t pos_out, - u64 *len, + loff_t *len, unsigned int remap_flags) { u64 blkmask = i_blocksize(inode_in) - 1; @@ -1747,7 +1751,7 @@ static int generic_remap_check_len(struct inode *inode_in, */ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, unsigned int remap_flags) + loff_t *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); @@ -1843,12 +1847,12 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_remap_file_range_prep); -int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t len) { struct inode *inode_in = file_inode(file_in); struct inode *inode_out = file_inode(file_out); - int ret; + loff_t ret; if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) return -EISDIR; @@ -1881,19 +1885,19 @@ int 
do_clone_file_range(struct file *file_in, loff_t pos_in, ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, len, 0); - if (!ret) { - fsnotify_access(file_in); - fsnotify_modify(file_out); - } + if (ret < 0) + return ret; + fsnotify_access(file_in); + fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(do_clone_file_range); -int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len) +loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t len) { - int ret; + loff_t ret; file_start_write(file_out); ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); @@ -1999,10 +2003,11 @@ out_error: } EXPORT_SYMBOL(vfs_dedupe_file_range_compare); -int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, u64 len) +loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len) { - s64 ret; + loff_t ret; ret = mnt_want_write_file(dst_file); if (ret) @@ -2051,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same) int i; int ret; u16 count = same->dest_count; - int deduped; + loff_t deduped; if (!(file->f_mode & FMODE_READ)) return -EINVAL; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 20314eb4677a..38fde4e11714 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -919,20 +919,23 @@ out_unlock: return error; } -STATIC int +STATIC loff_t xfs_file_remap_range( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags) { + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, + ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, remap_flags); + return ret < 0 ? 
ret : len; } STATIC int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 2d7dd8b28d7c..3dbe5fb7e9c0 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1296,7 +1296,7 @@ xfs_reflink_remap_prep( loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *len, + loff_t *len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); @@ -1387,7 +1387,7 @@ xfs_reflink_remap_range( loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 len, + loff_t len, unsigned int remap_flags) { struct inode *inode_in = file_inode(file_in); diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 6f82d628bf17..c3c46c276fe1 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -28,7 +28,7 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len, + struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, struct xfs_inode *ip, bool *has_shared); diff --git a/include/linux/fs.h b/include/linux/fs.h index c5435ca81132..c72d8c3c065a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1777,9 +1777,9 @@ struct file_operations { #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); - int (*remap_file_range)(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len, unsigned int remap_flags); + loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); } __randomize_layout; @@ -1844,19 +1844,22 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); extern 
int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - u64 *count, unsigned int remap_flags); -extern int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); -extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, u64 len); + loff_t *count, + unsigned int remap_flags); +extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len); +extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len); extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, - struct file *dst_file, loff_t dst_pos, - u64 len); +extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, + struct file *dst_file, loff_t dst_pos, + loff_t len); struct super_operations { @@ -2986,7 +2989,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *count, unsigned int remap_flags); + loff_t *count, unsigned int remap_flags); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index 410dc58f7b16..e9091d731f84 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks); */ int 
generic_remap_checks(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, - uint64_t *req_count, unsigned int remap_flags) + loff_t *req_count, unsigned int remap_flags) { struct inode *inode_in = file_in->f_mapping->host; struct inode *inode_out = file_out->f_mapping->host; -- cgit v1.2.3-58-ga151 From 8c5c836bd6c3b9f9fc1c5a210d630b8c42f4f7df Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:42:24 +1100 Subject: vfs: clean up generic_remap_file_range_prep return value Since the remap prep function can update the length of the remap request, we can change this function to return the usual return status instead of the odd behavior it has now. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/ocfs2/refcounttree.c | 2 +- fs/read_write.c | 6 +++--- fs/xfs/xfs_reflink.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/xfs') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 6a42c04ac0ab..46bbd315c39f 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4852,7 +4852,7 @@ int ocfs2_reflink_remap_range(struct file *file_in, ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, &len, remap_flags); - if (ret <= 0) + if (ret < 0 || len == 0) goto out_unlock; /* Lock out changes to the allocation maps and remap. */ diff --git a/fs/read_write.c b/fs/read_write.c index e4d295d0d236..6b40a43edf18 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1848,8 +1848,8 @@ out_error: * sense, and then flush all dirty data. Caller must ensure that the * inodes have been locked against any other modifications. * - * Returns: 0 for "nothing to clone", 1 for "something to clone", or - * the usual negative error code. + * If there's an error, then the usual negative error code is returned. + * Otherwise returns 0 with *len set to the request length. 
*/ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, @@ -1945,7 +1945,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, return ret; } - return 1; + return 0; } EXPORT_SYMBOL(generic_remap_file_range_prep); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3dbe5fb7e9c0..9b1ea42c81d1 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1329,7 +1329,7 @@ xfs_reflink_remap_prep( ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, len, remap_flags); - if (ret <= 0) + if (ret < 0 || *len == 0) goto out_unlock; /* @@ -1409,7 +1409,7 @@ xfs_reflink_remap_range( /* Prepare and then clone file data. */ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, &len, remap_flags); - if (ret <= 0) + if (ret < 0 || len == 0) return ret; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); -- cgit v1.2.3-58-ga151 From 4918ef4ea008cd2ff47eb852894e3f9b9047f4f3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:46:33 +1100 Subject: xfs: fix pagecache truncation prior to reflink Prior to remapping blocks, it is necessary to remove pages from the destination file's page cache. Unfortunately, the truncation is not aggressive enough -- if page size > block size, we'll end up zeroing subpage blocks instead of removing them. So, round the start offset down and the end offset up to page boundaries. We already wrote all the dirty data so the larger range shouldn't be a problem. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 9b1ea42c81d1..e8e86646bb4b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1369,8 +1369,9 @@ xfs_reflink_remap_prep( goto out_unlock; /* Zap any page cache for the destination file's range. */ - truncate_inode_pages_range(&inode_out->i_data, pos_out, - PAGE_ALIGN(pos_out + *len) - 1); + truncate_inode_pages_range(&inode_out->i_data, + round_down(pos_out, PAGE_SIZE), + round_up(pos_out + *len, PAGE_SIZE) - 1); return 1; out_unlock: -- cgit v1.2.3-58-ga151 From 9f04aaffddb3e487f3eda1945f1a9531d6cc7628 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:46:50 +1100 Subject: xfs: clean up xfs_reflink_remap_blocks call site Move the offset <-> blocks unit conversions into xfs_reflink_remap_blocks to make the call site less ugly. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e8e86646bb4b..79dec457f7fb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1119,16 +1119,23 @@ out: STATIC int xfs_reflink_remap_blocks( struct xfs_inode *src, - xfs_fileoff_t srcoff, + loff_t pos_in, struct xfs_inode *dest, - xfs_fileoff_t destoff, - xfs_filblks_t len, - xfs_off_t new_isize) + loff_t pos_out, + loff_t remap_len) { struct xfs_bmbt_irec imap; + xfs_fileoff_t srcoff; + xfs_fileoff_t destoff; + xfs_filblks_t len; + xfs_filblks_t range_len; + xfs_off_t new_isize = pos_out + remap_len; int nimaps; int error = 0; - xfs_filblks_t range_len; + + destoff = XFS_B_TO_FSBT(src->i_mount, pos_out); + srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in); + len = XFS_B_TO_FSB(src->i_mount, remap_len); /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */ while (len) { @@ -1143,7 +1150,7 @@ xfs_reflink_remap_blocks( error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0); xfs_iunlock(src, lock_mode); if (error) - goto err; + break; ASSERT(nimaps == 1); trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE, @@ -1157,11 +1164,11 @@ xfs_reflink_remap_blocks( error = xfs_reflink_remap_extent(dest, &imap, destoff, new_isize); if (error) - goto err; + break; if (fatal_signal_pending(current)) { error = -EINTR; - goto err; + break; } /* Advance drange/srange */ @@ -1170,10 +1177,8 @@ xfs_reflink_remap_blocks( len -= range_len; } - return 0; - -err: - trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); + if (error) + trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); return error; } @@ -1396,8 +1401,6 @@ xfs_reflink_remap_range( struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; - 
xfs_fileoff_t sfsbno, dfsbno; - xfs_filblks_t fsblen; xfs_extlen_t cowextsize; ssize_t ret; @@ -1415,11 +1418,7 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - dfsbno = XFS_B_TO_FSBT(mp, pos_out); - sfsbno = XFS_B_TO_FSBT(mp, pos_in); - fsblen = XFS_B_TO_FSB(mp, len); - ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, - pos_out + len); + ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len); if (ret) goto out_unlock; -- cgit v1.2.3-58-ga151 From 3f68c1f562f1e4c5e1a515b392a2e0a509a342d5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:47:06 +1100 Subject: xfs: support returning partial reflink results Back when the XFS reflink code only supported clone_file_range, we were only able to return zero or negative error codes to userspace. However, now that copy_file_range (which returns bytes copied) can use XFS' clone_file_range, we have the opportunity to return partial results. For example, if userspace sends a 1GB clone request and we run out of space halfway through, we at least can tell userspace that we completed 512M of that request like a regular write. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 5 +---- fs/xfs/xfs_reflink.c | 17 ++++++++++++----- fs/xfs/xfs_reflink.h | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 38fde4e11714..7d42ab8fe6e1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -928,14 +928,11 @@ xfs_file_remap_range( loff_t len, unsigned int remap_flags) { - int ret; - if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, + return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, remap_flags); - return ret < 0 ? 
ret : len; } STATIC int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 79dec457f7fb..4abb2aea8f31 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1122,13 +1122,15 @@ xfs_reflink_remap_blocks( loff_t pos_in, struct xfs_inode *dest, loff_t pos_out, - loff_t remap_len) + loff_t remap_len, + loff_t *remapped) { struct xfs_bmbt_irec imap; xfs_fileoff_t srcoff; xfs_fileoff_t destoff; xfs_filblks_t len; xfs_filblks_t range_len; + xfs_filblks_t remapped_len = 0; xfs_off_t new_isize = pos_out + remap_len; int nimaps; int error = 0; @@ -1175,10 +1177,13 @@ xfs_reflink_remap_blocks( srcoff += range_len; destoff += range_len; len -= range_len; + remapped_len += range_len; } if (error) trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_); + *remapped = min_t(loff_t, remap_len, + XFS_FSB_TO_B(src->i_mount, remapped_len)); return error; } @@ -1387,7 +1392,7 @@ out_unlock: /* * Link a range of blocks from one file to another. */ -int +loff_t xfs_reflink_remap_range( struct file *file_in, loff_t pos_in, @@ -1401,8 +1406,9 @@ xfs_reflink_remap_range( struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; + loff_t remapped = 0; xfs_extlen_t cowextsize; - ssize_t ret; + int ret; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; @@ -1418,7 +1424,8 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len); + ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, + &remapped); if (ret) goto out_unlock; @@ -1441,7 +1448,7 @@ out_unlock: xfs_reflink_remap_unlock(file_in, file_out); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); - return ret; + return remapped > 0 ? 
remapped : ret; } /* diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index c3c46c276fe1..cbc26ff79a8f 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -27,7 +27,7 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); -extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, +extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp, -- cgit v1.2.3-58-ga151 From 7a6ccf004e234c01fb2a11771de9837c9ff3d56d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:47:16 +1100 Subject: xfs: remove redundant remap partial EOF block checks Now that we've moved the partial EOF block checks to the VFS helpers, we can remove the redundant functionality from XFS. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 4abb2aea8f31..bccc66316cc4 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1314,7 +1314,6 @@ xfs_reflink_remap_prep( struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); bool same_inode = (inode_in == inode_out); - u64 blkmask = i_blocksize(inode_in) - 1; ssize_t ret; /* Lock both files against IO */ @@ -1342,24 +1341,6 @@ xfs_reflink_remap_prep( if (ret < 0 || *len == 0) goto out_unlock; - /* - * If the dedupe data matches, chop off the partial EOF block - * from the source file so we don't try to dedupe the partial - * EOF block. 
- */ - if (remap_flags & REMAP_FILE_DEDUP) { - *len &= ~blkmask; - } else if (*len & blkmask) { - /* - * The user is attempting to share a partial EOF block, - * if it's inside the destination EOF then reject it. - */ - if (pos_out + *len < i_size_read(inode_out)) { - ret = -EINVAL; - goto out_unlock; - } - } - /* Attach dquots to dest inode before changing block map */ ret = xfs_qm_dqattach(dest); if (ret) -- cgit v1.2.3-58-ga151 From 3fc9f5e409319e994d113cf1327ba6ab147423c2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:47:26 +1100 Subject: xfs: remove xfs_reflink_remap_range Since xfs_file_remap_range is a thin wrapper, move the contents of xfs_reflink_remap_range into the shell. This cuts down on the vfs calls being made from internal xfs code. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 65 +++++++++++++++++++++++++++++++++++++++++------- fs/xfs/xfs_reflink.c | 70 +++------------------------------------------------- fs/xfs/xfs_reflink.h | 10 ++++++++ 3 files changed, 70 insertions(+), 75 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7d42ab8fe6e1..53c9ab8fb777 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -919,20 +919,67 @@ out_unlock: return error; } -STATIC loff_t + +loff_t xfs_file_remap_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - loff_t len, - unsigned int remap_flags) + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + loff_t len, + unsigned int remap_flags) { + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); + struct xfs_mount *mp = src->i_mount; + loff_t remapped = 0; + xfs_extlen_t cowextsize; + int ret; + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; - return 
xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, - len, remap_flags); + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return -EOPNOTSUPP; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Prepare and then clone file data. */ + ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, + &len, remap_flags); + if (ret < 0 || len == 0) + return ret; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + + ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, + &remapped); + if (ret) + goto out_unlock; + + /* + * Carry the cowextsize hint from src to dest if we're sharing the + * entire source file to the entire destination file, the source file + * has a cowextsize hint, and the destination file does not. + */ + cowextsize = 0; + if (pos_in == 0 && len == i_size_read(inode_in) && + (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && + pos_out == 0 && len >= i_size_read(inode_out) && + !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) + cowextsize = src->i_d.di_cowextsize; + + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, + remap_flags); + +out_unlock: + xfs_reflink_remap_unlock(file_in, file_out); + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + return remapped > 0 ? remapped : ret; } STATIC int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index bccc66316cc4..84f372f7ea04 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -916,7 +916,7 @@ out_error: /* * Update destination inode size & cowextsize hint, if necessary. */ -STATIC int +int xfs_reflink_update_dest( struct xfs_inode *dest, xfs_off_t newlen, @@ -1116,7 +1116,7 @@ out: /* * Iteratively remap one file's extents (and holes) to another's. */ -STATIC int +int xfs_reflink_remap_blocks( struct xfs_inode *src, loff_t pos_in, @@ -1232,7 +1232,7 @@ retry: } /* Unlock both inodes after they've been prepped for a range clone. 
*/ -STATIC void +void xfs_reflink_remap_unlock( struct file *file_in, struct file *file_out) @@ -1300,7 +1300,7 @@ xfs_reflink_zero_posteof( * stale data in the destination file. Hence we reject these clone attempts with * -EINVAL in this case. */ -STATIC int +int xfs_reflink_remap_prep( struct file *file_in, loff_t pos_in, @@ -1370,68 +1370,6 @@ out_unlock: return ret; } -/* - * Link a range of blocks from one file to another. - */ -loff_t -xfs_reflink_remap_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - loff_t len, - unsigned int remap_flags) -{ - struct inode *inode_in = file_inode(file_in); - struct xfs_inode *src = XFS_I(inode_in); - struct inode *inode_out = file_inode(file_out); - struct xfs_inode *dest = XFS_I(inode_out); - struct xfs_mount *mp = src->i_mount; - loff_t remapped = 0; - xfs_extlen_t cowextsize; - int ret; - - if (!xfs_sb_version_hasreflink(&mp->m_sb)) - return -EOPNOTSUPP; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - /* Prepare and then clone file data. */ - ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, - &len, remap_flags); - if (ret < 0 || len == 0) - return ret; - - trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); - - ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, - &remapped); - if (ret) - goto out_unlock; - - /* - * Carry the cowextsize hint from src to dest if we're sharing the - * entire source file to the entire destination file, the source file - * has a cowextsize hint, and the destination file does not. 
- */ - cowextsize = 0; - if (pos_in == 0 && len == i_size_read(inode_in) && - (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && - pos_out == 0 && len >= i_size_read(inode_out) && - !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) - cowextsize = src->i_d.di_cowextsize; - - ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, - remap_flags); - -out_unlock: - xfs_reflink_remap_unlock(file_in, file_out); - if (ret) - trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); - return remapped > 0 ? remapped : ret; -} - /* * The user wants to preemptively CoW all shared blocks in this file, * which enables us to turn off the reflink flag. Iterate all diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index cbc26ff79a8f..28a84edda889 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -36,5 +36,15 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, struct xfs_trans **tpp); extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len); +extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, loff_t *len, + unsigned int remap_flags); +extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in, + struct xfs_inode *dest, loff_t pos_out, loff_t remap_len, + loff_t *remapped); +extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen, + xfs_extlen_t cowextsize, unsigned int remap_flags); +extern void xfs_reflink_remap_unlock(struct file *file_in, + struct file *file_out); #endif /* __XFS_REFLINK_H */ -- cgit v1.2.3-58-ga151 From bf4a1fcf0bc18d52cf0fce6571d6f327ab5eaf22 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Oct 2018 10:47:48 +1100 Subject: xfs: remove [cm]time update from reflink calls Now that the vfs remap helper dirties the inode [cm]time for us, xfs no longer needs to do that on its own. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 84f372f7ea04..e72218477bf2 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -927,8 +927,7 @@ xfs_reflink_update_dest( struct xfs_trans *tp; int error; - if ((remap_flags & REMAP_FILE_DEDUP) && - newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) + if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) return 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); @@ -949,10 +948,6 @@ xfs_reflink_update_dest( dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; } - if (!(remap_flags & REMAP_FILE_DEDUP)) { - xfs_trans_ichgtime(tp, dest, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); error = xfs_trans_commit(tp); -- cgit v1.2.3-58-ga151