From 3acc4522d89e0a326db69e9d0afaad8cf763a54c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 25 Oct 2020 07:35:47 -0700 Subject: f2fs: call f2fs_get_meta_page_retry for nat page When running fault injection test, if we don't stop checkpoint, some stale NAT entries were flushed which breaks consistency. Fixes: 86f33603f8c5 ("f2fs: handle errors of f2fs_get_meta_page_nofail") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d5d8ce077f29..42394de6c7eb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page) static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) -- cgit v1.2.3-58-ga151 From 7a6e59d719ef0ec9b3d765cba3ba98ee585cbde3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 2 Nov 2020 17:36:58 +0800 Subject: f2fs: fix to seek incorrect data offset in inline data file As kitestramuort reported: F2FS-fs (nvme0n1p4): access invalid blkaddr:1598541474 [ 25.725898] ------------[ cut here ]------------ [ 25.725903] WARNING: CPU: 6 PID: 2018 at f2fs_is_valid_blkaddr+0x23a/0x250 [ 25.725923] Call Trace: [ 25.725927] ? f2fs_llseek+0x204/0x620 [ 25.725929] ? ovl_copy_up_data+0x14f/0x200 [ 25.725931] ? ovl_copy_up_inode+0x174/0x1e0 [ 25.725933] ? ovl_copy_up_one+0xa22/0xdf0 [ 25.725936] ? ovl_copy_up_flags+0xa6/0xf0 [ 25.725938] ? ovl_aio_cleanup_handler+0xd0/0xd0 [ 25.725939] ? ovl_maybe_copy_up+0x86/0xa0 [ 25.725941] ? ovl_open+0x22/0x80 [ 25.725943] ? do_dentry_open+0x136/0x350 [ 25.725945] ? path_openat+0xb7e/0xf40 [ 25.725947] ? __check_sticky+0x40/0x40 [ 25.725948] ? do_filp_open+0x70/0x100 [ 25.725950] ? __check_sticky+0x40/0x40 [ 25.725951] ? __check_sticky+0x40/0x40 [ 25.725953] ? __x64_sys_openat+0x1db/0x2c0 [ 25.725955] ? do_syscall_64+0x2d/0x40 [ 25.725957] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 llseek() reports invalid block address access, the root cause is if file has inline data, f2fs_seek_block() will access inline data regard as block address index in inode block, which should be wrong, fix it. Reported-by: kitestramuort Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ee861c6d9ff0..fe39e591e5b4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -412,9 +412,14 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode) && whence == SEEK_HOLE) { - data_ofs = isize; - goto found; + if (f2fs_has_inline_data(inode)) { + if (whence == SEEK_HOLE) { + data_ofs = isize; + goto found; + } else if (whence == SEEK_DATA) { + data_ofs = offset; + goto found; + } } pgofs = (pgoff_t)(offset >> PAGE_SHIFT); -- cgit v1.2.3-58-ga151 From fa4320cefb8537a70cc28c55d311a1f569697cd3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 2 Nov 2020 14:21:31 +0800 Subject: f2fs: move ioctl interface definitions to separated file Like other filesystem does, we introduce a new file f2fs.h in path of include/uapi/linux/, and move f2fs-specified ioctl interface definitions to that file, after then, in order to use those definitions, userspace developer only need to include the new header file rather than copy & paste definitions from fs/f2fs/f2fs.h. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- MAINTAINERS | 1 + fs/f2fs/f2fs.h | 79 ---------------------------------------- fs/f2fs/file.c | 1 + include/trace/events/f2fs.h | 1 + include/uapi/linux/f2fs.h | 87 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 79 deletions(-) create mode 100644 include/uapi/linux/f2fs.h diff --git a/MAINTAINERS b/MAINTAINERS index b516bb34a8d5..13d8fbd74d72 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6681,6 +6681,7 @@ F: Documentation/filesystems/f2fs.rst F: fs/f2fs/ F: include/linux/f2fs_fs.h F: include/trace/events/f2fs.h +F: include/uapi/linux/f2fs.h F71805F HARDWARE MONITORING DRIVER M: Jean Delvare diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb700d797296..99bcf4b44a9c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -402,85 +402,6 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, return size <= MAX_SIT_JENTRIES(journal); } -/* - * f2fs-specific ioctl commands - */ -#define F2FS_IOCTL_MAGIC 0xf5 -#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) -#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) -#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) -#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) -#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) -#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) -#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) -#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ - struct f2fs_defragment) -#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ - struct f2fs_move_range) -#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ - struct f2fs_flush_device) -#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ - struct f2fs_gc_range) -#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) -#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) -#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) -#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) -#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) -#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) -#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ - _IOR(F2FS_IOCTL_MAGIC, 18, __u64) -#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ - _IOR(F2FS_IOCTL_MAGIC, 19, __u64) -#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ - struct f2fs_sectrim_range) - -/* - * should be same as XFS_IOC_GOINGDOWN. - * Flags for going down operation used by FS_IOC_GOINGDOWN - */ -#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ -#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ -#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ -#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ -#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ -#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ - -/* - * Flags used by F2FS_IOC_SEC_TRIM_FILE - */ -#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */ -#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */ -#define F2FS_TRIM_FILE_MASK 0x3 - -struct f2fs_gc_range { - u32 sync; - u64 start; - u64 len; -}; - -struct f2fs_defragment { - u64 start; - u64 len; -}; - -struct f2fs_move_range { - u32 dst_fd; /* destination fd */ - u64 pos_in; /* start position in src_fd */ - u64 pos_out; /* start position in dst_fd */ - u64 len; /* size to move */ -}; - -struct f2fs_flush_device { - u32 dev_num; /* device number to flush */ - u32 segments; /* # of segments to flush */ -}; - -struct f2fs_sectrim_range { - u64 start; - u64 len; - u64 flags; -}; - /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 static inline int get_extra_isize(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fe39e591e5b4..89c451f09344 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -31,6 +31,7 @@ #include "gc.h" #include "trace.h" #include +#include static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index f8f1e85ff130..56b113e3cd6a 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -6,6 +6,7 @@ #define _TRACE_F2FS_H #include +#include #define show_dev(dev) MAJOR(dev), MINOR(dev) #define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h new file mode 100644 index 000000000000..28bcfe8d2c27 --- /dev/null +++ b/include/uapi/linux/f2fs.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_F2FS_H +#define _UAPI_LINUX_F2FS_H +#include +#include + +/* + * f2fs-specific ioctl commands + */ +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) +#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) +#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ + struct f2fs_defragment) +#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ + struct f2fs_move_range) +#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ + struct f2fs_flush_device) +#define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ + struct f2fs_gc_range) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) +#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) +#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) +#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) +#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, __u64) +#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 18, __u64) +#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \ + _IOR(F2FS_IOCTL_MAGIC, 19, __u64) +#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ + struct f2fs_sectrim_range) + +/* + * should be same as XFS_IOC_GOINGDOWN. + * Flags for going down operation used by FS_IOC_GOINGDOWN + */ +#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */ +#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */ +#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */ +#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */ +#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */ +#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */ + +/* + * Flags used by F2FS_IOC_SEC_TRIM_FILE + */ +#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */ +#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */ +#define F2FS_TRIM_FILE_MASK 0x3 + +struct f2fs_gc_range { + __u32 sync; + __u64 start; + __u64 len; +}; + +struct f2fs_defragment { + __u64 start; + __u64 len; +}; + +struct f2fs_move_range { + __u32 dst_fd; /* destination fd */ + __u64 pos_in; /* start position in src_fd */ + __u64 pos_out; /* start position in dst_fd */ + __u64 len; /* size to move */ +}; + +struct f2fs_flush_device { + __u32 dev_num; /* device number to flush */ + __u32 segments; /* # of segments to flush */ +}; + +struct f2fs_sectrim_range { + __u64 start; + __u64 len; + __u64 flags; +}; + +#endif /* _UAPI_LINUX_F2FS_H */ -- cgit v1.2.3-58-ga151 From 9e2a5f8cfb4d9371783e21e27bba4338401f1260 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 30 Oct 2020 13:10:34 +0900 Subject: f2fs: add F2FS_IOC_GET_COMPRESS_OPTION ioctl Added a new F2FS_IOC_GET_COMPRESS_OPTION ioctl to get file compression option of a file. struct f2fs_comp_option { u8 algorithm; => compression algorithm => 0:lzo, 1:lz4, 2:zstd, 3:lzorle u8 log_cluster_size; => log scale cluster size => 2 ~ 8 }; struct f2fs_comp_option option; ioctl(fd, F2FS_IOC_GET_COMPRESS_OPTION, &option); Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 30 ++++++++++++++++++++++++++++++ include/uapi/linux/f2fs.h | 7 +++++++ 2 files changed, 37 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 89c451f09344..c747f5dd595c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3945,6 +3945,33 @@ err: return ret; } +static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_comp_option option; + + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + + inode_lock_shared(inode); + + if (!f2fs_compressed_file(inode)) { + inode_unlock_shared(inode); + return -ENODATA; + } + + option.algorithm = F2FS_I(inode)->i_compress_algorithm; + option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size; + + inode_unlock_shared(inode); + + if (copy_to_user((struct f2fs_comp_option __user *)arg, &option, + sizeof(option))) + return -EFAULT; + + return 0; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -4033,6 +4060,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_reserve_compress_blocks(filp, arg); case F2FS_IOC_SEC_TRIM_FILE: return f2fs_sec_trim_file(filp, arg); + case F2FS_IOC_GET_COMPRESS_OPTION: + return f2fs_ioc_get_compress_option(filp, arg); default: return -ENOTTY; } @@ -4203,6 +4232,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_RELEASE_COMPRESS_BLOCKS: case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: case F2FS_IOC_SEC_TRIM_FILE: + case F2FS_IOC_GET_COMPRESS_OPTION: break; default: return -ENOIOCTLCMD; diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 28bcfe8d2c27..872e61d78f29 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -36,6 +36,8 @@ _IOR(F2FS_IOCTL_MAGIC, 19, __u64) #define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, \ struct f2fs_sectrim_range) +#define F2FS_IOC_GET_COMPRESS_OPTION _IOR(F2FS_IOCTL_MAGIC, 21, \ + struct f2fs_comp_option) /* * should be same as XFS_IOC_GOINGDOWN. @@ -84,4 +86,9 @@ struct f2fs_sectrim_range { __u64 flags; }; +struct f2fs_comp_option { + __u8 algorithm; + __u8 log_cluster_size; +}; + #endif /* _UAPI_LINUX_F2FS_H */ -- cgit v1.2.3-58-ga151 From e1e8debec6564d0c355aab84c93de8e357f397bd Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 30 Oct 2020 13:10:35 +0900 Subject: f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl Added a new F2FS_IOC_SET_COMPRESS_OPTION ioctl to change file compression option of a file. struct f2fs_comp_option { u8 algorithm; => compression algorithm => 0:lzo, 1:lz4, 2:zstd, 3:lzorle u8 log_cluster_size; => log scale cluster size => 2 ~ 8 }; struct f2fs_comp_option option; option.algorithm = 1; option.log_cluster_size = 7; ioctl(fd, F2FS_IOC_SET_COMPRESS_OPTION, &option); Signed-off-by: Daeho Jeong [Chao Yu: remove f2fs_is_compress_algorithm_valid()] Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/f2fs.h | 2 ++ 2 files changed, 56 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c747f5dd595c..ea2f1ba408c4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3972,6 +3972,57 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg) return 0; } +static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_comp_option option; + int ret = 0; + + if (!f2fs_sb_has_compression(sbi)) + return -EOPNOTSUPP; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg, + sizeof(option))) + return -EFAULT; + + if (!f2fs_compressed_file(inode) || + option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) + return -EINVAL; + + file_start_write(filp); + inode_lock(inode); + + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { + ret = -EBUSY; + goto out; + } + + if (inode->i_size != 0) { + ret = -EFBIG; + goto out; + } + + F2FS_I(inode)->i_compress_algorithm = option.algorithm; + F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size; + F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size; + f2fs_mark_inode_dirty_sync(inode, true); + + if (!f2fs_is_compress_backend_ready(inode)) + f2fs_warn(sbi, "compression algorithm is successfully set, " + "but current kernel doesn't support this algorithm."); +out: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -4062,6 +4113,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_sec_trim_file(filp, arg); case F2FS_IOC_GET_COMPRESS_OPTION: return f2fs_ioc_get_compress_option(filp, arg); + case F2FS_IOC_SET_COMPRESS_OPTION: + return f2fs_ioc_set_compress_option(filp, arg); default: return -ENOTTY; } @@ -4233,6 +4286,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_RESERVE_COMPRESS_BLOCKS: case F2FS_IOC_SEC_TRIM_FILE: case F2FS_IOC_GET_COMPRESS_OPTION: + case F2FS_IOC_SET_COMPRESS_OPTION: break; default: return -ENOIOCTLCMD; diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index 872e61d78f29..f00199a2e38b 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -38,6 +38,8 @@ struct f2fs_sectrim_range) #define F2FS_IOC_GET_COMPRESS_OPTION _IOR(F2FS_IOCTL_MAGIC, 21, \ struct f2fs_comp_option) +#define F2FS_IOC_SET_COMPRESS_OPTION _IOW(F2FS_IOCTL_MAGIC, 22, \ + struct f2fs_comp_option) /* * should be same as XFS_IOC_GOINGDOWN. -- cgit v1.2.3-58-ga151 From 3a1b9eaf727b4ab84ebf059e09c38fc6a53e5614 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Nov 2020 09:24:36 +0800 Subject: f2fs: avoid unneeded data copy in f2fs_ioc_move_range() Fields in struct f2fs_move_range won't change in f2fs_ioc_move_range(), let's avoid copying this structure's data to userspace. Signed-off-by: Chao Yu Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ea2f1ba408c4..861a6697960a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2898,12 +2898,6 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) range.pos_out, range.len); mnt_drop_write_file(filp); - if (err) - goto err_out; - - if (copy_to_user((struct f2fs_move_range __user *)arg, - &range, sizeof(range))) - err = -EFAULT; err_out: fdput(dst); return err; -- cgit v1.2.3-58-ga151 From 34178b1bc4b5c936eab3adb4835578093095a571 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 10 Nov 2020 09:24:37 +0800 Subject: f2fs: fix compat F2FS_IOC_{MOVE,GARBAGE_COLLECT}_RANGE Eric reported a ioctl bug in below link: https://lore.kernel.org/linux-f2fs-devel/20201103032234.GB2875@sol.localdomain/ That said, on some 32-bit architectures, u64 has only 32-bit alignment, notably i386 and x86_32, so that size of struct f2fs_gc_range compiled in x86_32 is 20 bytes, however the size in x86_64 is 24 bytes, binary compiled in x86_32 can not call F2FS_IOC_GARBAGE_COLLECT_RANGE successfully due to mismatched value of ioctl command in between binary and f2fs module, similarly, F2FS_IOC_MOVE_RANGE will fail too. In this patch we introduce two ioctls for compatibility of above special 32-bit binary: - F2FS_IOC32_GARBAGE_COLLECT_RANGE - F2FS_IOC32_MOVE_RANGE Reported-by: Eric Biggers Signed-off-by: Chao Yu Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 137 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 861a6697960a..8241b8bdb33c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2480,26 +2480,19 @@ out: return ret; } -static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) { - struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_gc_range range; + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); u64 end; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - - if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, - sizeof(range))) - return -EFAULT; - if (f2fs_readonly(sbi->sb)) return -EROFS; - end = range.start + range.len; - if (end < range.start || range.start < MAIN_BLKADDR(sbi) || + end = range->start + range->len; + if (end < range->start || range->start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) return -EINVAL; @@ -2508,7 +2501,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; do_more: - if (!range.sync) { + if (!range->sync) { if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; @@ -2517,20 +2510,30 @@ do_more: down_write(&sbi->gc_lock); } - ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); + ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start)); if (ret) { if (ret == -EBUSY) ret = -EAGAIN; goto out; } - range.start += BLKS_PER_SEC(sbi); - if (range.start <= end) + range->start += BLKS_PER_SEC(sbi); + if (range->start <= end) goto do_more; out: mnt_drop_write_file(filp); return ret; } +static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) +{ + struct f2fs_gc_range range; + + if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg, + sizeof(range))) + return -EFAULT; + return __f2fs_ioc_gc_range(filp, &range); +} + static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2867,9 +2870,9 @@ out: return ret; } -static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) +static int __f2fs_ioc_move_range(struct file *filp, + struct f2fs_move_range *range) { - struct f2fs_move_range range; struct fd dst; int err; @@ -2877,11 +2880,7 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) !(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, - sizeof(range))) - return -EFAULT; - - dst = fdget(range.dst_fd); + dst = fdget(range->dst_fd); if (!dst.file) return -EBADF; @@ -2894,8 +2893,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) if (err) goto err_out; - err = f2fs_move_file_range(filp, range.pos_in, dst.file, - range.pos_out, range.len); + err = f2fs_move_file_range(filp, range->pos_in, dst.file, + range->pos_out, range->len); mnt_drop_write_file(filp); err_out: @@ -2903,6 +2902,16 @@ err_out: return err; } +static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) +{ + struct f2fs_move_range range; + + if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, + sizeof(range))) + return -EFAULT; + return __f2fs_ioc_move_range(filp, &range); +} + static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -4017,13 +4026,8 @@ out: return ret; } -long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) - return -EIO; - if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) - return -ENOSPC; - switch (cmd) { case FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -4114,6 +4118,16 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp)))) + return -ENOSPC; + + return __f2fs_ioctl(filp, cmd, arg); +} + static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -4230,8 +4244,63 @@ out: } #ifdef CONFIG_COMPAT +struct compat_f2fs_gc_range { + u32 sync; + compat_u64 start; + compat_u64 len; +}; +#define F2FS_IOC32_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11,\ + struct compat_f2fs_gc_range) + +static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg) +{ + struct compat_f2fs_gc_range __user *urange; + struct f2fs_gc_range range; + int err; + + urange = compat_ptr(arg); + err = get_user(range.sync, &urange->sync); + err |= get_user(range.start, &urange->start); + err |= get_user(range.len, &urange->len); + if (err) + return -EFAULT; + + return __f2fs_ioc_gc_range(file, &range); +} + +struct compat_f2fs_move_range { + u32 dst_fd; + compat_u64 pos_in; + compat_u64 pos_out; + compat_u64 len; +}; +#define F2FS_IOC32_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ + struct compat_f2fs_move_range) + +static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg) +{ + struct compat_f2fs_move_range __user *urange; + struct f2fs_move_range range; + int err; + + urange = compat_ptr(arg); + err = get_user(range.dst_fd, &urange->dst_fd); + err |= get_user(range.pos_in, &urange->pos_in); + err |= get_user(range.pos_out, &urange->pos_out); + err |= get_user(range.len, &urange->len); + if (err) + return -EFAULT; + + return __f2fs_ioc_move_range(file, &range); +} + long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; + if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file)))) + return -ENOSPC; + switch (cmd) { case FS_IOC32_GETFLAGS: cmd = FS_IOC_GETFLAGS; @@ -4242,6 +4311,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; + case F2FS_IOC32_GARBAGE_COLLECT_RANGE: + return f2fs_compat_ioc_gc_range(file, arg); + case F2FS_IOC32_MOVE_RANGE: + return f2fs_compat_ioc_move_range(file, arg); case F2FS_IOC_START_ATOMIC_WRITE: case F2FS_IOC_COMMIT_ATOMIC_WRITE: case F2FS_IOC_START_VOLATILE_WRITE: @@ -4259,10 +4332,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GET_ENCRYPTION_KEY_STATUS: case FS_IOC_GET_ENCRYPTION_NONCE: case F2FS_IOC_GARBAGE_COLLECT: - case F2FS_IOC_GARBAGE_COLLECT_RANGE: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: - case F2FS_IOC_MOVE_RANGE: case F2FS_IOC_FLUSH_DEVICE: case F2FS_IOC_GET_FEATURES: case FS_IOC_FSGETXATTR: @@ -4285,7 +4356,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); + return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif -- cgit v1.2.3-58-ga151 From 89ff6005039a878afac87889fee748fa3f957c3a Mon Sep 17 00:00:00 2001 From: Hyeongseok Kim Date: Thu, 12 Nov 2020 18:14:54 +0900 Subject: f2fs: fix double free of unicode map In case of retrying fill_super with skip_recovery, s_encoding for casefold would not be loaded again even though it's already been freed because it's not NULL. Set NULL after free to prevent double freeing when unmount. Fixes: eca4873ee1b6 ("f2fs: Use generic casefolding support") Signed-off-by: Hyeongseok Kim Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 00eff2f51807..fef22e476c52 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3918,6 +3918,7 @@ free_bio_info: #ifdef CONFIG_UNICODE utf8_unload(sb->s_encoding); + sb->s_encoding = NULL; #endif free_options: #ifdef CONFIG_QUOTA -- cgit v1.2.3-58-ga151 From 9f7e334aec1671b89df12b98002705cb254de5d9 Mon Sep 17 00:00:00 2001 From: Liu Song Date: Wed, 18 Nov 2020 22:01:04 +0800 Subject: f2fs: remove writeback_inodes_sb in f2fs_remount Since sync_inodes_sb has been used, there is no need to use writeback_inodes_sb, so remove it. Signed-off-by: Liu Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fef22e476c52..0ec292d7fcdb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1904,7 +1904,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (*flags & SB_RDONLY || F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { - writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); set_sbi_flag(sbi, SBI_IS_DIRTY); -- cgit v1.2.3-58-ga151 From beb78181f13430ed7a1e621d9faa50e076d7906f Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 20 Nov 2020 21:33:34 +0800 Subject: f2fs: Remove the redundancy initialization There are two assignments are meaningless, and remove them. Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 +- fs/f2fs/checkpoint.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 306413589827..1e5e9b1136ee 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, struct page *dpage) { struct posix_acl *default_acl = NULL, *acl = NULL; - int error = 0; + int error; error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); if (error) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 023462e80e58..9b0628e0d8bc 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -37,7 +37,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); - struct page *page = NULL; + struct page *page; repeat: page = f2fs_grab_cache_page(mapping, index, false); if (!page) { -- cgit v1.2.3-58-ga151 From 608af703519a58f5a7da4273809211cac27edfb2 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:02 +0000 Subject: libfs: Add generic function for setting dentry_ops This adds a function to set dentry operations at lookup time that will work for both encrypted filenames and casefolded filenames. A filesystem that supports both features simultaneously can use this function during lookup preparations to set up its dentry operations once fscrypt no longer does that itself. Currently the casefolding dentry operation are always set if the filesystem defines an encoding because the features is toggleable on empty directories. Unlike in the encryption case, the dentry operations used come from the parent. Since we don't know what set of functions we'll eventually need, and cannot change them later, we enable the casefolding operations if the filesystem supports them at all. By splitting out the various cases, we support as few dentry operations as we can get away with, maximizing compatibility with overlayfs, which will not function if a filesystem supports certain dentry_operations. Signed-off-by: Daniel Rosenberg Reviewed-by: Theodore Ts'o Reviewed-by: Eric Biggers Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jaegeuk Kim --- fs/libfs.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 1 + 2 files changed, 71 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index fc34361c1489..bac918699022 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1449,4 +1449,74 @@ int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) return 0; } EXPORT_SYMBOL(generic_ci_d_hash); + +static const struct dentry_operations generic_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; +#endif + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations generic_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE) +static const struct dentry_operations generic_encrypted_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +/** + * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry + * @dentry: dentry to set ops on + * + * Casefolded directories need d_hash and d_compare set, so that the dentries + * contained in them are handled case-insensitively. Note that these operations + * are needed on the parent directory rather than on the dentries in it, and + * while the casefolding flag can be toggled on and off on an empty directory, + * dentry_operations can't be changed later. As a result, if the filesystem has + * casefolding support enabled at all, we have to give all dentries the + * casefolding operations even if their inode doesn't have the casefolding flag + * currently (and thus the casefolding ops would be no-ops for now). + * + * Encryption works differently in that the only dentry operation it needs is + * d_revalidate, which it only needs on dentries that have the no-key name flag. + * The no-key flag can't be set "later", so we don't have to worry about that. + * + * Finally, to maximize compatibility with overlayfs (which isn't compatible + * with certain dentry operations) and to avoid taking an unnecessary + * performance hit, we use custom dentry_operations for each possible + * combination rather than always installing all operations. + */ +void generic_set_encrypted_ci_d_ops(struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME; +#endif +#ifdef CONFIG_UNICODE + bool needs_ci_ops = dentry->d_sb->s_encoding; +#endif +#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE) + if (needs_encrypt_ops && needs_ci_ops) { + d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); + return; + } #endif +#ifdef CONFIG_FS_ENCRYPTION + if (needs_encrypt_ops) { + d_set_d_op(dentry, &generic_encrypted_dentry_ops); + return; + } +#endif +#ifdef CONFIG_UNICODE + if (needs_ci_ops) { + d_set_d_op(dentry, &generic_ci_dentry_ops); + return; + } +#endif +} +EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); diff --git a/include/linux/fs.h b/include/linux/fs.h index 21cc971fd960..4a25ab4dbd3e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3186,6 +3186,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name); #endif +extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, -- cgit v1.2.3-58-ga151 From bb9cd9106b22b4fc5ff8d78a752be8a4ba2cbba5 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:03 +0000 Subject: fscrypt: Have filesystems handle their d_ops This shifts the responsibility of setting up dentry operations from fscrypt to the individual filesystems, allowing them to have their own operations while still setting fscrypt's d_revalidate as appropriate. Most filesystems can just use generic_set_encrypted_ci_d_ops, unless they have their own specific dentry operations as well. That operation will set the minimal d_ops required under the circumstances. Since the fscrypt d_ops are set later on, we must set all d_ops there, since we cannot adjust those later on. This should not result in any change in behavior. Signed-off-by: Daniel Rosenberg Acked-by: Theodore Ts'o Acked-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/crypto/fname.c | 4 ---- fs/crypto/fscrypt_private.h | 1 - fs/crypto/hooks.c | 1 - fs/ext4/dir.c | 7 ------- fs/ext4/ext4.h | 4 ---- fs/ext4/namei.c | 1 + fs/ext4/super.c | 5 ----- fs/f2fs/dir.c | 7 ------- fs/f2fs/f2fs.h | 3 --- fs/f2fs/namei.c | 1 + fs/f2fs/super.c | 1 - fs/ubifs/dir.c | 1 + include/linux/fscrypt.h | 7 +++++-- 13 files changed, 8 insertions(+), 35 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 1fbe6c24d705..cb3cfa6329ba 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -570,7 +570,3 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return valid; } EXPORT_SYMBOL_GPL(fscrypt_d_revalidate); - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 4f5806a3b73d..df9c48c1fbf7 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -294,7 +294,6 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); -extern const struct dentry_operations fscrypt_d_ops; /* hkdf.c */ diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 20b0df47fe6a..9006fa983335 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -117,7 +117,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NOKEY_NAME; spin_unlock(&dentry->d_lock); - d_set_d_op(dentry, &fscrypt_d_ops); } return err; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ca50c90adc4c..e757319a4472 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -667,10 +667,3 @@ const struct file_operations ext4_dir_operations = { .open = ext4_dir_open, .release = ext4_release_dir, }; - -#ifdef CONFIG_UNICODE -const struct dentry_operations ext4_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 45fcdbf538d1..983f2b970d6a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3354,10 +3354,6 @@ static inline void ext4_unlock_group(struct super_block *sb, /* dir.c */ extern const struct file_operations ext4_dir_operations; -#ifdef CONFIG_UNICODE -extern const struct dentry_operations ext4_dentry_ops; -#endif - /* file.c */ extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f458d1d81d96..8e2398e5d0fe 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1614,6 +1614,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dentry); if (err == -ENOENT) return NULL; if (err) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ef4734b40e2a..82b365acedf8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4974,11 +4974,6 @@ no_journal: goto failed_mount4; } -#ifdef CONFIG_UNICODE - if (sb->s_encoding) - sb->s_d_op = &ext4_dentry_ops; -#endif - sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4b9ef8bbfa4a..71fdf5076461 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1099,10 +1099,3 @@ const struct file_operations f2fs_dir_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif }; - -#ifdef CONFIG_UNICODE -const struct dentry_operations f2fs_dentry_ops = { - .d_hash = generic_ci_d_hash, - .d_compare = generic_ci_d_compare, -}; -#endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 99bcf4b44a9c..01fd42843e49 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3688,9 +3688,6 @@ static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; -#ifdef CONFIG_UNICODE -extern const struct dentry_operations f2fs_dentry_ops; -#endif extern const struct file_operations f2fs_file_operations; extern const struct inode_operations f2fs_file_inode_operations; extern const struct address_space_operations f2fs_dblock_aops; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 8fa37d1434de..6edb1ab579a1 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -497,6 +497,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = f2fs_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dentry); if (err == -ENOENT) goto out_splice; if (err) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0ec292d7fcdb..08e63c8caa1e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3426,7 +3426,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) sbi->sb->s_encoding = encoding; sbi->sb->s_encoding_flags = encoding_flags; - sbi->sb->s_d_op = &f2fs_dentry_ops; } #else if (f2fs_sb_has_casefold(sbi)) { diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 155521e51ac5..7a920434d741 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -203,6 +203,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); err = fscrypt_prepare_lookup(dir, dentry, &nm); + generic_set_encrypted_ci_d_ops(dentry); if (err == -ENOENT) return d_splice_alias(NULL, dentry); if (err) diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index a8f7a43f031b..e72f80482671 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -741,8 +741,11 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * directory's encryption key is available, then the lookup is assumed to be by * plaintext name; otherwise, it is assumed to be by no-key name. * - * This also installs a custom ->d_revalidate() method which will invalidate the - * dentry if it was created without the key and the key is later added. + * This will set DCACHE_NOKEY_NAME on the dentry if the lookup is by no-key + * name. In this case the filesystem must assign the dentry a dentry_operations + * which contains fscrypt_d_revalidate (or contains a d_revalidate method that + * calls fscrypt_d_revalidate), so that the dentry will be invalidated if the + * directory's encryption key is later added. * * Return: 0 on success; -ENOENT if the directory's key is unavailable but the * filename isn't a valid no-key name, so a negative dentry should be created; -- cgit v1.2.3-58-ga151 From 7ad08a58bf67594057362e45cbddd3e27e53e557 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Thu, 19 Nov 2020 06:09:04 +0000 Subject: f2fs: Handle casefolding with Encryption Expand f2fs's casefolding support to include encrypted directories. To index casefolded+encrypted directories, we use the SipHash of the casefolded name, keyed by a key derived from the directory's fscrypt master key. This ensures that the dirhash doesn't leak information about the plaintext filenames. Encryption keys are unavailable during roll-forward recovery, so we can't compute the dirhash when recovering a new dentry in an encrypted + casefolded directory. To avoid having to force a checkpoint when a new file is fsync'ed, store the dirhash on-disk appended to i_name. This patch incorporates work by Eric Biggers and Jaegeuk Kim . Co-developed-by: Eric Biggers Signed-off-by: Eric Biggers Signed-off-by: Daniel Rosenberg Reviewed-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 98 ++++++++++++++++++++++++++++++++++++++++++------------ fs/f2fs/f2fs.h | 8 +++-- fs/f2fs/hash.c | 11 +++++- fs/f2fs/inline.c | 4 +++ fs/f2fs/recovery.c | 12 ++++++- fs/f2fs/super.c | 6 ---- 6 files changed, 106 insertions(+), 33 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 71fdf5076461..82b58d1f80eb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -5,6 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ +#include #include #include #include @@ -206,30 +207,55 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, /* * Test whether a case-insensitive directory entry matches the filename * being searched for. + * + * Returns 1 for a match, 0 for no match, and -errno on an error. */ -static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, +static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name, const u8 *de_name, u32 de_name_len) { const struct super_block *sb = dir->i_sb; const struct unicode_map *um = sb->s_encoding; + struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); struct qstr entry = QSTR_INIT(de_name, de_name_len); int res; + if (IS_ENCRYPTED(dir)) { + const struct fscrypt_str encrypted_name = + FSTR_INIT((u8 *)de_name, de_name_len); + + if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir))) + return -EINVAL; + + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!decrypted_name.name) + return -ENOMEM; + res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name, + &decrypted_name); + if (res < 0) + goto out; + entry.name = decrypted_name.name; + entry.len = decrypted_name.len; + } + res = utf8_strncasecmp_folded(um, name, &entry); - if (res < 0) { - /* - * In strict mode, ignore invalid names. In non-strict mode, - * fall back to treating them as opaque byte sequences. - */ - if (sb_has_strict_encoding(sb) || name->len != entry.len) - return false; - return !memcmp(name->name, entry.name, name->len); + /* + * In strict mode, ignore invalid names. In non-strict mode, + * fall back to treating them as opaque byte sequences. + */ + if (res < 0 && !sb_has_strict_encoding(sb)) { + res = name->len == entry.len && + memcmp(name->name, entry.name, name->len) == 0; + } else { + /* utf8_strncasecmp_folded returns 0 on match */ + res = (res == 0); } - return res == 0; +out: + kfree(decrypted_name.name); + return res; } #endif /* CONFIG_UNICODE */ -static inline bool f2fs_match_name(const struct inode *dir, +static inline int f2fs_match_name(const struct inode *dir, const struct f2fs_filename *fname, const u8 *de_name, u32 de_name_len) { @@ -256,6 +282,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, struct f2fs_dir_entry *de; unsigned long bit_pos = 0; int max_len = 0; + int res = 0; if (max_slots) *max_slots = 0; @@ -273,10 +300,15 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, continue; } - if (de->hash_code == fname->hash && - f2fs_match_name(d->inode, fname, d->filename[bit_pos], - le16_to_cpu(de->name_len))) - goto found; + if (de->hash_code == fname->hash) { + res = f2fs_match_name(d->inode, fname, + d->filename[bit_pos], + le16_to_cpu(de->name_len)); + if (res < 0) + return ERR_PTR(res); + if (res) + goto found; + } if (max_slots && max_len > *max_slots) *max_slots = max_len; @@ -326,7 +358,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } de = find_in_block(dir, dentry_page, fname, &max_slots); - if (de) { + if (IS_ERR(de)) { + *res_page = ERR_CAST(de); + de = NULL; + break; + } else if (de) { *res_page = dentry_page; break; } @@ -448,17 +484,39 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -static void init_dent_inode(const struct f2fs_filename *fname, +static void init_dent_inode(struct inode *dir, struct inode *inode, + const struct f2fs_filename *fname, struct page *ipage) { struct f2fs_inode *ri; + if (!fname) /* tmpfile case? */ + return; + f2fs_wait_on_page_writeback(ipage, NODE, true, true); /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); + if (IS_ENCRYPTED(dir)) { + file_set_enc_name(inode); + /* + * Roll-forward recovery doesn't have encryption keys available, + * so it can't compute the dirhash for encrypted+casefolded + * filenames. Append it to i_name if possible. Else, disable + * roll-forward recovery of the dentry (i.e., make fsync'ing the + * file force a checkpoint) by setting LOST_PINO. + */ + if (IS_CASEFOLDED(dir)) { + if (fname->disk_name.len + sizeof(f2fs_hash_t) <= + F2FS_NAME_LEN) + put_unaligned(fname->hash, (f2fs_hash_t *) + &ri->i_name[fname->disk_name.len]); + else + file_lost_pino(inode); + } + } set_page_dirty(ipage); } @@ -541,11 +599,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, return page; } - if (fname) { - init_dent_inode(fname, page); - if (IS_ENCRYPTED(dir)) - file_set_enc_name(inode); - } + init_dent_inode(dir, inode, fname, page); /* * This file should be checkpointed during fsync. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 01fd42843e49..c626fb03d5bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -454,9 +454,11 @@ struct f2fs_filename { #ifdef CONFIG_UNICODE /* * For casefolded directories: the casefolded name, but it's left NULL - * if the original name is not valid Unicode or if the filesystem is - * doing an internal operation where usr_fname is also NULL. In these - * cases we fall back to treating the name as an opaque byte sequence. + * if the original name is not valid Unicode, if the directory is both + * casefolded and encrypted and its encryption key is unavailable, or if + * the filesystem is doing an internal operation where usr_fname is also + * NULL. In all these cases we fall back to treating the name as an + * opaque byte sequence. */ struct fscrypt_str cf_name; #endif diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index de841aaf3c43..e3beac546c63 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -111,7 +111,9 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) * If the casefolded name is provided, hash it instead of the * on-disk name. If the casefolded name is *not* provided, that * should only be because the name wasn't valid Unicode, so fall - * back to treating the name as an opaque byte sequence. + * back to treating the name as an opaque byte sequence. Note + * that to handle encrypted directories, the fallback must use + * usr_fname (plaintext) rather than disk_name (ciphertext). */ WARN_ON_ONCE(!fname->usr_fname->name); if (fname->cf_name.name) { @@ -121,6 +123,13 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) name = fname->usr_fname->name; len = fname->usr_fname->len; } + if (IS_ENCRYPTED(dir)) { + struct qstr tmp = QSTR_INIT(name, len); + + fname->hash = + cpu_to_le32(fscrypt_fname_siphash(dir, &tmp)); + return; + } } #endif fname->hash = cpu_to_le32(TEA_hash_name(name, len)); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 70384e31788d..92e9852d316a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -332,6 +332,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, make_dentry_ptr_inline(dir, &d, inline_dentry); de = f2fs_find_target_dentry(&d, fname, NULL); unlock_page(ipage); + if (IS_ERR(de)) { + *res_page = ERR_CAST(de); + de = NULL; + } if (de) *res_page = ipage; else diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4f12ade6410a..0947d36af1a8 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -5,6 +5,7 @@ * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ */ +#include #include #include #include "f2fs.h" @@ -128,7 +129,16 @@ static int init_recovered_filename(const struct inode *dir, } /* Compute the hash of the filename */ - if (IS_CASEFOLDED(dir)) { + if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) { + /* + * In this case the hash isn't computable without the key, so it + * was saved on-disk. + */ + if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN) + return -EINVAL; + fname->hash = get_unaligned((f2fs_hash_t *) + &raw_inode->i_name[fname->disk_name.len]); + } else if (IS_CASEFOLDED(dir)) { err = f2fs_init_casefolded_name(dir, fname); if (err) return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 08e63c8caa1e..1c282c3da263 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3398,12 +3398,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) struct unicode_map *encoding; __u16 encoding_flags; - if (f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, - "Can't mount with encoding and encryption"); - return -EINVAL; - } - if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info, &encoding_flags)) { f2fs_err(sbi, -- cgit v1.2.3-58-ga151 From 8769918bf06c9cfb01a1a6ee0e547c9916881496 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Mon, 23 Nov 2020 10:58:32 +0530 Subject: f2fs: change to use rwsem for cp_mutex Use rwsem to ensure serialization of the callers and to avoid starvation of high priority tasks, when the system is under heavy IO workload. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 8 ++++---- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 4 ++-- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/super.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9b0628e0d8bc..e5d2ffa80b56 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -348,13 +348,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* if locked failed, cp will flush dirty pages instead */ - if (!mutex_trylock(&sbi->cp_mutex)) + if (!down_write_trylock(&sbi->cp_global_sem)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -1572,7 +1572,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_warn(sbi, "Start checkpoint disabled!"); } if (cpc->reason != CP_RESIZE) - mutex_lock(&sbi->cp_mutex); + down_write(&sbi->cp_global_sem); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1647,7 +1647,7 @@ stop: trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: if (cpc->reason != CP_RESIZE) - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c626fb03d5bc..273f068e8899 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1369,7 +1369,7 @@ struct f2fs_sb_info { int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct mutex cp_mutex; /* checkpoint procedure lock */ + struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ struct rw_semaphore node_change; /* locking node change */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 05641a1e36cc..3ef84e6ded41 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1986,7 +1986,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) freeze_super(sbi->sb); down_write(&sbi->gc_lock); - mutex_lock(&sbi->cp_mutex); + down_write(&sbi->cp_global_sem); spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + @@ -2031,7 +2031,7 @@ recover_out: spin_unlock(&sbi->stat_lock); } out_err: - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); up_write(&sbi->gc_lock); thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 0947d36af1a8..da75d5d52f0a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -799,7 +799,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ - mutex_lock(&sbi->cp_mutex); + down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list, check_only); @@ -850,7 +850,7 @@ skip: if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); - mutex_unlock(&sbi->cp_mutex); + up_write(&sbi->cp_global_sem); /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list, err); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1c282c3da263..82baaa89c893 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3551,7 +3551,7 @@ try_onemore: sbi->valid_super_block = valid_super_block; init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); - mutex_init(&sbi->cp_mutex); + init_rwsem(&sbi->cp_global_sem); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); -- cgit v1.2.3-58-ga151 From 493720a4854343b7c3fe100cda6a3a2c3f8d4b5d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Nov 2020 10:57:36 +0800 Subject: f2fs: fix to avoid REQ_TIME and CP_TIME collision Lei Li reported a issue: if foreground operations are frequent, background checkpoint may be always skipped due to below check, result in losing more data after sudden power-cut. f2fs_balance_fs_bg() ... if (!is_idle(sbi, REQ_TIME) && (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; E.g: cp_interval = 5 second idle_interval = 2 second foreground operation interval = 1 second (append 1 byte per second into file) In such case, no matter when it calls f2fs_balance_fs_bg(), is_idle(, REQ_TIME) returns false, result in skipping background checkpoint. This patch changes as below to make trigger condition being more reasonable: - trigger sync_fs() if dirty_{nats,nodes} and prefree segs exceeds threshold; - skip triggering sync_fs() if there is any background inflight IO or there is foreground operation recently and meanwhile cp_rwsem is being held by someone; Reported-by: Lei Li Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 19 +++++++++++++------ fs/f2fs/segment.c | 47 +++++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 273f068e8899..0d25f5ca5618 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2400,24 +2400,31 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) { - if (sbi->gc_mode == GC_URGENT_HIGH) - return true; - if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || get_pages(sbi, F2FS_DIO_READ) || get_pages(sbi, F2FS_DIO_WRITE)) - return false; + return true; if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) - return false; + return true; if (SM_I(sbi) && SM_I(sbi)->fcc_info && atomic_read(&SM_I(sbi)->fcc_info->queued_flush)) + return true; + return false; +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (sbi->gc_mode == GC_URGENT_HIGH) + return true; + + if (is_inflight_io(sbi, type)) return false; if (sbi->gc_mode == GC_URGENT_LOW && diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1596502f7375..d9e2e656764d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -529,31 +529,38 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi, REQ_TIME) && - (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) + if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || + excess_prefree_segs(sbi)) + goto do_sync; + + /* there is background inflight IO or foreground operation recently */ + if (is_inflight_io(sbi, REQ_TIME) || + (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem))) return; + /* exceed periodical checkpoint timeout threshold */ + if (f2fs_time_over(sbi, CP_TIME)) + goto do_sync; + /* checkpoint is the only way to shrink partial cached entries */ - if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) || - !f2fs_available_free_memory(sbi, INO_ENTRIES) || - excess_prefree_segs(sbi) || - excess_dirty_nats(sbi) || - excess_dirty_nodes(sbi) || - f2fs_time_over(sbi, CP_TIME)) { - if (test_opt(sbi, DATA_FLUSH) && from_bg) { - struct blk_plug plug; - - mutex_lock(&sbi->flush_lock); - - blk_start_plug(&plug); - f2fs_sync_dirty_inodes(sbi, FILE_INODE); - blk_finish_plug(&plug); + if (f2fs_available_free_memory(sbi, NAT_ENTRIES) || + f2fs_available_free_memory(sbi, INO_ENTRIES)) + return; - mutex_unlock(&sbi->flush_lock); - } - f2fs_sync_fs(sbi->sb, true); - stat_inc_bg_cp_count(sbi->stat_info); +do_sync: + if (test_opt(sbi, DATA_FLUSH) && from_bg) { + struct blk_plug plug; + + mutex_lock(&sbi->flush_lock); + + blk_start_plug(&plug); + f2fs_sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } + f2fs_sync_fs(sbi->sb, true); + stat_inc_bg_cp_count(sbi->stat_info); } static int __submit_flush_wait(struct f2fs_sb_info *sbi, -- cgit v1.2.3-58-ga151 From b28f047b28c51d0b9864c34b097bb0b221ea7247 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 26 Nov 2020 18:32:09 +0800 Subject: f2fs: compress: support chksum This patch supports to store chksum value with compressed data, and verify the integrality of compressed data while reading the data. The feature can be enabled through specifying mount option 'compress_chksum'. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 1 + fs/f2fs/compress.c | 23 +++++++++++++++++++++++ fs/f2fs/f2fs.h | 16 ++++++++++++++-- fs/f2fs/inode.c | 3 +++ fs/f2fs/super.c | 9 +++++++++ include/linux/f2fs_fs.h | 2 +- 6 files changed, 51 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index b8ee761c9922..985ae7d35066 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -260,6 +260,7 @@ compress_extension=%s Support adding specified extension, so that f2fs can enab For other files, we can still enable compression via ioctl. Note that, there is one reserved special extension '*', it can be set to enable compression for all files. +compress_chksum Support verifying chksum of raw data in compressed cluster. inlinecrypt When possible, encrypt/decrypt the contents of encrypted files using the blk-crypto framework rather than filesystem-layer encryption. This allows the use of diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 14262e0f1cd6..7ec1592a0973 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -602,6 +602,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) f2fs_cops[fi->i_compress_algorithm]; unsigned int max_len, new_nr_cpages; struct page **new_cpages; + u32 chksum = 0; int i, ret; trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, @@ -655,6 +656,11 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cbuf->clen = cpu_to_le32(cc->clen); + if (fi->i_compress_flag & 1 << COMPRESS_CHKSUM) + chksum = f2fs_crc32(F2FS_I_SB(cc->inode), + cc->cbuf->cdata, cc->clen); + cc->cbuf->chksum = cpu_to_le32(chksum); + for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) cc->cbuf->reserved[i] = cpu_to_le32(0); @@ -790,6 +796,23 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) ret = cops->decompress_pages(dic); + if (!ret && fi->i_compress_flag & 1 << COMPRESS_CHKSUM) { + u32 provided = le32_to_cpu(dic->cbuf->chksum); + u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen); + + if (provided != calculated) { + if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) { + set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT); + printk_ratelimited( + "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x", + KERN_INFO, sbi->sb->s_id, dic->inode->i_ino, + provided, calculated); + } + set_sbi_flag(sbi, SBI_NEED_FSCK); + WARN_ON_ONCE(1); + } + } + out_vunmap_cbuf: vm_unmap_ram(dic->cbuf, dic->nr_cpages); out_vunmap_rbuf: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0d25f5ca5618..0b314b2034d8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -147,7 +147,8 @@ struct f2fs_mount_info { /* For compression */ unsigned char compress_algorithm; /* algorithm type */ - unsigned compress_log_size; /* cluster log size */ + unsigned char compress_log_size; /* cluster log size */ + bool compress_chksum; /* compressed data chksum */ unsigned char compress_ext_cnt; /* extension count */ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; @@ -676,6 +677,7 @@ enum { FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */ FI_MMAP_FILE, /* indicate file was mmapped */ FI_MAX, /* max flag, never be used */ }; @@ -733,6 +735,7 @@ struct f2fs_inode_info { atomic_t i_compr_blocks; /* # of compressed blocks */ unsigned char i_compress_algorithm; /* algorithm type */ unsigned char i_log_cluster_size; /* log of cluster size */ + unsigned short i_compress_flag; /* compress flag */ unsigned int i_cluster_size; /* cluster size */ }; @@ -1272,9 +1275,15 @@ enum compress_algorithm_type { COMPRESS_MAX, }; -#define COMPRESS_DATA_RESERVED_SIZE 5 +enum compress_flag { + COMPRESS_CHKSUM, + COMPRESS_MAX_FLAG, +}; + +#define COMPRESS_DATA_RESERVED_SIZE 4 struct compress_data { __le32 clen; /* compressed data size */ + __le32 chksum; /* compressed data chksum */ __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ u8 cdata[]; /* compressed data */ }; @@ -3888,6 +3897,9 @@ static inline void set_compress_context(struct inode *inode) F2FS_OPTION(sbi).compress_algorithm; F2FS_I(inode)->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size; + F2FS_I(inode)->i_compress_flag = + F2FS_OPTION(sbi).compress_chksum ? + 1 << COMPRESS_CHKSUM : 0; F2FS_I(inode)->i_cluster_size = 1 << F2FS_I(inode)->i_log_cluster_size; F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 657db2fb6739..349d9cb933ee 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -456,6 +456,7 @@ static int do_read_inode(struct inode *inode) le64_to_cpu(ri->i_compr_blocks)); fi->i_compress_algorithm = ri->i_compress_algorithm; fi->i_log_cluster_size = ri->i_log_cluster_size; + fi->i_compress_flag = le16_to_cpu(ri->i_compress_flag); fi->i_cluster_size = 1 << fi->i_log_cluster_size; set_inode_flag(inode, FI_COMPRESSED_FILE); } @@ -634,6 +635,8 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) &F2FS_I(inode)->i_compr_blocks)); ri->i_compress_algorithm = F2FS_I(inode)->i_compress_algorithm; + ri->i_compress_flag = + cpu_to_le16(F2FS_I(inode)->i_compress_flag); ri->i_log_cluster_size = F2FS_I(inode)->i_log_cluster_size; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 82baaa89c893..f3d919ee4dee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -146,6 +146,7 @@ enum { Opt_compress_algorithm, Opt_compress_log_size, Opt_compress_extension, + Opt_compress_chksum, Opt_atgc, Opt_err, }; @@ -214,6 +215,7 @@ static match_table_t f2fs_tokens = { {Opt_compress_algorithm, "compress_algorithm=%s"}, {Opt_compress_log_size, "compress_log_size=%u"}, {Opt_compress_extension, "compress_extension=%s"}, + {Opt_compress_chksum, "compress_chksum"}, {Opt_atgc, "atgc"}, {Opt_err, NULL}, }; @@ -934,10 +936,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; + case Opt_compress_chksum: + F2FS_OPTION(sbi).compress_chksum = true; + break; #else case Opt_compress_algorithm: case Opt_compress_log_size: case Opt_compress_extension: + case Opt_compress_chksum: f2fs_info(sbi, "compression options not supported"); break; #endif @@ -1523,6 +1529,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, seq_printf(seq, ",compress_extension=%s", F2FS_OPTION(sbi).extensions[i]); } + + if (F2FS_OPTION(sbi).compress_chksum) + seq_puts(seq, ",compress_chksum"); } static int f2fs_show_options(struct seq_file *seq, struct dentry *root) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5dbb57a687f..7dc2a06cf19a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -273,7 +273,7 @@ struct f2fs_inode { __le64 i_compr_blocks; /* # of compressed blocks */ __u8 i_compress_algorithm; /* compress algorithm */ __u8 i_log_cluster_size; /* log of cluster size */ - __le16 i_padding; /* padding */ + __le16 i_compress_flag; /* compress flag */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ -- cgit v1.2.3-58-ga151 From 3a0a9cbc44bbb8a23a3fc12edfd1834452360a50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Nov 2020 21:20:06 +0800 Subject: f2fs: fix kbytes written stat for multi-device case For multi-device case, one f2fs image includes multi devices, so it needs to account bytes written of all block devices belong to the image rather than one main block device, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 27 +++++++++++++++++++++++---- fs/f2fs/f2fs.h | 8 +------- fs/f2fs/super.c | 5 +---- fs/f2fs/sysfs.c | 3 ++- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e5d2ffa80b56..14ba1519639e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1385,6 +1385,27 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi, f2fs_submit_merged_write(sbi, META_FLUSH); } +static inline u64 get_sectors_written(struct block_device *bdev) +{ + return bdev->bd_part ? + (u64)part_stat_read(bdev->bd_part, sectors[STAT_WRITE]) : 0; +} + +u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi) +{ + if (f2fs_is_multi_device(sbi)) { + u64 sectors = 0; + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + sectors += get_sectors_written(FDEV(i).bdev); + + return sectors; + } + + return get_sectors_written(sbi->sb->s_bdev); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1395,7 +1416,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; int i; int cp_payload_blks = __cp_payload(sbi); - struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; int err; @@ -1490,9 +1510,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Record write statistics in the hot node summary */ kbytes_written = sbi->kbytes_written; - if (sb->s_bdev->bd_part) - kbytes_written += BD_PART_WRITTEN(sbi); - + kbytes_written += (f2fs_get_sectors_written(sbi) - + sbi->sectors_written_start) >> 1; seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); if (__remain_node_summaries(cpc->reason)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0b314b2034d8..90e1fe0ce4bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1603,13 +1603,6 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) return sbi->s_ndevs > 1; } -/* For write statistics. Suppose sector size is 512 bytes, - * and the return value is in kbytes. s is of struct f2fs_sb_info. - */ -#define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ - (s)->sectors_written_start) >> 1) - static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { unsigned long now = jiffies; @@ -3382,6 +3375,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page); void f2fs_remove_dirty_inode(struct inode *inode); int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type); +u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi); int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi); int __init f2fs_create_checkpoint_caches(void); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f3d919ee4dee..0ac4dbd6488a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3701,10 +3701,7 @@ try_onemore: } /* For write statistics */ - if (sb->s_bdev->bd_part) - sbi->sectors_written_start = - (u64)part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]); + sbi->sectors_written_start = f2fs_get_sectors_written(sbi); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ec77ccfea923..fce2997382af 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -97,7 +97,8 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, return sprintf(buf, "%llu\n", (unsigned long long)(sbi->kbytes_written + - BD_PART_WRITTEN(sbi))); + ((f2fs_get_sectors_written(sbi) - + sbi->sectors_written_start) >> 1))); } static ssize_t features_show(struct f2fs_attr *a, -- cgit v1.2.3-58-ga151 From 6cbfcab5ff51491e67c75fd8d672cff97127fd55 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Nov 2020 14:55:47 -0800 Subject: f2fs: rename logical_to_blk and blk_to_logical This patch renames two functions like below having u64. - logical_to_blk to bytes_to_blks - blk_to_logical to blks_to_bytes Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index be4da52604ed..a8612c6f40ab 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1808,14 +1808,14 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, NO_CHECK_TYPE, create); } -static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) +static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) { - return (offset >> inode->i_blkbits); + return (bytes >> inode->i_blkbits); } -static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) +static inline u64 blks_to_bytes(struct inode *inode, u64 blks) { - return (blk << inode->i_blkbits); + return (blks << inode->i_blkbits); } static int f2fs_xattr_fiemap(struct inode *inode, @@ -1843,7 +1843,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, return err; } - phys = (__u64)blk_to_logical(inode, ni.blk_addr); + phys = blks_to_bytes(inode, ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)); @@ -1875,7 +1875,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, return err; } - phys = (__u64)blk_to_logical(inode, ni.blk_addr); + phys = blks_to_bytes(inode, ni.blk_addr); len = inode->i_sb->s_blocksize; f2fs_put_page(page, 1); @@ -1945,18 +1945,18 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - if (logical_to_blk(inode, len) == 0) - len = blk_to_logical(inode, 1); + if (bytes_to_blks(inode, len) == 0) + len = blks_to_bytes(inode, 1); - start_blk = logical_to_blk(inode, start); - last_blk = logical_to_blk(inode, start + len - 1); + start_blk = bytes_to_blks(inode, start); + last_blk = bytes_to_blks(inode, start + len - 1); next: memset(&map_bh, 0, sizeof(struct buffer_head)); map_bh.b_size = len; if (compr_cluster) - map_bh.b_size = blk_to_logical(inode, cluster_size - 1); + map_bh.b_size = blks_to_bytes(inode, cluster_size - 1); ret = get_data_block(inode, start_blk, &map_bh, 0, F2FS_GET_BLOCK_FIEMAP, &next_pgofs); @@ -1967,7 +1967,7 @@ next: if (!buffer_mapped(&map_bh)) { start_blk = next_pgofs; - if (blk_to_logical(inode, start_blk) < blk_to_logical(inode, + if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, max_inode_blocks(inode))) goto prep_next; @@ -1993,9 +1993,9 @@ next: compr_cluster = false; - logical = blk_to_logical(inode, start_blk - 1); - phys = blk_to_logical(inode, map_bh.b_blocknr); - size = blk_to_logical(inode, cluster_size); + logical = blks_to_bytes(inode, start_blk - 1); + phys = blks_to_bytes(inode, map_bh.b_blocknr); + size = blks_to_bytes(inode, cluster_size); flags |= FIEMAP_EXTENT_ENCODED; @@ -2013,14 +2013,14 @@ next: goto prep_next; } - logical = blk_to_logical(inode, start_blk); - phys = blk_to_logical(inode, map_bh.b_blocknr); + logical = blks_to_bytes(inode, start_blk); + phys = blks_to_bytes(inode, map_bh.b_blocknr); size = map_bh.b_size; flags = 0; if (buffer_unwritten(&map_bh)) flags = FIEMAP_EXTENT_UNWRITTEN; - start_blk += logical_to_blk(inode, size); + start_blk += bytes_to_blks(inode, size); prep_next: cond_resched(); @@ -3903,7 +3903,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, * to be very smart. */ cur_lblock = 0; - last_lblock = logical_to_blk(inode, i_size_read(inode)); + last_lblock = bytes_to_blks(inode, i_size_read(inode)); len = i_size_read(inode); while (cur_lblock <= last_lblock && cur_lblock < sis->max) { @@ -3925,7 +3925,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, goto err_out; pblock = map_bh.b_blocknr; - nr_pblocks = logical_to_blk(inode, map_bh.b_size); + nr_pblocks = bytes_to_blks(inode, map_bh.b_size); if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; -- cgit v1.2.3-58-ga151 From 43b9d4b4d91c02ca1159507d46e3687d98b4cb8f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Nov 2020 15:04:27 -0800 Subject: f2fs: use new conversion functions between blks and bytes This patch cleans up blks and bytes conversions. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a8612c6f40ab..a84e5bc09337 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1750,6 +1750,16 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) return true; } +static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) +{ + return (bytes >> inode->i_blkbits); +} + +static inline u64 blks_to_bytes(struct inode *inode, u64 blks) +{ + return (blks << inode->i_blkbits); +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, pgoff_t *next_pgofs, int seg_type, bool may_write) @@ -1758,7 +1768,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, int err; map.m_lblk = iblock; - map.m_len = bh->b_size >> inode->i_blkbits; + map.m_len = bytes_to_blks(inode, bh->b_size); map.m_next_pgofs = next_pgofs; map.m_next_extent = NULL; map.m_seg_type = seg_type; @@ -1768,7 +1778,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (!err) { map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = (u64)map.m_len << inode->i_blkbits; + bh->b_size = blks_to_bytes(inode, map.m_len); } return err; } @@ -1808,16 +1818,6 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, NO_CHECK_TYPE, create); } -static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) -{ - return (bytes >> inode->i_blkbits); -} - -static inline u64 blks_to_bytes(struct inode *inode, u64 blks) -{ - return (blks << inode->i_blkbits); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { @@ -2053,8 +2053,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, bool is_readahead) { struct bio *bio = *bio_ret; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; + const unsigned blocksize = blks_to_bytes(inode, 1); sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; @@ -2063,8 +2062,8 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; - last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >> - blkbits; + last_block_in_file = bytes_to_blks(inode, + f2fs_readpage_limit(inode) + blocksize - 1); if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -2177,16 +2176,15 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct bio *bio = *bio_ret; unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; sector_t last_block_in_file; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; + const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; int i; int ret = 0; f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = (f2fs_readpage_limit(inode) + - blocksize - 1) >> blkbits; + last_block_in_file = bytes_to_blks(inode, + f2fs_readpage_limit(inode) + blocksize - 1); /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { @@ -3968,7 +3966,6 @@ static int check_swap_activate(struct swap_info_struct *sis, struct inode *inode = mapping->host; unsigned blocks_per_page; unsigned long page_no; - unsigned blkbits; sector_t probe_block; sector_t last_block; sector_t lowest_block = -1; @@ -3979,8 +3976,7 @@ static int check_swap_activate(struct swap_info_struct *sis, if (PAGE_SIZE == F2FS_BLKSIZE) return check_swap_activate_fast(sis, swap_file, span); - blkbits = inode->i_blkbits; - blocks_per_page = PAGE_SIZE >> blkbits; + blocks_per_page = bytes_to_blks(inode, PAGE_SIZE); /* * Map all the blocks into the extent list. This code doesn't try @@ -3988,7 +3984,7 @@ static int check_swap_activate(struct swap_info_struct *sis, */ probe_block = 0; page_no = 0; - last_block = i_size_read(inode) >> blkbits; + last_block = bytes_to_blks(inode, i_size_read(inode)); while ((probe_block + blocks_per_page) <= last_block && page_no < sis->max) { unsigned block_in_page; @@ -4028,7 +4024,7 @@ static int check_swap_activate(struct swap_info_struct *sis, } } - first_block >>= (PAGE_SHIFT - blkbits); + first_block >>= (PAGE_SHIFT - inode->i_blkbits); if (page_no) { /* exclude the header page */ if (first_block < lowest_block) lowest_block = first_block; -- cgit v1.2.3-58-ga151 From 963ba7f983767d79e914cd616c3e57be1938677e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Nov 2020 15:09:07 -0800 Subject: f2fs: fix wrong block count instead of bytes We should convert cur_lblock, a block count, to bytes for len. Fixes: af4b6b8edf6a ("f2fs: introduce check_swap_activate_fast()") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a84e5bc09337..e49c14ccfafe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3893,7 +3893,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sector_t highest_pblock = 0; int nr_extents = 0; unsigned long nr_pblocks; - unsigned long len; + u64 len; int ret; /* @@ -3911,7 +3911,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, cond_resched(); memset(&map_bh, 0, sizeof(struct buffer_head)); - map_bh.b_size = len - cur_lblock; + map_bh.b_size = len - blks_to_bytes(inode, cur_lblock); ret = get_data_block(inode, cur_lblock, &map_bh, 0, F2FS_GET_BLOCK_FIEMAP, &next_pgofs); -- cgit v1.2.3-58-ga151 From b876f4c94c3d1688edea021d45a528571499e0b9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 24 Nov 2020 15:19:10 -0800 Subject: f2fs: remove buffer_head which has 32bits limit This patch removes buffer_head dependency when getting block addresses. Light reported there's a 32bit issue in f2fs_fiemap where map_bh.b_size is 32bits while len is 64bits given by user. This will give wrong length to f2fs_map_block. Reported-by: Light Hsieh Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 76 ++++++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 42 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e49c14ccfafe..bfe0d787c9e6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1783,15 +1783,6 @@ static int __get_data_block(struct inode *inode, sector_t iblock, return err; } -static int get_data_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create, int flag, - pgoff_t *next_pgofs) -{ - return __get_data_block(inode, iblock, bh_result, create, - flag, next_pgofs, - NO_CHECK_TYPE, create); -} - static int get_data_block_dio_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -1810,14 +1801,6 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, false); } -static int get_data_block_bmap(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_BMAP, NULL, - NO_CHECK_TYPE, create); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { @@ -1913,7 +1896,7 @@ static loff_t max_inode_blocks(struct inode *inode) int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { - struct buffer_head map_bh; + struct f2fs_map_blocks map; sector_t start_blk, last_blk; pgoff_t next_pgofs; u64 logical = 0, phys = 0, size = 0; @@ -1952,19 +1935,21 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, last_blk = bytes_to_blks(inode, start + len - 1); next: - memset(&map_bh, 0, sizeof(struct buffer_head)); - map_bh.b_size = len; + memset(&map, 0, sizeof(map)); + map.m_lblk = start_blk; + map.m_len = bytes_to_blks(inode, len); + map.m_next_pgofs = &next_pgofs; + map.m_seg_type = NO_CHECK_TYPE; if (compr_cluster) - map_bh.b_size = blks_to_bytes(inode, cluster_size - 1); + map.m_len = cluster_size - 1; - ret = get_data_block(inode, start_blk, &map_bh, 0, - F2FS_GET_BLOCK_FIEMAP, &next_pgofs); + ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); if (ret) goto out; /* HOLE */ - if (!buffer_mapped(&map_bh)) { + if (!(map.m_flags & F2FS_MAP_FLAGS)) { start_blk = next_pgofs; if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, @@ -1994,7 +1979,7 @@ next: logical = blks_to_bytes(inode, start_blk - 1); - phys = blks_to_bytes(inode, map_bh.b_blocknr); + phys = blks_to_bytes(inode, map.m_pblk); size = blks_to_bytes(inode, cluster_size); flags |= FIEMAP_EXTENT_ENCODED; @@ -2007,17 +1992,17 @@ next: goto prep_next; } - if (map_bh.b_blocknr == COMPRESS_ADDR) { + if (map.m_pblk == COMPRESS_ADDR) { compr_cluster = true; start_blk++; goto prep_next; } logical = blks_to_bytes(inode, start_blk); - phys = blks_to_bytes(inode, map_bh.b_blocknr); - size = map_bh.b_size; + phys = blks_to_bytes(inode, map.m_pblk); + size = blks_to_bytes(inode, map.m_len); flags = 0; - if (buffer_unwritten(&map_bh)) + if (map.m_flags & F2FS_MAP_UNWRITTEN) flags = FIEMAP_EXTENT_UNWRITTEN; start_blk += bytes_to_blks(inode, size); @@ -3797,9 +3782,6 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - struct buffer_head tmp = { - .b_size = i_blocksize(inode), - }; sector_t blknr = 0; if (f2fs_has_inline_data(inode)) @@ -3816,8 +3798,16 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) if (f2fs_compressed_file(inode)) { blknr = f2fs_bmap_compress(inode, block); } else { - if (!get_data_block_bmap(inode, block, &tmp, 0)) - blknr = tmp.b_blocknr; + struct f2fs_map_blocks map; + + memset(&map, 0, sizeof(map)); + map.m_lblk = block; + map.m_len = 1; + map.m_next_pgofs = NULL; + map.m_seg_type = NO_CHECK_TYPE; + + if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP)) + blknr = map.m_pblk; } out: trace_f2fs_bmap(inode, block, blknr); @@ -3905,25 +3895,27 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, len = i_size_read(inode); while (cur_lblock <= last_lblock && cur_lblock < sis->max) { - struct buffer_head map_bh; + struct f2fs_map_blocks map; pgoff_t next_pgofs; cond_resched(); - memset(&map_bh, 0, sizeof(struct buffer_head)); - map_bh.b_size = len - blks_to_bytes(inode, cur_lblock); + memset(&map, 0, sizeof(map)); + map.m_lblk = cur_lblock; + map.m_len = bytes_to_blks(inode, len) - cur_lblock; + map.m_next_pgofs = &next_pgofs; + map.m_seg_type = NO_CHECK_TYPE; - ret = get_data_block(inode, cur_lblock, &map_bh, 0, - F2FS_GET_BLOCK_FIEMAP, &next_pgofs); + ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); if (ret) goto err_out; /* hole */ - if (!buffer_mapped(&map_bh)) + if (!(map.m_flags & F2FS_MAP_FLAGS)) goto err_out; - pblock = map_bh.b_blocknr; - nr_pblocks = bytes_to_blks(inode, map_bh.b_size); + pblock = map.m_pblk; + nr_pblocks = map.m_len; if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; -- cgit v1.2.3-58-ga151 From 5335bfc6eb688344bfcd4b4133c002c0ae0d0719 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Tue, 1 Dec 2020 15:45:47 +0800 Subject: f2fs: init dirty_secmap incorrectly section is dirty, but dirty_secmap may not set Reported-by: Jia Yang Fixes: da52f8ade40b ("f2fs: get the right gc victim section when section has several segments") Cc: Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d9e2e656764d..bfc597037413 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4551,7 +4551,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) return; mutex_lock(&dirty_i->seglist_lock); - for (segno = 0; segno < MAIN_SECS(sbi); segno += blks_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); -- cgit v1.2.3-58-ga151 From db48965264110dd74d1436fc21dac328d04385d2 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Tue, 1 Dec 2020 15:17:39 +0800 Subject: f2fs: Remove unnecessary unlikely() WARN_ON() already contains an unlikely(), so it's not necessary to use unlikely. Signed-off-by: Yangtao Li Signed-off-by: Shuosheng Huang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 90e1fe0ce4bb..4417f791dbc6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -33,10 +33,8 @@ #else #define f2fs_bug_on(sbi, condition) \ do { \ - if (unlikely(condition)) { \ - WARN_ON(1); \ + if (WARN_ON(condition)) \ set_sbi_flag(sbi, SBI_NEED_FSCK); \ - } \ } while (0) #endif -- cgit v1.2.3-58-ga151 From 602a16d58e9aab3c423bcf051033ea6c9e8a6d37 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 1 Dec 2020 13:08:02 +0900 Subject: f2fs: add compress_mode mount option We will add a new "compress_mode" mount option to control file compression mode. This supports "fs" and "user". In "fs" mode (default), f2fs does automatic compression on the compression enabled files. In "user" mode, f2fs disables the automaic compression and gives the user discretion of choosing the target file and the timing. It means the user can do manual compression/decompression on the compression enabled files using ioctls. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 35 +++++++++++++++++++++++++++++++++++ fs/f2fs/compress.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 30 ++++++++++++++++++++++++++++++ fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 23 +++++++++++++++++++++++ 6 files changed, 91 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 985ae7d35066..dae15c96e659 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -261,6 +261,13 @@ compress_extension=%s Support adding specified extension, so that f2fs can enab Note that, there is one reserved special extension '*', it can be set to enable compression for all files. compress_chksum Support verifying chksum of raw data in compressed cluster. +compress_mode=%s Control file compression mode. This supports "fs" and "user" + modes. In "fs" mode (default), f2fs does automatic compression + on the compression enabled files. In "user" mode, f2fs disables + the automaic compression and gives the user discretion of + choosing the target file and the timing. The user can do manual + compression/decompression on the compression enabled files using + ioctls. inlinecrypt When possible, encrypt/decrypt the contents of encrypted files using the blk-crypto framework rather than filesystem-layer encryption. This allows the use of @@ -811,6 +818,34 @@ Compress metadata layout:: | data length | data chksum | reserved | compressed data | +-------------+-------------+----------+----------------------------+ +Compression mode +-------------------------- + +f2fs supports "fs" and "user" compression modes with "compression_mode" mount option. +With this option, f2fs provides a choice to select the way how to compress the +compression enabled files (refer to "Compression implementation" section for how to +enable compression on a regular inode). + +1) compress_mode=fs +This is the default option. f2fs does automatic compression in the writeback of the +compression enabled files. + +2) compress_mode=user +This disables the automaic compression and gives the user discretion of choosing the +target file and the timing. The user can do manual compression/decompression on the +compression enabled files using F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE +ioctls like the below. + +To decompress a file, + +fd = open(filename, O_WRONLY, 0); +ret = ioctl(fd, F2FS_IOC_DECOMPRESS_FILE); + +To compress a file, + +fd = open(filename, O_WRONLY, 0); +ret = ioctl(fd, F2FS_IOC_COMPRESS_FILE); + NVMe Zoned Namespace devices ---------------------------- diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7ec1592a0973..d23bebb6ccd3 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -944,7 +944,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) static bool cluster_may_compress(struct compress_ctx *cc) { - if (!f2fs_compressed_file(cc->inode)) + if (!f2fs_need_compress_data(cc->inode)) return false; if (f2fs_is_atomic_file(cc->inode)) return false; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bfe0d787c9e6..e85fd8f77f3f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3147,7 +3147,7 @@ static inline bool __should_serialize_io(struct inode *inode, if (IS_NOQUOTA(inode)) return false; - if (f2fs_compressed_file(inode)) + if (f2fs_need_compress_data(inode)) return true; if (wbc->sync_mode != WB_SYNC_ALL) return true; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4417f791dbc6..7c02b6d8465c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -148,6 +148,7 @@ struct f2fs_mount_info { unsigned char compress_log_size; /* cluster log size */ bool compress_chksum; /* compressed data chksum */ unsigned char compress_ext_cnt; /* extension count */ + int compress_mode; /* compression mode */ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; @@ -677,6 +678,7 @@ enum { FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */ FI_MMAP_FILE, /* indicate file was mmapped */ + FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */ FI_MAX, /* max flag, never be used */ }; @@ -1244,6 +1246,18 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; +enum { + COMPR_MODE_FS, /* + * automatically compress compression + * enabled files + */ + COMPR_MODE_USER, /* + * automatical compression is disabled. + * user can control the file compression + * using ioctls + */ +}; + /* * this value is set in page as a private data which indicate that * the page is atomically written, and it is in inmem_pages list. @@ -2759,6 +2773,22 @@ static inline int f2fs_compressed_file(struct inode *inode) is_inode_flag_set(inode, FI_COMPRESSED_FILE); } +static inline bool f2fs_need_compress_data(struct inode *inode) +{ + int compress_mode = F2FS_OPTION(F2FS_I_SB(inode)).compress_mode; + + if (!f2fs_compressed_file(inode)) + return false; + + if (compress_mode == COMPR_MODE_FS) + return true; + else if (compress_mode == COMPR_MODE_USER && + is_inode_flag_set(inode, FI_ENABLE_COMPRESS)) + return true; + + return false; +} + static inline unsigned int addrs_per_inode(struct inode *inode) { unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bfc597037413..deca74cb17df 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3261,7 +3261,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) else return CURSEG_COLD_DATA; } - if (file_is_cold(inode) || f2fs_compressed_file(inode)) + if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0ac4dbd6488a..c6a3365f4844 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -147,6 +147,7 @@ enum { Opt_compress_log_size, Opt_compress_extension, Opt_compress_chksum, + Opt_compress_mode, Opt_atgc, Opt_err, }; @@ -216,6 +217,7 @@ static match_table_t f2fs_tokens = { {Opt_compress_log_size, "compress_log_size=%u"}, {Opt_compress_extension, "compress_extension=%s"}, {Opt_compress_chksum, "compress_chksum"}, + {Opt_compress_mode, "compress_mode=%s"}, {Opt_atgc, "atgc"}, {Opt_err, NULL}, }; @@ -939,11 +941,26 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_compress_chksum: F2FS_OPTION(sbi).compress_chksum = true; break; + case Opt_compress_mode: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (!strcmp(name, "fs")) { + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; + } else if (!strcmp(name, "user")) { + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; #else case Opt_compress_algorithm: case Opt_compress_log_size: case Opt_compress_extension: case Opt_compress_chksum: + case Opt_compress_mode: f2fs_info(sbi, "compression options not supported"); break; #endif @@ -1532,6 +1549,11 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, if (F2FS_OPTION(sbi).compress_chksum) seq_puts(seq, ",compress_chksum"); + + if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_FS) + seq_printf(seq, ",compress_mode=%s", "fs"); + else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER) + seq_printf(seq, ",compress_mode=%s", "user"); } static int f2fs_show_options(struct seq_file *seq, struct dentry *root) @@ -1681,6 +1703,7 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4; F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; F2FS_OPTION(sbi).compress_ext_cnt = 0; + F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS; F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; sbi->sb->s_flags &= ~SB_INLINECRYPT; -- cgit v1.2.3-58-ga151 From 5fdb322ff2c2b4ad519f490dcb7ebb96c5439af7 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 3 Dec 2020 15:56:15 +0900 Subject: f2fs: add F2FS_IOC_DECOMPRESS_FILE and F2FS_IOC_COMPRESS_FILE Added two ioctl to decompress/compress explicitly the compression enabled file in "compress_mode=user" mount option. Using these two ioctls, the users can make a control of compression and decompression of their files. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/f2fs.h | 2 + 2 files changed, 187 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8241b8bdb33c..16ea10f2bcf5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4026,6 +4026,185 @@ out: return ret; } +static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) +{ + DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx); + struct address_space *mapping = inode->i_mapping; + struct page *page; + pgoff_t redirty_idx = page_idx; + int i, page_len = 0, ret = 0; + + page_cache_ra_unbounded(&ractl, len, 0); + + for (i = 0; i < len; i++, page_idx++) { + page = read_cache_page(mapping, page_idx, NULL, NULL); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + break; + } + page_len++; + } + + for (i = 0; i < page_len; i++, redirty_idx++) { + page = find_lock_page(mapping, redirty_idx); + if (!page) + ret = -ENOENT; + set_page_dirty(page); + f2fs_put_page(page, 1); + f2fs_put_page(page, 0); + } + + return ret; +} + +static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int blk_per_seg = sbi->blocks_per_seg; + int cluster_size = F2FS_I(inode)->i_cluster_size; + int count, ret; + + if (!f2fs_sb_has_compression(sbi) || + F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) + return -EOPNOTSUPP; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + file_start_write(filp); + inode_lock(inode); + + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (f2fs_is_mmap_file(inode)) { + ret = -EBUSY; + goto out; + } + + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + if (ret) + goto out; + + if (!atomic_read(&fi->i_compr_blocks)) + goto out; + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + count = last_idx - page_idx; + while (count) { + int len = min(cluster_size, count); + + ret = redirty_blocks(inode, page_idx, len); + if (ret < 0) + break; + + if (get_dirty_pages(inode) >= blk_per_seg) + filemap_fdatawrite(inode->i_mapping); + + count -= len; + page_idx += len; + } + + if (!ret) + ret = filemap_write_and_wait_range(inode->i_mapping, 0, + LLONG_MAX); + + if (ret) + f2fs_warn(sbi, "%s: The file might be partially decompressed " + "(errno=%d). Please delete the file.\n", + __func__, ret); +out: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + +static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + pgoff_t page_idx = 0, last_idx; + unsigned int blk_per_seg = sbi->blocks_per_seg; + int cluster_size = F2FS_I(inode)->i_cluster_size; + int count, ret; + + if (!f2fs_sb_has_compression(sbi) || + F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) + return -EOPNOTSUPP; + + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (!f2fs_compressed_file(inode)) + return -EINVAL; + + f2fs_balance_fs(F2FS_I_SB(inode), true); + + file_start_write(filp); + inode_lock(inode); + + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (f2fs_is_mmap_file(inode)) { + ret = -EBUSY; + goto out; + } + + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); + if (ret) + goto out; + + set_inode_flag(inode, FI_ENABLE_COMPRESS); + + last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + + count = last_idx - page_idx; + while (count) { + int len = min(cluster_size, count); + + ret = redirty_blocks(inode, page_idx, len); + if (ret < 0) + break; + + if (get_dirty_pages(inode) >= blk_per_seg) + filemap_fdatawrite(inode->i_mapping); + + count -= len; + page_idx += len; + } + + if (!ret) + ret = filemap_write_and_wait_range(inode->i_mapping, 0, + LLONG_MAX); + + clear_inode_flag(inode, FI_ENABLE_COMPRESS); + + if (ret) + f2fs_warn(sbi, "%s: The file might be partially compressed " + "(errno=%d). Please delete the file.\n", + __func__, ret); +out: + inode_unlock(inode); + file_end_write(filp); + + return ret; +} + static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -4113,6 +4292,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_get_compress_option(filp, arg); case F2FS_IOC_SET_COMPRESS_OPTION: return f2fs_ioc_set_compress_option(filp, arg); + case F2FS_IOC_DECOMPRESS_FILE: + return f2fs_ioc_decompress_file(filp, arg); + case F2FS_IOC_COMPRESS_FILE: + return f2fs_ioc_compress_file(filp, arg); default: return -ENOTTY; } @@ -4352,6 +4535,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SEC_TRIM_FILE: case F2FS_IOC_GET_COMPRESS_OPTION: case F2FS_IOC_SET_COMPRESS_OPTION: + case F2FS_IOC_DECOMPRESS_FILE: + case F2FS_IOC_COMPRESS_FILE: break; default: return -ENOIOCTLCMD; diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h index f00199a2e38b..352a822d4370 100644 --- a/include/uapi/linux/f2fs.h +++ b/include/uapi/linux/f2fs.h @@ -40,6 +40,8 @@ struct f2fs_comp_option) #define F2FS_IOC_SET_COMPRESS_OPTION _IOW(F2FS_IOCTL_MAGIC, 22, \ struct f2fs_comp_option) +#define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23) +#define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24) /* * should be same as XFS_IOC_GOINGDOWN. -- cgit v1.2.3-58-ga151 From a95ba66ac1457b76fe472c8e092ab1006271f16c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Nov 2020 13:22:05 -0800 Subject: f2fs: avoid race condition for shrinker count Light reported sometimes shinker gets nat_cnt < dirty_nat_cnt resulting in wrong do_shinker work. Let's avoid to return insane overflowed value by adding single tracking value. Reported-by: Light Hsieh Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/debug.c | 11 ++++++----- fs/f2fs/f2fs.h | 10 ++++++++-- fs/f2fs/node.c | 29 ++++++++++++++++++----------- fs/f2fs/node.h | 4 ++-- fs/f2fs/shrinker.c | 4 +--- 6 files changed, 36 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 14ba1519639e..617d0f6b0836 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1619,7 +1619,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; } - if (NM_I(sbi)->dirty_nat_cnt == 0 && + if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { f2fs_flush_sit_entries(sbi, cpc); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a8357fd4f5fa..197c914119da 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -145,8 +145,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; if (sbi->meta_inode) si->meta_pages = META_MAPPING(sbi)->nrpages; - si->nats = NM_I(sbi)->nat_cnt; - si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT]; + si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT]; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; @@ -278,9 +278,10 @@ get_cache: si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] + NM_I(sbi)->nid_cnt[PREALLOC_NID]) * sizeof(struct free_nid); - si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); - si->cache_mem += NM_I(sbi)->dirty_nat_cnt * - sizeof(struct nat_entry_set); + si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry); + si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] * + sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7c02b6d8465c..36090cd09011 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -820,6 +820,13 @@ enum nid_state { MAX_NID_STATE, }; +enum nat_state { + TOTAL_NAT, + DIRTY_NAT, + RECLAIMABLE_NAT, + MAX_NAT_STATE, +}; + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ @@ -835,8 +842,7 @@ struct f2fs_nm_info { struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ - unsigned int nat_cnt; /* the # of cached nat entries */ - unsigned int dirty_nat_cnt; /* total num of nat entries in set */ + unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 42394de6c7eb..e65d73293a3f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> - PAGE_SHIFT; + mem_size = (nm_i->nat_cnt[TOTAL_NAT] * + sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); if (excess_cached_nats(sbi)) res = false; @@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, list_add_tail(&ne->list, &nm_i->nat_entries); spin_unlock(&nm_i->nat_list_lock); - nm_i->nat_cnt++; + nm_i->nat_cnt[TOTAL_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; return ne; } @@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i, static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) { radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); - nm_i->nat_cnt--; + nm_i->nat_cnt[TOTAL_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; __free_nat_entry(e); } @@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; - nm_i->dirty_nat_cnt++; + nm_i->nat_cnt[DIRTY_NAT]++; + nm_i->nat_cnt[RECLAIMABLE_NAT]--; set_nat_flag(ne, IS_DIRTY, true); refresh_list: spin_lock(&nm_i->nat_list_lock); @@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, set_nat_flag(ne, IS_DIRTY, false); set->entry_cnt--; - nm_i->dirty_nat_cnt--; + nm_i->nat_cnt[DIRTY_NAT]--; + nm_i->nat_cnt[RECLAIMABLE_NAT]++; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -2944,14 +2948,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) LIST_HEAD(sets); int err = 0; - /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ + /* + * during unmount, let's flush nat_bits before checking + * nat_cnt[DIRTY_NAT]. + */ if (enabled_nat_bits(sbi, cpc)) { down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); up_write(&nm_i->nat_tree_lock); } - if (!nm_i->dirty_nat_cnt) + if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; down_write(&nm_i->nat_tree_lock); @@ -2962,7 +2969,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * into nat entry set. */ if (enabled_nat_bits(sbi, cpc) || - !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + !__has_cursum_space(journal, + nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, @@ -3086,7 +3094,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi) F2FS_RESERVED_NODE_NUM; nm_i->nid_cnt[FREE_NID] = 0; nm_i->nid_cnt[PREALLOC_NID] = 0; - nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; @@ -3220,7 +3227,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } } - f2fs_bug_on(sbi, nm_i->nat_cnt); + f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]); /* destroy nat set cache */ nid = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 69e5859e993c..f84541b57acb 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -126,13 +126,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid * + return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid * NM_I(sbi)->dirty_nats_ratio / 100; } static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) { - return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; + return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; } static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index d66de5999a26..dd3c3c7a90ec 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -18,9 +18,7 @@ static unsigned int shrinker_run_no; static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) { - long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; - - return count > 0 ? count : 0; + return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT]; } static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) -- cgit v1.2.3-58-ga151 From ec2ddf499402a665d1f6f7f5ce1391100e54089e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Dec 2020 09:14:28 -0800 Subject: f2fs: don't allow any writes on readonly mount generic_make_request: Trying to write to read-only block-device dm-5 (partno 0) WARNING: CPU: 7 PID: 546 at block/blk-core.c:2190 generic_make_request_checks+0x664/0x690 pc : generic_make_request_checks+0x664/0x690 lr : generic_make_request_checks+0x664/0x690 Call trace: generic_make_request_checks+0x664/0x690 generic_make_request+0xf0/0x3a4 submit_bio+0x80/0x250 __submit_merged_bio+0x368/0x4e0 __submit_merged_write_cond.llvm.12294350193007536502+0xe0/0x3e8 f2fs_wait_on_page_writeback+0x84/0x128 f2fs_convert_inline_page+0x35c/0x6f8 f2fs_convert_inline_inode+0xe0/0x2e0 f2fs_file_mmap+0x48/0x9c mmap_region+0x41c/0x74c do_mmap+0x40c/0x4fc vm_mmap_pgoff+0xb8/0x114 vm_mmap+0x34/0x48 elf_map+0x68/0x108 load_elf_binary+0x538/0xb70 search_binary_handler+0xac/0x1dc exec_binprm+0x50/0x15c __do_execve_file+0x620/0x740 __arm64_sys_execve+0x54/0x68 el0_svc_common+0x9c/0x168 el0_svc_handler+0x60/0x6c el0_svc+0x8/0xc Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 92e9852d316a..d09a0bdc0197 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -188,7 +188,8 @@ int f2fs_convert_inline_inode(struct inode *inode) struct page *ipage, *page; int err = 0; - if (!f2fs_has_inline_data(inode)) + if (!f2fs_has_inline_data(inode) || + f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) return 0; page = f2fs_grab_cache_page(inode->i_mapping, 0, false); -- cgit v1.2.3-58-ga151 From 10208567f11bd572331cbbcb9a89c61a143811a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 3 Dec 2020 09:52:45 -0800 Subject: f2fs: introduce max_io_bytes, a sysfs entry, to limit bio size This patch adds max_io_bytes to limit bio size when f2fs tries to merge consecutive IOs. This can give a testing point to split out bios and check end_io handles those bios correctly. This is used to capture a recent bug on the decompression and fsverity flow. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 7 +++++++ fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/sysfs.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 67b3ed8e8c2f..3dfee94e0618 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -370,3 +370,10 @@ Date: April 2020 Contact: "Daeho Jeong" Description: Give a way to change iostat_period time. 3secs by default. The new iostat trace gives stats gap given the period. +What: /sys/fs/f2fs//max_io_bytes +Date: December 2020 +Contact: "Jaegeuk Kim" +Description: This gives a control to limit the bio size in f2fs. + Default is zero, which will follow underlying block layer limit, + whereas, if it has a certain bytes value, f2fs won't submit a + bio larger than that size. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e85fd8f77f3f..cb28089e1eff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -736,6 +736,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, block_t last_blkaddr, block_t cur_blkaddr) { + if (unlikely(sbi->max_io_bytes && + bio->bi_iter.bi_size >= sbi->max_io_bytes)) + return false; if (last_blkaddr + 1 != cur_blkaddr) return false; return __same_bdev(sbi, cur_blkaddr, bio); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 36090cd09011..594df6391390 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1446,6 +1446,7 @@ struct f2fs_sb_info { loff_t max_file_blocks; /* max block index of file */ int dir_level; /* directory level */ int readdir_ra; /* readahead inode in readdir */ + u64 max_io_bytes; /* max io bytes to merge IOs */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index fce2997382af..989a649cfa8b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -567,6 +567,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -651,6 +652,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(iostat_enable), ATTR_LIST(iostat_period_ms), ATTR_LIST(readdir_ra), + ATTR_LIST(max_io_bytes), ATTR_LIST(gc_pin_file_thresh), ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION -- cgit v1.2.3-58-ga151 From b9ec10948ff66f33e52d6617eae3c960cfd90638 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 7 Dec 2020 18:59:33 +0800 Subject: f2fs: convert to F2FS_*_INO macro Use F2FS_ROOT_INO, F2FS_NODE_INO and F2FS_META_INO macro for better code readability. Signed-off-by: Yangtao Li Signed-off-by: Shaohua Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c6a3365f4844..b49eec7758d8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3102,9 +3102,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->total_node_count = (le32_to_cpu(raw_super->segment_count_nat) / 2) * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; - sbi->root_ino_num = le32_to_cpu(raw_super->root_ino); - sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); - sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); + F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino); + F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); + F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; -- cgit v1.2.3-58-ga151 From d540e35d4e547776ea78d51f614ec38ed2824fbe Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 7 Dec 2020 18:59:34 +0800 Subject: f2fs: don't check PAGE_SIZE again in sanity_check_raw_super() Many flash devices read and write a single IO based on a multiple of 4KB, and we support only 4KB page cache size now. Since we already check page size in init_f2fs_fs(), so remove page size check in sanity_check_raw_super(). Signed-off-by: Yangtao Li Signed-off-by: Shaohua Liu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b49eec7758d8..8a82721b69ef 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2801,13 +2801,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } } - /* Currently, support only 4KB page cache size */ - if (F2FS_BLKSIZE != PAGE_SIZE) { - f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", - PAGE_SIZE); - return -EFSCORRUPTED; - } - /* Currently, support only 4KB block size */ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); if (blocksize != F2FS_BLKSIZE) { -- cgit v1.2.3-58-ga151 From 6e5ca4fce7b3fc6065593a3a58f734a14d5a44c3 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Mon, 7 Dec 2020 20:01:12 +0800 Subject: f2fs: inline: correct comment in f2fs_recover_inline_data In 3rd scene, it should remove data blocks instead of inline_data. Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d09a0bdc0197..9822f56359aa 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -267,7 +267,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) * [prev.] [next] of inline_data flag * o o -> recover inline_data * o x -> remove inline_data, and then recover data blocks - * x o -> remove inline_data, and then recover inline_data + * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ if (IS_INODE(npage)) -- cgit v1.2.3-58-ga151 From 84921561532f771bd338e7791bf275b2b605d642 Mon Sep 17 00:00:00 2001 From: Jack Qiu Date: Mon, 7 Dec 2020 20:01:14 +0800 Subject: f2fs: inline: fix wrong inline inode stat Miss to stat inline inode in f2fs_recover_inline_data. Signed-off-by: Jack Qiu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 9822f56359aa..806ebabf5870 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -299,6 +299,7 @@ process_inline: if (IS_ERR(ipage)) return PTR_ERR(ipage); f2fs_truncate_inline_inode(inode, ipage, 0); + stat_dec_inline_inode(inode); clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { @@ -307,6 +308,7 @@ process_inline: ret = f2fs_truncate_blocks(inode, 0, false); if (ret) return ret; + stat_inc_inline_inode(inode); goto process_inline; } return 0; -- cgit v1.2.3-58-ga151 From 96dd02519580faa731066351f438a2f967d9a0ee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Dec 2020 17:54:41 +0800 Subject: f2fs: fix to account inline xattr correctly during recovery During recovery, we may missed to update inline xattr count correctly, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e65d73293a3f..3a24423ac65f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2594,9 +2594,15 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) ri = F2FS_INODE(page); if (ri->i_inline & F2FS_INLINE_XATTR) { - set_inode_flag(inode, FI_INLINE_XATTR); + if (!f2fs_has_inline_xattr(inode)) { + set_inode_flag(inode, FI_INLINE_XATTR); + stat_inc_inline_xattr(inode); + } } else { - clear_inode_flag(inode, FI_INLINE_XATTR); + if (f2fs_has_inline_xattr(inode)) { + stat_dec_inline_xattr(inode); + clear_inode_flag(inode, FI_INLINE_XATTR); + } goto update_inode; } -- cgit v1.2.3-58-ga151 From 6422a71ef40e4751d59b8c9412e7e2dafe085878 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sat, 5 Dec 2020 13:26:26 +0900 Subject: f2fs: fix race of pending_pages in decompression I found out f2fs_free_dic() is invoked in a wrong timing, but f2fs_verify_bio() still needed the dic info and it triggered the below kernel panic. It has been caused by the race condition of pending_pages value between decompression and verity logic, when the same compression cluster had been split in different bios. By split bios, f2fs_verify_bio() ended up with decreasing pending_pages value before it is reset to nr_cpages by f2fs_decompress_pages() and caused the kernel panic. [ 4416.564763] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... [ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work [ 4416.908515] pc : fsverity_verify_page+0x20/0x78 [ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c [ 4416.913722] sp : ffffffc019533cd0 [ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402 [ 4416.913724] x27: 0000000000000001 x26: 0000000000000100 [ 4416.913726] x25: 0000000000000001 x24: 0000000000000004 [ 4416.913727] x23: 0000000000001000 x22: 0000000000000000 [ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0 [ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30 [ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298 [ 4416.913732] x15: 0000000000000000 x14: 0000000000000000 [ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000 [ 4416.913734] x11: 0000000000001000 x10: 0000000000001000 [ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000 [ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade [ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0 [ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0 [ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0 [ 4416.929184] Call trace: [ 4416.929187] fsverity_verify_page+0x20/0x78 [ 4416.929189] f2fs_verify_bio+0x11c/0x29c [ 4416.929192] f2fs_verity_work+0x58/0x84 [ 4417.050667] process_one_work+0x270/0x47c [ 4417.055354] worker_thread+0x27c/0x4d8 [ 4417.059784] kthread+0x13c/0x320 [ 4417.063693] ret_from_fork+0x10/0x18 Chao pointed this can happen by the below race condition. Thread A f2fs_post_read_wq fsverity_wq - f2fs_read_multi_pages() - f2fs_alloc_dic - dic->pending_pages = 2 - submit_bio() - submit_bio() - f2fs_post_read_work() handle first bio - f2fs_decompress_work() - __read_end_io() - f2fs_decompress_pages() - dic->pending_pages-- - enqueue f2fs_verity_work() - f2fs_verity_work() handle first bio - f2fs_verify_bio() - dic->pending_pages-- - f2fs_post_read_work() handle second bio - f2fs_decompress_work() - enqueue f2fs_verity_work() - f2fs_verify_pages() - f2fs_free_dic() - f2fs_verity_work() handle second bio - f2fs_verfy_bio() - use-after-free on dic Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 2 -- fs/f2fs/data.c | 58 +++++++++++++++++++++++++++++++++++++++++++----------- fs/f2fs/f2fs.h | 1 + 3 files changed, 48 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index d23bebb6ccd3..f05d409fd0ed 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -821,8 +821,6 @@ destroy_decompress_ctx: if (cops->destroy_decompress_ctx) cops->destroy_decompress_ctx(dic); out_free_dic: - if (verity) - atomic_set(&dic->pending_pages, dic->nr_cpages); if (!verity) f2fs_decompress_end_io(dic->rpages, dic->cluster_size, ret, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cb28089e1eff..aa34d620bec9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio) dic = (struct decompress_io_ctx *)page_private(page); if (dic) { - if (atomic_dec_return(&dic->pending_pages)) + if (atomic_dec_return(&dic->verity_pages)) continue; f2fs_verify_pages(dic->rpages, dic->cluster_size); @@ -1030,7 +1030,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages, unsigned op_flag, - pgoff_t first_idx, bool for_write) + pgoff_t first_idx, bool for_write, + bool for_verity) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -1052,7 +1053,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, post_read_steps |= 1 << STEP_DECRYPT; if (f2fs_compressed_file(inode)) post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ; - if (f2fs_need_verity(inode, first_idx)) + if (for_verity && f2fs_need_verity(inode, first_idx)) post_read_steps |= 1 << STEP_VERITY; if (post_read_steps) { @@ -1082,7 +1083,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); + page->index, for_write, true); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -2120,7 +2121,7 @@ submit_and_realloc: if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, is_readahead ? REQ_RAHEAD : 0, page->index, - false); + false, true); if (IS_ERR(bio)) { ret = PTR_ERR(bio); bio = NULL; @@ -2166,6 +2167,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, sector_t last_block_in_file; const unsigned blocksize = blks_to_bytes(inode, 1); struct decompress_io_ctx *dic = NULL; + struct bio_post_read_ctx *ctx; + bool for_verity = false; int i; int ret = 0; @@ -2231,10 +2234,29 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, goto out_put_dnode; } + /* + * It's possible to enable fsverity on the fly when handling a cluster, + * which requires complicated error handling. Instead of adding more + * complexity, let's give a rule where end_io post-processes fsverity + * per cluster. In order to do that, we need to submit bio, if previous + * bio sets a different post-process policy. + */ + if (fsverity_active(cc->inode)) { + atomic_set(&dic->verity_pages, cc->nr_cpages); + for_verity = true; + + if (bio) { + ctx = bio->bi_private; + if (!(ctx->enabled_steps & (1 << STEP_VERITY))) { + __submit_bio(sbi, bio, DATA); + bio = NULL; + } + } + } + for (i = 0; i < dic->nr_cpages; i++) { struct page *page = dic->cpages[i]; block_t blkaddr; - struct bio_post_read_ctx *ctx; blkaddr = data_blkaddr(dn.inode, dn.node_page, dn.ofs_in_node + i + 1); @@ -2250,17 +2272,31 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, is_readahead ? REQ_RAHEAD : 0, - page->index, for_write); + page->index, for_write, for_verity); if (IS_ERR(bio)) { + unsigned int remained = dic->nr_cpages - i; + bool release = false; + ret = PTR_ERR(bio); dic->failed = true; - if (!atomic_sub_return(dic->nr_cpages - i, - &dic->pending_pages)) { + + if (for_verity) { + if (!atomic_sub_return(remained, + &dic->verity_pages)) + release = true; + } else { + if (!atomic_sub_return(remained, + &dic->pending_pages)) + release = true; + } + + if (release) { f2fs_decompress_end_io(dic->rpages, - cc->cluster_size, true, - false); + cc->cluster_size, true, + false); f2fs_free_dic(dic); } + f2fs_put_dnode(&dn); *bio_ret = NULL; return ret; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 594df6391390..7364d453783f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1354,6 +1354,7 @@ struct decompress_io_ctx { size_t rlen; /* valid data length in rbuf */ size_t clen; /* valid data length in cbuf */ atomic_t pending_pages; /* in-flight compressed page count */ + atomic_t verity_pages; /* in-flight page count for verity */ bool failed; /* indicate IO error during decompression */ void *private; /* payload buffer for specified decompression algorithm */ void *private2; /* extra payload buffer */ -- cgit v1.2.3-58-ga151 From e584bbe821229a3e7cc409eecd51df66f9268c21 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 9 Dec 2020 16:49:36 +0800 Subject: f2fs: fix shift-out-of-bounds in sanity_check_raw_super() syzbot reported a bug which could cause shift-out-of-bounds issue, fix it. Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 sanity_check_raw_super fs/f2fs/super.c:2812 [inline] read_raw_super_block fs/f2fs/super.c:3267 [inline] f2fs_fill_super.cold+0x16c9/0x16f6 fs/f2fs/super.c:3519 mount_bdev+0x34d/0x410 fs/super.c:1366 legacy_get_tree+0x105/0x220 fs/fs_context.c:592 vfs_get_tree+0x89/0x2f0 fs/super.c:1496 do_new_mount fs/namespace.c:2896 [inline] path_mount+0x12ae/0x1e70 fs/namespace.c:3227 do_mount fs/namespace.c:3240 [inline] __do_sys_mount fs/namespace.c:3448 [inline] __se_sys_mount fs/namespace.c:3425 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3425 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: syzbot+ca9a785f8ac472085994@syzkaller.appspotmail.com Signed-off-by: Anant Thazhemadam Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8a82721b69ef..e8b94bb14bd8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2775,7 +2775,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - unsigned int blocksize; size_t crc_offset = 0; __u32 crc = 0; @@ -2802,10 +2801,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* Currently, support only 4KB block size */ - blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); - if (blocksize != F2FS_BLKSIZE) { - f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", - blocksize); + if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) { + f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u", + le32_to_cpu(raw_super->log_blocksize), + F2FS_BLKSIZE_BITS); return -EFSCORRUPTED; } -- cgit v1.2.3-58-ga151 From 75e91c888989cf2df5c78b251b07de1f5052e30e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 9 Dec 2020 16:42:14 +0800 Subject: f2fs: compress: fix compression chksum This patch addresses minor issues in compression chksum. Fixes: b28f047b28c5 ("f2fs: compress: support chksum") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 +-- fs/f2fs/compress.h | 0 2 files changed, 1 insertion(+), 2 deletions(-) create mode 100644 fs/f2fs/compress.h diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index f05d409fd0ed..4bcbacfe3325 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -796,7 +796,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) ret = cops->decompress_pages(dic); - if (!ret && fi->i_compress_flag & 1 << COMPRESS_CHKSUM) { + if (!ret && (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)) { u32 provided = le32_to_cpu(dic->cbuf->chksum); u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen); @@ -809,7 +809,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) provided, calculated); } set_sbi_flag(sbi, SBI_NEED_FSCK); - WARN_ON_ONCE(1); } } diff --git a/fs/f2fs/compress.h b/fs/f2fs/compress.h new file mode 100644 index 000000000000..e69de29bb2d1 -- cgit v1.2.3-58-ga151