From fe1897eaa6646f5a64a4cee0e6473ed9887d324b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Sep 2019 18:01:35 +0800 Subject: f2fs: fix to update time in lazytime mode generic/018 reports an inconsistent status of atime, the testcase is as below: - open file with O_SYNC - write file to construct fragmented space - calc md5 of file - record {a,c,m}time - defrag file --- do nothing - umount & mount - check {a,c,m}time The root cause is, as f2fs enables lazytime by default, atime update will dirty vfs inode, rather than dirtying f2fs inode (by setting FI_DIRTY_INODE), so later f2fs_write_inode() called from VFS will fail to update inode page due to our skip: f2fs_write_inode() if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; So eventually, after evict(), we lose last atime forever. To fix this issue, we need to check whether {a,c,m,cr}time is consistent between inode cache and inode page, and only skip f2fs_update_inode() if f2fs inode is not dirty and time is consistent as well.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 23 +++++++++++++++-------- fs/f2fs/inode.c | 6 +++++- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4024790028aa..f078cd20dab8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2704,6 +2704,20 @@ static inline void clear_file(struct inode *inode, int type) f2fs_mark_inode_dirty_sync(inode, true); } +static inline bool f2fs_is_time_consistent(struct inode *inode) +{ + if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) + return false; + return true; +} + static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { bool ret; @@ -2721,14 +2735,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) - return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) - return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) - return false; - if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3, - &F2FS_I(inode)->i_crtime)) + if (!f2fs_is_time_consistent(inode)) return false; down_read(&F2FS_I(inode)->i_sem); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index db4fec30c30d..386ad54c13c3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -615,7 +615,11 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; - if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) + /* + * atime could be updated without dirtying f2fs inode in lazytime mode + */ + if 
(f2fs_is_time_consistent(inode) && + !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; if (!f2fs_is_checkpoint_ready(sbi)) -- cgit v1.2.3-58-ga151 From ed3520427f57327f581de0cc28c1c30df08f0103 Mon Sep 17 00:00:00 2001 From: Chengguang Xu via Linux-f2fs-devel Date: Fri, 27 Sep 2019 09:35:48 +0800 Subject: f2fs: mark recovery flag correctly in read_raw_super_block() On the combination of first fail and second success, we will miss to mark recovery flag because currently we reuse err variable in the loop. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1443cee15863..f3a68af3b43a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2951,6 +2951,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Unable to read %dth superblock", block + 1); err = -EIO; + *recovery = 1; continue; } @@ -2960,6 +2961,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); brelse(bh); + *recovery = 1; continue; } @@ -2972,10 +2974,6 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, brelse(bh); } - /* Fail to read any one of the superblocks*/ - if (err < 0) - *recovery = 1; - /* No valid superblock */ if (!*raw_super) kvfree(super); -- cgit v1.2.3-58-ga151 From 46d9ce195a2b1b8aceeafae1d8f407383a117b0e Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Mon, 23 Sep 2019 12:21:39 +0800 Subject: f2fs: update multi-dev metadata in resize_fs Multi-device metadata should be updated in resize_fs as well. Also, we check that the new FS size still reaches the last device. 
Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5877bd729689..ef7686a82722 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1437,11 +1437,20 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + (long long)segs * sbi->blocks_per_seg); + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + int dev_segs = + le32_to_cpu(raw_sb->devs[last_dev].total_segments); + + raw_sb->devs[last_dev].total_segments = + cpu_to_le32(dev_segs + segs); + } } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { int segs = secs * sbi->segs_per_sec; + long long blks = (long long)segs * sbi->blocks_per_seg; long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -1449,8 +1458,20 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; - F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + - (long long)segs * sbi->blocks_per_seg); + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); + + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + + FDEV(last_dev).total_segments = + (int)FDEV(last_dev).total_segments + segs; + FDEV(last_dev).end_blk = + (long long)FDEV(last_dev).end_blk + blks; +#ifdef CONFIG_BLK_DEV_ZONED + FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz + + (int)(blks >> sbi->log_blocks_per_blkz); +#endif + } } int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) @@ -1465,6 +1486,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 
block_count) if (block_count > old_block_count) return -EINVAL; + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + __u64 last_segs = FDEV(last_dev).total_segments; + + if (block_count + last_segs * sbi->blocks_per_seg <= + old_block_count) + return -EINVAL; + } + /* new fs size should align to section size */ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); if (rem) -- cgit v1.2.3-58-ga151 From 9f701f6c772b15461843b92f9b41a0705e190a86 Mon Sep 17 00:00:00 2001 From: Qiuyang Sun Date: Mon, 23 Sep 2019 12:22:35 +0800 Subject: f2fs: check total_segments from devices in raw_super For multi-device F2FS, we should check if the sum of total_segments from all devices matches segment_count. Signed-off-by: Qiuyang Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f3a68af3b43a..112eb86a120f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2604,6 +2604,21 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } + if (RDEV(0).path[0]) { + block_t dev_seg_count = le32_to_cpu(RDEV(0).total_segments); + int i = 1; + + while (i < MAX_DEVICES && RDEV(i).path[0]) { + dev_seg_count += le32_to_cpu(RDEV(i).total_segments); + i++; + } + if (segment_count != dev_seg_count) { + f2fs_info(sbi, "Segment count (%u) mismatch with total segments from devices (%u)", + segment_count, dev_seg_count); + return -EFSCORRUPTED; + } + } + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); -- cgit v1.2.3-58-ga151 From 688078e7f36c293dae25b338ddc9e0a2790f6e06 Mon Sep 17 00:00:00 2001 From: Randall Huang Date: Fri, 18 Oct 2019 14:56:22 +0800 Subject: f2fs: fix to avoid memory leakage in f2fs_listxattr In f2fs_listxattr, there is no boundary check before memcpy e_name to buffer. 
If the e_name_len is corrupted, unexpected memory contents may be returned to the buffer. Signed-off-by: Randall Huang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 181900af2576..296b3189448a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -539,8 +539,9 @@ out: ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; - void *base_addr; + void *base_addr, *last_base_addr; int error = 0; size_t rest = buffer_size; @@ -550,6 +551,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; + last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = f2fs_xattr_handler(entry->e_name_index); @@ -557,6 +560,15 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) size_t prefix_len; size_t size; + if ((void *)(entry) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto cleanup; + } + if (!handler || (handler->list && !handler->list(dentry))) continue; -- cgit v1.2.3-58-ga151 From 0b20fcec8651569935a10afe03fedc0b812d044e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 30 Sep 2019 18:53:25 +0800 Subject: f2fs: cache global IPU bio In commit 8648de2c581e ("f2fs: add bio cache for IPU"), we added f2fs_submit_ipu_bio() in __write_data_page() as below: __write_data_page() if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) { f2fs_submit_ipu_bio(sbi, bio, page); .... 
} in order to avoid below deadlock: Thread A Thread B - __write_data_page (inode x, page y) - f2fs_do_write_data_page - set_page_writeback ---- set writeback flag in page y - f2fs_inplace_write_data - f2fs_balance_fs - lock gc_mutex - lock gc_mutex - f2fs_gc - do_garbage_collect - gc_data_segment - move_data_page - f2fs_wait_on_page_writeback - wait_on_page_writeback --- wait writeback of page y However, the bio submission breaks the merge of IPU IOs. So in this patch let's add a global bio cache for merged IPU pages, then f2fs_wait_on_page_writeback() is able to submit bio if a writebacked page is cached in global bio cache. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 179 ++++++++++++++++++++++++++++++++++++++++++++---------- fs/f2fs/f2fs.h | 11 ++++ fs/f2fs/segment.c | 3 + fs/f2fs/super.c | 8 +++ 4 files changed, 169 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5755e897a5f0..ba3bcf4c7889 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -29,6 +29,7 @@ #define NUM_PREALLOC_POST_READ_CTXS 128 static struct kmem_cache *bio_post_read_ctx_cache; +static struct kmem_cache *bio_entry_slab; static mempool_t *bio_post_read_ctx_pool; static bool __is_cp_guaranteed(struct page *page) @@ -543,6 +544,126 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, return io_type_is_mergeable(io, fio); } +static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, + struct page *page, enum temp_type temp) +{ + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct bio_entry *be; + + be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + be->bio = bio; + bio_get(bio); + + if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) + f2fs_bug_on(sbi, 1); + + down_write(&io->bio_list_lock); + list_add_tail(&be->list, &io->bio_list); + up_write(&io->bio_list_lock); +} + +static void del_bio_entry(struct bio_entry *be) +{ + list_del(&be->list); + 
kmem_cache_free(bio_entry_slab, be); +} + +static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + enum temp_type temp; + bool found = false; + int ret = -EAGAIN; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (be->bio != *bio) + continue; + + found = true; + + if (bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { + ret = 0; + break; + } + + /* bio is full */ + del_bio_entry(be); + __submit_bio(sbi, *bio, DATA); + break; + } + up_write(&io->bio_list_lock); + } + + if (ret) { + bio_put(*bio); + *bio = NULL; + } + + return ret; +} + +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page) +{ + enum temp_type temp; + bool found = false; + struct bio *target = bio ? *bio : NULL; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + if (list_empty(head)) + continue; + + down_read(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) + break; + } + up_read(&io->bio_list_lock); + + if (!found) + continue; + + found = false; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) { + target = be->bio; + del_bio_entry(be); + break; + } + } + up_write(&io->bio_list_lock); + } + + if (found) + __submit_bio(sbi, target, DATA); + if (bio && *bio) { + bio_put(*bio); + *bio = NULL; + } +} + int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; @@ -557,20 
+678,17 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, - fio->new_blkaddr)) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - } + fio->new_blkaddr)) + f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_PAGES); bio_set_op_attrs(bio, fio->op, fio->op_flags); - } - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - goto alloc_new; + add_bio_entry(fio->sbi, bio, page, fio->temp); + } else { + if (add_ipu_page(fio->sbi, &bio, page)) + goto alloc_new; } if (fio->io_wbc) @@ -584,19 +702,6 @@ alloc_new: return 0; } -static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, - struct page *page) -{ - if (!bio) - return; - - if (!__has_merged_page(*bio, NULL, page, 0)) - return; - - __submit_bio(sbi, *bio, DATA); - *bio = NULL; -} - void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -2215,14 +2320,12 @@ out: unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) { - f2fs_submit_ipu_bio(sbi, bio, page); + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); - } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_ipu_write(sbi, bio, NULL); submitted = NULL; } @@ -2342,13 +2445,11 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) { + if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_submit_ipu_bio(sbi, &bio, page); - } else { + else goto continue_unlock; - } } if (!clear_page_dirty_for_io(page)) @@ -2406,7 +2507,7 @@ continue_unlock: NULL, 0, DATA); /* submit cached bio of IPU write */ if (bio) - __submit_bio(sbi, bio, DATA); + f2fs_submit_merged_ipu_write(sbi, &bio, NULL); return 
ret; } @@ -3211,8 +3312,22 @@ fail: return -ENOMEM; } -void __exit f2fs_destroy_post_read_processing(void) +void f2fs_destroy_post_read_processing(void) { mempool_destroy(bio_post_read_ctx_pool); kmem_cache_destroy(bio_post_read_ctx_cache); } + +int __init f2fs_init_bio_entry_cache(void) +{ + bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + sizeof(struct bio_entry)); + if (!bio_entry_slab) + return -ENOMEM; + return 0; +} + +void __exit f2fs_destroy_bio_entry_cache(void) +{ + kmem_cache_destroy(bio_entry_slab); +} diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f078cd20dab8..ca342f4c7db1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1068,6 +1068,11 @@ struct f2fs_io_info { unsigned char version; /* version of the node */ }; +struct bio_entry { + struct bio *bio; + struct list_head list; +}; + #define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ @@ -1077,6 +1082,8 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ + struct list_head bio_list; /* bio entry list head */ + struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -3195,10 +3202,14 @@ void f2fs_destroy_checkpoint_caches(void); */ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); +int f2fs_init_bio_entry_cache(void); +void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type); +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info 
*fio); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 808709581481..25c750cd0272 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3379,7 +3379,10 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + /* submit cached LFS IO */ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + /* sbumit cached IPU IO */ + f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); f2fs_bug_on(sbi, locked && PageWriteback(page)); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 112eb86a120f..f320fd11db48 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3342,6 +3342,8 @@ try_onemore: sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); + INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); + init_rwsem(&sbi->write_io[i][j].bio_list_lock); } } @@ -3753,8 +3755,13 @@ static int __init init_f2fs_fs(void) err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; + err = f2fs_init_bio_entry_cache(); + if (err) + goto free_post_read; return 0; +free_post_read: + f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); @@ -3778,6 +3785,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); -- cgit v1.2.3-58-ga151 From bc005a4d5347da68e690f78d365d8927c87dc85a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 1 Nov 2019 09:34:21 -0700 Subject: f2fs: avoid kernel panic on corruption test xfstests/generic/475 complains kernel warn/panic while testing corrupted disk. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8b66bc4c004b..06fd6d77d34b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2349,7 +2349,6 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); - f2fs_bug_on(sbi, !mount); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } -- cgit v1.2.3-58-ga151 From f5a53edcf01eae21dc3ef1845515229e8459e5cc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 18 Oct 2019 10:06:40 -0700 Subject: f2fs: support aligned pinned file This patch supports 2MB-aligned pinned file, which can guarantee no GC at all by allocating fully valid 2MB segment. Check free segments by has_not_enough_free_secs() with large budget. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/file.c | 42 +++++++++++++++++++++++++++++++++++++----- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 31 +++++++++++++++++++++++++++---- fs/f2fs/segment.h | 2 ++ fs/f2fs/super.c | 1 + fs/f2fs/sysfs.c | 2 ++ 7 files changed, 73 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ca342f4c7db1..c681f51e351b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -890,6 +890,7 @@ enum { CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ NO_CHECK_TYPE, + CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */ }; struct flush_cmd { @@ -1301,6 +1302,7 @@ struct f2fs_sb_info { /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; + struct rw_semaphore pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -3116,7 +3118,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void allocate_segment_for_resize(struct f2fs_sb_info *sbi, 
int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 29bc0a542759..c31a5bbc8090 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1545,12 +1545,44 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - if (f2fs_is_pinned_file(inode)) - map.m_seg_type = CURSEG_COLD_DATA; + if (!map.m_len) + return 0; + + if (f2fs_is_pinned_file(inode)) { + block_t len = (map.m_len >> sbi->log_blocks_per_seg) << + sbi->log_blocks_per_seg; + block_t done = 0; + + if (map.m_len % sbi->blocks_per_seg) + len += sbi->blocks_per_seg; + + map.m_len = sbi->blocks_per_seg; +next_alloc: + if (has_not_enough_free_secs(sbi, 0, + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + mutex_lock(&sbi->gc_mutex); + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err && err != -ENODATA && err != -EAGAIN) + goto out_err; + } - err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? 
- F2FS_GET_BLOCK_PRE_DIO : - F2FS_GET_BLOCK_PRE_AIO)); + down_write(&sbi->pin_sem); + map.m_seg_type = CURSEG_COLD_DATA_PINNED; + f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + up_write(&sbi->pin_sem); + + done += map.m_len; + len -= map.m_len; + map.m_lblk += map.m_len; + if (!err && len) + goto next_alloc; + + map.m_len = done; + } else { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + } +out_err: if (err) { pgoff_t last_off; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 783773e4560d..76477f71d4ee 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -711,7 +711,7 @@ next: f2fs_put_page(page, 1); } if (!err) - f2fs_allocate_new_segments(sbi); + f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 25c750cd0272..8bb37f8a1845 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2690,7 +2690,7 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg; unsigned int old_segno; @@ -2699,10 +2699,17 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (type != NO_CHECK_TYPE && i != type) + continue; + curseg = CURSEG_I(sbi, i); - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_segno); + if (type == NO_CHECK_TYPE || curseg->next_blkoff || + get_valid_blocks(sbi, curseg->segno, false) || + get_ckpt_valid_blocks(sbi, curseg->segno)) { + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_segno); + } } up_write(&SIT_I(sbi)->sentry_lock); @@ -3068,6 +3075,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page 
*page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + bool put_pin_sem = false; + + if (type == CURSEG_COLD_DATA) { + /* GC during CURSEG_COLD_DATA_PINNED allocation */ + if (down_read_trylock(&sbi->pin_sem)) { + put_pin_sem = true; + } else { + type = CURSEG_WARM_DATA; + curseg = CURSEG_I(sbi, type); + } + } else if (type == CURSEG_COLD_DATA_PINNED) { + type = CURSEG_COLD_DATA; + } down_read(&SM_I(sbi)->curseg_lock); @@ -3133,6 +3153,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); up_read(&SM_I(sbi)->curseg_lock); + + if (put_pin_sem) + up_read(&sbi->pin_sem); } static void update_device_state(struct f2fs_io_info *fio) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 325781a1ae4d..a95467b202ea 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -313,6 +313,8 @@ struct sit_entry_set { */ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) { + if (type == CURSEG_COLD_DATA_PINNED) + type = CURSEG_COLD_DATA; return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f320fd11db48..c02a47ce551b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2853,6 +2853,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) spin_lock_init(&sbi->dev_lock); init_rwsem(&sbi->sb_lock); + init_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b558b64a4c9c..f164959e4224 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -154,6 +154,8 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_casefold(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? 
", " : "", "pin_file"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } -- cgit v1.2.3-58-ga151 From 2a60637f06ac94869b2e630eaf837110d39bf291 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Nov 2019 14:12:05 +0800 Subject: f2fs: fix to update dir's i_pino during cross_rename As Eric reported: RENAME_EXCHANGE support was just added to fsstress in xfstests: commit 65dfd40a97b6bbbd2a22538977bab355c5bc0f06 Author: kaixuxia Date: Thu Oct 31 14:41:48 2019 +0800 fsstress: add EXCHANGE renameat2 support This is causing xfstest generic/579 to fail due to fsck.f2fs reporting errors. I'm not sure what the problem is, but it still happens even with all the fs-verity stuff in the test commented out, so that the test just runs fsstress. generic/579 23s ... [10:02:25] [ 7.745370] run fstests generic/579 at 2019-11-04 10:02:25 _check_generic_filesystem: filesystem on /dev/vdc is inconsistent (see /results/f2fs/results-default/generic/579.full for details) [10:02:47] Ran: generic/579 Failures: generic/579 Failed 1 of 1 tests Xunit report: /results/f2fs/results-default/result.xml Here's the contents of 579.full: _check_generic_filesystem: filesystem on /dev/vdc is inconsistent *** fsck.f2fs output *** [ASSERT] (__chk_dots_dentries:1378) --> Bad inode number[0x24] for '..', parent parent ino is [0xd10] The root cause is that we forgot to update directory's i_pino during cross_rename, fix it. 
Fixes: 32f9bc25cbda0 ("f2fs: support ->rename2()") Signed-off-by: Chao Yu Tested-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4faf06e8bf89..a1c507b0b4ac 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -981,7 +981,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else - F2FS_I(old_inode)->i_pino = new_dir->i_ino; + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); @@ -1141,7 +1142,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(old_dir, old_entry, old_page, new_inode); down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry) + file_lost_pino(old_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); @@ -1156,7 +1161,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(new_dir, new_entry, new_page, old_inode); down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); + if (!new_dir_entry) + file_lost_pino(new_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(new_inode, old_dir->i_ino); up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); -- cgit v1.2.3-58-ga151 From 1f0d5c911b64165c9754139a26c8c2fad352c132 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 7 Nov 2019 17:29:00 +0800 Subject: f2fs: fix potential overflow We expect 64-bit calculation result from below statement, however in 32-bit machine, looped left shift operation on pgoff_t type variable may cause 
overflow issue, fix it by forcing type cast. page->index << PAGE_SHIFT; Fixes: 26de9b117130 ("f2fs: avoid unnecessary updating inode during fsync") Fixes: 0a2aa8fbb969 ("f2fs: refactor __exchange_data_block for speed up") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ba3bcf4c7889..8a80fc86a44f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2203,7 +2203,7 @@ static int __write_data_page(struct page *page, bool *submitted, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; - loff_t psize = (page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c31a5bbc8090..f1f507f6561a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1139,7 +1139,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } dn.ofs_in_node++; i++; - new_size = (dst + i) << PAGE_SHIFT; + new_size = (loff_t)(dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); -- cgit v1.2.3-58-ga151 From c45d6002ff7a322022560e9b19ad867b01fec77f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 1 Nov 2019 17:53:23 +0800 Subject: f2fs: show f2fs instance in printk_ratelimited As Eric mentioned, bare printk{,_ratelimited} won't show which filesystem instance these message is coming from, this patch tries to show fs instance with sb->s_id field in all places we missed before. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 9 +++++---- fs/f2fs/dir.c | 7 ++++--- fs/f2fs/f2fs.h | 24 +++++++++++++----------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 9 +++++---- 9 files changed, 32 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a0eef95b9e0e..ffdaba0c55d2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -581,7 +581,7 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); - f2fs_show_injection_info(FAULT_ORPHAN); + f2fs_show_injection_info(sbi, FAULT_ORPHAN); return -ENOSPC; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8a80fc86a44f..a034cd0ce021 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -168,9 +168,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), - FAULT_READ_IO)) { - f2fs_show_injection_info(FAULT_READ_IO); + struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); + + if (time_to_inject(sbi, FAULT_READ_IO)) { + f2fs_show_injection_info(sbi, FAULT_READ_IO); bio->bi_status = BLK_STS_IOERR; } @@ -192,7 +193,7 @@ static void f2fs_write_end_io(struct bio *bio) struct bvec_iter_all iter_all; if (time_to_inject(sbi, FAULT_WRITE_IO)) { - f2fs_show_injection_info(FAULT_WRITE_IO); + f2fs_show_injection_info(sbi, FAULT_WRITE_IO); bio->bi_status = BLK_STS_IOERR; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4033778bcbbf..c967cacf979e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -628,7 +628,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { - f2fs_show_injection_info(FAULT_DIR_DEPTH); + f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH); return 
-ENOSPC; } @@ -919,8 +919,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos++; ctx->pos = start_pos + bit_pos; printk_ratelimited( - "%s, invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, le32_to_cpu(de->ino)); + "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, sbi->sb->s_id, + le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c681f51e351b..3f6204202788 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1374,9 +1374,10 @@ struct f2fs_private_dio { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(sbi, type) \ + printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ + KERN_INFO, sbi->sb->s_id, \ + f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { @@ -1396,7 +1397,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) return false; } #else -#define f2fs_show_injection_info(type) do { } while (0) +#define f2fs_show_injection_info(sbi, type) do { } while (0) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { return false; @@ -1781,7 +1782,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, return ret; if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); release = *count; goto release_quota; } @@ -2033,7 +2034,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, } if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); goto enospc; } @@ -2148,7 +2149,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return 
page; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); + f2fs_show_injection_info(F2FS_M_SB(mapping), + FAULT_PAGE_ALLOC); return NULL; } } @@ -2163,7 +2165,7 @@ static inline struct page *f2fs_pagecache_get_page( int fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { - f2fs_show_injection_info(FAULT_PAGE_GET); + f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET); return NULL; } @@ -2232,7 +2234,7 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio; } if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { - f2fs_show_injection_info(FAULT_ALLOC_BIO); + f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); return NULL; } @@ -2799,7 +2801,7 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, void *ret; if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); + f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } @@ -2820,7 +2822,7 @@ static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { if (time_to_inject(sbi, FAULT_KVMALLOC)) { - f2fs_show_injection_info(FAULT_KVMALLOC); + f2fs_show_injection_info(sbi, FAULT_KVMALLOC); return NULL; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f1f507f6561a..f9f3a417a0cd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -681,7 +681,7 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { - f2fs_show_injection_info(FAULT_TRUNCATE); + f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE); return -EIO; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ef7686a82722..24a3b6b52210 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -54,7 +54,7 @@ static int gc_thread_func(void *data) } if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } diff --git 
a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 386ad54c13c3..502bd491336a 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -681,7 +681,7 @@ retry: err = f2fs_truncate(inode); if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); + f2fs_show_injection_info(sbi, FAULT_EVICT_INODE); err = -EIO; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 06fd6d77d34b..3314a0f3405e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2398,7 +2398,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: if (time_to_inject(sbi, FAULT_ALLOC_NID)) { - f2fs_show_injection_info(FAULT_ALLOC_NID); + f2fs_show_injection_info(sbi, FAULT_ALLOC_NID); return false; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8bb37f8a1845..43daa9431160 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -480,7 +480,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -1008,8 +1008,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) printk_ratelimited( - "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); + "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, sbi->sb->s_id, + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1149,7 +1150,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; if (time_to_inject(sbi, FAULT_DISCARD)) { - f2fs_show_injection_info(FAULT_DISCARD); + f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; goto submit; } -- cgit v1.2.3-58-ga151 From 677017d196ba2a4cfff13626b951cc9a206b8c7c Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 13 Nov 2019 16:01:03 +0530 Subject: 
f2fs: Fix deadlock in f2fs_gc() context during atomic files handling The FS got stuck in the below stack when the storage is in an almost full/dirty condition (when FG_GC is being done). schedule_timeout io_schedule_timeout congestion_wait f2fs_drop_inmem_pages_all f2fs_gc f2fs_balance_fs __write_node_page f2fs_fsync_node_pages f2fs_do_sync_file f2fs_ioctl The root cause of this issue is a potential infinite loop in f2fs_drop_inmem_pages_all() in the case where gc_failure is true and there is an inode whose i_gc_failures[GC_FAILURE_ATOMIC] is not set. Fix this by keeping track of the total atomic files currently opened and using that to exit from this condition. Fix-suggested-by: Chao Yu Signed-off-by: Chao Yu Signed-off-by: Sahitya Tummala Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 1 + fs/f2fs/segment.c | 21 +++++++++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f6204202788..5a888a063c7f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1297,6 +1297,7 @@ struct f2fs_sb_info { unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ + unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f9f3a417a0cd..c0560d62dbee 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1922,6 +1922,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(&fi->inmem_ilist)) list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + sbi->atomic_files++; spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); /* add inode in inmem_list first and set atomic_file */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 43daa9431160..fa32ce92ed65 100644 ---
a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; struct f2fs_inode_info *fi; + unsigned int count = sbi->atomic_files; + unsigned int looped = 0; next: spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(head)) { @@ -296,22 +298,26 @@ next: } fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); inode = igrab(&fi->vfs_inode); + if (inode) + list_move_tail(&fi->inmem_ilist, head); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); if (inode) { if (gc_failure) { - if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) - goto drop; - goto skip; + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto skip; } -drop: set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); f2fs_drop_inmem_pages(inode); +skip: iput(inode); } -skip: congestion_wait(BLK_RW_ASYNC, HZ/50); cond_resched(); + if (gc_failure) { + if (++looped >= count) + return; + } goto next; } @@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); } - clear_inode_flag(inode, FI_ATOMIC_FILE); fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; stat_dec_atomic_write(inode); spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + sbi->atomic_files--; + } spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); } -- cgit v1.2.3-58-ga151 From 909110c060f22e65756659ec6fa957ae75777e00 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Mon, 25 Nov 2019 11:20:36 +0800 Subject: f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project() Setting softlimit larger than hardlimit seems meaningless for disk quota but currently it is allowed. In this case, there may be a bit of confusion for users when they run the df command on a directory which has project quota.
For example, we set 20M softlimit and 10M hardlimit of block usage limit for project quota of test_dir(project id 123). [root@hades f2fs]# repquota -P -a *** Report for project quotas on device /dev/nvme0n1p8 Block grace time: 7days; Inode grace time: 7days Block limits File limits Project used soft hard grace used soft hard grace ---------------------------------------------------------------------- 0 -- 4 0 0 1 0 0 123 +- 10248 20480 10240 2 0 0 The result of the df command is as below: [root@hades f2fs]# df -h /mnt/f2fs/test Filesystem Size Used Avail Use% Mounted on /dev/nvme0n1p8 20M 11M 10M 51% /mnt/f2fs Even though it looks like there is another 10M free space to use, if we write new data to directory test(inherit project id), the write will fail with errno(-EDQUOT). After this patch, the df result looks like below. [root@hades f2fs]# df -h /mnt/f2fs/test Filesystem Size Used Avail Use% Mounted on /dev/nvme0n1p8 10M 10M 0 100% /mnt/f2fs Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c02a47ce551b..75a3b7220ecb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1213,9 +1213,13 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ?
- dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = 0; + if (dquot->dq_dqb.dqb_bsoftlimit) + limit = dquot->dq_dqb.dqb_bsoftlimit; + if (dquot->dq_dqb.dqb_bhardlimit && + (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) + limit = dquot->dq_dqb.dqb_bhardlimit; + if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; buf->f_blocks = limit; @@ -1224,9 +1228,13 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? - dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = 0; + if (dquot->dq_dqb.dqb_isoftlimit) + limit = dquot->dq_dqb.dqb_isoftlimit; + if (dquot->dq_dqb.dqb_ihardlimit && + (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) + limit = dquot->dq_dqb.dqb_ihardlimit; + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = -- cgit v1.2.3-58-ga151 From a4db59ac9058207cea77df7d7893aab5c3009f87 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 22 Nov 2019 11:53:10 -0800 Subject: f2fs: expose main_blkaddr in sysfs Expose in /sys/fs/f2fs//main_blkaddr the block address where the main area starts. This allows user mode programs to determine: - That pinned files that are made exclusively of fully allocated 2MB segments will never be unpinned by the file system. - Where the main area starts. This is required by programs that want to verify if a file is made exclusively of 2MB f2fs segments, the alignment boundary for segments starts at this address. Testing for 2MB alignment relative to the start of the device is incorrect, because for some filesystems main_blkaddr is not at a 2MB boundary relative to the start of the device. The entry will be used when validating reliable pinning file feature proposed by "f2fs: support aligned pinned file". 
Signed-off-by: Ramon Pantin Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ Documentation/filesystems/f2fs.txt | 3 +++ fs/f2fs/sysfs.c | 2 ++ 3 files changed, 11 insertions(+) (limited to 'fs') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 7ab2b1b5e255..aedeae1e8ec1 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -31,6 +31,12 @@ Contact: "Jaegeuk Kim" Description: Controls the issue rate of segment discard commands. +What: /sys/fs/f2fs//main_blkaddr +Date: November 2019 +Contact: "Ramon Pantin" +Description: + Shows first block address of MAIN area. + What: /sys/fs/f2fs//ipu_policy Date: November 2013 Contact: "Jaegeuk Kim" diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 29020af0cff9..3135b80df6da 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -297,6 +297,9 @@ Files in /sys/fs/f2fs/ reclaim the prefree segments to free segments. By default, 5% over total # of segments. + main_blkaddr This value gives the first block address of + MAIN area in the partition. + max_small_discards This parameter controls the number of discard commands that consist small blocks less than 2MB.
The candidates to be discarded are cached until diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index f164959e4224..70945ceb9c0c 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -445,6 +445,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -512,6 +513,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), + ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), ATTR_LIST(batched_trim_sections), -- cgit v1.2.3-58-ga151 From 803e74be04b32f7785742dcabfc62116718fbb06 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 22 Nov 2019 12:02:06 -0800 Subject: f2fs: stop GC when the victim becomes fully valid We must stop GC once the segment becomes fully valid. Otherwise, it can produce more dirty segments by partially moving valid blocks in the segment. Ramon sometimes hit a no-free-segment panic and saw this case happen when validating the reliable file pinning feature. Signed-off-by: Ramon Pantin Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 24a3b6b52210..b3d399623290 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1012,8 +1012,14 @@ next_step: block_t start_bidx; nid_t nid = le32_to_cpu(entry->nid); - /* stop BG_GC if there is not enough free sections.
*/ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) + /* + * stop BG_GC if there is not enough free sections. + * Or, stop GC if the segment becomes fully valid caused by + * race condition along with SSR block allocation. + */ + if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || + get_valid_blocks(sbi, segno, false) == + sbi->blocks_per_seg) return submitted; if (check_valid_map(sbi, segno, off) == 0) -- cgit v1.2.3-58-ga151