diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 15:27:57 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 15:27:57 -0800 |
commit | 041fae9c105ae342a4245cf1e0dc56a23fbb9d3c (patch) | |
tree | 473a16aff587fa6c7d44d85ab5b02542566dadaa /fs/f2fs | |
parent | eb67d239f3aa1711afb0a42eab50459d9f3d672e (diff) | |
parent | 26a8057a1ada97b528b93fdf3ac4fd03170f1900 (diff) |
Merge tag 'f2fs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this round, we've added two features: F2FS_IOC_START_ATOMIC_REPLACE
and a per-block age-based extent cache.
F2FS_IOC_START_ATOMIC_REPLACE is a variant of the previous atomic
write feature which guarantees a per-file atomicity. It would be more
efficient than AtomicFile implementation in Android framework.
The per-block age-based extent cache implements another type of extent
cache in memory which keeps the per-block age in a file, so that block
allocator could split the hot and cold data blocks more accurately.
Enhancements:
- introduce F2FS_IOC_START_ATOMIC_REPLACE
- refactor extent_cache to add a new per-block-age-based extent cache support
- introduce discard_urgent_util, gc_mode, max_ordered_discard sysfs knobs
- add proc entry to show discard_plist info
- optimize iteration over sparse directories
- add barrier mount option
Bug fixes:
- avoid victim selection from previous victim section
- fix to enable compress for newly created file if extension matches
- set zstd compress level correctly
- initialize locks early in f2fs_fill_super() to fix bugs reported by syzbot
- correct i_size change for atomic writes
- allow to read node block after shutdown
- allow to set compression for inlined file
- fix gc mode when gc_urgent_high_remaining is 1
- should put a page when checking the summary info
Minor fixes and various clean-ups in GC, discard, debugfs, sysfs, and
doc"
* tag 'f2fs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (63 commits)
f2fs: reset wait_ms to default if any of the victims have been selected
f2fs: fix some format WARNING in debug.c and sysfs.c
f2fs: don't call f2fs_issue_discard_timeout() when discard_cmd_cnt is 0 in f2fs_put_super()
f2fs: fix iostat parameter for discard
f2fs: Fix spelling mistake in label: free_bio_enrty_cache -> free_bio_entry_cache
f2fs: add block_age-based extent cache
f2fs: allocate the extent_cache by default
f2fs: refactor extent_cache to support for read and more
f2fs: remove unnecessary __init_extent_tree
f2fs: move internal functions into extent_cache.c
f2fs: specify extent cache for read explicitly
f2fs: introduce f2fs_is_readonly() for readability
f2fs: remove F2FS_SET_FEATURE() and F2FS_CLEAR_FEATURE() macro
f2fs: do some cleanup for f2fs module init
MAINTAINERS: Add f2fs bug tracker link
f2fs: remove the unused flush argument to change_curseg
f2fs: open code allocate_segment_by_default
f2fs: remove struct segment_allocation default_salloc_ops
f2fs: introduce discard_urgent_util sysfs node
f2fs: define MIN_DISCARD_GRANULARITY macro
...
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/checkpoint.c | 9 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 48 | ||||
-rw-r--r-- | fs/f2fs/data.c | 54 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 131 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 36 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 693 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 278 | ||||
-rw-r--r-- | fs/f2fs/file.c | 46 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 79 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 20 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 391 | ||||
-rw-r--r-- | fs/f2fs/node.c | 19 | ||||
-rw-r--r-- | fs/f2fs/node.h | 3 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 4 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 201 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 6 | ||||
-rw-r--r-- | fs/f2fs/shrinker.c | 25 | ||||
-rw-r--r-- | fs/f2fs/super.c | 126 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 164 |
19 files changed, 1453 insertions, 880 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0c82dae082aa..56f7d0d6a8b2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -171,6 +171,11 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { + if (time_to_inject(sbi, FAULT_BLKADDR)) { + f2fs_show_injection_info(sbi, FAULT_BLKADDR); + return false; + } + switch (type) { case META_NAT: break; @@ -1897,8 +1902,10 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi) cprc->f2fs_issue_ckpt = kthread_run(issue_checkpoint_thread, sbi, "f2fs_ckpt-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(cprc->f2fs_issue_ckpt)) { + int err = PTR_ERR(cprc->f2fs_issue_ckpt); + cprc->f2fs_issue_ckpt = NULL; - return -ENOMEM; + return err; } set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 2b7a5cc4ed66..2532f369cb10 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -346,7 +346,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) if (!level) level = F2FS_ZSTD_DEFAULT_CLEVEL; - params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen); + params = zstd_get_params(level, cc->rlen); workspace_size = zstd_cstream_workspace_bound(¶ms.cParams); workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), @@ -567,10 +567,7 @@ MODULE_PARM_DESC(num_compress_pages, int f2fs_init_compress_mempool(void) { compress_page_pool = mempool_create_page_pool(num_compress_pages, 0); - if (!compress_page_pool) - return -ENOMEM; - - return 0; + return compress_page_pool ? 0 : -ENOMEM; } void f2fs_destroy_compress_mempool(void) @@ -1981,9 +1978,7 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) sbi->page_array_slab = f2fs_kmem_cache_create(slab_name, sbi->page_array_slab_size); - if (!sbi->page_array_slab) - return -ENOMEM; - return 0; + return sbi->page_array_slab ? 0 : -ENOMEM; } void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) @@ -1991,53 +1986,24 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) kmem_cache_destroy(sbi->page_array_slab); } -static int __init f2fs_init_cic_cache(void) +int __init f2fs_init_compress_cache(void) { cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry", sizeof(struct compress_io_ctx)); if (!cic_entry_slab) return -ENOMEM; - return 0; -} - -static void f2fs_destroy_cic_cache(void) -{ - kmem_cache_destroy(cic_entry_slab); -} - -static int __init f2fs_init_dic_cache(void) -{ dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry", sizeof(struct decompress_io_ctx)); if (!dic_entry_slab) - return -ENOMEM; - return 0; -} - -static void f2fs_destroy_dic_cache(void) -{ - kmem_cache_destroy(dic_entry_slab); -} - -int __init f2fs_init_compress_cache(void) -{ - int err; - - err = f2fs_init_cic_cache(); - if (err) - goto out; - err = f2fs_init_dic_cache(); - if (err) goto free_cic; return 0; free_cic: - f2fs_destroy_cic_cache(); -out: + kmem_cache_destroy(cic_entry_slab); return -ENOMEM; } void f2fs_destroy_compress_cache(void) { - f2fs_destroy_dic_cache(); - f2fs_destroy_cic_cache(); + kmem_cache_destroy(dic_entry_slab); + kmem_cache_destroy(cic_entry_slab); } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7af75041bd81..6e43e19c7d1c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -39,10 +39,8 @@ static struct bio_set f2fs_bioset; int __init f2fs_init_bioset(void) { - if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, - 0, BIOSET_NEED_BVECS)) - return -ENOMEM; - return 0; + return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, + 0, BIOSET_NEED_BVECS); } void f2fs_destroy_bioset(void) @@ -1145,7 +1143,7 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { dn->data_blkaddr = blkaddr; f2fs_set_data_blkaddr(dn); - f2fs_update_extent_cache(dn); + f2fs_update_read_extent_cache(dn); } /* dn->ofs_in_node will be returned with up-to-date last block pointer */ @@ -1214,7 +1212,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) struct extent_info ei = {0, }; struct inode *inode = dn->inode; - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn->data_blkaddr = ei.blk + index - ei.fofs; return 0; } @@ -1223,7 +1221,8 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) } struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write) + blk_opf_t op_flags, bool for_write, + pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; @@ -1235,7 +1234,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (!page) return ERR_PTR(-ENOMEM); - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { @@ -1249,12 +1248,17 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) + if (err) { + if (err == -ENOENT && next_pgofs) + *next_pgofs = f2fs_get_next_page_offset(&dn, index); goto put_err; + } f2fs_put_dnode(&dn); if (unlikely(dn.data_blkaddr == NULL_ADDR)) { err = -ENOENT; + if (next_pgofs) + *next_pgofs = index + 1; goto put_err; } if (dn.data_blkaddr != NEW_ADDR && @@ -1298,7 +1302,8 @@ put_err: return ERR_PTR(err); } -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, + pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct page *page; @@ -1308,7 +1313,7 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = f2fs_get_read_data_page(inode, index, 0, false); + page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs); if (IS_ERR(page)) return page; @@ -1334,7 +1339,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = f2fs_get_read_data_page(inode, index, 0, for_write); + page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL); if (IS_ERR(page)) return page; @@ -1497,7 +1502,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, pgofs = (pgoff_t)map->m_lblk; end = pgofs + maxblocks; - if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { + if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) { if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) goto next_dnode; @@ -1707,7 +1712,7 @@ skip: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_extent_cache_range(&dn, + f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } @@ -1752,7 +1757,7 @@ sync_out: if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; - f2fs_update_extent_cache_range(&dn, + f2fs_update_read_extent_cache_range(&dn, start_pgofs, map->m_pblk + ofs, map->m_len - ofs); } @@ -2212,7 +2217,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (f2fs_cluster_is_empty(cc)) goto out; - if (f2fs_lookup_extent_cache(inode, start_idx, &ei)) + if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei)) from_dnode = false; if (!from_dnode) @@ -2643,7 +2648,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_extent_cache(inode, page->index, &ei)) { + f2fs_lookup_read_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, @@ -3367,7 +3372,7 @@ restart: } else if (locked) { err = f2fs_get_block(&dn, index); } else { - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; } else { /* hole case */ @@ -3408,7 +3413,7 @@ static int __find_data_block(struct inode *inode, pgoff_t index, set_new_dnode(&dn, inode, ipage, ipage, 0); - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; } else { /* hole case */ @@ -3472,6 +3477,9 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, else if (*blk_addr != NULL_ADDR) return 0; + if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) + goto reserve_block; + /* Look for the block in the original inode */ err = __find_data_block(inode, index, &ori_blk_addr); if (err) @@ -4093,9 +4101,7 @@ int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", WQ_UNBOUND | WQ_HIGHPRI, num_online_cpus()); - if (!sbi->post_read_wq) - return -ENOMEM; - return 0; + return sbi->post_read_wq ? 0 : -ENOMEM; } void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) @@ -4108,9 +4114,7 @@ int __init f2fs_init_bio_entry_cache(void) { bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", sizeof(struct bio_entry)); - if (!bio_entry_slab) - return -ENOMEM; - return 0; + return bio_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_bio_entry_cache(void) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a216dcdf6941..32af4f0c5735 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -72,15 +72,26 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->main_area_zones = si->main_area_sections / le32_to_cpu(raw_super->secs_per_zone); - /* validation check of the segment numbers */ + /* general extent cache stats */ + for (i = 0; i < NR_EXTENT_CACHES; i++) { + struct extent_tree_info *eti = &sbi->extent_tree[i]; + + si->hit_cached[i] = atomic64_read(&sbi->read_hit_cached[i]); + si->hit_rbtree[i] = atomic64_read(&sbi->read_hit_rbtree[i]); + si->total_ext[i] = atomic64_read(&sbi->total_hit_ext[i]); + si->hit_total[i] = si->hit_cached[i] + si->hit_rbtree[i]; + si->ext_tree[i] = atomic_read(&eti->total_ext_tree); + si->zombie_tree[i] = atomic_read(&eti->total_zombie_tree); + si->ext_node[i] = atomic_read(&eti->total_ext_node); + } + /* read extent_cache only */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); - si->hit_cached = atomic64_read(&sbi->read_hit_cached); - si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree); - si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; - si->total_ext = atomic64_read(&sbi->total_hit_ext); - si->ext_tree = atomic_read(&sbi->total_ext_tree); - si->zombie_tree = atomic_read(&sbi->total_zombie_tree); - si->ext_node = atomic_read(&sbi->total_ext_node); + si->hit_total[EX_READ] += si->hit_largest; + + /* block age extent_cache only */ + si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks); + + /* validation check of the segment numbers */ si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); @@ -294,25 +305,32 @@ get_cache: sizeof(struct nat_entry_set); for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); - si->cache_mem += atomic_read(&sbi->total_ext_tree) * + + for (i = 0; i < NR_EXTENT_CACHES; i++) { + struct extent_tree_info *eti = &sbi->extent_tree[i]; + + si->ext_mem[i] = atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree); - si->cache_mem += atomic_read(&sbi->total_ext_node) * + si->ext_mem[i] += atomic_read(&eti->total_ext_node) * sizeof(struct extent_node); + si->cache_mem += si->ext_mem[i]; + } si->page_mem = 0; if (sbi->node_inode) { - unsigned npages = NODE_MAPPING(sbi)->nrpages; + unsigned long npages = NODE_MAPPING(sbi)->nrpages; si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } if (sbi->meta_inode) { - unsigned npages = META_MAPPING(sbi)->nrpages; + unsigned long npages = META_MAPPING(sbi)->nrpages; si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } #ifdef CONFIG_F2FS_FS_COMPRESSION if (sbi->compress_inode) { - unsigned npages = COMPRESS_MAPPING(sbi)->nrpages; + unsigned long npages = COMPRESS_MAPPING(sbi)->nrpages; + si->page_mem += (unsigned long long)npages << PAGE_SHIFT; } #endif @@ -460,28 +478,28 @@ static int stat_show(struct seq_file *s, void *v) si->meta_count[META_NAT]); seq_printf(s, " - ssa blocks : %u\n", si->meta_count[META_SSA]); - seq_printf(s, "CP merge (Queued: %4d, Issued: %4d, Total: %4d, " - "Cur time: %4d(ms), Peak time: %4d(ms))\n", - si->nr_queued_ckpt, si->nr_issued_ckpt, - si->nr_total_ckpt, si->cur_ckpt_time, - si->peak_ckpt_time); + seq_puts(s, "CP merge:\n"); + seq_printf(s, " - Queued : %4d\n", si->nr_queued_ckpt); + seq_printf(s, " - Issued : %4d\n", si->nr_issued_ckpt); + seq_printf(s, " - Total : %4d\n", si->nr_total_ckpt); + seq_printf(s, " - Cur time : %4d(ms)\n", si->cur_ckpt_time); + seq_printf(s, " - Peak time : %4d(ms)\n", si->peak_ckpt_time); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", si->data_segs, si->bg_data_segs); seq_printf(s, " - node segments : %d (%d)\n", si->node_segs, si->bg_node_segs); - seq_printf(s, " - Reclaimed segs : Normal (%d), Idle CB (%d), " - "Idle Greedy (%d), Idle AT (%d), " - "Urgent High (%d), Urgent Mid (%d), " - "Urgent Low (%d)\n", - si->sbi->gc_reclaimed_segs[GC_NORMAL], - si->sbi->gc_reclaimed_segs[GC_IDLE_CB], - si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY], - si->sbi->gc_reclaimed_segs[GC_IDLE_AT], - si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH], - si->sbi->gc_reclaimed_segs[GC_URGENT_MID], - si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]); + seq_puts(s, " - Reclaimed segs :\n"); + seq_printf(s, " - Normal : %d\n", si->sbi->gc_reclaimed_segs[GC_NORMAL]); + seq_printf(s, " - Idle CB : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_CB]); + seq_printf(s, " - Idle Greedy : %d\n", + si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY]); + seq_printf(s, " - Idle AT : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_AT]); + seq_printf(s, " - Urgent High : %d\n", + si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH]); + seq_printf(s, " - Urgent Mid : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_MID]); + seq_printf(s, " - Urgent Low : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]); seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, si->bg_data_blks + si->bg_node_blks); seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks, @@ -490,26 +508,44 @@ static int stat_show(struct seq_file *s, void *v) si->bg_node_blks); seq_printf(s, "BG skip : IO: %u, Other: %u\n", si->io_skip_bggc, si->other_skip_bggc); - seq_puts(s, "\nExtent Cache:\n"); + seq_puts(s, "\nExtent Cache (Read):\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", - si->hit_largest, si->hit_cached, - si->hit_rbtree); + si->hit_largest, si->hit_cached[EX_READ], + si->hit_rbtree[EX_READ]); + seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", + !si->total_ext[EX_READ] ? 0 : + div64_u64(si->hit_total[EX_READ] * 100, + si->total_ext[EX_READ]), + si->hit_total[EX_READ], si->total_ext[EX_READ]); + seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", + si->ext_tree[EX_READ], si->zombie_tree[EX_READ], + si->ext_node[EX_READ]); + seq_puts(s, "\nExtent Cache (Block Age):\n"); + seq_printf(s, " - Allocated Data Blocks: %llu\n", + si->allocated_data_blocks); + seq_printf(s, " - Hit Count: L1:%llu L2:%llu\n", + si->hit_cached[EX_BLOCK_AGE], + si->hit_rbtree[EX_BLOCK_AGE]); seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n", - !si->total_ext ? 0 : - div64_u64(si->hit_total * 100, si->total_ext), - si->hit_total, si->total_ext); + !si->total_ext[EX_BLOCK_AGE] ? 0 : + div64_u64(si->hit_total[EX_BLOCK_AGE] * 100, + si->total_ext[EX_BLOCK_AGE]), + si->hit_total[EX_BLOCK_AGE], + si->total_ext[EX_BLOCK_AGE]); seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", - si->ext_tree, si->zombie_tree, si->ext_node); + si->ext_tree[EX_BLOCK_AGE], + si->zombie_tree[EX_BLOCK_AGE], + si->ext_node[EX_BLOCK_AGE]); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - DIO (R: %4d, W: %4d)\n", si->nr_dio_read, si->nr_dio_write); seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); - seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " - "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), ", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, - si->flush_list_empty, + si->flush_list_empty); + seq_printf(s, "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_discarding, si->nr_discarded, si->nr_discard_cmd, si->undiscard_blks); seq_printf(s, " - atomic IO: %4d (Max. %4d)\n", @@ -566,8 +602,12 @@ static int stat_show(struct seq_file *s, void *v) (si->base_mem + si->cache_mem + si->page_mem) >> 10); seq_printf(s, " - static: %llu KB\n", si->base_mem >> 10); - seq_printf(s, " - cached: %llu KB\n", + seq_printf(s, " - cached all: %llu KB\n", si->cache_mem >> 10); + seq_printf(s, " - read extent cache: %llu KB\n", + si->ext_mem[EX_READ] >> 10); + seq_printf(s, " - block age extent cache: %llu KB\n", + si->ext_mem[EX_BLOCK_AGE] >> 10); seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } @@ -600,10 +640,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; - atomic64_set(&sbi->total_hit_ext, 0); - atomic64_set(&sbi->read_hit_rbtree, 0); + /* general extent cache stats */ + for (i = 0; i < NR_EXTENT_CACHES; i++) { + atomic64_set(&sbi->total_hit_ext[i], 0); + atomic64_set(&sbi->read_hit_rbtree[i], 0); + atomic64_set(&sbi->read_hit_cached[i], 0); + } + + /* read extent_cache only */ atomic64_set(&sbi->read_hit_largest, 0); - atomic64_set(&sbi->read_hit_cached, 0); atomic_set(&sbi->inline_xattr, 0); atomic_set(&sbi->inline_inode, 0); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 21960a899b6a..8e025157f35c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -340,6 +340,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int bidx, end_block; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; + pgoff_t next_pgofs; bool room = false; int max_slots; @@ -350,12 +351,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, le32_to_cpu(fname->hash) % nbucket); end_block = bidx + nblock; - for (; bidx < end_block; bidx++) { + while (bidx < end_block) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = f2fs_find_data_page(dir, bidx); + dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) { room = true; + bidx = next_pgofs; continue; } else { *res_page = dentry_page; @@ -376,6 +378,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, if (max_slots >= s) room = true; f2fs_put_page(dentry_page, 0); + + bidx++; } if (!de && room && F2FS_I(dir)->chash != fname->hash) { @@ -956,7 +960,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bool f2fs_empty_dir(struct inode *dir) { - unsigned long bidx; + unsigned long bidx = 0; struct page *dentry_page; unsigned int bit_pos; struct f2fs_dentry_block *dentry_blk; @@ -965,13 +969,17 @@ bool f2fs_empty_dir(struct inode *dir) if (f2fs_has_inline_dentry(dir)) return f2fs_empty_inline_dir(dir); - for (bidx = 0; bidx < nblock; bidx++) { - dentry_page = f2fs_get_lock_data_page(dir, bidx, false); + while (bidx < nblock) { + pgoff_t next_pgofs; + + dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); if (IS_ERR(dentry_page)) { - if (PTR_ERR(dentry_page) == -ENOENT) + if (PTR_ERR(dentry_page) == -ENOENT) { + bidx = next_pgofs; continue; - else + } else { return false; + } } dentry_blk = page_address(dentry_page); @@ -983,10 +991,12 @@ bool f2fs_empty_dir(struct inode *dir) NR_DENTRY_IN_BLOCK, bit_pos); - f2fs_put_page(dentry_page, 1); + f2fs_put_page(dentry_page, 0); if (bit_pos < NR_DENTRY_IN_BLOCK) return false; + + bidx++; } return true; } @@ -1000,7 +1010,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); struct blk_plug plug; - bool readdir_ra = sbi->readdir_ra == 1; + bool readdir_ra = sbi->readdir_ra; bool found_valid_dirent = false; int err = 0; @@ -1104,7 +1114,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) goto out_free; } - for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) { + for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) { + pgoff_t next_pgofs; /* allow readdir() to be interrupted */ if (fatal_signal_pending(current)) { @@ -1118,11 +1129,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = f2fs_find_data_page(inode, n); + dentry_page = f2fs_find_data_page(inode, n, &next_pgofs); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); if (err == -ENOENT) { err = 0; + n = next_pgofs; continue; } else { goto out_free; @@ -1141,6 +1153,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } f2fs_put_page(dentry_page, 0); + + n++; } out_free: fscrypt_fname_free_buffer(&fstr); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 932c070173b9..1bd38a78ebba 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -6,6 +6,10 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim <jaegeuk@kernel.org> * Chao Yu <chao2.yu@samsung.com> + * + * block_age-based extent cache added by: + * Copyright (c) 2022 xiaomi Co., Ltd. + * http://www.xiaomi.com/ */ #include <linux/fs.h> @@ -15,6 +19,123 @@ #include "node.h" #include <trace/events/f2fs.h> +static void __set_extent_info(struct extent_info *ei, + unsigned int fofs, unsigned int len, + block_t blk, bool keep_clen, + unsigned long age, unsigned long last_blocks, + enum extent_type type) +{ + ei->fofs = fofs; + ei->len = len; + + if (type == EX_READ) { + ei->blk = blk; + if (keep_clen) + return; +#ifdef CONFIG_F2FS_FS_COMPRESSION + ei->c_len = 0; +#endif + } else if (type == EX_BLOCK_AGE) { + ei->age = age; + ei->last_blocks = last_blocks; + } +} + +static bool __may_read_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, READ_EXTENT_CACHE)) + return false; + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return false; + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && + !f2fs_sb_has_readonly(sbi)) + return false; + return S_ISREG(inode->i_mode); +} + +static bool __may_age_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (!test_opt(sbi, AGE_EXTENT_CACHE)) + return false; + /* don't cache block age info for cold file */ + if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) + return false; + if (file_is_cold(inode)) + return false; + + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); +} + +static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) +{ + if (type == EX_READ) + return __may_read_extent_tree(inode); + else if (type == EX_BLOCK_AGE) + return __may_age_extent_tree(inode); + return false; +} + +static bool __may_extent_tree(struct inode *inode, enum extent_type type) +{ + /* + * for recovered files during mount do not create extents + * if shrinker is not registered. + */ + if (list_empty(&F2FS_I_SB(inode)->s_list)) + return false; + + return __init_may_extent_tree(inode, type); +} + +static void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) +{ + if (et->type != EX_READ) + return; + if (en->ei.len <= et->largest.len) + return; + + et->largest = en->ei; + et->largest_updated = true; +} + +static bool __is_extent_mergeable(struct extent_info *back, + struct extent_info *front, enum extent_type type) +{ + if (type == EX_READ) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (back->c_len && back->len != back->c_len) + return false; + if (front->c_len && front->len != front->c_len) + return false; +#endif + return (back->fofs + back->len == front->fofs && + back->blk + back->len == front->blk); + } else if (type == EX_BLOCK_AGE) { + return (back->fofs + back->len == front->fofs && + abs(back->age - front->age) <= SAME_AGE_REGION && + abs(back->last_blocks - front->last_blocks) <= + SAME_AGE_REGION); + } + return false; +} + +static bool __is_back_mergeable(struct extent_info *cur, + struct extent_info *back, enum extent_type type) +{ + return __is_extent_mergeable(back, cur, type); +} + +static bool __is_front_mergeable(struct extent_info *cur, + struct extent_info *front, enum extent_type type) +{ + return __is_extent_mergeable(cur, front, type); +} + static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, unsigned int ofs) { @@ -237,6 +358,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, struct rb_node *parent, struct rb_node **p, bool leftmost) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en; en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi); @@ -250,16 +372,18 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, rb_link_node(&en->rb_node, parent, p); rb_insert_color_cached(&en->rb_node, &et->root, leftmost); atomic_inc(&et->node_cnt); - atomic_inc(&sbi->total_ext_node); + atomic_inc(&eti->total_ext_node); return en; } static void __detach_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; + rb_erase_cached(&en->rb_node, &et->root); atomic_dec(&et->node_cnt); - atomic_dec(&sbi->total_ext_node); + atomic_dec(&eti->total_ext_node); if (et->cached_en == en) et->cached_en = NULL; @@ -275,61 +399,51 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi, static void __release_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_node *en) { - spin_lock(&sbi->extent_lock); + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; + + spin_lock(&eti->extent_lock); f2fs_bug_on(sbi, list_empty(&en->list)); list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); } -static struct extent_tree *__grab_extent_tree(struct inode *inode) +static struct extent_tree *__grab_extent_tree(struct inode *inode, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et; nid_t ino = inode->i_ino; - mutex_lock(&sbi->extent_tree_lock); - et = radix_tree_lookup(&sbi->extent_tree_root, ino); + mutex_lock(&eti->extent_tree_lock); + et = radix_tree_lookup(&eti->extent_tree_root, ino); if (!et) { et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS, true, NULL); - f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); + f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et); memset(et, 0, sizeof(struct extent_tree)); et->ino = ino; + et->type = type; et->root = RB_ROOT_CACHED; et->cached_en = NULL; rwlock_init(&et->lock); INIT_LIST_HEAD(&et->list); atomic_set(&et->node_cnt, 0); - atomic_inc(&sbi->total_ext_tree); + atomic_inc(&eti->total_ext_tree); } else { - atomic_dec(&sbi->total_zombie_tree); + atomic_dec(&eti->total_zombie_tree); list_del_init(&et->list); } - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); /* never died until evict_inode */ - F2FS_I(inode)->extent_tree = et; + F2FS_I(inode)->extent_tree[type] = et; return et; } -static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, struct extent_info *ei) -{ - struct rb_node **p = &et->root.rb_root.rb_node; - struct extent_node *en; - - en = __attach_extent_node(sbi, et, ei, NULL, p, true); - if (!en) - return NULL; - - et->largest = en->ei; - et->cached_en = en; - return en; -} - static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et) { @@ -358,70 +472,88 @@ static void __drop_largest_extent(struct extent_tree *et, } } -/* return true, if inode page is changed */ -static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage) +void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL; + struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; + struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei; - if (!f2fs_may_extent_tree(inode)) { - /* drop largest extent */ + if (!__may_extent_tree(inode, EX_READ)) { + /* drop largest read extent */ if (i_ext && i_ext->len) { f2fs_wait_on_page_writeback(ipage, NODE, true, true); i_ext->len = 0; set_page_dirty(ipage); - return; } - return; + goto out; } - et = __grab_extent_tree(inode); + et = __grab_extent_tree(inode, EX_READ); if (!i_ext || !i_ext->len) - return; + goto out; - get_extent_info(&ei, i_ext); + get_read_extent_info(&ei, i_ext); write_lock(&et->lock); if (atomic_read(&et->node_cnt)) - goto out; + goto unlock_out; - en = __init_extent_tree(sbi, et, &ei); + en = __attach_extent_node(sbi, et, &ei, NULL, + &et->root.rb_root.rb_node, true); if (en) { - spin_lock(&sbi->extent_lock); - list_add_tail(&en->list, &sbi->extent_list); - spin_unlock(&sbi->extent_lock); + et->largest = en->ei; + et->cached_en = en; + + spin_lock(&eti->extent_lock); + list_add_tail(&en->list, &eti->extent_list); + spin_unlock(&eti->extent_lock); } -out: +unlock_out: write_unlock(&et->lock); +out: + if (!F2FS_I(inode)->extent_tree[EX_READ]) + set_inode_flag(inode, FI_NO_EXTENT); } -void f2fs_init_extent_tree(struct inode *inode, struct page *ipage) +void f2fs_init_age_extent_tree(struct inode *inode) { - __f2fs_init_extent_tree(inode, ipage); + if (!__init_may_extent_tree(inode, EX_BLOCK_AGE)) + return; + __grab_extent_tree(inode, EX_BLOCK_AGE); +} - if (!F2FS_I(inode)->extent_tree) - set_inode_flag(inode, FI_NO_EXTENT); +void f2fs_init_extent_tree(struct inode *inode) +{ + /* initialize read cache */ + if (__init_may_extent_tree(inode, EX_READ)) + __grab_extent_tree(inode, EX_READ); + + /* initialize block age cache */ + if (__init_may_extent_tree(inode, EX_BLOCK_AGE)) + __grab_extent_tree(inode, EX_BLOCK_AGE); } -static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) +static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree_info *eti = &sbi->extent_tree[type]; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en; bool ret = false; f2fs_bug_on(sbi, !et); - trace_f2fs_lookup_extent_tree_start(inode, pgofs); + trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); read_lock(&et->lock); - if (et->largest.fofs <= pgofs && + if (type == EX_READ && + et->largest.fofs <= pgofs && et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; @@ -435,23 +567,26 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; if (en == et->cached_en) - stat_inc_cached_node_hit(sbi); + stat_inc_cached_node_hit(sbi, type); else - stat_inc_rbtree_node_hit(sbi); + stat_inc_rbtree_node_hit(sbi, type); *ei = en->ei; - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); ret = true; out: - stat_inc_total_hit(sbi); + stat_inc_total_hit(sbi, type); read_unlock(&et->lock); - trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei); + if (type == EX_READ) + trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); + else if (type == EX_BLOCK_AGE) + trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei); return ret; } @@ -460,18 +595,20 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_node *prev_ex, struct extent_node *next_ex) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct extent_node *en = NULL; - if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { + if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) { prev_ex->ei.len += ei->len; ei = &prev_ex->ei; en = prev_ex; } - if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { + if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) { next_ex->ei.fofs = ei->fofs; - next_ex->ei.blk = ei->blk; next_ex->ei.len += ei->len; + if (et->type == EX_READ) + next_ex->ei.blk = ei->blk; if (en) __release_extent_node(sbi, et, prev_ex); @@ -483,12 +620,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, __try_update_largest_extent(et, en); - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); et->cached_en = en; } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); return en; } @@ -498,6 +635,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent, bool leftmost) { + struct extent_tree_info *eti = &sbi->extent_tree[et->type]; struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -520,47 +658,54 @@ do_insert: __try_update_largest_extent(et, en); /* update in global extent list */ - spin_lock(&sbi->extent_lock); - list_add_tail(&en->list, &sbi->extent_list); + spin_lock(&eti->extent_lock); + list_add_tail(&en->list, &eti->extent_list); et->cached_en = en; - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); return en; } -static void f2fs_update_extent_tree_range(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int len) +static void __update_extent_tree_range(struct inode *inode, + struct extent_info *tei, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; struct extent_node *en = NULL, *en1 = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei, dei, prev; struct rb_node **insert_p = NULL, *insert_parent = NULL; + unsigned int fofs = tei->fofs, len = tei->len; unsigned int end = fofs + len; - unsigned int pos = (unsigned int)fofs; bool updated = false; bool leftmost = false; if (!et) return; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0); + if (type == EX_READ) + trace_f2fs_update_read_extent_tree_range(inode, fofs, len, + tei->blk, 0); + else if (type == EX_BLOCK_AGE) + trace_f2fs_update_age_extent_tree_range(inode, fofs, len, + tei->age, tei->last_blocks); write_lock(&et->lock); - if (is_inode_flag_set(inode, FI_NO_EXTENT)) { - write_unlock(&et->lock); - return; - } + if (type == EX_READ) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { + write_unlock(&et->lock); + return; + } - prev = et->largest; - dei.len = 0; + prev = et->largest; + dei.len = 0; - /* - * drop largest extent before lookup, in case it's already - * been shrunk from extent tree - */ - __drop_largest_extent(et, fofs, len); + /* + * drop largest extent before lookup, in case it's already + * been shrunk from extent tree + */ + __drop_largest_extent(et, fofs, len); + } /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, @@ -581,26 +726,32 @@ static void f2fs_update_extent_tree_range(struct inode *inode, dei = en->ei; org_end = dei.fofs + dei.len; - f2fs_bug_on(sbi, pos >= org_end); + f2fs_bug_on(sbi, fofs >= org_end); - if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { - en->ei.len = pos - en->ei.fofs; + if (fofs > dei.fofs && (type != EX_READ || + fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) { + en->ei.len = fofs - en->ei.fofs; prev_en = en; parts = 1; } - if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) { + if (end < org_end && (type != EX_READ || + org_end - end >= F2FS_MIN_EXTENT_LEN)) { if (parts) { - set_extent_info(&ei, end, - end - dei.fofs + dei.blk, - org_end - end); + __set_extent_info(&ei, + end, org_end - end, + end - dei.fofs + dei.blk, false, + dei.age, dei.last_blocks, + type); en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL, true); next_en = en1; } else { - en->ei.fofs = end; - en->ei.blk += end - dei.fofs; - en->ei.len -= end - dei.fofs; + __set_extent_info(&en->ei, + end, en->ei.len - (end - dei.fofs), + en->ei.blk + (end - dei.fofs), true, + dei.age, dei.last_blocks, + type); next_en = en; } parts++; @@ -630,10 +781,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, en = next_en; } - /* 3. update extent in extent cache */ - if (blkaddr) { + if (type == EX_BLOCK_AGE) + goto update_age_extent_cache; + + /* 3. update extent in read extent cache */ + BUG_ON(type != EX_READ); - set_extent_info(&ei, fofs, blkaddr, len); + if (tei->blk) { + __set_extent_info(&ei, fofs, len, tei->blk, false, + 0, 0, EX_READ); if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent, leftmost); @@ -655,7 +811,17 @@ static void f2fs_update_extent_tree_range(struct inode *inode, et->largest_updated = false; updated = true; } + goto out_read_extent_cache; +update_age_extent_cache: + if (!tei->last_blocks) + goto out_read_extent_cache; + __set_extent_info(&ei, fofs, len, 0, false, + tei->age, tei->last_blocks, EX_BLOCK_AGE); + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, + insert_p, insert_parent, leftmost); +out_read_extent_cache: write_unlock(&et->lock); if (updated) @@ -663,19 +829,20 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } #ifdef CONFIG_F2FS_FS_COMPRESSION -void f2fs_update_extent_tree_range_compressed(struct inode *inode, +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; struct extent_node *en = NULL; struct extent_node *prev_en = NULL, *next_en = NULL; struct extent_info ei; struct rb_node **insert_p = NULL, *insert_parent = NULL; bool leftmost = false; - trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len); + trace_f2fs_update_read_extent_tree_range(inode, fofs, llen, + blkaddr, c_len); /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ if (is_inode_flag_set(inode, FI_NO_EXTENT)) @@ -692,7 +859,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode, if (en) goto unlock_out; - set_extent_info(&ei, fofs, blkaddr, llen); + __set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ); ei.c_len = c_len; if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) @@ -703,24 +870,113 @@ unlock_out: } #endif -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +static unsigned long long __calculate_block_age(unsigned long long new, + unsigned long long old) { + unsigned long long diff; + + diff = (new >= old) ? new - (new - old) : new + (old - new); + + return div_u64(diff * LAST_AGE_WEIGHT, 100); +} + +/* This returns a new age and allocated blocks in ei */ +static int __get_new_block_age(struct inode *inode, struct extent_info *ei) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + loff_t f_size = i_size_read(inode); + unsigned long long cur_blocks = + atomic64_read(&sbi->allocated_data_blocks); + + /* + * When I/O is not aligned to a PAGE_SIZE, update will happen to the last + * file block even in seq write. So don't record age for newly last file + * block here. + */ + if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && + ei->blk == NEW_ADDR) + return -EINVAL; + + if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) { + unsigned long long cur_age; + + if (cur_blocks >= ei->last_blocks) + cur_age = cur_blocks - ei->last_blocks; + else + /* allocated_data_blocks overflow */ + cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks; + + if (ei->age) + ei->age = __calculate_block_age(cur_age, ei->age); + else + ei->age = cur_age; + ei->last_blocks = cur_blocks; + WARN_ON(ei->age > cur_blocks); + return 0; + } + + f2fs_bug_on(sbi, ei->blk == NULL_ADDR); + + /* the data block was allocated for the first time */ + if (ei->blk == NEW_ADDR) + goto out; + + if (__is_valid_data_blkaddr(ei->blk) && + !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) { + f2fs_bug_on(sbi, 1); + return -EINVAL; + } +out: + /* + * init block age with zero, this can happen when the block age extent + * was reclaimed due to memory constraint or system reboot + */ + ei->age = 0; + ei->last_blocks = cur_blocks; + return 0; +} + +static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) +{ + struct extent_info ei; + + if (!__may_extent_tree(dn->inode, type)) + return; + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + dn->ofs_in_node; + ei.len = 1; + + if (type == EX_READ) { + if (dn->data_blkaddr == NEW_ADDR) + ei.blk = NULL_ADDR; + else + ei.blk = dn->data_blkaddr; + } else if (type == EX_BLOCK_AGE) { + ei.blk = dn->data_blkaddr; + if (__get_new_block_age(dn->inode, &ei)) + return; + } + __update_extent_tree_range(dn->inode, &ei, type); +} + +static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink, + enum extent_type type) +{ + struct extent_tree_info *eti = &sbi->extent_tree[type]; struct extent_tree *et, *next; struct extent_node *en; unsigned int node_cnt = 0, tree_cnt = 0; int remained; - if (!test_opt(sbi, EXTENT_CACHE)) - return 0; - - if (!atomic_read(&sbi->total_zombie_tree)) + if (!atomic_read(&eti->total_zombie_tree)) goto free_node; - if (!mutex_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&eti->extent_tree_lock)) goto out; /* 1. remove unreferenced extent tree */ - list_for_each_entry_safe(et, next, &sbi->zombie_list, list) { + list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); node_cnt += __free_extent_tree(sbi, et); @@ -728,61 +984,137 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) } f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); list_del_init(&et->list); - radix_tree_delete(&sbi->extent_tree_root, et->ino); + radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - atomic_dec(&sbi->total_zombie_tree); + atomic_dec(&eti->total_ext_tree); + atomic_dec(&eti->total_zombie_tree); tree_cnt++; if (node_cnt + tree_cnt >= nr_shrink) goto unlock_out; cond_resched(); } - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); free_node: /* 2. remove LRU extent entries */ - if (!mutex_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&eti->extent_tree_lock)) goto out; remained = nr_shrink - (node_cnt + tree_cnt); - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); for (; remained > 0; remained--) { - if (list_empty(&sbi->extent_list)) + if (list_empty(&eti->extent_list)) break; - en = list_first_entry(&sbi->extent_list, + en = list_first_entry(&eti->extent_list, struct extent_node, list); et = en->et; if (!write_trylock(&et->lock)) { /* refresh this extent node's position in extent list */ - list_move_tail(&en->list, &sbi->extent_list); + list_move_tail(&en->list, &eti->extent_list); continue; } list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); __detach_extent_node(sbi, et, en); write_unlock(&et->lock); node_cnt++; - spin_lock(&sbi->extent_lock); + spin_lock(&eti->extent_lock); } - spin_unlock(&sbi->extent_lock); + spin_unlock(&eti->extent_lock); unlock_out: - mutex_unlock(&sbi->extent_tree_lock); + mutex_unlock(&eti->extent_tree_lock); out: - trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type); return node_cnt + tree_cnt; } -unsigned int f2fs_destroy_extent_node(struct inode *inode) +/* read extent cache operations */ +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (!__may_extent_tree(inode, EX_READ)) + return false; + + return __lookup_extent_tree(inode, pgofs, ei, EX_READ); +} + +void f2fs_update_read_extent_cache(struct dnode_of_data *dn) +{ + return __update_extent_cache(dn, EX_READ); +} + +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, block_t blkaddr, unsigned int len) +{ + struct extent_info ei = { + .fofs = fofs, + .len = len, + .blk = blkaddr, + }; + + if (!__may_extent_tree(dn->inode, EX_READ)) + return; + + __update_extent_tree_range(dn->inode, &ei, EX_READ); +} + +unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + if (!test_opt(sbi, READ_EXTENT_CACHE)) + return 0; + + return __shrink_extent_tree(sbi, nr_shrink, EX_READ); +} + +/* block age extent cache operations */ +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei) +{ + if (!__may_extent_tree(inode, EX_BLOCK_AGE)) + return false; + + return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE); +} + +void f2fs_update_age_extent_cache(struct dnode_of_data *dn) +{ + return __update_extent_cache(dn, EX_BLOCK_AGE); +} + +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len) +{ + struct extent_info ei = { + .fofs = fofs, + .len = len, + }; + + if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) + return; + + __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE); +} + +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) +{ + if (!test_opt(sbi, AGE_EXTENT_CACHE)) + return 0; + + return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); +} + +static unsigned int __destroy_extent_node(struct inode *inode, + enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et || !atomic_read(&et->node_cnt)) @@ -795,31 +1127,46 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) return node_cnt; } -void f2fs_drop_extent_tree(struct inode *inode) +void f2fs_destroy_extent_node(struct inode *inode) +{ + __destroy_extent_node(inode, EX_READ); + __destroy_extent_node(inode, EX_BLOCK_AGE); +} + +static void __drop_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; - if (!f2fs_may_extent_tree(inode)) + if (!__may_extent_tree(inode, type)) return; write_lock(&et->lock); - set_inode_flag(inode, FI_NO_EXTENT); __free_extent_tree(sbi, et); - if (et->largest.len) { - et->largest.len = 0; - updated = true; + if (type == EX_READ) { + set_inode_flag(inode, FI_NO_EXTENT); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } } write_unlock(&et->lock); if (updated) f2fs_mark_inode_dirty_sync(inode, true); } -void f2fs_destroy_extent_tree(struct inode *inode) +void f2fs_drop_extent_tree(struct inode *inode) +{ + __drop_extent_tree(inode, EX_READ); + __drop_extent_tree(inode, EX_BLOCK_AGE); +} + +static void __destroy_extent_tree(struct inode *inode, enum extent_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree_info *eti = &sbi->extent_tree[type]; + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; unsigned int node_cnt = 0; if (!et) @@ -827,76 +1174,56 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && atomic_read(&et->node_cnt)) { - mutex_lock(&sbi->extent_tree_lock); - list_add_tail(&et->list, &sbi->zombie_list); - atomic_inc(&sbi->total_zombie_tree); - mutex_unlock(&sbi->extent_tree_lock); + mutex_lock(&eti->extent_tree_lock); + list_add_tail(&et->list, &eti->zombie_list); + atomic_inc(&eti->total_zombie_tree); + mutex_unlock(&eti->extent_tree_lock); return; } /* free all extent info belong to this extent tree */ - node_cnt = f2fs_destroy_extent_node(inode); + node_cnt = __destroy_extent_node(inode, type); /* delete extent tree entry in radix tree */ - mutex_lock(&sbi->extent_tree_lock); + mutex_lock(&eti->extent_tree_lock); f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); - radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); + radix_tree_delete(&eti->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); - atomic_dec(&sbi->total_ext_tree); - mutex_unlock(&sbi->extent_tree_lock); + atomic_dec(&eti->total_ext_tree); + mutex_unlock(&eti->extent_tree_lock); - F2FS_I(inode)->extent_tree = NULL; + F2FS_I(inode)->extent_tree[type] = NULL; - trace_f2fs_destroy_extent_tree(inode, node_cnt); + trace_f2fs_destroy_extent_tree(inode, node_cnt, type); } -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei) -{ - if (!f2fs_may_extent_tree(inode)) - return false; - - return f2fs_lookup_extent_tree(inode, pgofs, ei); -} - -void f2fs_update_extent_cache(struct dnode_of_data *dn) +void f2fs_destroy_extent_tree(struct inode *inode) { - pgoff_t fofs; - block_t blkaddr; - - if (!f2fs_may_extent_tree(dn->inode)) - return; - - if (dn->data_blkaddr == NEW_ADDR) - blkaddr = NULL_ADDR; - else - blkaddr = dn->data_blkaddr; - - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); + __destroy_extent_tree(inode, EX_READ); + __destroy_extent_tree(inode, EX_BLOCK_AGE); } -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t fofs, block_t blkaddr, unsigned int len) - +static void __init_extent_tree_info(struct extent_tree_info *eti) { - if (!f2fs_may_extent_tree(dn->inode)) - return; - - f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); + INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO); + mutex_init(&eti->extent_tree_lock); + INIT_LIST_HEAD(&eti->extent_list); + spin_lock_init(&eti->extent_lock); + atomic_set(&eti->total_ext_tree, 0); + INIT_LIST_HEAD(&eti->zombie_list); + atomic_set(&eti->total_zombie_tree, 0); + atomic_set(&eti->total_ext_node, 0); } void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) { - INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); - mutex_init(&sbi->extent_tree_lock); - INIT_LIST_HEAD(&sbi->extent_list); - spin_lock_init(&sbi->extent_lock); - atomic_set(&sbi->total_ext_tree, 0); - INIT_LIST_HEAD(&sbi->zombie_list); - atomic_set(&sbi->total_zombie_tree, 0); - atomic_set(&sbi->total_ext_node, 0); + __init_extent_tree_info(&sbi->extent_tree[EX_READ]); + __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]); + + /* initialize for block age extents */ + atomic64_set(&sbi->allocated_data_blocks, 0); + sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; + sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e6355a5683b7..e8953c3dc81a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -60,6 +60,7 @@ enum { FAULT_SLAB_ALLOC, FAULT_DQUOT_INIT, FAULT_LOCK_OP, + FAULT_BLKADDR, FAULT_MAX, }; @@ -91,7 +92,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 #define F2FS_MOUNT_FASTBOOT 0x00001000 -#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 +#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 #define F2FS_MOUNT_USRQUOTA 0x00080000 @@ -106,6 +107,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 #define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_MOUNT_COMPRESS_CACHE 0x40000000 +#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -202,10 +204,6 @@ struct f2fs_mount_info { #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) #define F2FS_HAS_FEATURE(sbi, mask) __F2FS_HAS_FEATURE(sbi->raw_super, mask) -#define F2FS_SET_FEATURE(sbi, mask) \ - (sbi->raw_super->feature |= cpu_to_le32(mask)) -#define F2FS_CLEAR_FEATURE(sbi, mask) \ - (sbi->raw_super->feature &= ~cpu_to_le32(mask)) /* * Default values for user and/or group using reserved blocks @@ -328,8 +326,12 @@ struct discard_entry { unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* minimum discard granularity, unit: block count */ +#define MIN_DISCARD_GRANULARITY 1 /* default discard granularity of inner discard thread, unit: block count */ #define DEFAULT_DISCARD_GRANULARITY 16 +/* default maximum discard granularity of ordered discard, unit: block count */ +#define DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY 16 /* max discard pend list number */ #define MAX_PLIST_NUM 512 @@ -408,7 +410,9 @@ struct discard_cmd_control { unsigned int min_discard_issue_time; /* min. interval between discard issue */ unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ unsigned int max_discard_issue_time; /* max. interval between discard issue */ + unsigned int discard_urgent_util; /* utilization which issue discard proactively */ unsigned int discard_granularity; /* discard granularity */ + unsigned int max_ordered_discard; /* maximum discard granularity issued by lba order */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ atomic_t issued_discard; /* # of issued discard */ @@ -593,16 +597,35 @@ enum { /* dirty segments threshold for triggering CP */ #define DEFAULT_DIRTY_THRESHOLD 4 +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS +#define RECOVERY_MIN_RA_BLOCKS 1 + +#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ + /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ /* number of extent info in extent cache we try to shrink */ -#define EXTENT_CACHE_SHRINK_NUMBER 128 +#define READ_EXTENT_CACHE_SHRINK_NUMBER 128 -#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_VECS -#define RECOVERY_MIN_RA_BLOCKS 1 +/* number of age extent info in extent cache we try to shrink */ +#define AGE_EXTENT_CACHE_SHRINK_NUMBER 128 +#define LAST_AGE_WEIGHT 30 +#define SAME_AGE_REGION 1024 -#define F2FS_ONSTACK_PAGES 16 /* nr of onstack pages */ +/* + * Define data block with age less than 1GB as hot data + * define data block with age less than 10GB but more than 1GB as warm data + */ +#define DEF_HOT_DATA_AGE_THRESHOLD 262144 +#define DEF_WARM_DATA_AGE_THRESHOLD 2621440 + +/* extent cache type */ +enum extent_type { + EX_READ, + EX_BLOCK_AGE, + NR_EXTENT_CACHES, +}; struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ @@ -618,10 +641,24 @@ struct rb_entry { struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */ - u32 blk; /* start block address of the extent */ + union { + /* read extent_cache */ + struct { + /* start block address of the extent */ + block_t blk; #ifdef CONFIG_F2FS_FS_COMPRESSION - unsigned int c_len; /* physical extent length of compressed blocks */ + /* physical extent length of compressed blocks */ + unsigned int c_len; #endif + }; + /* block age extent_cache */ + struct { + /* block age of the extent */ + unsigned long long age; + /* last total blocks allocated */ + unsigned long long last_blocks; + }; + }; }; struct extent_node { @@ -633,13 +670,25 @@ struct extent_node { struct extent_tree { nid_t ino; /* inode number */ + enum extent_type type; /* keep the extent tree type */ struct rb_root_cached root; /* root of extent info rb-tree */ struct extent_node *cached_en; /* recently accessed extent node */ - struct extent_info largest; /* largested extent info */ struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ bool largest_updated; /* largest extent updated */ + struct extent_info largest; /* largest cached extent for EX_READ */ +}; + +struct extent_tree_info { + struct radix_tree_root extent_tree_root;/* cache extent cache entries */ + struct mutex extent_tree_lock; /* locking extent radix tree */ + struct list_head extent_list; /* lru list for shrinker */ + spinlock_t extent_lock; /* locking extent lru list */ + atomic_t total_ext_tree; /* extent tree count */ + struct list_head zombie_list; /* extent zombie tree list */ + atomic_t total_zombie_tree; /* extent zombie tree count */ + atomic_t total_ext_node; /* extent info count */ }; /* @@ -764,6 +813,8 @@ enum { FI_COMPRESS_RELEASED, /* compressed blocks were released */ FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ + FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_MAX, /* max flag, never be used */ }; @@ -800,7 +851,8 @@ struct f2fs_inode_info { struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ struct task_struct *atomic_write_task; /* store atomic write task */ - struct extent_tree *extent_tree; /* cached extent_tree entry */ + struct extent_tree *extent_tree[NR_EXTENT_CACHES]; + /* cached extent_tree entry */ struct inode *cow_inode; /* copy-on-write inode for atomic write */ /* avoid racing between foreground op and gc */ @@ -822,9 +874,10 @@ struct f2fs_inode_info { unsigned int i_cluster_size; /* cluster size */ unsigned int atomic_write_cnt; + loff_t original_i_size; /* original i_size before atomic write */ }; -static inline void get_extent_info(struct extent_info *ext, +static inline void get_read_extent_info(struct extent_info *ext, struct f2fs_extent *i_ext) { ext->fofs = le32_to_cpu(i_ext->fofs); @@ -832,7 +885,7 @@ static inline void get_extent_info(struct extent_info *ext, ext->len = le32_to_cpu(i_ext->len); } -static inline void set_raw_extent(struct extent_info *ext, +static inline void set_raw_read_extent(struct extent_info *ext, struct f2fs_extent *i_ext) { i_ext->fofs = cpu_to_le32(ext->fofs); @@ -840,17 +893,6 @@ static inline void set_raw_extent(struct extent_info *ext, i_ext->len = cpu_to_le32(ext->len); } -static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, - u32 blk, unsigned int len) -{ - ei->fofs = fofs; - ei->blk = blk; - ei->len = len; -#ifdef CONFIG_F2FS_FS_COMPRESSION - ei->c_len = 0; -#endif -} - static inline bool __is_discard_mergeable(struct discard_info *back, struct discard_info *front, unsigned int max_len) { @@ -870,41 +912,6 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur, return __is_discard_mergeable(cur, front, max_len); } -static inline bool __is_extent_mergeable(struct extent_info *back, - struct extent_info *front) -{ -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (back->c_len && back->len != back->c_len) - return false; - if (front->c_len && front->len != front->c_len) - return false; -#endif - return (back->fofs + back->len == front->fofs && - back->blk + back->len == front->blk); -} - -static inline bool __is_back_mergeable(struct extent_info *cur, - struct extent_info *back) -{ - return __is_extent_mergeable(back, cur); -} - -static inline bool __is_front_mergeable(struct extent_info *cur, - struct extent_info *front) -{ - return __is_extent_mergeable(cur, front); -} - -extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct extent_tree *et, - struct extent_node *en) -{ - if (en->ei.len > et->largest.len) { - et->largest = en->ei; - et->largest_updated = true; - } -} - /* * For free nid management */ @@ -1062,9 +1069,6 @@ struct f2fs_sm_info { /* a threshold to reclaim prefree segments */ unsigned int rec_prefree_segments; - /* for batched trimming */ - unsigned int trim_sections; /* # of sections to trim */ - struct list_head sit_entry_set; /* sit entry set list */ unsigned int ipu_policy; /* in-place-update policy */ @@ -1318,6 +1322,7 @@ enum { MAX_TIME, }; +/* Note that you need to keep synchronization with this gc_mode_names array */ enum { GC_NORMAL, GC_IDLE_CB, @@ -1668,14 +1673,12 @@ struct f2fs_sb_info { struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ - struct radix_tree_root extent_tree_root;/* cache extent cache entries */ - struct mutex extent_tree_lock; /* locking extent radix tree */ - struct list_head extent_list; /* lru list for shrinker */ - spinlock_t extent_lock; /* locking extent lru list */ - atomic_t total_ext_tree; /* extent tree count */ - struct list_head zombie_list; /* extent zombie tree list */ - atomic_t total_zombie_tree; /* extent zombie tree count */ - atomic_t total_ext_node; /* extent info count */ + struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; + atomic64_t allocated_data_blocks; /* for block age extent_cache */ + + /* The threshold used for hot and warm data seperation*/ + unsigned int hot_data_age_threshold; + unsigned int warm_data_age_threshold; /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -1693,7 +1696,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ int dir_level; /* directory level */ - int readdir_ra; /* readahead inode in readdir */ + bool readdir_ra; /* readahead inode in readdir */ u64 max_io_bytes; /* max io bytes to merge IOs */ block_t user_block_count; /* # of user blocks */ @@ -1734,8 +1737,9 @@ struct f2fs_sb_info { unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ - spinlock_t gc_urgent_high_lock; - unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */ + spinlock_t gc_remaining_trials_lock; + /* remaining trial count for GC_URGENT_* and GC_IDLE_* */ + unsigned int gc_remaining_trials; /* for skip statistic */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ @@ -1759,10 +1763,14 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ - atomic64_t total_hit_ext; /* # of lookup extent cache */ - atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */ - atomic64_t read_hit_largest; /* # of hit largest extent node */ - atomic64_t read_hit_cached; /* # of hit cached extent node */ + /* # of lookup extent cache */ + atomic64_t total_hit_ext[NR_EXTENT_CACHES]; + /* # of hit rbtree extent node */ + atomic64_t read_hit_rbtree[NR_EXTENT_CACHES]; + /* # of hit cached extent node */ + atomic64_t read_hit_cached[NR_EXTENT_CACHES]; + /* # of hit largest extent node in read extent cache */ + atomic64_t read_hit_largest; atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ @@ -2576,6 +2584,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); } +extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { @@ -2974,7 +2983,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that should be inherited by new inodes from their parent. */ #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ - F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL) + F2FS_CASEFOLD_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ #define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ @@ -3072,6 +3081,8 @@ static inline void f2fs_i_blocks_write(struct inode *inode, set_inode_flag(inode, FI_AUTO_RECOVER); } +static inline bool f2fs_is_atomic_file(struct inode *inode); + static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) { bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); @@ -3081,6 +3092,10 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) return; i_size_write(inode, i_size); + + if (f2fs_is_atomic_file(inode)) + return; + f2fs_mark_inode_dirty_sync(inode, true); if (clean || recover) set_inode_flag(inode, FI_AUTO_RECOVER); @@ -3796,8 +3811,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write); -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); +struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, + pgoff_t *next_pgofs); struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write); struct page *f2fs_get_new_data_page(struct inode *inode, @@ -3856,9 +3872,19 @@ struct f2fs_stat_info { struct f2fs_sb_info *sbi; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; - unsigned long long hit_largest, hit_cached, hit_rbtree; - unsigned long long hit_total, total_ext; - int ext_tree, zombie_tree, ext_node; + unsigned long long hit_cached[NR_EXTENT_CACHES]; + unsigned long long hit_rbtree[NR_EXTENT_CACHES]; + unsigned long long total_ext[NR_EXTENT_CACHES]; + unsigned long long hit_total[NR_EXTENT_CACHES]; + int ext_tree[NR_EXTENT_CACHES]; + int zombie_tree[NR_EXTENT_CACHES]; + int ext_node[NR_EXTENT_CACHES]; + /* to count memory footprint */ + unsigned long long ext_mem[NR_EXTENT_CACHES]; + /* for read extent cache */ + unsigned long long hit_largest; + /* for block age extent cache */ + unsigned long long allocated_data_blocks; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; @@ -3917,10 +3943,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) -#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) -#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree)) +#define stat_inc_total_hit(sbi, type) (atomic64_inc(&(sbi)->total_hit_ext[type])) +#define stat_inc_rbtree_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_rbtree[type])) #define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest)) -#define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached)) +#define stat_inc_cached_node_hit(sbi, type) (atomic64_inc(&(sbi)->read_hit_cached[type])) #define stat_inc_inline_xattr(inode) \ do { \ if (f2fs_has_inline_xattr(inode)) \ @@ -4043,10 +4069,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sbi) do { } while (0) -#define stat_inc_rbtree_node_hit(sbi) do { } while (0) +#define stat_inc_total_hit(sbi, type) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi, type) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) -#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi, type) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) #define stat_dec_inline_xattr(inode) do { } while (0) #define stat_inc_inline_inode(inode) do { } while (0) @@ -4152,20 +4178,34 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, struct rb_root_cached *root, bool check_key); -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); -void f2fs_init_extent_tree(struct inode *inode, struct page *ipage); +void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); -unsigned int f2fs_destroy_extent_node(struct inode *inode); +void f2fs_destroy_extent_node(struct inode *inode); void f2fs_destroy_extent_tree(struct inode *inode); -bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, - struct extent_info *ei); -void f2fs_update_extent_cache(struct dnode_of_data *dn); -void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t fofs, block_t blkaddr, unsigned int len); void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi); int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); +/* read extent cache ops */ +void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage); +bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_read_extent_cache(struct dnode_of_data *dn); +void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, block_t blkaddr, unsigned int len); +unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); + +/* block age extent cache ops */ +void f2fs_init_age_extent_tree(struct inode *inode); +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_age_extent_cache(struct dnode_of_data *dn); +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, + pgoff_t fofs, unsigned int len); +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, + int nr_shrink); + /* * sysfs.c */ @@ -4235,9 +4275,9 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); -void f2fs_update_extent_tree_range_compressed(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int llen, - unsigned int c_len); +void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len); int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, unsigned nr_pages, sector_t *last_block_in_bio, bool is_readahead, bool for_write); @@ -4318,9 +4358,10 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) -static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode, - pgoff_t fofs, block_t blkaddr, unsigned int llen, - unsigned int c_len) { } +static inline void f2fs_update_read_extent_tree_range_compressed( + struct inode *inode, + pgoff_t fofs, block_t blkaddr, + unsigned int llen, unsigned int c_len) { } #endif static inline int set_compress_context(struct inode *inode) @@ -4371,7 +4412,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) } #define F2FS_FEATURE_FUNCS(name, flagname) \ -static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ +static inline bool f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \ { \ return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \ } @@ -4391,26 +4432,6 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); -static inline bool f2fs_may_extent_tree(struct inode *inode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - - if (!test_opt(sbi, EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT) || - (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && - !f2fs_sb_has_readonly(sbi))) - return false; - - /* - * for recovered files during mount do not create extents - * if shrinker is not registered. - */ - if (list_empty(&sbi->s_list)) - return false; - - return S_ISREG(inode->i_mode); -} - #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, block_t blkaddr) @@ -4563,6 +4584,11 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, } } +static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi) +{ + return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb); +} + #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 83df6f6173d3..a6c401279886 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -571,7 +571,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + base + ofs; - /* Assumption: truncateion starts with cluster */ + /* Assumption: truncation starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { block_t blkaddr = le32_to_cpu(*addr); @@ -618,7 +618,8 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) */ fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + ofs; - f2fs_update_extent_cache_range(dn, fofs, 0, len); + f2fs_update_read_extent_cache_range(dn, fofs, 0, len); + f2fs_update_age_extent_cache_range(dn, fofs, nr_free); dec_valid_block_count(sbi, dn->inode, nr_free); } dn->ofs_in_node = ofs; @@ -1496,7 +1497,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, f2fs_set_data_blkaddr(dn); } - f2fs_update_extent_cache_range(dn, start, 0, index - start); + f2fs_update_read_extent_cache_range(dn, start, 0, index - start); return ret; } @@ -1915,6 +1916,10 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (!f2fs_disable_compressed_file(inode)) return -EINVAL; } else { + /* try to convert inline_data to support compression */ + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; if (!f2fs_may_compress(inode)) return -EINVAL; if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) @@ -2030,13 +2035,14 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) return put_user(inode->i_generation, (int __user *)arg); } -static int f2fs_ioc_start_atomic_write(struct file *filp) +static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) { struct inode *inode = file_inode(filp); struct user_namespace *mnt_userns = file_mnt_user_ns(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct inode *pinode; + loff_t isize; int ret; if (!inode_owner_or_capable(mnt_userns, inode)) @@ -2095,13 +2101,25 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_up_write(&fi->i_gc_rwsem[WRITE]); goto out; } - f2fs_i_size_write(fi->cow_inode, i_size_read(inode)); + + f2fs_write_inode(inode, NULL); stat_inc_atomic_inode(inode); set_inode_flag(inode, FI_ATOMIC_FILE); set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + isize = i_size_read(inode); + fi->original_i_size = isize; + if (truncate) { + set_inode_flag(inode, FI_ATOMIC_REPLACE); + truncate_inode_pages_final(inode->i_mapping); + f2fs_i_size_write(inode, 0); + isize = 0; + } + f2fs_i_size_write(fi->cow_inode, isize); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); f2fs_update_time(sbi, REQ_TIME); @@ -2133,16 +2151,14 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) if (f2fs_is_atomic_file(inode)) { ret = f2fs_commit_atomic_write(inode); - if (ret) - goto unlock_out; - - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); if (!ret) - f2fs_abort_atomic_write(inode, false); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + + f2fs_abort_atomic_write(inode, ret); } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } -unlock_out: + inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -2543,7 +2559,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_map_blocks map = { .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE, .m_may_create = false }; - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; @@ -2575,7 +2591,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * lookup mapping info in extent cache, skip defragmenting if physical * block addresses are continuous. */ - if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { if (ei.fofs + ei.len >= pg_end) goto out; } @@ -4131,7 +4147,9 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case FS_IOC_GETVERSION: return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: - return f2fs_ioc_start_atomic_write(filp); + return f2fs_ioc_start_atomic_write(filp, false); + case F2FS_IOC_START_ATOMIC_REPLACE: + return f2fs_ioc_start_atomic_write(filp, true); case F2FS_IOC_COMMIT_ATOMIC_WRITE: return f2fs_ioc_commit_atomic_write(filp); case F2FS_IOC_ABORT_ATOMIC_WRITE: diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 536d332d9e2e..6e2cae3d2e71 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,16 +96,6 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (sbi->gc_mode == GC_URGENT_HIGH) { - spin_lock(&sbi->gc_urgent_high_lock); - if (sbi->gc_urgent_high_remaining) { - sbi->gc_urgent_high_remaining--; - if (!sbi->gc_urgent_high_remaining) - sbi->gc_mode = GC_NORMAL; - } - spin_unlock(&sbi->gc_urgent_high_lock); - } - if (sbi->gc_mode == GC_URGENT_HIGH || sbi->gc_mode == GC_URGENT_MID) { wait_ms = gc_th->urgent_sleep_time; @@ -151,6 +141,10 @@ do_gc: /* don't bother wait_ms by foreground gc */ if (!foreground) wait_ms = gc_th->no_gc_sleep_time; + } else { + /* reset wait_ms to default sleep time */ + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->min_sleep_time; } if (foreground) @@ -162,6 +156,15 @@ do_gc: /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi, true); next: + if (sbi->gc_mode != GC_NORMAL) { + spin_lock(&sbi->gc_remaining_trials_lock); + if (sbi->gc_remaining_trials) { + sbi->gc_remaining_trials--; + if (!sbi->gc_remaining_trials) + sbi->gc_mode = GC_NORMAL; + } + spin_unlock(&sbi->gc_remaining_trials_lock); + } sb_end_write(sbi->sb); } while (!kthread_should_stop()); @@ -172,13 +175,10 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; - int err = 0; gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) { - err = -ENOMEM; - goto out; - } + if (!gc_th) + return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; @@ -193,12 +193,14 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { - err = PTR_ERR(gc_th->f2fs_gc_task); + int err = PTR_ERR(gc_th->f2fs_gc_task); + kfree(gc_th); sbi->gc_thread = NULL; + return err; } -out: - return err; + + return 0; } void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) @@ -1079,7 +1081,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, { struct page *node_page; nid_t nid; - unsigned int ofs_in_node, max_addrs; + unsigned int ofs_in_node, max_addrs, base; block_t source_blkaddr; nid = le32_to_cpu(sum->nid); @@ -1105,11 +1107,18 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return false; } - max_addrs = IS_INODE(node_page) ? DEF_ADDRS_PER_INODE : - DEF_ADDRS_PER_BLOCK; - if (ofs_in_node >= max_addrs) { - f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u", - ofs_in_node, dni->ino, dni->nid, max_addrs); + if (IS_INODE(node_page)) { + base = offset_in_addr(F2FS_INODE(node_page)); + max_addrs = DEF_ADDRS_PER_INODE; + } else { + base = 0; + max_addrs = DEF_ADDRS_PER_BLOCK; + } + + if (base + ofs_in_node >= max_addrs) { + f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u", + base, ofs_in_node, max_addrs, dni->ino, dni->nid); + f2fs_put_page(node_page, 1); return false; } @@ -1141,7 +1150,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei = {0, 0, 0}; + struct extent_info ei = {0, }; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, @@ -1159,7 +1168,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (!page) return -ENOMEM; - if (f2fs_lookup_extent_cache(inode, index, &ei)) { + if (f2fs_lookup_read_extent_cache(inode, index, &ei)) { dn.data_blkaddr = ei.blk + index - ei.fofs; if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { @@ -1563,8 +1572,8 @@ next_step: continue; } - data_page = f2fs_get_read_data_page(inode, - start_bidx, REQ_RAHEAD, true); + data_page = f2fs_get_read_data_page(inode, start_bidx, + REQ_RAHEAD, true, NULL); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); @@ -1744,8 +1753,9 @@ freed: get_valid_blocks(sbi, segno, false) == 0) seg_freed++; - if (__is_large_section(sbi) && segno + 1 < end_segno) - sbi->next_victim_seg[gc_type] = segno + 1; + if (__is_large_section(sbi)) + sbi->next_victim_seg[gc_type] = + (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; skip: f2fs_put_page(sum_page, 0); } @@ -1898,9 +1908,7 @@ int __init f2fs_create_garbage_collection_cache(void) { victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry", sizeof(struct victim_entry)); - if (!victim_entry_slab) - return -ENOMEM; - return 0; + return victim_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_garbage_collection_cache(void) @@ -2133,8 +2141,6 @@ out_unlock: if (err) return err; - set_sbi_flag(sbi, SBI_IS_RESIZEFS); - freeze_super(sbi->sb); f2fs_down_write(&sbi->gc_lock); f2fs_down_write(&sbi->cp_global_sem); @@ -2150,6 +2156,7 @@ out_unlock: if (err) goto out_err; + set_sbi_flag(sbi, SBI_IS_RESIZEFS); err = free_segment_range(sbi, secs, false); if (err) goto recover_out; @@ -2173,6 +2180,7 @@ out_unlock: f2fs_commit_super(sbi, false); } recover_out: + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); @@ -2185,6 +2193,5 @@ out_err: f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); - clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9f0d3864d9f1..ff6cf66ed46b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -262,8 +262,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } - if (fi->extent_tree) { - struct extent_info *ei = &fi->extent_tree->largest; + if (fi->extent_tree[EX_READ]) { + struct extent_info *ei = &fi->extent_tree[EX_READ]->largest; if (ei->len && (!f2fs_is_valid_blkaddr(sbi, ei->blk, @@ -392,8 +392,6 @@ static int do_read_inode(struct inode *inode) fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_dir_level = ri->i_dir_level; - f2fs_init_extent_tree(inode, node_page); - get_inline_info(inode, ri); fi->i_extra_isize = f2fs_has_extra_attr(inode) ? @@ -479,6 +477,11 @@ static int do_read_inode(struct inode *inode) } init_idisk_time(inode); + + /* Need all the flag bits */ + f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_age_extent_tree(inode); + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -607,7 +610,7 @@ retry: void f2fs_update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; - struct extent_tree *et = F2FS_I(inode)->extent_tree; + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; f2fs_wait_on_page_writeback(node_page, NODE, true, true); set_page_dirty(node_page); @@ -621,12 +624,15 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_uid = cpu_to_le32(i_uid_read(inode)); ri->i_gid = cpu_to_le32(i_gid_read(inode)); ri->i_links = cpu_to_le32(inode->i_nlink); - ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1); + if (!f2fs_is_atomic_file(inode) || + is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) + ri->i_size = cpu_to_le64(i_size_read(inode)); + if (et) { read_lock(&et->lock); - set_raw_extent(&et->largest, &ri->i_ext); + set_raw_read_extent(&et->largest, &ri->i_ext); read_unlock(&et->lock); } else { memset(&ri->i_ext, 0, sizeof(ri->i_ext)); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c227113b0f26..6032589099ce 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -22,137 +22,6 @@ #include "acl.h" #include <trace/events/f2fs.h> -static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, - struct inode *dir, umode_t mode) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - nid_t ino; - struct inode *inode; - bool nid_free = false; - bool encrypt = false; - int xattr_size = 0; - int err; - - inode = new_inode(dir->i_sb); - if (!inode) - return ERR_PTR(-ENOMEM); - - if (!f2fs_alloc_nid(sbi, &ino)) { - err = -ENOSPC; - goto fail; - } - - nid_free = true; - - inode_init_owner(mnt_userns, inode, dir, mode); - - inode->i_ino = ino; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - F2FS_I(inode)->i_crtime = inode->i_mtime; - inode->i_generation = get_random_u32(); - - if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_current_depth = 1; - - err = insert_inode_locked(inode); - if (err) { - err = -EINVAL; - goto fail; - } - - if (f2fs_sb_has_project_quota(sbi) && - (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) - F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; - else - F2FS_I(inode)->i_projid = make_kprojid(mnt_userns, - F2FS_DEF_PROJID); - - err = fscrypt_prepare_new_inode(dir, inode, &encrypt); - if (err) - goto fail_drop; - - err = f2fs_dquot_initialize(inode); - if (err) - goto fail_drop; - - set_inode_flag(inode, FI_NEW_INODE); - - if (encrypt) - f2fs_set_encrypted_inode(inode); - - if (f2fs_sb_has_extra_attr(sbi)) { - set_inode_flag(inode, FI_EXTRA_ATTR); - F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; - } - - if (test_opt(sbi, INLINE_XATTR)) - set_inode_flag(inode, FI_INLINE_XATTR); - - if (f2fs_may_inline_dentry(inode)) - set_inode_flag(inode, FI_INLINE_DENTRY); - - if (f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); - if (f2fs_has_inline_xattr(inode)) - xattr_size = F2FS_OPTION(sbi).inline_xattr_size; - /* Otherwise, will be 0 */ - } else if (f2fs_has_inline_xattr(inode) || - f2fs_has_inline_dentry(inode)) { - xattr_size = DEFAULT_INLINE_XATTR_ADDRS; - } - F2FS_I(inode)->i_inline_xattr_size = xattr_size; - - f2fs_init_extent_tree(inode, NULL); - - F2FS_I(inode)->i_flags = - f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); - - if (S_ISDIR(inode->i_mode)) - F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; - - if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) - set_inode_flag(inode, FI_PROJ_INHERIT); - - if (f2fs_sb_has_compression(sbi)) { - /* Inherit the compression flag in directory */ - if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) && - f2fs_may_compress(inode)) - set_compress_context(inode); - } - - /* Should enable inline_data after compression set */ - if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) - set_inode_flag(inode, FI_INLINE_DATA); - - stat_inc_inline_xattr(inode); - stat_inc_inline_inode(inode); - stat_inc_inline_dir(inode); - - f2fs_set_inode_flags(inode); - - trace_f2fs_new_inode(inode, 0); - return inode; - -fail: - trace_f2fs_new_inode(inode, err); - make_bad_inode(inode); - if (nid_free) - set_inode_flag(inode, FI_FREE_NID); - iput(inode); - return ERR_PTR(err); -fail_drop: - trace_f2fs_new_inode(inode, err); - dquot_drop(inode); - inode->i_flags |= S_NOQUOTA; - if (nid_free) - set_inode_flag(inode, FI_FREE_NID); - clear_nlink(inode); - unlock_new_inode(inode); - iput(inode); - return ERR_PTR(err); -} - static inline int is_extension_exist(const unsigned char *s, const char *sub, bool tmp_ext) { @@ -187,36 +56,6 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub, return 0; } -/* - * Set file's temperature for hot/cold data separation - */ -static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, - const unsigned char *name) -{ - __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - int i, cold_count, hot_count; - - f2fs_down_read(&sbi->sb_lock); - - cold_count = le32_to_cpu(sbi->raw_super->extension_count); - hot_count = sbi->raw_super->hot_ext_count; - - for (i = 0; i < cold_count + hot_count; i++) { - if (is_extension_exist(name, extlist[i], true)) - break; - } - - f2fs_up_read(&sbi->sb_lock); - - if (i == cold_count + hot_count) - return; - - if (i < cold_count) - file_set_cold(inode); - else - file_set_hot(inode); -} - int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set) { @@ -283,56 +122,215 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, return 0; } -static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, - const unsigned char *name) +static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir, + struct inode *inode, const unsigned char *name) { __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; - unsigned char (*noext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).noextensions; + unsigned char (*noext)[F2FS_EXTENSION_LEN] = + F2FS_OPTION(sbi).noextensions; unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions; unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt; int i, cold_count, hot_count; - if (!f2fs_sb_has_compression(sbi) || - F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL || - !f2fs_may_compress(inode) || - (!ext_cnt && !noext_cnt)) + if (!f2fs_sb_has_compression(sbi)) return; - f2fs_down_read(&sbi->sb_lock); + if (S_ISDIR(inode->i_mode)) + goto inherit_comp; + /* This name comes only from normal files. */ + if (!name) + return; + + /* Don't compress hot files. */ + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; + for (i = cold_count; i < cold_count + hot_count; i++) + if (is_extension_exist(name, extlist[i], false)) + break; + f2fs_up_read(&sbi->sb_lock); + if (i < (cold_count + hot_count)) + return; + + /* Don't compress unallowed extension. */ + for (i = 0; i < noext_cnt; i++) + if (is_extension_exist(name, noext[i], false)) + return; - for (i = cold_count; i < cold_count + hot_count; i++) { - if (is_extension_exist(name, extlist[i], false)) { - f2fs_up_read(&sbi->sb_lock); + /* Compress wanting extension. */ + for (i = 0; i < ext_cnt; i++) { + if (is_extension_exist(name, ext[i], false)) { + set_compress_context(inode); return; } } +inherit_comp: + /* Inherit the {no-}compression flag in directory */ + if (F2FS_I(dir)->i_flags & F2FS_NOCOMP_FL) { + F2FS_I(inode)->i_flags |= F2FS_NOCOMP_FL; + f2fs_mark_inode_dirty_sync(inode, true); + } else if (F2FS_I(dir)->i_flags & F2FS_COMPR_FL) { + set_compress_context(inode); + } +} + +/* + * Set file's temperature for hot/cold data separation + */ +static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, cold_count, hot_count; + f2fs_down_read(&sbi->sb_lock); + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + for (i = 0; i < cold_count + hot_count; i++) + if (is_extension_exist(name, extlist[i], true)) + break; f2fs_up_read(&sbi->sb_lock); - for (i = 0; i < noext_cnt; i++) { - if (is_extension_exist(name, noext[i], false)) { - f2fs_disable_compressed_file(inode); - return; - } + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); +} + +static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns, + struct inode *dir, umode_t mode, + const char *name) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + nid_t ino; + struct inode *inode; + bool nid_free = false; + bool encrypt = false; + int xattr_size = 0; + int err; + + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (!f2fs_alloc_nid(sbi, &ino)) { + err = -ENOSPC; + goto fail; } - if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) - return; + nid_free = true; - for (i = 0; i < ext_cnt; i++) { - if (!is_extension_exist(name, ext[i], false)) - continue; + inode_init_owner(mnt_userns, inode, dir, mode); - /* Do not use inline_data with compression */ - stat_dec_inline_inode(inode); - clear_inode_flag(inode, FI_INLINE_DATA); - set_compress_context(inode); - return; + inode->i_ino = ino; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + F2FS_I(inode)->i_crtime = inode->i_mtime; + inode->i_generation = get_random_u32(); + + if (S_ISDIR(inode->i_mode)) + F2FS_I(inode)->i_current_depth = 1; + + err = insert_inode_locked(inode); + if (err) { + err = -EINVAL; + goto fail; + } + + if (f2fs_sb_has_project_quota(sbi) && + (F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL)) + F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid; + else + F2FS_I(inode)->i_projid = make_kprojid(mnt_userns, + F2FS_DEF_PROJID); + + err = fscrypt_prepare_new_inode(dir, inode, &encrypt); + if (err) + goto fail_drop; + + err = f2fs_dquot_initialize(inode); + if (err) + goto fail_drop; + + set_inode_flag(inode, FI_NEW_INODE); + + if (encrypt) + f2fs_set_encrypted_inode(inode); + + if (f2fs_sb_has_extra_attr(sbi)) { + set_inode_flag(inode, FI_EXTRA_ATTR); + F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; + } + + if (test_opt(sbi, INLINE_XATTR)) + set_inode_flag(inode, FI_INLINE_XATTR); + + if (f2fs_may_inline_dentry(inode)) + set_inode_flag(inode, FI_INLINE_DENTRY); + + if (f2fs_sb_has_flexible_inline_xattr(sbi)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + if (f2fs_has_inline_xattr(inode)) + xattr_size = F2FS_OPTION(sbi).inline_xattr_size; + /* Otherwise, will be 0 */ + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + xattr_size = DEFAULT_INLINE_XATTR_ADDRS; } + F2FS_I(inode)->i_inline_xattr_size = xattr_size; + + F2FS_I(inode)->i_flags = + f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED); + + if (S_ISDIR(inode->i_mode)) + F2FS_I(inode)->i_flags |= F2FS_INDEX_FL; + + if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) + set_inode_flag(inode, FI_PROJ_INHERIT); + + /* Check compression first. */ + set_compress_new_inode(sbi, dir, inode, name); + + /* Should enable inline_data after compression set */ + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) + set_inode_flag(inode, FI_INLINE_DATA); + + if (name && !test_opt(sbi, DISABLE_EXT_IDENTIFY)) + set_file_temperature(sbi, inode, name); + + stat_inc_inline_xattr(inode); + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + + f2fs_set_inode_flags(inode); + + f2fs_init_extent_tree(inode); + + trace_f2fs_new_inode(inode, 0); + return inode; + +fail: + trace_f2fs_new_inode(inode, err); + make_bad_inode(inode); + if (nid_free) + set_inode_flag(inode, FI_FREE_NID); + iput(inode); + return ERR_PTR(err); +fail_drop: + trace_f2fs_new_inode(inode, err); + dquot_drop(inode); + inode->i_flags |= S_NOQUOTA; + if (nid_free) + set_inode_flag(inode, FI_FREE_NID); + clear_nlink(inode); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(err); } static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, @@ -352,15 +350,10 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode, dentry->d_name.name); if (IS_ERR(inode)) return PTR_ERR(inode); - if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_file_temperature(sbi, inode, dentry->d_name.name); - - set_compress_inode(sbi, inode, dentry->d_name.name); - inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -632,6 +625,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } f2fs_delete_entry(de, page, dir, inode); + f2fs_unlock_op(sbi); + #if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid @@ -642,8 +637,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (IS_CASEFOLDED(dir)) d_invalidate(dentry); #endif - f2fs_unlock_op(sbi); - if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: @@ -689,7 +682,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO); + inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -760,7 +753,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode); + inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -817,7 +810,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -856,7 +849,7 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - inode = f2fs_new_inode(mnt_userns, dir, mode); + inode = f2fs_new_inode(mnt_userns, dir, mode, NULL); if (IS_ERR(inode)) return PTR_ERR(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 983572f23896..dde4c0458704 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -60,7 +60,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) avail_ram = val.totalram - val.totalhigh; /* - * give 25%, 25%, 50%, 50%, 50% memory for each components respectively + * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively */ if (type == FREE_NIDS) { mem_size = (nm_i->nid_cnt[FREE_NID] * @@ -85,12 +85,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); - } else if (type == EXTENT_CACHE) { - mem_size = (atomic_read(&sbi->total_ext_tree) * + } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) { + enum extent_type etype = type == READ_EXTENT_CACHE ? + EX_READ : EX_BLOCK_AGE; + struct extent_tree_info *eti = &sbi->extent_tree[etype]; + + mem_size = (atomic_read(&eti->total_ext_tree) * sizeof(struct extent_tree) + - atomic_read(&sbi->total_ext_node) * + atomic_read(&eti->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; - res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DISCARD_CACHE) { mem_size = (atomic_read(&dcc->discard_cmd_cnt) * sizeof(struct discard_cmd)) >> PAGE_SHIFT; @@ -859,7 +863,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + 1); - f2fs_update_extent_tree_range_compressed(dn->inode, + f2fs_update_read_extent_tree_range_compressed(dn->inode, index, blkaddr, F2FS_I(dn->inode)->i_cluster_size, c_len); @@ -1360,8 +1364,7 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ - if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) || - is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) { + if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { ClearPageUptodate(page); return -ENOENT; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3c09cae058b0..99454d46a939 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -146,7 +146,8 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ - EXTENT_CACHE, /* indicates extent cache */ + READ_EXTENT_CACHE, /* indicates read extent cache */ + AGE_EXTENT_CACHE, /* indicates age extent cache */ DISCARD_CACHE, /* indicates memory of cached discard cmds */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */ BASE_CHECK, /* check kernel status */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dea95b48b647..77fd453949b1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -923,9 +923,7 @@ int __init f2fs_create_recovery_cache(void) { fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry)); - if (!fsync_entry_slab) - return -ENOMEM; - return 0; + return fsync_entry_slab ? 0 : -ENOMEM; } void f2fs_destroy_recovery_cache(void) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b304692c0cf5..25ddea478fc1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,14 +192,19 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; - if (clean) - truncate_inode_pages_final(inode->i_mapping); clear_inode_flag(fi->cow_inode, FI_COW_FILE); iput(fi->cow_inode); fi->cow_inode = NULL; release_atomic_write_cnt(inode); + clear_inode_flag(inode, FI_ATOMIC_COMMITTED); + clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); stat_dec_atomic_inode(inode); + + if (clean) { + truncate_inode_pages_final(inode->i_mapping); + f2fs_i_size_write(inode, fi->original_i_size); + } } static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, @@ -257,14 +262,19 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head, bool revoke) { struct revoke_entry *cur, *tmp; + bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE); list_for_each_entry_safe(cur, tmp, head, list) { if (revoke) __replace_atomic_write_block(inode, cur->index, cur->old_addr, NULL, true); + list_del(&cur->list); kmem_cache_free(revoke_entry_slab, cur); } + + if (!revoke && truncate) + f2fs_do_truncate_blocks(inode, 0, false); } static int __f2fs_commit_atomic_write(struct inode *inode) @@ -335,10 +345,12 @@ next: } out: - if (ret) + if (ret) { sbi->revoked_atomic_block += fi->atomic_write_cnt; - else + } else { sbi->committed_atomic_block += fi->atomic_write_cnt; + set_inode_flag(inode, FI_ATOMIC_COMMITTED); + } __complete_revoke_list(inode, &revoke_list, ret ? true : false); @@ -437,8 +449,14 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) return; /* try to shrink extent cache when there is no enough memory */ - if (!f2fs_available_free_memory(sbi, EXTENT_CACHE)) - f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE)) + f2fs_shrink_read_extent_tree(sbi, + READ_EXTENT_CACHE_SHRINK_NUMBER); + + /* try to shrink age extent cache when there is no enough memory */ + if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE)) + f2fs_shrink_age_extent_tree(sbi, + AGE_EXTENT_CACHE_SHRINK_NUMBER); /* check the # of cached NAT entries */ if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) @@ -620,12 +638,11 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct flush_cmd_control *fcc; - int err = 0; if (SM_I(sbi)->fcc_info) { fcc = SM_I(sbi)->fcc_info; if (fcc->f2fs_issue_flush) - return err; + return 0; goto init_thread; } @@ -638,19 +655,20 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi) init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; if (!test_opt(sbi, FLUSH_MERGE)) - return err; + return 0; init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { - err = PTR_ERR(fcc->f2fs_issue_flush); + int err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); SM_I(sbi)->fcc_info = NULL; return err; } - return err; + return 0; } void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) @@ -856,7 +874,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + unusable = max(holes[DATA], holes[NODE]); if (unusable > ovp_holes) return unusable - ovp_holes; return 0; @@ -1052,8 +1070,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->io_aware = true; dpolicy->sync = false; dpolicy->ordered = true; - if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { - dpolicy->granularity = 1; + if (utilization(sbi) > dcc->discard_urgent_util) { + dpolicy->granularity = MIN_DISCARD_GRANULARITY; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = dcc->min_discard_issue_time; @@ -1068,7 +1086,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, } else if (discard_type == DPOLICY_UMOUNT) { dpolicy->io_aware = false; /* we need to issue all to keep CP_TRIMMED_FLAG */ - dpolicy->granularity = 1; + dpolicy->granularity = MIN_DISCARD_GRANULARITY; dpolicy->timeout = true; } } @@ -1126,13 +1144,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, if (time_to_inject(sbi, FAULT_DISCARD)) { f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; - goto submit; - } - err = __blkdev_issue_discard(bdev, + } else { + err = __blkdev_issue_discard(bdev, SECTOR_FROM_BLOCK(start), SECTOR_FROM_BLOCK(len), GFP_NOFS, &bio); -submit: + } if (err) { spin_lock_irqsave(&dc->lock, flags); if (dc->state == D_PARTIAL) @@ -1170,7 +1187,7 @@ submit: atomic_inc(&dcc->issued_discard); - f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1); + f2fs_update_iostat(sbi, NULL, FS_DISCARD, len * F2FS_BLKSIZE); lstart += len; start += len; @@ -1342,13 +1359,13 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, } } -static int __queue_discard_cmd(struct f2fs_sb_info *sbi, +static void __queue_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { block_t lblkstart = blkstart; if (!f2fs_bdev_support_discard(bdev)) - return 0; + return; trace_f2fs_queue_discard(bdev, blkstart, blklen); @@ -1360,7 +1377,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); - return 0; } static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, @@ -1448,7 +1464,7 @@ retry: if (i + 1 < dpolicy->granularity) break; - if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) return __issue_discard_cmd_orderly(sbi, dpolicy); pend_list = &dcc->pend_list[i]; @@ -1645,6 +1661,9 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) struct discard_policy dpolicy; bool dropped; + if (!atomic_read(&dcc->discard_cmd_cnt)) + return false; + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); @@ -1669,6 +1688,11 @@ static int issue_discard_thread(void *data) set_freezable(); do { + wait_event_interruptible_timeout(*q, + kthread_should_stop() || freezing(current) || + dcc->discard_wake, + msecs_to_jiffies(wait_ms)); + if (sbi->gc_mode == GC_URGENT_HIGH || !f2fs_available_free_memory(sbi, DISCARD_CACHE)) __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); @@ -1676,14 +1700,6 @@ static int issue_discard_thread(void *data) __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, dcc->discard_granularity); - if (!atomic_read(&dcc->discard_cmd_cnt)) - wait_ms = dpolicy.max_interval; - - wait_event_interruptible_timeout(*q, - kthread_should_stop() || freezing(current) || - dcc->discard_wake, - msecs_to_jiffies(wait_ms)); - if (dcc->discard_wake) dcc->discard_wake = 0; @@ -1697,12 +1713,11 @@ static int issue_discard_thread(void *data) continue; if (kthread_should_stop()) return 0; - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + !atomic_read(&dcc->discard_cmd_cnt)) { wait_ms = dpolicy.max_interval; continue; } - if (!atomic_read(&dcc->discard_cmd_cnt)) - continue; sb_start_intwrite(sbi->sb); @@ -1717,6 +1732,8 @@ static int issue_discard_thread(void *data) } else { wait_ms = dpolicy.max_interval; } + if (!atomic_read(&dcc->discard_cmd_cnt)) + wait_ms = dpolicy.max_interval; sb_end_intwrite(sbi->sb); @@ -1760,7 +1777,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, } /* For conventional zones, use regular discard if supported */ - return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); + __queue_discard_cmd(sbi, bdev, lblkstart, blklen); + return 0; } #endif @@ -1771,7 +1789,8 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif - return __queue_discard_cmd(sbi, bdev, blkstart, blklen); + __queue_discard_cmd(sbi, bdev, blkstart, blklen); + return 0; } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, @@ -2025,8 +2044,10 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(dcc->f2fs_issue_discard)) + if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); + dcc->f2fs_issue_discard = NULL; + } return err; } @@ -2046,6 +2067,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) dcc->discard_granularity = sbi->blocks_per_seg; else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) @@ -2066,6 +2088,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME; + dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL; dcc->undiscard_blks = 0; dcc->next_pos = 0; dcc->root = RB_ROOT_CACHED; @@ -2096,8 +2119,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) * Recovery can cache discard commands, so in error path of * fill_super(), it needs to give a chance to handle them. */ - if (unlikely(atomic_read(&dcc->discard_cmd_cnt))) - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi); kfree(dcc); SM_I(sbi)->dcc_info = NULL; @@ -2642,7 +2664,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) +static void change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2650,9 +2672,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush) struct f2fs_summary_block *sum_node; struct page *sum_page; - if (flush) - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); __set_test_and_inuse(sbi, new_segno); @@ -2691,7 +2711,7 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); curseg->seg_type = se->type; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } else { /* allocate cold segment by default */ curseg->seg_type = CURSEG_COLD_DATA; @@ -2835,31 +2855,20 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, return 0; } -/* - * flush out current segment and replace it with new segment - * This function should be returned with success, otherwise BUG - */ -static void allocate_segment_by_default(struct f2fs_sb_info *sbi, - int type, bool force) +static bool need_new_seg(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) - new_curseg(sbi, type, true); - else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && - curseg->seg_type == CURSEG_WARM_NODE) - new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && - is_next_segment_free(sbi, curseg, type) && - likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - new_curseg(sbi, type, false); - else if (f2fs_need_SSR(sbi) && - get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type, true); - else - new_curseg(sbi, type, false); - - stat_inc_seg_type(sbi, curseg); + if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && + curseg->seg_type == CURSEG_WARM_NODE) + return true; + if (curseg->alloc_type == LFS && + is_next_segment_free(sbi, curseg, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return true; + if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0)) + return true; + return false; } void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, @@ -2877,7 +2886,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, goto unlock; if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type, true); + change_curseg(sbi, type); else new_curseg(sbi, type, true); @@ -2912,7 +2921,8 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, return; alloc: old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + new_curseg(sbi, type, true); + stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); } @@ -2943,10 +2953,6 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) f2fs_up_read(&SM_I(sbi)->curseg_lock); } -static const struct segment_allocation default_salloc_ops = { - .allocate_segment = allocate_segment_by_default, -}; - bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) { @@ -3152,10 +3158,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) } } +static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_info ei; + + if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) { + if (!ei.age) + return NO_CHECK_TYPE; + if (ei.age <= sbi->hot_data_age_threshold) + return CURSEG_HOT_DATA; + if (ei.age <= sbi->warm_data_age_threshold) + return CURSEG_WARM_DATA; + return CURSEG_COLD_DATA; + } + return NO_CHECK_TYPE; +} + static int __get_segment_type_6(struct f2fs_io_info *fio) { if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; + int type; if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return CURSEG_COLD_DATA_PINNED; @@ -3170,6 +3194,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) } if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; + + type = __get_age_segment_type(inode, fio->page->index); + if (type != NO_CHECK_TYPE) + return type; + if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || f2fs_is_cow_file(inode)) @@ -3266,11 +3295,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, update_sit_entry(sbi, old_blkaddr, -1); if (!__has_curseg_space(sbi, curseg)) { - if (from_gc) + /* + * Flush out current segment and replace it with new segment. + */ + if (from_gc) { get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); - else - sit_i->s_ops->allocate_segment(sbi, type, false); + } else { + if (need_new_seg(sbi, type)) + new_curseg(sbi, type, false); + else + change_curseg(sbi, type); + stat_inc_seg_type(sbi, curseg); + } } /* * segment dirty status should be updated after segment allocation, @@ -3280,6 +3317,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); + if (IS_DATASEG(type)) + atomic64_inc(&sbi->allocated_data_blocks); + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { @@ -3407,6 +3447,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn, struct f2fs_summary sum; f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); + if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO) + f2fs_update_age_extent_cache(dn); set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); @@ -3531,7 +3573,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -3559,7 +3601,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = old_blkoff; curseg->alloc_type = old_alloc_type; @@ -4256,9 +4298,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #endif - /* init SIT information */ - sit_i->s_ops = &default_salloc_ops; - sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; sit_i->written_valid_blocks = 0; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index be8f2d7d007b..3ad1b7b6fa94 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -222,10 +222,6 @@ struct sec_entry { unsigned int valid_blocks; /* # of valid blocks in a section */ }; -struct segment_allocation { - void (*allocate_segment)(struct f2fs_sb_info *, int, bool); -}; - #define MAX_SKIP_GC_COUNT 16 struct revoke_entry { @@ -235,8 +231,6 @@ struct revoke_entry { }; struct sit_info { - const struct segment_allocation *s_ops; - block_t sit_base_addr; /* start block address of SIT area */ block_t sit_blocks; /* # of blocks used by SIT area */ block_t written_valid_blocks; /* # of valid blocks in main area */ diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index dd3c3c7a90ec..83d6fb97dcae 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -28,10 +28,13 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) return count > 0 ? count : 0; } -static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) +static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi, + enum extent_type type) { - return atomic_read(&sbi->total_zombie_tree) + - atomic_read(&sbi->total_ext_node); + struct extent_tree_info *eti = &sbi->extent_tree[type]; + + return atomic_read(&eti->total_zombie_tree) + + atomic_read(&eti->total_ext_node); } unsigned long f2fs_shrink_count(struct shrinker *shrink, @@ -53,8 +56,11 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, } spin_unlock(&f2fs_list_lock); - /* count extent cache entries */ - count += __count_extent_cache(sbi); + /* count read extent cache entries */ + count += __count_extent_cache(sbi, EX_READ); + + /* count block age extent cache entries */ + count += __count_extent_cache(sbi, EX_BLOCK_AGE); /* count clean nat cache entries */ count += __count_nat_entries(sbi); @@ -100,7 +106,10 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, sbi->shrinker_run_no = run_no; /* shrink extent cache entries */ - freed += f2fs_shrink_extent_tree(sbi, nr >> 1); + freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2); + + /* shrink read extent cache entries */ + freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2); /* shrink clean nat cache entries */ if (freed < nr) @@ -130,7 +139,9 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi) void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) { - f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); + f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); + f2fs_shrink_age_extent_tree(sbi, + __count_extent_cache(sbi, EX_BLOCK_AGE)); spin_lock(&f2fs_list_lock); list_del_init(&sbi->s_list); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3834ead04620..1f812b9ce985 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -61,6 +61,7 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_SLAB_ALLOC] = "slab alloc", [FAULT_DQUOT_INIT] = "dquot initialize", [FAULT_LOCK_OP] = "lock_op", + [FAULT_BLKADDR] = "invalid blkaddr", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, @@ -110,6 +111,7 @@ enum { Opt_noinline_dentry, Opt_flush_merge, Opt_noflush_merge, + Opt_barrier, Opt_nobarrier, Opt_fastboot, Opt_extent_cache, @@ -161,6 +163,7 @@ enum { Opt_nogc_merge, Opt_discard_unit, Opt_memory_mode, + Opt_age_extent_cache, Opt_err, }; @@ -186,6 +189,7 @@ static match_table_t f2fs_tokens = { {Opt_noinline_dentry, "noinline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_noflush_merge, "noflush_merge"}, + {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, {Opt_extent_cache, "extent_cache"}, @@ -238,6 +242,7 @@ static match_table_t f2fs_tokens = { {Opt_nogc_merge, "nogc_merge"}, {Opt_discard_unit, "discard_unit=%s"}, {Opt_memory_mode, "memory=%s"}, + {Opt_age_extent_cache, "age_extent_cache"}, {Opt_err, NULL}, }; @@ -285,9 +290,7 @@ static int __init f2fs_create_casefold_cache(void) { f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name", F2FS_NAME_LEN); - if (!f2fs_cf_name_slab) - return -ENOMEM; - return 0; + return f2fs_cf_name_slab ? 0 : -ENOMEM; } static void f2fs_destroy_casefold_cache(void) @@ -806,14 +809,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_barrier: + clear_opt(sbi, NOBARRIER); + break; case Opt_fastboot: set_opt(sbi, FASTBOOT); break; case Opt_extent_cache: - set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noextent_cache: - clear_opt(sbi, EXTENT_CACHE); + clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: clear_opt(sbi, INLINE_DATA); @@ -1253,6 +1259,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; + case Opt_age_extent_cache: + set_opt(sbi, AGE_EXTENT_CACHE); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1347,6 +1356,11 @@ default_check: return -EINVAL; } + if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) { + f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode"); + return -EINVAL; + } + if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; @@ -1567,8 +1581,7 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ dropped = f2fs_issue_discard_timeout(sbi); - if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && - !sbi->discard_blks && !dropped) { + if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -1935,16 +1948,22 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_dentry"); else seq_puts(seq, ",noinline_dentry"); - if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) + if (test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); + else + seq_puts(seq, ",noflush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + else + seq_puts(seq, ",barrier"); if (test_opt(sbi, FASTBOOT)) seq_puts(seq, ",fastboot"); - if (test_opt(sbi, EXTENT_CACHE)) + if (test_opt(sbi, READ_EXTENT_CACHE)) seq_puts(seq, ",extent_cache"); else seq_puts(seq, ",noextent_cache"); + if (test_opt(sbi, AGE_EXTENT_CACHE)) + seq_puts(seq, ",age_extent_cache"); if (test_opt(sbi, DATA_FLUSH)) seq_puts(seq, ",data_flush"); @@ -2043,7 +2062,11 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE; F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main) <= + SMALL_VOLUME_SEGMENTS) + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + else + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); @@ -2059,13 +2082,14 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, READ_EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; - set_opt(sbi, FLUSH_MERGE); + if (!f2fs_is_readonly(sbi)) + set_opt(sbi, FLUSH_MERGE); if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi)) { @@ -2200,14 +2224,14 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_ckpt = false, need_stop_ckpt = false; bool need_restart_flush = false, need_stop_flush = false; bool need_restart_discard = false, need_stop_discard = false; - bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); + bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); bool block_unit_discard = f2fs_block_unit_discard(sbi); - struct discard_cmd_control *dcc; #ifdef CONFIG_QUOTA int i, j; #endif @@ -2290,11 +2314,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } /* disallow enable/disable extent_cache dynamically */ - if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { + if (no_read_extent_cache == !!test_opt(sbi, READ_EXTENT_CACHE)) { err = -EINVAL; f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; } + /* disallow enable/disable age extent_cache dynamically */ + if (no_age_extent_cache == !!test_opt(sbi, AGE_EXTENT_CACHE)) { + err = -EINVAL; + f2fs_warn(sbi, "switch age_extent_cache option is not allowed"); + goto restore_opts; + } if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { err = -EINVAL; @@ -2388,10 +2418,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_flush; need_stop_discard = true; } else { - dcc = SM_I(sbi)->dcc_info; f2fs_stop_discard_thread(sbi); - if (atomic_read(&dcc->discard_cmd_cnt)) - f2fs_issue_discard_timeout(sbi); + f2fs_issue_discard_timeout(sbi); need_restart_discard = true; } } @@ -3616,7 +3644,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; - spin_lock_init(&sbi->gc_urgent_high_lock); + spin_lock_init(&sbi->gc_remaining_trials_lock); atomic64_set(&sbi->current_atomic_write, 0); sbi->dir_level = DEF_DIR_LEVEL; @@ -4056,18 +4084,16 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_i = SM_I(sbi); - /* adjust parameters according to the volume size */ - if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { - F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + if (MAIN_SEGS(sbi) <= SMALL_VOLUME_SEGMENTS) { if (f2fs_block_unit_discard(sbi)) - sm_i->dcc_info->discard_granularity = 1; - sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | + SM_I(sbi)->dcc_info->discard_granularity = + MIN_DISCARD_GRANULARITY; + SM_I(sbi)->ipu_policy = 1 << F2FS_IPU_FORCE | 1 << F2FS_IPU_HONOR_OPU_WRITE; } - sbi->readdir_ra = 1; + sbi->readdir_ra = true; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) @@ -4095,6 +4121,24 @@ try_onemore: sbi->sb = sb; + /* initialize locks within allocated memory */ + init_f2fs_rwsem(&sbi->gc_lock); + mutex_init(&sbi->writepages); + init_f2fs_rwsem(&sbi->cp_global_sem); + init_f2fs_rwsem(&sbi->node_write); + init_f2fs_rwsem(&sbi->node_change); + spin_lock_init(&sbi->stat_lock); + init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->quota_sem); + init_waitqueue_head(&sbi->cp_wait); + spin_lock_init(&sbi->error_lock); + + for (i = 0; i < NR_INODE_TYPE; i++) { + INIT_LIST_HEAD(&sbi->inode_list[i]); + spin_lock_init(&sbi->inode_lock[i]); + } + mutex_init(&sbi->flush_lock); + /* Load the checksum driver */ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { @@ -4118,6 +4162,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; + memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); + /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sbi)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, @@ -4174,23 +4220,14 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - init_f2fs_rwsem(&sbi->gc_lock); - mutex_init(&sbi->writepages); - init_f2fs_rwsem(&sbi->cp_global_sem); - init_f2fs_rwsem(&sbi->node_write); - init_f2fs_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); - spin_lock_init(&sbi->stat_lock); err = f2fs_init_write_merge_io(sbi); if (err) goto free_bio_info; - init_f2fs_rwsem(&sbi->cp_rwsem); - init_f2fs_rwsem(&sbi->quota_sem); - init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); err = f2fs_init_iostat(sbi); @@ -4255,9 +4292,6 @@ try_onemore: goto free_devices; } - spin_lock_init(&sbi->error_lock); - memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS); - sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, @@ -4271,12 +4305,6 @@ try_onemore: limit_reserve_root(sbi); adjust_unusable_cap_perc(sbi); - for (i = 0; i < NR_INODE_TYPE; i++) { - INIT_LIST_HEAD(&sbi->inode_list[i]); - spin_lock_init(&sbi->inode_lock[i]); - } - mutex_init(&sbi->flush_lock); - f2fs_init_extent_cache_info(sbi); f2fs_init_ino_entry_info(sbi); @@ -4523,9 +4551,9 @@ free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); - f2fs_destroy_post_read_wq(sbi); stop_ckpt_thread: f2fs_stop_ckpt_thread(sbi); + f2fs_destroy_post_read_wq(sbi); free_devices: destroy_device_list(sbi); kvfree(sbi->ckpt); @@ -4626,9 +4654,7 @@ static int __init init_inodecache(void) f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache", sizeof(struct f2fs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL); - if (!f2fs_inode_cachep) - return -ENOMEM; - return 0; + return f2fs_inode_cachep ? 0 : -ENOMEM; } static void destroy_inodecache(void) @@ -4693,7 +4719,7 @@ static int __init init_f2fs_fs(void) goto free_iostat; err = f2fs_init_bioset(); if (err) - goto free_bio_enrty_cache; + goto free_bio_entry_cache; err = f2fs_init_compress_mempool(); if (err) goto free_bioset; @@ -4710,7 +4736,7 @@ free_compress_mempool: f2fs_destroy_compress_mempool(); free_bioset: f2fs_destroy_bioset(); -free_bio_enrty_cache: +free_bio_entry_cache: f2fs_destroy_bio_entry_cache(); free_iostat: f2fs_destroy_iostat_processing(); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index df27afd71ef4..83a366f3ee80 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -53,9 +53,9 @@ static const char *gc_mode_names[MAX_GC_MODE] = { struct f2fs_attr { struct attribute attr; - ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); - ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, - const char *, size_t); + ssize_t (*show)(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf); + ssize_t (*store)(struct f2fs_attr *a, struct f2fs_sb_info *sbi, + const char *buf, size_t len); int struct_type; int offset; int id; @@ -95,28 +95,28 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) static ssize_t dirty_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(dirty_segments(sbi))); } static ssize_t free_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(free_segments(sbi))); } static ssize_t ovp_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(overprovision_segments(sbi))); } static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(sbi->kbytes_written + ((f2fs_get_sectors_written(sbi) - sbi->sectors_written_start) >> 1))); @@ -125,13 +125,13 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, static ssize_t sb_status_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%lx\n", sbi->s_flag); + return sysfs_emit(buf, "%lx\n", sbi->s_flag); } static ssize_t cp_status_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags)); + return sysfs_emit(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags)); } static ssize_t pending_discard_show(struct f2fs_attr *a, @@ -139,10 +139,16 @@ static ssize_t pending_discard_show(struct f2fs_attr *a, { if (!SM_I(sbi)->dcc_info) return -EINVAL; - return sprintf(buf, "%llu\n", (unsigned long long)atomic_read( + return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read( &SM_I(sbi)->dcc_info->discard_cmd_cnt)); } +static ssize_t gc_mode_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%s\n", gc_mode_names[sbi->gc_mode]); +} + static ssize_t features_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -199,7 +205,7 @@ static ssize_t features_show(struct f2fs_attr *a, static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%u\n", sbi->current_reserved_blocks); + return sysfs_emit(buf, "%u\n", sbi->current_reserved_blocks); } static ssize_t unusable_show(struct f2fs_attr *a, @@ -211,7 +217,7 @@ static ssize_t unusable_show(struct f2fs_attr *a, unusable = sbi->unusable_block_count; else unusable = f2fs_get_unusable_blocks(sbi); - return sprintf(buf, "%llu\n", (unsigned long long)unusable); + return sysfs_emit(buf, "%llu\n", (unsigned long long)unusable); } static ssize_t encoding_show(struct f2fs_attr *a, @@ -226,13 +232,13 @@ static ssize_t encoding_show(struct f2fs_attr *a, (sb->s_encoding->version >> 8) & 0xff, sb->s_encoding->version & 0xff); #endif - return sprintf(buf, "(none)"); + return sysfs_emit(buf, "(none)\n"); } static ssize_t mounted_time_sec_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time); + return sysfs_emit(buf, "%llu\n", SIT_I(sbi)->mounted_time); } #ifdef CONFIG_F2FS_STAT_FS @@ -241,7 +247,7 @@ static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, { struct f2fs_stat_info *si = F2FS_STAT(sbi); - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->tot_blks - (si->bg_data_blks + si->bg_node_blks))); } @@ -251,7 +257,7 @@ static ssize_t moved_blocks_background_show(struct f2fs_attr *a, { struct f2fs_stat_info *si = F2FS_STAT(sbi); - return sprintf(buf, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->bg_data_blks + si->bg_node_blks)); } @@ -262,7 +268,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, si->dirty_count = dirty_segments(sbi); f2fs_update_sit_info(sbi); - return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); + return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); } #endif @@ -332,13 +338,8 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, return sysfs_emit(buf, "%u\n", sbi->compr_new_inode); #endif - if (!strcmp(a->attr.name, "gc_urgent")) - return sysfs_emit(buf, "%s\n", - gc_mode_names[sbi->gc_mode]); - if (!strcmp(a->attr.name, "gc_segment_mode")) - return sysfs_emit(buf, "%s\n", - gc_mode_names[sbi->gc_segment_mode]); + return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode); if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { return sysfs_emit(buf, "%u\n", @@ -362,7 +363,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, ui = (unsigned int *)(ptr + a->offset); - return sprintf(buf, "%u\n", *ui); + return sysfs_emit(buf, "%u\n", *ui); } static ssize_t __sbi_store(struct f2fs_attr *a, @@ -483,14 +484,27 @@ out: return count; } + if (!strcmp(a->attr.name, "max_ordered_discard")) { + if (t == 0 || t > MAX_PLIST_NUM) + return -EINVAL; + if (!f2fs_block_unit_discard(sbi)) + return -EINVAL; + *ui = t; + return count; + } + + if (!strcmp(a->attr.name, "discard_urgent_util")) { + if (t > 100) + return -EINVAL; + *ui = t; + return count; + } + if (!strcmp(a->attr.name, "migration_granularity")) { if (t == 0 || t > sbi->segs_per_sec) return -EINVAL; } - if (!strcmp(a->attr.name, "trim_sections")) - return -EINVAL; - if (!strcmp(a->attr.name, "gc_urgent")) { if (t == 0) { sbi->gc_mode = GC_NORMAL; @@ -531,10 +545,10 @@ out: return count; } - if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) { - spin_lock(&sbi->gc_urgent_high_lock); - sbi->gc_urgent_high_remaining = t; - spin_unlock(&sbi->gc_urgent_high_lock); + if (!strcmp(a->attr.name, "gc_remaining_trials")) { + spin_lock(&sbi->gc_remaining_trials_lock); + sbi->gc_remaining_trials = t; + spin_unlock(&sbi->gc_remaining_trials_lock); return count; } @@ -649,6 +663,29 @@ out: return count; } + if (!strcmp(a->attr.name, "readdir_ra")) { + sbi->readdir_ra = !!t; + return count; + } + + if (!strcmp(a->attr.name, "hot_data_age_threshold")) { + if (t == 0 || t >= sbi->warm_data_age_threshold) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + + if (!strcmp(a->attr.name, "warm_data_age_threshold")) { + if (t == 0 || t <= sbi->hot_data_age_threshold) + return -EINVAL; + if (t == *ui) + return count; + *ui = (unsigned int)t; + return count; + } + *ui = (unsigned int)t; return count; @@ -721,7 +758,7 @@ static void f2fs_sb_release(struct kobject *kobj) static ssize_t f2fs_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return sprintf(buf, "supported\n"); + return sysfs_emit(buf, "supported\n"); } #define F2FS_FEATURE_RO_ATTR(_name) \ @@ -734,8 +771,8 @@ static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { if (F2FS_HAS_FEATURE(sbi, a->id)) - return sprintf(buf, "supported\n"); - return sprintf(buf, "unsupported\n"); + return sysfs_emit(buf, "supported\n"); + return sysfs_emit(buf, "unsupported\n"); } #define F2FS_SB_FEATURE_RO_ATTR(_name, _feat) \ @@ -788,9 +825,10 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_req F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_urgent_util, discard_urgent_util); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); @@ -825,7 +863,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_remaining_trials, gc_remaining_trials); F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); @@ -838,6 +876,7 @@ F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -902,6 +941,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block); +/* For block age extent cache */ +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold); + #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent_sleep_time), @@ -917,9 +960,11 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_discard_issue_time), ATTR_LIST(mid_discard_issue_time), ATTR_LIST(max_discard_issue_time), + ATTR_LIST(discard_urgent_util), ATTR_LIST(discard_granularity), + ATTR_LIST(max_ordered_discard), ATTR_LIST(pending_discard), - ATTR_LIST(batched_trim_sections), + ATTR_LIST(gc_mode), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), @@ -952,7 +997,7 @@ static struct attribute *f2fs_attrs[] = { #endif ATTR_LIST(data_io_flag), ATTR_LIST(node_io_flag), - ATTR_LIST(gc_urgent_high_remaining), + ATTR_LIST(gc_remaining_trials), ATTR_LIST(ckpt_thread_ioprio), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), @@ -995,6 +1040,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(peak_atomic_write), ATTR_LIST(committed_atomic_block), ATTR_LIST(revoked_atomic_block), + ATTR_LIST(hot_data_age_threshold), + ATTR_LIST(warm_data_age_threshold), NULL, }; ATTRIBUTE_GROUPS(f2fs); @@ -1243,6 +1290,44 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused discard_plist_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i, count; + + seq_puts(seq, "Discard pend list(Show diacrd_cmd count on each entry, .:not exist):\n"); + if (!f2fs_realtime_discard_enable(sbi)) + return 0; + + if (dcc) { + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) { + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + + if (i % 8 == 0) + seq_printf(seq, " %-3d", i); + count = 0; + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) + count++; + if (count) + seq_printf(seq, " %7d", count); + else + seq_puts(seq, " ."); + if (i % 8 == 7) + seq_putc(seq, '\n'); + } + seq_putc(seq, '\n'); + mutex_unlock(&dcc->cmd_lock); + } + + return 0; +} + int __init f2fs_init_sysfs(void) { int ret; @@ -1313,6 +1398,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) #endif proc_create_single_data("victim_bits", 0444, sbi->s_proc, victim_bits_seq_show, sb); + proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, + discard_plist_seq_show, sb); } return 0; put_feature_list_kobj: @@ -1336,6 +1423,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry("victim_bits", sbi->s_proc); + remove_proc_entry("discard_plist_info", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } |