summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 10:55:36 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 10:55:36 -0700
commit3021112598d2b722eee54d8a662fea2089abbdbc (patch)
tree640c5f7b1d8ece378e3e67bd3c401c80b40ecea8
parent0af9fb63915cf5ebb47b5c9ff16526b47545baf5 (diff)
parent48b230a583965d33c32b4e3c29a1e5e15d7e55de (diff)
Merge tag 'for-f2fs-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This patch-set includes the following major enhancement patches. - introduce large directory support - introduce f2fs_issue_flush to merge redundant flush commands - merge write IOs as much as possible aligned to the segment - add sysfs entries to tune the f2fs configuration - use radix_tree for the free_nid_list to reduce in-memory operations - remove costly bit operations in f2fs_find_entry - enhance the readahead flow for CP/NAT/SIT/SSA blocks The other bug fixes are as follows: - recover xattr node blocks correctly after sudden-power-cut - fix to calculate the maximum number of node ids - enhance to handle many error cases And, there are a bunch of cleanups" * tag 'for-f2fs-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (62 commits) f2fs: fix wrong statistics of inline data f2fs: check the acl's validity before setting f2fs: introduce f2fs_issue_flush to avoid redundant flush issue f2fs: fix to cover io->bio with io_rwsem f2fs: fix error path when fail to read inline data f2fs: use list_for_each_entry{_safe} for simplyfying code f2fs: avoid free slab cache under spinlock f2fs: avoid unneeded lookup when xattr name length is too long f2fs: avoid unnecessary bio submit when wait page writeback f2fs: return -EIO when node id is not matched f2fs: avoid RECLAIM_FS-ON-W warning f2fs: skip unnecessary node writes during fsync f2fs: introduce fi->i_sem to protect fi's info f2fs: change reclaim rate in percentage f2fs: add missing documentation for dir_level f2fs: remove unnecessary threshold f2fs: throttle the memory footprint with a sysfs entry f2fs: avoid to drop nat entries due to the negative nr_shrink f2fs: call f2fs_wait_on_page_writeback instead of native function f2fs: introduce nr_pages_to_write for segment alignment ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs12
-rw-r--r--Documentation/filesystems/f2fs.txt29
-rw-r--r--fs/f2fs/acl.c8
-rw-r--r--fs/f2fs/checkpoint.c208
-rw-r--r--fs/f2fs/data.c106
-rw-r--r--fs/f2fs/debug.c12
-rw-r--r--fs/f2fs/dir.c85
-rw-r--r--fs/f2fs/f2fs.h105
-rw-r--r--fs/f2fs/file.c31
-rw-r--r--fs/f2fs/gc.c16
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/inode.c27
-rw-r--r--fs/f2fs/namei.c9
-rw-r--r--fs/f2fs/node.c334
-rw-r--r--fs/f2fs/node.h25
-rw-r--r--fs/f2fs/recovery.c37
-rw-r--r--fs/f2fs/segment.c222
-rw-r--r--fs/f2fs/segment.h75
-rw-r--r--fs/f2fs/super.c97
-rw-r--r--fs/f2fs/xattr.c7
-rw-r--r--include/linux/f2fs_fs.h2
21 files changed, 939 insertions, 512 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 32b0809203dd..62dd72522d6e 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -55,3 +55,15 @@ Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the number of trials to find a victim segment.
+
+What: /sys/fs/f2fs/<disk>/dir_level
+Date: March 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the directory level for large directory.
+
+What: /sys/fs/f2fs/<disk>/ram_thresh
+Date: March 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the memory footprint used by f2fs.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index b8d284975f0f..25311e113e75 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs
inline_xattr Enable the inline xattrs feature.
inline_data Enable the inline data feature: New created small(<~3.4k)
files can be written into inode block.
+flush_merge Merge concurrent cache_flush commands as much as possible
+ to eliminate redundant command issues. If the underlying
+ device handles the cache_flush command relatively slowly,
+ recommend to enable this option.
================================================================================
DEBUGFS ENTRIES
@@ -169,9 +173,11 @@ Files in /sys/fs/f2fs/<devname>
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
- segments is larger than this number, f2fs tries to
- conduct checkpoint to reclaim the prefree segments
- to free segments. By default, 100 segments, 200MB.
+ segments is larger than the number of segments
+ in the proportion to the percentage over total
+ volume size, f2fs tries to conduct checkpoint to
+ reclaim the prefree segments to free segments.
+ By default, 5% over total # of segments.
max_small_discards This parameter controls the number of discard
commands that consist small blocks less than 2MB.
@@ -195,6 +201,17 @@ Files in /sys/fs/f2fs/<devname>
cleaning operations. The default value is 4096
which covers 8GB block address range.
+ dir_level This parameter controls the directory level to
+ support large directory. If a directory has a
+ number of files, it can reduce the file lookup
+ latency by increasing this dir_level value.
+ Otherwise, it needs to decrease this value to
+ reduce the space overhead. The default value is 0.
+
+ ram_thresh This parameter controls the memory footprint used
+ by free nids and cached nat entries. By default,
+ 10 is set, which indicates 10 MB / 1 GB RAM.
+
================================================================================
USAGE
================================================================================
@@ -444,9 +461,11 @@ The number of blocks and buckets are determined by,
# of blocks in level #n = |
`- 4, Otherwise
- ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
+ ,- 2^ (n + dir_level),
+ | if n < MAX_DIR_HASH_DEPTH / 2,
# of buckets in level #n = |
- `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise
+ `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
+ Otherwise
When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa8da4cb8c4b..e93e4ec7d165 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
if (retval > 0) {
- value = kmalloc(retval, GFP_KERNEL);
+ value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value, retval);
@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type,
size_t size = 0;
int error;
+ if (acl) {
+ error = posix_acl_valid(acl);
+ if (error < 0)
+ return error;
+ }
+
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 293d0486a40f..4aa521aa9bc3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
- page = grab_cache_page(mapping, index);
+ page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
if (!page) {
cond_resched();
goto repeat;
}
- /* We wait writeback only inside grab_meta_page() */
- wait_on_page_writeback(page);
SetPageUptodate(page);
return page;
}
@@ -75,23 +73,102 @@ out:
return page;
}
+inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+{
+ switch (type) {
+ case META_NAT:
+ return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
+ case META_SIT:
+ return SIT_BLK_CNT(sbi);
+ case META_SSA:
+ case META_CP:
+ return 0;
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Readahead CP/NAT/SIT/SSA pages
+ */
+int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
+{
+ block_t prev_blk_addr = 0;
+ struct page *page;
+ int blkno = start;
+ int max_blks = get_max_meta_blks(sbi, type);
+
+ struct f2fs_io_info fio = {
+ .type = META,
+ .rw = READ_SYNC | REQ_META | REQ_PRIO
+ };
+
+ for (; nrpages-- > 0; blkno++) {
+ block_t blk_addr;
+
+ switch (type) {
+ case META_NAT:
+ /* get nat block addr */
+ if (unlikely(blkno >= max_blks))
+ blkno = 0;
+ blk_addr = current_nat_addr(sbi,
+ blkno * NAT_ENTRY_PER_BLOCK);
+ break;
+ case META_SIT:
+ /* get sit block addr */
+ if (unlikely(blkno >= max_blks))
+ goto out;
+ blk_addr = current_sit_addr(sbi,
+ blkno * SIT_ENTRY_PER_BLOCK);
+ if (blkno != start && prev_blk_addr + 1 != blk_addr)
+ goto out;
+ prev_blk_addr = blk_addr;
+ break;
+ case META_SSA:
+ case META_CP:
+ /* get ssa/cp block addr */
+ blk_addr = blkno;
+ break;
+ default:
+ BUG();
+ }
+
+ page = grab_cache_page(META_MAPPING(sbi), blk_addr);
+ if (!page)
+ continue;
+ if (PageUptodate(page)) {
+ mark_page_accessed(page);
+ f2fs_put_page(page, 1);
+ continue;
+ }
+
+ f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
+ mark_page_accessed(page);
+ f2fs_put_page(page, 0);
+ }
+out:
+ f2fs_submit_merged_bio(sbi, META, READ);
+ return blkno - start;
+}
+
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- /* Should not write any meta pages, if any IO error was occurred */
- if (unlikely(sbi->por_doing ||
- is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ if (unlikely(sbi->por_doing))
goto redirty_out;
-
if (wbc->for_reclaim)
goto redirty_out;
- wait_on_page_writeback(page);
+ /* Should not write any meta pages, if any IO error was occurred */
+ if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ goto no_write;
+ f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
+no_write:
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
return 0;
@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page,
redirty_out:
dec_page_count(sbi, F2FS_DIRTY_META);
wbc->pages_skipped++;
+ account_page_redirty(page);
set_page_dirty(page);
return AOP_WRITEPAGE_ACTIVATE;
}
@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
- long written;
-
- if (wbc->for_kupdate)
- return 0;
+ long diff, written;
/* collect a number of dirty meta pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
- return 0;
+ if (wbc->for_kupdate ||
+ get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
+ goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
- written = sync_meta_pages(sbi, META, nrpages);
+ diff = nr_pages_to_write(sbi, META, wbc);
+ written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
- wbc->nr_to_write -= written;
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
return 0;
}
@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
+
lock_page(page);
- f2fs_bug_on(page->mapping != mapping);
- f2fs_bug_on(!PageDirty(page));
- clear_page_dirty_for_io(page);
+
+ if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;
+
if (f2fs_write_meta_page(page, &wbc)) {
unlock_page(page);
break;
@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head, *this;
- struct orphan_inode_entry *new = NULL, *orphan = NULL;
+ struct list_head *head;
+ struct orphan_inode_entry *new, *orphan;
new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
new->ino = ino;
spin_lock(&sbi->orphan_inode_lock);
head = &sbi->orphan_inode_list;
- list_for_each(this, head) {
- orphan = list_entry(this, struct orphan_inode_entry, list);
+ list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
spin_unlock(&sbi->orphan_inode_lock);
kmem_cache_free(orphan_entry_slab, new);
@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (orphan->ino > ino)
break;
- orphan = NULL;
}
- /* add new_oentry into list which is sorted by inode number */
- if (orphan)
- list_add(&new->list, this->prev);
- else
- list_add_tail(&new->list, head);
+ /* add new orphan entry into list which is sorted by inode number */
+ list_add_tail(&new->list, &orphan->list);
spin_unlock(&sbi->orphan_inode_lock);
}
@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
list_for_each_entry(orphan, head, list) {
if (orphan->ino == ino) {
list_del(&orphan->list);
- kmem_cache_free(orphan_entry_slab, orphan);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- break;
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, orphan);
+ return;
}
}
spin_unlock(&sbi->orphan_inode_lock);
@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
start_blk = __start_cp_addr(sbi) + 1;
orphan_blkaddr = __start_sum_addr(sbi) - 1;
+ ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
+
for (i = 0; i < orphan_blkaddr; i++) {
struct page *page = get_meta_page(sbi, start_blk + i);
struct f2fs_orphan_block *orphan_blk;
@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct list_head *head = &sbi->dir_inode_list;
- struct list_head *this;
+ struct dir_inode_entry *entry;
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
+ list_for_each_entry(entry, head, list)
if (unlikely(entry->inode == inode))
return -EEXIST;
- }
+
list_add_tail(&new->list, head);
stat_inc_dirty_dir(sbi);
return 0;
@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct dir_inode_entry *new;
+ int ret = 0;
if (!S_ISDIR(inode->i_mode))
return;
@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page)
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
- if (__add_dirty_inode(inode, new))
- kmem_cache_free(inode_entry_slab, new);
-
- inc_page_count(sbi, F2FS_DIRTY_DENTS);
+ ret = __add_dirty_inode(inode, new);
inode_inc_dirty_dents(inode);
SetPagePrivate(page);
spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
}
void add_dirty_dir_inode(struct inode *inode)
@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct dir_inode_entry *new =
f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
+ int ret = 0;
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
- if (__add_dirty_inode(inode, new))
- kmem_cache_free(inode_entry_slab, new);
+ ret = __add_dirty_inode(inode, new);
spin_unlock(&sbi->dir_inode_lock);
+
+ if (ret)
+ kmem_cache_free(inode_entry_slab, new);
}
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-
- struct list_head *this, *head;
+ struct list_head *head;
+ struct dir_inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&sbi->dir_inode_lock);
- if (atomic_read(&F2FS_I(inode)->dirty_dents)) {
+ if (get_dirty_dents(inode)) {
spin_unlock(&sbi->dir_inode_lock);
return;
}
head = &sbi->dir_inode_list;
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
+ list_for_each_entry(entry, head, list) {
if (entry->inode == inode) {
list_del(&entry->list);
- kmem_cache_free(inode_entry_slab, entry);
stat_dec_dirty_dir(sbi);
- break;
+ spin_unlock(&sbi->dir_inode_lock);
+ kmem_cache_free(inode_entry_slab, entry);
+ goto done;
}
}
spin_unlock(&sbi->dir_inode_lock);
+done:
/* Only from the recovery routine */
if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode)
struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *this, *head;
+ struct list_head *head;
struct inode *inode = NULL;
+ struct dir_inode_entry *entry;
spin_lock(&sbi->dir_inode_lock);
head = &sbi->dir_inode_list;
- list_for_each(this, head) {
- struct dir_inode_entry *entry;
- entry = list_entry(this, struct dir_inode_entry, list);
+ list_for_each_entry(entry, head, list) {
if (entry->inode->i_ino == ino) {
inode = entry->inode;
break;
@@ -589,7 +680,7 @@ retry:
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
if (inode) {
- filemap_flush(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
iput(inode);
} else {
/*
@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
unblock_operations(sbi);
mutex_unlock(&sbi->cp_mutex);
+ stat_inc_cp_count(sbi->stat_info);
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}
@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
int __init create_checkpoint_caches(void)
{
orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry), NULL);
+ sizeof(struct orphan_inode_entry));
if (!orphan_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
- sizeof(struct dir_inode_entry), NULL);
+ sizeof(struct dir_inode_entry));
if (!inode_entry_slab) {
kmem_cache_destroy(orphan_entry_slab);
return -ENOMEM;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2261ccdd0b5f..45abd60e2bff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)
static void f2fs_write_end_io(struct bio *bio, int err)
{
- struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb);
+ struct f2fs_sb_info *sbi = bio->bi_private;
struct bio_vec *bvec;
int i;
@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err)
if (unlikely(err)) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
- set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
- sbi->sb->s_flags |= MS_RDONLY;
+ f2fs_stop_checkpoint(sbi);
}
end_page_writeback(page);
dec_page_count(sbi, F2FS_WRITEBACK);
}
- if (bio->bi_private)
- complete(bio->bi_private);
+ if (sbi->wait_io) {
+ complete(sbi->wait_io);
+ sbi->wait_io = NULL;
+ }
if (!get_pages(sbi, F2FS_WRITEBACK) &&
!list_empty(&sbi->cp_wait.task_list))
@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
+ bio->bi_private = sbi;
return bio;
}
@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
*/
if (fio->type == META_FLUSH) {
DECLARE_COMPLETION_ONSTACK(wait);
- io->bio->bi_private = &wait;
+ io->sbi->wait_io = &wait;
submit_bio(rw, io->bio);
wait_for_completion(&wait);
} else {
@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
- mutex_lock(&io->io_mutex);
+ down_write(&io->io_rwsem);
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
- mutex_unlock(&io->io_mutex);
+ up_write(&io->io_rwsem);
}
/*
@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
verify_block_addr(sbi, blk_addr);
- mutex_lock(&io->io_mutex);
+ down_write(&io->io_rwsem);
if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);
@@ -202,7 +204,7 @@ alloc_new:
io->last_block_in_bio = blk_addr;
- mutex_unlock(&io->io_mutex);
+ up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}
@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page,
*/
offset = i_size & (PAGE_CACHE_SIZE - 1);
if ((page->index >= end_index + 1) || !offset) {
- if (S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
- inode_dec_dirty_dents(inode);
- }
+ inode_dec_dirty_dents(inode);
goto out;
}
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
- if (unlikely(sbi->por_doing)) {
- err = AOP_WRITEPAGE_ACTIVATE;
+ if (unlikely(sbi->por_doing))
goto redirty_out;
- }
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(inode);
err = do_write_data_page(page, &fio);
- } else {
- f2fs_lock_op(sbi);
-
- if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
- err = f2fs_write_inline_data(inode, page, offset);
- f2fs_unlock_op(sbi);
- goto out;
- } else {
- err = do_write_data_page(page, &fio);
- }
+ goto done;
+ }
- f2fs_unlock_op(sbi);
+ if (!wbc->for_reclaim)
need_balance_fs = true;
- }
- if (err == -ENOENT)
- goto out;
- else if (err)
+ else if (has_not_enough_free_secs(sbi, 0))
goto redirty_out;
- if (wbc->for_reclaim) {
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
- need_balance_fs = false;
- }
+ f2fs_lock_op(sbi);
+ if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
+ err = f2fs_write_inline_data(inode, page, offset);
+ else
+ err = do_write_data_page(page, &fio);
+ f2fs_unlock_op(sbi);
+done:
+ if (err && err != -ENOENT)
+ goto redirty_out;
clear_cold_data(page);
out:
@@ -849,12 +839,11 @@ out:
redirty_out:
wbc->pages_skipped++;
+ account_page_redirty(page);
set_page_dirty(page);
- return err;
+ return AOP_WRITEPAGE_ACTIVATE;
}
-#define MAX_DESIRED_PAGES_WP 4096
-
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
bool locked = false;
int ret;
- long excess_nrtw = 0, desired_nrtw;
+ long diff;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
return 0;
- if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
- desired_nrtw = MAX_DESIRED_PAGES_WP;
- excess_nrtw = desired_nrtw - wbc->nr_to_write;
- wbc->nr_to_write = desired_nrtw;
- }
+ if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+ get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
+ goto skip_write;
+
+ diff = nr_pages_to_write(sbi, DATA, wbc);
if (!S_ISDIR(inode->i_mode)) {
mutex_lock(&sbi->writepages);
@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping,
remove_dirty_dir_inode(inode);
- wbc->nr_to_write -= excess_nrtw;
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
+
+skip_write:
+ wbc->pages_skipped += get_dirty_dents(inode);
+ return 0;
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
@@ -949,13 +942,19 @@ inline_data:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- if (f2fs_has_inline_data(inode))
+ if (f2fs_has_inline_data(inode)) {
err = f2fs_read_inline_data(inode, page);
- else
+ if (err) {
+ page_cache_release(page);
+ return err;
+ }
+ } else {
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
- if (err)
- return err;
+ if (err)
+ return err;
+ }
+
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
unsigned int length)
{
struct inode *inode = page->mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
+ if (PageDirty(page))
inode_dec_dirty_dents(inode);
- }
ClearPagePrivate(page);
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3de9d20d0c14..b52c12cf5873 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
- struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno, vblocks;
int ndirty = 0;
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
hblks_per_sec = blks_per_sec / 2;
- mutex_lock(&sit_i->sentry_lock);
for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
dist = abs(vblocks - hblks_per_sec);
@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
ndirty++;
}
}
- mutex_unlock(&sit_i->sentry_lock);
dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100;
si->bimodal = bimodal / dist;
if (si->dirty_count)
@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
+ seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d\n", si->data_segs);
@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_dent, si->ndirty_dirs);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
- seq_printf(s, " - NATs: %5d > %lu\n",
- si->nats, NM_WOUT_THRESHOLD);
- seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
- si->sits, si->fnids);
+ seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
+ si->nats, si->sits);
+ seq_printf(s, " - free_nids: %9d\n",
+ si->fnids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 2b7c255bcbdf..972fd0ef230f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
>> PAGE_CACHE_SHIFT;
}
-static unsigned int dir_buckets(unsigned int level)
+static unsigned int dir_buckets(unsigned int level, int dir_level)
{
if (level < MAX_DIR_HASH_DEPTH / 2)
- return 1 << level;
+ return 1 << (level + dir_level);
else
- return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
+ return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
}
static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
-static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+static unsigned long dir_block_index(unsigned int level,
+ int dir_level, unsigned int idx)
{
unsigned long i;
unsigned long bidx = 0;
for (i = 0; i < level; i++)
- bidx += dir_buckets(i) * bucket_blocks(i);
+ bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
bidx += idx * bucket_blocks(level);
return bidx;
}
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
- unsigned long bit_pos, end_pos, next_pos;
+ unsigned long bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
- int slots;
+ const void *dentry_bits = &dentry_blk->dentry_bitmap;
+ int max_len = 0;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, 0);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
+ if (!test_bit_le(bit_pos, dentry_bits)) {
+ if (bit_pos == 0)
+ max_len = 1;
+ else if (!test_bit_le(bit_pos - 1, dentry_bits))
+ max_len++;
+ bit_pos++;
+ continue;
+ }
de = &dentry_blk->dentry[bit_pos];
- slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
-
if (early_match_name(name, namelen, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
name, namelen)) {
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
goto found;
}
}
- next_pos = bit_pos + slots;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, next_pos);
- if (bit_pos >= NR_DENTRY_IN_BLOCK)
- end_pos = NR_DENTRY_IN_BLOCK;
- else
- end_pos = bit_pos;
- if (*max_slots < end_pos - next_pos)
- *max_slots = end_pos - next_pos;
+ if (max_len > *max_slots) {
+ *max_slots = max_len;
+ max_len = 0;
+ }
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
de = NULL;
kunmap(dentry_page);
found:
+ if (max_len > *max_slots)
+ *max_slots = max_len;
return de;
}
@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);
- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
- bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ le32_to_cpu(namehash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
lock_page(page);
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
kunmap(page);
@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode,
err = f2fs_init_security(inode, dir, name, page);
if (err)
goto put_error;
-
- wait_on_page_writeback(page);
} else {
page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
if (IS_ERR(page))
return page;
- wait_on_page_writeback(page);
set_cold_node(inode, page);
}
@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode,
put_error:
f2fs_put_page(page, 1);
+ /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
+ truncate_inode_pages(&inode->i_data, 0);
+ truncate_blocks(inode, 0);
+ remove_dirty_dir_inode(inode);
error:
remove_inode_page(inode);
return ERR_PTR(err);
@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
- if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
- update_inode_page(dir);
-
if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
@@ -464,10 +467,11 @@ start:
if (level == current_depth)
++current_depth;
- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
- bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ (le32_to_cpu(dentry_hash) % nbucket));
for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
@@ -487,8 +491,9 @@ start:
++level;
goto start;
add_dentry:
- wait_on_page_writeback(dentry_page);
+ f2fs_wait_on_page_writeback(dentry_page, DATA);
+ down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, name);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -511,7 +516,12 @@ add_dentry:
update_parent_metadata(dir, inode, current_depth);
fail:
- clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode_page(dir);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
return err;
@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
unsigned int bit_pos;
struct address_space *mapping = page->mapping;
struct inode *dir = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
void *kaddr = page_address(page);
int i;
lock_page(page);
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
dentry_blk = (struct f2fs_dentry_block *)kaddr;
bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+
+ down_write(&F2FS_I(inode)->i_sem);
+
if (S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
update_inode_page(dir);
@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
drop_nlink(inode);
i_size_write(inode, 0);
}
+ up_write(&F2FS_I(inode)->i_sem);
update_inode_page(inode);
if (inode->i_nlink == 0)
@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
truncate_hole(dir, page->index, page->index + 1);
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
inode_dec_dirty_dents(dir);
}
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fc3c558cb4f3..2ecac8312359 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -40,6 +40,7 @@
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -88,6 +89,16 @@ enum {
SIT_BITMAP
};
+/*
+ * For CP/NAT/SIT/SSA readahead
+ */
+enum {
+ META_CP,
+ META_NAT,
+ META_SIT,
+ META_SSA
+};
+
/* for the list of orphan inodes */
struct orphan_inode_entry {
struct list_head list; /* list head */
@@ -187,16 +198,20 @@ struct extent_info {
#define FADVISE_COLD_BIT 0x01
#define FADVISE_LOST_PINO_BIT 0x02
+#define DEF_DIR_LEVEL 0
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
+ unsigned char i_dir_level; /* use for dentry level for large dir */
unsigned int i_current_depth; /* use only in directory structure */
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs*/
unsigned long flags; /* use to pass per-file flags */
+ struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_dents; /* # of dirty dentry pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
@@ -229,6 +244,7 @@ struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
+ unsigned int ram_thresh; /* control the memory footprint */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -238,6 +254,7 @@ struct f2fs_nm_info {
struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
/* free node ids management */
+ struct radix_tree_root free_nid_root;/* root of the free_nid cache */
struct list_head free_nid_list; /* a list for free nids */
spinlock_t free_nid_list_lock; /* protect free nid list */
unsigned int fcnt; /* the number of free node id */
@@ -300,6 +317,12 @@ enum {
NO_CHECK_TYPE
};
+struct flush_cmd {
+ struct flush_cmd *next;
+ struct completion wait;
+ int ret;
+};
+
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
@@ -328,6 +351,14 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
+
+ /* for flush command control */
+ struct task_struct *f2fs_issue_flush; /* flush thread */
+ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
+ struct flush_cmd *issue_list; /* list for command issue */
+ struct flush_cmd *dispatch_list; /* list for command dispatch */
+ spinlock_t issue_lock; /* for issue list lock */
+ struct flush_cmd *issue_tail; /* list tail of issue list */
};
/*
@@ -378,7 +409,7 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
- struct mutex io_mutex; /* mutex for bio */
+ struct rw_semaphore io_rwsem; /* blocking op for bio */
};
struct f2fs_sb_info {
@@ -398,6 +429,7 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
+ struct completion *wait_io; /* for completion bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -407,7 +439,6 @@ struct f2fs_sb_info {
struct mutex node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
bool por_doing; /* recovery is doing or not */
- bool on_build_free_nids; /* build_free_nids is doing */
wait_queue_head_t cp_wait;
/* for orphan inode management */
@@ -436,6 +467,7 @@ struct f2fs_sb_info {
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
int active_logs; /* # of active logs */
+ int dir_level; /* directory level */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode)
return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
}
+static inline bool f2fs_has_xattr_block(unsigned int ofs)
+{
+ return ofs == XATTR_NODE_OFFSET;
+}
+
static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode, blkcnt_t count)
{
@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_dents(struct inode *inode)
{
+ inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_inc(&F2FS_I(inode)->dirty_dents);
}
@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_dec_dirty_dents(struct inode *inode)
{
+ if (!S_ISDIR(inode->i_mode))
+ return;
+
+ dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS);
atomic_dec(&F2FS_I(inode)->dirty_dents);
}
@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
return atomic_read(&sbi->nr_pages[count_type]);
}
+static inline int get_dirty_dents(struct inode *inode)
+{
+ return atomic_read(&F2FS_I(inode)->dirty_dents);
+}
+
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
unsigned int pages_per_sec = sbi->segs_per_sec *
@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
{
- block_t ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_block_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_block_count;
}
static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
{
- unsigned int ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_node_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_node_count;
}
static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
{
- unsigned int ret;
- spin_lock(&sbi->stat_lock);
- ret = sbi->total_valid_inode_count;
- spin_unlock(&sbi->stat_lock);
- return ret;
+ return sbi->total_valid_inode_count;
}
static inline void f2fs_put_page(struct page *page, int unlock)
@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn)
}
static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
- size_t size, void (*ctor)(void *))
+ size_t size)
{
- return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
+ return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL);
}
static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
ri->i_inline |= F2FS_INLINE_DATA;
}
+static inline int f2fs_has_inline_xattr(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
+}
+
static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
{
- if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+ if (f2fs_has_inline_xattr(&fi->vfs_inode))
return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
return DEF_ADDRS_PER_INODE;
}
static inline void *inline_xattr_addr(struct page *page)
{
- struct f2fs_inode *ri;
- ri = (struct f2fs_inode *)page_address(page);
+ struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
F2FS_INLINE_XATTR_ADDRS]);
}
static inline int inline_xattr_size(struct inode *inode)
{
- if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR))
+ if (f2fs_has_inline_xattr(inode))
return F2FS_INLINE_XATTR_ADDRS << 2;
else
return 0;
@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode)
static inline void *inline_data_addr(struct page *page)
{
- struct f2fs_inode *ri;
- ri = (struct f2fs_inode *)page_address(page);
+ struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[1]);
}
@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb)
return sb->s_flags & MS_RDONLY;
}
+static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
+{
+ set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+ sbi->sb->s_flags |= MS_RDONLY;
+}
+
#define get_inode_mode(i) \
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
int try_to_free_nats(struct f2fs_sb_info *, int);
void update_inode(struct inode *, struct page *);
-int update_inode_page(struct inode *);
+void update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
@@ -1097,6 +1141,7 @@ struct dnode_of_data;
struct node_info;
int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
void recover_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, struct node_info *, block_t);
+bool recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
int restore_node_summary(struct f2fs_sb_info *, unsigned int,
struct f2fs_summary_block *);
@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void);
*/
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
+int f2fs_issue_flush(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
+void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void);
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
@@ -1231,7 +1280,7 @@ struct f2fs_stat_info {
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
- int prefree_count, call_count;
+ int prefree_count, call_count, cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int tot_blks, data_blks, node_blks;
int curseg[NR_CURSEG_TYPE];
@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
return (struct f2fs_stat_info *)sbi->stat_info;
}
+#define stat_inc_cp_count(si) ((si)->cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *);
void __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
+#define stat_inc_cp_count(si)
#define stat_inc_call_count(si)
#define stat_inc_bggc_count(si)
#define stat_inc_dirty_dir(sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0dfcef53a6ed..302d552afea5 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
/* fill the page */
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(err);
@@ -111,11 +111,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int ret = 0;
bool need_cp = false;
struct writeback_control wbc = {
- .sync_mode = WB_SYNC_NONE,
+ .sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
@@ -133,7 +134,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
- mutex_lock(&inode->i_mutex);
+ down_read(&fi->i_sem);
/*
* Both of fdatasync() and fsync() are able to be recovered from
@@ -150,25 +151,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
+ up_read(&fi->i_sem);
+
if (need_cp) {
nid_t pino;
- F2FS_I(inode)->xattr_ver = 0;
-
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
+
+ down_write(&fi->i_sem);
+ F2FS_I(inode)->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
F2FS_I(inode)->i_pino = pino;
file_got_pino(inode);
+ up_write(&fi->i_sem);
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
goto out;
+ } else {
+ up_write(&fi->i_sem);
}
} else {
/* if there is no written node page, write its inode page */
while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
+ if (fsync_mark_done(sbi, inode->i_ino))
+ goto out;
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
@@ -177,10 +186,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
- mutex_unlock(&inode->i_mutex);
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
@@ -245,7 +253,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
f2fs_put_page(page, 1);
return;
}
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -422,7 +430,7 @@ static void fill_zero(struct inode *inode, pgoff_t index,
f2fs_unlock_op(sbi);
if (!IS_ERR(page)) {
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -560,6 +568,8 @@ static long f2fs_fallocate(struct file *file, int mode,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
+ mutex_lock(&inode->i_mutex);
+
if (mode & FALLOC_FL_PUNCH_HOLE)
ret = punch_hole(inode, offset, len);
else
@@ -569,6 +579,9 @@ static long f2fs_fallocate(struct file *file, int mode,
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
+
+ mutex_unlock(&inode->i_mutex);
+
trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ea0371e854b4..b90dbe55403a 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
set_page_dirty(page);
set_cold_data(page);
} else {
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-
f2fs_wait_on_page_writeback(page, DATA);
- if (clear_page_dirty_for_io(page) &&
- S_ISDIR(inode->i_mode)) {
- dec_page_count(sbi, F2FS_DIRTY_DENTS);
+ if (clear_page_dirty_for_io(page))
inode_dec_dirty_dents(inode);
- }
set_cold_data(page);
do_write_data_page(page, &fio);
clear_cold_data(page);
@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
+ if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+ goto stop;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
gc_type = FG_GC;
@@ -711,6 +708,11 @@ gc_more:
goto stop;
ret = 0;
+ /* readahead multi ssa blocks those have contiguous address */
+ if (sbi->segs_per_sec > 1)
+ ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
+ META_SSA);
+
for (i = 0; i < sbi->segs_per_sec; i++)
do_garbage_collect(sbi, segno + i, &ilist, gc_type);
@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
int __init create_gc_caches(void)
{
winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
- sizeof(struct inode_entry), NULL);
+ sizeof(struct inode_entry));
if (!winode_slab)
return -ENOMEM;
return 0;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 31ee5b164ff9..383db1fabcf4 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
}
ipage = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage))
+ if (IS_ERR(ipage)) {
+ unlock_page(page);
return PTR_ERR(ipage);
+ }
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 28cea76d78c6..ee829d360468 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
+ fi->i_dir_level = ri->i_dir_level;
get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
+ ri->i_dir_level = F2FS_I(inode)->i_dir_level;
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page)
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
}
-int update_inode_page(struct inode *inode)
+void update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct page *node_page;
-
+retry:
node_page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(node_page))
- return PTR_ERR(node_page);
-
+ if (IS_ERR(node_page)) {
+ int err = PTR_ERR(node_page);
+ if (err == -ENOMEM) {
+ cond_resched();
+ goto retry;
+ } else if (err != -ENOENT) {
+ f2fs_stop_checkpoint(sbi);
+ }
+ return;
+ }
update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
- return 0;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- int ret;
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* during the urgent cleaning time when runing out of free sections.
*/
f2fs_lock_op(sbi);
- ret = update_inode_page(inode);
+ update_inode_page(inode);
f2fs_unlock_op(sbi);
if (wbc)
f2fs_balance_fs(sbi);
- return ret;
+ return 0;
}
/*
@@ -266,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode)
inode->i_ino == F2FS_META_INO(sbi))
goto no_delete;
- f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents));
+ f2fs_bug_on(get_dirty_dents(inode));
remove_dirty_dir_inode(inode);
if (inode->i_nlink || is_bad_inode(inode))
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 397d459e97bf..a9409d19dfd4 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
+
+ stat_inc_inline_inode(inode);
}
return d_splice_alias(inode, dentry);
@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
+ down_write(&F2FS_I(old_inode)->i_sem);
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
+ up_write(&F2FS_I(old_inode)->i_sem);
new_inode->i_ctime = CURRENT_TIME;
+ down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
drop_nlink(new_inode);
drop_nlink(new_inode);
+ up_write(&F2FS_I(new_inode)->i_sem);
+
mark_inode_dirty(new_inode);
if (!new_inode->i_nlink)
@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
+ down_write(&F2FS_I(old_inode)->i_sem);
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
+ up_write(&F2FS_I(old_inode)->i_sem);
update_inode_page(old_inode);
} else {
kunmap(old_dir_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b0649b76eb4f..a161e955c4c8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -21,9 +21,27 @@
#include "segment.h"
#include <trace/events/f2fs.h>
+#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
+
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
+{
+ struct sysinfo val;
+ unsigned long mem_size = 0;
+
+ si_meminfo(&val);
+ if (type == FREE_NIDS)
+ mem_size = nm_i->fcnt * sizeof(struct free_nid);
+ else if (type == NAT_ENTRIES)
+ mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
+ mem_size >>= 12;
+
+ /* give 50:50 memory for free nids and nat caches respectively */
+ return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
+}
+
static void clear_node_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
-/*
- * Readahead NAT pages
- */
-static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
-{
- struct address_space *mapping = META_MAPPING(sbi);
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct page *page;
- pgoff_t index;
- int i;
- struct f2fs_io_info fio = {
- .type = META,
- .rw = READ_SYNC | REQ_META | REQ_PRIO
- };
-
-
- for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
- if (unlikely(nid >= nm_i->max_nid))
- nid = 0;
- index = current_nat_addr(sbi, nid);
-
- page = grab_cache_page(mapping, index);
- if (!page)
- continue;
- if (PageUptodate(page)) {
- mark_page_accessed(page);
- f2fs_put_page(page, 1);
- continue;
- }
- f2fs_submit_page_mbio(sbi, page, index, &fio);
- mark_page_accessed(page);
- f2fs_put_page(page, 0);
- }
- f2fs_submit_merged_bio(sbi, META, READ);
-}
-
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
return is_cp;
}
+bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+ bool fsync_done = false;
+
+ read_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ fsync_done = e->fsync_done;
+ read_unlock(&nm_i->nat_tree_lock);
+ return fsync_done;
+}
+
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
}
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
+ new->checkpointed = true;
list_add_tail(&new->list, &nm_i->nat_entries);
nm_i->nat_cnt++;
return new;
@@ -185,13 +182,12 @@ retry:
nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
nat_set_ino(e, le32_to_cpu(ne->ino));
nat_set_version(e, ne->version);
- e->checkpointed = true;
}
write_unlock(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
- block_t new_blkaddr)
+ block_t new_blkaddr, bool fsync_done)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -205,7 +201,6 @@ retry:
goto retry;
}
e->ni = *ni;
- e->checkpointed = true;
f2fs_bug_on(ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
/*
@@ -217,9 +212,6 @@ retry:
f2fs_bug_on(ni->blk_addr != NULL_ADDR);
}
- if (new_blkaddr == NEW_ADDR)
- e->checkpointed = false;
-
/* sanity check */
f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr);
f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR &&
@@ -239,6 +231,11 @@ retry:
/* change address */
nat_set_blkaddr(e, new_blkaddr);
__set_nat_cache_dirty(nm_i, e);
+
+ /* update fsync_mark if its inode nat entry is still alive */
+ e = __lookup_nat_cache(nm_i, ni->ino);
+ if (e)
+ e->fsync_done = fsync_done;
write_unlock(&nm_i->nat_tree_lock);
}
@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD)
+ if (available_free_memory(nm_i, NAT_ENTRIES))
return 0;
write_lock(&nm_i->nat_tree_lock);
@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode);
- set_node_addr(sbi, &ni, NULL_ADDR);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
if (dn->nid == dn->inode->i_ino) {
remove_orphan_inode(sbi, dn->nid);
@@ -763,7 +760,7 @@ skip_partial:
f2fs_put_page(page, 1);
goto restart;
}
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, NODE);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
- page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
+ page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
+ dn->nid, AOP_FLAG_NOFS);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn,
f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
- set_node_addr(sbi, &new_ni, NEW_ADDR);
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
set_page_dirty(page);
- if (ofs == XATTR_NODE_OFFSET)
+ if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
dn->node_page = page;
@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
struct page *page;
int err;
repeat:
- page = grab_cache_page(NODE_MAPPING(sbi), nid);
+ page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
+ nid, AOP_FLAG_NOFS);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -959,7 +958,7 @@ repeat:
goto got_it;
lock_page(page);
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
@@ -968,7 +967,6 @@ repeat:
goto repeat;
}
got_it:
- f2fs_bug_on(nid != nid_of_node(page));
mark_page_accessed(page);
return page;
}
@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
continue;
if (ino && ino_of_node(page) == ino) {
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, NODE);
if (TestClearPageError(page))
ret = -EIO;
}
@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page,
if (unlikely(sbi->por_doing))
goto redirty_out;
- wait_on_page_writeback(page);
+ f2fs_wait_on_page_writeback(page, NODE);
/* get old block addr of this node page */
nid = nid_of_node(page);
@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page,
mutex_lock(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
- set_node_addr(sbi, &ni, new_addr);
+ set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
mutex_unlock(&sbi->node_write);
unlock_page(page);
@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page,
redirty_out:
dec_page_count(sbi, F2FS_DIRTY_NODES);
wbc->pages_skipped++;
+ account_page_redirty(page);
set_page_dirty(page);
return AOP_WRITEPAGE_ACTIVATE;
}
-/*
- * It is very important to gather dirty pages and write at once, so that we can
- * submit a big bio without interfering other data writes.
- * Be default, 512 pages (2MB) * 3 node types, is more reasonable.
- */
-#define COLLECT_DIRTY_NODES 1536
static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
- long nr_to_write = wbc->nr_to_write;
+ long diff;
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
- if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
- return 0;
+ if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
+ goto skip_write;
- /* if mounting is failed, skip writing node pages */
- wbc->nr_to_write = 3 * max_hw_blocks(sbi);
+ diff = nr_pages_to_write(sbi, NODE, wbc);
wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
- wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
- wbc->nr_to_write);
+ wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
+ return 0;
+
+skip_write:
+ wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
return 0;
}
@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = {
.releasepage = f2fs_release_node_page,
};
-static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
+static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
+ nid_t n)
{
- struct list_head *this;
- struct free_nid *i;
- list_for_each(this, head) {
- i = list_entry(this, struct free_nid, list);
- if (i->nid == n)
- return i;
- }
- return NULL;
+ return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static void __del_from_free_nid_list(struct free_nid *i)
+static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
+ struct free_nid *i)
{
list_del(&i->list);
- kmem_cache_free(free_nid_slab, i);
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
struct nat_entry *ne;
bool allocated = false;
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
+ if (!available_free_memory(nm_i, FREE_NIDS))
return -1;
/* 0 nid should not be used */
@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
/* do not add allocated nids */
read_lock(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
- if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
+ if (ne &&
+ (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
read_unlock(&nm_i->nat_tree_lock);
if (allocated)
@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
i->state = NID_NEW;
spin_lock(&nm_i->free_nid_list_lock);
- if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
+ if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
kmem_cache_free(free_nid_slab, i);
return 0;
@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct free_nid *i;
+ bool need_free = false;
+
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+ i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
- __del_from_free_nid_list(i);
+ __del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
+ need_free = true;
}
spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
}
static void scan_nat_page(struct f2fs_nm_info *nm_i,
@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
return;
/* readahead nat pages to be scanned */
- ra_nat_pages(sbi, nid);
+ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
- struct list_head *this;
retry:
if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
return false;
@@ -1462,13 +1458,11 @@ retry:
spin_lock(&nm_i->free_nid_list_lock);
/* We should not use stale free nids created by build_free_nids */
- if (nm_i->fcnt && !sbi->on_build_free_nids) {
+ if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
f2fs_bug_on(list_empty(&nm_i->free_nid_list));
- list_for_each(this, &nm_i->free_nid_list) {
- i = list_entry(this, struct free_nid, list);
+ list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
break;
- }
f2fs_bug_on(i->state != NID_NEW);
*nid = i->nid;
@@ -1481,9 +1475,7 @@ retry:
/* Let's scan nat pages and its caches to get free nids */
mutex_lock(&nm_i->build_lock);
- sbi->on_build_free_nids = true;
build_free_nids(sbi);
- sbi->on_build_free_nids = false;
mutex_unlock(&nm_i->build_lock);
goto retry;
}
@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct free_nid *i;
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+ i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
- __del_from_free_nid_list(i);
+ __del_from_free_nid_list(nm_i, i);
spin_unlock(&nm_i->free_nid_list_lock);
+
+ kmem_cache_free(free_nid_slab, i);
}
/*
@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
+ bool need_free = false;
if (!nid)
return;
spin_lock(&nm_i->free_nid_list_lock);
- i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
+ i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
- __del_from_free_nid_list(i);
+ if (!available_free_memory(nm_i, FREE_NIDS)) {
+ __del_from_free_nid_list(nm_i, i);
+ need_free = true;
} else {
i->state = NID_NEW;
nm_i->fcnt++;
}
spin_unlock(&nm_i->free_nid_list_lock);
+
+ if (need_free)
+ kmem_cache_free(free_nid_slab, i);
}
void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
block_t new_blkaddr)
{
rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
- set_node_addr(sbi, ni, new_blkaddr);
+ set_node_addr(sbi, ni, new_blkaddr, false);
clear_node_page_dirty(page);
}
+void recover_inline_xattr(struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ void *src_addr, *dst_addr;
+ size_t inline_size;
+ struct page *ipage;
+ struct f2fs_inode *ri;
+
+ if (!f2fs_has_inline_xattr(inode))
+ return;
+
+ if (!IS_INODE(page))
+ return;
+
+ ri = F2FS_INODE(page);
+ if (!(ri->i_inline & F2FS_INLINE_XATTR))
+ return;
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ f2fs_bug_on(IS_ERR(ipage));
+
+ dst_addr = inline_xattr_addr(ipage);
+ src_addr = inline_xattr_addr(page);
+ inline_size = inline_xattr_size(inode);
+
+ memcpy(dst_addr, src_addr, inline_size);
+
+ update_inode(inode, ipage);
+ f2fs_put_page(ipage, 1);
+}
+
+bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
+ nid_t new_xnid = nid_of_node(page);
+ struct node_info ni;
+
+ recover_inline_xattr(inode, page);
+
+ if (!f2fs_has_xattr_block(ofs_of_node(page)))
+ return false;
+
+ /* 1: invalidate the previous xattr nid */
+ if (!prev_xnid)
+ goto recover_xnid;
+
+ /* Deallocate node address */
+ get_node_info(sbi, prev_xnid, &ni);
+ f2fs_bug_on(ni.blk_addr == NULL_ADDR);
+ invalidate_blocks(sbi, ni.blk_addr);
+ dec_valid_node_count(sbi, inode);
+ set_node_addr(sbi, &ni, NULL_ADDR, false);
+
+recover_xnid:
+ /* 2: allocate new xattr nid */
+ if (unlikely(!inc_valid_node_count(sbi, inode)))
+ f2fs_bug_on(1);
+
+ remove_free_nid(NM_I(sbi), new_xnid);
+ get_node_info(sbi, new_xnid, &ni);
+ ni.ino = inode->i_ino;
+ set_node_addr(sbi, &ni, NEW_ADDR, false);
+ F2FS_I(inode)->i_xattr_nid = new_xnid;
+
+ /* 3: update xattr blkaddr */
+ refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
+ set_node_addr(sbi, &ni, blkaddr, false);
+
+ update_inode_page(inode);
+ return true;
+}
+
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_inode *src, *dst;
@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
if (unlikely(!inc_valid_node_count(sbi, NULL)))
WARN_ON(1);
- set_node_addr(sbi, &new_ni, NEW_ADDR);
+ set_node_addr(sbi, &new_ni, NEW_ADDR, false);
inc_valid_inode_count(sbi);
f2fs_put_page(ipage, 1);
return 0;
@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
for (; page_idx < start + nrpages; page_idx++) {
/* alloc temporal page for read node summary info*/
page = alloc_page(GFP_F2FS_ZERO);
- if (!page) {
- struct page *tmp;
- list_for_each_entry_safe(page, tmp, pages, lru) {
- list_del(&page->lru);
- unlock_page(page);
- __free_pages(page, 0);
- }
- return -ENOMEM;
- }
+ if (!page)
+ break;
lock_page(page);
page->index = page_idx;
@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
f2fs_submit_page_mbio(sbi, page, page->index, &fio);
f2fs_submit_merged_bio(sbi, META, READ);
- return 0;
+
+ return page_idx - start;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
- for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
+ for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
nrpages = min(last_offset - i, bio_blocks);
/* read ahead node pages */
- err = ra_sum_pages(sbi, &page_list, addr, nrpages);
- if (err)
- return err;
+ nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
+ if (!nrpages)
+ return -ENOMEM;
list_for_each_entry_safe(page, tmp, &page_list, lru) {
+ if (err)
+ goto skip;
lock_page(page);
if (unlikely(!PageUptodate(page))) {
@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
sum_entry->ofs_in_node = 0;
sum_entry++;
}
-
- list_del(&page->lru);
unlock_page(page);
+skip:
+ list_del(&page->lru);
__free_pages(page, 0);
}
}
@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- struct list_head *cur, *n;
+ struct nat_entry *ne, *cur;
struct page *page = NULL;
struct f2fs_nat_block *nat_blk = NULL;
nid_t start_nid = 0, end_nid = 0;
@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
mutex_lock(&curseg->curseg_mutex);
/* 1) flush dirty nat caches */
- list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
- struct nat_entry *ne;
+ list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
block_t new_blkaddr;
- ne = list_entry(cur, struct nat_entry, list);
- nid = nat_get_nid(ne);
-
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
+
+ nid = nat_get_nid(ne);
+
if (flushed)
goto to_nat_page;
@@ -1783,16 +1850,12 @@ flush_now:
} else {
write_lock(&nm_i->nat_tree_lock);
__clear_nat_cache_dirty(nm_i, ne);
- ne->checkpointed = true;
write_unlock(&nm_i->nat_tree_lock);
}
}
if (!flushed)
mutex_unlock(&curseg->curseg_mutex);
f2fs_put_page(page, 1);
-
- /* 2) shrink nat caches if necessary */
- try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
}
static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* segment_count_nat includes pair segment so divide to 2. */
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
- nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
+ /* not used nids: 0, node, meta, (and root counted as valid node) */
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
+ nm_i->ram_thresh = DEF_RAM_THRESHOLD;
+ INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
f2fs_bug_on(i->state == NID_ALLOC);
- __del_from_free_nid_list(i);
+ __del_from_free_nid_list(nm_i, i);
nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ kmem_cache_free(free_nid_slab, i);
+ spin_lock(&nm_i->free_nid_list_lock);
}
f2fs_bug_on(nm_i->fcnt);
spin_unlock(&nm_i->free_nid_list_lock);
@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
- for (idx = 0; idx < found; idx++) {
- struct nat_entry *e = natvec[idx];
- nid = nat_get_nid(e) + 1;
- __del_from_nat_cache(nm_i, e);
- }
+ nid = nat_get_nid(natvec[found - 1]) + 1;
+ for (idx = 0; idx < found; idx++)
+ __del_from_nat_cache(nm_i, natvec[idx]);
}
f2fs_bug_on(nm_i->nat_cnt);
write_unlock(&nm_i->nat_tree_lock);
@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
int __init create_node_manager_caches(void)
{
nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
- sizeof(struct nat_entry), NULL);
+ sizeof(struct nat_entry));
if (!nat_entry_slab)
return -ENOMEM;
free_nid_slab = f2fs_kmem_cache_create("free_nid",
- sizeof(struct free_nid), NULL);
+ sizeof(struct free_nid));
if (!free_nid_slab) {
kmem_cache_destroy(nat_entry_slab);
return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index c4c79885c993..5decc1a375f0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,14 +17,11 @@
/* # of pages to perform readahead before building free nids */
#define FREE_NID_PAGES 4
-/* maximum # of free node ids to produce during build_free_nids */
-#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
-
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
-/* maximum cached nat entries to manage memory footprint */
-#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
+/* control the memory footprint threshold (10MB per 1GB ram) */
+#define DEF_RAM_THRESHOLD 10
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
@@ -45,6 +42,7 @@ struct node_info {
struct nat_entry {
struct list_head list; /* for clean or dirty nat list */
bool checkpointed; /* whether it is checkpointed or not */
+ bool fsync_done; /* whether the latest node has fsync mark */
struct node_info ni; /* in-memory node information */
};
@@ -58,9 +56,15 @@ struct nat_entry {
#define nat_set_version(nat, v) (nat->ni.version = v)
#define __set_nat_cache_dirty(nm_i, ne) \
- list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
+ do { \
+ ne->checkpointed = false; \
+ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
+ } while (0);
#define __clear_nat_cache_dirty(nm_i, ne) \
- list_move_tail(&ne->list, &nm_i->nat_entries);
+ do { \
+ ne->checkpointed = true; \
+ list_move_tail(&ne->list, &nm_i->nat_entries); \
+ } while (0);
#define inc_node_version(version) (++version)
static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni,
ni->version = raw_ne->version;
}
+enum nid_type {
+ FREE_NIDS, /* indicates the free nid list */
+ NAT_ENTRIES /* indicates the cached nat entry */
+};
+
/*
* For free nid mangement
*/
@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page)
{
unsigned int ofs = ofs_of_node(node_page);
- if (ofs == XATTR_NODE_OFFSET)
+ if (f2fs_has_xattr_block(ofs))
return false;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 976a7a934db5..b1ae89f0f44e 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi)
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
nid_t ino)
{
- struct list_head *this;
struct fsync_inode_entry *entry;
- list_for_each(this, head) {
- entry = list_entry(this, struct fsync_inode_entry, list);
+ list_for_each_entry(entry, head, list)
if (entry->inode->i_ino == ino)
return entry;
- }
+
return NULL;
}
@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* read node page */
page = alloc_page(GFP_F2FS_ZERO);
@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
{
struct seg_entry *sentry;
unsigned int segno = GET_SEGNO(sbi, blkaddr);
- unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
- (sbi->blocks_per_seg - 1);
+ unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ struct f2fs_summary_block *sum_node;
struct f2fs_summary sum;
+ struct page *sum_page, *node_page;
nid_t ino, nid;
- void *kaddr;
struct inode *inode;
- struct page *node_page;
unsigned int offset;
block_t bidx;
int i;
@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
struct curseg_info *curseg = CURSEG_I(sbi, i);
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
- break;
+ goto got_it;
}
}
- if (i > CURSEG_COLD_DATA) {
- struct page *sum_page = get_sum_page(sbi, segno);
- struct f2fs_summary_block *sum_node;
- kaddr = page_address(sum_page);
- sum_node = (struct f2fs_summary_block *)kaddr;
- sum = sum_node->entries[blkoff];
- f2fs_put_page(sum_page, 1);
- }
+ sum_page = get_sum_page(sbi, segno);
+ sum_node = (struct f2fs_summary_block *)page_address(sum_page);
+ sum = sum_node->entries[blkoff];
+ f2fs_put_page(sum_page, 1);
+got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
if (dn->inode->i_ino == nid) {
@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (recover_inline_data(inode, page))
goto out;
+ if (recover_xattr_data(inode, page, blkaddr))
+ goto out;
+
start = start_bidx_of_node(ofs_of_node(page), fi);
if (IS_INODE(page))
end = start + ADDRS_PER_INODE(fi);
@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
goto out;
}
- wait_on_page_writeback(dn.node_page);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE);
get_node_info(sbi, dn.nid, &ni);
f2fs_bug_on(ni.ino != ino_of_node(page));
@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
bool need_writecp = false;
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
- sizeof(struct fsync_inode_entry), NULL);
+ sizeof(struct fsync_inode_entry));
if (!fsync_entry_slab)
return -ENOMEM;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7caac5f2ca9e..085f548be7a3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
+#include <linux/kthread.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
@@ -24,6 +25,7 @@
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
+static struct kmem_cache *flush_cmd_slab;
/*
* __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
f2fs_sync_fs(sbi->sb, true);
}
+static int issue_flush_thread(void *data)
+{
+ struct f2fs_sb_info *sbi = data;
+ struct f2fs_sm_info *sm_i = SM_I(sbi);
+ wait_queue_head_t *q = &sm_i->flush_wait_queue;
+repeat:
+ if (kthread_should_stop())
+ return 0;
+
+ spin_lock(&sm_i->issue_lock);
+ if (sm_i->issue_list) {
+ sm_i->dispatch_list = sm_i->issue_list;
+ sm_i->issue_list = sm_i->issue_tail = NULL;
+ }
+ spin_unlock(&sm_i->issue_lock);
+
+ if (sm_i->dispatch_list) {
+ struct bio *bio = bio_alloc(GFP_NOIO, 0);
+ struct flush_cmd *cmd, *next;
+ int ret;
+
+ bio->bi_bdev = sbi->sb->s_bdev;
+ ret = submit_bio_wait(WRITE_FLUSH, bio);
+
+ for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
+ cmd->ret = ret;
+ next = cmd->next;
+ complete(&cmd->wait);
+ }
+ sm_i->dispatch_list = NULL;
+ }
+
+ wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
+ goto repeat;
+}
+
+int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_sm_info *sm_i = SM_I(sbi);
+ struct flush_cmd *cmd;
+ int ret;
+
+ if (!test_opt(sbi, FLUSH_MERGE))
+ return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
+
+ cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
+ cmd->next = NULL;
+ cmd->ret = 0;
+ init_completion(&cmd->wait);
+
+ spin_lock(&sm_i->issue_lock);
+ if (sm_i->issue_list)
+ sm_i->issue_tail->next = cmd;
+ else
+ sm_i->issue_list = cmd;
+ sm_i->issue_tail = cmd;
+ spin_unlock(&sm_i->issue_lock);
+
+ if (!sm_i->dispatch_list)
+ wake_up(&sm_i->flush_wait_queue);
+
+ wait_for_completion(&cmd->wait);
+ ret = cmd->ret;
+ kmem_cache_free(flush_cmd_slab, cmd);
+ return ret;
+}
+
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
struct list_head *head = &(SM_I(sbi)->discard_list);
- struct list_head *this, *next;
- struct discard_entry *entry;
+ struct discard_entry *entry, *this;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
/* send small discards */
- list_for_each_safe(this, next, head) {
- entry = list_entry(this, struct discard_entry, list);
+ list_for_each_entry_safe(entry, this, head, list) {
f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
list_del(&entry->list);
SM_I(sbi)->nr_discards -= entry->len;
@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
se = get_seg_entry(sbi, segno);
new_vblocks = se->valid_blocks + del;
- offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) ||
(new_vblocks > sbi->blocks_per_seg)));
@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-static void refresh_sit_entry(struct f2fs_sb_info *sbi,
- block_t old_blkaddr, block_t new_blkaddr)
+void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
- update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
- update_sit_entry(sbi, old_blkaddr, -1);
+ update_sit_entry(sbi, new, 1);
+ if (GET_SEGNO(sbi, old) != NULL_SEGNO)
+ update_sit_entry(sbi, old, -1);
+
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
+ if (!__has_curseg_space(sbi, type))
+ sit_i->s_ops->allocate_segment(sbi, type, false);
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
*/
refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
-
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+
mutex_unlock(&sit_i->sentry_lock);
if (page && IS_NODESEG(type))
@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi,
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = segno;
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
/* change the current log to the next block addr in advance */
@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
curseg->next_segno = next_segno;
change_curseg(sbi, type, true);
}
- curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
- (sbi->blocks_per_seg - 1);
+ curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr);
/* rewrite node page */
set_page_writeback(page);
f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
f2fs_submit_merged_bio(sbi, NODE, WRITE);
refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
-
locate_dirty_segment(sbi, old_cursegno);
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
mutex_unlock(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
}
+static inline bool is_merged_page(struct f2fs_sb_info *sbi,
+ struct page *page, enum page_type type)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = &sbi->write_io[btype];
+ struct bio_vec *bvec;
+ int i;
+
+ down_read(&io->io_rwsem);
+ if (!io->bio)
+ goto out;
+
+ bio_for_each_segment_all(bvec, io->bio, i) {
+ if (page == bvec->bv_page) {
+ up_read(&io->io_rwsem);
+ return true;
+ }
+ }
+
+out:
+ up_read(&io->io_rwsem);
+ return false;
+}
+
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type)
{
struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
if (PageWriteback(page)) {
- f2fs_submit_merged_bio(sbi, type, WRITE);
+ if (is_merged_page(sbi, page, type))
+ f2fs_submit_merged_bio(sbi, type, WRITE);
wait_on_page_writeback(page);
}
}
@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- if (restore_node_summary(sbi, segno, sum)) {
+ int err;
+
+ err = restore_node_summary(sbi, segno, sum);
+ if (err) {
f2fs_put_page(new, 1);
- return -EINVAL;
+ return err;
}
}
}
@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
int type = CURSEG_HOT_DATA;
+ int err;
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
/* restore for compacted data summary */
@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
type = CURSEG_HOT_NODE;
}
- for (; type <= CURSEG_COLD_NODE; type++)
- if (read_normal_summaries(sbi, type))
- return -EINVAL;
+ for (; type <= CURSEG_COLD_NODE; type++) {
+ err = read_normal_summaries(sbi, type);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi)
return restore_curseg_summaries(sbi);
}
-static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
-{
- struct address_space *mapping = META_MAPPING(sbi);
- struct page *page;
- block_t blk_addr, prev_blk_addr = 0;
- int sit_blk_cnt = SIT_BLK_CNT(sbi);
- int blkno = start;
- struct f2fs_io_info fio = {
- .type = META,
- .rw = READ_SYNC | REQ_META | REQ_PRIO
- };
-
- for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
-
- blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
-
- if (blkno != start && prev_blk_addr + 1 != blk_addr)
- break;
- prev_blk_addr = blk_addr;
-repeat:
- page = grab_cache_page(mapping, blk_addr);
- if (!page) {
- cond_resched();
- goto repeat;
- }
- if (PageUptodate(page)) {
- mark_page_accessed(page);
- f2fs_put_page(page, 1);
- continue;
- }
-
- f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
-
- mark_page_accessed(page);
- f2fs_put_page(page, 0);
- }
-
- f2fs_submit_merged_bio(sbi, META, READ);
- return blkno - start;
-}
-
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
do {
- readed = ra_sit_pages(sbi, start_blk, nrpages);
+ readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);
start = start_blk * sit_i->sents_per_block;
end = (start_blk + readed) * sit_i->sents_per_block;
@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
struct f2fs_sm_info *sm_info;
int err;
@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
- sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
+ sm_info->rec_prefree_segments = sm_info->main_segments *
+ DEF_RECLAIM_PREFREE_SEGMENTS / 100;
sm_info->ipu_policy = F2FS_IPU_DISABLE;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
+ if (test_opt(sbi, FLUSH_MERGE)) {
+ spin_lock_init(&sm_info->issue_lock);
+ init_waitqueue_head(&sm_info->flush_wait_queue);
+
+ sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+ "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(sm_info->f2fs_issue_flush))
+ return PTR_ERR(sm_info->f2fs_issue_flush);
+ }
+
err = build_sit_info(sbi);
if (err)
return err;
@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
struct f2fs_sm_info *sm_info = SM_I(sbi);
if (!sm_info)
return;
+ if (sm_info->f2fs_issue_flush)
+ kthread_stop(sm_info->f2fs_issue_flush);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
int __init create_segment_manager_caches(void)
{
discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
- sizeof(struct discard_entry), NULL);
+ sizeof(struct discard_entry));
if (!discard_entry_slab)
return -ENOMEM;
+ flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
+ sizeof(struct flush_cmd));
+ if (!flush_cmd_slab) {
+ kmem_cache_destroy(discard_entry_slab);
+ return -ENOMEM;
+ }
return 0;
}
void destroy_segment_manager_caches(void)
{
kmem_cache_destroy(discard_entry_slab);
+ kmem_cache_destroy(flush_cmd_slab);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5731682d7516..7091204680f4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -14,7 +14,7 @@
#define NULL_SEGNO ((unsigned int)(~0))
#define NULL_SECNO ((unsigned int)(~0))
-#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */
+#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
@@ -57,6 +57,9 @@
((blk_addr) - SM_I(sbi)->seg0_blkaddr)
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
+#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
+ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
+
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
static inline block_t written_block_count(struct f2fs_sb_info *sbi)
{
- struct sit_info *sit_i = SIT_I(sbi);
- block_t vblocks;
-
- mutex_lock(&sit_i->sentry_lock);
- vblocks = sit_i->written_valid_blocks;
- mutex_unlock(&sit_i->sentry_lock);
-
- return vblocks;
+ return SIT_I(sbi)->written_valid_blocks;
}
static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
{
- struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int free_segs;
-
- read_lock(&free_i->segmap_lock);
- free_segs = free_i->free_segments;
- read_unlock(&free_i->segmap_lock);
-
- return free_segs;
+ return FREE_I(sbi)->free_segments;
}
static inline int reserved_segments(struct f2fs_sb_info *sbi)
@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi)
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
{
- struct free_segmap_info *free_i = FREE_I(sbi);
- unsigned int free_secs;
-
- read_lock(&free_i->segmap_lock);
- free_secs = free_i->free_sections;
- read_unlock(&free_i->segmap_lock);
-
- return free_secs;
+ return FREE_I(sbi)->free_sections;
}
static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
struct request_queue *q = bdev_get_queue(bdev);
return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q));
}
+
+/*
+ * It is very important to gather dirty pages and write at once, so that we can
+ * submit a big bio without interfering other data writes.
+ * By default, 512 pages for directory data,
+ * 512 pages (2MB) * 3 for three types of nodes, and
+ * max_bio_blocks for meta are set.
+ */
+static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
+{
+ if (type == DATA)
+ return sbi->blocks_per_seg;
+ else if (type == NODE)
+ return 3 * sbi->blocks_per_seg;
+ else if (type == META)
+ return MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+ else
+ return 0;
+}
+
+/*
+ * When writing pages, it'd better align nr_to_write for segment size.
+ */
+static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
+ struct writeback_control *wbc)
+{
+ long nr_to_write, desired;
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ return 0;
+
+ nr_to_write = wbc->nr_to_write;
+
+ if (type == DATA)
+ desired = 4096;
+ else if (type == NODE)
+ desired = 3 * max_hw_blocks(sbi);
+ else
+ desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
+
+ wbc->nr_to_write = desired;
+ return desired - nr_to_write;
+}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 856bdf994c0a..c756923a7302 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,6 +51,7 @@ enum {
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_inline_data,
+ Opt_flush_merge,
Opt_err,
};
@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = {
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
+ {Opt_flush_merge, "flush_merge"},
{Opt_err, NULL},
};
@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = {
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
+ NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
};
@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
+ else if (struct_type == NM_INFO)
+ return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI)
return (unsigned char *)sbi;
return NULL;
@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(max_victim_search),
+ ATTR_LIST(dir_level),
+ ATTR_LIST(ram_thresh),
NULL,
};
@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options)
if (!name)
return -ENOMEM;
- if (!strncmp(name, "on", 2))
+ if (strlen(name) == 2 && !strncmp(name, "on", 2))
set_opt(sbi, BG_GC);
- else if (!strncmp(name, "off", 3))
+ else if (strlen(name) == 3 && !strncmp(name, "off", 3))
clear_opt(sbi, BG_GC);
else {
kfree(name);
@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_data:
set_opt(sbi, INLINE_DATA);
break;
+ case Opt_flush_merge:
+ set_opt(sbi, FLUSH_MERGE);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_current_depth = 1;
fi->i_advise = 0;
rwlock_init(&fi->ext.ext_lock);
+ init_rwsem(&fi->i_sem);
set_inode_flag(fi, FI_NEW_INODE);
if (test_opt(F2FS_SB(sb), INLINE_XATTR))
set_inode_flag(fi, FI_INLINE_XATTR);
+ /* Will be used by directory only */
+ fi->i_dir_level = F2FS_SB(sb)->dir_level;
+
return &fi->vfs_inode;
}
@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
+ if (test_opt(sbi, FLUSH_MERGE))
+ seq_puts(seq, ",flush_merge");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
le32_to_cpu(sbi->raw_super->segment_count_main);
int i;
+ seq_puts(seq, "format: segment_type|valid_blocks\n"
+ "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
+
for (i = 0; i < total_segs; i++) {
- seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1));
- if (i != 0 && (i % 10) == 0)
- seq_puts(seq, "\n");
+ struct seg_entry *se = get_seg_entry(sbi, i);
+
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-5d", i);
+ seq_printf(seq, "%d|%-3u", se->type,
+ get_valid_blocks(sbi, i, 1));
+ if ((i % 10) == 9 || i == (total_segs - 1))
+ seq_putc(seq, '\n');
else
- seq_puts(seq, " ");
+ seq_putc(seq, ' ');
}
+
return 0;
}
@@ -640,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
if (unlikely(ino < F2FS_ROOT_INO(sbi)))
return ERR_PTR(-ESTALE);
+ if (unlikely(ino >= NM_I(sbi)->max_nid))
+ return ERR_PTR(-ESTALE);
/*
* f2fs_iget isn't quite right if the inode is currently unallocated!
@@ -787,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
+
+ sbi->dir_level = DEF_DIR_LEVEL;
}
/*
@@ -898,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->por_doing = false;
spin_lock_init(&sbi->stat_lock);
- mutex_init(&sbi->read_io.io_mutex);
+ init_rwsem(&sbi->read_io.io_rwsem);
sbi->read_io.sbi = sbi;
sbi->read_io.bio = NULL;
for (i = 0; i < NR_PAGE_TYPE; i++) {
- mutex_init(&sbi->write_io[i].io_mutex);
+ init_rwsem(&sbi->write_io[i].io_rwsem);
sbi->write_io[i].sbi = sbi;
sbi->write_io[i].bio = NULL;
}
@@ -991,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
}
- /* recover fsynced data */
- if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
- err = recover_fsync_data(sbi);
- if (err)
- f2fs_msg(sb, KERN_ERR,
- "Cannot recover all fsync data errno=%ld", err);
- }
-
- /*
- * If filesystem is not mounted as read-only then
- * do start the gc_thread.
- */
- if (!(sb->s_flags & MS_RDONLY)) {
- /* After POR, we can run background GC thread.*/
- err = start_gc_thread(sbi);
- if (err)
- goto free_gc;
- }
-
err = f2fs_build_stats(sbi);
if (err)
- goto free_gc;
+ goto free_root_inode;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1034,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL,
"%s", sb->s_id);
if (err)
- goto fail;
+ goto free_proc;
+
+ /* recover fsynced data */
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+ err = recover_fsync_data(sbi);
+ if (err)
+ f2fs_msg(sb, KERN_ERR,
+ "Cannot recover all fsync data errno=%ld", err);
+ }
+ /*
+ * If filesystem is not mounted as read-only then
+ * do start the gc_thread.
+ */
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /* After POR, we can run background GC thread.*/
+ err = start_gc_thread(sbi);
+ if (err)
+ goto free_kobj;
+ }
return 0;
-fail:
+
+free_kobj:
+ kobject_del(&sbi->s_kobj);
+free_proc:
if (sbi->s_proc) {
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry(sb->s_id, f2fs_proc_root);
}
f2fs_destroy_stats(sbi);
-free_gc:
- stop_gc_thread(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
@@ -1084,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs");
static int __init init_inodecache(void)
{
f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
- sizeof(struct f2fs_inode_info), NULL);
+ sizeof(struct f2fs_inode_info));
if (!f2fs_inode_cachep)
return -ENOMEM;
return 0;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 89d0422a91a8..503c2451131e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage)
inline_size = inline_xattr_size(inode);
- txattr_addr = kzalloc(inline_size + size, GFP_KERNEL);
+ txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
return NULL;
@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
if (name == NULL)
return -EINVAL;
name_len = strlen(name);
+ if (name_len > F2FS_NAME_LEN)
+ return -ERANGE;
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
+ /* protect xattr_ver */
+ down_write(&F2FS_I(inode)->i_sem);
err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
+ up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
return err;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index da74d878dc4f..df53e1753a76 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -183,7 +183,7 @@ struct f2fs_inode {
__le32 i_pino; /* parent inode number */
__le32 i_namelen; /* file name length */
__u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */
- __u8 i_reserved2; /* for backward compatibility */
+ __u8 i_dir_level; /* dentry_level for large dir */
struct f2fs_extent i_ext; /* caching a largest extent */