diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-10 16:34:17 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-10 16:34:17 -0800 |
commit | 999a36b52b1b11b2ca0590756e4f8cf21f2d9182 (patch) | |
tree | 1b51ea332b5258e51fc9209168f66246bf7b3875 /fs/bcachefs/fs.c | |
parent | 84e9a2d5517bf62edda74f382757aa173b8e45fd (diff) | |
parent | 169de41985f53320580f3d347534966ea83343ca (diff) |
Merge tag 'bcachefs-2024-01-10' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs updates from Kent Overstreet:
- btree write buffer rewrite: instead of adding keys to the btree write
buffer at transaction commit time, we now journal them with a
different journal entry type and copy them from the journal to the
write buffer just prior to journal write.
This reduces the number of atomic operations on shared cachelines in
the transaction commit path and is a significant performance
improvement on some workloads: multithreaded 4k random writes went
from ~650k iops to ~850k iops.
- Bring back optimistic spinning for six locks: the new implementation
doesn't use osq locks; instead we add to the lock waitlist as normal,
and then spin on the lock_acquired bit in the waitlist entry, _not_
the lock itself.
- New ioctls:
- BCH_IOCTL_DEV_USAGE_V2, which allows for new data types
- BCH_IOCTL_OFFLINE_FSCK, which runs the kernel implementation of
fsck but without mounting: useful for transparently using the
kernel version of fsck from 'bcachefs fsck' when the kernel
version is a better match for the on disk filesystem.
- BCH_IOCTL_ONLINE_FSCK: online fsck. Not all passes are supported
yet, but the passes that are supported are fully featured - errors
may be corrected as normal.
The new ioctls use the new 'thread_with_file' abstraction for kicking
off a kthread that's tied to a file descriptor returned to userspace
via the ioctl.
- btree_paths within a btree_trans are now dynamically growable,
instead of being limited to 64. This is important for the
check_directory_structure phase of fsck, and also fixes some issues
we were having with btree path overflow in the reflink btree.
- Trigger refactoring; prep work for the upcoming disk space accounting
rewrite
- Numerous bugfixes :)
* tag 'bcachefs-2024-01-10' of https://evilpiepirate.org/git/bcachefs: (226 commits)
bcachefs: eytzinger0_find() search should be const
bcachefs: move "ptrs not changing" optimization to bch2_trigger_extent()
bcachefs: fix simulateously upgrading & downgrading
bcachefs: Restart recovery passes more reliably
bcachefs: bch2_dump_bset() doesn't choke on u64s == 0
bcachefs: improve checksum error messages
bcachefs: improve validate_bset_keys()
bcachefs: print sb magic when relevant
bcachefs: __bch2_sb_field_to_text()
bcachefs: %pg is banished
bcachefs: Improve would_deadlock trace event
bcachefs: fsck_err()s don't need to manually check c->sb.version anymore
bcachefs: Upgrades now specify errors to fix, like downgrades
bcachefs: no thread_with_file in userspace
bcachefs: Don't autofix errors we can't fix
bcachefs: add missing bch2_latency_acct() call
bcachefs: increase max_active on io_complete_wq
bcachefs: add time_stats for btree_node_read_done()
bcachefs: don't clear accessed bit in btree node fill
bcachefs: Add an option to control btree node prefetching
...
Diffstat (limited to 'fs/bcachefs/fs.c')
-rw-r--r-- | fs/bcachefs/fs.c | 100 |
1 files changed, 33 insertions, 67 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c1895df1bffe..ec419b8e2c43 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -93,7 +93,7 @@ retry: BTREE_ITER_INTENT) ?: (set ? set(trans, inode, &inode_u, p) : 0) ?: bch2_inode_write(trans, &iter, &inode_u) ?: - bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); /* * the btree node lock protects inode->ei_inode, not ei_update_lock; @@ -455,7 +455,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); ret = commit_do(trans, NULL, NULL, - BTREE_INSERT_NOFAIL, + BCH_TRANS_COMMIT_no_enospc, bch2_unlink_trans(trans, inode_inum(dir), &dir_u, &inode_u, &dentry->d_name, @@ -729,7 +729,7 @@ retry: ret = bch2_inode_write(trans, &inode_iter, &inode_u) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_trans_iter_exit(trans, &inode_iter); @@ -1012,15 +1012,13 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret; if (!dir_emit_dots(file, ctx)) return 0; - ret = bch2_readdir(c, inode_inum(inode), ctx); - if (ret) - bch_err_fn(c, ret); + int ret = bch2_readdir(c, inode_inum(inode), ctx); + bch_err_fn(c, ret); return bch2_err_class(ret); } @@ -1500,7 +1498,7 @@ static void bch2_evict_inode(struct inode *vinode) void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) { - struct bch_inode_info *inode, **i; + struct bch_inode_info *inode; DARRAY(struct bch_inode_info *) grabbed; bool clean_pass = false, this_pass_clean; @@ -1626,43 +1624,18 @@ static struct bch_fs *bch2_path_to_fs(const char *path) return c ?: ERR_PTR(-ENOENT); } -static char **split_devs(const char *_dev_name, unsigned *nr) -{ - char *dev_name = NULL, **devs = NULL, *s; - size_t i = 0, nr_devs = 0; - - dev_name = 
kstrdup(_dev_name, GFP_KERNEL); - if (!dev_name) - return NULL; - - for (s = dev_name; s; s = strchr(s + 1, ':')) - nr_devs++; - - devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL); - if (!devs) { - kfree(dev_name); - return NULL; - } - - while ((s = strsep(&dev_name, ":"))) - devs[i++] = s; - - *nr = nr_devs; - return devs; -} - static int bch2_remount(struct super_block *sb, int *flags, char *data) { struct bch_fs *c = sb->s_fs_info; struct bch_opts opts = bch2_opts_empty(); int ret; - opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); - ret = bch2_parse_mount_opts(c, &opts, data); if (ret) goto err; + opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); + if (opts.read_only != c->opts.read_only) { down_write(&c->state_lock); @@ -1696,11 +1669,9 @@ err: static int bch2_show_devname(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; - struct bch_dev *ca; - unsigned i; bool first = true; - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { if (!first) seq_putc(seq, ':'); first = false; @@ -1770,7 +1741,7 @@ static int bch2_unfreeze(struct super_block *sb) struct bch_fs *c = sb->s_fs_info; int ret; - if (test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) + if (test_bit(BCH_FS_emergency_ro, &c->flags)) return 0; down_write(&c->state_lock); @@ -1805,17 +1776,18 @@ static int bch2_noset_super(struct super_block *s, void *data) return -EBUSY; } +typedef DARRAY(struct bch_fs *) darray_fs; + static int bch2_test_super(struct super_block *s, void *data) { struct bch_fs *c = s->s_fs_info; - struct bch_fs **devs = data; - unsigned i; + darray_fs *d = data; if (!c) return false; - for (i = 0; devs[i]; i++) - if (c != devs[i]) + darray_for_each(*d, i) + if (c != *i) return false; return true; } @@ -1824,13 +1796,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { struct bch_fs *c; - struct bch_dev *ca; struct super_block *sb; struct inode *vinode; 
struct bch_opts opts = bch2_opts_empty(); - char **devs; - struct bch_fs **devs_to_fs = NULL; - unsigned i, nr_devs; int ret; opt_set(opts, read_only, (flags & SB_RDONLY) != 0); @@ -1842,25 +1810,25 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (!dev_name || strlen(dev_name) == 0) return ERR_PTR(-EINVAL); - devs = split_devs(dev_name, &nr_devs); - if (!devs) - return ERR_PTR(-ENOMEM); + darray_str devs; + ret = bch2_split_devs(dev_name, &devs); + if (ret) + return ERR_PTR(ret); - devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL); - if (!devs_to_fs) { - sb = ERR_PTR(-ENOMEM); - goto got_sb; + darray_fs devs_to_fs = {}; + darray_for_each(devs, i) { + ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i)); + if (ret) { + sb = ERR_PTR(ret); + goto got_sb; + } } - for (i = 0; i < nr_devs; i++) - devs_to_fs[i] = bch2_path_to_fs(devs[i]); - - sb = sget(fs_type, bch2_test_super, bch2_noset_super, - flags|SB_NOSEC, devs_to_fs); + sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs); if (!IS_ERR(sb)) goto got_sb; - c = bch2_fs_open(devs, nr_devs, opts); + c = bch2_fs_open(devs.data, devs.nr, opts); if (IS_ERR(c)) { sb = ERR_CAST(c); goto got_sb; @@ -1880,9 +1848,8 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, if (IS_ERR(sb)) bch2_fs_stop(c); got_sb: - kfree(devs_to_fs); - kfree(devs[0]); - kfree(devs); + darray_exit(&devs_to_fs); + bch2_darray_str_exit(&devs); if (IS_ERR(sb)) { ret = PTR_ERR(sb); @@ -1923,7 +1890,7 @@ got_sb: sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; - for_each_online_member(ca, c, i) { + for_each_online_member(c, ca) { struct block_device *bdev = ca->disk_sb.bdev; /* XXX: create an anonymous device for multi device filesystems */ @@ -1944,10 +1911,9 @@ got_sb: vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); - if (ret) { - bch_err_msg(c, ret, "mounting: error getting root inode"); + bch_err_msg(c, ret, "mounting: error getting 
root inode"); + if (ret) goto err_put_super; - } sb->s_root = d_make_root(vinode); if (!sb->s_root) { |