summaryrefslogtreecommitdiff
path: root/fs/bcachefs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-10-05 15:18:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-10-05 15:18:04 -0700
commit8f602276d3902642fdc3429b548d73c745446601 (patch)
tree94efac93c954055e005c7d6b5f1ea95dd9fa4828 /fs/bcachefs
parentfc20a3e57247e21e1bd582f604b20bf898f7d111 (diff)
parent0f25eb4b60771f08fbcca878a8f7f88086d0c885 (diff)
Merge tag 'bcachefs-2024-10-05' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet: "A lot of little fixes, bigger ones include: - bcachefs's __wait_on_freeing_inode() was broken in rc1 due to vfs changes, now fixed along with another lost wakeup - fragmentation LRU fixes; fsck now repairs successfully (this is the data structure copygc uses); along with some nice simplification. - Rework logged op error handling, so that if logged op replay errors (due to another filesystem error) we delete the logged op instead of going into an infinite loop) - Various small filesystem connectivitity repair fixes" * tag 'bcachefs-2024-10-05' of git://evilpiepirate.org/bcachefs: bcachefs: Rework logged op error handling bcachefs: Add warn param to subvol_get_snapshot, peek_inode bcachefs: Kill snapshot arg to fsck_write_inode() bcachefs: Check for unlinked, non-empty dirs in check_inode() bcachefs: Check for unlinked inodes with dirents bcachefs: Check for directories with no backpointers bcachefs: Kill alloc_v4.fragmentation_lru bcachefs: minor lru fsck fixes bcachefs: Mark more errors AUTOFIX bcachefs: Make sure we print error that causes fsck to bail out bcachefs: bkey errors are only AUTOFIX during read bcachefs: Create lost+found in correct snapshot bcachefs: Fix reattach_inode() bcachefs: Add missing wakeup to bch2_inode_hash_remove() bcachefs: Fix trans_commit disk accounting revert bcachefs: Fix bch2_inode_is_open() check bcachefs: Fix return type of dirent_points_to_inode_nowarn() bcachefs: Fix bad shift in bch2_read_flag_list()
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--fs/bcachefs/alloc_background.c30
-rw-r--r--fs/bcachefs/alloc_background_format.h2
-rw-r--r--fs/bcachefs/btree_gc.c3
-rw-r--r--fs/bcachefs/btree_trans_commit.c3
-rw-r--r--fs/bcachefs/error.c23
-rw-r--r--fs/bcachefs/error.h9
-rw-r--r--fs/bcachefs/fs.c33
-rw-r--r--fs/bcachefs/fsck.c194
-rw-r--r--fs/bcachefs/inode.c44
-rw-r--r--fs/bcachefs/inode.h28
-rw-r--r--fs/bcachefs/io_misc.c63
-rw-r--r--fs/bcachefs/logged_ops.c16
-rw-r--r--fs/bcachefs/logged_ops.h2
-rw-r--r--fs/bcachefs/lru.c34
-rw-r--r--fs/bcachefs/move.c2
-rw-r--r--fs/bcachefs/movinggc.c12
-rw-r--r--fs/bcachefs/sb-errors_format.h33
-rw-r--r--fs/bcachefs/subvolume.c16
-rw-r--r--fs/bcachefs/subvolume.h2
-rw-r--r--fs/bcachefs/util.c2
20 files changed, 342 insertions, 209 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 645b5ed4babb..4e4a448f6931 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -332,7 +332,6 @@ void bch2_alloc_v4_swab(struct bkey_s k)
a->io_time[1] = swab64(a->io_time[1]);
a->stripe = swab32(a->stripe);
a->nr_external_backpointers = swab32(a->nr_external_backpointers);
- a->fragmentation_lru = swab64(a->fragmentation_lru);
a->stripe_sectors = swab32(a->stripe_sectors);
bps = alloc_v4_backpointers(a);
@@ -347,6 +346,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
{
struct bch_alloc_v4 _a;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
+ struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL;
prt_newline(out);
printbuf_indent_add(out, 2);
@@ -364,9 +364,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy);
prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]);
prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]);
- prt_printf(out, "fragmentation %llu\n", a->fragmentation_lru);
+
+ if (ca)
+ prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca));
prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
printbuf_indent_sub(out, 2);
+
+ bch2_dev_put(ca);
}
void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
@@ -882,12 +886,13 @@ int bch2_trigger_alloc(struct btree_trans *trans,
goto err;
}
- new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca);
- if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
+ old_lru = alloc_lru_idx_fragmentation(*old_a, ca);
+ new_lru = alloc_lru_idx_fragmentation(*new_a, ca);
+ if (old_lru != new_lru) {
ret = bch2_lru_change(trans,
BCH_LRU_FRAGMENTATION_START,
bucket_to_u64(new.k->p),
- old_a->fragmentation_lru, new_a->fragmentation_lru);
+ old_lru, new_lru);
if (ret)
goto err;
}
@@ -1629,18 +1634,22 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (ret)
return ret;
+ struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode);
+ if (!ca)
+ return 0;
+
a = bch2_alloc_to_v4(alloc_k, &a_convert);
- if (a->fragmentation_lru) {
+ u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
+ if (lru_idx) {
ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
- a->fragmentation_lru,
- alloc_k, last_flushed);
+ lru_idx, alloc_k, last_flushed);
if (ret)
- return ret;
+ goto err;
}
if (a->data_type != BCH_DATA_cached)
- return 0;
+ goto err;
if (fsck_err_on(!a->io_time[READ],
trans, alloc_key_cached_but_read_time_zero,
@@ -1669,6 +1678,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
goto err;
err:
fsck_err:
+ bch2_dev_put(ca);
printbuf_exit(&buf);
return ret;
}
diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h
index f754a2951d8a..befdaa95c515 100644
--- a/fs/bcachefs/alloc_background_format.h
+++ b/fs/bcachefs/alloc_background_format.h
@@ -70,7 +70,7 @@ struct bch_alloc_v4 {
__u32 stripe;
__u32 nr_external_backpointers;
/* end of fields in original version of alloc_v4 */
- __u64 fragmentation_lru;
+ __u64 _fragmentation_lru; /* obsolete */
__u32 stripe_sectors;
__u32 pad;
} __packed __aligned(8);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 660d2fa02da2..771154e3a291 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -828,8 +828,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
return ret;
}
- gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
-
if (fsck_err_on(new.data_type != gc.data_type,
trans, alloc_key_data_type_wrong,
"bucket %llu:%llu gen %u has wrong data_type"
@@ -857,7 +855,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
copy_bucket_field(alloc_key_stripe_wrong, stripe);
copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
- copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
#undef copy_bucket_field
if (!bch2_alloc_v4_cmp(*old, new))
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 1a74a1a252ee..9bf471fa4361 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -832,7 +832,8 @@ revert_fs_usage:
for (struct jset_entry *entry2 = trans->journal_entries;
entry2 != entry;
entry2 = vstruct_next(entry2))
- if (jset_entry_is_key(entry2) && entry2->start->k.type == KEY_TYPE_accounting) {
+ if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys &&
+ entry2->start->k.type == KEY_TYPE_accounting) {
struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start);
bch2_accounting_neg(a);
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 3a16b535b6c3..7a79f695ba2e 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -393,6 +393,14 @@ int __bch2_fsck_err(struct bch_fs *c,
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;
+ bool exiting =
+ test_bit(BCH_FS_fsck_running, &c->flags) &&
+ (ret != -BCH_ERR_fsck_fix &&
+ ret != -BCH_ERR_fsck_ignore);
+
+ if (exiting)
+ print = true;
+
if (print) {
if (bch2_fs_stdio_redirect(c))
bch2_print(c, "%s\n", out->buf);
@@ -400,9 +408,7 @@ int __bch2_fsck_err(struct bch_fs *c,
bch2_print_string_as_lines(KERN_ERR, out->buf);
}
- if (test_bit(BCH_FS_fsck_running, &c->flags) &&
- (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore))
+ if (exiting)
bch_err(c, "Unable to continue, halting");
else if (suppressing)
bch_err(c, "Ratelimiting new instances of previous error");
@@ -430,10 +436,17 @@ err:
int __bch2_bkey_fsck_err(struct bch_fs *c,
struct bkey_s_c k,
- enum bch_fsck_flags flags,
+ enum bch_validate_flags validate_flags,
enum bch_sb_error_id err,
const char *fmt, ...)
{
+ if (validate_flags & BCH_VALIDATE_silent)
+ return -BCH_ERR_fsck_delete_bkey;
+
+ unsigned fsck_flags = 0;
+ if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)))
+ fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX;
+
struct printbuf buf = PRINTBUF;
va_list args;
@@ -445,7 +458,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
va_end(args);
prt_str(&buf, ": delete?");
- int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf);
+ int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf);
printbuf_exit(&buf);
return ret;
}
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 21ee7211b03e..6551ada926b6 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -167,10 +167,11 @@ void bch2_flush_fsck_errs(struct bch_fs *);
#define fsck_err_on(cond, c, _err_type, ...) \
__fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
+enum bch_validate_flags;
__printf(5, 6)
int __bch2_bkey_fsck_err(struct bch_fs *,
struct bkey_s_c,
- enum bch_fsck_flags,
+ enum bch_validate_flags,
enum bch_sb_error_id,
const char *, ...);
@@ -180,11 +181,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *,
*/
#define bkey_fsck_err(c, _err_type, _err_msg, ...) \
do { \
- if ((flags & BCH_VALIDATE_silent)) { \
- ret = -BCH_ERR_fsck_delete_bkey; \
- goto fsck_err; \
- } \
- int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\
+ int _ret = __bch2_bkey_fsck_err(c, k, flags, \
BCH_FSCK_ERR_##_err_type, \
_err_msg, ##__VA_ARGS__); \
if (_ret != -BCH_ERR_fsck_fix && \
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 4a1bb07a2574..5bfc26d58270 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -174,20 +174,24 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
.automatic_shrinking = true,
};
-static void __wait_on_freeing_inode(struct inode *inode)
+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
{
- wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
- wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- spin_unlock(&inode->i_lock);
- schedule();
- finish_wait(wq, &wait.wq_entry);
+ return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
}
-struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+static void __wait_on_freeing_inode(struct bch_fs *c,
+ struct bch_inode_info *inode,
+ subvol_inum inum)
{
- return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
+ wait_queue_head_t *wq;
+ DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+ wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW);
+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&inode->v.i_lock);
+
+ if (__bch2_inode_hash_find(c, inum) == inode)
+ schedule_timeout(HZ * 10);
+ finish_wait(wq, &wait.wq_entry);
}
static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans,
@@ -204,10 +208,10 @@ repeat:
}
if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {
if (!trans) {
- __wait_on_freeing_inode(&inode->v);
+ __wait_on_freeing_inode(c, inode, inum);
} else {
bch2_trans_unlock(trans);
- __wait_on_freeing_inode(&inode->v);
+ __wait_on_freeing_inode(c, inode, inum);
int ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
@@ -232,6 +236,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod
&inode->hash, bch2_vfs_inodes_params);
BUG_ON(ret);
inode->v.i_hash.pprev = NULL;
+ /*
+ * This pairs with the bch2_inode_hash_find() ->
+ * __wait_on_freeing_inode() path
+ */
+ inode_wake_up_bit(&inode->v, __I_NEW);
}
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 0d8b782b63fb..b8a6ceb0cc7a 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -28,8 +28,8 @@ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
inode->bi_dir_offset == d.k->p.offset;
}
-static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *inode)
+static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d,
+ struct bch_inode_unpacked *inode)
{
if (d.v->d_type == DT_SUBVOL
? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
@@ -137,16 +137,15 @@ found:
return ret;
}
-static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
- struct bch_inode_unpacked *inode,
- u32 *snapshot)
+static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot,
+ struct bch_inode_unpacked *inode)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret;
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, inode_nr, *snapshot), 0);
+ SPOS(0, inode_nr, snapshot), 0);
ret = bkey_err(k);
if (ret)
goto err;
@@ -154,8 +153,6 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
ret = bkey_is_inode(k.k)
? bch2_inode_unpack(k, inode)
: -BCH_ERR_ENOENT_inode;
- if (!ret)
- *snapshot = iter.pos.snapshot;
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
@@ -250,8 +247,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
- u32 root_inode_snapshot = snapshot;
- ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
+ ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode);
bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
root_inum.inum, le32_to_cpu(st.master_subvol));
if (ret)
@@ -277,17 +273,23 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
* The bch2_check_dirents pass has already run, dangling dirents
* shouldn't exist here:
*/
- ret = lookup_inode(trans, inum, lostfound, &snapshot);
+ ret = lookup_inode(trans, inum, snapshot, lostfound);
bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
return ret;
create_lostfound:
/*
+ * we always create lost+found in the root snapshot; we don't want
+ * different branches of the snapshot tree to have different lost+found
+ */
+ snapshot = le32_to_cpu(st.root_snapshot);
+ /*
* XXX: we could have a nicer log message here if we had a nice way to
* walk backpointers to print a path
*/
- bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot));
+ bch_notice(c, "creating lost+found in subvol %llu snapshot %u",
+ root_inum.subvol, le32_to_cpu(st.root_snapshot));
u64 now = bch2_current_time(c);
struct btree_iter lostfound_iter = { NULL };
@@ -296,6 +298,7 @@ create_lostfound:
bch2_inode_init_early(c, lostfound);
bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
lostfound->bi_dir = root_inode.bi_inum;
+ lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot);
root_inode.bi_nlink++;
@@ -323,9 +326,7 @@ err:
return ret;
}
-static int reattach_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *inode,
- u32 inode_snapshot)
+static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
{
struct bch_fs *c = trans->c;
struct bch_hash_info dir_hash;
@@ -333,7 +334,7 @@ static int reattach_inode(struct btree_trans *trans,
char name_buf[20];
struct qstr name;
u64 dir_offset = 0;
- u32 dirent_snapshot = inode_snapshot;
+ u32 dirent_snapshot = inode->bi_snapshot;
int ret;
if (inode->bi_subvol) {
@@ -354,14 +355,18 @@ static int reattach_inode(struct btree_trans *trans,
if (ret)
return ret;
- if (S_ISDIR(inode->bi_mode)) {
- lostfound.bi_nlink++;
+ lostfound.bi_nlink += S_ISDIR(inode->bi_mode);
- ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
- if (ret)
- return ret;
+ /* ensure lost+found inode is also present in inode snapshot */
+ if (!inode->bi_subvol) {
+ BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot));
+ lostfound.bi_snapshot = inode->bi_snapshot;
}
+ ret = __bch2_fsck_write_inode(trans, &lostfound);
+ if (ret)
+ return ret;
+
dir_hash = bch2_hash_info_init(c, &lostfound);
name = (struct qstr) QSTR(name_buf);
@@ -383,7 +388,7 @@ static int reattach_inode(struct btree_trans *trans,
inode->bi_dir = lostfound.bi_inum;
inode->bi_dir_offset = dir_offset;
- return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
+ return __bch2_fsck_write_inode(trans, inode);
}
static int remove_backpointer(struct btree_trans *trans,
@@ -422,7 +427,7 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
if (ret)
return ret;
- ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot));
+ ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
return ret;
}
@@ -540,8 +545,9 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32
bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL);
new_inode.bi_size = i_size;
new_inode.bi_inum = inum;
+ new_inode.bi_snapshot = snapshot;
- return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
+ return __bch2_fsck_write_inode(trans, &new_inode);
}
struct snapshots_seen {
@@ -1024,6 +1030,7 @@ static int check_inode(struct btree_trans *trans,
bool full)
{
struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
struct bch_inode_unpacked u;
bool do_update = false;
int ret;
@@ -1057,7 +1064,41 @@ static int check_inode(struct btree_trans *trans,
trans, inode_snapshot_mismatch,
"inodes in different snapshots don't match")) {
bch_err(c, "repair not implemented yet");
- return -BCH_ERR_fsck_repair_unimplemented;
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err_noprint;
+ }
+
+ if (u.bi_dir || u.bi_dir_offset) {
+ ret = check_inode_dirent_inode(trans, &u, &do_update);
+ if (ret)
+ goto err;
+ }
+
+ if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked),
+ trans, inode_unlinked_but_has_dirent,
+ "inode unlinked but has dirent\n%s",
+ (printbuf_reset(&buf),
+ bch2_inode_unpacked_to_text(&buf, &u),
+ buf.buf))) {
+ u.bi_flags &= ~BCH_INODE_unlinked;
+ do_update = true;
+ }
+
+ if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) {
+ /* Check for this early so that check_unreachable_inode() will reattach it */
+
+ ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot);
+ if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty)
+ goto err;
+
+ fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty,
+ "dir unlinked but not empty\n%s",
+ (printbuf_reset(&buf),
+ bch2_inode_unpacked_to_text(&buf, &u),
+ buf.buf));
+ u.bi_flags &= ~BCH_INODE_unlinked;
+ do_update = true;
+ ret = 0;
}
if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) &&
@@ -1070,15 +1111,15 @@ static int check_inode(struct btree_trans *trans,
u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked;
- ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
+ ret = __bch2_fsck_write_inode(trans, &u);
bch_err_msg(c, ret, "in fsck updating inode");
if (ret)
- return ret;
+ goto err_noprint;
if (!bpos_eq(new_min_pos, POS_MIN))
bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
- return 0;
+ goto err_noprint;
}
if (u.bi_flags & BCH_INODE_unlinked) {
@@ -1095,7 +1136,7 @@ static int check_inode(struct btree_trans *trans,
*/
ret = check_inode_deleted_list(trans, k.k->p);
if (ret < 0)
- return ret;
+ goto err_noprint;
fsck_err_on(!ret,
trans, unlinked_inode_not_on_deleted_list,
@@ -1106,13 +1147,13 @@ static int check_inode(struct btree_trans *trans,
if (ret)
goto err;
} else {
- if (fsck_err_on(bch2_inode_is_open(c, k.k->p),
+ if (fsck_err_on(!bch2_inode_is_open(c, k.k->p),
trans, inode_unlinked_and_not_open,
"inode %llu%u unlinked and not open",
u.bi_inum, u.bi_snapshot)) {
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck deleting inode");
- return ret;
+ goto err_noprint;
}
}
}
@@ -1177,12 +1218,6 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
- if (u.bi_dir || u.bi_dir_offset) {
- ret = check_inode_dirent_inode(trans, &u, &do_update);
- if (ret)
- goto err;
- }
-
if (fsck_err_on(u.bi_parent_subvol &&
(u.bi_subvol == 0 ||
u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
@@ -1224,14 +1259,16 @@ static int check_inode(struct btree_trans *trans,
}
do_update:
if (do_update) {
- ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
+ ret = __bch2_fsck_write_inode(trans, &u);
bch_err_msg(c, ret, "in fsck updating inode");
if (ret)
- return ret;
+ goto err_noprint;
}
err:
fsck_err:
bch_err_fn(c, ret);
+err_noprint:
+ printbuf_exit(&buf);
return ret;
}
@@ -1347,7 +1384,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
w->last_pos.inode, i->snapshot,
i->inode.bi_sectors, i->count)) {
i->inode.bi_sectors = i->count;
- ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
+ ret = bch2_fsck_write_inode(trans, &i->inode);
if (ret)
break;
}
@@ -1789,7 +1826,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
i->inode.bi_nlink = i->count;
- ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
+ ret = bch2_fsck_write_inode(trans, &i->inode);
if (ret)
break;
}
@@ -1810,8 +1847,7 @@ noinline_for_stack
static int check_dirent_inode_dirent(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *target,
- u32 target_snapshot)
+ struct bch_inode_unpacked *target)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
@@ -1821,6 +1857,32 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
if (inode_points_to_dirent(target, d))
return 0;
+ if (!target->bi_dir &&
+ !target->bi_dir_offset) {
+ fsck_err_on(S_ISDIR(target->bi_mode),
+ trans, inode_dir_missing_backpointer,
+ "directory with missing backpointer\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, d.s_c),
+ prt_printf(&buf, "\n"),
+ bch2_inode_unpacked_to_text(&buf, target),
+ buf.buf));
+
+ fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
+ trans, inode_unlinked_but_has_dirent,
+ "inode unlinked but has dirent\n%s",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, d.s_c),
+ prt_printf(&buf, "\n"),
+ bch2_inode_unpacked_to_text(&buf, target),
+ buf.buf));
+
+ target->bi_flags &= ~BCH_INODE_unlinked;
+ target->bi_dir = d.k->p.inode;
+ target->bi_dir_offset = d.k->p.offset;
+ return __bch2_fsck_write_inode(trans, target);
+ }
+
if (bch2_inode_should_have_bp(target) &&
!fsck_err(trans, inode_wrong_backpointer,
"dirent points to inode that does not point back:\n %s",
@@ -1830,15 +1892,8 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
buf.buf)))
goto err;
- if (!target->bi_dir &&
- !target->bi_dir_offset) {
- target->bi_dir = d.k->p.inode;
- target->bi_dir_offset = d.k->p.offset;
- return __bch2_fsck_write_inode(trans, target, target_snapshot);
- }
-
struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
- SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
+ SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot));
ret = bkey_err(bp_dirent);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
@@ -1851,14 +1906,14 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
"inode %llu:%u has wrong backpointer:\n"
"got %llu:%llu\n"
"should be %llu:%llu",
- target->bi_inum, target_snapshot,
+ target->bi_inum, target->bi_snapshot,
target->bi_dir,
target->bi_dir_offset,
d.k->p.inode,
d.k->p.offset)) {
target->bi_dir = d.k->p.inode;
target->bi_dir_offset = d.k->p.offset;
- ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+ ret = __bch2_fsck_write_inode(trans, target);
goto out;
}
@@ -1873,7 +1928,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
trans, inode_dir_multiple_links,
"%s %llu:%u with multiple links\n%s",
S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
- target->bi_inum, target_snapshot, buf.buf)) {
+ target->bi_inum, target->bi_snapshot, buf.buf)) {
ret = __remove_dirent(trans, d.k->p);
goto out;
}
@@ -1886,10 +1941,10 @@ static int check_dirent_inode_dirent(struct btree_trans *trans,
if (fsck_err_on(backpointer_exists && !target->bi_nlink,
trans, inode_multiple_links_but_nlink_0,
"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
- target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
+ target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
target->bi_nlink++;
target->bi_flags &= ~BCH_INODE_unlinked;
- ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+ ret = __bch2_fsck_write_inode(trans, target);
if (ret)
goto err;
}
@@ -1906,15 +1961,14 @@ noinline_for_stack
static int check_dirent_target(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c_dirent d,
- struct bch_inode_unpacked *target,
- u32 target_snapshot)
+ struct bch_inode_unpacked *target)
{
struct bch_fs *c = trans->c;
struct bkey_i_dirent *n;
struct printbuf buf = PRINTBUF;
int ret = 0;
- ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
+ ret = check_dirent_inode_dirent(trans, iter, d, target);
if (ret)
goto err;
@@ -2073,7 +2127,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
u64 target_inum = le64_to_cpu(s.v->inode);
u32 target_snapshot = le32_to_cpu(s.v->snapshot);
- ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
+ ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root);
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
@@ -2089,13 +2143,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
target_inum,
subvol_root.bi_parent_subvol, parent_subvol)) {
subvol_root.bi_parent_subvol = parent_subvol;
- ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
+ subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot);
+ ret = __bch2_fsck_write_inode(trans, &subvol_root);
if (ret)
goto err;
}
- ret = check_dirent_target(trans, iter, d, &subvol_root,
- target_snapshot);
+ ret = check_dirent_target(trans, iter, d, &subvol_root);
if (ret)
goto err;
out:
@@ -2188,8 +2242,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
}
darray_for_each(target->inodes, i) {
- ret = check_dirent_target(trans, iter, d,
- &i->inode, i->snapshot);
+ ret = check_dirent_target(trans, iter, d, &i->inode);
if (ret)
goto err;
}
@@ -2330,7 +2383,7 @@ static int check_root_trans(struct btree_trans *trans)
goto err;
}
- ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
+ ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode);
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
@@ -2343,8 +2396,9 @@ static int check_root_trans(struct btree_trans *trans)
bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
0, NULL);
root_inode.bi_inum = inum;
+ root_inode.bi_snapshot = snapshot;
- ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot);
+ ret = __bch2_fsck_write_inode(trans, &root_inode);
bch_err_msg(c, ret, "writing root inode");
}
err:
@@ -2511,7 +2565,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, inode_k),
buf.buf)))
- ret = reattach_inode(trans, &inode, snapshot);
+ ret = reattach_inode(trans, &inode);
goto out;
}
@@ -2557,7 +2611,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
if (ret)
break;
- ret = reattach_inode(trans, &inode, snapshot);
+ ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
}
break;
@@ -2787,7 +2841,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite
u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
bch2_inode_nlink_get(&u), link->count)) {
bch2_inode_nlink_set(&u, link->count);
- ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot);
+ ret = __bch2_fsck_write_inode(trans, &u);
}
fsck_err:
return ret;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index f414c69acc44..3e5bc01961b8 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -327,22 +327,20 @@ int bch2_inode_unpack(struct bkey_s_c k,
: bch2_inode_unpack_slowpath(k, unpacked);
}
-int bch2_inode_peek_nowarn(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bch_inode_unpacked *inode,
- subvol_inum inum, unsigned flags)
+int __bch2_inode_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags,
+ bool warn)
{
- struct bkey_s_c k;
u32 snapshot;
- int ret;
-
- ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn);
if (ret)
return ret;
- k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
- SPOS(0, inum.inum, snapshot),
- flags|BTREE_ITER_cached);
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes,
+ SPOS(0, inum.inum, snapshot),
+ flags|BTREE_ITER_cached);
ret = bkey_err(k);
if (ret)
return ret;
@@ -357,20 +355,12 @@ int bch2_inode_peek_nowarn(struct btree_trans *trans,
return 0;
err:
+ if (warn)
+ bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum);
bch2_trans_iter_exit(trans, iter);
return ret;
}
-int bch2_inode_peek(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bch_inode_unpacked *inode,
- subvol_inum inum, unsigned flags)
-{
- int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
- bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum);
- return ret;
-}
-
int bch2_inode_write_flags(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode,
@@ -387,9 +377,7 @@ int bch2_inode_write_flags(struct btree_trans *trans,
return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags);
}
-int __bch2_fsck_write_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *inode,
- u32 snapshot)
+int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
{
struct bkey_inode_buf *inode_p =
bch2_trans_kmalloc(trans, sizeof(*inode_p));
@@ -398,19 +386,17 @@ int __bch2_fsck_write_inode(struct btree_trans *trans,
return PTR_ERR(inode_p);
bch2_inode_pack(inode_p, inode);
- inode_p->inode.k.p.snapshot = snapshot;
+ inode_p->inode.k.p.snapshot = inode->bi_snapshot;
return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
&inode_p->inode.k_i,
BTREE_UPDATE_internal_snapshot_node);
}
-int bch2_fsck_write_inode(struct btree_trans *trans,
- struct bch_inode_unpacked *inode,
- u32 snapshot)
+int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
{
int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- __bch2_fsck_write_inode(trans, inode, snapshot));
+ __bch2_fsck_write_inode(trans, inode));
bch_err_fn(trans->c, ret);
return ret;
}
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index 695abd707cb6..9c1f67705684 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -97,10 +97,26 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
-int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *,
- struct bch_inode_unpacked *, subvol_inum, unsigned);
-int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
- struct bch_inode_unpacked *, subvol_inum, unsigned);
+int __bch2_inode_peek(struct btree_trans *, struct btree_iter *,
+ struct bch_inode_unpacked *, subvol_inum, unsigned, bool);
+
+static inline int bch2_inode_peek_nowarn(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags)
+{
+ return __bch2_inode_peek(trans, iter, inode, inum, flags, false);
+}
+
+static inline int bch2_inode_peek(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bch_inode_unpacked *inode,
+ subvol_inum inum, unsigned flags)
+{
+ return __bch2_inode_peek(trans, iter, inode, inum, flags, true);
+ int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);
+ return ret;
+}
int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *,
struct bch_inode_unpacked *, enum btree_iter_update_trigger_flags);
@@ -112,8 +128,8 @@ static inline int bch2_inode_write(struct btree_trans *trans,
return bch2_inode_write_flags(trans, iter, inode, 0);
}
-int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32);
-int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32);
+int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *);
+int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *);
void bch2_inode_init_early(struct bch_fs *,
struct bch_inode_unpacked *);
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 177ed331c00b..307ed0a45184 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -224,13 +224,14 @@ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, str
static int truncate_set_isize(struct btree_trans *trans,
subvol_inum inum,
- u64 new_i_size)
+ u64 new_i_size,
+ bool warn)
{
struct btree_iter iter = { NULL };
struct bch_inode_unpacked inode_u;
int ret;
- ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent) ?:
+ ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?:
(inode_u.bi_size = new_i_size, 0) ?:
bch2_inode_write(trans, &iter, &inode_u);
@@ -247,10 +248,11 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k);
subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) };
u64 new_i_size = le64_to_cpu(op->v.new_i_size);
+ bool warn_errors = i_sectors_delta != NULL;
int ret;
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- truncate_set_isize(trans, inum, new_i_size));
+ truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL));
if (ret)
goto err;
@@ -263,8 +265,8 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
err:
- bch2_logged_op_finish(trans, op_k);
- bch_err_fn(c, ret);
+ if (warn_errors)
+ bch_err_fn(c, ret);
return ret;
}
@@ -288,9 +290,14 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec
* resume only proceeding in one of the snapshots
*/
down_read(&c->snapshot_create_lock);
- int ret = bch2_trans_run(c,
- bch2_logged_op_start(trans, &op.k_i) ?:
- __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret = bch2_logged_op_start(trans, &op.k_i);
+ if (ret)
+ goto out;
+ ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta);
+ ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret;
+out:
+ bch2_trans_put(trans);
up_read(&c->snapshot_create_lock);
return ret;
@@ -308,7 +315,8 @@ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, stru
prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset));
}
-static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
+static int adjust_i_size(struct btree_trans *trans, subvol_inum inum,
+ u64 offset, s64 len, bool warn)
{
struct btree_iter iter;
struct bch_inode_unpacked inode_u;
@@ -317,7 +325,7 @@ static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset
offset <<= 9;
len <<= 9;
- ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent);
+ ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn);
if (ret)
return ret;
@@ -357,12 +365,22 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans,
u64 len = abs(shift);
u64 pos = le64_to_cpu(op->v.pos);
bool insert = shift > 0;
+ u32 snapshot;
+ bool warn_errors = i_sectors_delta != NULL;
int ret = 0;
ret = bch2_inum_opts_get(trans, inum, &opts);
if (ret)
return ret;
+ /*
+ * check for missing subvolume before fpunch, as in resume we don't want
+ * it to be a fatal error
+ */
+ ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors);
+ if (ret)
+ return ret;
+
bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
POS(inum.inum, 0),
BTREE_ITER_intent);
@@ -373,7 +391,7 @@ case LOGGED_OP_FINSERT_start:
if (insert) {
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- adjust_i_size(trans, inum, src_offset, len) ?:
+ adjust_i_size(trans, inum, src_offset, len, warn_errors) ?:
bch2_logged_op_update(trans, &op->k_i));
if (ret)
goto err;
@@ -396,11 +414,11 @@ case LOGGED_OP_FINSERT_shift_extents:
struct bkey_i delete, *copy;
struct bkey_s_c k;
struct bpos src_pos = POS(inum.inum, src_offset);
- u32 snapshot;
bch2_trans_begin(trans);
- ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot,
+ warn_errors);
if (ret)
goto btree_err;
@@ -463,12 +481,12 @@ btree_err:
if (!insert) {
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- adjust_i_size(trans, inum, src_offset, shift) ?:
+ adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?:
bch2_logged_op_update(trans, &op->k_i));
} else {
/* We need an inode update to update bi_journal_seq for fsync: */
ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- adjust_i_size(trans, inum, 0, 0) ?:
+ adjust_i_size(trans, inum, 0, 0, warn_errors) ?:
bch2_logged_op_update(trans, &op->k_i));
}
@@ -477,9 +495,9 @@ case LOGGED_OP_FINSERT_finish:
break;
}
err:
- bch_err_fn(c, ret);
- bch2_logged_op_finish(trans, op_k);
bch2_trans_iter_exit(trans, &iter);
+ if (warn_errors)
+ bch_err_fn(c, ret);
return ret;
}
@@ -508,9 +526,14 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
* resume only proceeding in one of the snapshots
*/
down_read(&c->snapshot_create_lock);
- int ret = bch2_trans_run(c,
- bch2_logged_op_start(trans, &op.k_i) ?:
- __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
+ struct btree_trans *trans = bch2_trans_get(c);
+ int ret = bch2_logged_op_start(trans, &op.k_i);
+ if (ret)
+ goto out;
+ ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta);
+ ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret;
+out:
+ bch2_trans_put(trans);
up_read(&c->snapshot_create_lock);
return ret;
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index 6f4a4e1083c9..60e00702d1a4 100644
--- a/fs/bcachefs/logged_ops.c
+++ b/fs/bcachefs/logged_ops.c
@@ -34,8 +34,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
- const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
- struct bkey_buf sk;
u32 restart_count = trans->restart_count;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -46,13 +44,15 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
(bch2_bkey_val_to_text(&buf, c, k),
buf.buf));
- if (!fn)
- return 0;
-
+ struct bkey_buf sk;
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
- fn->resume(trans, sk.k);
+ const struct bch_logged_op_fn *fn = logged_op_fn(sk.k->k.type);
+ if (fn)
+ fn->resume(trans, sk.k);
+
+ ret = bch2_logged_op_finish(trans, sk.k);
bch2_bkey_buf_exit(&sk, c);
fsck_err:
@@ -93,7 +93,7 @@ int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
__bch2_logged_op_start(trans, k));
}
-void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
+int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
{
int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0));
@@ -113,4 +113,6 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k)
buf.buf, bch2_err_str(ret));
printbuf_exit(&buf);
}
+
+ return ret;
}
diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h
index 4d1e786a27a8..30ae9ef737dd 100644
--- a/fs/bcachefs/logged_ops.h
+++ b/fs/bcachefs/logged_ops.h
@@ -15,6 +15,6 @@ static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i
int bch2_resume_logged_ops(struct bch_fs *);
int bch2_logged_op_start(struct btree_trans *, struct bkey_i *);
-void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *);
+int bch2_logged_op_finish(struct btree_trans *, struct bkey_i *);
#endif /* _BCACHEFS_LOGGED_OPS_H */
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 96f2f4f8c397..10857eccdeaf 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
+#include "bkey_buf.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
@@ -118,7 +119,7 @@ fsck_err:
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,
- struct bpos *last_flushed_pos)
+ struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -132,11 +133,13 @@ static int bch2_check_lru_key(struct btree_trans *trans,
u64 idx;
int ret;
- if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos),
+ struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_pos);
+
+ if (fsck_err_on(!ca,
trans, lru_entry_to_invalid_bucket,
"lru key points to nonexistent device:bucket %llu:%llu",
alloc_pos.inode, alloc_pos.offset))
- return bch2_btree_delete_at(trans, lru_iter, 0);
+ return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false);
k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
ret = bkey_err(k);
@@ -150,18 +153,15 @@ static int bch2_check_lru_key(struct btree_trans *trans,
idx = alloc_lru_idx_read(*a);
break;
case BCH_LRU_fragmentation:
- idx = a->fragmentation_lru;
+ idx = alloc_lru_idx_fragmentation(*a, ca);
break;
}
if (lru_k.k->type != KEY_TYPE_set ||
lru_pos_time(lru_k.k->p) != idx) {
- if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) {
- *last_flushed_pos = lru_k.k->p;
- ret = bch2_btree_write_buffer_flush_sync(trans) ?:
- -BCH_ERR_transaction_restart_write_buffer_flush;
- goto out;
- }
+ ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed);
+ if (ret)
+ goto err;
if (fsck_err(trans, lru_entry_bad,
"incorrect lru entry: lru %s time %llu\n"
@@ -171,12 +171,12 @@ static int bch2_check_lru_key(struct btree_trans *trans,
lru_pos_time(lru_k.k->p),
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
- ret = bch2_btree_delete_at(trans, lru_iter, 0);
+ ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false);
}
-out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &iter);
+ bch2_dev_put(ca);
printbuf_exit(&buf2);
printbuf_exit(&buf1);
return ret;
@@ -184,12 +184,18 @@ fsck_err:
int bch2_check_lrus(struct bch_fs *c)
{
- struct bpos last_flushed_pos = POS_MIN;
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter,
BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
- bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
+ bch2_check_lru_key(trans, &iter, k, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
return ret;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 7d3920e03742..8c456d8b8b99 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -692,7 +692,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
a = bch2_alloc_to_v4(k, &a_convert);
dirty_sectors = bch2_bucket_sectors_dirty(*a);
bucket_size = ca->mi.bucket_size;
- fragmentation = a->fragmentation_lru;
+ fragmentation = alloc_lru_idx_fragmentation(*a, ca);
ret = bch2_btree_write_buffer_tryflush(trans);
bch_err_msg(c, ret, "flushing btree write buffer");
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index d86565bf07c8..d658be90f737 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -73,6 +73,7 @@ move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b)
static int bch2_bucket_is_movable(struct btree_trans *trans,
struct move_bucket *b, u64 time)
{
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
struct bch_alloc_v4 _a;
@@ -90,14 +91,19 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (ret)
return ret;
+ struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode);
+ if (!ca)
+ goto out;
+
a = bch2_alloc_to_v4(k, &_a);
b->k.gen = a->gen;
b->sectors = bch2_bucket_sectors_dirty(*a);
+ u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
- ret = data_type_movable(a->data_type) &&
- a->fragmentation_lru &&
- a->fragmentation_lru <= time;
+ ret = lru_idx && lru_idx <= time;
+ bch2_dev_put(ca);
+out:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index ed5dca5e1161..4135b1ea2fec 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -115,8 +115,8 @@ enum bch_fsck_flags {
x(alloc_key_data_type_inconsistency, 101, 0) \
x(alloc_key_to_missing_dev_bucket, 102, 0) \
x(alloc_key_cached_inconsistency, 103, 0) \
- x(alloc_key_cached_but_read_time_zero, 104, 0) \
- x(alloc_key_to_missing_lru_entry, 105, 0) \
+ x(alloc_key_cached_but_read_time_zero, 104, FSCK_AUTOFIX) \
+ x(alloc_key_to_missing_lru_entry, 105, FSCK_AUTOFIX) \
x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \
x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \
x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \
@@ -129,20 +129,20 @@ enum bch_fsck_flags {
x(freespace_key_wrong, 115, 0) \
x(freespace_hole_missing, 116, 0) \
x(bucket_gens_val_size_bad, 117, 0) \
- x(bucket_gens_key_wrong, 118, 0) \
- x(bucket_gens_hole_wrong, 119, 0) \
- x(bucket_gens_to_invalid_dev, 120, 0) \
- x(bucket_gens_to_invalid_buckets, 121, 0) \
- x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \
+ x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \
+ x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \
+ x(bucket_gens_to_invalid_dev, 120, FSCK_AUTOFIX) \
+ x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \
+ x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
x(need_discard_freespace_key_bad, 124, 0) \
x(backpointer_bucket_offset_wrong, 125, 0) \
x(backpointer_to_missing_device, 126, 0) \
x(backpointer_to_missing_alloc, 127, 0) \
x(backpointer_to_missing_ptr, 128, 0) \
- x(lru_entry_at_time_0, 129, 0) \
- x(lru_entry_to_invalid_bucket, 130, 0) \
- x(lru_entry_bad, 131, 0) \
+ x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \
+ x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \
+ x(lru_entry_bad, 131, FSCK_AUTOFIX) \
x(btree_ptr_val_too_big, 132, 0) \
x(btree_ptr_v2_val_too_big, 133, 0) \
x(btree_ptr_has_non_ptr, 134, 0) \
@@ -158,9 +158,9 @@ enum bch_fsck_flags {
x(ptr_after_last_bucket, 144, 0) \
x(ptr_before_first_bucket, 145, 0) \
x(ptr_spans_multiple_buckets, 146, 0) \
- x(ptr_to_missing_backpointer, 147, 0) \
- x(ptr_to_missing_alloc_key, 148, 0) \
- x(ptr_to_missing_replicas_entry, 149, 0) \
+ x(ptr_to_missing_backpointer, 147, FSCK_AUTOFIX) \
+ x(ptr_to_missing_alloc_key, 148, FSCK_AUTOFIX) \
+ x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \
x(ptr_to_missing_stripe, 150, 0) \
x(ptr_to_incorrect_stripe, 151, 0) \
x(ptr_gen_newer_than_bucket_gen, 152, 0) \
@@ -194,7 +194,7 @@ enum bch_fsck_flags {
x(snapshot_skiplist_not_normalized, 180, 0) \
x(snapshot_skiplist_bad, 181, 0) \
x(snapshot_should_not_have_subvol, 182, 0) \
- x(snapshot_to_bad_snapshot_tree, 183, 0) \
+ x(snapshot_to_bad_snapshot_tree, 183, FSCK_AUTOFIX) \
x(snapshot_bad_depth, 184, 0) \
x(snapshot_bad_skiplist, 185, 0) \
x(subvol_pos_bad, 186, 0) \
@@ -211,6 +211,7 @@ enum bch_fsck_flags {
x(inode_unlinked_but_clean, 197, 0) \
x(inode_unlinked_but_nlink_nonzero, 198, 0) \
x(inode_unlinked_and_not_open, 281, 0) \
+ x(inode_unlinked_but_has_dirent, 285, 0) \
x(inode_checksum_type_invalid, 199, 0) \
x(inode_compression_type_invalid, 200, 0) \
x(inode_subvol_root_but_not_dir, 201, 0) \
@@ -219,6 +220,8 @@ enum bch_fsck_flags {
x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \
x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \
x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \
+ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \
+ x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \
x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \
x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \
x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
@@ -295,7 +298,7 @@ enum bch_fsck_flags {
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
- x(MAX, 284, 0)
+ x(MAX, 287, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index 6845dde1b339..91d8187ee168 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -102,7 +102,8 @@ static int check_subvol(struct btree_trans *trans,
inode.bi_inum, inode.bi_snapshot,
inode.bi_subvol, subvol.k->p.offset)) {
inode.bi_subvol = subvol.k->p.offset;
- ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
+ inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot);
+ ret = __bch2_fsck_write_inode(trans, &inode);
if (ret)
goto err;
}
@@ -331,8 +332,8 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
}
-int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
- u32 *snapid)
+int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
+ u32 *snapid, bool warn)
{
struct btree_iter iter;
struct bkey_s_c_subvolume subvol;
@@ -343,7 +344,8 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
BTREE_ITER_cached|BTREE_ITER_with_updates,
subvolume);
ret = bkey_err(subvol);
- bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
+
+ bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c,
"missing subvolume %u", subvolid);
if (likely(!ret))
@@ -352,6 +354,12 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
return ret;
}
+int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
+ u32 *snapid)
+{
+ return __bch2_subvolume_get_snapshot(trans, subvolid, snapid, true);
+}
+
static int bch2_subvolume_reparent(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index e62f876541fe..f897d106e142 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -26,6 +26,8 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
int bch2_subvol_has_children(struct btree_trans *, u32);
int bch2_subvolume_get(struct btree_trans *, unsigned,
bool, int, struct bch_subvolume *);
+int __bch2_subvolume_get_snapshot(struct btree_trans *, u32,
+ u32 *, bool);
int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 42f565c76181..e0a876cbaa6b 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -222,7 +222,7 @@ u64 bch2_read_flag_list(const char *opt, const char * const list[])
break;
}
- ret |= 1 << flag;
+ ret |= BIT_ULL(flag);
}
kfree(d);