Diffstat (limited to 'fs/bcachefs/recovery.c')
-rw-r--r-- | fs/bcachefs/recovery.c | 134
1 file changed, 90 insertions, 44 deletions
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 1f9d044ed920..d89eb43c5ce9 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -10,6 +10,7 @@
 #include "btree_io.h"
 #include "buckets.h"
 #include "dirent.h"
+#include "disk_accounting.h"
 #include "errcode.h"
 #include "error.h"
 #include "fs-common.h"
@@ -90,6 +91,7 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
 	__set_bit_le64(BCH_FSCK_ERR_freespace_hole_missing, ext->errors_silent);
 	__set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_backpointer, ext->errors_silent);
 	__set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent);
+	__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
 	c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
 
 	bch2_write_super(c);
@@ -134,6 +136,45 @@ static void replay_now_at(struct journal *j, u64 seq)
 	bch2_journal_pin_put(j, j->replay_journal_seq++);
 }
 
+static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
+					      struct journal_key *k)
+{
+	struct btree_iter iter;
+	bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
+				  BTREE_MAX_DEPTH, k->level,
+				  BTREE_ITER_intent);
+	int ret = bch2_btree_iter_traverse(&iter);
+	if (ret)
+		goto out;
+
+	struct bkey u;
+	struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u);
+
+	/* Has this delta already been applied to the btree? */
+	if (bversion_cmp(old.k->version, k->k->k.version) >= 0) {
+		ret = 0;
+		goto out;
+	}
+
+	struct bkey_i *new = k->k;
+	if (old.k->type == KEY_TYPE_accounting) {
+		new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k));
+		ret = PTR_ERR_OR_ZERO(new);
+		if (ret)
+			goto out;
+
+		bch2_accounting_accumulate(bkey_i_to_accounting(new),
+					   bkey_s_c_to_accounting(old));
+	}
+
+	trans->journal_res.seq = k->journal_seq;
+
+	ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun);
+out:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
 static int bch2_journal_replay_key(struct btree_trans *trans,
 				   struct journal_key *k)
 {
@@ -184,6 +225,11 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
 	if (k->overwritten)
 		goto out;
 
+	if (k->k->k.type == KEY_TYPE_accounting) {
+		ret = bch2_trans_update_buffered(trans, BTREE_ID_accounting, k->k);
+		goto out;
+	}
+
 	ret = bch2_trans_update(trans, &iter, k->k, update_flags);
 out:
 	bch2_trans_iter_exit(trans, &iter);
@@ -222,6 +268,30 @@ int bch2_journal_replay(struct bch_fs *c)
 	trans = bch2_trans_get(c);
 
 	/*
+	 * Replay accounting keys first: we can't allow the write buffer to
+	 * flush accounting keys until we're done
+	 */
+	darray_for_each(*keys, k) {
+		if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated))
+			continue;
+
+		cond_resched();
+
+		ret = commit_do(trans, NULL, NULL,
+				BCH_TRANS_COMMIT_no_enospc|
+				BCH_TRANS_COMMIT_journal_reclaim|
+				BCH_TRANS_COMMIT_skip_accounting_apply|
+				BCH_TRANS_COMMIT_no_journal_res,
+			     bch2_journal_replay_accounting_key(trans, k));
+		if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret)))
+			goto err;
+
+		k->overwritten = true;
+	}
+
+	set_bit(BCH_FS_accounting_replay_done, &c->flags);
+
+	/*
 	 * First, attempt to replay keys in sorted order. This is more
 	 * efficient - better locality of btree access - but some might fail if
 	 * that would cause a journal deadlock.
@@ -241,9 +311,10 @@ int bch2_journal_replay(struct bch_fs *c)
 			commit_do(trans, NULL, NULL,
 				  BCH_TRANS_COMMIT_no_enospc|
 				  BCH_TRANS_COMMIT_journal_reclaim|
+				  BCH_TRANS_COMMIT_skip_accounting_apply|
 				  (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
 			     bch2_journal_replay_key(trans, k));
-		BUG_ON(!ret && !k->overwritten);
+		BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting);
 		if (ret) {
 			ret = darray_push(&keys_sorted, k);
 			if (ret)
@@ -271,6 +342,7 @@ int bch2_journal_replay(struct bch_fs *c)
 
 		ret = commit_do(trans, NULL, NULL,
 				BCH_TRANS_COMMIT_no_enospc|
+				BCH_TRANS_COMMIT_skip_accounting_apply|
 				(!k->allocated
 				 ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
 				 : 0),
@@ -280,7 +352,7 @@ int bch2_journal_replay(struct bch_fs *c)
 		if (ret)
 			goto err;
 
-		BUG_ON(!k->overwritten);
+		BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten);
 	}
 
 	/*
@@ -355,45 +427,10 @@ static int journal_replay_entry_early(struct bch_fs *c,
 			container_of(entry, struct jset_entry_usage, entry);
 
 		switch (entry->btree_id) {
-		case BCH_FS_USAGE_reserved:
-			if (entry->level < BCH_REPLICAS_MAX)
-				c->usage_base->persistent_reserved[entry->level] =
-					le64_to_cpu(u->v);
-			break;
-		case BCH_FS_USAGE_inodes:
-			c->usage_base->b.nr_inodes = le64_to_cpu(u->v);
-			break;
 		case BCH_FS_USAGE_key_version:
-			atomic64_set(&c->key_version,
-				     le64_to_cpu(u->v));
+			atomic64_set(&c->key_version, le64_to_cpu(u->v));
 			break;
 		}
-
-		break;
-	}
-	case BCH_JSET_ENTRY_data_usage: {
-		struct jset_entry_data_usage *u =
-			container_of(entry, struct jset_entry_data_usage, entry);
-
-		ret = bch2_replicas_set_usage(c, &u->r,
-					      le64_to_cpu(u->v));
-		break;
-	}
-	case BCH_JSET_ENTRY_dev_usage: {
-		struct jset_entry_dev_usage *u =
-			container_of(entry, struct jset_entry_dev_usage, entry);
-		unsigned nr_types = jset_entry_dev_usage_nr_types(u);
-
-		rcu_read_lock();
-		struct bch_dev *ca = bch2_dev_rcu(c, le32_to_cpu(u->dev));
-		if (ca)
-			for (unsigned i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
-				ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
-				ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
-				ca->usage_base->d[i].fragmented = le64_to_cpu(u->d[i].fragmented);
-			}
-		rcu_read_unlock();
-
 		break;
 	}
 	case BCH_JSET_ENTRY_blacklist: {
@@ -454,8 +491,6 @@
 		}
 	}
 
-	bch2_fs_usage_initialize(c);
-
 	return 0;
 }
 
@@ -810,6 +845,10 @@ use_clean:
 	if (ret)
 		goto err;
 
+	set_bit(BCH_FS_btree_running, &c->flags);
+
+	ret = bch2_sb_set_upgrade_extra(c);
+
 	ret = bch2_run_recovery_passes(c);
 	if (ret)
 		goto err;
@@ -969,14 +1008,12 @@ int bch2_fs_initialize(struct bch_fs *c)
 	mutex_unlock(&c->sb_lock);
 
 	c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
+	set_bit(BCH_FS_btree_running, &c->flags);
 	set_bit(BCH_FS_may_go_rw, &c->flags);
 
 	for (unsigned i = 0; i < BTREE_ID_NR; i++)
 		bch2_btree_root_alloc_fake(c, i, 0);
 
-	for_each_member_device(c, ca)
-		bch2_dev_usage_init(ca);
-
 	ret = bch2_fs_journal_alloc(c);
 	if (ret)
 		goto err;
@@ -986,12 +1023,21 @@ int bch2_fs_initialize(struct bch_fs *c)
 	 * set up the journal.pin FIFO and journal.cur pointer:
 	 */
 	bch2_fs_journal_start(&c->journal, 1);
+	set_bit(BCH_FS_accounting_replay_done, &c->flags);
 	bch2_journal_set_replay_done(&c->journal);
 
 	ret = bch2_fs_read_write_early(c);
 	if (ret)
 		goto err;
 
+	for_each_member_device(c, ca) {
+		ret = bch2_dev_usage_init(ca, false);
+		if (ret) {
+			bch2_dev_put(ca);
+			goto err;
+		}
+	}
+
 	/*
 	 * Write out the superblock and journal buckets, now that we can do
 	 * btree updates
@@ -1025,7 +1071,7 @@ int bch2_fs_initialize(struct bch_fs *c)
 	bch2_inode_pack(&packed_inode, &root_inode);
 	packed_inode.inode.k.p.snapshot = U32_MAX;
 
-	ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0);
+	ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0, 0);
 	bch_err_msg(c, ret, "creating root directory");
 	if (ret)
 		goto err;
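For readers unfamiliar with the replay path added above: bch2_journal_replay_accounting_key() skips a journalled accounting delta whose version the btree key already reflects, and otherwise folds the delta into the existing key before updating it. The standalone C sketch below models only that version-gated accumulation idea; it is not bcachefs code, and the acct_key/acct_delta types and replay_delta() helper are invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for a versioned accounting key
 * and a journalled delta; real bcachefs keys carry far more state. */
struct acct_key {
	uint64_t version;	/* version of the last delta applied */
	int64_t  sectors;	/* running counter */
};

struct acct_delta {
	uint64_t version;	/* version assigned when journalled */
	int64_t  sectors;	/* change to apply */
};

/*
 * Replay one journalled delta against the in-btree key: if the key's
 * version is already >= the delta's version, the delta was applied
 * before the crash and must not be counted twice; otherwise
 * accumulate it and advance the version.
 */
static void replay_delta(struct acct_key *k, const struct acct_delta *d)
{
	if (k->version >= d->version)
		return;		/* already applied, skip */

	k->sectors += d->sectors;
	k->version  = d->version;
}

int main(void)
{
	struct acct_key key = { .version = 2, .sectors = 100 };
	struct acct_delta deltas[] = {
		{ .version = 1, .sectors = 10 },	/* stale, skipped */
		{ .version = 2, .sectors = 20 },	/* stale, skipped */
		{ .version = 3, .sectors = 30 },	/* newer, applied */
	};

	for (unsigned i = 0; i < 3; i++)
		replay_delta(&key, &deltas[i]);

	/* prints: sectors = 130, version = 3 */
	printf("sectors = %lld, version = %llu\n",
	       (long long)key.sectors, (unsigned long long)key.version);
	return 0;
}

Replaying every journalled delta this way is idempotent, which is why the commit can replay all accounting keys up front (and only then set BCH_FS_accounting_replay_done) without risking double-counting if recovery is interrupted and rerun.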