diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-19 17:27:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-19 17:27:25 -0700 |
commit | a4145ce1e7bc247fd6f2846e8699473448717b37 (patch) | |
tree | 7d91d160ff98dcbe3e6519070893dec3ecd93f96 /fs | |
parent | 78c3925c048c752334873f56c3a3d1c9d53e0416 (diff) | |
parent | 2e92d26b25432ec3399cb517beb0a79a745ec60f (diff) |
Merge tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet:
"Assorted bugfixes.
Most are fixes for simple assertion failures; the most significant fix is
for a deadlock in recovery when we have to rewrite large numbers of
btree nodes to fix errors. The rewrites were incorrectly being run from the
same workqueue as the core interior btree update path - we now give them
their own single-threaded workqueue.
This was visible to users as 'bch2_btree_update_start(): error:
BCH_ERR_journal_reclaim_would_deadlock', followed by recovery hanging"
* tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs:
bcachefs: Fix lost wakeup on journal shutdown
bcachefs: Fix deadlock in bch2_btree_update_start()
bcachefs: ratelimit errors from async_btree_node_rewrite
bcachefs: Run check_topology() first
bcachefs: Improve bch2_fatal_error()
bcachefs: Fix lost transaction restart error
bcachefs: Don't corrupt journal keys gap buffer when dropping alloc info
bcachefs: fix for building in userspace
bcachefs: bch2_snapshot_is_ancestor() now safe to call in early recovery
bcachefs: Fix nested transaction restart handling in bch2_bucket_gens_init()
bcachefs: Improve sysfs internal/btree_updates
bcachefs: Split out btree_node_rewrite_worker
bcachefs: Fix locking in bch2_alloc_write_key()
bcachefs: Avoid extent entry type assertions in .invalid()
bcachefs: Fix spurious -BCH_ERR_transaction_restart_nested
bcachefs: Fix check_key_has_snapshot() call
bcachefs: Change "accounting overran journal reservation" to a warning
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/alloc_background.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 10 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 12 | ||||
-rw-r--r-- | fs/bcachefs/btree_key_cache.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 44 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_write_buffer.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/debug.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/ec.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/error.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/extents.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 33 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 12 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/logged_ops.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/recovery_types.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/snapshot.c | 32 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 33 | ||||
-rw-r--r-- | fs/bcachefs/util.h | 3 |
26 files changed, 157 insertions, 111 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c47f72f2bd58..893e38f9db80 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c) u8 gen = bch2_alloc_to_v4(k, &a)->gen; unsigned offset; struct bpos pos = alloc_gens_pos(iter.pos, &offset); + int ret2 = 0; if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, - bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); - if (ret) - break; + ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + if (ret2) + goto iter_err; have_bucket_gens_key = false; } @@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c) } g.v.gens[offset] = gen; - 0; +iter_err: + ret2; })); if (have_bucket_gens_key && !ret) @@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, bucket_journal_seq); if (ret) { bch2_fs_fatal_error(c, - "error setting bucket_needs_journal_commit: %i", ret); + "setting bucket_needs_journal_commit: %s", bch2_err_str(ret)); return ret; } } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index ca58193dd902..214b15c84d1f 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1356,15 +1356,17 @@ retry: /* Don't retry from all devices if we're out of open buckets: */ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { - int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, + int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, watermark, flags, cl); - if (!ret || - bch2_err_matches(ret, BCH_ERR_transaction_restart) || - bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + if (!ret2 || + bch2_err_matches(ret2, BCH_ERR_transaction_restart) || + bch2_err_matches(ret2, 
BCH_ERR_open_buckets_empty)) { + ret = ret2; goto alloc_done; + } } /* diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 339dc3e1dcd3..799aa32b6b4d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -849,6 +849,8 @@ struct bch_fs { struct workqueue_struct *btree_interior_update_worker; struct work_struct btree_interior_update_work; + struct workqueue_struct *btree_node_rewrite_worker; + struct list_head pending_node_rewrites; struct mutex pending_node_rewrites_lock; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 584aee7010de..bdaed29f084a 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans, *old, b->data_type); gc = *b; - percpu_up_read(&c->mark_lock); if (gc.data_type != old_gc.data_type || gc.dirty_sectors != old_gc.dirty_sectors) bch2_dev_usage_update_m(c, ca, &old_gc, &gc); + percpu_up_read(&c->mark_lock); if (metadata_only && gc.data_type != BCH_DATA_sb && diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 624c8287deb4..34df8ccc5fec 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, - "error decrypting btree node: %i", ret)) + "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && @@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, - "error decrypting btree node: %i\n", ret)) + "decrypting btree node: %s", bch2_err_str(ret))) goto fsck_err; sectors = vstruct_sectors(bne, c->block_bits); @@ -1338,7 +1338,7 @@ start: if (saw_error && !btree_node_read_error(b)) { printbuf_reset(&buf); bch2_bpos_to_text(&buf, b->key.k.p); - bch_info(c, "%s: 
rewriting btree node at btree=%s level=%u %s due to error", + bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); bch2_btree_node_rewrite_async(c, b); @@ -1874,8 +1874,8 @@ out: return; err: set_btree_node_noevict(b); - if (!bch2_err_matches(ret, EROFS)) - bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret)); + bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c, + "writing btree node: %s", bch2_err_str(ret)); goto out; } @@ -2131,7 +2131,7 @@ do_write: ret = bset_encrypt(c, i, b->written << 9); if (bch2_fs_fatal_err_on(ret, c, - "error encrypting btree node: %i\n", ret)) + "encrypting btree node: %s", bch2_err_str(ret))) goto err; nonce = btree_nonce(i, b->written << 9); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 8a71d43444b9..581edcb0911b 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) && !bch2_journal_error(j), c, - "error flushing key cache: %s", bch2_err_str(ret)); + "flushing key cache: %s", bch2_err_str(ret)); if (ret) goto out; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 642213ef9f79..b2f5f2e50f7e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -646,7 +646,7 @@ static void btree_update_nodes_written(struct btree_update *as) bch2_trans_unlock(trans); bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c, - "%s(): error %s", __func__, bch2_err_str(ret)); + "%s", bch2_err_str(ret)); err: if (as->b) { @@ -1067,13 +1067,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, flags &= ~BCH_WATERMARK_MASK; flags |= watermark; - if (!(flags & 
BCH_TRANS_COMMIT_journal_reclaim) && - watermark < c->journal.watermark) { + if (watermark < c->journal.watermark) { struct journal_res res = { 0 }; + unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK; + + if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && + watermark != BCH_WATERMARK_reclaim) + journal_flags |= JOURNAL_RES_GET_NONBLOCK; ret = drop_locks_do(trans, - bch2_journal_res_get(&c->journal, &res, 1, - watermark|JOURNAL_RES_GET_CHECK)); + bch2_journal_res_get(&c->journal, &res, 1, journal_flags)); + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) + ret = -BCH_ERR_journal_reclaim_would_deadlock; if (ret) return ERR_PTR(ret); } @@ -1117,6 +1122,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, closure_init(&as->cl, NULL); as->c = c; as->start_time = start_time; + as->ip_started = _RET_IP_; as->mode = BTREE_INTERIOR_NO_UPDATE; as->took_gc_lock = true; as->btree_id = path->btree_id; @@ -1192,7 +1198,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, err: bch2_btree_update_free(as, trans); if (!bch2_err_matches(ret, ENOSPC) && - !bch2_err_matches(ret, EROFS)) + !bch2_err_matches(ret, EROFS) && + ret != -BCH_ERR_journal_reclaim_would_deadlock) bch_err_fn_ratelimited(c, ret); return ERR_PTR(ret); } @@ -2114,7 +2121,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work) ret = bch2_trans_do(c, NULL, NULL, 0, async_btree_node_rewrite_trans(trans, a)); - bch_err_fn(c, ret); + bch_err_fn_ratelimited(c, ret); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); kfree(a); } @@ -2161,7 +2168,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); } - queue_work(c->btree_interior_update_worker, &a->work); + queue_work(c->btree_node_rewrite_worker, &a->work); } void bch2_do_pending_node_rewrites(struct bch_fs *c) @@ -2173,7 +2180,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c) list_del(&a->list); 
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); - queue_work(c->btree_interior_update_worker, &a->work); + queue_work(c->btree_node_rewrite_worker, &a->work); } mutex_unlock(&c->pending_node_rewrites_lock); } @@ -2441,12 +2448,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) mutex_lock(&c->btree_interior_update_lock); list_for_each_entry(as, &c->btree_interior_update_list, list) - prt_printf(out, "%p m %u w %u r %u j %llu\n", - as, - as->mode, - as->nodes_written, - closure_nr_remaining(&as->cl), - as->journal.seq); + prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + (void *) as->ip_started, + as->mode, + as->nodes_written, + closure_nr_remaining(&as->cl), + as->journal.seq); mutex_unlock(&c->btree_interior_update_lock); } @@ -2510,6 +2517,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, void bch2_fs_btree_interior_update_exit(struct bch_fs *c) { + if (c->btree_node_rewrite_worker) + destroy_workqueue(c->btree_node_rewrite_worker); if (c->btree_interior_update_worker) destroy_workqueue(c->btree_interior_update_worker); mempool_exit(&c->btree_interior_update_pool); @@ -2534,6 +2543,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c) if (!c->btree_interior_update_worker) return -BCH_ERR_ENOMEM_btree_interior_update_worker_init; + c->btree_node_rewrite_worker = + alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND); + if (!c->btree_node_rewrite_worker) + return -BCH_ERR_ENOMEM_btree_interior_update_worker_init; + if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, sizeof(struct btree_update))) return -BCH_ERR_ENOMEM_btree_interior_update_pool_init; diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 3439b03719c7..f651dd48aaa0 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -32,6 +32,7 @@ struct btree_update { struct closure cl; struct bch_fs *c; u64 start_time; + unsigned long 
ip_started; struct list_head list; struct list_head unwritten_list; diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index b77e7b382b66..5cbad8445782 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -378,7 +378,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) } } err: - bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)); + bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret)); trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0); bch2_journal_pin_drop(j, &wb->flushing.pin); wb->flushing.keys.nr = 0; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index c2f46b267b3a..96edf2c34d43 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -990,8 +990,8 @@ static int __trigger_extent(struct btree_trans *trans, ret = !gc ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors) : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true); - bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors", - __func__); + bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors", + bch2_err_str(ret)); if (ret) return ret; } @@ -1020,7 +1020,7 @@ static int __trigger_extent(struct btree_trans *trans, struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); - bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); + bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf); printbuf_exit(&buf); } if (ret) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index b1f147e6be4d..208ce6f0fc43 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -170,7 +170,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf); + 
bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf); printbuf_exit(&buf); } out: diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index b98e2c2b8bf0..082075244e16 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -448,7 +448,7 @@ int bch2_trigger_stripe(struct btree_trans *trans, struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, new); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf); printbuf_exit(&buf); return ret; } @@ -1868,10 +1868,10 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri return -BCH_ERR_stripe_alloc_blocked; ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); + bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, + "reading stripe key: %s", bch2_err_str(ret)); if (ret) { bch2_stripe_close(c, h->s); - if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret)); return ret; } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 94491190e09e..ae1d6674c512 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -191,9 +191,9 @@ do { \ void bch2_fatal_error(struct bch_fs *); -#define bch2_fs_fatal_error(c, ...) \ +#define bch2_fs_fatal_error(c, _msg, ...) 
\ do { \ - bch_err(c, __VA_ARGS__); \ + bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__); \ bch2_fatal_error(c); \ } while (0) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 6219f2c08e4c..fd2669cdd76f 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -108,17 +108,17 @@ static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *en static inline bool extent_entry_is_ptr(const union bch_extent_entry *e) { - return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; + return __extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr; } static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e) { - return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr; + return __extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr; } static inline bool extent_entry_is_crc(const union bch_extent_entry *e) { - switch (extent_entry_type(e)) { + switch (__extent_entry_type(e)) { case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: case BCH_EXTENT_ENTRY_crc128: diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3f073845bbd7..0ccee05f6887 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -108,7 +108,8 @@ retry: goto retry; bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c, - "inode %u:%llu not found when updating", + "%s: inode %u:%llu not found when updating", + bch2_err_str(ret), inode_inum(inode).subvol, inode_inum(inode).inum); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f48033be3f6b..47d4eefaba7b 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1114,10 +1114,9 @@ int bch2_check_inodes(struct bch_fs *c) return ret; } -static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) +static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; int ret = 0; s64 count2; @@ -1149,7 +1148,14 @@ static int check_i_sectors(struct btree_trans *trans, struct 
inode_walker *w) } fsck_err: bch_err_fn(c, ret); - return ret ?: trans_was_restarted(trans, restart_count); + return ret; +} + +static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) +{ + u32 restart_count = trans->restart_count; + return check_i_sectors_notnested(trans, w) ?: + trans_was_restarted(trans, restart_count); } struct extent_end { @@ -1533,7 +1539,7 @@ int bch2_check_extents(struct bch_fs *c) check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: check_extent_overbig(trans, &iter, k); })) ?: - check_i_sectors(trans, &w)); + check_i_sectors_notnested(trans, &w)); bch2_disk_reservation_put(c, &res); extent_ends_exit(&extent_ends); @@ -1563,10 +1569,9 @@ int bch2_check_indirect_extents(struct bch_fs *c) return ret; } -static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) +static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) { struct bch_fs *c = trans->c; - u32 restart_count = trans->restart_count; int ret = 0; s64 count2; @@ -1598,7 +1603,14 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) } fsck_err: bch_err_fn(c, ret); - return ret ?: trans_was_restarted(trans, restart_count); + return ret; +} + +static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) +{ + u32 restart_count = trans->restart_count; + return check_subdir_count_notnested(trans, w) ?: + trans_was_restarted(trans, restart_count); } static int check_dirent_inode_dirent(struct btree_trans *trans, @@ -2003,7 +2015,8 @@ int bch2_check_dirents(struct bch_fs *c) k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s))); + check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: + check_subdir_count_notnested(trans, &dir)); snapshots_seen_exit(&s); inode_walker_exit(&dir); @@ -2022,8 +2035,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, int ret; ret = 
check_key_has_snapshot(trans, iter, k); - if (ret) + if (ret < 0) return ret; + if (ret) + return 0; i = walk_inode(trans, inode, k); ret = PTR_ERR_OR_ZERO(i); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index f314b2e78ec3..9c9a25dbd613 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -511,18 +511,18 @@ retry: if (journal_res_get_fast(j, res, flags)) return 0; + if (bch2_journal_error(j)) + return -BCH_ERR_erofs_journal_err; + + if (j->blocked) + return -BCH_ERR_journal_res_get_blocked; + if ((flags & BCH_WATERMARK_MASK) < j->watermark) { ret = JOURNAL_ERR_journal_full; can_discard = j->can_discard; goto out; } - if (j->blocked) - return -BCH_ERR_journal_res_get_blocked; - - if (bch2_journal_error(j)) - return -BCH_ERR_erofs_journal_err; - if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) { ret = JOURNAL_ERR_max_in_flight; goto out; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index d76c3c0c203f..725fcf46f631 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1082,9 +1082,7 @@ reread: ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), j->encrypted_start, vstruct_end(j) - (void *) j->encrypted_start); - bch2_fs_fatal_err_on(ret, c, - "error decrypting journal entry: %s", - bch2_err_str(ret)); + bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)); mutex_lock(&jlist->lock); ret = journal_entry_add(c, ca, (struct journal_ptr) { @@ -1820,7 +1818,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) jset_entry_for_each_key(i, k) { ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k); if (ret) { - bch2_fs_fatal_error(c, "-ENOMEM flushing journal keys to btree write buffer"); + bch2_fs_fatal_error(c, "flushing journal keys to btree write buffer: %s", + bch2_err_str(ret)); bch2_journal_keys_to_write_buffer_end(c, &wb); return ret; } @@ -1848,7 +1847,8 @@ static int bch2_journal_write_prep(struct 
journal *j, struct journal_buf *w) bch2_journal_super_entries_add_common(c, &end, seq); u64s = (u64 *) end - (u64 *) start; - BUG_ON(u64s > j->entry_u64s_reserved); + + WARN_ON(u64s > j->entry_u64s_reserved); le32_add_cpu(&jset->u64s, u64s); @@ -1856,7 +1856,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) bytes = vstruct_bytes(jset); if (sectors > w->sectors) { - bch2_fs_fatal_error(c, "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)", + bch2_fs_fatal_error(c, ": journal write overran available space, %zu > %u (extra %u reserved %u/%u)", vstruct_bytes(jset), w->sectors << 9, u64s, w->u64s_reserved, j->entry_u64s_reserved); return -EINVAL; @@ -1884,8 +1884,7 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) ret = bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); - if (bch2_fs_fatal_err_on(ret, c, - "error decrypting journal entry: %i", ret)) + if (bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret))) return ret; jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index ad598105c587..9fac838d123e 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -101,8 +101,8 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - bch2_fs_fatal_error(c, "%s: error deleting logged operation %s: %s", - __func__, buf.buf, bch2_err_str(ret)); + bch2_fs_fatal_error(c, "deleting logged operation %s: %s", + buf.buf, bch2_err_str(ret)); printbuf_exit(&buf); } } diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 69e06a84dad4..0d2b82d8d11f 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -155,8 +155,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, 
if (bch2_err_matches(ret, EROFS)) return ret; - if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()", - __func__, bch2_err_str(ret))) + if (bch2_fs_fatal_err_on(ret, c, "%s: from bch2_btree_write_buffer_tryflush()", bch2_err_str(ret))) return ret; ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru, diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2af219aedfdb..03f9d6afe467 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -90,10 +90,12 @@ static void do_reconstruct_alloc(struct bch_fs *c) struct journal_keys *keys = &c->journal_keys; size_t src, dst; + move_gap(keys, keys->nr); + for (src = 0, dst = 0; src < keys->nr; src++) if (!btree_id_is_alloc(keys->data[src].btree_id)) keys->data[dst++] = keys->data[src]; - keys->nr = dst; + keys->nr = keys->gap = dst; } /* @@ -203,6 +205,8 @@ static int bch2_journal_replay(struct bch_fs *c) BUG_ON(!atomic_read(&keys->ref)); + move_gap(keys, keys->nr); + /* * First, attempt to replay keys in sorted order. 
This is more * efficient - better locality of btree access - but some might fail if diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index 1361e34d4e64..4959e95e7c74 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -13,11 +13,11 @@ * must never change: */ #define BCH_RECOVERY_PASSES() \ + x(check_topology, 4, 0) \ x(alloc_read, 0, PASS_ALWAYS) \ x(stripes_read, 1, PASS_ALWAYS) \ x(initialize_subvolumes, 2, 0) \ x(snapshots_read, 3, PASS_ALWAYS) \ - x(check_topology, 4, 0) \ x(check_allocations, 5, PASS_FSCK) \ x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index ac6ba04d5521..39debe814bf3 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -91,18 +91,20 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans, /* Snapshot nodes: */ -static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) +static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor) { - struct snapshot_table *t; - - rcu_read_lock(); - t = rcu_dereference(c->snapshots); - while (id && id < ancestor) id = __snapshot_t(t, id)->parent; + return id == ancestor; +} + +static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancestor) +{ + rcu_read_lock(); + bool ret = __bch2_snapshot_is_ancestor_early(rcu_dereference(c->snapshots), id, ancestor); rcu_read_unlock(); - return id == ancestor; + return ret; } static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor) @@ -120,13 +122,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) { - struct snapshot_table *t; bool ret; - EBUG_ON(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots); - rcu_read_lock(); - t = rcu_dereference(c->snapshots); + struct 
snapshot_table *t = rcu_dereference(c->snapshots); + + if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) { + ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor); + goto out; + } while (id && id < ancestor - IS_ANCESTOR_BITMAP) id = get_ancestor_below(t, id, ancestor); @@ -134,11 +138,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor) if (id && id < ancestor) { ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor); - EBUG_ON(ret != bch2_snapshot_is_ancestor_early(c, id, ancestor)); + EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor)); } else { ret = id == ancestor; } - +out: rcu_read_unlock(); return ret; @@ -547,7 +551,7 @@ static int check_snapshot_tree(struct btree_trans *trans, "snapshot tree points to missing subvolume:\n %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) || - fsck_err_on(!bch2_snapshot_is_ancestor_early(c, + fsck_err_on(!bch2_snapshot_is_ancestor(c, le32_to_cpu(subvol.snapshot), root_id), c, snapshot_tree_to_wrong_subvol, diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index bceac29f3d86..ad28e370b640 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -985,7 +985,7 @@ int bch2_write_super(struct bch_fs *c) prt_str(&buf, " > "); bch2_version_to_text(&buf, bcachefs_metadata_version_current); prt_str(&buf, ")"); - bch2_fs_fatal_error(c, "%s", buf.buf); + bch2_fs_fatal_error(c, ": %s", buf.buf); printbuf_exit(&buf); return -BCH_ERR_sb_not_downgraded; } @@ -1005,7 +1005,7 @@ int bch2_write_super(struct bch_fs *c) if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { bch2_fs_fatal_error(c, - "Superblock write was silently dropped! (seq %llu expected %llu)", + ": Superblock write was silently dropped! 
(seq %llu expected %llu)", le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); percpu_ref_put(&ca->io_ref); @@ -1015,7 +1015,7 @@ int bch2_write_super(struct bch_fs *c) if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { bch2_fs_fatal_error(c, - "Superblock modified by another process (seq %llu expected %llu)", + ": Superblock modified by another process (seq %llu expected %llu)", le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); percpu_ref_put(&ca->io_ref); @@ -1066,7 +1066,7 @@ int bch2_write_super(struct bch_fs *c) !can_mount_with_written || (can_mount_without_written && !can_mount_with_written), c, - "Unable to write superblock to sufficient devices (from %ps)", + ": Unable to write superblock to sufficient devices (from %ps)", (void *) _RET_IP_)) ret = -1; out: diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 233f864ed8b0..1ad6e5cd9476 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -87,20 +87,28 @@ const char * const bch2_fs_flag_strs[] = { NULL }; -void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...) +__printf(2, 0) +static void bch2_print_maybe_redirect(struct stdio_redirect *stdio, const char *fmt, va_list args) { - struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio; - - va_list args; - va_start(args, fmt); - if (likely(!stdio)) { - vprintk(fmt, args); - } else { +#ifdef __KERNEL__ + if (unlikely(stdio)) { if (fmt[0] == KERN_SOH[0]) fmt += 2; bch2_stdio_redirect_vprintf(stdio, true, fmt, args); + return; } +#endif + vprintk(fmt, args); +} + +void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...) +{ + struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio; + + va_list args; + va_start(args, fmt); + bch2_print_maybe_redirect(stdio, fmt, args); va_end(args); } @@ -110,14 +118,7 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...) 
va_list args; va_start(args, fmt); - if (likely(!stdio)) { - vprintk(fmt, args); - } else { - if (fmt[0] == KERN_SOH[0]) - fmt += 2; - - bch2_stdio_redirect_vprintf(stdio, true, fmt, args); - } + bch2_print_maybe_redirect(stdio, fmt, args); va_end(args); } diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 7ffbddb80400..175aee3074c7 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -683,6 +683,9 @@ static inline void __move_gap(void *array, size_t element_size, /* Move the gap in a gap buffer: */ #define move_gap(_d, _new_gap) \ do { \ + BUG_ON(_new_gap > (_d)->nr); \ + BUG_ON((_d)->gap > (_d)->nr); \ + \ __move_gap((_d)->data, sizeof((_d)->data[0]), \ (_d)->nr, (_d)->size, (_d)->gap, _new_gap); \ (_d)->gap = _new_gap; \ |