Diffstat (limited to 'fs/bcachefs/btree_write_buffer.c')
-rw-r--r-- | fs/bcachefs/btree_write_buffer.c | 140
1 file changed, 128 insertions, 12 deletions
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index d0e92d948002..3f56b584f8ec 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -6,6 +6,7 @@
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_write_buffer.h"
+#include "disk_accounting.h"
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
@@ -134,7 +135,9 @@ static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
 static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
                struct btree_write_buffered_key *wb,
-               bool *write_locked, size_t *fast)
+               bool *write_locked,
+               bool *accounting_accumulated,
+               size_t *fast)
 {
     struct btree_path *path;
     int ret;

@@ -147,6 +150,16 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
     if (ret)
         return ret;

+    if (!*accounting_accumulated && wb->k.k.type == KEY_TYPE_accounting) {
+        struct bkey u;
+        struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, iter), &u);
+
+        if (k.k->type == KEY_TYPE_accounting)
+            bch2_accounting_accumulate(bkey_i_to_accounting(&wb->k),
+                           bkey_s_c_to_accounting(k));
+    }
+    *accounting_accumulated = true;
+
     /*
      * We can't clone a path that has write locks: unshare it now, before
      * set_pos and traverse():
@@ -259,8 +272,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
     struct journal *j = &c->journal;
     struct btree_write_buffer *wb = &c->btree_write_buffer;
     struct btree_iter iter = { NULL };
-    size_t skipped = 0, fast = 0, slowpath = 0;
+    size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
     bool write_locked = false;
+    bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
     int ret = 0;

     bch2_trans_unlock(trans);
@@ -301,11 +315,22 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)

         BUG_ON(!k->journal_seq);

+        if (!accounting_replay_done &&
+            k->k.k.type == KEY_TYPE_accounting) {
+            slowpath++;
+            continue;
+        }
+
         if (i + 1 < &darray_top(wb->sorted) &&
             wb_key_eq(i, i + 1)) {
             struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];

-            skipped++;
+            if (k->k.k.type == KEY_TYPE_accounting &&
+                n->k.k.type == KEY_TYPE_accounting)
+                bch2_accounting_accumulate(bkey_i_to_accounting(&n->k),
+                               bkey_i_to_s_c_accounting(&k->k));
+
+            overwritten++;
             n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
             k->journal_seq = 0;
             continue;
@@ -340,13 +365,15 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
         bch2_btree_iter_set_pos(&iter, k->k.k.p);
         btree_iter_path(trans, &iter)->preserve = false;

+        bool accounting_accumulated = false;
         do {
             if (race_fault()) {
                 ret = -BCH_ERR_journal_reclaim_would_deadlock;
                 break;
             }

-            ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
+            ret = wb_flush_one(trans, &iter, k, &write_locked,
+                       &accounting_accumulated, &fast);
             if (!write_locked)
                 bch2_trans_begin(trans);
         } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
@@ -387,8 +414,15 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
             if (!i->journal_seq)
                 continue;

-            bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
-                        bch2_btree_write_buffer_journal_flush);
+            if (!accounting_replay_done &&
+                i->k.k.type == KEY_TYPE_accounting) {
+                could_not_insert++;
+                continue;
+            }
+
+            if (!could_not_insert)
+                bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
+                            bch2_btree_write_buffer_journal_flush);

             bch2_trans_begin(trans);

@@ -401,13 +435,45 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                     btree_write_buffered_insert(trans, i));
             if (ret)
                 goto err;
+
+            i->journal_seq = 0;
         }
+
+        /*
+         * If journal replay hasn't finished with accounting keys we
+         * can't flush accounting keys at all - condense them and leave
+         * them for next time.
+         *
+         * Q: Can the write buffer overflow?
+         * A: Shouldn't be any actual risk. It's just new accounting
+         * updates that the write buffer can't flush, and those are only
+         * going to be generated by interior btree node updates as
+         * journal replay has to split/rewrite nodes to make room for
+         * its updates.
+         *
+         * And for those new accounting updates, updates to the same
+         * counters get accumulated as they're flushed from the journal
+         * to the write buffer - see the patch for eytzinger tree
+         * accumulation. So we could only overflow if the number of
+         * distinct counters touched somehow was very large.
+         */
+        if (could_not_insert) {
+            struct btree_write_buffered_key *dst = wb->flushing.keys.data;
+
+            darray_for_each(wb->flushing.keys, i)
+                if (i->journal_seq)
+                    *dst++ = *i;
+            wb->flushing.keys.nr = dst - wb->flushing.keys.data;
+        }
     }
 err:
+    if (ret || !could_not_insert) {
+        bch2_journal_pin_drop(j, &wb->flushing.pin);
+        wb->flushing.keys.nr = 0;
+    }
+
     bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
-    trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
-    bch2_journal_pin_drop(j, &wb->flushing.pin);
-    wb->flushing.keys.nr = 0;
+    trace_write_buffer_flush(trans, wb->flushing.keys.nr, overwritten, fast, 0);
     return ret;
 }

@@ -494,7 +560,7 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
     return ret;
 }

-/**
+/*
  * In check and repair code, when checking references to write buffer btrees we
  * need to issue a flush before we have a definitive error: this issues a flush
  * if this is a key we haven't yet checked.
@@ -544,6 +610,29 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
     bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
 }

+static void wb_accounting_sort(struct btree_write_buffer *wb)
+{
+    eytzinger0_sort(wb->accounting.data, wb->accounting.nr,
+            sizeof(wb->accounting.data[0]),
+            wb_key_cmp, NULL);
+}
+
+int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree,
+                       struct bkey_i_accounting *k)
+{
+    struct btree_write_buffer *wb = &c->btree_write_buffer;
+    struct btree_write_buffered_key new = { .btree = btree };
+
+    bkey_copy(&new.k, &k->k_i);
+
+    int ret = darray_push(&wb->accounting, new);
+    if (ret)
+        return ret;
+
+    wb_accounting_sort(wb);
+    return 0;
+}
+
 int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
                     struct journal_keys_to_wb *dst,
                     enum btree_id btree, struct bkey_i *k)
@@ -613,11 +702,35 @@ void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_ke
     bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
                  bch2_btree_write_buffer_journal_flush);
+
+    darray_for_each(wb->accounting, i)
+        memset(&i->k.v, 0, bkey_val_bytes(&i->k.k));
 }

-void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
+int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
 {
     struct btree_write_buffer *wb = &c->btree_write_buffer;
+    unsigned live_accounting_keys = 0;
+    int ret = 0;
+
+    darray_for_each(wb->accounting, i)
+        if (!bch2_accounting_key_is_zero(bkey_i_to_s_c_accounting(&i->k))) {
+            i->journal_seq = dst->seq;
+            live_accounting_keys++;
+            ret = __bch2_journal_key_to_wb(c, dst, i->btree, &i->k);
+            if (ret)
+                break;
+        }
+
+    if (live_accounting_keys * 2 < wb->accounting.nr) {
+        struct btree_write_buffered_key *dst = wb->accounting.data;
+
+        darray_for_each(wb->accounting, src)
+            if (!bch2_accounting_key_is_zero(bkey_i_to_s_c_accounting(&src->k)))
+                *dst++ = *src;
+        wb->accounting.nr = dst - wb->accounting.data;
+        wb_accounting_sort(wb);
+    }

     if (!dst->wb->keys.nr)
         bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
@@ -630,6 +743,8 @@ void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys

     if (dst->wb == &wb->flushing)
         mutex_unlock(&wb->flushing.lock);
     mutex_unlock(&wb->inc.lock);
+
+    return ret;
 }

@@ -653,7 +768,7 @@ static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_bu
     buf->need_flush_to_write_buffer = false;
     spin_unlock(&c->journal.lock);
 out:
-    bch2_journal_keys_to_write_buffer_end(c, &dst);
+    ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret;

     return ret;
 }
@@ -685,6 +800,7 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
     BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
            !bch2_journal_error(&c->journal));

+    darray_exit(&wb->accounting);
     darray_exit(&wb->sorted);
     darray_exit(&wb->flushing.keys);
     darray_exit(&wb->inc.keys);
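Notes on the mechanics of this patch follow. The sketches are illustrative, under stated assumptions; they are not the kernel implementation.

Accounting keys carry deltas rather than absolute values, which is why wb_flush_one() and the dedup pass both merge duplicate keys with bch2_accounting_accumulate() instead of letting the newer key overwrite the older one. A minimal sketch of that merge, assuming an accounting value is a small array of u64 counters; the struct and function names here are hypothetical stand-ins:

    /*
     * Illustrative only: merging two accounting deltas. The real helper,
     * bch2_accounting_accumulate(), operates on bkey_i_accounting; the
     * layout here is an assumption for the sketch.
     */
    #include <stdint.h>

    #define ACCT_MAX_COUNTERS 4    /* assumed bound, for the sketch */

    struct acct_delta {
        unsigned nr;                    /* counters in use */
        uint64_t d[ACCT_MAX_COUNTERS];  /* per-counter deltas */
    };

    /* dst += src, element-wise: accumulation order doesn't matter */
    static void acct_accumulate(struct acct_delta *dst, const struct acct_delta *src)
    {
        for (unsigned i = 0; i < src->nr; i++)
            dst->d[i] += src->d[i];
    }

Because the merge is plain addition, condensing any number of buffered updates to the same counter into one key is lossless, which is what makes the "condense them and leave them for next time" strategy in the could_not_insert path safe.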
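The could_not_insert path and the dead-key garbage collection in bch2_journal_keys_to_write_buffer_end() use the same in-place compaction idiom: walk the darray with a write cursor, keep live entries, then truncate to the cursor. A generic sketch over a plain array; the entry layout is hypothetical:

    /*
     * Sketch of the in-place "condense" filter used on wb->flushing.keys
     * and wb->accounting: copy surviving entries down over dead ones,
     * then truncate. O(n), stable, no allocation.
     */
    #include <stddef.h>
    #include <stdint.h>

    struct entry {
        uint64_t journal_seq;   /* 0 means "already flushed" */
        int payload;
    };

    static size_t condense(struct entry *v, size_t nr)
    {
        struct entry *dst = v;

        for (struct entry *src = v; src < v + nr; src++)
            if (src->journal_seq)   /* keep only live entries */
                *dst++ = *src;

        return dst - v;             /* new length */
    }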
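wb_accounting_sort() keeps the wb->accounting darray in eytzinger0 order: a binary search tree flattened breadth-first into an array, giving binary-search comparison counts with better cache locality than bsearch() on a plain sorted array, since each node's children sit at predictable nearby indices. A sketch of lookup in that layout, assuming the root lives at index 0 and the children of index i at 2i+1 and 2i+2; the helper name is made up (the kernel's fs/bcachefs/eytzinger.h provides its own sort and search helpers):

    /* Hypothetical lookup in an eytzinger0-ordered array of ints. */
    #include <stddef.h>

    static const int *eytzinger0_search(const int *data, size_t nr, int search)
    {
        size_t i = 0;

        while (i < nr) {
            if (data[i] == search)
                return &data[i];
            /* smaller keys live in the left subtree, larger in the right */
            i = 2 * i + (data[i] < search ? 2 : 1);
        }
        return NULL;
    }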
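Finally, the _start()/_end() changes implement a per-journal-buf cycle for the retained accounting keys: every value is zeroed at _start(), deltas accumulate in place as keys stream out of the journal, and _end() emits only the keys whose accumulated delta is non-zero, compacting the array once fewer than half the keys are live. A compact sketch of that protocol with hypothetical types; emit() stands in for __bch2_journal_key_to_wb():

    #include <stddef.h>
    #include <stdint.h>

    struct acct_slot {
        uint64_t id;    /* which counter set */
        int64_t delta;  /* accumulated delta; 0 means nothing to flush */
    };

    /* mirrors the memset of each retained key's value in ..._start() */
    static void acct_cycle_start(struct acct_slot *s, size_t nr)
    {
        for (size_t i = 0; i < nr; i++)
            s[i].delta = 0;
    }

    /* mirrors the bch2_accounting_key_is_zero() filter in ..._end() */
    static size_t acct_cycle_end(struct acct_slot *s, size_t nr,
                                 void (*emit)(const struct acct_slot *))
    {
        size_t live = 0;

        for (size_t i = 0; i < nr; i++)
            if (s[i].delta) {
                emit(&s[i]);
                live++;
            }
        return live;    /* caller compacts if live * 2 < nr */
    }

Note also that bch2_journal_keys_to_write_buffer() now propagates failures from _end() with "ret = ... ?: ret;", the GNU C conditional with omitted middle operand, so an earlier error is preserved in preference to a later one.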