summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_write_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_write_buffer.c')
-rw-r--r--fs/bcachefs/btree_write_buffer.c140
1 files changed, 128 insertions, 12 deletions
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index d0e92d948002..3f56b584f8ec 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -6,6 +6,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
+#include "disk_accounting.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
@@ -134,7 +135,9 @@ static noinline int wb_flush_one_slowpath(struct btree_trans *trans,
static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
struct btree_write_buffered_key *wb,
- bool *write_locked, size_t *fast)
+ bool *write_locked,
+ bool *accounting_accumulated,
+ size_t *fast)
{
struct btree_path *path;
int ret;
@@ -147,6 +150,16 @@ static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *ite
if (ret)
return ret;
+ if (!*accounting_accumulated && wb->k.k.type == KEY_TYPE_accounting) {
+ struct bkey u;
+ struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, iter), &u);
+
+ if (k.k->type == KEY_TYPE_accounting)
+ bch2_accounting_accumulate(bkey_i_to_accounting(&wb->k),
+ bkey_s_c_to_accounting(k));
+ }
+ *accounting_accumulated = true;
+
/*
* We can't clone a path that has write locks: unshare it now, before
* set_pos and traverse():
@@ -259,8 +272,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
struct journal *j = &c->journal;
struct btree_write_buffer *wb = &c->btree_write_buffer;
struct btree_iter iter = { NULL };
- size_t skipped = 0, fast = 0, slowpath = 0;
+ size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
bool write_locked = false;
+ bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
int ret = 0;
bch2_trans_unlock(trans);
@@ -301,11 +315,22 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
BUG_ON(!k->journal_seq);
+ if (!accounting_replay_done &&
+ k->k.k.type == KEY_TYPE_accounting) {
+ slowpath++;
+ continue;
+ }
+
if (i + 1 < &darray_top(wb->sorted) &&
wb_key_eq(i, i + 1)) {
struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
- skipped++;
+ if (k->k.k.type == KEY_TYPE_accounting &&
+ n->k.k.type == KEY_TYPE_accounting)
+ bch2_accounting_accumulate(bkey_i_to_accounting(&n->k),
+ bkey_i_to_s_c_accounting(&k->k));
+
+ overwritten++;
n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
k->journal_seq = 0;
continue;
@@ -340,13 +365,15 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bch2_btree_iter_set_pos(&iter, k->k.k.p);
btree_iter_path(trans, &iter)->preserve = false;
+ bool accounting_accumulated = false;
do {
if (race_fault()) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
- ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
+ ret = wb_flush_one(trans, &iter, k, &write_locked,
+ &accounting_accumulated, &fast);
if (!write_locked)
bch2_trans_begin(trans);
} while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
@@ -387,8 +414,15 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
if (!i->journal_seq)
continue;
- bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
- bch2_btree_write_buffer_journal_flush);
+ if (!accounting_replay_done &&
+ i->k.k.type == KEY_TYPE_accounting) {
+ could_not_insert++;
+ continue;
+ }
+
+ if (!could_not_insert)
+ bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
+ bch2_btree_write_buffer_journal_flush);
bch2_trans_begin(trans);
@@ -401,13 +435,45 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
btree_write_buffered_insert(trans, i));
if (ret)
goto err;
+
+ i->journal_seq = 0;
+ }
+
+ /*
+ * If journal replay hasn't finished with accounting keys we
+ * can't flush accounting keys at all - condense them and leave
+ * them for next time.
+ *
+ * Q: Can the write buffer overflow?
+ * A Shouldn't be any actual risk. It's just new accounting
+ * updates that the write buffer can't flush, and those are only
+ * going to be generated by interior btree node updates as
+ * journal replay has to split/rewrite nodes to make room for
+ * its updates.
+ *
+ * And for those new acounting updates, updates to the same
+ * counters get accumulated as they're flushed from the journal
+ * to the write buffer - see the patch for eytzingcer tree
+ * accumulated. So we could only overflow if the number of
+ * distinct counters touched somehow was very large.
+ */
+ if (could_not_insert) {
+ struct btree_write_buffered_key *dst = wb->flushing.keys.data;
+
+ darray_for_each(wb->flushing.keys, i)
+ if (i->journal_seq)
+ *dst++ = *i;
+ wb->flushing.keys.nr = dst - wb->flushing.keys.data;
}
}
err:
+ if (ret || !could_not_insert) {
+ bch2_journal_pin_drop(j, &wb->flushing.pin);
+ wb->flushing.keys.nr = 0;
+ }
+
bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
- trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
- bch2_journal_pin_drop(j, &wb->flushing.pin);
- wb->flushing.keys.nr = 0;
+ trace_write_buffer_flush(trans, wb->flushing.keys.nr, overwritten, fast, 0);
return ret;
}
@@ -494,7 +560,7 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
return ret;
}
-/**
+/*
* In check and repair code, when checking references to write buffer btrees we
* need to issue a flush before we have a definitive error: this issues a flush
* if this is a key we haven't yet checked.
@@ -544,6 +610,29 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
bch2_write_ref_put(c, BCH_WRITE_REF_btree_write_buffer);
}
+static void wb_accounting_sort(struct btree_write_buffer *wb)
+{
+ eytzinger0_sort(wb->accounting.data, wb->accounting.nr,
+ sizeof(wb->accounting.data[0]),
+ wb_key_cmp, NULL);
+}
+
+int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree,
+ struct bkey_i_accounting *k)
+{
+ struct btree_write_buffer *wb = &c->btree_write_buffer;
+ struct btree_write_buffered_key new = { .btree = btree };
+
+ bkey_copy(&new.k, &k->k_i);
+
+ int ret = darray_push(&wb->accounting, new);
+ if (ret)
+ return ret;
+
+ wb_accounting_sort(wb);
+ return 0;
+}
+
int bch2_journal_key_to_wb_slowpath(struct bch_fs *c,
struct journal_keys_to_wb *dst,
enum btree_id btree, struct bkey_i *k)
@@ -613,11 +702,35 @@ void bch2_journal_keys_to_write_buffer_start(struct bch_fs *c, struct journal_ke
bch2_journal_pin_add(&c->journal, seq, &dst->wb->pin,
bch2_btree_write_buffer_journal_flush);
+
+ darray_for_each(wb->accounting, i)
+ memset(&i->k.v, 0, bkey_val_bytes(&i->k.k));
}
-void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
+int bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys_to_wb *dst)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
+ unsigned live_accounting_keys = 0;
+ int ret = 0;
+
+ darray_for_each(wb->accounting, i)
+ if (!bch2_accounting_key_is_zero(bkey_i_to_s_c_accounting(&i->k))) {
+ i->journal_seq = dst->seq;
+ live_accounting_keys++;
+ ret = __bch2_journal_key_to_wb(c, dst, i->btree, &i->k);
+ if (ret)
+ break;
+ }
+
+ if (live_accounting_keys * 2 < wb->accounting.nr) {
+ struct btree_write_buffered_key *dst = wb->accounting.data;
+
+ darray_for_each(wb->accounting, src)
+ if (!bch2_accounting_key_is_zero(bkey_i_to_s_c_accounting(&src->k)))
+ *dst++ = *src;
+ wb->accounting.nr = dst - wb->accounting.data;
+ wb_accounting_sort(wb);
+ }
if (!dst->wb->keys.nr)
bch2_journal_pin_drop(&c->journal, &dst->wb->pin);
@@ -630,6 +743,8 @@ void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys
if (dst->wb == &wb->flushing)
mutex_unlock(&wb->flushing.lock);
mutex_unlock(&wb->inc.lock);
+
+ return ret;
}
static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
@@ -653,7 +768,7 @@ static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_bu
buf->need_flush_to_write_buffer = false;
spin_unlock(&c->journal.lock);
out:
- bch2_journal_keys_to_write_buffer_end(c, &dst);
+ ret = bch2_journal_keys_to_write_buffer_end(c, &dst) ?: ret;
return ret;
}
@@ -685,6 +800,7 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c)
BUG_ON((wb->inc.keys.nr || wb->flushing.keys.nr) &&
!bch2_journal_error(&c->journal));
+ darray_exit(&wb->accounting);
darray_exit(&wb->sorted);
darray_exit(&wb->flushing.keys);
darray_exit(&wb->inc.keys);