diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-03-02 01:54:17 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:55 -0400 |
commit | e84face6f0c9512d896eb1bf6c8238ea2fa7edd0 (patch) | |
tree | 802f0dde21c3311cf7a0245f379e0f1d2450bc29 /fs | |
parent | d57c9add59b187a6fcd76cb80d60f36234ca8033 (diff) |
bcachefs: RESERVE_stripe
Rework the stripe creation path: introduce a new algorithm for deciding
when to create new stripes and when to reuse existing ones.
We add a new allocation watermark, RESERVE_stripe, above RESERVE_none:
bch2_dev_buckets_reserved() adds a further nbuckets >> 6 for it on top of
the RESERVE_none reserve.
We then always try to create a new stripe first by doing RESERVE_stripe
allocations; if this fails, we reuse an existing stripe and allocate
buckets for it with the reserve watermark for the given write
(RESERVE_none or RESERVE_movinggc).
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/alloc_background.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/alloc_types.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/ec.c | 73 | ||||
-rw-r--r-- | fs/bcachefs/errcode.h | 1 |
5 files changed, 60 insertions, 22 deletions
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index c9ff590ef978..324798396fc6 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -216,7 +216,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca, u64 free = max_t(s64, 0, u.d[BCH_DATA_free].buckets + u.d[BCH_DATA_need_discard].buckets - - bch2_dev_buckets_reserved(ca, RESERVE_none)); + - bch2_dev_buckets_reserved(ca, RESERVE_stripe)); return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets); } diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index c8a45ea9d661..4d09bd20d8ec 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -22,7 +22,8 @@ struct ec_bucket_buf; x(btree_movinggc) \ x(btree) \ x(movinggc) \ - x(none) + x(none) \ + x(stripe) enum alloc_reserve { #define x(name) RESERVE_##name, diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 22721bfea414..d677b0225c52 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -157,6 +157,9 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum alloc_reser switch (reserve) { case RESERVE_NR: unreachable(); + case RESERVE_stripe: + reserved += ca->mi.nbuckets >> 6; + fallthrough; case RESERVE_none: reserved += ca->mi.nbuckets >> 6; fallthrough; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index d206da686da8..6bf14f975d93 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1569,6 +1569,17 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri BUG_ON(h->s->existing_stripe.size != h->blocksize); BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors); + /* + * Free buckets we initially allocated - they might conflict with + * blocks from the stripe we're reusing: + */ + for_each_set_bit(i, h->s->blocks_gotten, h->s->new_stripe.key.v.nr_blocks) { + bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); + h->s->blocks[i] = 0; + } + 
memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); + memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); + for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) { __set_bit(i, h->s->blocks_gotten); @@ -1649,8 +1660,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct ec_stripe_head *h; + bool waiting = false; int ret; - bool needs_stripe_new; h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, reserve); if (!h) @@ -1658,8 +1669,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, if (IS_ERR_OR_NULL(h)) return h; - needs_stripe_new = !h->s; - if (needs_stripe_new) { + if (!h->s) { if (ec_new_stripe_alloc(c, h)) { ret = -ENOMEM; bch_err(c, "failed to allocate new stripe"); @@ -1670,30 +1680,53 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, BUG(); } - /* - * Try reserve a new stripe before reusing an - * existing stripe. This will prevent unnecessary - * read amplification during write oriented workloads. 
- */ - ret = 0; - if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe) - ret = __bch2_ec_stripe_head_reserve(trans, h); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto err; + if (h->s->allocated) + goto allocated; - if (ret && needs_stripe_new) - ret = __bch2_ec_stripe_head_reuse(trans, h); - if (ret) + if (h->s->have_existing_stripe) + goto alloc_existing; + + /* First, try to allocate a full stripe: */ + ret = new_stripe_alloc_buckets(trans, h, RESERVE_stripe, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h); + if (!ret) + goto allocated; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, ENOMEM)) goto err; - if (!h->s->allocated) { - ret = new_stripe_alloc_buckets(trans, h, reserve, cl); - if (ret) + /* + * Not enough buckets available for a full stripe: we must reuse an + * existing stripe: + */ + while (1) { + ret = __bch2_ec_stripe_head_reuse(trans, h); + if (!ret) + break; + if (ret == -BCH_ERR_ENOSPC_stripe_reuse && cl) + ret = -BCH_ERR_stripe_alloc_blocked; + if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) goto err; - h->s->allocated = true; + /* XXX freelist_wait? 
*/ + closure_wait(&c->freelist_wait, cl); + waiting = true; } + if (waiting) + closure_wake_up(&c->freelist_wait); +alloc_existing: + /* + * Retry allocating buckets, with the reserve watermark for this + * particular write: + */ + ret = new_stripe_alloc_buckets(trans, h, reserve, cl); + if (ret) + goto err; +allocated: + h->s->allocated = true; + BUG_ON(!h->s->idx); + BUG_ON(trans->restarted); return h; err: diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 6129af6129c3..283303db7dfd 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -93,6 +93,7 @@ x(BCH_ERR_operation_blocked, journal_res_get_blocked) \ x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \ x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \ + x(BCH_ERR_operation_blocked, stripe_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ x(BCH_ERR_invalid_sb, invalid_sb_magic) \ x(BCH_ERR_invalid_sb, invalid_sb_version) \ |