author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-02-19 13:41:36 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:17 -0400 |
commit | 68ef94a63caf214ee238434bf0d4c7a6a32c33a2 (patch) | |
tree | 903e1c65cc599f98f6417e0ca845247b850b9148 /fs | |
parent | 9ace606e93e9c6dff919ca8f35d461e8462590b7 (diff) | |
bcachefs: Add a pre-reserve mechanism for the journal
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/journal.c         | 52 |
-rw-r--r-- | fs/bcachefs/journal.h         | 89 |
-rw-r--r-- | fs/bcachefs/journal_io.c      |  6 |
-rw-r--r-- | fs/bcachefs/journal_reclaim.c | 43 |
-rw-r--r-- | fs/bcachefs/journal_types.h   | 27 |
5 files changed, 208 insertions, 9 deletions
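
The patch tracks pre-reservations in a second packed 64-bit counter, `union journal_preres_state`: one 32-bit half holds `reserved` (space already promised to callers), the other `remaining` (space the journal can still absorb), and both are checked and updated by a single compare-and-swap so the fast path needs no lock. Below is a minimal userspace sketch of that pattern; the names (`preres_state`, `preres_get_fast`) and the C11 `stdatomic` harness are stand-ins for the kernel's `journal_preres_state` and `bch2_journal_preres_get_fast()`, not part of the patch.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Toy model of the packed reserved/remaining counter: both fields live in
 * one 64-bit word so a single compare-and-swap can check the budget and
 * claim the space atomically.
 */
union preres_state {
	uint64_t	v;
	struct {
		uint32_t	reserved;	/* u64s promised to callers */
		uint32_t	remaining;	/* u64s the journal can absorb */
	};
};

static _Atomic uint64_t prereserved;

/* Try to grow our pre-reservation to new_u64s; returns true on success. */
static bool preres_get_fast(unsigned *res_u64s, unsigned new_u64s)
{
	union preres_state new;
	uint64_t v = atomic_load(&prereserved);
	unsigned d = new_u64s - *res_u64s;

	do {
		new.v = v;
		new.reserved += d;

		if (new.reserved > new.remaining)
			return false;	/* over budget: caller must reclaim or wait */
	} while (!atomic_compare_exchange_weak(&prereserved, &v, new.v));

	*res_u64s = new_u64s;
	return true;
}

int main(void)
{
	union preres_state init = { .remaining = 512 };
	unsigned res = 0;

	atomic_store(&prereserved, init.v);

	printf("get 100 u64s -> %d\n", preres_get_fast(&res, 100));	/* 1: fits */
	printf("get 600 u64s -> %d\n", preres_get_fast(&res, 600));	/* 0: over budget */
	return 0;
}
```

The same `reserved <= remaining` comparison (plus a check that free journal pin slots remain) is what `journal_check_may_get_unreserved()` in the diff below uses to decide whether ordinary, non-pre-reserved journal reservations may still be handed out.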
```diff
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index ba6adf11ef42..0aae8fd74c8a 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -343,6 +343,16 @@ retry:
 		return 0;
 	}
 
+	if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+	    !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+		/*
+		 * Don't want to close current journal entry, just need to
+		 * invoke reclaim:
+		 */
+		ret = -ENOSPC;
+		goto unlock;
+	}
+
 	/*
 	 * If we couldn't get a reservation because the current buf filled up,
 	 * and we had room for a bigger entry on disk, signal that we want to
@@ -366,7 +376,7 @@ retry:
 	} else {
 		ret = journal_entry_open(j);
 	}
-
+unlock:
 	if ((ret == -EAGAIN || ret == -ENOSPC) &&
 	    !j->res_get_blocked_start)
 		j->res_get_blocked_start = local_clock() ?: 1;
@@ -378,6 +388,8 @@ retry:
 		goto retry;
 
 	if (ret == -ENOSPC) {
+		BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
+
 		/*
 		 * Journal is full - can't rely on reclaim from work item due to
 		 * freezing:
@@ -423,6 +435,32 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	return ret;
 }
 
+/* journal_preres: */
+
+static bool journal_preres_available(struct journal *j,
+				     struct journal_preres *res,
+				     unsigned new_u64s)
+{
+	bool ret = bch2_journal_preres_get_fast(j, res, new_u64s);
+
+	if (!ret)
+		bch2_journal_reclaim_work(&j->reclaim_work.work);
+
+	return ret;
+}
+
+int __bch2_journal_preres_get(struct journal *j,
+			      struct journal_preres *res,
+			      unsigned new_u64s)
+{
+	int ret;
+
+	closure_wait_event(&j->preres_wait,
+		   (ret = bch2_journal_error(j)) ||
+		   journal_preres_available(j, res, new_u64s));
+	return ret;
+}
+
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *j,
@@ -1110,11 +1148,16 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
 		 "seq:\t\t\t%llu\n"
 		 "last_seq:\t\t%llu\n"
 		 "last_seq_ondisk:\t%llu\n"
+		 "prereserved:\t\t%u/%u\n"
+		 "current entry sectors:\t%u\n"
 		 "current entry:\t\t",
 		 fifo_used(&j->pin),
 		 journal_cur_seq(j),
 		 journal_last_seq(j),
-		 j->last_seq_ondisk);
+		 j->last_seq_ondisk,
+		 j->prereserved.reserved,
+		 j->prereserved.remaining,
+		 j->cur_entry_sectors);
 
 	switch (s.cur_entry_offset) {
 	case JOURNAL_ENTRY_ERROR_VAL:
@@ -1136,8 +1179,9 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
 		       journal_state_count(s, s.idx));
 
 	if (s.prev_buf_unwritten)
-		pr_buf(&out, "yes, ref %u\n",
-		       journal_state_count(s, !s.idx));
+		pr_buf(&out, "yes, ref %u sectors %u\n",
+		       journal_state_count(s, !s.idx),
+		       journal_prev_buf(j)->sectors);
 	else
 		pr_buf(&out, "no\n");
 
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 77d59fb0b151..809cf25f5a03 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -119,6 +119,7 @@ static inline void journal_wake(struct journal *j)
 {
 	wake_up(&j->wait);
 	closure_wake_up(&j->async_wait);
+	closure_wake_up(&j->preres_wait);
 }
 
 static inline struct journal_buf *journal_cur_buf(struct journal *j)
@@ -274,6 +275,7 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
 
 #define JOURNAL_RES_GET_NONBLOCK	(1 << 0)
 #define JOURNAL_RES_GET_CHECK		(1 << 1)
+#define JOURNAL_RES_GET_RESERVED	(1 << 2)
 
 static inline int journal_res_get_fast(struct journal *j,
 				       struct journal_res *res,
@@ -294,6 +296,10 @@ static inline int journal_res_get_fast(struct journal *j,
 
 		EBUG_ON(!journal_state_count(new, new.idx));
 
+		if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+		    !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
+			return 0;
+
 		if (flags & JOURNAL_RES_GET_CHECK)
 			return 1;
 
@@ -333,6 +339,89 @@ out:
 	return 0;
 }
 
+/* journal_preres: */
+
+static inline bool journal_check_may_get_unreserved(struct journal *j)
+{
+	union journal_preres_state s = READ_ONCE(j->prereserved);
+	bool ret = s.reserved <= s.remaining &&
+		fifo_free(&j->pin) > 8;
+
+	lockdep_assert_held(&j->lock);
+
+	if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+		if (ret) {
+			set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
+			journal_wake(j);
+		} else {
+			clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
+		}
+	}
+	return ret;
+}
+
+static inline void bch2_journal_preres_put(struct journal *j,
+					   struct journal_preres *res)
+{
+	union journal_preres_state s = { .reserved = res->u64s };
+
+	if (!res->u64s)
+		return;
+
+	s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
+	res->u64s = 0;
+	closure_wake_up(&j->preres_wait);
+
+	if (s.reserved <= s.remaining &&
+	    !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+		spin_lock(&j->lock);
+		journal_check_may_get_unreserved(j);
+		spin_unlock(&j->lock);
+	}
+}
+
+int __bch2_journal_preres_get(struct journal *,
+			      struct journal_preres *, unsigned);
+
+static inline int bch2_journal_preres_get_fast(struct journal *j,
+					       struct journal_preres *res,
+					       unsigned new_u64s)
+{
+	int d = new_u64s - res->u64s;
+	union journal_preres_state old, new;
+	u64 v = atomic64_read(&j->prereserved.counter);
+
+	do {
+		old.v = new.v = v;
+
+		new.reserved += d;
+
+		if (new.reserved > new.remaining)
+			return 0;
+	} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
+				       old.v, new.v)) != old.v);
+
+	res->u64s += d;
+	return 1;
+}
+
+static inline int bch2_journal_preres_get(struct journal *j,
+					  struct journal_preres *res,
+					  unsigned new_u64s,
+					  unsigned flags)
+{
+	if (new_u64s <= res->u64s)
+		return 0;
+
+	if (bch2_journal_preres_get_fast(j, res, new_u64s))
+		return 0;
+
+	if (flags & JOURNAL_RES_GET_NONBLOCK)
+		return -EAGAIN;
+
+	return __bch2_journal_preres_get(j, res, new_u64s);
+}
+
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 07cfbb975c37..db95257cec11 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -974,6 +974,12 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
 					    journal_space_discarded)) {
 			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
 			ja->sectors_free = ca->mi.bucket_size;
+
+			/*
+			 * ja->bucket_seq[ja->cur_idx] must always have
+			 * something sensible:
+			 */
+			ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
 		}
 	}
 
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index a3c53b78ad10..053fa4aa4f5f 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -49,6 +49,18 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
 	return available;
 }
 
+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+{
+	union journal_preres_state old, new;
+	u64 v = atomic64_read(&j->prereserved.counter);
+
+	do {
+		old.v = new.v = v;
+		new.remaining = u64s_remaining;
+	} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
+				       old.v, new.v)) != old.v);
+}
+
 static struct journal_space {
 	unsigned	next_entry;
 	unsigned	remaining;
@@ -124,8 +136,9 @@ void bch2_journal_space_available(struct journal *j)
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	struct journal_space discarded, clean_ondisk, clean;
-	unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
-				      j->buf[1].buf_size >> 9);
+	unsigned overhead, u64s_remaining = 0;
+	unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
+				      j->buf[1].buf_size >> 9);
 	unsigned i, nr_online = 0, nr_devs_want;
 	bool can_discard = false;
 	int ret = 0;
@@ -176,9 +189,17 @@ void bch2_journal_space_available(struct journal *j)
 
 	if (!discarded.next_entry)
 		ret = -ENOSPC;
+
+	overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) *
+		journal_entry_overhead(j);
+	u64s_remaining = clean.remaining << 6;
+	u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
+	u64s_remaining /= 4;
 out:
 	j->cur_entry_sectors	= !ret ? discarded.next_entry : 0;
 	j->cur_entry_error	= ret;
+	journal_set_remaining(j, u64s_remaining);
+	journal_check_may_get_unreserved(j);
 
 	if (!ret)
 		journal_wake(j);
@@ -454,7 +475,7 @@ void bch2_journal_reclaim(struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
-	unsigned iter, bucket_to_flush, min_nr = 0;
+	unsigned iter, min_nr = 0;
 	u64 seq_to_flush = 0;
 
 	lockdep_assert_held(&j->reclaim_lock);
@@ -465,13 +486,22 @@ void bch2_journal_reclaim(struct journal *j)
 
 	for_each_rw_member(ca, c, iter) {
 		struct journal_device *ja = &ca->journal;
+		unsigned nr_buckets, bucket_to_flush;
 
 		if (!ja->nr)
 			continue;
 
-		/* Try to keep the journal at most half full: */
-		bucket_to_flush = (ja->cur_idx + (ja->nr >> 1)) % ja->nr;
+		nr_buckets = ja->nr / 2;
+
+		/* And include pre-reservations: */
+		nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
+					   (ca->mi.bucket_size << 6) -
+					   journal_entry_overhead(j));
+
+		nr_buckets = min(nr_buckets, ja->nr);
+
+		bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
 		seq_to_flush = max_t(u64, seq_to_flush,
 				     ja->bucket_seq[bucket_to_flush]);
 	}
@@ -490,6 +520,9 @@ void bch2_journal_reclaim(struct journal *j)
 			msecs_to_jiffies(j->reclaim_delay_ms)))
 		min_nr = 1;
 
+	if (j->prereserved.reserved * 2 > j->prereserved.remaining)
+		min_nr = 1;
+
 	journal_flush_pins(j, seq_to_flush, min_nr);
 
 	if (!test_bit(BCH_FS_RO, &c->flags))
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index c91a21e07809..85bf5e2706f7 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -80,6 +80,14 @@ struct journal_res {
 	u64			seq;
 };
 
+/*
+ * For reserving space in the journal prior to getting a reservation on a
+ * particular journal entry:
+ */
+struct journal_preres {
+	unsigned		u64s;
+};
+
 union journal_res_state {
 	struct {
 		atomic64_t	counter;
@@ -98,6 +106,21 @@ union journal_res_state {
 	};
 };
 
+union journal_preres_state {
+	struct {
+		atomic64_t	counter;
+	};
+
+	struct {
+		u64		v;
+	};
+
+	struct {
+		u32		reserved;
+		u32		remaining;
+	};
+};
+
 /* bytes: */
 #define JOURNAL_ENTRY_SIZE_MIN		(64U << 10) /* 64k */
 #define JOURNAL_ENTRY_SIZE_MAX		(4U << 20) /* 4M */
@@ -122,6 +145,7 @@ enum {
 	JOURNAL_STARTED,
 	JOURNAL_NEED_WRITE,
 	JOURNAL_NOT_EMPTY,
+	JOURNAL_MAY_GET_UNRESERVED,
 };
 
 /* Embedded in struct bch_fs */
@@ -142,6 +166,8 @@ struct journal {
 	 */
 	int			cur_entry_error;
 
+	union journal_preres_state prereserved;
+
 	/* Reserved space in journal entry to be used just prior to write */
 	unsigned		entry_u64s_reserved;
 
@@ -161,6 +187,7 @@ struct journal {
 	/* Used when waiting because the journal was full */
 	wait_queue_head_t	wait;
 	struct closure_waitlist	async_wait;
+	struct closure_waitlist	preres_wait;
 
 	struct closure		io;
 	struct delayed_work	write_work;
```
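
The patch adds the pre-reserve interface but no callers of it. The sketch below shows one way a caller might be expected to pair the two reservation types, based on the flag added here; `example_update()` is a hypothetical helper, and it assumes the existing `bch2_journal_res_get()` / `bch2_journal_res_put()` wrappers keep their usual `(journal, res, u64s, flags)` form.

```c
/*
 * Hypothetical caller (not part of the patch): take the pre-reservation in
 * a context where blocking on journal reclaim is still safe, then pass
 * JOURNAL_RES_GET_RESERVED when taking the actual reservation.
 */
static int example_update(struct journal *j, unsigned u64s)
{
	struct journal_preres preres = { 0 };
	struct journal_res res = { 0 };
	int ret;

	/* May kick reclaim and wait until the space can be pre-reserved: */
	ret = bch2_journal_preres_get(j, &preres, u64s, 0);
	if (ret)
		return ret;

	/*
	 * Later, in a context that must not wait on reclaim, consume the
	 * pre-reservation.  The RESERVED flag lets this succeed even when
	 * JOURNAL_MAY_GET_UNRESERVED is clear:
	 */
	ret = bch2_journal_res_get(j, &res, u64s, JOURNAL_RES_GET_RESERVED);
	if (ret)
		goto out;

	/* ... copy keys into the journal entry via the reservation ... */

	bch2_journal_res_put(j, &res);
out:
	bch2_journal_preres_put(j, &preres);
	return ret;
}
```

Since `prereserved.remaining` is derived from clean journal space (roughly a quarter of it, after subtracting per-entry overhead) and `bch2_journal_reclaim()` now flushes enough extra buckets to cover `prereserved.reserved`, a pre-reservation taken this way corresponds to space that reclaim will eventually make available; that appears to be why a later `JOURNAL_RES_GET_RESERVED` request is allowed to bypass the `JOURNAL_MAY_GET_UNRESERVED` check.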