summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2019-02-19 13:41:36 -0500
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:17 -0400
commit 68ef94a63caf214ee238434bf0d4c7a6a32c33a2 (patch)
tree 903e1c65cc599f98f6417e0ca845247b850b9148 /fs
parent 9ace606e93e9c6dff919ca8f35d461e8462590b7 (diff)
bcachefs: Add a pre-reserve mechanism for the journal
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- fs/bcachefs/journal.c 52
-rw-r--r-- fs/bcachefs/journal.h 89
-rw-r--r-- fs/bcachefs/journal_io.c 6
-rw-r--r-- fs/bcachefs/journal_reclaim.c 43
-rw-r--r-- fs/bcachefs/journal_types.h 27
5 files changed, 208 insertions, 9 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index ba6adf11ef42..0aae8fd74c8a 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -343,6 +343,16 @@ retry:
return 0;
}
+ if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+ /*
+ * Don't want to close current journal entry, just need to
+ * invoke reclaim:
+ */
+ ret = -ENOSPC;
+ goto unlock;
+ }
+
/*
* If we couldn't get a reservation because the current buf filled up,
* and we had room for a bigger entry on disk, signal that we want to
@@ -366,7 +376,7 @@ retry:
} else {
ret = journal_entry_open(j);
}
-
+unlock:
if ((ret == -EAGAIN || ret == -ENOSPC) &&
!j->res_get_blocked_start)
j->res_get_blocked_start = local_clock() ?: 1;
@@ -378,6 +388,8 @@ retry:
goto retry;
if (ret == -ENOSPC) {
+ BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
+
/*
* Journal is full - can't rely on reclaim from work item due to
* freezing:
@@ -423,6 +435,32 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
return ret;
}
+/* journal_preres: */
+
+/*
+ * Wait condition used by __bch2_journal_preres_get(): make one attempt at
+ * bch2_journal_preres_get_fast(), and if it fails, call the journal reclaim
+ * work function directly on j->reclaim_work before reporting the failure.
+ *
+ * Returns true if the fast path succeeded (@res then holds @new_u64s),
+ * false otherwise.
+ */
+static bool journal_preres_available(struct journal *j,
+ struct journal_preres *res,
+ unsigned new_u64s)
+{
+ bool ret = bch2_journal_preres_get_fast(j, res, new_u64s);
+
+ /*
+ * The fast path failed: invoke reclaim from this context (presumably
+ * to free up journal space - confirm against
+ * bch2_journal_reclaim_work()).
+ */
+ if (!ret)
+ bch2_journal_reclaim_work(&j->reclaim_work.work);
+
+ return ret;
+}
+
+/*
+ * Slow path of bch2_journal_preres_get(): wait on j->preres_wait until either
+ * bch2_journal_error() reports an error or the pre-reservation can be taken.
+ *
+ * Returns 0 once @res holds @new_u64s, otherwise the nonzero value returned
+ * by bch2_journal_error().
+ */
+int __bch2_journal_preres_get(struct journal *j,
+ struct journal_preres *res,
+ unsigned new_u64s)
+{
+ int ret;
+
+ /*
+ * ret is assigned each time the condition is evaluated. A nonzero
+ * journal error short-circuits the || and ends the wait without
+ * attempting the reservation.
+ * NOTE(review): assumes closure_wait_event() evaluates the condition
+ * at least once, so ret is always initialized on return - confirm.
+ */
+ closure_wait_event(&j->preres_wait,
+ (ret = bch2_journal_error(j)) ||
+ journal_preres_available(j, res, new_u64s));
+ return ret;
+}
+
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *j,
@@ -1110,11 +1148,16 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
"seq:\t\t\t%llu\n"
"last_seq:\t\t%llu\n"
"last_seq_ondisk:\t%llu\n"
+ "prereserved:\t\t%u/%u\n"
+ "current entry sectors:\t%u\n"
"current entry:\t\t",
fifo_used(&j->pin),
journal_cur_seq(j),
journal_last_seq(j),
- j->last_seq_ondisk);
+ j->last_seq_ondisk,
+ j->prereserved.reserved,
+ j->prereserved.remaining,
+ j->cur_entry_sectors);
switch (s.cur_entry_offset) {
case JOURNAL_ENTRY_ERROR_VAL:
@@ -1136,8 +1179,9 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
journal_state_count(s, s.idx));
if (s.prev_buf_unwritten)
- pr_buf(&out, "yes, ref %u\n",
- journal_state_count(s, !s.idx));
+ pr_buf(&out, "yes, ref %u sectors %u\n",
+ journal_state_count(s, !s.idx),
+ journal_prev_buf(j)->sectors);
else
pr_buf(&out, "no\n");
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 77d59fb0b151..809cf25f5a03 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -119,6 +119,7 @@ static inline void journal_wake(struct journal *j)
{
wake_up(&j->wait);
closure_wake_up(&j->async_wait);
+ closure_wake_up(&j->preres_wait);
}
static inline struct journal_buf *journal_cur_buf(struct journal *j)
@@ -274,6 +275,7 @@ int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
#define JOURNAL_RES_GET_NONBLOCK (1 << 0)
#define JOURNAL_RES_GET_CHECK (1 << 1)
+#define JOURNAL_RES_GET_RESERVED (1 << 2)
static inline int journal_res_get_fast(struct journal *j,
struct journal_res *res,
@@ -294,6 +296,10 @@ static inline int journal_res_get_fast(struct journal *j,
EBUG_ON(!journal_state_count(new, new.idx));
+ if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
+ return 0;
+
if (flags & JOURNAL_RES_GET_CHECK)
return 1;
@@ -333,6 +339,89 @@ out:
return 0;
}
+/* journal_preres: */
+
+/*
+ * Recompute whether reservations without JOURNAL_RES_GET_RESERVED are allowed
+ * and update the JOURNAL_MAY_GET_UNRESERVED bit in j->flags to match.
+ *
+ * They are allowed when the pre-reserved total does not exceed the remaining
+ * space and the pin fifo has more than 8 free entries. While the bit is
+ * clear, journal_res_get_fast() and the reservation slowpath turn away
+ * callers that did not pass JOURNAL_RES_GET_RESERVED.
+ *
+ * Must be called with j->lock held. Returns the new value of the condition.
+ */
+static inline bool journal_check_may_get_unreserved(struct journal *j)
+{
+ /* One snapshot, so reserved and remaining are read consistently: */
+ union journal_preres_state s = READ_ONCE(j->prereserved);
+ /* NOTE(review): the reason for the threshold of 8 is not stated here */
+ bool ret = s.reserved <= s.remaining &&
+ fifo_free(&j->pin) > 8;
+
+ lockdep_assert_held(&j->lock);
+
+ /* Only touch the flag (and wake waiters) when the state changes: */
+ if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+ if (ret) {
+ set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
+ journal_wake(j);
+ } else {
+ clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Give back everything held in @res: subtract res->u64s from the journal's
+ * pre-reserved count, zero @res, and wake anyone waiting on j->preres_wait.
+ * Does nothing if @res holds no reservation.
+ */
+static inline void bch2_journal_preres_put(struct journal *j,
+ struct journal_preres *res)
+{
+ /*
+ * Only .reserved is set, so s.v is the packed amount to subtract from
+ * the 64-bit counter; the bits for .remaining are zero.
+ * NOTE(review): relies on the counter's reserved field being at least
+ * res->u64s, so the subtraction cannot borrow from .remaining.
+ */
+ union journal_preres_state s = { .reserved = res->u64s };
+
+ if (!res->u64s)
+ return;
+
+ /* s is reused to hold the state as it is after the subtraction: */
+ s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
+ res->u64s = 0;
+ closure_wake_up(&j->preres_wait);
+
+ /*
+ * Pre-reservations now fit in the remaining space but the flag is
+ * still clear: re-evaluate it. journal_check_may_get_unreserved()
+ * requires j->lock.
+ */
+ if (s.reserved <= s.remaining &&
+ !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+ spin_lock(&j->lock);
+ journal_check_may_get_unreserved(j);
+ spin_unlock(&j->lock);
+ }
+}
+
+int __bch2_journal_preres_get(struct journal *,
+ struct journal_preres *, unsigned);
+
+/*
+ * Attempt to change the pre-reservation held in @res to @new_u64s without
+ * taking any lock.
+ *
+ * The difference between @new_u64s and res->u64s is added to
+ * j->prereserved.reserved with a cmpxchg loop. The attempt is abandoned if
+ * the new reserved total would exceed j->prereserved.remaining.
+ *
+ * Returns 1 on success (res->u64s then equals @new_u64s), 0 if the
+ * reservation does not fit.
+ */
+static inline int bch2_journal_preres_get_fast(struct journal *j,
+ struct journal_preres *res,
+ unsigned new_u64s)
+{
+ /*
+ * Amount to add on top of what @res already holds. The callers
+ * visible in this change only get here with new_u64s > res->u64s.
+ */
+ int d = new_u64s - res->u64s;
+ union journal_preres_state old, new;
+ u64 v = atomic64_read(&j->prereserved.counter);
+
+ do {
+ old.v = new.v = v;
+
+ new.reserved += d;
+
+ /* Would overcommit the remaining space: fail, nothing changed */
+ if (new.reserved > new.remaining)
+ return 0;
+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
+ old.v, new.v)) != old.v);
+
+ res->u64s += d;
+ return 1;
+}
+
+/*
+ * Ensure @res holds at least @new_u64s of journal pre-reservation.
+ *
+ * @flags: if JOURNAL_RES_GET_NONBLOCK is set, fail instead of waiting. No
+ * other flag is examined here.
+ *
+ * Returns 0 if @res already holds enough or the reservation was obtained,
+ * -EAGAIN if the fast path failed and JOURNAL_RES_GET_NONBLOCK was set,
+ * otherwise the result of __bch2_journal_preres_get().
+ */
+static inline int bch2_journal_preres_get(struct journal *j,
+ struct journal_preres *res,
+ unsigned new_u64s,
+ unsigned flags)
+{
+ /* Never shrinks an existing reservation: */
+ if (new_u64s <= res->u64s)
+ return 0;
+
+ if (bch2_journal_preres_get_fast(j, res, new_u64s))
+ return 0;
+
+ if (flags & JOURNAL_RES_GET_NONBLOCK)
+ return -EAGAIN;
+
+ return __bch2_journal_preres_get(j, res, new_u64s);
+}
+
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 07cfbb975c37..db95257cec11 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -974,6 +974,12 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
journal_space_discarded)) {
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
ja->sectors_free = ca->mi.bucket_size;
+
+ /*
+ * ja->bucket_seq[ja->cur_idx] must always have
+ * something sensible:
+ */
+ ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
}
}
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index a3c53b78ad10..053fa4aa4f5f 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -49,6 +49,18 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
return available;
}
+/*
+ * Store @u64s_remaining into j->prereserved.remaining.
+ *
+ * Done with a cmpxchg loop on the whole 64-bit state so that the .reserved
+ * field, which the preres get/put paths modify atomically without a lock,
+ * is carried over unchanged.
+ */
+static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+{
+ union journal_preres_state old, new;
+ u64 v = atomic64_read(&j->prereserved.counter);
+
+ do {
+ old.v = new.v = v;
+ new.remaining = u64s_remaining;
+ /* Retry with the freshly observed value if the counter changed: */
+ } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
+ old.v, new.v)) != old.v);
+}
+
static struct journal_space {
unsigned next_entry;
unsigned remaining;
@@ -124,8 +136,9 @@ void bch2_journal_space_available(struct journal *j)
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
struct journal_space discarded, clean_ondisk, clean;
- unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
- j->buf[1].buf_size >> 9);
+ unsigned overhead, u64s_remaining = 0;
+ unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
+ j->buf[1].buf_size >> 9);
unsigned i, nr_online = 0, nr_devs_want;
bool can_discard = false;
int ret = 0;
@@ -176,9 +189,17 @@ void bch2_journal_space_available(struct journal *j)
if (!discarded.next_entry)
ret = -ENOSPC;
+
+ overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) *
+ journal_entry_overhead(j);
+ u64s_remaining = clean.remaining << 6;
+ u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
+ u64s_remaining /= 4;
out:
j->cur_entry_sectors = !ret ? discarded.next_entry : 0;
j->cur_entry_error = ret;
+ journal_set_remaining(j, u64s_remaining);
+ journal_check_may_get_unreserved(j);
if (!ret)
journal_wake(j);
@@ -454,7 +475,7 @@ void bch2_journal_reclaim(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
- unsigned iter, bucket_to_flush, min_nr = 0;
+ unsigned iter, min_nr = 0;
u64 seq_to_flush = 0;
lockdep_assert_held(&j->reclaim_lock);
@@ -465,13 +486,22 @@ void bch2_journal_reclaim(struct journal *j)
for_each_rw_member(ca, c, iter) {
struct journal_device *ja = &ca->journal;
+ unsigned nr_buckets, bucket_to_flush;
if (!ja->nr)
continue;
-
/* Try to keep the journal at most half full: */
- bucket_to_flush = (ja->cur_idx + (ja->nr >> 1)) % ja->nr;
+ nr_buckets = ja->nr / 2;
+
+ /* And include pre-reservations: */
+ nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
+ (ca->mi.bucket_size << 6) -
+ journal_entry_overhead(j));
+
+ nr_buckets = min(nr_buckets, ja->nr);
+
+ bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
seq_to_flush = max_t(u64, seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
}
@@ -490,6 +520,9 @@ void bch2_journal_reclaim(struct journal *j)
msecs_to_jiffies(j->reclaim_delay_ms)))
min_nr = 1;
+ if (j->prereserved.reserved * 2 > j->prereserved.remaining)
+ min_nr = 1;
+
journal_flush_pins(j, seq_to_flush, min_nr);
if (!test_bit(BCH_FS_RO, &c->flags))
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index c91a21e07809..85bf5e2706f7 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -80,6 +80,14 @@ struct journal_res {
u64 seq;
};
+/*
+ * For reserving space in the journal prior to getting a reservation on a
+ * particular journal entry:
+ */
+struct journal_preres {
+ unsigned u64s; /* amount currently held, in u64s; 0 means nothing held */
+};
+
union journal_res_state {
struct {
atomic64_t counter;
@@ -98,6 +106,21 @@ union journal_res_state {
};
};
+/*
+ * Journal-wide pre-reservation accounting, packed into 64 bits so that both
+ * fields can be read and updated together atomically. The three anonymous
+ * structs are different views of the same storage.
+ */
+union journal_preres_state {
+ /* View used for atomic64_*() operations on the whole state: */
+ struct {
+ atomic64_t counter;
+ };
+
+ /* Raw value, used for local copies in cmpxchg loops: */
+ struct {
+ u64 v;
+ };
+
+ struct {
+ /* u64s currently pre-reserved (raised by preres get, lowered by put): */
+ u32 reserved;
+ /* Limit that new pre-reservations may not push reserved past: */
+ u32 remaining;
+ };
+};
+
/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
@@ -122,6 +145,7 @@ enum {
JOURNAL_STARTED,
JOURNAL_NEED_WRITE,
JOURNAL_NOT_EMPTY,
+ JOURNAL_MAY_GET_UNRESERVED,
};
/* Embedded in struct bch_fs */
@@ -142,6 +166,8 @@ struct journal {
*/
int cur_entry_error;
+ union journal_preres_state prereserved;
+
/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;
@@ -161,6 +187,7 @@ struct journal {
/* Used when waiting because the journal was full */
wait_queue_head_t wait;
struct closure_waitlist async_wait;
+ struct closure_waitlist preres_wait;
struct closure io;
struct delayed_work write_work;