diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-02-10 04:32:19 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:29 -0400 |
commit | 59cc38b8d43b529d91c249c2eef35c8c3fc9fbd8 (patch) | |
tree | c67cba1b5df87df0606070046a2a0420e0f60d29 /fs | |
parent | f25d8215f499418c17dfde0b3158a66e03c758dc (diff) |
bcachefs: New discard implementation
In the old allocator code, buckets would be discarded just prior to
being used - this made sense in bcache where we were discarding buckets
just after invalidating the cached data they contain, but in a
filesystem where we typically have more free space we want to be
discarding buckets when they become empty.
This patch implements the new behaviour - a background worker walks the
need_discard btree for buckets awaiting discards, issues the discard, and
then clears the appropriate bit in the alloc btree, which moves the
buckets to the freespace btree.
Additionally, discards are now enabled by default.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/alloc_background.c | 140 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/opts.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/trace.h | 34 |
8 files changed, 187 insertions, 1 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3ba2b35fad53..9514c2e5f01e 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -545,6 +545,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); + /* mark the bucket as needing a discard as well */ SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); } if (old_a.data_type && !new_a->data_type && @@ -579,6 +580,144 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, return 0; } +/* Process the need_discard entry for the bucket at @pos. If the alloc key has NEED_INC_GEN set, only the generation is bumped and that flag cleared. Otherwise the bucket is discarded on @ca (skipped when discard_done is already set, ca->mi.discard is off, or the nochanges option is on) and NEED_DISCARD is cleared. Either way the modified key is queued with bch2_trans_update(). Returns -EIO if NEED_DISCARD was not set on the key, -EINTR if the transaction cannot be relocked after the discard, or otherwise whatever the lookup and update helpers return. */ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos, + struct bch_dev *ca, bool *discard_done) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_i_alloc_v4 *a; + struct printbuf buf = PRINTBUF; + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, pos, + BTREE_ITER_CACHED); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto out; + + a = bch2_alloc_to_v4_mut(trans, k); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto out; + + if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) { + a->v.gen++; + SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); + goto write; + } + + /* the key's journal_seq must not be ahead of what the journal has flushed to disk */ BUG_ON(a->v.journal_seq > c->journal.flushed_seq_ondisk); + + if (bch2_fs_inconsistent_on(!BCH_ALLOC_V4_NEED_DISCARD(&a->v), c, + "%s\n incorrectly set in need_discard btree", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ret = -EIO; + goto out; + } + + if (!*discard_done && ca->mi.discard && !c->opts.nochanges) { + /* + * This works without any other locks because this is the only + * thread that removes items from the need_discard tree + */ + bch2_trans_unlock(trans); + blkdev_issue_discard(ca->disk_sb.bdev, + k.k->p.offset * ca->mi.bucket_size, + ca->mi.bucket_size, + GFP_KERNEL); + /* NOTE(review): presumably prevents a second discard if this function is re-run by __bch2_trans_do() - confirm */ *discard_done = true; + + ret = bch2_trans_relock(trans) ? 
0 : -EINTR; + if (ret) + goto out; + } + + SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); +write: + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); +out: + bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); + return ret; +} + +/* Work function for c->discard_work: walks the need_discard btree, skips buckets that are open or still waiting on a journal commit, and runs bch2_clear_need_discard() on the rest. Holds an io_ref on the current device while its keys are processed, requests an async journal flush when more than half of the keys seen were waiting on the journal, drops the c->writes ref taken by bch2_do_discards(), and emits the do_discards tracepoint with the counters and the final return code. */ static void bch2_do_discards_work(struct work_struct *work) +{ + struct bch_fs *c = container_of(work, struct bch_fs, discard_work); + struct bch_dev *ca = NULL; + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0; + int ret; + + bch2_trans_init(&trans, c, 0, 0); + + for_each_btree_key(&trans, iter, BTREE_ID_need_discard, + POS_MIN, 0, k, ret) { + bool discard_done = false; + + /* key belongs to a different device: drop the ref held on the previous one */ if (ca && k.k->p.inode != ca->dev_idx) { + percpu_ref_put(&ca->io_ref); + ca = NULL; + } + + if (!ca) { + ca = bch_dev_bkey_exists(c, k.k->p.inode); + if (!percpu_ref_tryget(&ca->io_ref)) { + ca = NULL; + /* could not get an io_ref: move the iterator on to the next device */ bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); + continue; + } + } + + seen++; + + if (bch2_bucket_is_open_safe(c, k.k->p.inode, k.k->p.offset)) { + open++; + continue; + } + + if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, + c->journal.flushed_seq_ondisk, + k.k->p.inode, k.k->p.offset)) { + need_journal_commit++; + continue; + } + + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_USE_RESERVE| + BTREE_INSERT_NOFAIL, + bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done)); + if (ret) + break; + + discarded++; + } + bch2_trans_iter_exit(&trans, &iter); + + if (ca) + percpu_ref_put(&ca->io_ref); + + bch2_trans_exit(&trans); + + /* more than half of the keys seen were waiting on the journal: request a flush */ if (need_journal_commit * 2 > seen) + bch2_journal_flush_async(&c->journal, NULL); + + /* pairs with the tryget in bch2_do_discards() */ percpu_ref_put(&c->writes); + + trace_do_discards(c, seen, open, need_journal_commit, discarded, ret); +} + +/* Queue the discard worker on system_long_wq. A c->writes ref is taken first and is released by the worker when it finishes; it is dropped here instead when queue_work() returns false. Does nothing if the ref cannot be taken. */ void bch2_do_discards(struct bch_fs *c) +{ + if (percpu_ref_tryget(&c->writes) && + !queue_work(system_long_wq, &c->discard_work)) + percpu_ref_put(&c->writes); +} + static int bch2_dev_freespace_init(struct 
bch_fs *c, struct bch_dev *ca) { struct btree_trans trans; @@ -862,4 +1001,5 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca) void bch2_fs_allocator_background_init(struct bch_fs *c) { spin_lock_init(&c->freelist_lock); + /* discard_work runs bch2_do_discards_work() when queued */ INIT_WORK(&c->discard_work, bch2_do_discards_work); } diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 74b23f9b1bd3..8ba9bf853c2f 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -113,6 +113,8 @@ int bch2_alloc_read(struct bch_fs *, bool, bool); int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); +/* queues the background discard worker (defined in alloc_background.c) */ void bch2_do_discards(struct bch_fs *); + int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 879b2adc8b42..ca48b3f86304 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -758,6 +758,7 @@ struct bch_fs { unsigned write_points_nr; struct buckets_waiting_for_journal buckets_waiting_for_journal; + /* work item queued by bch2_do_discards() */ struct work_struct discard_work; /* GARBAGE COLLECTION */ struct task_struct *gc_thread; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2c6fdf385ba3..0e86b45b6c55 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -543,6 +543,11 @@ int bch2_mark_alloc(struct btree_trans *trans, (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk)) closure_wake_up(&c->freelist_wait); + /* inserted alloc key has NEED_DISCARD set and no journal_seq: kick the discard worker */ if ((flags & BTREE_TRIGGER_INSERT) && + BCH_ALLOC_V4_NEED_DISCARD(&new_a) && + !new_a.journal_seq) + bch2_do_discards(c); + if (bucket_state(new_a) == BUCKET_need_gc_gens) { atomic_inc(&c->kick_gc); wake_up_process(c->gc_thread); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 3e418342ee67..3974d043fd8a 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_background.h" 
#include "alloc_foreground.h" #include "btree_io.h" #include "btree_update_interior.h" @@ -1399,6 +1400,7 @@ static void journal_write_done(struct closure *cl) j->flushed_seq_ondisk = seq; j->last_seq_ondisk = w->last_seq; + /* flushed_seq_ondisk was just updated: kick the discard worker, which compares buckets against it */ bch2_do_discards(c); closure_wake_up(&c->freelist_wait); bch2_reset_alloc_cursors(c); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index b45740ec3c67..ce79e1a12bd0 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -266,7 +266,7 @@ enum opt_type { x(discard, u8, \ OPT_FS|OPT_MOUNT|OPT_DEVICE, \ OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ + BCH2_NO_SB_OPT, true, \ NULL, "Enable discard/TRIM support") \ x(verbose, u8, \ OPT_FS|OPT_MOUNT, \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 3a8740fde9de..037923bca742 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -401,6 +401,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); + /* NOTE(review): presumably picks up need_discard entries already present when going read-write - confirm */ bch2_do_discards(c); + if (!early) { ret = bch2_fs_read_write_late(c); if (ret) diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index caf59b977e2f..ef2096fd147d 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -182,6 +182,40 @@ TRACE_EVENT(journal_reclaim_finish, __entry->nr_flushed) ); +/* allocator: */ + +/* One event per discard pass: device, the seen/open/need_journal_commit/discarded counters and the return code passed to trace_do_discards() */ TRACE_EVENT(do_discards, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, int ret), + TP_ARGS(c, seen, open, need_journal_commit, discarded, ret), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u64, seen ) + __field(u64, open ) + __field(u64, need_journal_commit ) + __field(u64, discarded ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->seen = seen; + __entry->open = open; + __entry->need_journal_commit = need_journal_commit; + __entry->discarded = discarded; + __entry->ret = ret; + ), + + /* device is printed as major,minor; the comma keeps the two numbers distinguishable */ TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i", + MAJOR(__entry->dev), 
MINOR(__entry->dev), + __entry->seen, + __entry->open, + __entry->need_journal_commit, + __entry->discarded, + __entry->ret) +); + /* bset.c: */ DEFINE_EVENT(bpos, bkey_pack_pos_fail, |