author    | Kent Overstreet <kent.overstreet@gmail.com> | 2021-04-14 12:17:41 -0400
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:00 -0400
commit    | d44a6e350ed28c00e00f5d8d5882682275dc0945 (patch)
tree      | 9c2bf20ee0d1464b600f08ef3b8fb4026564438c
parent    | 4aac975b6c9100cb08da4645291a262d970c1922 (diff)
bcachefs: Drop old style btree node coalescing
We have foreground btree node merging now, and any future btree node
merging improvements are going to be based off of that code.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
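The pass being removed only fired when a window of sibling nodes was sparse enough that all their live keys could fit in one fewer node, each new node filled to at most 2/3 of capacity. As a rough illustration of that fit test — a minimal standalone sketch, not bcachefs code: `worth_coalescing`, the capacities, and the sample numbers are invented — consider:

```c
/*
 * Sketch of the occupancy test the dropped GC pass used to decide whether
 * a window of sibling nodes was worth coalescing. All names and sizes are
 * hypothetical; this is not the bcachefs API.
 */
#include <stdbool.h>
#include <stdio.h>

#define GC_MERGE_NODES	4	/* window size, as in the deleted code */

static bool worth_coalescing(const unsigned live_u64s[], unsigned nr_nodes,
			     unsigned node_capacity_u64s)
{
	/* Fill new nodes to at most 2/3, like "btree_blocks(c) * 2 / 3" */
	unsigned budget = node_capacity_u64s * 2 / 3;
	unsigned total = 0, i;

	if (nr_nodes <= 1)
		return false;

	for (i = 0; i < nr_nodes; i++)
		total += live_u64s[i];

	/* Would the total, spread over one fewer node, fit the budget? */
	return (total + (nr_nodes - 2)) / (nr_nodes - 1) <= budget;
}

int main(void)
{
	unsigned sparse[GC_MERGE_NODES] = { 100, 120, 90 };	/* mostly empty */
	unsigned full[GC_MERGE_NODES]   = { 450, 470, 460 };	/* nearly full */

	printf("sparse window: %s\n",
	       worth_coalescing(sparse, 3, 512) ? "coalesce" : "leave alone");
	printf("full window:   %s\n",
	       worth_coalescing(full, 3, 512) ? "coalesce" : "leave alone");
	return 0;
}
```

The 2/3 budget mirrors the `blocks = btree_blocks(c) * 2 / 3` cap in the deleted `bch2_coalesce_nodes()`, presumably leaving slack so freshly merged nodes don't immediately split again.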
-rw-r--r-- | fs/bcachefs/btree_gc.c | 340
-rw-r--r-- | fs/bcachefs/btree_gc.h |   2
-rw-r--r-- | fs/bcachefs/sysfs.c    |   5
-rw-r--r-- | fs/bcachefs/trace.h    |  37
4 files changed, 0 insertions, 384 deletions
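The bulk of the removal is `bch2_coalesce_nodes()` (in the diff below), which conceptually concatenated the window's keys and re-sliced them at new boundaries, freeing any node that ended up empty. A toy version of that greedy pairwise repacking, reduced to plain arrays — all names and sizes here are hypothetical, not bcachefs APIs:

```c
#include <stdio.h>

#define CAPACITY 100	/* hypothetical per-node budget, in key units */

/*
 * Greedy pairwise repacking: for each adjacent pair, pull keys from one
 * neighbour into the other until it is full, closing the gap whenever a
 * node empties out -- mirroring the memcpy_u64s()/memmove() dance in the
 * deleted bch2_coalesce_nodes().
 */
static unsigned repack(unsigned sizes[], unsigned nr)
{
	for (unsigned i = nr - 1; i > 0; --i) {
		unsigned room = CAPACITY - sizes[i];
		unsigned moved = room < sizes[i - 1] ? room : sizes[i - 1];

		sizes[i] += moved;
		sizes[i - 1] -= moved;

		if (!sizes[i - 1]) {
			/* donor emptied: close the gap, one fewer node */
			for (unsigned j = i - 1; j + 1 < nr; j++)
				sizes[j] = sizes[j + 1];
			nr--;
		}
	}
	return nr;
}

int main(void)
{
	unsigned sizes[] = { 40, 35, 30 };	/* three sparse siblings */
	unsigned nr = repack(sizes, 3);

	printf("%u nodes remain:", nr);
	for (unsigned i = 0; i < nr; i++)
		printf(" %u", sizes[i]);
	printf("\n");
	return 0;
}
```

Running this on three half-empty siblings leaves two nodes, one full and one holding the remainder — the same end state the deleted code reached on real bsets.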
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index c14794cf1be8..b61d27de5cd7 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1273,346 +1273,6 @@ err:
 	return ret;
 }
 
-/* Btree coalescing */
-
-static void recalc_packed_keys(struct btree *b)
-{
-	struct bset *i = btree_bset_first(b);
-	struct bkey_packed *k;
-
-	memset(&b->nr, 0, sizeof(b->nr));
-
-	BUG_ON(b->nsets != 1);
-
-	vstruct_for_each(i, k)
-		btree_keys_account_key_add(&b->nr, 0, k);
-}
-
-static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
-				struct btree *old_nodes[GC_MERGE_NODES])
-{
-	struct btree *parent = btree_node_parent(iter, old_nodes[0]);
-	unsigned i, nr_old_nodes, nr_new_nodes, u64s = 0;
-	unsigned blocks = btree_blocks(c) * 2 / 3;
-	struct btree *new_nodes[GC_MERGE_NODES];
-	struct btree_update *as;
-	struct keylist keylist;
-	struct bkey_format_state format_state;
-	struct bkey_format new_format;
-
-	memset(new_nodes, 0, sizeof(new_nodes));
-	bch2_keylist_init(&keylist, NULL);
-
-	/* Count keys that are not deleted */
-	for (i = 0; i < GC_MERGE_NODES && old_nodes[i]; i++)
-		u64s += old_nodes[i]->nr.live_u64s;
-
-	nr_old_nodes = nr_new_nodes = i;
-
-	/* Check if all keys in @old_nodes could fit in one fewer node */
-	if (nr_old_nodes <= 1 ||
-	    __vstruct_blocks(struct btree_node, c->block_bits,
-			     DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks)
-		return;
-
-	/* Find a format that all keys in @old_nodes can pack into */
-	bch2_bkey_format_init(&format_state);
-
-	/*
-	 * XXX: this won't correctly take it account the new min/max keys:
-	 */
-	for (i = 0; i < nr_old_nodes; i++)
-		__bch2_btree_calc_format(&format_state, old_nodes[i]);
-
-	new_format = bch2_bkey_format_done(&format_state);
-
-	/* Check if repacking would make any nodes too big to fit */
-	for (i = 0; i < nr_old_nodes; i++)
-		if (!bch2_btree_node_format_fits(c, old_nodes[i], &new_format)) {
-			trace_btree_gc_coalesce_fail(c,
-					BTREE_GC_COALESCE_FAIL_FORMAT_FITS);
-			return;
-		}
-
-	if (bch2_keylist_realloc(&keylist, NULL, 0,
-			BKEY_BTREE_PTR_U64s_MAX * nr_old_nodes)) {
-		trace_btree_gc_coalesce_fail(c,
-				BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC);
-		return;
-	}
-
-	as = bch2_btree_update_start(iter, old_nodes[0]->c.level,
-			btree_update_reserve_required(c, parent) + nr_old_nodes,
-			BTREE_INSERT_NOFAIL|
-			BTREE_INSERT_USE_RESERVE);
-	if (IS_ERR(as)) {
-		trace_btree_gc_coalesce_fail(c,
-				BTREE_GC_COALESCE_FAIL_RESERVE_GET);
-		bch2_keylist_free(&keylist, NULL);
-		return;
-	}
-
-	trace_btree_gc_coalesce(c, old_nodes[0]);
-
-	for (i = 0; i < nr_old_nodes; i++)
-		bch2_btree_interior_update_will_free_node(as, old_nodes[i]);
-
-	/* Repack everything with @new_format and sort down to one bset */
-	for (i = 0; i < nr_old_nodes; i++)
-		new_nodes[i] =
-			__bch2_btree_node_alloc_replacement(as, old_nodes[i],
-							    new_format);
-
-	/*
-	 * Conceptually we concatenate the nodes together and slice them
-	 * up at different boundaries.
-	 */
-	for (i = nr_new_nodes - 1; i > 0; --i) {
-		struct btree *n1 = new_nodes[i];
-		struct btree *n2 = new_nodes[i - 1];
-
-		struct bset *s1 = btree_bset_first(n1);
-		struct bset *s2 = btree_bset_first(n2);
-		struct bkey_packed *k, *last = NULL;
-
-		/* Calculate how many keys from @n2 we could fit inside @n1 */
-		u64s = 0;
-
-		for (k = s2->start;
-		     k < vstruct_last(s2) &&
-		     vstruct_blocks_plus(n1->data, c->block_bits,
-					 u64s + k->u64s) <= blocks;
-		     k = bkey_next(k)) {
-			last = k;
-			u64s += k->u64s;
-		}
-
-		if (u64s == le16_to_cpu(s2->u64s)) {
-			/* n2 fits entirely in n1 */
-			n1->key.k.p = n1->data->max_key = n2->data->max_key;
-
-			memcpy_u64s(vstruct_last(s1),
-				    s2->start,
-				    le16_to_cpu(s2->u64s));
-			le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s));
-
-			set_btree_bset_end(n1, n1->set);
-
-			six_unlock_write(&n2->c.lock);
-			bch2_btree_node_free_never_inserted(c, n2);
-			six_unlock_intent(&n2->c.lock);
-
-			memmove(new_nodes + i - 1,
-				new_nodes + i,
-				sizeof(new_nodes[0]) * (nr_new_nodes - i));
-			new_nodes[--nr_new_nodes] = NULL;
-		} else if (u64s) {
-			/* move part of n2 into n1 */
-			n1->key.k.p = n1->data->max_key =
-				bkey_unpack_pos(n1, last);
-
-			n2->data->min_key = bpos_successor(n1->data->max_key);
-
-			memcpy_u64s(vstruct_last(s1),
-				    s2->start, u64s);
-			le16_add_cpu(&s1->u64s, u64s);
-
-			memmove(s2->start,
-				vstruct_idx(s2, u64s),
-				(le16_to_cpu(s2->u64s) - u64s) * sizeof(u64));
-			s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s);
-
-			set_btree_bset_end(n1, n1->set);
-			set_btree_bset_end(n2, n2->set);
-		}
-	}
-
-	for (i = 0; i < nr_new_nodes; i++) {
-		struct btree *n = new_nodes[i];
-
-		recalc_packed_keys(n);
-		btree_node_reset_sib_u64s(n);
-
-		bch2_btree_build_aux_trees(n);
-
-		bch2_btree_update_add_new_node(as, n);
-		six_unlock_write(&n->c.lock);
-
-		bch2_btree_node_write(c, n, SIX_LOCK_intent);
-	}
-
-	/*
-	 * The keys for the old nodes get deleted. We don't want to insert keys
-	 * that compare equal to the keys for the new nodes we'll also be
-	 * inserting - we can't because keys on a keylist must be strictly
-	 * greater than the previous keys, and we also don't need to since the
-	 * key for the new node will serve the same purpose (overwriting the
-	 * key for the old node).
-	 */
-	for (i = 0; i < nr_old_nodes; i++) {
-		struct bkey_i delete;
-		unsigned j;
-
-		for (j = 0; j < nr_new_nodes; j++)
-			if (!bpos_cmp(old_nodes[i]->key.k.p,
-				      new_nodes[j]->key.k.p))
-				goto next;
-
-		bkey_init(&delete.k);
-		delete.k.p = old_nodes[i]->key.k.p;
-		bch2_keylist_add_in_order(&keylist, &delete);
-next:
-		i = i;
-	}
-
-	/*
-	 * Keys for the new nodes get inserted: bch2_btree_insert_keys() only
-	 * does the lookup once and thus expects the keys to be in sorted order
-	 * so we have to make sure the new keys are correctly ordered with
-	 * respect to the deleted keys added in the previous loop
-	 */
-	for (i = 0; i < nr_new_nodes; i++)
-		bch2_keylist_add_in_order(&keylist, &new_nodes[i]->key);
-
-	/* Insert the newly coalesced nodes */
-	bch2_btree_insert_node(as, parent, iter, &keylist, 0);
-
-	BUG_ON(!bch2_keylist_empty(&keylist));
-
-	BUG_ON(iter->l[old_nodes[0]->c.level].b != old_nodes[0]);
-
-	bch2_btree_iter_node_replace(iter, new_nodes[0]);
-
-	for (i = 0; i < nr_new_nodes; i++)
-		bch2_btree_update_get_open_buckets(as, new_nodes[i]);
-
-	/* Free the old nodes and update our sliding window */
-	for (i = 0; i < nr_old_nodes; i++) {
-		bch2_btree_node_free_inmem(c, old_nodes[i], iter);
-
-		/*
-		 * the index update might have triggered a split, in which case
-		 * the nodes we coalesced - the new nodes we just created -
-		 * might not be sibling nodes anymore - don't add them to the
-		 * sliding window (except the first):
-		 */
-		if (!i) {
-			old_nodes[i] = new_nodes[i];
-		} else {
-			old_nodes[i] = NULL;
-		}
-	}
-
-	for (i = 0; i < nr_new_nodes; i++)
-		six_unlock_intent(&new_nodes[i]->c.lock);
-
-	bch2_btree_update_done(as);
-	bch2_keylist_free(&keylist, NULL);
-}
-
-static int bch2_coalesce_btree(struct bch_fs *c, enum btree_id btree_id)
-{
-	struct btree_trans trans;
-	struct btree_iter *iter;
-	struct btree *b;
-	bool kthread = (current->flags & PF_KTHREAD) != 0;
-	unsigned i;
-	int ret = 0;
-
-	/* Sliding window of adjacent btree nodes */
-	struct btree *merge[GC_MERGE_NODES];
-	u32 lock_seq[GC_MERGE_NODES];
-
-	bch2_trans_init(&trans, c, 0, 0);
-
-	/*
-	 * XXX: We don't have a good way of positively matching on sibling
-	 * nodes that have the same parent - this code works by handling the
-	 * cases where they might not have the same parent, and is thus
-	 * fragile. Ugh.
-	 *
-	 * Perhaps redo this to use multiple linked iterators?
-	 */
-	memset(merge, 0, sizeof(merge));
-
-	__for_each_btree_node(&trans, iter, btree_id, POS_MIN,
-			      BTREE_MAX_DEPTH, 0,
-			      BTREE_ITER_PREFETCH, b) {
-		memmove(merge + 1, merge,
-			sizeof(merge) - sizeof(merge[0]));
-		memmove(lock_seq + 1, lock_seq,
-			sizeof(lock_seq) - sizeof(lock_seq[0]));
-
-		merge[0] = b;
-
-		for (i = 1; i < GC_MERGE_NODES; i++) {
-			if (!merge[i] ||
-			    !six_relock_intent(&merge[i]->c.lock, lock_seq[i]))
-				break;
-
-			if (merge[i]->c.level != merge[0]->c.level) {
-				six_unlock_intent(&merge[i]->c.lock);
-				break;
-			}
-		}
-		memset(merge + i, 0, (GC_MERGE_NODES - i) * sizeof(merge[0]));
-
-		bch2_coalesce_nodes(c, iter, merge);
-
-		for (i = 1; i < GC_MERGE_NODES && merge[i]; i++) {
-			lock_seq[i] = merge[i]->c.lock.state.seq;
-			six_unlock_intent(&merge[i]->c.lock);
-		}
-
-		lock_seq[0] = merge[0]->c.lock.state.seq;
-
-		if (kthread && kthread_should_stop()) {
-			ret = -ESHUTDOWN;
-			break;
-		}
-
-		bch2_trans_cond_resched(&trans);
-
-		/*
-		 * If the parent node wasn't relocked, it might have been split
-		 * and the nodes in our sliding window might not have the same
-		 * parent anymore - blow away the sliding window:
-		 */
-		if (btree_iter_node(iter, iter->level + 1) &&
-		    !btree_node_intent_locked(iter, iter->level + 1))
-			memset(merge + 1, 0,
-			       (GC_MERGE_NODES - 1) * sizeof(merge[0]));
-	}
-	bch2_trans_iter_put(&trans, iter);
-
-	return bch2_trans_exit(&trans) ?: ret;
-}
-
-/**
- * bch_coalesce - coalesce adjacent nodes with low occupancy
- */
-void bch2_coalesce(struct bch_fs *c)
-{
-	enum btree_id id;
-
-	down_read(&c->gc_lock);
-	trace_gc_coalesce_start(c);
-
-	for (id = 0; id < BTREE_ID_NR; id++) {
-		int ret = c->btree_roots[id].b
-			? bch2_coalesce_btree(c, id)
-			: 0;
-
-		if (ret) {
-			if (ret != -ESHUTDOWN)
-				bch_err(c, "btree coalescing failed: %d", ret);
-			return;
-		}
-	}
-
-	trace_gc_coalesce_end(c);
-	up_read(&c->gc_lock);
-}
-
 static int bch2_gc_thread(void *arg)
 {
 	struct bch_fs *c = arg;
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 44b7d121610f..868723a30b15 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -4,8 +4,6 @@
 
 #include "btree_types.h"
 
-void bch2_coalesce(struct bch_fs *);
-
 int bch2_gc(struct bch_fs *, bool, bool);
 int bch2_gc_gens(struct bch_fs *);
 void bch2_gc_thread_stop(struct bch_fs *);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 077f3a8cead7..21ef7719cf55 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -132,7 +132,6 @@ do {					\
 } while (0)
 
 write_attribute(trigger_journal_flush);
-write_attribute(trigger_btree_coalesce);
 write_attribute(trigger_gc);
 write_attribute(prune_cache);
 rw_attribute(btree_gc_periodic);
@@ -478,9 +477,6 @@ STORE(bch2_fs)
 	if (attr == &sysfs_trigger_journal_flush)
 		bch2_journal_meta(&c->journal);
 
-	if (attr == &sysfs_trigger_btree_coalesce)
-		bch2_coalesce(c);
-
 	if (attr == &sysfs_trigger_gc) {
 		/*
 		 * Full gc is currently incompatible with btree key cache:
@@ -577,7 +573,6 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_extent_migrate_raced,
 
 	&sysfs_trigger_journal_flush,
-	&sysfs_trigger_btree_coalesce,
 	&sysfs_trigger_gc,
 	&sysfs_gc_gens_pos,
 	&sysfs_prune_cache,
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 387c1c49f696..493f9223c5bd 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -353,28 +353,6 @@ DEFINE_EVENT(btree_node, btree_set_root,
 
 /* Garbage collection */
 
-DEFINE_EVENT(btree_node, btree_gc_coalesce,
-	TP_PROTO(struct bch_fs *c, struct btree *b),
-	TP_ARGS(c, b)
-);
-
-TRACE_EVENT(btree_gc_coalesce_fail,
-	TP_PROTO(struct bch_fs *c, int reason),
-	TP_ARGS(c, reason),
-
-	TP_STRUCT__entry(
-		__field(u8,		reason		)
-		__array(char,		uuid,	16	)
-	),
-
-	TP_fast_assign(
-		__entry->reason		= reason;
-		memcpy(__entry->uuid, c->disk_sb.sb->user_uuid.b, 16);
-	),
-
-	TP_printk("%pU: %u", __entry->uuid, __entry->reason)
-);
-
 DEFINE_EVENT(btree_node, btree_gc_rewrite_node,
 	TP_PROTO(struct bch_fs *c, struct btree *b),
 	TP_ARGS(c, b)
@@ -395,16 +373,6 @@ DEFINE_EVENT(bch_fs, gc_end,
 	TP_ARGS(c)
 );
 
-DEFINE_EVENT(bch_fs, gc_coalesce_start,
-	TP_PROTO(struct bch_fs *c),
-	TP_ARGS(c)
-);
-
-DEFINE_EVENT(bch_fs, gc_coalesce_end,
-	TP_PROTO(struct bch_fs *c),
-	TP_ARGS(c)
-);
-
 DEFINE_EVENT(bch_fs, gc_cannot_inc_gens,
 	TP_PROTO(struct bch_fs *c),
 	TP_ARGS(c)
@@ -453,11 +421,6 @@ TRACE_EVENT(invalidate,
 		  MINOR(__entry->dev), __entry->offset)
 );
 
-DEFINE_EVENT(bch_fs, rescale_prios,
-	TP_PROTO(struct bch_fs *c),
-	TP_ARGS(c)
-);
-
 DECLARE_EVENT_CLASS(bucket_alloc,
 	TP_PROTO(struct bch_dev *ca, enum alloc_reserve reserve),
 	TP_ARGS(ca, reserve),
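For reference, the traversal half of the deleted code, `bch2_coalesce_btree()`, kept a sliding window of the last few siblings visited, shifting the window by one slot at each node before handing it to the merge routine. A minimal sketch of just that window bookkeeping, with integers standing in for btree nodes and no locking — every name here is invented; the real `GC_MERGE_NODES` is defined in btree_gc.h:

```c
#include <string.h>
#include <stdio.h>

#define GC_MERGE_NODES 4	/* hypothetical window size for this sketch */

static void visit(int window[GC_MERGE_NODES], int node)
{
	/* Shift the window right by one and push the new node at the front */
	memmove(window + 1, window, (GC_MERGE_NODES - 1) * sizeof(window[0]));
	window[0] = node;
}

int main(void)
{
	int window[GC_MERGE_NODES] = { 0 };	/* 0 == empty slot */

	for (int node = 1; node <= 6; node++) {
		visit(window, node);
		printf("window after node %d: [%d %d %d %d]\n", node,
		       window[0], window[1], window[2], window[3]);
	}
	return 0;
}
```

The fragility the deleted comments describe — a parent split could leave the window spanning nodes that are no longer siblings, forcing the code to blow the window away whenever the parent wasn't still intent-locked — is what the foreground merging mentioned in the commit message sidesteps.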