path: root/fs/btrfs/sysfs.c
author     Boris Burkov <boris@bur.io>  2024-02-02 11:52:16 -0800
committer  David Sterba <dsterba@suse.com>  2024-07-11 15:33:27 +0200
commit     f5ff64ccf7bb7274ed66b0d835b2f6ae10af5d7a (patch)
tree       0baaff38ff8a3587ef8dbdec13796a5986723559 /fs/btrfs/sysfs.c
parent     42f620aec182f62ee72e3fce41cb3353951b3508 (diff)
btrfs: dynamic block_group reclaim threshold
We can currently recover allocated block_groups by:

- explicitly starting balance operations
- "auto reclaim" via bg_reclaim_threshold

The latter works by checking against a fixed threshold on frees. If we pass from above the threshold to below, relocation triggers and the block group will get reclaimed by the cleaner thread (assuming it is still eligible).

Picking a threshold is challenging. Too high, and you end up trying to reclaim very full block_groups, which is quite costly, and you don't reclaim block_groups that don't get quite THAT full but could still be quite fragmented and stranding a lot of space. Too low, and you similarly miss out on reclaim even if you badly need it to avoid running out of unallocated space, if you have heavily fragmented block groups living above the threshold. No matter the threshold, it suffers from a workload that happens to bounce around that threshold, which can introduce arbitrary amounts of reclaim waste.

To improve this situation, introduce a dynamic threshold. The basic idea behind this threshold is that it should be very lax when there is plenty of unallocated space, and increasingly aggressive as we approach zero unallocated space. To that end, it sets a target for unallocated space (10 chunks) and then linearly increases the threshold as our shortfall from that target grows. The formula is:

    (target - unalloc) / target

I tested this by running it on three interesting workloads:

1. bounce allocations around X% full.
2. fill up all the way and introduce full fragmentation.
3. write in a fragmented way until the filesystem is just about full.

Workloads 1 and 2 attack the weaknesses of a fixed threshold; a fixed threshold either works perfectly or falls apart completely, depending on where it is set. The dynamic threshold always handles these cases well. Workload 3 attacks the dynamic threshold by checking whether it is too zealous to reclaim in conditions with low unallocated and low unused space. It tends to claw back 1GiB of unallocated space fairly aggressively, but not much more. Early versions of the dynamic threshold struggled on this test.

Additional work could be done to intelligently ratchet up the urgency of reclaim in very low unallocated conditions. Existing mechanisms are already useless in that case anyway.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
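To make the formula concrete, here is a minimal userspace C sketch of the computation. This illustrates the commit message's formula only; it is not the kernel's btrfs_calc_reclaim_threshold() implementation, and the function name, the 10-chunk target constant, and the percentage scaling are assumptions taken from the text above.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch (not the kernel code): express (target - unalloc) / target as
 * a 0-100 reclaim threshold, with target = 10 chunks of unallocated
 * space as described in the commit message.
 */
static int calc_dynamic_reclaim_threshold(uint64_t unalloc, uint64_t chunk_size)
{
	const uint64_t target = 10 * chunk_size;  /* unallocated-space target */

	/* At or above the target, stay lax: never trigger reclaim. */
	if (unalloc >= target)
		return 0;

	/* Threshold rises linearly as unallocated space falls below target. */
	return (int)(((target - unalloc) * 100) / target);
}

int main(void)
{
	const uint64_t chunk = 1ULL << 30;  /* assume 1GiB data chunks */

	/* 8 chunks unallocated against a 10-chunk target -> 20% threshold */
	printf("%d\n", calc_dynamic_reclaim_threshold(8 * chunk, chunk));
	return 0;
}

With plenty of unallocated space the threshold is 0 and nothing is reclaimed; as unallocated space approaches zero the threshold approaches 100, so even nearly-full block groups become reclaim candidates.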
Diffstat (limited to 'fs/btrfs/sysfs.c')
-rw-r--r--  fs/btrfs/sysfs.c  43
1 file changed, 42 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 919c7ba45121..360d6093476f 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -905,8 +905,12 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
char *buf)
{
struct btrfs_space_info *space_info = to_space_info(kobj);
+ ssize_t ret;
- return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
+ spin_lock(&space_info->lock);
+ ret = sysfs_emit(buf, "%d\n", btrfs_calc_reclaim_threshold(space_info));
+ spin_unlock(&space_info->lock);
+ return ret;
}
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
@@ -917,6 +921,9 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
int thresh;
int ret;
+ if (READ_ONCE(space_info->dynamic_reclaim))
+ return -EINVAL;
+
ret = kstrtoint(buf, 10, &thresh);
if (ret)
return ret;
@@ -933,6 +940,39 @@ BTRFS_ATTR_RW(space_info, bg_reclaim_threshold,
btrfs_sinfo_bg_reclaim_threshold_show,
btrfs_sinfo_bg_reclaim_threshold_store);
+static ssize_t btrfs_sinfo_dynamic_reclaim_show(struct kobject *kobj,
+ struct kobj_attribute *a,
+ char *buf)
+{
+ struct btrfs_space_info *space_info = to_space_info(kobj);
+
+ return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->dynamic_reclaim));
+}
+
+static ssize_t btrfs_sinfo_dynamic_reclaim_store(struct kobject *kobj,
+ struct kobj_attribute *a,
+ const char *buf, size_t len)
+{
+ struct btrfs_space_info *space_info = to_space_info(kobj);
+ int dynamic_reclaim;
+ int ret;
+
+ ret = kstrtoint(buf, 10, &dynamic_reclaim);
+ if (ret)
+ return ret;
+
+ if (dynamic_reclaim < 0)
+ return -EINVAL;
+
+ WRITE_ONCE(space_info->dynamic_reclaim, dynamic_reclaim != 0);
+
+ return len;
+}
+
+BTRFS_ATTR_RW(space_info, dynamic_reclaim,
+ btrfs_sinfo_dynamic_reclaim_show,
+ btrfs_sinfo_dynamic_reclaim_store);
+
/*
* Allocation information about block group types.
*
@@ -950,6 +990,7 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, disk_used),
BTRFS_ATTR_PTR(space_info, disk_total),
BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
+ BTRFS_ATTR_PTR(space_info, dynamic_reclaim),
BTRFS_ATTR_PTR(space_info, chunk_size),
BTRFS_ATTR_PTR(space_info, size_classes),
BTRFS_ATTR_PTR(space_info, reclaim_count),
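Usage note (not part of the patch): while dynamic_reclaim is enabled, writes to bg_reclaim_threshold return -EINVAL (per the store handler above), and reads report the currently computed threshold under space_info->lock. A hedged userspace sketch of toggling the knob, assuming the usual /sys/fs/btrfs/<UUID>/allocation/data sysfs layout; the UUID is a placeholder:

#include <stdio.h>

/* Sketch: enable dynamic reclaim for one filesystem's data space_info,
 * then read back the computed threshold. Replace <UUID> with a real
 * filesystem UUID; the path layout is an assumption based on the
 * standard btrfs sysfs hierarchy. */
int main(void)
{
	const char *dir = "/sys/fs/btrfs/<UUID>/allocation/data";
	char path[512];
	FILE *f;
	int thresh;

	snprintf(path, sizeof(path), "%s/dynamic_reclaim", dir);
	f = fopen(path, "w");
	if (!f)
		return 1;
	fputs("1\n", f);	/* any value > 0 enables dynamic reclaim */
	fclose(f);

	snprintf(path, sizeof(path), "%s/bg_reclaim_threshold", dir);
	f = fopen(path, "r");
	if (!f)
		return 1;
	if (fscanf(f, "%d", &thresh) == 1)
		printf("dynamic reclaim threshold: %d%%\n", thresh);
	fclose(f);
	return 0;
}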