summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorQu Wenruo <wqu@suse.com>2024-04-30 07:53:02 +0930
committerDavid Sterba <dsterba@suse.com>2024-07-11 15:33:20 +0200
commit3d2ac9922465be5e8c9662eead70867159bf567d (patch)
treebf2685203c45f193babfdee7773a5354c5db50cd /fs
parent87a6962f73b1e5892d06987904e8f3827bf3ceec (diff)
btrfs: introduce new members for extent_map
Introduce two new members for extent_map: - disk_bytenr - offset Both are matching the members with the same name inside btrfs_file_extent_items. For now this patch only touches those members when: - Reading btrfs_file_extent_items from disk - Inserting new holes - Merging two extent maps With the new disk_bytenr and disk_num_bytes, doing merging would be a little more complex, as we have 3 different cases: * Both extent maps are referring to the same data extents |<----- data extent A ----->| |<- em 1 ->|<- em 2 ->| * Both extent maps are referring to different data extents |<-- data extent A -->|<-- data extent B -->| |<- em 1 ->|<- em 2 ->| * One of the extent maps is referring to a merged and larger data extent that covers both extent maps This is not really valid case other than some selftests. So this test case would be removed. A new helper merge_ondisk_extents() is introduced to handle the above valid cases. To properly assign values for those new members, a new btrfs_file_extent parameter is introduced to all the involved call sites. - For NOCOW writes the btrfs_file_extent would be exposed from can_nocow_file_extent(). - For other writes, the members can be easily calculated As most of them have 0 offset and utilizing the whole on-disk data extent. The exception is encoded write, but thankfully that interface provided offset directly and all other needed info. For now, both the old members (block_start/block_len/orig_start) are co-existing with the new members (disk_bytenr/offset), meanwhile all the critical code is still using the old members only. The cleanup will happen later after all the old and new members are properly validated. There would be some re-ordering for the assignment of the extent_map members, now we follow the new ordering: - start and len Or file_pos and num_bytes for other structures. - disk_bytenr and disk_num_bytes - offset and ram_bytes - compression So expect some seemingly unrelated line movement. Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/defrag.c4
-rw-r--r--fs/btrfs/extent_map.c79
-rw-r--r--fs/btrfs/extent_map.h17
-rw-r--r--fs/btrfs/file-item.c9
-rw-r--r--fs/btrfs/file.c1
-rw-r--r--fs/btrfs/inode.c57
6 files changed, 156 insertions, 11 deletions
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 407ccec3e57e..242c5469f4ba 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -709,6 +709,10 @@ iterate:
em->start = start;
em->orig_start = start;
em->block_start = EXTENT_MAP_HOLE;
+ em->disk_bytenr = EXTENT_MAP_HOLE;
+ em->disk_num_bytes = 0;
+ em->ram_bytes = 0;
+ em->offset = 0;
em->len = key.offset - start;
break;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a9d60d1eade9..0242f354e1af 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -229,6 +229,61 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
return next->block_start == prev->block_start;
}
+/*
+ * Handle the on-disk data extents merge for @prev and @next.
+ *
+ * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
+ * For now only uncompressed regular extent can be merged.
+ *
+ * @prev and @next will be both updated to point to the new merged range.
+ * Thus one of them should be removed by the caller.
+ */
+static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
+{
+ u64 new_disk_bytenr;
+ u64 new_disk_num_bytes;
+ u64 new_offset;
+
+ /* @prev and @next should not be compressed. */
+ ASSERT(!extent_map_is_compressed(prev));
+ ASSERT(!extent_map_is_compressed(next));
+
+ /*
+ * There are two different cases where @prev and @next can be merged.
+ *
+ * 1) They are referring to the same data extent:
+ *
+ * |<----- data extent A ----->|
+ * |<- prev ->|<- next ->|
+ *
+ * 2) They are referring to different data extents but still adjacent:
+ *
+ * |<-- data extent A -->|<-- data extent B -->|
+ * |<- prev ->|<- next ->|
+ *
+ * The calculation here always merges the data extents first, then updates
+ * @offset using the new data extents.
+ *
+ * For case 1), the merged data extent would be the same.
+ * For case 2), we just merge the two data extents into one.
+ */
+ new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
+ new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
+ next->disk_bytenr + next->disk_num_bytes) -
+ new_disk_bytenr;
+ new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
+
+ prev->disk_bytenr = new_disk_bytenr;
+ prev->disk_num_bytes = new_disk_num_bytes;
+ prev->ram_bytes = new_disk_num_bytes;
+ prev->offset = new_offset;
+
+ next->disk_bytenr = new_disk_bytenr;
+ next->disk_num_bytes = new_disk_num_bytes;
+ next->ram_bytes = new_disk_num_bytes;
+ next->offset = new_offset;
+}
+
static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
{
struct extent_map_tree *tree = &inode->extent_tree;
@@ -260,6 +315,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
em->block_len += merge->block_len;
em->block_start = merge->block_start;
em->generation = max(em->generation, merge->generation);
+
+ if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
+ merge_ondisk_extents(merge, em);
em->flags |= EXTENT_FLAG_MERGED;
rb_erase(&merge->rb_node, &tree->root);
@@ -275,6 +333,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
em->len += merge->len;
em->block_len += merge->block_len;
+ if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
+ merge_ondisk_extents(em, merge);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node);
em->generation = max(em->generation, merge->generation);
@@ -562,6 +622,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
!extent_map_is_compressed(em)) {
em->block_start += start_diff;
em->block_len = em->len;
+ em->offset += start_diff;
}
return add_extent_mapping(inode, em, 0);
}
@@ -785,14 +846,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->block_len = em->block_len;
else
split->block_len = split->len;
+ split->disk_bytenr = em->disk_bytenr;
split->disk_num_bytes = max(split->block_len,
em->disk_num_bytes);
+ split->offset = em->offset;
split->ram_bytes = em->ram_bytes;
} else {
split->orig_start = split->start;
split->block_len = 0;
split->block_start = em->block_start;
+ split->disk_bytenr = em->disk_bytenr;
split->disk_num_bytes = 0;
+ split->offset = 0;
split->ram_bytes = split->len;
}
@@ -813,13 +878,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->start = end;
split->len = em_end - end;
split->block_start = em->block_start;
+ split->disk_bytenr = em->disk_bytenr;
split->flags = flags;
split->generation = gen;
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
split->disk_num_bytes = max(em->block_len,
em->disk_num_bytes);
-
+ split->offset = em->offset + end - em->start;
split->ram_bytes = em->ram_bytes;
if (compressed) {
split->block_len = em->block_len;
@@ -832,10 +898,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->orig_start = em->orig_start;
}
} else {
+ split->disk_num_bytes = 0;
+ split->offset = 0;
split->ram_bytes = split->len;
split->orig_start = split->start;
split->block_len = 0;
- split->disk_num_bytes = 0;
}
if (extent_map_in_tree(em)) {
@@ -989,10 +1056,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* First, replace the em with a new extent_map starting from * em->start */
split_pre->start = em->start;
split_pre->len = pre;
+ split_pre->disk_bytenr = new_logical;
+ split_pre->disk_num_bytes = split_pre->len;
+ split_pre->offset = 0;
split_pre->orig_start = split_pre->start;
split_pre->block_start = new_logical;
split_pre->block_len = split_pre->len;
- split_pre->disk_num_bytes = split_pre->block_len;
split_pre->ram_bytes = split_pre->len;
split_pre->flags = flags;
split_pre->generation = em->generation;
@@ -1007,10 +1076,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
/* Insert the middle extent_map. */
split_mid->start = em->start + pre;
split_mid->len = em->len - pre;
+ split_mid->disk_bytenr = em->block_start + pre;
+ split_mid->disk_num_bytes = split_mid->len;
+ split_mid->offset = 0;
split_mid->orig_start = split_mid->start;
split_mid->block_start = em->block_start + pre;
split_mid->block_len = split_mid->len;
- split_mid->disk_num_bytes = split_mid->block_len;
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->generation = em->generation;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 2b7bbffd594b..0b1a8e409377 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -71,12 +71,29 @@ struct extent_map {
u64 orig_start;
/*
+ * The bytenr of the full on-disk extent.
+ *
+ * For regular extents it's btrfs_file_extent_item::disk_bytenr.
+ * For holes it's EXTENT_MAP_HOLE and for inline extents it's
+ * EXTENT_MAP_INLINE.
+ */
+ u64 disk_bytenr;
+
+ /*
* The full on-disk extent length, matching
* btrfs_file_extent_item::disk_num_bytes.
*/
u64 disk_num_bytes;
/*
+ * Offset inside the decompressed extent.
+ *
+ * For regular extents it's btrfs_file_extent_item::offset.
+ * For holes and inline extents it's 0.
+ */
+ u64 offset;
+
+ /*
* The decompressed size of the whole on-disk extent, matching
* btrfs_file_extent_item::ram_bytes.
*/
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 430dce44ebd2..1298afea9503 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -1295,12 +1295,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->len = btrfs_file_extent_end(path) - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, fi);
- em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
+ em->disk_bytenr = EXTENT_MAP_HOLE;
+ em->disk_num_bytes = 0;
+ em->offset = 0;
return;
}
+ em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ em->offset = btrfs_file_extent_offset(leaf, fi);
if (compress_type != BTRFS_COMPRESS_NONE) {
extent_map_set_compression(em, compress_type);
em->block_start = bytenr;
@@ -1317,8 +1322,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
ASSERT(extent_start == 0);
em->block_start = EXTENT_MAP_INLINE;
+ em->disk_bytenr = EXTENT_MAP_INLINE;
em->start = 0;
em->len = fs_info->sectorsize;
+ em->offset = 0;
/*
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e3ef1659b6ae..b8cd5687d60e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2349,6 +2349,7 @@ out:
hole_em->orig_start = offset;
hole_em->block_start = EXTENT_MAP_HOLE;
+ hole_em->disk_bytenr = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->disk_num_bytes = 0;
hole_em->generation = trans->transid;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5696babaa987..a8ecc3806061 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -142,6 +142,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 disk_num_bytes,
u64 ram_bytes, int compress_type,
+ const struct btrfs_file_extent *file_extent,
int type);
static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
@@ -1153,6 +1154,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ordered_extent *ordered;
+ struct btrfs_file_extent file_extent;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_state *cached = NULL;
@@ -1199,6 +1201,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
lock_extent(io_tree, start, end, &cached);
/* Here we're doing allocation and writeback of the compressed pages */
+ file_extent.disk_bytenr = ins.objectid;
+ file_extent.disk_num_bytes = ins.offset;
+ file_extent.ram_bytes = async_extent->ram_size;
+ file_extent.num_bytes = async_extent->ram_size;
+ file_extent.offset = 0;
+ file_extent.compression = async_extent->compress_type;
+
em = create_io_em(inode, start,
async_extent->ram_size, /* len */
start, /* orig_start */
@@ -1207,6 +1216,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
ins.offset, /* orig_block_len */
async_extent->ram_size, /* ram_bytes */
async_extent->compress_type,
+ &file_extent,
BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -1396,6 +1406,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
while (num_bytes > 0) {
struct btrfs_ordered_extent *ordered;
+ struct btrfs_file_extent file_extent;
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
@@ -1432,6 +1443,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
extent_reserved = true;
ram_size = ins.offset;
+ file_extent.disk_bytenr = ins.objectid;
+ file_extent.disk_num_bytes = ins.offset;
+ file_extent.num_bytes = ins.offset;
+ file_extent.ram_bytes = ins.offset;
+ file_extent.offset = 0;
+ file_extent.compression = BTRFS_COMPRESS_NONE;
lock_extent(&inode->io_tree, start, start + ram_size - 1,
&cached);
@@ -1443,6 +1460,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
ins.offset, /* orig_block_len */
ram_size, /* ram_bytes */
BTRFS_COMPRESS_NONE, /* compress_type */
+ &file_extent,
BTRFS_ORDERED_REGULAR /* type */);
if (IS_ERR(em)) {
unlock_extent(&inode->io_tree, start,
@@ -2181,6 +2199,7 @@ must_cow:
nocow_args.num_bytes, /* block_len */
nocow_args.disk_num_bytes, /* orig_block_len */
ram_bytes, BTRFS_COMPRESS_NONE,
+ &nocow_args.file_extent,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
unlock_extent(&inode->io_tree, cur_offset,
@@ -5013,6 +5032,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
hole_em->orig_start = cur_offset;
hole_em->block_start = EXTENT_MAP_HOLE;
+ hole_em->disk_bytenr = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->disk_num_bytes = 0;
hole_em->ram_bytes = hole_size;
@@ -6881,6 +6901,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
}
em->start = EXTENT_MAP_HOLE;
em->orig_start = EXTENT_MAP_HOLE;
+ em->disk_bytenr = EXTENT_MAP_HOLE;
em->len = (u64)-1;
em->block_len = (u64)-1;
@@ -7046,7 +7067,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
const u64 block_len,
const u64 orig_block_len,
const u64 ram_bytes,
- const int type)
+ const int type,
+ const struct btrfs_file_extent *file_extent)
{
struct extent_map *em = NULL;
struct btrfs_ordered_extent *ordered;
@@ -7055,7 +7077,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
em = create_io_em(inode, start, len, orig_start, block_start,
block_len, orig_block_len, ram_bytes,
BTRFS_COMPRESS_NONE, /* compress_type */
- type);
+ file_extent, type);
if (IS_ERR(em))
goto out;
}
@@ -7086,6 +7108,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_file_extent file_extent;
struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
@@ -7104,9 +7127,16 @@ again:
if (ret)
return ERR_PTR(ret);
+ file_extent.disk_bytenr = ins.objectid;
+ file_extent.disk_num_bytes = ins.offset;
+ file_extent.num_bytes = ins.offset;
+ file_extent.ram_bytes = ins.offset;
+ file_extent.offset = 0;
+ file_extent.compression = BTRFS_COMPRESS_NONE;
em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
ins.objectid, ins.offset, ins.offset,
- ins.offset, BTRFS_ORDERED_REGULAR);
+ ins.offset, BTRFS_ORDERED_REGULAR,
+ &file_extent);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(em))
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
@@ -7348,6 +7378,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 disk_num_bytes,
u64 ram_bytes, int compress_type,
+ const struct btrfs_file_extent *file_extent,
int type)
{
struct extent_map *em;
@@ -7405,9 +7436,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
+ em->disk_bytenr = file_extent->disk_bytenr;
em->disk_num_bytes = disk_num_bytes;
em->ram_bytes = ram_bytes;
em->generation = -1;
+ em->offset = file_extent->offset;
em->flags |= EXTENT_FLAG_PINNED;
if (type == BTRFS_ORDERED_COMPRESSED)
extent_map_set_compression(em, compress_type);
@@ -7431,6 +7464,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
{
const bool nowait = (iomap_flags & IOMAP_NOWAIT);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
+ struct btrfs_file_extent file_extent;
struct extent_map *em = *map;
int type;
u64 block_start, orig_start, orig_block_len, ram_bytes;
@@ -7461,7 +7495,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
block_start = em->block_start + (start - em->start);
if (can_nocow_extent(inode, start, &len, &orig_start,
- &orig_block_len, &ram_bytes, NULL, false, false) == 1) {
+ &orig_block_len, &ram_bytes,
+ &file_extent, false, false) == 1) {
bg = btrfs_inc_nocow_writers(fs_info, block_start);
if (bg)
can_nocow = true;
@@ -7489,7 +7524,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
orig_start, block_start,
len, orig_block_len,
- ram_bytes, type);
+ ram_bytes, type,
+ &file_extent);
btrfs_dec_nocow_writers(bg);
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
@@ -9581,6 +9617,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->orig_start = cur_offset;
em->len = ins.offset;
em->block_start = ins.objectid;
+ em->disk_bytenr = ins.objectid;
+ em->offset = 0;
em->block_len = ins.offset;
em->disk_num_bytes = ins.offset;
em->ram_bytes = ins.offset;
@@ -10147,6 +10185,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
struct extent_changeset *data_reserved = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_ordered_extent *ordered;
+ struct btrfs_file_extent file_extent;
int compression;
size_t orig_count;
u64 start, end;
@@ -10322,10 +10361,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
goto out_delalloc_release;
extent_reserved = true;
+ file_extent.disk_bytenr = ins.objectid;
+ file_extent.disk_num_bytes = ins.offset;
+ file_extent.num_bytes = num_bytes;
+ file_extent.ram_bytes = ram_bytes;
+ file_extent.offset = encoded->unencoded_offset;
+ file_extent.compression = compression;
em = create_io_em(inode, start, num_bytes,
start - encoded->unencoded_offset, ins.objectid,
ins.offset, ins.offset, ram_bytes, compression,
- BTRFS_ORDERED_COMPRESSED);
+ &file_extent, BTRFS_ORDERED_COMPRESSED);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_free_reserved;