summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/tree-log.c54
1 files changed, 54 insertions, 0 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6d650468d21a..1bbaace73383 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -722,12 +722,66 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
&ordered_sums, 0);
if (ret)
goto out;
+ /*
+ * Now delete all existing cums in the csum root that
+ * cover our range. We do this because we can have an
+ * extent that is completely referenced by one file
+ * extent item and partially referenced by another
+ * file extent item (like after using the clone or
+ * extent_same ioctls). In this case if we end up doing
+ * the replay of the one that partially references the
+ * extent first, and we do not do the csum deletion
+ * below, we can get 2 csum items in the csum tree that
+ * overlap each other. For example, imagine our log has
+ * the two following file extent items:
+ *
+ * key (257 EXTENT_DATA 409600)
+ * extent data disk byte 12845056 nr 102400
+ * extent data offset 20480 nr 20480 ram 102400
+ *
+ * key (257 EXTENT_DATA 819200)
+ * extent data disk byte 12845056 nr 102400
+ * extent data offset 0 nr 102400 ram 102400
+ *
+ * Where the second one fully references the 100K extent
+ * that starts at disk byte 12845056, and the log tree
+ * has a single csum item that covers the entire range
+ * of the extent:
+ *
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
+ *
+ * After the first file extent item is replayed, the
+ * csum tree gets the following csum item:
+ *
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
+ *
+ * Which covers the 20K sub-range starting at offset 20K
+ * of our extent. Now when we replay the second file
+ * extent item, if we do not delete existing csum items
+ * that cover any of its blocks, we end up getting two
+ * csum items in our csum tree that overlap each other:
+ *
+ * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
+ * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
+ *
+ * Which is a problem, because after this anyone trying
+ * to lookup up for the checksum of any block of our
+ * extent starting at an offset of 40K or higher, will
+ * end up looking at the second csum item only, which
+ * does not contain the checksum for any block starting
+ * at offset 40K or higher of our extent.
+ */
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums;
sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
list);
if (!ret)
+ ret = btrfs_del_csums(trans,
+ root->fs_info->csum_root,
+ sums->bytenr,
+ sums->len);
+ if (!ret)
ret = btrfs_csum_file_blocks(trans,
root->fs_info->csum_root,
sums);