summaryrefslogtreecommitdiff
path: root/fs/bcachefs/fs-io.c
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-07-09 22:28:08 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:10:07 -0400
commita09818c7e78633ee8a6d147ea5bf074d60ea66cd (patch)
tree1ae1b2802c083a645f766d45b05d13a62307b52a /fs/bcachefs/fs-io.c
parentea28c86722954a58e3bab24eec3e7624e8d4956b (diff)
bcachefs: Fallocate now checks page cache
Previously, fallocate would only check the state of the extents btree when determining if we need to create a reservation. But the page cache might already have dirty data or a disk reservation. This changes __bchfs_fallocate() to call bch2_seek_pagecache_hole() to check for this.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/fs-io.c')
-rw-r--r--fs/bcachefs/fs-io.c83
1 file changed, 61 insertions, 22 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index cb654cfecfb9..0661dfd9a8d0 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -35,6 +35,8 @@
#include <trace/events/writeback.h>
+static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned);
+
struct folio_vec {
struct folio *fv_folio;
size_t fv_offset;
@@ -3370,6 +3372,8 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
struct quota_res quota_res = { 0 };
struct bkey_s_c k;
unsigned sectors;
+ bool is_allocation;
+ u64 hole_start, hole_end;
u32 snapshot;
bch2_trans_begin(&trans);
@@ -3385,6 +3389,10 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if ((ret = bkey_err(k)))
goto bkey_err;
+ hole_start = iter.pos.offset;
+ hole_end = bpos_min(k.k->p, end_pos).offset;
+ is_allocation = bkey_extent_is_allocation(k.k);
+
/* already reserved */
if (bkey_extent_is_reservation(k) &&
bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) {
@@ -3398,17 +3406,26 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
continue;
}
- /*
- * XXX: for nocow mode, we should promote shared extents to
- * unshared here
- */
+ if (!(mode & FALLOC_FL_ZERO_RANGE)) {
+ ret = drop_locks_do(&trans,
+ (bch2_clamp_data_hole(&inode->v,
+ &hole_start,
+ &hole_end,
+ opts.data_replicas), 0));
+ bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
+
+ if (ret)
+ goto bkey_err;
+
+ if (hole_start == hole_end)
+ continue;
+ }
- sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset;
+ sectors = hole_end - hole_start;
- if (!bkey_extent_is_allocation(k.k)) {
+ if (!is_allocation) {
ret = bch2_quota_reservation_add(c, inode,
- &quota_res,
- sectors, true);
+ &quota_res, sectors, true);
if (unlikely(ret))
goto bkey_err;
}
@@ -3420,15 +3437,15 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
goto bkey_err;
i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
+
+ drop_locks_do(&trans,
+ (mark_pagecache_reserved(inode, hole_start, iter.pos.offset), 0));
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
}
- bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */
- mark_pagecache_reserved(inode, start_sector, iter.pos.offset);
-
if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
@@ -3676,14 +3693,16 @@ err:
/* fseek: */
-static int folio_data_offset(struct folio *folio, loff_t pos)
+static int folio_data_offset(struct folio *folio, loff_t pos,
+ unsigned min_replicas)
{
struct bch_folio *s = bch2_folio(folio);
unsigned i, sectors = folio_sectors(folio);
if (s)
for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
- if (s->s[i].state >= SECTOR_dirty)
+ if (s->s[i].state >= SECTOR_dirty &&
+ s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
return i << SECTOR_SHIFT;
return -1;
@@ -3691,7 +3710,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos)
static loff_t bch2_seek_pagecache_data(struct inode *vinode,
loff_t start_offset,
- loff_t end_offset)
+ loff_t end_offset,
+ unsigned min_replicas)
{
struct folio_batch fbatch;
pgoff_t start_index = start_offset >> PAGE_SHIFT;
@@ -3710,7 +3730,8 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
folio_lock(folio);
offset = folio_data_offset(folio,
- max(folio_pos(folio), start_offset));
+ max(folio_pos(folio), start_offset),
+ min_replicas);
if (offset >= 0) {
ret = clamp(folio_pos(folio) + offset,
start_offset, end_offset);
@@ -3772,7 +3793,7 @@ err:
if (next_data > offset)
next_data = bch2_seek_pagecache_data(&inode->v,
- offset, next_data);
+ offset, next_data, 0);
if (next_data >= isize)
return -ENXIO;
@@ -3780,7 +3801,8 @@ err:
return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
}
-static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
+static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
+ unsigned min_replicas)
{
struct folio *folio;
struct bch_folio *s;
@@ -3797,7 +3819,8 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset)
sectors = folio_sectors(folio);
for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
- if (s->s[i].state < SECTOR_dirty) {
+ if (s->s[i].state < SECTOR_dirty ||
+ s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
*offset = max(*offset,
folio_pos(folio) + (i << SECTOR_SHIFT));
goto unlock;
@@ -3812,18 +3835,34 @@ unlock:
static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
loff_t start_offset,
- loff_t end_offset)
+ loff_t end_offset,
+ unsigned min_replicas)
{
struct address_space *mapping = vinode->i_mapping;
loff_t offset = start_offset;
while (offset < end_offset &&
- !folio_hole_offset(mapping, &offset))
+ !folio_hole_offset(mapping, &offset, min_replicas))
;
return min(offset, end_offset);
}
+static void bch2_clamp_data_hole(struct inode *inode,
+ u64 *hole_start,
+ u64 *hole_end,
+ unsigned min_replicas)
+{
+ *hole_start = bch2_seek_pagecache_hole(inode,
+ *hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+
+ if (*hole_start == *hole_end)
+ return;
+
+ *hole_end = bch2_seek_pagecache_data(inode,
+ *hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+}
+
static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
struct bch_inode_info *inode = file_bch_inode(file);
@@ -3853,12 +3892,12 @@ retry:
BTREE_ITER_SLOTS, k, ret) {
if (k.k->p.inode != inode->v.i_ino) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
- offset, MAX_LFS_FILESIZE);
+ offset, MAX_LFS_FILESIZE, 0);
break;
} else if (!bkey_extent_is_data(k.k)) {
next_hole = bch2_seek_pagecache_hole(&inode->v,
max(offset, bkey_start_offset(k.k) << 9),
- k.k->p.offset << 9);
+ k.k->p.offset << 9, 0);
if (next_hole < k.k->p.offset << 9)
break;