summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2019-10-09 12:11:00 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-22 17:08:29 -0400
commit: 2925fc49b3303ee7733cf9f6cba6a59a5b8a5e4b (patch)
tree: 22e8bfd43f84b6b7135f6489d6d74e1c3abfbb5f /fs
parent: e0541a9346951c94dce4d65d88541a329adf0b76 (diff)
bcachefs: Split out bchfs_extent_update()
The next few patches will move more of the logic around i_size/i_sectors updates into io.c, and better separate the Linux VFS-specific code from the core bcachefs code, to better support the FUSE port.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs')
-rw-r--r-- fs/bcachefs/fs-io.c | 169
-rw-r--r-- fs/bcachefs/fs-io.h | 14
-rw-r--r-- fs/bcachefs/io.c | 141
-rw-r--r-- fs/bcachefs/io.h | 3
-rw-r--r-- fs/bcachefs/reflink.c | 6
5 files changed, 178 insertions, 155 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 9ecefd95df6e..92cab285698c 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -237,151 +237,31 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
mutex_unlock(&inode->ei_quota_lock);
}
-/* normal i_size/i_sectors update machinery: */
-
-static int sum_sector_overwrites(struct btree_trans *trans,
- struct btree_iter *extent_iter,
- struct bkey_i *new,
- bool may_allocate,
- bool *maybe_extending,
- s64 *delta)
-{
- struct btree_iter *iter;
- struct bkey_s_c old;
- int ret = 0;
-
- *maybe_extending = true;
- *delta = 0;
-
- iter = bch2_trans_copy_iter(trans, extent_iter);
- if (IS_ERR(iter))
- return PTR_ERR(iter);
-
- for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
- if (!may_allocate &&
- bch2_bkey_nr_ptrs_allocated(old) <
- bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new))) {
- ret = -ENOSPC;
- break;
- }
-
- *delta += (min(new->k.p.offset,
- old.k->p.offset) -
- max(bkey_start_offset(&new->k),
- bkey_start_offset(old.k))) *
- (bkey_extent_is_allocation(&new->k) -
- bkey_extent_is_allocation(old.k));
-
- if (bkey_cmp(old.k->p, new->k.p) >= 0) {
- /*
- * Check if there's already data above where we're
- * going to be writing to - this means we're definitely
- * not extending the file:
- *
- * Note that it's not sufficient to check if there's
- * data up to the sector offset we're going to be
- * writing to, because i_size could be up to one block
- * less:
- */
- if (!bkey_cmp(old.k->p, new->k.p))
- old = bch2_btree_iter_next(iter);
-
- if (old.k && !bkey_err(old) &&
- old.k->p.inode == extent_iter->pos.inode &&
- bkey_extent_is_data(old.k))
- *maybe_extending = false;
-
- break;
- }
- }
-
- bch2_trans_iter_put(trans, iter);
- return ret;
-}
-
-int bch2_extent_update(struct btree_trans *trans,
- struct bch_inode_info *inode,
- struct disk_reservation *disk_res,
- struct quota_res *quota_res,
- struct btree_iter *extent_iter,
- struct bkey_i *k,
- u64 new_i_size,
- bool may_allocate,
- bool direct,
- s64 *total_delta)
-{
- struct bch_fs *c = trans->c;
- struct btree_iter *inode_iter = NULL;
- struct bch_inode_unpacked inode_u;
- struct bkey_inode_buf inode_p;
- bool extending = false;
- s64 i_sectors_delta;
+int bchfs_extent_update(struct btree_trans *trans,
+ struct bch_inode_info *inode,
+ struct disk_reservation *disk_res,
+ struct quota_res *quota_res,
+ struct btree_iter *extent_iter,
+ struct bkey_i *k,
+ u64 new_i_size,
+ bool may_allocate,
+ bool direct,
+ s64 *total_delta)
+{
+ s64 i_sectors_delta = 0;
int ret;
- ret = bch2_extent_trim_atomic(k, extent_iter);
- if (ret)
- return ret;
-
- ret = sum_sector_overwrites(trans, extent_iter, k, may_allocate,
- &extending, &i_sectors_delta);
+ ret = bch2_extent_update(trans, extent_iter, k,
+ disk_res, &inode->ei_journal_seq,
+ new_i_size, &i_sectors_delta);
if (ret)
return ret;
- bch2_trans_update(trans, extent_iter, k);
-
- new_i_size = min(k->k.p.offset << 9, new_i_size);
-
- if (i_sectors_delta || extending) {
- inode_iter = bch2_inode_peek(trans, &inode_u,
- k->k.p.inode, BTREE_ITER_INTENT);
- if (IS_ERR(inode_iter))
- return PTR_ERR(inode_iter);
-
- /*
- * XXX:
- * writeback can race a bit with truncate, because truncate
- * first updates the inode then truncates the pagecache. This is
- * ugly, but lets us preserve the invariant that the in memory
- * i_size is always >= the on disk i_size.
- *
- BUG_ON(new_i_size > inode_u.bi_size &&
- (inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
- */
- BUG_ON(new_i_size > inode_u.bi_size && !extending &&
- !(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
-
- if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
- new_i_size > inode_u.bi_size)
- inode_u.bi_size = new_i_size;
- else
- extending = false;
-
- inode_u.bi_sectors += i_sectors_delta;
-
- if (i_sectors_delta || extending) {
- bch2_inode_pack(&inode_p, &inode_u);
- bch2_trans_update(trans, inode_iter,
- &inode_p.inode.k_i);
- }
- }
-
- ret = bch2_trans_commit(trans, disk_res,
- &inode->ei_journal_seq,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOUNLOCK|
- BTREE_INSERT_USE_RESERVE);
- if (ret)
- goto err;
-
- if (i_sectors_delta || extending) {
- inode->ei_inode.bi_sectors = inode_u.bi_sectors;
- inode->ei_inode.bi_size = inode_u.bi_size;
- }
+ new_i_size = min(new_i_size, extent_iter->pos.offset << 9);
if (direct)
- i_sectors_acct(c, inode, quota_res, i_sectors_delta);
- if (direct && extending) {
+ i_sectors_acct(trans->c, inode, quota_res, i_sectors_delta);
+ if (direct && new_i_size) {
spin_lock(&inode->v.i_lock);
if (new_i_size > inode->v.i_size)
i_size_write(&inode->v, new_i_size);
@@ -390,10 +270,7 @@ int bch2_extent_update(struct btree_trans *trans,
if (total_delta)
*total_delta += i_sectors_delta;
-err:
- if (!IS_ERR_OR_NULL(inode_iter))
- bch2_trans_iter_put(trans, inode_iter);
- return ret;
+ return 0;
}
static int bchfs_write_index_update(struct bch_write_op *wop)
@@ -426,7 +303,7 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
bch2_trans_begin_updates(&trans);
- ret = bch2_extent_update(&trans, inode,
+ ret = bchfs_extent_update(&trans, inode,
&wop->res, quota_res,
iter, &tmp.k,
op->new_i_size,
@@ -2295,7 +2172,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
bch2_trans_begin_updates(trans);
- ret = bch2_extent_update(trans, inode,
+ ret = bchfs_extent_update(trans, inode,
&disk_res, NULL, iter, &delete,
0, false, true, NULL);
bch2_disk_reservation_put(c, &disk_res);
@@ -2463,6 +2340,8 @@ static int bch2_extend(struct bch_inode_info *inode,
/*
* sync appends:
+ *
+ * this has to be done _before_ extending i_size:
*/
ret = filemap_write_and_wait_range(mapping, inode_u->bi_size, S64_MAX);
if (ret)
@@ -2939,7 +2818,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode,
bch2_trans_begin_updates(&trans);
- ret = bch2_extent_update(&trans, inode,
+ ret = bchfs_extent_update(&trans, inode,
&disk_res, &quota_res,
iter, &reservation.k_i,
0, true, true, NULL);
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index 5e48d21bd2e4..090d1c86de37 100644
--- a/fs/bcachefs/fs-io.h
+++ b/fs/bcachefs/fs-io.h
@@ -11,13 +11,13 @@
struct quota_res;
-int bch2_extent_update(struct btree_trans *,
- struct bch_inode_info *,
- struct disk_reservation *,
- struct quota_res *,
- struct btree_iter *,
- struct bkey_i *,
- u64, bool, bool, s64 *);
+int bchfs_extent_update(struct btree_trans *,
+ struct bch_inode_info *,
+ struct disk_reservation *,
+ struct quota_res *,
+ struct btree_iter *,
+ struct bkey_i *,
+ u64, bool, bool, s64 *);
int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
struct bpos, struct bch_inode_info *);
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index b893db7f7dcc..a9b1c21dd9a7 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -19,6 +19,7 @@
#include "ec.h"
#include "error.h"
#include "extents.h"
+#include "inode.h"
#include "io.h"
#include "journal.h"
#include "keylist.h"
@@ -178,6 +179,146 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
mutex_unlock(&c->bio_bounce_pages_lock);
}
+/* Extent update path: */
+
+static int sum_sector_overwrites(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ struct bkey_i *new,
+ bool may_allocate,
+ bool *maybe_extending,
+ s64 *delta)
+{
+ struct btree_iter *iter;
+ struct bkey_s_c old;
+ int ret = 0;
+
+ *maybe_extending = true;
+ *delta = 0;
+
+ iter = bch2_trans_copy_iter(trans, extent_iter);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
+ if (!may_allocate &&
+ bch2_bkey_nr_ptrs_allocated(old) <
+ bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new))) {
+ ret = -ENOSPC;
+ break;
+ }
+
+ *delta += (min(new->k.p.offset,
+ old.k->p.offset) -
+ max(bkey_start_offset(&new->k),
+ bkey_start_offset(old.k))) *
+ (bkey_extent_is_allocation(&new->k) -
+ bkey_extent_is_allocation(old.k));
+
+ if (bkey_cmp(old.k->p, new->k.p) >= 0) {
+ /*
+ * Check if there's already data above where we're
+ * going to be writing to - this means we're definitely
+ * not extending the file:
+ *
+ * Note that it's not sufficient to check if there's
+ * data up to the sector offset we're going to be
+ * writing to, because i_size could be up to one block
+ * less:
+ */
+ if (!bkey_cmp(old.k->p, new->k.p))
+ old = bch2_btree_iter_next(iter);
+
+ if (old.k && !bkey_err(old) &&
+ old.k->p.inode == extent_iter->pos.inode &&
+ bkey_extent_is_data(old.k))
+ *maybe_extending = false;
+
+ break;
+ }
+ }
+
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
+int bch2_extent_update(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *k,
+ struct disk_reservation *disk_res,
+ u64 *journal_seq,
+ u64 new_i_size,
+ s64 *i_sectors_delta)
+{
+ /* this must live until after bch2_trans_commit(): */
+ struct bkey_inode_buf inode_p;
+ bool extending = false;
+ s64 delta = 0;
+ int ret;
+
+ ret = bch2_extent_trim_atomic(k, iter);
+ if (ret)
+ return ret;
+
+ ret = sum_sector_overwrites(trans, iter, k,
+ disk_res && disk_res->sectors != 0,
+ &extending, &delta);
+ if (ret)
+ return ret;
+
+ new_i_size = extending
+ ? min(k->k.p.offset << 9, new_i_size)
+ : 0;
+
+ if (delta || new_i_size) {
+ struct btree_iter *inode_iter;
+ struct bch_inode_unpacked inode_u;
+
+ inode_iter = bch2_inode_peek(trans, &inode_u,
+ k->k.p.inode, BTREE_ITER_INTENT);
+ if (IS_ERR(inode_iter))
+ return PTR_ERR(inode_iter);
+
+ /*
+ * XXX:
+ * writeback can race a bit with truncate, because truncate
+ * first updates the inode then truncates the pagecache. This is
+ * ugly, but lets us preserve the invariant that the in memory
+ * i_size is always >= the on disk i_size.
+ *
+ BUG_ON(new_i_size > inode_u.bi_size &&
+ (inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY));
+ */
+ BUG_ON(new_i_size > inode_u.bi_size && !extending);
+
+ if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+ new_i_size > inode_u.bi_size)
+ inode_u.bi_size = new_i_size;
+ else
+ new_i_size = 0;
+
+ inode_u.bi_sectors += delta;
+
+ if (delta || new_i_size) {
+ bch2_inode_pack(&inode_p, &inode_u);
+ bch2_trans_update(trans, inode_iter,
+ &inode_p.inode.k_i);
+ }
+
+ bch2_trans_iter_put(trans, inode_iter);
+ }
+
+ bch2_trans_update(trans, iter, k);
+
+ ret = bch2_trans_commit(trans, disk_res, journal_seq,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_USE_RESERVE);
+ if (!ret && i_sectors_delta)
+ *i_sectors_delta += delta;
+
+ return ret;
+}
+
/* Writes */
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index c6f5ae717cf3..e53f9ecc082d 100644
--- a/fs/bcachefs/io.h
+++ b/fs/bcachefs/io.h
@@ -58,6 +58,9 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
: op->c->wq;
}
+int bch2_extent_update(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, struct disk_reservation *,
+ u64 *, u64, s64 *);
int bch2_write_index_default(struct bch_write_op *);
static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index de4c8b075a65..c9ff467cc0d9 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -259,9 +259,9 @@ s64 bch2_remap_range(struct bch_fs *c,
min(src_k.k->p.offset - src_iter->pos.offset,
dst_end.offset - dst_iter->pos.offset));
- ret = bch2_extent_update(&trans, dst_inode, NULL, NULL,
- dst_iter, &new_dst.k,
- new_i_size, false, true, NULL);
+ ret = bchfs_extent_update(&trans, dst_inode, NULL, NULL,
+ dst_iter, &new_dst.k,
+ new_i_size, false, true, NULL);
if (ret)
goto btree_err;