summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/ext4.rst10
-rw-r--r--fs/ext4/bitmap.c8
-rw-r--r--fs/ext4/dir.c14
-rw-r--r--fs/ext4/ext4.h31
-rw-r--r--fs/ext4/extents.c823
-rw-r--r--fs/ext4/extents_status.c240
-rw-r--r--fs/ext4/extents_status.h28
-rw-r--r--fs/ext4/fast_commit.c47
-rw-r--r--fs/ext4/file.c20
-rw-r--r--fs/ext4/ialloc.c35
-rw-r--r--fs/ext4/indirect.c7
-rw-r--r--fs/ext4/inline.c46
-rw-r--r--fs/ext4/inode.c292
-rw-r--r--fs/ext4/mballoc.c25
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c90
-rw-r--r--fs/ext4/namei.c16
-rw-r--r--fs/ext4/readpage.c16
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c65
-rw-r--r--fs/ext4/xattr.c31
-rw-r--r--fs/ext4/xattr.h7
-rw-r--r--fs/jbd2/checkpoint.c21
-rw-r--r--fs/jbd2/journal.c97
-rw-r--r--include/linux/jbd2.h4
25 files changed, 989 insertions, 994 deletions
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index 5740d85439ff..2418b0c2d3df 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
that ext4's inode table readahead algorithm will pre-read into the
buffer cache. The default value is 32 blocks.
- nouser_xattr
- Disables Extended User Attributes. See the attr(5) manual page for
- more information about extended attributes.
-
- noacl
- This option disables POSIX Access Control List support. If ACL support
- is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
- is enabled by default on mount. See the acl(5) manual page for more
- information about acl.
-
bsddf (*)
Make 'df' act like BSD.
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index cd725bebe69e..2a135075468d 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -18,15 +18,17 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz)
+ struct buffer_head *bh)
{
__u32 hi;
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int sz;
if (!ext4_has_metadata_csum(sb))
return 1;
+ sz = EXT4_INODES_PER_GROUP(sb) >> 3;
provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) {
@@ -40,14 +42,16 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb,
void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz)
+ struct buffer_head *bh)
{
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int sz;
if (!ext4_has_metadata_csum(sb))
return;
+ sz = EXT4_INODES_PER_GROUP(sb) >> 3;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 13196afe55ce..ef6a3c8f3a9a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -280,12 +280,20 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct fscrypt_str de_name =
FSTR_INIT(de->name,
de->name_len);
+ u32 hash;
+ u32 minor_hash;
+
+ if (IS_CASEFOLDED(inode)) {
+ hash = EXT4_DIRENT_HASH(de);
+ minor_hash = EXT4_DIRENT_MINOR_HASH(de);
+ } else {
+ hash = 0;
+ minor_hash = 0;
+ }
/* Directory is encrypted */
err = fscrypt_fname_disk_to_usr(inode,
- EXT4_DIRENT_HASH(de),
- EXT4_DIRENT_MINOR_HASH(de),
- &de_name, &fstr);
+ hash, minor_hash, &de_name, &fstr);
de_name = fstr;
fstr.len = save_len;
if (err)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ecc15e5f1eba..44b0d418143c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1058,6 +1058,7 @@ struct ext4_inode_info {
/* Number of ongoing updates on this inode */
atomic_t i_fc_updates;
+ atomic_t i_unwritten; /* Nr. of inflight conversions pending */
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;
@@ -1106,6 +1107,10 @@ struct ext4_inode_info {
/* mballoc */
atomic_t i_prealloc_active;
+
+ /* allocation reservation info for delalloc */
+ /* In case of bigalloc, this refer to clusters rather than blocks */
+ unsigned int i_reserved_data_blocks;
struct rb_root i_prealloc_node;
rwlock_t i_prealloc_lock;
@@ -1122,10 +1127,6 @@ struct ext4_inode_info {
/* ialloc */
ext4_group_t i_last_alloc_group;
- /* allocation reservation info for delalloc */
- /* In case of bigalloc, this refer to clusters rather than blocks */
- unsigned int i_reserved_data_blocks;
-
/* pending cluster reservations for bigalloc file systems */
struct ext4_pending_tree i_pending_tree;
@@ -1149,7 +1150,6 @@ struct ext4_inode_info {
*/
struct list_head i_rsv_conversion_list;
struct work_struct i_rsv_conversion_work;
- atomic_t i_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
@@ -2338,9 +2338,9 @@ struct ext4_dir_entry_2 {
((struct ext4_dir_entry_hash *) \
(((void *)(entry)) + \
((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND)))
-#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(de)->hash)
+#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash)
#define EXT4_DIRENT_MINOR_HASH(entry) \
- le32_to_cpu(EXT4_DIRENT_HASHES(de)->minor_hash)
+ le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash)
static inline bool ext4_hash_in_dirent(const struct inode *inode)
{
@@ -2462,6 +2462,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
#define DX_HASH_HALF_MD4_UNSIGNED 4
#define DX_HASH_TEA_UNSIGNED 5
#define DX_HASH_SIPHASH 6
+#define DX_HASH_LAST DX_HASH_SIPHASH
static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
const void *address, unsigned int length)
@@ -2695,10 +2696,10 @@ struct mmpd_data {
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz);
+ struct buffer_head *bh);
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
- struct buffer_head *bh, int sz);
+ struct buffer_head *bh);
void ext4_block_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh);
@@ -3712,11 +3713,12 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
struct ext4_ext_path *path);
-extern int ext4_ext_insert_extent(handle_t *, struct inode *,
- struct ext4_ext_path **,
- struct ext4_extent *, int);
+extern struct ext4_ext_path *ext4_ext_insert_extent(
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext, int gb_flags);
extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
- struct ext4_ext_path **,
+ struct ext4_ext_path *,
int flags);
extern void ext4_free_ext_path(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
@@ -3853,6 +3855,9 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
return buffer_uptodate(bh);
}
+extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
+ loff_t pos, unsigned len,
+ get_block_t *get_block);
#endif /* __KERNEL__ */
#define EFSBADCRC EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e067f2dd0335..34e25eee6521 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -84,12 +84,11 @@ static void ext4_extent_block_csum_set(struct inode *inode,
et->et_checksum = ext4_extent_block_csum(inode, eh);
}
-static int ext4_split_extent_at(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path **ppath,
- ext4_lblk_t split,
- int split_flag,
- int flags);
+static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t split,
+ int split_flag, int flags);
static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
{
@@ -106,21 +105,27 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
return 0;
}
+static inline void ext4_ext_path_brelse(struct ext4_ext_path *path)
+{
+ brelse(path->p_bh);
+ path->p_bh = NULL;
+}
+
static void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
int depth, i;
- if (!path)
+ if (IS_ERR_OR_NULL(path))
return;
depth = path->p_depth;
- for (i = 0; i <= depth; i++, path++) {
- brelse(path->p_bh);
- path->p_bh = NULL;
- }
+ for (i = 0; i <= depth; i++, path++)
+ ext4_ext_path_brelse(path);
}
void ext4_free_ext_path(struct ext4_ext_path *path)
{
+ if (IS_ERR_OR_NULL(path))
+ return;
ext4_ext_drop_refs(path);
kfree(path);
}
@@ -323,19 +328,18 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
return size;
}
-static inline int
+static inline struct ext4_ext_path *
ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
- struct ext4_ext_path **ppath, ext4_lblk_t lblk,
+ struct ext4_ext_path *path, ext4_lblk_t lblk,
int nofail)
{
- struct ext4_ext_path *path = *ppath;
int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
if (nofail)
flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
- return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
+ return ext4_split_extent_at(handle, inode, path, lblk, unwritten ?
EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
flags);
}
@@ -635,8 +639,7 @@ int ext4_ext_precache(struct inode *inode)
*/
if ((i == depth) ||
path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
- brelse(path[i].p_bh);
- path[i].p_bh = NULL;
+ ext4_ext_path_brelse(path + i);
i--;
continue;
}
@@ -689,7 +692,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
struct ext4_extent *ex;
int i;
- if (!path)
+ if (IS_ERR_OR_NULL(path))
return;
eh = path[depth].p_hdr;
@@ -881,11 +884,10 @@ void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
struct ext4_ext_path *
ext4_find_extent(struct inode *inode, ext4_lblk_t block,
- struct ext4_ext_path **orig_path, int flags)
+ struct ext4_ext_path *path, int flags)
{
struct ext4_extent_header *eh;
struct buffer_head *bh;
- struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
short int depth, i, ppos = 0;
int ret;
gfp_t gfp_flags = GFP_NOFS;
@@ -906,7 +908,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
ext4_ext_drop_refs(path);
if (depth > path[0].p_maxdepth) {
kfree(path);
- *orig_path = path = NULL;
+ path = NULL;
}
}
if (!path) {
@@ -961,8 +963,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
err:
ext4_free_ext_path(path);
- if (orig_path)
- *orig_path = NULL;
return ERR_PTR(ret);
}
@@ -1395,15 +1395,15 @@ out:
* finds empty index and adds new leaf.
* if no free index is found, then it requests in-depth growing.
*/
-static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
- unsigned int mb_flags,
- unsigned int gb_flags,
- struct ext4_ext_path **ppath,
- struct ext4_extent *newext)
+static struct ext4_ext_path *
+ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
+ unsigned int mb_flags, unsigned int gb_flags,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext)
{
- struct ext4_ext_path *path = *ppath;
struct ext4_ext_path *curp;
int depth, i, err = 0;
+ ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block);
repeat:
i = depth = ext_depth(inode);
@@ -1422,42 +1422,38 @@ repeat:
* entry: create all needed subtree and add new leaf */
err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
if (err)
- goto out;
+ goto errout;
/* refill path */
- path = ext4_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
- ppath, gb_flags);
- if (IS_ERR(path))
- err = PTR_ERR(path);
- } else {
- /* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, mb_flags);
- if (err)
- goto out;
+ path = ext4_find_extent(inode, ee_block, path, gb_flags);
+ return path;
+ }
- /* refill path */
- path = ext4_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
- ppath, gb_flags);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
+ /* tree is full, time to grow in depth */
+ err = ext4_ext_grow_indepth(handle, inode, mb_flags);
+ if (err)
+ goto errout;
- /*
- * only first (depth 0 -> 1) produces free space;
- * in all other cases we have to split the grown tree
- */
- depth = ext_depth(inode);
- if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
- /* now we need to split */
- goto repeat;
- }
+ /* refill path */
+ path = ext4_find_extent(inode, ee_block, path, gb_flags);
+ if (IS_ERR(path))
+ return path;
+
+ /*
+ * only first (depth 0 -> 1) produces free space;
+ * in all other cases we have to split the grown tree
+ */
+ depth = ext_depth(inode);
+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
+ /* now we need to split */
+ goto repeat;
}
-out:
- return err;
+ return path;
+
+errout:
+ ext4_free_ext_path(path);
+ return ERR_PTR(err);
}
/*
@@ -1749,12 +1745,23 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
break;
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
- break;
+ goto clean;
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
- break;
+ goto clean;
}
+ return 0;
+
+clean:
+ /*
+ * The path[k].p_bh is either unmodified or with no verified bit
+ * set (see ext4_ext_get_access()). So just clear the verified bit
+ * of the successfully modified extents buffers, which will force
+ * these extents to be checked to avoid using inconsistent data.
+ */
+ while (++k < depth)
+ clear_buffer_verified(path[k].p_bh);
return err;
}
@@ -1876,7 +1883,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
path[0].p_hdr->eh_max = cpu_to_le16(max_root);
- brelse(path[1].p_bh);
+ ext4_ext_path_brelse(path + 1);
ext4_free_blocks(handle, inode, NULL, blk, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}
@@ -1964,16 +1971,15 @@ out:
* inserts requested extent as new one into the tree,
* creating new leaf in the no-space case.
*/
-int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
- struct ext4_ext_path **ppath,
- struct ext4_extent *newext, int gb_flags)
+struct ext4_ext_path *
+ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext, int gb_flags)
{
- struct ext4_ext_path *path = *ppath;
struct ext4_extent_header *eh;
struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */
- struct ext4_ext_path *npath = NULL;
- int depth, len, err;
+ int depth, len, err = 0;
ext4_lblk_t next;
int mb_flags = 0, unwritten;
@@ -1981,14 +1987,16 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
mb_flags |= EXT4_MB_DELALLOC_RESERVED;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ goto errout;
}
depth = ext_depth(inode);
ex = path[depth].p_ext;
eh = path[depth].p_hdr;
if (unlikely(path[depth].p_hdr == NULL)) {
EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ goto errout;
}
/* try to insert block into found extent and return */
@@ -2026,7 +2034,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode,
path + depth);
if (err)
- return err;
+ goto errout;
unwritten = ext4_ext_is_unwritten(ex);
ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
+ ext4_ext_get_actual_len(newext));
@@ -2051,7 +2059,7 @@ prepend:
err = ext4_ext_get_access(handle, inode,
path + depth);
if (err)
- return err;
+ goto errout;
unwritten = ext4_ext_is_unwritten(ex);
ex->ee_block = newext->ee_block;
@@ -2076,21 +2084,26 @@ prepend:
if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
next = ext4_ext_next_leaf_block(path);
if (next != EXT_MAX_BLOCKS) {
+ struct ext4_ext_path *npath;
+
ext_debug(inode, "next leaf block - %u\n", next);
- BUG_ON(npath != NULL);
npath = ext4_find_extent(inode, next, NULL, gb_flags);
- if (IS_ERR(npath))
- return PTR_ERR(npath);
+ if (IS_ERR(npath)) {
+ err = PTR_ERR(npath);
+ goto errout;
+ }
BUG_ON(npath->p_depth != path->p_depth);
eh = npath[depth].p_hdr;
if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
ext_debug(inode, "next leaf isn't full(%d)\n",
le16_to_cpu(eh->eh_entries));
+ ext4_free_ext_path(path);
path = npath;
goto has_space;
}
ext_debug(inode, "next leaf has no free space(%d,%d)\n",
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
+ ext4_free_ext_path(npath);
}
/*
@@ -2099,10 +2112,10 @@ prepend:
*/
if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
mb_flags |= EXT4_MB_USE_RESERVED;
- err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
- ppath, newext);
- if (err)
- goto cleanup;
+ path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
+ path, newext);
+ if (IS_ERR(path))
+ return path;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
@@ -2111,7 +2124,7 @@ has_space:
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
- goto cleanup;
+ goto errout;
if (!nearex) {
/* there is no extent in this leaf, create first one */
@@ -2169,17 +2182,20 @@ merge:
if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
ext4_ext_try_to_merge(handle, inode, path, nearex);
-
/* time to correct all indexes above */
err = ext4_ext_correct_indexes(handle, inode, path);
if (err)
- goto cleanup;
+ goto errout;
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (err)
+ goto errout;
-cleanup:
- ext4_free_ext_path(npath);
- return err;
+ return path;
+
+errout:
+ ext4_free_ext_path(path);
+ return ERR_PTR(err);
}
static int ext4_fill_es_cache_info(struct inode *inode,
@@ -2279,27 +2295,26 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
{
int err;
ext4_fsblk_t leaf;
+ int k = depth - 1;
/* free index block */
- depth--;
- path = path + depth;
- leaf = ext4_idx_pblock(path->p_idx);
- if (unlikely(path->p_hdr->eh_entries == 0)) {
- EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
+ leaf = ext4_idx_pblock(path[k].p_idx);
+ if (unlikely(path[k].p_hdr->eh_entries == 0)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr->eh_entries == 0", k);
return -EFSCORRUPTED;
}
- err = ext4_ext_get_access(handle, inode, path);
+ err = ext4_ext_get_access(handle, inode, path + k);
if (err)
return err;
- if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
- int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
+ if (path[k].p_idx != EXT_LAST_INDEX(path[k].p_hdr)) {
+ int len = EXT_LAST_INDEX(path[k].p_hdr) - path[k].p_idx;
len *= sizeof(struct ext4_extent_idx);
- memmove(path->p_idx, path->p_idx + 1, len);
+ memmove(path[k].p_idx, path[k].p_idx + 1, len);
}
- le16_add_cpu(&path->p_hdr->eh_entries, -1);
- err = ext4_ext_dirty(handle, inode, path);
+ le16_add_cpu(&path[k].p_hdr->eh_entries, -1);
+ err = ext4_ext_dirty(handle, inode, path + k);
if (err)
return err;
ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
@@ -2308,18 +2323,29 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
- while (--depth >= 0) {
- if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ while (--k >= 0) {
+ if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr))
break;
- path--;
- err = ext4_ext_get_access(handle, inode, path);
+ err = ext4_ext_get_access(handle, inode, path + k);
if (err)
- break;
- path->p_idx->ei_block = (path+1)->p_idx->ei_block;
- err = ext4_ext_dirty(handle, inode, path);
+ goto clean;
+ path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block;
+ err = ext4_ext_dirty(handle, inode, path + k);
if (err)
- break;
+ goto clean;
}
+ return 0;
+
+clean:
+ /*
+ * The path[k].p_bh is either unmodified or with no verified bit
+ * set (see ext4_ext_get_access()). So just clear the verified bit
+ * of the successfully modified extents buffers, which will force
+ * these extents to be checked to avoid using inconsistent data.
+ */
+ while (++k < depth)
+ clear_buffer_verified(path[k].p_bh);
+
return err;
}
@@ -2872,11 +2898,12 @@ again:
* fail removing space due to ENOSPC so try to use
* reserved block if that happens.
*/
- err = ext4_force_split_extent_at(handle, inode, &path,
- end + 1, 1);
- if (err < 0)
+ path = ext4_force_split_extent_at(handle, inode, path,
+ end + 1, 1);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
goto out;
-
+ }
} else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
partial.state == initial) {
/*
@@ -2934,8 +2961,7 @@ again:
err = ext4_ext_rm_leaf(handle, inode, path,
&partial, start, end);
/* root level has p_bh == NULL, brelse() eats this */
- brelse(path[i].p_bh);
- path[i].p_bh = NULL;
+ ext4_ext_path_brelse(path + i);
i--;
continue;
}
@@ -2997,8 +3023,7 @@ again:
err = ext4_ext_rm_idx(handle, inode, path, i);
}
/* root level has p_bh == NULL, brelse() eats this */
- brelse(path[i].p_bh);
- path[i].p_bh = NULL;
+ ext4_ext_path_brelse(path + i);
i--;
ext_debug(inode, "return to level %d\n", i);
}
@@ -3113,7 +3138,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
return;
ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
- EXTENT_STATUS_WRITTEN);
+ EXTENT_STATUS_WRITTEN, 0);
}
/* FIXME!! we need to try to merge to left or right after zero-out */
@@ -3147,16 +3172,14 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
* a> the extent are splitted into two extent.
* b> split is not needed, and just mark the extent.
*
- * return 0 on success.
+ * Return an extent path pointer on success, or an error pointer on failure.
*/
-static int ext4_split_extent_at(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path **ppath,
- ext4_lblk_t split,
- int split_flag,
- int flags)
+static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t split,
+ int split_flag, int flags)
{
- struct ext4_ext_path *path = *ppath;
ext4_fsblk_t newblock;
ext4_lblk_t ee_block;
struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3226,10 +3249,31 @@ static int ext4_split_extent_at(handle_t *handle,
if (split_flag & EXT4_EXT_MARK_UNWRIT2)
ext4_ext_mark_unwritten(ex2);
- err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
- if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
+ path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+ if (!IS_ERR(path))
goto out;
+ err = PTR_ERR(path);
+ if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
+ return path;
+
+ /*
+ * Get a new path to try to zeroout or fix the extent length.
+ * Using EXT4_EX_NOFAIL guarantees that ext4_find_extent()
+ * will not return -ENOMEM, otherwise -ENOMEM will cause a
+ * retry in do_writepages(), and a WARN_ON may be triggered
+ * in ext4_da_update_reserve_space() due to an incorrect
+ * ee_len causing the i_reserved_data_blocks exception.
+ */
+ path = ext4_find_extent(inode, ee_block, NULL, flags | EXT4_EX_NOFAIL);
+ if (IS_ERR(path)) {
+ EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld",
+ split, PTR_ERR(path));
+ return path;
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+
if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3280,14 +3324,17 @@ fix_extent_len:
* and err is a non-zero error code.
*/
ext4_ext_dirty(handle, inode, path + path->p_depth);
- return err;
out:
+ if (err) {
+ ext4_free_ext_path(path);
+ path = ERR_PTR(err);
+ }
ext4_ext_show_leaf(inode, path);
- return err;
+ return path;
}
/*
- * ext4_split_extents() splits an extent and mark extent which is covered
+ * ext4_split_extent() splits an extent and mark extent which is covered
* by @map as split_flags indicates
*
* It may result in splitting the extent into multiple extents (up to three)
@@ -3297,21 +3344,18 @@ out:
* c> Splits in three extents: Somone is splitting in middle of the extent
*
*/
-static int ext4_split_extent(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path **ppath,
- struct ext4_map_blocks *map,
- int split_flag,
- int flags)
+static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_map_blocks *map,
+ int split_flag, int flags,
+ unsigned int *allocated)
{
- struct ext4_ext_path *path = *ppath;
ext4_lblk_t ee_block;
struct ext4_extent *ex;
unsigned int ee_len, depth;
- int err = 0;
int unwritten;
int split_flag1, flags1;
- int allocated = map->m_len;
depth = ext_depth(inode);
ex = path[depth].p_ext;
@@ -3327,28 +3371,27 @@ static int ext4_split_extent(handle_t *handle,
EXT4_EXT_MARK_UNWRIT2;
if (split_flag & EXT4_EXT_DATA_VALID2)
split_flag1 |= EXT4_EXT_DATA_VALID1;
- err = ext4_split_extent_at(handle, inode, ppath,
+ path = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1);
- if (err)
- goto out;
- } else {
- allocated = ee_len - (map->m_lblk - ee_block);
- }
- /*
- * Update path is required because previous ext4_split_extent_at() may
- * result in split of original leaf or extent zeroout.
- */
- path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
- if (IS_ERR(path))
- return PTR_ERR(path);
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- if (!ex) {
- EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
- (unsigned long) map->m_lblk);
- return -EFSCORRUPTED;
+ if (IS_ERR(path))
+ return path;
+ /*
+ * Update path is required because previous ext4_split_extent_at
+ * may result in split of original leaf or extent zeroout.
+ */
+ path = ext4_find_extent(inode, map->m_lblk, path, flags);
+ if (IS_ERR(path))
+ return path;
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+ (unsigned long) map->m_lblk);
+ ext4_free_ext_path(path);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+ unwritten = ext4_ext_is_unwritten(ex);
}
- unwritten = ext4_ext_is_unwritten(ex);
if (map->m_lblk >= ee_block) {
split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
@@ -3357,15 +3400,20 @@ static int ext4_split_extent(handle_t *handle,
split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
EXT4_EXT_MARK_UNWRIT2);
}
- err = ext4_split_extent_at(handle, inode, ppath,
+ path = ext4_split_extent_at(handle, inode, path,
map->m_lblk, split_flag1, flags);
- if (err)
- goto out;
+ if (IS_ERR(path))
+ return path;
}
+ if (allocated) {
+ if (map->m_lblk + map->m_len > ee_block + ee_len)
+ *allocated = ee_len - (map->m_lblk - ee_block);
+ else
+ *allocated = map->m_len;
+ }
ext4_ext_show_leaf(inode, path);
-out:
- return err ? err : allocated;
+ return path;
}
/*
@@ -3388,13 +3436,11 @@ out:
* that are allocated and initialized.
* It is guaranteed to be >= map->m_len.
*/
-static int ext4_ext_convert_to_initialized(handle_t *handle,
- struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath,
- int flags)
+static struct ext4_ext_path *
+ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map, struct ext4_ext_path *path,
+ int flags, unsigned int *allocated)
{
- struct ext4_ext_path *path = *ppath;
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
@@ -3404,7 +3450,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
unsigned int ee_len, depth, map_len = map->m_len;
int err = 0;
int split_flag = EXT4_EXT_DATA_VALID2;
- int allocated = 0;
unsigned int max_zeroout = 0;
ext_debug(inode, "logical block %llu, max_blocks %u\n",
@@ -3445,6 +3490,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
* - L2: we only attempt to merge with an extent stored in the
* same extent tree node.
*/
+ *allocated = 0;
if ((map->m_lblk == ee_block) &&
/* See if we can merge left */
(map_len < ee_len) && /*L1*/
@@ -3474,7 +3520,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
(prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
- goto out;
+ goto errout;
trace_ext4_ext_convert_to_initialized_fastpath(inode,
map, ex, abut_ex);
@@ -3489,7 +3535,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
/* Result: number of initialized blocks past m_lblk */
- allocated = map_len;
+ *allocated = map_len;
}
} else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
(map_len < ee_len) && /*L1*/
@@ -3520,7 +3566,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
(next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
- goto out;
+ goto errout;
trace_ext4_ext_convert_to_initialized_fastpath(inode,
map, ex, abut_ex);
@@ -3535,18 +3581,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
abut_ex->ee_len = cpu_to_le16(next_len + map_len);
/* Result: number of initialized blocks past m_lblk */
- allocated = map_len;
+ *allocated = map_len;
}
}
- if (allocated) {
+ if (*allocated) {
/* Mark the block containing both extents as dirty */
err = ext4_ext_dirty(handle, inode, path + depth);
/* Update path to point to the right extent */
path[depth].p_ext = abut_ex;
+ if (err)
+ goto errout;
goto out;
} else
- allocated = ee_len - (map->m_lblk - ee_block);
+ *allocated = ee_len - (map->m_lblk - ee_block);
WARN_ON(map->m_lblk < ee_block);
/*
@@ -3573,21 +3621,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
split_map.m_lblk = map->m_lblk;
split_map.m_len = map->m_len;
- if (max_zeroout && (allocated > split_map.m_len)) {
- if (allocated <= max_zeroout) {
+ if (max_zeroout && (*allocated > split_map.m_len)) {
+ if (*allocated <= max_zeroout) {
/* case 3 or 5 */
zero_ex1.ee_block =
cpu_to_le32(split_map.m_lblk +
split_map.m_len);
zero_ex1.ee_len =
- cpu_to_le16(allocated - split_map.m_len);
+ cpu_to_le16(*allocated - split_map.m_len);
ext4_ext_store_pblock(&zero_ex1,
ext4_ext_pblock(ex) + split_map.m_lblk +
split_map.m_len - ee_block);
err = ext4_ext_zeroout(inode, &zero_ex1);
if (err)
goto fallback;
- split_map.m_len = allocated;
+ split_map.m_len = *allocated;
}
if (split_map.m_lblk - ee_block + split_map.m_len <
max_zeroout) {
@@ -3605,22 +3653,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
split_map.m_len += split_map.m_lblk - ee_block;
split_map.m_lblk = ee_block;
- allocated = map->m_len;
+ *allocated = map->m_len;
}
}
fallback:
- err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
- flags);
- if (err > 0)
- err = 0;
+ path = ext4_split_extent(handle, inode, path, &split_map, split_flag,
+ flags, NULL);
+ if (IS_ERR(path))
+ return path;
out:
/* If we have gotten a failure, don't zero out status tree */
- if (!err) {
- ext4_zeroout_es(inode, &zero_ex1);
- ext4_zeroout_es(inode, &zero_ex2);
- }
- return err ? err : allocated;
+ ext4_zeroout_es(inode, &zero_ex1);
+ ext4_zeroout_es(inode, &zero_ex2);
+ return path;
+
+errout:
+ ext4_free_ext_path(path);
+ return ERR_PTR(err);
}
/*
@@ -3645,15 +3695,16 @@ out:
* being filled will be convert to initialized by the end_io callback function
* via ext4_convert_unwritten_extents().
*
- * Returns the size of unwritten extent to be written on success.
+ * The size of unwritten extent to be written is passed to the caller via the
+ * allocated pointer. Return an extent path pointer on success, or an error
+ * pointer on failure.
*/
-static int ext4_split_convert_extents(handle_t *handle,
+static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle,
struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath,
- int flags)
+ struct ext4_ext_path *path,
+ int flags, unsigned int *allocated)
{
- struct ext4_ext_path *path = *ppath;
ext4_lblk_t eof_block;
ext4_lblk_t ee_block;
struct ext4_extent *ex;
@@ -3686,15 +3737,15 @@ static int ext4_split_convert_extents(handle_t *handle,
split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
}
flags |= EXT4_GET_BLOCKS_PRE_IO;
- return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
+ return ext4_split_extent(handle, inode, path, map, split_flag, flags,
+ allocated);
}
-static int ext4_convert_unwritten_extents_endio(handle_t *handle,
- struct inode *inode,
- struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath)
+static struct ext4_ext_path *
+ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path)
{
- struct ext4_ext_path *path = *ppath;
struct ext4_extent *ex;
ext4_lblk_t ee_block;
unsigned int ee_len;
@@ -3722,20 +3773,21 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
inode->i_ino, (unsigned long long)ee_block, ee_len,
(unsigned long long)map->m_lblk, map->m_len);
#endif
- err = ext4_split_convert_extents(handle, inode, map, ppath,
- EXT4_GET_BLOCKS_CONVERT);
- if (err < 0)
- return err;
- path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+ path = ext4_split_convert_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT, NULL);
if (IS_ERR(path))
- return PTR_ERR(path);
+ return path;
+
+ path = ext4_find_extent(inode, map->m_lblk, path, 0);
+ if (IS_ERR(path))
+ return path;
depth = ext_depth(inode);
ex = path[depth].p_ext;
}
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
- goto out;
+ goto errout;
/* first mark the extent as initialized */
ext4_ext_mark_initialized(ex);
@@ -3746,18 +3798,23 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
/* Mark modified extent as dirty */
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-out:
+ if (err)
+ goto errout;
+
ext4_ext_show_leaf(inode, path);
- return err;
+ return path;
+
+errout:
+ ext4_free_ext_path(path);
+ return ERR_PTR(err);
}
-static int
+static struct ext4_ext_path *
convert_initialized_extent(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath,
+ struct ext4_ext_path *path,
unsigned int *allocated)
{
- struct ext4_ext_path *path = *ppath;
struct ext4_extent *ex;
ext4_lblk_t ee_block;
unsigned int ee_len;
@@ -3780,25 +3837,27 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
(unsigned long long)ee_block, ee_len);
if (ee_block != map->m_lblk || ee_len > map->m_len) {
- err = ext4_split_convert_extents(handle, inode, map, ppath,
- EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
- if (err < 0)
- return err;
- path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+ path = ext4_split_convert_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL);
if (IS_ERR(path))
- return PTR_ERR(path);
+ return path;
+
+ path = ext4_find_extent(inode, map->m_lblk, path, 0);
+ if (IS_ERR(path))
+ return path;
depth = ext_depth(inode);
ex = path[depth].p_ext;
if (!ex) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) map->m_lblk);
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ goto errout;
}
}
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
- return err;
+ goto errout;
/* first mark the extent as unwritten */
ext4_ext_mark_unwritten(ex);
@@ -3810,7 +3869,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
/* Mark modified extent as dirty */
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
if (err)
- return err;
+ goto errout;
ext4_ext_show_leaf(inode, path);
ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3819,22 +3878,24 @@ convert_initialized_extent(handle_t *handle, struct inode *inode,
if (*allocated > map->m_len)
*allocated = map->m_len;
map->m_len = *allocated;
- return 0;
+ return path;
+
+errout:
+ ext4_free_ext_path(path);
+ return ERR_PTR(err);
}
-static int
+static struct ext4_ext_path *
ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath, int flags,
- unsigned int allocated, ext4_fsblk_t newblock)
+ struct ext4_ext_path *path, int flags,
+ unsigned int *allocated, ext4_fsblk_t newblock)
{
- struct ext4_ext_path __maybe_unused *path = *ppath;
- int ret = 0;
int err = 0;
ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
(unsigned long long)map->m_lblk, map->m_len, flags,
- allocated);
+ *allocated);
ext4_ext_show_leaf(inode, path);
/*
@@ -3844,36 +3905,34 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
- allocated, newblock);
+ *allocated, newblock);
/* get_block() before submitting IO, split the extent */
if (flags & EXT4_GET_BLOCKS_PRE_IO) {
- ret = ext4_split_convert_extents(handle, inode, map, ppath,
- flags | EXT4_GET_BLOCKS_CONVERT);
- if (ret < 0) {
- err = ret;
- goto out2;
- }
+ path = ext4_split_convert_extents(handle, inode, map, path,
+ flags | EXT4_GET_BLOCKS_CONVERT, allocated);
+ if (IS_ERR(path))
+ return path;
/*
- * shouldn't get a 0 return when splitting an extent unless
+ * shouldn't get a 0 allocated when splitting an extent unless
* m_len is 0 (bug) or extent has been corrupted
*/
- if (unlikely(ret == 0)) {
+ if (unlikely(*allocated == 0)) {
EXT4_ERROR_INODE(inode,
- "unexpected ret == 0, m_len = %u",
+ "unexpected allocated == 0, m_len = %u",
map->m_len);
err = -EFSCORRUPTED;
- goto out2;
+ goto errout;
}
map->m_flags |= EXT4_MAP_UNWRITTEN;
goto out;
}
/* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) {
- err = ext4_convert_unwritten_extents_endio(handle, inode, map,
- ppath);
- if (err < 0)
- goto out2;
+ path = ext4_convert_unwritten_extents_endio(handle, inode,
+ map, path);
+ if (IS_ERR(path))
+ return path;
ext4_update_inode_fsync_trans(handle, inode, 1);
goto map_out;
}
@@ -3905,36 +3964,37 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
* For buffered writes, at writepage time, etc. Convert a
* discovered unwritten extent to written.
*/
- ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
- if (ret < 0) {
- err = ret;
- goto out2;
- }
+ path = ext4_ext_convert_to_initialized(handle, inode, map, path,
+ flags, allocated);
+ if (IS_ERR(path))
+ return path;
ext4_update_inode_fsync_trans(handle, inode, 1);
/*
- * shouldn't get a 0 return when converting an unwritten extent
+ * shouldn't get a 0 allocated when converting an unwritten extent
* unless m_len is 0 (bug) or extent has been corrupted
*/
- if (unlikely(ret == 0)) {
- EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
+ if (unlikely(*allocated == 0)) {
+ EXT4_ERROR_INODE(inode, "unexpected allocated == 0, m_len = %u",
map->m_len);
err = -EFSCORRUPTED;
- goto out2;
+ goto errout;
}
out:
- allocated = ret;
map->m_flags |= EXT4_MAP_NEW;
map_out:
map->m_flags |= EXT4_MAP_MAPPED;
out1:
map->m_pblk = newblock;
- if (allocated > map->m_len)
- allocated = map->m_len;
- map->m_len = allocated;
+ if (*allocated > map->m_len)
+ *allocated = map->m_len;
+ map->m_len = *allocated;
ext4_ext_show_leaf(inode, path);
-out2:
- return err ? err : allocated;
+ return path;
+
+errout:
+ ext4_free_ext_path(path);
+ return ERR_PTR(err);
}
/*
@@ -4097,7 +4157,8 @@ again:
insert_hole:
/* Put just found gap into cache to speed up subsequent requests */
ext_debug(inode, " -> %u:%u\n", hole_start, len);
- ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
+ ext4_es_insert_extent(inode, hole_start, len, ~0,
+ EXTENT_STATUS_HOLE, 0);
/* Update hole_len to reflect hole size after lblk */
if (hole_start != lblk)
@@ -4131,7 +4192,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent newex, *ex, ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0, pblk;
- int err = 0, depth, ret;
+ int err = 0, depth;
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
@@ -4144,7 +4205,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- path = NULL;
goto out;
}
@@ -4193,8 +4253,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
if ((!ext4_ext_is_unwritten(ex)) &&
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
- err = convert_initialized_extent(handle,
- inode, map, &path, &allocated);
+ path = convert_initialized_extent(handle,
+ inode, map, path, &allocated);
+ if (IS_ERR(path))
+ err = PTR_ERR(path);
goto out;
} else if (!ext4_ext_is_unwritten(ex)) {
map->m_flags |= EXT4_MAP_MAPPED;
@@ -4206,13 +4268,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
goto out;
}
- ret = ext4_ext_handle_unwritten_extents(
- handle, inode, map, &path, flags,
- allocated, newblock);
- if (ret < 0)
- err = ret;
- else
- allocated = ret;
+ path = ext4_ext_handle_unwritten_extents(
+ handle, inode, map, path, flags,
+ &allocated, newblock);
+ if (IS_ERR(path))
+ err = PTR_ERR(path);
goto out;
}
}
@@ -4264,6 +4324,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
ar.len = allocated = map->m_len;
newblock = map->m_pblk;
+ err = 0;
goto got_allocated_blocks;
}
@@ -4336,8 +4397,9 @@ got_allocated_blocks:
map->m_flags |= EXT4_MAP_UNWRITTEN;
}
- err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
- if (err) {
+ path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
if (allocated_clusters) {
int fb_flags = 0;
@@ -4357,43 +4419,6 @@ got_allocated_blocks:
}
/*
- * Reduce the reserved cluster count to reflect successful deferred
- * allocation of delayed allocated clusters or direct allocation of
- * clusters discovered to be delayed allocated. Once allocated, a
- * cluster is not included in the reserved count.
- */
- if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
- /*
- * When allocating delayed allocated clusters, simply
- * reduce the reserved cluster count and claim quota
- */
- ext4_da_update_reserve_space(inode, allocated_clusters,
- 1);
- } else {
- ext4_lblk_t lblk, len;
- unsigned int n;
-
- /*
- * When allocating non-delayed allocated clusters
- * (from fallocate, filemap, DIO, or clusters
- * allocated when delalloc has been disabled by
- * ext4_nonda_switch), reduce the reserved cluster
- * count by the number of allocated clusters that
- * have previously been delayed allocated. Quota
- * has been claimed by ext4_mb_new_blocks() above,
- * so release the quota reservations made for any
- * previously delayed allocated clusters.
- */
- lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
- len = allocated_clusters << sbi->s_cluster_bits;
- n = ext4_es_delayed_clu(inode, lblk, len);
- if (n > 0)
- ext4_da_update_reserve_space(inode, (int) n, 0);
- }
- }
-
- /*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an unwritten extent.
*/
@@ -5184,7 +5209,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
* won't be shifted beyond EXT_MAX_BLOCKS.
*/
if (SHIFT == SHIFT_LEFT) {
- path = ext4_find_extent(inode, start - 1, &path,
+ path = ext4_find_extent(inode, start - 1, path,
EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -5233,7 +5258,7 @@ again:
* becomes NULL to indicate the end of the loop.
*/
while (iterator && start <= stop) {
- path = ext4_find_extent(inode, *iterator, &path,
+ path = ext4_find_extent(inode, *iterator, path,
EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -5535,6 +5560,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
path = ext4_find_extent(inode, offset_lblk, NULL, 0);
if (IS_ERR(path)) {
up_write(&EXT4_I(inode)->i_data_sem);
+ ret = PTR_ERR(path);
goto out_stop;
}
@@ -5553,22 +5579,21 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
if (ext4_ext_is_unwritten(extent))
split_flag = EXT4_EXT_MARK_UNWRIT1 |
EXT4_EXT_MARK_UNWRIT2;
- ret = ext4_split_extent_at(handle, inode, &path,
+ path = ext4_split_extent_at(handle, inode, path,
offset_lblk, split_flag,
EXT4_EX_NOCACHE |
EXT4_GET_BLOCKS_PRE_IO |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
}
- ext4_free_ext_path(path);
- if (ret < 0) {
+ if (IS_ERR(path)) {
up_write(&EXT4_I(inode)->i_data_sem);
+ ret = PTR_ERR(path);
goto out_stop;
}
- } else {
- ext4_free_ext_path(path);
}
+ ext4_free_ext_path(path);
ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
/*
@@ -5636,25 +5661,21 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
int e1_len, e2_len, len;
int split = 0;
- path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
+ path1 = ext4_find_extent(inode1, lblk1, path1, EXT4_EX_NOCACHE);
if (IS_ERR(path1)) {
*erp = PTR_ERR(path1);
- path1 = NULL;
- finish:
- count = 0;
- goto repeat;
+ goto errout;
}
- path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
+ path2 = ext4_find_extent(inode2, lblk2, path2, EXT4_EX_NOCACHE);
if (IS_ERR(path2)) {
*erp = PTR_ERR(path2);
- path2 = NULL;
- goto finish;
+ goto errout;
}
ex1 = path1[path1->p_depth].p_ext;
ex2 = path2[path2->p_depth].p_ext;
/* Do we have something to swap ? */
if (unlikely(!ex2 || !ex1))
- goto finish;
+ goto errout;
e1_blk = le32_to_cpu(ex1->ee_block);
e2_blk = le32_to_cpu(ex2->ee_block);
@@ -5676,7 +5697,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
next2 = e2_blk;
/* Do we have something to swap */
if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
- goto finish;
+ goto errout;
/* Move to the rightest boundary */
len = next1 - lblk1;
if (len < next2 - lblk2)
@@ -5686,28 +5707,32 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
lblk1 += len;
lblk2 += len;
count -= len;
- goto repeat;
+ continue;
}
/* Prepare left boundary */
if (e1_blk < lblk1) {
split = 1;
- *erp = ext4_force_split_extent_at(handle, inode1,
- &path1, lblk1, 0);
- if (unlikely(*erp))
- goto finish;
+ path1 = ext4_force_split_extent_at(handle, inode1,
+ path1, lblk1, 0);
+ if (IS_ERR(path1)) {
+ *erp = PTR_ERR(path1);
+ goto errout;
+ }
}
if (e2_blk < lblk2) {
split = 1;
- *erp = ext4_force_split_extent_at(handle, inode2,
- &path2, lblk2, 0);
- if (unlikely(*erp))
- goto finish;
+ path2 = ext4_force_split_extent_at(handle, inode2,
+ path2, lblk2, 0);
+ if (IS_ERR(path2)) {
+ *erp = PTR_ERR(path2);
+ goto errout;
+ }
}
/* ext4_split_extent_at() may result in leaf extent split,
* path must to be revalidated. */
if (split)
- goto repeat;
+ continue;
/* Prepare right boundary */
len = count;
@@ -5718,30 +5743,34 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
if (len != e1_len) {
split = 1;
- *erp = ext4_force_split_extent_at(handle, inode1,
- &path1, lblk1 + len, 0);
- if (unlikely(*erp))
- goto finish;
+ path1 = ext4_force_split_extent_at(handle, inode1,
+ path1, lblk1 + len, 0);
+ if (IS_ERR(path1)) {
+ *erp = PTR_ERR(path1);
+ goto errout;
+ }
}
if (len != e2_len) {
split = 1;
- *erp = ext4_force_split_extent_at(handle, inode2,
- &path2, lblk2 + len, 0);
- if (*erp)
- goto finish;
+ path2 = ext4_force_split_extent_at(handle, inode2,
+ path2, lblk2 + len, 0);
+ if (IS_ERR(path2)) {
+ *erp = PTR_ERR(path2);
+ goto errout;
+ }
}
/* ext4_split_extent_at() may result in leaf extent split,
* path must to be revalidated. */
if (split)
- goto repeat;
+ continue;
BUG_ON(e2_len != e1_len);
*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
if (unlikely(*erp))
- goto finish;
+ goto errout;
*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
if (unlikely(*erp))
- goto finish;
+ goto errout;
/* Both extents are fully inside boundaries. Swap it now */
tmp_ex = *ex1;
@@ -5759,7 +5788,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
*erp = ext4_ext_dirty(handle, inode2, path2 +
path2->p_depth);
if (unlikely(*erp))
- goto finish;
+ goto errout;
*erp = ext4_ext_dirty(handle, inode1, path1 +
path1->p_depth);
/*
@@ -5769,17 +5798,17 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
* aborted anyway.
*/
if (unlikely(*erp))
- goto finish;
+ goto errout;
+
lblk1 += len;
lblk2 += len;
replaced_count += len;
count -= len;
-
- repeat:
- ext4_free_ext_path(path1);
- ext4_free_ext_path(path2);
- path1 = path2 = NULL;
}
+
+errout:
+ ext4_free_ext_path(path1);
+ ext4_free_ext_path(path2);
return replaced_count;
}
@@ -5814,11 +5843,8 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
/* search for the extent closest to the first block in the cluster */
path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- path = NULL;
- goto out;
- }
+ if (IS_ERR(path))
+ return PTR_ERR(path);
depth = ext_depth(inode);
@@ -5880,7 +5906,7 @@ out:
int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
int len, int unwritten, ext4_fsblk_t pblk)
{
- struct ext4_ext_path *path = NULL, *ppath;
+ struct ext4_ext_path *path;
struct ext4_extent *ex;
int ret;
@@ -5896,30 +5922,34 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
if (le32_to_cpu(ex->ee_block) != start ||
ext4_ext_get_actual_len(ex) != len) {
/* We need to split this extent to match our extent first */
- ppath = path;
down_write(&EXT4_I(inode)->i_data_sem);
- ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
+ path = ext4_force_split_extent_at(NULL, inode, path, start, 1);
up_write(&EXT4_I(inode)->i_data_sem);
- if (ret)
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
goto out;
- kfree(path);
- path = ext4_find_extent(inode, start, NULL, 0);
+ }
+
+ path = ext4_find_extent(inode, start, path, 0);
if (IS_ERR(path))
- return -1;
- ppath = path;
+ return PTR_ERR(path);
+
ex = path[path->p_depth].p_ext;
WARN_ON(le32_to_cpu(ex->ee_block) != start);
+
if (ext4_ext_get_actual_len(ex) != len) {
down_write(&EXT4_I(inode)->i_data_sem);
- ret = ext4_force_split_extent_at(NULL, inode, &ppath,
- start + len, 1);
+ path = ext4_force_split_extent_at(NULL, inode, path,
+ start + len, 1);
up_write(&EXT4_I(inode)->i_data_sem);
- if (ret)
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
goto out;
- kfree(path);
- path = ext4_find_extent(inode, start, NULL, 0);
+ }
+
+ path = ext4_find_extent(inode, start, path, 0);
if (IS_ERR(path))
- return -EINVAL;
+ return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
}
}
@@ -6001,12 +6031,9 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
if (IS_ERR(path))
return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
- if (!ex) {
- ext4_free_ext_path(path);
+ if (!ex)
goto out;
- }
end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
- ext4_free_ext_path(path);
/* Count the number of data blocks */
cur = 0;
@@ -6032,32 +6059,28 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
ret = skip_hole(inode, &cur);
if (ret < 0)
goto out;
- path = ext4_find_extent(inode, cur, NULL, 0);
+ path = ext4_find_extent(inode, cur, path, 0);
if (IS_ERR(path))
goto out;
numblks += path->p_depth;
- ext4_free_ext_path(path);
while (cur < end) {
- path = ext4_find_extent(inode, cur, NULL, 0);
+ path = ext4_find_extent(inode, cur, path, 0);
if (IS_ERR(path))
break;
ex = path[path->p_depth].p_ext;
- if (!ex) {
- ext4_free_ext_path(path);
- return 0;
- }
+ if (!ex)
+ goto cleanup;
+
cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
ext4_ext_get_actual_len(ex));
ret = skip_hole(inode, &cur);
- if (ret < 0) {
- ext4_free_ext_path(path);
+ if (ret < 0)
break;
- }
- path2 = ext4_find_extent(inode, cur, NULL, 0);
- if (IS_ERR(path2)) {
- ext4_free_ext_path(path);
+
+ path2 = ext4_find_extent(inode, cur, path2, 0);
+ if (IS_ERR(path2))
break;
- }
+
for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
cmp1 = cmp2 = 0;
if (i <= path->p_depth)
@@ -6069,13 +6092,14 @@ int ext4_ext_replay_set_iblocks(struct inode *inode)
if (cmp1 != cmp2 && cmp2 != 0)
numblks++;
}
- ext4_free_ext_path(path);
- ext4_free_ext_path(path2);
}
out:
inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
ext4_mark_inode_dirty(NULL, inode);
+cleanup:
+ ext4_free_ext_path(path);
+ ext4_free_ext_path(path2);
return 0;
}
@@ -6096,12 +6120,9 @@ int ext4_ext_clear_bb(struct inode *inode)
if (IS_ERR(path))
return PTR_ERR(path);
ex = path[path->p_depth].p_ext;
- if (!ex) {
- ext4_free_ext_path(path);
- return 0;
- }
+ if (!ex)
+ goto out;
end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
- ext4_free_ext_path(path);
cur = 0;
while (cur < end) {
@@ -6111,16 +6132,16 @@ int ext4_ext_clear_bb(struct inode *inode)
if (ret < 0)
break;
if (ret > 0) {
- path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
- if (!IS_ERR_OR_NULL(path)) {
+ path = ext4_find_extent(inode, map.m_lblk, path, 0);
+ if (!IS_ERR(path)) {
for (j = 0; j < path->p_depth; j++) {
-
ext4_mb_mark_bb(inode->i_sb,
path[j].p_block, 1, false);
ext4_fc_record_regions(inode->i_sb, inode->i_ino,
0, path[j].p_block, 1, 1);
}
- ext4_free_ext_path(path);
+ } else {
+ path = NULL;
}
ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false);
ext4_fc_record_regions(inode->i_sb, inode->i_ino,
@@ -6129,5 +6150,7 @@ int ext4_ext_clear_bb(struct inode *inode)
cur = cur + map.m_len;
}
+out:
+ ext4_free_ext_path(path);
return 0;
}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 17dcf13adde2..c786691dabd3 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -558,8 +558,8 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
if (ext4_es_is_hole(es1))
return 1;
- /* we need to check delayed extent is without unwritten status */
- if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
+ /* we need to check delayed extent */
+ if (ext4_es_is_delayed(es1))
return 1;
return 0;
@@ -848,11 +848,12 @@ out:
*/
void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
- unsigned int status)
+ unsigned int status, int flags)
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
int err1 = 0, err2 = 0, err3 = 0;
+ int resv_used = 0, pending = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct extent_status *es1 = NULL;
struct extent_status *es2 = NULL;
@@ -862,21 +863,14 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
return;
- es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
- lblk, len, pblk, status, inode->i_ino);
+ es_debug("add [%u/%u) %llu %x %x to extent status tree of inode %lu\n",
+ lblk, len, pblk, status, flags, inode->i_ino);
if (!len)
return;
BUG_ON(end < lblk);
-
- if ((status & EXTENT_STATUS_DELAYED) &&
- (status & EXTENT_STATUS_WRITTEN)) {
- ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
- " delayed and written which can potentially "
- " cause data loss.", lblk, len);
- WARN_ON(1);
- }
+ WARN_ON_ONCE(status & EXTENT_STATUS_DELAYED);
newes.es_lblk = lblk;
newes.es_len = len;
@@ -894,11 +888,11 @@ retry:
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
- if ((err1 || err2 || err3) && revise_pending && !pr)
+ if ((err1 || err2 || err3 < 0) && revise_pending && !pr)
pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
- err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
+ err1 = __es_remove_extent(inode, lblk, end, &resv_used, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
@@ -922,16 +916,38 @@ retry:
if (revise_pending) {
err3 = __revise_pending(inode, lblk, len, &pr);
- if (err3 != 0)
+ if (err3 < 0)
goto error;
if (pr) {
__free_pending(pr);
pr = NULL;
}
+ pending = err3;
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
- if (err1 || err2 || err3)
+ /*
+ * Reduce the reserved cluster count to reflect successful deferred
+ * allocation of delayed allocated clusters or direct allocation of
+ * clusters discovered to be delayed allocated. Once allocated, a
+ * cluster is not included in the reserved count.
+ *
+ * When direct allocating (from fallocate, filemap, DIO, or clusters
+ * allocated when delalloc has been disabled by ext4_nonda_switch())
+ * an extent either 1) contains delayed blocks but start with
+ * non-delayed allocated blocks (e.g. hole) or 2) contains non-delayed
+ * allocated blocks which belong to delayed allocated clusters when
+ * bigalloc feature is enabled, quota has already been claimed by
+ * ext4_mb_new_blocks(), so release the quota reservations made for
+ * any previously delayed allocated clusters instead of claim them
+ * again.
+ */
+ resv_used += pending;
+ if (resv_used)
+ ext4_da_update_reserve_space(inode, resv_used,
+ flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+
+ if (err1 || err2 || err3 < 0)
goto retry;
ext4_es_print_tree(inode);
@@ -1051,7 +1067,7 @@ out:
}
struct rsvd_count {
- int ndelonly;
+ int ndelayed;
bool first_do_lblk_found;
ext4_lblk_t first_do_lblk;
ext4_lblk_t last_do_lblk;
@@ -1077,10 +1093,10 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct rb_node *node;
- rc->ndelonly = 0;
+ rc->ndelayed = 0;
/*
- * for bigalloc, note the first delonly block in the range has not
+ * for bigalloc, note the first delayed block in the range has not
* been found, record the extent containing the block to the left of
* the region to be removed, if any, and note that there's no partial
* cluster to track
@@ -1100,9 +1116,8 @@ static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
}
/*
- * count_rsvd - count the clusters containing delayed and not unwritten
- * (delonly) blocks in a range within an extent and add to
- * the running tally in rsvd_count
+ * count_rsvd - count the clusters containing delayed blocks in a range
+ * within an extent and add to the running tally in rsvd_count
*
* @inode - file containing extent
* @lblk - first block in range
@@ -1119,13 +1134,13 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t i, end, nclu;
- if (!ext4_es_is_delonly(es))
+ if (!ext4_es_is_delayed(es))
return;
WARN_ON(len <= 0);
if (sbi->s_cluster_ratio == 1) {
- rc->ndelonly += (int) len;
+ rc->ndelayed += (int) len;
return;
}
@@ -1135,7 +1150,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
end = lblk + (ext4_lblk_t) len - 1;
end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
- /* record the first block of the first delonly extent seen */
+ /* record the first block of the first delayed extent seen */
if (!rc->first_do_lblk_found) {
rc->first_do_lblk = i;
rc->first_do_lblk_found = true;
@@ -1149,7 +1164,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
* doesn't start with it, count it and stop tracking
*/
if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
- rc->ndelonly++;
+ rc->ndelayed++;
rc->partial = false;
}
@@ -1159,7 +1174,7 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
*/
if (EXT4_LBLK_COFF(sbi, i) != 0) {
if (end >= EXT4_LBLK_CFILL(sbi, i)) {
- rc->ndelonly++;
+ rc->ndelayed++;
rc->partial = false;
i = EXT4_LBLK_CFILL(sbi, i) + 1;
}
@@ -1167,11 +1182,11 @@ static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
/*
* if the current cluster starts on a cluster boundary, count the
- * number of whole delonly clusters in the extent
+ * number of whole delayed clusters in the extent
*/
if ((i + sbi->s_cluster_ratio - 1) <= end) {
nclu = (end - i + 1) >> sbi->s_cluster_bits;
- rc->ndelonly += nclu;
+ rc->ndelayed += nclu;
i += nclu << sbi->s_cluster_bits;
}
@@ -1231,10 +1246,9 @@ static struct pending_reservation *__pr_tree_search(struct rb_root *root,
* @rc - pointer to reserved count data
*
* The number of reservations to be released is equal to the number of
- * clusters containing delayed and not unwritten (delonly) blocks within
- * the range, minus the number of clusters still containing delonly blocks
- * at the ends of the range, and minus the number of pending reservations
- * within the range.
+ * clusters containing delayed blocks within the range, minus the number of
+ * clusters still containing delayed blocks at the ends of the range, and
+ * minus the number of pending reservations within the range.
*/
static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
struct extent_status *right_es,
@@ -1245,33 +1259,33 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
struct rb_node *node;
ext4_lblk_t first_lclu, last_lclu;
- bool left_delonly, right_delonly, count_pending;
+ bool left_delayed, right_delayed, count_pending;
struct extent_status *es;
if (sbi->s_cluster_ratio > 1) {
/* count any remaining partial cluster */
if (rc->partial)
- rc->ndelonly++;
+ rc->ndelayed++;
- if (rc->ndelonly == 0)
+ if (rc->ndelayed == 0)
return 0;
first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
/*
- * decrease the delonly count by the number of clusters at the
- * ends of the range that still contain delonly blocks -
+ * decrease the delayed count by the number of clusters at the
+ * ends of the range that still contain delayed blocks -
* these clusters still need to be reserved
*/
- left_delonly = right_delonly = false;
+ left_delayed = right_delayed = false;
es = rc->left_es;
while (es && ext4_es_end(es) >=
EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
- if (ext4_es_is_delonly(es)) {
- rc->ndelonly--;
- left_delonly = true;
+ if (ext4_es_is_delayed(es)) {
+ rc->ndelayed--;
+ left_delayed = true;
break;
}
node = rb_prev(&es->rb_node);
@@ -1279,7 +1293,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
break;
es = rb_entry(node, struct extent_status, rb_node);
}
- if (right_es && (!left_delonly || first_lclu != last_lclu)) {
+ if (right_es && (!left_delayed || first_lclu != last_lclu)) {
if (end < ext4_es_end(right_es)) {
es = right_es;
} else {
@@ -1289,9 +1303,9 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
}
while (es && es->es_lblk <=
EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
- if (ext4_es_is_delonly(es)) {
- rc->ndelonly--;
- right_delonly = true;
+ if (ext4_es_is_delayed(es)) {
+ rc->ndelayed--;
+ right_delayed = true;
break;
}
node = rb_next(&es->rb_node);
@@ -1305,21 +1319,21 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
/*
* Determine the block range that should be searched for
* pending reservations, if any. Clusters on the ends of the
- * original removed range containing delonly blocks are
+ * original removed range containing delayed blocks are
* excluded. They've already been accounted for and it's not
* possible to determine if an associated pending reservation
* should be released with the information available in the
* extents status tree.
*/
if (first_lclu == last_lclu) {
- if (left_delonly | right_delonly)
+ if (left_delayed | right_delayed)
count_pending = false;
else
count_pending = true;
} else {
- if (left_delonly)
+ if (left_delayed)
first_lclu++;
- if (right_delonly)
+ if (right_delayed)
last_lclu--;
if (first_lclu <= last_lclu)
count_pending = true;
@@ -1330,13 +1344,13 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
/*
* a pending reservation found between first_lclu and last_lclu
* represents an allocated cluster that contained at least one
- * delonly block, so the delonly total must be reduced by one
+ * delayed block, so the delayed total must be reduced by one
* for each pending reservation found and released
*/
if (count_pending) {
pr = __pr_tree_search(&tree->root, first_lclu);
while (pr && pr->lclu <= last_lclu) {
- rc->ndelonly--;
+ rc->ndelayed--;
node = rb_next(&pr->rb_node);
rb_erase(&pr->rb_node, &tree->root);
__free_pending(pr);
@@ -1347,7 +1361,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
}
}
}
- return rc->ndelonly;
+ return rc->ndelayed;
}
@@ -1940,7 +1954,7 @@ static struct pending_reservation *__get_pending(struct inode *inode,
* @lblk - logical block in the cluster to be added
* @prealloc - preallocated pending entry
*
- * Returns 0 on successful insertion and -ENOMEM on failure. If the
+ * Returns 1 on successful insertion and -ENOMEM on failure. If the
* pending reservation is already in the set, returns successfully.
*/
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
@@ -1984,6 +1998,7 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
rb_link_node(&pr->rb_node, parent, p);
rb_insert_color(&pr->rb_node, &tree->root);
+ ret = 1;
out:
return ret;
@@ -2105,7 +2120,7 @@ retry:
es1 = __es_alloc_extent(true);
if ((err1 || err2) && !es2)
es2 = __es_alloc_extent(true);
- if (err1 || err2 || err3) {
+ if (err1 || err2 || err3 < 0) {
if (lclu_allocated && !pr1)
pr1 = __alloc_pending(true);
if (end_allocated && !pr2)
@@ -2135,7 +2150,7 @@ retry:
if (lclu_allocated) {
err3 = __insert_pending(inode, lblk, &pr1);
- if (err3 != 0)
+ if (err3 < 0)
goto error;
if (pr1) {
__free_pending(pr1);
@@ -2144,7 +2159,7 @@ retry:
}
if (end_allocated) {
err3 = __insert_pending(inode, end, &pr2);
- if (err3 != 0)
+ if (err3 < 0)
goto error;
if (pr2) {
__free_pending(pr2);
@@ -2153,7 +2168,7 @@ retry:
}
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
- if (err1 || err2 || err3)
+ if (err1 || err2 || err3 < 0)
goto retry;
ext4_es_print_tree(inode);
@@ -2162,94 +2177,6 @@ error:
}
/*
- * __es_delayed_clu - count number of clusters containing blocks that
- * are delayed only
- *
- * @inode - file containing block range
- * @start - logical block defining start of range
- * @end - logical block defining end of range
- *
- * Returns the number of clusters containing only delayed (not delayed
- * and unwritten) blocks in the range specified by @start and @end. Any
- * cluster or part of a cluster within the range and containing a delayed
- * and not unwritten block within the range is counted as a whole cluster.
- */
-static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
- ext4_lblk_t end)
-{
- struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
- struct extent_status *es;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct rb_node *node;
- ext4_lblk_t first_lclu, last_lclu;
- unsigned long long last_counted_lclu;
- unsigned int n = 0;
-
- /* guaranteed to be unequal to any ext4_lblk_t value */
- last_counted_lclu = ~0ULL;
-
- es = __es_tree_search(&tree->root, start);
-
- while (es && (es->es_lblk <= end)) {
- if (ext4_es_is_delonly(es)) {
- if (es->es_lblk <= start)
- first_lclu = EXT4_B2C(sbi, start);
- else
- first_lclu = EXT4_B2C(sbi, es->es_lblk);
-
- if (ext4_es_end(es) >= end)
- last_lclu = EXT4_B2C(sbi, end);
- else
- last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
-
- if (first_lclu == last_counted_lclu)
- n += last_lclu - first_lclu;
- else
- n += last_lclu - first_lclu + 1;
- last_counted_lclu = last_lclu;
- }
- node = rb_next(&es->rb_node);
- if (!node)
- break;
- es = rb_entry(node, struct extent_status, rb_node);
- }
-
- return n;
-}
-
-/*
- * ext4_es_delayed_clu - count number of clusters containing blocks that
- * are both delayed and unwritten
- *
- * @inode - file containing block range
- * @lblk - logical block defining start of range
- * @len - number of blocks in range
- *
- * Locking for external use of __es_delayed_clu().
- */
-unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- ext4_lblk_t end;
- unsigned int n;
-
- if (len == 0)
- return 0;
-
- end = lblk + len - 1;
- WARN_ON(end < lblk);
-
- read_lock(&ei->i_es_lock);
-
- n = __es_delayed_clu(inode, lblk, end);
-
- read_unlock(&ei->i_es_lock);
-
- return n;
-}
-
-/*
* __revise_pending - makes, cancels, or leaves unchanged pending cluster
* reservations for a specified block range depending
* upon the presence or absence of delayed blocks
@@ -2263,7 +2190,9 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
*
* Used after a newly allocated extent is added to the extents status tree.
* Requires that the extents in the range have either written or unwritten
- * status. Must be called while holding i_es_lock.
+ * status. Must be called while holding i_es_lock. Returns number of new
+ * inserts pending cluster on insert pendings, returns 0 on remove pendings,
+ * return -ENOMEM on failure.
*/
static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len,
@@ -2273,6 +2202,7 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end = lblk + len - 1;
ext4_lblk_t first, last;
bool f_del = false, l_del = false;
+ int pendings = 0;
int ret = 0;
if (len == 0)
@@ -2294,49 +2224,53 @@ static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
first = EXT4_LBLK_CMASK(sbi, lblk);
if (first != lblk)
- f_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ f_del = __es_scan_range(inode, &ext4_es_is_delayed,
first, lblk - 1);
if (f_del) {
ret = __insert_pending(inode, first, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else {
last = EXT4_LBLK_CMASK(sbi, end) +
sbi->s_cluster_ratio - 1;
if (last != end)
l_del = __es_scan_range(inode,
- &ext4_es_is_delonly,
+ &ext4_es_is_delayed,
end + 1, last);
if (l_del) {
ret = __insert_pending(inode, last, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else
__remove_pending(inode, last);
}
} else {
first = EXT4_LBLK_CMASK(sbi, lblk);
if (first != lblk)
- f_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ f_del = __es_scan_range(inode, &ext4_es_is_delayed,
first, lblk - 1);
if (f_del) {
ret = __insert_pending(inode, first, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else
__remove_pending(inode, first);
last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
if (last != end)
- l_del = __es_scan_range(inode, &ext4_es_is_delonly,
+ l_del = __es_scan_range(inode, &ext4_es_is_delayed,
end + 1, last);
if (l_del) {
ret = __insert_pending(inode, last, prealloc);
if (ret < 0)
goto out;
+ pendings += ret;
} else
__remove_pending(inode, last);
}
out:
- return ret;
+ return (ret < 0) ? ret : pendings;
}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 3c8e2edee5d5..4424232de298 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -42,6 +42,10 @@ enum {
#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS)
#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT)
+/*
+ * Besides EXTENT_STATUS_REFERENCED, all these extent type masks
+ * are exclusive, only one type can be set at a time.
+ */
#define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B)
#define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B)
#define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B)
@@ -51,7 +55,9 @@ enum {
#define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \
EXTENT_STATUS_UNWRITTEN | \
EXTENT_STATUS_DELAYED | \
- EXTENT_STATUS_HOLE) << ES_SHIFT)
+ EXTENT_STATUS_HOLE))
+
+#define ES_TYPE_VALID(type) ((type) && !((type) & ((type) - 1)))
struct ext4_sb_info;
struct ext4_extent;
@@ -129,7 +135,7 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
- unsigned int status);
+ unsigned int status, int flags);
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned int status);
@@ -156,7 +162,7 @@ static inline unsigned int ext4_es_status(struct extent_status *es)
static inline unsigned int ext4_es_type(struct extent_status *es)
{
- return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT;
+ return (es->es_pblk >> ES_SHIFT) & ES_TYPE_MASK;
}
static inline int ext4_es_is_written(struct extent_status *es)
@@ -184,11 +190,6 @@ static inline int ext4_es_is_mapped(struct extent_status *es)
return (ext4_es_is_written(es) || ext4_es_is_unwritten(es));
}
-static inline int ext4_es_is_delonly(struct extent_status *es)
-{
- return (ext4_es_is_delayed(es) && !ext4_es_is_unwritten(es));
-}
-
static inline void ext4_es_set_referenced(struct extent_status *es)
{
es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
@@ -224,17 +225,12 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
es->es_pblk = block;
}
-static inline void ext4_es_store_status(struct extent_status *es,
- unsigned int status)
-{
- es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
- (es->es_pblk & ~ES_MASK);
-}
-
static inline void ext4_es_store_pblock_status(struct extent_status *es,
ext4_fsblk_t pb,
unsigned int status)
{
+ WARN_ON_ONCE(!ES_TYPE_VALID(status & ES_TYPE_MASK));
+
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
(pb & ~ES_MASK);
}
@@ -252,8 +248,6 @@ extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, bool lclu_allocated,
bool end_allocated);
-extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len);
extern void ext4_clear_inode_es(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 3926a05eceee..eaa5f5b51f50 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -339,22 +339,29 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
tid_t tid;
+ bool has_transaction = true;
+ bool is_ineligible;
if (ext4_fc_disabled(sb))
return;
- ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (handle && !IS_ERR(handle))
tid = handle->h_transaction->t_tid;
else {
read_lock(&sbi->s_journal->j_state_lock);
- tid = sbi->s_journal->j_running_transaction ?
- sbi->s_journal->j_running_transaction->t_tid : 0;
+ if (sbi->s_journal->j_running_transaction)
+ tid = sbi->s_journal->j_running_transaction->t_tid;
+ else
+ has_transaction = false;
read_unlock(&sbi->s_journal->j_state_lock);
}
spin_lock(&sbi->s_fc_lock);
- if (tid_gt(tid, sbi->s_fc_ineligible_tid))
+ is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ if (has_transaction &&
+ (!is_ineligible ||
+ (is_ineligible && tid_gt(tid, sbi->s_fc_ineligible_tid))))
sbi->s_fc_ineligible_tid = tid;
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
spin_unlock(&sbi->s_fc_lock);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
@@ -1288,8 +1295,21 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
- if (tid_geq(tid, iter->i_sync_tid))
+ if (tid_geq(tid, iter->i_sync_tid)) {
ext4_fc_reset_inode(&iter->vfs_inode);
+ } else if (full) {
+ /*
+ * We are called after a full commit, inode has been
+ * modified while the commit was running. Re-enqueue
+ * the inode into STAGING, which will then be splice
+ * back into MAIN. This cannot happen during
+ * fastcommit because the journal is locked all the
+ * time in that case (and tid doesn't increase so
+ * tid check above isn't reliable).
+ */
+ list_add_tail(&EXT4_I(&iter->vfs_inode)->i_fc_list,
+ &sbi->s_fc_q[FC_Q_STAGING]);
+ }
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
smp_mb();
#if (BITS_PER_LONG < 64)
@@ -1772,7 +1792,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
if (ret == 0) {
/* Range is not mapped */
- path = ext4_find_extent(inode, cur, NULL, 0);
+ path = ext4_find_extent(inode, cur, path, 0);
if (IS_ERR(path))
goto out;
memset(&newex, 0, sizeof(newex));
@@ -1783,11 +1803,10 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
if (ext4_ext_is_unwritten(ex))
ext4_ext_mark_unwritten(&newex);
down_write(&EXT4_I(inode)->i_data_sem);
- ret = ext4_ext_insert_extent(
- NULL, inode, &path, &newex, 0);
+ path = ext4_ext_insert_extent(NULL, inode,
+ path, &newex, 0);
up_write((&EXT4_I(inode)->i_data_sem));
- ext4_free_ext_path(path);
- if (ret)
+ if (IS_ERR(path))
goto out;
goto next;
}
@@ -1836,6 +1855,7 @@ next:
ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
sb->s_blocksize_bits);
out:
+ ext4_free_ext_path(path);
iput(inode);
return 0;
}
@@ -1936,12 +1956,13 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
break;
if (ret > 0) {
- path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
+ path = ext4_find_extent(inode, map.m_lblk, path, 0);
if (!IS_ERR(path)) {
for (j = 0; j < path->p_depth; j++)
ext4_mb_mark_bb(inode->i_sb,
path[j].p_block, 1, true);
- ext4_free_ext_path(path);
+ } else {
+ path = NULL;
}
cur += ret;
ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
@@ -1952,6 +1973,8 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
}
iput(inode);
}
+
+ ext4_free_ext_path(path);
}
/*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index c89e434db6b7..f14aed14b9cf 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -306,7 +306,7 @@ out:
}
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
- ssize_t count)
+ ssize_t written, ssize_t count)
{
handle_t *handle;
@@ -315,7 +315,7 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
if (IS_ERR(handle))
return PTR_ERR(handle);
- if (ext4_update_inode_size(inode, offset + count)) {
+ if (ext4_update_inode_size(inode, offset + written)) {
int ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret)) {
ext4_journal_stop(handle);
@@ -323,21 +323,21 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
}
}
- if (inode->i_nlink)
+ if ((written == count) && inode->i_nlink)
ext4_orphan_del(handle, inode);
ext4_journal_stop(handle);
- return count;
+ return written;
}
/*
* Clean up the inode after DIO or DAX extending write has completed and the
* inode size has been updated using ext4_handle_inode_extension().
*/
-static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
+static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
{
lockdep_assert_held_write(&inode->i_rwsem);
- if (count < 0) {
+ if (need_trunc) {
ext4_truncate_failed_write(inode);
/*
* If the truncate operation failed early, then the inode may
@@ -393,7 +393,7 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) &&
pos + size <= i_size_read(inode))
return size;
- return ext4_handle_inode_extension(inode, pos, size);
+ return ext4_handle_inode_extension(inode, pos, size, size);
}
static const struct iomap_dio_ops ext4_dio_write_ops = {
@@ -586,7 +586,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
* writeback of delalloc blocks.
*/
WARN_ON_ONCE(ret == -EIOCBQUEUED);
- ext4_inode_extension_cleanup(inode, ret);
+ ext4_inode_extension_cleanup(inode, ret < 0);
}
out:
@@ -669,8 +669,8 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
if (extend) {
- ret = ext4_handle_inode_extension(inode, offset, ret);
- ext4_inode_extension_cleanup(inode, ret);
+ ret = ext4_handle_inode_extension(inode, offset, ret, count);
+ ext4_inode_extension_cleanup(inode, ret < (ssize_t)count);
}
out:
inode_unlock(inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9dfd768ed9f8..7f1a5f90dbbd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -87,10 +87,10 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
- grp = ext4_get_group_info(sb, block_group);
-
if (buffer_verified(bh))
return 0;
+
+ grp = ext4_get_group_info(sb, block_group);
if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
return -EFSCORRUPTED;
@@ -98,8 +98,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (buffer_verified(bh))
goto verified;
blk = ext4_inode_bitmap(sb, desc);
- if (!ext4_inode_bitmap_csum_verify(sb, desc, bh,
- EXT4_INODES_PER_GROUP(sb) / 8) ||
+ if (!ext4_inode_bitmap_csum_verify(sb, desc, bh) ||
ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
@@ -327,8 +326,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
if (percpu_counter_initialized(&sbi->s_dirs_counter))
percpu_counter_dec(&sbi->s_dirs_counter);
}
- ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh,
- EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
@@ -514,6 +512,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
if (min_inodes < 1)
min_inodes = 1;
min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
+ if (min_clusters < 0)
+ min_clusters = 0;
/*
* Start looking in the flex group where we last allocated an
@@ -755,10 +755,10 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
struct ext4_group_desc *gdp;
ext4_group_t group;
int bit;
- int err = -EFSCORRUPTED;
+ int err;
if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
- goto out;
+ return -EFSCORRUPTED;
group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
@@ -772,7 +772,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
- if (!gdp || !group_desc_bh) {
+ if (!gdp) {
err = -EINVAL;
goto out;
}
@@ -851,8 +851,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
if (ext4_has_group_desc_csum(sb)) {
- ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
- EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
@@ -860,6 +859,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh);
sync_dirty_buffer(group_desc_bh);
out:
+ brelse(inode_bitmap_bh);
return err;
}
@@ -1053,14 +1053,14 @@ got_group:
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
/* Skip groups with suspicious inode tables */
- if (((!(sbi->s_mount_state & EXT4_FC_REPLAY))
- && EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ||
- IS_ERR(inode_bitmap_bh)) {
+ if (IS_ERR(inode_bitmap_bh)) {
inode_bitmap_bh = NULL;
goto next_group;
}
+ if (!(sbi->s_mount_state & EXT4_FC_REPLAY) &&
+ EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ goto next_group;
-repeat_in_this_group:
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
if (!ret2)
goto next_group;
@@ -1110,8 +1110,6 @@ repeat_in_this_group:
if (!ret2)
goto got; /* we grabbed the inode! */
- if (ino < EXT4_INODES_PER_GROUP(sb))
- goto repeat_in_this_group;
next_group:
if (++group == ngroups)
group = 0;
@@ -1224,8 +1222,7 @@ got:
}
}
if (ext4_has_group_desc_csum(sb)) {
- ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
- EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index d8ca7f64f952..7404f0935c90 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -652,13 +652,6 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_update_inode_fsync_trans(handle, inode, 1);
count = ar.len;
- /*
- * Update reserved blocks/metadata blocks after successful block
- * allocation which had been deferred till now.
- */
- if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
- ext4_da_update_reserve_space(inode, count, 1);
-
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index edf4aa99a974..3536ca7e4fcc 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -601,10 +601,11 @@ retry:
goto out;
if (ext4_should_dioread_nolock(inode)) {
- ret = __block_write_begin(folio, from, to,
- ext4_get_block_unwritten);
+ ret = ext4_block_write_begin(handle, folio, from, to,
+ ext4_get_block_unwritten);
} else
- ret = __block_write_begin(folio, from, to, ext4_get_block);
+ ret = ext4_block_write_begin(handle, folio, from, to,
+ ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
@@ -856,8 +857,8 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
goto out;
}
- ret = __block_write_begin(folio, 0, inline_size,
- ext4_da_get_block_prep);
+ ret = ext4_block_write_begin(NULL, folio, 0, inline_size,
+ ext4_da_get_block_prep);
if (ret) {
up_read(&EXT4_I(inode)->xattr_sem);
folio_unlock(folio);
@@ -1665,24 +1666,36 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data)
{
+ struct ext4_xattr_ibody_find is = {
+ .s = { .not_found = -ENODATA, },
+ };
+ struct ext4_xattr_info i = {
+ .name_index = EXT4_XATTR_INDEX_SYSTEM,
+ .name = EXT4_XATTR_SYSTEM_DATA,
+ };
int ret;
- struct ext4_iloc iloc;
void *inline_start;
int inline_size;
- if (ext4_get_inode_loc(dir, &iloc))
- return NULL;
+ ret = ext4_get_inode_loc(dir, &is.iloc);
+ if (ret)
+ return ERR_PTR(ret);
down_read(&EXT4_I(dir)->xattr_sem);
+
+ ret = ext4_xattr_ibody_find(dir, &i, &is);
+ if (ret)
+ goto out;
+
if (!ext4_has_inline_data(dir)) {
*has_inline_data = 0;
goto out;
}
- inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
+ inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block +
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
- ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
+ ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
@@ -1692,20 +1705,23 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
goto out;
- inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
+ inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc);
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
- ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
+ ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
out:
- brelse(iloc.bh);
- iloc.bh = NULL;
+ brelse(is.iloc.bh);
+ if (ret < 0)
+ is.iloc.bh = ERR_PTR(ret);
+ else
+ is.iloc.bh = NULL;
out_find:
up_read(&EXT4_I(dir)->xattr_sem);
- return iloc.bh;
+ return is.iloc.bh;
}
int ext4_delete_inline_entry(handle_t *handle,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03374dc215d1..54bdd4884fe6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -49,6 +49,11 @@
#include <trace/events/ext4.h>
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+ struct inode *inode,
+ struct folio *folio,
+ unsigned from, unsigned to);
+
static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
{
@@ -478,7 +483,89 @@ static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
+ map->m_pblk, status, 0);
+ return retval;
+}
+
+static int ext4_map_create_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map, int flags)
+{
+ struct extent_status es;
+ unsigned int status;
+ int err, retval = 0;
+
+ /*
+ * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE
+ * indicates that the blocks and quotas has already been
+ * checked when the data was copied into the page cache.
+ */
+ if (map->m_flags & EXT4_MAP_DELAYED)
+ flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
+ /*
+ * Here we clear m_flags because after allocating an new extent,
+ * it will be set again.
+ */
+ map->m_flags &= ~EXT4_MAP_FLAGS;
+
+ /*
+ * We need to check for EXT4 here because migrate could have
+ * changed the inode type in between.
+ */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ retval = ext4_ext_map_blocks(handle, inode, map, flags);
+ } else {
+ retval = ext4_ind_map_blocks(handle, inode, map, flags);
+
+ /*
+ * We allocated new blocks which will result in i_data's
+ * format changing. Force the migrate to fail by clearing
+ * migrate flags.
+ */
+ if (retval > 0 && map->m_flags & EXT4_MAP_NEW)
+ ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
+ }
+ if (retval <= 0)
+ return retval;
+
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode %lu: "
+ "retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
+ }
+
+ /*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+ * use them before they are really zeroed. We also have to
+ * unmap metadata before zeroing as otherwise writeback can
+ * overwrite zeros with stale data from block device.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) {
+ err = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk,
+ map->m_len);
+ if (err)
+ return err;
+ }
+
+ /*
+ * If the extent has been zeroed out, we don't need to update
+ * extent status tree.
+ */
+ if (flags & EXT4_GET_BLOCKS_PRE_IO &&
+ ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
+ if (ext4_es_is_written(&es))
+ return retval;
+ }
+
+ status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status, flags);
+
return retval;
}
@@ -576,32 +663,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* file system block.
*/
down_read(&EXT4_I(inode)->i_data_sem);
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, 0);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, 0);
- }
- if (retval > 0) {
- unsigned int status;
-
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
- }
-
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- }
+ retval = ext4_map_query_blocks(handle, inode, map);
up_read((&EXT4_I(inode)->i_data_sem));
found:
@@ -631,88 +693,13 @@ found:
return retval;
/*
- * Here we clear m_flags because after allocating an new extent,
- * it will be set again.
- */
- map->m_flags &= ~EXT4_MAP_FLAGS;
-
- /*
* New blocks allocate and/or writing to unwritten extent
* will possibly result in updating i_data, so we take
* the write lock of i_data_sem, and call get_block()
* with create == 1 flag.
*/
down_write(&EXT4_I(inode)->i_data_sem);
-
- /*
- * We need to check for EXT4 here because migrate
- * could have changed the inode type in between
- */
- if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
- retval = ext4_ext_map_blocks(handle, inode, map, flags);
- } else {
- retval = ext4_ind_map_blocks(handle, inode, map, flags);
-
- if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
- /*
- * We allocated new blocks which will result in
- * i_data's format changing. Force the migrate
- * to fail by clearing migrate flags
- */
- ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
- }
- }
-
- if (retval > 0) {
- unsigned int status;
-
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
- }
-
- /*
- * We have to zeroout blocks before inserting them into extent
- * status tree. Otherwise someone could look them up there and
- * use them before they are really zeroed. We also have to
- * unmap metadata before zeroing as otherwise writeback can
- * overwrite zeros with stale data from block device.
- */
- if (flags & EXT4_GET_BLOCKS_ZERO &&
- map->m_flags & EXT4_MAP_MAPPED &&
- map->m_flags & EXT4_MAP_NEW) {
- ret = ext4_issue_zeroout(inode, map->m_lblk,
- map->m_pblk, map->m_len);
- if (ret) {
- retval = ret;
- goto out_sem;
- }
- }
-
- /*
- * If the extent has been zeroed out, we don't need to update
- * extent status tree.
- */
- if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
- ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
- if (ext4_es_is_written(&es))
- goto out_sem;
- }
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
- !(status & EXTENT_STATUS_WRITTEN) &&
- ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
- map->m_lblk + map->m_len - 1))
- status |= EXTENT_STATUS_DELAYED;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- }
-
-out_sem:
+ retval = ext4_map_create_blocks(handle, inode, map, flags);
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
@@ -1018,32 +1005,16 @@ static int ext4_dirty_journalled_data(handle_t *handle, struct buffer_head *bh)
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- int dirty = buffer_dirty(bh);
- int ret;
-
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
- /*
- * __block_write_begin() could have dirtied some buffers. Clean
- * the dirty bit as jbd2_journal_get_write_access() could complain
- * otherwise about fs integrity issues. Setting of the dirty bit
- * by __block_write_begin() isn't a real problem here as we clear
- * the bit before releasing a page lock and thus writeback cannot
- * ever write the buffer.
- */
- if (dirty)
- clear_buffer_dirty(bh);
BUFFER_TRACE(bh, "get write access");
- ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ return ext4_journal_get_write_access(handle, inode->i_sb, bh,
EXT4_JTR_NONE);
- if (!ret && dirty)
- ret = ext4_dirty_journalled_data(handle, bh);
- return ret;
}
-#ifdef CONFIG_FS_ENCRYPTION
-static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
- get_block_t *get_block)
+int ext4_block_write_begin(handle_t *handle, struct folio *folio,
+ loff_t pos, unsigned len,
+ get_block_t *get_block)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
@@ -1056,6 +1027,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
struct buffer_head *bh, *head, *wait[2];
int nr_wait = 0;
int i;
+ bool should_journal_data = ext4_should_journal_data(inode);
BUG_ON(!folio_test_locked(folio));
BUG_ON(from > PAGE_SIZE);
@@ -1085,10 +1057,22 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
if (err)
break;
if (buffer_new(bh)) {
+ /*
+ * We may be zeroing partial buffers or all new
+ * buffers in case of failure. Prepare JBD2 for
+ * that.
+ */
+ if (should_journal_data)
+ do_journal_get_write_access(handle,
+ inode, bh);
if (folio_test_uptodate(folio)) {
- clear_buffer_new(bh);
+ /*
+ * Unlike __block_write_begin() we leave
+ * dirtying of new uptodate buffers to
+ * ->write_end() time or
+ * folio_zero_new_buffers().
+ */
set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
continue;
}
if (block_end > to || block_start < from)
@@ -1118,7 +1102,11 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err)) {
- folio_zero_new_buffers(folio, from, to);
+ if (should_journal_data)
+ ext4_journalled_zero_new_buffers(handle, inode, folio,
+ from, to);
+ else
+ folio_zero_new_buffers(folio, from, to);
} else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
for (i = 0; i < nr_wait; i++) {
int err2;
@@ -1134,7 +1122,6 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
return err;
}
-#endif
/*
* To preserve ordering, it is essential that the hole instantiation and
@@ -1216,19 +1203,12 @@ retry_journal:
/* In case writeback began while the folio was unlocked */
folio_wait_stable(folio);
-#ifdef CONFIG_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
- ret = ext4_block_write_begin(folio, pos, len,
+ ret = ext4_block_write_begin(handle, folio, pos, len,
ext4_get_block_unwritten);
else
- ret = ext4_block_write_begin(folio, pos, len, ext4_get_block);
-#else
- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(folio, pos, len,
- ext4_get_block_unwritten);
- else
- ret = __block_write_begin(folio, pos, len, ext4_get_block);
-#endif
+ ret = ext4_block_write_begin(handle, folio, pos, len,
+ ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
folio_buffers(folio), from, to,
@@ -1241,7 +1221,7 @@ retry_journal:
folio_unlock(folio);
/*
- * __block_write_begin may have instantiated a few blocks
+ * ext4_block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
* i_size_read because we hold i_rwsem.
*
@@ -1388,9 +1368,9 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
size = min(to, block_end) - start;
folio_zero_range(folio, start, size);
- write_end_fn(handle, inode, bh);
}
clear_buffer_new(bh);
+ write_end_fn(handle, inode, bh);
}
}
block_start = block_end;
@@ -1661,7 +1641,7 @@ static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
int ret;
/* Has delalloc reservation? */
- if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
+ if (ext4_es_scan_clu(inode, &ext4_es_is_delayed, lblk))
return 1;
/* Already been allocated? */
@@ -1782,7 +1762,7 @@ found:
* Delayed extent could be allocated by fallocate.
* So we need to check it.
*/
- if (ext4_es_is_delonly(&es)) {
+ if (ext4_es_is_delayed(&es)) {
map->m_flags |= EXT4_MAP_DELAYED;
return 0;
}
@@ -2217,11 +2197,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
* writeback and there is nothing we can do about it so it might result
* in data loss. So use reserved blocks to allocate metadata if
* possible.
- *
- * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
- * the blocks in question are delalloc blocks. This indicates
- * that the blocks and quotas has already been checked when
- * the data was copied into the page cache.
*/
get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL |
@@ -2229,8 +2204,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
dioread_nolock = ext4_should_dioread_nolock(inode);
if (dioread_nolock)
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
- if (map->m_flags & BIT(BH_Delay))
- get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
if (err < 0)
@@ -2959,11 +2932,8 @@ retry:
if (IS_ERR(folio))
return PTR_ERR(folio);
-#ifdef CONFIG_FS_ENCRYPTION
- ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
-#else
- ret = __block_write_begin(folio, pos, len, ext4_da_get_block_prep);
-#endif
+ ret = ext4_block_write_begin(NULL, folio, pos, len,
+ ext4_da_get_block_prep);
if (ret < 0) {
folio_unlock(folio);
folio_put(folio);
@@ -4067,7 +4037,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
stop_block);
ext4_es_insert_extent(inode, first_block, hole_len, ~0,
- EXTENT_STATUS_HOLE);
+ EXTENT_STATUS_HOLE, 0);
up_write(&EXT4_I(inode)->i_data_sem);
}
ext4_fc_track_range(handle, inode, first_block, stop_block);
@@ -5276,8 +5246,9 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
{
unsigned offset;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- tid_t commit_tid = 0;
+ tid_t commit_tid;
int ret;
+ bool has_transaction;
offset = inode->i_size & (PAGE_SIZE - 1);
/*
@@ -5302,12 +5273,14 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
folio_put(folio);
if (ret != -EBUSY)
return;
- commit_tid = 0;
+ has_transaction = false;
read_lock(&journal->j_state_lock);
- if (journal->j_committing_transaction)
+ if (journal->j_committing_transaction) {
commit_tid = journal->j_committing_transaction->t_tid;
+ has_transaction = true;
+ }
read_unlock(&journal->j_state_lock);
- if (commit_tid)
+ if (has_transaction)
jbd2_log_wait_commit(journal, commit_tid);
}
}
@@ -6216,7 +6189,8 @@ retry_alloc:
if (folio_pos(folio) + len > size)
len = size - folio_pos(folio);
- err = __block_write_begin(folio, 0, len, ext4_get_block);
+ err = ext4_block_write_begin(handle, folio, 0, len,
+ ext4_get_block);
if (!err) {
ret = VM_FAULT_SIGBUS;
if (ext4_journal_folio_buffers(handle, folio, len))
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9dda9cd68ab2..d73e38323879 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2356,7 +2356,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ex.fe_logical = 0xDEADFA11; /* debug value */
if (max >= ac->ac_g_ex.fe_len &&
- ac->ac_g_ex.fe_len == EXT4_B2C(sbi, sbi->s_stripe)) {
+ ac->ac_g_ex.fe_len == EXT4_NUM_B2C(sbi, sbi->s_stripe)) {
ext4_fsblk_t start;
start = ext4_grp_offs_to_block(ac->ac_sb, &ex);
@@ -2553,7 +2553,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
do_div(a, sbi->s_stripe);
i = (a * sbi->s_stripe) - first_group_block;
- stripe = EXT4_B2C(sbi, sbi->s_stripe);
+ stripe = EXT4_NUM_B2C(sbi, sbi->s_stripe);
i = EXT4_B2C(sbi, i);
while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
if (!mb_test_bit(i, bitmap)) {
@@ -2928,9 +2928,11 @@ repeat:
if (cr == CR_POWER2_ALIGNED)
ext4_mb_simple_scan_group(ac, &e4b);
else {
- bool is_stripe_aligned = sbi->s_stripe &&
+ bool is_stripe_aligned =
+ (sbi->s_stripe >=
+ sbi->s_cluster_ratio) &&
!(ac->ac_g_ex.fe_len %
- EXT4_B2C(sbi, sbi->s_stripe));
+ EXT4_NUM_B2C(sbi, sbi->s_stripe));
if ((cr == CR_GOAL_LEN_FAST ||
cr == CR_BEST_AVAIL_LEN) &&
@@ -3075,8 +3077,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
seq_puts(seq, " ]");
if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
seq_puts(seq, " Block bitmap corrupted!");
- seq_puts(seq, "\n");
-
+ seq_putc(seq, '\n');
return 0;
}
@@ -3707,7 +3708,7 @@ int ext4_mb_init(struct super_block *sb)
*/
if (sbi->s_stripe > 1) {
sbi->s_mb_group_prealloc = roundup(
- sbi->s_mb_group_prealloc, EXT4_B2C(sbi, sbi->s_stripe));
+ sbi->s_mb_group_prealloc, EXT4_NUM_B2C(sbi, sbi->s_stripe));
}
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
@@ -3887,11 +3888,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
/*
* Clear the trimmed flag for the group so that the next
* ext4_trim_fs can trim it.
- * If the volume is mounted with -o discard, online discard
- * is supported and the free blocks will be trimmed online.
*/
- if (!test_opt(sb, DISCARD))
- EXT4_MB_GRP_CLEAR_TRIMMED(db);
+ EXT4_MB_GRP_CLEAR_TRIMMED(db);
if (!db->bb_free_root.rb_node) {
/* No more items in the per group rb tree
@@ -6515,8 +6513,9 @@ do_more:
" group:%u block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
- } else
- EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
+ }
+
+ EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
ext4_lock_group(sb, block_group);
mb_free_blocks(inode, &e4b, bit, count_clusters);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d98ac2af8199..1b0dfd963d3f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -37,7 +37,6 @@ static int finish_range(handle_t *handle, struct inode *inode,
path = ext4_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
retval = PTR_ERR(path);
- path = NULL;
goto err_out;
}
@@ -53,7 +52,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0);
if (retval < 0)
goto err_out;
- retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
+ path = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
+ if (IS_ERR(path))
+ retval = PTR_ERR(path);
err_out:
up_write((&EXT4_I(inode)->i_data_sem));
ext4_free_ext_path(path);
@@ -663,8 +664,8 @@ int ext4_ind_migrate(struct inode *inode)
if (unlikely(ret2 && !ret))
ret = ret2;
errout:
- ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
+ ext4_journal_stop(handle);
out_unlock:
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return ret;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 204f53b23622..b64661ea6e0e 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -17,27 +17,23 @@
* get_ext_path() - Find an extent path for designated logical block number.
* @inode: inode to be searched
* @lblock: logical block number to find an extent path
- * @ppath: pointer to an extent path pointer (for output)
+ * @path: pointer to an extent path
*
- * ext4_find_extent wrapper. Return 0 on success, or a negative error value
- * on failure.
+ * ext4_find_extent wrapper. Return an extent path pointer on success,
+ * or an error pointer on failure.
*/
-static inline int
+static inline struct ext4_ext_path *
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
- struct ext4_ext_path **ppath)
+ struct ext4_ext_path *path)
{
- struct ext4_ext_path *path;
-
- path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
+ path = ext4_find_extent(inode, lblock, path, EXT4_EX_NOCACHE);
if (IS_ERR(path))
- return PTR_ERR(path);
+ return path;
if (path[ext_depth(inode)].p_ext == NULL) {
ext4_free_ext_path(path);
- *ppath = NULL;
- return -ENODATA;
+ return ERR_PTR(-ENODATA);
}
- *ppath = path;
- return 0;
+ return path;
}
/**
@@ -95,9 +91,11 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
int ret = 0;
ext4_lblk_t last = from + count;
while (from < last) {
- *err = get_ext_path(inode, from, &path);
- if (*err)
- goto out;
+ path = get_ext_path(inode, from, path);
+ if (IS_ERR(path)) {
+ *err = PTR_ERR(path);
+ return ret;
+ }
ext = path[ext_depth(inode)].p_ext;
if (unwritten != ext4_ext_is_unwritten(ext))
goto out;
@@ -166,15 +164,16 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
return 0;
}
-/* Force page buffers uptodate w/o dropping page's lock */
-static int
-mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
+/* Force folio buffers uptodate w/o dropping folio's lock */
+static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
{
struct inode *inode = folio->mapping->host;
sector_t block;
- struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+ struct buffer_head *bh, *head;
unsigned int blocksize, block_start, block_end;
- int i, err, nr = 0, partial = 0;
+ int nr = 0;
+ bool partial = false;
+
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
@@ -186,19 +185,21 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
if (!head)
head = create_empty_buffers(folio, blocksize, 0);
- block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
- for (bh = head, block_start = 0; bh != head || !block_start;
- block++, block_start = block_end, bh = bh->b_this_page) {
+ block = folio_pos(folio) >> inode->i_blkbits;
+ block_end = 0;
+ bh = head;
+ do {
+ block_start = block_end;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
- partial = 1;
+ partial = true;
continue;
}
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
- err = ext4_get_block(inode, block, bh, 0);
+ int err = ext4_get_block(inode, block, bh, 0);
if (err)
return err;
if (!buffer_mapped(bh)) {
@@ -207,21 +208,30 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
}
}
- BUG_ON(nr >= MAX_BUF_PER_PAGE);
- arr[nr++] = bh;
- }
+ lock_buffer(bh);
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+ ext4_read_bh_nowait(bh, 0, NULL);
+ nr++;
+ } while (block++, (bh = bh->b_this_page) != head);
+
/* No io required */
if (!nr)
goto out;
- for (i = 0; i < nr; i++) {
- bh = arr[i];
- if (!bh_uptodate_or_lock(bh)) {
- err = ext4_read_bh(bh, 0, NULL);
- if (err)
- return err;
- }
- }
+ bh = head;
+ do {
+ if (bh_offset(bh) + blocksize <= from)
+ continue;
+ if (bh_offset(bh) > to)
+ break;
+ wait_on_buffer(bh);
+ if (buffer_uptodate(bh))
+ continue;
+ return -EIO;
+ } while ((bh = bh->b_this_page) != head);
out:
if (!partial)
folio_mark_uptodate(folio);
@@ -624,9 +634,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
int offset_in_page;
int unwritten, cur_len;
- ret = get_ext_path(orig_inode, o_start, &path);
- if (ret)
+ path = get_ext_path(orig_inode, o_start, path);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
goto out;
+ }
ex = path[path->p_depth].p_ext;
cur_blk = le32_to_cpu(ex->ee_block);
cur_len = ext4_ext_get_actual_len(ex);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6a95713f9193..790db7eac6c2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1482,7 +1482,7 @@ static bool ext4_match(struct inode *parent,
}
/*
- * Returns 0 if not found, -1 on failure, and 1 on success
+ * Returns 0 if not found, -EFSCORRUPTED on failure, and 1 on success
*/
int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct inode *dir, struct ext4_filename *fname,
@@ -1503,7 +1503,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
buf_size, offset))
- return -1;
+ return -EFSCORRUPTED;
*res_dir = de;
return 1;
}
@@ -1511,7 +1511,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
if (de_len <= 0)
- return -1;
+ return -EFSCORRUPTED;
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
@@ -1574,7 +1574,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
&has_inline_data);
if (inlined)
*inlined = has_inline_data;
- if (has_inline_data)
+ if (has_inline_data || IS_ERR(ret))
goto cleanup_and_exit;
}
@@ -1663,8 +1663,10 @@ restart:
goto cleanup_and_exit;
} else {
brelse(bh);
- if (i < 0)
+ if (i < 0) {
+ ret = ERR_PTR(i);
goto cleanup_and_exit;
+ }
}
next:
if (++block >= nblocks)
@@ -1758,7 +1760,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
if (retval == 1)
goto success;
brelse(bh);
- if (retval == -1) {
+ if (retval < 0) {
bh = ERR_PTR(ERR_BAD_DX_DIR);
goto errout;
}
@@ -1999,7 +2001,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
split = count/2;
hash2 = map[split].hash;
- continued = hash2 == map[split - 1].hash;
+ continued = split > 0 ? hash2 == map[split - 1].hash : 0;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
(unsigned long)dx_get_block(frame->at),
hash2, split, count-split));
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 8494492582ab..5d3a9dc9a32d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -221,7 +221,7 @@ int ext4_mpage_readpages(struct inode *inode,
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
- sector_t blocks[MAX_BUF_PER_PAGE];
+ sector_t first_block;
unsigned page_block;
struct block_device *bdev = inode->i_sb->s_bdev;
int length;
@@ -263,6 +263,7 @@ int ext4_mpage_readpages(struct inode *inode,
unsigned map_offset = block_in_file - map.m_lblk;
unsigned last = map.m_len - map_offset;
+ first_block = map.m_pblk + map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
/* needed? */
@@ -271,8 +272,6 @@ int ext4_mpage_readpages(struct inode *inode,
}
if (page_block == blocks_per_page)
break;
- blocks[page_block] = map.m_pblk + map_offset +
- relative_block;
page_block++;
block_in_file++;
}
@@ -307,7 +306,9 @@ int ext4_mpage_readpages(struct inode *inode,
goto confused; /* hole -> non-hole */
/* Contiguous blocks? */
- if (page_block && blocks[page_block-1] != map.m_pblk-1)
+ if (!page_block)
+ first_block = map.m_pblk;
+ else if (first_block + page_block != map.m_pblk)
goto confused;
for (relative_block = 0; ; relative_block++) {
if (relative_block == map.m_len) {
@@ -316,7 +317,6 @@ int ext4_mpage_readpages(struct inode *inode,
break;
} else if (page_block == blocks_per_page)
break;
- blocks[page_block] = map.m_pblk+relative_block;
page_block++;
block_in_file++;
}
@@ -339,7 +339,7 @@ int ext4_mpage_readpages(struct inode *inode,
* This folio will go to BIO. Do we need to send this
* BIO off first?
*/
- if (bio && (last_block_in_bio != blocks[0] - 1 ||
+ if (bio && (last_block_in_bio != first_block - 1 ||
!fscrypt_mergeable_bio(bio, inode, next_block))) {
submit_and_realloc:
submit_bio(bio);
@@ -355,7 +355,7 @@ int ext4_mpage_readpages(struct inode *inode,
fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, folio->index);
- bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
+ bio->bi_iter.bi_sector = first_block << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
if (rac)
bio->bi_opf |= REQ_RAHEAD;
@@ -371,7 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
submit_bio(bio);
bio = NULL;
} else
- last_block_in_bio = blocks[blocks_per_page - 1];
+ last_block_in_bio = first_block + blocks_per_page - 1;
continue;
confused:
if (bio) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 0ba9837d65ca..e04eb08b9060 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1319,8 +1319,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
if (!bh)
return -EIO;
- ext4_inode_bitmap_csum_set(sb, gdp, bh,
- EXT4_INODES_PER_GROUP(sb) / 8);
+ ext4_inode_bitmap_csum_set(sb, gdp, bh);
brelse(bh);
bh = ext4_get_bitmap(sb, group_data->block_bitmap);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e72145c4ae5a..16a4ce704460 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -735,11 +735,12 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
- * Make sure updated value of ->s_mount_flags will be visible before
- * ->s_flags update
+ * EXT4_FLAGS_SHUTDOWN was set which stops all filesystem
+ * modifications. We don't set SB_RDONLY because that requires
+ * sb->s_umount semaphore and setting it without proper remount
+ * procedure is confusing code such as freeze_super() leading to
+ * deadlocks and other problems.
*/
- smp_wmb();
- sb->s_flags |= SB_RDONLY;
}
static void update_super_work(struct work_struct *work)
@@ -3045,7 +3046,7 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset)
seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
rc = _ext4_show_options(seq, sb, 1);
- seq_puts(seq, "\n");
+ seq_putc(seq, '\n');
return rc;
}
@@ -5087,16 +5088,27 @@ out:
return ret;
}
-static void ext4_hash_info_init(struct super_block *sb)
+static int ext4_hash_info_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
unsigned int i;
+ sbi->s_def_hash_version = es->s_def_hash_version;
+
+ if (sbi->s_def_hash_version > DX_HASH_LAST) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid default hash set in the superblock");
+ return -EINVAL;
+ } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
+ ext4_msg(sb, KERN_ERR,
+ "SIPHASH is not a valid default hash value");
+ return -EINVAL;
+ }
+
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
- sbi->s_def_hash_version = es->s_def_hash_version;
if (ext4_has_feature_dir_index(sb)) {
i = le32_to_cpu(es->s_flags);
if (i & EXT2_FLAGS_UNSIGNED_HASH)
@@ -5114,6 +5126,7 @@ static void ext4_hash_info_init(struct super_block *sb)
#endif
}
}
+ return 0;
}
static int ext4_block_group_meta_init(struct super_block *sb, int silent)
@@ -5165,6 +5178,18 @@ static int ext4_block_group_meta_init(struct super_block *sb, int silent)
return 0;
}
+/*
+ * It's hard to get stripe aligned blocks if stripe is not aligned with
+ * cluster, just disable stripe and alert user to simplify code and avoid
+ * stripe aligned allocation which will rarely succeed.
+ */
+static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
+ stripe % sbi->s_cluster_ratio != 0);
+}
+
static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
{
struct ext4_super_block *es = NULL;
@@ -5249,7 +5274,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount;
- ext4_hash_info_init(sb);
+ err = ext4_hash_info_init(sb);
+ if (err)
+ goto failed_mount;
err = ext4_handle_clustersize(sb);
if (err)
@@ -5272,13 +5299,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount3;
sbi->s_stripe = ext4_get_stripe_size(sbi);
- /*
- * It's hard to get stripe aligned blocks if stripe is not aligned with
- * cluster, just disable stripe and alert user to simpfy code and avoid
- * stripe aligned allocation which will rarely successes.
- */
- if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 &&
- sbi->s_stripe % sbi->s_cluster_ratio != 0) {
+ if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
ext4_msg(sb, KERN_WARNING,
"stripe (%lu) is not aligned with cluster size (%u), "
"stripe is disabled",
@@ -5313,6 +5334,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
+ spin_lock_init(&sbi->s_bdev_wb_lock);
+
ext4_fast_commit_init(sb);
sb->s_root = NULL;
@@ -5534,7 +5557,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* Save the original bdev mapping's wb_err value which could be
* used to detect the metadata async write error.
*/
- spin_lock_init(&sbi->s_bdev_wb_lock);
errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
&sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
@@ -5614,8 +5636,8 @@ failed_mount3a:
failed_mount3:
/* flush s_sb_upd_work before sbi destroy */
flush_work(&sbi->s_sb_upd_work);
- del_timer_sync(&sbi->s_err_report);
ext4_stop_mmpd(sbi);
+ del_timer_sync(&sbi->s_err_report);
ext4_group_desc_free(sbi);
failed_mount:
if (sbi->s_chksum_driver)
@@ -6441,6 +6463,15 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
+ if ((ctx->spec & EXT4_SPEC_s_stripe) &&
+ ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
+ ext4_msg(sb, KERN_WARNING,
+ "stripe (%lu) is not aligned with cluster size (%u), "
+ "stripe is disabled",
+ ctx->s_stripe, sbi->s_cluster_ratio);
+ ctx->s_stripe = 0;
+ }
+
/*
* Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
* two calls to ext4_should_dioread_nolock() to return inconsistent
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 46ce2f21fef9..e0e1956dcdd3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -458,7 +458,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
ext4_xattr_inode_set_ref(inode, 1);
} else {
- inode_lock(inode);
+ inode_lock_nested(inode, I_MUTEX_XATTR);
inode->i_flags |= S_NOQUOTA;
inode_unlock(inode);
}
@@ -1039,7 +1039,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
s64 ref_count;
int ret;
- inode_lock(ea_inode);
+ inode_lock_nested(ea_inode, I_MUTEX_XATTR);
ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
if (ret)
@@ -2879,33 +2879,31 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
if (*ea_inode_array == NULL) {
/*
* Start with 15 inodes, so it fits into a power-of-two size.
- * If *ea_inode_array is NULL, this is essentially offsetof()
*/
- (*ea_inode_array) =
- kmalloc(offsetof(struct ext4_xattr_inode_array,
- inodes[EIA_MASK]),
- GFP_NOFS);
+ (*ea_inode_array) = kmalloc(
+ struct_size(*ea_inode_array, inodes, EIA_MASK),
+ GFP_NOFS);
if (*ea_inode_array == NULL)
return -ENOMEM;
(*ea_inode_array)->count = 0;
} else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
/* expand the array once all 15 + n * 16 slots are full */
struct ext4_xattr_inode_array *new_array = NULL;
- int count = (*ea_inode_array)->count;
- /* if new_array is NULL, this is essentially offsetof() */
new_array = kmalloc(
- offsetof(struct ext4_xattr_inode_array,
- inodes[count + EIA_INCR]),
- GFP_NOFS);
+ struct_size(*ea_inode_array, inodes,
+ (*ea_inode_array)->count + EIA_INCR),
+ GFP_NOFS);
if (new_array == NULL)
return -ENOMEM;
memcpy(new_array, *ea_inode_array,
- offsetof(struct ext4_xattr_inode_array, inodes[count]));
+ struct_size(*ea_inode_array, inodes,
+ (*ea_inode_array)->count));
kfree(*ea_inode_array);
*ea_inode_array = new_array;
}
- (*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
+ (*ea_inode_array)->count++;
+ (*ea_inode_array)->inodes[(*ea_inode_array)->count - 1] = inode;
return 0;
}
@@ -3036,8 +3034,6 @@ void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
*
* Create a new entry in the extended attribute block cache, and insert
* it unless such an entry is already in the cache.
- *
- * Returns 0, or a negative error number on failure.
*/
static void
ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
@@ -3065,8 +3061,7 @@ ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
*
* Compare two extended attribute blocks for equality.
*
- * Returns 0 if the blocks are equal, 1 if they differ, and
- * a negative error number on errors.
+ * Returns 0 if the blocks are equal, 1 if they differ.
*/
static int
ext4_xattr_cmp(struct ext4_xattr_header *header1,
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index bd97c4aa8177..b25c2d7b5f99 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -32,8 +32,7 @@ struct ext4_xattr_header {
__le32 h_refcount; /* reference count */
__le32 h_blocks; /* number of disk blocks used */
__le32 h_hash; /* hash value of all attributes */
- __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */
- /* id = inum if refcount=1, blknum otherwise */
+ __le32 h_checksum; /* crc32c(uuid+blknum+xattrblock) */
__u32 h_reserved[3]; /* zero right now */
};
@@ -130,8 +129,8 @@ struct ext4_xattr_ibody_find {
};
struct ext4_xattr_inode_array {
- unsigned int count; /* # of used items in the array */
- struct inode *inodes[];
+ unsigned int count;
+ struct inode *inodes[] __counted_by(count);
};
extern const struct xattr_handler ext4_xattr_user_handler;
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 951f78634adf..b3971e91e8eb 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -79,17 +79,23 @@ __releases(&journal->j_state_lock)
if (space_left < nblocks) {
int chkpt = journal->j_checkpoint_transactions != NULL;
tid_t tid = 0;
+ bool has_transaction = false;
- if (journal->j_committing_transaction)
+ if (journal->j_committing_transaction) {
tid = journal->j_committing_transaction->t_tid;
+ has_transaction = true;
+ }
spin_unlock(&journal->j_list_lock);
write_unlock(&journal->j_state_lock);
if (chkpt) {
jbd2_log_do_checkpoint(journal);
- } else if (jbd2_cleanup_journal_tail(journal) == 0) {
- /* We were able to recover space; yay! */
+ } else if (jbd2_cleanup_journal_tail(journal) <= 0) {
+ /*
+ * We were able to recover space or the
+ * journal was aborted due to an error.
+ */
;
- } else if (tid) {
+ } else if (has_transaction) {
/*
* jbd2_journal_commit_transaction() may want
* to take the checkpoint_mutex if JBD2_FLUSHED
@@ -407,6 +413,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
tid_t tid = 0;
unsigned long nr_freed = 0;
unsigned long freed;
+ bool first_set = false;
again:
spin_lock(&journal->j_list_lock);
@@ -426,8 +433,10 @@ again:
else
transaction = journal->j_checkpoint_transactions;
- if (!first_tid)
+ if (!first_set) {
first_tid = transaction->t_tid;
+ first_set = true;
+ }
last_transaction = journal->j_checkpoint_transactions->t_cpprev;
next_transaction = transaction;
last_tid = last_transaction->t_tid;
@@ -457,7 +466,7 @@ again:
spin_unlock(&journal->j_list_lock);
cond_resched();
- if (*nr_to_scan && next_tid)
+ if (*nr_to_scan && journal->j_shrink_transaction)
goto again;
out:
trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 1ebf2393bfb7..97f487c3d8fc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -281,6 +281,16 @@ static void journal_kill_thread(journal_t *journal)
write_unlock(&journal->j_state_lock);
}
+static inline bool jbd2_data_needs_escaping(char *data)
+{
+ return *((__be32 *)data) == cpu_to_be32(JBD2_MAGIC_NUMBER);
+}
+
+static inline void jbd2_data_do_escape(char *data)
+{
+ *((unsigned int *)data) = 0;
+}
+
/*
* jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
*
@@ -318,9 +328,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct buffer_head **bh_out,
sector_t blocknr)
{
- int done_copy_out = 0;
int do_escape = 0;
- char *mapped_data;
struct buffer_head *new_bh;
struct folio *new_folio;
unsigned int new_offset;
@@ -349,37 +357,33 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
* we use that version of the data for the commit.
*/
if (jh_in->b_frozen_data) {
- done_copy_out = 1;
new_folio = virt_to_folio(jh_in->b_frozen_data);
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
+ do_escape = jbd2_data_needs_escaping(jh_in->b_frozen_data);
+ if (do_escape)
+ jbd2_data_do_escape(jh_in->b_frozen_data);
} else {
+ char *tmp;
+ char *mapped_data;
+
new_folio = bh_in->b_folio;
new_offset = offset_in_folio(new_folio, bh_in->b_data);
- }
-
- mapped_data = kmap_local_folio(new_folio, new_offset);
- /*
- * Fire data frozen trigger if data already wasn't frozen. Do this
- * before checking for escaping, as the trigger may modify the magic
- * offset. If a copy-out happens afterwards, it will have the correct
- * data in the buffer.
- */
- if (!done_copy_out)
+ mapped_data = kmap_local_folio(new_folio, new_offset);
+ /*
+ * Fire data frozen trigger if data already wasn't frozen. Do
+ * this before checking for escaping, as the trigger may modify
+ * the magic offset. If a copy-out happens afterwards, it will
+ * have the correct data in the buffer.
+ */
jbd2_buffer_frozen_trigger(jh_in, mapped_data,
jh_in->b_triggers);
-
- /*
- * Check for escaping
- */
- if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER))
- do_escape = 1;
- kunmap_local(mapped_data);
-
- /*
- * Do we need to do a data copy?
- */
- if (do_escape && !done_copy_out) {
- char *tmp;
+ do_escape = jbd2_data_needs_escaping(mapped_data);
+ kunmap_local(mapped_data);
+ /*
+ * Do we need to do a data copy?
+ */
+ if (!do_escape)
+ goto escape_done;
spin_unlock(&jh_in->b_state_lock);
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
@@ -406,18 +410,10 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
copy_done:
new_folio = virt_to_folio(jh_in->b_frozen_data);
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
- done_copy_out = 1;
+ jbd2_data_do_escape(jh_in->b_frozen_data);
}
- /*
- * Did we need to do an escaping? Now we've done all the
- * copying, we can finally do so.
- * b_frozen_data is from jbd2_alloc() which always provides an
- * address from the direct kernels mapping.
- */
- if (do_escape)
- *((unsigned int *)jh_in->b_frozen_data) = 0;
-
+escape_done:
folio_set_bh(new_bh, new_folio, new_offset);
new_bh->b_size = bh_in->b_size;
new_bh->b_bdev = journal->j_dev;
@@ -710,7 +706,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
return -EINVAL;
write_lock(&journal->j_state_lock);
- if (tid <= journal->j_commit_sequence) {
+ if (tid_geq(journal->j_commit_sequence, tid)) {
write_unlock(&journal->j_state_lock);
return -EALREADY;
}
@@ -740,9 +736,9 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
*/
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
- jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
journal->j_fc_cleanup_callback(journal, 0, tid);
+ jbd2_journal_unlock_updates(journal);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
if (fallback)
@@ -841,17 +837,12 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
*bh_out = NULL;
- if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
- fc_off = journal->j_fc_off;
- blocknr = journal->j_fc_first + fc_off;
- journal->j_fc_off++;
- } else {
- ret = -EINVAL;
- }
-
- if (ret)
- return ret;
+ if (journal->j_fc_off + journal->j_fc_first >= journal->j_fc_last)
+ return -EINVAL;
+ fc_off = journal->j_fc_off;
+ blocknr = journal->j_fc_first + fc_off;
+ journal->j_fc_off++;
ret = jbd2_journal_bmap(journal, blocknr, &pblock);
if (ret)
return ret;
@@ -860,7 +851,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
if (!bh)
return -ENOMEM;
-
journal->j_fc_wbuf[fc_off] = bh;
*bh_out = bh;
@@ -903,7 +893,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
}
EXPORT_SYMBOL(jbd2_fc_wait_bufs);
-int jbd2_fc_release_bufs(journal_t *journal)
+void jbd2_fc_release_bufs(journal_t *journal)
{
struct buffer_head *bh;
int i, j_fc_off;
@@ -917,8 +907,6 @@ int jbd2_fc_release_bufs(journal_t *journal)
put_bh(bh);
journal->j_fc_wbuf[i] = NULL;
}
-
- return 0;
}
EXPORT_SYMBOL(jbd2_fc_release_bufs);
@@ -1944,7 +1932,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
if (had_fast_commit)
jbd2_set_feature_fast_commit(journal);
- /* Log is no longer empty */
+ /* Log is empty */
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
@@ -2866,8 +2854,7 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
- if (ret)
- spin_lock_init(&ret->b_state_lock);
+ spin_lock_init(&ret->b_state_lock);
return ret;
}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 5157d92b6f23..8aef9bb6ad57 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1086,7 +1086,7 @@ struct journal_s
int j_revoke_records_per_block;
/**
- * @j_transaction_overhead:
+ * @j_transaction_overhead_buffers:
*
* Number of blocks each transaction needs for its own bookkeeping
*/
@@ -1675,7 +1675,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
-int jbd2_fc_release_bufs(journal_t *journal);
+void jbd2_fc_release_bufs(journal_t *journal);
/*
* is_journal_abort