diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-29 13:39:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-29 13:39:51 -0700 |
commit | c6b0271053e7a5ae57511363213777f706b60489 (patch) | |
tree | 2395b0f0b876bf06797312ccb477600252dbdb07 /fs/ext2 | |
parent | 18c9901d7435b20b13357907bac2c0e3b0fd4cd6 (diff) | |
parent | 028f6055c912588e6f72722d89c30b401bbcf013 (diff) |
Merge tag 'fs_for_v6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull misc filesystem updates from Jan Kara:
- Rewrite kmap_local() handling in ext2
- Convert ext2 direct IO path to iomap (with some infrastructure tweaks
associated with that)
- Convert two boilerplate licenses in udf to SPDX identifiers
- Other small udf, ext2, and quota fixes and cleanups
* tag 'fs_for_v6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
udf: Fix uninitialized array access for some pathnames
ext2: Drop fragment support
quota: fix warning in dqgrab()
quota: Properly disable quotas when add_dquot_ref() fails
fs: udf: udftime: Replace LGPL boilerplate with SPDX identifier
fs: udf: Replace GPL 2.0 boilerplate license notice with SPDX identifier
fs: Drop wait_unfrozen wait queue
ext2_find_entry()/ext2_dotdot(): callers don't need page_addr anymore
ext2_{set_link,delete_entry}(): don't bother with page_addr
ext2_put_page(): accept any pointer within the page
ext2_get_page(): saner type
ext2: use offset_in_page() instead of open-coding it as subtraction
ext2_rename(): set_link and delete_entry may fail
ext2: Add direct-io trace points
ext2: Move direct-io to use iomap
ext2: Use generic_buffers_fsync() implementation
ext4: Use generic_buffers_fsync_noflush() implementation
fs/buffer.c: Add generic_buffers_fsync*() implementation
ext2/dax: Fix ext2_setsize when len is page aligned
Diffstat (limited to 'fs/ext2')
-rw-r--r-- | fs/ext2/Makefile | 5 | ||||
-rw-r--r-- | fs/ext2/dir.c | 136 | ||||
-rw-r--r-- | fs/ext2/ext2.h | 23 | ||||
-rw-r--r-- | fs/ext2/file.c | 126 | ||||
-rw-r--r-- | fs/ext2/inode.c | 58 | ||||
-rw-r--r-- | fs/ext2/namei.c | 63 | ||||
-rw-r--r-- | fs/ext2/super.c | 23 | ||||
-rw-r--r-- | fs/ext2/trace.c | 6 | ||||
-rw-r--r-- | fs/ext2/trace.h | 94 |
9 files changed, 356 insertions, 178 deletions
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index 311479d864a7..8860948ef9ca 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -6,7 +6,10 @@ obj-$(CONFIG_EXT2_FS) += ext2.o ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o + ioctl.o namei.o super.o symlink.o trace.o + +# For tracepoints to include our trace.h from tracepoint infrastructure +CFLAGS_trace.o := -I$(src) ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 4a6955a0a116..42db804794bd 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -186,23 +186,25 @@ fail: * NOTE: ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_page() * and should be treated as a call to ext2_get_page() for nesting purposes. */ -static struct page * ext2_get_page(struct inode *dir, unsigned long n, - int quiet, void **page_addr) +static void *ext2_get_page(struct inode *dir, unsigned long n, + int quiet, struct page **page) { struct address_space *mapping = dir->i_mapping; struct folio *folio = read_mapping_folio(mapping, n, NULL); + void *page_addr; if (IS_ERR(folio)) - return &folio->page; - *page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1)); + return ERR_CAST(folio); + page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1)); if (unlikely(!folio_test_checked(folio))) { - if (!ext2_check_page(&folio->page, quiet, *page_addr)) + if (!ext2_check_page(&folio->page, quiet, page_addr)) goto fail; } - return &folio->page; + *page = &folio->page; + return page_addr; fail: - ext2_put_page(&folio->page, *page_addr); + ext2_put_page(&folio->page, page_addr); return ERR_PTR(-EIO); } @@ -240,7 +242,7 @@ ext2_validate_entry(char *base, unsigned offset, unsigned mask) break; p = ext2_next_entry(p); } - return (char *)p - base; + return offset_in_page(p); } static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) @@ -271,16 +273,17 @@ ext2_readdir(struct file *file, struct dir_context *ctx) EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE); for ( ; n < npages; n++, offset = 0) { - char *kaddr, *limit; ext2_dirent *de; - struct page *page = ext2_get_page(inode, n, 0, (void **)&kaddr); + struct page *page; + char *kaddr = ext2_get_page(inode, n, 0, &page); + char *limit; - if (IS_ERR(page)) { + if (IS_ERR(kaddr)) { ext2_error(sb, __func__, "bad page in #%lu", inode->i_ino); ctx->pos += PAGE_SIZE - offset; - return PTR_ERR(page); + return PTR_ERR(kaddr); } if (unlikely(need_revalidate)) { if (offset) { @@ -296,7 +299,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) if (de->rec_len == 0) { ext2_error(sb, __func__, "zero-length directory entry"); - ext2_put_page(page, kaddr); + ext2_put_page(page, de); return -EIO; } if (de->inode) { @@ -308,7 +311,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit(ctx, de->name, de->name_len, le32_to_cpu(de->inode), d_type)) { - ext2_put_page(page, kaddr); + ext2_put_page(page, de); return 0; } } @@ -336,8 +339,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) * should be treated as a call to ext2_get_page() for nesting purposes. */ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, - const struct qstr *child, struct page **res_page, - void **res_page_addr) + const struct qstr *child, struct page **res_page) { const char *name = child->name; int namelen = child->len; @@ -347,40 +349,36 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir, struct page *page = NULL; struct ext2_inode_info *ei = EXT2_I(dir); ext2_dirent * de; - void *page_addr; if (npages == 0) goto out; /* OFFSET_CACHE */ *res_page = NULL; - *res_page_addr = NULL; start = ei->i_dir_start_lookup; if (start >= npages) start = 0; n = start; do { - char *kaddr; - page = ext2_get_page(dir, n, 0, &page_addr); - if (IS_ERR(page)) - return ERR_CAST(page); + char *kaddr = ext2_get_page(dir, n, 0, &page); + if (IS_ERR(kaddr)) + return ERR_CAST(kaddr); - kaddr = page_addr; de = (ext2_dirent *) kaddr; kaddr += ext2_last_byte(dir, n) - reclen; while ((char *) de <= kaddr) { if (de->rec_len == 0) { ext2_error(dir->i_sb, __func__, "zero-length directory entry"); - ext2_put_page(page, page_addr); + ext2_put_page(page, de); goto out; } if (ext2_match(namelen, name, de)) goto found; de = ext2_next_entry(de); } - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); if (++n >= npages) n = 0; @@ -398,7 +396,6 @@ out: found: *res_page = page; - *res_page_addr = page_addr; ei->i_dir_start_lookup = n; return de; } @@ -415,33 +412,26 @@ found: * ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_page() and * should be treated as a call to ext2_get_page() for nesting purposes. */ -struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, - void **pa) +struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p) { - void *page_addr; - struct page *page = ext2_get_page(dir, 0, 0, &page_addr); - ext2_dirent *de = NULL; + ext2_dirent *de = ext2_get_page(dir, 0, 0, p); - if (!IS_ERR(page)) { - de = ext2_next_entry((ext2_dirent *) page_addr); - *p = page; - *pa = page_addr; - } - return de; + if (!IS_ERR(de)) + return ext2_next_entry(de); + return NULL; } int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino) { struct ext2_dir_entry_2 *de; struct page *page; - void *page_addr; - de = ext2_find_entry(dir, child, &page, &page_addr); + de = ext2_find_entry(dir, child, &page); if (IS_ERR(de)) return PTR_ERR(de); *ino = le32_to_cpu(de->inode); - ext2_put_page(page, page_addr); + ext2_put_page(page, de); return 0; } @@ -462,11 +452,9 @@ static int ext2_handle_dirsync(struct inode *dir) } int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, void *page_addr, struct inode *inode, - bool update_times) + struct page *page, struct inode *inode, bool update_times) { - loff_t pos = page_offset(page) + - (char *) de - (char *) page_addr; + loff_t pos = page_offset(page) + offset_in_page(de); unsigned len = ext2_rec_len_from_disk(de->rec_len); int err; @@ -498,7 +486,6 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) unsigned reclen = EXT2_DIR_REC_LEN(namelen); unsigned short rec_len, name_len; struct page *page = NULL; - void *page_addr = NULL; ext2_dirent * de; unsigned long npages = dir_pages(dir); unsigned long n; @@ -511,15 +498,12 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) * to protect that region. */ for (n = 0; n <= npages; n++) { - char *kaddr; + char *kaddr = ext2_get_page(dir, n, 0, &page); char *dir_end; - page = ext2_get_page(dir, n, 0, &page_addr); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; + if (IS_ERR(kaddr)) + return PTR_ERR(kaddr); lock_page(page); - kaddr = page_addr; dir_end = kaddr + ext2_last_byte(dir, n); de = (ext2_dirent *)kaddr; kaddr += PAGE_SIZE - reclen; @@ -550,14 +534,13 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) de = (ext2_dirent *) ((char *) de + rec_len); } unlock_page(page); - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); } BUG(); return -EINVAL; got_it: - pos = page_offset(page) + - (char *)de - (char *)page_addr; + pos = page_offset(page) + offset_in_page(de); err = ext2_prepare_chunk(page, pos, rec_len); if (err) goto out_unlock; @@ -578,8 +561,7 @@ got_it: err = ext2_handle_dirsync(dir); /* OFFSET_CACHE */ out_put: - ext2_put_page(page, page_addr); -out: + ext2_put_page(page, de); return err; out_unlock: unlock_page(page); @@ -590,34 +572,36 @@ out_unlock: * ext2_delete_entry deletes a directory entry by merging it with the * previous entry. Page is up-to-date. */ -int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, - char *kaddr) +int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page) { struct inode *inode = page->mapping->host; - unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char *)dir - kaddr) + + char *kaddr = (char *)((unsigned long)dir & PAGE_MASK); + unsigned from = offset_in_page(dir) & ~(ext2_chunk_size(inode)-1); + unsigned to = offset_in_page(dir) + ext2_rec_len_from_disk(dir->rec_len); loff_t pos; - ext2_dirent * pde = NULL; - ext2_dirent * de = (ext2_dirent *) (kaddr + from); + ext2_dirent *pde = NULL; + ext2_dirent *de = (ext2_dirent *)(kaddr + from); int err; while ((char*)de < (char*)dir) { if (de->rec_len == 0) { ext2_error(inode->i_sb, __func__, "zero-length directory entry"); - err = -EIO; - goto out; + return -EIO; } pde = de; de = ext2_next_entry(de); } if (pde) - from = (char *)pde - kaddr; + from = offset_in_page(pde); pos = page_offset(page) + from; lock_page(page); err = ext2_prepare_chunk(page, pos, to - from); - BUG_ON(err); + if (err) { + unlock_page(page); + return err; + } if (pde) pde->rec_len = ext2_rec_len_to_disk(to - from); dir->inode = 0; @@ -625,9 +609,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, inode->i_ctime = inode->i_mtime = current_time(inode); EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); - err = ext2_handle_dirsync(inode); -out: - return err; + return ext2_handle_dirsync(inode); } /* @@ -677,19 +659,17 @@ fail: */ int ext2_empty_dir (struct inode * inode) { - void *page_addr = NULL; - struct page *page = NULL; + struct page *page; + char *kaddr; unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { - char *kaddr; - ext2_dirent * de; - page = ext2_get_page(inode, i, 0, &page_addr); + ext2_dirent *de; - if (IS_ERR(page)) + kaddr = ext2_get_page(inode, i, 0, &page); + if (IS_ERR(kaddr)) return 0; - kaddr = page_addr; de = (ext2_dirent *)kaddr; kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1); @@ -715,12 +695,12 @@ int ext2_empty_dir (struct inode * inode) } de = ext2_next_entry(de); } - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); } return 1; not_empty: - ext2_put_page(page, page_addr); + ext2_put_page(page, kaddr); return 0; } diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8244366862e4..35a041c47c38 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -70,10 +70,7 @@ struct mb_cache; * second extended-fs super-block data in memory */ struct ext2_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ unsigned long s_blocks_per_group;/* Number of blocks in a group */ unsigned long s_inodes_per_group;/* Number of inodes in a group */ unsigned long s_itb_per_group; /* Number of inode table blocks per group */ @@ -189,15 +186,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) #define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) /* - * Macro-instructions used to manage fragments - */ -#define EXT2_MIN_FRAG_SIZE 1024 -#define EXT2_MAX_FRAG_SIZE 4096 -#define EXT2_MIN_FRAG_LOG_SIZE 10 -#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) -#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) - -/* * Structure of a blocks group descriptor */ struct ext2_group_desc @@ -730,14 +718,12 @@ extern int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino); extern int ext2_make_empty(struct inode *, struct inode *); extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, - struct page **, void **res_page_addr); -extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page, - char *kaddr); + struct page **); +extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page); extern int ext2_empty_dir (struct inode *); -extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa); +extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p); int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, void *page_addr, struct inode *inode, - bool update_times); + struct page *page, struct inode *inode, bool update_times); static inline void ext2_put_page(struct page *page, void *page_addr) { kunmap_local(page_addr); @@ -754,6 +740,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_evict_inode(struct inode *); +void ext2_write_failed(struct address_space *mapping, loff_t to); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *); extern int ext2_getattr (struct mnt_idmap *, const struct path *, diff --git a/fs/ext2/file.c b/fs/ext2/file.c index d1ae0f0a3726..0b4c91c62e1f 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -25,9 +25,11 @@ #include <linux/quotaops.h> #include <linux/iomap.h> #include <linux/uio.h> +#include <linux/buffer_head.h> #include "ext2.h" #include "xattr.h" #include "acl.h" +#include "trace.h" #ifdef CONFIG_FS_DAX static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -153,7 +155,7 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct super_block *sb = file->f_mapping->host->i_sb; - ret = generic_file_fsync(file, start, end, datasync); + ret = generic_buffers_fsync(file, start, end, datasync); if (ret == -EIO) /* We don't really know where the IO error happened... */ ext2_error(sb, __func__, @@ -161,12 +163,131 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + + trace_ext2_dio_read_begin(iocb, to, 0); + inode_lock_shared(inode); + ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0); + inode_unlock_shared(inode); + trace_ext2_dio_read_end(iocb, to, ret); + + return ret; +} + +static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + loff_t pos = iocb->ki_pos; + struct inode *inode = file_inode(iocb->ki_filp); + + if (error) + goto out; + + /* + * If we are extending the file, we have to update i_size here before + * page cache gets invalidated in iomap_dio_rw(). This prevents racing + * buffered reads from zeroing out too much from page cache pages. + * Note that all extending writes always happens synchronously with + * inode lock held by ext2_dio_write_iter(). So it is safe to update + * inode size here for extending file writes. + */ + pos += size; + if (pos > i_size_read(inode)) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } +out: + trace_ext2_dio_write_endio(iocb, size, error); + return error; +} + +static const struct iomap_dio_ops ext2_dio_write_ops = { + .end_io = ext2_dio_write_end_io, +}; + +static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + ssize_t ret; + unsigned int flags = 0; + unsigned long blocksize = inode->i_sb->s_blocksize; + loff_t offset = iocb->ki_pos; + loff_t count = iov_iter_count(from); + ssize_t status = 0; + + trace_ext2_dio_write_begin(iocb, from, 0); + inode_lock(inode); + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out_unlock; + + ret = kiocb_modified(iocb); + if (ret) + goto out_unlock; + + /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */ + if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) || + (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize))) + flags |= IOMAP_DIO_FORCE_WAIT; + + ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops, + flags, NULL, 0); + + /* ENOTBLK is magic return value for fallback to buffered-io */ + if (ret == -ENOTBLK) + ret = 0; + + if (ret < 0 && ret != -EIOCBQUEUED) + ext2_write_failed(inode->i_mapping, offset + count); + + /* handle case for partial write and for fallback to buffered write */ + if (ret >= 0 && iov_iter_count(from)) { + loff_t pos, endbyte; + int ret2; + + iocb->ki_flags &= ~IOCB_DIRECT; + pos = iocb->ki_pos; + status = generic_perform_write(iocb, from); + if (unlikely(status < 0)) { + ret = status; + goto out_unlock; + } + + iocb->ki_pos += status; + ret += status; + endbyte = pos + status - 1; + ret2 = filemap_write_and_wait_range(inode->i_mapping, pos, + endbyte); + if (!ret2) + invalidate_mapping_pages(inode->i_mapping, + pos >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + if (ret > 0) + generic_write_sync(iocb, ret); + } + +out_unlock: + inode_unlock(inode); + if (status) + trace_ext2_dio_write_buff_end(iocb, from, status); + trace_ext2_dio_write_end(iocb, from, ret); + return ret; +} + static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { #ifdef CONFIG_FS_DAX if (IS_DAX(iocb->ki_filp->f_mapping->host)) return ext2_dax_read_iter(iocb, to); #endif + if (iocb->ki_flags & IOCB_DIRECT) + return ext2_dio_read_iter(iocb, to); + return generic_file_read_iter(iocb, to); } @@ -176,6 +297,9 @@ static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (IS_DAX(iocb->ki_filp->f_mapping->host)) return ext2_dax_write_iter(iocb, from); #endif + if (iocb->ki_flags & IOCB_DIRECT) + return ext2_dio_write_iter(iocb, from); + return generic_file_write_iter(iocb, from); } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 26f135e7ffce..75983215c7a1 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -56,7 +56,7 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) static void ext2_truncate_blocks(struct inode *inode, loff_t offset); -static void ext2_write_failed(struct address_space *mapping, loff_t to) +void ext2_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; @@ -809,9 +809,27 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, bool new = false, boundary = false; u32 bno; int ret; + bool create = flags & IOMAP_WRITE; + + /* + * For writes that could fill holes inside i_size on a + * DIO_SKIP_HOLES filesystem we forbid block creations: only + * overwrites are permitted. + */ + if ((flags & IOMAP_DIRECT) && + (first_block << blkbits) < i_size_read(inode)) + create = 0; + + /* + * Writes that span EOF might trigger an IO size update on completion, + * so consider them to be dirty for the purposes of O_DSYNC even if + * there is no other metadata changes pending or have been made here. + */ + if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode)) + iomap->flags |= IOMAP_F_DIRTY; ret = ext2_get_blocks(inode, first_block, max_blocks, - &bno, &new, &boundary, flags & IOMAP_WRITE); + &bno, &new, &boundary, create); if (ret < 0) return ret; @@ -823,6 +841,12 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->bdev = inode->i_sb->s_bdev; if (ret == 0) { + /* + * Switch to buffered-io for writing to holes in a non-extent + * based filesystem to avoid stale data exposure problem. + */ + if (!create && (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT)) + return -ENOTBLK; iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; iomap->length = 1 << blkbits; @@ -844,6 +868,13 @@ static int ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { + /* + * Switch to buffered-io in case of any error. + * Blocks allocated can be used by the buffered-io path. + */ + if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE) && written == 0) + return -ENOTBLK; + if (iomap->type == IOMAP_MAPPED && written < length && (flags & IOMAP_WRITE)) @@ -908,22 +939,6 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping,block,ext2_get_block); } -static ssize_t -ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - size_t count = iov_iter_count(iter); - loff_t offset = iocb->ki_pos; - ssize_t ret; - - ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block); - if (ret < 0 && iov_iter_rw(iter) == WRITE) - ext2_write_failed(mapping, offset + count); - return ret; -} - static int ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { @@ -946,7 +961,7 @@ const struct address_space_operations ext2_aops = { .write_begin = ext2_write_begin, .write_end = ext2_write_end, .bmap = ext2_bmap, - .direct_IO = ext2_direct_IO, + .direct_IO = noop_direct_IO, .writepages = ext2_writepages, .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, @@ -1259,9 +1274,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_dio_wait(inode); if (IS_DAX(inode)) - error = dax_zero_range(inode, newsize, - PAGE_ALIGN(newsize) - newsize, NULL, - &ext2_iomap_ops); + error = dax_truncate_page(inode, newsize, NULL, + &ext2_iomap_ops); else error = block_truncate_page(inode->i_mapping, newsize, ext2_get_block); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 7f5dfa87cc95..937dd8f60f96 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -269,26 +269,25 @@ out_dir: goto out; } -static int ext2_unlink(struct inode * dir, struct dentry *dentry) +static int ext2_unlink(struct inode *dir, struct dentry *dentry) { - struct inode * inode = d_inode(dentry); - struct ext2_dir_entry_2 * de; - struct page * page; - void *page_addr; + struct inode *inode = d_inode(dentry); + struct ext2_dir_entry_2 *de; + struct page *page; int err; err = dquot_initialize(dir); if (err) goto out; - de = ext2_find_entry(dir, &dentry->d_name, &page, &page_addr); + de = ext2_find_entry(dir, &dentry->d_name, &page); if (IS_ERR(de)) { err = PTR_ERR(de); goto out; } - err = ext2_delete_entry (de, page, page_addr); - ext2_put_page(page, page_addr); + err = ext2_delete_entry(de, page); + ext2_put_page(page, de); if (err) goto out; @@ -323,10 +322,8 @@ static int ext2_rename (struct mnt_idmap * idmap, struct inode * old_inode = d_inode(old_dentry); struct inode * new_inode = d_inode(new_dentry); struct page * dir_page = NULL; - void *dir_page_addr; struct ext2_dir_entry_2 * dir_de = NULL; struct page * old_page; - void *old_page_addr; struct ext2_dir_entry_2 * old_de; int err; @@ -335,28 +332,24 @@ static int ext2_rename (struct mnt_idmap * idmap, err = dquot_initialize(old_dir); if (err) - goto out; + return err; err = dquot_initialize(new_dir); if (err) - goto out; + return err; - old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page, - &old_page_addr); - if (IS_ERR(old_de)) { - err = PTR_ERR(old_de); - goto out; - } + old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page); + if (IS_ERR(old_de)) + return PTR_ERR(old_de); if (S_ISDIR(old_inode->i_mode)) { err = -EIO; - dir_de = ext2_dotdot(old_inode, &dir_page, &dir_page_addr); + dir_de = ext2_dotdot(old_inode, &dir_page); if (!dir_de) goto out_old; } if (new_inode) { - void *page_addr; struct page *new_page; struct ext2_dir_entry_2 *new_de; @@ -365,14 +358,13 @@ static int ext2_rename (struct mnt_idmap * idmap, goto out_dir; new_de = ext2_find_entry(new_dir, &new_dentry->d_name, - &new_page, &page_addr); + &new_page); if (IS_ERR(new_de)) { err = PTR_ERR(new_de); goto out_dir; } - err = ext2_set_link(new_dir, new_de, new_page, page_addr, - old_inode, true); - ext2_put_page(new_page, page_addr); + err = ext2_set_link(new_dir, new_de, new_page, old_inode, true); + ext2_put_page(new_page, new_de); if (err) goto out_dir; new_inode->i_ctime = current_time(new_inode); @@ -394,27 +386,20 @@ static int ext2_rename (struct mnt_idmap * idmap, old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); - ext2_delete_entry(old_de, old_page, old_page_addr); - - if (dir_de) { - if (old_dir != new_dir) { + err = ext2_delete_entry(old_de, old_page); + if (!err && dir_de) { + if (old_dir != new_dir) err = ext2_set_link(old_inode, dir_de, dir_page, - dir_page_addr, new_dir, false); + new_dir, false); - } - ext2_put_page(dir_page, dir_page_addr); inode_dec_link_count(old_dir); } - -out_old: - ext2_put_page(old_page, old_page_addr); -out: - return err; - out_dir: if (dir_de) - ext2_put_page(dir_page, dir_page_addr); - goto out_old; + ext2_put_page(dir_page, dir_de); +out_old: + ext2_put_page(old_page, old_de); + return err; } const struct inode_operations ext2_dir_inode_operations = { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f342f347a695..2959afc7541c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -668,10 +668,9 @@ static int ext2_setup_super (struct super_block * sb, es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); if (test_opt (sb, DEBUG)) - ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " + ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]", EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, - sbi->s_frag_size, sbi->s_groups_count, EXT2_BLOCKS_PER_GROUP(sb), EXT2_INODES_PER_GROUP(sb), @@ -1012,14 +1011,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } } - sbi->s_frag_size = EXT2_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (sbi->s_frag_size == 0) - goto cantfind_ext2; - sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); @@ -1045,11 +1037,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (sb->s_blocksize != sbi->s_frag_size) { + if (es->s_log_frag_size != es->s_log_block_size) { ext2_msg(sb, KERN_ERR, - "error: fragsize %lu != blocksize %lu" - "(not supported yet)", - sbi->s_frag_size, sb->s_blocksize); + "error: fragsize log %u != blocksize log %u", + le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits); goto failed_mount; } @@ -1066,12 +1057,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3); goto failed_mount; } - if (sbi->s_frags_per_group > sb->s_blocksize * 8) { - ext2_msg(sb, KERN_ERR, - "error: #fragments per group too big: %lu", - sbi->s_frags_per_group); - goto failed_mount; - } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, diff --git a/fs/ext2/trace.c b/fs/ext2/trace.c new file mode 100644 index 000000000000..b01cdf6526fd --- /dev/null +++ b/fs/ext2/trace.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "ext2.h" +#include <linux/uio.h> + +#define CREATE_TRACE_POINTS +#include "trace.h" diff --git a/fs/ext2/trace.h b/fs/ext2/trace.h new file mode 100644 index 000000000000..7d230e13576e --- /dev/null +++ b/fs/ext2/trace.h @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ext2 + +#if !defined(_EXT2_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _EXT2_TRACE_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(ext2_dio_class, + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), + TP_ARGS(iocb, iter, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(size_t, count) + __field(int, ki_flags) + __field(bool, aio) + __field(ssize_t, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->count = iov_iter_count(iter); + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zu flags %s aio %d ret %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->count, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->ret) +); + +#define DEFINE_DIO_RW_EVENT(name) \ +DEFINE_EVENT(ext2_dio_class, name, \ + TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), \ + TP_ARGS(iocb, iter, ret)) +DEFINE_DIO_RW_EVENT(ext2_dio_write_begin); +DEFINE_DIO_RW_EVENT(ext2_dio_write_end); +DEFINE_DIO_RW_EVENT(ext2_dio_write_buff_end); +DEFINE_DIO_RW_EVENT(ext2_dio_read_begin); +DEFINE_DIO_RW_EVENT(ext2_dio_read_end); + +TRACE_EVENT(ext2_dio_write_endio, + TP_PROTO(struct kiocb *iocb, ssize_t size, int ret), + TP_ARGS(iocb, size, ret), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, isize) + __field(loff_t, pos) + __field(ssize_t, size) + __field(int, ki_flags) + __field(bool, aio) + __field(int, ret) + ), + TP_fast_assign( + __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev; + __entry->ino = file_inode(iocb->ki_filp)->i_ino; + __entry->isize = file_inode(iocb->ki_filp)->i_size; + __entry->pos = iocb->ki_pos; + __entry->size = size; + __entry->ki_flags = iocb->ki_flags; + __entry->aio = !is_sync_kiocb(iocb); + __entry->ret = ret; + ), + TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zd flags %s aio %d ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->pos, + __entry->size, + __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS), + __entry->aio, + __entry->ret) +); + +#endif /* _EXT2_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include <trace/define_trace.h> |