summaryrefslogtreecommitdiff
path: root/fs/ext2
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-06-29 13:39:51 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-06-29 13:39:51 -0700
commit c6b0271053e7a5ae57511363213777f706b60489 (patch)
tree 2395b0f0b876bf06797312ccb477600252dbdb07 /fs/ext2
parent 18c9901d7435b20b13357907bac2c0e3b0fd4cd6 (diff)
parent 028f6055c912588e6f72722d89c30b401bbcf013 (diff)
Merge tag 'fs_for_v6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull misc filesystem updates from Jan Kara:

 - Rewrite kmap_local() handling in ext2

 - Convert ext2 direct IO path to iomap (with some infrastructure tweaks
   associated with that)

 - Convert two boilerplate licenses in udf to SPDX identifiers

 - Other small udf, ext2, and quota fixes and cleanups

* tag 'fs_for_v6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  udf: Fix uninitialized array access for some pathnames
  ext2: Drop fragment support
  quota: fix warning in dqgrab()
  quota: Properly disable quotas when add_dquot_ref() fails
  fs: udf: udftime: Replace LGPL boilerplate with SPDX identifier
  fs: udf: Replace GPL 2.0 boilerplate license notice with SPDX identifier
  fs: Drop wait_unfrozen wait queue
  ext2_find_entry()/ext2_dotdot(): callers don't need page_addr anymore
  ext2_{set_link,delete_entry}(): don't bother with page_addr
  ext2_put_page(): accept any pointer within the page
  ext2_get_page(): saner type
  ext2: use offset_in_page() instead of open-coding it as subtraction
  ext2_rename(): set_link and delete_entry may fail
  ext2: Add direct-io trace points
  ext2: Move direct-io to use iomap
  ext2: Use generic_buffers_fsync() implementation
  ext4: Use generic_buffers_fsync_noflush() implementation
  fs/buffer.c: Add generic_buffers_fsync*() implementation
  ext2/dax: Fix ext2_setsize when len is page aligned
Diffstat (limited to 'fs/ext2')
-rw-r--r-- fs/ext2/Makefile 5
-rw-r--r-- fs/ext2/dir.c 136
-rw-r--r-- fs/ext2/ext2.h 23
-rw-r--r-- fs/ext2/file.c 126
-rw-r--r-- fs/ext2/inode.c 58
-rw-r--r-- fs/ext2/namei.c 63
-rw-r--r-- fs/ext2/super.c 23
-rw-r--r-- fs/ext2/trace.c 6
-rw-r--r-- fs/ext2/trace.h 94
9 files changed, 356 insertions, 178 deletions
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index 311479d864a7..8860948ef9ca 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -6,7 +6,10 @@
obj-$(CONFIG_EXT2_FS) += ext2.o
ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
- ioctl.o namei.o super.o symlink.o
+ ioctl.o namei.o super.o symlink.o trace.o
+
+# For tracepoints to include our trace.h from tracepoint infrastructure
+CFLAGS_trace.o := -I$(src)
ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 4a6955a0a116..42db804794bd 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -186,23 +186,25 @@ fail:
* NOTE: ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_page()
* and should be treated as a call to ext2_get_page() for nesting purposes.
*/
-static struct page * ext2_get_page(struct inode *dir, unsigned long n,
- int quiet, void **page_addr)
+static void *ext2_get_page(struct inode *dir, unsigned long n,
+ int quiet, struct page **page)
{
struct address_space *mapping = dir->i_mapping;
struct folio *folio = read_mapping_folio(mapping, n, NULL);
+ void *page_addr;
if (IS_ERR(folio))
- return &folio->page;
- *page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
+ return ERR_CAST(folio);
+ page_addr = kmap_local_folio(folio, n & (folio_nr_pages(folio) - 1));
if (unlikely(!folio_test_checked(folio))) {
- if (!ext2_check_page(&folio->page, quiet, *page_addr))
+ if (!ext2_check_page(&folio->page, quiet, page_addr))
goto fail;
}
- return &folio->page;
+ *page = &folio->page;
+ return page_addr;
fail:
- ext2_put_page(&folio->page, *page_addr);
+ ext2_put_page(&folio->page, page_addr);
return ERR_PTR(-EIO);
}
@@ -240,7 +242,7 @@ ext2_validate_entry(char *base, unsigned offset, unsigned mask)
break;
p = ext2_next_entry(p);
}
- return (char *)p - base;
+ return offset_in_page(p);
}
static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
@@ -271,16 +273,17 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_FILETYPE);
for ( ; n < npages; n++, offset = 0) {
- char *kaddr, *limit;
ext2_dirent *de;
- struct page *page = ext2_get_page(inode, n, 0, (void **)&kaddr);
+ struct page *page;
+ char *kaddr = ext2_get_page(inode, n, 0, &page);
+ char *limit;
- if (IS_ERR(page)) {
+ if (IS_ERR(kaddr)) {
ext2_error(sb, __func__,
"bad page in #%lu",
inode->i_ino);
ctx->pos += PAGE_SIZE - offset;
- return PTR_ERR(page);
+ return PTR_ERR(kaddr);
}
if (unlikely(need_revalidate)) {
if (offset) {
@@ -296,7 +299,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
if (de->rec_len == 0) {
ext2_error(sb, __func__,
"zero-length directory entry");
- ext2_put_page(page, kaddr);
+ ext2_put_page(page, de);
return -EIO;
}
if (de->inode) {
@@ -308,7 +311,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit(ctx, de->name, de->name_len,
le32_to_cpu(de->inode),
d_type)) {
- ext2_put_page(page, kaddr);
+ ext2_put_page(page, de);
return 0;
}
}
@@ -336,8 +339,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
* should be treated as a call to ext2_get_page() for nesting purposes.
*/
struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir,
- const struct qstr *child, struct page **res_page,
- void **res_page_addr)
+ const struct qstr *child, struct page **res_page)
{
const char *name = child->name;
int namelen = child->len;
@@ -347,40 +349,36 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir,
struct page *page = NULL;
struct ext2_inode_info *ei = EXT2_I(dir);
ext2_dirent * de;
- void *page_addr;
if (npages == 0)
goto out;
/* OFFSET_CACHE */
*res_page = NULL;
- *res_page_addr = NULL;
start = ei->i_dir_start_lookup;
if (start >= npages)
start = 0;
n = start;
do {
- char *kaddr;
- page = ext2_get_page(dir, n, 0, &page_addr);
- if (IS_ERR(page))
- return ERR_CAST(page);
+ char *kaddr = ext2_get_page(dir, n, 0, &page);
+ if (IS_ERR(kaddr))
+ return ERR_CAST(kaddr);
- kaddr = page_addr;
de = (ext2_dirent *) kaddr;
kaddr += ext2_last_byte(dir, n) - reclen;
while ((char *) de <= kaddr) {
if (de->rec_len == 0) {
ext2_error(dir->i_sb, __func__,
"zero-length directory entry");
- ext2_put_page(page, page_addr);
+ ext2_put_page(page, de);
goto out;
}
if (ext2_match(namelen, name, de))
goto found;
de = ext2_next_entry(de);
}
- ext2_put_page(page, page_addr);
+ ext2_put_page(page, kaddr);
if (++n >= npages)
n = 0;
@@ -398,7 +396,6 @@ out:
found:
*res_page = page;
- *res_page_addr = page_addr;
ei->i_dir_start_lookup = n;
return de;
}
@@ -415,33 +412,26 @@ found:
* ext2_find_entry() and ext2_dotdot() act as a call to ext2_get_page() and
* should be treated as a call to ext2_get_page() for nesting purposes.
*/
-struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p,
- void **pa)
+struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p)
{
- void *page_addr;
- struct page *page = ext2_get_page(dir, 0, 0, &page_addr);
- ext2_dirent *de = NULL;
+ ext2_dirent *de = ext2_get_page(dir, 0, 0, p);
- if (!IS_ERR(page)) {
- de = ext2_next_entry((ext2_dirent *) page_addr);
- *p = page;
- *pa = page_addr;
- }
- return de;
+ if (!IS_ERR(de))
+ return ext2_next_entry(de);
+ return NULL;
}
int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino)
{
struct ext2_dir_entry_2 *de;
struct page *page;
- void *page_addr;
- de = ext2_find_entry(dir, child, &page, &page_addr);
+ de = ext2_find_entry(dir, child, &page);
if (IS_ERR(de))
return PTR_ERR(de);
*ino = le32_to_cpu(de->inode);
- ext2_put_page(page, page_addr);
+ ext2_put_page(page, de);
return 0;
}
@@ -462,11 +452,9 @@ static int ext2_handle_dirsync(struct inode *dir)
}
int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
- struct page *page, void *page_addr, struct inode *inode,
- bool update_times)
+ struct page *page, struct inode *inode, bool update_times)
{
- loff_t pos = page_offset(page) +
- (char *) de - (char *) page_addr;
+ loff_t pos = page_offset(page) + offset_in_page(de);
unsigned len = ext2_rec_len_from_disk(de->rec_len);
int err;
@@ -498,7 +486,6 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
unsigned reclen = EXT2_DIR_REC_LEN(namelen);
unsigned short rec_len, name_len;
struct page *page = NULL;
- void *page_addr = NULL;
ext2_dirent * de;
unsigned long npages = dir_pages(dir);
unsigned long n;
@@ -511,15 +498,12 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
* to protect that region.
*/
for (n = 0; n <= npages; n++) {
- char *kaddr;
+ char *kaddr = ext2_get_page(dir, n, 0, &page);
char *dir_end;
- page = ext2_get_page(dir, n, 0, &page_addr);
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto out;
+ if (IS_ERR(kaddr))
+ return PTR_ERR(kaddr);
lock_page(page);
- kaddr = page_addr;
dir_end = kaddr + ext2_last_byte(dir, n);
de = (ext2_dirent *)kaddr;
kaddr += PAGE_SIZE - reclen;
@@ -550,14 +534,13 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
de = (ext2_dirent *) ((char *) de + rec_len);
}
unlock_page(page);
- ext2_put_page(page, page_addr);
+ ext2_put_page(page, kaddr);
}
BUG();
return -EINVAL;
got_it:
- pos = page_offset(page) +
- (char *)de - (char *)page_addr;
+ pos = page_offset(page) + offset_in_page(de);
err = ext2_prepare_chunk(page, pos, rec_len);
if (err)
goto out_unlock;
@@ -578,8 +561,7 @@ got_it:
err = ext2_handle_dirsync(dir);
/* OFFSET_CACHE */
out_put:
- ext2_put_page(page, page_addr);
-out:
+ ext2_put_page(page, de);
return err;
out_unlock:
unlock_page(page);
@@ -590,34 +572,36 @@ out_unlock:
* ext2_delete_entry deletes a directory entry by merging it with the
* previous entry. Page is up-to-date.
*/
-int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page,
- char *kaddr)
+int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page)
{
struct inode *inode = page->mapping->host;
- unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
- unsigned to = ((char *)dir - kaddr) +
+ char *kaddr = (char *)((unsigned long)dir & PAGE_MASK);
+ unsigned from = offset_in_page(dir) & ~(ext2_chunk_size(inode)-1);
+ unsigned to = offset_in_page(dir) +
ext2_rec_len_from_disk(dir->rec_len);
loff_t pos;
- ext2_dirent * pde = NULL;
- ext2_dirent * de = (ext2_dirent *) (kaddr + from);
+ ext2_dirent *pde = NULL;
+ ext2_dirent *de = (ext2_dirent *)(kaddr + from);
int err;
while ((char*)de < (char*)dir) {
if (de->rec_len == 0) {
ext2_error(inode->i_sb, __func__,
"zero-length directory entry");
- err = -EIO;
- goto out;
+ return -EIO;
}
pde = de;
de = ext2_next_entry(de);
}
if (pde)
- from = (char *)pde - kaddr;
+ from = offset_in_page(pde);
pos = page_offset(page) + from;
lock_page(page);
err = ext2_prepare_chunk(page, pos, to - from);
- BUG_ON(err);
+ if (err) {
+ unlock_page(page);
+ return err;
+ }
if (pde)
pde->rec_len = ext2_rec_len_to_disk(to - from);
dir->inode = 0;
@@ -625,9 +609,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page,
inode->i_ctime = inode->i_mtime = current_time(inode);
EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
mark_inode_dirty(inode);
- err = ext2_handle_dirsync(inode);
-out:
- return err;
+ return ext2_handle_dirsync(inode);
}
/*
@@ -677,19 +659,17 @@ fail:
*/
int ext2_empty_dir (struct inode * inode)
{
- void *page_addr = NULL;
- struct page *page = NULL;
+ struct page *page;
+ char *kaddr;
unsigned long i, npages = dir_pages(inode);
for (i = 0; i < npages; i++) {
- char *kaddr;
- ext2_dirent * de;
- page = ext2_get_page(inode, i, 0, &page_addr);
+ ext2_dirent *de;
- if (IS_ERR(page))
+ kaddr = ext2_get_page(inode, i, 0, &page);
+ if (IS_ERR(kaddr))
return 0;
- kaddr = page_addr;
de = (ext2_dirent *)kaddr;
kaddr += ext2_last_byte(inode, i) - EXT2_DIR_REC_LEN(1);
@@ -715,12 +695,12 @@ int ext2_empty_dir (struct inode * inode)
}
de = ext2_next_entry(de);
}
- ext2_put_page(page, page_addr);
+ ext2_put_page(page, kaddr);
}
return 1;
not_empty:
- ext2_put_page(page, page_addr);
+ ext2_put_page(page, kaddr);
return 0;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8244366862e4..35a041c47c38 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -70,10 +70,7 @@ struct mb_cache;
* second extended-fs super-block data in memory
*/
struct ext2_sb_info {
- unsigned long s_frag_size; /* Size of a fragment in bytes */
- unsigned long s_frags_per_block;/* Number of fragments per block */
unsigned long s_inodes_per_block;/* Number of inodes per block */
- unsigned long s_frags_per_group;/* Number of fragments in a group */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
@@ -189,15 +186,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
#define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino)
/*
- * Macro-instructions used to manage fragments
- */
-#define EXT2_MIN_FRAG_SIZE 1024
-#define EXT2_MAX_FRAG_SIZE 4096
-#define EXT2_MIN_FRAG_LOG_SIZE 10
-#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size)
-#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block)
-
-/*
* Structure of a blocks group descriptor
*/
struct ext2_group_desc
@@ -730,14 +718,12 @@ extern int ext2_inode_by_name(struct inode *dir,
const struct qstr *child, ino_t *ino);
extern int ext2_make_empty(struct inode *, struct inode *);
extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *,
- struct page **, void **res_page_addr);
-extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
- char *kaddr);
+ struct page **);
+extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page);
extern int ext2_empty_dir (struct inode *);
-extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
+extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p);
int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
- struct page *page, void *page_addr, struct inode *inode,
- bool update_times);
+ struct page *page, struct inode *inode, bool update_times);
static inline void ext2_put_page(struct page *page, void *page_addr)
{
kunmap_local(page_addr);
@@ -754,6 +740,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
extern struct inode *ext2_iget (struct super_block *, unsigned long);
extern int ext2_write_inode (struct inode *, struct writeback_control *);
extern void ext2_evict_inode(struct inode *);
+void ext2_write_failed(struct address_space *mapping, loff_t to);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern int ext2_setattr (struct mnt_idmap *, struct dentry *, struct iattr *);
extern int ext2_getattr (struct mnt_idmap *, const struct path *,
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index d1ae0f0a3726..0b4c91c62e1f 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -25,9 +25,11 @@
#include <linux/quotaops.h>
#include <linux/iomap.h>
#include <linux/uio.h>
+#include <linux/buffer_head.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
+#include "trace.h"
#ifdef CONFIG_FS_DAX
static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -153,7 +155,7 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
int ret;
struct super_block *sb = file->f_mapping->host->i_sb;
- ret = generic_file_fsync(file, start, end, datasync);
+ ret = generic_buffers_fsync(file, start, end, datasync);
if (ret == -EIO)
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
@@ -161,12 +163,131 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
return ret;
}
+static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+ /*
+ * Direct-I/O read: call iomap_dio_rw() with ext2_iomap_ops while
+ * holding the inode lock shared, and return its result unchanged.
+ * No iomap_dio_ops are passed for reads. The read_begin/read_end
+ * tracepoints bracket the locked section.
+ */
+ trace_ext2_dio_read_begin(iocb, to, 0);
+ inode_lock_shared(inode);
+ ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
+ inode_unlock_shared(inode);
+ trace_ext2_dio_read_end(iocb, to, ret);
+
+ return ret;
+}
+/*
+ * ->end_io hook for ext2 direct writes, installed via ext2_dio_write_ops.
+ *
+ * @iocb:  kiocb of the write; iocb->ki_pos is taken as the start position
+ * @size:  byte count reported for the I/O
+ * @error: status of the I/O; when non-zero, i_size is left untouched
+ * @flags: not used by this function
+ *
+ * When @error is zero and ki_pos + @size lies beyond the current i_size,
+ * i_size is raised to that value and the inode is marked dirty.
+ * The ext2_dio_write_endio tracepoint fires on both paths.
+ *
+ * Returns @error unchanged.
+ */
+static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
+ int error, unsigned int flags)
+{
+ loff_t pos = iocb->ki_pos;
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (error)
+ goto out;
+
+ /*
+ * If we are extending the file, we have to update i_size here before
+ * page cache gets invalidated in iomap_dio_rw(). This prevents racing
+ * buffered reads from zeroing out too much from page cache pages.
+ * Note that all extending writes always happen synchronously with
+ * inode lock held by ext2_dio_write_iter(). So it is safe to update
+ * inode size here for extending file writes.
+ */
+ pos += size;
+ if (pos > i_size_read(inode)) {
+ i_size_write(inode, pos);
+ mark_inode_dirty(inode);
+ }
+out:
+ trace_ext2_dio_write_endio(iocb, size, error);
+ return error;
+}
+/*
+ * Completion hooks for direct writes; ext2_dio_write_iter() hands this
+ * table to iomap_dio_rw(). Only ->end_io is provided.
+ */
+static const struct iomap_dio_ops ext2_dio_write_ops = {
+ .end_io = ext2_dio_write_end_io,
+};
+/*
+ * Write path for IOCB_DIRECT kiocbs; called from ext2_file_write_iter().
+ *
+ * With the inode lock held exclusively it:
+ *   1. runs generic_write_checks() and kiocb_modified();
+ *   2. calls iomap_dio_rw() with ext2_iomap_ops and ext2_dio_write_ops,
+ *      setting IOMAP_DIO_FORCE_WAIT for extending or unaligned writes;
+ *   3. maps -ENOTBLK to 0 so the buffered fallback below takes over;
+ *   4. on any other negative result except -EIOCBQUEUED, passes the
+ *      attempted end offset (offset + count, both sampled on entry) to
+ *      ext2_write_failed();
+ *   5. if data is still left in @from, writes it with
+ *      generic_perform_write().
+ *
+ * Returns the iomap_dio_rw() result, plus the buffered byte count when
+ * the fallback ran, or a negative error from one of the steps above.
+ */
+static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+ unsigned int flags = 0;
+ unsigned long blocksize = inode->i_sb->s_blocksize;
+ loff_t offset = iocb->ki_pos;
+ loff_t count = iov_iter_count(from);
+ ssize_t status = 0;
+
+ trace_ext2_dio_write_begin(iocb, from, 0);
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out_unlock;
+
+ ret = kiocb_modified(iocb);
+ if (ret)
+ goto out_unlock;
+
+ /* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
+ if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
+ (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
+ flags |= IOMAP_DIO_FORCE_WAIT;
+
+ ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
+ flags, NULL, 0);
+
+ /* ENOTBLK is magic return value for fallback to buffered-io */
+ if (ret == -ENOTBLK)
+ ret = 0;
+
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ ext2_write_failed(inode->i_mapping, offset + count);
+
+ /*
+ * handle case for partial write and for fallback to buffered write:
+ * push the remainder through generic_perform_write(), then write that
+ * byte range back and, only if writeback reported no error, invalidate
+ * the page-cache pages covering it.
+ * NOTE(review): ret2 only gates the invalidation; a writeback error is
+ * not returned to the caller and ret still includes status - confirm
+ * this is intended.
+ */
+ if (ret >= 0 && iov_iter_count(from)) {
+ loff_t pos, endbyte;
+ int ret2;
+
+ iocb->ki_flags &= ~IOCB_DIRECT;
+ pos = iocb->ki_pos;
+ status = generic_perform_write(iocb, from);
+ if (unlikely(status < 0)) {
+ ret = status;
+ goto out_unlock;
+ }
+
+ iocb->ki_pos += status;
+ ret += status;
+ endbyte = pos + status - 1;
+ ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
+ endbyte);
+ if (!ret2)
+ invalidate_mapping_pages(inode->i_mapping,
+ pos >> PAGE_SHIFT,
+ endbyte >> PAGE_SHIFT);
+ if (ret > 0)
+ generic_write_sync(iocb, ret);
+ }
+
+out_unlock:
+ inode_unlock(inode);
+ if (status)
+ trace_ext2_dio_write_buff_end(iocb, from, status);
+ trace_ext2_dio_write_end(iocb, from, ret);
+ return ret;
+}
+
static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
if (IS_DAX(iocb->ki_filp->f_mapping->host))
return ext2_dax_read_iter(iocb, to);
#endif
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return ext2_dio_read_iter(iocb, to);
+
return generic_file_read_iter(iocb, to);
}
@@ -176,6 +297,9 @@ static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (IS_DAX(iocb->ki_filp->f_mapping->host))
return ext2_dax_write_iter(iocb, from);
#endif
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return ext2_dio_write_iter(iocb, from);
+
return generic_file_write_iter(iocb, from);
}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 26f135e7ffce..75983215c7a1 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -56,7 +56,7 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
static void ext2_truncate_blocks(struct inode *inode, loff_t offset);
-static void ext2_write_failed(struct address_space *mapping, loff_t to)
+void ext2_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
@@ -809,9 +809,27 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
bool new = false, boundary = false;
u32 bno;
int ret;
+ bool create = flags & IOMAP_WRITE;
+
+ /*
+ * For writes that could fill holes inside i_size on a
+ * DIO_SKIP_HOLES filesystem we forbid block creations: only
+ * overwrites are permitted.
+ */
+ if ((flags & IOMAP_DIRECT) &&
+ (first_block << blkbits) < i_size_read(inode))
+ create = 0;
+
+ /*
+ * Writes that span EOF might trigger an IO size update on completion,
+ * so consider them to be dirty for the purposes of O_DSYNC even if
+ * no other metadata changes are pending or have been made here.
+ */
+ if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
ret = ext2_get_blocks(inode, first_block, max_blocks,
- &bno, &new, &boundary, flags & IOMAP_WRITE);
+ &bno, &new, &boundary, create);
if (ret < 0)
return ret;
@@ -823,6 +841,12 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->bdev = inode->i_sb->s_bdev;
if (ret == 0) {
+ /*
+ * Switch to buffered-io for writing to holes in a non-extent
+ * based filesystem to avoid stale data exposure problem.
+ */
+ if (!create && (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))
+ return -ENOTBLK;
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
iomap->length = 1 << blkbits;
@@ -844,6 +868,13 @@ static int
ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap)
{
+ /*
+ * Switch to buffered-io in case of any error.
+ * Blocks allocated can be used by the buffered-io path.
+ */
+ if ((flags & IOMAP_DIRECT) && (flags & IOMAP_WRITE) && written == 0)
+ return -ENOTBLK;
+
if (iomap->type == IOMAP_MAPPED &&
written < length &&
(flags & IOMAP_WRITE))
@@ -908,22 +939,6 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping,block,ext2_get_block);
}
-static ssize_t
-ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- size_t count = iov_iter_count(iter);
- loff_t offset = iocb->ki_pos;
- ssize_t ret;
-
- ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
- if (ret < 0 && iov_iter_rw(iter) == WRITE)
- ext2_write_failed(mapping, offset + count);
- return ret;
-}
-
static int
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
@@ -946,7 +961,7 @@ const struct address_space_operations ext2_aops = {
.write_begin = ext2_write_begin,
.write_end = ext2_write_end,
.bmap = ext2_bmap,
- .direct_IO = ext2_direct_IO,
+ .direct_IO = noop_direct_IO,
.writepages = ext2_writepages,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
@@ -1259,9 +1274,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_dio_wait(inode);
if (IS_DAX(inode))
- error = dax_zero_range(inode, newsize,
- PAGE_ALIGN(newsize) - newsize, NULL,
- &ext2_iomap_ops);
+ error = dax_truncate_page(inode, newsize, NULL,
+ &ext2_iomap_ops);
else
error = block_truncate_page(inode->i_mapping,
newsize, ext2_get_block);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 7f5dfa87cc95..937dd8f60f96 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -269,26 +269,25 @@ out_dir:
goto out;
}
-static int ext2_unlink(struct inode * dir, struct dentry *dentry)
+static int ext2_unlink(struct inode *dir, struct dentry *dentry)
{
- struct inode * inode = d_inode(dentry);
- struct ext2_dir_entry_2 * de;
- struct page * page;
- void *page_addr;
+ struct inode *inode = d_inode(dentry);
+ struct ext2_dir_entry_2 *de;
+ struct page *page;
int err;
err = dquot_initialize(dir);
if (err)
goto out;
- de = ext2_find_entry(dir, &dentry->d_name, &page, &page_addr);
+ de = ext2_find_entry(dir, &dentry->d_name, &page);
if (IS_ERR(de)) {
err = PTR_ERR(de);
goto out;
}
- err = ext2_delete_entry (de, page, page_addr);
- ext2_put_page(page, page_addr);
+ err = ext2_delete_entry(de, page);
+ ext2_put_page(page, de);
if (err)
goto out;
@@ -323,10 +322,8 @@ static int ext2_rename (struct mnt_idmap * idmap,
struct inode * old_inode = d_inode(old_dentry);
struct inode * new_inode = d_inode(new_dentry);
struct page * dir_page = NULL;
- void *dir_page_addr;
struct ext2_dir_entry_2 * dir_de = NULL;
struct page * old_page;
- void *old_page_addr;
struct ext2_dir_entry_2 * old_de;
int err;
@@ -335,28 +332,24 @@ static int ext2_rename (struct mnt_idmap * idmap,
err = dquot_initialize(old_dir);
if (err)
- goto out;
+ return err;
err = dquot_initialize(new_dir);
if (err)
- goto out;
+ return err;
- old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page,
- &old_page_addr);
- if (IS_ERR(old_de)) {
- err = PTR_ERR(old_de);
- goto out;
- }
+ old_de = ext2_find_entry(old_dir, &old_dentry->d_name, &old_page);
+ if (IS_ERR(old_de))
+ return PTR_ERR(old_de);
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
- dir_de = ext2_dotdot(old_inode, &dir_page, &dir_page_addr);
+ dir_de = ext2_dotdot(old_inode, &dir_page);
if (!dir_de)
goto out_old;
}
if (new_inode) {
- void *page_addr;
struct page *new_page;
struct ext2_dir_entry_2 *new_de;
@@ -365,14 +358,13 @@ static int ext2_rename (struct mnt_idmap * idmap,
goto out_dir;
new_de = ext2_find_entry(new_dir, &new_dentry->d_name,
- &new_page, &page_addr);
+ &new_page);
if (IS_ERR(new_de)) {
err = PTR_ERR(new_de);
goto out_dir;
}
- err = ext2_set_link(new_dir, new_de, new_page, page_addr,
- old_inode, true);
- ext2_put_page(new_page, page_addr);
+ err = ext2_set_link(new_dir, new_de, new_page, old_inode, true);
+ ext2_put_page(new_page, new_de);
if (err)
goto out_dir;
new_inode->i_ctime = current_time(new_inode);
@@ -394,27 +386,20 @@ static int ext2_rename (struct mnt_idmap * idmap,
old_inode->i_ctime = current_time(old_inode);
mark_inode_dirty(old_inode);
- ext2_delete_entry(old_de, old_page, old_page_addr);
-
- if (dir_de) {
- if (old_dir != new_dir) {
+ err = ext2_delete_entry(old_de, old_page);
+ if (!err && dir_de) {
+ if (old_dir != new_dir)
err = ext2_set_link(old_inode, dir_de, dir_page,
- dir_page_addr, new_dir, false);
+ new_dir, false);
- }
- ext2_put_page(dir_page, dir_page_addr);
inode_dec_link_count(old_dir);
}
-
-out_old:
- ext2_put_page(old_page, old_page_addr);
-out:
- return err;
-
out_dir:
if (dir_de)
- ext2_put_page(dir_page, dir_page_addr);
- goto out_old;
+ ext2_put_page(dir_page, dir_de);
+out_old:
+ ext2_put_page(old_page, old_de);
+ return err;
}
const struct inode_operations ext2_dir_inode_operations = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f342f347a695..2959afc7541c 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -668,10 +668,9 @@ static int ext2_setup_super (struct super_block * sb,
es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
if (test_opt (sb, DEBUG))
- ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
+ ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, "
"bpg=%lu, ipg=%lu, mo=%04lx]",
EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
- sbi->s_frag_size,
sbi->s_groups_count,
EXT2_BLOCKS_PER_GROUP(sb),
EXT2_INODES_PER_GROUP(sb),
@@ -1012,14 +1011,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
}
- sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
- le32_to_cpu(es->s_log_frag_size);
- if (sbi->s_frag_size == 0)
- goto cantfind_ext2;
- sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
-
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
@@ -1045,11 +1037,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (sb->s_blocksize != sbi->s_frag_size) {
+ if (es->s_log_frag_size != es->s_log_block_size) {
ext2_msg(sb, KERN_ERR,
- "error: fragsize %lu != blocksize %lu"
- "(not supported yet)",
- sbi->s_frag_size, sb->s_blocksize);
+ "error: fragsize log %u != blocksize log %u",
+ le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits);
goto failed_mount;
}
@@ -1066,12 +1057,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group, sbi->s_inodes_per_group + 3);
goto failed_mount;
}
- if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
- ext2_msg(sb, KERN_ERR,
- "error: #fragments per group too big: %lu",
- sbi->s_frags_per_group);
- goto failed_mount;
- }
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
diff --git a/fs/ext2/trace.c b/fs/ext2/trace.c
new file mode 100644
index 000000000000..b01cdf6526fd
--- /dev/null
+++ b/fs/ext2/trace.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "ext2.h"
+#include <linux/uio.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/fs/ext2/trace.h b/fs/ext2/trace.h
new file mode 100644
index 000000000000..7d230e13576e
--- /dev/null
+++ b/fs/ext2/trace.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ext2
+
+#if !defined(_EXT2_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _EXT2_TRACE_H
+
+#include <linux/tracepoint.h>
+/*
+ * Event class shared by the ext2 direct-I/O begin/end tracepoints.
+ * Each event records the superblock device and inode number, i_size,
+ * iocb->ki_pos, iov_iter_count(@iter), iocb->ki_flags, !is_sync_kiocb()
+ * as "aio", and the caller-supplied @ret.
+ */
+DECLARE_EVENT_CLASS(ext2_dio_class,
+ TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret),
+ TP_ARGS(iocb, iter, ret),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, isize)
+ __field(loff_t, pos)
+ __field(size_t, count)
+ __field(int, ki_flags)
+ __field(bool, aio)
+ __field(ssize_t, ret)
+ ),
+ TP_fast_assign(
+ __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
+ __entry->ino = file_inode(iocb->ki_filp)->i_ino;
+ __entry->isize = file_inode(iocb->ki_filp)->i_size;
+ __entry->pos = iocb->ki_pos;
+ __entry->count = iov_iter_count(iter);
+ __entry->ki_flags = iocb->ki_flags;
+ __entry->aio = !is_sync_kiocb(iocb);
+ __entry->ret = ret;
+ ),
+ TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zu flags %s aio %d ret %zd",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->isize,
+ __entry->pos,
+ __entry->count,
+ __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
+ __entry->aio,
+ __entry->ret)
+);
+/*
+ * Stamp out one ext2_dio_class event per direct-I/O hook point.
+ * Every instance takes (iocb, iter, ret).
+ */
+#define DEFINE_DIO_RW_EVENT(name) \
+DEFINE_EVENT(ext2_dio_class, name, \
+ TP_PROTO(struct kiocb *iocb, struct iov_iter *iter, ssize_t ret), \
+ TP_ARGS(iocb, iter, ret))
+DEFINE_DIO_RW_EVENT(ext2_dio_write_begin);
+DEFINE_DIO_RW_EVENT(ext2_dio_write_end);
+DEFINE_DIO_RW_EVENT(ext2_dio_write_buff_end);
+DEFINE_DIO_RW_EVENT(ext2_dio_read_begin);
+DEFINE_DIO_RW_EVENT(ext2_dio_read_end);
+/*
+ * Emitted from ext2_dio_write_end_io(). Records the same inode and kiocb
+ * fields as ext2_dio_class, but takes the @size passed to the hook and an
+ * int status @ret instead of an iov_iter and an ssize_t result.
+ */
+TRACE_EVENT(ext2_dio_write_endio,
+ TP_PROTO(struct kiocb *iocb, ssize_t size, int ret),
+ TP_ARGS(iocb, size, ret),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, isize)
+ __field(loff_t, pos)
+ __field(ssize_t, size)
+ __field(int, ki_flags)
+ __field(bool, aio)
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ __entry->dev = file_inode(iocb->ki_filp)->i_sb->s_dev;
+ __entry->ino = file_inode(iocb->ki_filp)->i_ino;
+ __entry->isize = file_inode(iocb->ki_filp)->i_size;
+ __entry->pos = iocb->ki_pos;
+ __entry->size = size;
+ __entry->ki_flags = iocb->ki_flags;
+ __entry->aio = !is_sync_kiocb(iocb);
+ __entry->ret = ret;
+ ),
+ TP_printk("dev %d:%d ino 0x%lx isize 0x%llx pos 0x%llx len %zd flags %s aio %d ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->isize,
+ __entry->pos,
+ __entry->size,
+ __print_flags(__entry->ki_flags, "|", TRACE_IOCB_STRINGS),
+ __entry->aio,
+ __entry->ret)
+);
+
+#endif /* _EXT2_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>