diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 12:47:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 12:47:20 -0700 |
commit | a0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch) | |
tree | 9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /fs | |
parent | 0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff) | |
parent | fcaa174a9c995cf0af3967e55644a1543ea07e36 (diff) |
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull request via Keith:
- Various cleanups all around (Irvin, Chaitanya, Christophe)
- Better struct packing (Christophe JAILLET)
- Reduce controller error logs for optional commands (Keith)
- Support for >=64KiB block sizes (Daniel Gomez)
- Fabrics fixes and code organization (Max, Chaitanya, Daniel
Wagner)
- bcache updates via Coly:
- Fix a race at init time (Mingzhe Zou)
- Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye)
- use page pinning in the block layer for dio (David)
- convert old block dio code to page pinning (David, Christoph)
- cleanups for pktcdvd (Andy)
- cleanups for rnbd (Guoqing)
- use the unchecked __bio_add_page() for the initial single page
additions (Johannes)
- fix overflows in the Amiga partition handling code (Michael)
- improve mq-deadline zoned device support (Bart)
- keep passthrough requests out of the IO schedulers (Christoph, Ming)
- improve support for flush requests, making them less special to deal
with (Christoph)
- add bdev holder ops and shutdown methods (Christoph)
- fix the name_to_dev_t() situation and use cases (Christoph)
- decouple the block open flags from fmode_t (Christoph)
- ublk updates and cleanups, including adding user copy support (Ming)
- BFQ sanity checking (Bart)
- convert brd from radix to xarray (Pankaj)
- constify various structures (Thomas, Ivan)
- more fine grained persistent reservation ioctl capability checks
(Jingbo)
- misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan,
Jordy, Li, Min, Yu, Zhong, Waiman)
* tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits)
scsi/sg: don't grab scsi host module reference
ext4: Fix warning in blkdev_put()
block: don't return -EINVAL for not found names in devt_from_devname
cdrom: Fix spectre-v1 gadget
block: Improve kernel-doc headers
blk-mq: don't insert passthrough request into sw queue
bsg: make bsg_class a static const structure
ublk: make ublk_chr_class a static const structure
aoe: make aoe_class a static const structure
block/rnbd: make all 'class' structures const
block: fix the exclusive open mask in disk_scan_partitions
block: add overflow checks for Amiga partition support
block: change all __u32 annotations to __be32 in affs_hardblocks.h
block: fix signed int overflow in Amiga partition support
block: add capacity validation in bdev_add_partition()
block: fine-granular CAP_SYS_ADMIN for Persistent Reservation
block: disallow Persistent Reservation on partitions
reiserfs: fix blkdev_put() warning from release_journal_dev()
block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions()
block: document the holder argument to blkdev_get_by_path
...
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Makefile | 10 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.c | 8 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 12 | ||||
-rw-r--r-- | fs/btrfs/super.c | 21 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 59 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 11 | ||||
-rw-r--r-- | fs/buffer.c | 3 | ||||
-rw-r--r-- | fs/direct-io.c | 71 | ||||
-rw-r--r-- | fs/erofs/super.c | 7 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 24 | ||||
-rw-r--r-- | fs/ext4/super.c | 27 | ||||
-rw-r--r-- | fs/f2fs/super.c | 12 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 2 | ||||
-rw-r--r-- | fs/inode.c | 3 | ||||
-rw-r--r-- | fs/iomap/buffered-io.c | 6 | ||||
-rw-r--r-- | fs/iomap/direct-io.c | 1 | ||||
-rw-r--r-- | fs/jfs/jfs_logmgr.c | 12 | ||||
-rw-r--r-- | fs/nfs/blocklayout/dev.c | 10 | ||||
-rw-r--r-- | fs/nilfs2/super.c | 12 | ||||
-rw-r--r-- | fs/no-block.c | 19 | ||||
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 7 | ||||
-rw-r--r-- | fs/pstore/blk.c | 4 | ||||
-rw-r--r-- | fs/reiserfs/journal.c | 25 | ||||
-rw-r--r-- | fs/reiserfs/reiserfs.h | 1 | ||||
-rw-r--r-- | fs/splice.c | 15 | ||||
-rw-r--r-- | fs/super.c | 48 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 34 | ||||
-rw-r--r-- | fs/zonefs/super.c | 2 |
31 files changed, 254 insertions, 220 deletions
diff --git a/fs/Makefile b/fs/Makefile index 5bfdbf0d7037..e513aaee0603 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -17,14 +17,8 @@ obj-y := open.o read_write.o file_table.o super.o \ fs_types.o fs_context.o fs_parser.o fsopen.o init.o \ kernel_read_file.o mnt_idmapping.o remap_range.o -ifeq ($(CONFIG_BLOCK),y) -obj-y += buffer.o mpage.o -else -obj-y += no-block.o -endif - -obj-$(CONFIG_PROC_FS) += proc_namespace.o - +obj-$(CONFIG_BLOCK) += buffer.o mpage.o +obj-$(CONFIG_PROC_FS) += proc_namespace.o obj-$(CONFIG_LEGACY_DIRECT_IO) += direct-io.o obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5e86bea0a950..5f10965fd72b 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -257,8 +257,8 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return -EINVAL; } - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, - fs_info->bdev_holder); + bdev = blkdev_get_by_path(device_path, BLK_OPEN_WRITE, + fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev); @@ -315,7 +315,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->mode = FMODE_EXCL; + device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_devices; @@ -334,7 +334,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, return 0; error: - blkdev_put(bdev, FMODE_EXCL); + blkdev_put(bdev, fs_info->bdev_holder); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index edbbd5cf23fc..a895d105464b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2671,7 +2671,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; struct block_device *bdev = NULL; - fmode_t mode; + void *holder; int ret; bool cancel = false; @@ -2708,7 +2708,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) goto err_drop; /* Exclusive operation is now claimed */ - ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &holder); btrfs_exclop_finish(fs_info); @@ -2723,7 +2723,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) err_drop: mnt_drop_write_file(file); if (bdev) - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); @@ -2737,7 +2737,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args *vol_args; struct block_device *bdev = NULL; - fmode_t mode; + void *holder; int ret; bool cancel = false; @@ -2764,7 +2764,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { - ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &holder); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); @@ -2772,7 +2772,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) mnt_drop_write_file(file); if (bdev) - blkdev_put(bdev, mode); + blkdev_put(bdev, holder); out: btrfs_put_dev_args_from_path(&args); kfree(vol_args); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8b1c1225245e..f1dd172d8d5b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -849,8 +849,7 @@ out: * All other options will be parsed on much later in the mount process and * only when we need to allocate a new super block. */ -static int btrfs_parse_device_options(const char *options, fmode_t flags, - void *holder) +static int btrfs_parse_device_options(const char *options, blk_mode_t flags) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; @@ -884,8 +883,7 @@ static int btrfs_parse_device_options(const char *options, fmode_t flags, error = -ENOMEM; goto out; } - device = btrfs_scan_one_device(device_name, flags, - holder); + device = btrfs_scan_one_device(device_name, flags); kfree(device_name); if (IS_ERR(device)) { error = PTR_ERR(device); @@ -1442,12 +1440,9 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, struct btrfs_fs_devices *fs_devices = NULL; struct btrfs_fs_info *fs_info = NULL; void *new_sec_opts = NULL; - fmode_t mode = FMODE_READ; + blk_mode_t mode = sb_open_mode(flags); int error = 0; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - if (data) { error = security_sb_eat_lsm_opts(data, &new_sec_opts); if (error) @@ -1477,13 +1472,13 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, } mutex_lock(&uuid_mutex); - error = btrfs_parse_device_options(data, mode, fs_type); + error = btrfs_parse_device_options(data, mode); if (error) { mutex_unlock(&uuid_mutex); goto error_fs_info; } - device = btrfs_scan_one_device(device_name, mode, fs_type); + device = btrfs_scan_one_device(device_name, mode); if (IS_ERR(device)) { mutex_unlock(&uuid_mutex); error = PTR_ERR(device); @@ -2195,8 +2190,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case BTRFS_IOC_SCAN_DEV: mutex_lock(&uuid_mutex); - device = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_root_fs_type); + device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ); ret = PTR_ERR_OR_ZERO(device); mutex_unlock(&uuid_mutex); break; @@ -2210,8 +2204,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, break; case BTRFS_IOC_DEVICES_READY: mutex_lock(&uuid_mutex); - device = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_root_fs_type); + device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ); if (IS_ERR(device)) { mutex_unlock(&uuid_mutex); ret = PTR_ERR(device); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4193ace3fb5a..73f9ea7672db 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -510,13 +510,13 @@ static struct btrfs_fs_devices *find_fsid_with_metadata_uuid( static int -btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, +btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, int flush, struct block_device **bdev, struct btrfs_super_block **disk_super) { int ret; - *bdev = blkdev_get_by_path(device_path, flags, holder); + *bdev = blkdev_get_by_path(device_path, flags, holder, NULL); if (IS_ERR(*bdev)) { ret = PTR_ERR(*bdev); @@ -527,14 +527,14 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, sync_blockdev(*bdev); ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE); if (ret) { - blkdev_put(*bdev, flags); + blkdev_put(*bdev, holder); goto error; } invalidate_bdev(*bdev); *disk_super = btrfs_read_dev_super(*bdev); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - blkdev_put(*bdev, flags); + blkdev_put(*bdev, holder); goto error; } @@ -610,7 +610,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device * fs_devices->device_list_mutex here. */ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, - struct btrfs_device *device, fmode_t flags, + struct btrfs_device *device, blk_mode_t flags, void *holder) { struct block_device *bdev; @@ -662,7 +662,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->bdev = bdev; clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - device->mode = flags; + device->holder = holder; fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && @@ -676,7 +676,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - blkdev_put(bdev, flags); + blkdev_put(bdev, holder); return -EINVAL; } @@ -1067,7 +1067,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, continue; if (device->bdev) { - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->holder); device->bdev = NULL; fs_devices->open_devices--; } @@ -1113,7 +1113,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->holder); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1217,14 +1217,12 @@ void btrfs_close_devices(struct btrfs_fs_devices *fs_devices) } static int open_fs_devices(struct btrfs_fs_devices *fs_devices, - fmode_t flags, void *holder) + blk_mode_t flags, void *holder) { struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; - flags |= FMODE_EXCL; - list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { int ret; @@ -1267,7 +1265,7 @@ static int devid_cmp(void *priv, const struct list_head *a, } int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, - fmode_t flags, void *holder) + blk_mode_t flags, void *holder) { int ret; @@ -1358,8 +1356,7 @@ int btrfs_forget_devices(dev_t devt) * and we are not allowed to call set_blocksize during the scan. The superblock * is read via pagecache */ -struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, - void *holder) +struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags) { struct btrfs_super_block *disk_super; bool new_device_added = false; @@ -1378,16 +1375,16 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, */ /* - * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may - * initiate the device scan which may race with the user's mount - * or mkfs command, resulting in failure. - * Since the device scan is solely for reading purposes, there is - * no need for FMODE_EXCL. Additionally, the devices are read again + * Avoid an exclusive open here, as the systemd-udev may initiate the + * device scan which may race with the user's mount or mkfs command, + * resulting in failure. + * Since the device scan is solely for reading purposes, there is no + * need for an exclusive open. Additionally, the devices are read again * during the mount process. It is ok to get some inconsistent * values temporarily, as the device paths of the fsid are the only * required information for assembling the volume. */ - bdev = blkdev_get_by_path(path, flags, holder); + bdev = blkdev_get_by_path(path, flags, NULL, NULL); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -1411,7 +1408,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, btrfs_release_disk_super(disk_super); error_bdev_put: - blkdev_put(bdev, flags); + blkdev_put(bdev, NULL); return device; } @@ -2098,7 +2095,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, fmode_t *mode) + struct block_device **bdev, void **holder) { struct btrfs_trans_handle *trans; struct btrfs_device *device; @@ -2237,7 +2234,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, } *bdev = device->bdev; - *mode = device->mode; + *holder = device->holder; synchronize_rcu(); btrfs_free_device(device); @@ -2391,7 +2388,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, return -ENOMEM; } - ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, + ret = btrfs_get_bdev_and_sb(path, BLK_OPEN_READ, NULL, 0, &bdev, &disk_super); if (ret) { btrfs_put_dev_args_from_path(args); @@ -2405,7 +2402,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - blkdev_put(bdev, FMODE_READ); + blkdev_put(bdev, NULL); return 0; } @@ -2638,8 +2635,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; - bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, - fs_info->bdev_holder); + bdev = blkdev_get_by_path(device_path, BLK_OPEN_WRITE, + fs_info->bdev_holder, NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -2701,7 +2698,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->commit_total_bytes = device->total_bytes; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->mode = FMODE_EXCL; + device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); @@ -2859,7 +2856,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - blkdev_put(bdev, FMODE_EXCL); + blkdev_put(bdev, fs_info->bdev_holder); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); @@ -6898,7 +6895,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, if (IS_ERR(fs_devices)) return fs_devices; - ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder); + ret = open_fs_devices(fs_devices, BLK_OPEN_READ, fs_info->bdev_holder); if (ret) { free_fs_devices(fs_devices); return ERR_PTR(ret); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d44cb9d22d63..b8c51f16ba86 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -94,8 +94,8 @@ struct btrfs_device { struct btrfs_zoned_device_info *zone_info; - /* the mode sent to blkdev_get */ - fmode_t mode; + /* block device holder for blkdev_get/put */ + void *holder; /* * Device's major-minor number. Must be set even if the device is not @@ -610,9 +610,8 @@ struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_tree_free(struct extent_map_tree *tree); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, - fmode_t flags, void *holder); -struct btrfs_device *btrfs_scan_one_device(const char *path, - fmode_t flags, void *holder); + blk_mode_t flags, void *holder); +struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags); int btrfs_forget_devices(dev_t devt); void btrfs_close_devices(struct btrfs_fs_devices *fs_devices); void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices); @@ -630,7 +629,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, fmode_t *mode); + struct block_device **bdev, void **holder); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, diff --git a/fs/buffer.c b/fs/buffer.c index fe64356e89b8..93c7446d9221 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2759,8 +2759,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); - BUG_ON(bio->bi_iter.bi_size != bh->b_size); + __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; diff --git a/fs/direct-io.c b/fs/direct-io.c index 0b380bb8a81e..2ceb378b93c0 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -42,8 +42,8 @@ #include "internal.h" /* - * How many user pages to map in one call to get_user_pages(). This determines - * the size of a structure in the slab cache + * How many user pages to map in one call to iov_iter_extract_pages(). This + * determines the size of a structure in the slab cache */ #define DIO_PAGES 64 @@ -121,12 +121,13 @@ struct dio { struct inode *inode; loff_t i_size; /* i_size when submitted */ dio_iodone_t *end_io; /* IO completion function */ + bool is_pinned; /* T if we have pins on the pages */ void *private; /* copy from map_bh.b_private */ /* BIO completion state */ spinlock_t bio_lock; /* protects BIO fields below */ - int page_errors; /* errno from get_user_pages() */ + int page_errors; /* err from iov_iter_extract_pages() */ int is_async; /* is IO async ? */ bool defer_completion; /* defer AIO completion to workqueue? */ bool should_dirty; /* if pages should be dirtied */ @@ -165,14 +166,14 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) */ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { + struct page **pages = dio->pages; const enum req_op dio_op = dio->opf & REQ_OP_MASK; ssize_t ret; - ret = iov_iter_get_pages2(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, - &sdio->from); + ret = iov_iter_extract_pages(sdio->iter, &pages, LONG_MAX, + DIO_PAGES, 0, &sdio->from); if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) { - struct page *page = ZERO_PAGE(0); /* * A memory fault, but the filesystem has some outstanding * mapped blocks. We need to use those blocks up to avoid @@ -180,8 +181,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) */ if (dio->page_errors == 0) dio->page_errors = ret; - get_page(page); - dio->pages[0] = page; + dio->pages[0] = ZERO_PAGE(0); sdio->head = 0; sdio->tail = 1; sdio->from = 0; @@ -201,9 +201,9 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) /* * Get another userspace page. Returns an ERR_PTR on error. Pages are - * buffered inside the dio so that we can call get_user_pages() against a - * decent number of pages, less frequently. To provide nicer use of the - * L1 cache. + * buffered inside the dio so that we can call iov_iter_extract_pages() + * against a decent number of pages, less frequently. To provide nicer use of + * the L1 cache. */ static inline struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio) @@ -219,6 +219,18 @@ static inline struct page *dio_get_page(struct dio *dio, return dio->pages[sdio->head]; } +static void dio_pin_page(struct dio *dio, struct page *page) +{ + if (dio->is_pinned) + folio_add_pin(page_folio(page)); +} + +static void dio_unpin_page(struct dio *dio, struct page *page) +{ + if (dio->is_pinned) + unpin_user_page(page); +} + /* * dio_complete() - called when all DIO BIO I/O has been completed * @@ -402,6 +414,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, bio->bi_end_io = dio_bio_end_aio; else bio->bi_end_io = dio_bio_end_io; + if (dio->is_pinned) + bio_set_flag(bio, BIO_PAGE_PINNED); sdio->bio = bio; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; } @@ -442,8 +456,10 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) */ static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) { - while (sdio->head < sdio->tail) - put_page(dio->pages[sdio->head++]); + if (dio->is_pinned) + unpin_user_pages(dio->pages + sdio->head, + sdio->tail - sdio->head); + sdio->head = sdio->tail; } /* @@ -674,7 +690,7 @@ out: * * Return zero on success. Non-zero means the caller needs to start a new BIO. */ -static inline int dio_bio_add_page(struct dio_submit *sdio) +static inline int dio_bio_add_page(struct dio *dio, struct dio_submit *sdio) { int ret; @@ -686,7 +702,7 @@ static inline int dio_bio_add_page(struct dio_submit *sdio) */ if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE) sdio->pages_in_io--; - get_page(sdio->cur_page); + dio_pin_page(dio, sdio->cur_page); sdio->final_block_in_bio = sdio->cur_page_block + (sdio->cur_page_len >> sdio->blkbits); ret = 0; @@ -741,11 +757,11 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, goto out; } - if (dio_bio_add_page(sdio) != 0) { + if (dio_bio_add_page(dio, sdio) != 0) { dio_bio_submit(dio, sdio); ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh); if (ret == 0) { - ret = dio_bio_add_page(sdio); + ret = dio_bio_add_page(dio, sdio); BUG_ON(ret != 0); } } @@ -802,13 +818,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, */ if (sdio->cur_page) { ret = dio_send_cur_page(dio, sdio, map_bh); - put_page(sdio->cur_page); + dio_unpin_page(dio, sdio->cur_page); sdio->cur_page = NULL; if (ret) return ret; } - get_page(page); /* It is in dio */ + dio_pin_page(dio, page); /* It is in dio */ sdio->cur_page = page; sdio->cur_page_offset = offset; sdio->cur_page_len = len; @@ -823,7 +839,7 @@ out: ret = dio_send_cur_page(dio, sdio, map_bh); if (sdio->bio) dio_bio_submit(dio, sdio); - put_page(sdio->cur_page); + dio_unpin_page(dio, sdio->cur_page); sdio->cur_page = NULL; } return ret; @@ -924,7 +940,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, ret = get_more_blocks(dio, sdio, map_bh); if (ret) { - put_page(page); + dio_unpin_page(dio, page); goto out; } if (!buffer_mapped(map_bh)) @@ -969,7 +985,7 @@ do_holes: /* AKPM: eargh, -ENOTBLK is a hack */ if (dio_op == REQ_OP_WRITE) { - put_page(page); + dio_unpin_page(dio, page); return -ENOTBLK; } @@ -982,7 +998,7 @@ do_holes: if (sdio->block_in_file >= i_size_aligned >> blkbits) { /* We hit eof */ - put_page(page); + dio_unpin_page(dio, page); goto out; } zero_user(page, from, 1 << blkbits); @@ -1022,7 +1038,7 @@ do_holes: sdio->next_block_for_io, map_bh); if (ret) { - put_page(page); + dio_unpin_page(dio, page); goto out; } sdio->next_block_for_io += this_chunk_blocks; @@ -1037,8 +1053,8 @@ next_block: break; } - /* Drop the ref which was taken in get_user_pages() */ - put_page(page); + /* Drop the pin which was taken in get_user_pages() */ + dio_unpin_page(dio, page); } out: return ret; @@ -1133,6 +1149,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, /* will be released by direct_io_worker */ inode_lock(inode); } + dio->is_pinned = iov_iter_extract_will_pin(iter); /* Once we sampled i_size check for reads beyond EOF */ dio->i_size = i_size_read(inode); @@ -1257,7 +1274,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, ret2 = dio_send_cur_page(dio, &sdio, &map_bh); if (retval == 0) retval = ret2; - put_page(sdio.cur_page); + dio_unpin_page(dio, sdio.cur_page); sdio.cur_page = NULL; } if (sdio.bio) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index ff18f6b14de5..9d6a3c6158bd 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -19,6 +19,7 @@ #include <trace/events/erofs.h> static struct kmem_cache *erofs_inode_cachep __read_mostly; +struct file_system_type erofs_fs_type; void _erofs_err(struct super_block *sb, const char *function, const char *fmt, ...) @@ -253,8 +254,8 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, return PTR_ERR(fscache); dif->fscache = fscache; } else if (!sbi->devs->flatdev) { - bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, - sb->s_type); + bdev = blkdev_get_by_path(dif->path, BLK_OPEN_READ, sb->s_type, + NULL); if (IS_ERR(bdev)) return PTR_ERR(bdev); dif->bdev = bdev; @@ -815,7 +816,7 @@ static int erofs_release_device_info(int id, void *ptr, void *data) fs_put_dax(dif->dax_dev, NULL); if (dif->bdev) - blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + blkdev_put(dif->bdev, &erofs_fs_type); erofs_fscache_unregister_cookie(dif->fscache); dif->fscache = NULL; kfree(dif->path); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8104a21b001a..02fa8a64dc3f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2968,6 +2968,7 @@ int ext4_fileattr_set(struct mnt_idmap *idmap, int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa); extern void ext4_reset_inode_seed(struct inode *inode); int ext4_update_overhead(struct super_block *sb, bool force); +int ext4_force_shutdown(struct super_block *sb, u32 flags); /* migrate.c */ extern int ext4_ext_migrate(struct inode *); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f9a430152063..961284cc9b65 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -793,16 +793,9 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid) } #endif -static int ext4_shutdown(struct super_block *sb, unsigned long arg) +int ext4_force_shutdown(struct super_block *sb, u32 flags) { struct ext4_sb_info *sbi = EXT4_SB(sb); - __u32 flags; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(flags, (__u32 __user *)arg)) - return -EFAULT; if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH) return -EINVAL; @@ -838,6 +831,19 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) return 0; } +static int ext4_ioctl_shutdown(struct super_block *sb, unsigned long arg) +{ + u32 flags; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(flags, (__u32 __user *)arg)) + return -EFAULT; + + return ext4_force_shutdown(sb, flags); +} + struct getfsmap_info { struct super_block *gi_sb; struct fsmap_head __user *gi_data; @@ -1566,7 +1572,7 @@ resizefs_out: return ext4_ioctl_get_es_cache(filp, arg); case EXT4_IOC_SHUTDOWN: - return ext4_shutdown(sb, arg); + return ext4_ioctl_shutdown(sb, arg); case FS_IOC_ENABLE_VERITY: if (!ext4_has_feature_verity(sb)) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 05fcecc36244..eaa5858d5285 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1096,6 +1096,15 @@ void ext4_update_dynamic_rev(struct super_block *sb) */ } +static void ext4_bdev_mark_dead(struct block_device *bdev) +{ + ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH); +} + +static const struct blk_holder_ops ext4_holder_ops = { + .mark_dead = ext4_bdev_mark_dead, +}; + /* * Open the external journal device */ @@ -1103,7 +1112,8 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); + bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb, + &ext4_holder_ops); if (IS_ERR(bdev)) goto fail; return bdev; @@ -1118,17 +1128,12 @@ fail: /* * Release the journal device */ -static void ext4_blkdev_put(struct block_device *bdev) -{ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - static void ext4_blkdev_remove(struct ext4_sb_info *sbi) { struct block_device *bdev; bdev = sbi->s_journal_bdev; if (bdev) { - ext4_blkdev_put(bdev); + blkdev_put(bdev, sbi->s_sb); sbi->s_journal_bdev = NULL; } } @@ -1449,6 +1454,11 @@ static void ext4_destroy_inode(struct inode *inode) EXT4_I(inode)->i_reserved_data_blocks); } +static void ext4_shutdown(struct super_block *sb) +{ + ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH); +} + static void init_once(void *foo) { struct ext4_inode_info *ei = foo; @@ -1609,6 +1619,7 @@ static const struct super_operations ext4_sops = { .unfreeze_fs = ext4_unfreeze, .statfs = ext4_statfs, .show_options = ext4_show_options, + .shutdown = ext4_shutdown, #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, .quota_write = ext4_quota_write, @@ -5899,7 +5910,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, out_journal: jbd2_journal_destroy(journal); out_bdev: - ext4_blkdev_put(bdev); + blkdev_put(bdev, sb); return NULL; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9f15b03037db..e34197a70dc1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1538,7 +1538,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi) int i; for (i = 0; i < sbi->s_ndevs; i++) { - blkdev_put(FDEV(i).bdev, FMODE_EXCL); + blkdev_put(FDEV(i).bdev, sbi->sb->s_type); #ifdef CONFIG_BLK_DEV_ZONED kvfree(FDEV(i).blkz_seq); #endif @@ -3993,6 +3993,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); unsigned int max_devices = MAX_DEVICES; unsigned int logical_blksize; + blk_mode_t mode = sb_open_mode(sbi->sb->s_flags); int i; /* Initialize single device information */ @@ -4024,8 +4025,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (max_devices == 1) { /* Single zoned block device mount */ FDEV(0).bdev = - blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, - sbi->sb->s_mode, sbi->sb->s_type); + blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode, + sbi->sb->s_type, NULL); } else { /* Multi-device mount */ memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); @@ -4043,8 +4044,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) (FDEV(i).total_segments << sbi->log_blocks_per_seg) - 1; } - FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, - sbi->sb->s_mode, sbi->sb->s_type); + FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode, + sbi->sb->s_type, + NULL); } if (IS_ERR(FDEV(i).bdev)) return PTR_ERR(FDEV(i).bdev); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9af9ddb61ca0..cd962985b058 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -254,7 +254,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio = bio_alloc(sb->s_bdev, 1, REQ_OP_READ | REQ_META, GFP_NOFS); bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); - bio_add_page(bio, page, PAGE_SIZE, 0); + __bio_add_page(bio, page, PAGE_SIZE, 0); bio->bi_end_io = end_bio_io_page; bio->bi_private = page; diff --git a/fs/inode.c b/fs/inode.c index 53ae3b76d232..d37fad91c8da 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2306,7 +2306,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) inode->i_fop = &def_chr_fops; inode->i_rdev = rdev; } else if (S_ISBLK(mode)) { - inode->i_fop = &def_blk_fops; + if (IS_ENABLED(CONFIG_BLOCK)) + inode->i_fop = &def_blk_fops; inode->i_rdev = rdev; } else if (S_ISFIFO(mode)) inode->i_fop = &pipefifo_fops; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 063133ec77f4..0edab9deae2a 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -312,7 +312,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter, ctx->bio->bi_opf |= REQ_RAHEAD; ctx->bio->bi_iter.bi_sector = sector; ctx->bio->bi_end_io = iomap_read_end_io; - bio_add_folio(ctx->bio, folio, plen, poff); + bio_add_folio_nofail(ctx->bio, folio, plen, poff); } done: @@ -539,7 +539,7 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio, bio_init(&bio, iomap->bdev, &bvec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = iomap_sector(iomap, block_start); - bio_add_folio(&bio, folio, plen, poff); + bio_add_folio_nofail(&bio, folio, plen, poff); return submit_bio_wait(&bio); } @@ -1582,7 +1582,7 @@ iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio, if (!bio_add_folio(wpc->ioend->io_bio, folio, len, poff)) { wpc->ioend->io_bio = iomap_chain_bio(wpc->ioend->io_bio); - bio_add_folio(wpc->ioend->io_bio, folio, len, poff); + bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff); } if (iop) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 019cc87d0fb3..08873f0627dd 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -203,7 +203,6 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; - get_page(page); __bio_add_page(bio, page, len, 0); iomap_dio_submit_bio(iter, dio, bio, pos); } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 695415cbfe98..e855b8fde76c 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1100,8 +1100,8 @@ int lmLogOpen(struct super_block *sb) * file systems to log may have n-to-1 relationship; */ - bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - log); + bdev = blkdev_get_by_dev(sbi->logdev, BLK_OPEN_READ | BLK_OPEN_WRITE, + log, NULL); if (IS_ERR(bdev)) { rc = PTR_ERR(bdev); goto free; @@ -1141,7 +1141,7 @@ journal_found: lbmLogShutdown(log); close: /* close external log device */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, log); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb) bdev = log->bdev; rc = lmLogShutdown(log); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, log); kfree(log); @@ -1974,7 +1974,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio = bio_alloc(log->bdev, 1, REQ_OP_READ, GFP_NOFS); bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); + __bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); BUG_ON(bio->bi_iter.bi_size != LOGPSIZE); bio->bi_end_io = lbmIODone; @@ -2115,7 +2115,7 @@ static void lbmStartIO(struct lbuf * bp) bio = bio_alloc(log->bdev, 1, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS); bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); + __bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); BUG_ON(bio->bi_iter.bi_size != LOGPSIZE); bio->bi_end_io = lbmIODone; diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index fea5f8821da5..70f5563a8e81 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -35,7 +35,7 @@ bl_free_device(struct pnfs_block_dev *dev) } if (dev->bdev) - blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(dev->bdev, NULL); } } @@ -243,7 +243,8 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, if (!dev) return -EIO; - bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); + bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, + NULL); if (IS_ERR(bdev)) { printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", MAJOR(dev), MINOR(dev), PTR_ERR(bdev)); @@ -312,7 +313,8 @@ bl_open_path(struct pnfs_block_volume *v, const char *prefix) if (!devname) return ERR_PTR(-ENOMEM); - bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL); + bdev = blkdev_get_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, NULL, + NULL); if (IS_ERR(bdev)) { pr_warn("pNFS: failed to open device %s (%ld)\n", devname, PTR_ERR(bdev)); @@ -373,7 +375,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, return 0; out_blkdev_put: - blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(d->bdev, NULL); return error; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 9ba4933087af..0ef8c71bde8e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1299,14 +1299,11 @@ nilfs_mount(struct file_system_type *fs_type, int flags, { struct nilfs_super_data sd; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; struct dentry *root_dentry; int err, s_new = false; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - - sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type); + sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, + NULL); if (IS_ERR(sd.bdev)) return ERR_CAST(sd.bdev); @@ -1340,7 +1337,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s_new = true; /* New superblock instance created */ - s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev); sb_set_blocksize(s, block_size(sd.bdev)); @@ -1378,7 +1374,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } if (!s_new) - blkdev_put(sd.bdev, mode); + blkdev_put(sd.bdev, fs_type); return root_dentry; @@ -1387,7 +1383,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags, failed: if (!s_new) - blkdev_put(sd.bdev, mode); + blkdev_put(sd.bdev, fs_type); return ERR_PTR(err); } diff --git a/fs/no-block.c b/fs/no-block.c deleted file mode 100644 index 481c0f0ab4bd..000000000000 --- a/fs/no-block.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* no-block.c: implementation of routines required for non-BLOCK configuration - * - * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -#include <linux/kernel.h> -#include <linux/fs.h> - -static int no_blkdev_open(struct inode * inode, struct file * filp) -{ - return -ENODEV; -} - -const struct file_operations def_blk_fops = { - .open = no_blkdev_open, - .llseek = noop_llseek, -}; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 60b97c92e2b2..21472e3ed182 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1503,7 +1503,7 @@ static void o2hb_region_release(struct config_item *item) } if (reg->hr_bdev) - blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); + blkdev_put(reg->hr_bdev, NULL); kfree(reg->hr_slots); @@ -1786,7 +1786,8 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, goto out2; reg->hr_bdev = blkdev_get_by_dev(f.file->f_mapping->host->i_rdev, - FMODE_WRITE | FMODE_READ, NULL); + BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, + NULL); if (IS_ERR(reg->hr_bdev)) { ret = PTR_ERR(reg->hr_bdev); reg->hr_bdev = NULL; @@ -1893,7 +1894,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, out3: if (ret < 0) { - blkdev_put(reg->hr_bdev, FMODE_READ | FMODE_WRITE); + blkdev_put(reg->hr_bdev, NULL); reg->hr_bdev = NULL; } out2: diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 4ae0cfcd15f2..de8cf5d75f34 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -263,9 +263,9 @@ static __init const char *early_boot_devpath(const char *initial_devname) * same scheme to find the device that we use for mounting * the root file system. */ - dev_t dev = name_to_dev_t(initial_devname); + dev_t dev; - if (!dev) { + if (early_lookup_bdev(initial_devname, &dev)) { pr_err("failed to resolve '%s'!\n", initial_devname); return initial_devname; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 4d11d60f493c..479aa4a57602 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2589,7 +2589,12 @@ static void release_journal_dev(struct super_block *super, struct reiserfs_journal *journal) { if (journal->j_dev_bd != NULL) { - blkdev_put(journal->j_dev_bd, journal->j_dev_mode); + void *holder = NULL; + + if (journal->j_dev_bd->bd_dev != super->s_dev) + holder = journal; + + blkdev_put(journal->j_dev_bd, holder); journal->j_dev_bd = NULL; } } @@ -2598,9 +2603,10 @@ static int journal_init_dev(struct super_block *super, struct reiserfs_journal *journal, const char *jdev_name) { + blk_mode_t blkdev_mode = BLK_OPEN_READ; + void *holder = journal; int result; dev_t jdev; - fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL; result = 0; @@ -2608,16 +2614,15 @@ static int journal_init_dev(struct super_block *super, jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; - if (bdev_read_only(super->s_bdev)) - blkdev_mode = FMODE_READ; + if (!bdev_read_only(super->s_bdev)) + blkdev_mode |= BLK_OPEN_WRITE; /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { if (jdev == super->s_dev) - blkdev_mode &= ~FMODE_EXCL; - journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, - journal); - journal->j_dev_mode = blkdev_mode; + holder = NULL; + journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode, holder, + NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; @@ -2631,8 +2636,8 @@ static int journal_init_dev(struct super_block *super, return 0; } - journal->j_dev_mode = blkdev_mode; - journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal); + journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, holder, + NULL); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 1bccf6a2e908..55e85256aae8 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -300,7 +300,6 @@ struct reiserfs_journal { struct reiserfs_journal_cnode *j_first; struct block_device *j_dev_bd; - fmode_t j_dev_mode; /* first block on s_dev of reserved area journal */ int j_1st_reserved_block; diff --git a/fs/splice.c b/fs/splice.c index 2420ead610a7..7a9565d8ec4f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -368,16 +368,15 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos, if (ret > 0) { keep = DIV_ROUND_UP(ret, PAGE_SIZE); *ppos = kiocb.ki_pos; - file_accessed(in); - } else if (ret < 0) { - /* - * callers of ->splice_read() expect -EAGAIN on - * "can't put anything in there", rather than -EFAULT. - */ - if (ret == -EFAULT) - ret = -EAGAIN; } + /* + * Callers of ->splice_read() expect -EAGAIN on "can't put anything in + * there", rather than -EFAULT. + */ + if (ret == -EFAULT) + ret = -EAGAIN; + /* Free any pages that didn't get touched at all. */ if (keep < npages) release_pages(pages + keep, npages - keep); diff --git a/fs/super.c b/fs/super.c index 48c29954d487..05ff6abddd3c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1210,6 +1210,22 @@ int get_tree_keyed(struct fs_context *fc, EXPORT_SYMBOL(get_tree_keyed); #ifdef CONFIG_BLOCK +static void fs_mark_dead(struct block_device *bdev) +{ + struct super_block *sb; + + sb = get_super(bdev); + if (!sb) + return; + + if (sb->s_op->shutdown) + sb->s_op->shutdown(sb); + drop_super(sb); +} + +static const struct blk_holder_ops fs_holder_ops = { + .mark_dead = fs_mark_dead, +}; static int set_bdev_super(struct super_block *s, void *data) { @@ -1243,16 +1259,13 @@ int get_tree_bdev(struct fs_context *fc, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; int error = 0; - if (!(fc->sb_flags & SB_RDONLY)) - mode |= FMODE_WRITE; - if (!fc->source) return invalf(fc, "No source specified"); - bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type); + bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags), + fc->fs_type, &fs_holder_ops); if (IS_ERR(bdev)) { errorf(fc, "%s: Can't open blockdev", fc->source); return PTR_ERR(bdev); @@ -1266,7 +1279,7 @@ int get_tree_bdev(struct fs_context *fc, if (bdev->bd_fsfreeze_count > 0) { mutex_unlock(&bdev->bd_fsfreeze_mutex); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return -EBUSY; } @@ -1275,7 +1288,7 @@ int get_tree_bdev(struct fs_context *fc, s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return PTR_ERR(s); } @@ -1284,7 +1297,7 @@ int get_tree_bdev(struct fs_context *fc, if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { warnf(fc, "%pg: Can't mount, would change RO state", bdev); deactivate_locked_super(s); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); return -EBUSY; } @@ -1296,10 +1309,9 @@ int get_tree_bdev(struct fs_context *fc, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode); + blkdev_put(bdev, fc->fs_type); down_write(&s->s_umount); } else { - s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fc->fs_type->name, s->s_id); @@ -1331,13 +1343,10 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; - fmode_t mode = FMODE_READ | FMODE_EXCL; int error = 0; - if (!(flags & SB_RDONLY)) - mode |= FMODE_WRITE; - - bdev = blkdev_get_by_path(dev_name, mode, fs_type); + bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, + &fs_holder_ops); if (IS_ERR(bdev)) return ERR_CAST(bdev); @@ -1373,10 +1382,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, * holding an active reference. */ up_write(&s->s_umount); - blkdev_put(bdev, mode); + blkdev_put(bdev, fs_type); down_write(&s->s_umount); } else { - s->s_mode = mode; snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, s->s_id); @@ -1396,7 +1404,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - blkdev_put(bdev, mode); + blkdev_put(bdev, fs_type); error: return ERR_PTR(error); } @@ -1405,13 +1413,11 @@ EXPORT_SYMBOL(mount_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; - fmode_t mode = sb->s_mode; bdev->bd_super = NULL; generic_shutdown_super(sb); sync_blockdev(bdev); - WARN_ON_ONCE(!(mode & FMODE_EXCL)); - blkdev_put(bdev, mode | FMODE_EXCL); + blkdev_put(bdev, sb->s_type); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 13851c0d640b..9ebb8333a308 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -534,6 +534,9 @@ xfs_do_force_shutdown( } else if (flags & SHUTDOWN_CORRUPT_ONDISK) { tag = XFS_PTAG_SHUTDOWN_CORRUPT; why = "Corruption of on-disk metadata"; + } else if (flags & SHUTDOWN_DEVICE_REMOVED) { + tag = XFS_PTAG_SHUTDOWN_IOERROR; + why = "Block device removal"; } else { tag = XFS_PTAG_SHUTDOWN_IOERROR; why = "Metadata I/O Error"; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 6c09f89534d3..e2866e7fa60c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -458,12 +458,14 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, uint32_t flags, char *fname, #define SHUTDOWN_FORCE_UMOUNT (1u << 2) /* shutdown from a forced unmount */ #define SHUTDOWN_CORRUPT_INCORE (1u << 3) /* corrupt in-memory structures */ #define SHUTDOWN_CORRUPT_ONDISK (1u << 4) /* corrupt metadata on device */ +#define SHUTDOWN_DEVICE_REMOVED (1u << 5) /* device removed underneath us */ #define XFS_SHUTDOWN_STRINGS \ { SHUTDOWN_META_IO_ERROR, "metadata_io" }, \ { SHUTDOWN_LOG_IO_ERROR, "log_io" }, \ { SHUTDOWN_FORCE_UMOUNT, "force_umount" }, \ - { SHUTDOWN_CORRUPT_INCORE, "corruption" } + { SHUTDOWN_CORRUPT_INCORE, "corruption" }, \ + { SHUTDOWN_DEVICE_REMOVED, "device_removed" } /* * Flags for xfs_mountfs diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 4120bd1cba90..d910b141d52e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -377,6 +377,17 @@ disable_dax: return 0; } +static void +xfs_bdev_mark_dead( + struct block_device *bdev) +{ + xfs_force_shutdown(bdev->bd_holder, SHUTDOWN_DEVICE_REMOVED); +} + +static const struct blk_holder_ops xfs_holder_ops = { + .mark_dead = xfs_bdev_mark_dead, +}; + STATIC int xfs_blkdev_get( xfs_mount_t *mp, @@ -385,8 +396,8 @@ xfs_blkdev_get( { int error = 0; - *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - mp); + *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, mp, + &xfs_holder_ops); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); xfs_warn(mp, "Invalid device [%s], error=%d", name, error); @@ -397,10 +408,11 @@ xfs_blkdev_get( STATIC void xfs_blkdev_put( + struct xfs_mount *mp, struct block_device *bdev) { if (bdev) - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, mp); } STATIC void @@ -411,13 +423,13 @@ xfs_close_devices( struct block_device *logdev = mp->m_logdev_targp->bt_bdev; xfs_free_buftarg(mp->m_logdev_targp); - xfs_blkdev_put(logdev); + xfs_blkdev_put(mp, logdev); } if (mp->m_rtdev_targp) { struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; xfs_free_buftarg(mp->m_rtdev_targp); - xfs_blkdev_put(rtdev); + xfs_blkdev_put(mp, rtdev); } xfs_free_buftarg(mp->m_ddev_targp); } @@ -492,10 +504,10 @@ xfs_open_devices( out_free_ddev_targ: xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: - xfs_blkdev_put(rtdev); + xfs_blkdev_put(mp, rtdev); out_close_logdev: if (logdev && logdev != ddev) - xfs_blkdev_put(logdev); + xfs_blkdev_put(mp, logdev); return error; } @@ -1160,6 +1172,13 @@ xfs_fs_free_cached_objects( return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); } +static void +xfs_fs_shutdown( + struct super_block *sb) +{ + xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED); +} + static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, @@ -1173,6 +1192,7 @@ static const struct super_operations xfs_super_operations = { .show_options = xfs_fs_show_options, .nr_cached_objects = xfs_fs_nr_cached_objects, .free_cached_objects = xfs_fs_free_cached_objects, + .shutdown = xfs_fs_shutdown, }; static int diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 56c00111966a..bbe44a26a8e5 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1128,7 +1128,7 @@ static int zonefs_read_super(struct super_block *sb) bio_init(&bio, sb->s_bdev, &bio_vec, 1, REQ_OP_READ); bio.bi_iter.bi_sector = 0; - bio_add_page(&bio, page, PAGE_SIZE, 0); + __bio_add_page(&bio, page, PAGE_SIZE, 0); ret = submit_bio_wait(&bio); if (ret) |