author		Linus Torvalds <torvalds@linux-foundation.org>	2024-05-13 13:03:54 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-05-13 13:03:54 -0700
commit		0c9f4ac808b017a0013cee92a30de980550145d5
tree		94eedbb9ef4815df9dc8d1dd6424fc92a2fbcd7a /include
parent		9961a785944601e32f185ea696347b22ffda634c
parent		a3166c51702bb00b8f8b84022090cbab8f37be1a
Merge tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- Add a partscan attribute in sysfs, fixing an issue with systemd
relying on an internal interface that went away.
- Attempt #2 at making long-running discards interruptible. The
  previous attempt went into 6.9, but we ended up mostly reverting it
  as it had issues (a sketch of the new submission pattern follows
  this list).
- Remove old ida_simple API in bcache
- Support for zoned write plugging, greatly improving performance on
  zoned devices (see the plugging sketch after this list).
- Remove the old throttle low interface, which has been experimental
  since 2017, never made it beyond that stage, and isn't being used.
- Remove page->index debugging checks in brd, as they haven't caught
  anything; this prepares us for removing page->index from struct
  page.
- MD pull request from Song
- Don't schedule block workers on isolated CPUs
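Editor's note: the interruptible-discard work shows up in the include changes below as the new blk_alloc_discard_bio() and bio_chain_and_submit() helpers. The following is a minimal sketch of the submission pattern they enable, not the actual blk-lib.c/ioctl code; the function name and the -EINTR choice are assumptions made for illustration:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/signal.h>

/*
 * Sketch only: allocate discard bios chunk by chunk, chain and submit
 * each one, and stop early if the issuing task received a fatal
 * signal.  bio_chain_and_submit() submits the previous bio and
 * returns the new, not-yet-submitted chain tail, so the final
 * submit_bio_wait() waits for the whole chain to complete.
 */
static int example_issue_discard(struct block_device *bdev,
				 sector_t sector, sector_t nr_sects)
{
	struct bio *prev = NULL, *bio;
	int err = 0;

	/* blk_alloc_discard_bio() returns NULL once nr_sects reaches 0 */
	while (!fatal_signal_pending(current) &&
	       (bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
					    GFP_KERNEL)))
		prev = bio_chain_and_submit(prev, bio);

	if (prev) {
		err = submit_bio_wait(prev);
		bio_put(prev);
	}
	if (fatal_signal_pending(current))
		err = -EINTR;	/* assumed error choice for this sketch */
	return err;
}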
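Editor's note: zone write plugging replaces the per-zone write lock (RQF_ZONE_WRITE_LOCKED, seq_zones_wlock) that the diff below removes. A rough sketch of how a bio-based submission path is expected to interact with the new blk_zone_plug_bio() hook; only blk_zone_plug_bio() comes from this pull, the driver names are hypothetical:

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Hypothetical driver submission path.  A true return from
 * blk_zone_plug_bio() means the bio was queued in the target zone's
 * write plug and will be resubmitted, in order, once the preceding
 * write to that zone completes; the caller must not issue it now.
 */
static void mydrv_submit_bio(struct bio *bio)
{
	/* bio-based drivers pass 0 for nr_segs */
	if (bdev_is_zoned(bio->bi_bdev) && blk_zone_plug_bio(bio, 0))
		return;	/* plugged: do not dispatch now */

	/* ... issue @bio to the hardware here ... */
}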
* tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux: (84 commits)
blk-throttle: delay initialization until configuration
blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW
block: fix that util can be greater than 100%
block: support to account io_ticks precisely
block: add plug while submitting IO
bcache: fix variable length array abuse in btree_iter
bcache: Remove usage of the deprecated ida_simple_xx() API
md: Revert "md: Fix overflow in is_mddev_idle"
blk-lib: check for kill signal in ioctl BLKDISCARD
block: add a bio_await_chain helper
block: add a blk_alloc_discard_bio helper
block: add a bio_chain_and_submit helper
block: move discard checks into the ioctl handler
block: remove the discard_granularity check in __blkdev_issue_discard
block/ioctl: prefer different overflow check
null_blk: Fix the WARNING: modpost: missing MODULE_DESCRIPTION()
block: fix and simplify blkdevparts= cmdline parsing
block: refine the EOF check in blkdev_iomap_begin
block: add a partscan sysfs attribute for disks
block: add a disk_has_partscan helper
...
Diffstat (limited to 'include')
-rw-r--r--	include/linux/bio.h		|  11
-rw-r--r--	include/linux/blk-mq.h		|  85
-rw-r--r--	include/linux/blk_types.h	|  30
-rw-r--r--	include/linux/blkdev.h		| 116
4 files changed, 86 insertions(+), 156 deletions(-)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 875d792bffff..d5379548d684 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -615,6 +615,13 @@ static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
 	bl->tail = bl2->tail;
 }
 
+static inline void bio_list_merge_init(struct bio_list *bl,
+		struct bio_list *bl2)
+{
+	bio_list_merge(bl, bl2);
+	bio_list_init(bl2);
+}
+
 static inline void bio_list_merge_head(struct bio_list *bl,
 		struct bio_list *bl2)
 {
@@ -824,5 +831,9 @@ static inline void bio_clear_polled(struct bio *bio)
 struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
 		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
 
+struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
+
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+	sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask);
 
 #endif /* __LINUX_BIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d3d8fd8e229b..89ba6b16fe8b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -54,8 +54,8 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
-/* The per-zone write lock is held for this request */
-#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
+/* The request completion needs to be signaled to zone write plugging. */
+#define RQF_ZONE_WRITE_PLUGGING	((__force req_flags_t)(1 << 20))
 /* ->timeout has been called, don't expire again */
 #define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
 #define RQF_RESV		((__force req_flags_t)(1 << 23))
@@ -1150,85 +1150,4 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 
 void blk_dump_rq_flags(struct request *, char *);
 
-#ifdef CONFIG_BLK_DEV_ZONED
-static inline unsigned int blk_rq_zone_no(struct request *rq)
-{
-	return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
-}
-
-static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
-{
-	return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
-}
-
-/**
- * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization.
- * @rq: Request to examine.
- *
- * Note: REQ_OP_ZONE_APPEND requests do not require serialization.
- */
-static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
-{
-	return op_needs_zoned_write_locking(req_op(rq)) &&
-		blk_rq_zone_is_seq(rq);
-}
-
-bool blk_req_needs_zone_write_lock(struct request *rq);
-bool blk_req_zone_write_trylock(struct request *rq);
-void __blk_req_zone_write_lock(struct request *rq);
-void __blk_req_zone_write_unlock(struct request *rq);
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
-	if (blk_req_needs_zone_write_lock(rq))
-		__blk_req_zone_write_lock(rq);
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
-	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
-		__blk_req_zone_write_unlock(rq);
-}
-
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
-	return rq->q->disk->seq_zones_wlock &&
-		test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock);
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
-	if (!blk_req_needs_zone_write_lock(rq))
-		return true;
-	return !blk_req_zone_is_write_locked(rq);
-}
-#else /* CONFIG_BLK_DEV_ZONED */
-static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
-{
-	return false;
-}
-
-static inline bool blk_req_needs_zone_write_lock(struct request *rq)
-{
-	return false;
-}
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
-}
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
-	return false;
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
-	return true;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
 #endif /* BLK_MQ_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index c3e098b21c16..25dbf1097085 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -131,25 +131,13 @@ typedef u16 blk_short_t;
 #define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)
 
 /*
- * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
- * related resources are unavailable, but the driver can guarantee the queue
- * will be rerun in the future once the resources become available again.
- *
- * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
- * a zone specific resource and IO to a different zone on the same device could
- * still be served. Examples of that are zones that are write-locked, but a read
- * to the same zone could be served.
- */
-#define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)
-
-/*
  * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
  * path if the device returns a status indicating that too many zone resources
  * are currently open. The same command should be successful if resubmitted
  * after the number of open zones decreases below the device's limits, which is
  * reported in the request_queue's max_open_zones.
  */
-#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)
+#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)14)
 
 /*
  * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
@@ -158,20 +146,20 @@ typedef u16 blk_short_t;
  * after the number of active zones decreases below the device's limits, which
  * is reported in the request_queue's max_active_zones.
  */
-#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)
+#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)15)
 
 /*
  * BLK_STS_OFFLINE is returned from the driver when the target device is offline
  * or is being taken offline. This could help differentiate the case where a
  * device is intentionally being shut down from a real I/O error.
  */
-#define BLK_STS_OFFLINE		((__force blk_status_t)17)
+#define BLK_STS_OFFLINE		((__force blk_status_t)16)
 
 /*
  * BLK_STS_DURATION_LIMIT is returned from the driver when the target device
  * aborted the command because it exceeded one of its Command Duration Limits.
  */
-#define BLK_STS_DURATION_LIMIT	((__force blk_status_t)18)
+#define BLK_STS_DURATION_LIMIT	((__force blk_status_t)17)
 
 /**
  * blk_path_error - returns true if error may be path related
@@ -228,7 +216,12 @@ struct bio {
 
 	struct bvec_iter	bi_iter;
 
-	blk_qc_t		bi_cookie;
+	union {
+		/* for polled bios: */
+		blk_qc_t		bi_cookie;
+		/* for plugged zoned writes only: */
+		unsigned int		__bi_nr_segments;
+	};
 	bio_end_io_t		*bi_end_io;
 	void			*bi_private;
 #ifdef CONFIG_BLK_CGROUP
@@ -298,7 +291,8 @@ enum {
 	BIO_QOS_THROTTLED,	/* bio went through rq_qos throttle path */
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
-	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
+	BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
 	BIO_FLAG_LAST
 };
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69e7da33ca49..69c4f113db42 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -179,22 +179,21 @@ struct gendisk {
 #ifdef CONFIG_BLK_DEV_ZONED
 	/*
-	 * Zoned block device information for request dispatch control.
-	 * nr_zones is the total number of zones of the device. This is always
-	 * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
-	 * bits which indicates if a zone is conventional (bit set) or
-	 * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
-	 * bits which indicates if a zone is write locked, that is, if a write
-	 * request targeting the zone was dispatched.
-	 *
-	 * Reads of this information must be protected with blk_queue_enter() /
-	 * blk_queue_exit(). Modifying this information is only allowed while
-	 * no requests are being processed. See also blk_mq_freeze_queue() and
-	 * blk_mq_unfreeze_queue().
+	 * Zoned block device information. Reads of this information must be
+	 * protected with blk_queue_enter() / blk_queue_exit(). Modifying this
+	 * information is only allowed while no requests are being processed.
+	 * See also blk_mq_freeze_queue() and blk_mq_unfreeze_queue().
 	 */
 	unsigned int		nr_zones;
+	unsigned int		zone_capacity;
 	unsigned long		*conv_zones_bitmap;
-	unsigned long		*seq_zones_wlock;
+	unsigned int		zone_wplugs_hash_bits;
+	spinlock_t		zone_wplugs_lock;
+	struct mempool_s	*zone_wplugs_pool;
+	struct hlist_head	*zone_wplugs_hash;
+	struct list_head	zone_wplugs_err_list;
+	struct work_struct	zone_wplugs_work;
+	struct workqueue_struct *zone_wplugs_wq;
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 #if IS_ENABLED(CONFIG_CDROM)
@@ -233,6 +232,19 @@ static inline unsigned int disk_openers(struct gendisk *disk)
 	return atomic_read(&disk->part0->bd_openers);
 }
 
+/**
+ * disk_has_partscan - return %true if partition scanning is enabled on a disk
+ * @disk: disk to check
+ *
+ * Returns %true if partitions scanning is enabled for @disk, or %false if
+ * partition scanning is disabled either permanently or temporarily.
+ */
+static inline bool disk_has_partscan(struct gendisk *disk)
+{
+	return !(disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) &&
+		!test_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
+}
+
 /*
  * The gendisk is refcounted by the part0 block_device, and the bd_device
  * therein is also used for device model presentation in sysfs.
@@ -331,8 +343,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
 		unsigned int nr_zones, report_zones_cb cb, void *data);
 int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
 		sector_t sectors, sector_t nr_sectors);
-int blk_revalidate_disk_zones(struct gendisk *disk,
-		void (*update_driver_data)(struct gendisk *disk));
+int blk_revalidate_disk_zones(struct gendisk *disk);
 
 /*
  * Independent access ranges: struct blk_independent_access_range describes
@@ -449,8 +460,6 @@ struct request_queue {
 	atomic_t		nr_active_requests_shared_tags;
 
-	unsigned int		required_elevator_features;
-
 	struct blk_mq_tags	*sched_shared_tags;
 
 	struct list_head	icq_list;
@@ -633,15 +642,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 	return sector >> ilog2(disk->queue->limits.chunk_sectors);
 }
 
-static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
-{
-	if (!blk_queue_is_zoned(disk->queue))
-		return false;
-	if (!disk->conv_zones_bitmap)
-		return true;
-	return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
-}
-
 static inline void disk_set_max_open_zones(struct gendisk *disk,
 		unsigned int max_open_zones)
 {
@@ -664,6 +664,7 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 	return bdev->bd_disk->queue->limits.max_active_zones;
 }
 
+bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline unsigned int bdev_nr_zones(struct block_device *bdev)
 {
@@ -674,10 +675,6 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk)
 {
 	return 0;
 }
-static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
-{
-	return false;
-}
 static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
 {
 	return 0;
@@ -691,6 +688,10 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
 {
 	return 0;
 }
+static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
+{
+	return false;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline unsigned int blk_queue_depth(struct request_queue *q)
@@ -855,9 +856,11 @@ static inline unsigned int bio_zone_no(struct bio *bio)
 	return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
 }
 
-static inline unsigned int bio_zone_is_seq(struct bio *bio)
+static inline bool bio_straddles_zones(struct bio *bio)
 {
-	return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
+	return bio_sectors(bio) &&
+		bio_zone_no(bio) !=
+		disk_zone_no(bio->bi_bdev->bd_disk, bio_end_sector(bio) - 1);
 }
 
 /*
@@ -942,14 +945,6 @@ disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
 void disk_set_independent_access_ranges(struct gendisk *disk,
 				struct blk_independent_access_ranges *iars);
 
-/*
- * Elevator features for blk_queue_required_elevator_features:
- */
-
-/* Supports zoned block devices sequential write constraint */
-#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
-
-extern void blk_queue_required_elevator_features(struct request_queue *q,
-						 unsigned int features);
 extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
 					      struct device *dev);
 
@@ -1156,12 +1151,29 @@ static inline unsigned int
 queue_max_segment_size(const struct request_queue *q)
 {
 	return q->limits.max_segment_size;
 }
 
-static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
+static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l)
 {
-	const struct queue_limits *l = &q->limits;
+	unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
 
-	return min(l->max_zone_append_sectors, l->max_sectors);
+	return min_not_zero(l->max_zone_append_sectors, max_sectors);
+}
+
+static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
+{
+	if (!blk_queue_is_zoned(q))
+		return 0;
+
+	return queue_limits_max_zone_append_sectors(&q->limits);
+}
+
+static inline bool queue_emulates_zone_append(struct request_queue *q)
+{
+	return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
+}
+
+static inline bool bdev_emulates_zone_append(struct block_device *bdev)
+{
+	return queue_emulates_zone_append(bdev_get_queue(bdev));
 }
 
 static inline unsigned int
@@ -1303,18 +1315,6 @@ static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec)
 	return disk_zone_no(bdev->bd_disk, sec);
 }
 
-/* Whether write serialization is required for @op on zoned devices. */
-static inline bool op_needs_zoned_write_locking(enum req_op op)
-{
-	return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES;
-}
-
-static inline bool bdev_op_is_zoned_write(struct block_device *bdev,
-					  enum req_op op)
-{
-	return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op);
-}
-
 static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
@@ -1330,6 +1330,12 @@ static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev,
 	return sector & (bdev_zone_sectors(bdev) - 1);
 }
 
+static inline sector_t bio_offset_from_zone_start(struct bio *bio)
+{
+	return bdev_offset_from_zone_start(bio->bi_bdev,
+					   bio->bi_iter.bi_sector);
+}
+
 static inline bool bdev_is_zone_start(struct block_device *bdev,
 				      sector_t sector)
 {
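Editor's note: the new bio_list_merge_init() in bio.h is a splice-and-reset, i.e. bio_list_merge() followed by bio_list_init() on the source list. A small usage sketch; the device struct and lock layout are made up for illustration:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/spinlock.h>

/* Hypothetical device context with a producer-filled pending list. */
struct mydev {
	spinlock_t	lock;
	struct bio_list	pending;
};

static void mydev_drain(struct mydev *dev)
{
	struct bio_list local = BIO_EMPTY_LIST;
	struct bio *bio;

	/* Grab every queued bio in one shot, leaving 'pending' empty. */
	spin_lock(&dev->lock);
	bio_list_merge_init(&local, &dev->pending);
	spin_unlock(&dev->lock);

	/* Submit outside the lock. */
	while ((bio = bio_list_pop(&local)))
		submit_bio_noacct(bio);
}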
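Editor's note: the partscan attribute mentioned at the top of the changelog presumably reduces to the new disk_has_partscan() helper in its show method. A sketch of what such a sysfs show function looks like; only disk_has_partscan() is in this diff, the function name and placement are assumptions:

#include <linux/blkdev.h>
#include <linux/device.h>

/* Sketch of a sysfs show method backed by the new helper. */
static ssize_t partscan_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n",
			  disk_has_partscan(dev_to_disk(dev)));
}

Userspace (e.g. systemd) can then presumably read this as /sys/block/<disk>/partscan instead of relying on internal state.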
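Editor's note: the reworked zone-append helpers also change semantics. Judging from the diff, a zoned queue reporting max_zone_append_sectors == 0 is treated as emulating zone append (queue_emulates_zone_append()), with the effective limit falling back to min(chunk_sectors, max_hw_sectors) via min_not_zero(). A toy check built on the new helpers; the function name is hypothetical and the interpretation is an assumption:

#include <linux/blkdev.h>
#include <linux/printk.h>

/*
 * Sketch: report how large a REQ_OP_ZONE_APPEND payload can be on a
 * given bdev, and whether appends would be emulated by the block
 * layer rather than handled natively by the device.
 */
static unsigned int mydrv_zone_append_limit(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (bdev_emulates_zone_append(bdev))
		pr_info("zone appends emulated, not native\n");

	/* 0 for non-zoned queues, the effective limit otherwise */
	return queue_max_zone_append_sectors(q);
}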