summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-05-13 13:03:54 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-05-13 13:03:54 -0700
commit: 0c9f4ac808b017a0013cee92a30de980550145d5 (patch)
tree: 94eedbb9ef4815df9dc8d1dd6424fc92a2fbcd7a /include
parent: 9961a785944601e32f185ea696347b22ffda634c (diff)
parent: a3166c51702bb00b8f8b84022090cbab8f37be1a (diff)
Merge tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
 - Add a partscan attribute in sysfs, fixing an issue with systemd relying on an internal interface that went away.
 - Attempt #2 at making long running discards interruptible. The previous attempt went into 6.9, but we ended up mostly reverting it as it had issues.
 - Remove old ida_simple API in bcache
 - Support for zoned write plugging, greatly improving the performance on zoned devices.
 - Remove the old throttle low interface, which has been experimental since 2017 and never made it beyond that and isn't being used.
 - Remove page->index debugging checks in brd, as it hasn't caught anything and prepares us for removing it from struct page.
 - MD pull request from Song
 - Don't schedule block workers on isolated CPUs
* tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux: (84 commits)
  blk-throttle: delay initialization until configuration
  blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW
  block: fix that util can be greater than 100%
  block: support to account io_ticks precisely
  block: add plug while submitting IO
  bcache: fix variable length array abuse in btree_iter
  bcache: Remove usage of the deprecated ida_simple_xx() API
  md: Revert "md: Fix overflow in is_mddev_idle"
  blk-lib: check for kill signal in ioctl BLKDISCARD
  block: add a bio_await_chain helper
  block: add a blk_alloc_discard_bio helper
  block: add a bio_chain_and_submit helper
  block: move discard checks into the ioctl handler
  block: remove the discard_granularity check in __blkdev_issue_discard
  block/ioctl: prefer different overflow check
  null_blk: Fix the WARNING: modpost: missing MODULE_DESCRIPTION()
  block: fix and simplify blkdevparts= cmdline parsing
  block: refine the EOF check in blkdev_iomap_begin
  block: add a partscan sysfs attribute for disks
  block: add a disk_has_partscan helper
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/linux/bio.h 11
-rw-r--r-- include/linux/blk-mq.h 85
-rw-r--r-- include/linux/blk_types.h 30
-rw-r--r-- include/linux/blkdev.h 116
4 files changed, 86 insertions, 156 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 875d792bffff..d5379548d684 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -615,6 +615,13 @@ static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
bl->tail = bl2->tail;
}
+static inline void bio_list_merge_init(struct bio_list *bl,
+ struct bio_list *bl2)
+{
+ bio_list_merge(bl, bl2);
+ bio_list_init(bl2);
+}
+
static inline void bio_list_merge_head(struct bio_list *bl,
struct bio_list *bl2)
{
@@ -824,5 +831,9 @@ static inline void bio_clear_polled(struct bio *bio)
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
+struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
+
+struct bio *blk_alloc_discard_bio(struct block_device *bdev,
+ sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask);
#endif /* __LINUX_BIO_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d3d8fd8e229b..89ba6b16fe8b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -54,8 +54,8 @@ typedef __u32 __bitwise req_flags_t;
/* Look at ->special_vec for the actual data payload instead of the
bio chain. */
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
-/* The per-zone write lock is held for this request */
-#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
+/* The request completion needs to be signaled to zone write plugging. */
+#define RQF_ZONE_WRITE_PLUGGING ((__force req_flags_t)(1 << 20))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
#define RQF_RESV ((__force req_flags_t)(1 << 23))
@@ -1150,85 +1150,4 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
}
void blk_dump_rq_flags(struct request *, char *);
-#ifdef CONFIG_BLK_DEV_ZONED
-static inline unsigned int blk_rq_zone_no(struct request *rq)
-{
- return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
-}
-
-static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
-{
- return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
-}
-
-/**
- * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization.
- * @rq: Request to examine.
- *
- * Note: REQ_OP_ZONE_APPEND requests do not require serialization.
- */
-static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
-{
- return op_needs_zoned_write_locking(req_op(rq)) &&
- blk_rq_zone_is_seq(rq);
-}
-
-bool blk_req_needs_zone_write_lock(struct request *rq);
-bool blk_req_zone_write_trylock(struct request *rq);
-void __blk_req_zone_write_lock(struct request *rq);
-void __blk_req_zone_write_unlock(struct request *rq);
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
- if (blk_req_needs_zone_write_lock(rq))
- __blk_req_zone_write_lock(rq);
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
- if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
- __blk_req_zone_write_unlock(rq);
-}
-
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
- return rq->q->disk->seq_zones_wlock &&
- test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock);
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
- if (!blk_req_needs_zone_write_lock(rq))
- return true;
- return !blk_req_zone_is_write_locked(rq);
-}
-#else /* CONFIG_BLK_DEV_ZONED */
-static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
-{
- return false;
-}
-
-static inline bool blk_req_needs_zone_write_lock(struct request *rq)
-{
- return false;
-}
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
-}
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
- return false;
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
- return true;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
#endif /* BLK_MQ_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index c3e098b21c16..25dbf1097085 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -131,25 +131,13 @@ typedef u16 blk_short_t;
#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13)
/*
- * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
- * related resources are unavailable, but the driver can guarantee the queue
- * will be rerun in the future once the resources become available again.
- *
- * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
- * a zone specific resource and IO to a different zone on the same device could
- * still be served. Examples of that are zones that are write-locked, but a read
- * to the same zone could be served.
- */
-#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14)
-
-/*
* BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
* path if the device returns a status indicating that too many zone resources
* are currently open. The same command should be successful if resubmitted
* after the number of open zones decreases below the device's limits, which is
* reported in the request_queue's max_open_zones.
*/
-#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15)
+#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)14)
/*
* BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
@@ -158,20 +146,20 @@ typedef u16 blk_short_t;
* after the number of active zones decreases below the device's limits, which
* is reported in the request_queue's max_active_zones.
*/
-#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16)
+#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)15)
/*
* BLK_STS_OFFLINE is returned from the driver when the target device is offline
* or is being taken offline. This could help differentiate the case where a
* device is intentionally being shut down from a real I/O error.
*/
-#define BLK_STS_OFFLINE ((__force blk_status_t)17)
+#define BLK_STS_OFFLINE ((__force blk_status_t)16)
/*
* BLK_STS_DURATION_LIMIT is returned from the driver when the target device
* aborted the command because it exceeded one of its Command Duration Limits.
*/
-#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)18)
+#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)17)
/**
* blk_path_error - returns true if error may be path related
@@ -228,7 +216,12 @@ struct bio {
struct bvec_iter bi_iter;
- blk_qc_t bi_cookie;
+ union {
+ /* for polled bios: */
+ blk_qc_t bi_cookie;
+ /* for plugged zoned writes only: */
+ unsigned int __bi_nr_segments;
+ };
bio_end_io_t *bi_end_io;
void *bi_private;
#ifdef CONFIG_BLK_CGROUP
@@ -298,7 +291,8 @@ enum {
BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
- BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
+ BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
BIO_FLAG_LAST
};
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69e7da33ca49..69c4f113db42 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -179,22 +179,21 @@ struct gendisk {
#ifdef CONFIG_BLK_DEV_ZONED
/*
- * Zoned block device information for request dispatch control.
- * nr_zones is the total number of zones of the device. This is always
- * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
- * bits which indicates if a zone is conventional (bit set) or
- * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
- * bits which indicates if a zone is write locked, that is, if a write
- * request targeting the zone was dispatched.
- *
- * Reads of this information must be protected with blk_queue_enter() /
- * blk_queue_exit(). Modifying this information is only allowed while
- * no requests are being processed. See also blk_mq_freeze_queue() and
- * blk_mq_unfreeze_queue().
+ * Zoned block device information. Reads of this information must be
+ * protected with blk_queue_enter() / blk_queue_exit(). Modifying this
+ * information is only allowed while no requests are being processed.
+ * See also blk_mq_freeze_queue() and blk_mq_unfreeze_queue().
*/
unsigned int nr_zones;
+ unsigned int zone_capacity;
unsigned long *conv_zones_bitmap;
- unsigned long *seq_zones_wlock;
+ unsigned int zone_wplugs_hash_bits;
+ spinlock_t zone_wplugs_lock;
+ struct mempool_s *zone_wplugs_pool;
+ struct hlist_head *zone_wplugs_hash;
+ struct list_head zone_wplugs_err_list;
+ struct work_struct zone_wplugs_work;
+ struct workqueue_struct *zone_wplugs_wq;
#endif /* CONFIG_BLK_DEV_ZONED */
#if IS_ENABLED(CONFIG_CDROM)
@@ -233,6 +232,19 @@ static inline unsigned int disk_openers(struct gendisk *disk)
return atomic_read(&disk->part0->bd_openers);
}
+/**
+ * disk_has_partscan - return %true if partition scanning is enabled on a disk
+ * @disk: disk to check
+ *
+ * Returns %true if partition scanning is enabled for @disk, or %false if
+ * partition scanning is disabled either permanently or temporarily.
+ */
+static inline bool disk_has_partscan(struct gendisk *disk)
+{
+ return !(disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) &&
+ !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
+}
+
/*
* The gendisk is refcounted by the part0 block_device, and the bd_device
* therein is also used for device model presentation in sysfs.
@@ -331,8 +343,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sectors, sector_t nr_sectors);
-int blk_revalidate_disk_zones(struct gendisk *disk,
- void (*update_driver_data)(struct gendisk *disk));
+int blk_revalidate_disk_zones(struct gendisk *disk);
/*
* Independent access ranges: struct blk_independent_access_range describes
@@ -449,8 +460,6 @@ struct request_queue {
atomic_t nr_active_requests_shared_tags;
- unsigned int required_elevator_features;
-
struct blk_mq_tags *sched_shared_tags;
struct list_head icq_list;
@@ -633,15 +642,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
return sector >> ilog2(disk->queue->limits.chunk_sectors);
}
-static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
-{
- if (!blk_queue_is_zoned(disk->queue))
- return false;
- if (!disk->conv_zones_bitmap)
- return true;
- return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
-}
-
static inline void disk_set_max_open_zones(struct gendisk *disk,
unsigned int max_open_zones)
{
@@ -664,6 +664,7 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
return bdev->bd_disk->queue->limits.max_active_zones;
}
+bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int bdev_nr_zones(struct block_device *bdev)
{
@@ -674,10 +675,6 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
return 0;
}
-static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
-{
- return false;
-}
static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
{
return 0;
@@ -691,6 +688,10 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
{
return 0;
}
+static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
+{
+ return false;
+}
#endif /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_depth(struct request_queue *q)
@@ -855,9 +856,11 @@ static inline unsigned int bio_zone_no(struct bio *bio)
return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
}
-static inline unsigned int bio_zone_is_seq(struct bio *bio)
+static inline bool bio_straddles_zones(struct bio *bio)
{
- return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
+ return bio_sectors(bio) &&
+ bio_zone_no(bio) !=
+ disk_zone_no(bio->bi_bdev->bd_disk, bio_end_sector(bio) - 1);
}
/*
@@ -942,14 +945,6 @@ disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
void disk_set_independent_access_ranges(struct gendisk *disk,
struct blk_independent_access_ranges *iars);
-/*
- * Elevator features for blk_queue_required_elevator_features:
- */
-/* Supports zoned block devices sequential write constraint */
-#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
-
-extern void blk_queue_required_elevator_features(struct request_queue *q,
- unsigned int features);
extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
struct device *dev);
@@ -1156,12 +1151,29 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
return q->limits.max_segment_size;
}
-static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
+static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l)
{
+ unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
- const struct queue_limits *l = &q->limits;
+ return min_not_zero(l->max_zone_append_sectors, max_sectors);
+}
+
+static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
+{
+ if (!blk_queue_is_zoned(q))
+ return 0;
+
+ return queue_limits_max_zone_append_sectors(&q->limits);
+}
+
+static inline bool queue_emulates_zone_append(struct request_queue *q)
+{
+ return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
+}
- return min(l->max_zone_append_sectors, l->max_sectors);
+static inline bool bdev_emulates_zone_append(struct block_device *bdev)
+{
+ return queue_emulates_zone_append(bdev_get_queue(bdev));
}
static inline unsigned int
@@ -1303,18 +1315,6 @@ static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec)
return disk_zone_no(bdev->bd_disk, sec);
}
-/* Whether write serialization is required for @op on zoned devices. */
-static inline bool op_needs_zoned_write_locking(enum req_op op)
-{
- return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES;
-}
-
-static inline bool bdev_op_is_zoned_write(struct block_device *bdev,
- enum req_op op)
-{
- return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op);
-}
-
static inline sector_t bdev_zone_sectors(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
@@ -1330,6 +1330,12 @@ static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev,
return sector & (bdev_zone_sectors(bdev) - 1);
}
+static inline sector_t bio_offset_from_zone_start(struct bio *bio)
+{
+ return bdev_offset_from_zone_start(bio->bi_bdev,
+ bio->bi_iter.bi_sector);
+}
+
static inline bool bdev_is_zone_start(struct block_device *bdev,
sector_t sector)
{