summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-04 20:00:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-04 20:00:14 -0700
commitfa9db655d0e112c108fe838809608caf759bdf5e (patch)
tree899a983b333871688095fd14b413c199b9a38f73 /block
parente495274793ea602415d050452088a496abcd9e6c (diff)
parentbc792884b76f0da2f5c9a8d720e430e2de9756f5 (diff)
Merge tag 'for-5.20/block-2022-08-04' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: - NVMe pull requests via Christoph: - add support for In-Band authentication (Hannes Reinecke) - handle the persistent internal error AER (Michael Kelley) - use in-capsule data for TCP I/O queue connect (Caleb Sander) - remove timeout for getting RDMA-CM established event (Israel Rukshin) - misc cleanups (Joel Granados, Sagi Grimberg, Chaitanya Kulkarni, Guixin Liu, Xiang wangx) - use command_id instead of req->tag in trace_nvme_complete_rq() (Bean Huo) - various fixes for the new authentication code (Lukas Bulwahn, Dan Carpenter, Colin Ian King, Chaitanya Kulkarni, Hannes Reinecke) - small cleanups (Liu Song, Christoph Hellwig) - restore compat_ioctl support (Nick Bowler) - make a nvmet-tcp workqueue lockdep-safe (Sagi Grimberg) - enable generic interface (/dev/ngXnY) for unknown command sets (Joel Granados, Christoph Hellwig) - don't always build constants.o (Christoph Hellwig) - print the command name of aborted commands (Christoph Hellwig) - MD pull requests via Song: - Improve raid5 lock contention, by Logan Gunthorpe. - Misc fixes to raid5, by Logan Gunthorpe. - Fix race condition with md_reap_sync_thread(), by Guoqing Jiang. - Fix potential deadlock with raid5_quiesce and raid5_get_active_stripe, by Logan Gunthorpe. - Refactoring md_alloc(), by Christoph" - Fix md disk_name lifetime problems, by Christoph Hellwig - Convert prepare_to_wait() to wait_woken() api, by Logan Gunthorpe; - Fix sectors_to_do bitmap issue, by Logan Gunthorpe. - Work on unifying the null_blk module parameters and configfs API (Vincent) - drbd bitmap IO error fix (Lars) - Set of rnbd fixes (Guoqing, Md Haris) - Remove experimental marker on bcache async device registration (Coly) - Series from cleaning up the bio splitting (Christoph) - Removal of the sx8 block driver. This hardware never really widespread, and it didn't receive a lot of attention after the initial merge of it back in 2005 (Christoph) - A few fixes for s390 dasd (Eric, Jiang) - Followup set of fixes for ublk (Ming) - Support for UBLK_IO_NEED_GET_DATA for ublk (ZiyangZhang) - Fixes for the dio dma alignment (Keith) - Misc fixes and cleanups (Ming, Yu, Dan, Christophe * tag 'for-5.20/block-2022-08-04' of git://git.kernel.dk/linux-block: (136 commits) s390/dasd: Establish DMA alignment s390/dasd: drop unexpected word 'for' in comments ublk_drv: add support for UBLK_IO_NEED_GET_DATA ublk_cmd.h: add one new ublk command: UBLK_IO_NEED_GET_DATA ublk_drv: cleanup ublksrv_ctrl_dev_info ublk_drv: add SET_PARAMS/GET_PARAMS control command ublk_drv: fix ublk device leak in case that add_disk fails ublk_drv: cancel device even though disk isn't up block: fix leaking page ref on truncated direct io block: ensure bio_iov_add_page can't fail block: ensure iov_iter advances for added pages drivers:md:fix a potential use-after-free bug md/raid5: Ensure batch_last is released before sleeping for quiesce md/raid5: Move stripe_request_ctx up md/raid5: Drop unnecessary call to r5c_check_stripe_cache_usage() md/raid5: Make is_inactive_blocked() helper md/raid5: Refactor raid5_get_active_stripe() block: pass struct queue_limits to the bio splitting helpers block: move bio_allowed_max_sectors to blk-merge.c block: move the call to get_max_io_size out of blk_bio_segment_split ...
Diffstat (limited to 'block')
-rw-r--r--block/bio-integrity.c2
-rw-r--r--block/bio.c51
-rw-r--r--block/blk-core.c9
-rw-r--r--block/blk-merge.c185
-rw-r--r--block/blk-mq.c6
-rw-r--r--block/blk-sysfs.c2
-rw-r--r--block/blk.h47
-rw-r--r--block/bounce.c26
-rw-r--r--block/genhd.c8
9 files changed, 165 insertions, 171 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 32929c89ba8a..3f5685c00e36 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -134,7 +134,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
iv = bip->bip_vec + bip->bip_vcnt;
if (bip->bip_vcnt &&
- bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev),
+ bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
&bip->bip_vec[bip->bip_vcnt - 1], offset))
return 0;
diff --git a/block/bio.c b/block/bio.c
index 6f9f883f9a65..d6eb90d9b20b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -965,7 +965,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
* would create a gap, disallow it.
*/
bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (bvec_gap_to_prev(q, bvec, offset))
+ if (bvec_gap_to_prev(&q->limits, bvec, offset))
return 0;
}
@@ -1151,22 +1151,12 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
bio_set_flag(bio, BIO_CLONED);
}
-static void bio_put_pages(struct page **pages, size_t size, size_t off)
-{
- size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
-
- for (i = 0; i < nr; i++)
- put_page(pages[i]);
-}
-
static int bio_iov_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
bool same_page = false;
if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
- if (WARN_ON_ONCE(bio_full(bio, len)))
- return -EINVAL;
__bio_add_page(bio, page, len, offset);
return 0;
}
@@ -1209,8 +1199,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
ssize_t size, left;
- unsigned len, i;
+ unsigned len, i = 0;
size_t offset;
+ int ret = 0;
/*
* Move page array up in the allocated memory for the bio vecs as far as
@@ -1227,32 +1218,40 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
*/
- size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
- if (size > 0)
+ size = iov_iter_get_pages(iter, pages, UINT_MAX - bio->bi_iter.bi_size,
+ nr_pages, &offset);
+ if (size > 0) {
+ nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
- if (unlikely(size <= 0))
- return size ? size : -EFAULT;
+ } else
+ nr_pages = 0;
+
+ if (unlikely(size <= 0)) {
+ ret = size ? size : -EFAULT;
+ goto out;
+ }
for (left = size, i = 0; left > 0; left -= len, i++) {
struct page *page = pages[i];
- int ret;
len = min_t(size_t, PAGE_SIZE - offset, left);
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
ret = bio_iov_add_zone_append_page(bio, page, len,
offset);
- else
- ret = bio_iov_add_page(bio, page, len, offset);
+ if (ret)
+ break;
+ } else
+ bio_iov_add_page(bio, page, len, offset);
- if (ret) {
- bio_put_pages(pages + i, left, offset);
- return ret;
- }
offset = 0;
}
- iov_iter_advance(iter, size);
- return 0;
+ iov_iter_advance(iter, size - left);
+out:
+ while (i < nr_pages)
+ put_page(pages[i++]);
+
+ return ret;
}
/**
diff --git a/block/blk-core.c b/block/blk-core.c
index 3d286a256d3d..a0d1104c5590 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -377,7 +377,6 @@ static void blk_timeout_work(struct work_struct *work)
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
{
struct request_queue *q;
- int ret;
q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
GFP_KERNEL | __GFP_ZERO, node_id);
@@ -396,13 +395,9 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
if (q->id < 0)
goto fail_srcu;
- ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0);
- if (ret)
- goto fail_id;
-
q->stats = blk_alloc_queue_stats();
if (!q->stats)
- goto fail_split;
+ goto fail_id;
q->node = node_id;
@@ -439,8 +434,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
fail_stats:
blk_free_queue_stats(q->stats);
-fail_split:
- bioset_exit(&q->bio_split);
fail_id:
ida_free(&blk_queue_ida, q->id);
fail_srcu:
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 4c8a699754c9..ff04e9290715 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -82,7 +82,7 @@ static inline bool bio_will_gap(struct request_queue *q,
bio_get_first_bvec(next, &nb);
if (biovec_phys_mergeable(q, &pb, &nb))
return false;
- return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+ return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
}
static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
@@ -95,23 +95,30 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
return bio_will_gap(req->q, NULL, bio, req->bio);
}
-static struct bio *blk_bio_discard_split(struct request_queue *q,
- struct bio *bio,
- struct bio_set *bs,
- unsigned *nsegs)
+/*
+ * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
+ * is defined as 'unsigned int', meantime it has to be aligned to with the
+ * logical block size, which is the minimum accepted unit by hardware.
+ */
+static unsigned int bio_allowed_max_sectors(struct queue_limits *lim)
+{
+ return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
+}
+
+static struct bio *bio_split_discard(struct bio *bio, struct queue_limits *lim,
+ unsigned *nsegs, struct bio_set *bs)
{
unsigned int max_discard_sectors, granularity;
- int alignment;
sector_t tmp;
unsigned split_sectors;
*nsegs = 1;
/* Zero-sector (unknown) and one-sector granularities are the same. */
- granularity = max(q->limits.discard_granularity >> 9, 1U);
+ granularity = max(lim->discard_granularity >> 9, 1U);
- max_discard_sectors = min(q->limits.max_discard_sectors,
- bio_allowed_max_sectors(q));
+ max_discard_sectors =
+ min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors)) {
@@ -128,9 +135,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
* If the next starting sector would be misaligned, stop the discard at
* the previous aligned sector.
*/
- alignment = (q->limits.discard_alignment >> 9) % granularity;
-
- tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
+ tmp = bio->bi_iter.bi_sector + split_sectors -
+ ((lim->discard_alignment >> 9) % granularity);
tmp = sector_div(tmp, granularity);
if (split_sectors > tmp)
@@ -139,18 +145,15 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
return bio_split(bio, split_sectors, GFP_NOIO, bs);
}
-static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
- struct bio *bio, struct bio_set *bs, unsigned *nsegs)
+static struct bio *bio_split_write_zeroes(struct bio *bio,
+ struct queue_limits *lim, unsigned *nsegs, struct bio_set *bs)
{
*nsegs = 0;
-
- if (!q->limits.max_write_zeroes_sectors)
+ if (!lim->max_write_zeroes_sectors)
return NULL;
-
- if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
+ if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
return NULL;
-
- return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
+ return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
}
/*
@@ -161,17 +164,17 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
* requests that are submitted to a block device if the start of a bio is not
* aligned to a physical block boundary.
*/
-static inline unsigned get_max_io_size(struct request_queue *q,
- struct bio *bio)
+static inline unsigned get_max_io_size(struct bio *bio,
+ struct queue_limits *lim)
{
- unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
- unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
- unsigned max_sectors = queue_max_sectors(q), start, end;
+ unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
+ unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
+ unsigned max_sectors = lim->max_sectors, start, end;
- if (q->limits.chunk_sectors) {
+ if (lim->chunk_sectors) {
max_sectors = min(max_sectors,
blk_chunk_sectors_left(bio->bi_iter.bi_sector,
- q->limits.chunk_sectors));
+ lim->chunk_sectors));
}
start = bio->bi_iter.bi_sector & (pbs - 1);
@@ -181,11 +184,10 @@ static inline unsigned get_max_io_size(struct request_queue *q,
return max_sectors & ~(lbs - 1);
}
-static inline unsigned get_max_segment_size(const struct request_queue *q,
- struct page *start_page,
- unsigned long offset)
+static inline unsigned get_max_segment_size(struct queue_limits *lim,
+ struct page *start_page, unsigned long offset)
{
- unsigned long mask = queue_segment_boundary(q);
+ unsigned long mask = lim->seg_boundary_mask;
offset = mask & (page_to_phys(start_page) + offset);
@@ -194,12 +196,12 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
* on 32bit arch, use queue's max segment size when that happens.
*/
return min_not_zero(mask - offset + 1,
- (unsigned long)queue_max_segment_size(q));
+ (unsigned long)lim->max_segment_size);
}
/**
* bvec_split_segs - verify whether or not a bvec should be split in the middle
- * @q: [in] request queue associated with the bio associated with @bv
+ * @lim: [in] queue limits to split based on
* @bv: [in] bvec to examine
* @nsegs: [in,out] Number of segments in the bio being built. Incremented
* by the number of segments from @bv that may be appended to that
@@ -217,10 +219,9 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
* *@nsegs segments and *@sectors sectors would make that bio unacceptable for
* the block driver.
*/
-static bool bvec_split_segs(const struct request_queue *q,
- const struct bio_vec *bv, unsigned *nsegs,
- unsigned *bytes, unsigned max_segs,
- unsigned max_bytes)
+static bool bvec_split_segs(struct queue_limits *lim, const struct bio_vec *bv,
+ unsigned *nsegs, unsigned *bytes, unsigned max_segs,
+ unsigned max_bytes)
{
unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
unsigned len = min(bv->bv_len, max_len);
@@ -228,7 +229,7 @@ static bool bvec_split_segs(const struct request_queue *q,
unsigned seg_size = 0;
while (len && *nsegs < max_segs) {
- seg_size = get_max_segment_size(q, bv->bv_page,
+ seg_size = get_max_segment_size(lim, bv->bv_page,
bv->bv_offset + total_len);
seg_size = min(seg_size, len);
@@ -236,7 +237,7 @@ static bool bvec_split_segs(const struct request_queue *q,
total_len += seg_size;
len -= seg_size;
- if ((bv->bv_offset + total_len) & queue_virt_boundary(q))
+ if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
break;
}
@@ -247,16 +248,17 @@ static bool bvec_split_segs(const struct request_queue *q,
}
/**
- * blk_bio_segment_split - split a bio in two bios
- * @q: [in] request queue pointer
+ * bio_split_rw - split a bio in two bios
* @bio: [in] bio to be split
- * @bs: [in] bio set to allocate the clone from
+ * @lim: [in] queue limits to split based on
* @segs: [out] number of segments in the bio with the first half of the sectors
+ * @bs: [in] bio set to allocate the clone from
+ * @max_bytes: [in] maximum number of bytes per bio
*
* Clone @bio, update the bi_iter of the clone to represent the first sectors
* of @bio and update @bio->bi_iter to represent the remaining sectors. The
* following is guaranteed for the cloned bio:
- * - That it has at most get_max_io_size(@q, @bio) sectors.
+ * - That it has at most @max_bytes worth of data
* - That it has at most queue_max_segments(@q) segments.
*
* Except for discard requests the cloned bio will point at the bi_io_vec of
@@ -265,33 +267,30 @@ static bool bvec_split_segs(const struct request_queue *q,
* responsible for ensuring that @bs is only destroyed after processing of the
* split bio has finished.
*/
-static struct bio *blk_bio_segment_split(struct request_queue *q,
- struct bio *bio,
- struct bio_set *bs,
- unsigned *segs)
+static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim,
+ unsigned *segs, struct bio_set *bs, unsigned max_bytes)
{
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
unsigned nsegs = 0, bytes = 0;
- const unsigned max_bytes = get_max_io_size(q, bio) << 9;
- const unsigned max_segs = queue_max_segments(q);
bio_for_each_bvec(bv, bio, iter) {
/*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
- if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
+ if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
goto split;
- if (nsegs < max_segs &&
+ if (nsegs < lim->max_segments &&
bytes + bv.bv_len <= max_bytes &&
bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
nsegs++;
bytes += bv.bv_len;
- } else if (bvec_split_segs(q, &bv, &nsegs, &bytes, max_segs,
- max_bytes)) {
- goto split;
+ } else {
+ if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
+ lim->max_segments, max_bytes))
+ goto split;
}
bvprv = bv;
@@ -308,7 +307,7 @@ split:
* split size so that each bio is properly block size aligned, even if
* we do not use the full hardware limits.
*/
- bytes = ALIGN_DOWN(bytes, queue_logical_block_size(q));
+ bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
/*
* Bio splitting may cause subtle trouble such as hang when doing sync
@@ -320,34 +319,35 @@ split:
}
/**
- * __blk_queue_split - split a bio and submit the second half
- * @q: [in] request_queue new bio is being queued at
- * @bio: [in, out] bio to be split
- * @nr_segs: [out] number of segments in the first bio
+ * __bio_split_to_limits - split a bio to fit the queue limits
+ * @bio: bio to be split
+ * @lim: queue limits to split based on
+ * @nr_segs: returns the number of segments in the returned bio
+ *
+ * Check if @bio needs splitting based on the queue limits, and if so split off
+ * a bio fitting the limits from the beginning of @bio and return it. @bio is
+ * shortened to the remainder and re-submitted.
*
- * Split a bio into two bios, chain the two bios, submit the second half and
- * store a pointer to the first half in *@bio. If the second bio is still too
- * big it will be split by a recursive call to this function. Since this
- * function may allocate a new bio from q->bio_split, it is the responsibility
- * of the caller to ensure that q->bio_split is only released after processing
- * of the split bio has finished.
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
*/
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
+struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
unsigned int *nr_segs)
{
- struct bio *split = NULL;
+ struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
+ struct bio *split;
- switch (bio_op(*bio)) {
+ switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
- split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
+ split = bio_split_discard(bio, lim, nr_segs, bs);
break;
case REQ_OP_WRITE_ZEROES:
- split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
- nr_segs);
+ split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
break;
default:
- split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
+ split = bio_split_rw(bio, lim, nr_segs, bs,
+ get_max_io_size(bio, lim) << SECTOR_SHIFT);
break;
}
@@ -356,32 +356,35 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
split->bi_opf |= REQ_NOMERGE;
blkcg_bio_issue_init(split);
- bio_chain(split, *bio);
- trace_block_split(split, (*bio)->bi_iter.bi_sector);
- submit_bio_noacct(*bio);
- *bio = split;
+ bio_chain(split, bio);
+ trace_block_split(split, bio->bi_iter.bi_sector);
+ submit_bio_noacct(bio);
+ return split;
}
+ return bio;
}
/**
- * blk_queue_split - split a bio and submit the second half
- * @bio: [in, out] bio to be split
+ * bio_split_to_limits - split a bio to fit the queue limits
+ * @bio: bio to be split
+ *
+ * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
+ * if so split off a bio fitting the limits from the beginning of @bio and
+ * return it. @bio is shortened to the remainder and re-submitted.
*
- * Split a bio into two bios, chains the two bios, submit the second half and
- * store a pointer to the first half in *@bio. Since this function may allocate
- * a new bio from q->bio_split, it is the responsibility of the caller to ensure
- * that q->bio_split is only released after processing of the split bio has
- * finished.
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
*/
-void blk_queue_split(struct bio **bio)
+struct bio *bio_split_to_limits(struct bio *bio)
{
- struct request_queue *q = bdev_get_queue((*bio)->bi_bdev);
+ struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
unsigned int nr_segs;
- if (blk_may_split(q, *bio))
- __blk_queue_split(q, bio, &nr_segs);
+ if (bio_may_exceed_limits(bio, lim))
+ return __bio_split_to_limits(bio, lim, &nr_segs);
+ return bio;
}
-EXPORT_SYMBOL(blk_queue_split);
+EXPORT_SYMBOL(bio_split_to_limits);
unsigned int blk_recalc_rq_segments(struct request *rq)
{
@@ -411,7 +414,7 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
}
rq_for_each_bvec(bv, rq, iter)
- bvec_split_segs(rq->q, &bv, &nr_phys_segs, &bytes,
+ bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
UINT_MAX, UINT_MAX);
return nr_phys_segs;
}
@@ -442,8 +445,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
while (nbytes > 0) {
unsigned offset = bvec->bv_offset + total;
- unsigned len = min(get_max_segment_size(q, bvec->bv_page,
- offset), nbytes);
+ unsigned len = min(get_max_segment_size(&q->limits,
+ bvec->bv_page, offset), nbytes);
struct page *page = bvec->bv_page;
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 70177ee74295..5ee62b95f3e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2815,9 +2815,9 @@ void blk_mq_submit_bio(struct bio *bio)
unsigned int nr_segs = 1;
blk_status_t ret;
- blk_queue_bounce(q, &bio);
- if (blk_may_split(q, bio))
- __blk_queue_split(q, &bio, &nr_segs);
+ bio = blk_queue_bounce(bio, q);
+ if (bio_may_exceed_limits(bio, &q->limits))
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio_integrity_prep(bio))
return;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index c0303026752d..e1f009aba6fd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -779,8 +779,6 @@ static void blk_release_queue(struct kobject *kobj)
if (queue_is_mq(q))
blk_mq_release(q);
- bioset_exit(&q->bio_split);
-
if (blk_queue_has_srcu(q))
cleanup_srcu_struct(q->srcu);
diff --git a/block/blk.h b/block/blk.h
index 1d83b1d41cd1..d7142c4d2fef 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -97,23 +97,23 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
return true;
}
-static inline bool __bvec_gap_to_prev(struct request_queue *q,
+static inline bool __bvec_gap_to_prev(struct queue_limits *lim,
struct bio_vec *bprv, unsigned int offset)
{
- return (offset & queue_virt_boundary(q)) ||
- ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+ return (offset & lim->virt_boundary_mask) ||
+ ((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask);
}
/*
* Check if adding a bio_vec after bprv with offset would create a gap in
* the SG list. Most drivers don't care about this, but some do.
*/
-static inline bool bvec_gap_to_prev(struct request_queue *q,
+static inline bool bvec_gap_to_prev(struct queue_limits *lim,
struct bio_vec *bprv, unsigned int offset)
{
- if (!queue_virt_boundary(q))
+ if (!lim->virt_boundary_mask)
return false;
- return __bvec_gap_to_prev(q, bprv, offset);
+ return __bvec_gap_to_prev(lim, bprv, offset);
}
static inline bool rq_mergeable(struct request *rq)
@@ -189,7 +189,8 @@ static inline bool integrity_req_gap_back_merge(struct request *req,
struct bio_integrity_payload *bip = bio_integrity(req->bio);
struct bio_integrity_payload *bip_next = bio_integrity(next);
- return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+ return bvec_gap_to_prev(&req->q->limits,
+ &bip->bip_vec[bip->bip_vcnt - 1],
bip_next->bip_vec[0].bv_offset);
}
@@ -199,7 +200,8 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
struct bio_integrity_payload *bip = bio_integrity(bio);
struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
- return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+ return bvec_gap_to_prev(&req->q->limits,
+ &bip->bip_vec[bip->bip_vcnt - 1],
bip_next->bip_vec[0].bv_offset);
}
@@ -288,7 +290,8 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
-static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
+static inline bool bio_may_exceed_limits(struct bio *bio,
+ struct queue_limits *lim)
{
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
@@ -307,12 +310,12 @@ static inline bool blk_may_split(struct request_queue *q, struct bio *bio)
* to the performance impact of cloned bios themselves the loop below
* doesn't matter anyway.
*/
- return q->limits.chunk_sectors || bio->bi_vcnt != 1 ||
+ return lim->chunk_sectors || bio->bi_vcnt != 1 ||
bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}
-void __blk_queue_split(struct request_queue *q, struct bio **bio,
- unsigned int *nr_segs);
+struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim,
+ unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
@@ -345,16 +348,6 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
}
/*
- * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
- * is defined as 'unsigned int', meantime it has to aligned to with logical
- * block size which is the minimum accepted unit by hardware.
- */
-static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
-{
- return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
-}
-
-/*
* Internal io_context interface
*/
struct io_cq *ioc_find_get_icq(struct request_queue *q);
@@ -378,7 +371,7 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
#endif
-void __blk_queue_bounce(struct request_queue *q, struct bio **bio);
+struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
static inline bool blk_queue_may_bounce(struct request_queue *q)
{
@@ -387,10 +380,12 @@ static inline bool blk_queue_may_bounce(struct request_queue *q)
max_low_pfn >= max_pfn;
}
-static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
+static inline struct bio *blk_queue_bounce(struct bio *bio,
+ struct request_queue *q)
{
- if (unlikely(blk_queue_may_bounce(q) && bio_has_data(*bio)))
- __blk_queue_bounce(q, bio);
+ if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio)))
+ return __blk_queue_bounce(bio, q);
+ return bio;
}
#ifdef CONFIG_BLK_CGROUP_IOLATENCY
diff --git a/block/bounce.c b/block/bounce.c
index c8f487af7be3..7cfcb242f9a1 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -199,24 +199,24 @@ err_put:
return NULL;
}
-void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
+struct bio *__blk_queue_bounce(struct bio *bio_orig, struct request_queue *q)
{
struct bio *bio;
- int rw = bio_data_dir(*bio_orig);
+ int rw = bio_data_dir(bio_orig);
struct bio_vec *to, from;
struct bvec_iter iter;
unsigned i = 0, bytes = 0;
bool bounce = false;
int sectors;
- bio_for_each_segment(from, *bio_orig, iter) {
+ bio_for_each_segment(from, bio_orig, iter) {
if (i++ < BIO_MAX_VECS)
bytes += from.bv_len;
if (PageHighMem(from.bv_page))
bounce = true;
}
if (!bounce)
- return;
+ return bio_orig;
/*
* Individual bvecs might not be logical block aligned. Round down
@@ -225,13 +225,13 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
*/
sectors = ALIGN_DOWN(bytes, queue_logical_block_size(q)) >>
SECTOR_SHIFT;
- if (sectors < bio_sectors(*bio_orig)) {
- bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
- bio_chain(bio, *bio_orig);
- submit_bio_noacct(*bio_orig);
- *bio_orig = bio;
+ if (sectors < bio_sectors(bio_orig)) {
+ bio = bio_split(bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
+ bio_chain(bio, bio_orig);
+ submit_bio_noacct(bio_orig);
+ bio_orig = bio;
}
- bio = bounce_clone_bio(*bio_orig);
+ bio = bounce_clone_bio(bio_orig);
/*
* Bvec table can't be updated by bio_for_each_segment_all(),
@@ -254,7 +254,7 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
to->bv_page = bounce_page;
}
- trace_block_bio_bounce(*bio_orig);
+ trace_block_bio_bounce(bio_orig);
bio->bi_flags |= (1 << BIO_BOUNCED);
@@ -263,6 +263,6 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
else
bio->bi_end_io = bounce_end_io_write;
- bio->bi_private = *bio_orig;
- *bio_orig = bio;
+ bio->bi_private = bio_orig;
+ return bio;
}
diff --git a/block/genhd.c b/block/genhd.c
index e1d5b10ac193..b901fea1d55a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1151,6 +1151,7 @@ static void disk_release(struct device *dev)
blk_mq_exit_queue(disk->queue);
blkcg_exit_queue(disk->queue);
+ bioset_exit(&disk->bio_split);
disk_release_events(disk);
kfree(disk->random);
@@ -1342,9 +1343,12 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
if (!disk)
goto out_put_queue;
+ if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
+ goto out_free_disk;
+
disk->bdi = bdi_alloc(node_id);
if (!disk->bdi)
- goto out_free_disk;
+ goto out_free_bioset;
/* bdev_alloc() might need the queue, set before the first call */
disk->queue = q;
@@ -1382,6 +1386,8 @@ out_destroy_part_tbl:
iput(disk->part0->bd_inode);
out_free_bdi:
bdi_put(disk->bdi);
+out_free_bioset:
+ bioset_exit(&disk->bio_split);
out_free_disk:
kfree(disk);
out_put_queue: