From e73a625bc24880f1fe5abaa89bb63e0918fbd66c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 28 Sep 2022 09:19:59 -0600 Subject: block: kill deprecated BUG_ON() in the flush handling We've never had any useful reports from this BUG_ON(), and in fact a number of the BUG_ON()'s in the flush handling need to be turned into more graceful handling. In preparation for allowing batched completions of the end_io handling, where we can enter the flush completion with queuelist having been reused for the batch, get rid of this BUG_ON(). Signed-off-by: Jens Axboe --- block/blk-flush.c | 1 - 1 file changed, 1 deletion(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index d20a0c6b2c66..27705fc584a0 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -205,7 +205,6 @@ static void blk_flush_complete_seq(struct request *rq, * flush data request completion path. Restore @rq for * normal completion and end it. */ - BUG_ON(!list_empty(&rq->queuelist)); list_del_init(&rq->flush.list); blk_flush_restore_request(rq); blk_mq_end_request(rq, error); -- cgit v1.2.3-58-ga151 From 4b6a5d9cea911424e84107df8c4eb8317938d2cd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Sep 2022 08:22:09 -0600 Subject: block: enable batched allocation for blk_mq_alloc_request() The filesystem IO path can take advantage of allocating batches of requests, if the underlying submitter tells the block layer about it through the blk_plug. For passthrough IO, the exported API is the blk_mq_alloc_request() helper, and that one does not allow for request caching. Wire up request caching for blk_mq_alloc_request(), which is generally done without having a bio available upfront. Tested-by: Anuj Gupta Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 83492d942348..b32f70f38c6e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -510,25 +510,87 @@ retry: alloc_time_ns); } -struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, - blk_mq_req_flags_t flags) +static struct request *blk_mq_rq_cache_fill(struct request_queue *q, + struct blk_plug *plug, + blk_opf_t opf, + blk_mq_req_flags_t flags) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, .cmd_flags = opf, - .nr_tags = 1, + .nr_tags = plug->nr_ios, + .cached_rq = &plug->cached_rq, }; struct request *rq; - int ret; - ret = blk_queue_enter(q, flags); - if (ret) - return ERR_PTR(ret); + if (blk_queue_enter(q, flags)) + return NULL; + + plug->nr_ios = 1; rq = __blk_mq_alloc_requests(&data); - if (!rq) - goto out_queue_exit; + if (unlikely(!rq)) + blk_queue_exit(q); + return rq; +} + +static struct request *blk_mq_alloc_cached_request(struct request_queue *q, + blk_opf_t opf, + blk_mq_req_flags_t flags) +{ + struct blk_plug *plug = current->plug; + struct request *rq; + + if (!plug) + return NULL; + if (rq_list_empty(plug->cached_rq)) { + if (plug->nr_ios == 1) + return NULL; + rq = blk_mq_rq_cache_fill(q, plug, opf, flags); + if (rq) + goto got_it; + return NULL; + } + rq = rq_list_peek(&plug->cached_rq); + if (!rq || rq->q != q) + return NULL; + + if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type) + return NULL; + if (op_is_flush(rq->cmd_flags) != op_is_flush(opf)) + return NULL; + + plug->cached_rq = rq_list_next(rq); +got_it: + rq->cmd_flags = opf; + INIT_LIST_HEAD(&rq->queuelist); + return rq; +} + +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, + blk_mq_req_flags_t flags) +{ + struct request *rq; + + rq = blk_mq_alloc_cached_request(q, opf, flags); + if (!rq) { + struct blk_mq_alloc_data data = { + .q = q, + .flags = flags, + .cmd_flags = opf, + .nr_tags = 1, + }; + int ret; + + ret = blk_queue_enter(q, flags); + if (ret) + return ERR_PTR(ret); + + rq = __blk_mq_alloc_requests(&data); + if (!rq) + goto out_queue_exit; + } rq->__data_len = 0; rq->__sector = (sector_t) -1; rq->bio = rq->biotail = NULL; -- cgit v1.2.3-58-ga151 From de671d6116b5210097cd6fbb877bac92536f265b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Sep 2022 15:19:54 -0600 Subject: block: change request end_io handler to pass back a return value Everything is just converted to returning RQ_END_IO_NONE, and there should be no functional changes with this patch. In preparation for allowing the end_io handler to pass ownership back to the block layer, rather than retain ownership of the request. Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-flush.c | 10 +++++++--- block/blk-mq.c | 14 +++++++++----- drivers/md/dm-rq.c | 4 +++- drivers/nvme/host/core.c | 6 ++++-- drivers/nvme/host/ioctl.c | 5 ++++- drivers/nvme/host/pci.c | 12 ++++++++---- drivers/nvme/target/passthru.c | 5 +++-- drivers/scsi/scsi_error.c | 4 +++- drivers/scsi/sg.c | 9 +++++---- drivers/scsi/st.c | 4 +++- drivers/target/target_core_pscsi.c | 6 ++++-- drivers/ufs/core/ufshpb.c | 8 ++++++-- include/linux/blk-mq.h | 7 ++++++- 13 files changed, 65 insertions(+), 29 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 27705fc584a0..53202eff545e 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -217,7 +217,8 @@ static void blk_flush_complete_seq(struct request *rq, blk_kick_flush(q, fq, cmd_flags); } -static void flush_end_io(struct request *flush_rq, blk_status_t error) +static enum rq_end_io_ret flush_end_io(struct request *flush_rq, + blk_status_t error) { struct request_queue *q = flush_rq->q; struct list_head *running; @@ -231,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) if (!req_ref_put_and_test(flush_rq)) { fq->rq_status = error; spin_unlock_irqrestore(&fq->mq_flush_lock, flags); - return; + return RQ_END_IO_NONE; } blk_account_io_flush(flush_rq); @@ -268,6 +269,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) } spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + return RQ_END_IO_NONE; } bool is_flush_rq(struct request *rq) @@ -353,7 +355,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, blk_flush_queue_rq(flush_rq, false); } -static void mq_flush_data_end_io(struct request *rq, blk_status_t error) +static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq, + blk_status_t error) { struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx = rq->mq_hctx; @@ -375,6 +378,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) spin_unlock_irqrestore(&fq->mq_flush_lock, flags); blk_mq_sched_restart(hctx); + return RQ_END_IO_NONE; } /** diff --git a/block/blk-mq.c b/block/blk-mq.c index b32f70f38c6e..a21631de45b3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1001,7 +1001,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) if (rq->end_io) { rq_qos_done(rq->q, rq); - rq->end_io(rq, error); + if (rq->end_io(rq, error) == RQ_END_IO_FREE) + blk_mq_free_request(rq); } else { blk_mq_free_request(rq); } @@ -1295,12 +1296,13 @@ struct blk_rq_wait { blk_status_t ret; }; -static void blk_end_sync_rq(struct request *rq, blk_status_t ret) +static enum rq_end_io_ret blk_end_sync_rq(struct request *rq, blk_status_t ret) { struct blk_rq_wait *wait = rq->end_io_data; wait->ret = ret; complete(&wait->done); + return RQ_END_IO_NONE; } bool blk_rq_is_poll(struct request *rq) @@ -1534,10 +1536,12 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) void blk_mq_put_rq_ref(struct request *rq) { - if (is_flush_rq(rq)) - rq->end_io(rq, 0); - else if (req_ref_put_and_test(rq)) + if (is_flush_rq(rq)) { + if (rq->end_io(rq, 0) == RQ_END_IO_FREE) + blk_mq_free_request(rq); + } else if (req_ref_put_and_test(rq)) { __blk_mq_free_request(rq); + } } static bool blk_mq_check_expired(struct request *rq, void *priv) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 4f49bbcce4f1..3001b10a3fbf 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -292,11 +292,13 @@ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error) dm_complete_request(rq, error); } -static void end_clone_request(struct request *clone, blk_status_t error) +static enum rq_end_io_ret end_clone_request(struct request *clone, + blk_status_t error) { struct dm_rq_target_io *tio = clone->end_io_data; dm_complete_request(tio->orig, error); + return RQ_END_IO_NONE; } static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0f05b61a30fe..965a4c3e9d44 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1172,7 +1172,8 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2); } -static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) +static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, + blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; unsigned long flags; @@ -1184,7 +1185,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) dev_err(ctrl->device, "failed nvme_keep_alive_end_io error=%d\n", status); - return; + return RQ_END_IO_NONE; } ctrl->comp_seen = false; @@ -1195,6 +1196,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) nvme_queue_keep_alive_work(ctrl); + return RQ_END_IO_NONE; } static void nvme_keep_alive_work(struct work_struct *work) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 357791ff0623..2995789d5f9d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -392,7 +392,8 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) io_uring_cmd_done(ioucmd, status, result); } -static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err) +static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, + blk_status_t err) { struct io_uring_cmd *ioucmd = req->end_io_data; struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); @@ -411,6 +412,8 @@ static void nvme_uring_cmd_end_io(struct request *req, blk_status_t err) nvme_uring_task_cb(ioucmd); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); + + return RQ_END_IO_NONE; } static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f9af99b7e672..7bbffd2a9beb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1268,7 +1268,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); } -static void abort_endio(struct request *req, blk_status_t error) +static enum rq_end_io_ret abort_endio(struct request *req, blk_status_t error) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; @@ -1276,6 +1276,7 @@ static void abort_endio(struct request *req, blk_status_t error) "Abort status: 0x%x", nvme_req(req)->status); atomic_inc(&nvmeq->dev->ctrl.abort_limit); blk_mq_free_request(req); + return RQ_END_IO_NONE; } static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) @@ -2447,22 +2448,25 @@ out_unlock: return result; } -static void nvme_del_queue_end(struct request *req, blk_status_t error) +static enum rq_end_io_ret nvme_del_queue_end(struct request *req, + blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; blk_mq_free_request(req); complete(&nvmeq->delete_done); + return RQ_END_IO_NONE; } -static void nvme_del_cq_end(struct request *req, blk_status_t error) +static enum rq_end_io_ret nvme_del_cq_end(struct request *req, + blk_status_t error) { struct nvme_queue *nvmeq = req->end_io_data; if (error) set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); - nvme_del_queue_end(req, error); + return nvme_del_queue_end(req, error); } static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 94d3153bae54..79af5140af8b 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -245,14 +245,15 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) nvme_passthru_end(ctrl, effects, req->cmd, status); } -static void nvmet_passthru_req_done(struct request *rq, - blk_status_t blk_status) +static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq, + blk_status_t blk_status) { struct nvmet_req *req = rq->end_io_data; req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, nvme_req(rq)->status); blk_mq_free_request(rq); + return RQ_END_IO_NONE; } static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 448748e3fba5..786fb963cf3f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2004,9 +2004,11 @@ maybe_retry: } } -static void eh_lock_door_done(struct request *req, blk_status_t status) +static enum rq_end_io_ret eh_lock_door_done(struct request *req, + blk_status_t status) { blk_mq_free_request(req); + return RQ_END_IO_NONE; } /** diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 340b050ad28d..94c5e9a9309c 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ } Sg_device; /* tasklet or soft irq callback */ -static void sg_rq_end_io(struct request *rq, blk_status_t status); +static enum rq_end_io_ret sg_rq_end_io(struct request *rq, blk_status_t status); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static int sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); @@ -1311,7 +1311,7 @@ sg_rq_end_io_usercontext(struct work_struct *work) * This function is a "bottom half" handler that is called by the mid * level when a command is completed (or has failed). */ -static void +static enum rq_end_io_ret sg_rq_end_io(struct request *rq, blk_status_t status) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); @@ -1324,11 +1324,11 @@ sg_rq_end_io(struct request *rq, blk_status_t status) int result, resid, done = 1; if (WARN_ON(srp->done != 0)) - return; + return RQ_END_IO_NONE; sfp = srp->parentfp; if (WARN_ON(sfp == NULL)) - return; + return RQ_END_IO_NONE; sdp = sfp->parentdp; if (unlikely(atomic_read(&sdp->detaching))) @@ -1406,6 +1406,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status) INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); schedule_work(&srp->ew.work); } + return RQ_END_IO_NONE; } static const struct file_operations sg_fops = { diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 850172a2b8f1..55e7c07ebe4c 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -512,7 +512,8 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) atomic64_dec(&STp->stats->in_flight); } -static void st_scsi_execute_end(struct request *req, blk_status_t status) +static enum rq_end_io_ret st_scsi_execute_end(struct request *req, + blk_status_t status) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); struct st_request *SRpnt = req->end_io_data; @@ -532,6 +533,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status) blk_rq_unmap_user(tmp); blk_mq_free_request(req); + return RQ_END_IO_NONE; } static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index e6a967ddc08c..8a7306e5e133 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -39,7 +39,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev) } static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); -static void pscsi_req_done(struct request *, blk_status_t); +static enum rq_end_io_ret pscsi_req_done(struct request *, blk_status_t); /* pscsi_attach_hba(): * @@ -1002,7 +1002,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev) return 0; } -static void pscsi_req_done(struct request *req, blk_status_t status) +static enum rq_end_io_ret pscsi_req_done(struct request *req, + blk_status_t status) { struct se_cmd *cmd = req->end_io_data; struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); @@ -1029,6 +1030,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) } blk_mq_free_request(req); + return RQ_END_IO_NONE; } static const struct target_backend_ops pscsi_ops = { diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c index a1a7a1175a5a..3d69a81c5b17 100644 --- a/drivers/ufs/core/ufshpb.c +++ b/drivers/ufs/core/ufshpb.c @@ -613,14 +613,17 @@ static void ufshpb_activate_subregion(struct ufshpb_lu *hpb, srgn->srgn_state = HPB_SRGN_VALID; } -static void ufshpb_umap_req_compl_fn(struct request *req, blk_status_t error) +static enum rq_end_io_ret ufshpb_umap_req_compl_fn(struct request *req, + blk_status_t error) { struct ufshpb_req *umap_req = (struct ufshpb_req *)req->end_io_data; ufshpb_put_req(umap_req->hpb, umap_req); + return RQ_END_IO_NONE; } -static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error) +static enum rq_end_io_ret ufshpb_map_req_compl_fn(struct request *req, + blk_status_t error) { struct ufshpb_req *map_req = (struct ufshpb_req *) req->end_io_data; struct ufshpb_lu *hpb = map_req->hpb; @@ -636,6 +639,7 @@ static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error) spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); ufshpb_put_map_req(map_req->hpb, map_req); + return RQ_END_IO_NONE; } static void ufshpb_set_unmap_cmd(unsigned char *cdb, struct ufshpb_region *rgn) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 00a15808c137..e6fa49dd6196 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -14,7 +14,12 @@ struct blk_flush_queue; #define BLKDEV_MIN_RQ 4 #define BLKDEV_DEFAULT_RQ 128 -typedef void (rq_end_io_fn)(struct request *, blk_status_t); +enum rq_end_io_ret { + RQ_END_IO_NONE, + RQ_END_IO_FREE, +}; + +typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); /* * request flags */ -- cgit v1.2.3-58-ga151 From ab3e1d3bbab9e973aeb4dd4603251578658a47ff Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Sep 2022 08:24:16 -0600 Subject: block: allow end_io based requests in the completion batch handling With end_io handlers now being able to potentially pass ownership of the request upon completion, we can allow requests with end_io handlers in the batch completion handling. Reviewed-by: Anuj Gupta Reviewed-by: Keith Busch Co-developed-by: Stefan Roesch Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++++-- include/linux/blk-mq.h | 3 ++- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index a21631de45b3..8070b6c10e8d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -823,8 +823,10 @@ static void blk_complete_request(struct request *req) * can find how many bytes remain in the request * later. */ - req->bio = NULL; - req->__data_len = 0; + if (!req->end_io) { + req->bio = NULL; + req->__data_len = 0; + } } /** @@ -1055,6 +1057,13 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) rq_qos_done(rq->q, rq); + /* + * If end_io handler returns NONE, then it still has + * ownership of the request. + */ + if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE) + continue; + WRITE_ONCE(rq->state, MQ_RQ_IDLE); if (!req_ref_put_and_test(rq)) continue; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e6fa49dd6196..50811d0fb143 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -853,8 +853,9 @@ static inline bool blk_mq_add_to_batch(struct request *req, struct io_comp_batch *iob, int ioerror, void (*complete)(struct io_comp_batch *)) { - if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) + if (!iob || (req->rq_flags & RQF_ELV) || ioerror) return false; + if (!iob->complete) iob->complete = complete; else if (iob->complete != complete) -- cgit v1.2.3-58-ga151 From 557654025df5706785d395558244890dc4b2c875 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Fri, 30 Sep 2022 11:57:40 +0530 Subject: block: add blk_rq_map_user_io Create a helper blk_rq_map_user_io for mapping of vectored as well as non-vectored requests. This will help in saving dupilcation of code at few places in scsi and nvme. Signed-off-by: Anuj Gupta Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220930062749.152261-4-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- block/blk-map.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 38 insertions(+) (limited to 'block') diff --git a/block/blk-map.c b/block/blk-map.c index 7693f8e3c454..0e37bbedd46c 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -611,6 +611,42 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_user); +int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data, + void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask, + bool vec, int iov_count, bool check_iter_count, int rw) +{ + int ret = 0; + + if (vec) { + struct iovec fast_iov[UIO_FASTIOV]; + struct iovec *iov = fast_iov; + struct iov_iter iter; + + ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len, + UIO_FASTIOV, &iov, &iter); + if (ret < 0) + return ret; + + if (iov_count) { + /* SG_IO howto says that the shorter of the two wins */ + iov_iter_truncate(&iter, buf_len); + if (check_iter_count && !iov_iter_count(&iter)) { + kfree(iov); + return -EINVAL; + } + } + + ret = blk_rq_map_user_iov(req->q, req, map_data, &iter, + gfp_mask); + kfree(iov); + } else if (buf_len) { + ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len, + gfp_mask); + } + return ret; +} +EXPORT_SYMBOL(blk_rq_map_user_io); + /** * blk_rq_unmap_user - unmap a request with user data * @bio: start of bio list diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 50811d0fb143..ba18e9bdb799 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -985,6 +985,8 @@ struct rq_map_data { int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); +int blk_rq_map_user_io(struct request *, struct rq_map_data *, + void __user *, unsigned long, gfp_t, bool, int, bool, int); int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); -- cgit v1.2.3-58-ga151 From 32f1c71b15fc9cb8e964c3d0c15ca99a70cfe8a7 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Fri, 30 Sep 2022 11:57:45 +0530 Subject: block: rename bio_map_put to blk_mq_map_bio_put This patch renames existing bio_map_put function to blk_mq_map_bio_put. Signed-off-by: Anuj Gupta Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220930062749.152261-9-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- block/blk-map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-map.c b/block/blk-map.c index 0e37bbedd46c..84b13a4158b7 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -231,7 +231,7 @@ out_bmd: return ret; } -static void bio_map_put(struct bio *bio) +static void blk_mq_map_bio_put(struct bio *bio) { if (bio->bi_opf & REQ_ALLOC_CACHE) { bio_put(bio); @@ -331,7 +331,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, out_unmap: bio_release_pages(bio, false); - bio_map_put(bio); + blk_mq_map_bio_put(bio); return ret; } @@ -672,7 +672,7 @@ int blk_rq_unmap_user(struct bio *bio) next_bio = bio; bio = bio->bi_next; - bio_map_put(next_bio); + blk_mq_map_bio_put(next_bio); } return ret; -- cgit v1.2.3-58-ga151 From ab89e8e7ca526ca04baaad2aa28172d336425d67 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Fri, 30 Sep 2022 11:57:46 +0530 Subject: block: factor out blk_rq_map_bio_alloc helper Move bio allocation logic from bio_map_user_iov to a new helper blk_rq_map_bio_alloc. It is named so because functionality is opposite of what is done inside blk_mq_map_bio_put. This is a prep patch. Signed-off-by: Kanchan Joshi Link: https://lore.kernel.org/r/20220930062749.152261-10-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- block/blk-map.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'block') diff --git a/block/blk-map.c b/block/blk-map.c index 84b13a4158b7..d6ea377394a9 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -241,17 +241,10 @@ static void blk_mq_map_bio_put(struct bio *bio) } } -static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, - gfp_t gfp_mask) +static struct bio *blk_rq_map_bio_alloc(struct request *rq, + unsigned int nr_vecs, gfp_t gfp_mask) { - unsigned int max_sectors = queue_max_hw_sectors(rq->q); - unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); struct bio *bio; - int ret; - int j; - - if (!iov_iter_count(iter)) - return -EINVAL; if (rq->cmd_flags & REQ_POLLED) { blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE; @@ -259,13 +252,31 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask, &fs_bio_set); if (!bio) - return -ENOMEM; + return NULL; } else { bio = bio_kmalloc(nr_vecs, gfp_mask); if (!bio) - return -ENOMEM; + return NULL; bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); } + return bio; +} + +static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, + gfp_t gfp_mask) +{ + unsigned int max_sectors = queue_max_hw_sectors(rq->q); + unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); + struct bio *bio; + int ret; + int j; + + if (!iov_iter_count(iter)) + return -EINVAL; + + bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); + if (bio == NULL) + return -ENOMEM; while (iov_iter_count(iter)) { struct page **pages, *stack_pages[UIO_FASTIOV]; -- cgit v1.2.3-58-ga151 From 37987547932c89f15f9b76950040131ddb591a8b Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Fri, 30 Sep 2022 11:57:47 +0530 Subject: block: extend functionality to map bvec iterator Extend blk_rq_map_user_iov so that it can handle bvec iterator, using the new blk_rq_map_user_bvec function. It maps the pages from bvec iterator into a bio and place the bio into request. This helper will be used by nvme for uring-passthrough path when IO is done using pre-mapped buffers. Signed-off-by: Kanchan Joshi Signed-off-by: Anuj Gupta Suggested-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220930062749.152261-11-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- block/blk-map.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/blk-map.c b/block/blk-map.c index d6ea377394a9..34735626b00f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -548,6 +548,62 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(blk_rq_append_bio); +/* Prepare bio for passthrough IO given ITER_BVEC iter */ +static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) +{ + struct request_queue *q = rq->q; + size_t nr_iter = iov_iter_count(iter); + size_t nr_segs = iter->nr_segs; + struct bio_vec *bvecs, *bvprvp = NULL; + struct queue_limits *lim = &q->limits; + unsigned int nsegs = 0, bytes = 0; + struct bio *bio; + size_t i; + + if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q)) + return -EINVAL; + if (nr_segs > queue_max_segments(q)) + return -EINVAL; + + /* no iovecs to alloc, as we already have a BVEC iterator */ + bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL); + if (bio == NULL) + return -ENOMEM; + + bio_iov_bvec_set(bio, (struct iov_iter *)iter); + blk_rq_bio_prep(rq, bio, nr_segs); + + /* loop to perform a bunch of sanity checks */ + bvecs = (struct bio_vec *)iter->bvec; + for (i = 0; i < nr_segs; i++) { + struct bio_vec *bv = &bvecs[i]; + + /* + * If the queue doesn't support SG gaps and adding this + * offset would create a gap, fallback to copy. + */ + if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) { + blk_mq_map_bio_put(bio); + return -EREMOTEIO; + } + /* check full condition */ + if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len) + goto put_bio; + if (bytes + bv->bv_len > nr_iter) + goto put_bio; + if (bv->bv_offset + bv->bv_len > PAGE_SIZE) + goto put_bio; + + nsegs++; + bytes += bv->bv_len; + bvprvp = bv; + } + return 0; +put_bio: + blk_mq_map_bio_put(bio); + return -EINVAL; +} + /** * blk_rq_map_user_iov - map user data to a request, for passthrough requests * @q: request queue where request should be inserted @@ -567,24 +623,35 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct rq_map_data *map_data, const struct iov_iter *iter, gfp_t gfp_mask) { - bool copy = false; + bool copy = false, map_bvec = false; unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); struct bio *bio = NULL; struct iov_iter i; int ret = -EINVAL; - if (!iter_is_iovec(iter)) - goto fail; - if (map_data) copy = true; else if (blk_queue_may_bounce(q)) copy = true; else if (iov_iter_alignment(iter) & align) copy = true; + else if (iov_iter_is_bvec(iter)) + map_bvec = true; + else if (!iter_is_iovec(iter)) + copy = true; else if (queue_virt_boundary(q)) copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); + if (map_bvec) { + ret = blk_rq_map_user_bvec(rq, iter); + if (!ret) + return 0; + if (ret != -EREMOTEIO) + goto fail; + /* fall back to copying the data on limits mismatches */ + copy = true; + } + i = *iter; do { if (copy) -- cgit v1.2.3-58-ga151