From 01e99aeca3979600302913cef3f89076786f32c8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 25 Feb 2020 09:04:32 +0800 Subject: blk-mq: insert passthrough request into hctx->dispatch directly For some reason, device may be in one situation which can't handle FS request, so STS_RESOURCE is always returned and the FS request will be added to hctx->dispatch. However passthrough request may be required at that time for fixing the problem. If passthrough request is added to scheduler queue, there isn't any chance for blk-mq to dispatch it given we prioritize requests in hctx->dispatch. Then the FS IO request may never be completed, and IO hang is caused. So passthrough request has to be added to hctx->dispatch directly for fixing the IO hang. Fix this issue by inserting passthrough request into hctx->dispatch directly together withing adding FS request to the tail of hctx->dispatch in blk_mq_dispatch_rq_list(). Actually we add FS request to tail of hctx->dispatch at default, see blk_mq_request_bypass_insert(). Then it becomes consistent with original legacy IO request path, in which passthrough request is always added to q->queue_head. Cc: Dongli Zhang Cc: Christoph Hellwig Cc: Ewan D. Milne Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- block/blk-mq-sched.c | 22 +++++++++++++++------- block/blk-mq.c | 18 +++++++++++------- block/blk-mq.h | 3 ++- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 3f977c517960..5cc775bdb06a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -412,7 +412,7 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); return; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ca22afd47b3d..856356b1619e 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, bool has_sched, struct request *rq) { - /* dispatch flush rq directly */ - if (rq->rq_flags & RQF_FLUSH_SEQ) { - spin_lock(&hctx->lock); - list_add(&rq->queuelist, &hctx->dispatch); - spin_unlock(&hctx->lock); + /* + * dispatch flush and passthrough rq directly + * + * passthrough request has to be added to hctx->dispatch directly. + * For some reason, device may be in one situation which can't + * handle FS request, so STS_RESOURCE is always returned and the + * FS request will be added to hctx->dispatch. However passthrough + * request may be required at that time for fixing the problem. If + * passthrough request is added to scheduler queue, there isn't any + * chance to dispatch it given we prioritize requests in hctx->dispatch. + */ + if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) return true; - } if (has_sched) rq->rq_flags |= RQF_SORTED; @@ -391,8 +397,10 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != -1)); - if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) + if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + blk_mq_request_bypass_insert(rq, at_head, false); goto run; + } if (e && e->type->ops.insert_requests) { LIST_HEAD(list); diff --git a/block/blk-mq.c b/block/blk-mq.c index a12b1763508d..5e1e4151cb51 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -735,7 +735,7 @@ static void blk_mq_requeue_work(struct work_struct *work) * merge. */ if (rq->rq_flags & RQF_DONTPREP) - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); else blk_mq_sched_insert_request(rq, true, false, false); } @@ -1286,7 +1286,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, q->mq_ops->commit_rqs(hctx); spin_lock(&hctx->lock); - list_splice_init(list, &hctx->dispatch); + list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* @@ -1677,12 +1677,16 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); - list_add_tail(&rq->queuelist, &hctx->dispatch); + if (at_head) + list_add(&rq->queuelist, &hctx->dispatch); + else + list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); if (run_queue) @@ -1849,7 +1853,7 @@ insert: if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_request_bypass_insert(rq, run_queue); + blk_mq_request_bypass_insert(rq, false, run_queue); return BLK_STS_OK; } @@ -1876,7 +1880,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) - blk_mq_request_bypass_insert(rq, true); + blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1910,7 +1914,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - blk_mq_request_bypass_insert(rq, + blk_mq_request_bypass_insert(rq, false, list_empty(list)); break; } diff --git a/block/blk-mq.h b/block/blk-mq.h index eaaca8fc1c28..c0fa34378eb2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); -- cgit v1.2.3-58-ga151 From c780e86dd48ef6467a1146cf7d0fe1e05a635039 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 6 Feb 2020 15:28:12 +0100 Subject: blktrace: Protect q->blk_trace with RCU KASAN is reporting that __blk_add_trace() has a use-after-free issue when accessing q->blk_trace. Indeed the switching of block tracing (and thus eventual freeing of q->blk_trace) is completely unsynchronized with the currently running tracing and thus it can happen that the blk_trace structure is being freed just while __blk_add_trace() works on it. Protect accesses to q->blk_trace by RCU during tracing and make sure we wait for the end of RCU grace period when shutting down tracing. Luckily that is rare enough event that we can afford that. Note that postponing the freeing of blk_trace to an RCU callback should better be avoided as it could have unexpected user visible side-effects as debugfs files would be still existing for a short while block tracing has been shut down. Link: https://bugzilla.kernel.org/show_bug.cgi?id=205711 CC: stable@vger.kernel.org Reviewed-by: Chaitanya Kulkarni Reviewed-by: Ming Lei Tested-by: Ming Lei Reviewed-by: Bart Van Assche Reported-by: Tristan Madani Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- include/linux/blktrace_api.h | 18 +++++-- kernel/trace/blktrace.c | 114 +++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 37 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 053ea4b51988..10455b2bbbb4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -524,7 +524,7 @@ struct request_queue { unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE - struct blk_trace *blk_trace; + struct blk_trace __rcu *blk_trace; struct mutex blk_trace_mutex; #endif /* diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7bb2d8de9f30..3b6ff5902edc 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f **/ #define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ do { \ - struct blk_trace *bt = (q)->blk_trace; \ + struct blk_trace *bt; \ + \ + rcu_read_lock(); \ + bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ + rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f static inline bool blk_trace_note_message_enabled(struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; - if (likely(!bt)) - return false; - return bt->act_mask & BLK_TC_NOTIFY; + struct blk_trace *bt; + bool ret; + + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + ret = bt && (bt->act_mask & BLK_TC_NOTIFY); + rcu_read_unlock(); + return ret; } extern void blk_add_driver_data(struct request_queue *q, struct request *rq, diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 0735ae8545d8..4560878f0bac 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -335,6 +335,7 @@ static void put_probe_ref(void) static void blk_trace_cleanup(struct blk_trace *bt) { + synchronize_rcu(); blk_trace_free(bt); put_probe_ref(); } @@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, static int __blk_trace_startstop(struct request_queue *q, int start) { int ret; - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) void blk_trace_shutdown(struct request_queue *q) { mutex_lock(&q->blk_trace_mutex); - - if (q->blk_trace) { + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } @@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q) #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + /* We don't use the 'bt' value here except as an optimization... */ + bt = rcu_dereference_protected(q->blk_trace, 1); if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; @@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what, u64 cgid) { - struct blk_trace *bt = rq->q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(rq->q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } if (blk_rq_is_passthrough(rq)) what |= BLK_TC_ACT(BLK_TC_PC); @@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error, __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), rq->cmd_flags, what, error, 0, NULL, cgid); + rcu_read_unlock(); } static void blk_add_trace_rq_insert(void *ignore, @@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what, int error) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } static void blk_add_trace_bio_bounce(void *ignore, @@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } @@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } static void blk_add_trace_plug(void *ignore, struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + rcu_read_unlock(); } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(depth); u32 what; @@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } + rcu_read_unlock(); } static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(pdu); @@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore, BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); } + rcu_read_unlock(); } /** @@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore, struct request_queue *q, struct bio *bio, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(bio_dev(bio)); @@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } /** @@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); @@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } /** @@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q) return -EINVAL; put_probe_ref(); + synchronize_rcu(); blk_trace_free(bt); return 0; } @@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q; struct block_device *bdev; + struct blk_trace *bt; ssize_t ret = -ENXIO; bdev = bdget(part_devt(p)); @@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - ret = sprintf(buf, "%u\n", !!q->blk_trace); + ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; } - if (q->blk_trace == NULL) + if (bt == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, bt->act_mask); else if (attr == &dev_attr_pid) - ret = sprintf(buf, "%u\n", q->blk_trace->pid); + ret = sprintf(buf, "%u\n", bt->pid); else if (attr == &dev_attr_start_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + ret = sprintf(buf, "%llu\n", bt->start_lba); else if (attr == &dev_attr_end_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: mutex_unlock(&q->blk_trace_mutex); @@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct block_device *bdev; struct request_queue *q; struct hd_struct *p; + struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - if (!!value == !!q->blk_trace) { + if (!!value == !!bt) { ret = 0; goto out_unlock_bdev; } @@ -1844,18 +1896,18 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } ret = 0; - if (q->blk_trace == NULL) + if (bt == NULL) ret = blk_trace_setup_queue(q, bdev); if (ret == 0) { if (attr == &dev_attr_act_mask) - q->blk_trace->act_mask = value; + bt->act_mask = value; else if (attr == &dev_attr_pid) - q->blk_trace->pid = value; + bt->pid = value; else if (attr == &dev_attr_start_lba) - q->blk_trace->start_lba = value; + bt->start_lba = value; else if (attr == &dev_attr_end_lba) - q->blk_trace->end_lba = value; + bt->end_lba = value; } out_unlock_bdev: -- cgit v1.2.3-58-ga151 From 93d7c3185893b185e7f4347f8986b9b521254a6e Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Mon, 24 Feb 2020 10:39:11 -0800 Subject: null_blk: remove unused fields in 'nullb_cmd' 'list', 'll_list' and 'csd' are no longer used. The 'list' is not used since it was introduced by commit f2298c0403b0 ("null_blk: multi queue aware block test driver"). The 'll_list' is no longer used since commit 3c395a969acc ("null_blk: set a separate timer for each command"). The 'csd' is no longer used since commit ce2c350b2cfe ("null_blk: use blk_complete_request and blk_mq_complete_request"). Reviewed-by: Bart Van Assche Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- drivers/block/null_blk.h | 3 --- drivers/block/null_blk_main.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index bc837862b767..62b660821dbc 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -14,9 +14,6 @@ #include struct nullb_cmd { - struct list_head list; - struct llist_node ll_list; - struct __call_single_data csd; struct request *rq; struct bio *bio; unsigned int tag; diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 16510795e377..133060431dbd 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1518,8 +1518,6 @@ static int setup_commands(struct nullb_queue *nq) for (i = 0; i < nq->queue_depth; i++) { cmd = &nq->cmds[i]; - INIT_LIST_HEAD(&cmd->list); - cmd->ll_list.next = NULL; cmd->tag = -1U; } -- cgit v1.2.3-58-ga151 From cae740a04b4d6d5166f19ee5faf04ea2a1f34b3d Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 26 Feb 2020 20:10:15 +0800 Subject: blk-mq: Remove some unused function arguments The struct blk_mq_hw_ctx pointer argument in blk_mq_put_tag(), blk_mq_poll_nsecs(), and blk_mq_poll_hybrid_sleep() is unused, so remove it. Overall obj code size shows a minor reduction, before: text data bss dec hex filename 27306 1312 0 28618 6fca block/blk-mq.o 4303 272 0 4575 11df block/blk-mq-tag.o after: 27282 1312 0 28594 6fb2 block/blk-mq.o 4311 272 0 4583 11e7 block/blk-mq-tag.o Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: John Garry -- This minor patch had been carried as part of the blk-mq shared tags RFC, I'd rather not carry it anymore as it required rebasing, so now or never.. Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 4 ++-- block/blk-mq-tag.h | 4 ++-- block/blk-mq.c | 10 ++++------ block/blk-mq.h | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index fbacde454718..586c9d6e904a 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -183,8 +183,8 @@ found_tag: return tag + tag_offset; } -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag) +void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag) { if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 15bc74acb57e..2b8321efb682 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -26,8 +26,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, - struct blk_mq_ctx *ctx, unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags **tags, unsigned int depth, bool can_grow); diff --git a/block/blk-mq.c b/block/blk-mq.c index 5e1e4151cb51..d92088dec6c3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -477,9 +477,9 @@ static void __blk_mq_free_request(struct request *rq) blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; if (rq->tag != -1) - blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); + blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != -1) - blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag); + blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag); blk_mq_sched_restart(hctx); blk_queue_exit(q); } @@ -3402,7 +3402,6 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb) } static unsigned long blk_mq_poll_nsecs(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { unsigned long ret = 0; @@ -3435,7 +3434,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, } static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, - struct blk_mq_hw_ctx *hctx, struct request *rq) { struct hrtimer_sleeper hs; @@ -3455,7 +3453,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, if (q->poll_nsec > 0) nsecs = q->poll_nsec; else - nsecs = blk_mq_poll_nsecs(q, hctx, rq); + nsecs = blk_mq_poll_nsecs(q, rq); if (!nsecs) return false; @@ -3510,7 +3508,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, return false; } - return blk_mq_poll_hybrid_sleep(q, hctx, rq); + return blk_mq_poll_hybrid_sleep(q, rq); } /** diff --git a/block/blk-mq.h b/block/blk-mq.h index c0fa34378eb2..10bfdfb494fa 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -200,7 +200,7 @@ static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) { - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); + blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; if (rq->rq_flags & RQF_MQ_INFLIGHT) { -- cgit v1.2.3-58-ga151 From 9515743bfb39c61aaf3d4f3219a645c8d1fe9a0e Mon Sep 17 00:00:00 2001 From: Bijan Mottahedeh Date: Wed, 26 Feb 2020 18:53:43 -0800 Subject: nvme-pci: Hold cq_poll_lock while completing CQEs Completions need to consumed in the same order the controller submitted them, otherwise future completion entries may overwrite ones we haven't handled yet. Hold the nvme queue's poll lock while completing new CQEs to prevent another thread from freeing command tags for reuse out-of-order. Fixes: dabcefab45d3 ("nvme: provide optimized poll function for separate poll queues") Signed-off-by: Bijan Mottahedeh Reviewed-by: Sagi Grimberg Reviewed-by: Jens Axboe Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ace4dd9e953c..d3f23d6254e4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1078,9 +1078,9 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) spin_lock(&nvmeq->cq_poll_lock); found = nvme_process_cq(nvmeq, &start, &end, -1); + nvme_complete_cqes(nvmeq, start, end); spin_unlock(&nvmeq->cq_poll_lock); - nvme_complete_cqes(nvmeq, start, end); return found; } -- cgit v1.2.3-58-ga151