diff options
author | Damien Le Moal <damien.lemoal@wdc.com> | 2019-08-28 13:40:20 +0900 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2019-09-03 07:59:51 -0600 |
commit | cb8acabbe33b110157955a7425ee876fb81e6bbc (patch) | |
tree | e1822764e1203e47b2f0a469582bf25a58a6886d | |
parent | 0feacaa21634014148068035b02eade71f853496 (diff) |
block: mq-deadline: Fix queue restart handling
Commit 7211aef86f79 ("block: mq-deadline: Fix write completion
handling") added a call to blk_mq_sched_mark_restart_hctx() in
dd_dispatch_request() to make sure that write request dispatching does
not stall when all target zones are locked. This fix left a subtle race
when a write completion happens during a dispatch execution on another
CPU:
CPU 0: Dispatch CPU1: write completion
dd_dispatch_request()
lock(&dd->lock);
...
lock(&dd->zone_lock); dd_finish_request()
rq = find request lock(&dd->zone_lock);
unlock(&dd->zone_lock);
zone write unlock
unlock(&dd->zone_lock);
...
__blk_mq_free_request
check restart flag (not set)
-> queue not run
...
if (!rq && have writes)
blk_mq_sched_mark_restart_hctx()
unlock(&dd->lock)
Since the dispatch context finishes after the write request completion
handling, marking the queue as needing a restart is not seen from
__blk_mq_free_request() and blk_mq_sched_restart() not executed leading
to the dispatch stall under 100% write workloads.
Fix this by moving the call to blk_mq_sched_mark_restart_hctx() from
dd_dispatch_request() into dd_finish_request() under the zone lock to
ensure full mutual exclusion between write request dispatch selection
and zone unlock on write request completion.
Fixes: 7211aef86f79 ("block: mq-deadline: Fix write completion handling")
Cc: stable@vger.kernel.org
Reported-by: Hans Holmberg <Hans.Holmberg@wdc.com>
Reviewed-by: Hans Holmberg <hans.holmberg@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | block/mq-deadline.c | 19 |
1 files changed, 9 insertions, 10 deletions
diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 2a2a2e82832e..35e84bc0ec8c 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -377,13 +377,6 @@ done: * hardware queue, but we may return a request that is for a * different hardware queue. This is because mq-deadline has shared * state for all hardware queues, in terms of sorting, FIFOs, etc. - * - * For a zoned block device, __dd_dispatch_request() may return NULL - * if all the queued write requests are directed at zones that are already - * locked due to on-going write requests. In this case, make sure to mark - * the queue as needing a restart to ensure that the queue is run again - * and the pending writes dispatched once the target zones for the ongoing - * write requests are unlocked in dd_finish_request(). */ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) { @@ -392,9 +385,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) spin_lock(&dd->lock); rq = __dd_dispatch_request(dd); - if (!rq && blk_queue_is_zoned(hctx->queue) && - !list_empty(&dd->fifo_list[WRITE])) - blk_mq_sched_mark_restart_hctx(hctx); spin_unlock(&dd->lock); return rq; @@ -561,6 +551,13 @@ static void dd_prepare_request(struct request *rq, struct bio *bio) * spinlock so that the zone is never unlocked while deadline_fifo_request() * or deadline_next_request() are executing. This function is called for * all requests, whether or not these requests complete successfully. + * + * For a zoned block device, __dd_dispatch_request() may have stopped + * dispatching requests if all the queued requests are write requests directed + * at zones that are already locked due to on-going write requests. To ensure + * write request dispatch progress in this case, mark the queue as needing a + * restart to ensure that the queue is run again after completion of the + * request and zones being unlocked. */ static void dd_finish_request(struct request *rq) { @@ -572,6 +569,8 @@ static void dd_finish_request(struct request *rq) spin_lock_irqsave(&dd->zone_lock, flags); blk_req_zone_write_unlock(rq); + if (!list_empty(&dd->fifo_list[WRITE])) + blk_mq_sched_mark_restart_hctx(rq->mq_hctx); spin_unlock_irqrestore(&dd->zone_lock, flags); } } |