Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 422
1 file changed, 249 insertions, 173 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index 98a18609755e..b600463791ec 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -37,6 +37,7 @@ #include "blk-wbt.h" #include "blk-mq-sched.h" +static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -60,10 +61,10 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) /* * Check if any of the ctx's have pending work in this hardware queue */ -bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) +static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { - return sbitmap_any_bit_set(&hctx->ctx_map) || - !list_empty_careful(&hctx->dispatch) || + return !list_empty_careful(&hctx->dispatch) || + sbitmap_any_bit_set(&hctx->ctx_map) || blk_mq_sched_has_work(hctx); } @@ -125,7 +126,8 @@ void blk_freeze_queue_start(struct request_queue *q) freeze_depth = atomic_inc_return(&q->mq_freeze_depth); if (freeze_depth == 1) { percpu_ref_kill(&q->q_usage_counter); - blk_mq_run_hw_queues(q, false); + if (q->mq_ops) + blk_mq_run_hw_queues(q, false); } } EXPORT_SYMBOL_GPL(blk_freeze_queue_start); @@ -255,13 +257,6 @@ void blk_mq_wake_waiters(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) if (blk_mq_hw_queue_mapped(hctx)) blk_mq_tag_wakeup_all(hctx->tags, true); - - /* - * If we are called because the queue has now been marked as - * dying, we need to ensure that processes currently waiting on - * the queue are notified as well. - */ - wake_up_all(&q->mq_freeze_wq); } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -296,6 +291,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->q = data->q; rq->mq_ctx = data->ctx; rq->cmd_flags = op; + if (data->flags & BLK_MQ_REQ_PREEMPT) + rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; /* do not touch atomic flags, it needs atomic ops against the timer */ @@ -336,12 +333,14 @@ static struct request *blk_mq_get_request(struct request_queue *q, struct elevator_queue *e = q->elevator; struct request *rq; unsigned int tag; - struct blk_mq_ctx *local_ctx = NULL; + bool put_ctx_on_error = false; blk_queue_enter_live(q); data->q = q; - if (likely(!data->ctx)) - data->ctx = local_ctx = blk_mq_get_ctx(q); + if (likely(!data->ctx)) { + data->ctx = blk_mq_get_ctx(q); + put_ctx_on_error = true; + } if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); if (op & REQ_NOWAIT) @@ -360,8 +359,8 @@ static struct request *blk_mq_get_request(struct request_queue *q, tag = blk_mq_get_tag(data); if (tag == BLK_MQ_TAG_FAIL) { - if (local_ctx) { - blk_mq_put_ctx(local_ctx); + if (put_ctx_on_error) { + blk_mq_put_ctx(data->ctx); data->ctx = NULL; } blk_queue_exit(q); @@ -384,13 +383,13 @@ static struct request *blk_mq_get_request(struct request_queue *q, } struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, - unsigned int flags) + blk_mq_req_flags_t flags) { struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; int ret; - ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); @@ -410,7 +409,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, unsigned int flags, unsigned int hctx_idx) + unsigned int op, blk_mq_req_flags_t 
flags, unsigned int hctx_idx) { struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; @@ -429,7 +428,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, if (hctx_idx >= q->nr_hw_queues) return ERR_PTR(-EIO); - ret = blk_queue_enter(q, true); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); @@ -476,8 +475,14 @@ void blk_mq_free_request(struct request *rq) if (rq->rq_flags & RQF_MQ_INFLIGHT) atomic_dec(&hctx->nr_active); + if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) + laptop_io_completion(q->backing_dev_info); + wbt_done(q->rq_wb, &rq->issue_stat); + if (blk_rq_rl(rq)) + blk_put_rl(blk_rq_rl(rq)); + clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); if (rq->tag != -1) @@ -593,22 +598,32 @@ void blk_mq_start_request(struct request *rq) blk_add_timer(rq); - /* - * Ensure that ->deadline is visible before set the started - * flag and clear the completed flag. - */ - smp_mb__before_atomic(); + WARN_ON_ONCE(test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)); /* * Mark us as started and clear complete. Complete might have been * set if requeue raced with timeout, which then marked it as * complete. So be sure to clear complete again when we start * the request, otherwise we'll ignore the completion event. + * + * Ensure that ->deadline is visible before we set STARTED, such that + * blk_mq_check_expired() is guaranteed to observe our ->deadline when + * it observes STARTED. */ - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) - set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); - if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) + smp_wmb(); + set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); + if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) { + /* + * Coherence order guarantees these consecutive stores to a + * single variable propagate in the specified order. Thus the + * clear_bit() is ordered _after_ the set bit. See + * blk_mq_check_expired(). + * + * (the bits must be part of the same byte for this to be + * true). + */ clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); + } if (q->dma_drain_size && blk_rq_bytes(rq)) { /* @@ -634,6 +649,8 @@ static void __blk_mq_requeue_request(struct request *rq) { struct request_queue *q = rq->q; + blk_mq_put_driver_tag(rq); + trace_block_rq_requeue(q, rq); wbt_requeue(q->rq_wb, &rq->issue_stat); blk_mq_sched_requeue_request(rq); @@ -690,7 +707,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, /* * We abuse this flag that is otherwise used by the I/O scheduler to - * request head insertation from the workqueue. + * request head insertion from the workqueue. */ BUG_ON(rq->rq_flags & RQF_SOFTBARRIER); @@ -778,11 +795,20 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { struct blk_mq_timeout_data *data = priv; + unsigned long deadline; if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; /* + * Ensures that if we see STARTED we must also see our + * up-to-date deadline, see blk_mq_start_request(). + */ + smp_rmb(); + + deadline = READ_ONCE(rq->deadline); + + /* * The rq being checked may have been freed and reallocated * out already here, we avoid this race by checking rq->deadline * and REQ_ATOM_COMPLETE flag together: @@ -795,11 +821,20 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, * and clearing the flag in blk_mq_start_request(), so * this rq won't be timed out too. 
*/ - if (time_after_eq(jiffies, rq->deadline)) { - if (!blk_mark_rq_complete(rq)) + if (time_after_eq(jiffies, deadline)) { + if (!blk_mark_rq_complete(rq)) { + /* + * Again coherence order ensures that consecutive reads + * from the same variable must be in that order. This + * ensures that if we see COMPLETE clear, we must then + * see STARTED set and we'll ignore this timeout. + * + * (There's also the MB implied by the test_and_clear()) + */ blk_mq_rq_timed_out(rq, reserved); - } else if (!data->next_set || time_after(data->next, rq->deadline)) { - data->next = rq->deadline; + } + } else if (!data->next_set || time_after(data->next, deadline)) { + data->next = deadline; data->next_set = 1; } } @@ -880,6 +915,45 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) } EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); +struct dispatch_rq_data { + struct blk_mq_hw_ctx *hctx; + struct request *rq; +}; + +static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr, + void *data) +{ + struct dispatch_rq_data *dispatch_data = data; + struct blk_mq_hw_ctx *hctx = dispatch_data->hctx; + struct blk_mq_ctx *ctx = hctx->ctxs[bitnr]; + + spin_lock(&ctx->lock); + if (unlikely(!list_empty(&ctx->rq_list))) { + dispatch_data->rq = list_entry_rq(ctx->rq_list.next); + list_del_init(&dispatch_data->rq->queuelist); + if (list_empty(&ctx->rq_list)) + sbitmap_clear_bit(sb, bitnr); + } + spin_unlock(&ctx->lock); + + return !dispatch_data->rq; +} + +struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, + struct blk_mq_ctx *start) +{ + unsigned off = start ? start->index_hw : 0; + struct dispatch_rq_data data = { + .hctx = hctx, + .rq = NULL, + }; + + __sbitmap_for_each_set(&hctx->ctx_map, off, + dispatch_rq_from_ctx, &data); + + return data.rq; +} + static inline unsigned int queued_to_index(unsigned int queued) { if (!queued) @@ -920,109 +994,95 @@ done: return rq->tag != -1; } -static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); - rq->tag = -1; - - if (rq->rq_flags & RQF_MQ_INFLIGHT) { - rq->rq_flags &= ~RQF_MQ_INFLIGHT; - atomic_dec(&hctx->nr_active); - } -} - -static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag == -1 || rq->internal_tag == -1) - return; - - __blk_mq_put_driver_tag(hctx, rq); -} - -static void blk_mq_put_driver_tag(struct request *rq) +static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, + int flags, void *key) { struct blk_mq_hw_ctx *hctx; - if (rq->tag == -1 || rq->internal_tag == -1) - return; + hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); - hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); - __blk_mq_put_driver_tag(hctx, rq); + list_del_init(&wait->entry); + blk_mq_run_hw_queue(hctx, true); + return 1; } /* - * If we fail getting a driver tag because all the driver tags are already - * assigned and on the dispatch list, BUT the first entry does not have a - * tag, then we could deadlock. For that case, move entries with assigned - * driver tags to the front, leaving the set of tagged requests in the - * same order, and the untagged set in the same order. + * Mark us waiting for a tag. For shared tags, this involves hooking us into + * the tag wakeups. For non-shared tags, we can simply mark us nedeing a + * restart. For both caes, take care to check the condition again after + * marking us as waiting. 
*/ -static bool reorder_tags_to_front(struct list_head *list) -{ - struct request *rq, *tmp, *first = NULL; - - list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) { - if (rq == first) - break; - if (rq->tag != -1) { - list_move(&rq->queuelist, list); - if (!first) - first = rq; - } - } - - return first != NULL; -} - -static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, - void *key) +static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx, + struct request *rq) { - struct blk_mq_hw_ctx *hctx; + struct blk_mq_hw_ctx *this_hctx = *hctx; + bool shared_tags = (this_hctx->flags & BLK_MQ_F_TAG_SHARED) != 0; + struct sbq_wait_state *ws; + wait_queue_entry_t *wait; + bool ret; - hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); + if (!shared_tags) { + if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state)) + set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state); + } else { + wait = &this_hctx->dispatch_wait; + if (!list_empty_careful(&wait->entry)) + return false; - list_del(&wait->entry); - clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state); - blk_mq_run_hw_queue(hctx, true); - return 1; -} + spin_lock(&this_hctx->lock); + if (!list_empty(&wait->entry)) { + spin_unlock(&this_hctx->lock); + return false; + } -static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) -{ - struct sbq_wait_state *ws; + ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx); + add_wait_queue(&ws->wait, wait); + } /* - * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait. - * The thread which wins the race to grab this bit adds the hardware - * queue to the wait queue. + * It's possible that a tag was freed in the window between the + * allocation failure and adding the hardware queue to the wait + * queue. */ - if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) || - test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state)) - return false; + ret = blk_mq_get_driver_tag(rq, hctx, false); - init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); - ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx); + if (!shared_tags) { + /* + * Don't clear RESTART here, someone else could have set it. + * At most this will cost an extra queue run. + */ + return ret; + } else { + if (!ret) { + spin_unlock(&this_hctx->lock); + return false; + } - /* - * As soon as this returns, it's no longer safe to fiddle with - * hctx->dispatch_wait, since a completion can wake up the wait queue - * and unlock the bit. - */ - add_wait_queue(&ws->wait, &hctx->dispatch_wait); - return true; + /* + * We got a tag, remove ourselves from the wait queue to ensure + * someone else gets the wakeup. + */ + spin_lock_irq(&ws->wait.lock); + list_del_init(&wait->entry); + spin_unlock_irq(&ws->wait.lock); + spin_unlock(&this_hctx->lock); + return true; + } } -bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) +bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, + bool got_budget) { struct blk_mq_hw_ctx *hctx; - struct request *rq; + struct request *rq, *nxt; + bool no_tag = false; int errors, queued; if (list_empty(list)) return false; + WARN_ON(!list_is_singular(list) && got_budget); + /* * Now process all the entries, sending them to the driver. 
*/ @@ -1033,23 +1093,29 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) rq = list_first_entry(list, struct request, queuelist); if (!blk_mq_get_driver_tag(rq, &hctx, false)) { - if (!queued && reorder_tags_to_front(list)) - continue; - /* * The initial allocation attempt failed, so we need to - * rerun the hardware queue when a tag is freed. + * rerun the hardware queue when a tag is freed. The + * waitqueue takes care of that. If the queue is run + * before we add this entry back on the dispatch list, + * we'll re-run it below. */ - if (!blk_mq_dispatch_wait_add(hctx)) + if (!blk_mq_mark_tag_wait(&hctx, rq)) { + if (got_budget) + blk_mq_put_dispatch_budget(hctx); + /* + * For non-shared tags, the RESTART check + * will suffice. + */ + if (hctx->flags & BLK_MQ_F_TAG_SHARED) + no_tag = true; break; + } + } - /* - * It's possible that a tag was freed in the window - * between the allocation failure and adding the - * hardware queue to the wait queue. - */ - if (!blk_mq_get_driver_tag(rq, &hctx, false)) - break; + if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) { + blk_mq_put_driver_tag(rq); + break; } list_del_init(&rq->queuelist); @@ -1063,15 +1129,21 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) if (list_empty(list)) bd.last = true; else { - struct request *nxt; - nxt = list_first_entry(list, struct request, queuelist); bd.last = !blk_mq_get_driver_tag(nxt, NULL, false); } ret = q->mq_ops->queue_rq(hctx, &bd); if (ret == BLK_STS_RESOURCE) { - blk_mq_put_driver_tag_hctx(hctx, rq); + /* + * If an I/O scheduler has been configured and we got a + * driver tag for the next request already, free it + * again. + */ + if (!list_empty(list)) { + nxt = list_first_entry(list, struct request, queuelist); + blk_mq_put_driver_tag(nxt); + } list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; @@ -1093,13 +1165,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) * that is where we will continue on next queue run. */ if (!list_empty(list)) { - /* - * If an I/O scheduler has been configured and we got a driver - * tag for the next request already, free it again. - */ - rq = list_first_entry(list, struct request, queuelist); - blk_mq_put_driver_tag(rq); - spin_lock(&hctx->lock); list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); @@ -1109,10 +1174,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) * it is no longer set that means that it was cleared by another * thread and hence that a queue rerun is needed. * - * If TAG_WAITING is set that means that an I/O scheduler has - * been configured and another thread is waiting for a driver - * tag. To guarantee fairness, do not rerun this hardware queue - * but let the other thread grab the driver tag. + * If 'no_tag' is set, that means that we failed getting + * a driver tag with an I/O scheduler attached. If our dispatch + * waitqueue is no longer active, ensure that we run the queue + * AFTER adding our entries back to the list. * * If no I/O scheduler has been configured it is possible that * the hardware queue got stopped and restarted before requests @@ -1124,8 +1189,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq * and dm-rq. 
*/ - if (!blk_mq_sched_needs_restart(hctx) && - !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) + if (!blk_mq_sched_needs_restart(hctx) || + (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); } @@ -1218,9 +1283,14 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { - __blk_mq_delay_run_hw_queue(hctx, async, 0); + if (blk_mq_hctx_has_pending(hctx)) { + __blk_mq_delay_run_hw_queue(hctx, async, 0); + return true; + } + + return false; } EXPORT_SYMBOL(blk_mq_run_hw_queue); @@ -1230,8 +1300,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (!blk_mq_hctx_has_pending(hctx) || - blk_mq_hctx_stopped(hctx)) + if (blk_mq_hctx_stopped(hctx)) continue; blk_mq_run_hw_queue(hctx, async); @@ -1405,7 +1474,7 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq) +void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) { struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu); @@ -1414,7 +1483,8 @@ void blk_mq_request_bypass_insert(struct request *rq) list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); - blk_mq_run_hw_queue(hctx, false); + if (run_queue) + blk_mq_run_hw_queue(hctx, false); } void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, @@ -1501,13 +1571,9 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) { blk_init_request_from_bio(rq, bio); - blk_account_io_start(rq, true); -} + blk_rq_set_rl(rq, blk_get_rl(rq->q, bio)); -static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx) -{ - return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) && - !blk_queue_nomerges(hctx->queue); + blk_account_io_start(rq, true); } static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx, @@ -1552,6 +1618,11 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if (!blk_mq_get_driver_tag(rq, NULL, false)) goto insert; + if (!blk_mq_get_dispatch_budget(hctx)) { + blk_mq_put_driver_tag(rq); + goto insert; + } + new_cookie = request_to_qc_t(hctx, rq); /* @@ -1641,13 +1712,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (unlikely(is_flush_fua)) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - if (q->elevator) { - blk_mq_sched_insert_request(rq, false, true, true, - true); - } else { - blk_insert_flush(rq); - blk_mq_run_hw_queue(data.hctx, true); - } + + /* bypass scheduler for flush rq */ + blk_insert_flush(rq); + blk_mq_run_hw_queue(data.hctx, true); } else if (plug && q->nr_hw_queues == 1) { struct request *last = NULL; @@ -1990,6 +2058,9 @@ static int blk_mq_init_hctx(struct request_queue *q, hctx->nr_ctx = 0; + init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake); + INIT_LIST_HEAD(&hctx->dispatch_wait.entry); + if (set->ops->init_hctx && set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; @@ -2229,8 +2300,11 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, mutex_lock(&set->tag_list_lock); - /* Check to see if we're transitioning to shared (from 1 to 2 
queues). */ - if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) { + /* + * Check to see if we're transitioning to shared (from 1 to 2 queues). + */ + if (!list_empty(&set->tag_list) && + !(set->flags & BLK_MQ_F_TAG_SHARED)) { set->flags |= BLK_MQ_F_TAG_SHARED; /* update existing queue */ blk_mq_update_tag_set_depth(set, true); @@ -2404,6 +2478,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, spin_lock_init(&q->requeue_lock); blk_queue_make_request(q, blk_mq_make_request); + if (q->mq_ops->poll) + q->poll_fn = blk_mq_poll; /* * Do this after blk_queue_make_request() overrides it... @@ -2460,10 +2536,9 @@ static void blk_mq_queue_reinit(struct request_queue *q) /* * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe - * we should change hctx numa_node according to new topology (this - * involves free and re-allocate memory, worthy doing?) + * we should change hctx numa_node according to the new topology (this + * involves freeing and re-allocating memory, worth doing?) */ - blk_mq_map_swqueue(q); blk_mq_sysfs_register(q); @@ -2552,6 +2627,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (!set->ops->queue_rq) return -EINVAL; + if (!set->ops->get_budget ^ !set->ops->put_budget) + return -EINVAL; + if (set->queue_depth > BLK_MQ_MAX_DEPTH) { pr_info("blk-mq: reduced tag depth to %u\n", BLK_MQ_MAX_DEPTH); @@ -2642,8 +2720,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) * queue depth. This is similar to what the old code would do. */ if (!hctx->sched_tags) { - ret = blk_mq_tag_update_depth(hctx, &hctx->tags, - min(nr, set->queue_depth), + ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false); } else { ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, @@ -2863,20 +2940,14 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) return false; } -bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) +static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) { struct blk_mq_hw_ctx *hctx; - struct blk_plug *plug; struct request *rq; - if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) || - !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) return false; - plug = current->plug; - if (plug) - blk_flush_plug_list(plug, false); - hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; if (!blk_qc_t_is_internal(cookie)) rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); @@ -2894,10 +2965,15 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) return __blk_mq_poll(hctx, rq); } -EXPORT_SYMBOL_GPL(blk_mq_poll); static int __init blk_mq_init(void) { + /* + * See comment in block/blk.h rq_atomic_flags enum + */ + BUILD_BUG_ON((REQ_ATOM_STARTED / BITS_PER_BYTE) != + (REQ_ATOM_COMPLETE / BITS_PER_BYTE)); + cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, blk_mq_hctx_notify_dead); return 0; |
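
The final hunk adds a BUILD_BUG_ON() guarding the assumption, spelled out in the new blk_mq_start_request()/blk_mq_check_expired() comments, that REQ_ATOM_STARTED and REQ_ATOM_COMPLETE live in the same byte, so coherence order applies to the consecutive set_bit()/clear_bit() stores on rq->atomic_flags. Below is a minimal userspace sketch of that compile-time guard, using C11 _Static_assert in place of the kernel's BUILD_BUG_ON; the enum values are illustrative assumptions, not necessarily the kernel's actual rq_atomic_flags numbering from block/blk.h.

#include <limits.h>
#include <stdio.h>

/*
 * Hypothetical atomic-flag bit numbers; the real ones are defined in the
 * rq_atomic_flags enum in block/blk.h.
 */
enum rq_atomic_flags {
	REQ_ATOM_COMPLETE	= 0,
	REQ_ATOM_STARTED	= 1,
	REQ_ATOM_POLL_SLEPT	= 2,
};

/*
 * Userspace stand-in for the BUILD_BUG_ON(): refuse to build if the two
 * bits do not share a byte, since the coherence-order argument in
 * blk_mq_start_request()/blk_mq_check_expired() only holds for stores and
 * loads that hit the same location.
 */
_Static_assert((REQ_ATOM_STARTED / CHAR_BIT) == (REQ_ATOM_COMPLETE / CHAR_BIT),
	       "STARTED and COMPLETE must be part of the same byte");

int main(void)
{
	printf("STARTED bit %d and COMPLETE bit %d share byte %d\n",
	       REQ_ATOM_STARTED, REQ_ATOM_COMPLETE,
	       REQ_ATOM_STARTED / CHAR_BIT);
	return 0;
}

Moving REQ_ATOM_STARTED to a bit in a different byte (for example bit 8) makes this sketch fail to compile, which is exactly the property the timeout-versus-start ordering argument in the patch relies on.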