Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- | block/blk-mq.c | 122 |
1 files changed, 33 insertions, 89 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f5c7dbcb954f..0dc9e341c2a7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1180,7 +1180,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		struct blk_mq_queue_data bd;
 
 		rq = list_first_entry(list, struct request, queuelist);
-		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+
+		hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+		if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		if (!blk_mq_get_driver_tag(rq, NULL, false)) {
 			/*
 			 * The initial allocation attempt failed, so we need to
 			 * rerun the hardware queue when a tag is freed. The
@@ -1189,8 +1194,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			 * we'll re-run it below.
 			 */
 			if (!blk_mq_mark_tag_wait(&hctx, rq)) {
-				if (got_budget)
-					blk_mq_put_dispatch_budget(hctx);
+				blk_mq_put_dispatch_budget(hctx);
 				/*
 				 * For non-shared tags, the RESTART check
 				 * will suffice.
@@ -1201,11 +1205,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			}
 		}
 
-		if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
-			blk_mq_put_driver_tag(rq);
-			break;
-		}
-
 		list_del_init(&rq->queuelist);
 
 		bd.rq = rq;
@@ -1336,6 +1335,15 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	hctx_unlock(hctx, srcu_idx);
 }
 
+static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
+{
+	int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
+
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(hctx->cpumask);
+	return cpu;
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1345,26 +1353,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 {
 	bool tried = false;
+	int next_cpu = hctx->next_cpu;
 
 	if (hctx->queue->nr_hw_queues == 1)
 		return WORK_CPU_UNBOUND;
 
 	if (--hctx->next_cpu_batch <= 0) {
-		int next_cpu;
 select_cpu:
-		next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
+		next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
 				cpu_online_mask);
 		if (next_cpu >= nr_cpu_ids)
-			next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
-
-		/*
-		 * No online CPU is found, so have to make sure hctx->next_cpu
-		 * is set correctly for not breaking workqueue.
-		 */
-		if (next_cpu >= nr_cpu_ids)
-			hctx->next_cpu = cpumask_first(hctx->cpumask);
-		else
-			hctx->next_cpu = next_cpu;
+			next_cpu = blk_mq_first_mapped_cpu(hctx);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 
@@ -1372,7 +1371,7 @@ select_cpu:
 	 * Do unbound schedule if we can't find a online CPU for this hctx,
 	 * and it should only happen in the path of handling CPU DEAD.
 	 */
-	if (!cpu_online(hctx->next_cpu)) {
+	if (!cpu_online(next_cpu)) {
 		if (!tried) {
 			tried = true;
 			goto select_cpu;
@@ -1382,18 +1381,18 @@ select_cpu:
 		 * Make sure to re-select CPU next time once after CPUs
 		 * in hctx->cpumask become online again.
 		 */
+		hctx->next_cpu = next_cpu;
 		hctx->next_cpu_batch = 1;
 		return WORK_CPU_UNBOUND;
 	}
-	return hctx->next_cpu;
+
+	hctx->next_cpu = next_cpu;
+	return next_cpu;
 }
 
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 					unsigned long msecs)
 {
-	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-		return;
-
 	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
@@ -1560,40 +1559,14 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
 
 	/*
-	 * If we are stopped, don't run the queue. The exception is if
-	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
-	 * the STOPPED bit and run it.
+	 * If we are stopped, don't run the queue.
 	 */
-	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
-		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
-			return;
-
-		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
+	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
-	}
 
 	__blk_mq_run_hw_queue(hctx);
 }
 
-
-void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
-{
-	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-		return;
-
-	/*
-	 * Stop the hw queue, then modify currently delayed work.
-	 * This should prevent us from running the queue prematurely.
-	 * Mark the queue as auto-clearing STOPPED when it runs.
-	 */
-	blk_mq_stop_hw_queue(hctx);
-	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
-	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-				    &hctx->run_work,
-				    msecs_to_jiffies(msecs));
-}
-EXPORT_SYMBOL(blk_mq_delay_queue);
-
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 					    struct request *rq,
 					    bool at_head)
@@ -1804,11 +1777,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	if (q->elevator && !bypass_insert)
 		goto insert;
 
-	if (!blk_mq_get_driver_tag(rq, NULL, false))
+	if (!blk_mq_get_dispatch_budget(hctx))
 		goto insert;
 
-	if (!blk_mq_get_dispatch_budget(hctx)) {
-		blk_mq_put_driver_tag(rq);
+	if (!blk_mq_get_driver_tag(rq, NULL, false)) {
+		blk_mq_put_dispatch_budget(hctx);
 		goto insert;
 	}
 
@@ -2356,7 +2329,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
-	unsigned int i, hctx_idx;
+	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_tag_set *set = q->tag_set;
@@ -2373,23 +2346,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 
 	/*
 	 * Map software to hardware queues.
-	 *
-	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
 	for_each_possible_cpu(i) {
-		hctx_idx = q->mq_map[i];
-		/* unmapped hw queue can be remapped after CPU topo changed */
-		if (!set->tags[hctx_idx] &&
-		    !__blk_mq_alloc_rq_map(set, hctx_idx)) {
-			/*
-			 * If tags initialization fail for some hctx,
-			 * that hctx won't be brought online. In this
-			 * case, remap the current ctx to hctx[0] which
-			 * is guaranteed to always have tags allocated
-			 */
-			q->mq_map[i] = 0;
-		}
-
 		ctx = per_cpu_ptr(q->queue_ctx, i);
 		hctx = blk_mq_map_queue(q, i);
 
@@ -2401,21 +2359,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	mutex_unlock(&q->sysfs_lock);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		/*
-		 * If no software queues are mapped to this hardware queue,
-		 * disable it and free the request entries.
-		 */
-		if (!hctx->nr_ctx) {
-			/* Never unmap queue 0. We need it as a
-			 * fallback in case of a new remap fails
-			 * allocation
-			 */
-			if (i && set->tags[i])
-				blk_mq_free_map_and_requests(set, i);
-
-			hctx->tags = NULL;
-			continue;
-		}
+		/* every hctx should get mapped by at least one CPU */
+		WARN_ON(!hctx->nr_ctx);
 
 		hctx->tags = set->tags[i];
 		WARN_ON(!hctx->tags);
@@ -2430,8 +2375,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		/*
 		 * Initialize batch roundrobin counts
 		 */
-		hctx->next_cpu = cpumask_first_and(hctx->cpumask,
-				cpu_online_mask);
+		hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 }