blk-mq: simplify queue mapping & schedule with each possisble CPU

The previous patch assigns interrupt vectors to all possible CPUs, so now hctx can be mapped to possible CPUs, this patch applies this fact to simplify queue mapping & schedule so that we don't need to handle CPU hotplug for dealing with physical CPU plug & unplug. With this simplication, we can work well on physical CPU plug & unplug, which is a normal use case for VM at least. Make sure we allocate blk_mq_ctx structures for all possible CPUs, and set hctx->numa_node for possible CPUs which are mapped to this hctx. And only choose the online CPUs for schedule. Reported-by: Christian Borntraeger <borntraeger@de.ibm.com> Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> Tested-by: Stefan Haberland <sth@linux.vnet.ibm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Christoph Hellwig <hch@lst.de> Fixes: 4b855ad37194 ("blk-mq: Create hctx for each present CPU") (merged the three into one because any single one may not work, and fix selecting online CPUs for scheduler) Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Christoph Hellwig <hch@lst.de> 2018-01-12 10:53:06 +0800
committer: Jens Axboe <axboe@kernel.dk> 2018-01-12 11:01:40 -0700
commit: 20e4d813931961fe26d26a1e98b3aba6ec00b130 (patch)
tree: b6ba68628816661204010388c68416be56ceefb7 /block
parent: 84676c1f21e8ff54befe985f4f14dc1edc10046b (diff)
1 files changed, 8 insertions, 11 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index afccd0848d6f..b3b2003b7429 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -440,7 +440,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 		blk_queue_exit(q);
 		return ERR_PTR(-EXDEV);
 	}
-	cpu = cpumask_first(alloc_data.hctx->cpumask);
+	cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask);
 	alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
 	rq = blk_mq_get_request(q, NULL, op, &alloc_data);
@@ -1324,9 +1324,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 	if (--hctx->next_cpu_batch <= 0) {
 		int next_cpu;
 
-		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
+		next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
+				cpu_online_mask);
 		if (next_cpu >= nr_cpu_ids)
-			next_cpu = cpumask_first(hctx->cpumask);
+			next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
 
 		hctx->next_cpu = next_cpu;
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
@@ -2220,16 +2221,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
 
-		/* If the cpu isn't present, the cpu is mapped to first hctx */
-		if (!cpu_present(i))
-			continue;
-
-		hctx = blk_mq_map_queue(q, i);
-
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
 		 * not, we remain on the home node of the device
 		 */
+		hctx = blk_mq_map_queue(q, i);
 		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
 			hctx->numa_node = local_memory_node(cpu_to_node(i));
 	}
@@ -2286,7 +2282,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	 *
 	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
-	for_each_present_cpu(i) {
+	for_each_possible_cpu(i) {
 		hctx_idx = q->mq_map[i];
 		/* unmapped hw queue can be remapped after CPU topo changed */
 		if (!set->tags[hctx_idx] &&
@@ -2340,7 +2336,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		/*
 		 * Initialize batch roundrobin counts
 		 */
-		hctx->next_cpu = cpumask_first(hctx->cpumask);
+		hctx->next_cpu = cpumask_first_and(hctx->cpumask,
+				cpu_online_mask);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 }
author	Christoph Hellwig <hch@lst.de>	2018-01-12 10:53:06 +0800
committer	Jens Axboe <axboe@kernel.dk>	2018-01-12 11:01:40 -0700
commit	20e4d813931961fe26d26a1e98b3aba6ec00b130 (patch)
tree	b6ba68628816661204010388c68416be56ceefb7 /block
parent	84676c1f21e8ff54befe985f4f14dc1edc10046b (diff)