summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2017-11-14 10:24:58 -0700
committerJens Axboe <axboe@kernel.dk>2017-12-22 11:09:37 -0700
commit4e5dff41be7b5201c1c47ceb3a2a8d698516bc2b (patch)
tree0e7924a2fdd30cfdd17e471b8400085e1e5b734c
parent1291a0d5049dbc06baaaf66a9ff3f53db493b19b (diff)
blk-mq: improve heavily contended tag case
Even with a number of waitqueues, we can get into a situation where we are heavily contended on the waitqueue lock. I got a report on spc1 where we're spending seconds doing this. Arguably the use case is nasty, I reproduce it with one device and 1000 threads banging on the device. But that doesn't mean we shouldn't be handling it better. What ends up happening is that a thread will fail to get a tag, add itself to the waitqueue, and subsequently get woken up when a tag is freed - only to find itself going back to sleep on the waitqueue. Instead of waking all threads, use an exclusive wait and wake up our sbitmap batch count instead. This seems to work well for me (massive improvement for this use case), and it survives basic testing. But I haven't fully verified it yet. An additional improvement is running the queue and checking for a new tag BEFORE needing to add ourselves to the waitqueue. Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-mq-tag.c13
-rw-r--r--lib/sbitmap.c2
2 files changed, 8 insertions, 7 deletions
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c81b40ecd3f1..336dde07b230 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,12 +134,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
ws = bt_wait_ptr(bt, data->hctx);
drop_ctx = data->ctx == NULL;
do {
- prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
-
- tag = __blk_mq_get_tag(data, bt);
- if (tag != -1)
- break;
-
/*
* We're out of tags on this hardware queue, kick any
* pending IO submits before going to sleep waiting for
@@ -155,6 +149,13 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
if (tag != -1)
break;
+ prepare_to_wait_exclusive(&ws->wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+
+ tag = __blk_mq_get_tag(data, bt);
+ if (tag != -1)
+ break;
+
if (data->ctx)
blk_mq_put_ctx(data->ctx);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 80aa8d5463fa..42b5ca0acf93 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -462,7 +462,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
*/
atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
sbq_index_atomic_inc(&sbq->wake_index);
- wake_up(&ws->wait);
+ wake_up_nr(&ws->wait, wake_batch);
}
}