diff options
author | Bart Van Assche <bart.vanassche@wdc.com> | 2018-02-28 10:15:32 -0800 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2018-02-28 12:23:35 -0700 |
commit | 498f6650aec864e331cae7575fec5f07781d0bf3 (patch) | |
tree | 48b9bdc433ec5c8729e317d55e5d2c5d2fec11c4 /block/blk-core.c | |
parent | 5ee0524ba137fe928a88b440d014e3c8451fb32c (diff) |
block: Fix a race between the cgroup code and request queue initialization
Initialize the request queue lock earlier such that the following
race can no longer occur:
blk_init_queue_node()                     blkcg_print_blkgs()
  blk_alloc_queue_node (1)
  q->queue_lock = &q->__queue_lock (2)
  blkcg_init_queue(q) (3)
                                            spin_lock_irq(blkg->q->queue_lock) (4)
  q->queue_lock = lock (5)
                                            spin_unlock_irq(blkg->q->queue_lock) (6)
(1) allocate an uninitialized queue;
(2) initialize queue_lock to its default internal lock;
(3) initialize blkcg part of request queue, which will create blkg and
then insert it to blkg_list;
(4) traverse blkg_list and find the created blkg, and then take its
queue lock, here it is the default *internal lock*;
(5) *race window*, now queue_lock is overridden with *driver specified
lock*;
(6) now unlock *driver specified lock*, not the locked *internal lock*,
unlock balance breaks.
The changes in this patch are as follows:
- Move the .queue_lock initialization from blk_init_queue_node() into
blk_alloc_queue_node().
- Only override the .queue_lock pointer for legacy queues because it
is not useful for blk-mq queues to override this pointer.
- For all block drivers that initialize .queue_lock explicitly,
change the blk_alloc_queue() call in the driver into a
blk_alloc_queue_node() call and remove the explicit .queue_lock
initialization. Additionally, initialize the spin lock that will
be used as queue lock earlier if necessary.
Reported-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 24 |
1 files changed, 16 insertions, 8 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index e873a24bf82d..41c74b37be85 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -888,6 +888,19 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
 	kblockd_schedule_work(&q->timeout_work);
 }
 
+/**
+ * blk_alloc_queue_node - allocate a request queue
+ * @gfp_mask: memory allocation flags
+ * @node_id: NUMA node to allocate memory from
+ * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
+ *        serialize calls to the legacy .request_fn() callback. Ignored for
+ *        blk-mq request queues.
+ *
+ * Note: pass the queue lock as the third argument to this function instead of
+ * setting the queue lock pointer explicitly to avoid triggering a sporadic
+ * crash in the blkcg code. This function namely calls blkcg_init_queue() and
+ * the queue lock pointer must be set before blkcg_init_queue() is called.
+ */
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 					   spinlock_t *lock)
 {
@@ -940,11 +953,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 	mutex_init(&q->sysfs_lock);
 	spin_lock_init(&q->__queue_lock);
 
-	/*
-	 * By default initialize queue_lock to internal lock and driver can
-	 * override it later if need be.
-	 */
-	q->queue_lock = &q->__queue_lock;
+	if (!q->mq_ops)
+		q->queue_lock = lock ? : &q->__queue_lock;
 
 	/*
 	 * A queue starts its life with bypass turned on to avoid
@@ -1031,13 +1041,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 {
 	struct request_queue *q;
 
-	q = blk_alloc_queue_node(GFP_KERNEL, node_id, NULL);
+	q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
 	if (!q)
 		return NULL;
 
 	q->request_fn = rfn;
-	if (lock)
-		q->queue_lock = lock;
 	if (blk_init_allocated_queue(q) < 0) {
 		blk_cleanup_queue(q);
 		return NULL;