diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-03 11:57:03 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-03 11:57:03 -0700 |
commit | 382625d0d4325fb14a29444eb8dce8dcc2eb9b51 (patch) | |
tree | de35ff523e65c3a98fd3ac3a3595d517856d03da /block/blk-core.c | |
parent | 99f6cf61f175c1239ed8e86d4a1757c380da52d1 (diff) | |
parent | d958e343bdc3de2643ce25225bed082dc222858d (diff) |
Merge tag 'for-5.9/block-20200802' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
"Good amount of cleanups and tech debt removals in here, and as a
result, the diffstat shows a nice net reduction in code.
- Softirq completion cleanups (Christoph)
- Stop using ->queuedata (Christoph)
- Cleanup bd claiming (Christoph)
- Use check_events, moving away from the legacy media change
(Christoph)
- Use inode i_blkbits consistently (Christoph)
- Remove old unused writeback congestion bits (Christoph)
- Cleanup/unify submission path (Christoph)
- Use bio_uninit consistently, instead of bio_disassociate_blkg
(Christoph)
- sbitmap cleared bits handling (John)
- Request merging blktrace event addition (Jan)
- sysfs add/remove race fixes (Luis)
- blk-mq tag fixes/optimizations (Ming)
- Duplicate words in comments (Randy)
- Flush deferral cleanup (Yufen)
- IO context locking/retry fixes (John)
- struct_size() usage (Gustavo)
- blk-iocost fixes (Chengming)
- blk-cgroup IO stats fixes (Boris)
- Various little fixes"
* tag 'for-5.9/block-20200802' of git://git.kernel.dk/linux-block: (135 commits)
block: blk-timeout: delete duplicated word
block: blk-mq-sched: delete duplicated word
block: blk-mq: delete duplicated word
block: genhd: delete duplicated words
block: elevator: delete duplicated word and fix typos
block: bio: delete duplicated words
block: bfq-iosched: fix duplicated word
iocost_monitor: start from the oldest usage index
iocost: Fix check condition of iocg abs_vdebt
block: Remove callback typedefs for blk_mq_ops
block: Use non _rcu version of list functions for tag_set_list
blk-cgroup: show global disk stats in root cgroup io.stat
blk-cgroup: make iostat functions visible to stat printing
block: improve discard bio alignment in __blkdev_issue_discard()
block: change REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL to be odd numbers
block: defer flush request no matter whether we have elevator
block: make blk_timeout_init() static
block: remove retry loop in ioc_release_fn()
block: remove unnecessary ioc nested locking
block: integrate bd_start_claiming into __blkdev_get
...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 286 |
1 files changed, 143 insertions, 143 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 03252af8c82c..93104c7470e8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -51,9 +51,7 @@ #include "blk-pm.h" #include "blk-rq-qos.h" -#ifdef CONFIG_DEBUG_FS struct dentry *blk_debugfs_root; -#endif EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -285,7 +283,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); * A block device may call blk_sync_queue to ensure that any * such activity is cancelled, thus allowing it to release resources * that the callbacks might use. The caller must already have made sure - * that its ->make_request_fn will not re-add plugging prior to calling + * that its ->submit_bio will not re-add plugging prior to calling * this function. * * This function does not cancel any asynchronous activity arising @@ -321,6 +319,16 @@ void blk_clear_pm_only(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_clear_pm_only); +/** + * blk_put_queue - decrement the request_queue refcount + * @q: the request_queue structure to decrement the refcount for + * + * Decrements the refcount of the request_queue kobject. When this reaches 0 + * we'll have blk_release_queue() called. + * + * Context: Any context, but the last reference must not be dropped from + * atomic context. + */ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); @@ -352,9 +360,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying); * * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and * put it. All future requests will be failed immediately with -ENODEV. + * + * Context: can sleep */ void blk_cleanup_queue(struct request_queue *q) { + /* cannot be called from atomic context */ + might_sleep(); + WARN_ON_ONCE(blk_queue_registered(q)); /* mark @q DYING, no new request or merges will be allowed afterwards */ @@ -497,7 +510,7 @@ static void blk_timeout_work(struct work_struct *work) { } -struct request_queue *__blk_alloc_queue(int node_id) +struct request_queue *blk_alloc_queue(int node_id) { struct request_queue *q; int ret; @@ -540,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id) kobject_init(&q->kobj, &blk_queue_ktype); -#ifdef CONFIG_BLK_DEV_IO_TRACE - mutex_init(&q->blk_trace_mutex); -#endif + mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); mutex_init(&q->sysfs_dir_lock); spin_lock_init(&q->queue_lock); @@ -564,6 +575,7 @@ struct request_queue *__blk_alloc_queue(int node_id) blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); + q->nr_requests = BLKDEV_MAX_RQ; return q; @@ -581,23 +593,16 @@ fail_q: kmem_cache_free(blk_requestq_cachep, q); return NULL; } - -struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id) -{ - struct request_queue *q; - - if (WARN_ON_ONCE(!make_request)) - return NULL; - - q = __blk_alloc_queue(node_id); - if (!q) - return NULL; - q->make_request_fn = make_request; - q->nr_requests = BLKDEV_MAX_RQ; - return q; -} EXPORT_SYMBOL(blk_alloc_queue); +/** + * blk_get_queue - increment the request_queue refcount + * @q: the request_queue structure to increment the refcount for + * + * Increment the refcount of the request_queue kobject. + * + * Context: Any context. + */ bool blk_get_queue(struct request_queue *q) { if (likely(!blk_queue_dying(q))) { @@ -850,8 +855,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) return false; WARN_ONCE(1, - "generic_make_request: Trying to write " - "to read-only block-device %s (partno %d)\n", + "Trying to write to read-only block-device %s (partno %d)\n", bio_devname(bio, b), part->partno); /* Older lvm-tools actually trigger this */ return false; @@ -952,25 +956,13 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_OK; } -static noinline_for_stack bool -generic_make_request_checks(struct bio *bio) +static noinline_for_stack bool submit_bio_checks(struct bio *bio) { - struct request_queue *q; - int nr_sectors = bio_sectors(bio); + struct request_queue *q = bio->bi_disk->queue; blk_status_t status = BLK_STS_IOERR; - char b[BDEVNAME_SIZE]; might_sleep(); - q = bio->bi_disk->queue; - if (unlikely(!q)) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%Lu)\n", - bio_devname(bio, b), (long long)bio->bi_iter.bi_sector); - goto end_io; - } - /* * For a REQ_NOWAIT based request, return -EOPNOTSUPP * if queue is not a request based queue. @@ -992,14 +984,13 @@ generic_make_request_checks(struct bio *bio) } /* - * Filter flush bio's early so that make_request based - * drivers without flush support don't have to worry - * about them. + * Filter flush bio's early so that bio based drivers without flush + * support don't have to worry about them. */ if (op_is_flush(bio->bi_opf) && !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); - if (!nr_sectors) { + if (!bio_sectors(bio)) { status = BLK_STS_OK; goto end_io; } @@ -1054,8 +1045,13 @@ generic_make_request_checks(struct bio *bio) if (unlikely(!current->io_context)) create_task_io_context(current, GFP_ATOMIC, q->node); - if (!blkcg_bio_issue_check(q, bio)) + if (blk_throtl_bio(bio)) { + blkcg_bio_issue_init(bio); return false; + } + + blk_cgroup_bio_start(bio); + blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_queue(q, bio); @@ -1074,138 +1070,144 @@ end_io: return false; } -static blk_qc_t do_make_request(struct bio *bio) +static blk_qc_t __submit_bio(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; + struct gendisk *disk = bio->bi_disk; blk_qc_t ret = BLK_QC_T_NONE; if (blk_crypto_bio_prep(&bio)) { - if (!q->make_request_fn) - return blk_mq_make_request(q, bio); - ret = q->make_request_fn(q, bio); + if (!disk->fops->submit_bio) + return blk_mq_submit_bio(bio); + ret = disk->fops->submit_bio(bio); } - blk_queue_exit(q); + blk_queue_exit(disk->queue); return ret; } -/** - * generic_make_request - re-submit a bio to the block device layer for I/O - * @bio: The bio describing the location in memory and on the device. +/* + * The loop in this function may be a bit non-obvious, and so deserves some + * explanation: * - * This is a version of submit_bio() that shall only be used for I/O that is - * resubmitted to lower level drivers by stacking block drivers. All file - * systems and other upper level users of the block layer should use - * submit_bio() instead. + * - Before entering the loop, bio->bi_next is NULL (as all callers ensure + * that), so we have a list with a single bio. + * - We pretend that we have just taken it off a longer list, so we assign + * bio_list to a pointer to the bio_list_on_stack, thus initialising the + * bio_list of new bios to be added. ->submit_bio() may indeed add some more + * bios through a recursive call to submit_bio_noacct. If it did, we find a + * non-NULL value in bio_list and re-enter the loop from the top. + * - In this case we really did just take the bio of the top of the list (no + * pretending) and so remove it from bio_list, and call into ->submit_bio() + * again. + * + * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. + * bio_list_on_stack[1] contains bios that were submitted before the current + * ->submit_bio_bio, but that haven't been processed yet. */ -blk_qc_t generic_make_request(struct bio *bio) +static blk_qc_t __submit_bio_noacct(struct bio *bio) { - /* - * bio_list_on_stack[0] contains bios submitted by the current - * make_request_fn. - * bio_list_on_stack[1] contains bios that were submitted before - * the current make_request_fn, but that haven't been processed - * yet. - */ struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; - if (!generic_make_request_checks(bio)) - goto out; - - /* - * We only want one ->make_request_fn to be active at a time, else - * stack usage with stacked devices could be a problem. So use - * current->bio_list to keep a list of requests submited by a - * make_request_fn function. current->bio_list is also used as a - * flag to say if generic_make_request is currently active in this - * task or not. If it is NULL, then no make_request is active. If - * it is non-NULL, then a make_request is active, and new requests - * should be added at the tail - */ - if (current->bio_list) { - bio_list_add(¤t->bio_list[0], bio); - goto out; - } - - /* following loop may be a bit non-obvious, and so deserves some - * explanation. - * Before entering the loop, bio->bi_next is NULL (as all callers - * ensure that) so we have a list with a single bio. - * We pretend that we have just taken it off a longer list, so - * we assign bio_list to a pointer to the bio_list_on_stack, - * thus initialising the bio_list of new bios to be - * added. ->make_request() may indeed add some more bios - * through a recursive call to generic_make_request. If it - * did, we find a non-NULL value in bio_list and re-enter the loop - * from the top. In this case we really did just take the bio - * of the top of the list (no pretending) and so remove it from - * bio_list, and call into ->make_request() again. - */ BUG_ON(bio->bi_next); + bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; + do { struct request_queue *q = bio->bi_disk->queue; + struct bio_list lower, same; + + if (unlikely(bio_queue_enter(bio) != 0)) + continue; - if (likely(bio_queue_enter(bio) == 0)) { - struct bio_list lower, same; + /* + * Create a fresh bio_list for all subordinate requests. + */ + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); - /* Create a fresh bio_list for all subordinate requests */ - bio_list_on_stack[1] = bio_list_on_stack[0]; - bio_list_init(&bio_list_on_stack[0]); - ret = do_make_request(bio); + ret = __submit_bio(bio); - /* sort new bios into those for a lower level - * and those for the same level - */ - bio_list_init(&lower); - bio_list_init(&same); - while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) - if (q == bio->bi_disk->queue) - bio_list_add(&same, bio); - else - bio_list_add(&lower, bio); - /* now assemble so we handle the lowest level first */ - bio_list_merge(&bio_list_on_stack[0], &lower); - bio_list_merge(&bio_list_on_stack[0], &same); - bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); + /* + * Sort new bios into those for a lower level and those for the + * same level. + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) + if (q == bio->bi_disk->queue) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + + /* + * Now assemble so we handle the lowest level first. + */ + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); + } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); + + current->bio_list = NULL; + return ret; +} + +static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) +{ + struct bio_list bio_list[2] = { }; + blk_qc_t ret = BLK_QC_T_NONE; + + current->bio_list = bio_list; + + do { + struct gendisk *disk = bio->bi_disk; + + if (unlikely(bio_queue_enter(bio) != 0)) + continue; + + if (!blk_crypto_bio_prep(&bio)) { + blk_queue_exit(disk->queue); + ret = BLK_QC_T_NONE; + continue; } - bio = bio_list_pop(&bio_list_on_stack[0]); - } while (bio); - current->bio_list = NULL; /* deactivate */ -out: + ret = blk_mq_submit_bio(bio); + } while ((bio = bio_list_pop(&bio_list[0]))); + + current->bio_list = NULL; return ret; } -EXPORT_SYMBOL(generic_make_request); /** - * direct_make_request - hand a buffer directly to its device driver for I/O + * submit_bio_noacct - re-submit a bio to the block device layer for I/O * @bio: The bio describing the location in memory and on the device. * - * This function behaves like generic_make_request(), but does not protect - * against recursion. Must only be used if the called driver is known - * to be blk-mq based. + * This is a version of submit_bio() that shall only be used for I/O that is + * resubmitted to lower level drivers by stacking block drivers. All file + * systems and other upper level users of the block layer should use + * submit_bio() instead. */ -blk_qc_t direct_make_request(struct bio *bio) +blk_qc_t submit_bio_noacct(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; - - if (WARN_ON_ONCE(q->make_request_fn)) { - bio_io_error(bio); - return BLK_QC_T_NONE; - } - if (!generic_make_request_checks(bio)) - return BLK_QC_T_NONE; - if (unlikely(bio_queue_enter(bio))) + if (!submit_bio_checks(bio)) return BLK_QC_T_NONE; - if (!blk_crypto_bio_prep(&bio)) { - blk_queue_exit(q); + + /* + * We only want one ->submit_bio to be active at a time, else stack + * usage with stacked devices could be a problem. Use current->bio_list + * to collect a list of requests submited by a ->submit_bio method while + * it is active, and then process them after it returned. + */ + if (current->bio_list) { + bio_list_add(¤t->bio_list[0], bio); return BLK_QC_T_NONE; } - return blk_mq_make_request(q, bio); + + if (!bio->bi_disk->fops->submit_bio) + return __submit_bio_noacct_mq(bio); + return __submit_bio_noacct(bio); } -EXPORT_SYMBOL_GPL(direct_make_request); +EXPORT_SYMBOL(submit_bio_noacct); /** * submit_bio - submit a bio to the block device layer for I/O @@ -1266,13 +1268,13 @@ blk_qc_t submit_bio(struct bio *bio) blk_qc_t ret; psi_memstall_enter(&pflags); - ret = generic_make_request(bio); + ret = submit_bio_noacct(bio); psi_memstall_leave(&pflags); return ret; } - return generic_make_request(bio); + return submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); @@ -1908,9 +1910,7 @@ int __init blk_dev_init(void) blk_requestq_cachep = kmem_cache_create("request_queue", sizeof(struct request_queue), 0, SLAB_PANIC, NULL); -#ifdef CONFIG_DEBUG_FS blk_debugfs_root = debugfs_create_dir("block", NULL); -#endif return 0; } |