summaryrefslogtreecommitdiff
path: root/block/blk-core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 11:57:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 11:57:03 -0700
commit382625d0d4325fb14a29444eb8dce8dcc2eb9b51 (patch)
treede35ff523e65c3a98fd3ac3a3595d517856d03da /block/blk-core.c
parent99f6cf61f175c1239ed8e86d4a1757c380da52d1 (diff)
parentd958e343bdc3de2643ce25225bed082dc222858d (diff)
Merge tag 'for-5.9/block-20200802' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe: "Good amount of cleanups and tech debt removals in here, and as a result, the diffstat shows a nice net reduction in code. - Softirq completion cleanups (Christoph) - Stop using ->queuedata (Christoph) - Cleanup bd claiming (Christoph) - Use check_events, moving away from the legacy media change (Christoph) - Use inode i_blkbits consistently (Christoph) - Remove old unused writeback congestion bits (Christoph) - Cleanup/unify submission path (Christoph) - Use bio_uninit consistently, instead of bio_disassociate_blkg (Christoph) - sbitmap cleared bits handling (John) - Request merging blktrace event addition (Jan) - sysfs add/remove race fixes (Luis) - blk-mq tag fixes/optimizations (Ming) - Duplicate words in comments (Randy) - Flush deferral cleanup (Yufen) - IO context locking/retry fixes (John) - struct_size() usage (Gustavo) - blk-iocost fixes (Chengming) - blk-cgroup IO stats fixes (Boris) - Various little fixes" * tag 'for-5.9/block-20200802' of git://git.kernel.dk/linux-block: (135 commits) block: blk-timeout: delete duplicated word block: blk-mq-sched: delete duplicated word block: blk-mq: delete duplicated word block: genhd: delete duplicated words block: elevator: delete duplicated word and fix typos block: bio: delete duplicated words block: bfq-iosched: fix duplicated word iocost_monitor: start from the oldest usage index iocost: Fix check condition of iocg abs_vdebt block: Remove callback typedefs for blk_mq_ops block: Use non _rcu version of list functions for tag_set_list blk-cgroup: show global disk stats in root cgroup io.stat blk-cgroup: make iostat functions visible to stat printing block: improve discard bio alignment in __blkdev_issue_discard() block: change REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL to be odd numbers block: defer flush request no matter whether we have elevator block: make blk_timeout_init() static block: remove retry loop in ioc_release_fn() block: remove unnecessary ioc nested locking block: integrate bd_start_claiming into __blkdev_get ...
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c286
1 files changed, 143 insertions, 143 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 03252af8c82c..93104c7470e8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -51,9 +51,7 @@
#include "blk-pm.h"
#include "blk-rq-qos.h"
-#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
-#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -285,7 +283,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags);
* A block device may call blk_sync_queue to ensure that any
* such activity is cancelled, thus allowing it to release resources
* that the callbacks might use. The caller must already have made sure
- * that its ->make_request_fn will not re-add plugging prior to calling
+ * that its ->submit_bio will not re-add plugging prior to calling
* this function.
*
* This function does not cancel any asynchronous activity arising
@@ -321,6 +319,16 @@ void blk_clear_pm_only(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
+/**
+ * blk_put_queue - decrement the request_queue refcount
+ * @q: the request_queue structure to decrement the refcount for
+ *
+ * Decrements the refcount of the request_queue kobject. When this reaches 0
+ * we'll have blk_release_queue() called.
+ *
+ * Context: Any context, but the last reference must not be dropped from
+ * atomic context.
+ */
void blk_put_queue(struct request_queue *q)
{
kobject_put(&q->kobj);
@@ -352,9 +360,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
*
* Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
* put it. All future requests will be failed immediately with -ENODEV.
+ *
+ * Context: can sleep
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ /* cannot be called from atomic context */
+ might_sleep();
+
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
@@ -497,7 +510,7 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-struct request_queue *__blk_alloc_queue(int node_id)
+struct request_queue *blk_alloc_queue(int node_id)
{
struct request_queue *q;
int ret;
@@ -540,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
kobject_init(&q->kobj, &blk_queue_ktype);
-#ifdef CONFIG_BLK_DEV_IO_TRACE
- mutex_init(&q->blk_trace_mutex);
-#endif
+ mutex_init(&q->debugfs_mutex);
mutex_init(&q->sysfs_lock);
mutex_init(&q->sysfs_dir_lock);
spin_lock_init(&q->queue_lock);
@@ -564,6 +575,7 @@ struct request_queue *__blk_alloc_queue(int node_id)
blk_queue_dma_alignment(q, 511);
blk_set_default_limits(&q->limits);
+ q->nr_requests = BLKDEV_MAX_RQ;
return q;
@@ -581,23 +593,16 @@ fail_q:
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
-
-struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id)
-{
- struct request_queue *q;
-
- if (WARN_ON_ONCE(!make_request))
- return NULL;
-
- q = __blk_alloc_queue(node_id);
- if (!q)
- return NULL;
- q->make_request_fn = make_request;
- q->nr_requests = BLKDEV_MAX_RQ;
- return q;
-}
EXPORT_SYMBOL(blk_alloc_queue);
+/**
+ * blk_get_queue - increment the request_queue refcount
+ * @q: the request_queue structure to increment the refcount for
+ *
+ * Increment the refcount of the request_queue kobject.
+ *
+ * Context: Any context.
+ */
bool blk_get_queue(struct request_queue *q)
{
if (likely(!blk_queue_dying(q))) {
@@ -850,8 +855,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
return false;
WARN_ONCE(1,
- "generic_make_request: Trying to write "
- "to read-only block-device %s (partno %d)\n",
+ "Trying to write to read-only block-device %s (partno %d)\n",
bio_devname(bio, b), part->partno);
/* Older lvm-tools actually trigger this */
return false;
@@ -952,25 +956,13 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
-static noinline_for_stack bool
-generic_make_request_checks(struct bio *bio)
+static noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
- struct request_queue *q;
- int nr_sectors = bio_sectors(bio);
+ struct request_queue *q = bio->bi_disk->queue;
blk_status_t status = BLK_STS_IOERR;
- char b[BDEVNAME_SIZE];
might_sleep();
- q = bio->bi_disk->queue;
- if (unlikely(!q)) {
- printk(KERN_ERR
- "generic_make_request: Trying to access "
- "nonexistent block-device %s (%Lu)\n",
- bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
- goto end_io;
- }
-
/*
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
* if queue is not a request based queue.
@@ -992,14 +984,13 @@ generic_make_request_checks(struct bio *bio)
}
/*
- * Filter flush bio's early so that make_request based
- * drivers without flush support don't have to worry
- * about them.
+ * Filter flush bio's early so that bio based drivers without flush
+ * support don't have to worry about them.
*/
if (op_is_flush(bio->bi_opf) &&
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
- if (!nr_sectors) {
+ if (!bio_sectors(bio)) {
status = BLK_STS_OK;
goto end_io;
}
@@ -1054,8 +1045,13 @@ generic_make_request_checks(struct bio *bio)
if (unlikely(!current->io_context))
create_task_io_context(current, GFP_ATOMIC, q->node);
- if (!blkcg_bio_issue_check(q, bio))
+ if (blk_throtl_bio(bio)) {
+ blkcg_bio_issue_init(bio);
return false;
+ }
+
+ blk_cgroup_bio_start(bio);
+ blkcg_bio_issue_init(bio);
if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
trace_block_bio_queue(q, bio);
@@ -1074,138 +1070,144 @@ end_io:
return false;
}
-static blk_qc_t do_make_request(struct bio *bio)
+static blk_qc_t __submit_bio(struct bio *bio)
{
- struct request_queue *q = bio->bi_disk->queue;
+ struct gendisk *disk = bio->bi_disk;
blk_qc_t ret = BLK_QC_T_NONE;
if (blk_crypto_bio_prep(&bio)) {
- if (!q->make_request_fn)
- return blk_mq_make_request(q, bio);
- ret = q->make_request_fn(q, bio);
+ if (!disk->fops->submit_bio)
+ return blk_mq_submit_bio(bio);
+ ret = disk->fops->submit_bio(bio);
}
- blk_queue_exit(q);
+ blk_queue_exit(disk->queue);
return ret;
}
-/**
- * generic_make_request - re-submit a bio to the block device layer for I/O
- * @bio: The bio describing the location in memory and on the device.
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
*
- * This is a version of submit_bio() that shall only be used for I/O that is
- * resubmitted to lower level drivers by stacking block drivers. All file
- * systems and other upper level users of the block layer should use
- * submit_bio() instead.
+ * - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ * that), so we have a list with a single bio.
+ * - We pretend that we have just taken it off a longer list, so we assign
+ * bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ * bio_list of new bios to be added. ->submit_bio() may indeed add some more
+ * bios through a recursive call to submit_bio_noacct. If it did, we find a
+ * non-NULL value in bio_list and re-enter the loop from the top.
+ * - In this case we really did just take the bio of the top of the list (no
+ * pretending) and so remove it from bio_list, and call into ->submit_bio()
+ * again.
+ *
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ * ->submit_bio_bio, but that haven't been processed yet.
*/
-blk_qc_t generic_make_request(struct bio *bio)
+static blk_qc_t __submit_bio_noacct(struct bio *bio)
{
- /*
- * bio_list_on_stack[0] contains bios submitted by the current
- * make_request_fn.
- * bio_list_on_stack[1] contains bios that were submitted before
- * the current make_request_fn, but that haven't been processed
- * yet.
- */
struct bio_list bio_list_on_stack[2];
blk_qc_t ret = BLK_QC_T_NONE;
- if (!generic_make_request_checks(bio))
- goto out;
-
- /*
- * We only want one ->make_request_fn to be active at a time, else
- * stack usage with stacked devices could be a problem. So use
- * current->bio_list to keep a list of requests submited by a
- * make_request_fn function. current->bio_list is also used as a
- * flag to say if generic_make_request is currently active in this
- * task or not. If it is NULL, then no make_request is active. If
- * it is non-NULL, then a make_request is active, and new requests
- * should be added at the tail
- */
- if (current->bio_list) {
- bio_list_add(&current->bio_list[0], bio);
- goto out;
- }
-
- /* following loop may be a bit non-obvious, and so deserves some
- * explanation.
- * Before entering the loop, bio->bi_next is NULL (as all callers
- * ensure that) so we have a list with a single bio.
- * We pretend that we have just taken it off a longer list, so
- * we assign bio_list to a pointer to the bio_list_on_stack,
- * thus initialising the bio_list of new bios to be
- * added. ->make_request() may indeed add some more bios
- * through a recursive call to generic_make_request. If it
- * did, we find a non-NULL value in bio_list and re-enter the loop
- * from the top. In this case we really did just take the bio
- * of the top of the list (no pretending) and so remove it from
- * bio_list, and call into ->make_request() again.
- */
BUG_ON(bio->bi_next);
+
bio_list_init(&bio_list_on_stack[0]);
current->bio_list = bio_list_on_stack;
+
do {
struct request_queue *q = bio->bi_disk->queue;
+ struct bio_list lower, same;
+
+ if (unlikely(bio_queue_enter(bio) != 0))
+ continue;
- if (likely(bio_queue_enter(bio) == 0)) {
- struct bio_list lower, same;
+ /*
+ * Create a fresh bio_list for all subordinate requests.
+ */
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
- /* Create a fresh bio_list for all subordinate requests */
- bio_list_on_stack[1] = bio_list_on_stack[0];
- bio_list_init(&bio_list_on_stack[0]);
- ret = do_make_request(bio);
+ ret = __submit_bio(bio);
- /* sort new bios into those for a lower level
- * and those for the same level
- */
- bio_list_init(&lower);
- bio_list_init(&same);
- while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
- if (q == bio->bi_disk->queue)
- bio_list_add(&same, bio);
- else
- bio_list_add(&lower, bio);
- /* now assemble so we handle the lowest level first */
- bio_list_merge(&bio_list_on_stack[0], &lower);
- bio_list_merge(&bio_list_on_stack[0], &same);
- bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+ /*
+ * Sort new bios into those for a lower level and those for the
+ * same level.
+ */
+ bio_list_init(&lower);
+ bio_list_init(&same);
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+ if (q == bio->bi_disk->queue)
+ bio_list_add(&same, bio);
+ else
+ bio_list_add(&lower, bio);
+
+ /*
+ * Now assemble so we handle the lowest level first.
+ */
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+ } while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+ current->bio_list = NULL;
+ return ret;
+}
+
+static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+{
+ struct bio_list bio_list[2] = { };
+ blk_qc_t ret = BLK_QC_T_NONE;
+
+ current->bio_list = bio_list;
+
+ do {
+ struct gendisk *disk = bio->bi_disk;
+
+ if (unlikely(bio_queue_enter(bio) != 0))
+ continue;
+
+ if (!blk_crypto_bio_prep(&bio)) {
+ blk_queue_exit(disk->queue);
+ ret = BLK_QC_T_NONE;
+ continue;
}
- bio = bio_list_pop(&bio_list_on_stack[0]);
- } while (bio);
- current->bio_list = NULL; /* deactivate */
-out:
+ ret = blk_mq_submit_bio(bio);
+ } while ((bio = bio_list_pop(&bio_list[0])));
+
+ current->bio_list = NULL;
return ret;
}
-EXPORT_SYMBOL(generic_make_request);
/**
- * direct_make_request - hand a buffer directly to its device driver for I/O
+ * submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
- * This function behaves like generic_make_request(), but does not protect
- * against recursion. Must only be used if the called driver is known
- * to be blk-mq based.
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
*/
-blk_qc_t direct_make_request(struct bio *bio)
+blk_qc_t submit_bio_noacct(struct bio *bio)
{
- struct request_queue *q = bio->bi_disk->queue;
-
- if (WARN_ON_ONCE(q->make_request_fn)) {
- bio_io_error(bio);
- return BLK_QC_T_NONE;
- }
- if (!generic_make_request_checks(bio))
- return BLK_QC_T_NONE;
- if (unlikely(bio_queue_enter(bio)))
+ if (!submit_bio_checks(bio))
return BLK_QC_T_NONE;
- if (!blk_crypto_bio_prep(&bio)) {
- blk_queue_exit(q);
+
+ /*
+ * We only want one ->submit_bio to be active at a time, else stack
+ * usage with stacked devices could be a problem. Use current->bio_list
+ * to collect a list of requests submited by a ->submit_bio method while
+ * it is active, and then process them after it returned.
+ */
+ if (current->bio_list) {
+ bio_list_add(&current->bio_list[0], bio);
return BLK_QC_T_NONE;
}
- return blk_mq_make_request(q, bio);
+
+ if (!bio->bi_disk->fops->submit_bio)
+ return __submit_bio_noacct_mq(bio);
+ return __submit_bio_noacct(bio);
}
-EXPORT_SYMBOL_GPL(direct_make_request);
+EXPORT_SYMBOL(submit_bio_noacct);
/**
* submit_bio - submit a bio to the block device layer for I/O
@@ -1266,13 +1268,13 @@ blk_qc_t submit_bio(struct bio *bio)
blk_qc_t ret;
psi_memstall_enter(&pflags);
- ret = generic_make_request(bio);
+ ret = submit_bio_noacct(bio);
psi_memstall_leave(&pflags);
return ret;
}
- return generic_make_request(bio);
+ return submit_bio_noacct(bio);
}
EXPORT_SYMBOL(submit_bio);
@@ -1908,9 +1910,7 @@ int __init blk_dev_init(void)
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
-#ifdef CONFIG_DEBUG_FS
blk_debugfs_root = debugfs_create_dir("block", NULL);
-#endif
return 0;
}