summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/bdev.c7
-rw-r--r--block/bfq-cgroup.c8
-rw-r--r--block/bfq-iosched.c206
-rw-r--r--block/bfq-iosched.h8
-rw-r--r--block/bio-integrity.c4
-rw-r--r--block/bio.c112
-rw-r--r--block/blk-cgroup.c23
-rw-r--r--block/blk-cgroup.h1
-rw-r--r--block/blk-iocost.c10
-rw-r--r--block/blk-ioprio.c57
-rw-r--r--block/blk-ioprio.h9
-rw-r--r--block/blk-lib.c23
-rw-r--r--block/blk-merge.c162
-rw-r--r--block/blk-mq-tag.c5
-rw-r--r--block/blk-mq.c14
-rw-r--r--block/blk-rq-qos.c2
-rw-r--r--block/blk-sysfs.c22
-rw-r--r--block/blk-throttle.c80
-rw-r--r--block/blk-throttle.h2
-rw-r--r--block/blk.h75
-rw-r--r--block/elevator.c21
-rw-r--r--block/elevator.h2
-rw-r--r--block/fops.c24
-rw-r--r--block/ioctl.c168
-rw-r--r--block/partitions/core.c8
-rw-r--r--block/t10-pi.c8
26 files changed, 633 insertions, 428 deletions
diff --git a/block/bdev.c b/block/bdev.c
index c5507b6f63b8..33f9c4605e3a 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -24,6 +24,7 @@
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
+#include <linux/security.h>
#include <linux/part_stat.h>
#include <linux/uaccess.h>
#include <linux/stat.h>
@@ -324,6 +325,11 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
memset(&ei->bdev, 0, sizeof(ei->bdev));
+
+ if (security_bdev_alloc(&ei->bdev)) {
+ kmem_cache_free(bdev_cachep, ei);
+ return NULL;
+ }
return &ei->vfs_inode;
}
@@ -333,6 +339,7 @@ static void bdev_free_inode(struct inode *inode)
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
+ security_bdev_free(bdev);
if (!bdev_is_partition(bdev)) {
if (bdev->bd_disk && bdev->bd_disk->bdi)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index b758693697c0..e831aedb4643 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -679,12 +679,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
bfqg_and_blkg_put(old_parent);
- if (entity->parent &&
- entity->parent->last_bfqq_created == bfqq)
- entity->parent->last_bfqq_created = NULL;
- else if (bfqd->last_bfqq_created == bfqq)
- bfqd->last_bfqq_created = NULL;
-
+ bfq_reassign_last_bfqq(bfqq, NULL);
entity->parent = bfqg->my_entity;
entity->sched_data = &bfqg->sched_data;
/* pin down bfqg and its associated blkg */
@@ -741,7 +736,6 @@ static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
*/
bfq_put_cooperator(sync_bfqq);
bic_set_bfqq(bic, NULL, true, act_idx);
- bfq_release_process_ref(bfqd, sync_bfqq);
}
}
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 36a4998c4b37..0747d9d0e48c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2911,8 +2911,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct bfq_iocq_bfqq_data *bfqq_data = &bic->bfqq_data[a_idx];
/* if a merge has already been setup, then proceed with that first */
- if (bfqq->new_bfqq)
- return bfqq->new_bfqq;
+ new_bfqq = bfqq->new_bfqq;
+ if (new_bfqq) {
+ while (new_bfqq->new_bfqq)
+ new_bfqq = new_bfqq->new_bfqq;
+ return new_bfqq;
+ }
/*
* Check delayed stable merge for rotational or non-queueing
@@ -3093,8 +3097,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
}
-static void
-bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq, struct bfq_queue *new_bfqq)
+void bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq,
+ struct bfq_queue *new_bfqq)
{
if (cur_bfqq->entity.parent &&
cur_bfqq->entity.parent->last_bfqq_created == cur_bfqq)
@@ -3125,10 +3129,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void
-bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
+static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bfq_queue *bfqq)
{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
(unsigned long)new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
@@ -3222,6 +3228,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_reassign_last_bfqq(bfqq, new_bfqq);
bfq_release_process_ref(bfqd, bfqq);
+
+ return new_bfqq;
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -3257,14 +3265,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
* fulfilled, i.e., bic can be redirected to new_bfqq
* and bfqq can be put.
*/
- bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq,
- new_bfqq);
- /*
- * If we get here, bio will be queued into new_queue,
- * so use new_bfqq to decide whether bio and rq can be
- * merged.
- */
- bfqq = new_bfqq;
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq);
/*
* Change also bqfd->bio_bfqq, as
@@ -5432,6 +5434,8 @@ void bfq_put_cooperator(struct bfq_queue *bfqq)
bfq_put_queue(__bfqq);
__bfqq = next;
}
+
+ bfq_release_process_ref(bfqq->bfqd, bfqq);
}
static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
@@ -5444,8 +5448,6 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, bfqq->ref);
bfq_put_cooperator(bfqq);
-
- bfq_release_process_ref(bfqd, bfqq);
}
static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
@@ -5701,9 +5703,7 @@ bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* state before killing it.
*/
bfqq->bic = bic;
- bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
-
- return new_bfqq;
+ return bfq_merge_bfqqs(bfqd, bic, bfqq);
}
/*
@@ -6158,6 +6158,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
bool waiting, idle_timer_disabled = false;
if (new_bfqq) {
+ struct bfq_queue *old_bfqq = bfqq;
/*
* Release the request's reference to the old bfqq
* and make sure one is taken to the shared queue.
@@ -6174,18 +6175,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
* new_bfqq.
*/
if (bic_to_bfqq(RQ_BIC(rq), true,
- bfq_actuator_index(bfqd, rq->bio)) == bfqq)
- bfq_merge_bfqqs(bfqd, RQ_BIC(rq),
- bfqq, new_bfqq);
+ bfq_actuator_index(bfqd, rq->bio)) == bfqq) {
+ while (bfqq != new_bfqq)
+ bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq);
+ }
- bfq_clear_bfqq_just_created(bfqq);
+ bfq_clear_bfqq_just_created(old_bfqq);
/*
* rq is about to be enqueued into new_bfqq,
* release rq reference on bfqq
*/
- bfq_put_queue(bfqq);
+ bfq_put_queue(old_bfqq);
rq->elv.priv[1] = new_bfqq;
- bfqq = new_bfqq;
}
bfq_update_io_thinktime(bfqd, bfqq);
@@ -6723,7 +6724,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
{
bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue");
- if (bfqq_process_refs(bfqq) == 1) {
+ if (bfqq_process_refs(bfqq) == 1 && !bfqq->new_bfqq) {
bfqq->pid = current->pid;
bfq_clear_bfqq_coop(bfqq);
bfq_clear_bfqq_split_coop(bfqq);
@@ -6733,16 +6734,13 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
bic_set_bfqq(bic, NULL, true, bfqq->actuator_idx);
bfq_put_cooperator(bfqq);
-
- bfq_release_process_ref(bfqq->bfqd, bfqq);
return NULL;
}
-static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
- struct bfq_io_cq *bic,
- struct bio *bio,
- bool split, bool is_sync,
- bool *new_queue)
+static struct bfq_queue *
+__bfq_get_bfqq_handle_split(struct bfq_data *bfqd, struct bfq_io_cq *bic,
+ struct bio *bio, bool split, bool is_sync,
+ bool *new_queue)
{
unsigned int act_idx = bfq_actuator_index(bfqd, bio);
struct bfq_queue *bfqq = bic_to_bfqq(bic, is_sync, act_idx);
@@ -6821,6 +6819,84 @@ static void bfq_prepare_request(struct request *rq)
rq->elv.priv[0] = rq->elv.priv[1] = NULL;
}
+static struct bfq_queue *bfq_waker_bfqq(struct bfq_queue *bfqq)
+{
+ struct bfq_queue *new_bfqq = bfqq->new_bfqq;
+ struct bfq_queue *waker_bfqq = bfqq->waker_bfqq;
+
+ if (!waker_bfqq)
+ return NULL;
+
+ while (new_bfqq) {
+ if (new_bfqq == waker_bfqq) {
+ /*
+ * If waker_bfqq is in the merge chain, and current
+ * is the only procress.
+ */
+ if (bfqq_process_refs(waker_bfqq) == 1)
+ return NULL;
+ break;
+ }
+
+ new_bfqq = new_bfqq->new_bfqq;
+ }
+
+ return waker_bfqq;
+}
+
+static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
+ struct bfq_io_cq *bic,
+ struct bio *bio,
+ unsigned int idx,
+ bool is_sync)
+{
+ struct bfq_queue *waker_bfqq;
+ struct bfq_queue *bfqq;
+ bool new_queue = false;
+
+ bfqq = __bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync,
+ &new_queue);
+ if (unlikely(new_queue))
+ return bfqq;
+
+ /* If the queue was seeky for too long, break it apart. */
+ if (!bfq_bfqq_coop(bfqq) || !bfq_bfqq_split_coop(bfqq) ||
+ bic->bfqq_data[idx].stably_merged)
+ return bfqq;
+
+ waker_bfqq = bfq_waker_bfqq(bfqq);
+
+ /* Update bic before losing reference to bfqq */
+ if (bfq_bfqq_in_large_burst(bfqq))
+ bic->bfqq_data[idx].saved_in_large_burst = true;
+
+ bfqq = bfq_split_bfqq(bic, bfqq);
+ if (bfqq) {
+ bfq_bfqq_resume_state(bfqq, bfqd, bic, true);
+ return bfqq;
+ }
+
+ bfqq = __bfq_get_bfqq_handle_split(bfqd, bic, bio, true, is_sync, NULL);
+ if (unlikely(bfqq == &bfqd->oom_bfqq))
+ return bfqq;
+
+ bfq_bfqq_resume_state(bfqq, bfqd, bic, false);
+ bfqq->waker_bfqq = waker_bfqq;
+ bfqq->tentative_waker_bfqq = NULL;
+
+ /*
+ * If the waker queue disappears, then new_bfqq->waker_bfqq must be
+ * reset. So insert new_bfqq into the
+ * woken_list of the waker. See
+ * bfq_check_waker for details.
+ */
+ if (waker_bfqq)
+ hlist_add_head(&bfqq->woken_list_node,
+ &bfqq->waker_bfqq->woken_list);
+
+ return bfqq;
+}
+
/*
* If needed, init rq, allocate bfq data structures associated with
* rq, and increment reference counters in the destination bfq_queue
@@ -6852,8 +6928,6 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
struct bfq_io_cq *bic;
const int is_sync = rq_is_sync(rq);
struct bfq_queue *bfqq;
- bool new_queue = false;
- bool bfqq_already_existing = false, split = false;
unsigned int a_idx = bfq_actuator_index(bfqd, bio);
if (unlikely(!rq->elv.icq))
@@ -6870,54 +6944,9 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
return RQ_BFQQ(rq);
bic = icq_to_bic(rq->elv.icq);
-
bfq_check_ioprio_change(bic, bio);
-
bfq_bic_update_cgroup(bic, bio);
-
- bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, false, is_sync,
- &new_queue);
-
- if (likely(!new_queue)) {
- /* If the queue was seeky for too long, break it apart. */
- if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) &&
- !bic->bfqq_data[a_idx].stably_merged) {
- struct bfq_queue *old_bfqq = bfqq;
-
- /* Update bic before losing reference to bfqq */
- if (bfq_bfqq_in_large_burst(bfqq))
- bic->bfqq_data[a_idx].saved_in_large_burst =
- true;
-
- bfqq = bfq_split_bfqq(bic, bfqq);
- split = true;
-
- if (!bfqq) {
- bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
- true, is_sync,
- NULL);
- if (unlikely(bfqq == &bfqd->oom_bfqq))
- bfqq_already_existing = true;
- } else
- bfqq_already_existing = true;
-
- if (!bfqq_already_existing) {
- bfqq->waker_bfqq = old_bfqq->waker_bfqq;
- bfqq->tentative_waker_bfqq = NULL;
-
- /*
- * If the waker queue disappears, then
- * new_bfqq->waker_bfqq must be
- * reset. So insert new_bfqq into the
- * woken_list of the waker. See
- * bfq_check_waker for details.
- */
- if (bfqq->waker_bfqq)
- hlist_add_head(&bfqq->woken_list_node,
- &bfqq->waker_bfqq->woken_list);
- }
- }
- }
+ bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, a_idx, is_sync);
bfqq_request_allocated(bfqq);
bfqq->ref++;
@@ -6934,18 +6963,9 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
* addition, if the queue has also just been split, we have to
* resume its state.
*/
- if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) {
+ if (likely(bfqq != &bfqd->oom_bfqq) && !bfqq->new_bfqq &&
+ bfqq_process_refs(bfqq) == 1)
bfqq->bic = bic;
- if (split) {
- /*
- * The queue has just been split from a shared
- * queue: restore the idle window and the
- * possible weight raising period.
- */
- bfq_bfqq_resume_state(bfqq, bfqd, bic,
- bfqq_already_existing);
- }
- }
/*
* Consider bfqq as possibly belonging to a burst of newly
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 08ddf2cfae5b..687a3a7ba784 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -1156,6 +1156,8 @@ void bfq_del_bfqq_busy(struct bfq_queue *bfqq, bool expiration);
void bfq_add_bfqq_busy(struct bfq_queue *bfqq);
void bfq_add_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq);
void bfq_del_bfqq_in_groups_with_pending_reqs(struct bfq_queue *bfqq);
+void bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq,
+ struct bfq_queue *new_bfqq);
/* --------------- end of interface of B-WF2Q+ ---------------- */
@@ -1183,11 +1185,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
"%s " fmt, pid_str, ##args); \
} while (0)
-#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
- blk_add_cgroup_trace_msg((bfqd)->queue, \
- &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \
-} while (0)
-
#else /* CONFIG_BFQ_GROUP_IOSCHED */
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
@@ -1197,7 +1194,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_trace_msg((bfqd)->queue, "%s " fmt, pid_str, ##args); \
} while (0)
-#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
#endif /* CONFIG_BFQ_GROUP_IOSCHED */
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 8d1fb38f745f..96a2653905ae 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -167,10 +167,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
struct bio_integrity_payload *bip = bio_integrity(bio);
- if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
- queue_max_hw_sectors(q))
- return 0;
-
if (bip->bip_vcnt > 0) {
struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
bool same_page = false;
diff --git a/block/bio.c b/block/bio.c
index c4053d49679a..ac4d77c88932 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -931,7 +931,8 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
if (!zone_device_pages_have_same_pgmap(bv->bv_page, page))
return false;
- *same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
+ *same_page = ((vec_end_addr & PAGE_MASK) == ((page_addr + off) &
+ PAGE_MASK));
if (!*same_page) {
if (IS_ENABLED(CONFIG_KMSAN))
return false;
@@ -1017,6 +1018,29 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
}
/**
+ * bio_add_hw_folio - attempt to add a folio to a bio with hw constraints
+ * @q: the target queue
+ * @bio: destination bio
+ * @folio: folio to add
+ * @len: vec entry length
+ * @offset: vec entry offset in the folio
+ * @max_sectors: maximum number of sectors that can be added
+ * @same_page: return if the segment has been merged inside the same folio
+ *
+ * Add a folio to a bio while respecting the hardware max_sectors, max_segment
+ * and gap limitations.
+ */
+int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
+ struct folio *folio, size_t len, size_t offset,
+ unsigned int max_sectors, bool *same_page)
+{
+ if (len > UINT_MAX || offset > UINT_MAX)
+ return 0;
+ return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset,
+ max_sectors, same_page);
+}
+
+/**
* bio_add_pc_page - attempt to add page to passthrough bio
* @q: the target queue
* @bio: destination bio
@@ -1166,7 +1190,6 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
struct folio_iter fi;
bio_for_each_folio_all(fi, bio) {
- struct page *page;
size_t nr_pages;
if (mark_dirty) {
@@ -1174,12 +1197,9 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
folio_mark_dirty(fi.folio);
folio_unlock(fi.folio);
}
- page = folio_page(fi.folio, fi.offset / PAGE_SIZE);
nr_pages = (fi.offset + fi.length - 1) / PAGE_SIZE -
fi.offset / PAGE_SIZE + 1;
- do {
- bio_release_page(bio, page++);
- } while (--nr_pages != 0);
+ unpin_user_folio(fi.folio, nr_pages);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1204,8 +1224,8 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
bio_set_flag(bio, BIO_CLONED);
}
-static int bio_iov_add_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
+static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len,
+ size_t offset)
{
bool same_page = false;
@@ -1214,30 +1234,61 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
if (bio->bi_vcnt > 0 &&
bvec_try_merge_page(&bio->bi_io_vec[bio->bi_vcnt - 1],
- page, len, offset, &same_page)) {
+ folio_page(folio, 0), len, offset,
+ &same_page)) {
bio->bi_iter.bi_size += len;
- if (same_page)
- bio_release_page(bio, page);
+ if (same_page && bio_flagged(bio, BIO_PAGE_PINNED))
+ unpin_user_folio(folio, 1);
return 0;
}
- __bio_add_page(bio, page, len, offset);
+ bio_add_folio_nofail(bio, folio, len, offset);
return 0;
}
-static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
+static int bio_iov_add_zone_append_folio(struct bio *bio, struct folio *folio,
+ size_t len, size_t offset)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
bool same_page = false;
- if (bio_add_hw_page(q, bio, page, len, offset,
+ if (bio_add_hw_folio(q, bio, folio, len, offset,
queue_max_zone_append_sectors(q), &same_page) != len)
return -EINVAL;
- if (same_page)
- bio_release_page(bio, page);
+ if (same_page && bio_flagged(bio, BIO_PAGE_PINNED))
+ unpin_user_folio(folio, 1);
return 0;
}
+static unsigned int get_contig_folio_len(unsigned int *num_pages,
+ struct page **pages, unsigned int i,
+ struct folio *folio, size_t left,
+ size_t offset)
+{
+ size_t bytes = left;
+ size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, bytes);
+ unsigned int j;
+
+ /*
+ * We might COW a single page in the middle of
+ * a large folio, so we have to check that all
+ * pages belong to the same folio.
+ */
+ bytes -= contig_sz;
+ for (j = i + 1; j < i + *num_pages; j++) {
+ size_t next = min_t(size_t, PAGE_SIZE, bytes);
+
+ if (page_folio(pages[j]) != folio ||
+ pages[j] != pages[j - 1] + 1) {
+ break;
+ }
+ contig_sz += next;
+ bytes -= next;
+ }
+ *num_pages = j - i;
+
+ return contig_sz;
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -1257,9 +1308,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
- ssize_t size, left;
- unsigned len, i = 0;
- size_t offset;
+ ssize_t size;
+ unsigned int num_pages, i = 0;
+ size_t offset, folio_offset, left, len;
int ret = 0;
/*
@@ -1299,17 +1350,28 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
goto out;
}
- for (left = size, i = 0; left > 0; left -= len, i++) {
+ for (left = size, i = 0; left > 0; left -= len, i += num_pages) {
struct page *page = pages[i];
+ struct folio *folio = page_folio(page);
+
+ folio_offset = ((size_t)folio_page_idx(folio, page) <<
+ PAGE_SHIFT) + offset;
+
+ len = min(folio_size(folio) - folio_offset, left);
+
+ num_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+
+ if (num_pages > 1)
+ len = get_contig_folio_len(&num_pages, pages, i,
+ folio, left, offset);
- len = min_t(size_t, PAGE_SIZE - offset, left);
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
- ret = bio_iov_add_zone_append_page(bio, page, len,
- offset);
+ ret = bio_iov_add_zone_append_folio(bio, folio, len,
+ folio_offset);
if (ret)
break;
} else
- bio_iov_add_page(bio, page, len, offset);
+ bio_iov_add_folio(bio, folio, len, folio_offset);
offset = 0;
}
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 69e70964398c..e68c725cf8d9 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1458,7 +1458,6 @@ int blkcg_init_disk(struct gendisk *disk)
struct request_queue *q = disk->queue;
struct blkcg_gq *new_blkg, *blkg;
bool preloaded;
- int ret;
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
@@ -1478,15 +1477,8 @@ int blkcg_init_disk(struct gendisk *disk)
if (preloaded)
radix_tree_preload_end();
- ret = blk_ioprio_init(disk);
- if (ret)
- goto err_destroy_all;
-
return 0;
-err_destroy_all:
- blkg_destroy_all(disk);
- return ret;
err_unlock:
spin_unlock_irq(&q->queue_lock);
if (preloaded)
@@ -1554,6 +1546,14 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
if (blkcg_policy_enabled(q, pol))
return 0;
+ /*
+ * Policy is allowed to be registered without pd_alloc_fn/pd_free_fn,
+ * for example, ioprio. Such policy will work on blkcg level, not disk
+ * level, and don't need to be activated.
+ */
+ if (WARN_ON_ONCE(!pol->pd_alloc_fn || !pol->pd_free_fn))
+ return -EINVAL;
+
if (queue_is_mq(q))
blk_mq_freeze_queue(q);
retry:
@@ -1733,9 +1733,12 @@ int blkcg_policy_register(struct blkcg_policy *pol)
goto err_unlock;
}
- /* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */
+ /*
+ * Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs, and policy
+ * without pd_alloc_fn/pd_free_fn can't be activated.
+ */
if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
- (!pol->pd_alloc_fn ^ !pol->pd_free_fn))
+ (!pol->pd_alloc_fn ^ !pol->pd_free_fn))
goto err_unlock;
/* register @pol */
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 864fad4a850b..b9e3265c1eb3 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -485,7 +485,6 @@ static inline void blkcg_deactivate_policy(struct gendisk *disk,
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
-static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 690ca99dfaca..9dc9323f84ac 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -648,7 +648,7 @@ static const struct ioc_params autop[] = {
* vrate adjust percentages indexed by ioc->busy_level. We adjust up on
* vtime credit shortage and down on device saturation.
*/
-static u32 vrate_adj_pct[] =
+static const u32 vrate_adj_pct[] =
{ 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -2076,7 +2076,7 @@ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors,
struct ioc_now *now)
{
struct ioc_gq *iocg;
- u64 dur, usage_pct, nr_cycles;
+ u64 dur, usage_pct, nr_cycles, nr_cycles_shift;
/* if no debtor, reset the cycle */
if (!nr_debtors) {
@@ -2138,10 +2138,12 @@ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors,
old_debt = iocg->abs_vdebt;
old_delay = iocg->delay;
+ nr_cycles_shift = min_t(u64, nr_cycles, BITS_PER_LONG - 1);
if (iocg->abs_vdebt)
- iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles ?: 1;
+ iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles_shift ?: 1;
+
if (iocg->delay)
- iocg->delay = iocg->delay >> nr_cycles ?: 1;
+ iocg->delay = iocg->delay >> nr_cycles_shift ?: 1;
iocg_kick_waitq(iocg, true, now);
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 4051fada01f1..8fff7ccc0ac7 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -50,14 +50,6 @@ static const char *policy_name[] = {
static struct blkcg_policy ioprio_policy;
/**
- * struct ioprio_blkg - Per (cgroup, request queue) data.
- * @pd: blkg_policy_data structure.
- */
-struct ioprio_blkg {
- struct blkg_policy_data pd;
-};
-
-/**
* struct ioprio_blkcg - Per cgroup data.
* @cpd: blkcg_policy_data structure.
* @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>.
@@ -67,11 +59,6 @@ struct ioprio_blkcg {
enum prio_policy prio_policy;
};
-static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
-{
- return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL;
-}
-
static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg)
{
return container_of(blkcg_to_cpd(blkcg, &ioprio_policy),
@@ -84,16 +71,6 @@ ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
return blkcg_to_ioprio_blkcg(css_to_blkcg(css));
}
-static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio)
-{
- struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy);
-
- if (!pd)
- return NULL;
-
- return blkcg_to_ioprio_blkcg(pd->blkg->blkcg);
-}
-
static int ioprio_show_prio_policy(struct seq_file *sf, void *v)
{
struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf));
@@ -118,25 +95,6 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
return nbytes;
}
-static struct blkg_policy_data *
-ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp)
-{
- struct ioprio_blkg *ioprio_blkg;
-
- ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp);
- if (!ioprio_blkg)
- return NULL;
-
- return &ioprio_blkg->pd;
-}
-
-static void ioprio_free_pd(struct blkg_policy_data *pd)
-{
- struct ioprio_blkg *ioprio_blkg = pd_to_ioprio(pd);
-
- kfree(ioprio_blkg);
-}
-
static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp)
{
struct ioprio_blkcg *blkcg;
@@ -179,14 +137,11 @@ static struct blkcg_policy ioprio_policy = {
.cpd_alloc_fn = ioprio_alloc_cpd,
.cpd_free_fn = ioprio_free_cpd,
-
- .pd_alloc_fn = ioprio_alloc_pd,
- .pd_free_fn = ioprio_free_pd,
};
void blkcg_set_ioprio(struct bio *bio)
{
- struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
+ struct ioprio_blkcg *blkcg = blkcg_to_ioprio_blkcg(bio->bi_blkg->blkcg);
u16 prio;
if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
@@ -219,16 +174,6 @@ void blkcg_set_ioprio(struct bio *bio)
bio->bi_ioprio = prio;
}
-void blk_ioprio_exit(struct gendisk *disk)
-{
- blkcg_deactivate_policy(disk, &ioprio_policy);
-}
-
-int blk_ioprio_init(struct gendisk *disk)
-{
- return blkcg_activate_policy(disk, &ioprio_policy);
-}
-
static int __init ioprio_init(void)
{
return blkcg_policy_register(&ioprio_policy);
diff --git a/block/blk-ioprio.h b/block/blk-ioprio.h
index b6afb8e80de0..9265143f9bc9 100644
--- a/block/blk-ioprio.h
+++ b/block/blk-ioprio.h
@@ -9,17 +9,8 @@ struct request_queue;
struct bio;
#ifdef CONFIG_BLK_CGROUP_IOPRIO
-int blk_ioprio_init(struct gendisk *disk);
-void blk_ioprio_exit(struct gendisk *disk);
void blkcg_set_ioprio(struct bio *bio);
#else
-static inline int blk_ioprio_init(struct gendisk *disk)
-{
- return 0;
-}
-static inline void blk_ioprio_exit(struct gendisk *disk)
-{
-}
static inline void blkcg_set_ioprio(struct bio *bio)
{
}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9f735efa6c94..4c9f20a689f7 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -111,13 +111,20 @@ static sector_t bio_write_zeroes_limit(struct block_device *bdev)
(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
}
+/*
+ * There is no reliable way for the SCSI subsystem to determine whether a
+ * device supports a WRITE SAME operation without actually performing a write
+ * to media. As a result, write_zeroes is enabled by default and will be
+ * disabled if a zeroing operation subsequently fails. This means that this
+ * queue limit is likely to change at runtime.
+ */
static void __blkdev_issue_write_zeroes(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop, unsigned flags)
+ struct bio **biop, unsigned flags, sector_t limit)
{
+
while (nr_sects) {
- unsigned int len = min_t(sector_t, nr_sects,
- bio_write_zeroes_limit(bdev));
+ unsigned int len = min(nr_sects, limit);
struct bio *bio;
if ((flags & BLKDEV_ZERO_KILLABLE) &&
@@ -141,12 +148,14 @@ static void __blkdev_issue_write_zeroes(struct block_device *bdev,
static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp, unsigned flags)
{
+ sector_t limit = bio_write_zeroes_limit(bdev);
struct bio *bio = NULL;
struct blk_plug plug;
int ret = 0;
blk_start_plug(&plug);
- __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags);
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio,
+ flags, limit);
if (bio) {
if ((flags & BLKDEV_ZERO_KILLABLE) &&
fatal_signal_pending(current)) {
@@ -265,12 +274,14 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
unsigned flags)
{
+ sector_t limit = bio_write_zeroes_limit(bdev);
+
if (bdev_read_only(bdev))
return -EPERM;
- if (bdev_write_zeroes_sectors(bdev)) {
+ if (limit) {
__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
- gfp_mask, biop, flags);
+ gfp_mask, biop, flags, limit);
} else {
if (flags & BLKDEV_ZERO_NOFALLBACK)
return -EOPNOTSUPP;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index de5281bcadc5..56769c4bcd79 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -105,9 +105,33 @@ static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}
-static struct bio *bio_split_discard(struct bio *bio,
- const struct queue_limits *lim,
- unsigned *nsegs, struct bio_set *bs)
+static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
+{
+ if (unlikely(split_sectors < 0)) {
+ bio->bi_status = errno_to_blk_status(split_sectors);
+ bio_endio(bio);
+ return NULL;
+ }
+
+ if (split_sectors) {
+ struct bio *split;
+
+ split = bio_split(bio, split_sectors, GFP_NOIO,
+ &bio->bi_bdev->bd_disk->bio_split);
+ split->bi_opf |= REQ_NOMERGE;
+ blkcg_bio_issue_init(split);
+ bio_chain(split, bio);
+ trace_block_split(split, bio->bi_iter.bi_sector);
+ WARN_ON_ONCE(bio_zone_write_plugging(bio));
+ submit_bio_noacct(bio);
+ return split;
+ }
+
+ return bio;
+}
+
+struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
+ unsigned *nsegs)
{
unsigned int max_discard_sectors, granularity;
sector_t tmp;
@@ -121,10 +145,10 @@ static struct bio *bio_split_discard(struct bio *bio,
min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors))
- return NULL;
+ return bio;
if (bio_sectors(bio) <= max_discard_sectors)
- return NULL;
+ return bio;
split_sectors = max_discard_sectors;
@@ -139,19 +163,18 @@ static struct bio *bio_split_discard(struct bio *bio,
if (split_sectors > tmp)
split_sectors -= tmp;
- return bio_split(bio, split_sectors, GFP_NOIO, bs);
+ return bio_submit_split(bio, split_sectors);
}
-static struct bio *bio_split_write_zeroes(struct bio *bio,
- const struct queue_limits *lim,
- unsigned *nsegs, struct bio_set *bs)
+struct bio *bio_split_write_zeroes(struct bio *bio,
+ const struct queue_limits *lim, unsigned *nsegs)
{
*nsegs = 0;
if (!lim->max_write_zeroes_sectors)
- return NULL;
+ return bio;
if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
- return NULL;
- return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
+ return bio;
+ return bio_submit_split(bio, lim->max_write_zeroes_sectors);
}
static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
@@ -274,27 +297,19 @@ static bool bvec_split_segs(const struct queue_limits *lim,
}
/**
- * bio_split_rw - split a bio in two bios
+ * bio_split_rw_at - check if and where to split a read/write bio
* @bio: [in] bio to be split
* @lim: [in] queue limits to split based on
* @segs: [out] number of segments in the bio with the first half of the sectors
- * @bs: [in] bio set to allocate the clone from
* @max_bytes: [in] maximum number of bytes per bio
*
- * Clone @bio, update the bi_iter of the clone to represent the first sectors
- * of @bio and update @bio->bi_iter to represent the remaining sectors. The
- * following is guaranteed for the cloned bio:
- * - That it has at most @max_bytes worth of data
- * - That it has at most queue_max_segments(@q) segments.
- *
- * Except for discard requests the cloned bio will point at the bi_io_vec of
- * the original bio. It is the responsibility of the caller to ensure that the
- * original bio is not freed before the cloned bio. The caller is also
- * responsible for ensuring that @bs is only destroyed after processing of the
- * split bio has finished.
+ * Find out if @bio needs to be split to fit the queue limits in @lim and a
+ * maximum size of @max_bytes. Returns a negative error number if @bio can't be
+ * split, 0 if the bio doesn't have to be split, or a positive sector offset if
+ * @bio needs to be split.
*/
-struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
- unsigned *segs, struct bio_set *bs, unsigned max_bytes)
+int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
+ unsigned *segs, unsigned max_bytes)
{
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
@@ -324,22 +339,17 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
}
*segs = nsegs;
- return NULL;
+ return 0;
split:
- if (bio->bi_opf & REQ_ATOMIC) {
- bio->bi_status = BLK_STS_INVAL;
- bio_endio(bio);
- return ERR_PTR(-EINVAL);
- }
+ if (bio->bi_opf & REQ_ATOMIC)
+ return -EINVAL;
+
/*
* We can't sanely support splitting for a REQ_NOWAIT bio. End it
* with EAGAIN if splitting is required and return an error pointer.
*/
- if (bio->bi_opf & REQ_NOWAIT) {
- bio->bi_status = BLK_STS_AGAIN;
- bio_endio(bio);
- return ERR_PTR(-EAGAIN);
- }
+ if (bio->bi_opf & REQ_NOWAIT)
+ return -EAGAIN;
*segs = nsegs;
@@ -356,58 +366,36 @@ split:
* big IO can be trival, disable iopoll when split needed.
*/
bio_clear_polled(bio);
- return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
+ return bytes >> SECTOR_SHIFT;
}
-EXPORT_SYMBOL_GPL(bio_split_rw);
+EXPORT_SYMBOL_GPL(bio_split_rw_at);
-/**
- * __bio_split_to_limits - split a bio to fit the queue limits
- * @bio: bio to be split
- * @lim: queue limits to split based on
- * @nr_segs: returns the number of segments in the returned bio
- *
- * Check if @bio needs splitting based on the queue limits, and if so split off
- * a bio fitting the limits from the beginning of @bio and return it. @bio is
- * shortened to the remainder and re-submitted.
+struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
+ unsigned *nr_segs)
+{
+ return bio_submit_split(bio,
+ bio_split_rw_at(bio, lim, nr_segs,
+ get_max_io_size(bio, lim) << SECTOR_SHIFT));
+}
+
+/*
+ * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
*
- * The split bio is allocated from @q->bio_split, which is provided by the
- * block layer.
+ * But we want the nr_segs calculation provided by bio_split_rw_at, and having
+ * a good sanity check that the submitter built the bio correctly is nice to
+ * have as well.
*/
-struct bio *__bio_split_to_limits(struct bio *bio,
- const struct queue_limits *lim,
- unsigned int *nr_segs)
+struct bio *bio_split_zone_append(struct bio *bio,
+ const struct queue_limits *lim, unsigned *nr_segs)
{
- struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
- struct bio *split;
-
- switch (bio_op(bio)) {
- case REQ_OP_DISCARD:
- case REQ_OP_SECURE_ERASE:
- split = bio_split_discard(bio, lim, nr_segs, bs);
- break;
- case REQ_OP_WRITE_ZEROES:
- split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
- break;
- default:
- split = bio_split_rw(bio, lim, nr_segs, bs,
- get_max_io_size(bio, lim) << SECTOR_SHIFT);
- if (IS_ERR(split))
- return NULL;
- break;
- }
-
- if (split) {
- /* there isn't chance to merge the split bio */
- split->bi_opf |= REQ_NOMERGE;
-
- blkcg_bio_issue_init(split);
- bio_chain(split, bio);
- trace_block_split(split, bio->bi_iter.bi_sector);
- WARN_ON_ONCE(bio_zone_write_plugging(bio));
- submit_bio_noacct(bio);
- return split;
- }
- return bio;
+ unsigned int max_sectors = queue_limits_max_zone_append_sectors(lim);
+ int split_sectors;
+
+ split_sectors = bio_split_rw_at(bio, lim, nr_segs,
+ max_sectors << SECTOR_SHIFT);
+ if (WARN_ON_ONCE(split_sectors > 0))
+ split_sectors = -EINVAL;
+ return bio_submit_split(bio, split_sectors);
}
/**
@@ -426,9 +414,7 @@ struct bio *bio_split_to_limits(struct bio *bio)
const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
unsigned int nr_segs;
- if (bio_may_exceed_limits(bio, lim))
- return __bio_split_to_limits(bio, lim, &nr_segs);
- return bio;
+ return __bio_split_to_limits(bio, lim, &nr_segs);
}
EXPORT_SYMBOL(bio_split_to_limits);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cc57e2dd9a0b..2cafcf11ee8b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
unsigned int users;
+ unsigned long flags;
struct blk_mq_tags *tags = hctx->tags;
/*
@@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
return;
}
- spin_lock_irq(&tags->lock);
+ spin_lock_irqsave(&tags->lock, flags);
users = tags->active_queues + 1;
WRITE_ONCE(tags->active_queues, users);
blk_mq_update_wake_batch(tags, users);
- spin_unlock_irq(&tags->lock);
+ spin_unlock_irqrestore(&tags->lock, flags);
}
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa28157b1aaf..831c5cf5d874 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2753,6 +2753,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request *rq;
+ unsigned int depth;
/*
* We may have been called recursively midway through handling
@@ -2763,6 +2764,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
*/
if (plug->rq_count == 0)
return;
+ depth = plug->rq_count;
plug->rq_count = 0;
if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
@@ -2770,6 +2772,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
rq = rq_list_peek(&plug->mq_list);
q = rq->q;
+ trace_block_unplug(q, depth, true);
/*
* Peek first request and see if we have a ->queue_rqs() hook.
@@ -2939,7 +2942,7 @@ void blk_mq_submit_bio(struct bio *bio)
struct blk_plug *plug = current->plug;
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
- unsigned int nr_segs = 1;
+ unsigned int nr_segs;
struct request *rq;
blk_status_t ret;
@@ -2981,11 +2984,10 @@ void blk_mq_submit_bio(struct bio *bio)
goto queue_exit;
}
- if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
- bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
- if (!bio)
- goto queue_exit;
- }
+ bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
+ if (!bio)
+ goto queue_exit;
+
if (!bio_integrity_prep(bio))
goto queue_exit;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index dd7310c94713..2cfb297d9a62 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -263,7 +263,7 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq,
TASK_UNINTERRUPTIBLE);
do {
- /* The memory barrier in set_task_state saves us here. */
+ /* The memory barrier in set_current_state saves us here. */
if (data.got_token)
break;
if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 60116d13cb80..e85941bec857 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -23,6 +23,7 @@
struct queue_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct gendisk *disk, char *page);
+ int (*load_module)(struct gendisk *disk, const char *page, size_t count);
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
};
@@ -413,6 +414,14 @@ static struct queue_sysfs_entry _prefix##_entry = { \
.store = _prefix##_store, \
};
+#define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \
+static struct queue_sysfs_entry _prefix##_entry = { \
+ .attr = { .name = _name, .mode = 0644 }, \
+ .show = _prefix##_show, \
+ .load_module = _prefix##_load_module, \
+ .store = _prefix##_store, \
+}
+
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
@@ -420,7 +429,7 @@ QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
-QUEUE_RW_ENTRY(elv_iosched, "scheduler");
+QUEUE_RW_LOAD_MODULE_ENTRY(elv_iosched, "scheduler");
QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
@@ -670,6 +679,17 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;
+ /*
+ * If the attribute needs to load a module, do it before freezing the
+ * queue to ensure that the module file can be read when the request
+ * queue is the one for the device storing the module file.
+ */
+ if (entry->load_module) {
+ res = entry->load_module(disk, page, length);
+ if (res)
+ return res;
+ }
+
blk_mq_freeze_queue(q);
mutex_lock(&q->sysfs_lock);
res = entry->store(disk, page, length);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index dc6140fa3de0..2c4192e12efa 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -31,14 +31,6 @@ static struct workqueue_struct *kthrotld_workqueue;
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
-/* We measure latency for request size from <= 4k to >= 1M */
-#define LATENCY_BUCKET_SIZE 9
-
-struct latency_bucket {
- unsigned long total_latency; /* ns / 1024 */
- int samples;
-};
-
struct throtl_data
{
/* service tree for active throtl groups */
@@ -116,9 +108,6 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
return tg->iops[rw];
}
-#define request_bucket_index(sectors) \
- clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1)
-
/**
* throtl_log - log debug message via blktrace
* @sq: the service_queue being reported
@@ -1595,6 +1584,22 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
spin_unlock_irq(&q->queue_lock);
}
+static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)
+{
+ /* throtl is FIFO - if bios are already queued, should queue */
+ if (tg->service_queue.nr_queued[rw])
+ return false;
+
+ return tg_may_dispatch(tg, bio, NULL);
+}
+
+static void tg_dispatch_in_debt(struct throtl_grp *tg, struct bio *bio, bool rw)
+{
+ if (!bio_flagged(bio, BIO_BPS_THROTTLED))
+ tg->carryover_bytes[rw] -= throtl_bio_data_size(bio);
+ tg->carryover_ios[rw]--;
+}
+
bool __blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
@@ -1611,34 +1616,35 @@ bool __blk_throtl_bio(struct bio *bio)
sq = &tg->service_queue;
while (true) {
- if (tg->last_low_overflow_time[rw] == 0)
- tg->last_low_overflow_time[rw] = jiffies;
- /* throtl is FIFO - if bios are already queued, should queue */
- if (sq->nr_queued[rw])
- break;
-
- /* if above limits, break to queue */
- if (!tg_may_dispatch(tg, bio, NULL)) {
- tg->last_low_overflow_time[rw] = jiffies;
+ if (tg_within_limit(tg, bio, rw)) {
+ /* within limits, let's charge and dispatch directly */
+ throtl_charge_bio(tg, bio);
+
+ /*
+ * We need to trim slice even when bios are not being
+ * queued otherwise it might happen that a bio is not
+ * queued for a long time and slice keeps on extending
+ * and trim is not called for a long time. Now if limits
+ * are reduced suddenly we take into account all the IO
+ * dispatched so far at new low rate and * newly queued
+ * IO gets a really long dispatch time.
+ *
+ * So keep on trimming slice even if bio is not queued.
+ */
+ throtl_trim_slice(tg, rw);
+ } else if (bio_issue_as_root_blkg(bio)) {
+ /*
+ * IOs which may cause priority inversions are
+ * dispatched directly, even if they're over limit.
+ * Debts are handled by carryover_bytes/ios while
+ * calculating wait time.
+ */
+ tg_dispatch_in_debt(tg, bio, rw);
+ } else {
+ /* if above limits, break to queue */
break;
}
- /* within limits, let's charge and dispatch directly */
- throtl_charge_bio(tg, bio);
-
- /*
- * We need to trim slice even when bios are not being queued
- * otherwise it might happen that a bio is not queued for
- * a long time and slice keeps on extending and trim is not
- * called for a long time. Now if limits are reduced suddenly
- * we take into account all the IO dispatched so far at new
- * low rate and * newly queued IO gets a really long dispatch
- * time.
- *
- * So keep on trimming slice even if bio is not queued.
- */
- throtl_trim_slice(tg, rw);
-
/*
* @bio passed through this layer without being throttled.
* Climb up the ladder. If we're already at the top, it
@@ -1661,8 +1667,6 @@ bool __blk_throtl_bio(struct bio *bio)
tg->io_disp[rw], tg_iops_limit(tg, rw),
sq->nr_queued[READ], sq->nr_queued[WRITE]);
- tg->last_low_overflow_time[rw] = jiffies;
-
td->nr_queued[rw]++;
throtl_add_bio_tg(bio, qn, tg);
throttled = true;
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 4d9ef5abdf21..1a36d1278eea 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -106,8 +106,6 @@ struct throtl_grp {
/* Number of bio's dispatched in current slice */
unsigned int io_disp[2];
- unsigned long last_low_overflow_time[2];
-
uint64_t last_bytes_disp[2];
unsigned int last_io_disp[2];
diff --git a/block/blk.h b/block/blk.h
index e180863f918b..c718e4291db0 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -331,33 +331,67 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
ssize_t part_timeout_store(struct device *, struct device_attribute *,
const char *, size_t);
-static inline bool bio_may_exceed_limits(struct bio *bio,
- const struct queue_limits *lim)
+struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
+ unsigned *nsegs);
+struct bio *bio_split_write_zeroes(struct bio *bio,
+ const struct queue_limits *lim, unsigned *nsegs);
+struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
+ unsigned *nr_segs);
+struct bio *bio_split_zone_append(struct bio *bio,
+ const struct queue_limits *lim, unsigned *nr_segs);
+
+/*
+ * All drivers must accept single-segments bios that are smaller than PAGE_SIZE.
+ *
+ * This is a quick and dirty check that relies on the fact that bi_io_vec[0] is
+ * always valid if a bio has data. The check might lead to occasional false
+ * positives when bios are cloned, but compared to the performance impact of
+ * cloned bios themselves the loop below doesn't matter anyway.
+ */
+static inline bool bio_may_need_split(struct bio *bio,
+ const struct queue_limits *lim)
+{
+ return lim->chunk_sectors || bio->bi_vcnt != 1 ||
+ bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
+}
+
+/**
+ * __bio_split_to_limits - split a bio to fit the queue limits
+ * @bio: bio to be split
+ * @lim: queue limits to split based on
+ * @nr_segs: returns the number of segments in the returned bio
+ *
+ * Check if @bio needs splitting based on the queue limits, and if so split off
+ * a bio fitting the limits from the beginning of @bio and return it. @bio is
+ * shortened to the remainder and re-submitted.
+ *
+ * The split bio is allocated from @q->bio_split, which is provided by the
+ * block layer.
+ */
+static inline struct bio *__bio_split_to_limits(struct bio *bio,
+ const struct queue_limits *lim, unsigned int *nr_segs)
{
switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ if (bio_may_need_split(bio, lim))
+ return bio_split_rw(bio, lim, nr_segs);
+ *nr_segs = 1;
+ return bio;
+ case REQ_OP_ZONE_APPEND:
+ return bio_split_zone_append(bio, lim, nr_segs);
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
+ return bio_split_discard(bio, lim, nr_segs);
case REQ_OP_WRITE_ZEROES:
- return true; /* non-trivial splitting decisions */
+ return bio_split_write_zeroes(bio, lim, nr_segs);
default:
- break;
+ /* other operations can't be split */
+ *nr_segs = 0;
+ return bio;
}
-
- /*
- * All drivers must accept single-segments bios that are <= PAGE_SIZE.
- * This is a quick and dirty check that relies on the fact that
- * bi_io_vec[0] is always valid if a bio has data. The check might
- * lead to occasional false negatives when bios are cloned, but compared
- * to the performance impact of cloned bios themselves the loop below
- * doesn't matter anyway.
- */
- return lim->chunk_sectors || bio->bi_vcnt != 1 ||
- bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE;
}
-struct bio *__bio_split_to_limits(struct bio *bio,
- const struct queue_limits *lim,
- unsigned int *nr_segs);
int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
@@ -540,6 +574,10 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
+int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
+ struct folio *folio, size_t len, size_t offset,
+ unsigned int max_sectors, bool *same_page);
+
/*
* Clean up a page appropriately, where the page may be pinned, may have a
* ref taken on it or neither.
@@ -571,6 +609,7 @@ blk_mode_t file_to_blk_mode(struct file *file);
int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
loff_t lstart, loff_t lend);
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
extern const struct address_space_operations def_blk_aops;
diff --git a/block/elevator.c b/block/elevator.c
index f13d552a32c8..c355b55d0107 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -698,17 +698,26 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return 0;
e = elevator_find_get(q, elevator_name);
- if (!e) {
- request_module("%s-iosched", elevator_name);
- e = elevator_find_get(q, elevator_name);
- if (!e)
- return -EINVAL;
- }
+ if (!e)
+ return -EINVAL;
ret = elevator_switch(q, e);
elevator_put(e);
return ret;
}
+int elv_iosched_load_module(struct gendisk *disk, const char *buf,
+ size_t count)
+{
+ char elevator_name[ELV_NAME_MAX];
+
+ if (!elv_support_iosched(disk->queue))
+ return -EOPNOTSUPP;
+
+ strscpy(elevator_name, buf, sizeof(elevator_name));
+
+ return request_module("%s-iosched", strstrip(elevator_name));
+}
+
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
diff --git a/block/elevator.h b/block/elevator.h
index 3fe18e1a8692..2a78544bf201 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -148,6 +148,8 @@ extern void elv_unregister(struct elevator_type *);
* io scheduler sysfs switching
*/
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
+int elv_iosched_load_module(struct gendisk *disk, const char *page,
+ size_t count);
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
diff --git a/block/fops.c b/block/fops.c
index 9825c1713a49..e69deb6d8635 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/module.h>
+#include <linux/io_uring/cmd.h>
#include "blk.h"
static inline struct inode *bdev_file_inode(struct file *file)
@@ -451,20 +452,20 @@ static void blkdev_readahead(struct readahead_control *rac)
}
static int blkdev_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
- return block_write_begin(mapping, pos, len, pagep, blkdev_get_block);
+ return block_write_begin(mapping, pos, len, foliop, blkdev_get_block);
}
static int blkdev_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, struct page *page,
+ loff_t pos, unsigned len, unsigned copied, struct folio *folio,
void *fsdata)
{
int ret;
- ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ ret = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return ret;
}
@@ -771,7 +772,7 @@ reexpand:
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
+ FALLOC_FL_ZERO_RANGE)
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
@@ -830,14 +831,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
len >> SECTOR_SHIFT, GFP_KERNEL,
BLKDEV_ZERO_NOFALLBACK);
break;
- case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
- error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
- if (error)
- goto fail;
-
- error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
- len >> SECTOR_SHIFT, GFP_KERNEL);
- break;
default:
error = -EOPNOTSUPP;
}
@@ -873,6 +866,7 @@ const struct file_operations def_blk_fops = {
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
+ .uring_cmd = blkdev_uring_cmd,
.fop_flags = FOP_BUFFER_RASYNC,
};
diff --git a/block/ioctl.c b/block/ioctl.c
index e8e4a4190f18..6554b728bae6 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -11,6 +11,9 @@
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <linux/io_uring/cmd.h>
+#include <uapi/linux/blkdev.h>
#include "blk.h"
static int blkpg_do_ioctl(struct block_device *bdev,
@@ -92,38 +95,51 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
}
#endif
+/*
+ * Check that [start, start + len) is a valid range from the block device's
+ * perspective, including verifying that it can be correctly translated into
+ * logical block addresses.
+ */
+static int blk_validate_byte_range(struct block_device *bdev,
+ uint64_t start, uint64_t len)
+{
+ unsigned int bs_mask = bdev_logical_block_size(bdev) - 1;
+ uint64_t end;
+
+ if ((start | len) & bs_mask)
+ return -EINVAL;
+ if (!len)
+ return -EINVAL;
+ if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev))
+ return -EINVAL;
+
+ return 0;
+}
+
static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
- unsigned int bs_mask = bdev_logical_block_size(bdev) - 1;
- uint64_t range[2], start, len, end;
+ uint64_t range[2], start, len;
struct bio *prev = NULL, *bio;
sector_t sector, nr_sects;
struct blk_plug plug;
int err;
- if (!(mode & BLK_OPEN_WRITE))
- return -EBADF;
-
- if (!bdev_max_discard_sectors(bdev))
- return -EOPNOTSUPP;
- if (bdev_read_only(bdev))
- return -EPERM;
-
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;
-
start = range[0];
len = range[1];
- if (!len)
- return -EINVAL;
- if ((start | len) & bs_mask)
- return -EINVAL;
+ if (!bdev_max_discard_sectors(bdev))
+ return -EOPNOTSUPP;
- if (check_add_overflow(start, len, &end) ||
- end > bdev_nr_bytes(bdev))
- return -EINVAL;
+ if (!(mode & BLK_OPEN_WRITE))
+ return -EBADF;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+ err = blk_validate_byte_range(bdev, start, len);
+ if (err)
+ return err;
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
@@ -163,7 +179,7 @@ fail:
static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
void __user *argp)
{
- uint64_t start, len;
+ uint64_t start, len, end;
uint64_t range[2];
int err;
@@ -178,11 +194,12 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
len = range[1];
if ((start & 511) || (len & 511))
return -EINVAL;
- if (start + len > bdev_nr_bytes(bdev))
+ if (check_add_overflow(start, len, &end) ||
+ end > bdev_nr_bytes(bdev))
return -EINVAL;
filemap_invalidate_lock(bdev->bd_mapping);
- err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+ err = truncate_bdev_range(bdev, mode, start, end - 1);
if (!err)
err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
GFP_KERNEL);
@@ -734,3 +751,112 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return ret;
}
#endif
+
+struct blk_iou_cmd {
+ int res;
+ bool nowait;
+};
+
+static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
+
+ if (bic->res == -EAGAIN && bic->nowait)
+ io_uring_cmd_issue_blocking(cmd);
+ else
+ io_uring_cmd_done(cmd, bic->res, 0, issue_flags);
+}
+
+static void bio_cmd_bio_end_io(struct bio *bio)
+{
+ struct io_uring_cmd *cmd = bio->bi_private;
+ struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
+
+ if (unlikely(bio->bi_status) && !bic->res)
+ bic->res = blk_status_to_errno(bio->bi_status);
+
+ io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete);
+ bio_put(bio);
+}
+
+static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
+ struct block_device *bdev,
+ uint64_t start, uint64_t len, bool nowait)
+{
+ struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
+ gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL;
+ sector_t sector = start >> SECTOR_SHIFT;
+ sector_t nr_sects = len >> SECTOR_SHIFT;
+ struct bio *prev = NULL, *bio;
+ int err;
+
+ if (!bdev_max_discard_sectors(bdev))
+ return -EOPNOTSUPP;
+ if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE))
+ return -EBADF;
+ if (bdev_read_only(bdev))
+ return -EPERM;
+ err = blk_validate_byte_range(bdev, start, len);
+ if (err)
+ return err;
+
+ err = filemap_invalidate_pages(bdev->bd_mapping, start,
+ start + len - 1, nowait);
+ if (err)
+ return err;
+
+ while (true) {
+ bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp);
+ if (!bio)
+ break;
+ if (nowait) {
+ /*
+ * Don't allow multi-bio non-blocking submissions as
+ * subsequent bios may fail but we won't get a direct
+ * indication of that. Normally, the caller should
+ * retry from a blocking context.
+ */
+ if (unlikely(nr_sects)) {
+ bio_put(bio);
+ return -EAGAIN;
+ }
+ bio->bi_opf |= REQ_NOWAIT;
+ }
+
+ prev = bio_chain_and_submit(prev, bio);
+ }
+ if (unlikely(!prev))
+ return -EAGAIN;
+ if (unlikely(nr_sects))
+ bic->res = -EAGAIN;
+
+ prev->bi_private = cmd;
+ prev->bi_end_io = bio_cmd_bio_end_io;
+ submit_bio(prev);
+ return -EIOCBQUEUED;
+}
+
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host);
+ struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
+ const struct io_uring_sqe *sqe = cmd->sqe;
+ u32 cmd_op = cmd->cmd_op;
+ uint64_t start, len;
+
+ if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len ||
+ sqe->rw_flags || sqe->file_index))
+ return -EINVAL;
+
+ bic->res = 0;
+ bic->nowait = issue_flags & IO_URING_F_NONBLOCK;
+
+ start = READ_ONCE(sqe->addr);
+ len = READ_ONCE(sqe->addr3);
+
+ switch (cmd_op) {
+ case BLOCK_URING_CMD_DISCARD:
+ return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait);
+ }
+ return -EINVAL;
+}
diff --git a/block/partitions/core.c b/block/partitions/core.c
index ab76e64f0f6c..5bd7a603092e 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -555,9 +555,11 @@ static bool blk_add_partition(struct gendisk *disk,
part = add_partition(disk, p, from, size, state->parts[p].flags,
&state->parts[p].info);
- if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
- printk(KERN_ERR " %s: p%d could not be added: %pe\n",
- disk->disk_name, p, part);
+ if (IS_ERR(part)) {
+ if (PTR_ERR(part) != -ENXIO) {
+ printk(KERN_ERR " %s: p%d could not be added: %pe\n",
+ disk->disk_name, p, part);
+ }
return true;
}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 425e2836f3e1..e7052a728966 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -8,7 +8,6 @@
#include <linux/blk-integrity.h>
#include <linux/crc-t10dif.h>
#include <linux/crc64.h>
-#include <linux/module.h>
#include <net/checksum.h>
#include <asm/unaligned.h>
#include "blk.h"
@@ -240,9 +239,9 @@ static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
}
}
-static bool ext_pi_ref_escape(u8 *ref_tag)
+static bool ext_pi_ref_escape(const u8 ref_tag[6])
{
- static u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ static const u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
}
@@ -472,6 +471,3 @@ void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
else
t10_pi_type1_complete(rq, nr_bytes);
}
-
-MODULE_DESCRIPTION("T10 Protection Information module");
-MODULE_LICENSE("GPL");