From 2f578aaf51624aa6fcff041fc7dc5c2d4dfa447f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 05:15:51 +0900 Subject: block: move tag field position in struct request __data_len and __sector are internal fields which, as the comment above them says, should not be accessed directly at the driver level. The tag field, however, may be accessed directly by drivers, so move it above the comment to keep the comment accurate. Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 592669bcc536..90e6914bea0c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -137,11 +137,11 @@ struct request { unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; + int tag; int internal_tag; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ - int tag; sector_t __sector; /* sector cursor */ struct bio *bio; -- cgit v1.2.3-58-ga151 From 3a211b71529fdd0a89095b18fb19155db0c8fb5d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 23 May 2019 18:43:11 +0300 Subject: blk-core: Remove blk_end_request*() declarations Commit a1ce35fa49852db60fc6e268 ("block: remove dead elevator code") deleted blk_end_request() and friends, but some declarations are still left. Purge them. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- include/linux/blkdev.h | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 8340f69670d8..94c6520bc786 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1348,7 +1348,7 @@ EXPORT_SYMBOL_GPL(blk_steal_bios); * * This special helper function is only for request stacking drivers * (e.g. request-based dm) so that they can handle partial completion. - * Actual device drivers should use blk_end_request instead. + * Actual device drivers should use blk_mq_end_request instead. * * Passing the result of blk_rq_bytes() as @nr_bytes guarantees * %false return from this function. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90e6914bea0c..ad49a775c54f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1026,21 +1026,9 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); * * blk_update_request() completes given number of bytes and updates * the request without completing it. - * - * blk_end_request() and friends. __blk_end_request() must be called - * with the request queue spinlock acquired. - * - * Several drivers define their own end_request and call - * blk_end_request() for parts of the original function. - * This prevents code duplication in drivers. 
*/ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_end_request_all(struct request *rq, blk_status_t error); -extern bool __blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); -extern void __blk_end_request_all(struct request *rq, blk_status_t error); -extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -- cgit v1.2.3-58-ga151 From f924cddebc900f7cb10d5538d69523e558fa681c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:29:00 +0200 Subject: block: remove blk_init_request_from_bio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lightnvm should have never used this function, as it is sending passthrough requests, so switch it to blk_rq_append_bio like all the other passthrough request users. Inline blk_init_request_from_bio into the only remaining caller. Reviewed-by: Hannes Reinecke Reviewed-by: Minwoo Im Reviewed-by: Javier González Reviewed-by: Matias Bjørling Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ----------- block/blk-mq.c | 7 ++++++- drivers/nvme/host/lightnvm.c | 2 +- include/linux/blkdev.h | 1 - 4 files changed, 7 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index b6f22f219389..d1c7c69a20dd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -687,17 +687,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, return false; } -void blk_init_request_from_bio(struct request *req, struct bio *bio) -{ - if (bio->bi_opf & REQ_RAHEAD) - req->cmd_flags |= REQ_FAILFAST_MASK; - - req->__sector = bio->bi_iter.bi_sector; - req->write_hint = bio->bi_write_hint; - blk_rq_bio_prep(req->q, req, bio); -} -EXPORT_SYMBOL_GPL(blk_init_request_from_bio); - static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; diff --git a/block/blk-mq.c b/block/blk-mq.c index ce0f5f4ede70..61457bffa55f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1766,7 +1766,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) { - blk_init_request_from_bio(rq, bio); + if (bio->bi_opf & REQ_RAHEAD) + rq->cmd_flags |= REQ_FAILFAST_MASK; + + rq->__sector = bio->bi_iter.bi_sector; + rq->write_hint = bio->bi_write_hint; + blk_rq_bio_prep(rq->q, rq, bio); blk_account_io_start(rq, true); } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 4f20a10b39d3..ba009d4c9dfa 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -660,7 +660,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q, rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (rqd->bio) - blk_init_request_from_bio(rq, rqd->bio); + blk_rq_append_bio(rq, &rqd->bio); else rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad49a775c54f..2d4dfe82767a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -828,7 +828,6 @@ extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); -extern void 
blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -- cgit v1.2.3-58-ga151 From 14ccb66b3f585b2bc21e7256c96090abed5a512c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:29:01 +0200 Subject: block: remove the bi_phys_segments field in struct bio We only need the number of segments in the blk-mq submission path. Remove the field from struct bio, and return the segment count from a variant of blk_queue_split instead, so that it can be passed as an argument to those functions that need the value. This also means we stop recounting segments except for cloning and partial segments. To keep the number of arguments in this hot path down, remove the pointless struct request_queue argument from any of the functions that grew a nr_segs argument. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 1 - block/bfq-iosched.c | 5 +-- block/bio.c | 15 +-------- block/blk-core.c | 32 ++++++++---------- block/blk-map.c | 10 ++++-- block/blk-merge.c | 75 +++++++++++++++--------------------------- block/blk-mq-sched.c | 26 ++++++++------- block/blk-mq-sched.h | 10 +++--- block/blk-mq.c | 23 ++++++------- block/blk.h | 23 ++++++------- block/kyber-iosched.c | 5 +-- block/mq-deadline.c | 5 +-- drivers/md/raid5.c | 1 - include/linux/bio.h | 1 - include/linux/blk-mq.h | 2 +- include/linux/blk_types.h | 6 ---- include/linux/blkdev.h | 1 - include/linux/elevator.h | 2 +- 18 files changed, 106 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index ac18b488cb5e..31c177663ed5 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -436,7 +436,6 @@ struct bio { struct bvec_iter bi_iter; /* current index into bio_vec array */ unsigned int bi_size; /* total size in bytes */ - unsigned short bi_phys_segments; /* segments after physaddr coalesce*/ unsigned short bi_hw_segments; /* segments after DMA remapping */ unsigned int bi_max; /* max bio_vecs we can hold used as index into pool */ diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f8d430f88d25..a6bf842cbe16 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2027,7 +2027,8 @@ static void bfq_remove_request(struct request_queue *q, } -static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, + unsigned int nr_segs) { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; @@ -2050,7 +2051,7 @@ static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) bfqd->bio_bfqq = NULL; bfqd->bio_bic = bic; - ret = blk_mq_sched_try_merge(q, bio, &free); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); if (free) blk_mq_free_request(free); diff --git a/block/bio.c b/block/bio.c index 4bcdcd3f63f4..ad9c3aa9bf7d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -558,14 +558,6 @@ void bio_put(struct bio *bio) } EXPORT_SYMBOL(bio_put); -int bio_phys_segments(struct request_queue *q, struct bio *bio) -{ - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) - blk_recount_segments(q, bio); - - return bio->bi_phys_segments; -} - /** * __bio_clone_fast - clone a bio that shares the original bio's biovec * @bio: destination bio @@ -739,7 +731,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio 
*bio, if (bio_full(bio)) return 0; - if (bio->bi_phys_segments >= queue_max_segments(q)) + if (bio->bi_vcnt >= queue_max_segments(q)) return 0; bvec = &bio->bi_io_vec[bio->bi_vcnt]; @@ -749,8 +741,6 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, bio->bi_vcnt++; done: bio->bi_iter.bi_size += len; - bio->bi_phys_segments = bio->bi_vcnt; - bio_set_flag(bio, BIO_SEG_VALID); return len; } @@ -1909,10 +1899,7 @@ void bio_trim(struct bio *bio, int offset, int size) if (offset == 0 && size == bio->bi_iter.bi_size) return; - bio_clear_flag(bio, BIO_SEG_VALID); - bio_advance(bio, offset << 9); - bio->bi_iter.bi_size = size; if (bio_integrity(bio)) diff --git a/block/blk-core.c b/block/blk-core.c index d1c7c69a20dd..ef998a724b27 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -550,15 +550,15 @@ void blk_put_request(struct request *req) } EXPORT_SYMBOL(blk_put_request); -bool bio_attempt_back_merge(struct request_queue *q, struct request *req, - struct bio *bio) +bool bio_attempt_back_merge(struct request *req, struct bio *bio, + unsigned int nr_segs) { const int ff = bio->bi_opf & REQ_FAILFAST_MASK; - if (!ll_back_merge_fn(q, req, bio)) + if (!ll_back_merge_fn(req, bio, nr_segs)) return false; - trace_block_bio_backmerge(q, req, bio); + trace_block_bio_backmerge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -571,15 +571,15 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req, return true; } -bool bio_attempt_front_merge(struct request_queue *q, struct request *req, - struct bio *bio) +bool bio_attempt_front_merge(struct request *req, struct bio *bio, + unsigned int nr_segs) { const int ff = bio->bi_opf & REQ_FAILFAST_MASK; - if (!ll_front_merge_fn(q, req, bio)) + if (!ll_front_merge_fn(req, bio, nr_segs)) return false; - trace_block_bio_frontmerge(q, req, bio); + trace_block_bio_frontmerge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -621,6 +621,7 @@ no_merge: * blk_attempt_plug_merge - try to merge with %current's plugged list * @q: request_queue new bio is being queued at * @bio: new bio being queued + * @nr_segs: number of segments in @bio * @same_queue_rq: pointer to &struct request that gets filled in when * another request associated with @q is found on the plug list * (optional, may be %NULL) @@ -639,7 +640,7 @@ no_merge: * Caller must ensure !blk_queue_nomerges(q) beforehand. 
*/ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - struct request **same_queue_rq) + unsigned int nr_segs, struct request **same_queue_rq) { struct blk_plug *plug; struct request *rq; @@ -668,10 +669,10 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, switch (blk_try_merge(rq, bio)) { case ELEVATOR_BACK_MERGE: - merged = bio_attempt_back_merge(q, rq, bio); + merged = bio_attempt_back_merge(rq, bio, nr_segs); break; case ELEVATOR_FRONT_MERGE: - merged = bio_attempt_front_merge(q, rq, bio); + merged = bio_attempt_front_merge(rq, bio, nr_segs); break; case ELEVATOR_DISCARD_MERGE: merged = bio_attempt_discard_merge(q, rq, bio); @@ -1427,14 +1428,9 @@ bool blk_update_request(struct request *req, blk_status_t error, } EXPORT_SYMBOL_GPL(blk_update_request); -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, - struct bio *bio) +void blk_rq_bio_prep(struct request *rq, struct bio *bio, unsigned int nr_segs) { - if (bio_has_data(bio)) - rq->nr_phys_segments = bio_phys_segments(q, bio); - else if (bio_op(bio) == REQ_OP_DISCARD) - rq->nr_phys_segments = 1; - + rq->nr_phys_segments = nr_segs; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; rq->ioprio = bio_prio(bio); diff --git a/block/blk-map.c b/block/blk-map.c index db9373bd31ac..3a62e471d81b 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -18,13 +18,19 @@ int blk_rq_append_bio(struct request *rq, struct bio **bio) { struct bio *orig_bio = *bio; + struct bvec_iter iter; + struct bio_vec bv; + unsigned int nr_segs = 0; blk_queue_bounce(rq->q, bio); + bio_for_each_bvec(bv, *bio, iter) + nr_segs++; + if (!rq->bio) { - blk_rq_bio_prep(rq->q, rq, *bio); + blk_rq_bio_prep(rq, *bio, nr_segs); } else { - if (!ll_back_merge_fn(rq->q, rq, *bio)) { + if (!ll_back_merge_fn(rq, *bio, nr_segs)) { if (orig_bio != *bio) { bio_put(*bio); *bio = orig_bio; diff --git a/block/blk-merge.c b/block/blk-merge.c index 17713d7d98d5..72b4fd89a22d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -258,32 +258,29 @@ split: return do_split ? new : NULL; } -void blk_queue_split(struct request_queue *q, struct bio **bio) +void __blk_queue_split(struct request_queue *q, struct bio **bio, + unsigned int *nr_segs) { - struct bio *split, *res; - unsigned nsegs; + struct bio *split; switch (bio_op(*bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: - split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs); break; case REQ_OP_WRITE_ZEROES: - split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, + nr_segs); break; case REQ_OP_WRITE_SAME: - split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_write_same_split(q, *bio, &q->bio_split, + nr_segs); break; default: - split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs); + split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); break; } - /* physical segments can be figured out during splitting */ - res = split ? 
split : *bio; - res->bi_phys_segments = nsegs; - bio_set_flag(res, BIO_SEG_VALID); - if (split) { /* there isn't chance to merge the splitted bio */ split->bi_opf |= REQ_NOMERGE; @@ -304,6 +301,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio) *bio = split; } } + +void blk_queue_split(struct request_queue *q, struct bio **bio) +{ + unsigned int nr_segs; + + __blk_queue_split(q, bio, &nr_segs); +} EXPORT_SYMBOL(blk_queue_split); static unsigned int __blk_recalc_rq_segments(struct request_queue *q, @@ -338,17 +342,6 @@ void blk_recalc_rq_segments(struct request *rq) rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio); } -void blk_recount_segments(struct request_queue *q, struct bio *bio) -{ - struct bio *nxt = bio->bi_next; - - bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); - bio->bi_next = nxt; - - bio_set_flag(bio, BIO_SEG_VALID); -} - static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, struct scatterlist *sglist) { @@ -519,16 +512,13 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_rq_map_sg); -static inline int ll_new_hw_segment(struct request_queue *q, - struct request *req, - struct bio *bio) +static inline int ll_new_hw_segment(struct request *req, struct bio *bio, + unsigned int nr_phys_segs) { - int nr_phys_segs = bio_phys_segments(q, bio); - - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(req->q)) goto no_merge; - if (blk_integrity_merge_bio(q, req, bio) == false) + if (blk_integrity_merge_bio(req->q, req, bio) == false) goto no_merge; /* @@ -539,12 +529,11 @@ static inline int ll_new_hw_segment(struct request_queue *q, return 1; no_merge: - req_set_nomerge(q, req); + req_set_nomerge(req->q, req); return 0; } -int ll_back_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio) +int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs) { if (req_gap_back_merge(req, bio)) return 0; @@ -553,21 +542,15 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req, blk_rq_pos(req))) { - req_set_nomerge(q, req); + req_set_nomerge(req->q, req); return 0; } - if (!bio_flagged(req->biotail, BIO_SEG_VALID)) - blk_recount_segments(q, req->biotail); - if (!bio_flagged(bio, BIO_SEG_VALID)) - blk_recount_segments(q, bio); - return ll_new_hw_segment(q, req, bio); + return ll_new_hw_segment(req, bio, nr_segs); } -int ll_front_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio) +int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs) { - if (req_gap_front_merge(req, bio)) return 0; if (blk_integrity_rq(req) && @@ -575,15 +558,11 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) { - req_set_nomerge(q, req); + req_set_nomerge(req->q, req); return 0; } - if (!bio_flagged(bio, BIO_SEG_VALID)) - blk_recount_segments(q, bio); - if (!bio_flagged(req->bio, BIO_SEG_VALID)) - blk_recount_segments(q, req->bio); - return ll_new_hw_segment(q, req, bio); + return ll_new_hw_segment(req, bio, nr_segs); } static bool req_attempt_discard_merge(struct request_queue *q, struct request *req, diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 2766066a15db..956a7aa9a637 100644 --- a/block/blk-mq-sched.c 
+++ b/block/blk-mq-sched.c @@ -224,7 +224,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) } bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, - struct request **merged_request) + unsigned int nr_segs, struct request **merged_request) { struct request *rq; @@ -232,7 +232,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, case ELEVATOR_BACK_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (!bio_attempt_back_merge(q, rq, bio)) + if (!bio_attempt_back_merge(rq, bio, nr_segs)) return false; *merged_request = attempt_back_merge(q, rq); if (!*merged_request) @@ -241,7 +241,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, case ELEVATOR_FRONT_MERGE: if (!blk_mq_sched_allow_merge(q, rq, bio)) return false; - if (!bio_attempt_front_merge(q, rq, bio)) + if (!bio_attempt_front_merge(rq, bio, nr_segs)) return false; *merged_request = attempt_front_merge(q, rq); if (!*merged_request) @@ -260,7 +260,7 @@ EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); * of them. */ bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio) + struct bio *bio, unsigned int nr_segs) { struct request *rq; int checked = 8; @@ -277,11 +277,13 @@ bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, switch (blk_try_merge(rq, bio)) { case ELEVATOR_BACK_MERGE: if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_back_merge(q, rq, bio); + merged = bio_attempt_back_merge(rq, bio, + nr_segs); break; case ELEVATOR_FRONT_MERGE: if (blk_mq_sched_allow_merge(q, rq, bio)) - merged = bio_attempt_front_merge(q, rq, bio); + merged = bio_attempt_front_merge(rq, bio, + nr_segs); break; case ELEVATOR_DISCARD_MERGE: merged = bio_attempt_discard_merge(q, rq, bio); @@ -304,13 +306,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge); */ static bool blk_mq_attempt_merge(struct request_queue *q, struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct bio *bio) + struct blk_mq_ctx *ctx, struct bio *bio, + unsigned int nr_segs) { enum hctx_type type = hctx->type; lockdep_assert_held(&ctx->lock); - if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) { + if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) { ctx->rq_merged++; return true; } @@ -318,7 +321,8 @@ static bool blk_mq_attempt_merge(struct request_queue *q, return false; } -bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { struct elevator_queue *e = q->elevator; struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); @@ -328,7 +332,7 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) if (e && e->type->ops.bio_merge) { blk_mq_put_ctx(ctx); - return e->type->ops.bio_merge(hctx, bio); + return e->type->ops.bio_merge(hctx, bio, nr_segs); } type = hctx->type; @@ -336,7 +340,7 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) !list_empty_careful(&ctx->rq_lists[type])) { /* default per sw-queue merge */ spin_lock(&ctx->lock); - ret = blk_mq_attempt_merge(q, hctx, ctx, bio); + ret = blk_mq_attempt_merge(q, hctx, ctx, bio, nr_segs); spin_unlock(&ctx->lock); } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 3cf92cbbd8ac..cf22ab00fefb 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -12,8 +12,9 @@ void blk_mq_sched_assign_ioc(struct request *rq); void blk_mq_sched_request_inserted(struct request *rq); bool 
blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, - struct request **merged_request); -bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); + unsigned int nr_segs, struct request **merged_request); +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); @@ -31,12 +32,13 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_sched_free_requests(struct request_queue *q); static inline bool -blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { if (blk_queue_nomerges(q) || !bio_mergeable(bio)) return false; - return __blk_mq_sched_bio_merge(q, bio); + return __blk_mq_sched_bio_merge(q, bio, nr_segs); } static inline bool diff --git a/block/blk-mq.c b/block/blk-mq.c index 61457bffa55f..d89383847d09 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1764,14 +1764,15 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) } } -static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) +static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, + unsigned int nr_segs) { if (bio->bi_opf & REQ_RAHEAD) rq->cmd_flags |= REQ_FAILFAST_MASK; rq->__sector = bio->bi_iter.bi_sector; rq->write_hint = bio->bi_write_hint; - blk_rq_bio_prep(rq->q, rq, bio); + blk_rq_bio_prep(rq, bio, nr_segs); blk_account_io_start(rq, true); } @@ -1941,20 +1942,20 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) struct request *rq; struct blk_plug *plug; struct request *same_queue_rq = NULL; + unsigned int nr_segs; blk_qc_t cookie; blk_queue_bounce(q, &bio); - - blk_queue_split(q, &bio); + __blk_queue_split(q, &bio, &nr_segs); if (!bio_integrity_prep(bio)) return BLK_QC_T_NONE; if (!is_flush_fua && !blk_queue_nomerges(q) && - blk_attempt_plug_merge(q, bio, &same_queue_rq)) + blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq)) return BLK_QC_T_NONE; - if (blk_mq_sched_bio_merge(q, bio)) + if (blk_mq_sched_bio_merge(q, bio, nr_segs)) return BLK_QC_T_NONE; rq_qos_throttle(q, bio); @@ -1977,7 +1978,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) plug = current->plug; if (unlikely(is_flush_fua)) { blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); /* bypass scheduler for flush rq */ blk_insert_flush(rq); @@ -1991,7 +1992,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) struct request *last = NULL; blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); if (!request_count) trace_block_plug(q); @@ -2006,7 +2007,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_add_rq_to_plug(plug, rq); } else if (plug && !blk_queue_nomerges(q)) { - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); /* * We do limited plugging. If the bio can be merged, do that. 
@@ -2035,11 +2036,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && !data.hctx->dispatch_busy)) { blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_put_ctx(data.ctx); - blk_mq_bio_to_request(rq, bio); + blk_mq_bio_to_request(rq, bio, nr_segs); blk_mq_sched_insert_request(rq, false, true, true); } diff --git a/block/blk.h b/block/blk.h index 7814aa207153..a1d33cb65842 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,8 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q, int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); -void blk_rq_bio_prep(struct request_queue *q, struct request *rq, - struct bio *bio); +void blk_rq_bio_prep(struct request *rq, struct bio *bio, unsigned int nr_segs); void blk_freeze_queue(struct request_queue *q); static inline void blk_queue_enter_live(struct request_queue *q) @@ -154,14 +153,14 @@ static inline bool bio_integrity_endio(struct bio *bio) unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); -bool bio_attempt_front_merge(struct request_queue *q, struct request *req, - struct bio *bio); -bool bio_attempt_back_merge(struct request_queue *q, struct request *req, - struct bio *bio); +bool bio_attempt_front_merge(struct request *req, struct bio *bio, + unsigned int nr_segs); +bool bio_attempt_back_merge(struct request *req, struct bio *bio, + unsigned int nr_segs); bool bio_attempt_discard_merge(struct request_queue *q, struct request *req, struct bio *bio); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, - struct request **same_queue_rq); + unsigned int nr_segs, struct request **same_queue_rq); void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); @@ -202,10 +201,12 @@ static inline int blk_should_fake_timeout(struct request_queue *q) } #endif -int ll_back_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio); -int ll_front_merge_fn(struct request_queue *q, struct request *req, - struct bio *bio); +void __blk_queue_split(struct request_queue *q, struct bio **bio, + unsigned int *nr_segs); +int ll_back_merge_fn(struct request *req, struct bio *bio, + unsigned int nr_segs); +int ll_front_merge_fn(struct request *req, struct bio *bio, + unsigned int nr_segs); struct request *attempt_back_merge(struct request_queue *q, struct request *rq); struct request *attempt_front_merge(struct request_queue *q, struct request *rq); int blk_attempt_req_merge(struct request_queue *q, struct request *rq, diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index c3b05119cebd..3c2602601741 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -562,7 +562,8 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) } } -static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, + unsigned int nr_segs) { struct kyber_hctx_data *khd = hctx->sched_data; struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); @@ -572,7 +573,7 @@ static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) bool merged; spin_lock(&kcq->lock); - merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio); + merged = 
blk_mq_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); spin_unlock(&kcq->lock); blk_mq_put_ctx(ctx); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 1876f5712bfd..b8a682b5a1bb 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -469,7 +469,8 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, return ELEVATOR_NO_MERGE; } -static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) +static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, + unsigned int nr_segs) { struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; @@ -477,7 +478,7 @@ static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) bool ret; spin_lock(&dd->lock); - ret = blk_mq_sched_try_merge(q, bio, &free); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); spin_unlock(&dd->lock); if (free) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index da94cbaa1a9e..3de4e13bde98 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5251,7 +5251,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; bio_set_dev(align_bi, rdev->bdev); - bio_clear_flag(align_bi, BIO_SEG_VALID); if (is_badblock(rdev, align_bi->bi_iter.bi_sector, bio_sectors(align_bi), diff --git a/include/linux/bio.h b/include/linux/bio.h index 0f23b5682640..ee11c4324751 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,7 +408,6 @@ static inline void bio_wouldblock_error(struct bio *bio) } struct request_queue; -extern int bio_phys_segments(struct request_queue *, struct bio *); extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 15d1aa53d96c..3fa1fa59f9b2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -306,7 +306,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs bool blk_mq_complete_request(struct request *rq); void blk_mq_complete_request_sync(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio); + struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 95202f80676c..6a53799c3fe2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -154,11 +154,6 @@ struct bio { blk_status_t bi_status; u8 bi_partno; - /* Number of segments in this BIO after - * physical address coalescing is performed. 
- */ - unsigned int bi_phys_segments; - struct bvec_iter bi_iter; atomic_t __bi_remaining; @@ -210,7 +205,6 @@ struct bio { */ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ - BIO_SEG_VALID, /* bi_phys_segments valid */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_USER_MAPPED, /* contains user pages */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2d4dfe82767a..d5d3bb45dfb6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -841,7 +841,6 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_queue_split(struct request_queue *, struct bio **); -extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 6e8bc53740f0..169bb2e02516 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); + bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); -- cgit v1.2.3-58-ga151 From 239eeb085753d4356f731a773f363eb5bed4fe81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:19 +0200 Subject: blk-cgroup: factor out a helper to read rwstat counter Trying to break up the crazy statements to something readable. Also switch to an unsigned counter as it can't ever turn negative. 
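For readability, the helper this patch adds to include/linux/blk-cgroup.h (quoted from the diff below) combines the counter's carried-over aux count with the positive sum of its per-cpu counter:

    static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
                                               unsigned int idx)
    {
            /* live per-cpu sum plus the aux count carried over from
             * dead children */
            return atomic64_read(&rwstat->aux_cnt[idx]) +
                    percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
    }

blkg_rwstat_recursive_sum() then accumulates blkg_rwstat_read_counter(rwstat, i) per index instead of open-coding the two reads in one statement.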
Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 ++--- include/linux/blk-cgroup.h | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 440797293235..0778e52b1db2 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -745,7 +745,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; struct blkg_rwstat sum = { }; - int i; + unsigned int i; lockdep_assert_held(&blkg->q->queue_lock); @@ -762,8 +762,7 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(atomic64_read(&rwstat->aux_cnt[i]) + - percpu_counter_sum_positive(&rwstat->cpu_cnt[i]), + atomic64_add(blkg_rwstat_read_counter(rwstat, i), &sum.aux_cnt[i]); } rcu_read_unlock(); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 76c61318fda5..06236f56a840 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -198,6 +198,13 @@ int blkcg_activate_policy(struct request_queue *q, void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); +static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, + unsigned int idx) +{ + return atomic64_read(&rwstat->aux_cnt[idx]) + + percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); +} + const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, -- cgit v1.2.3-58-ga151 From 5d0b6e48cbef3219c0ed75e0e746c4ed259303c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:20 +0200 Subject: blk-cgroup: pass blkg_rwstat structures by reference Returning a structure generates rather bad code, so switch to passing by reference. Also don't require the structure to be zeroed and add to the 0-initialized counters, but actually set the counters to the calculated value. 
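A minimal sketch of the calling-convention point, using a hypothetical stand-in struct rather than the kernel's blkg_rwstat (the names here are illustrative only):

    /* stand-in for a multi-counter result aggregate */
    struct sample {
            unsigned long long cnt[4];
    };

    /* By value: for an aggregate this size the compiler typically
     * returns through a hidden result pointer, building the struct in
     * a temporary and copying it out to the caller. */
    struct sample read_byval(const unsigned long long src[4])
    {
            struct sample s = { };
            int i;

            for (i = 0; i < 4; i++)
                    s.cnt[i] = src[i];      /* fill a zeroed temporary */
            return s;                       /* copied to caller storage */
    }

    /* By reference: the callee writes straight into caller-owned
     * storage, and each counter is set to the calculated value. */
    void read_byref(const unsigned long long src[4], struct sample *result)
    {
            int i;

            for (i = 0; i < 4; i++)
                    result->cnt[i] = src[i];
    }

This mirrors the change in the diff below: blkg_rwstat_read() and blkg_rwstat_recursive_sum() gain a result parameter and atomic64_set() the counters, instead of returning a struct built by adding into zero-initialized counters.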
Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 15 +++++++++------ block/blk-cgroup.c | 31 ++++++++++++++++--------------- include/linux/blk-cgroup.h | 14 +++++++------- 3 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index b3796a40a61a..66abc82179f3 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -935,9 +935,9 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd_to_blkg(pd), - &blkcg_policy_bfq, - off); + struct blkg_rwstat sum; + + blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } @@ -975,9 +975,12 @@ static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat tmp = blkg_rwstat_recursive_sum(pd->blkg, NULL, - offsetof(struct blkcg_gq, stat_bytes)); - u64 sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + + struct blkg_rwstat tmp; + u64 sum; + + blkg_rwstat_recursive_sum(pd->blkg, NULL, + offsetof(struct blkcg_gq, stat_bytes), &tmp); + sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); return __blkg_prfill_u64(sf, pd, sum >> 9); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0778e52b1db2..db039a869d95 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -597,8 +597,9 @@ EXPORT_SYMBOL_GPL(blkg_prfill_stat); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off); + struct blkg_rwstat rwstat = { }; + blkg_rwstat_read((void *)pd + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); } EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); @@ -606,8 +607,9 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); static u64 blkg_prfill_rwstat_field(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off); + struct blkg_rwstat rwstat = { }; + blkg_rwstat_read((void *)pd->blkg + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); } @@ -649,8 +651,9 @@ static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg, - NULL, off); + struct blkg_rwstat rwstat; + + blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); } @@ -731,6 +734,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * @blkg: blkg of interest * @pol: blkcg_policy which contains the blkg_rwstat * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg + * @sum: blkg_rwstat structure containing the results * * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its * online descendants and their aux counts. The caller must be holding the @@ -739,12 +743,11 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it * is at @off bytes into @blkg's blkg_policy_data of the policy. 
*/ -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off) +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat *sum) { struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; - struct blkg_rwstat sum = { }; unsigned int i; lockdep_assert_held(&blkg->q->queue_lock); @@ -762,12 +765,10 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(blkg_rwstat_read_counter(rwstat, i), - &sum.aux_cnt[i]); + atomic64_set(&sum->aux_cnt[i], + blkg_rwstat_read_counter(rwstat, i)); } rcu_read_unlock(); - - return sum; } EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); @@ -953,14 +954,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) spin_lock_irq(&blkg->q->queue_lock); - rwstat = blkg_rwstat_recursive_sum(blkg, NULL, - offsetof(struct blkcg_gq, stat_bytes)); + blkg_rwstat_recursive_sum(blkg, NULL, + offsetof(struct blkcg_gq, stat_bytes), &rwstat); rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); - rwstat = blkg_rwstat_recursive_sum(blkg, NULL, - offsetof(struct blkcg_gq, stat_ios)); + blkg_rwstat_recursive_sum(blkg, NULL, + offsetof(struct blkcg_gq, stat_ios), &rwstat); rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 06236f56a840..3ee858111274 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -224,8 +224,8 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off); -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off); +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -700,15 +700,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * * Read the current snapshot of @rwstat and return it in the aux counts. */ -static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, + struct blkg_rwstat *result) { - struct blkg_rwstat result; int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&result.aux_cnt[i], + atomic64_set(&result->aux_cnt[i], percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); - return result; } /** @@ -721,8 +720,9 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { - struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); + struct blkg_rwstat tmp = { }; + blkg_rwstat_read(rwstat, &tmp); return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); } -- cgit v1.2.3-58-ga151 From 7af6fd9112ba310a889c60d0606b4b74049cfe14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:21 +0200 Subject: blk-cgroup: introduce a new struct blkg_rwstat_sample When sampling the blkcg counts we don't need atomics or per-cpu variables. Introduce a new structure just containing plain u64 counters. 
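The new type, as added to include/linux/blk-cgroup.h in the diff below, is just a plain array of u64:

    struct blkg_rwstat_sample {
            u64 cnt[BLKG_RWSTAT_NR];
    };

so a sampling site reads the percpu/atomic blkg_rwstat once into the snapshot and then does ordinary arithmetic on it, as in the updated blkg_rwstat_total() below:

    struct blkg_rwstat_sample tmp = { };

    blkg_rwstat_read(rwstat, &tmp);
    return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];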
Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 10 ++++------ block/blk-cgroup.c | 39 +++++++++++++++++++-------------------- include/linux/blk-cgroup.h | 22 ++++++++++++---------- 3 files changed, 35 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 66abc82179f3..624374a99c6e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -935,7 +935,7 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat sum; + struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); @@ -975,15 +975,13 @@ static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat tmp; - u64 sum; + struct blkg_rwstat_sample tmp; blkg_rwstat_recursive_sum(pd->blkg, NULL, offsetof(struct blkcg_gq, stat_bytes), &tmp); - sum = atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); - return __blkg_prfill_u64(sf, pd, sum >> 9); + return __blkg_prfill_u64(sf, pd, + (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); } static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index db039a869d95..664c09866839 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(__blkg_prfill_u64); * Print @rwstat to @sf for the device assocaited with @pd. */ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat *rwstat) + const struct blkg_rwstat_sample *rwstat) { static const char *rwstr[] = { [BLKG_RWSTAT_READ] = "Read", @@ -562,12 +562,12 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, for (i = 0; i < BLKG_RWSTAT_NR; i++) seq_printf(sf, "%s %s %llu\n", dname, rwstr[i], - (unsigned long long)atomic64_read(&rwstat->aux_cnt[i])); + rwstat->cnt[i]); - v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]) + - atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_DISCARD]); - seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); + v = rwstat->cnt[BLKG_RWSTAT_READ] + + rwstat->cnt[BLKG_RWSTAT_WRITE] + + rwstat->cnt[BLKG_RWSTAT_DISCARD]; + seq_printf(sf, "%s Total %llu\n", dname, v); return v; } EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); @@ -597,7 +597,7 @@ EXPORT_SYMBOL_GPL(blkg_prfill_stat); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = { }; + struct blkg_rwstat_sample rwstat = { }; blkg_rwstat_read((void *)pd + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); @@ -607,7 +607,7 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); static u64 blkg_prfill_rwstat_field(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat = { }; + struct blkg_rwstat_sample rwstat = { }; blkg_rwstat_read((void *)pd->blkg + off, &rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); @@ -651,7 +651,7 @@ static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - struct blkg_rwstat rwstat; + struct blkg_rwstat_sample rwstat; blkg_rwstat_recursive_sum(pd->blkg, NULL, off, 
&rwstat); return __blkg_prfill_rwstat(sf, pd, &rwstat); @@ -734,7 +734,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * @blkg: blkg of interest * @pol: blkcg_policy which contains the blkg_rwstat * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg - * @sum: blkg_rwstat structure containing the results + * @sum: blkg_rwstat_sample structure containing the results * * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its * online descendants and their aux counts. The caller must be holding the @@ -744,7 +744,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); * is at @off bytes into @blkg's blkg_policy_data of the policy. */ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat *sum) + int off, struct blkg_rwstat_sample *sum) { struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; @@ -765,8 +765,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&sum->aux_cnt[i], - blkg_rwstat_read_counter(rwstat, i)); + sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i); } rcu_read_unlock(); } @@ -934,7 +933,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { const char *dname; char *buf; - struct blkg_rwstat rwstat; + struct blkg_rwstat_sample rwstat; u64 rbytes, wbytes, rios, wios, dbytes, dios; size_t size = seq_get_buf(sf, &buf), off = 0; int i; @@ -956,15 +955,15 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) blkg_rwstat_recursive_sum(blkg, NULL, offsetof(struct blkcg_gq, stat_bytes), &rwstat); - rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); - wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); - dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); + rbytes = rwstat.cnt[BLKG_RWSTAT_READ]; + wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE]; + dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD]; blkg_rwstat_recursive_sum(blkg, NULL, offsetof(struct blkcg_gq, stat_ios), &rwstat); - rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]); - wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]); - dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]); + rios = rwstat.cnt[BLKG_RWSTAT_READ]; + wios = rwstat.cnt[BLKG_RWSTAT_WRITE]; + dios = rwstat.cnt[BLKG_RWSTAT_DISCARD]; spin_unlock_irq(&blkg->q->queue_lock); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3ee858111274..e4a81767e111 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -63,8 +63,7 @@ struct blkcg { /* * blkg_[rw]stat->aux_cnt is excluded for local stats but included for - * recursive. Used to carry stats of dead children, and, for blkg_rwstat, - * to carry result values from read and sum operations. + * recursive. Used to carry stats of dead children. */ struct blkg_stat { struct percpu_counter cpu_cnt; @@ -76,6 +75,10 @@ struct blkg_rwstat { atomic64_t aux_cnt[BLKG_RWSTAT_NR]; }; +struct blkg_rwstat_sample { + u64 cnt[BLKG_RWSTAT_NR]; +}; + /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). 
This is used by blkcg policies which need to track @@ -213,7 +216,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat *rwstat); + const struct blkg_rwstat_sample *rwstat); u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); @@ -225,7 +228,7 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat *sum); + int off, struct blkg_rwstat_sample *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -701,13 +704,13 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * Read the current snapshot of @rwstat and return it in the aux counts. */ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, - struct blkg_rwstat *result) + struct blkg_rwstat_sample *result) { int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&result->aux_cnt[i], - percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); + result->cnt[i] = + percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); } /** @@ -720,11 +723,10 @@ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { - struct blkg_rwstat tmp = { }; + struct blkg_rwstat_sample tmp = { }; blkg_rwstat_read(rwstat, &tmp); - return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; } /** -- cgit v1.2.3-58-ga151 From c0ce79dca5b0e8373a546ebea2af7b3df94c584e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:22 +0200 Subject: blk-cgroup: move struct blkg_stat to bfq This structure and assorted infrastructure is only used by the bfq I/O scheduler. Move it there instead of bloating the common code. Acked-by: Tejun Heo Acked-by: Paolo Valente Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 192 ++++++++++++++++++++++++++++++++++++--------- block/bfq-iosched.h | 19 +++-- block/blk-cgroup.c | 56 ------------- include/linux/blk-cgroup.h | 71 ----------------- 4 files changed, 167 insertions(+), 171 deletions(-) (limited to 'include') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 624374a99c6e..a691dca7e966 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -17,6 +17,124 @@ #if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) +static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp) +{ + int ret; + + ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); + if (ret) + return ret; + + atomic64_set(&stat->aux_cnt, 0); + return 0; +} + +static void bfq_stat_exit(struct bfq_stat *stat) +{ + percpu_counter_destroy(&stat->cpu_cnt); +} + +/** + * bfq_stat_add - add a value to a bfq_stat + * @stat: target bfq_stat + * @val: value to add + * + * Add @val to @stat. The caller must ensure that IRQ on the same CPU + * don't re-enter this function for the same counter. 
+ */ +static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val) +{ + percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); +} + +/** + * bfq_stat_read - read the current value of a bfq_stat + * @stat: bfq_stat to read + */ +static inline uint64_t bfq_stat_read(struct bfq_stat *stat) +{ + return percpu_counter_sum_positive(&stat->cpu_cnt); +} + +/** + * bfq_stat_reset - reset a bfq_stat + * @stat: bfq_stat to reset + */ +static inline void bfq_stat_reset(struct bfq_stat *stat) +{ + percpu_counter_set(&stat->cpu_cnt, 0); + atomic64_set(&stat->aux_cnt, 0); +} + +/** + * bfq_stat_add_aux - add a bfq_stat into another's aux count + * @to: the destination bfq_stat + * @from: the source + * + * Add @from's count including the aux one to @to's aux count. + */ +static inline void bfq_stat_add_aux(struct bfq_stat *to, + struct bfq_stat *from) +{ + atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt), + &to->aux_cnt); +} + +/** + * bfq_stat_recursive_sum - collect hierarchical bfq_stat + * @blkg: blkg of interest + * @pol: blkcg_policy which contains the bfq_stat + * @off: offset to the bfq_stat in blkg_policy_data or @blkg + * + * Collect the bfq_stat specified by @blkg, @pol and @off and all its + * online descendants and their aux counts. The caller must be holding the + * queue lock for online tests. + * + * If @pol is NULL, bfq_stat is at @off bytes into @blkg; otherwise, it is + * at @off bytes into @blkg's blkg_policy_data of the policy. + */ +static u64 bfq_stat_recursive_sum(struct blkcg_gq *blkg, + struct blkcg_policy *pol, int off) +{ + struct blkcg_gq *pos_blkg; + struct cgroup_subsys_state *pos_css; + u64 sum = 0; + + lockdep_assert_held(&blkg->q->queue_lock); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { + struct bfq_stat *stat; + + if (!pos_blkg->online) + continue; + + if (pol) + stat = (void *)blkg_to_pd(pos_blkg, pol) + off; + else + stat = (void *)blkg + off; + + sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); + } + rcu_read_unlock(); + + return sum; +} + +/** + * blkg_prfill_stat - prfill callback for bfq_stat + * @sf: seq_file to print to + * @pd: policy private data of interest + * @off: offset to the bfq_stat in @pd + * + * prfill callback for printing a bfq_stat. 
+ */ +static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, + int off) +{ + return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off)); +} + /* bfqg stats flags */ enum bfqg_stats_flags { BFQG_stats_waiting = 0, @@ -53,7 +171,7 @@ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) now = ktime_get_ns(); if (now > stats->start_group_wait_time) - blkg_stat_add(&stats->group_wait_time, + bfq_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); } @@ -82,14 +200,14 @@ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) now = ktime_get_ns(); if (now > stats->start_empty_time) - blkg_stat_add(&stats->empty_time, + bfq_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { - blkg_stat_add(&bfqg->stats.dequeue, 1); + bfq_stat_add(&bfqg->stats.dequeue, 1); } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) @@ -119,7 +237,7 @@ void bfqg_stats_update_idle_time(struct bfq_group *bfqg) u64 now = ktime_get_ns(); if (now > stats->start_idle_time) - blkg_stat_add(&stats->idle_time, + bfq_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); } @@ -137,9 +255,9 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; - blkg_stat_add(&stats->avg_queue_size_sum, + bfq_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_total(&stats->queued)); - blkg_stat_add(&stats->avg_queue_size_samples, 1); + bfq_stat_add(&stats->avg_queue_size_samples, 1); bfqg_stats_update_group_wait_time(stats); } @@ -279,13 +397,13 @@ static void bfqg_stats_reset(struct bfqg_stats *stats) blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); - blkg_stat_reset(&stats->time); - blkg_stat_reset(&stats->avg_queue_size_sum); - blkg_stat_reset(&stats->avg_queue_size_samples); - blkg_stat_reset(&stats->dequeue); - blkg_stat_reset(&stats->group_wait_time); - blkg_stat_reset(&stats->idle_time); - blkg_stat_reset(&stats->empty_time); + bfq_stat_reset(&stats->time); + bfq_stat_reset(&stats->avg_queue_size_sum); + bfq_stat_reset(&stats->avg_queue_size_samples); + bfq_stat_reset(&stats->dequeue); + bfq_stat_reset(&stats->group_wait_time); + bfq_stat_reset(&stats->idle_time); + bfq_stat_reset(&stats->empty_time); #endif } @@ -300,14 +418,14 @@ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); - blkg_stat_add_aux(&from->time, &from->time); - blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); - blkg_stat_add_aux(&to->avg_queue_size_samples, + bfq_stat_add_aux(&from->time, &from->time); + bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); + bfq_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); - blkg_stat_add_aux(&to->dequeue, &from->dequeue); - blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time); - blkg_stat_add_aux(&to->idle_time, &from->idle_time); - blkg_stat_add_aux(&to->empty_time, &from->empty_time); + bfq_stat_add_aux(&to->dequeue, &from->dequeue); + bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time); + bfq_stat_add_aux(&to->idle_time, &from->idle_time); + bfq_stat_add_aux(&to->empty_time, &from->empty_time); 
#endif } @@ -360,13 +478,13 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); blkg_rwstat_exit(&stats->queued); - blkg_stat_exit(&stats->time); - blkg_stat_exit(&stats->avg_queue_size_sum); - blkg_stat_exit(&stats->avg_queue_size_samples); - blkg_stat_exit(&stats->dequeue); - blkg_stat_exit(&stats->group_wait_time); - blkg_stat_exit(&stats->idle_time); - blkg_stat_exit(&stats->empty_time); + bfq_stat_exit(&stats->time); + bfq_stat_exit(&stats->avg_queue_size_sum); + bfq_stat_exit(&stats->avg_queue_size_samples); + bfq_stat_exit(&stats->dequeue); + bfq_stat_exit(&stats->group_wait_time); + bfq_stat_exit(&stats->idle_time); + bfq_stat_exit(&stats->empty_time); #endif } @@ -377,13 +495,13 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || blkg_rwstat_init(&stats->queued, gfp) || - blkg_stat_init(&stats->time, gfp) || - blkg_stat_init(&stats->avg_queue_size_sum, gfp) || - blkg_stat_init(&stats->avg_queue_size_samples, gfp) || - blkg_stat_init(&stats->dequeue, gfp) || - blkg_stat_init(&stats->group_wait_time, gfp) || - blkg_stat_init(&stats->idle_time, gfp) || - blkg_stat_init(&stats->empty_time, gfp)) { + bfq_stat_init(&stats->time, gfp) || + bfq_stat_init(&stats->avg_queue_size_sum, gfp) || + bfq_stat_init(&stats->avg_queue_size_samples, gfp) || + bfq_stat_init(&stats->dequeue, gfp) || + bfq_stat_init(&stats->group_wait_time, gfp) || + bfq_stat_init(&stats->idle_time, gfp) || + bfq_stat_init(&stats->empty_time, gfp)) { bfqg_stats_exit(stats); return -ENOMEM; } @@ -927,7 +1045,7 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v) static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { - u64 sum = blkg_stat_recursive_sum(pd_to_blkg(pd), + u64 sum = bfq_stat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); return __blkg_prfill_u64(sf, pd, sum); } @@ -996,11 +1114,11 @@ static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); - u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples); + u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { - v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum); + v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum); v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index c2faa77824f8..aef4fa0046b8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -777,6 +777,11 @@ enum bfqq_expiration { BFQQE_PREEMPTED /* preemption in progress */ }; +struct bfq_stat { + struct percpu_counter cpu_cnt; + atomic64_t aux_cnt; +}; + struct bfqg_stats { #if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) /* number of ios merged */ @@ -788,19 +793,19 @@ struct bfqg_stats { /* number of IOs queued up */ struct blkg_rwstat queued; /* total disk time and nr sectors dispatched by this group */ - struct blkg_stat time; + struct bfq_stat time; /* sum of number of ios queued across all samples */ - struct blkg_stat avg_queue_size_sum; + struct bfq_stat avg_queue_size_sum; /* count of samples taken for average */ - struct blkg_stat avg_queue_size_samples; + struct bfq_stat avg_queue_size_samples; /* how many times this group has been removed from service tree */ - struct blkg_stat dequeue; + struct bfq_stat dequeue; /* total 
time spent waiting for it to be assigned a timeslice. */ - struct blkg_stat group_wait_time; + struct bfq_stat group_wait_time; /* time spent idling for this blkcg_gq */ - struct blkg_stat idle_time; + struct bfq_stat idle_time; /* total time with empty current active q with other requests queued */ - struct blkg_stat empty_time; + struct bfq_stat empty_time; /* fields after this shouldn't be cleared on stat reset */ u64 start_group_wait_time; u64 start_idle_time; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 664c09866839..53b7bd4c7000 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -572,20 +572,6 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, } EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); -/** - * blkg_prfill_stat - prfill callback for blkg_stat - * @sf: seq_file to print to - * @pd: policy private data of interest - * @off: offset to the blkg_stat in @pd - * - * prfill callback for printing a blkg_stat. - */ -u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) -{ - return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off)); -} -EXPORT_SYMBOL_GPL(blkg_prfill_stat); - /** * blkg_prfill_rwstat - prfill callback for blkg_rwstat * @sf: seq_file to print to @@ -687,48 +673,6 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v) } EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive); -/** - * blkg_stat_recursive_sum - collect hierarchical blkg_stat - * @blkg: blkg of interest - * @pol: blkcg_policy which contains the blkg_stat - * @off: offset to the blkg_stat in blkg_policy_data or @blkg - * - * Collect the blkg_stat specified by @blkg, @pol and @off and all its - * online descendants and their aux counts. The caller must be holding the - * queue lock for online tests. - * - * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is - * at @off bytes into @blkg's blkg_policy_data of the policy. - */ -u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off) -{ - struct blkcg_gq *pos_blkg; - struct cgroup_subsys_state *pos_css; - u64 sum = 0; - - lockdep_assert_held(&blkg->q->queue_lock); - - rcu_read_lock(); - blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { - struct blkg_stat *stat; - - if (!pos_blkg->online) - continue; - - if (pol) - stat = (void *)blkg_to_pd(pos_blkg, pol) + off; - else - stat = (void *)blkg + off; - - sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt); - } - rcu_read_unlock(); - - return sum; -} -EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); - /** * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat * @blkg: blkg of interest diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index e4a81767e111..33f23a858438 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -65,11 +65,6 @@ struct blkcg { * blkg_[rw]stat->aux_cnt is excluded for local stats but included for * recursive. Used to carry stats of dead children. 
*/ -struct blkg_stat { - struct percpu_counter cpu_cnt; - atomic64_t aux_cnt; -}; - struct blkg_rwstat { struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; atomic64_t aux_cnt[BLKG_RWSTAT_NR]; @@ -217,7 +212,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat_sample *rwstat); -u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); int blkg_print_stat_bytes(struct seq_file *sf, void *v); @@ -225,8 +219,6 @@ int blkg_print_stat_ios(struct seq_file *sf, void *v); int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); -u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off, struct blkg_rwstat_sample *sum); @@ -579,69 +571,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) -{ - int ret; - - ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); - if (ret) - return ret; - - atomic64_set(&stat->aux_cnt, 0); - return 0; -} - -static inline void blkg_stat_exit(struct blkg_stat *stat) -{ - percpu_counter_destroy(&stat->cpu_cnt); -} - -/** - * blkg_stat_add - add a value to a blkg_stat - * @stat: target blkg_stat - * @val: value to add - * - * Add @val to @stat. The caller must ensure that IRQ on the same CPU - * don't re-enter this function for the same counter. - */ -static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) -{ - percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); -} - -/** - * blkg_stat_read - read the current value of a blkg_stat - * @stat: blkg_stat to read - */ -static inline uint64_t blkg_stat_read(struct blkg_stat *stat) -{ - return percpu_counter_sum_positive(&stat->cpu_cnt); -} - -/** - * blkg_stat_reset - reset a blkg_stat - * @stat: blkg_stat to reset - */ -static inline void blkg_stat_reset(struct blkg_stat *stat) -{ - percpu_counter_set(&stat->cpu_cnt, 0); - atomic64_set(&stat->aux_cnt, 0); -} - -/** - * blkg_stat_add_aux - add a blkg_stat into another's aux count - * @to: the destination blkg_stat - * @from: the source - * - * Add @from's count including the aux one to @to's aux count. - */ -static inline void blkg_stat_add_aux(struct blkg_stat *to, - struct blkg_stat *from) -{ - atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt), - &to->aux_cnt); -} - static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) { int i, ret; -- cgit v1.2.3-58-ga151 From e47bc4eda953928644109101d07c9c95dc29a458 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 20 Jun 2019 10:59:16 -0700 Subject: block: add centralized REQ_OP_XXX to string helper In order to centralize the REQ_OP_XXX to string conversion which can be used in the block layer and different places in the kernel like f2fs, this patch adds a new helper function along with an array similar to the one present in blk-mq-debugfs.c.
We keep this helper functionality centralized under blk-core.c instead of blk-mq-debugfs.c since blk-core.c is configured using CONFIG_BLOCK and it will not be dependent on blk-mq-debugfs.c which is configured using CONFIG_BLK_DEBUG_FS. The next patch adjusts the code in blk-mq-debugfs.c to use the newly introduced helper. Reviewed-by: Bart Van Assche Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- block/blk-core.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 3 +++ 2 files changed, 39 insertions(+) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index c97da29ddc07..129204dd3bae 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -120,6 +120,42 @@ void blk_rq_init(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_rq_init); +#define REQ_OP_NAME(name) [REQ_OP_##name] = #name +static const char *const blk_op_name[] = { + REQ_OP_NAME(READ), + REQ_OP_NAME(WRITE), + REQ_OP_NAME(FLUSH), + REQ_OP_NAME(DISCARD), + REQ_OP_NAME(SECURE_ERASE), + REQ_OP_NAME(ZONE_RESET), + REQ_OP_NAME(WRITE_SAME), + REQ_OP_NAME(WRITE_ZEROES), + REQ_OP_NAME(SCSI_IN), + REQ_OP_NAME(SCSI_OUT), + REQ_OP_NAME(DRV_IN), + REQ_OP_NAME(DRV_OUT), +}; +#undef REQ_OP_NAME + +/** + * blk_op_str - Return string XXX in the REQ_OP_XXX. + * @op: REQ_OP_XXX. + * + * Description: Centralize block layer function to convert REQ_OP_XXX into + * string format. Useful in the debugging and tracing bio or request. For + * invalid REQ_OP_XXX it returns string "UNKNOWN". + */ +inline const char *blk_op_str(unsigned int op) +{ + const char *op_str = "UNKNOWN"; + + if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op]) + op_str = blk_op_name[op]; + + return op_str; +} +EXPORT_SYMBOL_GPL(blk_op_str); + static const struct { int errno; const char *name; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5d3bb45dfb6..0c482371c8b3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,6 +865,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +/* Helper to convert REQ_OP_XXX to its string format XXX */ +extern const char *blk_op_str(unsigned int op); + int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -- cgit v1.2.3-58-ga151 From 0ce353794b6c4dc88592b942e94b33cd1bf2ef54 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 20 Jun 2019 10:59:19 -0700 Subject: f2fs: use block layer helper for show_bio_op macro Adjust the f2fs tracing code to use the newly introduced block layer function blk_op_str() which converts the REQ_OP_XXX into the string XXX.
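As a quick illustration (not part of either patch; the pr_debug() call and its surroundings are assumed), a driver or tracing site can obtain the symbolic name of any request through the new helper together with the existing req_op() accessor:

	/* sketch: log the symbolic name of a request's operation */
	pr_debug("%s: completing rq with op=%s\n", __func__,
		 blk_op_str(req_op(rq)));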
Reviewed-by: Chao Yu Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- include/trace/events/f2fs.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 53b96f12300c..e3dc031af7f5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -76,16 +76,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); #define show_bio_type(op,op_flags) show_bio_op(op), \ show_bio_op_flags(op_flags) -#define show_bio_op(op) \ - __print_symbolic(op, \ - { REQ_OP_READ, "READ" }, \ - { REQ_OP_WRITE, "WRITE" }, \ - { REQ_OP_FLUSH, "FLUSH" }, \ - { REQ_OP_DISCARD, "DISCARD" }, \ - { REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \ - { REQ_OP_ZONE_RESET, "ZONE_RESET" }, \ - { REQ_OP_WRITE_SAME, "WRITE_SAME" }, \ - { REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" }) +#define show_bio_op(op) blk_op_str(op) #define show_bio_op_flags(flags) \ __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ -- cgit v1.2.3-58-ga151 From 150d71f725fd2f5a0015b7fa8df0816a207d4e4b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:03 -0700 Subject: nvmet-fc: add transport discovery change event callback support This patch adds support for the nvmet discovery_change transport op. In turn, the transport adds its own LLDD API callback discovery_event op to request the LLDD to generate an RSCN for the discovery change. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 11 +++++++++++ include/linux/nvme-fc-driver.h | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 508661af0f50..1f252c9a953a 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2549,6 +2549,16 @@ nvmet_fc_remove_port(struct nvmet_port *port) kfree(pe); } +static void +nvmet_fc_discovery_chg(struct nvmet_port *port) +{ + struct nvmet_fc_port_entry *pe = port->priv; + struct nvmet_fc_tgtport *tgtport = pe->tgtport; + + if (tgtport && tgtport->ops->discovery_event) + tgtport->ops->discovery_event(&tgtport->fc_target_port); +} + static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_FC, @@ -2557,6 +2567,7 @@ static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .remove_port = nvmet_fc_remove_port, .queue_response = nvmet_fc_fcp_nvme_cmd_done, .delete_ctrl = nvmet_fc_delete_ctrl, + .discovery_chg = nvmet_fc_discovery_chg, }; static int __init nvmet_fc_init_module(void) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index c48e96436f56..98d904961b33 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -791,6 +791,11 @@ struct nvmet_fc_target_port { * nvmefc_tgt_fcp_req. * Entrypoint is Optional. * + * @discovery_event: Called by the transport to generate an RSCN + * change notifications to NVME initiators. The RSCN notifications + * should cause the initiator to rescan the discovery controller + * on the targetport. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1.
@@ -832,6 +837,7 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*discovery_event)(struct nvmet_fc_target_port *tgtport); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3-58-ga151 From 7a1f46e3f75cff5042dfa1bb80c9929a0e412abc Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 6 Jun 2019 14:30:14 +0900 Subject: nvme: introduce nvme_is_fabrics to check fabrics cmd This patch introduces a nvme_is_fabrics() inline function to check whether or not the given command structure is for fabrics. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- drivers/nvme/target/core.c | 2 +- drivers/nvme/target/fabrics-cmd.c | 2 +- drivers/nvme/target/fc.c | 2 +- include/linux/nvme.h | 7 ++++++- 5 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5838f7cd53ac..1994d5b42f94 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -578,7 +578,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, switch (ctrl->state) { case NVME_CTRL_NEW: case NVME_CTRL_CONNECTING: - if (req->cmd->common.opcode == nvme_fabrics_command && + if (nvme_is_fabrics(req->cmd) && req->cmd->fabrics.fctype == nvme_fabrics_type_connect) return true; break; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 43e8c4adc1f4..0587707b1a25 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -873,7 +873,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, status = nvmet_parse_connect_cmd(req); else if (likely(req->sq->qid != 0)) status = nvmet_parse_io_cmd(req); - else if (req->cmd->common.opcode == nvme_fabrics_command) + else if (nvme_is_fabrics(req->cmd)) status = nvmet_parse_fabrics_cmd(req); else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC) status = nvmet_parse_discovery_cmd(req); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3b9f79aba98f..d16b55ffe79f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -268,7 +268,7 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; - if (cmd->common.opcode != nvme_fabrics_command) { + if (!nvme_is_fabrics(cmd)) { pr_err("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1f252c9a953a..ce8d819f86cc 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1806,7 +1806,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, */ rspcnt = atomic_inc_return(&fod->queue->zrspcnt); if (!(rspcnt % fod->queue->ersp_ratio) || - sqe->opcode == nvme_fabrics_command || + nvme_is_fabrics((struct nvme_command *) sqe) || xfr_length != fod->req.transfer_len || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8028adacaff3..7080923e78d1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1165,6 +1165,11 @@ struct nvme_command { }; }; +static inline bool nvme_is_fabrics(struct nvme_command *cmd) +{ + return cmd->common.opcode == nvme_fabrics_command; +} + 
struct nvme_error_slot { __le64 error_count; __le16 sqid; @@ -1186,7 +1191,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * * Why can't we simply have a Fabrics In and Fabrics out command? */ - if (unlikely(cmd->common.opcode == nvme_fabrics_command)) + if (unlikely(nvme_is_fabrics(cmd))) return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } -- cgit v1.2.3-58-ga151 From 26f2990d85838caa650744a0ded9e38988a2bd7f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:30 +0900 Subject: nvme-trace: move opcode symbol print to nvme.h The following patches are going to provide the target-side trace, which might need these kinds of macros. It would be great if they could be shared between both the host and target sides. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.h | 44 -------------------------------------------- include/linux/nvme.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index e71502d141ed..62ee29107c32 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -16,50 +16,6 @@ #include "nvme.h" -#define nvme_admin_opcode_name(opcode) { opcode, #opcode } -#define show_admin_opcode_name(val) \ - __print_symbolic(val, \ - nvme_admin_opcode_name(nvme_admin_delete_sq), \ - nvme_admin_opcode_name(nvme_admin_create_sq), \ - nvme_admin_opcode_name(nvme_admin_get_log_page), \ - nvme_admin_opcode_name(nvme_admin_delete_cq), \ - nvme_admin_opcode_name(nvme_admin_create_cq), \ - nvme_admin_opcode_name(nvme_admin_identify), \ - nvme_admin_opcode_name(nvme_admin_abort_cmd), \ - nvme_admin_opcode_name(nvme_admin_set_features), \ - nvme_admin_opcode_name(nvme_admin_get_features), \ - nvme_admin_opcode_name(nvme_admin_async_event), \ - nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ - nvme_admin_opcode_name(nvme_admin_activate_fw), \ - nvme_admin_opcode_name(nvme_admin_download_fw), \ - nvme_admin_opcode_name(nvme_admin_ns_attach), \ - nvme_admin_opcode_name(nvme_admin_keep_alive), \ - nvme_admin_opcode_name(nvme_admin_directive_send), \ - nvme_admin_opcode_name(nvme_admin_directive_recv), \ - nvme_admin_opcode_name(nvme_admin_dbbuf), \ - nvme_admin_opcode_name(nvme_admin_format_nvm), \ - nvme_admin_opcode_name(nvme_admin_security_send), \ - nvme_admin_opcode_name(nvme_admin_security_recv), \ - nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) - -#define nvme_opcode_name(opcode) { opcode, #opcode } -#define show_nvm_opcode_name(val) \ - __print_symbolic(val, \ - nvme_opcode_name(nvme_cmd_flush), \ - nvme_opcode_name(nvme_cmd_write), \ - nvme_opcode_name(nvme_cmd_read), \ - nvme_opcode_name(nvme_cmd_write_uncor), \ - nvme_opcode_name(nvme_cmd_compare), \ - nvme_opcode_name(nvme_cmd_write_zeroes), \ - nvme_opcode_name(nvme_cmd_dsm), \ - nvme_opcode_name(nvme_cmd_resv_register), \ - nvme_opcode_name(nvme_cmd_resv_report), \ - nvme_opcode_name(nvme_cmd_resv_acquire), \ - nvme_opcode_name(nvme_cmd_resv_release)) - -#define show_opcode_name(qid, opcode) \ - (qid ?
show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7080923e78d1..86b3d04baf20 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -562,6 +562,22 @@ enum nvme_opcode { nvme_cmd_resv_release = 0x15, }; +#define nvme_opcode_name(opcode) { opcode, #opcode } +#define show_nvm_opcode_name(val) \ + __print_symbolic(val, \ + nvme_opcode_name(nvme_cmd_flush), \ + nvme_opcode_name(nvme_cmd_write), \ + nvme_opcode_name(nvme_cmd_read), \ + nvme_opcode_name(nvme_cmd_write_uncor), \ + nvme_opcode_name(nvme_cmd_compare), \ + nvme_opcode_name(nvme_cmd_write_zeroes), \ + nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_resv_register), \ + nvme_opcode_name(nvme_cmd_resv_report), \ + nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_resv_release)) + + /* * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier * @@ -794,6 +810,35 @@ enum nvme_admin_opcode { nvme_admin_sanitize_nvm = 0x84, }; +#define nvme_admin_opcode_name(opcode) { opcode, #opcode } +#define show_admin_opcode_name(val) \ + __print_symbolic(val, \ + nvme_admin_opcode_name(nvme_admin_delete_sq), \ + nvme_admin_opcode_name(nvme_admin_create_sq), \ + nvme_admin_opcode_name(nvme_admin_get_log_page), \ + nvme_admin_opcode_name(nvme_admin_delete_cq), \ + nvme_admin_opcode_name(nvme_admin_create_cq), \ + nvme_admin_opcode_name(nvme_admin_identify), \ + nvme_admin_opcode_name(nvme_admin_abort_cmd), \ + nvme_admin_opcode_name(nvme_admin_set_features), \ + nvme_admin_opcode_name(nvme_admin_get_features), \ + nvme_admin_opcode_name(nvme_admin_async_event), \ + nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ + nvme_admin_opcode_name(nvme_admin_activate_fw), \ + nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_ns_attach), \ + nvme_admin_opcode_name(nvme_admin_keep_alive), \ + nvme_admin_opcode_name(nvme_admin_directive_send), \ + nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_dbbuf), \ + nvme_admin_opcode_name(nvme_admin_format_nvm), \ + nvme_admin_opcode_name(nvme_admin_security_send), \ + nvme_admin_opcode_name(nvme_admin_security_recv), \ + nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) + +#define show_opcode_name(qid, opcode) \ + (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) + enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), -- cgit v1.2.3-58-ga151 From ad795e47cdef078bfd9e48745040d12104005aab Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:31 +0900 Subject: nvme-trace: support for fabrics commands on the host side This patch introduces a fabrics command tracing feature on the host side. This patch does not include any changes to the previous host-side tracing, but just adds fabrics command parsing in the cmd=() format.
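For readability, the dispatch implemented by the updated parse_nvme_cmd() macro can be written out as a plain function (an illustrative sketch only; parse_cmd() is a made-up name, while the three nvme_trace_parse_*() helpers are the real ones from this series):

	static const char *parse_cmd(struct trace_seq *p, int qid, u8 opcode,
				     u8 fctype, u8 *cdw10)
	{
		/* fabrics commands are decoded by fctype, not by opcode/qid */
		if (opcode == nvme_fabrics_command)
			return nvme_trace_parse_fabrics_cmd(p, fctype, cdw10);
		/* qid 0 is the admin queue; other queues carry NVM I/O */
		return qid ? nvme_trace_parse_nvm_cmd(p, opcode, cdw10) :
			     nvme_trace_parse_admin_cmd(p, opcode, cdw10);
	}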
Signed-off-by: Minwoo Im [hch: fixed some whitespace damage] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/trace.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/trace.h | 20 ++++++++++----- include/linux/nvme.h | 20 ++++++++++++--- 3 files changed, 94 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 14b0d2993cbe..f01ad0fd60bb 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -135,6 +135,69 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, } } +static const char *nvme_trace_fabrics_property_set(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + u64 value = get_unaligned_le64(spc + 8); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x, value=0x%llx", + attrib, ofst, value); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_connect(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 recfmt = get_unaligned_le16(spc); + u16 qid = get_unaligned_le16(spc + 2); + u16 sqsize = get_unaligned_le16(spc + 4); + u8 cattr = spc[6]; + u32 kato = get_unaligned_le32(spc + 8); + + trace_seq_printf(p, "recfmt=%u, qid=%u, sqsize=%u, cattr=%u, kato=%u", + recfmt, qid, sqsize, cattr, kato); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_property_get(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 attrib = spc[0]; + u32 ofst = get_unaligned_le32(spc + 4); + + trace_seq_printf(p, "attrib=%u, ofst=0x%x", attrib, ofst); + trace_seq_putc(p, 0); + return ret; +} + +static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "spcecific=%*ph", 24, spc); + trace_seq_putc(p, 0); + return ret; +} + +const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, + u8 fctype, u8 *spc) +{ + switch (fctype) { + case nvme_fabrics_type_property_set: + return nvme_trace_fabrics_property_set(p, spc); + case nvme_fabrics_type_connect: + return nvme_trace_fabrics_connect(p, spc); + case nvme_fabrics_type_property_get: + return nvme_trace_fabrics_property_get(p, spc); + default: + return nvme_trace_fabrics_common(p, spc); + } +} + const char *nvme_trace_disk_name(struct trace_seq *p, char *name) { const char *ret = trace_seq_buffer_ptr(p); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 62ee29107c32..19a18c87fb7b 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -20,11 +20,15 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, u8 *cdw10); +const char *nvme_trace_parse_fabrics_cmd(struct trace_seq *p, u8 fctype, + u8 *spc); -#define parse_nvme_cmd(qid, opcode, cdw10) \ - (qid ? \ - nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \ - nvme_trace_parse_admin_cmd(p, opcode, cdw10)) +#define parse_nvme_cmd(qid, opcode, fctype, cdw10) \ + ((opcode) == nvme_fabrics_command ? \ + nvme_trace_parse_fabrics_cmd(p, fctype, cdw10) : \ + ((qid) ? 
\ + nvme_trace_parse_nvm_cmd(p, opcode, cdw10) : \ + nvme_trace_parse_admin_cmd(p, opcode, cdw10))) const char *nvme_trace_disk_name(struct trace_seq *p, char *name); #define __print_disk_name(name) \ @@ -49,6 +53,7 @@ TRACE_EVENT(nvme_setup_cmd, __field(int, qid) __field(u8, opcode) __field(u8, flags) + __field(u8, fctype) __field(u16, cid) __field(u32, nsid) __field(u64, metadata) @@ -62,6 +67,7 @@ TRACE_EVENT(nvme_setup_cmd, __entry->cid = cmd->common.command_id; __entry->nsid = le32_to_cpu(cmd->common.nsid); __entry->metadata = le64_to_cpu(cmd->common.metadata); + __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, sizeof(__entry->cdw10)); @@ -70,8 +76,10 @@ TRACE_EVENT(nvme_setup_cmd, __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, - show_opcode_name(__entry->qid, __entry->opcode), - parse_nvme_cmd(__entry->qid, __entry->opcode, __entry->cdw10)) + show_opcode_name(__entry->qid, __entry->opcode, + __entry->fctype), + parse_nvme_cmd(__entry->qid, __entry->opcode, + __entry->fctype, __entry->cdw10)) ); TRACE_EVENT(nvme_complete_rq, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 86b3d04baf20..d98b2d8baf4e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -836,9 +836,6 @@ enum nvme_admin_opcode { nvme_admin_opcode_name(nvme_admin_security_recv), \ nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) -#define show_opcode_name(qid, opcode) \ - (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), @@ -1053,6 +1050,23 @@ enum nvmf_capsule_command { nvme_fabrics_type_property_get = 0x04, }; +#define nvme_fabrics_type_name(type) { type, #type } +#define show_fabrics_type_name(type) \ + __print_symbolic(type, \ + nvme_fabrics_type_name(nvme_fabrics_type_property_set), \ + nvme_fabrics_type_name(nvme_fabrics_type_connect), \ + nvme_fabrics_type_name(nvme_fabrics_type_property_get)) + +/* + * If not fabrics command, fctype will be ignored. + */ +#define show_opcode_name(qid, opcode, fctype) \ + ((opcode) == nvme_fabrics_command ? \ + show_fabrics_type_name(fctype) : \ + ((qid) ? \ + show_nvm_opcode_name(opcode) : \ + show_admin_opcode_name(opcode))) + struct nvmf_common_command { __u8 opcode; __u8 resv1; -- cgit v1.2.3-58-ga151 From a5b47a40bed8b19e956872fb55097d676a68f59e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 27 Jun 2019 11:59:41 +0900 Subject: block: Remove unused code bio_flush_dcache_pages() is unused. Remove it. 
Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/bio.c | 12 ------------ include/linux/bio.h | 11 ----------- 2 files changed, 23 deletions(-) (limited to 'include') diff --git a/block/bio.c b/block/bio.c index ad9c3aa9bf7d..bb55b94bb361 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1760,18 +1760,6 @@ void generic_end_io_acct(struct request_queue *q, int req_op, } EXPORT_SYMBOL(generic_end_io_acct); -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -void bio_flush_dcache_pages(struct bio *bi) -{ - struct bio_vec bvec; - struct bvec_iter iter; - - bio_for_each_segment(bvec, bi, iter) - flush_dcache_page(bvec.bv_page); -} -EXPORT_SYMBOL(bio_flush_dcache_pages); -#endif - static inline bool bio_remaining_done(struct bio *bio) { /* diff --git a/include/linux/bio.h b/include/linux/bio.h index ee11c4324751..5a8ae56e09ff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -443,17 +443,6 @@ void generic_end_io_acct(struct request_queue *q, int op, struct hd_struct *part, unsigned long start_time); -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" -#endif -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -extern void bio_flush_dcache_pages(struct bio *bi); -#else -static inline void bio_flush_dcache_pages(struct bio *bi) -{ -} -#endif - extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); -- cgit v1.2.3-58-ga151 From 5e4c7cf60ec3cad59703c203de1dfb31ea608e6e Mon Sep 17 00:00:00 2001 From: Revanth Rajashekar Date: Thu, 27 Jun 2019 16:30:02 -0600 Subject: block: sed-opal: PSID reverttper capability PSID is a 32-character password printed on the drive label, used to prove physical access to the drive. This PSID reverttper function is very useful for regaining control over the drive when it is locked and the user can no longer access it because of some failure. However, *all the data on the drive is completely erased*. This method is advisable only when the user has exhausted all other recovery methods.
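A userspace consumer of the new ioctl might look like the following sketch (hypothetical; struct opal_key and IOC_OPAL_PSID_REVERT_TPR are real, the function name, device path handling and error handling are assumed):

	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/sed-opal.h>

	/* sketch: revert the TPer using the PSID printed on the drive label.
	 * WARNING: this erases ALL data on the drive. */
	static int psid_revert(const char *path, const char *psid)
	{
		struct opal_key key = { .lr = 0 };
		int fd, ret;

		fd = open(path, O_RDWR);
		if (fd < 0)
			return -1;
		key.key_len = strlen(psid);
		memcpy(key.key, psid, key.key_len);
		ret = ioctl(fd, IOC_OPAL_PSID_REVERT_TPR, &key);
		close(fd);
		return ret;
	}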
PSID capabilities are described in: https://trustedcomputinggroup.org/wp-content/uploads/TCG_Storage-Opal_Feature_Set_PSID_v1.00_r1.00.pdf Signed-off-by: Revanth Rajashekar Signed-off-by: Jens Axboe --- block/sed-opal.c | 33 +++++++++++++++++++++++++++++---- include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 1 + 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/block/sed-opal.c b/block/sed-opal.c index a46e8d13e16d..bb8ef7963d11 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1307,6 +1307,7 @@ static int start_generic_opal_session(struct opal_dev *dev, break; case OPAL_ADMIN1_UID: case OPAL_SID_UID: + case OPAL_PSID_UID: add_token_u8(&err, dev, OPAL_STARTNAME); add_token_u8(&err, dev, 0); /* HostChallenge */ add_token_bytestring(&err, dev, key, key_len); @@ -1367,6 +1368,16 @@ static int start_admin1LSP_opal_session(struct opal_dev *dev, void *data) key->key, key->key_len); } +static int start_PSID_opal_session(struct opal_dev *dev, void *data) +{ + const struct opal_key *okey = data; + + return start_generic_opal_session(dev, OPAL_PSID_UID, + OPAL_ADMINSP_UID, + okey->key, + okey->key_len); +} + static int start_auth_opal_session(struct opal_dev *dev, void *data) { struct opal_session_info *session = data; @@ -2030,17 +2041,28 @@ static int opal_add_user_to_lr(struct opal_dev *dev, return ret; } -static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) +static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal, bool psid) { + /* controller will terminate session */ const struct opal_step revert_steps[] = { { start_SIDASP_opal_session, opal }, - { revert_tper, } /* controller will terminate session */ + { revert_tper, } }; + const struct opal_step psid_revert_steps[] = { + { start_PSID_opal_session, opal }, + { revert_tper, } + }; + int ret; mutex_lock(&dev->dev_lock); setup_opal_dev(dev); - ret = execute_steps(dev, revert_steps, ARRAY_SIZE(revert_steps)); + if (psid) + ret = execute_steps(dev, psid_revert_steps, + ARRAY_SIZE(psid_revert_steps)); + else + ret = execute_steps(dev, revert_steps, + ARRAY_SIZE(revert_steps)); mutex_unlock(&dev->dev_lock); /* @@ -2280,7 +2302,7 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) ret = opal_activate_user(dev, p); break; case IOC_OPAL_REVERT_TPR: - ret = opal_reverttper(dev, p); + ret = opal_reverttper(dev, p, false); break; case IOC_OPAL_LR_SETUP: ret = opal_setup_locking_range(dev, p); @@ -2297,6 +2319,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) case IOC_OPAL_SECURE_ERASE_LR: ret = opal_secure_erase_locking_range(dev, p); break; + case IOC_OPAL_PSID_REVERT_TPR: + ret = opal_reverttper(dev, p, true); + break; default: break; } diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 3e76b6d7d97f..f03bbffd3281 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -39,6 +39,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_ENABLE_DISABLE_MBR: case IOC_OPAL_ERASE_LR: case IOC_OPAL_SECURE_ERASE_LR: + case IOC_OPAL_PSID_REVERT_TPR: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 33e53b80cd1f..7a03e5b4df6e 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -107,5 +107,6 @@ struct opal_mbr_data { #define IOC_OPAL_ENABLE_DISABLE_MBR _IOW('p', 229, struct opal_mbr_data) #define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info) #define IOC_OPAL_SECURE_ERASE_LR 
_IOW('p', 231, struct opal_session_info) +#define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3-58-ga151 From b2d0d99135ad145667765cbd27f148c1a4cd50d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 15:49:20 +0200 Subject: block: move the BIO_NO_PAGE_REF check into bio_release_pages Move the BIO_NO_PAGE_REF check into bio_release_pages instead of duplicating it in both callers. Also make the function available outside of bio.c so that we can reuse it in other direct I/O implementations. Reviewed-by: Minwoo Im Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 11 ++++++----- include/linux/bio.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/block/bio.c b/block/bio.c index bb55b94bb361..b35356c6093b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -845,11 +845,14 @@ static void bio_get_pages(struct bio *bio) get_page(bvec->bv_page); } -static void bio_release_pages(struct bio *bio) +void bio_release_pages(struct bio *bio) { struct bvec_iter_all iter_all; struct bio_vec *bvec; + if (bio_flagged(bio, BIO_NO_PAGE_REF)) + return; + bio_for_each_segment_all(bvec, bio, iter_all) put_page(bvec->bv_page); } @@ -1681,8 +1684,7 @@ static void bio_dirty_fn(struct work_struct *work) next = bio->bi_private; bio_set_pages_dirty(bio); - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) - bio_release_pages(bio); + bio_release_pages(bio); bio_put(bio); } } @@ -1698,8 +1700,7 @@ void bio_check_pages_dirty(struct bio *bio) goto defer; } - if (!bio_flagged(bio, BIO_NO_PAGE_REF)) - bio_release_pages(bio); + bio_release_pages(bio); bio_put(bio); return; defer: diff --git a/include/linux/bio.h b/include/linux/bio.h index 5a8ae56e09ff..6d82b4856282 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -426,6 +426,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); +void bio_release_pages(struct bio *bio); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct iov_iter *, gfp_t); -- cgit v1.2.3-58-ga151 From d241a95f3514a5eb544dfd8d9d141ffd1c89b707 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 15:49:21 +0200 Subject: block: optionally mark pages dirty in bio_release_pages A lot of callers of bio_release_pages also want to mark the released pages as dirty. Add a mark_dirty parameter to avoid a second relatively expensive bio_for_each_segment_all loop. 
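To illustrate the intended calling convention (a hypothetical caller; only the bio_release_pages() signature comes from this patch):

	/* sketch: complete a direct-I/O bio that pinned user pages */
	static void dio_bio_done(struct bio *bio, bool is_read)
	{
		/*
		 * Pages were written to on a read, so fold the
		 * set_page_dirty_lock() pass into the put_page() loop
		 * instead of walking the segments twice.
		 */
		bio_release_pages(bio, is_read);
		bio_put(bio);
	}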
Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 12 +++++++----- include/linux/bio.h | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/block/bio.c b/block/bio.c index b35356c6093b..8a7b315630ce 100644 --- a/block/bio.c +++ b/block/bio.c @@ -845,7 +845,7 @@ static void bio_get_pages(struct bio *bio) get_page(bvec->bv_page); } -void bio_release_pages(struct bio *bio) +void bio_release_pages(struct bio *bio, bool mark_dirty) { struct bvec_iter_all iter_all; struct bio_vec *bvec; @@ -853,8 +853,11 @@ void bio_release_pages(struct bio *bio) if (bio_flagged(bio, BIO_NO_PAGE_REF)) return; - bio_for_each_segment_all(bvec, bio, iter_all) + bio_for_each_segment_all(bvec, bio, iter_all) { + if (mark_dirty && !PageCompound(bvec->bv_page)) + set_page_dirty_lock(bvec->bv_page); put_page(bvec->bv_page); + } } static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) @@ -1683,8 +1686,7 @@ static void bio_dirty_fn(struct work_struct *work) while ((bio = next) != NULL) { next = bio->bi_private; - bio_set_pages_dirty(bio); - bio_release_pages(bio); + bio_release_pages(bio, true); bio_put(bio); } } @@ -1700,7 +1702,7 @@ void bio_check_pages_dirty(struct bio *bio) goto defer; } - bio_release_pages(bio); + bio_release_pages(bio, false); bio_put(bio); return; defer: diff --git a/include/linux/bio.h b/include/linux/bio.h index 6d82b4856282..2d8c73f0ecaf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -426,7 +426,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -void bio_release_pages(struct bio *bio); +void bio_release_pages(struct bio *bio, bool mark_dirty); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct iov_iter *, gfp_t); -- cgit v1.2.3-58-ga151 From b620743077e291ae7d0debd21f50413a8c266229 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 15:49:28 +0200 Subject: block: never take page references for ITER_BVEC If we pass pages through an iov_iter we always already have a reference in the caller. Thus remove ITER_BVEC_FLAG_NO_REF and don't take references to pages by default for bvec-backed iov_iters.
0 : ret; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f11b7dc16e9d..44c9985f352a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -264,20 +264,12 @@ lo_do_transfer(struct loop_device *lo, int cmd, return ret; } -static inline void loop_iov_iter_bvec(struct iov_iter *i, - unsigned int direction, const struct bio_vec *bvec, - unsigned long nr_segs, size_t count) -{ - iov_iter_bvec(i, direction, bvec, nr_segs, count); - i->type |= ITER_BVEC_FLAG_NO_REF; -} - static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) { struct iov_iter i; ssize_t bw; - loop_iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); + iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len); file_start_write(file); bw = vfs_iter_write(file, &i, ppos, 0); @@ -355,7 +347,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, ssize_t len; rq_for_each_segment(bvec, rq, iter) { - loop_iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); + iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) return len; @@ -396,7 +388,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq, b.bv_offset = 0; b.bv_len = bvec.bv_len; - loop_iov_iter_bvec(&i, READ, &b, 1, b.bv_len); + iov_iter_bvec(&i, READ, &b, 1, b.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) { ret = len; @@ -563,7 +555,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, } atomic_set(&cmd->ref, 2); - loop_iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); + iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); iter.iov_offset = offset; cmd->iocb.ki_pos = pos; diff --git a/fs/io_uring.c b/fs/io_uring.c index 86a2bd721900..eb6ab1507913 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -997,9 +997,6 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); if (offset) iov_iter_advance(iter, offset); - - /* don't drop a reference to these pages */ - iter->type |= ITER_BVEC_FLAG_NO_REF; return 0; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 2c90a0842ee8..cea1761c5672 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -19,9 +19,6 @@ struct kvec { }; enum iter_type { - /* set if ITER_BVEC doesn't hold a bv_page ref */ - ITER_BVEC_FLAG_NO_REF = 2, - /* iter types */ ITER_IOVEC = 4, ITER_KVEC = 8, @@ -56,7 +53,7 @@ struct iov_iter { static inline enum iter_type iov_iter_type(const struct iov_iter *i) { - return i->type & ~(READ | WRITE | ITER_BVEC_FLAG_NO_REF); + return i->type & ~(READ | WRITE); } static inline bool iter_is_iovec(const struct iov_iter *i) @@ -89,11 +86,6 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) return i->type & (READ | WRITE); } -static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i) -{ - return (i->type & ITER_BVEC_FLAG_NO_REF) != 0; -} - /* * Total number of bytes covered by an iovec. * -- cgit v1.2.3-58-ga151 From c9888443413e4e06013e482fc484dbb9c559c145 Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Tue, 21 May 2019 22:46:44 +0200 Subject: block: sed-opal: add ioctl for done-mark of shadow mbr Enable users to mark the shadow mbr as done without completely deactivating the shadow mbr feature. This may be useful on reboots, when the power to the disk is not disconnected in between and the shadow mbr stores the required boot files. 
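A userspace consumer of the new ioctl might look like the following sketch (hypothetical; struct opal_mbr_done, the OPAL_MBR_DONE flag and IOC_OPAL_MBR_DONE come from this patch, everything else is assumed):

	#include <sys/ioctl.h>
	#include <linux/sed-opal.h>

	/* sketch: mark the shadow MBR done after boot, keeping it enabled */
	static int opal_mbr_mark_done(int fd, const struct opal_key *key)
	{
		struct opal_mbr_done mbr = {
			.key = *key,
			.done_flag = OPAL_MBR_DONE,
		};

		return ioctl(fd, IOC_OPAL_MBR_DONE, &mbr);
	}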
Of course, this also saves the (few) commands required to enable the feature when it is already enabled and one only wants to mark the shadow mbr as done. Co-authored-by: David Kozub Signed-off-by: Jonas Rabenstein Signed-off-by: David Kozub Reviewed-by: Christoph Hellwig Reviewed-by: Scott Bauer Reviewed-by: Jon Derrick Signed-off-by: Jens Axboe --- block/sed-opal.c | 27 +++++++++++++++++++++++++++ include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 12 ++++++++++++ 3 files changed, 40 insertions(+) (limited to 'include') diff --git a/block/sed-opal.c b/block/sed-opal.c index c54019c11e91..f94f359dd688 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1989,6 +1989,30 @@ static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, return ret; } +static int opal_set_mbr_done(struct opal_dev *dev, + struct opal_mbr_done *mbr_done) +{ + u8 mbr_done_tf = mbr_done->done_flag == OPAL_MBR_DONE ? + OPAL_TRUE : OPAL_FALSE; + + const struct opal_step mbr_steps[] = { + { start_admin1LSP_opal_session, &mbr_done->key }, + { set_mbr_done, &mbr_done_tf }, + { end_opal_session, } + }; + int ret; + + if (mbr_done->done_flag != OPAL_MBR_DONE && + mbr_done->done_flag != OPAL_MBR_NOT_DONE) + return -EINVAL; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev); + ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps)); + mutex_unlock(&dev->dev_lock); + return ret; +} + static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) { struct opal_suspend_data *suspend; @@ -2310,6 +2334,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) case IOC_OPAL_ENABLE_DISABLE_MBR: ret = opal_enable_disable_shadow_mbr(dev, p); break; + case IOC_OPAL_MBR_DONE: + ret = opal_set_mbr_done(dev, p); + break; case IOC_OPAL_ERASE_LR: ret = opal_erase_locking_range(dev, p); break; diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index f03bbffd3281..f834e8a1495f 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -40,6 +40,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_ERASE_LR: case IOC_OPAL_SECURE_ERASE_LR: case IOC_OPAL_PSID_REVERT_TPR: + case IOC_OPAL_MBR_DONE: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 7a03e5b4df6e..5681f55d334b 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -20,6 +20,11 @@ enum opal_mbr { OPAL_MBR_DISABLE = 0x01, }; +enum opal_mbr_done_flag { + OPAL_MBR_NOT_DONE = 0x0, + OPAL_MBR_DONE = 0x01 +}; + enum opal_user { OPAL_ADMIN1 = 0x0, OPAL_USER1 = 0x01, @@ -95,6 +100,12 @@ struct opal_mbr_data { __u8 __align[7]; }; +struct opal_mbr_done { + struct opal_key key; + __u8 done_flag; + __u8 __align[7]; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -108,5 +119,6 @@ struct opal_mbr_data { #define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info) #define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) #define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) +#define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3-58-ga151 From a9b25b4cf2b76d320afc999f881ccb805fecdd84 Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Tue, 21 May 2019 22:46:45 +0200 Subject: block: sed-opal: ioctl for writing to shadow mbr Allow modification of the shadow mbr.
If the shadow mbr is not marked as done, this data will be presented read-only as the device content. Only after marking the shadow mbr as done and unlocking a locking range does the actual content become accessible. Co-authored-by: David Kozub Signed-off-by: Jonas Rabenstein Signed-off-by: David Kozub Reviewed-by: Scott Bauer Reviewed-by: Jon Derrick Signed-off-by: Jens Axboe --- block/sed-opal.c | 91 ++++++++++++++++++++++++++++++++++++++++++- include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 8 ++++ 3 files changed, 98 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/block/sed-opal.c b/block/sed-opal.c index f94f359dd688..b02ef2ff0d75 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -26,6 +26,9 @@ #define IO_BUFFER_LENGTH 2048 #define MAX_TOKS 64 +/* Number of bytes needed by cmd_finalize. */ +#define CMD_FINALIZE_BYTES_NEEDED 7 + struct opal_step { int (*fn)(struct opal_dev *dev, void *data); void *data; @@ -523,12 +526,17 @@ static int opal_discovery0_step(struct opal_dev *dev) return execute_step(dev, &discovery0_step, 0); } +static size_t remaining_size(struct opal_dev *cmd) +{ + return IO_BUFFER_LENGTH - cmd->pos; +} + static bool can_add(int *err, struct opal_dev *cmd, size_t len) { if (*err) return false; - if (len > IO_BUFFER_LENGTH || cmd->pos > IO_BUFFER_LENGTH - len) { + if (remaining_size(cmd) < len) { pr_debug("Error adding %zu bytes: end of buffer.\n", len); *err = -ERANGE; return false; @@ -674,7 +682,11 @@ static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn) struct opal_header *hdr; int err = 0; - /* close the parameter list opened from cmd_start */ + /* + * Close the parameter list opened from cmd_start. + * The number of bytes added must be equal to + * CMD_FINALIZE_BYTES_NEEDED. + */ add_token_u8(&err, cmd, OPAL_ENDLIST); add_token_u8(&err, cmd, OPAL_ENDOFDATA); @@ -1536,6 +1548,58 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data) return finalize_and_send(dev, parse_and_check_status); } +static int write_shadow_mbr(struct opal_dev *dev, void *data) +{ + struct opal_shadow_mbr *shadow = data; + const u8 __user *src; + u8 *dst; + size_t off = 0; + u64 len; + int err = 0; + + /* do the actual transmission(s) */ + src = (u8 __user *)(uintptr_t)shadow->data; + while (off < shadow->size) { + err = cmd_start(dev, opaluid[OPAL_MBR], opalmethod[OPAL_SET]); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_WHERE); + add_token_u64(&err, dev, shadow->offset + off); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + + /* + * The bytestring header is either 1 or 2 bytes, so assume 2. + * There also needs to be enough space to accommodate the + * trailing OPAL_ENDNAME (1 byte) and tokens added by + * cmd_finalize.
+ */ + len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED), + (size_t)(shadow->size - off)); + pr_debug("MBR: write bytes %zu+%llu/%llu\n", + off, len, shadow->size); + + dst = add_bytestring_header(&err, dev, len); + if (!dst) + break; + if (copy_from_user(dst, src + off, len)) + err = -EFAULT; + dev->pos += len; + + add_token_u8(&err, dev, OPAL_ENDNAME); + if (err) + break; + + err = finalize_and_send(dev, parse_and_check_status); + if (err) + break; + + off += len; + } + return err; +} + static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, struct opal_dev *dev) { @@ -2013,6 +2077,26 @@ static int opal_set_mbr_done(struct opal_dev *dev, return ret; } +static int opal_write_shadow_mbr(struct opal_dev *dev, + struct opal_shadow_mbr *info) +{ + const struct opal_step mbr_steps[] = { + { start_admin1LSP_opal_session, &info->key }, + { write_shadow_mbr, info }, + { end_opal_session, } + }; + int ret; + + if (info->size == 0) + return 0; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev); + ret = execute_steps(dev, mbr_steps, ARRAY_SIZE(mbr_steps)); + mutex_unlock(&dev->dev_lock); + return ret; +} + static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) { struct opal_suspend_data *suspend; @@ -2337,6 +2421,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) case IOC_OPAL_MBR_DONE: ret = opal_set_mbr_done(dev, p); break; + case IOC_OPAL_WRITE_SHADOW_MBR: + ret = opal_write_shadow_mbr(dev, p); + break; case IOC_OPAL_ERASE_LR: ret = opal_erase_locking_range(dev, p); break; diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index f834e8a1495f..53c28d750a45 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -41,6 +41,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_SECURE_ERASE_LR: case IOC_OPAL_PSID_REVERT_TPR: case IOC_OPAL_MBR_DONE: + case IOC_OPAL_WRITE_SHADOW_MBR: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 5681f55d334b..c6d035fa1b6c 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -106,6 +106,13 @@ struct opal_mbr_done { __u8 __align[7]; }; +struct opal_shadow_mbr { + struct opal_key key; + const __u64 data; + __u64 offset; + __u64 size; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -120,5 +127,6 @@ struct opal_mbr_done { #define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) #define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) #define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done) +#define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3-58-ga151 From 79d08f89bb1b5c2c1ff90d9bb95497ab9e8aa7e0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 1 Jul 2019 15:14:46 +0800 Subject: block: fix .bi_size overflow 'bio->bi_iter.bi_size' is 'unsigned int', which can hold at most 4G - 1 bytes. Before 07173c3ec276 ("block: enable multipage bvecs"), one bio could include only a limited number of pages, usually at most 256, so the fs bio size would not be bigger than 1M bytes most of the time. Since we now support multi-page bvecs, in theory more than 1M pages can be added to one fs bio, especially in the case of hugepages or a big writeback with many dirty pages. There is then a chance that .bi_size overflows.
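To put numbers on it: .bi_size is a 32-bit unsigned int, so with 4 KiB pages just 2^20 pages already amount to 2^20 * 2^12 = 2^32 bytes, one past UINT_MAX, at which point the counter wraps to zero.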
Fix this issue by using bio_full() to check whether the added segment may overflow .bi_size. Cc: Liu Yiding Cc: kernel test robot Cc: "Darrick J. Wong" Cc: linux-xfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 07173c3ec276 ("block: enable multipage bvecs") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/bio.c | 10 +++++----- fs/iomap.c | 2 +- fs/xfs/xfs_aops.c | 2 +- include/linux/bio.h | 18 ++++++++++++++++-- 4 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/block/bio.c b/block/bio.c index 933c1e36643b..29cd6cf4da51 100644 --- a/block/bio.c +++ b/block/bio.c @@ -723,7 +723,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, } } - if (bio_full(bio)) + if (bio_full(bio, len)) return 0; if (bio->bi_vcnt >= queue_max_segments(q)) @@ -797,7 +797,7 @@ void __bio_add_page(struct bio *bio, struct page *page, struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt]; WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); - WARN_ON_ONCE(bio_full(bio)); + WARN_ON_ONCE(bio_full(bio, len)); bv->bv_page = page; bv->bv_offset = off; @@ -824,7 +824,7 @@ int bio_add_page(struct bio *bio, struct page *page, bool same_page = false; if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { - if (bio_full(bio)) + if (bio_full(bio, len)) return 0; __bio_add_page(bio, page, len, offset); } @@ -909,7 +909,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) if (same_page) put_page(page); } else { - if (WARN_ON_ONCE(bio_full(bio))) + if (WARN_ON_ONCE(bio_full(bio, len))) return -EINVAL; __bio_add_page(bio, page, len, offset); } @@ -953,7 +953,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) ret = __bio_iov_bvec_add_pages(bio, iter); else ret = __bio_iov_iter_get_pages(bio, iter); - } while (!ret && iov_iter_count(iter) && !bio_full(bio)); + } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); if (is_bvec) bio_set_flag(bio, BIO_NO_PAGE_REF); diff --git a/fs/iomap.c b/fs/iomap.c index 4f94788db43b..7a147aa0c4d9 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -333,7 +333,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (iop) atomic_inc(&iop->read_count); - if (!ctx->bio || !is_contig || bio_full(ctx->bio)) { + if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 8da5e6637771..11f703d4a605 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -782,7 +782,7 @@ xfs_add_to_ioend( atomic_inc(&iop->write_count); if (!merged) { - if (bio_full(wpc->ioend->io_bio)) + if (bio_full(wpc->ioend->io_bio, len)) xfs_chain_bio(wpc->ioend, wbc, bdev, sector); bio_add_page(wpc->ioend->io_bio, page, len, poff); } diff --git a/include/linux/bio.h b/include/linux/bio.h index dc630b05e6e5..3cdb84cdc488 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -102,9 +102,23 @@ static inline void *bio_data(struct bio *bio) return NULL; } -static inline bool bio_full(struct bio *bio) +/** + * bio_full - check if the bio is full + * @bio: bio to check + * @len: length of one segment to be added + * + * Return true if @bio is full and one segment with @len bytes can't be + * added to the bio, otherwise return false + */ +static inline bool bio_full(struct bio *bio, unsigned len) { - return bio->bi_vcnt >= bio->bi_max_vecs; + if
(bio->bi_vcnt >= bio->bi_max_vecs) + return true; + + if (bio->bi_iter.bi_size > UINT_MAX - len) + return true; + + return false; } static inline bool bio_next_segment(const struct bio *bio, -- cgit v1.2.3-58-ga151