From 4246a0b63bd8f56a1469b12eafeb875b1041a451 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Jul 2015 15:29:37 +0200
Subject: block: add a bi_error field to struct bio

Currently we have two different ways to signal an I/O error on a BIO:

 (1) by clearing the BIO_UPTODATE flag
 (2) by returning a Linux errno value to the bi_end_io callback

The first one has the drawback of only communicating a single possible
error (-EIO), and the second one has the drawback of not beeing persistent
when bios are queued up, and are not passed along from child to parent
bio in the ever more popular chaining scenario.  Having both mechanisms
available has the additional drawback of utterly confusing driver authors
and introducing bugs where various I/O submitters only deal with one of
them, and the others have to add boilerplate code to deal with both kinds
of error returns.

So add a new bi_error field to store an errno value directly in struct
bio and remove the existing mechanisms to clean all this up.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c | 43 ++++++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

(limited to 'block/bio.c')

diff --git a/block/bio.c b/block/bio.c
index 2a00d349cd68..a23f489f398f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -269,7 +269,6 @@ static void bio_free(struct bio *bio)
 void bio_init(struct bio *bio)
 {
 	memset(bio, 0, sizeof(*bio));
-	bio->bi_flags = 1 << BIO_UPTODATE;
 	atomic_set(&bio->__bi_remaining, 1);
 	atomic_set(&bio->__bi_cnt, 1);
 }
@@ -292,14 +291,17 @@ void bio_reset(struct bio *bio)
 	__bio_free(bio);
 
 	memset(bio, 0, BIO_RESET_BYTES);
-	bio->bi_flags = flags | (1 << BIO_UPTODATE);
+	bio->bi_flags = flags;
 	atomic_set(&bio->__bi_remaining, 1);
 }
 EXPORT_SYMBOL(bio_reset);
 
-static void bio_chain_endio(struct bio *bio, int error)
+static void bio_chain_endio(struct bio *bio)
 {
-	bio_endio(bio->bi_private, error);
+	struct bio *parent = bio->bi_private;
+
+	parent->bi_error = bio->bi_error;
+	bio_endio(parent);
 	bio_put(bio);
 }
 
@@ -896,11 +898,11 @@ struct submit_bio_ret {
 	int error;
 };
 
-static void submit_bio_wait_endio(struct bio *bio, int error)
+static void submit_bio_wait_endio(struct bio *bio)
 {
 	struct submit_bio_ret *ret = bio->bi_private;
 
-	ret->error = error;
+	ret->error = bio->bi_error;
 	complete(&ret->event);
 }
 
@@ -1445,7 +1447,7 @@ void bio_unmap_user(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_unmap_user);
 
-static void bio_map_kern_endio(struct bio *bio, int err)
+static void bio_map_kern_endio(struct bio *bio)
 {
 	bio_put(bio);
 }
@@ -1501,13 +1503,13 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 }
 EXPORT_SYMBOL(bio_map_kern);
 
-static void bio_copy_kern_endio(struct bio *bio, int err)
+static void bio_copy_kern_endio(struct bio *bio)
 {
 	bio_free_pages(bio);
 	bio_put(bio);
 }
 
-static void bio_copy_kern_endio_read(struct bio *bio, int err)
+static void bio_copy_kern_endio_read(struct bio *bio)
 {
 	char *p = bio->bi_private;
 	struct bio_vec *bvec;
@@ -1518,7 +1520,7 @@ static void bio_copy_kern_endio_read(struct bio *bio, int err)
 		p += bvec->bv_len;
 	}
 
-	bio_copy_kern_endio(bio, err);
+	bio_copy_kern_endio(bio);
 }
 
 /**
@@ -1778,25 +1780,15 @@ static inline bool bio_remaining_done(struct bio *bio)
 /**
  * bio_endio - end I/O on a bio
  * @bio:	bio
- * @error:	error, if any
  *
  * Description:
- *   bio_endio() will end I/O on the whole bio. bio_endio() is the
- *   preferred way to end I/O on a bio, it takes care of clearing
- *   BIO_UPTODATE on error. @error is 0 on success, and and one of the
- *   established -Exxxx (-EIO, for instance) error values in case
- *   something went wrong. No one should call bi_end_io() directly on a
- *   bio unless they own it and thus know that it has an end_io
- *   function.
+ *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
+ *   way to end I/O on a bio. No one should call bi_end_io() directly on a
+ *   bio unless they own it and thus know that it has an end_io function.
  **/
-void bio_endio(struct bio *bio, int error)
+void bio_endio(struct bio *bio)
 {
 	while (bio) {
-		if (error)
-			clear_bit(BIO_UPTODATE, &bio->bi_flags);
-		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-			error = -EIO;
-
 		if (unlikely(!bio_remaining_done(bio)))
 			break;
 
@@ -1810,11 +1802,12 @@ void bio_endio(struct bio *bio, int error)
 		 */
 		if (bio->bi_end_io == bio_chain_endio) {
 			struct bio *parent = bio->bi_private;
+			parent->bi_error = bio->bi_error;
 			bio_put(bio);
 			bio = parent;
 		} else {
 			if (bio->bi_end_io)
-				bio->bi_end_io(bio, error);
+				bio->bi_end_io(bio);
 			bio = NULL;
 		}
 	}
-- 
cgit v1.2.3-58-ga151


From b7c44ed9d2fc6b461378c65eaf144ccc80a47772 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 24 Jul 2015 12:37:59 -0600
Subject: block: manipulate bio->bi_flags through helpers

Some places use helpers now, others don't. We only have the 'is set'
helper, add helpers for setting and clearing flags too.

It was a bit of a mess of atomic vs non-atomic access. With
BIO_UPTODATE gone, we don't have any risk of concurrent access to the
flags. So relax the restriction and don't make any of them atomic. The
flags that do have serialization issues (reffed and chained), we
already handle those separately.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c               | 14 +++++++-------
 block/blk-core.c          |  2 +-
 block/blk-map.c           |  2 +-
 block/blk-merge.c         |  2 +-
 block/bounce.c            |  2 +-
 drivers/md/raid1.c        |  4 ++--
 drivers/md/raid10.c       |  6 +++---
 drivers/md/raid5.c        |  2 +-
 fs/buffer.c               |  2 +-
 include/linux/bio.h       | 15 +++++++++++++++
 include/linux/blk_types.h |  2 --
 11 files changed, 33 insertions(+), 20 deletions(-)

(limited to 'block/bio.c')

diff --git a/block/bio.c b/block/bio.c
index a23f489f398f..911ae8f82752 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -311,7 +311,7 @@ static void bio_chain_endio(struct bio *bio)
  */
 static inline void bio_inc_remaining(struct bio *bio)
 {
-	bio->bi_flags |= (1 << BIO_CHAIN);
+	bio_set_flag(bio, BIO_CHAIN);
 	smp_mb__before_atomic();
 	atomic_inc(&bio->__bi_remaining);
 }
@@ -495,7 +495,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		if (unlikely(!bvl))
 			goto err_free;
 
-		bio->bi_flags |= 1 << BIO_OWNS_VEC;
+		bio_set_flag(bio, BIO_OWNS_VEC);
 	} else if (nr_iovecs) {
 		bvl = bio->bi_inline_vecs;
 	}
@@ -580,7 +580,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	 * so we don't set nor calculate new physical/hw segment counts here
 	 */
 	bio->bi_bdev = bio_src->bi_bdev;
-	bio->bi_flags |= 1 << BIO_CLONED;
+	bio_set_flag(bio, BIO_CLONED);
 	bio->bi_rw = bio_src->bi_rw;
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
@@ -829,7 +829,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 
 	/* If we may be able to merge these biovecs, force a recount */
 	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
-		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+		bio_clear_flag(bio, BIO_SEG_VALID);
 
  done:
 	return len;
@@ -1390,7 +1390,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
 	if (iter->type & WRITE)
 		bio->bi_rw |= REQ_WRITE;
 
-	bio->bi_flags |= (1 << BIO_USER_MAPPED);
+	bio_set_flag(bio, BIO_USER_MAPPED);
 
 	/*
 	 * subtle -- if __bio_map_user() ended up bouncing a bio,
@@ -1770,7 +1770,7 @@ static inline bool bio_remaining_done(struct bio *bio)
 	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
 
 	if (atomic_dec_and_test(&bio->__bi_remaining)) {
-		clear_bit(BIO_CHAIN, &bio->bi_flags);
+		bio_clear_flag(bio, BIO_CHAIN);
 		return true;
 	}
 
@@ -1866,7 +1866,7 @@ void bio_trim(struct bio *bio, int offset, int size)
 	if (offset == 0 && size == bio->bi_iter.bi_size)
 		return;
 
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+	bio_clear_flag(bio, BIO_SEG_VALID);
 
 	bio_advance(bio, offset << 9);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 7ef15b947b91..d1796b54e97a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -146,7 +146,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 		bio->bi_error = error;
 
 	if (unlikely(rq->cmd_flags & REQ_QUIET))
-		set_bit(BIO_QUIET, &bio->bi_flags);
+		bio_set_flag(bio, BIO_QUIET);
 
 	bio_advance(bio, nbytes);
 
diff --git a/block/blk-map.c b/block/blk-map.c
index 5fe1c30bfba7..233841644c9d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -94,7 +94,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 		return PTR_ERR(bio);
 
 	if (map_data && map_data->null_mapped)
-		bio->bi_flags |= (1 << BIO_NULL_MAPPED);
+		bio_set_flag(bio, BIO_NULL_MAPPED);
 
 	if (bio->bi_iter.bi_size != iter->count) {
 		/*
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 30a0d9f89017..a455b9860143 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -116,7 +116,7 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
 		bio->bi_next = nxt;
 	}
 
-	bio->bi_flags |= (1 << BIO_SEG_VALID);
+	bio_set_flag(bio, BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
 
diff --git a/block/bounce.c b/block/bounce.c
index f4db245b9f3a..2c310ea007ee 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -186,7 +186,7 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
 	if (!bdi_cap_stable_pages_required(&q->backing_dev_info))
 		return 0;
 
-	return test_bit(BIO_SNAP_STABLE, &bio->bi_flags);
+	return bio_flagged(bio, BIO_SNAP_STABLE);
 }
 #else
 static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9aa7d1fb2bc1..60d0a8626e63 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1157,7 +1157,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	 * non-zero, then it is the number of not-completed requests.
 	 */
 	bio->bi_phys_segments = 0;
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+	bio_clear_flag(bio, BIO_SEG_VALID);
 
 	if (rw == READ) {
 		/*
@@ -2711,7 +2711,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 						/* remove last page from this bio */
 						bio->bi_vcnt--;
 						bio->bi_iter.bi_size -= len;
-						__clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+						bio_clear_flag(bio, BIO_SEG_VALID);
 					}
 					goto bio_full;
 				}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 929e9a26d81b..316ff6f611e9 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1216,7 +1216,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
 	 * non-zero, then it is the number of not-completed requests.
 	 */
 	bio->bi_phys_segments = 0;
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+	bio_clear_flag(bio, BIO_SEG_VALID);
 
 	if (rw == READ) {
 		/*
@@ -3353,7 +3353,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 				/* remove last page from this bio */
 				bio2->bi_vcnt--;
 				bio2->bi_iter.bi_size -= len;
-				__clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+				bio_clear_flag(bio2, BIO_SEG_VALID);
 			}
 			goto bio_full;
 		}
@@ -4433,7 +4433,7 @@ read_more:
 				/* Remove last page from this bio */
 				bio2->bi_vcnt--;
 				bio2->bi_iter.bi_size -= len;
-				__clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+				bio_clear_flag(bio2, BIO_SEG_VALID);
 			}
 			goto bio_full;
 		}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 84d6eec1033e..e3d48775c9df 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4850,7 +4850,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		rcu_read_unlock();
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev =  rdev->bdev;
-		__clear_bit(BIO_SEG_VALID, &align_bi->bi_flags);
+		bio_clear_flag(align_bi, BIO_SEG_VALID);
 
 		if (!bio_fits_rdev(align_bi) ||
 		    is_badblock(rdev, align_bi->bi_iter.bi_sector,
diff --git a/fs/buffer.c b/fs/buffer.c
index 7a49bb84ecb5..7887bb466368 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2961,7 +2961,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
 {
 	struct buffer_head *bh = bio->bi_private;
 
-	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
+	if (unlikely(bio_flagged(bio, BIO_QUIET)))
 		set_bit(BH_Quiet, &bh->b_state);
 
 	bh->b_end_io(bh, !bio->bi_error);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6b918177002d..986e6e19feb5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -304,6 +304,21 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count)
 	atomic_set(&bio->__bi_cnt, count);
 }
 
+static inline bool bio_flagged(struct bio *bio, unsigned int bit)
+{
+	return (bio->bi_flags & (1UL << bit)) != 0;
+}
+
+static inline void bio_set_flag(struct bio *bio, unsigned int bit)
+{
+	bio->bi_flags |= (1UL << bit);
+}
+
+static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
+{
+	bio->bi_flags &= ~(1UL << bit);
+}
+
 enum bip_flags {
 	BIP_BLOCK_INTEGRITY	= 1 << 0, /* block layer owns integrity data */
 	BIP_MAPPED_INTEGRITY	= 1 << 1, /* ref tag has been remapped */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6164fb8a817b..a765a50e780f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -129,8 +129,6 @@ struct bio {
 #define BIO_RESET_BITS	13
 #define BIO_OWNS_VEC	13	/* bio_free() should free bvec */
 
-#define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
-
 /*
  * top 4 bits of bio flags indicate the pool this bio came from
  */
-- 
cgit v1.2.3-58-ga151


From c66a14d07c136cc35d4dcb84bfc7bc188be24d4c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Sat, 23 Nov 2013 22:30:22 -0800
Subject: block: simplify bio_add_page()

Since generic_make_request() can now handle arbitrary size bios, all we
have to do is make sure the bvec array doesn't overflow.
__bio_add_page() doesn't need to call ->merge_bvec_fn(), where
we can get rid of unnecessary code paths.

Removing the call to ->merge_bvec_fn() is also fine, as no driver that
implements support for BLOCK_PC commands even has a ->merge_bvec_fn()
method.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
[dpark: rebase and resolve merge conflicts, change a couple of comments,
 make bio_add_page() warn once upon a cloned bio.]
Signed-off-by: Dongsu Park <dpark@posteo.net>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c | 135 +++++++++++++++++++++++++-----------------------------------
 1 file changed, 55 insertions(+), 80 deletions(-)

(limited to 'block/bio.c')

diff --git a/block/bio.c b/block/bio.c
index 911ae8f82752..ba9c4b0c0ff2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -716,9 +716,23 @@ int bio_get_nr_vecs(struct block_device *bdev)
 }
 EXPORT_SYMBOL(bio_get_nr_vecs);
 
-static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
-			  *page, unsigned int len, unsigned int offset,
-			  unsigned int max_sectors)
+/**
+ *	bio_add_pc_page	-	attempt to add page to bio
+ *	@q: the target queue
+ *	@bio: destination bio
+ *	@page: page to add
+ *	@len: vec entry length
+ *	@offset: vec entry offset
+ *
+ *	Attempt to add a page to the bio_vec maplist. This can fail for a
+ *	number of reasons, such as the bio being full or target block device
+ *	limitations. The target block device must allow bio's up to PAGE_SIZE,
+ *	so it is always possible to add a single page to an empty bio.
+ *
+ *	This should only be used by REQ_PC bios.
+ */
+int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+		    *page, unsigned int len, unsigned int offset)
 {
 	int retried_segments = 0;
 	struct bio_vec *bvec;
@@ -729,7 +743,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	if (unlikely(bio_flagged(bio, BIO_CLONED)))
 		return 0;
 
-	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
+	if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
 		return 0;
 
 	/*
@@ -742,28 +756,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 
 		if (page == prev->bv_page &&
 		    offset == prev->bv_offset + prev->bv_len) {
-			unsigned int prev_bv_len = prev->bv_len;
 			prev->bv_len += len;
-
-			if (q->merge_bvec_fn) {
-				struct bvec_merge_data bvm = {
-					/* prev_bvec is already charged in
-					   bi_size, discharge it in order to
-					   simulate merging updated prev_bvec
-					   as new bvec. */
-					.bi_bdev = bio->bi_bdev,
-					.bi_sector = bio->bi_iter.bi_sector,
-					.bi_size = bio->bi_iter.bi_size -
-						prev_bv_len,
-					.bi_rw = bio->bi_rw,
-				};
-
-				if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
-					prev->bv_len -= len;
-					return 0;
-				}
-			}
-
 			bio->bi_iter.bi_size += len;
 			goto done;
 		}
@@ -806,27 +799,6 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		blk_recount_segments(q, bio);
 	}
 
-	/*
-	 * if queue has other restrictions (eg varying max sector size
-	 * depending on offset), it can specify a merge_bvec_fn in the
-	 * queue to get further control
-	 */
-	if (q->merge_bvec_fn) {
-		struct bvec_merge_data bvm = {
-			.bi_bdev = bio->bi_bdev,
-			.bi_sector = bio->bi_iter.bi_sector,
-			.bi_size = bio->bi_iter.bi_size - len,
-			.bi_rw = bio->bi_rw,
-		};
-
-		/*
-		 * merge_bvec_fn() returns number of bytes it can accept
-		 * at this offset
-		 */
-		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
-			goto failed;
-	}
-
 	/* If we may be able to merge these biovecs, force a recount */
 	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
 		bio_clear_flag(bio, BIO_SEG_VALID);
@@ -843,28 +815,6 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 	blk_recount_segments(q, bio);
 	return 0;
 }
-
-/**
- *	bio_add_pc_page	-	attempt to add page to bio
- *	@q: the target queue
- *	@bio: destination bio
- *	@page: page to add
- *	@len: vec entry length
- *	@offset: vec entry offset
- *
- *	Attempt to add a page to the bio_vec maplist. This can fail for a
- *	number of reasons, such as the bio being full or target block device
- *	limitations. The target block device must allow bio's up to PAGE_SIZE,
- *	so it is always possible to add a single page to an empty bio.
- *
- *	This should only be used by REQ_PC bios.
- */
-int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
-		    unsigned int len, unsigned int offset)
-{
-	return __bio_add_page(q, bio, page, len, offset,
-			      queue_max_hw_sectors(q));
-}
 EXPORT_SYMBOL(bio_add_pc_page);
 
 /**
@@ -874,22 +824,47 @@ EXPORT_SYMBOL(bio_add_pc_page);
  *	@len: vec entry length
  *	@offset: vec entry offset
  *
- *	Attempt to add a page to the bio_vec maplist. This can fail for a
- *	number of reasons, such as the bio being full or target block device
- *	limitations. The target block device must allow bio's up to PAGE_SIZE,
- *	so it is always possible to add a single page to an empty bio.
+ *	Attempt to add a page to the bio_vec maplist. This will only fail
+ *	if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
  */
-int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
-		 unsigned int offset)
+int bio_add_page(struct bio *bio, struct page *page,
+		 unsigned int len, unsigned int offset)
 {
-	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	unsigned int max_sectors;
+	struct bio_vec *bv;
 
-	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
-	if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
-		max_sectors = len >> 9;
+	/*
+	 * cloned bio must not modify vec list
+	 */
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+		return 0;
 
-	return __bio_add_page(q, bio, page, len, offset, max_sectors);
+	/*
+	 * For filesystems with a blocksize smaller than the pagesize
+	 * we will often be called with the same page as last time and
+	 * a consecutive offset.  Optimize this special case.
+	 */
+	if (bio->bi_vcnt > 0) {
+		bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+		if (page == bv->bv_page &&
+		    offset == bv->bv_offset + bv->bv_len) {
+			bv->bv_len += len;
+			goto done;
+		}
+	}
+
+	if (bio->bi_vcnt >= bio->bi_max_vecs)
+		return 0;
+
+	bv		= &bio->bi_io_vec[bio->bi_vcnt];
+	bv->bv_page	= page;
+	bv->bv_len	= len;
+	bv->bv_offset	= offset;
+
+	bio->bi_vcnt++;
+done:
+	bio->bi_iter.bi_size += len;
+	return len;
 }
 EXPORT_SYMBOL(bio_add_page);
 
-- 
cgit v1.2.3-58-ga151


From b54ffb73cadcdcff9cc1ae0e11f502407e3e2e4c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Tue, 19 May 2015 14:31:01 +0200
Subject: block: remove bio_get_nr_vecs()

We can always fill up the bio now, no need to estimate the possible
size based on queue parameters.

Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
[hch: rebased and wrote a changelog]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c            | 23 -----------------------
 drivers/md/dm-io.c     |  2 +-
 fs/btrfs/compression.c |  5 +----
 fs/btrfs/extent_io.c   |  9 ++-------
 fs/btrfs/inode.c       |  3 +--
 fs/btrfs/scrub.c       | 18 ++----------------
 fs/direct-io.c         |  2 +-
 fs/ext4/page-io.c      |  3 +--
 fs/ext4/readpage.c     |  2 +-
 fs/f2fs/data.c         |  2 +-
 fs/gfs2/lops.c         |  9 +--------
 fs/logfs/dev_bdev.c    |  4 ++--
 fs/mpage.c             |  4 ++--
 fs/nilfs2/segbuf.c     |  2 +-
 fs/xfs/xfs_aops.c      |  3 +--
 include/linux/bio.h    |  1 -
 16 files changed, 18 insertions(+), 74 deletions(-)

(limited to 'block/bio.c')

diff --git a/block/bio.c b/block/bio.c
index ba9c4b0c0ff2..425d6d4a2f7a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -693,29 +693,6 @@ integrity_clone:
 }
 EXPORT_SYMBOL(bio_clone_bioset);
 
-/**
- *	bio_get_nr_vecs		- return approx number of vecs
- *	@bdev:  I/O target
- *
- *	Return the approximate number of pages we can send to this target.
- *	There's no guarantee that you will be able to fit this number of pages
- *	into a bio, it does not account for dynamic restrictions that vary
- *	on offset.
- */
-int bio_get_nr_vecs(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-	int nr_pages;
-
-	nr_pages = min_t(unsigned,
-		     queue_max_segments(q),
-		     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
-
-	return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
-
-}
-EXPORT_SYMBOL(bio_get_nr_vecs);
-
 /**
  *	bio_add_pc_page	-	attempt to add page to bio
  *	@q: the target queue
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index c84714f70378..6f8e83b2a6f8 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -316,7 +316,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
 		if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
 			num_bvecs = 1;
 		else
-			num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
+			num_bvecs = min_t(int, BIO_MAX_PAGES,
 					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
 
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 302266ec2cdb..57ee8ca29b06 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -97,10 +97,7 @@ static inline int compressed_bio_size(struct btrfs_root *root,
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
 					u64 first_byte, gfp_t gfp_flags)
 {
-	int nr_vecs;
-
-	nr_vecs = bio_get_nr_vecs(bdev);
-	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
+	return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
 }
 
 static int check_compressed_csum(struct inode *inode,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c22f175ed024..68b12bbc709f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2795,9 +2795,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 {
 	int ret = 0;
 	struct bio *bio;
-	int nr;
 	int contig = 0;
-	int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
 	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
 	size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
 
@@ -2822,12 +2820,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 			return 0;
 		}
 	}
-	if (this_compressed)
-		nr = BIO_MAX_PAGES;
-	else
-		nr = bio_get_nr_vecs(bdev);
 
-	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	bio = btrfs_bio_alloc(bdev, sector, BIO_MAX_PAGES,
+			GFP_NOFS | __GFP_HIGH);
 	if (!bio)
 		return -ENOMEM;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6b8becfe2057..8635ef01a04a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7959,8 +7959,7 @@ out:
 static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 				       u64 first_sector, gfp_t gfp_flags)
 {
-	int nr_vecs = bio_get_nr_vecs(bdev);
-	return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+	return btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
 }
 
 static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ebb8260186fe..9c146d8307b5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -454,27 +454,14 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
 	struct scrub_ctx *sctx;
 	int		i;
 	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
-	int pages_per_rd_bio;
 	int ret;
 
-	/*
-	 * the setting of pages_per_rd_bio is correct for scrub but might
-	 * be wrong for the dev_replace code where we might read from
-	 * different devices in the initial huge bios. However, that
-	 * code is able to correctly handle the case when adding a page
-	 * to a bio fails.
-	 */
-	if (dev->bdev)
-		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
-					 bio_get_nr_vecs(dev->bdev));
-	else
-		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
 	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
 	if (!sctx)
 		goto nomem;
 	atomic_set(&sctx->refs, 1);
 	sctx->is_dev_replace = is_dev_replace;
-	sctx->pages_per_rd_bio = pages_per_rd_bio;
+	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
 	sctx->curr = -1;
 	sctx->dev_root = dev->dev_root;
 	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
@@ -3896,8 +3883,7 @@ static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
 		return 0;
 
 	WARN_ON(!dev->bdev);
-	wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
-					 bio_get_nr_vecs(dev->bdev));
+	wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
 	wr_ctx->tgtdev = dev;
 	atomic_set(&wr_ctx->flush_all_writes, 0);
 	return 0;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 818c647f36d3..11256291642e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -655,7 +655,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
 	if (ret)
 		goto out;
 	sector = start_sector << (sdio->blkbits - 9);
-	nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
+	nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES);
 	BUG_ON(nr_pages <= 0);
 	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
 	sdio->boundary = 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index aa95566f14be..8a9d63a0c071 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -372,10 +372,9 @@ void ext4_io_submit_init(struct ext4_io_submit *io,
 static int io_submit_init_bio(struct ext4_io_submit *io,
 			      struct buffer_head *bh)
 {
-	int nvecs = bio_get_nr_vecs(bh->b_bdev);
 	struct bio *bio;
 
-	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
+	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 	if (!bio)
 		return -ENOMEM;
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 5de5b871c178..e26803fb210d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -284,7 +284,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
 					goto set_error_page;
 			}
 			bio = bio_alloc(GFP_KERNEL,
-				min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+				min_t(int, nr_pages, BIO_MAX_PAGES));
 			if (!bio) {
 				if (ctx)
 					ext4_release_crypto_ctx(ctx);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f0baa7ffb50..b478accb24d9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1552,7 +1552,7 @@ submit_and_realloc:
 			}
 
 			bio = bio_alloc(GFP_KERNEL,
-				min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+				min_t(int, nr_pages, BIO_MAX_PAGES));
 			if (!bio) {
 				if (ctx)
 					f2fs_release_crypto_ctx(ctx);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c0a1b967deba..92324ac58290 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -261,18 +261,11 @@ void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
 static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
 {
 	struct super_block *sb = sdp->sd_vfs;
-	unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
 	struct bio *bio;
 
 	BUG_ON(sdp->sd_log_bio);
 
-	while (1) {
-		bio = bio_alloc(GFP_NOIO, nrvecs);
-		if (likely(bio))
-			break;
-		nrvecs = max(nrvecs/2, 1U);
-	}
-
+	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 	bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
 	bio->bi_bdev = sb->s_bdev;
 	bio->bi_end_io = gfs2_end_log_write;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index cea0cc9878b7..a7fdbd868474 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -81,7 +81,7 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
 	unsigned int max_pages;
 	int i;
 
-	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+	max_pages = min(nr_pages, BIO_MAX_PAGES);
 
 	bio = bio_alloc(GFP_NOFS, max_pages);
 	BUG_ON(!bio);
@@ -171,7 +171,7 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
 	unsigned int max_pages;
 	int i;
 
-	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
+	max_pages = min(nr_pages, BIO_MAX_PAGES);
 
 	bio = bio_alloc(GFP_NOFS, max_pages);
 	BUG_ON(!bio);
diff --git a/fs/mpage.c b/fs/mpage.c
index abac9361b3f1..778a4ddef77a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -277,7 +277,7 @@ alloc_new:
 				goto out;
 		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-			  	min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
+				min_t(int, nr_pages, BIO_MAX_PAGES),
 				GFP_KERNEL);
 		if (bio == NULL)
 			goto confused;
@@ -602,7 +602,7 @@ alloc_new:
 			}
 		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
+				BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
 			goto confused;
 
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 550b10efb14e..f63620ce3892 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -414,7 +414,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
 {
 	wi->bio = NULL;
 	wi->rest_blocks = segbuf->sb_sum.nblocks;
-	wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
+	wi->max_pages = BIO_MAX_PAGES;
 	wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
 	wi->start = wi->end = 0;
 	wi->blocknr = segbuf->sb_pseg_start;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3714844a81d8..c77499bcbd7a 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -381,8 +381,7 @@ STATIC struct bio *
 xfs_alloc_ioend_bio(
 	struct buffer_head	*bh)
 {
-	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
-	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);
+	struct bio		*bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 
 	ASSERT(bio->bi_private == NULL);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b7892a1906bd..ad7217458812 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -460,7 +460,6 @@ void bio_chain(struct bio *, struct bio *);
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
-extern int bio_get_nr_vecs(struct block_device *);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
 				    const struct iov_iter *, gfp_t);
-- 
cgit v1.2.3-58-ga151


From 03100aada96f0645bbcb89aea24c01f02d0ef1fa Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 19 Aug 2015 14:24:05 -0700
Subject: block: Replace SG_GAPS with new queue limits mask

The SG_GAPS queue flag caused checks for bio vector alignment against
PAGE_SIZE, but the device may have different constraints. This patch
adds a queue limits so a driver with such constraints can set to allow
requests that would have been unnecessarily split. The new gaps check
takes the request_queue as a parameter to simplify the logic around
invoking this function.

This new limit makes the queue flag redundant, so removing it and
all usage. Device-mappers will inherit the correct settings through
blk_stack_limits().

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c               |  3 +--
 block/blk-merge.c         | 23 +++++++----------------
 block/blk-settings.c      | 14 ++++++++++++++
 drivers/block/nvme-core.c |  2 +-
 drivers/md/dm-table.c     | 13 -------------
 include/linux/bio.h       |  9 ---------
 include/linux/blkdev.h    | 21 ++++++++++++++++++++-
 7 files changed, 43 insertions(+), 42 deletions(-)

(limited to 'block/bio.c')

diff --git a/block/bio.c b/block/bio.c
index 425d6d4a2f7a..b1f198f9a317 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -742,8 +742,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
-		if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
-		    bvec_gap_to_prev(prev, offset))
+		if (bvec_gap_to_prev(q, prev, offset))
 			return 0;
 	}
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 0027def35f5a..0e0d9fd01c40 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -82,8 +82,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
-		if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
-		    prev && bvec_gap_to_prev(&bvprv, bv.bv_offset))
+		if (prev && bvec_gap_to_prev(q, &bvprv, bv.bv_offset))
 			goto split;
 
 		if (prev && blk_queue_cluster(q)) {
@@ -484,12 +483,12 @@ static bool req_no_special_merge(struct request *req)
 	return !q->mq_ops && req->special;
 }
 
-static int req_gap_to_prev(struct request *req, struct request *next)
+static int req_gap_to_prev(struct request *req, struct bio *next)
 {
 	struct bio *prev = req->biotail;
 
-	return bvec_gap_to_prev(&prev->bi_io_vec[prev->bi_vcnt - 1],
-				next->bio->bi_io_vec[0].bv_offset);
+	return bvec_gap_to_prev(req->q, &prev->bi_io_vec[prev->bi_vcnt - 1],
+			next->bi_io_vec[1].bv_offset);
 }
 
 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
@@ -506,8 +505,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	if (req_no_special_merge(req) || req_no_special_merge(next))
 		return 0;
 
-	if (test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags) &&
-	    req_gap_to_prev(req, next))
+	if (req_gap_to_prev(req, next->bio))
 		return 0;
 
 	/*
@@ -692,8 +690,6 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 {
-	struct request_queue *q = rq->q;
-
 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
 		return false;
 
@@ -718,13 +714,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 		return false;
 
 	/* Only check gaps if the bio carries data */
-	if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) && bio_has_data(bio)) {
-		struct bio_vec *bprev;
-
-		bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
-		if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
-			return false;
-	}
+	if (bio_has_data(bio) && req_gap_to_prev(rq, bio))
+		return false;
 
 	return true;
 }
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d27b4e272356..f96c72116931 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -89,6 +89,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_segments = BLK_MAX_SEGMENTS;
 	lim->max_integrity_segments = 0;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	lim->virt_boundary_mask = 0;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
 	lim->chunk_sectors = 0;
@@ -532,6 +533,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
 					    b->seg_boundary_mask);
+	t->virt_boundary_mask = min_not_zero(t->virt_boundary_mask,
+					    b->virt_boundary_mask);
 
 	t->max_segments = min_not_zero(t->max_segments, b->max_segments);
 	t->max_integrity_segments = min_not_zero(t->max_integrity_segments,
@@ -771,6 +774,17 @@ void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
 }
 EXPORT_SYMBOL(blk_queue_segment_boundary);
 
+/**
+ * blk_queue_virt_boundary - set boundary rules for bio merging
+ * @q:  the request queue for the device
+ * @mask:  the memory boundary mask
+ **/
+void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
+{
+	q->limits.virt_boundary_mask = mask;
+}
+EXPORT_SYMBOL(blk_queue_virt_boundary);
+
 /**
  * blk_queue_dma_alignment - set dma length and memory alignment
  * @q:     the request queue for the device
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index d844ec4a2b85..2f694d78da55 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -2067,7 +2067,6 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 		goto out_free_ns;
 	queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
-	queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, ns->queue);
 	ns->dev = dev;
 	ns->queue->queuedata = ns;
 
@@ -2087,6 +2086,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 		blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
 	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
 		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+	blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index afb4ad3dfeb3..e76ed003769e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1380,14 +1380,6 @@ static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
 	return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
 }
 
-static int queue_supports_sg_gaps(struct dm_target *ti, struct dm_dev *dev,
-				  sector_t start, sector_t len, void *data)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return q && !test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags);
-}
-
 static bool dm_table_all_devices_attribute(struct dm_table *t,
 					   iterate_devices_callout_fn func)
 {
@@ -1508,11 +1500,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	else
 		queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
 
-	if (dm_table_all_devices_attribute(t, queue_supports_sg_gaps))
-		queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, q);
-	else
-		queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, q);
-
 	dm_table_set_integrity(t);
 
 	/*
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ad7217458812..b9b6e046b52e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -186,15 +186,6 @@ static inline void *bio_data(struct bio *bio)
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
 	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
 
-/*
- * Check if adding a bio_vec after bprv with offset would create a gap in
- * the SG list. Most drivers don't care about this, but some do.
- */
-static inline bool bvec_gap_to_prev(struct bio_vec *bprv, unsigned int offset)
-{
-	return offset || ((bprv->bv_offset + bprv->bv_len) & (PAGE_SIZE - 1));
-}
-
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e427debc7008..a622f270f09e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -250,6 +250,7 @@ struct blk_queue_tag {
 struct queue_limits {
 	unsigned long		bounce_pfn;
 	unsigned long		seg_boundary_mask;
+	unsigned long		virt_boundary_mask;
 
 	unsigned int		max_hw_sectors;
 	unsigned int		chunk_sectors;
@@ -479,7 +480,6 @@ struct request_queue {
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
-#define QUEUE_FLAG_SG_GAPS     22	/* queue doesn't support SG gaps */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -981,6 +981,7 @@ extern int blk_queue_dma_drain(struct request_queue *q,
 			       void *buf, unsigned int size);
 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
+extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
@@ -1149,6 +1150,11 @@ static inline unsigned long queue_segment_boundary(struct request_queue *q)
 	return q->limits.seg_boundary_mask;
 }
 
+static inline unsigned long queue_virt_boundary(struct request_queue *q)
+{
+	return q->limits.virt_boundary_mask;
+}
+
 static inline unsigned int queue_max_sectors(struct request_queue *q)
 {
 	return q->limits.max_sectors;
@@ -1349,6 +1355,19 @@ static inline void put_dev_sector(Sector p)
 	page_cache_release(p.v);
 }
 
+/*
+ * Check if adding a bio_vec after bprv with offset would create a gap in
+ * the SG list. Most drivers don't care about this, but some do.
+ */
+static inline bool bvec_gap_to_prev(struct request_queue *q,
+				struct bio_vec *bprv, unsigned int offset)
+{
+	if (!queue_virt_boundary(q))
+		return false;
+	return offset ||
+		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
 struct work_struct;
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-- 
cgit v1.2.3-58-ga151