summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-06-20 09:58:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-06-20 09:58:35 -0700
commit41a247d896d20b2b7c73ec40523d7caf058c0698 (patch)
treee1acfa6937c408932c8d267753b8bdb448c5dc98
parent241e39004581475b2802cd63c111fec43bb0123e (diff)
parent440078db7a5539b36bd780a826cb6e2cf2cce0d0 (diff)
Merge tag 'for-linus-20190620' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Three fixes that should go into this series. One is a set of two patches from Christoph, fixing a page leak on same page merges. Boiled down version of a bigger fix, but this one is more appropriate for this late in the cycle (and easier to backport to stable). The last patch is for a divide error in MD, from Mariusz (via Song)" * tag 'for-linus-20190620' of git://git.kernel.dk/linux-block: md: fix for divide error in status_resync block: fix page leak when merging to same page block: return from __bio_try_merge_page if merging occured in the same page
-rw-r--r--block/bio.c38
-rw-r--r--drivers/md/md.c36
-rw-r--r--fs/iomap.c12
-rw-r--r--fs/xfs/xfs_aops.c11
-rw-r--r--include/linux/bio.h2
5 files changed, 60 insertions, 39 deletions
diff --git a/block/bio.c b/block/bio.c
index 683cbb40f051..ce797d73bb43 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -636,7 +636,7 @@ EXPORT_SYMBOL(bio_clone_fast);
static inline bool page_is_mergeable(const struct bio_vec *bv,
struct page *page, unsigned int len, unsigned int off,
- bool same_page)
+ bool *same_page)
{
phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) +
bv->bv_offset + bv->bv_len - 1;
@@ -647,15 +647,9 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
return false;
- if ((vec_end_addr & PAGE_MASK) != page_addr) {
- if (same_page)
- return false;
- if (pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page)
- return false;
- }
-
- WARN_ON_ONCE(same_page && (len + off) > PAGE_SIZE);
-
+ *same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
+ if (!*same_page && pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page)
+ return false;
return true;
}
@@ -701,6 +695,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
bool put_same_page)
{
struct bio_vec *bvec;
+ bool same_page = false;
/*
* cloned bio must not modify vec list
@@ -729,7 +724,7 @@ static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
if (bvec_gap_to_prev(q, bvec, offset))
return 0;
- if (page_is_mergeable(bvec, page, len, offset, false) &&
+ if (page_is_mergeable(bvec, page, len, offset, &same_page) &&
can_add_page_to_seg(q, bvec, page, len, offset)) {
bvec->bv_len += len;
goto done;
@@ -767,8 +762,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
* @page: start page to add
* @len: length of the data to add
* @off: offset of the data relative to @page
- * @same_page: if %true only merge if the new data is in the same physical
- * page as the last segment of the bio.
+ * @same_page: return if the segment has been merged inside the same page
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* a useful optimisation for file systems with a block size smaller than the
@@ -779,7 +773,7 @@ EXPORT_SYMBOL(bio_add_pc_page);
* Return %true on success or %false on failure.
*/
bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off, bool same_page)
+ unsigned int len, unsigned int off, bool *same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
@@ -837,7 +831,9 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- if (!__bio_try_merge_page(bio, page, len, offset, false)) {
+ bool same_page = false;
+
+ if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
if (bio_full(bio))
return 0;
__bio_add_page(bio, page, len, offset);
@@ -900,6 +896,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
+ bool same_page = false;
ssize_t size, left;
unsigned len, i;
size_t offset;
@@ -920,8 +917,15 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
struct page *page = pages[i];
len = min_t(size_t, PAGE_SIZE - offset, left);
- if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len))
- return -EINVAL;
+
+ if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
+ if (same_page)
+ put_page(page);
+ } else {
+ if (WARN_ON_ONCE(bio_full(bio)))
+ return -EINVAL;
+ __bio_add_page(bio, page, len, offset);
+ }
offset = 0;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 04f4f131f9d6..9801d540fea1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7607,9 +7607,9 @@ static void status_unused(struct seq_file *seq)
static int status_resync(struct seq_file *seq, struct mddev *mddev)
{
sector_t max_sectors, resync, res;
- unsigned long dt, db;
- sector_t rt;
- int scale;
+ unsigned long dt, db = 0;
+ sector_t rt, curr_mark_cnt, resync_mark_cnt;
+ int scale, recovery_active;
unsigned int per_milli;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
@@ -7698,22 +7698,30 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
* db: blocks written from mark until now
* rt: remaining time
*
- * rt is a sector_t, so could be 32bit or 64bit.
- * So we divide before multiply in case it is 32bit and close
- * to the limit.
- * We scale the divisor (db) by 32 to avoid losing precision
- * near the end of resync when the number of remaining sectors
- * is close to 'db'.
- * We then divide rt by 32 after multiplying by db to compensate.
- * The '+1' avoids division by zero if db is very small.
+ * rt is a sector_t, which is always 64bit now. We are keeping
+ * the original algorithm, but it is not really necessary.
+ *
+ * Original algorithm:
+ * So we divide before multiply in case it is 32bit and close
+ * to the limit.
+ * We scale the divisor (db) by 32 to avoid losing precision
+ * near the end of resync when the number of remaining sectors
+ * is close to 'db'.
+ * We then divide rt by 32 after multiplying by db to compensate.
+ * The '+1' avoids division by zero if db is very small.
*/
dt = ((jiffies - mddev->resync_mark) / HZ);
if (!dt) dt++;
- db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
- - mddev->resync_mark_cnt;
+
+ curr_mark_cnt = mddev->curr_mark_cnt;
+ recovery_active = atomic_read(&mddev->recovery_active);
+ resync_mark_cnt = mddev->resync_mark_cnt;
+
+ if (curr_mark_cnt >= (recovery_active + resync_mark_cnt))
+ db = curr_mark_cnt - (recovery_active + resync_mark_cnt);
rt = max_sectors - resync; /* number of remaining sectors */
- sector_div(rt, db/32+1);
+ rt = div64_u64(rt, db/32+1);
rt *= dt;
rt >>= 5;
diff --git a/fs/iomap.c b/fs/iomap.c
index 23ef63fd1669..12654c2e78f8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -287,7 +287,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap_readpage_ctx *ctx = data;
struct page *page = ctx->cur_page;
struct iomap_page *iop = iomap_page_create(inode, page);
- bool is_contig = false;
+ bool same_page = false, is_contig = false;
loff_t orig_pos = pos;
unsigned poff, plen;
sector_t sector;
@@ -315,10 +315,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
* Try to merge into a previous segment if we can.
*/
sector = iomap_sector(iomap, pos);
- if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
- if (__bio_try_merge_page(ctx->bio, page, plen, poff, true))
- goto done;
+ if (ctx->bio && bio_end_sector(ctx->bio) == sector)
is_contig = true;
+
+ if (is_contig &&
+ __bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
+ if (!same_page && iop)
+ atomic_inc(&iop->read_count);
+ goto done;
}
/*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a6f0f4761a37..8da5e6637771 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -758,6 +758,7 @@ xfs_add_to_ioend(
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
unsigned len = i_blocksize(inode);
unsigned poff = offset & (PAGE_SIZE - 1);
+ bool merged, same_page = false;
sector_t sector;
sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
@@ -774,9 +775,13 @@ xfs_add_to_ioend(
wpc->imap.br_state, offset, bdev, sector);
}
- if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) {
- if (iop)
- atomic_inc(&iop->write_count);
+ merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
+ &same_page);
+
+ if (iop && !same_page)
+ atomic_inc(&iop->write_count);
+
+ if (!merged) {
if (bio_full(wpc->ioend->io_bio))
xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
bio_add_page(wpc->ioend->io_bio, page, len, poff);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 0f23b5682640..f87abaa898f0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -423,7 +423,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off, bool same_page);
+ unsigned int len, unsigned int off, bool *same_page);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);