diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-28 12:24:21 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-28 12:24:21 -0700 |
commit | 9583f1c99fe5a11b3f294ac8093e93f008bea29e (patch) | |
tree | 21a9b44d5e3b4e210559dd94c5e8b55c4d97c105 | |
parent | 1731a47444e7bf66d5cf9415bbd6ac5e3903d719 (diff) | |
parent | ed9b66d21866ae3bf16557406258ebe6c00a9a84 (diff) |
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD fixes from Shaohua Li:
"This fixes several bugs, three of them are marked for stable:
- an initialization issue fixed by Ming
- a bio clone race issue fixed by me
- an async tx flush issue fixed by Ofer
- other cleanups"
* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
MD: fix warnning for UP case
md/raid5: add thread_group worker async_tx_issue_pending_all
md: simplify code with bio_io_error
md/raid1: fix writebehind bio clone
md: raid1-10: move raid1/raid10 common code into raid1-10.c
md: raid1/raid10: initialize bvec table via bio_add_page()
md: remove 'idx' from 'struct resync_pages'
-rw-r--r-- | drivers/md/md.c | 2 | ||||
-rw-r--r-- | drivers/md/md.h | 54 | ||||
-rw-r--r-- | drivers/md/raid1-10.c | 81 | ||||
-rw-r--r-- | drivers/md/raid1.c | 68 | ||||
-rw-r--r-- | drivers/md/raid10.c | 25 | ||||
-rw-r--r-- | drivers/md/raid5.c | 11 |
6 files changed, 115 insertions, 126 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 8cdca0296749..c99634612fc4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2287,7 +2287,7 @@ static void export_array(struct mddev *mddev) static bool set_in_sync(struct mddev *mddev) { - WARN_ON_ONCE(!spin_is_locked(&mddev->lock)); + WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock)); if (!mddev->in_sync) { mddev->sync_checkers++; spin_unlock(&mddev->lock); diff --git a/drivers/md/md.h b/drivers/md/md.h index b50eb4ac1b82..09db03455801 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -731,58 +731,4 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } - -/* Maximum size of each resync request */ -#define RESYNC_BLOCK_SIZE (64*1024) -#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) - -/* for managing resync I/O pages */ -struct resync_pages { - unsigned idx; /* for get/put page from the pool */ - void *raid_bio; - struct page *pages[RESYNC_PAGES]; -}; - -static inline int resync_alloc_pages(struct resync_pages *rp, - gfp_t gfp_flags) -{ - int i; - - for (i = 0; i < RESYNC_PAGES; i++) { - rp->pages[i] = alloc_page(gfp_flags); - if (!rp->pages[i]) - goto out_free; - } - - return 0; - -out_free: - while (--i >= 0) - put_page(rp->pages[i]); - return -ENOMEM; -} - -static inline void resync_free_pages(struct resync_pages *rp) -{ - int i; - - for (i = 0; i < RESYNC_PAGES; i++) - put_page(rp->pages[i]); -} - -static inline void resync_get_all_pages(struct resync_pages *rp) -{ - int i; - - for (i = 0; i < RESYNC_PAGES; i++) - get_page(rp->pages[i]); -} - -static inline struct page *resync_fetch_page(struct resync_pages *rp, - unsigned idx) -{ - if (WARN_ON_ONCE(idx >= RESYNC_PAGES)) - return NULL; - return rp->pages[idx]; -} #endif /* _MD_MD_H */ diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c new file mode 100644 index 000000000000..9f2670b45f31 --- /dev/null +++ b/drivers/md/raid1-10.c @@ -0,0 +1,81 @@ +/* Maximum size of each resync request */ +#define RESYNC_BLOCK_SIZE (64*1024) +#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) + +/* for managing resync I/O pages */ +struct resync_pages { + void *raid_bio; + struct page *pages[RESYNC_PAGES]; +}; + +static inline int resync_alloc_pages(struct resync_pages *rp, + gfp_t gfp_flags) +{ + int i; + + for (i = 0; i < RESYNC_PAGES; i++) { + rp->pages[i] = alloc_page(gfp_flags); + if (!rp->pages[i]) + goto out_free; + } + + return 0; + +out_free: + while (--i >= 0) + put_page(rp->pages[i]); + return -ENOMEM; +} + +static inline void resync_free_pages(struct resync_pages *rp) +{ + int i; + + for (i = 0; i < RESYNC_PAGES; i++) + put_page(rp->pages[i]); +} + +static inline void resync_get_all_pages(struct resync_pages *rp) +{ + int i; + + for (i = 0; i < RESYNC_PAGES; i++) + get_page(rp->pages[i]); +} + +static inline struct page *resync_fetch_page(struct resync_pages *rp, + unsigned idx) +{ + if (WARN_ON_ONCE(idx >= RESYNC_PAGES)) + return NULL; + return rp->pages[idx]; +} + +/* + * 'strct resync_pages' stores actual pages used for doing the resync + * IO, and it is per-bio, so make .bi_private points to it. + */ +static inline struct resync_pages *get_resync_pages(struct bio *bio) +{ + return bio->bi_private; +} + +/* generally called after bio_reset() for reseting bvec */ +static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp, + int size) +{ + int idx = 0; + + /* initialize bvec table again */ + do { + struct page *page = resync_fetch_page(rp, idx); + int len = min_t(int, size, PAGE_SIZE); + + /* + * won't fail because the vec table is big + * enough to hold all these pages + */ + bio_add_page(bio, page, len, 0); + size -= len; + } while (idx++ < RESYNC_PAGES && size > 0); +} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3febfc8391fb..f50958ded9f0 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -81,14 +81,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); #define raid1_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) -/* - * 'strct resync_pages' stores actual pages used for doing the resync - * IO, and it is per-bio, so make .bi_private points to it. - */ -static inline struct resync_pages *get_resync_pages(struct bio *bio) -{ - return bio->bi_private; -} +#include "raid1-10.c" /* * for resync bio, r1bio pointer can be retrieved from the per-bio @@ -170,7 +163,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) resync_get_all_pages(rp); } - rp->idx = 0; rp->raid_bio = r1_bio; bio->bi_private = rp; } @@ -492,10 +484,6 @@ static void raid1_end_write_request(struct bio *bio) } if (behind) { - /* we release behind master bio when all write are done */ - if (r1_bio->behind_master_bio == bio) - to_put = NULL; - if (test_bit(WriteMostly, &rdev->flags)) atomic_dec(&r1_bio->behind_remaining); @@ -802,8 +790,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ @@ -1088,7 +1075,7 @@ static void unfreeze_array(struct r1conf *conf) wake_up(&conf->wait_barrier); } -static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, +static void alloc_behind_master_bio(struct r1bio *r1_bio, struct bio *bio) { int size = bio->bi_iter.bi_size; @@ -1098,11 +1085,13 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev); if (!behind_bio) - goto fail; + return; /* discard op, we don't support writezero/writesame yet */ - if (!bio_has_data(bio)) + if (!bio_has_data(bio)) { + behind_bio->bi_iter.bi_size = size; goto skip_copy; + } while (i < vcnt && size) { struct page *page; @@ -1123,14 +1112,13 @@ skip_copy: r1_bio->behind_master_bio = behind_bio;; set_bit(R1BIO_BehindIO, &r1_bio->state); - return behind_bio; + return; free_pages: pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_iter.bi_size); bio_free_pages(behind_bio); -fail: - return behind_bio; + bio_put(behind_bio); } struct raid1_plug_cb { @@ -1483,7 +1471,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { - mbio = alloc_behind_master_bio(r1_bio, bio); + alloc_behind_master_bio(r1_bio, bio); } bitmap_startwrite(bitmap, r1_bio->sector, @@ -1493,14 +1481,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, first_clone = 0; } - if (!mbio) { - if (r1_bio->behind_master_bio) - mbio = bio_clone_fast(r1_bio->behind_master_bio, - GFP_NOIO, - mddev->bio_set); - else - mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - } + if (r1_bio->behind_master_bio) + mbio = bio_clone_fast(r1_bio->behind_master_bio, + GFP_NOIO, mddev->bio_set); + else + mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); if (r1_bio->behind_master_bio) { if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) @@ -2086,10 +2071,7 @@ static void process_checks(struct r1bio *r1_bio) /* Fix variable parts of all bios */ vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); for (i = 0; i < conf->raid_disks * 2; i++) { - int j; - int size; blk_status_t status; - struct bio_vec *bi; struct bio *b = r1_bio->bios[i]; struct resync_pages *rp = get_resync_pages(b); if (b->bi_end_io != end_sync_read) @@ -2098,8 +2080,6 @@ static void process_checks(struct r1bio *r1_bio) status = b->bi_status; bio_reset(b); b->bi_status = status; - b->bi_vcnt = vcnt; - b->bi_iter.bi_size = r1_bio->sectors << 9; b->bi_iter.bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; b->bi_bdev = conf->mirrors[i].rdev->bdev; @@ -2107,15 +2087,8 @@ static void process_checks(struct r1bio *r1_bio) rp->raid_bio = r1_bio; b->bi_private = rp; - size = b->bi_iter.bi_size; - bio_for_each_segment_all(bi, b, j) { - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; - else - bi->bv_len = size; - size -= PAGE_SIZE; - } + /* initialize bvec table again */ + md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9); } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && @@ -2366,8 +2339,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) wbio = bio_clone_fast(r1_bio->behind_master_bio, GFP_NOIO, mddev->bio_set); - /* We really need a _all clone */ - wbio->bi_iter = (struct bvec_iter){ 0 }; } else { wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, mddev->bio_set); @@ -2619,6 +2590,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, int good_sectors = RESYNC_SECTORS; int min_bad = 0; /* number of sectors that are bad in all devices */ int idx = sector_to_idx(sector_nr); + int page_idx = 0; if (!conf->r1buf_pool) if (init_resync(conf)) @@ -2846,7 +2818,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r1_bio->bios[i]; rp = get_resync_pages(bio); if (bio->bi_end_io) { - page = resync_fetch_page(rp, rp->idx++); + page = resync_fetch_page(rp, page_idx); /* * won't fail because the vec table is big @@ -2858,7 +2830,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, nr_sectors += len>>9; sector_nr += len>>9; sync_blocks -= (len>>9); - } while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES); + } while (++page_idx < RESYNC_PAGES); r1_bio->sectors = nr_sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5026e7ad51d3..f55d4cc085f6 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -110,14 +110,7 @@ static void end_reshape(struct r10conf *conf); #define raid10_log(md, fmt, args...) \ do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) -/* - * 'strct resync_pages' stores actual pages used for doing the resync - * IO, and it is per-bio, so make .bi_private points to it. - */ -static inline struct resync_pages *get_resync_pages(struct bio *bio) -{ - return bio->bi_private; -} +#include "raid1-10.c" /* * for resync bio, r10bio pointer can be retrieved from the per-bio @@ -221,7 +214,6 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) resync_get_all_pages(rp); } - rp->idx = 0; rp->raid_bio = r10_bio; bio->bi_private = rp; if (rbio) { @@ -913,8 +905,7 @@ static void flush_pending_writes(struct r10conf *conf) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ @@ -1098,8 +1089,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) bio->bi_next = NULL; bio->bi_bdev = rdev->bdev; if (test_bit(Faulty, &rdev->flags)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ @@ -2087,8 +2077,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) rp = get_resync_pages(tbio); bio_reset(tbio); - tbio->bi_vcnt = vcnt; - tbio->bi_iter.bi_size = fbio->bi_iter.bi_size; + md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size); + rp->raid_bio = r10_bio; tbio->bi_private = rp; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; @@ -2853,6 +2843,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, sector_t sectors_skipped = 0; int chunks_skipped = 0; sector_t chunk_mask = conf->geo.chunk_mask; + int page_idx = 0; if (!conf->r10buf_pool) if (init_resync(conf)) @@ -3355,7 +3346,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, break; for (bio= biolist ; bio ; bio=bio->bi_next) { struct resync_pages *rp = get_resync_pages(bio); - page = resync_fetch_page(rp, rp->idx++); + page = resync_fetch_page(rp, page_idx); /* * won't fail because the vec table is big enough * to hold all these pages @@ -3364,7 +3355,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, } nr_sectors += len>>9; sector_nr += len>>9; - } while (get_resync_pages(biolist)->idx < RESYNC_PAGES); + } while (++page_idx < RESYNC_PAGES); r10_bio->sectors = nr_sectors; while (biolist) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index aeeb8d6854e2..0fc2748aaf95 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3381,9 +3381,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_status = BLK_STS_IOERR; md_write_end(conf->mddev); - bio_endio(bi); + bio_io_error(bi); bi = nextbi; } if (bitmap_end) @@ -3403,9 +3402,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_status = BLK_STS_IOERR; md_write_end(conf->mddev); - bio_endio(bi); + bio_io_error(bi); bi = bi2; } @@ -3429,8 +3427,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - bi->bi_status = BLK_STS_IOERR; - bio_endio(bi); + bio_io_error(bi); bi = nextbi; } } @@ -6237,6 +6234,8 @@ static void raid5_do_work(struct work_struct *work) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); + + async_tx_issue_pending_all(); blk_finish_plug(&plug); pr_debug("--- raid5worker inactive\n"); |