diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 86 |
1 files changed, 38 insertions, 48 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index aea11476fee6..8a1354a08a1a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -64,31 +64,6 @@ * [B A] [D C] [B A] [E C D] */ -/* - * Number of guaranteed r10bios in case of extreme VM load: - */ -#define NR_RAID10_BIOS 256 - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio *)1) -/* When we successfully write to a known bad-block, we need to remove the - * bad-block marking which must be done from process context. So we record - * the success by setting devs[n].bio to IO_MADE_GOOD - */ -#define IO_MADE_GOOD ((struct bio *)2) - -#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) - -/* When there are this many requests queued to be written by - * the raid10 thread, we become 'congested' to provide back-pressure - * for writeback. - */ -static int max_queued_requests = 1024; - static void allow_barrier(struct r10conf *conf); static void lower_barrier(struct r10conf *conf); static int _enough(struct r10conf *conf, int previous, int ignore); @@ -123,11 +98,6 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) return kzalloc(size, gfp_flags); } -static void r10bio_pool_free(void *r10_bio, void *data) -{ - kfree(r10_bio); -} - #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) /* amount of memory to reserve for resync requests */ #define RESYNC_WINDOW (1024*1024) @@ -233,7 +203,7 @@ out_free_bio: } kfree(rps); out_free_r10bio: - r10bio_pool_free(r10_bio, conf); + rbio_pool_free(r10_bio, conf); return NULL; } @@ -261,7 +231,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data) /* resync pages array stored in the 1st bio's .bi_private */ kfree(rp); - r10bio_pool_free(r10bio, conf); + rbio_pool_free(r10bio, conf); } static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) @@ -737,15 +707,19 @@ static struct md_rdev *read_balance(struct r10conf *conf, int sectors = r10_bio->sectors; int best_good_sectors; sector_t new_distance, best_dist; - struct md_rdev *best_rdev, *rdev = NULL; + struct md_rdev *best_dist_rdev, *best_pending_rdev, *rdev = NULL; int do_balance; - int best_slot; + int best_dist_slot, best_pending_slot; + bool has_nonrot_disk = false; + unsigned int min_pending; struct geom *geo = &conf->geo; raid10_find_phys(conf, r10_bio); rcu_read_lock(); - best_slot = -1; - best_rdev = NULL; + best_dist_slot = -1; + min_pending = UINT_MAX; + best_dist_rdev = NULL; + best_pending_rdev = NULL; best_dist = MaxSector; best_good_sectors = 0; do_balance = 1; @@ -767,6 +741,8 @@ static struct md_rdev *read_balance(struct r10conf *conf, sector_t first_bad; int bad_sectors; sector_t dev_sector; + unsigned int pending; + bool nonrot; if (r10_bio->devs[slot].bio == IO_BLOCKED) continue; @@ -803,8 +779,8 @@ static struct md_rdev *read_balance(struct r10conf *conf, first_bad - dev_sector; if (good_sectors > best_good_sectors) { best_good_sectors = good_sectors; - best_slot = slot; - best_rdev = rdev; + best_dist_slot = slot; + best_dist_rdev = rdev; } if (!do_balance) /* Must read from here */ @@ -817,14 +793,23 @@ static struct md_rdev *read_balance(struct r10conf *conf, if (!do_balance) break; - if (best_slot >= 0) + nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev)); + has_nonrot_disk |= nonrot; + pending = atomic_read(&rdev->nr_pending); + if (min_pending > pending && nonrot) { + min_pending = pending; + best_pending_slot = slot; + best_pending_rdev = rdev; + } + + if (best_dist_slot >= 0) /* At least 2 disks to choose from so failfast is OK */ set_bit(R10BIO_FailFast, &r10_bio->state); /* This optimisation is debatable, and completely destroys * sequential read speed for 'far copies' arrays. So only * keep it for 'near' arrays, and review those later. */ - if (geo->near_copies > 1 && !atomic_read(&rdev->nr_pending)) + if (geo->near_copies > 1 && !pending) new_distance = 0; /* for far > 1 always use the lowest address */ @@ -833,15 +818,21 @@ static struct md_rdev *read_balance(struct r10conf *conf, else new_distance = abs(r10_bio->devs[slot].addr - conf->mirrors[disk].head_position); + if (new_distance < best_dist) { best_dist = new_distance; - best_slot = slot; - best_rdev = rdev; + best_dist_slot = slot; + best_dist_rdev = rdev; } } if (slot >= conf->copies) { - slot = best_slot; - rdev = best_rdev; + if (has_nonrot_disk) { + slot = best_pending_slot; + rdev = best_pending_rdev; + } else { + slot = best_dist_slot; + rdev = best_dist_rdev; + } } if (slot >= 0) { @@ -3675,8 +3666,8 @@ static struct r10conf *setup_conf(struct mddev *mddev) conf->geo = geo; conf->copies = copies; - err = mempool_init(&conf->r10bio_pool, NR_RAID10_BIOS, r10bio_pool_alloc, - r10bio_pool_free, conf); + err = mempool_init(&conf->r10bio_pool, NR_RAID_BIOS, r10bio_pool_alloc, + rbio_pool_free, conf); if (err) goto out; @@ -4780,8 +4771,7 @@ static int handle_reshape_read_error(struct mddev *mddev, int idx = 0; struct page **pages; - r10b = kmalloc(sizeof(*r10b) + - sizeof(struct r10dev) * conf->copies, GFP_NOIO); + r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO); if (!r10b) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); return -ENOMEM; |