diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2014-04-28 18:43:19 +0200 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-04-30 13:46:54 -0600 |
commit | e82998743385ca861b9ec919eb2ba8177ce72180 (patch) | |
tree | 4cf895d0a9633eff1f11363f98ebe60713a4bcd7 | |
parent | 0e49d7b014c5d591a053d08888a455bd74a88646 (diff) |
drbd: don't let application IO pre-empt resync too often
Before, application IO could pre-empt resync activity
for up to a hardcoded 20 seconds per resync request.
A very busy server could throttle the effective resync bandwidth
down to one request per 20 seconds.
Now, we only let application IO pre-empt resync traffic
while the current resync rate estimate is above c-min-rate.
If you disable the c-min-rate throttle feature (set c-min-rate = 0),
application IO will no longer pre-empt resync traffic at all.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 13 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 3 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 47 |
3 files changed, 34 insertions, 29 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 7e7b0e143655..8dd09a7f23c6 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -1022,8 +1022,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector) unsigned int enr = BM_SECT_TO_EXT(sector); struct bm_extent *bm_ext; int i, sig; - int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait. - 200 times -> 20 seconds. */ + bool sa; retry: sig = wait_event_interruptible(device->al_wait, @@ -1034,12 +1033,15 @@ retry: if (test_bit(BME_LOCKED, &bm_ext->flags)) return 0; + /* step aside only while we are above c-min-rate; unless disabled. */ + sa = drbd_rs_c_min_rate_throttle(device); + for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { sig = wait_event_interruptible(device->al_wait, !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || - test_bit(BME_PRIORITY, &bm_ext->flags)); + (sa && test_bit(BME_PRIORITY, &bm_ext->flags))); - if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) { + if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) { spin_lock_irq(&device->al_lock); if (lc_put(device->resync, &bm_ext->lce) == 0) { bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ @@ -1051,9 +1053,6 @@ retry: return -EINTR; if (schedule_timeout_interruptible(HZ/10)) return -EINTR; - if (sa && --sa == 0) - drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec." 
- "Resync stalled?\n"); goto retry; } } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 361a2e9cd727..f0cabea5cda2 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1339,7 +1339,8 @@ extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_asender(struct drbd_thread *thi); -extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); +extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); +extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); extern int drbd_submit_peer_request(struct drbd_device *, struct drbd_peer_request *, const unsigned, const int); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6ffbc22eba0b..10d2dcb16bff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2323,39 +2323,45 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) +bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) { - struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; - unsigned long db, dt, dbdt; struct lc_element *tmp; - int curr_events; - int throttle = 0; - unsigned int c_min_rate; + bool throttle = true; - rcu_read_lock(); - c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; - rcu_read_unlock(); - - /* feature disabled? 
*/ - if (c_min_rate == 0) - return 0; + if (!drbd_rs_c_min_rate_throttle(device)) + return false; spin_lock_irq(&device->al_lock); tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); if (tmp) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); - if (test_bit(BME_PRIORITY, &bm_ext->flags)) { - spin_unlock_irq(&device->al_lock); - return 0; - } + if (test_bit(BME_PRIORITY, &bm_ext->flags)) + throttle = false; /* Do not slow down if app IO is already waiting for this extent */ } spin_unlock_irq(&device->al_lock); + return throttle; +} + +bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) +{ + struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; + unsigned long db, dt, dbdt; + unsigned int c_min_rate; + int curr_events; + + rcu_read_lock(); + c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; + rcu_read_unlock(); + + /* feature disabled? */ + if (c_min_rate == 0) + return false; + curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + (int)part_stat_read(&disk->part0, sectors[1]) - atomic_read(&device->rs_sect_ev); - if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { unsigned long rs_left; int i; @@ -2378,12 +2384,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) dbdt = Bit2KB(db/dt); if (dbdt > c_min_rate) - throttle = 1; + return true; } - return throttle; + return false; } - static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) { struct drbd_peer_device *peer_device; |