summaryrefslogtreecommitdiff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 16:07:26 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-14 16:07:26 -0800
commit47f521ba18190e4bfbb65ead3977af5756884427 (patch)
tree54d6039d71149d8596b66a1d41cfd9eb7f334601 /drivers/md/md.c
parentb91593fa8531a7396551dd9c0a0c51e9b9b97ca9 (diff)
parent0868b99c214a3d55486c700de7c3f770b7243e7c (diff)
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD update from Shaohua Li: "This update mostly includes bug fixes: - md-cluster now supports raid10 from Guoqing - raid5 PPL fixes from Artur - badblock regression fix from Bo - suspend hang related fixes from Neil - raid5 reshape fixes from Neil - raid1 freeze deadlock fix from Nate - memleak fixes from Zdenek - bitmap related fixes from Me and Tao - other fixes and cleanups" * 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (33 commits) md: free unused memory after bitmap resize md: release allocated bitset sync_set md/bitmap: clear BITMAP_WRITE_ERROR bit before writing it to sb md: be cautious about using ->curr_resync_completed for ->recovery_offset badblocks: fix wrong return value in badblocks_set if badblocks are disabled md: don't check MD_SB_CHANGE_CLEAN in md_allow_write md-cluster: update document for raid10 md: remove redundant variable q raid1: remove obsolete code in raid1_write_request md-cluster: Use a small window for raid10 resync md-cluster: Suspend writes in RAID10 if within range md-cluster/raid10: set "do_balance = 0" if area is resyncing md: use lockdep_assert_held raid1: prevent freeze_array/wait_all_barriers deadlock md: use TASK_IDLE instead of blocking signals md: remove special meaning of ->quiesce(.., 2) md: allow metadata update while suspending. md: use mddev_suspend/resume instead of ->quiesce() md: move suspend_hi/lo handling into core md code md: don't call bitmap_create() while array is quiesced. ...
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c147
1 files changed, 94 insertions, 53 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 447ddcbc9566..09c3af3dcdca 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -69,7 +69,7 @@
#include <trace/events/block.h>
#include "md.h"
-#include "bitmap.h"
+#include "md-bitmap.h"
#include "md-cluster.h"
#ifndef MODULE
@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
* call has finished, the bio has been linked into some internal structure
* and so is visible to ->quiesce(), so we don't need the refcount any more.
*/
+static bool is_suspended(struct mddev *mddev, struct bio *bio)
+{
+ if (mddev->suspended)
+ return true;
+ if (bio_data_dir(bio) != WRITE)
+ return false;
+ if (mddev->suspend_lo >= mddev->suspend_hi)
+ return false;
+ if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
+ return false;
+ if (bio_end_sector(bio) < mddev->suspend_lo)
+ return false;
+ return true;
+}
+
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
rcu_read_lock();
- if (mddev->suspended) {
+ if (is_suspended(mddev, bio)) {
DEFINE_WAIT(__wait);
for (;;) {
prepare_to_wait(&mddev->sb_wait, &__wait,
TASK_UNINTERRUPTIBLE);
- if (!mddev->suspended)
+ if (!is_suspended(mddev, bio))
break;
rcu_read_unlock();
schedule();
@@ -344,12 +359,17 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
void mddev_suspend(struct mddev *mddev)
{
WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
+ lockdep_assert_held(&mddev->reconfig_mutex);
if (mddev->suspended++)
return;
synchronize_rcu();
wake_up(&mddev->sb_wait);
+ set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
+ smp_mb__after_atomic();
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
+ clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
+ wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
del_timer_sync(&mddev->safemode_timer);
}
@@ -357,6 +377,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
+ lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
wake_up(&mddev->sb_wait);
@@ -663,6 +684,7 @@ void mddev_unlock(struct mddev *mddev)
*/
spin_lock(&pers_lock);
md_wakeup_thread(mddev->thread);
+ wake_up(&mddev->sb_wait);
spin_unlock(&pers_lock);
}
EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -2313,7 +2335,7 @@ static void export_array(struct mddev *mddev)
static bool set_in_sync(struct mddev *mddev)
{
- WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));
+ lockdep_assert_held(&mddev->lock);
if (!mddev->in_sync) {
mddev->sync_checkers++;
spin_unlock(&mddev->lock);
@@ -2432,10 +2454,18 @@ repeat:
}
}
- /* First make sure individual recovery_offsets are correct */
+ /*
+ * First make sure individual recovery_offsets are correct
+ * curr_resync_completed can only be used during recovery.
+ * During reshape/resync it might use array-addresses rather
+ * that device addresses.
+ */
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 &&
mddev->delta_disks >= 0 &&
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
!test_bit(Journal, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
mddev->curr_resync_completed > rdev->recovery_offset)
@@ -4824,7 +4854,7 @@ suspend_lo_show(struct mddev *mddev, char *page)
static ssize_t
suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
{
- unsigned long long old, new;
+ unsigned long long new;
int err;
err = kstrtoull(buf, 10, &new);
@@ -4840,16 +4870,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL)
goto unlock;
- old = mddev->suspend_lo;
+ mddev_suspend(mddev);
mddev->suspend_lo = new;
- if (new >= old)
- /* Shrinking suspended region */
- mddev->pers->quiesce(mddev, 2);
- else {
- /* Expanding suspended region - need to wait */
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- }
+ mddev_resume(mddev);
+
err = 0;
unlock:
mddev_unlock(mddev);
@@ -4867,7 +4891,7 @@ suspend_hi_show(struct mddev *mddev, char *page)
static ssize_t
suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
{
- unsigned long long old, new;
+ unsigned long long new;
int err;
err = kstrtoull(buf, 10, &new);
@@ -4880,19 +4904,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
if (err)
return err;
err = -EINVAL;
- if (mddev->pers == NULL ||
- mddev->pers->quiesce == NULL)
+ if (mddev->pers == NULL)
goto unlock;
- old = mddev->suspend_hi;
+
+ mddev_suspend(mddev);
mddev->suspend_hi = new;
- if (new <= old)
- /* Shrinking suspended region */
- mddev->pers->quiesce(mddev, 2);
- else {
- /* Expanding suspended region - need to wait */
- mddev->pers->quiesce(mddev, 1);
- mddev->pers->quiesce(mddev, 0);
- }
+ mddev_resume(mddev);
+
err = 0;
unlock:
mddev_unlock(mddev);
@@ -5834,8 +5852,14 @@ void md_stop(struct mddev *mddev)
* This is called from dm-raid
*/
__md_stop(mddev);
- if (mddev->bio_set)
+ if (mddev->bio_set) {
bioset_free(mddev->bio_set);
+ mddev->bio_set = NULL;
+ }
+ if (mddev->sync_set) {
+ bioset_free(mddev->sync_set);
+ mddev->sync_set = NULL;
+ }
}
EXPORT_SYMBOL_GPL(md_stop);
@@ -6362,7 +6386,7 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
break;
}
}
- if (has_journal) {
+ if (has_journal || mddev->bitmap) {
export_rdev(rdev);
return -EBUSY;
}
@@ -6618,22 +6642,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
return -ENOENT; /* cannot remove what isn't there */
err = 0;
if (mddev->pers) {
- mddev->pers->quiesce(mddev, 1);
if (fd >= 0) {
struct bitmap *bitmap;
bitmap = bitmap_create(mddev, -1);
+ mddev_suspend(mddev);
if (!IS_ERR(bitmap)) {
mddev->bitmap = bitmap;
err = bitmap_load(mddev);
} else
err = PTR_ERR(bitmap);
- }
- if (fd < 0 || err) {
+ if (err) {
+ bitmap_destroy(mddev);
+ fd = -1;
+ }
+ mddev_resume(mddev);
+ } else if (fd < 0) {
+ mddev_suspend(mddev);
bitmap_destroy(mddev);
- fd = -1; /* make sure to put the file */
+ mddev_resume(mddev);
}
- mddev->pers->quiesce(mddev, 0);
}
if (fd < 0) {
struct file *f = mddev->bitmap_info.file;
@@ -6735,7 +6763,7 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
{
- WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
+ lockdep_assert_held(&mddev->reconfig_mutex);
if (mddev->external_size)
return;
@@ -6917,8 +6945,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
mddev->bitmap_info.default_space;
- mddev->pers->quiesce(mddev, 1);
bitmap = bitmap_create(mddev, -1);
+ mddev_suspend(mddev);
if (!IS_ERR(bitmap)) {
mddev->bitmap = bitmap;
rv = bitmap_load(mddev);
@@ -6926,7 +6954,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
rv = PTR_ERR(bitmap);
if (rv)
bitmap_destroy(mddev);
- mddev->pers->quiesce(mddev, 0);
+ mddev_resume(mddev);
} else {
/* remove the bitmap */
if (!mddev->bitmap) {
@@ -6949,9 +6977,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.nodes = 0;
md_cluster_ops->leave(mddev);
}
- mddev->pers->quiesce(mddev, 1);
+ mddev_suspend(mddev);
bitmap_destroy(mddev);
- mddev->pers->quiesce(mddev, 0);
+ mddev_resume(mddev);
mddev->bitmap_info.offset = 0;
}
}
@@ -7468,8 +7496,8 @@ void md_wakeup_thread(struct md_thread *thread)
{
if (thread) {
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
- if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags))
- wake_up(&thread->wqueue);
+ set_bit(THREAD_WAKEUP, &thread->flags);
+ wake_up(&thread->wqueue);
}
}
EXPORT_SYMBOL(md_wakeup_thread);
@@ -8039,7 +8067,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
+ mddev->suspended);
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
percpu_ref_put(&mddev->writes_pending);
return false;
@@ -8110,7 +8139,6 @@ void md_allow_write(struct mddev *mddev)
sysfs_notify_dirent_safe(mddev->sysfs_state);
/* wait for the dirty state to be recorded in the metadata */
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
} else
spin_unlock(&mddev->lock);
@@ -8477,16 +8505,19 @@ void md_do_sync(struct md_thread *thread)
} else {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector;
- rcu_read_lock();
- rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- mddev->delta_disks >= 0 &&
- !test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < mddev->curr_resync)
- rdev->recovery_offset = mddev->curr_resync;
- rcu_read_unlock();
+ if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (rdev->raid_disk >= 0 &&
+ mddev->delta_disks >= 0 &&
+ !test_bit(Journal, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags) &&
+ !test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < mddev->curr_resync)
+ rdev->recovery_offset = mddev->curr_resync;
+ rcu_read_unlock();
+ }
}
}
skip:
@@ -8813,6 +8844,16 @@ void md_check_recovery(struct mddev *mddev)
unlock:
wake_up(&mddev->sb_wait);
mddev_unlock(mddev);
+ } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+ /* Write superblock - thread that called mddev_suspend()
+ * holds reconfig_mutex for us.
+ */
+ set_bit(MD_UPDATING_SB, &mddev->flags);
+ smp_mb__after_atomic();
+ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+ md_update_sb(mddev, 0);
+ clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+ wake_up(&mddev->sb_wait);
}
}
EXPORT_SYMBOL(md_check_recovery);