summaryrefslogtreecommitdiff
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-21 15:49:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-21 15:49:14 -0700
commit6e4513972a5ad28517477d21f301a02ac7a0df76 (patch)
treec88abe8297ea3971f722e0787842fb790d14767a /drivers/md/raid1.c
parentd79df0b1eda0099a22cbcece01ce5e7d222450de (diff)
parent19fdb9eefb21b72edbc365b838502780c392bad6 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (45 commits) md: don't insist on valid event count for spare devices. md: simplify updating of event count to sometimes avoid updating spares. md/raid6: Fix raid-6 read-error correction in degraded state md: restore ability of spare drives to spin down. md: Fix read balancing in RAID1 and RAID10 on drives > 2TB md/linear: standardise all printk messages md/raid0: tidy up printk messages. md/raid10: tidy up printk messages. md/raid1: improve printk messages md/raid5: improve consistency of error messages. md: remove EXPERIMENTAL designation from RAID10 md: allow integers to be passed to md/level md: notify mdstat waiters of level change md/raid4: permit raid0 takeover md/raid1: delay reads that could overtake behind-writes. md/raid1: fix confusing 'redirect sector' message. md: don't unregister the thread in mddev_suspend md: factor out init code for an mddev md: pass mddev to make_request functions rather than request_queue md: call md_stop_writes from md_stop ...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c114
1 files changed, 59 insertions, 55 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e59b10e66edb..a948da8012de 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -263,7 +263,7 @@ static inline void update_head_pos(int disk, r1bio_t *r1_bio)
static void raid1_end_read_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+ r1bio_t *r1_bio = bio->bi_private;
int mirror;
conf_t *conf = r1_bio->mddev->private;
@@ -297,7 +297,8 @@ static void raid1_end_read_request(struct bio *bio, int error)
*/
char b[BDEVNAME_SIZE];
if (printk_ratelimit())
- printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n",
+ printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n",
+ mdname(conf->mddev),
bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
reschedule_retry(r1_bio);
}
@@ -308,7 +309,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
static void raid1_end_write_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+ r1bio_t *r1_bio = bio->bi_private;
int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
conf_t *conf = r1_bio->mddev->private;
struct bio *to_put = NULL;
@@ -418,7 +419,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
*/
static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
- const unsigned long this_sector = r1_bio->sector;
+ const sector_t this_sector = r1_bio->sector;
int new_disk = conf->last_used, disk = new_disk;
int wonly_disk = -1;
const int sectors = r1_bio->sectors;
@@ -434,7 +435,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
retry:
if (conf->mddev->recovery_cp < MaxSector &&
(this_sector + sectors >= conf->next_resync)) {
- /* Choose the first operation device, for consistancy */
+ /* Choose the first operational device, for consistancy */
new_disk = 0;
for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
@@ -774,9 +775,8 @@ do_sync_io:
return NULL;
}
-static int make_request(struct request_queue *q, struct bio * bio)
+static int make_request(mddev_t *mddev, struct bio * bio)
{
- mddev_t *mddev = q->queuedata;
conf_t *conf = mddev->private;
mirror_info_t *mirror;
r1bio_t *r1_bio;
@@ -788,7 +788,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
struct page **behind_pages = NULL;
const int rw = bio_data_dir(bio);
const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
- int cpu;
bool do_barriers;
mdk_rdev_t *blocked_rdev;
@@ -834,12 +833,6 @@ static int make_request(struct request_queue *q, struct bio * bio)
bitmap = mddev->bitmap;
- cpu = part_stat_lock();
- part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
- part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
- bio_sectors(bio));
- part_stat_unlock();
-
/*
* make_request() can abort the operation when READA is being
* used and no empty request is available.
@@ -866,6 +859,15 @@ static int make_request(struct request_queue *q, struct bio * bio)
}
mirror = conf->mirrors + rdisk;
+ if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+ bitmap) {
+ /* Reading from a write-mostly device must
+ * take care not to over-take any writes
+ * that are 'behind'
+ */
+ wait_event(bitmap->behind_wait,
+ atomic_read(&bitmap->behind_writes) == 0);
+ }
r1_bio->read_disk = rdisk;
read_bio = bio_clone(bio, GFP_NOIO);
@@ -912,9 +914,10 @@ static int make_request(struct request_queue *q, struct bio * bio)
if (test_bit(Faulty, &rdev->flags)) {
rdev_dec_pending(rdev, mddev);
r1_bio->bios[i] = NULL;
- } else
+ } else {
r1_bio->bios[i] = bio;
- targets++;
+ targets++;
+ }
} else
r1_bio->bios[i] = NULL;
}
@@ -942,10 +945,14 @@ static int make_request(struct request_queue *q, struct bio * bio)
set_bit(R1BIO_Degraded, &r1_bio->state);
}
- /* do behind I/O ? */
+ /* do behind I/O ?
+ * Not if there are too many, or cannot allocate memory,
+ * or a reader on WriteMostly is waiting for behind writes
+ * to flush */
if (bitmap &&
(atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) &&
+ !waitqueue_active(&bitmap->behind_wait) &&
(behind_pages = alloc_behind_pages(bio)) != NULL)
set_bit(R1BIO_BehindIO, &r1_bio->state);
@@ -1070,21 +1077,22 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} else
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- printk(KERN_ALERT "raid1: Disk failure on %s, disabling device.\n"
- "raid1: Operation continuing on %d devices.\n",
- bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
+ printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n"
+ KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n",
+ mdname(mddev), bdevname(rdev->bdev, b),
+ mdname(mddev), conf->raid_disks - mddev->degraded);
}
static void print_conf(conf_t *conf)
{
int i;
- printk("RAID1 conf printout:\n");
+ printk(KERN_DEBUG "RAID1 conf printout:\n");
if (!conf) {
- printk("(!conf)\n");
+ printk(KERN_DEBUG "(!conf)\n");
return;
}
- printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
+ printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks);
rcu_read_lock();
@@ -1092,7 +1100,7 @@ static void print_conf(conf_t *conf)
char b[BDEVNAME_SIZE];
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev)
- printk(" disk %d, wo:%d, o:%d, dev:%s\n",
+ printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
i, !test_bit(In_sync, &rdev->flags),
!test_bit(Faulty, &rdev->flags),
bdevname(rdev->bdev,b));
@@ -1223,7 +1231,7 @@ abort:
static void end_sync_read(struct bio *bio, int error)
{
- r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+ r1bio_t *r1_bio = bio->bi_private;
int i;
for (i=r1_bio->mddev->raid_disks; i--; )
@@ -1246,7 +1254,7 @@ static void end_sync_read(struct bio *bio, int error)
static void end_sync_write(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
+ r1bio_t *r1_bio = bio->bi_private;
mddev_t *mddev = r1_bio->mddev;
conf_t *conf = mddev->private;
int i;
@@ -1453,9 +1461,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
char b[BDEVNAME_SIZE];
/* Cannot read from anywhere, array is toast */
md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
- printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
+ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
" for block %llu\n",
- bdevname(bio->bi_bdev,b),
+ mdname(mddev),
+ bdevname(bio->bi_bdev, b),
(unsigned long long)r1_bio->sector);
md_done_sync(mddev, r1_bio->sectors, 0);
put_buf(r1_bio);
@@ -1577,7 +1586,7 @@ static void fix_read_error(conf_t *conf, int read_disk,
else {
atomic_add(s, &rdev->corrected_errors);
printk(KERN_INFO
- "raid1:%s: read error corrected "
+ "md/raid1:%s: read error corrected "
"(%d sectors at %llu on %s)\n",
mdname(mddev), s,
(unsigned long long)(sect +
@@ -1682,8 +1691,9 @@ static void raid1d(mddev_t *mddev)
bio = r1_bio->bios[r1_bio->read_disk];
if ((disk=read_balance(conf, r1_bio)) == -1) {
- printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
+ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O"
" read error for block %llu\n",
+ mdname(mddev),
bdevname(bio->bi_bdev,b),
(unsigned long long)r1_bio->sector);
raid_end_bio_io(r1_bio);
@@ -1697,10 +1707,11 @@ static void raid1d(mddev_t *mddev)
r1_bio->bios[r1_bio->read_disk] = bio;
rdev = conf->mirrors[disk].rdev;
if (printk_ratelimit())
- printk(KERN_ERR "raid1: %s: redirecting sector %llu to"
- " another mirror\n",
- bdevname(rdev->bdev,b),
- (unsigned long long)r1_bio->sector);
+ printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to"
+ " other mirror: %s\n",
+ mdname(mddev),
+ (unsigned long long)r1_bio->sector,
+ bdevname(rdev->bdev,b));
bio->bi_sector = r1_bio->sector + rdev->data_offset;
bio->bi_bdev = rdev->bdev;
bio->bi_end_io = raid1_end_read_request;
@@ -1755,13 +1766,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
int still_degraded = 0;
if (!conf->r1buf_pool)
- {
-/*
- printk("sync start - bitmap %p\n", mddev->bitmap);
-*/
if (init_resync(conf))
return 0;
- }
max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
@@ -2042,7 +2048,7 @@ static conf_t *setup_conf(mddev_t *mddev)
err = -EIO;
if (conf->last_used < 0) {
- printk(KERN_ERR "raid1: no operational mirrors for %s\n",
+ printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
mdname(mddev));
goto abort;
}
@@ -2050,7 +2056,7 @@ static conf_t *setup_conf(mddev_t *mddev)
conf->thread = md_register_thread(raid1d, mddev, NULL);
if (!conf->thread) {
printk(KERN_ERR
- "raid1: couldn't allocate thread for %s\n",
+ "md/raid1:%s: couldn't allocate thread\n",
mdname(mddev));
goto abort;
}
@@ -2076,12 +2082,12 @@ static int run(mddev_t *mddev)
mdk_rdev_t *rdev;
if (mddev->level != 1) {
- printk("raid1: %s: raid level not set to mirroring (%d)\n",
+ printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
mdname(mddev), mddev->level);
return -EIO;
}
if (mddev->reshape_position != MaxSector) {
- printk("raid1: %s: reshape_position set but not supported\n",
+ printk(KERN_ERR "md/raid1:%s: reshape_position set but not supported\n",
mdname(mddev));
return -EIO;
}
@@ -2124,11 +2130,11 @@ static int run(mddev_t *mddev)
mddev->recovery_cp = MaxSector;
if (mddev->recovery_cp != MaxSector)
- printk(KERN_NOTICE "raid1: %s is not clean"
+ printk(KERN_NOTICE "md/raid1:%s: not clean"
" -- starting background reconstruction\n",
mdname(mddev));
printk(KERN_INFO
- "raid1: raid set %s active with %d out of %d mirrors\n",
+ "md/raid1:%s: active with %d out of %d mirrors\n",
mdname(mddev), mddev->raid_disks - mddev->degraded,
mddev->raid_disks);
@@ -2152,15 +2158,14 @@ static int stop(mddev_t *mddev)
{
conf_t *conf = mddev->private;
struct bitmap *bitmap = mddev->bitmap;
- int behind_wait = 0;
/* wait for behind writes to complete */
- while (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
- behind_wait++;
- printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ); /* wait a second */
+ if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
+ printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n",
+ mdname(mddev));
/* need to kick something here to make sure I/O goes? */
+ wait_event(bitmap->behind_wait,
+ atomic_read(&bitmap->behind_writes) == 0);
}
raise_barrier(conf);
@@ -2191,7 +2196,6 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
- mddev->changed = 1;
revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp == MaxSector) {
@@ -2286,9 +2290,9 @@ static int raid1_reshape(mddev_t *mddev)
if (sysfs_create_link(&mddev->kobj,
&rdev->kobj, nm))
printk(KERN_WARNING
- "md/raid1: cannot register "
- "%s for %s\n",
- nm, mdname(mddev));
+ "md/raid1:%s: cannot register "
+ "%s\n",
+ mdname(mddev), nm);
}
if (rdev)
newmirrors[d2++].rdev = rdev;