From 307615a26e95406c42c95916a66ba50434567e0f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:39 +0100 Subject: dm thin: do not set discard_zeroes_data The dm thin pool target claims to support the zeroing of discarded data areas. This turns out to be incorrect when processing discards that do not exactly cover a complete number of blocks, so the target must always set discard_zeroes_data_unsupported. The thin pool target will zero blocks when they are allocated if the skip_block_zeroing feature is not specified. The block layer may send a discard that only partly covers a block. If a thin pool block is partially discarded then there is no guarantee that the discarded data will get zeroed before it is accessed again. Due to this, thin devices cannot claim discards will always zero data. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/dm-thin.c') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index af1fc3b2c2ad..d4209231069d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2274,6 +2274,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * thin devices' discard limits consistent). */ ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; } ti->private = pt; @@ -2745,7 +2746,6 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) * boundary is not sent to this target. */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->discard_zeroes_data = pool->pf.zero_new_blocks; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) -- cgit v1.2.3-58-ga151 From 9bc142dd755d360c08a91ecb107d218787a2e9db Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:46 +0100 Subject: dm thin: tidy discard support A little thin discard code refactoring to make the next patch (dm thin: fix discard support for data devices) more readable. Pull out a couple of functions (and uses bools instead of unsigned for features). No functional changes. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 64 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'drivers/md/dm-thin.c') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d4209231069d..e99f4134dbd7 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -509,9 +509,9 @@ enum pool_mode { struct pool_features { enum pool_mode mode; - unsigned zero_new_blocks:1; - unsigned discard_enabled:1; - unsigned discard_passdown:1; + bool zero_new_blocks:1; + bool discard_enabled:1; + bool discard_passdown:1; }; struct thin_c; @@ -1839,6 +1839,32 @@ static void __requeue_bios(struct pool *pool) /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ +static bool data_dev_supports_discard(struct pool_c *pt) +{ + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); + + return q && blk_queue_discard(q); +} + +/* + * If discard_passdown was enabled verify that the data device + * supports discards. Disable discard_passdown if not; otherwise + * -EOPNOTSUPP will be returned. + */ +static void disable_passdown_if_not_supported(struct pool_c *pt, + struct pool_features *pf) +{ + char buf[BDEVNAME_SIZE]; + + if (!pf->discard_passdown || data_dev_supports_discard(pt)) + return; + + DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", + bdevname(pt->data_dev->bdev, buf)); + + pf->discard_passdown = false; +} + static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; @@ -1855,23 +1881,9 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; pool->pf = pt->pf; - set_pool_mode(pool, new_mode); - /* - * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. - */ - /* FIXME: pull this out into a sep fn. */ - if (pt->pf.discard_passdown) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - if (!q || !blk_queue_discard(q)) { - char buf[BDEVNAME_SIZE]; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); - pool->pf.discard_passdown = 0; - } - } + disable_passdown_if_not_supported(pt, &pool->pf); + set_pool_mode(pool, new_mode); return 0; } @@ -1889,9 +1901,9 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) static void pool_features_init(struct pool_features *pf) { pf->mode = PM_WRITE; - pf->zero_new_blocks = 1; - pf->discard_enabled = 1; - pf->discard_passdown = 1; + pf->zero_new_blocks = true; + pf->discard_enabled = true; + pf->discard_passdown = true; } static void __pool_destroy(struct pool *pool) @@ -2119,13 +2131,13 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, argc--; if (!strcasecmp(arg_name, "skip_block_zeroing")) - pf->zero_new_blocks = 0; + pf->zero_new_blocks = false; else if (!strcasecmp(arg_name, "ignore_discard")) - pf->discard_enabled = 0; + pf->discard_enabled = false; else if (!strcasecmp(arg_name, "no_discard_passdown")) - pf->discard_passdown = 0; + pf->discard_passdown = false; else if (!strcasecmp(arg_name, "read_only")) pf->mode = PM_READ_ONLY; @@ -2261,6 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->pf = pf; ti->num_flush_requests = 1; + /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2268,6 +2281,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) */ if (pf.discard_enabled && pf.discard_passdown) { ti->num_discard_requests = 1; + /* * Setting 'discards_supported' circumvents the normal * stacking of discard limits (this keeps the pool and -- cgit v1.2.3-58-ga151 From 0424caa14508f19ca8093d36c15250e0331a3a0a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 26 Sep 2012 23:45:47 +0100 Subject: dm thin: fix discard support for data devices The discard limits that get established for a thin-pool or thin device may be incompatible with the pool's data device. Avoid this by checking the discard limits of the pool's data device. If an incompatibility is found then the pool's 'discard passdown' feature is disabled. Change thin_io_hints to ensure that a thin device always uses the same queue limits as its pool device. Introduce requested_pf to track whether or not the table line originally contained the no_discard_passdown flag and use this directly for table output. We prepare the correct setting for discard_passdown directly in bind_control_target (called from pool_io_hints) and store it in adjusted_pf rather than waiting until we have access to pool->pf in pool_preresume. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 87 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 30 deletions(-) (limited to 'drivers/md/dm-thin.c') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e99f4134dbd7..c29410af1e22 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -580,7 +580,8 @@ struct pool_c { struct dm_target_callbacks callbacks; dm_block_t low_water_blocks; - struct pool_features pf; + struct pool_features requested_pf; /* Features requested during table load */ + struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */ }; /* @@ -1848,21 +1849,36 @@ static bool data_dev_supports_discard(struct pool_c *pt) /* * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. + * supports discards. Disable discard_passdown if not. */ -static void disable_passdown_if_not_supported(struct pool_c *pt, - struct pool_features *pf) +static void disable_passdown_if_not_supported(struct pool_c *pt) { + struct pool *pool = pt->pool; + struct block_device *data_bdev = pt->data_dev->bdev; + struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits; + sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT; + const char *reason = NULL; char buf[BDEVNAME_SIZE]; - if (!pf->discard_passdown || data_dev_supports_discard(pt)) + if (!pt->adjusted_pf.discard_passdown) return; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); + if (!data_dev_supports_discard(pt)) + reason = "discard unsupported"; + + else if (data_limits->max_discard_sectors < pool->sectors_per_block) + reason = "max discard sectors smaller than a block"; - pf->discard_passdown = false; + else if (data_limits->discard_granularity > block_size) + reason = "discard granularity larger than a block"; + + else if (block_size & (data_limits->discard_granularity - 1)) + reason = "discard granularity not a factor of block size"; + + if (reason) { + DMWARN("Data device (%s) %s: Disabling discard passdown.", bdevname(data_bdev, buf), reason); + pt->adjusted_pf.discard_passdown = false; + } } static int bind_control_target(struct pool *pool, struct dm_target *ti) @@ -1873,16 +1889,15 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) * We want to make sure that degraded pools are never upgraded. */ enum pool_mode old_mode = pool->pf.mode; - enum pool_mode new_mode = pt->pf.mode; + enum pool_mode new_mode = pt->adjusted_pf.mode; if (old_mode > new_mode) new_mode = old_mode; pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; - pool->pf = pt->pf; + pool->pf = pt->adjusted_pf; - disable_passdown_if_not_supported(pt, &pool->pf); set_pool_mode(pool, new_mode); return 0; @@ -2271,7 +2286,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->metadata_dev = metadata_dev; pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; - pt->pf = pf; + pt->adjusted_pf = pt->requested_pf = pf; ti->num_flush_requests = 1; /* @@ -2718,7 +2733,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - emit_flags(&pt->pf, result, sz, maxlen); + emit_flags(&pt->requested_pf, result, sz, maxlen); break; } @@ -2747,19 +2762,21 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static void set_discard_limits(struct pool *pool, struct queue_limits *limits) +static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { - /* - * FIXME: these limits may be incompatible with the pool's data device - */ + struct pool *pool = pt->pool; + struct queue_limits *data_limits; + limits->max_discard_sectors = pool->sectors_per_block; /* - * This is just a hint, and not enforced. We have to cope with - * bios that cover a block partially. A discard that spans a block - * boundary is not sent to this target. + * discard_granularity is just a hint, and not enforced. */ - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; + if (pt->adjusted_pf.discard_passdown) { + data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; + limits->discard_granularity = data_limits->discard_granularity; + } else + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2769,15 +2786,25 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_min(limits, 0); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - if (pool->pf.discard_enabled) - set_discard_limits(pool, limits); + + /* + * pt->adjusted_pf is a staging area for the actual features to use. + * They get transferred to the live pool in bind_control_target() + * called from pool_preresume(). + */ + if (!pt->adjusted_pf.discard_enabled) + return; + + disable_passdown_if_not_supported(pt); + + set_discard_limits(pt, limits); } static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3056,19 +3083,19 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +/* + * A thin device always inherits its queue limits from its pool. + */ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct thin_c *tc = ti->private; - struct pool *pool = tc->pool; - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - set_discard_limits(pool, limits); + *limits = bdev_get_queue(tc->pool_dev->bdev)->limits; } static struct target_type thin_target = { .name = "thin", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, -- cgit v1.2.3-58-ga151