diff options
Diffstat (limited to 'drivers/md/dm-cache-target.c')
-rw-r--r-- | drivers/md/dm-cache-target.c | 326 |
1 files changed, 132 insertions, 194 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 8785134c9f1f..cf23a14f9c6a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -408,9 +408,7 @@ struct cache { int sectors_per_block_shift; spinlock_t lock; - struct list_head deferred_cells; struct bio_list deferred_bios; - struct bio_list deferred_writethrough_bios; sector_t migration_threshold; wait_queue_head_t migration_wait; atomic_t nr_allocated_migrations; @@ -446,10 +444,10 @@ struct cache { struct dm_kcopyd_client *copier; struct workqueue_struct *wq; struct work_struct deferred_bio_worker; - struct work_struct deferred_writethrough_worker; struct work_struct migration_worker; struct delayed_work waker; struct dm_bio_prison_v2 *prison; + struct bio_set *bs; mempool_t *migration_pool; @@ -490,15 +488,6 @@ struct per_bio_data { struct dm_bio_prison_cell_v2 *cell; struct dm_hook_info hook_info; sector_t len; - - /* - * writethrough fields. These MUST remain at the end of this - * structure and the 'cache' member must be the first as it - * is used to determine the offset of the writethrough fields. - */ - struct cache *cache; - dm_cblock_t cblock; - struct dm_bio_details bio_details; }; struct dm_cache_migration { @@ -515,19 +504,19 @@ struct dm_cache_migration { /*----------------------------------------------------------------*/ -static bool writethrough_mode(struct cache_features *f) +static bool writethrough_mode(struct cache *cache) { - return f->io_mode == CM_IO_WRITETHROUGH; + return cache->features.io_mode == CM_IO_WRITETHROUGH; } -static bool writeback_mode(struct cache_features *f) +static bool writeback_mode(struct cache *cache) { - return f->io_mode == CM_IO_WRITEBACK; + return cache->features.io_mode == CM_IO_WRITEBACK; } -static inline bool passthrough_mode(struct cache_features *f) +static inline bool passthrough_mode(struct cache *cache) { - return unlikely(f->io_mode == CM_IO_PASSTHROUGH); + return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH); } /*----------------------------------------------------------------*/ @@ -537,14 +526,9 @@ static void wake_deferred_bio_worker(struct cache *cache) queue_work(cache->wq, &cache->deferred_bio_worker); } -static void wake_deferred_writethrough_worker(struct cache *cache) -{ - queue_work(cache->wq, &cache->deferred_writethrough_worker); -} - static void wake_migration_worker(struct cache *cache) { - if (passthrough_mode(&cache->features)) + if (passthrough_mode(cache)) return; queue_work(cache->wq, &cache->migration_worker); @@ -567,10 +551,13 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) struct dm_cache_migration *mg; mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); - if (mg) { - mg->cache = cache; - atomic_inc(&mg->cache->nr_allocated_migrations); - } + if (!mg) + return NULL; + + memset(mg, 0, sizeof(*mg)); + + mg->cache = cache; + atomic_inc(&cache->nr_allocated_migrations); return mg; } @@ -618,27 +605,16 @@ static unsigned lock_level(struct bio *bio) * Per bio data *--------------------------------------------------------------*/ -/* - * If using writeback, leave out struct per_bio_data's writethrough fields. - */ -#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) -#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) - -static size_t get_per_bio_data_size(struct cache *cache) +static struct per_bio_data *get_per_bio_data(struct bio *bio) { - return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; -} - -static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) -{ - struct per_bio_data *pb = dm_per_bio_data(bio, data_size); + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); BUG_ON(!pb); return pb; } -static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) +static struct per_bio_data *init_per_bio_data(struct bio *bio) { - struct per_bio_data *pb = get_per_bio_data(bio, data_size); + struct per_bio_data *pb = get_per_bio_data(bio); pb->tick = false; pb->req_nr = dm_bio_get_target_bio_nr(bio); @@ -678,7 +654,6 @@ static void defer_bios(struct cache *cache, struct bio_list *bios) static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio) { bool r; - size_t pb_size; struct per_bio_data *pb; struct dm_cell_key_v2 key; dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); @@ -703,8 +678,7 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi if (cell != cell_prealloc) free_prison_cell(cache, cell_prealloc); - pb_size = get_per_bio_data_size(cache); - pb = get_per_bio_data(bio, pb_size); + pb = get_per_bio_data(bio); pb->cell = cell; return r; @@ -856,28 +830,35 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) { unsigned long flags; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb; spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) && bio_op(bio) != REQ_OP_DISCARD) { + pb = get_per_bio_data(bio); pb->tick = true; cache->need_tick_bio = false; } spin_unlock_irqrestore(&cache->lock, flags); } -static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, - dm_oblock_t oblock) +static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, bool bio_has_pbd) { - // FIXME: this is called way too much. - check_if_tick_bio_needed(cache, bio); + if (bio_has_pbd) + check_if_tick_bio_needed(cache, bio); remap_to_origin(cache, bio); if (bio_data_dir(bio) == WRITE) clear_discard(cache, oblock_to_dblock(cache, oblock)); } +static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, + dm_oblock_t oblock) +{ + // FIXME: check_if_tick_bio_needed() is called way too much through this interface + __remap_to_origin_clear_discard(cache, bio, oblock, true); +} + static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, dm_oblock_t oblock, dm_cblock_t cblock) { @@ -908,10 +889,10 @@ static bool accountable_bio(struct cache *cache, struct bio *bio) static void accounted_begin(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb; if (accountable_bio(cache, bio)) { + pb = get_per_bio_data(bio); pb->len = bio_sectors(bio); iot_io_begin(&cache->tracker, pb->len); } @@ -919,8 +900,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio) static void accounted_complete(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); iot_io_end(&cache->tracker, pb->len); } @@ -937,57 +917,26 @@ static void issue_op(struct bio *bio, void *context) accounted_request(cache, bio); } -static void defer_writethrough_bio(struct cache *cache, struct bio *bio) -{ - unsigned long flags; - - spin_lock_irqsave(&cache->lock, flags); - bio_list_add(&cache->deferred_writethrough_bios, bio); - spin_unlock_irqrestore(&cache->lock, flags); - - wake_deferred_writethrough_worker(cache); -} - -static void writethrough_endio(struct bio *bio) -{ - struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); - - dm_unhook_bio(&pb->hook_info, bio); - - if (bio->bi_status) { - bio_endio(bio); - return; - } - - dm_bio_restore(&pb->bio_details, bio); - remap_to_cache(pb->cache, bio, pb->cblock); - - /* - * We can't issue this bio directly, since we're in interrupt - * context. So it gets put on a bio list for processing by the - * worker thread. - */ - defer_writethrough_bio(pb->cache, bio); -} - /* - * FIXME: send in parallel, huge latency as is. * When running in writethrough mode we need to send writes to clean blocks - * to both the cache and origin devices. In future we'd like to clone the - * bio and send them in parallel, but for now we're doing them in - * series as this is easier. + * to both the cache and origin devices. Clone the bio and send them in parallel. */ -static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, - dm_oblock_t oblock, dm_cblock_t cblock) +static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, dm_cblock_t cblock) { - struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); + struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs); + + BUG_ON(!origin_bio); - pb->cache = cache; - pb->cblock = cblock; - dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); - dm_bio_record(&pb->bio_details, bio); + bio_chain(origin_bio, bio); + /* + * Passing false to __remap_to_origin_clear_discard() skips + * all code that might use per_bio_data (since clone doesn't have it) + */ + __remap_to_origin_clear_discard(cache, origin_bio, oblock, false); + submit_bio(origin_bio); - remap_to_origin_clear_discard(pb->cache, bio, oblock); + remap_to_cache(cache, bio, cblock); } /*---------------------------------------------------------------- @@ -1201,6 +1150,18 @@ static void background_work_end(struct cache *cache) /*----------------------------------------------------------------*/ +static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) +{ + return (bio_data_dir(bio) == WRITE) && + (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); +} + +static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block) +{ + return writeback_mode(cache) && + (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio)); +} + static void quiesce(struct dm_cache_migration *mg, void (*continuation)(struct work_struct *)) { @@ -1248,8 +1209,7 @@ static int copy(struct dm_cache_migration *mg, bool promote) static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell)) free_prison_cell(cache, pb->cell); @@ -1260,23 +1220,21 @@ static void overwrite_endio(struct bio *bio) { struct dm_cache_migration *mg = bio->bi_private; struct cache *cache = mg->cache; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); dm_unhook_bio(&pb->hook_info, bio); if (bio->bi_status) mg->k.input = bio->bi_status; - queue_continuation(mg->cache->wq, &mg->k); + queue_continuation(cache->wq, &mg->k); } static void overwrite(struct dm_cache_migration *mg, void (*continuation)(struct work_struct *)) { struct bio *bio = mg->overwrite_bio; - size_t pb_data_size = get_per_bio_data_size(mg->cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); @@ -1474,13 +1432,51 @@ static void mg_upgrade_lock(struct work_struct *ws) } } +static void mg_full_copy(struct work_struct *ws) +{ + struct dm_cache_migration *mg = ws_to_mg(ws); + struct cache *cache = mg->cache; + struct policy_work *op = mg->op; + bool is_policy_promote = (op->op == POLICY_PROMOTE); + + if ((!is_policy_promote && !is_dirty(cache, op->cblock)) || + is_discarded_oblock(cache, op->oblock)) { + mg_upgrade_lock(ws); + return; + } + + init_continuation(&mg->k, mg_upgrade_lock); + + if (copy(mg, is_policy_promote)) { + DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); + mg->k.input = BLK_STS_IOERR; + mg_complete(mg, false); + } +} + static void mg_copy(struct work_struct *ws) { - int r; struct dm_cache_migration *mg = ws_to_mg(ws); if (mg->overwrite_bio) { /* + * No exclusive lock was held when we last checked if the bio + * was optimisable. So we have to check again in case things + * have changed (eg, the block may no longer be discarded). + */ + if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) { + /* + * Fallback to a real full copy after doing some tidying up. + */ + bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio); + BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */ + mg->overwrite_bio = NULL; + inc_io_migrations(mg->cache); + mg_full_copy(ws); + return; + } + + /* * It's safe to do this here, even though it's new data * because all IO has been locked out of the block. * @@ -1489,26 +1485,8 @@ static void mg_copy(struct work_struct *ws) */ overwrite(mg, mg_update_metadata_after_copy); - } else { - struct cache *cache = mg->cache; - struct policy_work *op = mg->op; - bool is_policy_promote = (op->op == POLICY_PROMOTE); - - if ((!is_policy_promote && !is_dirty(cache, op->cblock)) || - is_discarded_oblock(cache, op->oblock)) { - mg_upgrade_lock(ws); - return; - } - - init_continuation(&mg->k, mg_upgrade_lock); - - r = copy(mg, is_policy_promote); - if (r) { - DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); - mg->k.input = BLK_STS_IOERR; - mg_complete(mg, false); - } - } + } else + mg_full_copy(ws); } static int mg_lock_writes(struct dm_cache_migration *mg) @@ -1567,9 +1545,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio return -ENOMEM; } - memset(mg, 0, sizeof(*mg)); - - mg->cache = cache; mg->op = op; mg->overwrite_bio = bio; @@ -1703,9 +1678,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, return -ENOMEM; } - memset(mg, 0, sizeof(*mg)); - - mg->cache = cache; mg->overwrite_bio = bio; mg->invalidate_cblock = cblock; mg->invalidate_oblock = oblock; @@ -1748,26 +1720,12 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) /*----------------------------------------------------------------*/ -static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) -{ - return (bio_data_dir(bio) == WRITE) && - (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); -} - -static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block) -{ - return writeback_mode(&cache->features) && - (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio)); -} - static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, bool *commit_needed) { int r, data_dir; bool rb, background_queued; dm_cblock_t cblock; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); *commit_needed = false; @@ -1816,6 +1774,8 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, } if (r == -ENOENT) { + struct per_bio_data *pb = get_per_bio_data(bio); + /* * Miss. */ @@ -1823,7 +1783,6 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, if (pb->req_nr == 0) { accounted_begin(cache, bio); remap_to_origin_clear_discard(cache, bio, block); - } else { /* * This is a duplicate writethrough io that is no @@ -1842,18 +1801,17 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, * Passthrough always maps to the origin, invalidating any * cache blocks that are written to. */ - if (passthrough_mode(&cache->features)) { + if (passthrough_mode(cache)) { if (bio_data_dir(bio) == WRITE) { bio_drop_shared_lock(cache, bio); atomic_inc(&cache->stats.demotion); invalidate_start(cache, cblock, block, bio); } else remap_to_origin_clear_discard(cache, bio, block); - } else { - if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && + if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) && !is_dirty(cache, cblock)) { - remap_to_origin_then_cache(cache, bio, block, cblock); + remap_to_origin_and_cache(cache, bio, block, cblock); accounted_begin(cache, bio); } else remap_to_cache_dirty(cache, bio, block, cblock); @@ -1922,8 +1880,7 @@ static blk_status_t commit_op(void *context) static bool process_flush_bio(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); if (!pb->req_nr) remap_to_origin(cache, bio); @@ -1983,28 +1940,6 @@ static void process_deferred_bios(struct work_struct *ws) schedule_commit(&cache->committer); } -static void process_deferred_writethrough_bios(struct work_struct *ws) -{ - struct cache *cache = container_of(ws, struct cache, deferred_writethrough_worker); - - unsigned long flags; - struct bio_list bios; - struct bio *bio; - - bio_list_init(&bios); - - spin_lock_irqsave(&cache->lock, flags); - bio_list_merge(&bios, &cache->deferred_writethrough_bios); - bio_list_init(&cache->deferred_writethrough_bios); - spin_unlock_irqrestore(&cache->lock, flags); - - /* - * These bios have already been through accounted_begin() - */ - while ((bio = bio_list_pop(&bios))) - generic_make_request(bio); -} - /*---------------------------------------------------------------- * Main worker loop *--------------------------------------------------------------*/ @@ -2112,6 +2047,9 @@ static void destroy(struct cache *cache) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); + if (cache->bs) + bioset_free(cache->bs); + kfree(cache); } @@ -2555,8 +2493,15 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->discards_supported = true; ti->split_discard_bios = false; + ti->per_io_data_size = sizeof(struct per_bio_data); + cache->features = ca->features; - ti->per_io_data_size = get_per_bio_data_size(cache); + if (writethrough_mode(cache)) { + /* Create bioset for writethrough bios issued to origin */ + cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0); + if (!cache->bs) + goto bad; + } cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); @@ -2618,7 +2563,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } - if (passthrough_mode(&cache->features)) { + if (passthrough_mode(cache)) { bool all_clean; r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); @@ -2637,9 +2582,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) } spin_lock_init(&cache->lock); - INIT_LIST_HEAD(&cache->deferred_cells); bio_list_init(&cache->deferred_bios); - bio_list_init(&cache->deferred_writethrough_bios); atomic_set(&cache->nr_allocated_migrations, 0); atomic_set(&cache->nr_io_migrations, 0); init_waitqueue_head(&cache->migration_wait); @@ -2678,8 +2621,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios); - INIT_WORK(&cache->deferred_writethrough_worker, - process_deferred_writethrough_bios); INIT_WORK(&cache->migration_worker, check_migrations); INIT_DELAYED_WORK(&cache->waker, do_waker); @@ -2795,9 +2736,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio) int r; bool commit_needed; dm_oblock_t block = get_bio_block(cache, bio); - size_t pb_data_size = get_per_bio_data_size(cache); - init_per_bio_data(bio, pb_data_size); + init_per_bio_data(bio); if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* * This can only occur if the io goes to a partial block at @@ -2821,13 +2761,11 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return r; } -static int cache_end_io(struct dm_target *ti, struct bio *bio, - blk_status_t *error) +static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { struct cache *cache = ti->private; unsigned long flags; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); if (pb->tick) { policy_tick(cache->policy, false); @@ -3243,13 +3181,13 @@ static void cache_status(struct dm_target *ti, status_type_t type, else DMEMIT("1 "); - if (writethrough_mode(&cache->features)) + if (writethrough_mode(cache)) DMEMIT("writethrough "); - else if (passthrough_mode(&cache->features)) + else if (passthrough_mode(cache)) DMEMIT("passthrough "); - else if (writeback_mode(&cache->features)) + else if (writeback_mode(cache)) DMEMIT("writeback "); else { @@ -3415,7 +3353,7 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun unsigned i; struct cblock_range range; - if (!passthrough_mode(&cache->features)) { + if (!passthrough_mode(cache)) { DMERR("%s: cache has to be in passthrough mode for invalidation", cache_device_name(cache)); return -EPERM; |