From 0a2bd55c194ae8322638aceb4cc276cbf8b2f8b9 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 8 Apr 2020 23:40:08 +0200 Subject: dm integrity: document allow_discard option Add decription of the allow_discard option added in commit 84597a44a9d86ac949900441cea7da0af0f2f473. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- Documentation/admin-guide/device-mapper/dm-integrity.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index c00f9f11e3f3..8439d2ae689b 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -182,12 +182,15 @@ fix_padding space-efficient. If this option is not present, large padding is used - that is for compatibility with older kernels. - -The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can -be changed when reloading the target (load an inactive table and swap the -tables with suspend and resume). The other arguments should not be changed -when reloading the target because the layout of disk data depend on them -and the reloaded target would be non-functional. +allow_discards + Allow block discard requests (a.k.a. TRIM) for the integrity device. + Discards are only allowed to devices using internal hash. + +The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and +allow_discards can be changed when reloading the target (load an inactive +table and swap the tables with suspend and resume). The other arguments +should not be changed when reloading the target because the layout of disk +data depend on them and the reloaded target would be non-functional. The layout of the formatted block device: -- cgit v1.2.3-58-ga151 From 31b22120194b5c0d460f59e0c98504de1d3f1f14 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 15 Apr 2020 11:01:38 -0400 Subject: dm writecache: fix data corruption when reloading the target The dm-writecache reads metadata in the target constructor. However, when we reload the target, there could be another active instance running on the same device. This is the sequence of operations when doing a reload: 1. construct new target 2. suspend old target 3. resume new target 4. destroy old target Metadata that were written by the old target between steps 1 and 2 would not be visible by the new target. Fix the data corruption by loading the metadata in the resume handler. Also, validate block_size is at least as large as both the devices' logical block size and only read 1 block from the metadata during target constructor -- no need to read entirety of metadata now that it is done during resume. Fixes: 48debafe4f2f ("dm: add writecache target") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 52 +++++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 114927da9cc9..613c171b1b6d 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -931,6 +931,24 @@ static int writecache_alloc_entries(struct dm_writecache *wc) return 0; } +static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors) +{ + struct dm_io_region region; + struct dm_io_request req; + + region.bdev = wc->ssd_dev->bdev; + region.sector = wc->start_sector; + region.count = n_sectors; + req.bi_op = REQ_OP_READ; + req.bi_op_flags = REQ_SYNC; + req.mem.type = DM_IO_VMA; + req.mem.ptr.vma = (char *)wc->memory_map; + req.client = wc->dm_io; + req.notify.fn = NULL; + + return dm_io(&req, 1, ®ion, NULL); +} + static void writecache_resume(struct dm_target *ti) { struct dm_writecache *wc = ti->private; @@ -941,8 +959,18 @@ static void writecache_resume(struct dm_target *ti) wc_lock(wc); - if (WC_MODE_PMEM(wc)) + if (WC_MODE_PMEM(wc)) { persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); + } else { + r = writecache_read_metadata(wc, wc->metadata_sectors); + if (r) { + size_t sb_entries_offset; + writecache_error(wc, r, "unable to read metadata: %d", r); + sb_entries_offset = offsetof(struct wc_memory_superblock, entries); + memset((char *)wc->memory_map + sb_entries_offset, -1, + (wc->metadata_sectors << SECTOR_SHIFT) - sb_entries_offset); + } + } wc->tree = RB_ROOT; INIT_LIST_HEAD(&wc->lru); @@ -2102,6 +2130,12 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Invalid block size"; goto bad; } + if (wc->block_size < bdev_logical_block_size(wc->dev->bdev) || + wc->block_size < bdev_logical_block_size(wc->ssd_dev->bdev)) { + r = -EINVAL; + ti->error = "Block size is smaller than device logical block size"; + goto bad; + } wc->block_size_bits = __ffs(wc->block_size); wc->max_writeback_jobs = MAX_WRITEBACK_JOBS; @@ -2200,8 +2234,6 @@ invalid_optional: goto bad; } } else { - struct dm_io_region region; - struct dm_io_request req; size_t n_blocks, n_metadata_blocks; uint64_t n_bitmap_bits; @@ -2258,19 +2290,9 @@ invalid_optional: goto bad; } - region.bdev = wc->ssd_dev->bdev; - region.sector = wc->start_sector; - region.count = wc->metadata_sectors; - req.bi_op = REQ_OP_READ; - req.bi_op_flags = REQ_SYNC; - req.mem.type = DM_IO_VMA; - req.mem.ptr.vma = (char *)wc->memory_map; - req.client = wc->dm_io; - req.notify.fn = NULL; - - r = dm_io(&req, 1, ®ion, NULL); + r = writecache_read_metadata(wc, wc->block_size >> SECTOR_SHIFT); if (r) { - ti->error = "Unable to read metadata"; + ti->error = "Unable to read first block of metadata"; goto bad; } } -- cgit v1.2.3-58-ga151 From ad4e80a639fc61d5ecebb03caa5cdbfb91fcebfc Mon Sep 17 00:00:00 2001 From: Sunwook Eom Date: Fri, 10 Apr 2020 12:54:19 +0900 Subject: dm verity fec: fix hash block number in verity_fec_decode The error correction data is computed as if data and hash blocks were concatenated. But hash block number starts from v->hash_start. So, we have to calculate hash block number based on that. Fixes: a739ff3f543af ("dm verity: add support for forward error correction") Cc: stable@vger.kernel.org Signed-off-by: Sunwook Eom Reviewed-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 49147e634046..fb41b4f23c48 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -435,7 +435,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, fio->level++; if (type == DM_VERITY_BLOCK_TYPE_METADATA) - block += v->data_blocks; + block = block - v->hash_start + v->data_blocks; /* * For RS(M, N), the continuous FEC data is divided into blocks of N -- cgit v1.2.3-58-ga151 From 5686dee34dbfe0238c0274e0454fa0174ac0a57a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 27 Apr 2020 20:39:11 -0400 Subject: dm multipath: use updated MPATHF_QUEUE_IO on mapping for bio-based mpath When adding devices that don't have a scsi_dh on a BIO based multipath, I was able to consistently hit the warning below and lock-up the system. The problem is that __map_bio reads the flag before it potentially being modified by choose_pgpath, and ends up using the older value. The WARN_ON below is not trivially linked to the issue. It goes like this: The activate_path delayed_work is not initialized for non-scsi_dh devices, but we always set MPATHF_QUEUE_IO, asking for initialization. That is fine, since MPATHF_QUEUE_IO would be cleared in choose_pgpath. Nevertheless, only for BIO-based mpath, we cache the flag before calling choose_pgpath, and use the older version when deciding if we should initialize the path. Therefore, we end up trying to initialize the paths, and calling the non-initialized activate_path work. [ 82.437100] ------------[ cut here ]------------ [ 82.437659] WARNING: CPU: 3 PID: 602 at kernel/workqueue.c:1624 __queue_delayed_work+0x71/0x90 [ 82.438436] Modules linked in: [ 82.438911] CPU: 3 PID: 602 Comm: systemd-udevd Not tainted 5.6.0-rc6+ #339 [ 82.439680] RIP: 0010:__queue_delayed_work+0x71/0x90 [ 82.440287] Code: c1 48 89 4a 50 81 ff 00 02 00 00 75 2a 4c 89 cf e9 94 d6 07 00 e9 7f e9 ff ff 0f 0b eb c7 0f 0b 48 81 7a 58 40 74 a8 94 74 a7 <0f> 0b 48 83 7a 48 00 74 a5 0f 0b eb a1 89 fe 4c 89 cf e9 c8 c4 07 [ 82.441719] RSP: 0018:ffffb738803977c0 EFLAGS: 00010007 [ 82.442121] RAX: ffffa086389f9740 RBX: 0000000000000002 RCX: 0000000000000000 [ 82.442718] RDX: ffffa086350dd930 RSI: ffffa0863d76f600 RDI: 0000000000000200 [ 82.443484] RBP: 0000000000000200 R08: 0000000000000000 R09: ffffa086350dd970 [ 82.444128] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa086350dd930 [ 82.444773] R13: ffffa0863d76f600 R14: 0000000000000000 R15: ffffa08636738008 [ 82.445427] FS: 00007f6abfe9dd40(0000) GS:ffffa0863dd80000(0000) knlGS:00000 [ 82.446040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 82.446478] CR2: 0000557d288db4e8 CR3: 0000000078b36000 CR4: 00000000000006e0 [ 82.447104] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 82.447561] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 82.448012] Call Trace: [ 82.448164] queue_delayed_work_on+0x6d/0x80 [ 82.448472] __pg_init_all_paths+0x7b/0xf0 [ 82.448714] pg_init_all_paths+0x26/0x40 [ 82.448980] __multipath_map_bio.isra.0+0x84/0x210 [ 82.449267] __map_bio+0x3c/0x1f0 [ 82.449468] __split_and_process_non_flush+0x14a/0x1b0 [ 82.449775] __split_and_process_bio+0xde/0x340 [ 82.450045] ? dm_get_live_table+0x5/0xb0 [ 82.450278] dm_process_bio+0x98/0x290 [ 82.450518] dm_make_request+0x54/0x120 [ 82.450778] generic_make_request+0xd2/0x3e0 [ 82.451038] ? submit_bio+0x3c/0x150 [ 82.451278] submit_bio+0x3c/0x150 [ 82.451492] mpage_readpages+0x129/0x160 [ 82.451756] ? bdev_evict_inode+0x1d0/0x1d0 [ 82.452033] read_pages+0x72/0x170 [ 82.452260] __do_page_cache_readahead+0x1ba/0x1d0 [ 82.452624] force_page_cache_readahead+0x96/0x110 [ 82.452903] generic_file_read_iter+0x84f/0xae0 [ 82.453192] ? __seccomp_filter+0x7c/0x670 [ 82.453547] new_sync_read+0x10e/0x190 [ 82.453883] vfs_read+0x9d/0x150 [ 82.454172] ksys_read+0x65/0xe0 [ 82.454466] do_syscall_64+0x4e/0x210 [ 82.454828] entry_SYSCALL_64_after_hwframe+0x49/0xbe [...] [ 82.462501] ---[ end trace bb39975e9cf45daa ]--- Cc: stable@vger.kernel.org Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 58fd137b6ae1..3e500098132f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -585,10 +585,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); - if (!pgpath || !queue_io) + if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) pgpath = choose_pgpath(m, bio->bi_iter.bi_size); + /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */ + queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); + if ((pgpath && queue_io) || (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { /* Queue for the daemon to resubmit */ -- cgit v1.2.3-58-ga151