From 3e1a88ec96259282b9a8b45c3f1fda7a3ff4f6ea Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 9 Jan 2021 16:03:02 +0000 Subject: bio: add a helper calculating nr segments to alloc Add a helper function calculating the number of bvec segments we need to allocate to construct a bio. It doesn't change anything functionally, but will be used to not duplicate special cases in the future. Reviewed-by: Christoph Hellwig Signed-off-by: Pavel Begunkov Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- fs/block_dev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 3b8963e228a1..6f5bd9950baf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -416,7 +416,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; - nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES); if (!nr_pages) { bool polled = false; @@ -481,9 +481,10 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { int nr_pages; - nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1); - if (!nr_pages) + if (!iov_iter_count(iter)) return 0; + + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES + 1); if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES) return __blkdev_direct_IO_simple(iocb, iter, nr_pages); -- cgit v1.2.3-58-ga151 From 767630c63bb23acf022adb265574996ca39a4645 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Jan 2021 16:40:34 +0100 Subject: bdev: Do not return EBUSY if bdev discard races with write blkdev_fallocate() tries to detect whether a discard raced with an overlapping write by calling invalidate_inode_pages2_range(). However this check can give both false negatives (when writing using direct IO or when writeback already writes out the written pagecache range) and false positives (when write is not actually overlapping but ends in the same page when blocksize < pagesize). This actually causes issues for qemu which is getting confused by EBUSY errors. Fix the problem by removing this conflicting write detection since it is inherently racy and thus of little use anyway. Reported-by: Maxim Levitsky CC: "Darrick J. Wong" Link: https://lore.kernel.org/qemu-devel/20201111153913.41840-1-mlevitsk@redhat.com Signed-off-by: Jan Kara Reviewed-by: Maxim Levitsky Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 6f5bd9950baf..289c3dd923a4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1801,13 +1801,11 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, return error; /* - * Invalidate again; if someone wandered in and dirtied a page, - * the caller will be given -EBUSY. The third argument is - * inclusive, so the rounding here is safe. + * Invalidate the page cache again; if someone wandered in and dirtied + * a page, we just discard it - userspace has no way of knowing whether + * the write happened before or after discard completing... */ - return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, - start >> PAGE_SHIFT, - end >> PAGE_SHIFT); + return truncate_bdev_range(bdev, file->f_mode, start, end); } const struct file_operations def_blk_fops = { -- cgit v1.2.3-58-ga151 From 2c2b9fd6b496b3616e9b9537ea0258b3040914f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 9 Jan 2021 12:13:32 +0100 Subject: block: unexport truncate_bdev_range truncate_bdev_range is only used in always built-in block layer code, so remove the export and the !CONFIG_BLOCK stub. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 - include/linux/blkdev.h | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'fs/block_dev.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 289c3dd923a4..c1fe29dac485 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -126,7 +126,6 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, bd_abort_claiming(bdev, truncate_bdev_range); return 0; } -EXPORT_SYMBOL(truncate_bdev_range); static void set_init_blocksize(struct block_device *bdev) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20f3706b6b2e..2491e17b61c4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1999,21 +1999,16 @@ void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); +int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, + loff_t lend); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); -int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, - loff_t lend); int sync_blockdev(struct block_device *bdev); #else static inline void invalidate_bdev(struct block_device *bdev) { } -static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode, - loff_t lstart, loff_t lend) -{ - return 0; -} static inline int sync_blockdev(struct block_device *bdev) { return 0; -- cgit v1.2.3-58-ga151 From c6bf3f0e25f4c0f0ecce6cf8d1c589bd9d74d3cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Jan 2021 15:52:35 +0100 Subject: block: use an on-stack bio in blkdev_issue_flush There is no point in allocating memory for a synchronous flush. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Acked-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-flush.c | 17 ++++++----------- drivers/md/dm-zoned-metadata.c | 6 +++--- drivers/md/raid5-ppl.c | 2 +- drivers/nvme/target/io-cmd-bdev.c | 2 +- fs/block_dev.c | 2 +- fs/exfat/file.c | 2 +- fs/ext4/fast_commit.c | 4 ++-- fs/ext4/fsync.c | 2 +- fs/ext4/ialloc.c | 2 +- fs/ext4/super.c | 2 +- fs/fat/file.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/hfsplus/super.c | 2 +- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/commit.c | 4 ++-- fs/jbd2/recovery.c | 2 +- fs/libfs.c | 2 +- fs/nilfs2/the_nilfs.h | 2 +- fs/ocfs2/file.c | 2 +- fs/reiserfs/file.c | 2 +- fs/xfs/xfs_super.c | 2 +- fs/zonefs/super.c | 2 +- include/linux/blkdev.h | 4 ++-- 23 files changed, 33 insertions(+), 38 deletions(-) (limited to 'fs/block_dev.c') diff --git a/block/blk-flush.c b/block/blk-flush.c index 76c1624cb06c..7942ca6ed321 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -432,23 +432,18 @@ void blk_insert_flush(struct request *rq) /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for - * @gfp_mask: memory allocation flags (for bio_alloc) * * Description: * Issue a flush for the block device in question. */ -int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) +int blkdev_issue_flush(struct block_device *bdev) { - struct bio *bio; - int ret = 0; + struct bio bio; - bio = bio_alloc(gfp_mask, 0); - bio_set_dev(bio, bdev); - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - - ret = submit_bio_wait(bio); - bio_put(bio); - return ret; + bio_init(&bio, NULL, 0); + bio_set_dev(&bio, bdev); + bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + return submit_bio_wait(&bio); } EXPORT_SYMBOL(blkdev_issue_flush); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b298fefb022e..039d17b28938 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -819,7 +819,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block, mblk->page); if (ret == 0) - ret = blkdev_issue_flush(dev->bdev, GFP_NOIO); + ret = blkdev_issue_flush(dev->bdev); return ret; } @@ -862,7 +862,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, /* Flush drive cache (this will also sync data) */ if (ret == 0) - ret = blkdev_issue_flush(dev->bdev, GFP_NOIO); + ret = blkdev_issue_flush(dev->bdev); return ret; } @@ -933,7 +933,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) /* If there are no dirty metadata blocks, just flush the device cache */ if (list_empty(&write_list)) { - ret = blkdev_issue_flush(dev->bdev, GFP_NOIO); + ret = blkdev_issue_flush(dev->bdev); goto err; } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index d0f540296fe9..e8c118e05dfd 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -1037,7 +1037,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr, } /* flush the disk cache after recovery if necessary */ - ret = blkdev_issue_flush(rdev->bdev, GFP_KERNEL); + ret = blkdev_issue_flush(rdev->bdev); out: __free_page(page); return ret; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 125dde3f410e..bf6e0ac9ad28 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -333,7 +333,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) u16 nvmet_bdev_flush(struct nvmet_req *req) { - if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL)) + if (blkdev_issue_flush(req->ns->bdev)) return NVME_SC_INTERNAL | NVME_SC_DNR; return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index c1fe29dac485..9d4b1a884d76 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -680,7 +680,7 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) * i_mutex and doing so causes performance issues with concurrent * O_SYNC writers to a block device. */ - error = blkdev_issue_flush(bdev, GFP_KERNEL); + error = blkdev_issue_flush(bdev); if (error == -EOPNOTSUPP) error = 0; diff --git a/fs/exfat/file.c b/fs/exfat/file.c index a92478eabfa4..183ffdf4d43c 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -361,7 +361,7 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) if (err) return err; - return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + return blkdev_issue_flush(inode->i_sb->s_bdev); } const struct file_operations exfat_file_operations = { diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0a14a7c87bf8..6e8208acfc62 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1076,7 +1076,7 @@ static int ext4_fc_perform_commit(journal_t *journal) * flush before we start writing fast commit blocks. */ if (journal->j_fs_dev != journal->j_dev) - blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS); + blkdev_issue_flush(journal->j_fs_dev); blk_start_plug(&plug); if (sbi->s_fc_bytes == 0) { @@ -1535,7 +1535,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) out: iput(inode); if (!ret) - blkdev_issue_flush(sb->s_bdev, GFP_KERNEL); + blkdev_issue_flush(sb->s_bdev); return 0; } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 113bfb023a4a..027a7d7037a0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -174,7 +174,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = ext4_fsync_journal(inode, datasync, &needs_barrier); if (needs_barrier) { - err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + err = blkdev_issue_flush(inode->i_sb->s_bdev); if (!ret) ret = err; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b215c564bc31..20f2fcb799f5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1583,7 +1583,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, if (ret < 0) goto err_out; if (barrier) - blkdev_issue_flush(sb->s_bdev, GFP_NOFS); + blkdev_issue_flush(sb->s_bdev); skip_zeroout: ext4_lock_group(sb, group); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a6f9875aa34..fb5985102c1d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5709,7 +5709,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) needs_barrier = true; if (needs_barrier) { int err; - err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL); + err = blkdev_issue_flush(sb->s_bdev); if (!ret) ret = err; } diff --git a/fs/fat/file.c b/fs/fat/file.c index f9ee27cf4d7c..5fee74f1ad61 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -195,7 +195,7 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) if (err) return err; - return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + return blkdev_issue_flush(inode->i_sb->s_bdev); } diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index e3da9e96b835..ca464328b79c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -340,7 +340,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, } if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + blkdev_issue_flush(inode->i_sb->s_bdev); inode_unlock(inode); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 807119ae5adf..b9e3db3f855f 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -239,7 +239,7 @@ out: mutex_unlock(&sbi->vh_mutex); if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) - blkdev_issue_flush(sb->s_bdev, GFP_KERNEL); + blkdev_issue_flush(sb->s_bdev); return error; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 472932b9e6bc..63b526d44886 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -416,7 +416,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * jbd2_cleanup_journal_tail() doesn't get called all that often. */ if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS); + blkdev_issue_flush(journal->j_fs_dev); return __jbd2_update_log_tail(journal, first_tid, blocknr); } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b121d7d434c6..3cc4ab2ba7f4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -825,7 +825,7 @@ start_journal_io: if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS); + blkdev_issue_flush(journal->j_fs_dev); /* Done it all: now write the commit record asynchronously. */ if (jbd2_has_feature_async_commit(journal)) { @@ -932,7 +932,7 @@ start_journal_io: stats.run.rs_blocks_logged++; if (jbd2_has_feature_async_commit(journal) && journal->j_flags & JBD2_BARRIER) { - blkdev_issue_flush(journal->j_dev, GFP_NOFS); + blkdev_issue_flush(journal->j_dev); } if (err) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index dc0694fcfcd1..69f18fe20923 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -326,7 +326,7 @@ int jbd2_journal_recover(journal_t *journal) err = err2; /* Make sure all replayed data is on permanent storage */ if (journal->j_flags & JBD2_BARRIER) { - err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL); + err2 = blkdev_issue_flush(journal->j_fs_dev); if (!err) err = err2; } diff --git a/fs/libfs.c b/fs/libfs.c index d1c3bade9f30..8398a0efb401 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1117,7 +1117,7 @@ int generic_file_fsync(struct file *file, loff_t start, loff_t end, err = __generic_file_fsync(file, start, end, datasync); if (err) return err; - return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + return blkdev_issue_flush(inode->i_sb->s_bdev); } EXPORT_SYMBOL(generic_file_fsync); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index b55cdeb4d169..987c8ab02aee 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -375,7 +375,7 @@ static inline int nilfs_flush_device(struct the_nilfs *nilfs) */ smp_wmb(); - err = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL); + err = blkdev_issue_flush(nilfs->ns_bdev); if (err != -EIO) err = 0; return err; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 85979e2214b3..df6d709d2ae3 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -194,7 +194,7 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, needs_barrier = true; err = jbd2_complete_transaction(journal, commit_tid); if (needs_barrier) { - ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + ret = blkdev_issue_flush(inode->i_sb->s_bdev); if (!err) err = ret; } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 0b641ae694f1..1db0254bc38b 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -159,7 +159,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end, barrier_done = reiserfs_commit_for_inode(inode); reiserfs_write_unlock(inode->i_sb); if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + blkdev_issue_flush(inode->i_sb->s_bdev); inode_unlock(inode); if (barrier_done < 0) return barrier_done; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 813be879a5e5..c3e32789829f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -342,7 +342,7 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS); + blkdev_issue_flush(buftarg->bt_bdev); } STATIC void diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index faea2ed34b4a..ab68e27bb322 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -541,7 +541,7 @@ static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) ret = file_write_and_wait_range(file, start, end); if (!ret) - ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL); + ret = blkdev_issue_flush(inode->i_sb->s_bdev); if (ret) zonefs_io_error(inode, true); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2491e17b61c4..0dea268bd61b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1288,7 +1288,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } -int blkdev_issue_flush(struct block_device *, gfp_t); +int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ struct blk_plug { @@ -1316,7 +1316,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) return false; } -static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) +static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; } -- cgit v1.2.3-58-ga151