diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 12:49:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 12:49:01 -0700 |
commit | 126e76ffbf78d9e948b641aadb265d16c57f5a3d (patch) | |
tree | 656e7838f0ec057936b80e15a774911df05c6005 /drivers/block | |
parent | fbd01410e89a66f346ba1b3c0161e1198449b746 (diff) | |
parent | 175206cf9ab63161dec74d9cd7f9992e062491f5 (diff) |
Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block
Pull followup block layer updates from Jens Axboe:
"I ended up splitting the main pull request for this series into two,
mainly because of clashes between NVMe fixes that went into 4.13 after
the for-4.14 branches were split off. This pull request is mostly
NVMe, but not exclusively. In detail, it contains:
- Two pull request for NVMe changes from Christoph. Nothing new on
the feature front, basically just fixes all over the map for the
core bits, transport, rdma, etc.
- Series from Bart, cleaning up various bits in the BFQ scheduler.
- Series of bcache fixes, which has been lingering for a release or
two. Coly sent this in, but patches from various people in this
area.
- Set of patches for BFQ from Paolo himself, updating both
documentation and fixing some corner cases in performance.
- Series from Omar, attempting to now get the 4k loop support
correct. Our confidence level is higher this time.
- Series from Shaohua for loop as well, improving O_DIRECT
performance and fixing a use-after-free"
* 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block: (74 commits)
bcache: initialize dirty stripes in flash_dev_run()
loop: set physical block size to logical block size
bcache: fix bch_hprint crash and improve output
bcache: Update continue_at() documentation
bcache: silence static checker warning
bcache: fix for gc and write-back race
bcache: increase the number of open buckets
bcache: Correct return value for sysfs attach errors
bcache: correct cache_dirty_target in __update_writeback_rate()
bcache: gc does not work when triggering by manual command
bcache: Don't reinvent the wheel but use existing llist API
bcache: do not subtract sectors_to_gc for bypassed IO
bcache: fix sequential large write IO bypass
bcache: Fix leak of bdev reference
block/loop: remove unused field
block/loop: fix use after free
bfq: Use icq_to_bic() consistently
bfq: Suppress compiler warnings about comparisons
bfq: Check kstrtoul() return value
bfq: Declare local functions static
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/loop.c | 193 | ||||
-rw-r--r-- | drivers/block/loop.h | 8 |
2 files changed, 109 insertions, 92 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 407cb172d6e3..85de67334695 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -213,10 +213,13 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) */ blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; - if (use_dio) + if (use_dio) { + queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags |= LO_FLAGS_DIRECT_IO; - else + } else { + queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; + } blk_mq_unfreeze_queue(lo->lo_queue); } @@ -460,12 +463,21 @@ static void lo_complete_rq(struct request *rq) blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK); } +static void lo_rw_aio_do_completion(struct loop_cmd *cmd) +{ + if (!atomic_dec_and_test(&cmd->ref)) + return; + kfree(cmd->bvec); + cmd->bvec = NULL; + blk_mq_complete_request(cmd->rq); +} + static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); cmd->ret = ret; - blk_mq_complete_request(cmd->rq); + lo_rw_aio_do_completion(cmd); } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, @@ -473,22 +485,51 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, { struct iov_iter iter; struct bio_vec *bvec; - struct bio *bio = cmd->rq->bio; + struct request *rq = cmd->rq; + struct bio *bio = rq->bio; struct file *file = lo->lo_backing_file; + unsigned int offset; + int segments = 0; int ret; - /* nomerge for loop request queue */ - WARN_ON(cmd->rq->bio != cmd->rq->biotail); + if (rq->bio != rq->biotail) { + struct req_iterator iter; + struct bio_vec tmp; + + __rq_for_each_bio(bio, rq) + segments += bio_segments(bio); + bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO); + if (!bvec) + return -EIO; + cmd->bvec = bvec; + + /* + * The bios of the request may be started from the middle of + * the 'bvec' because of bio splitting, so we can't directly + * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment + * API will take care of all details for us. + */ + rq_for_each_segment(tmp, rq, iter) { + *bvec = tmp; + bvec++; + } + bvec = cmd->bvec; + offset = 0; + } else { + /* + * Same here, this bio may be started from the middle of the + * 'bvec' because of bio splitting, so offset from the bvec + * must be passed to iov iterator + */ + offset = bio->bi_iter.bi_bvec_done; + bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + segments = bio_segments(bio); + } + atomic_set(&cmd->ref, 2); - bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, - bio_segments(bio), blk_rq_bytes(cmd->rq)); - /* - * This bio may be started from the middle of the 'bvec' - * because of bio splitting, so offset from the bvec must - * be passed to iov iterator - */ - iter.iov_offset = bio->bi_iter.bi_bvec_done; + segments, blk_rq_bytes(rq)); + iter.iov_offset = offset; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; @@ -500,6 +541,8 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, else ret = call_read_iter(file, &cmd->iocb, &iter); + lo_rw_aio_do_completion(cmd); + if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); return 0; @@ -546,74 +589,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) } } -struct switch_request { - struct file *file; - struct completion wait; -}; - static inline void loop_update_dio(struct loop_device *lo) { __loop_update_dio(lo, io_is_direct(lo->lo_backing_file) | lo->use_dio); } -/* - * Do the actual switch; called from the BIO completion routine - */ -static void do_loop_switch(struct loop_device *lo, struct switch_request *p) -{ - struct file *file = p->file; - struct file *old_file = lo->lo_backing_file; - struct address_space *mapping; - - /* if no new file, only flush of queued bios requested */ - if (!file) - return; - - mapping = file->f_mapping; - mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); - lo->lo_backing_file = file; - lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? - mapping->host->i_bdev->bd_block_size : PAGE_SIZE; - lo->old_gfp_mask = mapping_gfp_mask(mapping); - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - loop_update_dio(lo); -} - -/* - * loop_switch performs the hard work of switching a backing store. - * First it needs to flush existing IO, it does this by sending a magic - * BIO down the pipe. The completion of this BIO does the actual switch. - */ -static int loop_switch(struct loop_device *lo, struct file *file) -{ - struct switch_request w; - - w.file = file; - - /* freeze queue and wait for completion of scheduled requests */ - blk_mq_freeze_queue(lo->lo_queue); - - /* do the switch action */ - do_loop_switch(lo, &w); - - /* unfreeze */ - blk_mq_unfreeze_queue(lo->lo_queue); - - return 0; -} - -/* - * Helper to flush the IOs in loop, but keeping loop thread running - */ -static int loop_flush(struct loop_device *lo) -{ - /* loop not yet configured, no running thread, nothing to flush */ - if (lo->lo_state != Lo_bound) - return 0; - return loop_switch(lo, NULL); -} - static void loop_reread_partitions(struct loop_device *lo, struct block_device *bdev) { @@ -678,9 +659,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, goto out_putf; /* and ... switch */ - error = loop_switch(lo, file); - if (error) - goto out_putf; + blk_mq_freeze_queue(lo->lo_queue); + mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); + lo->lo_backing_file = file; + lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); + mapping_set_gfp_mask(file->f_mapping, + lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); + loop_update_dio(lo); + blk_mq_unfreeze_queue(lo->lo_queue); fput(old_file); if (lo->lo_flags & LO_FLAGS_PARTSCAN) @@ -867,7 +853,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct file *file, *f; struct inode *inode; struct address_space *mapping; - unsigned lo_blocksize; int lo_flags = 0; int error; loff_t size; @@ -911,9 +896,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, !file->f_op->write_iter) lo_flags |= LO_FLAGS_READ_ONLY; - lo_blocksize = S_ISBLK(inode->i_mode) ? - inode->i_bdev->bd_block_size : PAGE_SIZE; - error = -EFBIG; size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) @@ -927,7 +909,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->use_dio = false; - lo->lo_blocksize = lo_blocksize; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -947,7 +928,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, /* let user-space know about the new size */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - set_blocksize(bdev, lo_blocksize); + set_blocksize(bdev, S_ISBLK(inode->i_mode) ? + block_size(inode->i_bdev) : PAGE_SIZE); lo->lo_state = Lo_bound; if (part_shift) @@ -1053,6 +1035,9 @@ static int loop_clr_fd(struct loop_device *lo) memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); + blk_queue_logical_block_size(lo->lo_queue, 512); + blk_queue_physical_block_size(lo->lo_queue, 512); + blk_queue_io_min(lo->lo_queue, 512); if (bdev) { bdput(bdev); invalidate_bdev(bdev); @@ -1336,6 +1321,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) return error; } +static int loop_set_block_size(struct loop_device *lo, unsigned long arg) +{ + if (lo->lo_state != Lo_bound) + return -ENXIO; + + if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) + return -EINVAL; + + blk_mq_freeze_queue(lo->lo_queue); + + blk_queue_logical_block_size(lo->lo_queue, arg); + blk_queue_physical_block_size(lo->lo_queue, arg); + blk_queue_io_min(lo->lo_queue, arg); + loop_update_dio(lo); + + blk_mq_unfreeze_queue(lo->lo_queue); + + return 0; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1384,6 +1389,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_dio(lo, arg); break; + case LOOP_SET_BLOCK_SIZE: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_block_size(lo, arg); + break; default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } @@ -1583,12 +1593,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) err = loop_clr_fd(lo); if (!err) return; - } else { + } else if (lo->lo_state == Lo_bound) { /* * Otherwise keep thread (if running) and config, * but flush possible ongoing bios in thread. */ - loop_flush(lo); + blk_mq_freeze_queue(lo->lo_queue); + blk_mq_unfreeze_queue(lo->lo_queue); } mutex_unlock(&lo->lo_ctl_mutex); @@ -1770,9 +1781,13 @@ static int loop_add(struct loop_device **l, int i) } lo->lo_queue->queuedata = lo; + blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS); + /* - * It doesn't make sense to enable merge because the I/O - * submitted to backing file is handled page by page. + * By default, we do buffer IO, so it doesn't make sense to enable + * merge because the I/O submitted to backing file is handled page by + * page. For directio mode, merge does help to dispatch bigger request + * to underlayer disk. We will enable merge once directio is enabled. */ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); diff --git a/drivers/block/loop.h b/drivers/block/loop.h index fecd3f97ef8c..f68c1d50802f 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -48,7 +48,6 @@ struct loop_device { struct file * lo_backing_file; struct block_device *lo_device; - unsigned lo_blocksize; void *key_data; gfp_t old_gfp_mask; @@ -68,10 +67,13 @@ struct loop_device { struct loop_cmd { struct kthread_work work; struct request *rq; - struct list_head list; - bool use_aio; /* use AIO interface to handle I/O */ + union { + bool use_aio; /* use AIO interface to handle I/O */ + atomic_t ref; /* only for aio */ + }; long ret; struct kiocb iocb; + struct bio_vec *bvec; }; /* Support for loadable transfer modules */ |