summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-09 12:49:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-09 12:49:01 -0700
commit126e76ffbf78d9e948b641aadb265d16c57f5a3d (patch)
tree656e7838f0ec057936b80e15a774911df05c6005 /drivers/block
parentfbd01410e89a66f346ba1b3c0161e1198449b746 (diff)
parent175206cf9ab63161dec74d9cd7f9992e062491f5 (diff)
Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block
Pull followup block layer updates from Jens Axboe: "I ended up splitting the main pull request for this series into two, mainly because of clashes between NVMe fixes that went into 4.13 after the for-4.14 branches were split off. This pull request is mostly NVMe, but not exclusively. In detail, it contains: - Two pull request for NVMe changes from Christoph. Nothing new on the feature front, basically just fixes all over the map for the core bits, transport, rdma, etc. - Series from Bart, cleaning up various bits in the BFQ scheduler. - Series of bcache fixes, which has been lingering for a release or two. Coly sent this in, but patches from various people in this area. - Set of patches for BFQ from Paolo himself, updating both documentation and fixing some corner cases in performance. - Series from Omar, attempting to now get the 4k loop support correct. Our confidence level is higher this time. - Series from Shaohua for loop as well, improving O_DIRECT performance and fixing a use-after-free" * 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block: (74 commits) bcache: initialize dirty stripes in flash_dev_run() loop: set physical block size to logical block size bcache: fix bch_hprint crash and improve output bcache: Update continue_at() documentation bcache: silence static checker warning bcache: fix for gc and write-back race bcache: increase the number of open buckets bcache: Correct return value for sysfs attach errors bcache: correct cache_dirty_target in __update_writeback_rate() bcache: gc does not work when triggering by manual command bcache: Don't reinvent the wheel but use existing llist API bcache: do not subtract sectors_to_gc for bypassed IO bcache: fix sequential large write IO bypass bcache: Fix leak of bdev reference block/loop: remove unused field block/loop: fix use after free bfq: Use icq_to_bic() consistently bfq: Suppress compiler warnings about comparisons bfq: Check kstrtoul() return value bfq: Declare local functions static ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/loop.c193
-rw-r--r--drivers/block/loop.h8
2 files changed, 109 insertions, 92 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 407cb172d6e3..85de67334695 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,10 +213,13 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
*/
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
- if (use_dio)
+ if (use_dio) {
+ queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
- else
+ } else {
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
+ }
blk_mq_unfreeze_queue(lo->lo_queue);
}
@@ -460,12 +463,21 @@ static void lo_complete_rq(struct request *rq)
blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
}
+static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
+{
+ if (!atomic_dec_and_test(&cmd->ref))
+ return;
+ kfree(cmd->bvec);
+ cmd->bvec = NULL;
+ blk_mq_complete_request(cmd->rq);
+}
+
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
cmd->ret = ret;
- blk_mq_complete_request(cmd->rq);
+ lo_rw_aio_do_completion(cmd);
}
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
@@ -473,22 +485,51 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
{
struct iov_iter iter;
struct bio_vec *bvec;
- struct bio *bio = cmd->rq->bio;
+ struct request *rq = cmd->rq;
+ struct bio *bio = rq->bio;
struct file *file = lo->lo_backing_file;
+ unsigned int offset;
+ int segments = 0;
int ret;
- /* nomerge for loop request queue */
- WARN_ON(cmd->rq->bio != cmd->rq->biotail);
+ if (rq->bio != rq->biotail) {
+ struct req_iterator iter;
+ struct bio_vec tmp;
+
+ __rq_for_each_bio(bio, rq)
+ segments += bio_segments(bio);
+ bvec = kmalloc(sizeof(struct bio_vec) * segments, GFP_NOIO);
+ if (!bvec)
+ return -EIO;
+ cmd->bvec = bvec;
+
+ /*
+ * The bios of the request may be started from the middle of
+ * the 'bvec' because of bio splitting, so we can't directly
+ * copy bio->bi_iov_vec to new bvec. The rq_for_each_segment
+ * API will take care of all details for us.
+ */
+ rq_for_each_segment(tmp, rq, iter) {
+ *bvec = tmp;
+ bvec++;
+ }
+ bvec = cmd->bvec;
+ offset = 0;
+ } else {
+ /*
+ * Same here, this bio may be started from the middle of the
+ * 'bvec' because of bio splitting, so offset from the bvec
+ * must be passed to iov iterator
+ */
+ offset = bio->bi_iter.bi_bvec_done;
+ bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+ segments = bio_segments(bio);
+ }
+ atomic_set(&cmd->ref, 2);
- bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
- bio_segments(bio), blk_rq_bytes(cmd->rq));
- /*
- * This bio may be started from the middle of the 'bvec'
- * because of bio splitting, so offset from the bvec must
- * be passed to iov iterator
- */
- iter.iov_offset = bio->bi_iter.bi_bvec_done;
+ segments, blk_rq_bytes(rq));
+ iter.iov_offset = offset;
cmd->iocb.ki_pos = pos;
cmd->iocb.ki_filp = file;
@@ -500,6 +541,8 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
else
ret = call_read_iter(file, &cmd->iocb, &iter);
+ lo_rw_aio_do_completion(cmd);
+
if (ret != -EIOCBQUEUED)
cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
return 0;
@@ -546,74 +589,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
}
}
-struct switch_request {
- struct file *file;
- struct completion wait;
-};
-
static inline void loop_update_dio(struct loop_device *lo)
{
__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
lo->use_dio);
}
-/*
- * Do the actual switch; called from the BIO completion routine
- */
-static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
-{
- struct file *file = p->file;
- struct file *old_file = lo->lo_backing_file;
- struct address_space *mapping;
-
- /* if no new file, only flush of queued bios requested */
- if (!file)
- return;
-
- mapping = file->f_mapping;
- mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
- lo->lo_backing_file = file;
- lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
- mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
- lo->old_gfp_mask = mapping_gfp_mask(mapping);
- mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- loop_update_dio(lo);
-}
-
-/*
- * loop_switch performs the hard work of switching a backing store.
- * First it needs to flush existing IO, it does this by sending a magic
- * BIO down the pipe. The completion of this BIO does the actual switch.
- */
-static int loop_switch(struct loop_device *lo, struct file *file)
-{
- struct switch_request w;
-
- w.file = file;
-
- /* freeze queue and wait for completion of scheduled requests */
- blk_mq_freeze_queue(lo->lo_queue);
-
- /* do the switch action */
- do_loop_switch(lo, &w);
-
- /* unfreeze */
- blk_mq_unfreeze_queue(lo->lo_queue);
-
- return 0;
-}
-
-/*
- * Helper to flush the IOs in loop, but keeping loop thread running
- */
-static int loop_flush(struct loop_device *lo)
-{
- /* loop not yet configured, no running thread, nothing to flush */
- if (lo->lo_state != Lo_bound)
- return 0;
- return loop_switch(lo, NULL);
-}
-
static void loop_reread_partitions(struct loop_device *lo,
struct block_device *bdev)
{
@@ -678,9 +659,14 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_putf;
/* and ... switch */
- error = loop_switch(lo, file);
- if (error)
- goto out_putf;
+ blk_mq_freeze_queue(lo->lo_queue);
+ mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
+ lo->lo_backing_file = file;
+ lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
+ mapping_set_gfp_mask(file->f_mapping,
+ lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_update_dio(lo);
+ blk_mq_unfreeze_queue(lo->lo_queue);
fput(old_file);
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
@@ -867,7 +853,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct file *file, *f;
struct inode *inode;
struct address_space *mapping;
- unsigned lo_blocksize;
int lo_flags = 0;
int error;
loff_t size;
@@ -911,9 +896,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
- lo_blocksize = S_ISBLK(inode->i_mode) ?
- inode->i_bdev->bd_block_size : PAGE_SIZE;
-
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
@@ -927,7 +909,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = false;
- lo->lo_blocksize = lo_blocksize;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
@@ -947,7 +928,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
- set_blocksize(bdev, lo_blocksize);
+ set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
+ block_size(inode->i_bdev) : PAGE_SIZE);
lo->lo_state = Lo_bound;
if (part_shift)
@@ -1053,6 +1035,9 @@ static int loop_clr_fd(struct loop_device *lo)
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
+ blk_queue_logical_block_size(lo->lo_queue, 512);
+ blk_queue_physical_block_size(lo->lo_queue, 512);
+ blk_queue_io_min(lo->lo_queue, 512);
if (bdev) {
bdput(bdev);
invalidate_bdev(bdev);
@@ -1336,6 +1321,26 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg)
return error;
}
+static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
+{
+ if (lo->lo_state != Lo_bound)
+ return -ENXIO;
+
+ if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
+ return -EINVAL;
+
+ blk_mq_freeze_queue(lo->lo_queue);
+
+ blk_queue_logical_block_size(lo->lo_queue, arg);
+ blk_queue_physical_block_size(lo->lo_queue, arg);
+ blk_queue_io_min(lo->lo_queue, arg);
+ loop_update_dio(lo);
+
+ blk_mq_unfreeze_queue(lo->lo_queue);
+
+ return 0;
+}
+
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -1384,6 +1389,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_dio(lo, arg);
break;
+ case LOOP_SET_BLOCK_SIZE:
+ err = -EPERM;
+ if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
+ err = loop_set_block_size(lo, arg);
+ break;
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
@@ -1583,12 +1593,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
err = loop_clr_fd(lo);
if (!err)
return;
- } else {
+ } else if (lo->lo_state == Lo_bound) {
/*
* Otherwise keep thread (if running) and config,
* but flush possible ongoing bios in thread.
*/
- loop_flush(lo);
+ blk_mq_freeze_queue(lo->lo_queue);
+ blk_mq_unfreeze_queue(lo->lo_queue);
}
mutex_unlock(&lo->lo_ctl_mutex);
@@ -1770,9 +1781,13 @@ static int loop_add(struct loop_device **l, int i)
}
lo->lo_queue->queuedata = lo;
+ blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS);
+
/*
- * It doesn't make sense to enable merge because the I/O
- * submitted to backing file is handled page by page.
+ * By default, we do buffer IO, so it doesn't make sense to enable
+ * merge because the I/O submitted to backing file is handled page by
+ * page. For directio mode, merge does help to dispatch bigger request
+ * to underlayer disk. We will enable merge once directio is enabled.
*/
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f97ef8c..f68c1d50802f 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -48,7 +48,6 @@ struct loop_device {
struct file * lo_backing_file;
struct block_device *lo_device;
- unsigned lo_blocksize;
void *key_data;
gfp_t old_gfp_mask;
@@ -68,10 +67,13 @@ struct loop_device {
struct loop_cmd {
struct kthread_work work;
struct request *rq;
- struct list_head list;
- bool use_aio; /* use AIO interface to handle I/O */
+ union {
+ bool use_aio; /* use AIO interface to handle I/O */
+ atomic_t ref; /* only for aio */
+ };
long ret;
struct kiocb iocb;
+ struct bio_vec *bvec;
};
/* Support for loadable transfer modules */