diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-25 15:44:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-25 15:44:05 -0700 |
commit | 2d70de4ee5931455811cd0ce692230785ae1c3ce (patch) | |
tree | 5302f6b5d939178c4b10ed5299316db321aa34b5 | |
parent | 5739844347518a0f4c327ae79e73fb101d864726 (diff) | |
parent | f278eb3d8178f9c31f8dfad7e91440e603dd7f1a (diff) |
Merge tag 'block-5.15-2021-09-25' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
- NVMe pull request via Christoph:
- keep ctrl->namespaces ordered (Christoph Hellwig)
- fix incorrect h2cdata pdu offset accounting in nvme-tcp (Sagi
Grimberg)
- handled updated hw_queues in nvme-fc more carefully (Daniel
Wagner, James Smart)
- md lock order fix (Christoph)
- fallocate locking fix (Ming)
- blktrace UAF fix (Zhihao)
- rq-qos bio tracking fix (Ming)
* tag 'block-5.15-2021-09-25' of git://git.kernel.dk/linux-block:
block: hold ->invalidate_lock in blkdev_fallocate
blktrace: Fix uaf in blk_trace access after removing by sysfs
block: don't call rq_qos_ops->done_bio if the bio isn't tracked
md: fix a lock order reversal in md_alloc
nvme: keep ctrl->namespaces ordered
nvme-tcp: fix incorrect h2cdata pdu offset accounting
nvme-fc: remove freeze/unfreeze around update_nr_hw_queues
nvme-fc: avoid race between time out and tear down
nvme-fc: update hardware queues before using them
-rw-r--r-- | block/bio.c | 2 | ||||
-rw-r--r-- | block/fops.c | 21 | ||||
-rw-r--r-- | drivers/md/md.c | 5 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 33 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 18 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 13 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 8 |
7 files changed, 55 insertions, 45 deletions
diff --git a/block/bio.c b/block/bio.c index 5df3dd282e40..a6fb6a0b4295 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1466,7 +1466,7 @@ again: if (!bio_integrity_endio(bio)) return; - if (bio->bi_bdev) + if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { diff --git a/block/fops.c b/block/fops.c index ffce6f6c68dd..1e970c247e0e 100644 --- a/block/fops.c +++ b/block/fops.c @@ -14,6 +14,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/falloc.h> #include <linux/suspend.h> +#include <linux/fs.h> #include "blk.h" static struct inode *bdev_file_inode(struct file *file) @@ -553,7 +554,8 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) static long blkdev_fallocate(struct file *file, int mode, loff_t start, loff_t len) { - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + struct inode *inode = bdev_file_inode(file); + struct block_device *bdev = I_BDEV(inode); loff_t end = start + len - 1; loff_t isize; int error; @@ -580,10 +582,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + filemap_invalidate_lock(inode->i_mapping); + /* Invalidate the page cache, including dirty pages. */ error = truncate_bdev_range(bdev, file->f_mode, start, end); if (error) - return error; + goto fail; switch (mode) { case FALLOC_FL_ZERO_RANGE: @@ -600,17 +604,12 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, GFP_KERNEL, 0); break; default: - return -EOPNOTSUPP; + error = -EOPNOTSUPP; } - if (error) - return error; - /* - * Invalidate the page cache again; if someone wandered in and dirtied - * a page, we just discard it - userspace has no way of knowing whether - * the write happened before or after discard completing... - */ - return truncate_bdev_range(bdev, file->f_mode, start, end); + fail: + filemap_invalidate_unlock(inode->i_mapping); + return error; } const struct file_operations def_blk_fops = { diff --git a/drivers/md/md.c b/drivers/md/md.c index ae8fe54ea358..6c0c3d0d905a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5700,10 +5700,6 @@ static int md_alloc(dev_t dev, char *name) disk->flags |= GENHD_FL_EXT_DEVT; disk->events |= DISK_EVENT_MEDIA_CHANGE; mddev->gendisk = disk; - /* As soon as we call add_disk(), another thread could get - * through to md_open, so make sure it doesn't get too far - */ - mutex_lock(&mddev->open_mutex); add_disk(disk); error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); @@ -5718,7 +5714,6 @@ static int md_alloc(dev_t dev, char *name) if (mddev->kobj.sd && sysfs_create_group(&mddev->kobj, &md_bitmap_group)) pr_debug("pointless warning\n"); - mutex_unlock(&mddev->open_mutex); abort: mutex_unlock(&disks_mutex); if (!error && mddev->kobj.sd) { diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6600e138945e..e486845d2c7e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -13,7 +13,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/backing-dev.h> -#include <linux/list_sort.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/pr.h> @@ -3716,15 +3715,6 @@ out_unlock: return ret; } -static int ns_cmp(void *priv, const struct list_head *a, - const struct list_head *b) -{ - struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); - struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); - - return nsa->head->ns_id - nsb->head->ns_id; -} - struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; @@ -3745,6 +3735,22 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) } EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); +/* + * Add the namespace to the controller list while keeping the list ordered. + */ +static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns) +{ + struct nvme_ns *tmp; + + list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) { + if (tmp->head->ns_id < ns->head->ns_id) { + list_add(&ns->list, &tmp->list); + return; + } + } + list_add(&ns->list, &ns->ctrl->namespaces); +} + static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct nvme_ns_ids *ids) { @@ -3795,9 +3801,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_unlink_ns; down_write(&ctrl->namespaces_rwsem); - list_add_tail(&ns->list, &ctrl->namespaces); + nvme_ns_add_to_ctrl_list(ns); up_write(&ctrl->namespaces_rwsem); - nvme_get_ctrl(ctrl); if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) @@ -4080,10 +4085,6 @@ static void nvme_scan_work(struct work_struct *work) if (nvme_scan_ns_list(ctrl) != 0) nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); - - down_write(&ctrl->namespaces_rwsem); - list_sort(NULL, &ctrl->namespaces, ns_cmp); - up_write(&ctrl->namespaces_rwsem); } /* diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b08a61ca283f..aa14ad963d91 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2487,6 +2487,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) */ if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); @@ -2510,6 +2511,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); @@ -2951,6 +2953,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->ctrl.queue_count == 1) return 0; + if (prior_ioq_cnt != nr_io_queues) { + dev_info(ctrl->ctrl.device, + "reconnect: revising io queue count from %d to %d\n", + prior_ioq_cnt, nr_io_queues); + blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); + } + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_free_io_queues; @@ -2959,15 +2968,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_delete_hw_queues; - if (prior_ioq_cnt != nr_io_queues) { - dev_info(ctrl->ctrl.device, - "reconnect: revising io queue count from %d to %d\n", - prior_ioq_cnt, nr_io_queues); - nvme_wait_freeze(&ctrl->ctrl); - blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); - nvme_unfreeze(&ctrl->ctrl); - } - return 0; out_delete_hw_queues: diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e4249b7dc056..3c1c29dd3020 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -620,7 +620,7 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); data->ttag = pdu->ttag; data->command_id = nvme_cid(rq); - data->data_offset = cpu_to_le32(req->data_sent); + data->data_offset = pdu->r2t_offset; data->data_length = cpu_to_le32(req->pdu_len); return 0; } @@ -953,7 +953,15 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) nvme_tcp_ddgst_update(queue->snd_hash, page, offset, ret); - /* fully successful last write*/ + /* + * update the request iterator except for the last payload send + * in the request where we don't want to modify it as we may + * compete with the RX path completing the request. + */ + if (req->data_sent + ret < req->data_len) + nvme_tcp_advance_req(req, ret); + + /* fully successful last send in current PDU */ if (last && ret == len) { if (queue->data_digest) { nvme_tcp_ddgst_final(queue->snd_hash, @@ -965,7 +973,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) } return 1; } - nvme_tcp_advance_req(req, ret); } return -EAGAIN; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c221e4c3f625..fa91f398f28b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1605,6 +1605,14 @@ static int blk_trace_remove_queue(struct request_queue *q) if (bt == NULL) return -EINVAL; + if (bt->trace_state == Blktrace_running) { + bt->trace_state = Blktrace_stopped; + spin_lock_irq(&running_trace_lock); + list_del_init(&bt->running_list); + spin_unlock_irq(&running_trace_lock); + relay_flush(bt->rchan); + } + put_probe_ref(); synchronize_rcu(); blk_trace_free(bt); |