diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-12 09:50:34 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-12 09:50:34 -0700 |
commit | 7a53e17accce9d310d2e522dfc701d8da7ccfa65 (patch) | |
tree | c1ccf061aee42178159cbe9c31c2c4e004b76947 /drivers/vhost | |
parent | 999324f58c41262f5b64d04b7ac54e8f79b019fd (diff) | |
parent | 93e530d2a1c4c0fcce45e01ae6c5c6287a08d3e3 (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- A huge patchset supporting vq resize using the new vq reset
capability
- Features, fixes, and cleanups all over the place
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits)
vdpa/mlx5: Fix possible uninitialized return value
vdpa_sim_blk: add support for discard and write-zeroes
vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH
vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests
vdpa_sim_blk: check if sector is 0 for commands other than read or write
vdpa_sim: Implement suspend vdpa op
vhost-vdpa: uAPI to suspend the device
vhost-vdpa: introduce SUSPEND backend feature bit
vdpa: Add suspend operation
virtio-blk: Avoid use-after-free on suspend/resume
virtio_vdpa: support the arg sizes of find_vqs()
vhost-vdpa: Call ida_simple_remove() when failed
vDPA: fix 'cast to restricted le16' warnings in vdpa.c
vDPA: !FEATURES_OK should not block querying device config space
vDPA/ifcvf: support userspace to query features and MQ of a management device
vDPA/ifcvf: get_config_size should return a value no greater than dev implementation
vhost scsi: Allow user to control num virtqueues
vhost-scsi: Fix max number of virtqueues
vdpa/mlx5: Support different address spaces for control and data
vdpa/mlx5: Implement susupend virtqueue callback
...
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/scsi.c | 85 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 38 | ||||
-rw-r--r-- | drivers/vhost/vringh.c | 78 |
3 files changed, 153 insertions, 48 deletions
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 9b65509424dc..7ebf106d50c1 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -159,9 +159,13 @@ enum { }; #define VHOST_SCSI_MAX_TARGET 256 -#define VHOST_SCSI_MAX_VQ 128 +#define VHOST_SCSI_MAX_IO_VQ 1024 #define VHOST_SCSI_MAX_EVENT 128 +static unsigned vhost_scsi_max_io_vqs = 128; +module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644); +MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024."); + struct vhost_scsi_virtqueue { struct vhost_virtqueue vq; /* @@ -186,7 +190,9 @@ struct vhost_scsi { char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; struct vhost_dev dev; - struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ]; + struct vhost_scsi_virtqueue *vqs; + unsigned long *compl_bitmap; + struct vhost_scsi_inflight **old_inflight; struct vhost_work vs_completion_work; /* cmd completion work item */ struct llist_head vs_completion_list; /* cmd completion queue */ @@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs, struct vhost_virtqueue *vq; int idx, i; - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); @@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) { struct vhost_scsi *vs = container_of(work, struct vhost_scsi, vs_completion_work); - DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ); struct virtio_scsi_cmd_resp v_rsp; struct vhost_scsi_cmd *cmd, *t; struct llist_node *llnode; @@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) struct iov_iter iov_iter; int ret, vq; - bitmap_zero(signal, VHOST_SCSI_MAX_VQ); + bitmap_zero(vs->compl_bitmap, vs->dev.nvqs); llnode = llist_del_all(&vs->vs_completion_list); llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; @@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); vq = q - vs->vqs; - __set_bit(vq, signal); + __set_bit(vq, vs->compl_bitmap); } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); @@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) } vq = -1; - while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) - < VHOST_SCSI_MAX_VQ) + while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1)) + < vs->dev.nvqs) vhost_signal(&vs->dev, &vs->vqs[vq].vq); } @@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) /* Callers must hold dev mutex */ static void vhost_scsi_flush(struct vhost_scsi *vs) { - struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ]; int i; /* Init new inflight and remember the old inflight */ - vhost_scsi_init_inflight(vs, old_inflight); + vhost_scsi_init_inflight(vs, vs->old_inflight); /* * The inflight->kref was initialized to 1. We decrement it here to * indicate the start of the flush operation so that it will reach 0 * when all the reqs are finished. */ - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) - kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); + for (i = 0; i < vs->dev.nvqs; i++) + kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight); /* Flush both the vhost poll and vhost work */ vhost_dev_flush(&vs->dev); /* Wait for all reqs issued before the flush to be finished */ - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) - wait_for_completion(&old_inflight[i]->comp); + for (i = 0; i < vs->dev.nvqs; i++) + wait_for_completion(&vs->old_inflight[i]->comp); } static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) @@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); - for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; if (!vhost_vq_is_setup(vq)) continue; @@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, goto destroy_vq_cmds; } - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, vs_tpg); @@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, target_undepend_item(&se_tpg->tpg_group.cg_item); } if (match) { - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vhost_vq_set_backend(vq, NULL); @@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, /* Make sure cmds are not running before tearing them down. */ vhost_scsi_flush(vs); - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; vhost_scsi_destroy_vq_cmds(vq); } @@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) return -EFAULT; } - for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = 0; i < vs->dev.nvqs; i++) { vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vq->acked_features = features; @@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) { struct vhost_scsi *vs; struct vhost_virtqueue **vqs; - int r = -ENOMEM, i; + int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs; vs = kvzalloc(sizeof(*vs), GFP_KERNEL); if (!vs) goto err_vs; - vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL); - if (!vqs) + if (nvqs > VHOST_SCSI_MAX_IO_VQ) { + pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs, + VHOST_SCSI_MAX_IO_VQ); + nvqs = VHOST_SCSI_MAX_IO_VQ; + } else if (nvqs == 0) { + pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs); + nvqs = 1; + } + nvqs += VHOST_SCSI_VQ_IO; + + vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL); + if (!vs->compl_bitmap) + goto err_compl_bitmap; + + vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight), + GFP_KERNEL | __GFP_ZERO); + if (!vs->old_inflight) + goto err_inflight; + + vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs), + GFP_KERNEL | __GFP_ZERO); + if (!vs->vqs) goto err_vqs; + vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_local_vqs; + vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work); @@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; - for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) { vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, + vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV, VHOST_SCSI_WEIGHT, 0, true, NULL); vhost_scsi_init_inflight(vs, NULL); @@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) f->private_data = vs; return 0; +err_local_vqs: + kfree(vs->vqs); err_vqs: + kfree(vs->old_inflight); +err_inflight: + bitmap_free(vs->compl_bitmap); +err_compl_bitmap: kvfree(vs); err_vs: return r; @@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_dev_stop(&vs->dev); vhost_dev_cleanup(&vs->dev); kfree(vs->dev.vqs); + kfree(vs->vqs); + kfree(vs->old_inflight); + bitmap_free(vs->compl_bitmap); kvfree(vs); return 0; } diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 23dcbfdfa13b..166044642fd5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return 0; } +static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + return ops->suspend; +} + static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; @@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) return 0; } +/* After a successful return of ioctl the device must not process more + * virtqueue descriptors. The device can answer to read or writes of config + * fields as if it were not suspended. In particular, writing to "queue_enable" + * with a value of 1 will not make the device start processing buffers. + */ +static long vhost_vdpa_suspend(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->suspend) + return -EOPNOTSUPP; + + return ops->suspend(vdpa); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (cmd == VHOST_SET_BACKEND_FEATURES) { if (copy_from_user(&features, featurep, sizeof(features))) return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) + if (features & ~(VHOST_VDPA_BACKEND_FEATURES | + BIT_ULL(VHOST_BACKEND_F_SUSPEND))) + return -EOPNOTSUPP; + if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && + !vhost_vdpa_can_suspend(v)) return -EOPNOTSUPP; vhost_set_backend_features(&v->vdev, features); return 0; @@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, break; case VHOST_GET_BACKEND_FEATURES: features = VHOST_VDPA_BACKEND_FEATURES; + if (vhost_vdpa_can_suspend(v)) + features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); if (copy_to_user(featurep, &features, sizeof(features))) r = -EFAULT; break; @@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_VQS_COUNT: r = vhost_vdpa_get_vqs_count(v, argp); break; + case VHOST_VDPA_SUSPEND: + r = vhost_vdpa_suspend(v); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) if (!bus) return -EFAULT; - if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) + if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) return -ENOTSUPP; v->domain = iommu_domain_alloc(bus); @@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) err: put_device(&v->dev); + ida_simple_remove(&vhost_vdpa_ida, v->minor); return r; } diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index eab55accf381..11f59dd06a74 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern); #if IS_REACHABLE(CONFIG_VHOST_IOTLB) static int iotlb_translate(const struct vringh *vrh, - u64 addr, u64 len, struct bio_vec iov[], + u64 addr, u64 len, u64 *translated, + struct bio_vec iov[], int iov_size, u32 perm) { struct vhost_iotlb_map *map; @@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh, spin_unlock(vrh->iotlb_lock); + if (translated) + *translated = min(len, s); + return ret; } static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { - struct iov_iter iter; - struct bio_vec iov[16]; - int ret; + u64 total_translated = 0; - ret = iotlb_translate(vrh, (u64)(uintptr_t)src, - len, iov, 16, VHOST_MAP_RO); - if (ret < 0) - return ret; + while (total_translated < len) { + struct bio_vec iov[16]; + struct iov_iter iter; + u64 translated; + int ret; - iov_iter_bvec(&iter, READ, iov, ret, len); + ret = iotlb_translate(vrh, (u64)(uintptr_t)src, + len - total_translated, &translated, + iov, ARRAY_SIZE(iov), VHOST_MAP_RO); + if (ret == -ENOBUFS) + ret = ARRAY_SIZE(iov); + else if (ret < 0) + return ret; - ret = copy_from_iter(dst, len, &iter); + iov_iter_bvec(&iter, READ, iov, ret, translated); - return ret; + ret = copy_from_iter(dst, translated, &iter); + if (ret < 0) + return ret; + + src += translated; + dst += translated; + total_translated += translated; + } + + return total_translated; } static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, void *src, size_t len) { - struct iov_iter iter; - struct bio_vec iov[16]; - int ret; + u64 total_translated = 0; - ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, - len, iov, 16, VHOST_MAP_WO); - if (ret < 0) - return ret; + while (total_translated < len) { + struct bio_vec iov[16]; + struct iov_iter iter; + u64 translated; + int ret; + + ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, + len - total_translated, &translated, + iov, ARRAY_SIZE(iov), VHOST_MAP_WO); + if (ret == -ENOBUFS) + ret = ARRAY_SIZE(iov); + else if (ret < 0) + return ret; - iov_iter_bvec(&iter, WRITE, iov, ret, len); + iov_iter_bvec(&iter, WRITE, iov, ret, translated); + + ret = copy_to_iter(src, translated, &iter); + if (ret < 0) + return ret; + + src += translated; + dst += translated; + total_translated += translated; + } - return copy_to_iter(src, len, &iter); + return total_translated; } static inline int getu16_iotlb(const struct vringh *vrh, @@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh, int ret; /* Atomic read is needed for getu16 */ - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, &iov, 1, VHOST_MAP_RO); if (ret < 0) return ret; @@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh, int ret; /* Atomic write is needed for putu16 */ - ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), + ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL, &iov, 1, VHOST_MAP_WO); if (ret < 0) return ret; |