summaryrefslogtreecommitdiff
path: root/drivers/vhost
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-12 09:50:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-12 09:50:34 -0700
commit7a53e17accce9d310d2e522dfc701d8da7ccfa65 (patch)
treec1ccf061aee42178159cbe9c31c2c4e004b76947 /drivers/vhost
parent999324f58c41262f5b64d04b7ac54e8f79b019fd (diff)
parent93e530d2a1c4c0fcce45e01ae6c5c6287a08d3e3 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: - A huge patchset supporting vq resize using the new vq reset capability - Features, fixes, and cleanups all over the place * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits) vdpa/mlx5: Fix possible uninitialized return value vdpa_sim_blk: add support for discard and write-zeroes vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests vdpa_sim_blk: check if sector is 0 for commands other than read or write vdpa_sim: Implement suspend vdpa op vhost-vdpa: uAPI to suspend the device vhost-vdpa: introduce SUSPEND backend feature bit vdpa: Add suspend operation virtio-blk: Avoid use-after-free on suspend/resume virtio_vdpa: support the arg sizes of find_vqs() vhost-vdpa: Call ida_simple_remove() when failed vDPA: fix 'cast to restricted le16' warnings in vdpa.c vDPA: !FEATURES_OK should not block querying device config space vDPA/ifcvf: support userspace to query features and MQ of a management device vDPA/ifcvf: get_config_size should return a value no greater than dev implementation vhost scsi: Allow user to control num virtqueues vhost-scsi: Fix max number of virtqueues vdpa/mlx5: Support different address spaces for control and data vdpa/mlx5: Implement susupend virtqueue callback ...
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/scsi.c85
-rw-r--r--drivers/vhost/vdpa.c38
-rw-r--r--drivers/vhost/vringh.c78
3 files changed, 153 insertions, 48 deletions
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 9b65509424dc..7ebf106d50c1 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -159,9 +159,13 @@ enum {
};
#define VHOST_SCSI_MAX_TARGET 256
-#define VHOST_SCSI_MAX_VQ 128
+#define VHOST_SCSI_MAX_IO_VQ 1024
#define VHOST_SCSI_MAX_EVENT 128
+static unsigned vhost_scsi_max_io_vqs = 128;
+module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644);
+MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024.");
+
struct vhost_scsi_virtqueue {
struct vhost_virtqueue vq;
/*
@@ -186,7 +190,9 @@ struct vhost_scsi {
char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
struct vhost_dev dev;
- struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
+ struct vhost_scsi_virtqueue *vqs;
+ unsigned long *compl_bitmap;
+ struct vhost_scsi_inflight **old_inflight;
struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */
@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
struct vhost_virtqueue *vq;
int idx, i;
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
{
struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
vs_completion_work);
- DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
struct virtio_scsi_cmd_resp v_rsp;
struct vhost_scsi_cmd *cmd, *t;
struct llist_node *llnode;
@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
struct iov_iter iov_iter;
int ret, vq;
- bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
+ bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
llnode = llist_del_all(&vs->vs_completion_list);
llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
se_cmd = &cmd->tvc_se_cmd;
@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
vq = q - vs->vqs;
- __set_bit(vq, signal);
+ __set_bit(vq, vs->compl_bitmap);
} else
pr_err("Faulted on virtio_scsi_cmd_resp\n");
@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
}
vq = -1;
- while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
- < VHOST_SCSI_MAX_VQ)
+ while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
+ < vs->dev.nvqs)
vhost_signal(&vs->dev, &vs->vqs[vq].vq);
}
@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
/* Callers must hold dev mutex */
static void vhost_scsi_flush(struct vhost_scsi *vs)
{
- struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
int i;
/* Init new inflight and remember the old inflight */
- vhost_scsi_init_inflight(vs, old_inflight);
+ vhost_scsi_init_inflight(vs, vs->old_inflight);
/*
* The inflight->kref was initialized to 1. We decrement it here to
* indicate the start of the flush operation so that it will reach 0
* when all the reqs are finished.
*/
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
- kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight);
+ for (i = 0; i < vs->dev.nvqs; i++)
+ kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight);
/* Flush both the vhost poll and vhost work */
vhost_dev_flush(&vs->dev);
/* Wait for all reqs issued before the flush to be finished */
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
- wait_for_completion(&old_inflight[i]->comp);
+ for (i = 0; i < vs->dev.nvqs; i++)
+ wait_for_completion(&vs->old_inflight[i]->comp);
}
static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
sizeof(vs->vs_vhost_wwpn));
- for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
if (!vhost_vq_is_setup(vq))
continue;
@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
goto destroy_vq_cmds;
}
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, vs_tpg);
@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
target_undepend_item(&se_tpg->tpg_group.cg_item);
}
if (match) {
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, NULL);
@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
/* Make sure cmds are not running before tearing them down. */
vhost_scsi_flush(vs);
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
vhost_scsi_destroy_vq_cmds(vq);
}
@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
return -EFAULT;
}
- for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vq->acked_features = features;
@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
{
struct vhost_scsi *vs;
struct vhost_virtqueue **vqs;
- int r = -ENOMEM, i;
+ int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
goto err_vs;
- vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL);
- if (!vqs)
+ if (nvqs > VHOST_SCSI_MAX_IO_VQ) {
+ pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs,
+ VHOST_SCSI_MAX_IO_VQ);
+ nvqs = VHOST_SCSI_MAX_IO_VQ;
+ } else if (nvqs == 0) {
+ pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs);
+ nvqs = 1;
+ }
+ nvqs += VHOST_SCSI_VQ_IO;
+
+ vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
+ if (!vs->compl_bitmap)
+ goto err_compl_bitmap;
+
+ vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vs->old_inflight)
+ goto err_inflight;
+
+ vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vs->vqs)
goto err_vqs;
+ vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
+ if (!vqs)
+ goto err_local_vqs;
+
vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
- for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
+ for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
vqs[i] = &vs->vqs[i].vq;
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
- vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
+ vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
VHOST_SCSI_WEIGHT, 0, true, NULL);
vhost_scsi_init_inflight(vs, NULL);
@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
f->private_data = vs;
return 0;
+err_local_vqs:
+ kfree(vs->vqs);
err_vqs:
+ kfree(vs->old_inflight);
+err_inflight:
+ bitmap_free(vs->compl_bitmap);
+err_compl_bitmap:
kvfree(vs);
err_vs:
return r;
@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
vhost_dev_stop(&vs->dev);
vhost_dev_cleanup(&vs->dev);
kfree(vs->dev.vqs);
+ kfree(vs->vqs);
+ kfree(vs->old_inflight);
+ bitmap_free(vs->compl_bitmap);
kvfree(vs);
return 0;
}
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 23dcbfdfa13b..166044642fd5 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return 0;
}
+static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ return ops->suspend;
+}
+
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
struct vdpa_device *vdpa = v->vdpa;
@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
return 0;
}
+/* After a successful return of ioctl the device must not process more
+ * virtqueue descriptors. The device can answer to read or writes of config
+ * fields as if it were not suspended. In particular, writing to "queue_enable"
+ * with a value of 1 will not make the device start processing buffers.
+ */
+static long vhost_vdpa_suspend(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (!ops->suspend)
+ return -EOPNOTSUPP;
+
+ return ops->suspend(vdpa);
+}
+
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if (cmd == VHOST_SET_BACKEND_FEATURES) {
if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT;
- if (features & ~VHOST_VDPA_BACKEND_FEATURES)
+ if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
+ BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
+ return -EOPNOTSUPP;
+ if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
+ !vhost_vdpa_can_suspend(v))
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features);
return 0;
@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES;
+ if (vhost_vdpa_can_suspend(v))
+ features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT;
break;
@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_GET_VQS_COUNT:
r = vhost_vdpa_get_vqs_count(v, argp);
break;
+ case VHOST_VDPA_SUSPEND:
+ r = vhost_vdpa_suspend(v);
+ break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
if (!bus)
return -EFAULT;
- if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
+ if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
return -ENOTSUPP;
v->domain = iommu_domain_alloc(bus);
@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
err:
put_device(&v->dev);
+ ida_simple_remove(&vhost_vdpa_ida, v->minor);
return r;
}
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index eab55accf381..11f59dd06a74 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
static int iotlb_translate(const struct vringh *vrh,
- u64 addr, u64 len, struct bio_vec iov[],
+ u64 addr, u64 len, u64 *translated,
+ struct bio_vec iov[],
int iov_size, u32 perm)
{
struct vhost_iotlb_map *map;
@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh,
spin_unlock(vrh->iotlb_lock);
+ if (translated)
+ *translated = min(len, s);
+
return ret;
}
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
- struct iov_iter iter;
- struct bio_vec iov[16];
- int ret;
+ u64 total_translated = 0;
- ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
- len, iov, 16, VHOST_MAP_RO);
- if (ret < 0)
- return ret;
+ while (total_translated < len) {
+ struct bio_vec iov[16];
+ struct iov_iter iter;
+ u64 translated;
+ int ret;
- iov_iter_bvec(&iter, READ, iov, ret, len);
+ ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
+ len - total_translated, &translated,
+ iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
+ if (ret == -ENOBUFS)
+ ret = ARRAY_SIZE(iov);
+ else if (ret < 0)
+ return ret;
- ret = copy_from_iter(dst, len, &iter);
+ iov_iter_bvec(&iter, READ, iov, ret, translated);
- return ret;
+ ret = copy_from_iter(dst, translated, &iter);
+ if (ret < 0)
+ return ret;
+
+ src += translated;
+ dst += translated;
+ total_translated += translated;
+ }
+
+ return total_translated;
}
static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
- struct iov_iter iter;
- struct bio_vec iov[16];
- int ret;
+ u64 total_translated = 0;
- ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
- len, iov, 16, VHOST_MAP_WO);
- if (ret < 0)
- return ret;
+ while (total_translated < len) {
+ struct bio_vec iov[16];
+ struct iov_iter iter;
+ u64 translated;
+ int ret;
+
+ ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
+ len - total_translated, &translated,
+ iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
+ if (ret == -ENOBUFS)
+ ret = ARRAY_SIZE(iov);
+ else if (ret < 0)
+ return ret;
- iov_iter_bvec(&iter, WRITE, iov, ret, len);
+ iov_iter_bvec(&iter, WRITE, iov, ret, translated);
+
+ ret = copy_to_iter(src, translated, &iter);
+ if (ret < 0)
+ return ret;
+
+ src += translated;
+ dst += translated;
+ total_translated += translated;
+ }
- return copy_to_iter(src, len, &iter);
+ return total_translated;
}
static inline int getu16_iotlb(const struct vringh *vrh,
@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh,
int ret;
/* Atomic read is needed for getu16 */
- ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_RO);
if (ret < 0)
return ret;
@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh,
int ret;
/* Atomic write is needed for putu16 */
- ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_WO);
if (ret < 0)
return ret;