path: root/drivers/vhost
author     Linus Torvalds <torvalds@linux-foundation.org>  2023-04-27 17:05:34 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2023-04-27 17:05:34 -0700
commit     8ccd54fe45713cd458015b5b08d6098545e70543 (patch)
tree       12a034b2ee42d681b2e915815c4529d6f246bcc4 /drivers/vhost
parent     0835b5ee8704aef4e19b369237a762c52c7b6fb1 (diff)
parent     c82729e06644f4e087f5ff0f91b8fb15e03b8890 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
 "virtio,vhost,vdpa: features, fixes, and cleanups:

   - reduction in interrupt rate in virtio
   - perf improvement for VDUSE
   - scalability for vhost-scsi
   - non power of 2 ring support for packed rings
   - better management for mlx5 vdpa
   - suspend for snet
   - VIRTIO_F_NOTIFICATION_DATA
   - shared backend with vdpa-sim-blk
   - user VA support in vdpa-sim
   - better struct packing for virtio

  and fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (52 commits)
  vhost_vdpa: fix unmap process in no-batch mode
  MAINTAINERS: make me a reviewer of VIRTIO CORE AND NET DRIVERS
  tools/virtio: fix build caused by virtio_ring changes
  virtio_ring: add a struct device forward declaration
  vdpa_sim_blk: support shared backend
  vdpa_sim: move buffer allocation in the devices
  vdpa/snet: use likely/unlikely macros in hot functions
  vdpa/snet: implement kick_vq_with_data callback
  virtio-vdpa: add VIRTIO_F_NOTIFICATION_DATA feature support
  virtio: add VIRTIO_F_NOTIFICATION_DATA feature support
  vdpa/snet: support the suspend vDPA callback
  vdpa/snet: support getting and setting VQ state
  MAINTAINERS: add vringh.h to Virtio Core and Net Drivers
  vringh: address kdoc warnings
  vdpa: address kdoc warnings
  virtio_ring: don't update event idx on get_buf
  vdpa_sim: add support for user VA
  vdpa_sim: replace the spinlock with a mutex to protect the state
  vdpa_sim: use kthread worker
  vdpa_sim: make devices agnostic for work management
  ...
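Several of the entries above concern VIRTIO_F_NOTIFICATION_DATA, under which the driver sends extra state with each kick instead of only the queue number, letting the device avoid re-reading the ring. A minimal sketch of the encoding, assuming the le32 { vqn:16; next_off:15; next_wrap:1 } notification layout from the virtio spec (the helper below is illustrative, not code from this series):

/*
 * Illustrative sketch: encoding the VIRTIO_F_NOTIFICATION_DATA payload.
 * Assumes the virtio spec's le32 { vqn:16; next_off:15; next_wrap:1 }
 * layout; for a split ring, next_off is the next avail index and
 * next_wrap is unused.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t notify_data(uint16_t vqn, uint16_t next_off, uint8_t next_wrap)
{
    return (uint32_t)vqn |
           ((uint32_t)(next_off & 0x7fff) << 16) |
           ((uint32_t)(next_wrap & 1) << 31);
}

int main(void)
{
    /* Kick queue 3 with next avail index 42 (split ring). */
    printf("notification data: 0x%08x\n", notify_data(3, 42, 0));
    return 0;
}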
Diffstat (limited to 'drivers/vhost')
-rw-r--r--  drivers/vhost/scsi.c    102
-rw-r--r--  drivers/vhost/vdpa.c     44
-rw-r--r--  drivers/vhost/vhost.c     6
-rw-r--r--  drivers/vhost/vringh.c  191
4 files changed, 245 insertions, 98 deletions
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 139ad52d0aa1..bb10fa4bb4f6 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -229,7 +229,10 @@ struct vhost_scsi_ctx {
struct iov_iter out_iter;
};
-/* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
+/*
+ * Global mutex to protect vhost_scsi TPG list for vhost IOCTLs and LIO
+ * configfs management operations.
+ */
static DEFINE_MUTEX(vhost_scsi_mutex);
static LIST_HEAD(vhost_scsi_list);
@@ -1501,7 +1504,7 @@ out:
* vhost_scsi_tpg with an active struct vhost_scsi_nexus
*
* The lock nesting rule is:
- * vhost_scsi_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex
+ * vs->dev.mutex -> vhost_scsi_mutex -> tpg->tv_tpg_mutex -> vq->mutex
*/
static int
vhost_scsi_set_endpoint(struct vhost_scsi *vs,
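The nesting rule rewritten above (vs->dev.mutex taken before vhost_scsi_mutex) only works if every path acquires the two locks in that same order; mixing orders between paths is the classic ABBA deadlock. A minimal userspace sketch of the principle, with hypothetical lock names standing in for the vhost ones:

/*
 * Sketch only: consistent lock ordering across all paths (pthreads,
 * hypothetical names). outer_lock plays the role of vs->dev.mutex and
 * inner_lock the role of vhost_scsi_mutex.
 */
#include <pthread.h>

static pthread_mutex_t outer_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER;

static void set_endpoint(void)
{
    pthread_mutex_lock(&outer_lock);   /* always first  */
    pthread_mutex_lock(&inner_lock);   /* always second */
    /* ... walk the shared target list ... */
    pthread_mutex_unlock(&inner_lock);
    pthread_mutex_unlock(&outer_lock);
}

static void clear_endpoint(void)
{
    /* Same order; taking inner_lock first here could deadlock against
     * a concurrent set_endpoint(). */
    pthread_mutex_lock(&outer_lock);
    pthread_mutex_lock(&inner_lock);
    /* ... */
    pthread_mutex_unlock(&inner_lock);
    pthread_mutex_unlock(&outer_lock);
}

int main(void)
{
    set_endpoint();
    clear_endpoint();
    return 0;
}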
@@ -1515,7 +1518,6 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
int index, ret, i, len;
bool match = false;
- mutex_lock(&vhost_scsi_mutex);
mutex_lock(&vs->dev.mutex);
/* Verify that ring has been setup correctly. */
@@ -1536,6 +1538,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
if (vs->vs_tpg)
memcpy(vs_tpg, vs->vs_tpg, len);
+ mutex_lock(&vhost_scsi_mutex);
list_for_each_entry(tpg, &vhost_scsi_list, tv_tpg_list) {
mutex_lock(&tpg->tv_tpg_mutex);
if (!tpg->tpg_nexus) {
@@ -1551,6 +1554,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) {
mutex_unlock(&tpg->tv_tpg_mutex);
+ mutex_unlock(&vhost_scsi_mutex);
ret = -EEXIST;
goto undepend;
}
@@ -1565,6 +1569,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
if (ret) {
pr_warn("target_depend_item() failed: %d\n", ret);
mutex_unlock(&tpg->tv_tpg_mutex);
+ mutex_unlock(&vhost_scsi_mutex);
goto undepend;
}
tpg->tv_tpg_vhost_count++;
@@ -1574,6 +1579,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
}
mutex_unlock(&tpg->tv_tpg_mutex);
}
+ mutex_unlock(&vhost_scsi_mutex);
if (match) {
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
@@ -1629,7 +1635,6 @@ undepend:
kfree(vs_tpg);
out:
mutex_unlock(&vs->dev.mutex);
- mutex_unlock(&vhost_scsi_mutex);
return ret;
}
@@ -1645,7 +1650,6 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
int index, ret, i;
u8 target;
- mutex_lock(&vhost_scsi_mutex);
mutex_lock(&vs->dev.mutex);
/* Verify that ring has been setup correctly. */
for (index = 0; index < vs->dev.nvqs; ++index) {
@@ -1666,11 +1670,10 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
if (!tpg)
continue;
- mutex_lock(&tpg->tv_tpg_mutex);
tv_tport = tpg->tport;
if (!tv_tport) {
ret = -ENODEV;
- goto err_tpg;
+ goto err_dev;
}
if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
@@ -1679,35 +1682,51 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
tv_tport->tport_name, tpg->tport_tpgt,
t->vhost_wwpn, t->vhost_tpgt);
ret = -EINVAL;
- goto err_tpg;
+ goto err_dev;
}
+ match = true;
+ }
+ if (!match)
+ goto free_vs_tpg;
+
+ /* Prevent new cmds from starting and accessing the tpgs/sessions */
+ for (i = 0; i < vs->dev.nvqs; i++) {
+ vq = &vs->vqs[i].vq;
+ mutex_lock(&vq->mutex);
+ vhost_vq_set_backend(vq, NULL);
+ mutex_unlock(&vq->mutex);
+ }
+ /* Make sure cmds are not running before tearing them down. */
+ vhost_scsi_flush(vs);
+
+ for (i = 0; i < vs->dev.nvqs; i++) {
+ vq = &vs->vqs[i].vq;
+ vhost_scsi_destroy_vq_cmds(vq);
+ }
+
+ /*
+ * We can now release our hold on the tpg and sessions and userspace
+ * can free them after this point.
+ */
+ for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
+ target = i;
+ tpg = vs->vs_tpg[target];
+ if (!tpg)
+ continue;
+
+ mutex_lock(&tpg->tv_tpg_mutex);
+
tpg->tv_tpg_vhost_count--;
tpg->vhost_scsi = NULL;
vs->vs_tpg[target] = NULL;
- match = true;
+
mutex_unlock(&tpg->tv_tpg_mutex);
- /*
- * Release se_tpg->tpg_group.cg_item configfs dependency now
- * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur.
- */
+
se_tpg = &tpg->se_tpg;
target_undepend_item(&se_tpg->tpg_group.cg_item);
}
- if (match) {
- for (i = 0; i < vs->dev.nvqs; i++) {
- vq = &vs->vqs[i].vq;
- mutex_lock(&vq->mutex);
- vhost_vq_set_backend(vq, NULL);
- mutex_unlock(&vq->mutex);
- }
- /* Make sure cmds are not running before tearing them down. */
- vhost_scsi_flush(vs);
- for (i = 0; i < vs->dev.nvqs; i++) {
- vq = &vs->vqs[i].vq;
- vhost_scsi_destroy_vq_cmds(vq);
- }
- }
+free_vs_tpg:
/*
* Act as synchronize_rcu to make sure access to
* old vs->vs_tpg is finished.
@@ -1717,14 +1736,10 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
vs->vs_tpg = NULL;
WARN_ON(vs->vs_events_nr);
mutex_unlock(&vs->dev.mutex);
- mutex_unlock(&vhost_scsi_mutex);
return 0;
-err_tpg:
- mutex_unlock(&tpg->tv_tpg_mutex);
err_dev:
mutex_unlock(&vs->dev.mutex);
- mutex_unlock(&vhost_scsi_mutex);
return ret;
}
@@ -1965,8 +1980,6 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg,
if (!vs)
return;
- mutex_lock(&vs->dev.mutex);
-
if (plug)
reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
else
@@ -1974,11 +1987,18 @@ vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg,
vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
mutex_lock(&vq->mutex);
+ /*
+ * We can't queue events if the backend has been cleared, because
+ * we could end up queueing an event after the flush.
+ */
+ if (!vhost_vq_get_backend(vq))
+ goto unlock;
+
if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
vhost_scsi_send_evt(vs, tpg, lun,
VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
+unlock:
mutex_unlock(&vq->mutex);
- mutex_unlock(&vs->dev.mutex);
}
static void vhost_scsi_hotplug(struct vhost_scsi_tpg *tpg, struct se_lun *lun)
@@ -1997,15 +2017,10 @@ static int vhost_scsi_port_link(struct se_portal_group *se_tpg,
struct vhost_scsi_tpg *tpg = container_of(se_tpg,
struct vhost_scsi_tpg, se_tpg);
- mutex_lock(&vhost_scsi_mutex);
-
mutex_lock(&tpg->tv_tpg_mutex);
tpg->tv_tpg_port_count++;
- mutex_unlock(&tpg->tv_tpg_mutex);
-
vhost_scsi_hotplug(tpg, lun);
-
- mutex_unlock(&vhost_scsi_mutex);
+ mutex_unlock(&tpg->tv_tpg_mutex);
return 0;
}
@@ -2016,15 +2031,10 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
struct vhost_scsi_tpg *tpg = container_of(se_tpg,
struct vhost_scsi_tpg, se_tpg);
- mutex_lock(&vhost_scsi_mutex);
-
mutex_lock(&tpg->tv_tpg_mutex);
tpg->tv_tpg_port_count--;
- mutex_unlock(&tpg->tv_tpg_mutex);
-
vhost_scsi_hotunplug(tpg, lun);
-
- mutex_unlock(&vhost_scsi_mutex);
+ mutex_unlock(&tpg->tv_tpg_mutex);
}
static ssize_t vhost_scsi_tpg_attrib_fabric_prot_type_store(
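The vhost_scsi_do_plug() change above queues a hotplug event only while the vq still has a backend, so no event can slip in after vhost_scsi_clear_endpoint() has cleared the backends and flushed. A small sketch of that check-under-the-lock pattern (userspace, hypothetical names, not the vhost code):

/*
 * Sketch only: teardown clears a published pointer under the same lock
 * that event producers take, so nothing new can be queued after the flush.
 */
#include <pthread.h>
#include <stddef.h>

struct evt_queue {
    pthread_mutex_t lock;
    void *backend;      /* NULL once teardown has started */
    int pending;
};

int queue_event(struct evt_queue *q)
{
    int queued = 0;

    pthread_mutex_lock(&q->lock);
    if (q->backend) {   /* mirrors the vhost_vq_get_backend() check */
        q->pending++;
        queued = 1;
    }
    pthread_mutex_unlock(&q->lock);
    return queued;
}

void clear_backend(struct evt_queue *q)
{
    pthread_mutex_lock(&q->lock);
    q->backend = NULL;  /* the later flush cannot race new events */
    pthread_mutex_unlock(&q->lock);
}

int main(void)
{
    struct evt_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER, .backend = &q };

    queue_event(&q);    /* queued */
    clear_backend(&q);
    queue_event(&q);    /* refused */
    return 0;
}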
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 135ad39958cc..8c1aefc865f0 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -219,6 +219,28 @@ static int vhost_vdpa_reset(struct vhost_vdpa *v)
return vdpa_reset(vdpa);
}
+static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (!vdpa->use_va || !ops->bind_mm)
+ return 0;
+
+ return ops->bind_mm(vdpa, v->vdev.mm);
+}
+
+static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
+{
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+
+ if (!vdpa->use_va || !ops->unbind_mm)
+ return;
+
+ ops->unbind_mm(vdpa);
+}
+
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
struct vdpa_device *vdpa = v->vdpa;
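The new vhost_vdpa_bind_mm()/unbind_mm() helpers above treat the bind_mm/unbind_mm config ops as optional: parents that do not use user VA, or that do not implement the hooks, are simply skipped. A small sketch of that optional-callback pattern (hypothetical ops table, not the vdpa_config_ops):

/*
 * Sketch only: optional callbacks in an ops table (hypothetical types).
 * A missing hook makes the wrapper a successful no-op, mirroring the
 * use_va/bind_mm checks above.
 */
#include <stddef.h>
#include <stdio.h>

struct backend_ops {
    int (*bind_mm)(void *ctx);      /* optional */
    void (*unbind_mm)(void *ctx);   /* optional */
};

struct backend {
    const struct backend_ops *ops;
    void *ctx;
    int use_va;
};

static int backend_bind_mm(struct backend *b)
{
    if (!b->use_va || !b->ops->bind_mm)
        return 0;               /* nothing to do for this backend */
    return b->ops->bind_mm(b->ctx);
}

int main(void)
{
    static const struct backend_ops no_mm_ops = { 0 };
    struct backend b = { .ops = &no_mm_ops, .use_va = 1 };

    printf("bind_mm: %d\n", backend_bind_mm(&b));   /* prints 0 */
    return 0;
}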
@@ -599,9 +621,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
if (vq->call_ctx.ctx) {
cb.callback = vhost_vdpa_virtqueue_cb;
cb.private = vq;
+ cb.trigger = vq->call_ctx.ctx;
} else {
cb.callback = NULL;
cb.private = NULL;
+ cb.trigger = NULL;
}
ops->set_vq_cb(vdpa, idx, &cb);
vhost_vdpa_setup_vq_irq(v, idx);
@@ -716,6 +740,17 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break;
}
+ if (r)
+ goto out;
+
+ switch (cmd) {
+ case VHOST_SET_OWNER:
+ r = vhost_vdpa_bind_mm(v);
+ if (r)
+ vhost_dev_reset_owner(d, NULL);
+ break;
+ }
+out:
mutex_unlock(&d->mutex);
return r;
}
@@ -851,11 +886,7 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v,
if (!v->in_batch)
ops->set_map(vdpa, asid, iotlb);
}
- /* If we are in the middle of batch processing, delay the free
- * of AS until BATCH_END.
- */
- if (!v->in_batch && !iotlb->nmaps)
- vhost_vdpa_remove_as(v, asid);
+
}
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
@@ -1112,8 +1143,6 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
if (v->in_batch && ops->set_map)
ops->set_map(vdpa, asid, iotlb);
v->in_batch = false;
- if (!iotlb->nmaps)
- vhost_vdpa_remove_as(v, asid);
break;
default:
r = -EINVAL;
@@ -1287,6 +1316,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
vhost_vdpa_clean_irq(v);
vhost_vdpa_reset(v);
vhost_dev_stop(&v->vdev);
+ vhost_vdpa_unbind_mm(v);
vhost_vdpa_config_put(v);
vhost_vdpa_cleanup(v);
mutex_unlock(&d->mutex);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 6d07b42833be..10bf35a3db6e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -434,8 +434,7 @@ static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
size_t event __maybe_unused =
vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
- return sizeof(*vq->avail) +
- sizeof(*vq->avail->ring) * num + event;
+ return size_add(struct_size(vq->avail, ring, num), event);
}
static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
@@ -444,8 +443,7 @@ static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
size_t event __maybe_unused =
vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
- return sizeof(*vq->used) +
- sizeof(*vq->used->ring) * num + event;
+ return size_add(struct_size(vq->used, ring, num), event);
}
static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
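The two hunks above replace open-coded `sizeof(*p) + sizeof(elem) * num + event` arithmetic with struct_size()/size_add(), whose kernel implementations saturate at SIZE_MAX rather than wrapping when a huge num is supplied. A userspace sketch of the idea, with a hypothetical saturating helper standing in for the overflow.h macros:

/*
 * Sketch only: overflow-safe sizing of a struct with a flexible array
 * member. sat_size() is a hypothetical stand-in for struct_size()/
 * size_add(): it saturates at SIZE_MAX instead of wrapping.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct ring {
    uint16_t flags;
    uint16_t idx;
    uint16_t entries[];     /* flexible array, like the avail/used rings */
};

static size_t sat_size(size_t base, size_t elem, size_t num, size_t extra)
{
    if (num && elem > (SIZE_MAX - base) / num)
        return SIZE_MAX;                /* saturate, don't wrap */
    base += elem * num;
    return extra > SIZE_MAX - base ? SIZE_MAX : base + extra;
}

int main(void)
{
    size_t n = 256, event = 2;
    size_t sz = sat_size(sizeof(struct ring), sizeof(uint16_t), n, event);

    printf("%zu bytes for %zu entries (+%zu event bytes)\n", sz, n, event);
    return 0;
}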
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index a1e27da54481..955d938eb663 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -636,9 +636,9 @@ static inline int xfer_to_user(const struct vringh *vrh,
* @features: the feature bits for this ring.
* @num: the number of elements.
* @weak_barriers: true if we only need memory barriers, not I/O.
- * @desc: the userpace descriptor pointer.
- * @avail: the userpace avail pointer.
- * @used: the userpace used pointer.
+ * @desc: the userspace descriptor pointer.
+ * @avail: the userspace avail pointer.
+ * @used: the userspace used pointer.
*
* Returns an error if num is invalid: you should check pointers
* yourself!
@@ -911,9 +911,9 @@ static inline int kern_xfer(const struct vringh *vrh, void *dst,
* @features: the feature bits for this ring.
* @num: the number of elements.
* @weak_barriers: true if we only need memory barriers, not I/O.
- * @desc: the userpace descriptor pointer.
- * @avail: the userpace avail pointer.
- * @used: the userpace used pointer.
+ * @desc: the userspace descriptor pointer.
+ * @avail: the userspace avail pointer.
+ * @used: the userspace used pointer.
*
* Returns an error if num is invalid.
*/
@@ -1094,10 +1094,17 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
+struct iotlb_vec {
+ union {
+ struct iovec *iovec;
+ struct bio_vec *bvec;
+ } iov;
+ size_t count;
+};
+
static int iotlb_translate(const struct vringh *vrh,
u64 addr, u64 len, u64 *translated,
- struct bio_vec iov[],
- int iov_size, u32 perm)
+ struct iotlb_vec *ivec, u32 perm)
{
struct vhost_iotlb_map *map;
struct vhost_iotlb *iotlb = vrh->iotlb;
@@ -1107,9 +1114,11 @@ static int iotlb_translate(const struct vringh *vrh,
spin_lock(vrh->iotlb_lock);
while (len > s) {
- u64 size, pa, pfn;
+ uintptr_t io_addr;
+ size_t io_len;
+ u64 size;
- if (unlikely(ret >= iov_size)) {
+ if (unlikely(ret >= ivec->count)) {
ret = -ENOBUFS;
break;
}
@@ -1124,10 +1133,22 @@ static int iotlb_translate(const struct vringh *vrh,
}
size = map->size - addr + map->start;
- pa = map->addr + addr - map->start;
- pfn = pa >> PAGE_SHIFT;
- bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
- pa & (PAGE_SIZE - 1));
+ io_len = min(len - s, size);
+ io_addr = map->addr - map->start + addr;
+
+ if (vrh->use_va) {
+ struct iovec *iovec = ivec->iov.iovec;
+
+ iovec[ret].iov_len = io_len;
+ iovec[ret].iov_base = (void __user *)io_addr;
+ } else {
+ u64 pfn = io_addr >> PAGE_SHIFT;
+ struct bio_vec *bvec = ivec->iov.bvec;
+
+ bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len,
+ io_addr & (PAGE_SIZE - 1));
+ }
+
s += size;
addr += size;
++ret;
@@ -1141,26 +1162,41 @@ static int iotlb_translate(const struct vringh *vrh,
return ret;
}
+#define IOTLB_IOV_STRIDE 16
+
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
+ struct iotlb_vec ivec;
+ union {
+ struct iovec iovec[IOTLB_IOV_STRIDE];
+ struct bio_vec bvec[IOTLB_IOV_STRIDE];
+ } iov;
u64 total_translated = 0;
+ ivec.iov.iovec = iov.iovec;
+ ivec.count = IOTLB_IOV_STRIDE;
+
while (total_translated < len) {
- struct bio_vec iov[16];
struct iov_iter iter;
u64 translated;
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
len - total_translated, &translated,
- iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
+ &ivec, VHOST_MAP_RO);
if (ret == -ENOBUFS)
- ret = ARRAY_SIZE(iov);
+ ret = IOTLB_IOV_STRIDE;
else if (ret < 0)
return ret;
- iov_iter_bvec(&iter, ITER_SOURCE, iov, ret, translated);
+ if (vrh->use_va) {
+ iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
+ translated);
+ } else {
+ iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
+ translated);
+ }
ret = copy_from_iter(dst, translated, &iter);
if (ret < 0)
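copy_from_iotlb() above (and copy_to_iotlb() below) lean on the reworked iotlb_translate(), which splits one [addr, addr + len) range into per-mapping segments and records each segment as either a userspace iovec or a bio_vec depending on vrh->use_va. A simplified, self-contained sketch of that splitting loop over a mapping table (hypothetical types; no IOTLB, permissions, or partial-progress reporting):

/*
 * Sketch only: splitting a range across translation mappings, in the
 * spirit of iotlb_translate(). Types and lookup are hypothetical; the
 * real code also checks access permissions and reports how many bytes
 * it managed to translate.
 */
#include <stdint.h>
#include <stdio.h>

struct mapping { uint64_t start, last, addr; }; /* [start, last] -> addr */
struct seg { uint64_t addr, len; };

static const struct mapping *lookup(const struct mapping *m, int n, uint64_t a)
{
    for (int i = 0; i < n; i++)
        if (a >= m[i].start && a <= m[i].last)
            return &m[i];
    return NULL;
}

/* Returns the number of segments filled, or -1 on an unmapped address. */
static int translate(const struct mapping *m, int n, uint64_t addr,
                     uint64_t len, struct seg *out, int max_segs)
{
    int ret = 0;

    while (len && ret < max_segs) {
        const struct mapping *hit = lookup(m, n, addr);
        uint64_t avail, chunk;

        if (!hit)
            return -1;

        avail = hit->last - addr + 1;   /* bytes left in this mapping */
        chunk = len < avail ? len : avail;
        out[ret].addr = hit->addr + (addr - hit->start);
        out[ret].len = chunk;
        addr += chunk;
        len -= chunk;
        ret++;
    }
    return ret;
}

int main(void)
{
    const struct mapping map[] = {
        { 0x1000, 0x1fff, 0x90000 },
        { 0x2000, 0x2fff, 0xa0000 },
    };
    struct seg segs[4];
    int n = translate(map, 2, 0x1f00, 0x300, segs, 4);

    for (int i = 0; i < n; i++)
        printf("seg %d: 0x%llx len 0x%llx\n", i,
               (unsigned long long)segs[i].addr,
               (unsigned long long)segs[i].len);
    return 0;
}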
@@ -1177,23 +1213,36 @@ static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len)
{
+ struct iotlb_vec ivec;
+ union {
+ struct iovec iovec[IOTLB_IOV_STRIDE];
+ struct bio_vec bvec[IOTLB_IOV_STRIDE];
+ } iov;
u64 total_translated = 0;
+ ivec.iov.iovec = iov.iovec;
+ ivec.count = IOTLB_IOV_STRIDE;
+
while (total_translated < len) {
- struct bio_vec iov[16];
struct iov_iter iter;
u64 translated;
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
len - total_translated, &translated,
- iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
+ &ivec, VHOST_MAP_WO);
if (ret == -ENOBUFS)
- ret = ARRAY_SIZE(iov);
+ ret = IOTLB_IOV_STRIDE;
else if (ret < 0)
return ret;
- iov_iter_bvec(&iter, ITER_DEST, iov, ret, translated);
+ if (vrh->use_va) {
+ iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
+ translated);
+ } else {
+ iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
+ translated);
+ }
ret = copy_to_iter(src, translated, &iter);
if (ret < 0)
@@ -1210,20 +1259,36 @@ static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
static inline int getu16_iotlb(const struct vringh *vrh,
u16 *val, const __virtio16 *p)
{
- struct bio_vec iov;
- void *kaddr, *from;
+ struct iotlb_vec ivec;
+ union {
+ struct iovec iovec[1];
+ struct bio_vec bvec[1];
+ } iov;
+ __virtio16 tmp;
int ret;
+ ivec.iov.iovec = iov.iovec;
+ ivec.count = 1;
+
/* Atomic read is needed for getu16 */
- ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
- &iov, 1, VHOST_MAP_RO);
+ ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ NULL, &ivec, VHOST_MAP_RO);
if (ret < 0)
return ret;
- kaddr = kmap_atomic(iov.bv_page);
- from = kaddr + iov.bv_offset;
- *val = vringh16_to_cpu(vrh, READ_ONCE(*(__virtio16 *)from));
- kunmap_atomic(kaddr);
+ if (vrh->use_va) {
+ ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
+ if (ret)
+ return ret;
+ } else {
+ void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
+ void *from = kaddr + ivec.iov.bvec[0].bv_offset;
+
+ tmp = READ_ONCE(*(__virtio16 *)from);
+ kunmap_local(kaddr);
+ }
+
+ *val = vringh16_to_cpu(vrh, tmp);
return 0;
}
@@ -1231,20 +1296,36 @@ static inline int getu16_iotlb(const struct vringh *vrh,
static inline int putu16_iotlb(const struct vringh *vrh,
__virtio16 *p, u16 val)
{
- struct bio_vec iov;
- void *kaddr, *to;
+ struct iotlb_vec ivec;
+ union {
+ struct iovec iovec;
+ struct bio_vec bvec;
+ } iov;
+ __virtio16 tmp;
int ret;
+ ivec.iov.iovec = &iov.iovec;
+ ivec.count = 1;
+
/* Atomic write is needed for putu16 */
- ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
- &iov, 1, VHOST_MAP_WO);
+ ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
+ NULL, &ivec, VHOST_MAP_RO);
if (ret < 0)
return ret;
- kaddr = kmap_atomic(iov.bv_page);
- to = kaddr + iov.bv_offset;
- WRITE_ONCE(*(__virtio16 *)to, cpu_to_vringh16(vrh, val));
- kunmap_atomic(kaddr);
+ tmp = cpu_to_vringh16(vrh, val);
+
+ if (vrh->use_va) {
+ ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
+ if (ret)
+ return ret;
+ } else {
+ void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
+ void *to = kaddr + ivec.iov.bvec[0].bv_offset;
+
+ WRITE_ONCE(*(__virtio16 *)to, tmp);
+ kunmap_local(kaddr);
+ }
return 0;
}
@@ -1306,9 +1387,9 @@ static inline int putused_iotlb(const struct vringh *vrh,
* @features: the feature bits for this ring.
* @num: the number of elements.
* @weak_barriers: true if we only need memory barriers, not I/O.
- * @desc: the userpace descriptor pointer.
- * @avail: the userpace avail pointer.
- * @used: the userpace used pointer.
+ * @desc: the userspace descriptor pointer.
+ * @avail: the userspace avail pointer.
+ * @used: the userspace used pointer.
*
* Returns an error if num is invalid.
*/
@@ -1318,12 +1399,40 @@ int vringh_init_iotlb(struct vringh *vrh, u64 features,
struct vring_avail *avail,
struct vring_used *used)
{
+ vrh->use_va = false;
+
return vringh_init_kern(vrh, features, num, weak_barriers,
desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb);
/**
+ * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing
+ * user VA.
+ * @vrh: the vringh to initialize.
+ * @features: the feature bits for this ring.
+ * @num: the number of elements.
+ * @weak_barriers: true if we only need memory barriers, not I/O.
+ * @desc: the userspace descriptor pointer.
+ * @avail: the userspace avail pointer.
+ * @used: the userspace used pointer.
+ *
+ * Returns an error if num is invalid.
+ */
+int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
+ unsigned int num, bool weak_barriers,
+ struct vring_desc *desc,
+ struct vring_avail *avail,
+ struct vring_used *used)
+{
+ vrh->use_va = true;
+
+ return vringh_init_kern(vrh, features, num, weak_barriers,
+ desc, avail, used);
+}
+EXPORT_SYMBOL(vringh_init_iotlb_va);
+
+/**
* vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
* @vrh: the vring
* @iotlb: iotlb associated with this vring