diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-10 13:42:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-10 13:42:09 -0700 |
commit | 09102704c67457c6cdea6c0394c34843484a852c (patch) | |
tree | c4faeb56ff6c94d50b56da17969b88b27c19eae5 /drivers/vhost | |
parent | 84fc461db99b2dc19e019c0a97725a3653687981 (diff) | |
parent | 044e4b09223039e571e6ec540e25552054208765 (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- virtio-mem: paravirtualized memory hotplug
- support doorbell mapping for vdpa
- config interrupt support in ifc
- fixes all over the place
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits)
vhost/test: fix up after API change
virtio_mem: convert device block size into 64bit
virtio-mem: drop unnecessary initialization
ifcvf: implement config interrupt in IFCVF
vhost: replace -1 with VHOST_FILE_UNBIND in ioctls
vhost_vdpa: Support config interrupt in vdpa
ifcvf: ignore continuous setting same status value
virtio-mem: Don't rely on implicit compiler padding for requests
virtio-mem: Try to unplug the complete online memory block first
virtio-mem: Use -ETXTBSY as error code if the device is busy
virtio-mem: Unplug subblocks right-to-left
virtio-mem: Drop manual check for already present memory
virtio-mem: Add parent resource for all added "System RAM"
virtio-mem: Better retry handling
virtio-mem: Offline and remove completely unplugged memory blocks
mm/memory_hotplug: Introduce offline_and_remove_memory()
virtio-mem: Allow to offline partially unplugged memory blocks
mm: Allow to offline unmovable PageOffline() pages via MEM_GOING_OFFLINE
virtio-mem: Paravirtualized memory hotunplug part 2
virtio-mem: Paravirtualized memory hotunplug part 1
...
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/Kconfig | 17 | ||||
-rw-r--r-- | drivers/vhost/net.c | 2 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 2 | ||||
-rw-r--r-- | drivers/vhost/test.c | 2 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 112 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 98 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 8 | ||||
-rw-r--r-- | drivers/vhost/vringh.c | 6 | ||||
-rw-r--r-- | drivers/vhost/vsock.c | 2 |
9 files changed, 195 insertions, 54 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index c4f273793595..2c75d164b827 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -13,15 +13,6 @@ config VHOST_RING This option is selected by any driver which needs to access the host side of a virtio ring. -config VHOST_DPN - bool - depends on !ARM || AEABI - default y - help - Anything selecting VHOST or VHOST_RING must depend on VHOST_DPN. - This excludes the deprecated ARM ABI since that forces a 4 byte - alignment on all structs - incompatible with virtio spec requirements. - config VHOST tristate select VHOST_IOTLB @@ -37,7 +28,7 @@ if VHOST_MENU config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) && VHOST_DPN + depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) select VHOST ---help--- This kernel module can be loaded in host kernel to accelerate @@ -49,7 +40,7 @@ config VHOST_NET config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" - depends on TARGET_CORE && EVENTFD && VHOST_DPN + depends on TARGET_CORE && EVENTFD select VHOST default n ---help--- @@ -58,7 +49,7 @@ config VHOST_SCSI config VHOST_VSOCK tristate "vhost virtio-vsock driver" - depends on VSOCKETS && EVENTFD && VHOST_DPN + depends on VSOCKETS && EVENTFD select VHOST select VIRTIO_VSOCKETS_COMMON default n @@ -72,7 +63,7 @@ config VHOST_VSOCK config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" - depends on EVENTFD && VHOST_DPN + depends on EVENTFD select VHOST depends on VDPA help diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 516519dcc8ff..e992decfec53 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1327,7 +1327,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, - VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8b104f76f324..6fb4d7ecfa19 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, - VHOST_SCSI_WEIGHT, 0, NULL); + VHOST_SCSI_WEIGHT, 0, true, NULL); vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 9a3a09005e03..0466921f4772 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, - VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, NULL); + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); f->private_data = n; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 40e2a5747c98..7580e34f76c1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -15,12 +15,14 @@ #include <linux/module.h> #include <linux/cdev.h> #include <linux/device.h> +#include <linux/mm.h> #include <linux/iommu.h> #include <linux/uuid.h> #include <linux/vdpa.h> #include <linux/nospec.h> #include <linux/vhost.h> #include <linux/virtio_net.h> +#include <linux/kernel.h> #include "vhost.h" @@ -70,6 +72,7 @@ struct vhost_vdpa { int nvqs; int virtio_id; int minor; + struct eventfd_ctx *config_ctx; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -101,6 +104,17 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) return IRQ_HANDLED; } +static irqreturn_t vhost_vdpa_config_cb(void *private) +{ + struct vhost_vdpa *v = private; + struct eventfd_ctx *config_ctx = v->config_ctx; + + if (config_ctx) + eventfd_signal(config_ctx, 1); + + return IRQ_HANDLED; +} + static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; @@ -288,6 +302,36 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) return 0; } +static void vhost_vdpa_config_put(struct vhost_vdpa *v) +{ + if (v->config_ctx) + eventfd_ctx_put(v->config_ctx); +} + +static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_callback cb; + int fd; + struct eventfd_ctx *ctx; + + cb.callback = vhost_vdpa_config_cb; + cb.private = v->vdpa; + if (copy_from_user(&fd, argp, sizeof(fd))) + return -EFAULT; + + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); + swap(ctx, v->config_ctx); + + if (!IS_ERR_OR_NULL(ctx)) + eventfd_ctx_put(ctx); + + if (IS_ERR(v->config_ctx)) + return PTR_ERR(v->config_ctx); + + v->vdpa->config->set_config_cb(v->vdpa, &cb); + + return 0; +} static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -395,6 +439,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; break; + case VHOST_VDPA_SET_CONFIG_CALL: + r = vhost_vdpa_set_config_call(v, argp); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -694,7 +741,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; } - vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, + vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); dev->iotlb = vhost_iotlb_alloc(0, 0); @@ -729,6 +776,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_dev_stop(&v->vdev); vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); + vhost_vdpa_config_put(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); mutex_unlock(&d->mutex); @@ -739,12 +787,74 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) return 0; } +#ifdef CONFIG_MMU +static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) +{ + struct vhost_vdpa *v = vmf->vma->vm_file->private_data; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_notification_area notify; + struct vm_area_struct *vma = vmf->vma; + u16 index = vma->vm_pgoff; + + notify = ops->get_vq_notification(vdpa, index); + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (remap_pfn_range(vma, vmf->address & PAGE_MASK, + notify.addr >> PAGE_SHIFT, PAGE_SIZE, + vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct vhost_vdpa_vm_ops = { + .fault = vhost_vdpa_fault, +}; + +static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct vhost_vdpa *v = vma->vm_file->private_data; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_notification_area notify; + int index = vma->vm_pgoff; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + if (vma->vm_flags & VM_READ) + return -EINVAL; + if (index > 65535) + return -EINVAL; + if (!ops->get_vq_notification) + return -ENOTSUPP; + + /* To be safe and easily modelled by userspace, We only + * support the doorbell which sits on the page boundary and + * does not share the page with other registers. + */ + notify = ops->get_vq_notification(vdpa, index); + if (notify.addr & (PAGE_SIZE - 1)) + return -EINVAL; + if (vma->vm_end - vma->vm_start != notify.size) + return -ENOTSUPP; + + vma->vm_ops = &vhost_vdpa_vm_ops; + return 0; +} +#endif /* CONFIG_MMU */ + static const struct file_operations vhost_vdpa_fops = { .owner = THIS_MODULE, .open = vhost_vdpa_open, .release = vhost_vdpa_release, .write_iter = vhost_vdpa_chr_write_iter, .unlocked_ioctl = vhost_vdpa_unlocked_ioctl, +#ifdef CONFIG_MMU + .mmap = vhost_vdpa_mmap, +#endif /* CONFIG_MMU */ .compat_ioctl = compat_ptr_ioctl, }; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 596132a96cd5..062595ee1f83 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -166,11 +166,16 @@ static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); + struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; - vhost_poll_queue(poll); + if (!poll->dev->use_worker) + work->fn(work); + else + vhost_poll_queue(poll); + return 0; } @@ -454,6 +459,7 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)) { @@ -471,6 +477,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; + dev->use_worker = use_worker; dev->msg_handler = msg_handler; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); @@ -534,6 +541,36 @@ bool vhost_dev_has_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_has_owner); +static void vhost_attach_mm(struct vhost_dev *dev) +{ + /* No owner, become one */ + if (dev->use_worker) { + dev->mm = get_task_mm(current); + } else { + /* vDPA device does not use worker thead, so there's + * no need to hold the address space for mm. This help + * to avoid deadlock in the case of mmap() which may + * held the refcnt of the file and depends on release + * method to remove vma. + */ + dev->mm = current->mm; + mmgrab(dev->mm); + } +} + +static void vhost_detach_mm(struct vhost_dev *dev) +{ + if (!dev->mm) + return; + + if (dev->use_worker) + mmput(dev->mm); + else + mmdrop(dev->mm); + + dev->mm = NULL; +} + /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { @@ -546,21 +583,24 @@ long vhost_dev_set_owner(struct vhost_dev *dev) goto err_mm; } - /* No owner, become one */ - dev->mm = get_task_mm(current); + vhost_attach_mm(dev); + dev->kcov_handle = kcov_common_handle(); - worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); - if (IS_ERR(worker)) { - err = PTR_ERR(worker); - goto err_worker; - } + if (dev->use_worker) { + worker = kthread_create(vhost_worker, dev, + "vhost-%d", current->pid); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + goto err_worker; + } - dev->worker = worker; - wake_up_process(worker); /* avoid contributing to loadavg */ + dev->worker = worker; + wake_up_process(worker); /* avoid contributing to loadavg */ - err = vhost_attach_cgroups(dev); - if (err) - goto err_cgroup; + err = vhost_attach_cgroups(dev); + if (err) + goto err_cgroup; + } err = vhost_dev_alloc_iovecs(dev); if (err) @@ -568,12 +608,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev) return 0; err_cgroup: - kthread_stop(worker); - dev->worker = NULL; + if (dev->worker) { + kthread_stop(dev->worker); + dev->worker = NULL; + } err_worker: - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); dev->kcov_handle = 0; err_mm: return err; @@ -670,9 +710,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->worker = NULL; dev->kcov_handle = 0; } - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -882,7 +920,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_put_user(vq, x, ptr) \ ({ \ - int ret = -EFAULT; \ + int ret; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ @@ -1244,9 +1282,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { return access_ok(desc, vhost_get_desc_size(vq, num)) && @@ -1574,7 +1612,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); if (IS_ERR(eventfp)) { r = PTR_ERR(eventfp); break; @@ -1590,7 +1628,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1602,7 +1640,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1727,7 +1765,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) r = get_user(fd, (int __user *)argp); if (r < 0) break; - ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd); + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -2300,7 +2338,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { - struct vring_used_elem __user *used; + vring_used_elem_t __user *used; u16 old, new; int start; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index f8403bd46b85..c8e96a095d3b 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -67,9 +67,9 @@ struct vhost_virtqueue { /* The actual ring of buffers. */ struct mutex mutex; unsigned int num; - struct vring_desc __user *desc; - struct vring_avail __user *avail; - struct vring_used __user *used; + vring_desc_t __user *desc; + vring_avail_t __user *avail; + vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; struct eventfd_ctx *call_ctx; @@ -154,6 +154,7 @@ struct vhost_dev { int weight; int byte_weight; u64 kcov_handle; + bool use_worker; int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg); }; @@ -161,6 +162,7 @@ struct vhost_dev { bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)); long vhost_dev_set_owner(struct vhost_dev *dev); diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index ba8e0d6cfd97..e059a9a47cdf 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -620,9 +620,9 @@ static inline int xfer_to_user(const struct vringh *vrh, */ int vringh_init_user(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { /* Sane power of 2 please! */ if (!num || num > 0xffff || (num & (num - 1))) { diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index fb4e944c4d0d..a483cec31d5c 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -632,7 +632,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT, - VHOST_VSOCK_WEIGHT, NULL); + VHOST_VSOCK_WEIGHT, true, NULL); file->private_data = vsock; spin_lock_init(&vsock->send_pkt_list_lock); |