author		Linus Torvalds <torvalds@linux-foundation.org>	2022-10-12 14:39:38 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-10-12 14:39:38 -0700
commit		778ce723e93ee803ef5883619fe2391e00dbc209
tree		861f03223f34780fecce597b45588118236874d7 /drivers/xen
parent		1440f576022887004f719883acb094e7e0dd4944
parent		7880672bdc975daa586e8256714d9906d30c615e
Merge tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
- Some minor typo fixes
- A fix of the Xen pcifront driver to support running the device model
  in a Linux stub domain
- A cleanup of the pcifront driver
- A series to enable grant-based virtio with Xen on x86 (see the sketch
  after this list)
- A cleanup of Xen PV guest code to distinguish between safe and
  faulting MSR accesses
- Two fixes of the Xen gntdev driver
- Two fixes of the new xen grant DMA driver
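
A note on the grant-based virtio entries: the grant DMA layer
(grant-dma-ops.c in the diff below) marks a DMA address as grant-backed by
setting bit 63 (XEN_GRANT_DMA_ADDR_OFF) and carries the grant reference in
the page-frame bits. The user-space sketch below mirrors that arithmetic;
the 4 KiB page size and the helper names are assumptions for illustration,
not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT             12
#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63)  /* bit 63 tags a grant-encoded address */

typedef uint32_t grant_ref_t;

/* Encode: set the tag bit and place the grant reference in the PFN bits. */
static uint64_t grant_to_dma(grant_ref_t grant)
{
	return XEN_GRANT_DMA_ADDR_OFF | ((uint64_t)grant << PAGE_SHIFT);
}

/* Decode: strip the tag bit and shift the PFN bits back down. */
static grant_ref_t dma_to_grant(uint64_t dma)
{
	return (grant_ref_t)((dma & ~XEN_GRANT_DMA_ADDR_OFF) >> PAGE_SHIFT);
}

int main(void)
{
	grant_ref_t ref = 42;
	uint64_t dma = grant_to_dma(ref) | 0x123;  /* sub-page offset in low bits */

	printf("dma=0x%016llx -> grant=%u\n",
	       (unsigned long long)dma, dma_to_grant(dma));
	return 0;
}

Because the grant reference sits above the page offset, the low 12 bits are
free to carry a sub-page offset through the round trip unchanged.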
* tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen: Kconfig: Fix spelling mistake "Maxmium" -> "Maximum"
xen/pv: support selecting safe/unsafe msr accesses
xen/pv: refactor msr access functions to support safe and unsafe accesses
xen/pv: fix vendor checks for pmu emulation
xen/pv: add fault recovery control to pmu msr accesses
xen/virtio: enable grant based virtio on x86
xen/virtio: use dom0 as default backend for CONFIG_XEN_VIRTIO_FORCE_GRANT
xen/virtio: restructure xen grant dma setup
xen/pcifront: move xenstore config scanning into sub-function
xen/gntdev: Accommodate VMA splitting
xen/gntdev: Prevent leaking grants
xen/virtio: Fix potential deadlock when accessing xen_grant_dma_devices
xen/virtio: Fix n_pages calculation in xen_grant_dma_map(unmap)_page() (see the sketch below)
xen/xenbus: Fix spelling mistake "hardward" -> "hardware"
xen-pcifront: Handle missed Connected state
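
On the "Fix n_pages calculation" entry in the shortlog above: replacing
PFN_UP(size) with PFN_UP(offset + size) accounts for a buffer that starts
partway into a page and therefore straddles one more page than its length
alone suggests. A standalone sketch of the difference; the 4 KiB page size
and the offset/size values are assumptions chosen purely for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
/* Round a byte count up to whole pages, like the kernel's PFN_UP(). */
#define PFN_UP(x)  (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	unsigned long offset = 0xe00; /* buffer starts 3584 bytes into its first page */
	unsigned long size   = 0x400; /* 1 KiB mapping */

	/* Old computation: one page appears to be enough... */
	printf("PFN_UP(size)          = %lu\n", PFN_UP(size));
	/* ...but the buffer crosses a page boundary, so two grants are needed. */
	printf("PFN_UP(offset + size) = %lu\n", PFN_UP(offset + size));
	return 0;
}

Here the old formula yields 1 page while the mapping actually touches 2,
which is exactly the under-counting the fix removes.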
Diffstat (limited to 'drivers/xen')
-rw-r--r--	drivers/xen/Kconfig			  2
-rw-r--r--	drivers/xen/gntdev-common.h		  3
-rw-r--r--	drivers/xen/gntdev.c			 80
-rw-r--r--	drivers/xen/grant-dma-ops.c		112
-rw-r--r--	drivers/xen/xen-pciback/xenbus.c	  2
5 files changed, 125 insertions, 74 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a65bd92121a5..d5d7c402b651 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -56,7 +56,7 @@ config XEN_MEMORY_HOTPLUG_LIMIT
 	depends on XEN_HAVE_PVMMU
 	depends on MEMORY_HOTPLUG
 	help
-	  Maxmium amount of memory (in GiB) that a PV guest can be
+	  Maximum amount of memory (in GiB) that a PV guest can be
 	  expanded to when using memory hotplug.
 
 	  A PV guest can have more memory than this limit if is
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
 };
 
 struct gntdev_grant_map {
+	atomic_t in_use;
 	struct mmu_interval_notifier notifier;
+	bool notifier_init;
 	struct list_head next;
-	struct vm_area_struct *vma;
 	int index;
 	int count;
 	int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 84b143eef395..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
 		 */
 	}
 
+	if (use_ptemod && map->notifier_init)
+		mmu_interval_notifier_remove(&map->notifier);
+
 	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(map->notify.event);
 		evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
 static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
 {
 	struct gntdev_grant_map *map = data;
-	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+	unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
 	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
 		    (1 << _GNTMAP_guest_avail0);
 	u64 pte_maddr;
@@ -367,8 +370,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
 	for (i = 0; i < map->count; i++) {
 		if (map->map_ops[i].status == GNTST_okay) {
 			map->unmap_ops[i].handle = map->map_ops[i].handle;
-			if (!use_ptemod)
-				alloced++;
+			alloced++;
 		} else if (!err)
 			err = -EINVAL;
 
@@ -377,8 +379,7 @@
 
 		if (use_ptemod) {
 			if (map->kmap_ops[i].status == GNTST_okay) {
-				if (map->map_ops[i].status == GNTST_okay)
-					alloced++;
+				alloced++;
 				map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
 			} else if (!err)
 				err = -EINVAL;
@@ -394,8 +395,14 @@ static void __unmap_grant_pages_done(int result,
 	unsigned int i;
 	struct gntdev_grant_map *map = data->data;
 	unsigned int offset = data->unmap_ops - map->unmap_ops;
+	int successful_unmaps = 0;
+	int live_grants;
 
 	for (i = 0; i < data->count; i++) {
+		if (map->unmap_ops[offset + i].status == GNTST_okay &&
+		    map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+			successful_unmaps++;
+
 		WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
 			map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
 		pr_debug("unmap handle=%d st=%d\n",
@@ -403,6 +410,10 @@ static void __unmap_grant_pages_done(int result,
 			map->unmap_ops[offset+i].status);
 		map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
 		if (use_ptemod) {
+			if (map->kunmap_ops[offset + i].status == GNTST_okay &&
+			    map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+				successful_unmaps++;
+
 			WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
 				map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
 			pr_debug("kunmap handle=%u st=%d\n",
@@ -411,11 +422,15 @@ static void __unmap_grant_pages_done(int result,
 			map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
 		}
 	}
+
 	/*
 	 * Decrease the live-grant counter. This must happen after the loop to
 	 * prevent premature reuse of the grants by gnttab_mmap().
 	 */
-	atomic_sub(data->count, &map->live_grants);
+	live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
+	if (WARN_ON(live_grants < 0))
+		pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
+		       __func__, live_grants, successful_unmaps);
 
 	/* Release reference taken by __unmap_grant_pages */
 	gntdev_put_map(NULL, map);
@@ -496,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
 	struct gntdev_priv *priv = file->private_data;
 
 	pr_debug("gntdev_vma_close %p\n", vma);
-	if (use_ptemod) {
-		WARN_ON(map->vma != vma);
-		mmu_interval_notifier_remove(&map->notifier);
-		map->vma = NULL;
-	}
+
 	vma->vm_private_data = NULL;
 	gntdev_put_map(priv, map);
 }
@@ -528,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
 	struct gntdev_grant_map *map =
 		container_of(mn, struct gntdev_grant_map, notifier);
 	unsigned long mstart, mend;
+	unsigned long map_start, map_end;
 
 	if (!mmu_notifier_range_blockable(range))
 		return false;
 
+	map_start = map->pages_vm_start;
+	map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
 	/*
 	 * If the VMA is split or otherwise changed the notifier is not
 	 * updated, but we don't want to process VA's outside the modified
 	 * VMA. FIXME: It would be much more understandable to just prevent
 	 * modifying the VMA in the first place.
 	 */
-	if (map->vma->vm_start >= range->end ||
-	    map->vma->vm_end <= range->start)
+	if (map_start >= range->end || map_end <= range->start)
 		return true;
 
-	mstart = max(range->start, map->vma->vm_start);
-	mend = min(range->end, map->vma->vm_end);
+	mstart = max(range->start, map_start);
+	mend = min(range->end, map_end);
 	pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
-			map->index, map->count,
-			map->vma->vm_start, map->vma->vm_end,
-			range->start, range->end, mstart, mend);
-	unmap_grant_pages(map,
-				(mstart - map->vma->vm_start) >> PAGE_SHIFT,
-				(mend - mstart) >> PAGE_SHIFT);
+		 map->index, map->count, map_start, map_end,
+		 range->start, range->end, mstart, mend);
+	unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+			  (mend - mstart) >> PAGE_SHIFT);
 
 	return true;
 }
@@ -1030,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		return -EINVAL;
 
 	pr_debug("map %d+%d at %lx (pgoff %lx)\n",
-			index, count, vma->vm_start, vma->vm_pgoff);
+		 index, count, vma->vm_start, vma->vm_pgoff);
 
 	mutex_lock(&priv->lock);
 	map = gntdev_find_map_index(priv, index, count);
 	if (!map)
 		goto unlock_out;
-	if (use_ptemod && map->vma)
+	if (!atomic_add_unless(&map->in_use, 1, 1))
 		goto unlock_out;
-	if (atomic_read(&map->live_grants)) {
-		err = -EAGAIN;
-		goto unlock_out;
-	}
+
 	refcount_inc(&map->users);
 
 	vma->vm_ops = &gntdev_vmops;
@@ -1062,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		map->flags |= GNTMAP_readonly;
 	}
 
+	map->pages_vm_start = vma->vm_start;
+
 	if (use_ptemod) {
-		map->vma = vma;
 		err = mmu_interval_notifier_insert_locked(
 			&map->notifier, vma->vm_mm, vma->vm_start,
 			vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
-		if (err) {
-			map->vma = NULL;
+		if (err)
 			goto out_unlock_put;
-		}
+
+		map->notifier_init = true;
 	}
 	mutex_unlock(&priv->lock);
 
@@ -1087,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		 */
 		mmu_interval_read_begin(&map->notifier);
 
-		map->pages_vm_start = vma->vm_start;
 		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
 					  vma->vm_end - vma->vm_start,
 					  find_grant_ptes, map);
@@ -1116,13 +1125,8 @@ unlock_out:
 out_unlock_put:
 	mutex_unlock(&priv->lock);
 out_put_map:
-	if (use_ptemod) {
+	if (use_ptemod)
 		unmap_grant_pages(map, 0, map->count);
-		if (map->vma) {
-			mmu_interval_notifier_remove(&map->notifier);
-			map->vma = NULL;
-		}
-	}
 	gntdev_put_map(priv, map);
 	return err;
 }
diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index 8973fc1e9ccc..860f37c93af4 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -25,7 +25,7 @@ struct xen_grant_dma_data {
 	bool broken;
 };
 
-static DEFINE_XARRAY(xen_grant_dma_devices);
+static DEFINE_XARRAY_FLAGS(xen_grant_dma_devices, XA_FLAGS_LOCK_IRQ);
 
 #define XEN_GRANT_DMA_ADDR_OFF	(1ULL << 63)
 
@@ -42,14 +42,29 @@ static inline grant_ref_t dma_to_grant(dma_addr_t dma)
 static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev)
 {
 	struct xen_grant_dma_data *data;
+	unsigned long flags;
 
-	xa_lock(&xen_grant_dma_devices);
+	xa_lock_irqsave(&xen_grant_dma_devices, flags);
 	data = xa_load(&xen_grant_dma_devices, (unsigned long)dev);
-	xa_unlock(&xen_grant_dma_devices);
+	xa_unlock_irqrestore(&xen_grant_dma_devices, flags);
 
 	return data;
 }
 
+static int store_xen_grant_dma_data(struct device *dev,
+				    struct xen_grant_dma_data *data)
+{
+	unsigned long flags;
+	int ret;
+
+	xa_lock_irqsave(&xen_grant_dma_devices, flags);
+	ret = xa_err(__xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
+			GFP_ATOMIC));
+	xa_unlock_irqrestore(&xen_grant_dma_devices, flags);
+
+	return ret;
+}
+
 /*
  * DMA ops for Xen frontends (e.g. virtio).
  *
@@ -153,7 +168,7 @@ static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page,
 					 unsigned long attrs)
 {
 	struct xen_grant_dma_data *data;
-	unsigned int i, n_pages = PFN_UP(size);
+	unsigned int i, n_pages = PFN_UP(offset + size);
 	grant_ref_t grant;
 	dma_addr_t dma_handle;
 
@@ -185,7 +200,8 @@ static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 					 unsigned long attrs)
 {
 	struct xen_grant_dma_data *data;
-	unsigned int i, n_pages = PFN_UP(size);
+	unsigned long offset = dma_handle & (PAGE_SIZE - 1);
+	unsigned int i, n_pages = PFN_UP(offset + size);
 	grant_ref_t grant;
 
 	if (WARN_ON(dir == DMA_NONE))
@@ -273,72 +289,91 @@ static const struct dma_map_ops xen_grant_dma_ops = {
 	.dma_supported = xen_grant_dma_supported,
 };
 
-bool xen_is_grant_dma_device(struct device *dev)
+static bool xen_is_dt_grant_dma_device(struct device *dev)
 {
 	struct device_node *iommu_np;
 	bool has_iommu;
 
-	/* XXX Handle only DT devices for now */
-	if (!dev->of_node)
-		return false;
-
 	iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
-	has_iommu = iommu_np && of_device_is_compatible(iommu_np, "xen,grant-dma");
+	has_iommu = iommu_np &&
+		    of_device_is_compatible(iommu_np, "xen,grant-dma");
 	of_node_put(iommu_np);
 
 	return has_iommu;
 }
 
+bool xen_is_grant_dma_device(struct device *dev)
+{
+	/* XXX Handle only DT devices for now */
+	if (dev->of_node)
+		return xen_is_dt_grant_dma_device(dev);
+
+	return false;
+}
+
 bool xen_virtio_mem_acc(struct virtio_device *dev)
 {
-	if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT))
+	if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT) || xen_pv_domain())
 		return true;
 
 	return xen_is_grant_dma_device(dev->dev.parent);
 }
 
-void xen_grant_setup_dma_ops(struct device *dev)
+static int xen_dt_grant_init_backend_domid(struct device *dev,
+					   struct xen_grant_dma_data *data)
 {
-	struct xen_grant_dma_data *data;
 	struct of_phandle_args iommu_spec;
 
-	data = find_xen_grant_dma_data(dev);
-	if (data) {
-		dev_err(dev, "Xen grant DMA data is already created\n");
-		return;
-	}
-
-	/* XXX ACPI device unsupported for now */
-	if (!dev->of_node)
-		goto err;
-
 	if (of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells",
 			0, &iommu_spec)) {
 		dev_err(dev, "Cannot parse iommus property\n");
-		goto err;
+		return -ESRCH;
	}
 
 	if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
 			iommu_spec.args_count != 1) {
 		dev_err(dev, "Incompatible IOMMU node\n");
 		of_node_put(iommu_spec.np);
-		goto err;
+		return -ESRCH;
 	}
 
 	of_node_put(iommu_spec.np);
 
+	/*
+	 * The endpoint ID here means the ID of the domain where the
+	 * corresponding backend is running
+	 */
+	data->backend_domid = iommu_spec.args[0];
+
+	return 0;
+}
+
+void xen_grant_setup_dma_ops(struct device *dev)
+{
+	struct xen_grant_dma_data *data;
+
+	data = find_xen_grant_dma_data(dev);
+	if (data) {
+		dev_err(dev, "Xen grant DMA data is already created\n");
+		return;
+	}
+
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		goto err;
 
-	/*
-	 * The endpoint ID here means the ID of the domain where the corresponding
-	 * backend is running
-	 */
-	data->backend_domid = iommu_spec.args[0];
+	if (dev->of_node) {
+		if (xen_dt_grant_init_backend_domid(dev, data))
+			goto err;
+	} else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
+		dev_info(dev, "Using dom0 as backend\n");
+		data->backend_domid = 0;
+	} else {
+		/* XXX ACPI device unsupported for now */
+		goto err;
+	}
 
-	if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
-			GFP_KERNEL))) {
+	if (store_xen_grant_dma_data(dev, data)) {
 		dev_err(dev, "Cannot store Xen grant DMA data\n");
 		goto err;
 	}
@@ -348,9 +383,20 @@ void xen_grant_setup_dma_ops(struct device *dev)
 	return;
 
 err:
+	devm_kfree(dev, data);
 	dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n");
 }
 
+bool xen_virtio_restricted_mem_acc(struct virtio_device *dev)
+{
+	bool ret = xen_virtio_mem_acc(dev);
+
+	if (ret)
+		xen_grant_setup_dma_ops(dev->dev.parent);
+
+	return ret;
+}
+
 MODULE_DESCRIPTION("Xen grant DMA-mapping layer");
 MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index bde63ef677b8..d171091eec12 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -31,7 +31,7 @@ MODULE_PARM_DESC(passthrough,
 	"   frontend (for example, a device at 06:01.b will still appear at\n"\
 	"   06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
 	"   exposed PCI devices to its driver domains. This may be required\n"\
-	"   for drivers which depend on finding their hardward in certain\n"\
+	"   for drivers which depend on finding their hardware in certain\n"\
 	"   bus/slot locations.");
 
 static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
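
On the gntdev changes above: gntdev_mmap() now guards each grant map with
atomic_add_unless(&map->in_use, 1, 1), a one-shot latch that increments the
counter only while it is not already 1, so a given map can be mmap()ed at
most once in its lifetime. A user-space sketch of that semantic (C11
atomics; this helper merely emulates the kernel primitive and is not the
kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Emulates the kernel's atomic_add_unless(v, a, u): add a unless *v == u. */
static bool atomic_add_unless(atomic_int *v, int a, int u)
{
	int old = atomic_load(v);

	while (old != u) {
		/* On failure, old is reloaded with the current value. */
		if (atomic_compare_exchange_weak(v, &old, old + a))
			return true;  /* we performed the increment */
	}
	return false;  /* counter already held the forbidden value u */
}

int main(void)
{
	atomic_int in_use = 0;

	/* First mmap() of the grant map succeeds and latches in_use at 1. */
	printf("first:  %d\n", atomic_add_unless(&in_use, 1, 1));
	/* Any later mmap() finds in_use == 1 and is rejected. */
	printf("second: %d\n", atomic_add_unless(&in_use, 1, 1));
	return 0;
}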