summaryrefslogtreecommitdiff
path: root/drivers/xen
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-12 14:39:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-12 14:39:38 -0700
commit778ce723e93ee803ef5883619fe2391e00dbc209 (patch)
tree861f03223f34780fecce597b45588118236874d7 /drivers/xen
parent1440f576022887004f719883acb094e7e0dd4944 (diff)
parent7880672bdc975daa586e8256714d9906d30c615e (diff)
Merge tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross: - Some minor typo fixes - A fix of the Xen pcifront driver for supporting the device model to run in a Linux stub domain - A cleanup of the pcifront driver - A series to enable grant-based virtio with Xen on x86 - A cleanup of Xen PV guests to distinguish between safe and faulting MSR accesses - Two fixes of the Xen gntdev driver - Two fixes of the new xen grant DMA driver * tag 'for-linus-6.1-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: xen: Kconfig: Fix spelling mistake "Maxmium" -> "Maximum" xen/pv: support selecting safe/unsafe msr accesses xen/pv: refactor msr access functions to support safe and unsafe accesses xen/pv: fix vendor checks for pmu emulation xen/pv: add fault recovery control to pmu msr accesses xen/virtio: enable grant based virtio on x86 xen/virtio: use dom0 as default backend for CONFIG_XEN_VIRTIO_FORCE_GRANT xen/virtio: restructure xen grant dma setup xen/pcifront: move xenstore config scanning into sub-function xen/gntdev: Accommodate VMA splitting xen/gntdev: Prevent leaking grants xen/virtio: Fix potential deadlock when accessing xen_grant_dma_devices xen/virtio: Fix n_pages calculation in xen_grant_dma_map(unmap)_page() xen/xenbus: Fix spelling mistake "hardward" -> "hardware" xen-pcifront: Handle missed Connected state
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig2
-rw-r--r--drivers/xen/gntdev-common.h3
-rw-r--r--drivers/xen/gntdev.c80
-rw-r--r--drivers/xen/grant-dma-ops.c112
-rw-r--r--drivers/xen/xen-pciback/xenbus.c2
5 files changed, 125 insertions, 74 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a65bd92121a5..d5d7c402b651 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -56,7 +56,7 @@ config XEN_MEMORY_HOTPLUG_LIMIT
depends on XEN_HAVE_PVMMU
depends on MEMORY_HOTPLUG
help
- Maxmium amount of memory (in GiB) that a PV guest can be
+ Maximum amount of memory (in GiB) that a PV guest can be
expanded to when using memory hotplug.
A PV guest can have more memory than this limit if is
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 84b143eef395..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -367,8 +370,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- if (!use_ptemod)
- alloced++;
+ alloced++;
} else if (!err)
err = -EINVAL;
@@ -377,8 +379,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
- if (map->map_ops[i].status == GNTST_okay)
- alloced++;
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
} else if (!err)
err = -EINVAL;
@@ -394,8 +395,14 @@ static void __unmap_grant_pages_done(int result,
unsigned int i;
struct gntdev_grant_map *map = data->data;
unsigned int offset = data->unmap_ops - map->unmap_ops;
+ int successful_unmaps = 0;
+ int live_grants;
for (i = 0; i < data->count; i++) {
+ if (map->unmap_ops[offset + i].status == GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("unmap handle=%d st=%d\n",
@@ -403,6 +410,10 @@ static void __unmap_grant_pages_done(int result,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
+ if (map->kunmap_ops[offset + i].status == GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("kunmap handle=%u st=%d\n",
@@ -411,11 +422,15 @@ static void __unmap_grant_pages_done(int result,
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
+
/*
* Decrease the live-grant counter. This must happen after the loop to
* prevent premature reuse of the grants by gnttab_mmap().
*/
- atomic_sub(data->count, &map->live_grants);
+ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
+ if (WARN_ON(live_grants < 0))
+ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
+ __func__, live_grants, successful_unmaps);
/* Release reference taken by __unmap_grant_pages */
gntdev_put_map(NULL, map);
@@ -496,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -528,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1030,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
- goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1062,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1087,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1116,13 +1125,8 @@ unlock_out:
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index 8973fc1e9ccc..860f37c93af4 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -25,7 +25,7 @@ struct xen_grant_dma_data {
bool broken;
};
-static DEFINE_XARRAY(xen_grant_dma_devices);
+static DEFINE_XARRAY_FLAGS(xen_grant_dma_devices, XA_FLAGS_LOCK_IRQ);
#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63)
@@ -42,14 +42,29 @@ static inline grant_ref_t dma_to_grant(dma_addr_t dma)
static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev)
{
struct xen_grant_dma_data *data;
+ unsigned long flags;
- xa_lock(&xen_grant_dma_devices);
+ xa_lock_irqsave(&xen_grant_dma_devices, flags);
data = xa_load(&xen_grant_dma_devices, (unsigned long)dev);
- xa_unlock(&xen_grant_dma_devices);
+ xa_unlock_irqrestore(&xen_grant_dma_devices, flags);
return data;
}
+static int store_xen_grant_dma_data(struct device *dev,
+ struct xen_grant_dma_data *data)
+{
+ unsigned long flags;
+ int ret;
+
+ xa_lock_irqsave(&xen_grant_dma_devices, flags);
+ ret = xa_err(__xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
+ GFP_ATOMIC));
+ xa_unlock_irqrestore(&xen_grant_dma_devices, flags);
+
+ return ret;
+}
+
/*
* DMA ops for Xen frontends (e.g. virtio).
*
@@ -153,7 +168,7 @@ static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct xen_grant_dma_data *data;
- unsigned int i, n_pages = PFN_UP(size);
+ unsigned int i, n_pages = PFN_UP(offset + size);
grant_ref_t grant;
dma_addr_t dma_handle;
@@ -185,7 +200,8 @@ static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
unsigned long attrs)
{
struct xen_grant_dma_data *data;
- unsigned int i, n_pages = PFN_UP(size);
+ unsigned long offset = dma_handle & (PAGE_SIZE - 1);
+ unsigned int i, n_pages = PFN_UP(offset + size);
grant_ref_t grant;
if (WARN_ON(dir == DMA_NONE))
@@ -273,72 +289,91 @@ static const struct dma_map_ops xen_grant_dma_ops = {
.dma_supported = xen_grant_dma_supported,
};
-bool xen_is_grant_dma_device(struct device *dev)
+static bool xen_is_dt_grant_dma_device(struct device *dev)
{
struct device_node *iommu_np;
bool has_iommu;
- /* XXX Handle only DT devices for now */
- if (!dev->of_node)
- return false;
-
iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
- has_iommu = iommu_np && of_device_is_compatible(iommu_np, "xen,grant-dma");
+ has_iommu = iommu_np &&
+ of_device_is_compatible(iommu_np, "xen,grant-dma");
of_node_put(iommu_np);
return has_iommu;
}
+bool xen_is_grant_dma_device(struct device *dev)
+{
+ /* XXX Handle only DT devices for now */
+ if (dev->of_node)
+ return xen_is_dt_grant_dma_device(dev);
+
+ return false;
+}
+
bool xen_virtio_mem_acc(struct virtio_device *dev)
{
- if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT))
+ if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT) || xen_pv_domain())
return true;
return xen_is_grant_dma_device(dev->dev.parent);
}
-void xen_grant_setup_dma_ops(struct device *dev)
+static int xen_dt_grant_init_backend_domid(struct device *dev,
+ struct xen_grant_dma_data *data)
{
- struct xen_grant_dma_data *data;
struct of_phandle_args iommu_spec;
- data = find_xen_grant_dma_data(dev);
- if (data) {
- dev_err(dev, "Xen grant DMA data is already created\n");
- return;
- }
-
- /* XXX ACPI device unsupported for now */
- if (!dev->of_node)
- goto err;
-
if (of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells",
0, &iommu_spec)) {
dev_err(dev, "Cannot parse iommus property\n");
- goto err;
+ return -ESRCH;
}
if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
iommu_spec.args_count != 1) {
dev_err(dev, "Incompatible IOMMU node\n");
of_node_put(iommu_spec.np);
- goto err;
+ return -ESRCH;
}
of_node_put(iommu_spec.np);
+ /*
+ * The endpoint ID here means the ID of the domain where the
+ * corresponding backend is running
+ */
+ data->backend_domid = iommu_spec.args[0];
+
+ return 0;
+}
+
+void xen_grant_setup_dma_ops(struct device *dev)
+{
+ struct xen_grant_dma_data *data;
+
+ data = find_xen_grant_dma_data(dev);
+ if (data) {
+ dev_err(dev, "Xen grant DMA data is already created\n");
+ return;
+ }
+
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
goto err;
- /*
- * The endpoint ID here means the ID of the domain where the corresponding
- * backend is running
- */
- data->backend_domid = iommu_spec.args[0];
+ if (dev->of_node) {
+ if (xen_dt_grant_init_backend_domid(dev, data))
+ goto err;
+ } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
+ dev_info(dev, "Using dom0 as backend\n");
+ data->backend_domid = 0;
+ } else {
+ /* XXX ACPI device unsupported for now */
+ goto err;
+ }
- if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
- GFP_KERNEL))) {
+ if (store_xen_grant_dma_data(dev, data)) {
dev_err(dev, "Cannot store Xen grant DMA data\n");
goto err;
}
@@ -348,9 +383,20 @@ void xen_grant_setup_dma_ops(struct device *dev)
return;
err:
+ devm_kfree(dev, data);
dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n");
}
+bool xen_virtio_restricted_mem_acc(struct virtio_device *dev)
+{
+ bool ret = xen_virtio_mem_acc(dev);
+
+ if (ret)
+ xen_grant_setup_dma_ops(dev->dev.parent);
+
+ return ret;
+}
+
MODULE_DESCRIPTION("Xen grant DMA-mapping layer");
MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index bde63ef677b8..d171091eec12 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -31,7 +31,7 @@ MODULE_PARM_DESC(passthrough,
" frontend (for example, a device at 06:01.b will still appear at\n"\
" 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
" exposed PCI devices to its driver domains. This may be required\n"\
- " for drivers which depend on finding their hardward in certain\n"\
+ " for drivers which depend on finding their hardware in certain\n"\
" bus/slot locations.");
static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)