summaryrefslogtreecommitdiff
path: root/drivers/vfio
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-06 08:59:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-06 08:59:35 -0700
commita9cf69d0e7f2051cca1c08ed9b34fe79da951ee9 (patch)
tree80af592fdcb2a815a35272cd8375aa03c7d2f04a /drivers/vfio
parent6614a3c3164a5df2b54abb0b3559f51041cf705b (diff)
parent099fd2c2020751737d9288f923d562e0e05977eb (diff)
Merge tag 'vfio-v6.0-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson: - Cleanup use of extern in function prototypes (Alex Williamson) - Simplify bus_type usage and convert to device IOMMU interfaces (Robin Murphy) - Check missed return value and fix comment typos (Bo Liu) - Split migration ops from device ops and fix races in mlx5 migration support (Yishai Hadas) - Fix missed return value check in noiommu support (Liam Ni) - Hardening to clear buffer pointer to avoid use-after-free (Schspa Shi) - Remove requirement that only the same mm can unmap a previously mapped range (Li Zhe) - Adjust semaphore release vs device open counter (Yi Liu) - Remove unused arg from SPAPR support code (Deming Wang) - Rework vfio-ccw driver to better fit new mdev framework (Eric Farman, Michael Kawano) - Replace DMA unmap notifier with callbacks (Jason Gunthorpe) - Clarify SPAPR support comment relative to iommu_ops (Alexey Kardashevskiy) - Revise page pinning API towards compatibility with future iommufd support (Nicolin Chen) - Resolve issues in vfio-ccw, including use of DMA unmap callback (Eric Farman) * tag 'vfio-v6.0-rc1' of https://github.com/awilliam/linux-vfio: (40 commits) vfio/pci: fix the wrong word vfio/ccw: Check return code from subchannel quiesce vfio/ccw: Remove FSM Close from remove handlers vfio/ccw: Add length to DMA_UNMAP checks vfio: Replace phys_pfn with pages for vfio_pin_pages() vfio/ccw: Add kmap_local_page() for memcpy vfio: Rename user_iova of vfio_dma_rw() vfio/ccw: Change pa_pfn list to pa_iova list vfio/ap: Change saved_pfn to saved_iova vfio: Pass in starting IOVA to vfio_pin/unpin_pages API vfio/ccw: Only pass in contiguous pages vfio/ap: Pass in physical address of ind to ap_aqic() drm/i915/gvt: Replace roundup with DIV_ROUND_UP vfio: Make vfio_unpin_pages() return void vfio/spapr_tce: Fix the comment vfio: Replace the iommu notifier with a device list vfio: Replace the DMA unmapping notifier with a callback vfio/ccw: Move FSM open/close to MDEV open/close vfio/ccw: Refactor vfio_ccw_mdev_reset vfio/ccw: Create a CLOSE FSM event ...
Diffstat (limited to 'drivers/vfio')
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc_private.h2
-rw-r--r--drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c11
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c14
-rw-r--r--drivers/vfio/pci/mlx5/cmd.h4
-rw-r--r--drivers/vfio/pci/mlx5/main.c11
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c4
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c7
-rw-r--r--drivers/vfio/platform/vfio_platform_private.h21
-rw-r--r--drivers/vfio/vfio.c192
-rw-r--r--drivers/vfio/vfio.h17
-rw-r--r--drivers/vfio/vfio_iommu_spapr_tce.c14
-rw-r--r--drivers/vfio/vfio_iommu_type1.c197
12 files changed, 241 insertions, 253 deletions
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
index 4ad63ececb91..7a29f572f93d 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -39,7 +39,7 @@ struct vfio_fsl_mc_device {
struct vfio_fsl_mc_irq *mc_irqs;
};
-extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
u32 flags, unsigned int index,
unsigned int start, unsigned int count,
void *data);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 4def43f5f7b6..ea762e28c1cc 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1185,7 +1185,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev)
if (ret)
return ret;
- if (core_vdev->ops->migration_set_state) {
+ if (core_vdev->mig_ops) {
ret = hisi_acc_vf_qm_init(hisi_acc_vdev);
if (ret) {
vfio_pci_core_disable(vdev);
@@ -1208,6 +1208,11 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
vfio_pci_core_close_device(core_vdev);
}
+static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = {
+ .migration_set_state = hisi_acc_vfio_pci_set_device_state,
+ .migration_get_state = hisi_acc_vfio_pci_get_device_state,
+};
+
static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
.name = "hisi-acc-vfio-pci-migration",
.open_device = hisi_acc_vfio_pci_open_device,
@@ -1219,8 +1224,6 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
.mmap = hisi_acc_vfio_pci_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
- .migration_set_state = hisi_acc_vfio_pci_set_device_state,
- .migration_get_state = hisi_acc_vfio_pci_get_device_state,
};
static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
@@ -1272,6 +1275,8 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device
if (!ret) {
vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev,
&hisi_acc_vfio_pci_migrn_ops);
+ hisi_acc_vdev->core_device.vdev.mig_ops =
+ &hisi_acc_vfio_pci_migrn_state_ops;
} else {
pci_warn(pdev, "migration support failed, continue with generic interface\n");
vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev,
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 9b9f33ca270a..dd5d7bfe0a49 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -88,6 +88,16 @@ static int mlx5fv_vf_event(struct notifier_block *nb,
return 0;
}
+void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
+{
+ if (!mvdev->migrate_cap)
+ return;
+
+ mutex_lock(&mvdev->state_mutex);
+ mlx5vf_disable_fds(mvdev);
+ mlx5vf_state_mutex_unlock(mvdev);
+}
+
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
if (!mvdev->migrate_cap)
@@ -98,7 +108,8 @@ void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
destroy_workqueue(mvdev->cb_wq);
}
-void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev)
+void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
+ const struct vfio_migration_ops *mig_ops)
{
struct pci_dev *pdev = mvdev->core_device.pdev;
int ret;
@@ -139,6 +150,7 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev)
mvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
VFIO_MIGRATION_P2P;
+ mvdev->core_device.vdev.mig_ops = mig_ops;
end:
mlx5_vf_put_core_dev(mvdev->mdev);
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 6c3112fdd8b1..8208f4701a90 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -62,8 +62,10 @@ int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size);
-void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
+ const struct vfio_migration_ops *mig_ops);
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev);
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
struct mlx5_vf_migration_file *migf);
int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 0558d0649ddb..a9b63d15c5d3 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -570,10 +570,15 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
struct mlx5vf_pci_core_device *mvdev = container_of(
core_vdev, struct mlx5vf_pci_core_device, core_device.vdev);
- mlx5vf_disable_fds(mvdev);
+ mlx5vf_cmd_close_migratable(mvdev);
vfio_pci_core_close_device(core_vdev);
}
+static const struct vfio_migration_ops mlx5vf_pci_mig_ops = {
+ .migration_set_state = mlx5vf_pci_set_device_state,
+ .migration_get_state = mlx5vf_pci_get_device_state,
+};
+
static const struct vfio_device_ops mlx5vf_pci_ops = {
.name = "mlx5-vfio-pci",
.open_device = mlx5vf_pci_open_device,
@@ -585,8 +590,6 @@ static const struct vfio_device_ops mlx5vf_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
- .migration_set_state = mlx5vf_pci_set_device_state,
- .migration_get_state = mlx5vf_pci_get_device_state,
};
static int mlx5vf_pci_probe(struct pci_dev *pdev,
@@ -599,7 +602,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
if (!mvdev)
return -ENOMEM;
vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops);
- mlx5vf_cmd_set_migratable(mvdev);
+ mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops);
dev_set_drvdata(&pdev->dev, &mvdev->core_device);
ret = vfio_pci_core_register_device(&mvdev->core_device);
if (ret)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 9343f597182d..442d3ba4122b 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -222,7 +222,7 @@ static int vfio_default_config_write(struct vfio_pci_core_device *vdev, int pos,
memcpy(vdev->vconfig + pos, &virt_val, count);
}
- /* Non-virtualzed and writable bits go to hardware */
+ /* Non-virtualized and writable bits go to hardware */
if (write & ~virt) {
struct pci_dev *pdev = vdev->pdev;
__le32 phys_val = 0;
@@ -1728,7 +1728,7 @@ int vfio_config_init(struct vfio_pci_core_device *vdev)
/*
* Config space, caps and ecaps are all dword aligned, so we could
* use one byte per dword to record the type. However, there are
- * no requiremenst on the length of a capability, so the gap between
+ * no requirements on the length of a capability, so the gap between
* capabilities needs byte granularity.
*/
map = kmalloc(pdev->cfg_size, GFP_KERNEL);
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 0d8d4cdfb2f0..c8d3b0450fb3 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1868,6 +1868,13 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
return -EINVAL;
+ if (vdev->vdev.mig_ops) {
+ if (!(vdev->vdev.mig_ops->migration_get_state &&
+ vdev->vdev.mig_ops->migration_set_state) ||
+ !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY))
+ return -EINVAL;
+ }
+
/*
* Prevent binding to PFs with VFs enabled, the VFs might be in use
* by the host or other users. We cannot capture the VFs if they
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h
index 520d2a8e8375..691b43f4b2b2 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -78,21 +78,20 @@ struct vfio_platform_reset_node {
vfio_platform_reset_fn_t of_reset;
};
-extern int vfio_platform_probe_common(struct vfio_platform_device *vdev,
- struct device *dev);
+int vfio_platform_probe_common(struct vfio_platform_device *vdev,
+ struct device *dev);
void vfio_platform_remove_common(struct vfio_platform_device *vdev);
-extern int vfio_platform_irq_init(struct vfio_platform_device *vdev);
-extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev);
+int vfio_platform_irq_init(struct vfio_platform_device *vdev);
+void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev);
-extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
- uint32_t flags, unsigned index,
- unsigned start, unsigned count,
- void *data);
+int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
+ uint32_t flags, unsigned index,
+ unsigned start, unsigned count, void *data);
-extern void __vfio_platform_register_reset(struct vfio_platform_reset_node *n);
-extern void vfio_platform_unregister_reset(const char *compat,
- vfio_platform_reset_fn_t fn);
+void __vfio_platform_register_reset(struct vfio_platform_reset_node *n);
+void vfio_platform_unregister_reset(const char *compat,
+ vfio_platform_reset_fn_t fn);
#define vfio_platform_register_reset(__compat, __reset) \
static struct vfio_platform_reset_node __reset ## _node = { \
.owner = THIS_MODULE, \
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 521a3eabf0e1..7cb56c382c97 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
struct vfio_iommu_driver *driver, *tmp;
+ if (WARN_ON(!ops->register_device != !ops->unregister_device))
+ return -EINVAL;
+
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
if (!driver)
return -ENOMEM;
@@ -504,7 +507,9 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
if (IS_ERR(iommu_group))
return ERR_CAST(iommu_group);
- iommu_group_set_name(iommu_group, "vfio-noiommu");
+ ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
+ if (ret)
+ goto out_put_group;
ret = iommu_group_add_device(iommu_group, dev);
if (ret)
goto out_put_group;
@@ -554,7 +559,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
* restore cache coherency. It has to be checked here because it is only
* valid for cases where we are using iommu groups.
*/
- if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
iommu_group_put(iommu_group);
return ERR_PTR(-EINVAL);
}
@@ -1082,6 +1087,7 @@ static void vfio_device_unassign_container(struct vfio_device *device)
static struct file *vfio_device_open(struct vfio_device *device)
{
+ struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
@@ -1112,6 +1118,12 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (iommu_driver && iommu_driver->ops->register_device)
+ iommu_driver->ops->register_device(
+ device->group->container->iommu_data, device);
+
up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);
@@ -1146,13 +1158,19 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
down_read(&device->group->group_rwsem);
- if (device->open_count == 1 && device->ops->close_device)
+ if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (iommu_driver && iommu_driver->ops->unregister_device)
+ iommu_driver->ops->unregister_device(
+ device->group->container->iommu_data, device);
+ }
err_undo_count:
+ up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0 && device->kvm)
device->kvm = NULL;
- up_read(&device->group->group_rwsem);
mutex_unlock(&device->dev_set->lock);
module_put(device->dev->driver->owner);
err_unassign_container:
@@ -1342,12 +1360,18 @@ static const struct file_operations vfio_group_fops = {
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
+ struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
+
+ iommu_driver = device->group->container->iommu_driver;
+ if (iommu_driver && iommu_driver->ops->unregister_device)
+ iommu_driver->ops->unregister_device(
+ device->group->container->iommu_data, device);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
@@ -1544,8 +1568,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
struct file *filp = NULL;
int ret;
- if (!device->ops->migration_set_state ||
- !device->ops->migration_get_state)
+ if (!device->mig_ops)
return -ENOTTY;
ret = vfio_check_feature(flags, argsz,
@@ -1561,7 +1584,8 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
if (flags & VFIO_DEVICE_FEATURE_GET) {
enum vfio_device_mig_state curr_state;
- ret = device->ops->migration_get_state(device, &curr_state);
+ ret = device->mig_ops->migration_get_state(device,
+ &curr_state);
if (ret)
return ret;
mig.device_state = curr_state;
@@ -1569,7 +1593,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device,
}
/* Handle the VFIO_DEVICE_FEATURE_SET */
- filp = device->ops->migration_set_state(device, mig.device_state);
+ filp = device->mig_ops->migration_set_state(device, mig.device_state);
if (IS_ERR(filp) || !filp)
goto out_copy;
@@ -1592,8 +1616,7 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
};
int ret;
- if (!device->ops->migration_set_state ||
- !device->ops->migration_get_state)
+ if (!device->mig_ops)
return -ENOTTY;
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
@@ -1815,6 +1838,7 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
if (!buf) {
kfree(caps->buf);
+ caps->buf = NULL;
caps->size = 0;
return ERR_PTR(-ENOMEM);
}
@@ -1913,26 +1937,25 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
/*
- * Pin a set of guest PFNs and return their associated host PFNs for local
+ * Pin contiguous user pages and return their associated host pages for local
* domain only.
* @device [in] : device
- * @user_pfn [in]: array of user/guest PFNs to be pinned.
- * @npage [in] : count of elements in user_pfn array. This count should not
- * be greater VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @iova [in] : starting IOVA of user pages to be pinned.
+ * @npage [in] : count of pages to be pinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
* @prot [in] : protection flags
- * @phys_pfn[out]: array of host PFNs
+ * @pages[out] : array of host pages
* Return error or number of pages pinned.
*/
-int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
- int npage, int prot, unsigned long *phys_pfn)
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+ int npage, int prot, struct page **pages)
{
struct vfio_container *container;
struct vfio_group *group = device->group;
struct vfio_iommu_driver *driver;
int ret;
- if (!user_pfn || !phys_pfn || !npage ||
- !vfio_assert_device_open(device))
+ if (!pages || !npage || !vfio_assert_device_open(device))
return -EINVAL;
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
@@ -1946,8 +1969,8 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data,
- group->iommu_group, user_pfn,
- npage, prot, phys_pfn);
+ group->iommu_group, iova,
+ npage, prot, pages);
else
ret = -ENOTTY;
@@ -1956,37 +1979,28 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
EXPORT_SYMBOL(vfio_pin_pages);
/*
- * Unpin set of host PFNs for local domain only.
+ * Unpin contiguous host pages for local domain only.
* @device [in] : device
- * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest
- * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @npage [in] : count of elements in user_pfn array. This count should not
+ * @iova [in] : starting address of user pages to be unpinned.
+ * @npage [in] : count of pages to be unpinned. This count should not
* be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * Return error or number of pages unpinned.
*/
-int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
- int npage)
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
struct vfio_container *container;
struct vfio_iommu_driver *driver;
- int ret;
- if (!user_pfn || !npage || !vfio_assert_device_open(device))
- return -EINVAL;
+ if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
+ return;
- if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
- return -E2BIG;
+ if (WARN_ON(!vfio_assert_device_open(device)))
+ return;
/* group->container cannot change while a vfio device is open */
container = device->group->container;
driver = container->iommu_driver;
- if (likely(driver && driver->ops->unpin_pages))
- ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
- npage);
- else
- ret = -ENOTTY;
- return ret;
+ driver->ops->unpin_pages(container->iommu_data, iova, npage);
}
EXPORT_SYMBOL(vfio_unpin_pages);
@@ -2001,13 +2015,13 @@ EXPORT_SYMBOL(vfio_unpin_pages);
* not a real device DMA, it is not necessary to pin the user space memory.
*
* @device [in] : VFIO device
- * @user_iova [in] : base IOVA of a user space buffer
+ * @iova [in] : base IOVA of a user space buffer
* @data [in] : pointer to kernel buffer
* @len [in] : kernel buffer length
* @write : indicate read or write
* Return error code on failure or 0 on success.
*/
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
size_t len, bool write)
{
struct vfio_container *container;
@@ -2023,97 +2037,13 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
if (likely(driver && driver->ops->dma_rw))
ret = driver->ops->dma_rw(container->iommu_data,
- user_iova, data, len, write);
+ iova, data, len, write);
else
ret = -ENOTTY;
return ret;
}
EXPORT_SYMBOL(vfio_dma_rw);
-static int vfio_register_iommu_notifier(struct vfio_group *group,
- unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->register_notifier))
- ret = driver->ops->register_notifier(container->iommu_data,
- events, nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-static int vfio_unregister_iommu_notifier(struct vfio_group *group,
- struct notifier_block *nb)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret;
-
- lockdep_assert_held_read(&group->group_rwsem);
-
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->unregister_notifier))
- ret = driver->ops->unregister_notifier(container->iommu_data,
- nb);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-
-int vfio_register_notifier(struct vfio_device *device,
- enum vfio_notify_type type, unsigned long *events,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !events || (*events == 0) ||
- !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_register_iommu_notifier(group, events, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_register_notifier);
-
-int vfio_unregister_notifier(struct vfio_device *device,
- enum vfio_notify_type type,
- struct notifier_block *nb)
-{
- struct vfio_group *group = device->group;
- int ret;
-
- if (!nb || !vfio_assert_device_open(device))
- return -EINVAL;
-
- switch (type) {
- case VFIO_IOMMU_NOTIFY:
- ret = vfio_unregister_iommu_notifier(group, nb);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfio_unregister_notifier);
-
/*
* Module/class support
*/
@@ -2159,13 +2089,17 @@ static int __init vfio_init(void)
if (ret)
goto err_alloc_chrdev;
- pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
-
#ifdef CONFIG_VFIO_NOIOMMU
- vfio_register_iommu_driver(&vfio_noiommu_ops);
+ ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
+ if (ret)
+ goto err_driver_register;
+
+ pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
return 0;
+err_driver_register:
+ unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
class_destroy(vfio.class);
vfio.class = NULL;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a67130221151..503bea6c843d 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -50,16 +50,15 @@ struct vfio_iommu_driver_ops {
struct iommu_group *group);
int (*pin_pages)(void *iommu_data,
struct iommu_group *group,
- unsigned long *user_pfn,
+ dma_addr_t user_iova,
int npage, int prot,
- unsigned long *phys_pfn);
- int (*unpin_pages)(void *iommu_data,
- unsigned long *user_pfn, int npage);
- int (*register_notifier)(void *iommu_data,
- unsigned long *events,
- struct notifier_block *nb);
- int (*unregister_notifier)(void *iommu_data,
- struct notifier_block *nb);
+ struct page **pages);
+ void (*unpin_pages)(void *iommu_data,
+ dma_addr_t user_iova, int npage);
+ void (*register_device)(void *iommu_data,
+ struct vfio_device *vdev);
+ void (*unregister_device)(void *iommu_data,
+ struct vfio_device *vdev);
int (*dma_rw)(void *iommu_data, dma_addr_t user_iova,
void *data, size_t count, bool write);
struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 708a95e61831..169f07ac162d 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -378,8 +378,7 @@ static void tce_iommu_release(void *iommu_data)
kfree(container);
}
-static void tce_iommu_unuse_page(struct tce_container *container,
- unsigned long hpa)
+static void tce_iommu_unuse_page(unsigned long hpa)
{
struct page *page;
@@ -474,7 +473,7 @@ static int tce_iommu_clear(struct tce_container *container,
continue;
}
- tce_iommu_unuse_page(container, oldhpa);
+ tce_iommu_unuse_page(oldhpa);
}
iommu_tce_kill(tbl, firstentry, pages);
@@ -524,7 +523,7 @@ static long tce_iommu_build(struct tce_container *container,
ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i,
&hpa, &dirtmp);
if (ret) {
- tce_iommu_unuse_page(container, hpa);
+ tce_iommu_unuse_page(hpa);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
__func__, entry << tbl->it_page_shift,
tce, ret);
@@ -532,7 +531,7 @@ static long tce_iommu_build(struct tce_container *container,
}
if (dirtmp != DMA_NONE)
- tce_iommu_unuse_page(container, hpa);
+ tce_iommu_unuse_page(hpa);
tce += IOMMU_PAGE_SIZE(tbl);
}
@@ -1266,7 +1265,10 @@ static int tce_iommu_attach_group(void *iommu_data,
goto unlock_exit;
}
- /* Check if new group has the same iommu_ops (i.e. compatible) */
+ /*
+ * Check if new group has the same iommu_table_group_ops
+ * (i.e. compatible)
+ */
list_for_each_entry(tcegrp, &container->group_list, next) {
struct iommu_table_group *table_group_tmp;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..db516c90a977 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -67,7 +67,8 @@ struct vfio_iommu {
struct list_head iova_list;
struct mutex lock;
struct rb_root dma_list;
- struct blocking_notifier_head notifier;
+ struct list_head device_list;
+ struct mutex device_list_lock;
unsigned int dma_avail;
unsigned int vaddr_invalid_count;
uint64_t pgsize_bitmap;
@@ -828,9 +829,9 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
static int vfio_iommu_type1_pin_pages(void *iommu_data,
struct iommu_group *iommu_group,
- unsigned long *user_pfn,
+ dma_addr_t user_iova,
int npage, int prot,
- unsigned long *phys_pfn)
+ struct page **pages)
{
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
@@ -840,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
bool do_accounting;
dma_addr_t iova;
- if (!iommu || !user_pfn || !phys_pfn)
+ if (!iommu || !pages)
return -EINVAL;
/* Supported for v2 version only */
@@ -856,7 +857,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
again:
if (iommu->vaddr_invalid_count) {
for (i = 0; i < npage; i++) {
- iova = user_pfn[i] << PAGE_SHIFT;
+ iova = user_iova + PAGE_SIZE * i;
ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma);
if (ret < 0)
goto pin_done;
@@ -865,8 +866,8 @@ again:
}
}
- /* Fail if notifier list is empty */
- if (!iommu->notifier.head) {
+ /* Fail if no dma_umap notifier is registered */
+ if (list_empty(&iommu->device_list)) {
ret = -EINVAL;
goto pin_done;
}
@@ -879,9 +880,10 @@ again:
do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) {
+ unsigned long phys_pfn;
struct vfio_pfn *vpfn;
- iova = user_pfn[i] << PAGE_SHIFT;
+ iova = user_iova + PAGE_SIZE * i;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
if (!dma) {
ret = -EINVAL;
@@ -895,23 +897,25 @@ again:
vpfn = vfio_iova_get_vfio_pfn(dma, iova);
if (vpfn) {
- phys_pfn[i] = vpfn->pfn;
+ pages[i] = pfn_to_page(vpfn->pfn);
continue;
}
remote_vaddr = dma->vaddr + (iova - dma->iova);
- ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
+ ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn,
do_accounting);
if (ret)
goto pin_unwind;
- ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
+ ret = vfio_add_to_pfn_list(dma, iova, phys_pfn);
if (ret) {
- if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
+ if (put_pfn(phys_pfn, dma->prot) && do_accounting)
vfio_lock_acct(dma, -1, true);
goto pin_unwind;
}
+ pages[i] = pfn_to_page(phys_pfn);
+
if (iommu->dirty_page_tracking) {
unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
@@ -934,43 +938,38 @@ again:
goto pin_done;
pin_unwind:
- phys_pfn[i] = 0;
+ pages[i] = NULL;
for (j = 0; j < i; j++) {
dma_addr_t iova;
- iova = user_pfn[j] << PAGE_SHIFT;
+ iova = user_iova + PAGE_SIZE * j;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
vfio_unpin_page_external(dma, iova, do_accounting);
- phys_pfn[j] = 0;
+ pages[j] = NULL;
}
pin_done:
mutex_unlock(&iommu->lock);
return ret;
}
-static int vfio_iommu_type1_unpin_pages(void *iommu_data,
- unsigned long *user_pfn,
- int npage)
+static void vfio_iommu_type1_unpin_pages(void *iommu_data,
+ dma_addr_t user_iova, int npage)
{
struct vfio_iommu *iommu = iommu_data;
bool do_accounting;
int i;
- if (!iommu || !user_pfn || npage <= 0)
- return -EINVAL;
-
/* Supported for v2 version only */
- if (!iommu->v2)
- return -EACCES;
+ if (WARN_ON(!iommu->v2))
+ return;
mutex_lock(&iommu->lock);
do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) {
+ dma_addr_t iova = user_iova + PAGE_SIZE * i;
struct vfio_dma *dma;
- dma_addr_t iova;
- iova = user_pfn[i] << PAGE_SHIFT;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
if (!dma)
break;
@@ -979,7 +978,8 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
}
mutex_unlock(&iommu->lock);
- return i > 0 ? i : -EINVAL;
+
+ WARN_ON(i != npage);
}
static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
@@ -1287,6 +1287,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
return 0;
}
+/*
+ * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
+ * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
+ * pages in response to an invalidation.
+ */
+static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
+ struct vfio_dma *dma)
+{
+ struct vfio_device *device;
+
+ if (list_empty(&iommu->device_list))
+ return;
+
+ /*
+ * The device is expected to call vfio_unpin_pages() for any IOVA it has
+ * pinned within the range. Since vfio_unpin_pages() will eventually
+ * call back down to this code and try to obtain the iommu->lock we must
+ * drop it.
+ */
+ mutex_lock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
+
+ list_for_each_entry(device, &iommu->device_list, iommu_entry)
+ device->ops->dma_unmap(device, dma->iova, dma->size);
+
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_lock(&iommu->lock);
+}
+
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_unmap *unmap,
struct vfio_bitmap *bitmap)
@@ -1377,12 +1406,6 @@ again:
if (!iommu->v2 && iova > dma->iova)
break;
- /*
- * Task with same address space who mapped this iova range is
- * allowed to unmap the iova range.
- */
- if (dma->task->mm != current->mm)
- break;
if (invalidate_vaddr) {
if (dma->vaddr_invalid) {
@@ -1406,8 +1429,6 @@ again:
}
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
- struct vfio_iommu_type1_dma_unmap nb_unmap;
-
if (dma_last == dma) {
BUG_ON(++retries > 10);
} else {
@@ -1415,20 +1436,7 @@ again:
retries = 0;
}
- nb_unmap.iova = dma->iova;
- nb_unmap.size = dma->size;
-
- /*
- * Notify anyone (mdev vendor drivers) to invalidate and
- * unmap iovas within the range we're about to unmap.
- * Vendor drivers MUST unpin pages in response to an
- * invalidation.
- */
- mutex_unlock(&iommu->lock);
- blocking_notifier_call_chain(&iommu->notifier,
- VFIO_IOMMU_NOTIFY_DMA_UNMAP,
- &nb_unmap);
- mutex_lock(&iommu->lock);
+ vfio_notify_dma_unmap(iommu, dma);
goto again;
}
@@ -1679,18 +1687,6 @@ out_unlock:
return ret;
}
-static int vfio_bus_type(struct device *dev, void *data)
-{
- struct bus_type **bus = data;
-
- if (*bus && *bus != dev->bus)
- return -EINVAL;
-
- *bus = dev->bus;
-
- return 0;
-}
-
static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *domain)
{
@@ -2153,13 +2149,26 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
}
+/* Redundantly walks non-present capabilities to simplify caller */
+static int vfio_iommu_device_capable(struct device *dev, void *data)
+{
+ return device_iommu_capable(dev, (enum iommu_cap)data);
+}
+
+static int vfio_iommu_domain_alloc(struct device *dev, void *data)
+{
+ struct iommu_domain **domain = data;
+
+ *domain = iommu_domain_alloc(dev->bus);
+ return 1; /* Don't iterate */
+}
+
static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group, enum vfio_group_type type)
{
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
struct vfio_domain *domain, *d;
- struct bus_type *bus = NULL;
bool resv_msi, msi_remap;
phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo;
@@ -2192,18 +2201,19 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_unlock;
}
- /* Determine bus_type in order to allocate a domain */
- ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
- if (ret)
- goto out_free_group;
-
ret = -ENOMEM;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
goto out_free_group;
+ /*
+ * Going via the iommu_group iterator avoids races, and trivially gives
+ * us a representative device for the IOMMU API call. We don't actually
+ * want to iterate beyond the first device (if any).
+ */
ret = -EIO;
- domain->domain = iommu_domain_alloc(bus);
+ iommu_group_for_each_dev(iommu_group, &domain->domain,
+ vfio_iommu_domain_alloc);
if (!domain->domain)
goto out_free_domain;
@@ -2258,7 +2268,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&group->next, &domain->group_list);
msi_remap = irq_domain_check_msi_remap() ||
- iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
+ iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP,
+ vfio_iommu_device_capable);
if (!allow_unsafe_interrupts && !msi_remap) {
pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
@@ -2478,7 +2489,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (list_empty(&iommu->emulated_iommu_groups) &&
list_empty(&iommu->domain_list)) {
- WARN_ON(iommu->notifier.head);
+ WARN_ON(!list_empty(&iommu->device_list));
vfio_iommu_unmap_unpin_all(iommu);
}
goto detach_group_done;
@@ -2510,7 +2521,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (list_empty(&domain->group_list)) {
if (list_is_singular(&iommu->domain_list)) {
if (list_empty(&iommu->emulated_iommu_groups)) {
- WARN_ON(iommu->notifier.head);
+ WARN_ON(!list_empty(
+ &iommu->device_list));
vfio_iommu_unmap_unpin_all(iommu);
} else {
vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2571,7 +2583,8 @@ static void *vfio_iommu_type1_open(unsigned long arg)
iommu->dma_avail = dma_entry_limit;
iommu->container_open = true;
mutex_init(&iommu->lock);
- BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+ mutex_init(&iommu->device_list_lock);
+ INIT_LIST_HEAD(&iommu->device_list);
init_waitqueue_head(&iommu->vaddr_wait);
iommu->pgsize_bitmap = PAGE_MASK;
INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3008,28 +3021,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
}
}
-static int vfio_iommu_type1_register_notifier(void *iommu_data,
- unsigned long *events,
- struct notifier_block *nb)
+static void vfio_iommu_type1_register_device(void *iommu_data,
+ struct vfio_device *vdev)
{
struct vfio_iommu *iommu = iommu_data;
- /* clear known events */
- *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
- /* refuse to register if still events remaining */
- if (*events)
- return -EINVAL;
+ if (!vdev->ops->dma_unmap)
+ return;
- return blocking_notifier_chain_register(&iommu->notifier, nb);
+ /*
+ * list_empty(&iommu->device_list) is tested under the iommu->lock while
+ * iteration for dma_unmap must be done under the device_list_lock.
+ * Holding both locks here allows avoiding the device_list_lock in
+ * several fast paths. See vfio_notify_dma_unmap()
+ */
+ mutex_lock(&iommu->lock);
+ mutex_lock(&iommu->device_list_lock);
+ list_add(&vdev->iommu_entry, &iommu->device_list);
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
}
-static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
- struct notifier_block *nb)
+static void vfio_iommu_type1_unregister_device(void *iommu_data,
+ struct vfio_device *vdev)
{
struct vfio_iommu *iommu = iommu_data;
- return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+ if (!vdev->ops->dma_unmap)
+ return;
+
+ mutex_lock(&iommu->lock);
+ mutex_lock(&iommu->device_list_lock);
+ list_del(&vdev->iommu_entry);
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
}
static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3163,8 +3188,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
.detach_group = vfio_iommu_type1_detach_group,
.pin_pages = vfio_iommu_type1_pin_pages,
.unpin_pages = vfio_iommu_type1_unpin_pages,
- .register_notifier = vfio_iommu_type1_register_notifier,
- .unregister_notifier = vfio_iommu_type1_unregister_notifier,
+ .register_device = vfio_iommu_type1_register_device,
+ .unregister_device = vfio_iommu_type1_unregister_device,
.dma_rw = vfio_iommu_type1_dma_rw,
.group_iommu_domain = vfio_iommu_type1_group_iommu_domain,
.notify = vfio_iommu_type1_notify,