From d1877e639bc6bf1c3131eda3f9ede73f8da96c22 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 8 Jun 2022 12:55:13 -0600 Subject: vfio: de-extern-ify function prototypes The use of 'extern' in function prototypes has been disrecommended in the kernel coding style for several years now, remove them from all vfio related files so contributors no longer need to decide between style and consistency. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Eric Farman Reviewed-by: Eric Auger Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/165471414407.203056.474032786990662279.stgit@omen Signed-off-by: Alex Williamson --- drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 2 +- drivers/vfio/platform/vfio_platform_private.h | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h index 4ad63ececb91..7a29f572f93d 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h @@ -39,7 +39,7 @@ struct vfio_fsl_mc_device { struct vfio_fsl_mc_irq *mc_irqs; }; -extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev, +int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev, u32 flags, unsigned int index, unsigned int start, unsigned int count, void *data); diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 520d2a8e8375..691b43f4b2b2 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -78,21 +78,20 @@ struct vfio_platform_reset_node { vfio_platform_reset_fn_t of_reset; }; -extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, - struct device *dev); +int vfio_platform_probe_common(struct vfio_platform_device *vdev, + struct device *dev); void vfio_platform_remove_common(struct vfio_platform_device *vdev); -extern int vfio_platform_irq_init(struct vfio_platform_device *vdev); -extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev); +int vfio_platform_irq_init(struct vfio_platform_device *vdev); +void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev); -extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, - uint32_t flags, unsigned index, - unsigned start, unsigned count, - void *data); +int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, + uint32_t flags, unsigned index, + unsigned start, unsigned count, void *data); -extern void __vfio_platform_register_reset(struct vfio_platform_reset_node *n); -extern void vfio_platform_unregister_reset(const char *compat, - vfio_platform_reset_fn_t fn); +void __vfio_platform_register_reset(struct vfio_platform_reset_node *n); +void vfio_platform_unregister_reset(const char *compat, + vfio_platform_reset_fn_t fn); #define vfio_platform_register_reset(__compat, __reset) \ static struct vfio_platform_reset_node __reset ## _node = { \ .owner = THIS_MODULE, \ -- cgit v1.2.3-58-ga151 From eed20c782aea57b7efb42af2905dc381268b21e9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 24 Jun 2022 18:51:44 +0100 Subject: vfio/type1: Simplify bus_type determination Since IOMMU groups are mandatory for drivers to support, it stands to reason that any device which has been successfully added to a group must be on a bus supported by that IOMMU driver, and therefore a domain viable for any device in the group must be 
viable for all devices in the group. This already has to be the case for the IOMMU API's internal default domain, for instance. Thus even if the group contains devices on different buses, that can only mean that the IOMMU driver actually supports such an odd topology, and so without loss of generality we can expect the bus type of any device in a group to be suitable for IOMMU API calls. Furthermore, scrutiny reveals a lack of protection for the bus being removed while vfio_iommu_type1_attach_group() is using it; the reference that VFIO holds on the iommu_group ensures that data remains valid, but does not prevent the group's membership changing underfoot. We can address both concerns by recycling vfio_bus_type() into some superficially similar logic to indirect the IOMMU API calls themselves. Each call is thus protected from races by the IOMMU group's own locking, and we no longer need to hold group-derived pointers beyond that scope. It also gives us an easy path for the IOMMU API's migration of bus-based interfaces to device-based, of which we can already take the first step with device_iommu_capable(). As with domains, any capability must in practice be consistent for devices in a given group - and after all it's still the same capability which was expected to be consistent across an entire bus! - so there's no need for any complicated validation. Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/194a12d3434d7b38f84fa96503c7664451c8c395.1656092606.git.robin.murphy@arm.com [aw: add comment to vfio_iommu_device_capable()] Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 43 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c13b9290e357..c496b7d0b96f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1679,18 +1679,6 @@ out_unlock: return ret; } -static int vfio_bus_type(struct device *dev, void *data) -{ - struct bus_type **bus = data; - - if (*bus && *bus != dev->bus) - return -EINVAL; - - *bus = dev->bus; - - return 0; -} - static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *domain) { @@ -2153,13 +2141,26 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, list_splice_tail(iova_copy, iova); } +/* Redundantly walks non-present capabilities to simplify caller */ +static int vfio_iommu_device_capable(struct device *dev, void *data) +{ + return device_iommu_capable(dev, (enum iommu_cap)data); +} + +static int vfio_iommu_domain_alloc(struct device *dev, void *data) +{ + struct iommu_domain **domain = data; + + *domain = iommu_domain_alloc(dev->bus); + return 1; /* Don't iterate */ +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group, enum vfio_group_type type) { struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; struct vfio_domain *domain, *d; - struct bus_type *bus = NULL; bool resv_msi, msi_remap; phys_addr_t resv_msi_base = 0; struct iommu_domain_geometry *geo; @@ -2192,18 +2193,19 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_unlock; } - /* Determine bus_type in order to allocate a domain */ - ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type); - if (ret) - goto out_free_group; - ret = -ENOMEM; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) goto out_free_group; + /* + 
* Going via the iommu_group iterator avoids races, and trivially gives + * us a representative device for the IOMMU API call. We don't actually + * want to iterate beyond the first device (if any). + */ ret = -EIO; - domain->domain = iommu_domain_alloc(bus); + iommu_group_for_each_dev(iommu_group, &domain->domain, + vfio_iommu_domain_alloc); if (!domain->domain) goto out_free_domain; @@ -2258,7 +2260,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &domain->group_list); msi_remap = irq_domain_check_msi_remap() || - iommu_capable(bus, IOMMU_CAP_INTR_REMAP); + iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP, + vfio_iommu_device_capable); if (!allow_unsafe_interrupts && !msi_remap) { pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", -- cgit v1.2.3-58-ga151 From 3b498b6656214d499d57f1e4935448821d0febf9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 24 Jun 2022 18:59:35 +0100 Subject: vfio: Use device_iommu_capable() Use the new interface to check the capabilities for our device specifically. Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4ea5eb64246f1ee188d1a61c3e93b37756932eb7.1656092606.git.robin.murphy@arm.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..4c06b571eaba 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -605,7 +605,7 @@ int vfio_register_group_dev(struct vfio_device *device) * VFIO always sets IOMMU_CACHE because we offer no way for userspace to * restore cache coherency. */ - if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY)) + if (!device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) return -EINVAL; return __vfio_register_dev(device, -- cgit v1.2.3-58-ga151 From a13b1e472b93f69c35976351e59831564ed6a376 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 22 Jun 2022 00:56:51 -0400 Subject: vfio: check vfio_register_iommu_driver() return value As vfio_register_iommu_driver() can fail, we should check the return value. 
Signed-off-by: Bo Liu Acked-by: Cornelia Huck Link: https://lore.kernel.org/r/20220622045651.5416-1-liubo03@inspur.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..8f435c0d7748 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2156,13 +2156,17 @@ static int __init vfio_init(void) if (ret) goto err_alloc_chrdev; - pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); - #ifdef CONFIG_VFIO_NOIOMMU - vfio_register_iommu_driver(&vfio_noiommu_ops); + ret = vfio_register_iommu_driver(&vfio_noiommu_ops); #endif + if (ret) + goto err_driver_register; + + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; +err_driver_register: + unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); err_alloc_chrdev: class_destroy(vfio.class); vfio.class = NULL; -- cgit v1.2.3-58-ga151 From 2b1c1906286fa547845f5385ee72db74b2b0251d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 Jun 2022 18:59:09 +0300 Subject: vfio/mlx5: Protect mlx5vf_disable_fds() upon close device Protect mlx5vf_disable_fds() upon close device to be called under the state mutex as done in all other places. This will prevent a race with any other flow which calls mlx5vf_disable_fds() as of health/recovery upon MLX5_PF_NOTIFY_DISABLE_VF event. Encapsulate this functionality in a separate function named mlx5vf_cmd_close_migratable() to consider migration caps and for further usage upon close device. Fixes: 6fadb021266d ("vfio/mlx5: Implement vfio_pci driver for mlx5 devices") Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20220628155910.171454-2-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/mlx5/cmd.c | 10 ++++++++++ drivers/vfio/pci/mlx5/cmd.h | 1 + drivers/vfio/pci/mlx5/main.c | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 9b9f33ca270a..cdd0c667dc77 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -88,6 +88,16 @@ static int mlx5fv_vf_event(struct notifier_block *nb, return 0; } +void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) +{ + if (!mvdev->migrate_cap) + return; + + mutex_lock(&mvdev->state_mutex); + mlx5vf_disable_fds(mvdev); + mlx5vf_state_mutex_unlock(mvdev); +} + void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev) { if (!mvdev->migrate_cap) diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 6c3112fdd8b1..aa692d9ce656 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -64,6 +64,7 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, size_t *state_size); void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf); int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 0558d0649ddb..d754990f0662 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -570,7 +570,7 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) 
struct mlx5vf_pci_core_device *mvdev = container_of( core_vdev, struct mlx5vf_pci_core_device, core_device.vdev); - mlx5vf_disable_fds(mvdev); + mlx5vf_cmd_close_migratable(mvdev); vfio_pci_core_close_device(core_vdev); } -- cgit v1.2.3-58-ga151 From 6e97eba8ad8748fabb795cffc5d9e1a7dcfd7367 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 28 Jun 2022 18:59:10 +0300 Subject: vfio: Split migration ops from main device ops vfio core checks whether the driver sets some migration op (e.g. set_state/get_state) and accordingly calls its op. However, currently mlx5 driver sets the above ops without regards to its migration caps. This might lead to unexpected usage/Oops if user space may call to the above ops even if the driver doesn't support migration. As for example, the migration state_mutex is not initialized in that case. The cleanest way to manage that seems to split the migration ops from the main device ops, this will let the driver setting them separately from the main ops when it's applicable. As part of that, validate ops construction on registration and include a check for VFIO_MIGRATION_STOP_COPY since the uAPI claims it must be set in migration_flags. HISI driver was changed as well to match this scheme. This scheme may enable down the road to come with some extra group of ops (e.g. DMA log) that can be set without regards to the other options based on driver caps. Fixes: 6fadb021266d ("vfio/mlx5: Implement vfio_pci driver for mlx5 devices") Reviewed-by: Kevin Tian Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/20220628155910.171454-3-yishaih@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 11 +++++++--- drivers/vfio/pci/mlx5/cmd.c | 4 +++- drivers/vfio/pci/mlx5/cmd.h | 3 ++- drivers/vfio/pci/mlx5/main.c | 9 +++++--- drivers/vfio/pci/vfio_pci_core.c | 7 ++++++ drivers/vfio/vfio.c | 11 +++++----- include/linux/vfio.h | 30 +++++++++++++++++--------- 7 files changed, 51 insertions(+), 24 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 4def43f5f7b6..ea762e28c1cc 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1185,7 +1185,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) if (ret) return ret; - if (core_vdev->ops->migration_set_state) { + if (core_vdev->mig_ops) { ret = hisi_acc_vf_qm_init(hisi_acc_vdev); if (ret) { vfio_pci_core_disable(vdev); @@ -1208,6 +1208,11 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) vfio_pci_core_close_device(core_vdev); } +static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = { + .migration_set_state = hisi_acc_vfio_pci_set_device_state, + .migration_get_state = hisi_acc_vfio_pci_get_device_state, +}; + static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .name = "hisi-acc-vfio-pci-migration", .open_device = hisi_acc_vfio_pci_open_device, @@ -1219,8 +1224,6 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .mmap = hisi_acc_vfio_pci_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, - .migration_set_state = hisi_acc_vfio_pci_set_device_state, - .migration_get_state = hisi_acc_vfio_pci_get_device_state, }; static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { @@ -1272,6 +1275,8 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device if (!ret) { 
vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, &hisi_acc_vfio_pci_migrn_ops); + hisi_acc_vdev->core_device.vdev.mig_ops = + &hisi_acc_vfio_pci_migrn_state_ops; } else { pci_warn(pdev, "migration support failed, continue with generic interface\n"); vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index cdd0c667dc77..dd5d7bfe0a49 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -108,7 +108,8 @@ void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev) destroy_workqueue(mvdev->cb_wq); } -void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, + const struct vfio_migration_ops *mig_ops) { struct pci_dev *pdev = mvdev->core_device.pdev; int ret; @@ -149,6 +150,7 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev) mvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; + mvdev->core_device.vdev.mig_ops = mig_ops; end: mlx5_vf_put_core_dev(mvdev->mdev); diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index aa692d9ce656..8208f4701a90 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -62,7 +62,8 @@ int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, size_t *state_size); -void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, + const struct vfio_migration_ops *mig_ops); void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index d754990f0662..a9b63d15c5d3 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -574,6 +574,11 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) vfio_pci_core_close_device(core_vdev); } +static const struct vfio_migration_ops mlx5vf_pci_mig_ops = { + .migration_set_state = mlx5vf_pci_set_device_state, + .migration_get_state = mlx5vf_pci_get_device_state, +}; + static const struct vfio_device_ops mlx5vf_pci_ops = { .name = "mlx5-vfio-pci", .open_device = mlx5vf_pci_open_device, @@ -585,8 +590,6 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, - .migration_set_state = mlx5vf_pci_set_device_state, - .migration_get_state = mlx5vf_pci_get_device_state, }; static int mlx5vf_pci_probe(struct pci_dev *pdev, @@ -599,7 +602,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, if (!mvdev) return -ENOMEM; vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops); - mlx5vf_cmd_set_migratable(mvdev); + mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops); dev_set_drvdata(&pdev->dev, &mvdev->core_device); ret = vfio_pci_core_register_device(&mvdev->core_device); if (ret) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index a0d69ddaf90d..2efa06b1fafa 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1855,6 +1855,13 @@ int 
vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; + if (vdev->vdev.mig_ops) { + if (!(vdev->vdev.mig_ops->migration_get_state && + vdev->vdev.mig_ops->migration_set_state) || + !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY)) + return -EINVAL; + } + /* * Prevent binding to PFs with VFs enabled, the VFs might be in use * by the host or other users. We cannot capture the VFs if they diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..aac9213a783d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1541,8 +1541,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, struct file *filp = NULL; int ret; - if (!device->ops->migration_set_state || - !device->ops->migration_get_state) + if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, @@ -1558,7 +1557,8 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, if (flags & VFIO_DEVICE_FEATURE_GET) { enum vfio_device_mig_state curr_state; - ret = device->ops->migration_get_state(device, &curr_state); + ret = device->mig_ops->migration_get_state(device, + &curr_state); if (ret) return ret; mig.device_state = curr_state; @@ -1566,7 +1566,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, } /* Handle the VFIO_DEVICE_FEATURE_SET */ - filp = device->ops->migration_set_state(device, mig.device_state); + filp = device->mig_ops->migration_set_state(device, mig.device_state); if (IS_ERR(filp) || !filp) goto out_copy; @@ -1589,8 +1589,7 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device, }; int ret; - if (!device->ops->migration_set_state || - !device->ops->migration_get_state) + if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 49580fa2073a..4d26e149db81 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -32,6 +32,11 @@ struct vfio_device_set { struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; + /* + * mig_ops is a static property of the vfio_device which must be set + * prior to registering the vfio_device. + */ + const struct vfio_migration_ops *mig_ops; struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; @@ -61,16 +66,6 @@ struct vfio_device { * match, -errno for abort (ex. match with insufficient or incorrect * additional args) * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl - * @migration_set_state: Optional callback to change the migration state for - * devices that support migration. It's mandatory for - * VFIO_DEVICE_FEATURE_MIGRATION migration support. - * The returned FD is used for data transfer according to the FSM - * definition. The driver is responsible to ensure that FD reaches end - * of stream or error whenever the migration FSM leaves a data transfer - * state or before close_device() returns. - * @migration_get_state: Optional callback to get the migration state for - * devices that support migration. It's mandatory for - * VFIO_DEVICE_FEATURE_MIGRATION migration support. 
*/ struct vfio_device_ops { char *name; @@ -87,6 +82,21 @@ struct vfio_device_ops { int (*match)(struct vfio_device *vdev, char *buf); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); +}; + +/** + * @migration_set_state: Optional callback to change the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + * The returned FD is used for data transfer according to the FSM + * definition. The driver is responsible to ensure that FD reaches end + * of stream or error whenever the migration FSM leaves a data transfer + * state or before close_device() returns. + * @migration_get_state: Optional callback to get the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + */ +struct vfio_migration_ops { struct file *(*migration_set_state)( struct vfio_device *device, enum vfio_device_mig_state new_state); -- cgit v1.2.3-58-ga151 From 1c61d51e9695d00535d28fc2e123ba9397378707 Mon Sep 17 00:00:00 2001 From: Liam Ni Date: Sat, 25 Jun 2022 19:42:39 +0800 Subject: vfio: check iommu_group_set_name() return value As iommu_group_set_name() can fail, we should check the return value. Signed-off-by: Liam Ni Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220625114239.9301-1-zhiguangni01@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..ca823eeac237 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -504,7 +504,9 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, if (IS_ERR(iommu_group)) return ERR_CAST(iommu_group); - iommu_group_set_name(iommu_group, "vfio-noiommu"); + ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); + if (ret) + goto out_put_group; ret = iommu_group_add_device(iommu_group, dev); if (ret) goto out_put_group; -- cgit v1.2.3-58-ga151 From 6641085e8d7b3f061911517f79a2a15a0a21b97b Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Wed, 29 Jun 2022 10:29:48 +0800 Subject: vfio: Clear the caps->buf to NULL after free On buffer resize failure, vfio_info_cap_add() will free the buffer, report zero for the size, and return -ENOMEM. As additional hardening, also clear the buffer pointer to prevent any chance of a double free. Signed-off-by: Schspa Shi Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20220629022948.55608-1-schspa@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..a0fb93866f61 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1812,6 +1812,7 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); if (!buf) { kfree(caps->buf); + caps->buf = NULL; caps->size = 0; return ERR_PTR(-ENOMEM); } -- cgit v1.2.3-58-ga151 From ffed0518d871482e26c5826c0875bea6775446da Mon Sep 17 00:00:00 2001 From: Li Zhe Date: Mon, 27 Jun 2022 11:51:09 +0800 Subject: vfio: remove useless judgement In function vfio_dma_do_unmap(), we currently prevent process to unmap vfio dma region whose mm_struct is different from the vfio_dma->task. 
In our virtual machine scenario which is using kvm and qemu, this judgement stops us from liveupgrading our qemu, which uses fork() && exec() to load the new binary but the new process cannot do the VFIO_IOMMU_UNMAP_DMA action during vm exit because of this judgement. This judgement is added in commit 8f0d5bb95f76 ("vfio iommu type1: Add task structure to vfio_dma") for the security reason. But it seems that no other task who has no family relationship with old and new process can get the same vfio_dma struct here for the reason of resource isolation. So this patch delete it. Signed-off-by: Li Zhe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220627035109.73745-1-lizhe.67@bytedance.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c13b9290e357..a8ff00dad834 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1377,12 +1377,6 @@ again: if (!iommu->v2 && iova > dma->iova) break; - /* - * Task with same address space who mapped this iova range is - * allowed to unmap the iova range. - */ - if (dma->task->mm != current->mm) - break; if (invalidate_vaddr) { if (dma->vaddr_invalid) { -- cgit v1.2.3-58-ga151 From 330c179976f3801526bf222b010b669bf6743098 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 27 Jun 2022 00:41:19 -0700 Subject: vfio: Move "device->open_count--" out of group_rwsem in vfio_device_open() We do not protect the vfio_device::open_count with group_rwsem elsewhere (see vfio_device_fops_release as a comparison, where we already drop group_rwsem before open_count--). So move the group_rwsem unlock prior to open_count--. This change now also drops group_rswem before setting device->kvm = NULL, but that's also OK (again, just like vfio_device_fops_release). The setting of device->kvm before open_device is technically done while holding the group_rwsem, this is done to protect the group kvm value we are copying from, and we should not be relying on that to protect the contents of device->kvm; instead we assume this value will not change until after the device is closed and while under the dev_set->lock. Cc: Matthew Rosato Cc: Jason Gunthorpe Signed-off-by: Yi Liu Reviewed-by: Matthew Rosato Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220627074119.523274-1-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..44c3bf8023ac 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1146,10 +1146,10 @@ err_close_device: if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); err_undo_count: + up_read(&device->group->group_rwsem); device->open_count--; if (device->open_count == 0 && device->kvm) device->kvm = NULL; - up_read(&device->group->group_rwsem); mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); err_unassign_container: -- cgit v1.2.3-58-ga151 From 6577067d7f044923aa26129766d6b457030a251b Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Sun, 3 Jul 2022 22:36:49 -0400 Subject: vfio/pci: fix the wrong word This patch fixes a wrong word in comment. 
Signed-off-by: Bo Liu Link: https://lore.kernel.org/r/20220704023649.3913-1-liubo03@inspur.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 9343f597182d..97e5ade6efb3 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1728,7 +1728,7 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) /* * Config space, caps and ecaps are all dword aligned, so we could * use one byte per dword to record the type. However, there are - * no requiremenst on the length of a capability, so the gap between + * no requirements on the length of a capability, so the gap between * capabilities needs byte granularity. */ map = kmalloc(pdev->cfg_size, GFP_KERNEL); -- cgit v1.2.3-58-ga151 From ff4f65e4ddcebb96bee503e644160b1f9d57dbac Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Sat, 2 Jul 2022 02:46:13 -0400 Subject: vfio/spapr_tce: Remove the unused parameters container The parameter of container has been unused for tce_iommu_unuse_page. So, we should delete it. Signed-off-by: Deming Wang Reviewed-by: Alexey Kardashevskiy Link: https://lore.kernel.org/r/20220702064613.5293-1-wangdeming@inspur.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 708a95e61831..ea3d17a94e94 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -378,8 +378,7 @@ static void tce_iommu_release(void *iommu_data) kfree(container); } -static void tce_iommu_unuse_page(struct tce_container *container, - unsigned long hpa) +static void tce_iommu_unuse_page(unsigned long hpa) { struct page *page; @@ -474,7 +473,7 @@ static int tce_iommu_clear(struct tce_container *container, continue; } - tce_iommu_unuse_page(container, oldhpa); + tce_iommu_unuse_page(oldhpa); } iommu_tce_kill(tbl, firstentry, pages); @@ -524,7 +523,7 @@ static long tce_iommu_build(struct tce_container *container, ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i, &hpa, &dirtmp); if (ret) { - tce_iommu_unuse_page(container, hpa); + tce_iommu_unuse_page(hpa); pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", __func__, entry << tbl->it_page_shift, tce, ret); @@ -532,7 +531,7 @@ static long tce_iommu_build(struct tce_container *container, } if (dirtmp != DMA_NONE) - tce_iommu_unuse_page(container, hpa); + tce_iommu_unuse_page(hpa); tce += IOMMU_PAGE_SIZE(tbl); } -- cgit v1.2.3-58-ga151 From ce4b4657ff18925c315855aa290e93c5fa652d96 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 Jul 2022 21:02:48 -0300 Subject: vfio: Replace the DMA unmapping notifier with a callback Instead of having drivers register the notifier with explicit code just have them provide a dma_unmap callback op in their driver ops and rely on the core code to wire it up. 
Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Tony Krowiak Reviewed-by: Eric Farman Reviewed-by: Zhenyu Wang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/kvmgt.c | 75 +++++--------------- drivers/s390/cio/vfio_ccw_ops.c | 39 +++------- drivers/s390/cio/vfio_ccw_private.h | 2 - drivers/s390/crypto/vfio_ap_ops.c | 53 +++----------- drivers/s390/crypto/vfio_ap_private.h | 3 - drivers/vfio/vfio.c | 129 ++++++++++++---------------------- drivers/vfio/vfio.h | 3 + include/linux/vfio.h | 21 ++---- 9 files changed, 86 insertions(+), 240 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index aee1a45da74b..705689e64011 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -226,7 +226,6 @@ struct intel_vgpu { unsigned long nr_cache_entries; struct mutex cache_lock; - struct notifier_block iommu_notifier; atomic_t released; struct kvm_page_track_notifier_node track_node; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e2f6c56ab342..ecd5bb37b63a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) return ret; } -static int intel_vgpu_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova, + u64 length) { - struct intel_vgpu *vgpu = - container_of(nb, struct intel_vgpu, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - struct gvt_dma *entry; - unsigned long iov_pfn, end_iov_pfn; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + struct gvt_dma *entry; + u64 iov_pfn = iova >> PAGE_SHIFT; + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE; - iov_pfn = unmap->iova >> PAGE_SHIFT; - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; + mutex_lock(&vgpu->cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - mutex_lock(&vgpu->cache_lock); - for (; iov_pfn < end_iov_pfn; iov_pfn++) { - entry = __gvt_cache_find_gfn(vgpu, iov_pfn); - if (!entry) - continue; - - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, - entry->size); - __gvt_cache_remove_entry(vgpu, entry); - } - mutex_unlock(&vgpu->cache_lock); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); + __gvt_cache_remove_entry(vgpu, entry); } - - return NOTIFY_OK; + mutex_unlock(&vgpu->cache_lock); } static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) @@ -783,36 +774,20 @@ out: static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - unsigned long events; - int ret; - - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, - &vgpu->iommu_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", - ret); - goto out; - } - - ret = -EEXIST; if (vgpu->attached) - goto undo_iommu; + return -EEXIST; - ret = -ESRCH; if (!vgpu->vfio_device.kvm || vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is 
required to use Intel vGPU\n"); - goto undo_iommu; + return -ESRCH; } kvm_get_kvm(vgpu->vfio_device.kvm); - ret = -EEXIST; if (__kvmgt_vgpu_exist(vgpu)) - goto undo_iommu; + return -EEXIST; vgpu->attached = true; @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) atomic_set(&vgpu->released, 0); return 0; - -undo_iommu: - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); -out: - return ret; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - int ret; if (!vgpu->attached) return; @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) intel_gvt_release_vgpu(vgpu); - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for iommu failed: %d\n", ret); - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .write = intel_vgpu_write, .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, + .dma_unmap = intel_vgpu_dma_unmap, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index bc2176421dc5..0047fd88f938 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -33,30 +33,16 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) return 0; } -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, - unsigned long action, - void *data) +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) { struct vfio_ccw_private *private = - container_of(nb, struct vfio_ccw_private, nb); - - /* - * Vendor drivers MUST unpin pages in response to an - * invalidation. - */ - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - - if (!cp_iova_pinned(&private->cp, unmap->iova)) - return NOTIFY_OK; - - if (vfio_ccw_mdev_reset(private)) - return NOTIFY_BAD; + container_of(vdev, struct vfio_ccw_private, vdev); - return NOTIFY_OK; - } + /* Drivers MUST unpin pages in response to an invalidation. 
*/ + if (!cp_iova_pinned(&private->cp, iova)) + return; - return NOTIFY_DONE; + vfio_ccw_mdev_reset(private); } static ssize_t name_show(struct mdev_type *mtype, @@ -154,23 +140,15 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) { struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; int ret; /* Device cannot simply be opened again from this state */ if (private->state == VFIO_CCW_STATE_NOT_OPER) return -EINVAL; - private->nb.notifier_call = vfio_ccw_mdev_notifier; - - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, - &events, &private->nb); - if (ret) - return ret; - ret = vfio_ccw_register_async_dev_regions(private); if (ret) - goto out_unregister; + return ret; ret = vfio_ccw_register_schib_dev_regions(private); if (ret) @@ -190,7 +168,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) out_unregister: vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); return ret; } @@ -201,7 +178,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); } static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, @@ -624,6 +600,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .write = vfio_ccw_mdev_write, .ioctl = vfio_ccw_mdev_ioctl, .request = vfio_ccw_mdev_request, + .dma_unmap = vfio_ccw_dma_unmap, }; struct mdev_driver vfio_ccw_mdev_driver = { diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index abac532bf03e..cd24b7fada91 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -73,7 +73,6 @@ struct vfio_ccw_crw { * @state: internal state of the device * @completion: synchronization helper of the I/O completion * @avail: available for creating a mediated device - * @nb: notifier for vfio events * @io_region: MMIO region to input/output I/O arguments/results * @io_mutex: protect against concurrent update of I/O regions * @region: additional regions for other subchannel operations @@ -96,7 +95,6 @@ struct vfio_ccw_private { int state; struct completion *completion; atomic_t avail; - struct notifier_block nb; struct ccw_io_region *io_region; struct mutex io_mutex; struct vfio_ccw_region *region; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a7d2a95796d3..bb1a1677c5c2 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, return 0; } -/** - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback - * - * @nb: The notifier block - * @action: Action to be taken - * @data: data associated with the request - * - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we - * pinned before). Other requests are ignored. - * - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE. 
- */ -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, + u64 length) { - struct ap_matrix_mdev *matrix_mdev; - - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT; - - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); - return NOTIFY_OK; - } + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); + unsigned long g_pfn = iova >> PAGE_SHIFT; - return NOTIFY_DONE; + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); } /** @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - unsigned long events; - int ret; if (!vdev->kvm) return -EINVAL; - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); - if (ret) - return ret; - - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, - &matrix_mdev->iommu_notifier); - if (ret) - goto err_kvm; - return 0; - -err_kvm: - vfio_ap_mdev_unset_kvm(matrix_mdev); - return ret; + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); } static void vfio_ap_mdev_close_device(struct vfio_device *vdev) @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, - &matrix_mdev->iommu_notifier); vfio_ap_mdev_unset_kvm(matrix_mdev); } @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .open_device = vfio_ap_mdev_open_device, .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, + .dma_unmap = vfio_ap_mdev_dma_unmap, }; static struct mdev_driver vfio_ap_matrix_driver = { diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index a26efd804d0d..abb59d59f81b 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -81,8 +81,6 @@ struct ap_matrix { * @node: allows the ap_matrix_mdev struct to be added to a list * @matrix: the adapters, usage domains and control domains assigned to the * mediated matrix device. - * @iommu_notifier: notifier block used for specifying callback function for - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even * @kvm: the struct holding guest's state * @pqap_hook: the function pointer to the interception handler for the * PQAP(AQIC) instruction. 
@@ -92,7 +90,6 @@ struct ap_matrix_mdev { struct vfio_device vdev; struct list_head node; struct ap_matrix matrix; - struct notifier_block iommu_notifier; struct kvm *kvm; crypto_hook pqap_hook; struct mdev_device *mdev; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index bd84ca7c5e35..83c375fa2421 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) { struct vfio_iommu_driver *driver, *tmp; + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier)) + return -EINVAL; + driver = kzalloc(sizeof(*driver), GFP_KERNEL); if (!driver) return -ENOMEM; @@ -1079,8 +1082,20 @@ static void vfio_device_unassign_container(struct vfio_device *device) up_write(&device->group->group_rwsem); } +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct vfio_device *vfio_device = + container_of(nb, struct vfio_device, iommu_nb); + struct vfio_iommu_type1_dma_unmap *unmap = data; + + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size); + return NOTIFY_OK; +} + static struct file *vfio_device_open(struct vfio_device *device) { + struct vfio_iommu_driver *iommu_driver; struct file *filep; int ret; @@ -1111,6 +1126,18 @@ static struct file *vfio_device_open(struct vfio_device *device) if (ret) goto err_undo_count; } + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->register_notifier) { + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; + + device->iommu_nb.notifier_call = vfio_iommu_notifier; + iommu_driver->ops->register_notifier( + device->group->container->iommu_data, &events, + &device->iommu_nb); + } + up_read(&device->group->group_rwsem); } mutex_unlock(&device->dev_set->lock); @@ -1145,8 +1172,16 @@ static struct file *vfio_device_open(struct vfio_device *device) err_close_device: mutex_lock(&device->dev_set->lock); down_read(&device->group->group_rwsem); - if (device->open_count == 1 && device->ops->close_device) + if (device->open_count == 1 && device->ops->close_device) { device->ops->close_device(device); + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->unregister_notifier) + iommu_driver->ops->unregister_notifier( + device->group->container->iommu_data, + &device->iommu_nb); + } err_undo_count: up_read(&device->group->group_rwsem); device->open_count--; @@ -1341,12 +1376,20 @@ static const struct file_operations vfio_group_fops = { static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; + struct vfio_iommu_driver *iommu_driver; mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); down_read(&device->group->group_rwsem); if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->unregister_notifier) + iommu_driver->ops->unregister_notifier( + device->group->container->iommu_data, + &device->iommu_nb); up_read(&device->group->group_rwsem); device->open_count--; if (device->open_count == 0) @@ -2029,90 +2072,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, } EXPORT_SYMBOL(vfio_dma_rw); -static int vfio_register_iommu_notifier(struct vfio_group *group, - unsigned long *events, - struct 
notifier_block *nb) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - lockdep_assert_held_read(&group->group_rwsem); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->register_notifier)) - ret = driver->ops->register_notifier(container->iommu_data, - events, nb); - else - ret = -ENOTTY; - - return ret; -} - -static int vfio_unregister_iommu_notifier(struct vfio_group *group, - struct notifier_block *nb) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - lockdep_assert_held_read(&group->group_rwsem); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->unregister_notifier)) - ret = driver->ops->unregister_notifier(container->iommu_data, - nb); - else - ret = -ENOTTY; - - return ret; -} - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, unsigned long *events, - struct notifier_block *nb) -{ - struct vfio_group *group = device->group; - int ret; - - if (!nb || !events || (*events == 0) || - !vfio_assert_device_open(device)) - return -EINVAL; - - switch (type) { - case VFIO_IOMMU_NOTIFY: - ret = vfio_register_iommu_notifier(group, events, nb); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(vfio_register_notifier); - -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb) -{ - struct vfio_group *group = device->group; - int ret; - - if (!nb || !vfio_assert_device_open(device)) - return -EINVAL; - - switch (type) { - case VFIO_IOMMU_NOTIFY: - ret = vfio_unregister_iommu_notifier(group, nb); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(vfio_unregister_notifier); - /* * Module/class support */ diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index a67130221151..25da02ca1568 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, }; +/* events for register_notifier() */ +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) + /** * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 4d26e149db81..1f9fc7a9be9e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -49,6 +49,7 @@ struct vfio_device { unsigned int open_count; struct completion comp; struct list_head group_next; + struct notifier_block iommu_nb; }; /** @@ -65,6 +66,8 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @dma_unmap: Called when userspace unmaps IOVA from the container + * this device is attached to. 
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl */ struct vfio_device_ops { @@ -80,6 +83,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); }; @@ -164,23 +168,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -/* each type has independent events */ -enum vfio_notify_type { - VFIO_IOMMU_NOTIFY = 0, -}; - -/* events for VFIO_IOMMU_NOTIFY */ -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, - unsigned long *required_events, - struct notifier_block *nb); -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb); - - /* * Sub-module helpers */ -- cgit v1.2.3-58-ga151 From 8cfc5b60751bcf9b4c6bbab3f6a72d59e0156a89 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 Jul 2022 21:02:49 -0300 Subject: vfio: Replace the iommu notifier with a device list Instead of bouncing the function call to the driver op through a blocking notifier just have the iommu layer call it directly. Register each device that is being attached to the iommu with the lower driver which then threads them on a linked list and calls the appropriate driver op at the right time. Currently the only use is if dma_unmap() is defined. Also, fully lock all the debugging tests on the pinning path that a dma_unmap is registered. 
Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 41 ++++------------ drivers/vfio/vfio.h | 12 ++--- drivers/vfio/vfio_iommu_type1.c | 103 +++++++++++++++++++++++++--------------- include/linux/vfio.h | 2 +- 4 files changed, 81 insertions(+), 77 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 83c375fa2421..b3ce8073cfb1 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -231,7 +231,7 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) { struct vfio_iommu_driver *driver, *tmp; - if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier)) + if (WARN_ON(!ops->register_device != !ops->unregister_device)) return -EINVAL; driver = kzalloc(sizeof(*driver), GFP_KERNEL); @@ -1082,17 +1082,6 @@ static void vfio_device_unassign_container(struct vfio_device *device) up_write(&device->group->group_rwsem); } -static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action, - void *data) -{ - struct vfio_device *vfio_device = - container_of(nb, struct vfio_device, iommu_nb); - struct vfio_iommu_type1_dma_unmap *unmap = data; - - vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size); - return NOTIFY_OK; -} - static struct file *vfio_device_open(struct vfio_device *device) { struct vfio_iommu_driver *iommu_driver; @@ -1128,15 +1117,9 @@ static struct file *vfio_device_open(struct vfio_device *device) } iommu_driver = device->group->container->iommu_driver; - if (device->ops->dma_unmap && iommu_driver && - iommu_driver->ops->register_notifier) { - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - - device->iommu_nb.notifier_call = vfio_iommu_notifier; - iommu_driver->ops->register_notifier( - device->group->container->iommu_data, &events, - &device->iommu_nb); - } + if (iommu_driver && iommu_driver->ops->register_device) + iommu_driver->ops->register_device( + device->group->container->iommu_data, device); up_read(&device->group->group_rwsem); } @@ -1176,11 +1159,9 @@ err_close_device: device->ops->close_device(device); iommu_driver = device->group->container->iommu_driver; - if (device->ops->dma_unmap && iommu_driver && - iommu_driver->ops->unregister_notifier) - iommu_driver->ops->unregister_notifier( - device->group->container->iommu_data, - &device->iommu_nb); + if (iommu_driver && iommu_driver->ops->unregister_device) + iommu_driver->ops->unregister_device( + device->group->container->iommu_data, device); } err_undo_count: up_read(&device->group->group_rwsem); @@ -1385,11 +1366,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) device->ops->close_device(device); iommu_driver = device->group->container->iommu_driver; - if (device->ops->dma_unmap && iommu_driver && - iommu_driver->ops->unregister_notifier) - iommu_driver->ops->unregister_notifier( - device->group->container->iommu_data, - &device->iommu_nb); + if (iommu_driver && iommu_driver->ops->unregister_device) + iommu_driver->ops->unregister_device( + device->group->container->iommu_data, device); up_read(&device->group->group_rwsem); device->open_count--; if (device->open_count == 0) diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 25da02ca1568..4a7db1f3c33e 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -33,9 +33,6 @@ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, }; -/* 
events for register_notifier() */ -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) - /** * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks */ @@ -58,11 +55,10 @@ struct vfio_iommu_driver_ops { unsigned long *phys_pfn); int (*unpin_pages)(void *iommu_data, unsigned long *user_pfn, int npage); - int (*register_notifier)(void *iommu_data, - unsigned long *events, - struct notifier_block *nb); - int (*unregister_notifier)(void *iommu_data, - struct notifier_block *nb); + void (*register_device)(void *iommu_data, + struct vfio_device *vdev); + void (*unregister_device)(void *iommu_data, + struct vfio_device *vdev); int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, void *data, size_t count, bool write); struct iommu_domain *(*group_iommu_domain)(void *iommu_data, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index db24062fb343..026a1d2553a2 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -67,7 +67,8 @@ struct vfio_iommu { struct list_head iova_list; struct mutex lock; struct rb_root dma_list; - struct blocking_notifier_head notifier; + struct list_head device_list; + struct mutex device_list_lock; unsigned int dma_avail; unsigned int vaddr_invalid_count; uint64_t pgsize_bitmap; @@ -865,8 +866,8 @@ again: } } - /* Fail if notifier list is empty */ - if (!iommu->notifier.head) { + /* Fail if no dma_umap notifier is registered */ + if (list_empty(&iommu->device_list)) { ret = -EINVAL; goto pin_done; } @@ -1287,6 +1288,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size) return 0; } +/* + * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate + * and unmap iovas within the range we're about to unmap. Drivers MUST unpin + * pages in response to an invalidation. + */ +static void vfio_notify_dma_unmap(struct vfio_iommu *iommu, + struct vfio_dma *dma) +{ + struct vfio_device *device; + + if (list_empty(&iommu->device_list)) + return; + + /* + * The device is expected to call vfio_unpin_pages() for any IOVA it has + * pinned within the range. Since vfio_unpin_pages() will eventually + * call back down to this code and try to obtain the iommu->lock we must + * drop it. + */ + mutex_lock(&iommu->device_list_lock); + mutex_unlock(&iommu->lock); + + list_for_each_entry(device, &iommu->device_list, iommu_entry) + device->ops->dma_unmap(device, dma->iova, dma->size); + + mutex_unlock(&iommu->device_list_lock); + mutex_lock(&iommu->lock); +} + static int vfio_dma_do_unmap(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_unmap *unmap, struct vfio_bitmap *bitmap) @@ -1400,8 +1430,6 @@ again: } if (!RB_EMPTY_ROOT(&dma->pfn_list)) { - struct vfio_iommu_type1_dma_unmap nb_unmap; - if (dma_last == dma) { BUG_ON(++retries > 10); } else { @@ -1409,20 +1437,7 @@ again: retries = 0; } - nb_unmap.iova = dma->iova; - nb_unmap.size = dma->size; - - /* - * Notify anyone (mdev vendor drivers) to invalidate and - * unmap iovas within the range we're about to unmap. - * Vendor drivers MUST unpin pages in response to an - * invalidation. 
- */ - mutex_unlock(&iommu->lock); - blocking_notifier_call_chain(&iommu->notifier, - VFIO_IOMMU_NOTIFY_DMA_UNMAP, - &nb_unmap); - mutex_lock(&iommu->lock); + vfio_notify_dma_unmap(iommu, dma); goto again; } @@ -2475,7 +2490,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (list_empty(&iommu->emulated_iommu_groups) && list_empty(&iommu->domain_list)) { - WARN_ON(iommu->notifier.head); + WARN_ON(!list_empty(&iommu->device_list)); vfio_iommu_unmap_unpin_all(iommu); } goto detach_group_done; @@ -2507,7 +2522,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (list_empty(&domain->group_list)) { if (list_is_singular(&iommu->domain_list)) { if (list_empty(&iommu->emulated_iommu_groups)) { - WARN_ON(iommu->notifier.head); + WARN_ON(!list_empty( + &iommu->device_list)); vfio_iommu_unmap_unpin_all(iommu); } else { vfio_iommu_unmap_unpin_reaccount(iommu); @@ -2568,7 +2584,8 @@ static void *vfio_iommu_type1_open(unsigned long arg) iommu->dma_avail = dma_entry_limit; iommu->container_open = true; mutex_init(&iommu->lock); - BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); + mutex_init(&iommu->device_list_lock); + INIT_LIST_HEAD(&iommu->device_list); init_waitqueue_head(&iommu->vaddr_wait); iommu->pgsize_bitmap = PAGE_MASK; INIT_LIST_HEAD(&iommu->emulated_iommu_groups); @@ -3005,28 +3022,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } } -static int vfio_iommu_type1_register_notifier(void *iommu_data, - unsigned long *events, - struct notifier_block *nb) +static void vfio_iommu_type1_register_device(void *iommu_data, + struct vfio_device *vdev) { struct vfio_iommu *iommu = iommu_data; - /* clear known events */ - *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP; - - /* refuse to register if still events remaining */ - if (*events) - return -EINVAL; + if (!vdev->ops->dma_unmap) + return; - return blocking_notifier_chain_register(&iommu->notifier, nb); + /* + * list_empty(&iommu->device_list) is tested under the iommu->lock while + * iteration for dma_unmap must be done under the device_list_lock. + * Holding both locks here allows avoiding the device_list_lock in + * several fast paths. 
See vfio_notify_dma_unmap() + */ + mutex_lock(&iommu->lock); + mutex_lock(&iommu->device_list_lock); + list_add(&vdev->iommu_entry, &iommu->device_list); + mutex_unlock(&iommu->device_list_lock); + mutex_unlock(&iommu->lock); } -static int vfio_iommu_type1_unregister_notifier(void *iommu_data, - struct notifier_block *nb) +static void vfio_iommu_type1_unregister_device(void *iommu_data, + struct vfio_device *vdev) { struct vfio_iommu *iommu = iommu_data; - return blocking_notifier_chain_unregister(&iommu->notifier, nb); + if (!vdev->ops->dma_unmap) + return; + + mutex_lock(&iommu->lock); + mutex_lock(&iommu->device_list_lock); + list_del(&vdev->iommu_entry); + mutex_unlock(&iommu->device_list_lock); + mutex_unlock(&iommu->lock); } static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, @@ -3160,8 +3189,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .detach_group = vfio_iommu_type1_detach_group, .pin_pages = vfio_iommu_type1_pin_pages, .unpin_pages = vfio_iommu_type1_unpin_pages, - .register_notifier = vfio_iommu_type1_register_notifier, - .unregister_notifier = vfio_iommu_type1_unregister_notifier, + .register_device = vfio_iommu_type1_register_device, + .unregister_device = vfio_iommu_type1_unregister_device, .dma_rw = vfio_iommu_type1_dma_rw, .group_iommu_domain = vfio_iommu_type1_group_iommu_domain, .notify = vfio_iommu_type1_notify, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 1f9fc7a9be9e..19cefbaa3d06 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -49,7 +49,7 @@ struct vfio_device { unsigned int open_count; struct completion comp; struct list_head group_next; - struct notifier_block iommu_nb; + struct list_head iommu_entry; }; /** -- cgit v1.2.3-58-ga151 From 9cb633acfe65101854fce456ccba4108db3ccbdb Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 14 Jul 2022 18:09:12 +1000 Subject: vfio/spapr_tce: Fix the comment Grepping for "iommu_ops" finds this spot and gives wrong impression that iommu_ops is used in here, fix the comment. Signed-off-by: Alexey Kardashevskiy Link: https://lore.kernel.org/r/20220714080912.3713509-1-aik@ozlabs.ru [aw: convert to multi-line comment] Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_spapr_tce.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index ea3d17a94e94..169f07ac162d 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1265,7 +1265,10 @@ static int tce_iommu_attach_group(void *iommu_data, goto unlock_exit; } - /* Check if new group has the same iommu_ops (i.e. compatible) */ + /* + * Check if new group has the same iommu_table_group_ops + * (i.e. compatible) + */ list_for_each_entry(tcegrp, &container->group_list, next) { struct iommu_table_group *table_group_tmp; -- cgit v1.2.3-58-ga151 From e8f90717ed3b58e81c480b3aa38e641c0da5a456 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:47 -0700 Subject: vfio: Make vfio_unpin_pages() return void There's only one caller that checks its return value with a WARN_ON_ONCE, while all other callers don't check the return value at all. Above that, an undo function should not fail. So, simplify the API to return void by embedding similar WARN_ONs. 
Also for users to pinpoint which condition fails, separate WARN_ON lines, yet remove the "driver->ops->unpin_pages" check, since it's unreasonable for callers to unpin on something totally random that wasn't even pinned. And remove NULL pointer checks for they would trigger oops vs. warnings. Note that npage is already validated in the vfio core, thus drop the same check in the type1 code. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Kirti Wankhede Tested-by: Terrence Xu Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-2-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio-mediated-device.rst | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 5 +---- drivers/vfio/vfio.c | 21 +++++++-------------- drivers/vfio/vfio.h | 2 +- drivers/vfio/vfio_iommu_type1.c | 15 ++++++--------- include/linux/vfio.h | 4 ++-- 6 files changed, 18 insertions(+), 31 deletions(-) (limited to 'drivers/vfio') diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst index 1c57815619fd..b0fdf76b339a 100644 --- a/Documentation/driver-api/vfio-mediated-device.rst +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -265,7 +265,7 @@ driver:: int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn); - int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, int npage); These functions call back into the back-end IOMMU module by using the pin_pages diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index ecd5bb37b63a..4d32a2748958 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -231,18 +231,15 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; int total_pages; int npage; - int ret; total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; for (npage = 0; npage < total_pages; npage++) { unsigned long cur_gfn = gfn + npage; - ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1); - drm_WARN_ON(&i915->drm, ret != 1); + vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1); } } diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index b3ce8073cfb1..92b10aafae28 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1983,31 +1983,24 @@ EXPORT_SYMBOL(vfio_pin_pages); * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @npage [in] : count of elements in user_pfn array. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. - * Return error or number of pages unpinned. 
*/ -int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage) +void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage) { struct vfio_container *container; struct vfio_iommu_driver *driver; - int ret; - if (!user_pfn || !npage || !vfio_assert_device_open(device)) - return -EINVAL; + if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) + return; - if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) - return -E2BIG; + if (WARN_ON(!vfio_assert_device_open(device))) + return; /* group->container cannot change while a vfio device is open */ container = device->group->container; driver = container->iommu_driver; - if (likely(driver && driver->ops->unpin_pages)) - ret = driver->ops->unpin_pages(container->iommu_data, user_pfn, - npage); - else - ret = -ENOTTY; - return ret; + driver->ops->unpin_pages(container->iommu_data, user_pfn, npage); } EXPORT_SYMBOL(vfio_unpin_pages); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 4a7db1f3c33e..6a8424b407c7 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -53,7 +53,7 @@ struct vfio_iommu_driver_ops { unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn); - int (*unpin_pages)(void *iommu_data, + void (*unpin_pages)(void *iommu_data, unsigned long *user_pfn, int npage); void (*register_device)(void *iommu_data, struct vfio_device *vdev); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 026a1d2553a2..e49fbe9968ef 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -949,20 +949,16 @@ pin_done: return ret; } -static int vfio_iommu_type1_unpin_pages(void *iommu_data, - unsigned long *user_pfn, - int npage) +static void vfio_iommu_type1_unpin_pages(void *iommu_data, + unsigned long *user_pfn, int npage) { struct vfio_iommu *iommu = iommu_data; bool do_accounting; int i; - if (!iommu || !user_pfn || npage <= 0) - return -EINVAL; - /* Supported for v2 version only */ - if (!iommu->v2) - return -EACCES; + if (WARN_ON(!iommu->v2)) + return; mutex_lock(&iommu->lock); @@ -980,7 +976,8 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data, } mutex_unlock(&iommu->lock); - return i > 0 ? i : -EINVAL; + + WARN_ON(i != npage); } static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 19cefbaa3d06..9f7d74c24925 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -163,8 +163,8 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn); -int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage); +void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -- cgit v1.2.3-58-ga151 From 44abdd1646e1fbfb781972c0bffc90b4eb3e87b3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:51 -0700 Subject: vfio: Pass in starting IOVA to vfio_pin/unpin_pages API The vfio_pin/unpin_pages() so far accepted arrays of PFNs of user IOVA. Among all three callers, there was only one caller possibly passing in a non-contiguous PFN list, which is now ensured to have contiguous PFN inputs too. 
Pass in the starting address with "iova" alone to simplify things, so callers no longer need to maintain a PFN list or to pin/unpin one page at a time. This also allows VFIO to use more efficient implementations of pin/unpin_pages. For now, also update vfio_iommu_type1 to fit this new parameter too, while keeping its input intact (being user_iova) since we don't want to spend too much effort swapping its parameters and local variables at that level. Reviewed-by: Christoph Hellwig Reviewed-by: Kirti Wankhede Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Tony Krowiak Acked-by: Eric Farman Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-6-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio-mediated-device.rst | 4 ++-- drivers/gpu/drm/i915/gvt/kvmgt.c | 18 +++++---------- drivers/s390/cio/vfio_ccw_cp.c | 4 ++-- drivers/s390/crypto/vfio_ap_ops.c | 9 ++++---- drivers/vfio/vfio.c | 27 ++++++++++------------- drivers/vfio/vfio.h | 4 ++-- drivers/vfio/vfio_iommu_type1.c | 15 ++++++------- include/linux/vfio.h | 5 ++--- 8 files changed, 37 insertions(+), 49 deletions(-) (limited to 'drivers/vfio') diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst index b0fdf76b339a..ea32a0f13ddb 100644 --- a/Documentation/driver-api/vfio-mediated-device.rst +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -262,10 +262,10 @@ Translation APIs for Mediated Devices The following APIs are provided for translating user pfn to host pfn in a VFIO driver:: - int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, + int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); - void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); These functions call back into the back-end IOMMU module by using the pin_pages diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 2fee5695515a..8be75c282611 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -231,14 +231,8 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { - int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); - int npage; - - for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; - - vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1); - } + vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT, + DIV_ROUND_UP(size, PAGE_SIZE)); } /* Pin a normal or compound guest page for dma. */ @@ -255,14 +249,14 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, * on stack to hold pfns. 
*/ for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; + dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; unsigned long pfn; - ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1, + ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", - cur_gfn, ret); + gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", + &cur_iova, ret); goto err; } diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 3b94863ad24e..a739262f988d 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -114,7 +114,7 @@ static void pfn_array_unpin(struct pfn_array *pa, continue; } - vfio_unpin_pages(vdev, first, npage); + vfio_unpin_pages(vdev, *first << PAGE_SHIFT, npage); unpinned += npage; npage = 1; } @@ -146,7 +146,7 @@ static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev) continue; } - ret = vfio_pin_pages(vdev, first, npage, + ret = vfio_pin_pages(vdev, *first << PAGE_SHIFT, npage, IOMMU_READ | IOMMU_WRITE, &pa->pa_pfn[pinned]); if (ret < 0) { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 5781059d3ed2..70f484668ca0 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -124,7 +124,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) q->saved_isc = VFIO_AP_ISC_INVALID; } if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { - vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1); + vfio_unpin_pages(&q->matrix_mdev->vdev, q->saved_pfn << PAGE_SHIFT, 1); q->saved_pfn = 0; } } @@ -258,7 +258,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, return status; } - ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1, + ret = vfio_pin_pages(&q->matrix_mdev->vdev, g_pfn << PAGE_SHIFT, 1, IOMMU_READ | IOMMU_WRITE, &h_pfn); switch (ret) { case 1: @@ -301,7 +301,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, break; case AP_RESPONSE_OTHERWISE_CHANGED: /* We could not modify IRQ setings: clear new configuration */ - vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1); + vfio_unpin_pages(&q->matrix_mdev->vdev, g_pfn << PAGE_SHIFT, 1); kvm_s390_gisc_unregister(kvm, isc); break; default: @@ -1232,9 +1232,8 @@ static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - unsigned long g_pfn = iova >> PAGE_SHIFT; - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); + vfio_unpin_pages(&matrix_mdev->vdev, iova, 1); } /** diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 92b10aafae28..ffd1a492eea9 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1934,17 +1934,17 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* - * Pin a set of guest PFNs and return their associated host PFNs for local + * Pin contiguous user pages and return their associated host pages for local * domain only. * @device [in] : device - * @user_pfn [in]: array of user/guest PFNs to be pinned. - * @npage [in] : count of elements in user_pfn array. This count should not - * be greater VFIO_PIN_PAGES_MAX_ENTRIES. + * @iova [in] : starting IOVA of user pages to be pinned. + * @npage [in] : count of pages to be pinned. This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 
* @prot [in] : protection flags * @phys_pfn[out]: array of host PFNs * Return error or number of pages pinned. */ -int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn) { struct vfio_container *container; @@ -1952,8 +1952,7 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, struct vfio_iommu_driver *driver; int ret; - if (!user_pfn || !phys_pfn || !npage || - !vfio_assert_device_open(device)) + if (!phys_pfn || !npage || !vfio_assert_device_open(device)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) @@ -1967,7 +1966,7 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) ret = driver->ops->pin_pages(container->iommu_data, - group->iommu_group, user_pfn, + group->iommu_group, iova, npage, prot, phys_pfn); else ret = -ENOTTY; @@ -1977,15 +1976,13 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, EXPORT_SYMBOL(vfio_pin_pages); /* - * Unpin set of host PFNs for local domain only. + * Unpin contiguous host pages for local domain only. * @device [in] : device - * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest - * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES. - * @npage [in] : count of elements in user_pfn array. This count should not + * @iova [in] : starting address of user pages to be unpinned. + * @npage [in] : count of pages to be unpinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. */ -void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage) +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) { struct vfio_container *container; struct vfio_iommu_driver *driver; @@ -2000,7 +1997,7 @@ void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, container = device->group->container; driver = container->iommu_driver; - driver->ops->unpin_pages(container->iommu_data, user_pfn, npage); + driver->ops->unpin_pages(container->iommu_data, iova, npage); } EXPORT_SYMBOL(vfio_unpin_pages); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 6a8424b407c7..e9767e13f00f 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -50,11 +50,11 @@ struct vfio_iommu_driver_ops { struct iommu_group *group); int (*pin_pages)(void *iommu_data, struct iommu_group *group, - unsigned long *user_pfn, + dma_addr_t user_iova, int npage, int prot, unsigned long *phys_pfn); void (*unpin_pages)(void *iommu_data, - unsigned long *user_pfn, int npage); + dma_addr_t user_iova, int npage); void (*register_device)(void *iommu_data, struct vfio_device *vdev); void (*unregister_device)(void *iommu_data, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e49fbe9968ef..e629e059118c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -829,7 +829,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, static int vfio_iommu_type1_pin_pages(void *iommu_data, struct iommu_group *iommu_group, - unsigned long *user_pfn, + dma_addr_t user_iova, int npage, int prot, unsigned long *phys_pfn) { @@ -841,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, bool do_accounting; dma_addr_t iova; - if (!iommu || !user_pfn || !phys_pfn) + if (!iommu || !phys_pfn) return -EINVAL; /* Supported for v2 version 
only */ @@ -857,7 +857,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, again: if (iommu->vaddr_invalid_count) { for (i = 0; i < npage; i++) { - iova = user_pfn[i] << PAGE_SHIFT; + iova = user_iova + PAGE_SIZE * i; ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma); if (ret < 0) goto pin_done; @@ -882,7 +882,7 @@ again: for (i = 0; i < npage; i++) { struct vfio_pfn *vpfn; - iova = user_pfn[i] << PAGE_SHIFT; + iova = user_iova + PAGE_SIZE * i; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); if (!dma) { ret = -EINVAL; @@ -939,7 +939,7 @@ pin_unwind: for (j = 0; j < i; j++) { dma_addr_t iova; - iova = user_pfn[j] << PAGE_SHIFT; + iova = user_iova + PAGE_SIZE * j; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); vfio_unpin_page_external(dma, iova, do_accounting); phys_pfn[j] = 0; @@ -950,7 +950,7 @@ pin_done: } static void vfio_iommu_type1_unpin_pages(void *iommu_data, - unsigned long *user_pfn, int npage) + dma_addr_t user_iova, int npage) { struct vfio_iommu *iommu = iommu_data; bool do_accounting; @@ -964,10 +964,9 @@ static void vfio_iommu_type1_unpin_pages(void *iommu_data, do_accounting = list_empty(&iommu->domain_list); for (i = 0; i < npage; i++) { + dma_addr_t iova = user_iova + PAGE_SIZE * i; struct vfio_dma *dma; - dma_addr_t iova; - iova = user_pfn[i] << PAGE_SHIFT; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); if (!dma) break; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 9f7d74c24925..9e3b6abcf890 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -161,10 +161,9 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) -int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); -void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage); +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -- cgit v1.2.3-58-ga151 From 8561aa4fb7d72011c2352af2b1b9caf588942181 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:54 -0700 Subject: vfio: Rename user_iova of vfio_dma_rw() Following the updated vfio_pin/unpin_pages(), use the simpler "iova". Reviewed-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-9-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 6 +++--- include/linux/vfio.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index ffd1a492eea9..606a20b605ba 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2012,13 +2012,13 @@ EXPORT_SYMBOL(vfio_unpin_pages); * not a real device DMA, it is not necessary to pin the user space memory. * * @device [in] : VFIO device - * @user_iova [in] : base IOVA of a user space buffer + * @iova [in] : base IOVA of a user space buffer * @data [in] : pointer to kernel buffer * @len [in] : kernel buffer length * @write : indicate read or write * Return error code on failure or 0 on success. 
*/ -int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, +int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write) { struct vfio_container *container; @@ -2034,7 +2034,7 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, if (likely(driver && driver->ops->dma_rw)) ret = driver->ops->dma_rw(container->iommu_data, - user_iova, data, len, write); + iova, data, len, write); else ret = -ENOTTY; return ret; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 9e3b6abcf890..acefd663e63b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -164,7 +164,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); -int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, +int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write); /* -- cgit v1.2.3-58-ga151 From 34a255e67615995f729254307a0581c143e03752 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:56 -0700 Subject: vfio: Replace phys_pfn with pages for vfio_pin_pages() Most of the callers of vfio_pin_pages() want "struct page *" and the low-level mm code to pin pages returns a list of "struct page *" too. So there's no gain in converting "struct page *" to PFN in between. Replace the output parameter "phys_pfn" list with a "pages" list, to simplify callers. This also allows us to replace the vfio_iommu_type1 implementation with a more efficient one. And drop the pfn_valid check in the gvt code, as there is no need to do such a check at a page-backed struct page pointer. For now, also update vfio_iommu_type1 to fit this new parameter too. 
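For orientation, a minimal caller sketch of the page-based prototype (the foo_peek_byte() helper and its name are invented for this example; only the vfio_pin_pages()/vfio_unpin_pages() calls reflect the new API): pin the single page backing an IOVA, read one byte through a temporary kernel mapping, then unpin, which is roughly the pattern the converted vfio-ccw copy_from_iova() path uses with kmap_local_page():

    #include <linux/highmem.h>
    #include <linux/iommu.h>
    #include <linux/vfio.h>

    /* Hypothetical helper, not part of this series. */
    static int foo_peek_byte(struct vfio_device *vdev, dma_addr_t iova, u8 *out)
    {
            struct page *page;
            void *va;
            int ret;

            ret = vfio_pin_pages(vdev, iova, 1, IOMMU_READ, &page);
            if (ret != 1)
                    return ret < 0 ? ret : -EFAULT;

            va = kmap_local_page(page);
            *out = *((u8 *)va + (iova & ~PAGE_MASK));
            kunmap_local(va);

            vfio_unpin_pages(vdev, iova, 1);
            return 0;
    }

Because the caller now receives the struct page directly, the pfn_valid()/pfn_to_page() conversions disappear from the callers, as the gvt and ccw hunks below show.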
Reviewed-by: Christoph Hellwig Reviewed-by: Kirti Wankhede Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Eric Farman Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-11-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio-mediated-device.rst | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 19 ++++++------------- drivers/s390/cio/vfio_ccw_cp.c | 19 +++++++++---------- drivers/s390/crypto/vfio_ap_ops.c | 6 +++--- drivers/vfio/vfio.c | 8 ++++---- drivers/vfio/vfio.h | 2 +- drivers/vfio/vfio_iommu_type1.c | 19 +++++++++++-------- include/linux/vfio.h | 2 +- 8 files changed, 36 insertions(+), 41 deletions(-) (limited to 'drivers/vfio') diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst index ea32a0f13ddb..ba5fefcdae1a 100644 --- a/Documentation/driver-api/vfio-mediated-device.rst +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -263,7 +263,7 @@ The following APIs are provided for translating user pfn to host pfn in a VFIO driver:: int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, unsigned long *phys_pfn); + int npage, int prot, struct page **pages); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 8be75c282611..e3cd58946477 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -240,7 +240,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); - unsigned long base_pfn = 0; + struct page *base_page = NULL; int npage; int ret; @@ -250,26 +250,19 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, */ for (npage = 0; npage < total_pages; npage++) { dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; - unsigned long pfn; + struct page *cur_page; ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); + IOMMU_READ | IOMMU_WRITE, &cur_page); if (ret != 1) { gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", &cur_iova, ret); goto err; } - if (!pfn_valid(pfn)) { - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); - npage++; - ret = -EFAULT; - goto err; - } - if (npage == 0) - base_pfn = pfn; - else if (base_pfn + npage != pfn) { + base_page = cur_page; + else if (base_page + npage != cur_page) { gvt_vgpu_err("The pages are not continuous\n"); ret = -EINVAL; npage++; @@ -277,7 +270,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, } } - *page = pfn_to_page(base_pfn); + *page = base_page; return 0; err: gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index cd4ec4f6d6ff..8963f452f963 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -22,8 +22,8 @@ struct page_array { /* Array that stores pages need to pin. */ dma_addr_t *pa_iova; - /* Array that receives PFNs of the pages pinned. */ - unsigned long *pa_pfn; + /* Array that receives the pinned pages. */ + struct page **pa_page; /* Number of pages pinned from @pa_iova. 
*/ int pa_nr; }; @@ -68,19 +68,19 @@ static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len) return -EINVAL; pa->pa_iova = kcalloc(pa->pa_nr, - sizeof(*pa->pa_iova) + sizeof(*pa->pa_pfn), + sizeof(*pa->pa_iova) + sizeof(*pa->pa_page), GFP_KERNEL); if (unlikely(!pa->pa_iova)) { pa->pa_nr = 0; return -ENOMEM; } - pa->pa_pfn = (unsigned long *)&pa->pa_iova[pa->pa_nr]; + pa->pa_page = (struct page **)&pa->pa_iova[pa->pa_nr]; pa->pa_iova[0] = iova; - pa->pa_pfn[0] = -1ULL; + pa->pa_page[0] = NULL; for (i = 1; i < pa->pa_nr; i++) { pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE; - pa->pa_pfn[i] = -1ULL; + pa->pa_page[i] = NULL; } return 0; @@ -144,7 +144,7 @@ static int page_array_pin(struct page_array *pa, struct vfio_device *vdev) ret = vfio_pin_pages(vdev, *first, npage, IOMMU_READ | IOMMU_WRITE, - &pa->pa_pfn[pinned]); + &pa->pa_page[pinned]); if (ret < 0) { goto err_out; } else if (ret > 0 && ret != npage) { @@ -195,7 +195,7 @@ static inline void page_array_idal_create_words(struct page_array *pa, */ for (i = 0; i < pa->pa_nr; i++) - idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT; + idaws[i] = page_to_phys(pa->pa_page[i]); /* Adjust the first IDAW, since it may not start on a page boundary */ idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1); @@ -246,8 +246,7 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, l = n; for (i = 0; i < pa.pa_nr; i++) { - struct page *page = pfn_to_page(pa.pa_pfn[i]); - void *from = kmap_local_page(page); + void *from = kmap_local_page(pa.pa_page[i]); m = PAGE_SIZE; if (i == 0) { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index d7c38c82f694..75cd92c291e3 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -234,9 +234,9 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; + struct page *h_page; int nisc; struct kvm *kvm; - unsigned long h_pfn; phys_addr_t h_nib; dma_addr_t nib; int ret; @@ -251,7 +251,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, } ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1, - IOMMU_READ | IOMMU_WRITE, &h_pfn); + IOMMU_READ | IOMMU_WRITE, &h_page); switch (ret) { case 1: break; @@ -267,7 +267,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, kvm = q->matrix_mdev->kvm; gisa = kvm->arch.gisa_int.origin; - h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); + h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; nisc = kvm_s390_gisc_register(kvm, isc); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 606a20b605ba..8e23ca59ceed 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1941,18 +1941,18 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); * @npage [in] : count of pages to be pinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @prot [in] : protection flags - * @phys_pfn[out]: array of host PFNs + * @pages[out] : array of host pages * Return error or number of pages pinned. 
*/ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, unsigned long *phys_pfn) + int npage, int prot, struct page **pages) { struct vfio_container *container; struct vfio_group *group = device->group; struct vfio_iommu_driver *driver; int ret; - if (!phys_pfn || !npage || !vfio_assert_device_open(device)) + if (!pages || !npage || !vfio_assert_device_open(device)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) @@ -1967,7 +1967,7 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, if (likely(driver && driver->ops->pin_pages)) ret = driver->ops->pin_pages(container->iommu_data, group->iommu_group, iova, - npage, prot, phys_pfn); + npage, prot, pages); else ret = -ENOTTY; diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index e9767e13f00f..503bea6c843d 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -52,7 +52,7 @@ struct vfio_iommu_driver_ops { struct iommu_group *group, dma_addr_t user_iova, int npage, int prot, - unsigned long *phys_pfn); + struct page **pages); void (*unpin_pages)(void *iommu_data, dma_addr_t user_iova, int npage); void (*register_device)(void *iommu_data, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e629e059118c..db516c90a977 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -831,7 +831,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, struct iommu_group *iommu_group, dma_addr_t user_iova, int npage, int prot, - unsigned long *phys_pfn) + struct page **pages) { struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; @@ -841,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, bool do_accounting; dma_addr_t iova; - if (!iommu || !phys_pfn) + if (!iommu || !pages) return -EINVAL; /* Supported for v2 version only */ @@ -880,6 +880,7 @@ again: do_accounting = list_empty(&iommu->domain_list); for (i = 0; i < npage; i++) { + unsigned long phys_pfn; struct vfio_pfn *vpfn; iova = user_iova + PAGE_SIZE * i; @@ -896,23 +897,25 @@ again: vpfn = vfio_iova_get_vfio_pfn(dma, iova); if (vpfn) { - phys_pfn[i] = vpfn->pfn; + pages[i] = pfn_to_page(vpfn->pfn); continue; } remote_vaddr = dma->vaddr + (iova - dma->iova); - ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], + ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn, do_accounting); if (ret) goto pin_unwind; - ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); + ret = vfio_add_to_pfn_list(dma, iova, phys_pfn); if (ret) { - if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) + if (put_pfn(phys_pfn, dma->prot) && do_accounting) vfio_lock_acct(dma, -1, true); goto pin_unwind; } + pages[i] = pfn_to_page(phys_pfn); + if (iommu->dirty_page_tracking) { unsigned long pgshift = __ffs(iommu->pgsize_bitmap); @@ -935,14 +938,14 @@ again: goto pin_done; pin_unwind: - phys_pfn[i] = 0; + pages[i] = NULL; for (j = 0; j < i; j++) { dma_addr_t iova; iova = user_iova + PAGE_SIZE * j; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); vfio_unpin_page_external(dma, iova, do_accounting); - phys_pfn[j] = 0; + pages[j] = NULL; } pin_done: mutex_unlock(&iommu->lock); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index acefd663e63b..e05ddc6fe6a5 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -162,7 +162,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, 
int prot, unsigned long *phys_pfn); + int npage, int prot, struct page **pages); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write); -- cgit v1.2.3-58-ga151 From 099fd2c2020751737d9288f923d562e0e05977eb Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Sun, 31 Jul 2022 21:39:18 -0400 Subject: vfio/pci: fix the wrong word This patch fixes a wrong word in comment. Signed-off-by: Bo Liu Link: https://lore.kernel.org/r/20220801013918.2520-1-liubo03@inspur.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 97e5ade6efb3..442d3ba4122b 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -222,7 +222,7 @@ static int vfio_default_config_write(struct vfio_pci_core_device *vdev, int pos, memcpy(vdev->vconfig + pos, &virt_val, count); } - /* Non-virtualzed and writable bits go to hardware */ + /* Non-virtualized and writable bits go to hardware */ if (write & ~virt) { struct pci_dev *pdev = vdev->pdev; __le32 phys_val = 0; -- cgit v1.2.3-58-ga151