diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-14 18:06:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-14 18:06:47 -0700 |
commit | 8c194f3bd322a8bd44d079092d870549b8ae62d1 (patch) | |
tree | c194690d0a01b6e5313d6af133e394a056a3015f /drivers | |
parent | 07e492eb8921a8aa53fd2bf637bee3da94cc03fe (diff) | |
parent | 5a0ff17741c1785b27229a16b5ab77470d71b170 (diff) |
Merge tag 'vfio-v4.1-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- VFIO platform bus driver support (Baptiste Reynal, Antonios Motakis,
testing and review by Eric Auger)
- Split VFIO irqfd support to separate module (Alex Williamson)
- vfio-pci VGA arbiter client (Alex Williamson)
- New vfio-pci.ids= module option (Alex Williamson)
- vfio-pci D3 power state support for idle devices (Alex Williamson)
* tag 'vfio-v4.1-rc1' of git://github.com/awilliam/linux-vfio: (30 commits)
vfio-pci: Fix use after free
vfio-pci: Move idle devices to D3hot power state
vfio-pci: Remove warning if try-reset fails
vfio-pci: Allow PCI IDs to be specified as module options
vfio-pci: Add VGA arbiter client
vfio-pci: Add module option to disable VGA region access
vgaarb: Stub vga_set_legacy_decoding()
vfio: Split virqfd into a separate module for vfio bus drivers
vfio: virqfd_lock can be static
vfio: put off the allocation of "minor" in vfio_create_group
vfio/platform: implement IRQ masking/unmasking via an eventfd
vfio: initialize the virqfd workqueue in VFIO generic code
vfio: move eventfd support code for VFIO_PCI to a separate file
vfio: pass an opaque pointer on virqfd initialization
vfio: add local lock for virqfd instead of depending on VFIO PCI
vfio: virqfd: rename vfio_pci_virqfd_init and vfio_pci_virqfd_exit
vfio: add a vfio_ prefix to virqfd_enable and virqfd_disable and export
vfio/platform: support for level sensitive interrupts
vfio/platform: trigger an interrupt via eventfd
vfio/platform: initial interrupts support code
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/vfio/Kconfig | 6 | ||||
-rw-r--r-- | drivers/vfio/Makefile | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/Kconfig | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 188 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 238 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 3 | ||||
-rw-r--r-- | drivers/vfio/platform/Kconfig | 20 | ||||
-rw-r--r-- | drivers/vfio/platform/Makefile | 8 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_amba.c | 115 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform.c | 103 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_common.c | 521 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_irq.c | 336 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_private.h | 85 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 13 | ||||
-rw-r--r-- | drivers/vfio/virqfd.c | 226 |
15 files changed, 1608 insertions, 259 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 14e27ab32456..7d092ddc8119 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -13,6 +13,11 @@ config VFIO_SPAPR_EEH depends on EEH && VFIO_IOMMU_SPAPR_TCE default n +config VFIO_VIRQFD + tristate + depends on VFIO && EVENTFD + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API @@ -27,3 +32,4 @@ menuconfig VFIO If you don't know what to do here, say N. source "drivers/vfio/pci/Kconfig" +source "drivers/vfio/platform/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 0b035b12600a..7b8a31f63fea 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,5 +1,9 @@ +vfio_virqfd-y := virqfd.o + obj-$(CONFIG_VFIO) += vfio.o +obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ +obj-$(CONFIG_VFIO_PLATFORM) += platform/ diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index c6bb5da2d2a7..579d83bf5358 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,6 +1,7 @@ config VFIO_PCI tristate "VFIO support for PCI devices" depends on VFIO && PCI && EVENTFD + select VFIO_VIRQFD help Support for the PCI VFIO bus driver. This is required to make use of PCI drivers using the VFIO framework. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f8a186381ae8..69fab0fd15ae 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -11,6 +11,8 @@ * Author: Tom Lyon, pugs@cisco.com */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/device.h> #include <linux/eventfd.h> #include <linux/file.h> @@ -25,6 +27,7 @@ #include <linux/types.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <linux/vgaarb.h> #include "vfio_pci_private.h" @@ -32,13 +35,81 @@ #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_DESC "VFIO PCI - User Level meta-driver" +static char ids[1024] __initdata; +module_param_string(ids, ids, sizeof(ids), 0); +MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); + static bool nointxmask; module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); +#ifdef CONFIG_VFIO_PCI_VGA +static bool disable_vga; +module_param(disable_vga, bool, S_IRUGO); +MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci"); +#endif + +static bool disable_idle_d3; +module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable_idle_d3, + "Disable using the PCI D3 low power state for idle, unused devices"); + static DEFINE_MUTEX(driver_lock); +static inline bool vfio_vga_disabled(void) +{ +#ifdef CONFIG_VFIO_PCI_VGA + return disable_vga; +#else + return true; +#endif +} + +/* + * Our VGA arbiter participation is limited since we don't know anything + * about the device itself. However, if the device is the only VGA device + * downstream of a bridge and VFIO VGA support is disabled, then we can + * safely return legacy VGA IO and memory as not decoded since the user + * has no way to get to it and routing can be disabled externally at the + * bridge. + */ +static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga) +{ + struct vfio_pci_device *vdev = opaque; + struct pci_dev *tmp = NULL, *pdev = vdev->pdev; + unsigned char max_busnr; + unsigned int decodes; + + if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus)) + return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; + + max_busnr = pci_bus_max_busnr(pdev->bus); + decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; + + while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) { + if (tmp == pdev || + pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) || + pci_is_root_bus(tmp->bus)) + continue; + + if (tmp->bus->number >= pdev->bus->number && + tmp->bus->number <= max_busnr) { + pci_dev_put(tmp); + decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; + break; + } + } + + return decodes; +} + +static inline bool vfio_pci_is_vga(struct pci_dev *pdev) +{ + return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; +} + static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static int vfio_pci_enable(struct vfio_pci_device *vdev) @@ -48,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + pci_set_power_state(pdev, PCI_D0); + /* Don't allow our initial saved state to include busmaster */ pci_clear_master(pdev); @@ -93,10 +166,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; -#ifdef CONFIG_VFIO_PCI_VGA - if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; -#endif return 0; } @@ -153,20 +224,17 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) * Try to reset the device. The success of this is dependent on * being able to lock the device, which is not always possible. */ - if (vdev->reset_works) { - int ret = pci_try_reset_function(pdev); - if (ret) - pr_warn("%s: Failed to reset device %s (%d)\n", - __func__, dev_name(&pdev->dev), ret); - else - vdev->needs_reset = false; - } + if (vdev->reset_works && !pci_try_reset_function(pdev)) + vdev->needs_reset = false; pci_restore_state(pdev); out: pci_disable_device(pdev); vfio_pci_try_bus_reset(vdev); + + if (!disable_idle_d3) + pci_set_power_state(pdev, PCI_D3hot); } static void vfio_pci_release(void *device_data) @@ -885,6 +953,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) { iommu_group_put(group); kfree(vdev); + return ret; + } + + if (vfio_pci_is_vga(pdev)) { + vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode); + vga_set_legacy_decoding(pdev, + vfio_pci_set_vga_decode(vdev, false)); + } + + if (!disable_idle_d3) { + /* + * pci-core sets the device power state to an unknown value at + * bootup and after being removed from a driver. The only + * transition it allows from this unknown state is to D0, which + * typically happens when a driver calls pci_enable_device(). + * We're not ready to enable the device yet, but we do want to + * be able to get to D3. Therefore first do a D0 transition + * before going to D3. + */ + pci_set_power_state(pdev, PCI_D0); + pci_set_power_state(pdev, PCI_D3hot); } return ret; @@ -895,10 +984,21 @@ static void vfio_pci_remove(struct pci_dev *pdev) struct vfio_pci_device *vdev; vdev = vfio_del_group_dev(&pdev->dev); - if (vdev) { - iommu_group_put(pdev->dev.iommu_group); - kfree(vdev); + if (!vdev) + return; + + iommu_group_put(pdev->dev.iommu_group); + kfree(vdev); + + if (vfio_pci_is_vga(pdev)) { + vga_client_register(pdev, NULL, NULL, NULL); + vga_set_legacy_decoding(pdev, + VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); } + + if (!disable_idle_d3) + pci_set_power_state(pdev, PCI_D0); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -1017,10 +1117,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) put_devs: for (i = 0; i < devs.cur_index; i++) { - if (!ret) { - tmp = vfio_device_data(devs.devices[i]); + tmp = vfio_device_data(devs.devices[i]); + if (!ret) tmp->needs_reset = false; - } + + if (!tmp->refcnt && !disable_idle_d3) + pci_set_power_state(tmp->pdev, PCI_D3hot); + vfio_device_put(devs.devices[i]); } @@ -1030,10 +1133,50 @@ put_devs: static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); - vfio_pci_virqfd_exit(); vfio_pci_uninit_perm_bits(); } +static void __init vfio_pci_fill_ids(void) +{ + char *p, *id; + int rc; + + /* no ids passed actually */ + if (ids[0] == '\0') + return; + + /* add ids specified in the module parameter */ + p = ids; + while ((id = strsep(&p, ","))) { + unsigned int vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields; + + if (!strlen(id)) + continue; + + fields = sscanf(id, "%x:%x:%x:%x:%x:%x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + + if (fields < 2) { + pr_warn("invalid id string \"%s\"\n", id); + continue; + } + + rc = pci_add_dynid(&vfio_pci_driver, vendor, device, + subvendor, subdevice, class, class_mask, 0); + if (rc) + pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n", + vendor, device, subvendor, subdevice, + class, class_mask, rc); + else + pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n", + vendor, device, subvendor, subdevice, + class, class_mask); + } +} + static int __init vfio_pci_init(void) { int ret; @@ -1043,21 +1186,16 @@ static int __init vfio_pci_init(void) if (ret) return ret; - /* Start the virqfd cleanup handler */ - ret = vfio_pci_virqfd_init(); - if (ret) - goto out_virqfd; - /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); if (ret) goto out_driver; + vfio_pci_fill_ids(); + return 0; out_driver: - vfio_pci_virqfd_exit(); -out_virqfd: vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 2027a27546ef..1f577b4ac126 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -19,230 +19,19 @@ #include <linux/msi.h> #include <linux/pci.h> #include <linux/file.h> -#include <linux/poll.h> #include <linux/vfio.h> #include <linux/wait.h> -#include <linux/workqueue.h> #include <linux/slab.h> #include "vfio_pci_private.h" /* - * IRQfd - generic - */ -struct virqfd { - struct vfio_pci_device *vdev; - struct eventfd_ctx *eventfd; - int (*handler)(struct vfio_pci_device *, void *); - void (*thread)(struct vfio_pci_device *, void *); - void *data; - struct work_struct inject; - wait_queue_t wait; - poll_table pt; - struct work_struct shutdown; - struct virqfd **pvirqfd; -}; - -static struct workqueue_struct *vfio_irqfd_cleanup_wq; - -int __init vfio_pci_virqfd_init(void) -{ - vfio_irqfd_cleanup_wq = - create_singlethread_workqueue("vfio-irqfd-cleanup"); - if (!vfio_irqfd_cleanup_wq) - return -ENOMEM; - - return 0; -} - -void vfio_pci_virqfd_exit(void) -{ - destroy_workqueue(vfio_irqfd_cleanup_wq); -} - -static void virqfd_deactivate(struct virqfd *virqfd) -{ - queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); -} - -static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - struct virqfd *virqfd = container_of(wait, struct virqfd, wait); - unsigned long flags = (unsigned long)key; - - if (flags & POLLIN) { - /* An event has been signaled, call function */ - if ((!virqfd->handler || - virqfd->handler(virqfd->vdev, virqfd->data)) && - virqfd->thread) - schedule_work(&virqfd->inject); - } - - if (flags & POLLHUP) { - unsigned long flags; - spin_lock_irqsave(&virqfd->vdev->irqlock, flags); - - /* - * The eventfd is closing, if the virqfd has not yet been - * queued for release, as determined by testing whether the - * vdev pointer to it is still valid, queue it now. As - * with kvm irqfds, we know we won't race against the virqfd - * going away because we hold wqh->lock to get here. - */ - if (*(virqfd->pvirqfd) == virqfd) { - *(virqfd->pvirqfd) = NULL; - virqfd_deactivate(virqfd); - } - - spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); - } - - return 0; -} - -static void virqfd_ptable_queue_proc(struct file *file, - wait_queue_head_t *wqh, poll_table *pt) -{ - struct virqfd *virqfd = container_of(pt, struct virqfd, pt); - add_wait_queue(wqh, &virqfd->wait); -} - -static void virqfd_shutdown(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - u64 cnt; - - eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); - flush_work(&virqfd->inject); - eventfd_ctx_put(virqfd->eventfd); - - kfree(virqfd); -} - -static void virqfd_inject(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, inject); - if (virqfd->thread) - virqfd->thread(virqfd->vdev, virqfd->data); -} - -static int virqfd_enable(struct vfio_pci_device *vdev, - int (*handler)(struct vfio_pci_device *, void *), - void (*thread)(struct vfio_pci_device *, void *), - void *data, struct virqfd **pvirqfd, int fd) -{ - struct fd irqfd; - struct eventfd_ctx *ctx; - struct virqfd *virqfd; - int ret = 0; - unsigned int events; - - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); - if (!virqfd) - return -ENOMEM; - - virqfd->pvirqfd = pvirqfd; - virqfd->vdev = vdev; - virqfd->handler = handler; - virqfd->thread = thread; - virqfd->data = data; - - INIT_WORK(&virqfd->shutdown, virqfd_shutdown); - INIT_WORK(&virqfd->inject, virqfd_inject); - - irqfd = fdget(fd); - if (!irqfd.file) { - ret = -EBADF; - goto err_fd; - } - - ctx = eventfd_ctx_fileget(irqfd.file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto err_ctx; - } - - virqfd->eventfd = ctx; - - /* - * virqfds can be released by closing the eventfd or directly - * through ioctl. These are both done through a workqueue, so - * we update the pointer to the virqfd under lock to avoid - * pushing multiple jobs to release the same virqfd. - */ - spin_lock_irq(&vdev->irqlock); - - if (*pvirqfd) { - spin_unlock_irq(&vdev->irqlock); - ret = -EBUSY; - goto err_busy; - } - *pvirqfd = virqfd; - - spin_unlock_irq(&vdev->irqlock); - - /* - * Install our own custom wake-up handling so we are notified via - * a callback whenever someone signals the underlying eventfd. - */ - init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); - init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); - - events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); - - /* - * Check if there was an event already pending on the eventfd - * before we registered and trigger it as if we didn't miss it. - */ - if (events & POLLIN) { - if ((!handler || handler(vdev, data)) && thread) - schedule_work(&virqfd->inject); - } - - /* - * Do not drop the file until the irqfd is fully initialized, - * otherwise we might race against the POLLHUP. - */ - fdput(irqfd); - - return 0; -err_busy: - eventfd_ctx_put(ctx); -err_ctx: - fdput(irqfd); -err_fd: - kfree(virqfd); - - return ret; -} - -static void virqfd_disable(struct vfio_pci_device *vdev, - struct virqfd **pvirqfd) -{ - unsigned long flags; - - spin_lock_irqsave(&vdev->irqlock, flags); - - if (*pvirqfd) { - virqfd_deactivate(*pvirqfd); - *pvirqfd = NULL; - } - - spin_unlock_irqrestore(&vdev->irqlock, flags); - - /* - * Block until we know all outstanding shutdown jobs have completed. - * Even if we don't queue the job, flush the wq to be sure it's - * been released. - */ - flush_workqueue(vfio_irqfd_cleanup_wq); -} - -/* * INTx */ -static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused) +static void vfio_send_intx_eventfd(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; + if (likely(is_intx(vdev) && !vdev->virq_disabled)) eventfd_signal(vdev->ctx[0].trigger, 1); } @@ -285,9 +74,9 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev) * a signal is necessary, which can then be handled via a work queue * or directly depending on the caller. */ -static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, - void *unused) +static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; unsigned long flags; int ret = 0; @@ -440,8 +229,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev, &vdev->ctx[0].unmask); - virqfd_disable(vdev, &vdev->ctx[0].mask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -605,8 +394,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - virqfd_disable(vdev, &vdev->ctx[i].unmask); - virqfd_disable(vdev, &vdev->ctx[i].mask); + vfio_virqfd_disable(&vdev->ctx[i].unmask); + vfio_virqfd_disable(&vdev->ctx[i].mask); } if (msix) { @@ -639,11 +428,12 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) - return virqfd_enable(vdev, vfio_pci_intx_unmask_handler, - vfio_send_intx_eventfd, NULL, - &vdev->ctx[0].unmask, fd); + return vfio_virqfd_enable((void *) vdev, + vfio_pci_intx_unmask_handler, + vfio_send_intx_eventfd, NULL, + &vdev->ctx[0].unmask, fd); - virqfd_disable(vdev, &vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); } return 0; diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index c9f9b323f152..ae0e1b4c1711 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -87,9 +87,6 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); -extern int vfio_pci_virqfd_init(void); -extern void vfio_pci_virqfd_exit(void); - extern int vfio_config_init(struct vfio_pci_device *vdev); extern void vfio_config_free(struct vfio_pci_device *vdev); #endif /* VFIO_PCI_PRIVATE_H */ diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig new file mode 100644 index 000000000000..9a4403e2a36c --- /dev/null +++ b/drivers/vfio/platform/Kconfig @@ -0,0 +1,20 @@ +config VFIO_PLATFORM + tristate "VFIO support for platform devices" + depends on VFIO && EVENTFD && ARM + select VFIO_VIRQFD + help + Support for platform devices with VFIO. This is required to make + use of platform devices present on the system using the VFIO + framework. + + If you don't know what to do here, say N. + +config VFIO_AMBA + tristate "VFIO support for AMBA devices" + depends on VFIO_PLATFORM && ARM_AMBA + help + Support for ARM AMBA devices with VFIO. This is required to make + use of ARM AMBA devices present on the system using the VFIO + framework. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile new file mode 100644 index 000000000000..81de144c0eaa --- /dev/null +++ b/drivers/vfio/platform/Makefile @@ -0,0 +1,8 @@ + +vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o + +obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o + +vfio-amba-y := vfio_amba.o + +obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c new file mode 100644 index 000000000000..ff0331f72526 --- /dev/null +++ b/drivers/vfio/platform/vfio_amba.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis <a.motakis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vfio.h> +#include <linux/amba/bus.h> + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis <a.motakis@virtualopensystems.com>" +#define DRIVER_DESC "VFIO for AMBA devices - User Level meta-driver" + +/* probing devices from the AMBA bus */ + +static struct resource *get_amba_resource(struct vfio_platform_device *vdev, + int i) +{ + struct amba_device *adev = (struct amba_device *) vdev->opaque; + + if (i == 0) + return &adev->res; + + return NULL; +} + +static int get_amba_irq(struct vfio_platform_device *vdev, int i) +{ + struct amba_device *adev = (struct amba_device *) vdev->opaque; + int ret = 0; + + if (i < AMBA_NR_IRQS) + ret = adev->irq[i]; + + /* zero is an unset IRQ for AMBA devices */ + return ret ? ret : -ENXIO; +} + +static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->name = kasprintf(GFP_KERNEL, "vfio-amba-%08x", adev->periphid); + if (!vdev->name) { + kfree(vdev); + return -ENOMEM; + } + + vdev->opaque = (void *) adev; + vdev->flags = VFIO_DEVICE_FLAGS_AMBA; + vdev->get_resource = get_amba_resource; + vdev->get_irq = get_amba_irq; + + ret = vfio_platform_probe_common(vdev, &adev->dev); + if (ret) { + kfree(vdev->name); + kfree(vdev); + } + + return ret; +} + +static int vfio_amba_remove(struct amba_device *adev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(&adev->dev); + if (vdev) { + kfree(vdev->name); + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct amba_id pl330_ids[] = { + { 0, 0 }, +}; + +MODULE_DEVICE_TABLE(amba, pl330_ids); + +static struct amba_driver vfio_amba_driver = { + .probe = vfio_amba_probe, + .remove = vfio_amba_remove, + .id_table = pl330_ids, + .drv = { + .name = "vfio-amba", + .owner = THIS_MODULE, + }, +}; + +module_amba_driver(vfio_amba_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c new file mode 100644 index 000000000000..cef645c83996 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform.c @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis <a.motakis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vfio.h> +#include <linux/platform_device.h> + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.10" +#define DRIVER_AUTHOR "Antonios Motakis <a.motakis@virtualopensystems.com>" +#define DRIVER_DESC "VFIO for platform devices - User Level meta-driver" + +/* probing devices from the linux platform bus */ + +static struct resource *get_platform_resource(struct vfio_platform_device *vdev, + int num) +{ + struct platform_device *dev = (struct platform_device *) vdev->opaque; + int i; + + for (i = 0; i < dev->num_resources; i++) { + struct resource *r = &dev->resource[i]; + + if (resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) { + if (!num) + return r; + + num--; + } + } + return NULL; +} + +static int get_platform_irq(struct vfio_platform_device *vdev, int i) +{ + struct platform_device *pdev = (struct platform_device *) vdev->opaque; + + return platform_get_irq(pdev, i); +} + +static int vfio_platform_probe(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + int ret; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->opaque = (void *) pdev; + vdev->name = pdev->name; + vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM; + vdev->get_resource = get_platform_resource; + vdev->get_irq = get_platform_irq; + + ret = vfio_platform_probe_common(vdev, &pdev->dev); + if (ret) + kfree(vdev); + + return ret; +} + +static int vfio_platform_remove(struct platform_device *pdev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_platform_remove_common(&pdev->dev); + if (vdev) { + kfree(vdev); + return 0; + } + + return -EINVAL; +} + +static struct platform_driver vfio_platform_driver = { + .probe = vfio_platform_probe, + .remove = vfio_platform_remove, + .driver = { + .name = "vfio-platform", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(vfio_platform_driver); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c new file mode 100644 index 000000000000..abcff7a1aa66 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -0,0 +1,521 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis <a.motakis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/device.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> + +#include "vfio_platform_private.h" + +static DEFINE_MUTEX(driver_lock); + +static int vfio_platform_regions_init(struct vfio_platform_device *vdev) +{ + int cnt = 0, i; + + while (vdev->get_resource(vdev, cnt)) + cnt++; + + vdev->regions = kcalloc(cnt, sizeof(struct vfio_platform_region), + GFP_KERNEL); + if (!vdev->regions) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + struct resource *res = + vdev->get_resource(vdev, i); + + if (!res) + goto err; + + vdev->regions[i].addr = res->start; + vdev->regions[i].size = resource_size(res); + vdev->regions[i].flags = 0; + + switch (resource_type(res)) { + case IORESOURCE_MEM: + vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_MMIO; + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ; + if (!(res->flags & IORESOURCE_READONLY)) + vdev->regions[i].flags |= + VFIO_REGION_INFO_FLAG_WRITE; + + /* + * Only regions addressed with PAGE granularity may be + * MMAPed securely. + */ + if (!(vdev->regions[i].addr & ~PAGE_MASK) && + !(vdev->regions[i].size & ~PAGE_MASK)) + vdev->regions[i].flags |= + VFIO_REGION_INFO_FLAG_MMAP; + + break; + case IORESOURCE_IO: + vdev->regions[i].type = VFIO_PLATFORM_REGION_TYPE_PIO; + break; + default: + goto err; + } + } + + vdev->num_regions = cnt; + + return 0; +err: + kfree(vdev->regions); + return -EINVAL; +} + +static void vfio_platform_regions_cleanup(struct vfio_platform_device *vdev) +{ + int i; + + for (i = 0; i < vdev->num_regions; i++) + iounmap(vdev->regions[i].ioaddr); + + vdev->num_regions = 0; + kfree(vdev->regions); +} + +static void vfio_platform_release(void *device_data) +{ + struct vfio_platform_device *vdev = device_data; + + mutex_lock(&driver_lock); + + if (!(--vdev->refcnt)) { + vfio_platform_regions_cleanup(vdev); + vfio_platform_irq_cleanup(vdev); + } + + mutex_unlock(&driver_lock); + + module_put(THIS_MODULE); +} + +static int vfio_platform_open(void *device_data) +{ + struct vfio_platform_device *vdev = device_data; + int ret; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&driver_lock); + + if (!vdev->refcnt) { + ret = vfio_platform_regions_init(vdev); + if (ret) + goto err_reg; + + ret = vfio_platform_irq_init(vdev); + if (ret) + goto err_irq; + } + + vdev->refcnt++; + + mutex_unlock(&driver_lock); + return 0; + +err_irq: + vfio_platform_regions_cleanup(vdev); +err_reg: + mutex_unlock(&driver_lock); + module_put(THIS_MODULE); + return ret; +} + +static long vfio_platform_ioctl(void *device_data, + unsigned int cmd, unsigned long arg) +{ + struct vfio_platform_device *vdev = device_data; + unsigned long minsz; + + if (cmd == VFIO_DEVICE_GET_INFO) { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.flags = vdev->flags; + info.num_regions = vdev->num_regions; + info.num_irqs = vdev->num_irqs; + + return copy_to_user((void __user *)arg, &info, minsz); + + } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { + struct vfio_region_info info; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= vdev->num_regions) + return -EINVAL; + + /* map offset to the physical address */ + info.offset = VFIO_PLATFORM_INDEX_TO_OFFSET(info.index); + info.size = vdev->regions[info.index].size; + info.flags = vdev->regions[info.index].flags; + + return copy_to_user((void __user *)arg, &info, minsz); + + } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index >= vdev->num_irqs) + return -EINVAL; + + info.flags = vdev->irqs[info.index].flags; + info.count = vdev->irqs[info.index].count; + + return copy_to_user((void __user *)arg, &info, minsz); + + } else if (cmd == VFIO_DEVICE_SET_IRQS) { + struct vfio_irq_set hdr; + u8 *data = NULL; + int ret = 0; + + minsz = offsetofend(struct vfio_irq_set, count); + + if (copy_from_user(&hdr, (void __user *)arg, minsz)) + return -EFAULT; + + if (hdr.argsz < minsz) + return -EINVAL; + + if (hdr.index >= vdev->num_irqs) + return -EINVAL; + + if (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | + VFIO_IRQ_SET_ACTION_TYPE_MASK)) + return -EINVAL; + + if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { + size_t size; + + if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL) + size = sizeof(uint8_t); + else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD) + size = sizeof(int32_t); + else + return -EINVAL; + + if (hdr.argsz - minsz < size) + return -EINVAL; + + data = memdup_user((void __user *)(arg + minsz), size); + if (IS_ERR(data)) + return PTR_ERR(data); + } + + mutex_lock(&vdev->igate); + + ret = vfio_platform_set_irqs_ioctl(vdev, hdr.flags, hdr.index, + hdr.start, hdr.count, data); + mutex_unlock(&vdev->igate); + kfree(data); + + return ret; + + } else if (cmd == VFIO_DEVICE_RESET) + return -EINVAL; + + return -ENOTTY; +} + +static ssize_t vfio_platform_read_mmio(struct vfio_platform_region reg, + char __user *buf, size_t count, + loff_t off) +{ + unsigned int done = 0; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + + if (!reg.ioaddr) + return -ENOMEM; + } + + while (count) { + size_t filled; + + if (count >= 4 && !(off % 4)) { + u32 val; + + val = ioread32(reg.ioaddr + off); + if (copy_to_user(buf, &val, 4)) + goto err; + + filled = 4; + } else if (count >= 2 && !(off % 2)) { + u16 val; + + val = ioread16(reg.ioaddr + off); + if (copy_to_user(buf, &val, 2)) + goto err; + + filled = 2; + } else { + u8 val; + + val = ioread8(reg.ioaddr + off); + if (copy_to_user(buf, &val, 1)) + goto err; + + filled = 1; + } + + + count -= filled; + done += filled; + off += filled; + buf += filled; + } + + return done; +err: + return -EFAULT; +} + +static ssize_t vfio_platform_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_platform_device *vdev = device_data; + unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; + + if (index >= vdev->num_regions) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)) + return -EINVAL; + + if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_read_mmio(vdev->regions[index], + buf, count, off); + else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + + return -EINVAL; +} + +static ssize_t vfio_platform_write_mmio(struct vfio_platform_region reg, + const char __user *buf, size_t count, + loff_t off) +{ + unsigned int done = 0; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + + if (!reg.ioaddr) + return -ENOMEM; + } + + while (count) { + size_t filled; + + if (count >= 4 && !(off % 4)) { + u32 val; + + if (copy_from_user(&val, buf, 4)) + goto err; + iowrite32(val, reg.ioaddr + off); + + filled = 4; + } else if (count >= 2 && !(off % 2)) { + u16 val; + + if (copy_from_user(&val, buf, 2)) + goto err; + iowrite16(val, reg.ioaddr + off); + + filled = 2; + } else { + u8 val; + + if (copy_from_user(&val, buf, 1)) + goto err; + iowrite8(val, reg.ioaddr + off); + + filled = 1; + } + + count -= filled; + done += filled; + off += filled; + buf += filled; + } + + return done; +err: + return -EFAULT; +} + +static ssize_t vfio_platform_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct vfio_platform_device *vdev = device_data; + unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); + loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; + + if (index >= vdev->num_regions) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)) + return -EINVAL; + + if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_write_mmio(vdev->regions[index], + buf, count, off); + else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + + return -EINVAL; +} + +static int vfio_platform_mmap_mmio(struct vfio_platform_region region, + struct vm_area_struct *vma) +{ + u64 req_len, pgoff, req_start; + + req_len = vma->vm_end - vma->vm_start; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (region.size < PAGE_SIZE || req_start + req_len > region.size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff; + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + req_len, vma->vm_page_prot); +} + +static int vfio_platform_mmap(void *device_data, struct vm_area_struct *vma) +{ + struct vfio_platform_device *vdev = device_data; + unsigned int index; + + index = vma->vm_pgoff >> (VFIO_PLATFORM_OFFSET_SHIFT - PAGE_SHIFT); + + if (vma->vm_end < vma->vm_start) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (index >= vdev->num_regions) + return -EINVAL; + if (vma->vm_start & ~PAGE_MASK) + return -EINVAL; + if (vma->vm_end & ~PAGE_MASK) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ) + && (vma->vm_flags & VM_READ)) + return -EINVAL; + + if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE) + && (vma->vm_flags & VM_WRITE)) + return -EINVAL; + + vma->vm_private_data = vdev; + + if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_MMIO) + return vfio_platform_mmap_mmio(vdev->regions[index], vma); + + else if (vdev->regions[index].type & VFIO_PLATFORM_REGION_TYPE_PIO) + return -EINVAL; /* not implemented */ + + return -EINVAL; +} + +static const struct vfio_device_ops vfio_platform_ops = { + .name = "vfio-platform", + .open = vfio_platform_open, + .release = vfio_platform_release, + .ioctl = vfio_platform_ioctl, + .read = vfio_platform_read, + .write = vfio_platform_write, + .mmap = vfio_platform_mmap, +}; + +int vfio_platform_probe_common(struct vfio_platform_device *vdev, + struct device *dev) +{ + struct iommu_group *group; + int ret; + + if (!vdev) + return -EINVAL; + + group = iommu_group_get(dev); + if (!group) { + pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); + return -EINVAL; + } + + ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev); + if (ret) { + iommu_group_put(group); + return ret; + } + + mutex_init(&vdev->igate); + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_platform_probe_common); + +struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) +{ + struct vfio_platform_device *vdev; + + vdev = vfio_del_group_dev(dev); + if (vdev) + iommu_group_put(dev->iommu_group); + + return vdev; +} +EXPORT_SYMBOL_GPL(vfio_platform_remove_common); diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c new file mode 100644 index 000000000000..88bba57b30a8 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_irq.c @@ -0,0 +1,336 @@ +/* + * VFIO platform devices interrupt handling + * + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis <a.motakis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/eventfd.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/vfio.h> +#include <linux/irq.h> + +#include "vfio_platform_private.h" + +static void vfio_platform_mask(struct vfio_platform_irq *irq_ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_ctx->lock, flags); + + if (!irq_ctx->masked) { + disable_irq_nosync(irq_ctx->hwirq); + irq_ctx->masked = true; + } + + spin_unlock_irqrestore(&irq_ctx->lock, flags); +} + +static int vfio_platform_mask_handler(void *opaque, void *unused) +{ + struct vfio_platform_irq *irq_ctx = opaque; + + vfio_platform_mask(irq_ctx); + + return 0; +} + +static int vfio_platform_set_irq_mask(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, + void *data) +{ + if (start != 0 || count != 1) + return -EINVAL; + + if (!(vdev->irqs[index].flags & VFIO_IRQ_INFO_MASKABLE)) + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + if (fd >= 0) + return vfio_virqfd_enable((void *) &vdev->irqs[index], + vfio_platform_mask_handler, + NULL, NULL, + &vdev->irqs[index].mask, fd); + + vfio_virqfd_disable(&vdev->irqs[index].mask); + return 0; + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + vfio_platform_mask(&vdev->irqs[index]); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t mask = *(uint8_t *)data; + + if (mask) + vfio_platform_mask(&vdev->irqs[index]); + } + + return 0; +} + +static void vfio_platform_unmask(struct vfio_platform_irq *irq_ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_ctx->lock, flags); + + if (irq_ctx->masked) { + enable_irq(irq_ctx->hwirq); + irq_ctx->masked = false; + } + + spin_unlock_irqrestore(&irq_ctx->lock, flags); +} + +static int vfio_platform_unmask_handler(void *opaque, void *unused) +{ + struct vfio_platform_irq *irq_ctx = opaque; + + vfio_platform_unmask(irq_ctx); + + return 0; +} + +static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, + void *data) +{ + if (start != 0 || count != 1) + return -EINVAL; + + if (!(vdev->irqs[index].flags & VFIO_IRQ_INFO_MASKABLE)) + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + if (fd >= 0) + return vfio_virqfd_enable((void *) &vdev->irqs[index], + vfio_platform_unmask_handler, + NULL, NULL, + &vdev->irqs[index].unmask, + fd); + + vfio_virqfd_disable(&vdev->irqs[index].unmask); + return 0; + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + vfio_platform_unmask(&vdev->irqs[index]); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t unmask = *(uint8_t *)data; + + if (unmask) + vfio_platform_unmask(&vdev->irqs[index]); + } + + return 0; +} + +static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id) +{ + struct vfio_platform_irq *irq_ctx = dev_id; + unsigned long flags; + int ret = IRQ_NONE; + + spin_lock_irqsave(&irq_ctx->lock, flags); + + if (!irq_ctx->masked) { + ret = IRQ_HANDLED; + + /* automask maskable interrupts */ + disable_irq_nosync(irq_ctx->hwirq); + irq_ctx->masked = true; + } + + spin_unlock_irqrestore(&irq_ctx->lock, flags); + + if (ret == IRQ_HANDLED) + eventfd_signal(irq_ctx->trigger, 1); + + return ret; +} + +static irqreturn_t vfio_irq_handler(int irq, void *dev_id) +{ + struct vfio_platform_irq *irq_ctx = dev_id; + + eventfd_signal(irq_ctx->trigger, 1); + + return IRQ_HANDLED; +} + +static int vfio_set_trigger(struct vfio_platform_device *vdev, int index, + int fd, irq_handler_t handler) +{ + struct vfio_platform_irq *irq = &vdev->irqs[index]; + struct eventfd_ctx *trigger; + int ret; + + if (irq->trigger) { + free_irq(irq->hwirq, irq); + kfree(irq->name); + eventfd_ctx_put(irq->trigger); + irq->trigger = NULL; + } + + if (fd < 0) /* Disable only */ + return 0; + + irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)", + irq->hwirq, vdev->name); + if (!irq->name) + return -ENOMEM; + + trigger = eventfd_ctx_fdget(fd); + if (IS_ERR(trigger)) { + kfree(irq->name); + return PTR_ERR(trigger); + } + + irq->trigger = trigger; + + irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN); + ret = request_irq(irq->hwirq, handler, 0, irq->name, irq); + if (ret) { + kfree(irq->name); + eventfd_ctx_put(trigger); + irq->trigger = NULL; + return ret; + } + + if (!irq->masked) + enable_irq(irq->hwirq); + + return 0; +} + +static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev, + unsigned index, unsigned start, + unsigned count, uint32_t flags, + void *data) +{ + struct vfio_platform_irq *irq = &vdev->irqs[index]; + irq_handler_t handler; + + if (vdev->irqs[index].flags & VFIO_IRQ_INFO_AUTOMASKED) + handler = vfio_automasked_irq_handler; + else + handler = vfio_irq_handler; + + if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) + return vfio_set_trigger(vdev, index, -1, handler); + + if (start != 0 || count != 1) + return -EINVAL; + + if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { + int32_t fd = *(int32_t *)data; + + return vfio_set_trigger(vdev, index, fd, handler); + } + + if (flags & VFIO_IRQ_SET_DATA_NONE) { + handler(irq->hwirq, irq); + + } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { + uint8_t trigger = *(uint8_t *)data; + + if (trigger) + handler(irq->hwirq, irq); + } + + return 0; +} + +int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, + uint32_t flags, unsigned index, unsigned start, + unsigned count, void *data) +{ + int (*func)(struct vfio_platform_device *vdev, unsigned index, + unsigned start, unsigned count, uint32_t flags, + void *data) = NULL; + + switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { + case VFIO_IRQ_SET_ACTION_MASK: + func = vfio_platform_set_irq_mask; + break; + case VFIO_IRQ_SET_ACTION_UNMASK: + func = vfio_platform_set_irq_unmask; + break; + case VFIO_IRQ_SET_ACTION_TRIGGER: + func = vfio_platform_set_irq_trigger; + break; + } + + if (!func) + return -ENOTTY; + + return func(vdev, index, start, count, flags, data); +} + +int vfio_platform_irq_init(struct vfio_platform_device *vdev) +{ + int cnt = 0, i; + + while (vdev->get_irq(vdev, cnt) >= 0) + cnt++; + + vdev->irqs = kcalloc(cnt, sizeof(struct vfio_platform_irq), GFP_KERNEL); + if (!vdev->irqs) + return -ENOMEM; + + for (i = 0; i < cnt; i++) { + int hwirq = vdev->get_irq(vdev, i); + + if (hwirq < 0) + goto err; + + spin_lock_init(&vdev->irqs[i].lock); + + vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD; + + if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) + vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE + | VFIO_IRQ_INFO_AUTOMASKED; + + vdev->irqs[i].count = 1; + vdev->irqs[i].hwirq = hwirq; + vdev->irqs[i].masked = false; + } + + vdev->num_irqs = cnt; + + return 0; +err: + kfree(vdev->irqs); + return -EINVAL; +} + +void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev) +{ + int i; + + for (i = 0; i < vdev->num_irqs; i++) + vfio_set_trigger(vdev, i, -1, NULL); + + vdev->num_irqs = 0; + kfree(vdev->irqs); +} diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h new file mode 100644 index 000000000000..5d31e0473406 --- /dev/null +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2013 - Virtual Open Systems + * Author: Antonios Motakis <a.motakis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef VFIO_PLATFORM_PRIVATE_H +#define VFIO_PLATFORM_PRIVATE_H + +#include <linux/types.h> +#include <linux/interrupt.h> + +#define VFIO_PLATFORM_OFFSET_SHIFT 40 +#define VFIO_PLATFORM_OFFSET_MASK (((u64)(1) << VFIO_PLATFORM_OFFSET_SHIFT) - 1) + +#define VFIO_PLATFORM_OFFSET_TO_INDEX(off) \ + (off >> VFIO_PLATFORM_OFFSET_SHIFT) + +#define VFIO_PLATFORM_INDEX_TO_OFFSET(index) \ + ((u64)(index) << VFIO_PLATFORM_OFFSET_SHIFT) + +struct vfio_platform_irq { + u32 flags; + u32 count; + int hwirq; + char *name; + struct eventfd_ctx *trigger; + bool masked; + spinlock_t lock; + struct virqfd *unmask; + struct virqfd *mask; +}; + +struct vfio_platform_region { + u64 addr; + resource_size_t size; + u32 flags; + u32 type; +#define VFIO_PLATFORM_REGION_TYPE_MMIO 1 +#define VFIO_PLATFORM_REGION_TYPE_PIO 2 + void __iomem *ioaddr; +}; + +struct vfio_platform_device { + struct vfio_platform_region *regions; + u32 num_regions; + struct vfio_platform_irq *irqs; + u32 num_irqs; + int refcnt; + struct mutex igate; + + /* + * These fields should be filled by the bus specific binder + */ + void *opaque; + const char *name; + uint32_t flags; + /* callbacks to discover device resources */ + struct resource* + (*get_resource)(struct vfio_platform_device *vdev, int i); + int (*get_irq)(struct vfio_platform_device *vdev, int i); +}; + +extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, + struct device *dev); +extern struct vfio_platform_device *vfio_platform_remove_common + (struct device *dev); + +extern int vfio_platform_irq_init(struct vfio_platform_device *vdev); +extern void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev); + +extern int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev, + uint32_t flags, unsigned index, + unsigned start, unsigned count, + void *data); + +#endif /* VFIO_PLATFORM_PRIVATE_H */ diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 4cde85501444..0d336625ac71 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -234,22 +234,21 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) mutex_lock(&vfio.group_lock); - minor = vfio_alloc_group_minor(group); - if (minor < 0) { - vfio_group_unlock_and_free(group); - return ERR_PTR(minor); - } - /* Did we race creating this group? */ list_for_each_entry(tmp, &vfio.group_list, vfio_next) { if (tmp->iommu_group == iommu_group) { vfio_group_get(tmp); - vfio_free_group_minor(minor); vfio_group_unlock_and_free(group); return tmp; } } + minor = vfio_alloc_group_minor(group); + if (minor < 0) { + vfio_group_unlock_and_free(group); + return ERR_PTR(minor); + } + dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.group_devt), minor), group, "%d", iommu_group_id(iommu_group)); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c new file mode 100644 index 000000000000..27c89cd5d70b --- /dev/null +++ b/drivers/vfio/virqfd.c @@ -0,0 +1,226 @@ +/* + * VFIO generic eventfd code for IRQFD support. + * Derived from drivers/vfio/pci/vfio_pci_intrs.c + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/vfio.h> +#include <linux/eventfd.h> +#include <linux/file.h> +#include <linux/module.h> +#include <linux/slab.h> + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" +#define DRIVER_DESC "IRQFD support for VFIO bus drivers" + +static struct workqueue_struct *vfio_irqfd_cleanup_wq; +static DEFINE_SPINLOCK(virqfd_lock); + +static int __init vfio_virqfd_init(void) +{ + vfio_irqfd_cleanup_wq = + create_singlethread_workqueue("vfio-irqfd-cleanup"); + if (!vfio_irqfd_cleanup_wq) + return -ENOMEM; + + return 0; +} + +static void __exit vfio_virqfd_exit(void) +{ + destroy_workqueue(vfio_irqfd_cleanup_wq); +} + +static void virqfd_deactivate(struct virqfd *virqfd) +{ + queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); +} + +static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct virqfd *virqfd = container_of(wait, struct virqfd, wait); + unsigned long flags = (unsigned long)key; + + if (flags & POLLIN) { + /* An event has been signaled, call function */ + if ((!virqfd->handler || + virqfd->handler(virqfd->opaque, virqfd->data)) && + virqfd->thread) + schedule_work(&virqfd->inject); + } + + if (flags & POLLHUP) { + unsigned long flags; + spin_lock_irqsave(&virqfd_lock, flags); + + /* + * The eventfd is closing, if the virqfd has not yet been + * queued for release, as determined by testing whether the + * virqfd pointer to it is still valid, queue it now. As + * with kvm irqfds, we know we won't race against the virqfd + * going away because we hold the lock to get here. + */ + if (*(virqfd->pvirqfd) == virqfd) { + *(virqfd->pvirqfd) = NULL; + virqfd_deactivate(virqfd); + } + + spin_unlock_irqrestore(&virqfd_lock, flags); + } + + return 0; +} + +static void virqfd_ptable_queue_proc(struct file *file, + wait_queue_head_t *wqh, poll_table *pt) +{ + struct virqfd *virqfd = container_of(pt, struct virqfd, pt); + add_wait_queue(wqh, &virqfd->wait); +} + +static void virqfd_shutdown(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); + u64 cnt; + + eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); + flush_work(&virqfd->inject); + eventfd_ctx_put(virqfd->eventfd); + + kfree(virqfd); +} + +static void virqfd_inject(struct work_struct *work) +{ + struct virqfd *virqfd = container_of(work, struct virqfd, inject); + if (virqfd->thread) + virqfd->thread(virqfd->opaque, virqfd->data); +} + +int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd) +{ + struct fd irqfd; + struct eventfd_ctx *ctx; + struct virqfd *virqfd; + int ret = 0; + unsigned int events; + + virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); + if (!virqfd) + return -ENOMEM; + + virqfd->pvirqfd = pvirqfd; + virqfd->opaque = opaque; + virqfd->handler = handler; + virqfd->thread = thread; + virqfd->data = data; + + INIT_WORK(&virqfd->shutdown, virqfd_shutdown); + INIT_WORK(&virqfd->inject, virqfd_inject); + + irqfd = fdget(fd); + if (!irqfd.file) { + ret = -EBADF; + goto err_fd; + } + + ctx = eventfd_ctx_fileget(irqfd.file); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto err_ctx; + } + + virqfd->eventfd = ctx; + + /* + * virqfds can be released by closing the eventfd or directly + * through ioctl. These are both done through a workqueue, so + * we update the pointer to the virqfd under lock to avoid + * pushing multiple jobs to release the same virqfd. + */ + spin_lock_irq(&virqfd_lock); + + if (*pvirqfd) { + spin_unlock_irq(&virqfd_lock); + ret = -EBUSY; + goto err_busy; + } + *pvirqfd = virqfd; + + spin_unlock_irq(&virqfd_lock); + + /* + * Install our own custom wake-up handling so we are notified via + * a callback whenever someone signals the underlying eventfd. + */ + init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); + init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); + + events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); + + /* + * Check if there was an event already pending on the eventfd + * before we registered and trigger it as if we didn't miss it. + */ + if (events & POLLIN) { + if ((!handler || handler(opaque, data)) && thread) + schedule_work(&virqfd->inject); + } + + /* + * Do not drop the file until the irqfd is fully initialized, + * otherwise we might race against the POLLHUP. + */ + fdput(irqfd); + + return 0; +err_busy: + eventfd_ctx_put(ctx); +err_ctx: + fdput(irqfd); +err_fd: + kfree(virqfd); + + return ret; +} +EXPORT_SYMBOL_GPL(vfio_virqfd_enable); + +void vfio_virqfd_disable(struct virqfd **pvirqfd) +{ + unsigned long flags; + + spin_lock_irqsave(&virqfd_lock, flags); + + if (*pvirqfd) { + virqfd_deactivate(*pvirqfd); + *pvirqfd = NULL; + } + + spin_unlock_irqrestore(&virqfd_lock, flags); + + /* + * Block until we know all outstanding shutdown jobs have completed. + * Even if we don't queue the job, flush the wq to be sure it's + * been released. + */ + flush_workqueue(vfio_irqfd_cleanup_wq); +} +EXPORT_SYMBOL_GPL(vfio_virqfd_disable); + +module_init(vfio_virqfd_init); +module_exit(vfio_virqfd_exit); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); |