Diffstat (limited to 'drivers/xen')
 drivers/xen/Kconfig                              |   8
 drivers/xen/balloon.c                            |   1
 drivers/xen/events/events_base.c                 | 111
 drivers/xen/events/events_fifo.c                 |  26
 drivers/xen/evtchn.c                             |   2
 drivers/xen/grant-table.c                        |  10
 drivers/xen/platform-pci.c                       |   2
 drivers/xen/privcmd.c                            | 407
 drivers/xen/xen-pciback/conf_space.c             |  19
 drivers/xen/xen-pciback/conf_space_capability.c  |   8
 drivers/xen/xen-pciback/conf_space_header.c      |  21
 drivers/xen/xenbus/xenbus_dev_frontend.c         |   4
 drivers/xen/xenbus/xenbus_probe.c                |   2
 13 files changed, 502 insertions(+), 119 deletions(-)
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index d43153fec18e..d5989871dd5d 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -269,12 +269,12 @@ config XEN_PRIVCMD disaggregated Xen setups this driver might be needed for other domains, too. -config XEN_PRIVCMD_IRQFD - bool "Xen irqfd support" +config XEN_PRIVCMD_EVENTFD + bool "Xen Ioeventfd and irqfd support" depends on XEN_PRIVCMD && XEN_VIRTIO && EVENTFD help - Using the irqfd mechanism a virtio backend running in a daemon can - speed up interrupt injection into a guest. + Using the ioeventfd / irqfd mechanism a virtio backend running in a + daemon can speed up interrupt delivery from / to a guest. config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 586a1673459e..976c6cdf9ee6 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -94,7 +94,6 @@ static struct ctl_table balloon_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - { } }; #else diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 3bdd5b59661d..6de6b084ea60 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/irqnr.h> #include <linux/pci.h> +#include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/cpuhotplug.h> #include <linux/atomic.h> @@ -96,6 +97,7 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; + struct rcu_work rwork; short refcnt; u8 spurious_cnt; u8 is_accounted; @@ -147,22 +149,12 @@ const struct evtchn_ops *evtchn_ops; static DEFINE_MUTEX(irq_mapping_update_lock); /* - * Lock protecting event handling loop against removing event channels. - * Adding of event channels is no issue as the associated IRQ becomes active - * only after everything is setup (before request_[threaded_]irq() the handler - * can't be entered for an event, as the event channel will be unmasked only - * then). - */ -static DEFINE_RWLOCK(evtchn_rwlock); - -/* * Lock hierarchy: * * irq_mapping_update_lock - * evtchn_rwlock - * IRQ-desc lock - * percpu eoi_list_lock - * irq_info->lock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -306,6 +298,22 @@ static void channels_on_cpu_inc(struct irq_info *info) info->is_accounted = 1; } +static void delayed_free_irq(struct work_struct *work) +{ + struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, + rwork); + unsigned int irq = info->irq; + + /* Remove the info pointer only now, with no potential users left. */ + set_info_for_irq(irq, NULL); + + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ + if (irq >= nr_legacy_irqs()) + irq_free_desc(irq); +} + /* Constructors for packed IRQ information. 
*/ static int xen_irq_info_common_setup(struct irq_info *info, unsigned irq, @@ -668,33 +676,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); while (true) { - spin_lock(&eoi->eoi_list_lock); + spin_lock_irqsave(&eoi->eoi_list_lock, flags); info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, eoi_list); - if (info == NULL || now < info->eoi_time) { - spin_unlock(&eoi->eoi_list_lock); + if (info == NULL) + break; + + if (now < info->eoi_time) { + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, + info->eoi_time - now); break; } list_del_init(&info->eoi_list); - spin_unlock(&eoi->eoi_list_lock); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); info->eoi_time = 0; xen_irq_lateeoi_locked(info, false); } - if (info) - mod_delayed_work_on(info->eoi_cpu, system_wq, - &eoi->delayed, info->eoi_time - now); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } static void xen_cpu_init_eoi(unsigned int cpu) @@ -709,16 +720,15 @@ static void xen_cpu_init_eoi(unsigned int cpu) void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; - unsigned long flags; - read_lock_irqsave(&evtchn_rwlock, flags); + rcu_read_lock(); info = info_for_irq(irq); if (info) xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); - read_unlock_irqrestore(&evtchn_rwlock, flags); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(xen_irq_lateeoi); @@ -732,6 +742,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; + INIT_RCU_WORK(&info->rwork, delayed_free_irq); set_info_for_irq(irq, info); /* @@ -789,31 +800,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); - unsigned long flags; if (WARN_ON(!info)) return; - write_lock_irqsave(&evtchn_rwlock, flags); - if (!list_empty(&info->eoi_list)) lateeoi_list_del(info); list_del(&info->list); - set_info_for_irq(irq, NULL); - WARN_ON(info->refcnt > 0); - write_unlock_irqrestore(&evtchn_rwlock, flags); - - kfree(info); - - /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < nr_legacy_irqs()) - return; - - irq_free_desc(irq); + queue_rcu_work(system_wq, &info->rwork); } /* Not called for lateeoi events. */ @@ -1231,7 +1229,8 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, bind_evtchn_to_cpu(evtchn, 0, false); } else { struct irq_info *info = info_for_irq(irq); - WARN_ON(info == NULL || info->type != IRQT_EVTCHN); + if (!WARN_ON(!info || info->type != IRQT_EVTCHN)) + info->refcnt++; } out: @@ -1704,14 +1703,21 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) generic_handle_irq(irq); } -static int __xen_evtchn_do_upcall(void) +int xen_evtchn_do_upcall(void) { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; int cpu = smp_processor_id(); struct evtchn_loop_ctrl ctrl = { 0 }; - read_lock(&evtchn_rwlock); + /* + * When closing an event channel the associated IRQ must not be freed + * until all cpus have left the event handling loop. This is ensured + * by taking the rcu_read_lock() while handling events, as freeing of + * the IRQ is handled via queue_rcu_work() _after_ closing the event + * channel. 
+ */ + rcu_read_lock(); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1724,7 +1730,7 @@ static int __xen_evtchn_do_upcall(void) } while (vcpu_info->evtchn_upcall_pending); - read_unlock(&evtchn_rwlock); + rcu_read_unlock(); /* * Increment irq_epoch only now to defer EOIs only for @@ -1735,24 +1741,7 @@ static int __xen_evtchn_do_upcall(void) return ret; } - -void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - irq_enter(); - - __xen_evtchn_do_upcall(); - - irq_exit(); - set_irq_regs(old_regs); -} - -int xen_hvm_evtchn_do_upcall(void) -{ - return __xen_evtchn_do_upcall(); -} -EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall); +EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall); /* Rebind a new event channel to an existing irq. */ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index ad9fe51d3fb3..655775db7caf 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -226,21 +226,20 @@ static bool evtchn_fifo_is_masked(evtchn_port_t port) */ static bool clear_masked_cond(volatile event_word_t *word) { - event_word_t new, old, w; + event_word_t new, old; - w = *word; + old = *word; do { - if (!(w & (1 << EVTCHN_FIFO_MASKED))) + if (!(old & (1 << EVTCHN_FIFO_MASKED))) return true; - if (w & (1 << EVTCHN_FIFO_PENDING)) + if (old & (1 << EVTCHN_FIFO_PENDING)) return false; - old = w & ~(1 << EVTCHN_FIFO_BUSY); + old = old & ~(1 << EVTCHN_FIFO_BUSY); new = old & ~(1 << EVTCHN_FIFO_MASKED); - w = sync_cmpxchg(word, old, new); - } while (w != old); + } while (!sync_try_cmpxchg(word, &old, new)); return true; } @@ -259,17 +258,16 @@ static void evtchn_fifo_unmask(evtchn_port_t port) static uint32_t clear_linked(volatile event_word_t *word) { - event_word_t new, old, w; + event_word_t new, old; - w = *word; + old = *word; do { - old = w; - new = (w & ~((1 << EVTCHN_FIFO_LINKED) - | EVTCHN_FIFO_LINK_MASK)); - } while ((w = sync_cmpxchg(word, old, new)) != old); + new = (old & ~((1 << EVTCHN_FIFO_LINKED) + | EVTCHN_FIFO_LINK_MASK)); + } while (!sync_try_cmpxchg(word, &old, new)); - return w & EVTCHN_FIFO_LINK_MASK; + return old & EVTCHN_FIFO_LINK_MASK; } static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl, diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 9139a7364df5..59717628ca42 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -397,7 +397,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port, if (rc < 0) goto err; - rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, + rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, IRQF_SHARED, u->name, evtchn); if (rc < 0) goto err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 35659bf70746..04a6b470b15d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -427,16 +427,14 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref) { - u16 flags, nflags; - u16 *pflags; + u16 *pflags = &gnttab_shared.v1[ref].flags; + u16 flags; - pflags = &gnttab_shared.v1[ref].flags; - nflags = *pflags; + flags = *pflags; do { - flags = nflags; if (flags & (GTF_reading|GTF_writing)) return 0; - } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); + } while (!sync_try_cmpxchg(pflags, &flags, 0)); return 1; } diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index fcc819131572..544d3f9010b9 100644 --- 
a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -64,7 +64,7 @@ static uint64_t get_callback_via(struct pci_dev *pdev) static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id) { - return xen_hvm_evtchn_do_upcall(); + return xen_evtchn_do_upcall(); } static int xen_allocate_irq(struct pci_dev *pdev) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index f00ad5f5f1d4..1ce7f3c7a950 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -29,15 +29,18 @@ #include <linux/seq_file.h> #include <linux/miscdevice.h> #include <linux/moduleparam.h> +#include <linux/virtio_mmio.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> #include <xen/xen.h> +#include <xen/events.h> #include <xen/privcmd.h> #include <xen/interface/xen.h> #include <xen/interface/memory.h> #include <xen/interface/hvm/dm_op.h> +#include <xen/interface/hvm/ioreq.h> #include <xen/features.h> #include <xen/page.h> #include <xen/xen-ops.h> @@ -782,6 +785,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, goto out; pages = vma->vm_private_data; + for (i = 0; i < kdata.num; i++) { xen_pfn_t pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]); @@ -838,7 +842,7 @@ out: return rc; } -#ifdef CONFIG_XEN_PRIVCMD_IRQFD +#ifdef CONFIG_XEN_PRIVCMD_EVENTFD /* Irqfd support */ static struct workqueue_struct *irqfd_cleanup_wq; static DEFINE_MUTEX(irqfds_lock); @@ -935,7 +939,7 @@ static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd) return -ENOMEM; dm_op = kirqfd + 1; - if (copy_from_user(dm_op, irqfd->dm_op, irqfd->size)) { + if (copy_from_user(dm_op, u64_to_user_ptr(irqfd->dm_op), irqfd->size)) { ret = -EFAULT; goto error_kfree; } @@ -1079,6 +1083,389 @@ static void privcmd_irqfd_exit(void) destroy_workqueue(irqfd_cleanup_wq); } + +/* Ioeventfd Support */ +#define QUEUE_NOTIFY_VQ_MASK 0xFFFF + +static DEFINE_MUTEX(ioreq_lock); +static LIST_HEAD(ioreq_list); + +/* per-eventfd structure */ +struct privcmd_kernel_ioeventfd { + struct eventfd_ctx *eventfd; + struct list_head list; + u64 addr; + unsigned int addr_len; + unsigned int vq; +}; + +/* per-guest CPU / port structure */ +struct ioreq_port { + int vcpu; + unsigned int port; + struct privcmd_kernel_ioreq *kioreq; +}; + +/* per-guest structure */ +struct privcmd_kernel_ioreq { + domid_t dom; + unsigned int vcpus; + u64 uioreq; + struct ioreq *ioreq; + spinlock_t lock; /* Protects ioeventfds list */ + struct list_head ioeventfds; + struct list_head list; + struct ioreq_port ports[0]; +}; + +static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id) +{ + struct ioreq_port *port = dev_id; + struct privcmd_kernel_ioreq *kioreq = port->kioreq; + struct ioreq *ioreq = &kioreq->ioreq[port->vcpu]; + struct privcmd_kernel_ioeventfd *kioeventfd; + unsigned int state = STATE_IOREQ_READY; + + if (ioreq->state != STATE_IOREQ_READY || + ioreq->type != IOREQ_TYPE_COPY || ioreq->dir != IOREQ_WRITE) + return IRQ_NONE; + + /* + * We need a barrier, smp_mb(), here to ensure reads are finished before + * `state` is updated. Since the lock implementation ensures that + * appropriate barrier will be added anyway, we can avoid adding + * explicit barrier here. + * + * Ideally we don't need to update `state` within the locks, but we do + * that here to avoid adding explicit barrier. 
+ */ + + spin_lock(&kioreq->lock); + ioreq->state = STATE_IOREQ_INPROCESS; + + list_for_each_entry(kioeventfd, &kioreq->ioeventfds, list) { + if (ioreq->addr == kioeventfd->addr + VIRTIO_MMIO_QUEUE_NOTIFY && + ioreq->size == kioeventfd->addr_len && + (ioreq->data & QUEUE_NOTIFY_VQ_MASK) == kioeventfd->vq) { + eventfd_signal(kioeventfd->eventfd, 1); + state = STATE_IORESP_READY; + break; + } + } + spin_unlock(&kioreq->lock); + + /* + * We need a barrier, smp_mb(), here to ensure writes are finished + * before `state` is updated. Since the lock implementation ensures that + * appropriate barrier will be added anyway, we can avoid adding + * explicit barrier here. + */ + + ioreq->state = state; + + if (state == STATE_IORESP_READY) { + notify_remote_via_evtchn(port->port); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static void ioreq_free(struct privcmd_kernel_ioreq *kioreq) +{ + struct ioreq_port *ports = kioreq->ports; + int i; + + lockdep_assert_held(&ioreq_lock); + + list_del(&kioreq->list); + + for (i = kioreq->vcpus - 1; i >= 0; i--) + unbind_from_irqhandler(irq_from_evtchn(ports[i].port), &ports[i]); + + kfree(kioreq); +} + +static +struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd) +{ + struct privcmd_kernel_ioreq *kioreq; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct page **pages; + unsigned int *ports; + int ret, size, i; + + lockdep_assert_held(&ioreq_lock); + + size = struct_size(kioreq, ports, ioeventfd->vcpus); + kioreq = kzalloc(size, GFP_KERNEL); + if (!kioreq) + return ERR_PTR(-ENOMEM); + + kioreq->dom = ioeventfd->dom; + kioreq->vcpus = ioeventfd->vcpus; + kioreq->uioreq = ioeventfd->ioreq; + spin_lock_init(&kioreq->lock); + INIT_LIST_HEAD(&kioreq->ioeventfds); + + /* The memory for ioreq server must have been mapped earlier */ + mmap_write_lock(mm); + vma = find_vma(mm, (unsigned long)ioeventfd->ioreq); + if (!vma) { + pr_err("Failed to find vma for ioreq page!\n"); + mmap_write_unlock(mm); + ret = -EFAULT; + goto error_kfree; + } + + pages = vma->vm_private_data; + kioreq->ioreq = (struct ioreq *)(page_to_virt(pages[0])); + mmap_write_unlock(mm); + + size = sizeof(*ports) * kioreq->vcpus; + ports = kzalloc(size, GFP_KERNEL); + if (!ports) { + ret = -ENOMEM; + goto error_kfree; + } + + if (copy_from_user(ports, u64_to_user_ptr(ioeventfd->ports), size)) { + ret = -EFAULT; + goto error_kfree_ports; + } + + for (i = 0; i < kioreq->vcpus; i++) { + kioreq->ports[i].vcpu = i; + kioreq->ports[i].port = ports[i]; + kioreq->ports[i].kioreq = kioreq; + + ret = bind_evtchn_to_irqhandler_lateeoi(ports[i], + ioeventfd_interrupt, IRQF_SHARED, "ioeventfd", + &kioreq->ports[i]); + if (ret < 0) + goto error_unbind; + } + + kfree(ports); + + list_add_tail(&kioreq->list, &ioreq_list); + + return kioreq; + +error_unbind: + while (--i >= 0) + unbind_from_irqhandler(irq_from_evtchn(ports[i]), &kioreq->ports[i]); +error_kfree_ports: + kfree(ports); +error_kfree: + kfree(kioreq); + return ERR_PTR(ret); +} + +static struct privcmd_kernel_ioreq * +get_ioreq(struct privcmd_ioeventfd *ioeventfd, struct eventfd_ctx *eventfd) +{ + struct privcmd_kernel_ioreq *kioreq; + unsigned long flags; + + list_for_each_entry(kioreq, &ioreq_list, list) { + struct privcmd_kernel_ioeventfd *kioeventfd; + + /* + * kioreq fields can be accessed here without a lock as they are + * never updated after being added to the ioreq_list. 
+ */ + if (kioreq->uioreq != ioeventfd->ioreq) { + continue; + } else if (kioreq->dom != ioeventfd->dom || + kioreq->vcpus != ioeventfd->vcpus) { + pr_err("Invalid ioeventfd configuration mismatch, dom (%u vs %u), vcpus (%u vs %u)\n", + kioreq->dom, ioeventfd->dom, kioreq->vcpus, + ioeventfd->vcpus); + return ERR_PTR(-EINVAL); + } + + /* Look for a duplicate eventfd for the same guest */ + spin_lock_irqsave(&kioreq->lock, flags); + list_for_each_entry(kioeventfd, &kioreq->ioeventfds, list) { + if (eventfd == kioeventfd->eventfd) { + spin_unlock_irqrestore(&kioreq->lock, flags); + return ERR_PTR(-EBUSY); + } + } + spin_unlock_irqrestore(&kioreq->lock, flags); + + return kioreq; + } + + /* Matching kioreq isn't found, allocate a new one */ + return alloc_ioreq(ioeventfd); +} + +static void ioeventfd_free(struct privcmd_kernel_ioeventfd *kioeventfd) +{ + list_del(&kioeventfd->list); + eventfd_ctx_put(kioeventfd->eventfd); + kfree(kioeventfd); +} + +static int privcmd_ioeventfd_assign(struct privcmd_ioeventfd *ioeventfd) +{ + struct privcmd_kernel_ioeventfd *kioeventfd; + struct privcmd_kernel_ioreq *kioreq; + unsigned long flags; + struct fd f; + int ret; + + /* Check for range overflow */ + if (ioeventfd->addr + ioeventfd->addr_len < ioeventfd->addr) + return -EINVAL; + + /* Vhost requires us to support length 1, 2, 4, and 8 */ + if (!(ioeventfd->addr_len == 1 || ioeventfd->addr_len == 2 || + ioeventfd->addr_len == 4 || ioeventfd->addr_len == 8)) + return -EINVAL; + + /* 4096 vcpus limit enough ? */ + if (!ioeventfd->vcpus || ioeventfd->vcpus > 4096) + return -EINVAL; + + kioeventfd = kzalloc(sizeof(*kioeventfd), GFP_KERNEL); + if (!kioeventfd) + return -ENOMEM; + + f = fdget(ioeventfd->event_fd); + if (!f.file) { + ret = -EBADF; + goto error_kfree; + } + + kioeventfd->eventfd = eventfd_ctx_fileget(f.file); + fdput(f); + + if (IS_ERR(kioeventfd->eventfd)) { + ret = PTR_ERR(kioeventfd->eventfd); + goto error_kfree; + } + + kioeventfd->addr = ioeventfd->addr; + kioeventfd->addr_len = ioeventfd->addr_len; + kioeventfd->vq = ioeventfd->vq; + + mutex_lock(&ioreq_lock); + kioreq = get_ioreq(ioeventfd, kioeventfd->eventfd); + if (IS_ERR(kioreq)) { + mutex_unlock(&ioreq_lock); + ret = PTR_ERR(kioreq); + goto error_eventfd; + } + + spin_lock_irqsave(&kioreq->lock, flags); + list_add_tail(&kioeventfd->list, &kioreq->ioeventfds); + spin_unlock_irqrestore(&kioreq->lock, flags); + + mutex_unlock(&ioreq_lock); + + return 0; + +error_eventfd: + eventfd_ctx_put(kioeventfd->eventfd); + +error_kfree: + kfree(kioeventfd); + return ret; +} + +static int privcmd_ioeventfd_deassign(struct privcmd_ioeventfd *ioeventfd) +{ + struct privcmd_kernel_ioreq *kioreq, *tkioreq; + struct eventfd_ctx *eventfd; + unsigned long flags; + int ret = 0; + + eventfd = eventfd_ctx_fdget(ioeventfd->event_fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + mutex_lock(&ioreq_lock); + list_for_each_entry_safe(kioreq, tkioreq, &ioreq_list, list) { + struct privcmd_kernel_ioeventfd *kioeventfd, *tmp; + /* + * kioreq fields can be accessed here without a lock as they are + * never updated after being added to the ioreq_list. 
+ */ + if (kioreq->dom != ioeventfd->dom || + kioreq->uioreq != ioeventfd->ioreq || + kioreq->vcpus != ioeventfd->vcpus) + continue; + + spin_lock_irqsave(&kioreq->lock, flags); + list_for_each_entry_safe(kioeventfd, tmp, &kioreq->ioeventfds, list) { + if (eventfd == kioeventfd->eventfd) { + ioeventfd_free(kioeventfd); + spin_unlock_irqrestore(&kioreq->lock, flags); + + if (list_empty(&kioreq->ioeventfds)) + ioreq_free(kioreq); + goto unlock; + } + } + spin_unlock_irqrestore(&kioreq->lock, flags); + break; + } + + pr_err("Ioeventfd isn't already assigned, dom: %u, addr: %llu\n", + ioeventfd->dom, ioeventfd->addr); + ret = -ENODEV; + +unlock: + mutex_unlock(&ioreq_lock); + eventfd_ctx_put(eventfd); + + return ret; +} + +static long privcmd_ioctl_ioeventfd(struct file *file, void __user *udata) +{ + struct privcmd_data *data = file->private_data; + struct privcmd_ioeventfd ioeventfd; + + if (copy_from_user(&ioeventfd, udata, sizeof(ioeventfd))) + return -EFAULT; + + /* No other flags should be set */ + if (ioeventfd.flags & ~PRIVCMD_IOEVENTFD_FLAG_DEASSIGN) + return -EINVAL; + + /* If restriction is in place, check the domid matches */ + if (data->domid != DOMID_INVALID && data->domid != ioeventfd.dom) + return -EPERM; + + if (ioeventfd.flags & PRIVCMD_IOEVENTFD_FLAG_DEASSIGN) + return privcmd_ioeventfd_deassign(&ioeventfd); + + return privcmd_ioeventfd_assign(&ioeventfd); +} + +static void privcmd_ioeventfd_exit(void) +{ + struct privcmd_kernel_ioreq *kioreq, *tmp; + unsigned long flags; + + mutex_lock(&ioreq_lock); + list_for_each_entry_safe(kioreq, tmp, &ioreq_list, list) { + struct privcmd_kernel_ioeventfd *kioeventfd, *tmp; + + spin_lock_irqsave(&kioreq->lock, flags); + list_for_each_entry_safe(kioeventfd, tmp, &kioreq->ioeventfds, list) + ioeventfd_free(kioeventfd); + spin_unlock_irqrestore(&kioreq->lock, flags); + + ioreq_free(kioreq); + } + mutex_unlock(&ioreq_lock); +} #else static inline long privcmd_ioctl_irqfd(struct file *file, void __user *udata) { @@ -1093,7 +1480,16 @@ static inline int privcmd_irqfd_init(void) static inline void privcmd_irqfd_exit(void) { } -#endif /* CONFIG_XEN_PRIVCMD_IRQFD */ + +static inline long privcmd_ioctl_ioeventfd(struct file *file, void __user *udata) +{ + return -EOPNOTSUPP; +} + +static inline void privcmd_ioeventfd_exit(void) +{ +} +#endif /* CONFIG_XEN_PRIVCMD_EVENTFD */ static long privcmd_ioctl(struct file *file, unsigned int cmd, unsigned long data) @@ -1134,6 +1530,10 @@ static long privcmd_ioctl(struct file *file, ret = privcmd_ioctl_irqfd(file, udata); break; + case IOCTL_PRIVCMD_IOEVENTFD: + ret = privcmd_ioctl_ioeventfd(file, udata); + break; + default: break; } @@ -1278,6 +1678,7 @@ err_privcmdbuf: static void __exit privcmd_exit(void) { + privcmd_ioeventfd_exit(); privcmd_irqfd_exit(); misc_deregister(&privcmd_dev); misc_deregister(&xen_privcmdbuf_dev); diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 059de92aea7d..d47eee6c5143 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -288,12 +288,6 @@ int xen_pcibk_get_interrupt_type(struct pci_dev *dev) u16 val; int ret = 0; - err = pci_read_config_word(dev, PCI_COMMAND, &val); - if (err) - return err; - if (!(val & PCI_COMMAND_INTX_DISABLE)) - ret |= INTERRUPT_TYPE_INTX; - /* * Do not trust dev->msi(x)_enabled here, as enabling could be done * bypassing the pci_*msi* functions, by the qemu. 
@@ -316,6 +310,19 @@ int xen_pcibk_get_interrupt_type(struct pci_dev *dev) if (val & PCI_MSIX_FLAGS_ENABLE) ret |= INTERRUPT_TYPE_MSIX; } + + /* + * PCIe spec says device cannot use INTx if MSI/MSI-X is enabled, + * so check for INTx only when both are disabled. + */ + if (!ret) { + err = pci_read_config_word(dev, PCI_COMMAND, &val); + if (err) + return err; + if (!(val & PCI_COMMAND_INTX_DISABLE)) + ret |= INTERRUPT_TYPE_INTX; + } + return ret ?: INTERRUPT_TYPE_NONE; } diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 097316a74126..1948a9700c8f 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -236,10 +236,16 @@ static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value, return PCIBIOS_SET_FAILED; if (new_value & field_config->enable_bit) { - /* don't allow enabling together with other interrupt types */ + /* + * Don't allow enabling together with other interrupt type, but do + * allow enabling MSI(-X) while INTx is still active to please Linuxes + * MSI(-X) startup sequence. It is safe to do, as according to PCI + * spec, device with enabled MSI(-X) shouldn't use INTx. + */ int int_type = xen_pcibk_get_interrupt_type(dev); if (int_type == INTERRUPT_TYPE_NONE || + int_type == INTERRUPT_TYPE_INTX || int_type == field_config->int_type) goto write; return PCIBIOS_SET_FAILED; diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 981435103af1..fc0332645966 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -104,24 +104,9 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) pci_clear_mwi(dev); } - if (dev_data && dev_data->allow_interrupt_control) { - if ((cmd->val ^ value) & PCI_COMMAND_INTX_DISABLE) { - if (value & PCI_COMMAND_INTX_DISABLE) { - pci_intx(dev, 0); - } else { - /* Do not allow enabling INTx together with MSI or MSI-X. */ - switch (xen_pcibk_get_interrupt_type(dev)) { - case INTERRUPT_TYPE_NONE: - pci_intx(dev, 1); - break; - case INTERRUPT_TYPE_INTX: - break; - default: - return PCIBIOS_SET_FAILED; - } - } - } - } + if (dev_data && dev_data->allow_interrupt_control && + ((cmd->val ^ value) & PCI_COMMAND_INTX_DISABLE)) + pci_intx(dev, !(value & PCI_COMMAND_INTX_DISABLE)); cmd->val = value; diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 0792fda49a15..6f56640092a9 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -82,7 +82,7 @@ struct read_buffer { struct list_head list; unsigned int cons; unsigned int len; - char msg[]; + char msg[] __counted_by(len); }; struct xenbus_file_priv { @@ -195,7 +195,7 @@ static int queue_reply(struct list_head *queue, const void *data, size_t len) if (len > XENSTORE_PAYLOAD_MAX) return -EINVAL; - rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + rb = kmalloc(struct_size(rb, msg, len), GFP_KERNEL); if (rb == NULL) return -ENOMEM; diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 639bf628389b..3205e5d724c8 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -1025,7 +1025,7 @@ static int __init xenbus_init(void) if (err < 0) { pr_err("xenstore_late_init couldn't bind irq err=%d\n", err); - return err; + goto out_error; } xs_init_irq = err; |