diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-09 10:19:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-09 10:19:13 -0700 |
commit | dcf3c935dd9e8e76c9922e88672fa4ad6a8a4df8 (patch) | |
tree | f8ce3ab321c70b666e14ed145faacc8b3c0ea82c /arch | |
parent | 7a400bf28334fc7734639db3566394e1fc80670c (diff) | |
parent | 1aee020155f364ef538370d3392969f1077b9bae (diff) |
Merge tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger:
- Support for optimized routines based on the host CPU
- Support for PCI via virtio
- Various fixes
* tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
um: remove unneeded semicolon in um_arch.c
um: Remove the repeated declaration
um: fix error return code in winch_tramp()
um: fix error return code in slip_open()
um: Fix stack pointer alignment
um: implement flush_cache_vmap/flush_cache_vunmap
um: add a UML specific futex implementation
um: enable the use of optimized xor routines in UML
um: Add support for host CPU flags and alignment
um: allow not setting extra rpaths in the linux binary
um: virtio/pci: enable suspend/resume
um: add PCI over virtio emulation driver
um: irqs: allow invoking time-travel handler multiple times
um: time-travel/signals: fix ndelay() in interrupt
um: expose time-travel mode to userspace side
um: export signals_enabled directly
um: remove unused smp_sigio_handler() declaration
lib: add iomem emulation (logic_iomem)
um: allow disabling NO_IOMEM
Diffstat (limited to 'arch')
42 files changed, 1668 insertions, 106 deletions
diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 57cfd9a1c082..0561b73cfd9a 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -15,7 +15,7 @@ config UML select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE - select NO_DMA + select NO_DMA if !UML_DMA_EMULATION select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select HAVE_GCC_PLUGINS @@ -26,7 +26,22 @@ config MMU bool default y +config UML_DMA_EMULATION + bool + config NO_IOMEM + bool "disable IOMEM" if EXPERT + depends on !INDIRECT_IOMEM + default y + +config UML_IOMEM_EMULATION + bool + select INDIRECT_IOMEM + select GENERIC_PCI_IOMAP + select GENERIC_IOMAP + select NO_GENERIC_PCI_IOPORT_MAP + +config NO_IOPORT_MAP def_bool y config ISA @@ -61,6 +76,9 @@ config NR_CPUS range 1 1 default 1 +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + source "arch/$(HEADER_ARCH)/um/Kconfig" config MAY_HAVE_RUNTIME_DEPS @@ -91,6 +109,19 @@ config LD_SCRIPT_DYN depends on !LD_SCRIPT_STATIC select MODULE_REL_CRCS if MODVERSIONS +config LD_SCRIPT_DYN_RPATH + bool "set rpath in the binary" if EXPERT + default y + depends on LD_SCRIPT_DYN + help + Add /lib (and /lib64 for 64-bit) to the linux binary's rpath + explicitly. + + You may need to turn this off if compiling for nix systems + that have their libraries in random /nix directories and + might otherwise unexpected use libraries from /lib or /lib64 + instead of the desired ones. + config HOSTFS tristate "Host filesystem" help diff --git a/arch/um/Makefile b/arch/um/Makefile index 1cea46ff9bb7..12a7acef0357 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -118,7 +118,8 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie) +LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ -fno-stack-protector $(call cc-option, -fno-stack-protector-all) diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 03ba34b61115..f145842c40b9 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -357,3 +357,23 @@ config UML_RTC rtcwake, especially in time-travel mode. This driver enables that by providing a fake RTC clock that causes a wakeup at the right time. + +config UML_PCI_OVER_VIRTIO + bool "Enable PCI over VIRTIO device simulation" + # in theory, just VIRTIO is enough, but that causes recursion + depends on VIRTIO_UML + select FORCE_PCI + select UML_IOMEM_EMULATION + select UML_DMA_EMULATION + select PCI_MSI + select PCI_MSI_IRQ_DOMAIN + select PCI_LOCKLESS_CONFIG + +config UML_PCI_OVER_VIRTIO_DEVICE_ID + int "set the virtio device ID for PCI emulation" + default -1 + depends on UML_PCI_OVER_VIRTIO + help + There's no official device ID assigned (yet), set the one you + wish to use for experimentation here. The default of -1 is + not valid and will cause the driver to fail at probe. diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile index dcc64a02f81f..803666e85414 100644 --- a/arch/um/drivers/Makefile +++ b/arch/um/drivers/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o obj-$(CONFIG_UML_RANDOM) += random.o obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o obj-$(CONFIG_UML_RTC) += rtc.o +obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o # pcap_user.o must be added explicitly. USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index d8845d4aac6a..6040817c036f 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -256,7 +256,8 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, goto out_close; } - if (os_set_fd_block(*fd_out, 0)) { + err = os_set_fd_block(*fd_out, 0); + if (err) { printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd " "non-blocking.\n"); goto out_close; diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c index 482a19c5105c..7334019c9e60 100644 --- a/arch/um/drivers/slip_user.c +++ b/arch/um/drivers/slip_user.c @@ -145,7 +145,8 @@ static int slip_open(void *data) } sfd = err; - if (set_up_tty(sfd)) + err = set_up_tty(sfd); + if (err) goto out_close2; pri->slave = sfd; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 8e0b43cf089f..cbd4f00fe77e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1242,8 +1242,7 @@ static int __init ubd_driver_init(void){ * enough. So use anyway the io thread. */ } stack = alloc_stack(0, 0); - io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), - &thread_fd); + io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd); if(io_pid < 0){ printk(KERN_ERR "ubd : Failed to start I/O thread (errno = %d) - " diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c new file mode 100644 index 000000000000..0b802834f40a --- /dev/null +++ b/arch/um/drivers/virt-pci.c @@ -0,0 +1,895 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/logic_iomem.h> +#include <linux/irqdomain.h> +#include <linux/virtio_pcidev.h> +#include <linux/virtio-uml.h> +#include <linux/delay.h> +#include <linux/msi.h> +#include <asm/unaligned.h> +#include <irq_kern.h> + +#define MAX_DEVICES 8 +#define MAX_MSI_VECTORS 32 +#define CFG_SPACE_SIZE 4096 + +/* for MSI-X we have a 32-bit payload */ +#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) +#define NUM_IRQ_MSGS 10 + +#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1)) +#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1) + +struct um_pci_device { + struct virtio_device *vdev; + + /* for now just standard BARs */ + u8 resptr[PCI_STD_NUM_BARS]; + + struct virtqueue *cmd_vq, *irq_vq; + +#define UM_PCI_STAT_WAITING 0 + unsigned long status; + + int irq; +}; + +struct um_pci_device_reg { + struct um_pci_device *dev; + void __iomem *iomem; +}; + +static struct pci_host_bridge *bridge; +static DEFINE_MUTEX(um_pci_mtx); +static struct um_pci_device_reg um_pci_devices[MAX_DEVICES]; +static struct fwnode_handle *um_pci_fwnode; +static struct irq_domain *um_pci_inner_domain; +static struct irq_domain *um_pci_msi_domain; +static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; + +#define UM_VIRT_PCI_MAXDELAY 40000 + +static int um_pci_send_cmd(struct um_pci_device *dev, + struct virtio_pcidev_msg *cmd, + unsigned int cmd_size, + const void *extra, unsigned int extra_size, + void *out, unsigned int out_size) +{ + struct scatterlist out_sg, extra_sg, in_sg; + struct scatterlist *sgs_list[] = { + [0] = &out_sg, + [1] = extra ? &extra_sg : &in_sg, + [2] = extra ? &in_sg : NULL, + }; + int delay_count = 0; + int ret, len; + bool posted; + + if (WARN_ON(cmd_size < sizeof(*cmd))) + return -EINVAL; + + switch (cmd->op) { + case VIRTIO_PCIDEV_OP_CFG_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_WRITE: + case VIRTIO_PCIDEV_OP_MMIO_MEMSET: + /* in PCI, writes are posted, so don't wait */ + posted = !out; + WARN_ON(!posted); + break; + default: + posted = false; + break; + } + + if (posted) { + u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); + + if (ncmd) { + memcpy(ncmd, cmd, cmd_size); + if (extra) + memcpy(ncmd + cmd_size, extra, extra_size); + cmd = (void *)ncmd; + cmd_size += extra_size; + extra = NULL; + extra_size = 0; + } else { + /* try without allocating memory */ + posted = false; + } + } + + sg_init_one(&out_sg, cmd, cmd_size); + if (extra) + sg_init_one(&extra_sg, extra, extra_size); + if (out) + sg_init_one(&in_sg, out, out_size); + + /* add to internal virtio queue */ + ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, + extra ? 2 : 1, + out ? 1 : 0, + posted ? cmd : HANDLE_NO_FREE(cmd), + GFP_ATOMIC); + if (ret) + return ret; + + if (posted) { + virtqueue_kick(dev->cmd_vq); + return 0; + } + + /* kick and poll for getting a response on the queue */ + set_bit(UM_PCI_STAT_WAITING, &dev->status); + virtqueue_kick(dev->cmd_vq); + + while (1) { + void *completed = virtqueue_get_buf(dev->cmd_vq, &len); + + if (completed == HANDLE_NO_FREE(cmd)) + break; + + if (completed && !HANDLE_IS_NO_FREE(completed)) + kfree(completed); + + if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || + ++delay_count > UM_VIRT_PCI_MAXDELAY, + "um virt-pci delay: %d", delay_count)) { + ret = -EIO; + break; + } + udelay(1); + } + clear_bit(UM_PCI_STAT_WAITING, &dev->status); + + return ret; +} + +static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, + int size) +{ + struct um_pci_device_reg *reg = priv; + struct um_pci_device *dev = reg->dev; + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_READ, + .size = size, + .addr = offset, + }; + /* maximum size - we may only use parts of it */ + u8 data[8]; + + if (!dev) + return ~0ULL; + + memset(data, 0xff, sizeof(data)); + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid config space read size %d\n", size); + return ~0ULL; + } + + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, + data, sizeof(data))) + return ~0ULL; + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ~0ULL; + } +} + +static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + struct um_pci_device_reg *reg = priv; + struct um_pci_device *dev = reg->dev; + struct { + struct virtio_pcidev_msg hdr; + /* maximum size - we may only use parts of it */ + u8 data[8]; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .size = size, + .addr = offset, + }, + }; + + if (!dev) + return; + + switch (size) { + case 1: + msg.data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)msg.data); + break; + case 4: + put_unaligned_le32(val, (void *)msg.data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)msg.data); + break; +#endif + default: + WARN(1, "invalid config space write size %d\n", size); + return; + } + + WARN_ON(um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0)); +} + +static const struct logic_iomem_ops um_pci_device_cfgspace_ops = { + .read = um_pci_cfgspace_read, + .write = um_pci_cfgspace_write, +}; + +static void um_pci_bar_copy_from(void *priv, void *buffer, + unsigned int offset, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_READ, + .bar = *resptr, + .size = size, + .addr = offset, + }; + + memset(buffer, 0xff, size); + + um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size); +} + +static unsigned long um_pci_bar_read(void *priv, unsigned int offset, + int size) +{ + /* maximum size - we may only use parts of it */ + u8 data[8]; + + switch (size) { + case 1: + case 2: + case 4: +#ifdef CONFIG_64BIT + case 8: +#endif + break; + default: + WARN(1, "invalid config space read size %d\n", size); + return ~0ULL; + } + + um_pci_bar_copy_from(priv, data, offset, size); + + switch (size) { + case 1: + return data[0]; + case 2: + return le16_to_cpup((void *)data); + case 4: + return le32_to_cpup((void *)data); +#ifdef CONFIG_64BIT + case 8: + return le64_to_cpup((void *)data); +#endif + default: + return ~0ULL; + } +} + +static void um_pci_bar_copy_to(void *priv, unsigned int offset, + const void *buffer, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + struct virtio_pcidev_msg hdr = { + .op = VIRTIO_PCIDEV_OP_MMIO_WRITE, + .bar = *resptr, + .size = size, + .addr = offset, + }; + + um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0); +} + +static void um_pci_bar_write(void *priv, unsigned int offset, int size, + unsigned long val) +{ + /* maximum size - we may only use parts of it */ + u8 data[8]; + + switch (size) { + case 1: + data[0] = (u8)val; + break; + case 2: + put_unaligned_le16(val, (void *)data); + break; + case 4: + put_unaligned_le32(val, (void *)data); + break; +#ifdef CONFIG_64BIT + case 8: + put_unaligned_le64(val, (void *)data); + break; +#endif + default: + WARN(1, "invalid config space write size %d\n", size); + return; + } + + um_pci_bar_copy_to(priv, offset, data, size); +} + +static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size) +{ + u8 *resptr = priv; + struct um_pci_device *dev = container_of(resptr - *resptr, + struct um_pci_device, + resptr[0]); + struct { + struct virtio_pcidev_msg hdr; + u8 data; + } msg = { + .hdr = { + .op = VIRTIO_PCIDEV_OP_CFG_WRITE, + .bar = *resptr, + .size = size, + .addr = offset, + }, + .data = value, + }; + + um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0); +} + +static const struct logic_iomem_ops um_pci_device_bar_ops = { + .read = um_pci_bar_read, + .write = um_pci_bar_write, + .set = um_pci_bar_set, + .copy_from = um_pci_bar_copy_from, + .copy_to = um_pci_bar_copy_to, +}; + +static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn, + int where) +{ + struct um_pci_device_reg *dev; + unsigned int busn = bus->number; + + if (busn > 0) + return NULL; + + /* not allowing functions for now ... */ + if (devfn % 8) + return NULL; + + if (devfn / 8 >= ARRAY_SIZE(um_pci_devices)) + return NULL; + + dev = &um_pci_devices[devfn / 8]; + if (!dev) + return NULL; + + return (void __iomem *)((unsigned long)dev->iomem + where); +} + +static struct pci_ops um_pci_ops = { + .map_bus = um_pci_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void um_pci_rescan(void) +{ + pci_lock_rescan_remove(); + pci_rescan_bus(bridge->bus); + pci_unlock_rescan_remove(); +} + +static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick) +{ + struct scatterlist sg[1]; + + sg_init_one(sg, buf, MAX_IRQ_MSG_SIZE); + if (virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC)) + kfree(buf); + else if (kick) + virtqueue_kick(vq); +} + +static void um_pci_handle_irq_message(struct virtqueue *vq, + struct virtio_pcidev_msg *msg) +{ + struct virtio_device *vdev = vq->vdev; + struct um_pci_device *dev = vdev->priv; + + /* we should properly chain interrupts, but on ARCH=um we don't care */ + + switch (msg->op) { + case VIRTIO_PCIDEV_OP_INT: + generic_handle_irq(dev->irq); + break; + case VIRTIO_PCIDEV_OP_MSI: + /* our MSI message is just the interrupt number */ + if (msg->size == sizeof(u32)) + generic_handle_irq(le32_to_cpup((void *)msg->data)); + else + generic_handle_irq(le16_to_cpup((void *)msg->data)); + break; + case VIRTIO_PCIDEV_OP_PME: + /* nothing to do - we already woke up due to the message */ + break; + default: + dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op); + break; + } +} + +static void um_pci_cmd_vq_cb(struct virtqueue *vq) +{ + struct virtio_device *vdev = vq->vdev; + struct um_pci_device *dev = vdev->priv; + void *cmd; + int len; + + if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) + return; + + while ((cmd = virtqueue_get_buf(vq, &len))) { + if (WARN_ON(HANDLE_IS_NO_FREE(cmd))) + continue; + kfree(cmd); + } +} + +static void um_pci_irq_vq_cb(struct virtqueue *vq) +{ + struct virtio_pcidev_msg *msg; + int len; + + while ((msg = virtqueue_get_buf(vq, &len))) { + if (len >= sizeof(*msg)) + um_pci_handle_irq_message(vq, msg); + + /* recycle the message buffer */ + um_pci_irq_vq_addbuf(vq, msg, true); + } +} + +static int um_pci_init_vqs(struct um_pci_device *dev) +{ + struct virtqueue *vqs[2]; + static const char *const names[2] = { "cmd", "irq" }; + vq_callback_t *cbs[2] = { um_pci_cmd_vq_cb, um_pci_irq_vq_cb }; + int err, i; + + err = virtio_find_vqs(dev->vdev, 2, vqs, cbs, names, NULL); + if (err) + return err; + + dev->cmd_vq = vqs[0]; + dev->irq_vq = vqs[1]; + + for (i = 0; i < NUM_IRQ_MSGS; i++) { + void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); + + if (msg) + um_pci_irq_vq_addbuf(dev->irq_vq, msg, false); + } + + virtqueue_kick(dev->irq_vq); + + return 0; +} + +static int um_pci_virtio_probe(struct virtio_device *vdev) +{ + struct um_pci_device *dev; + int i, free = -1; + int err = -ENOSPC; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->vdev = vdev; + vdev->priv = dev; + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev) + continue; + free = i; + break; + } + + if (free < 0) + goto error; + + err = um_pci_init_vqs(dev); + if (err) + goto error; + + dev->irq = irq_alloc_desc(numa_node_id()); + if (dev->irq < 0) { + err = dev->irq; + goto error; + } + um_pci_devices[free].dev = dev; + vdev->priv = dev; + + mutex_unlock(&um_pci_mtx); + + device_set_wakeup_enable(&vdev->dev, true); + + /* + * In order to do suspend-resume properly, don't allow VQs + * to be suspended. + */ + virtio_uml_set_no_vq_suspend(vdev, true); + + um_pci_rescan(); + return 0; +error: + mutex_unlock(&um_pci_mtx); + kfree(dev); + return err; +} + +static void um_pci_virtio_remove(struct virtio_device *vdev) +{ + struct um_pci_device *dev = vdev->priv; + int i; + + /* Stop all virtqueues */ + vdev->config->reset(vdev); + vdev->config->del_vqs(vdev); + + device_set_wakeup_enable(&vdev->dev, false); + + mutex_lock(&um_pci_mtx); + for (i = 0; i < MAX_DEVICES; i++) { + if (um_pci_devices[i].dev != dev) + continue; + um_pci_devices[i].dev = NULL; + irq_free_desc(dev->irq); + } + mutex_unlock(&um_pci_mtx); + + um_pci_rescan(); + + kfree(dev); +} + +static struct virtio_device_id id_table[] = { + { CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; +MODULE_DEVICE_TABLE(virtio, id_table); + +static struct virtio_driver um_pci_virtio_driver = { + .driver.name = "virtio-pci", + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = um_pci_virtio_probe, + .remove = um_pci_virtio_remove, +}; + +static struct resource virt_cfgspace_resource = { + .name = "PCI config space", + .start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE, + .end = 0xf0000000 - 1, + .flags = IORESOURCE_MEM, +}; + +static long um_pci_map_cfgspace(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE)) + return -EINVAL; + + if (offset / CFG_SPACE_SIZE < MAX_DEVICES) { + *ops = &um_pci_device_cfgspace_ops; + *priv = &um_pci_devices[offset / CFG_SPACE_SIZE]; + return 0; + } + + WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size); + return -ENOENT; +} + +static const struct logic_iomem_region_ops um_pci_cfgspace_ops = { + .map = um_pci_map_cfgspace, +}; + +static struct resource virt_iomem_resource = { + .name = "PCI iomem", + .start = 0xf0000000, + .end = 0xffffffff, + .flags = IORESOURCE_MEM, +}; + +struct um_pci_map_iomem_data { + unsigned long offset; + size_t size; + const struct logic_iomem_ops **ops; + void **priv; + long ret; +}; + +static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data) +{ + struct um_pci_map_iomem_data *data = _data; + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; + struct um_pci_device *dev; + int i; + + if (!reg->dev) + return 0; + + for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) { + struct resource *r = &pdev->resource[i]; + + if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM) + continue; + + /* + * must be the whole or part of the resource, + * not allowed to only overlap + */ + if (data->offset < r->start || data->offset > r->end) + continue; + if (data->offset + data->size - 1 > r->end) + continue; + + dev = reg->dev; + *data->ops = &um_pci_device_bar_ops; + dev->resptr[i] = i; + *data->priv = &dev->resptr[i]; + data->ret = data->offset - r->start; + + /* no need to continue */ + return 1; + } + + return 0; +} + +static long um_pci_map_iomem(unsigned long offset, size_t size, + const struct logic_iomem_ops **ops, + void **priv) +{ + struct um_pci_map_iomem_data data = { + /* we want the full address here */ + .offset = offset + virt_iomem_resource.start, + .size = size, + .ops = ops, + .priv = priv, + .ret = -ENOENT, + }; + + pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &data); + return data.ret; +} + +static const struct logic_iomem_region_ops um_pci_iomem_ops = { + .map = um_pci_map_iomem, +}; + +static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + /* + * This is a very low address and not actually valid 'physical' memory + * in UML, so we can simply map MSI(-X) vectors to there, it cannot be + * legitimately written to by the device in any other way. + * We use the (virtual) IRQ number here as the message to simplify the + * code that receives the message, where for now we simply trust the + * device to send the correct message. + */ + msg->address_hi = 0; + msg->address_lo = 0xa0000; + msg->data = data->irq; +} + +static struct irq_chip um_pci_msi_bottom_irq_chip = { + .name = "UM virtio MSI", + .irq_compose_msi_msg = um_pci_compose_msi_msg, +}; + +static int um_pci_inner_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *args) +{ + unsigned long bit; + + WARN_ON(nr_irqs != 1); + + mutex_lock(&um_pci_mtx); + bit = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS); + if (bit >= MAX_MSI_VECTORS) { + mutex_unlock(&um_pci_mtx); + return -ENOSPC; + } + + set_bit(bit, um_pci_msi_used); + mutex_unlock(&um_pci_mtx); + + irq_domain_set_info(domain, virq, bit, &um_pci_msi_bottom_irq_chip, + domain->host_data, handle_simple_irq, + NULL, NULL); + + return 0; +} + +static void um_pci_inner_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + + mutex_lock(&um_pci_mtx); + + if (!test_bit(d->hwirq, um_pci_msi_used)) + pr_err("trying to free unused MSI#%lu\n", d->hwirq); + else + __clear_bit(d->hwirq, um_pci_msi_used); + + mutex_unlock(&um_pci_mtx); +} + +static const struct irq_domain_ops um_pci_inner_domain_ops = { + .alloc = um_pci_inner_domain_alloc, + .free = um_pci_inner_domain_free, +}; + +static struct irq_chip um_pci_msi_irq_chip = { + .name = "UM virtio PCIe MSI", + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, +}; + +static struct msi_domain_info um_pci_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | + MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .chip = &um_pci_msi_irq_chip, +}; + +static struct resource busn_resource = { + .name = "PCI busn", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUS, +}; + +static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin) +{ + struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8]; + + if (WARN_ON(!reg->dev)) + return -EINVAL; + + /* Yes, we map all pins to the same IRQ ... doesn't matter for now. */ + return reg->dev->irq; +} + +void *pci_root_bus_fwnode(struct pci_bus *bus) +{ + return um_pci_fwnode; +} + +int um_pci_init(void) +{ + int err, i; + + WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource, + &um_pci_cfgspace_ops)); + WARN_ON(logic_iomem_add_region(&virt_iomem_resource, + &um_pci_iomem_ops)); + + if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0, + "No virtio device ID configured for PCI - no PCI support\n")) + return 0; + + bridge = pci_alloc_host_bridge(0); + if (!bridge) + return -ENOMEM; + + um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci"); + if (!um_pci_fwnode) { + err = -ENOMEM; + goto free; + } + + um_pci_inner_domain = __irq_domain_add(um_pci_fwnode, MAX_MSI_VECTORS, + MAX_MSI_VECTORS, 0, + &um_pci_inner_domain_ops, NULL); + if (!um_pci_inner_domain) { + err = -ENOMEM; + goto free; + } + + um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode, + &um_pci_msi_domain_info, + um_pci_inner_domain); + if (!um_pci_msi_domain) { + err = -ENOMEM; + goto free; + } + + pci_add_resource(&bridge->windows, &virt_iomem_resource); + pci_add_resource(&bridge->windows, &busn_resource); + bridge->ops = &um_pci_ops; + bridge->map_irq = um_pci_map_irq; + + for (i = 0; i < MAX_DEVICES; i++) { + resource_size_t start; + + start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE; + um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE); + if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) { + err = -ENOMEM; + goto free; + } + } + + err = pci_host_probe(bridge); + if (err) + goto free; + + err = register_virtio_driver(&um_pci_virtio_driver); + if (err) + goto free; + return 0; +free: + if (um_pci_inner_domain) + irq_domain_remove(um_pci_inner_domain); + if (um_pci_fwnode) + irq_domain_free_fwnode(um_pci_fwnode); + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); + return err; +} +module_init(um_pci_init); + +void um_pci_exit(void) +{ + unregister_virtio_driver(&um_pci_virtio_driver); + irq_domain_remove(um_pci_msi_domain); + irq_domain_remove(um_pci_inner_domain); + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); +} +module_exit(um_pci_exit); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 91ddf74ca888..4412d6febade 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -56,6 +56,7 @@ struct virtio_uml_device { u8 status; u8 registered:1; u8 suspended:1; + u8 no_vq_suspend:1; u8 config_changed_irq:1; uint64_t vq_irq_vq_map; @@ -1098,6 +1099,19 @@ static void virtio_uml_release_dev(struct device *d) kfree(vu_dev); } +void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, + bool no_vq_suspend) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + if (WARN_ON(vdev->config != &virtio_uml_config_ops)) + return; + + vu_dev->no_vq_suspend = no_vq_suspend; + dev_info(&vdev->dev, "%sabled VQ suspend\n", + no_vq_suspend ? "dis" : "en"); +} + /* Platform device */ static int virtio_uml_probe(struct platform_device *pdev) @@ -1302,13 +1316,16 @@ MODULE_DEVICE_TABLE(of, virtio_uml_match); static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); - struct virtqueue *vq; - virtio_device_for_each_vq((&vu_dev->vdev), vq) { - struct virtio_uml_vq_info *info = vq->priv; + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; - info->suspended = true; - vhost_user_set_vring_enable(vu_dev, vq->index, false); + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = true; + vhost_user_set_vring_enable(vu_dev, vq->index, false); + } } if (!device_may_wakeup(&vu_dev->vdev.dev)) { @@ -1322,13 +1339,16 @@ static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) static int virtio_uml_resume(struct platform_device *pdev) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); - struct virtqueue *vq; - virtio_device_for_each_vq((&vu_dev->vdev), vq) { - struct virtio_uml_vq_info *info = vq->priv; + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; - info->suspended = false; - vhost_user_set_vring_enable(vu_dev, vq->index, true); + info->suspended = false; + vhost_user_set_vring_enable(vu_dev, vq->index, true); + } } vu_dev->suspended = false; diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index d7492e5a1bbb..e5a7b552bb38 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -7,8 +7,8 @@ generic-y += device.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h +generic-y += fb.h generic-y += ftrace.h -generic-y += futex.h generic-y += hw_irq.h generic-y += irq_regs.h generic-y += irq_work.h @@ -17,7 +17,6 @@ generic-y += mcs_spinlock.h generic-y += mmiowb.h generic-y += module.lds.h generic-y += param.h -generic-y += pci.h generic-y += percpu.h generic-y += preempt.h generic-y += softirq_stack.h @@ -27,3 +26,4 @@ generic-y += trace_clock.h generic-y += word-at-a-time.h generic-y += kprobes.h generic-y += mm_hooks.h +generic-y += vga.h diff --git a/arch/um/include/asm/cacheflush.h b/arch/um/include/asm/cacheflush.h new file mode 100644 index 000000000000..4c9858cd36ec --- /dev/null +++ b/arch/um/include/asm/cacheflush.h @@ -0,0 +1,9 @@ +#ifndef __UM_ASM_CACHEFLUSH_H +#define __UM_ASM_CACHEFLUSH_H + +#include <asm/tlbflush.h> +#define flush_cache_vmap flush_tlb_kernel_range +#define flush_cache_vunmap flush_tlb_kernel_range + +#include <asm-generic/cacheflush.h> +#endif /* __UM_ASM_CACHEFLUSH_H */ diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h new file mode 100644 index 000000000000..19cd7ed6ec3c --- /dev/null +++ b/arch/um/include/asm/cpufeature.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UM_CPUFEATURE_H +#define _ASM_UM_CPUFEATURE_H + +#include <asm/processor.h> + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) + +#include <asm/asm.h> +#include <linux/bitops.h> + +extern const char * const x86_cap_flags[NCAPINTS*32]; +extern const char * const x86_power_flags[32]; +#define X86_CAP_FMT "%s" +#define x86_cap_flag(flag) x86_cap_flags[flag] + +/* + * In order to save room, we index into this array by doing + * X86_BUG_<name> - NCAPINTS*32. + */ +extern const char * const x86_bug_flags[NBUGINTS*32]; + +#define test_cpu_cap(c, bit) \ + test_bit(bit, (unsigned long *)((c)->x86_capability)) + +/* + * There are 32 bits/features in each mask word. The high bits + * (selected with (bit>>5) give us the word number and the low 5 + * bits give us the bit/feature number inside the word. + * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can + * see if it is set in the mask word. + */ +#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ + (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) + +#define cpu_has(c, bit) \ + test_cpu_cap(c, bit) + +#define this_cpu_has(bit) \ + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, \ + (unsigned long __percpu *)&cpu_info.x86_capability)) + +/* + * This macro is for detection of features which need kernel + * infrastructure to be used. It may *not* directly test the CPU + * itself. Use the cpu_has() family if you want true runtime + * testing of CPU features, like in hypervisor code where you are + * supporting a possible guest feature where host support for it + * is not relevant. + */ +#define cpu_feature_enabled(bit) \ + (__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit)) + +#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) + +#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) + +extern void setup_clear_cpu_cap(unsigned int bit); + +#define setup_force_cpu_cap(bit) do { \ + set_cpu_cap(&boot_cpu_data, bit); \ + set_bit(bit, (unsigned long *)cpu_caps_set); \ +} while (0) + +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + +#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) + +/* + * Workaround for the sake of BPF compilation which utilizes kernel + * headers, but clang does not support ASM GOTO and fails the build. + */ +#ifndef __BPF_TRACING__ +#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" +#endif + +#define static_cpu_has(bit) boot_cpu_has(bit) + +#else + +/* + * Static testing of CPU features. Used the same as boot_cpu_has(). It + * statically patches the target code for additional performance. Use + * static_cpu_has() only in fast paths, where every cycle counts. Which + * means that the boot_cpu_has() variant is already fast enough for the + * majority of cases and you should stick to using it as it is generally + * only two instructions: a RIP-relative MOV and a TEST. + */ +static __always_inline bool _static_cpu_has(u16 bit) +{ + asm_volatile_goto("1: jmp 6f\n" + "2:\n" + ".skip -(((5f-4f) - (2b-1b)) > 0) * " + "((5f-4f) - (2b-1b)),0x90\n" + "3:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 4f - .\n" /* repl offset */ + " .word %P[always]\n" /* always replace */ + " .byte 3b - 1b\n" /* src len */ + " .byte 5f - 4f\n" /* repl len */ + " .byte 3b - 2b\n" /* pad len */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "4: jmp %l[t_no]\n" + "5:\n" + ".previous\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 0\n" /* no replacement */ + " .word %P[feature]\n" /* feature bit */ + " .byte 3b - 1b\n" /* src len */ + " .byte 0\n" /* repl len */ + " .byte 0\n" /* pad len */ + ".previous\n" + ".section .altinstr_aux,\"ax\"\n" + "6:\n" + " testb %[bitnum],%[cap_byte]\n" + " jnz %l[t_yes]\n" + " jmp %l[t_no]\n" + ".previous\n" + : : [feature] "i" (bit), + [always] "i" (X86_FEATURE_ALWAYS), + [bitnum] "i" (1 << (bit & 7)), + [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3]) + : : t_yes, t_no); +t_yes: + return true; +t_no: + return false; +} + +#define static_cpu_has(bit) \ +( \ + __builtin_constant_p(boot_cpu_has(bit)) ? \ + boot_cpu_has(bit) : \ + _static_cpu_has(bit) \ +) +#endif + +#define cpu_has_bug(c, bit) cpu_has(c, (bit)) +#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) + +#define static_cpu_has_bug(bit) static_cpu_has((bit)) +#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) +#define boot_cpu_set_bug(bit) set_cpu_cap(&boot_cpu_data, (bit)) + +#define MAX_CPU_FEATURES (NCAPINTS * 32) +#define cpu_have_feature boot_cpu_has + +#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" +#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ + boot_cpu_data.x86_model + +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ +#endif /* _ASM_UM_CPUFEATURE_H */ diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h new file mode 100644 index 000000000000..71bfd9ef3938 --- /dev/null +++ b/arch/um/include/asm/fpu/api.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_UM_FPU_API_H +#define _ASM_UM_FPU_API_H + +/* Copyright (c) 2020 Cambridge Greys Ltd + * Copyright (c) 2020 Red Hat Inc. + * A set of "dummy" defines to allow the direct inclusion + * of x86 optimized copy, xor, etc routines into the + * UML code tree. */ + +#define kernel_fpu_begin() (void)0 +#define kernel_fpu_end() (void)0 + +static inline bool irq_fpu_usable(void) +{ + return true; +} + + +#endif diff --git a/arch/um/include/asm/futex.h b/arch/um/include/asm/futex.h new file mode 100644 index 000000000000..780aa6bfc050 --- /dev/null +++ b/arch/um/include/asm/futex.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UM_FUTEX_H +#define _ASM_UM_FUTEX_H + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> + + +int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr); +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval); + +#endif diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h index 6ce18d343997..9ea42cc746d9 100644 --- a/arch/um/include/asm/io.h +++ b/arch/um/include/asm/io.h @@ -3,16 +3,23 @@ #define _ASM_UM_IO_H #include <linux/types.h> +/* get emulated iomem (if desired) */ +#include <asm-generic/logic_io.h> + +#ifndef ioremap #define ioremap ioremap static inline void __iomem *ioremap(phys_addr_t offset, size_t size) { return NULL; } +#endif /* ioremap */ +#ifndef iounmap #define iounmap iounmap static inline void iounmap(void __iomem *addr) { } +#endif /* iounmap */ #include <asm-generic/io.h> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index 3f5d3e8228fc..e187c789369d 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -31,7 +31,13 @@ #endif -#define NR_IRQS 64 +#define UM_LAST_SIGNAL_IRQ 64 +/* If we have (simulated) PCI MSI, allow 64 more interrupt numbers for it */ +#ifdef CONFIG_PCI_MSI +#define NR_IRQS (UM_LAST_SIGNAL_IRQ + 64) +#else +#define NR_IRQS UM_LAST_SIGNAL_IRQ +#endif /* CONFIG_PCI_MSI */ #include <asm-generic/irq.h> #endif diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h index 0642ad9035d1..dab5744e9253 100644 --- a/arch/um/include/asm/irqflags.h +++ b/arch/um/include/asm/irqflags.h @@ -2,15 +2,15 @@ #ifndef __UM_IRQFLAGS_H #define __UM_IRQFLAGS_H -extern int get_signals(void); -extern int set_signals(int enable); -extern void block_signals(void); -extern void unblock_signals(void); +extern int signals_enabled; +int set_signals(int enable); +void block_signals(void); +void unblock_signals(void); #define arch_local_save_flags arch_local_save_flags static inline unsigned long arch_local_save_flags(void) { - return get_signals(); + return signals_enabled; } #define arch_local_irq_restore arch_local_irq_restore diff --git a/arch/um/include/asm/msi.h b/arch/um/include/asm/msi.h new file mode 100644 index 000000000000..c8c6c381f394 --- /dev/null +++ b/arch/um/include/asm/msi.h @@ -0,0 +1 @@ +#include <asm-generic/msi.h> diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h new file mode 100644 index 000000000000..da13fd5519ef --- /dev/null +++ b/arch/um/include/asm/pci.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_UM_PCI_H +#define __ASM_UM_PCI_H +#include <linux/types.h> +#include <asm/io.h> + +#define PCIBIOS_MIN_IO 0 +#define PCIBIOS_MIN_MEM 0 + +#define pcibios_assign_all_busses() 1 + +extern int isa_dma_bridge_buggy; + +#ifdef CONFIG_PCI +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + /* no legacy IRQs */ + return -ENODEV; +} +#endif + +#ifdef CONFIG_PCI_DOMAINS +static inline int pci_proc_domain(struct pci_bus *bus) +{ + /* always show the domain in /proc */ + return 1; +} +#endif /* CONFIG_PCI */ + +#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +/* + * This is a bit of an annoying hack, and it assumes we only have + * the virt-pci (if anything). Which is true, but still. + */ +void *pci_root_bus_fwnode(struct pci_bus *bus); +#define pci_root_bus_fwnode pci_root_bus_fwnode +#endif + +#endif /* __ASM_UM_PCI_H */ diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index afd9b267cf81..b5cf0ed116d9 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -16,6 +16,8 @@ struct task_struct; #include <linux/prefetch.h> +#include <asm/cpufeatures.h> + struct mm_struct; struct thread_struct { @@ -90,12 +92,18 @@ extern void start_thread(struct pt_regs *regs, unsigned long entry, struct cpuinfo_um { unsigned long loops_per_jiffy; int ipi_pipe[2]; + int cache_alignment; + union { + __u32 x86_capability[NCAPINTS + NBUGINTS]; + unsigned long x86_capability_alignment; + }; }; extern struct cpuinfo_um boot_cpu_data; #define cpu_data (&boot_cpu_data) #define current_cpu_data boot_cpu_data +#define cache_line_size() (boot_cpu_data.cache_alignment) #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf) extern unsigned long get_wchan(struct task_struct *p); diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index ff9c62828962..0422467bda5b 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -5,7 +5,7 @@ #include <linux/mm.h> #include <asm/tlbflush.h> -#include <asm-generic/cacheflush.h> +#include <asm/cacheflush.h> #include <asm-generic/tlb.h> #endif diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h index 36b33d62a35d..f512704a9ec7 100644 --- a/arch/um/include/asm/xor.h +++ b/arch/um/include/asm/xor.h @@ -1,7 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include <asm-generic/xor.h> +#ifndef _ASM_UM_XOR_H +#define _ASM_UM_XOR_H + +#ifdef CONFIG_64BIT +#undef CONFIG_X86_32 +#else +#define CONFIG_X86_32 1 +#endif + +#include <asm/cpufeature.h> +#include <../../x86/include/asm/xor.h> #include <linux/time-internal.h> +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT +#undef XOR_SELECT_TEMPLATE /* pick an arbitrary one - measuring isn't possible with inf-cpu */ #define XOR_SELECT_TEMPLATE(x) \ (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL) +#endif + +#endif diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h index 759956ab0108..b22226634ff6 100644 --- a/arch/um/include/linux/time-internal.h +++ b/arch/um/include/linux/time-internal.h @@ -8,17 +8,11 @@ #define __TIMER_INTERNAL_H__ #include <linux/list.h> #include <asm/bug.h> +#include <shared/timetravel.h> #define TIMER_MULTIPLIER 256 #define TIMER_MIN_DELTA 500 -enum time_travel_mode { - TT_MODE_OFF, - TT_MODE_BASIC, - TT_MODE_INFCPU, - TT_MODE_EXTERNAL, -}; - #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT struct time_travel_event { unsigned long long time; @@ -27,8 +21,6 @@ struct time_travel_event { bool pending, onstack; }; -extern enum time_travel_mode time_travel_mode; - void time_travel_sleep(void); static inline void @@ -62,8 +54,6 @@ bool time_travel_del_event(struct time_travel_event *e); struct time_travel_event { }; -#define time_travel_mode TT_MODE_OFF - static inline void time_travel_sleep(void) { } diff --git a/arch/um/include/linux/virtio-uml.h b/arch/um/include/linux/virtio-uml.h new file mode 100644 index 000000000000..2f652fa90f04 --- /dev/null +++ b/arch/um/include/linux/virtio-uml.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ + +#ifndef __VIRTIO_UML_H__ +#define __VIRTIO_UML_H__ + +void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, + bool no_vq_suspend); + +#endif /* __VIRTIO_UML_H__ */ diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index 07239e801a5b..065829f443ae 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -17,6 +17,7 @@ enum um_irq_type { struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); +void sigio_run_timetravel_handlers(void); extern void free_irq_by_fd(int fd); extern void deactivate_fd(int fd, int irqnum); extern int deactivate_all_fds(void); diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index 2888ec812f6e..a2cfd42608a0 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -33,7 +33,6 @@ extern int handle_page_fault(unsigned long address, unsigned long ip, int is_write, int is_user, int *code_out); extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs); -extern int smp_sigio_handler(void); extern void initial_thread_cb(void (*proc)(void *), void *arg); extern int is_syscall(unsigned long addr); diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h index 85a1cc290ecb..bdb2869b72b3 100644 --- a/arch/um/include/shared/longjmp.h +++ b/arch/um/include/shared/longjmp.h @@ -5,6 +5,7 @@ #include <sysdep/archsetjmp.h> #include <os.h> +extern int signals_enabled; extern int setjmp(jmp_buf); extern void longjmp(jmp_buf, int); @@ -12,13 +13,12 @@ extern void longjmp(jmp_buf, int); longjmp(*buf, val); \ } while(0) -#define UML_SETJMP(buf) ({ \ - int n; \ - volatile int enable; \ - enable = get_signals(); \ - n = setjmp(*buf); \ - if(n != 0) \ - set_signals_trace(enable); \ +#define UML_SETJMP(buf) ({ \ + int n, enable; \ + enable = *(volatile int *)&signals_enabled; \ + n = setjmp(*buf); \ + if(n != 0) \ + set_signals_trace(enable); \ n; }) #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 13d86f94cf0f..60b84edc8a68 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -187,6 +187,9 @@ int os_poll(unsigned int n, const int *fds); extern void os_early_checks(void); extern void os_check_bugs(void); extern void check_host_supports_tls(int *supports_tls, int *tls_min); +extern void get_host_cpu_features( + void (*flags_helper_func)(char *line), + void (*cache_helper_func)(char *line)); /* mem.c */ extern int create_mem_file(unsigned long long len); @@ -211,7 +214,6 @@ extern int os_protect_memory(void *addr, unsigned long len, extern int os_unmap_memory(void *addr, int len); extern int os_drop_memory(void *addr, int length); extern int can_drop_memory(void); -extern void os_flush_stdout(void); extern int os_mincore(void *addr, unsigned long len); /* execvp.c */ @@ -237,12 +239,14 @@ extern void send_sigio_to_self(void); extern int change_sig(int signal, int on); extern void block_signals(void); extern void unblock_signals(void); -extern int get_signals(void); extern int set_signals(int enable); extern int set_signals_trace(int enable); extern int os_is_signal_stack(void); extern void deliver_alarm(void); extern void register_pm_wake_signal(void); +extern void block_signals_hard(void); +extern void unblock_signals_hard(void); +extern void mark_sigio_pending(void); /* util.c */ extern void stack_protections(unsigned long address); diff --git a/arch/um/include/shared/timetravel.h b/arch/um/include/shared/timetravel.h new file mode 100644 index 000000000000..e5c3d69f1b69 --- /dev/null +++ b/arch/um/include/shared/timetravel.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2021 Intel Corporation + */ +#ifndef _UM_TIME_TRAVEL_H_ +#define _UM_TIME_TRAVEL_H_ + +enum time_travel_mode { + TT_MODE_OFF, + TT_MODE_BASIC, + TT_MODE_INFCPU, + TT_MODE_EXTERNAL, +}; + +#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ + defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) +extern enum time_travel_mode time_travel_mode; +#else +#define time_travel_mode TT_MODE_OFF +#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ + +#endif /* _UM_TIME_TRAVEL_H_ */ diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index e698e0c7dbdc..1d18e4e46989 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -17,18 +17,19 @@ extra-y := vmlinux.lds obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ signal.o syscall.o sysrq.o time.o tlb.o trap.o \ - um_arch.o umid.o maccess.o kmsg_dump.o skas/ + um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o USER_OBJS := config.o include arch/um/scripts/Makefile.rules -targets := config.c config.tmp +targets := config.c config.tmp capflags.c # Be careful with the below Sed code - sed is pitfall-rich! # We use sed to lower build requirements, for "embedded" builders for instance. @@ -43,6 +44,15 @@ quiet_cmd_quote1 = QUOTE $@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE $(call if_changed,quote2) +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^ + +cpufeature = $(src)/../../x86/include/asm/cpufeatures.h +vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h + +$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE + $(call if_changed,mkcapflags) + quiet_cmd_quote2 = QUOTE $@ cmd_quote2 = sed -e '/CONFIG/{' \ -e 's/"CONFIG"//' \ diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c new file mode 100644 index 000000000000..7220615b3beb --- /dev/null +++ b/arch/um/kernel/ioport.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <asm/iomap.h> +#include <asm-generic/pci_iomap.h> + +void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, + unsigned int nr) +{ + return NULL; +} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 82af5191e73d..a8873d9bc28b 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -56,7 +56,7 @@ struct irq_entry { static DEFINE_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); -static DECLARE_BITMAP(irqs_allocated, NR_IRQS); +static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static bool irqs_suspended; static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) @@ -101,10 +101,12 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry, if (!reg->timetravel_handler) return false; - /* prevent nesting - we'll get it again later when we SIGIO ourselves */ - if (reg->pending_on_resume) - return true; - + /* + * Handle all messages - we might get multiple even while + * interrupts are already suspended, due to suspend order + * etc. Note that time_travel_add_irq_event() will not add + * an event twice, if it's pending already "first wins". + */ reg->timetravel_handler(reg->irq, entry->fd, reg->id, ®->event); if (!reg->event.pending) @@ -123,7 +125,8 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry, #endif static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, - struct uml_pt_regs *regs) + struct uml_pt_regs *regs, + bool timetravel_handlers_only) { struct irq_reg *reg = &entry->reg[t]; @@ -136,18 +139,29 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type if (irq_do_timetravel_handler(entry, t)) return; - if (irqs_suspended) + /* + * If we're called to only run time-travel handlers then don't + * actually proceed but mark sigio as pending (if applicable). + * For suspend/resume, timetravel_handlers_only may be true + * despite time-travel not being configured and used. + */ + if (timetravel_handlers_only) { +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + mark_sigio_pending(); +#endif return; + } irq_io_loop(reg, regs); } -void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +static void _sigio_handler(struct uml_pt_regs *regs, + bool timetravel_handlers_only) { struct irq_entry *irq_entry; int n, i; - if (irqs_suspended && !um_irq_timetravel_handler_used()) + if (timetravel_handlers_only && !um_irq_timetravel_handler_used()) return; while (1) { @@ -172,14 +186,20 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) irq_entry = os_epoll_get_data_pointer(i); for (t = 0; t < NUM_IRQ_TYPES; t++) - sigio_reg_handler(i, irq_entry, t, regs); + sigio_reg_handler(i, irq_entry, t, regs, + timetravel_handlers_only); } } - if (!irqs_suspended) + if (!timetravel_handlers_only) free_irqs(); } +void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + _sigio_handler(regs, irqs_suspended); +} + static struct irq_entry *get_irq_entry_by_fd(int fd) { struct irq_entry *walk; @@ -399,7 +419,8 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) void um_free_irq(int irq, void *dev) { - if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq)) + if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ, + "freeing invalid irq %d", irq)) return; free_irq_by_irq_and_dev(irq, dev); @@ -467,6 +488,11 @@ int um_request_irq_tt(int irq, int fd, enum um_irq_type type, devname, dev_id, timetravel_handler); } EXPORT_SYMBOL(um_request_irq_tt); + +void sigio_run_timetravel_handlers(void) +{ + _sigio_handler(NULL, true); +} #endif #ifdef CONFIG_PM_SLEEP @@ -623,7 +649,7 @@ void __init init_IRQ(void) irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); - for (i = 1; i < NR_IRQS; i++) + for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); /* Initialize EPOLL Loop */ os_setup_epoll(); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 8ade54a86a7e..b1e5634398d0 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -7,7 +7,7 @@ #include <os.h> EXPORT_SYMBOL(set_signals); -EXPORT_SYMBOL(get_signals); +EXPORT_SYMBOL(signals_enabled); EXPORT_SYMBOL(os_stat_fd); EXPORT_SYMBOL(os_stat_file); diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 592cdb138441..5afac0fef24e 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -29,7 +29,7 @@ stub_clone_handler(void) long err; err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, - (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); + (unsigned long)data + UM_KERN_PAGE_SIZE / 2); if (err) { data->parent_err = err; goto done; diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 2dec915abe6f..6c76df96e858 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -11,6 +11,7 @@ #include <asm/current.h> #include <asm/page.h> #include <kern_util.h> +#include <asm/futex.h> #include <os.h> pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) @@ -248,3 +249,138 @@ long __strnlen_user(const void __user *str, long len) return 0; } EXPORT_SYMBOL(__strnlen_user); + +/** + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant + * argument and comparison of the previous + * futex value with another constant. + * + * @encoded_op: encoded operation to execute + * @uaddr: pointer to user space address + * + * Return: + * 0 - On success + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Operation not supported + */ + +int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) +{ + int oldval, ret; + struct page *page; + unsigned long addr = (unsigned long) uaddr; + pte_t *pte; + + ret = -EFAULT; + if (!access_ok(uaddr, sizeof(*uaddr))) + return -EFAULT; + preempt_disable(); + pte = maybe_map(addr, 1); + if (pte == NULL) + goto out_inuser; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + addr = (unsigned long) page_address(page) + + (((unsigned long) addr) & ~PAGE_MASK); +#else + addr = (unsigned long) kmap_atomic(page) + + ((unsigned long) addr & ~PAGE_MASK); +#endif + uaddr = (u32 *) addr; + oldval = *uaddr; + + ret = 0; + + switch (op) { + case FUTEX_OP_SET: + *uaddr = oparg; + break; + case FUTEX_OP_ADD: + *uaddr += oparg; + break; + case FUTEX_OP_OR: + *uaddr |= oparg; + break; + case FUTEX_OP_ANDN: + *uaddr &= ~oparg; + break; + case FUTEX_OP_XOR: + *uaddr ^= oparg; + break; + default: + ret = -ENOSYS; + } +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic((void *)addr); +#endif + +out_inuser: + preempt_enable(); + + if (ret == 0) + *oval = oldval; + + return ret; +} +EXPORT_SYMBOL(arch_futex_atomic_op_inuser); + +/** + * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the + * uaddr with newval if the current value is + * oldval. + * @uval: pointer to store content of @uaddr + * @uaddr: pointer to user space address + * @oldval: old value + * @newval: new value to store to @uaddr + * + * Return: + * 0 - On success + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) + */ + +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + struct page *page; + pte_t *pte; + int ret = -EFAULT; + + if (!access_ok(uaddr, sizeof(*uaddr))) + return -EFAULT; + + preempt_disable(); + pte = maybe_map((unsigned long) uaddr, 1); + if (pte == NULL) + goto out_inatomic; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK); +#else + uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK); +#endif + + *uval = *uaddr; + + ret = cmpxchg(uaddr, oldval, newval); + +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic(uaddr); +#endif + ret = 0; + +out_inatomic: + preempt_enable(); + return ret; +} +EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index e0cdb9694fb8..fddd1dec27e6 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -68,23 +68,15 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, int ret; /* - * Poll outside the locked section (if we're not called to only read - * the response) so we can get interrupts for e.g. virtio while we're - * here, but then we need to lock to not get interrupted between the - * read of the message and write of the ACK. + * We can't unlock here, but interrupt signals with a timetravel_handler + * (see um_request_irq_tt) get to the timetravel_handler anyway. */ if (mode != TTMH_READ) { - bool disabled = irqs_disabled(); + BUG_ON(mode == TTMH_IDLE && !irqs_disabled()); - BUG_ON(mode == TTMH_IDLE && !disabled); - - if (disabled) - local_irq_enable(); while (os_poll(1, &time_travel_ext_fd) != 0) { /* nothing */ } - if (disabled) - local_irq_disable(); } ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); @@ -123,15 +115,15 @@ static u64 time_travel_ext_req(u32 op, u64 time) .time = time, .seq = mseq, }; - unsigned long flags; /* - * We need to save interrupts here and only restore when we - * got the ACK - otherwise we can get interrupted and send - * another request while we're still waiting for an ACK, but - * the peer doesn't know we got interrupted and will send - * the ACKs in the same order as the message, but we'd need - * to see them in the opposite order ... + * We need to block even the timetravel handlers of SIGIO here and + * only restore their use when we got the ACK - otherwise we may + * (will) get interrupted by that, try to queue the IRQ for future + * processing and thus send another request while we're still waiting + * for an ACK, but the peer doesn't know we got interrupted and will + * send the ACKs in the same order as the message, but we'd need to + * see them in the opposite order ... * * This wouldn't matter *too* much, but some ACKs carry the * current time (for UM_TIMETRAVEL_GET) and getting another @@ -140,7 +132,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) * The sequence number assignment that happens here lets us * debug such message handling issues more easily. */ - local_irq_save(flags); + block_signals_hard(); os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); while (msg.op != UM_TIMETRAVEL_ACK) @@ -152,7 +144,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) if (op == UM_TIMETRAVEL_GET) time_travel_set_time(msg.time); - local_irq_restore(flags); + unblock_signals_hard(); return msg.time; } @@ -352,9 +344,6 @@ void deliver_time_travel_irqs(void) while ((e = list_first_entry_or_null(&time_travel_irqs, struct time_travel_event, list))) { - WARN(e->time != time_travel_time, - "time moved from %lld to %lld before IRQ delivery\n", - time_travel_time, e->time); list_del(&e->list); e->pending = false; e->fn(e); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 9512253947d5..a149a5e9a16a 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -6,6 +6,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/ctype.h> #include <linux/module.h> #include <linux/panic_notifier.h> #include <linux/seq_file.h> @@ -17,6 +18,7 @@ #include <linux/suspend.h> #include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/sections.h> #include <asm/setup.h> #include <as-layout.h> @@ -51,9 +53,13 @@ static void __init add_arg(char *arg) */ struct cpuinfo_um boot_cpu_data = { .loops_per_jiffy = 0, - .ipi_pipe = { -1, -1 } + .ipi_pipe = { -1, -1 }, + .cache_alignment = L1_CACHE_BYTES, + .x86_capability = { 0 } }; +EXPORT_SYMBOL(boot_cpu_data); + union thread_union cpu0_irqstack __section(".data..init_irqstack") = { .thread_info = INIT_THREAD_INFO(init_task) }; @@ -63,17 +69,25 @@ static char host_info[(__NEW_UTS_LEN + 1) * 5]; static int show_cpuinfo(struct seq_file *m, void *v) { - int index = 0; + int i = 0; - seq_printf(m, "processor\t: %d\n", index); + seq_printf(m, "processor\t: %d\n", i); seq_printf(m, "vendor_id\t: User Mode Linux\n"); seq_printf(m, "model name\t: UML\n"); seq_printf(m, "mode\t\t: skas\n"); seq_printf(m, "host\t\t: %s\n", host_info); - seq_printf(m, "bogomips\t: %lu.%02lu\n\n", + seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no"); + seq_printf(m, "flags\t\t:"); + for (i = 0; i < 32*NCAPINTS; i++) + if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL)) + seq_printf(m, " %s", x86_cap_flags[i]); + seq_printf(m, "\n"); + seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment); + seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); + return 0; } @@ -262,6 +276,30 @@ EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) +static void parse_host_cpu_flags(char *line) +{ + int i; + for (i = 0; i < 32*NCAPINTS; i++) { + if ((x86_cap_flags[i] != NULL) && strstr(line, x86_cap_flags[i])) + set_cpu_cap(&boot_cpu_data, i); + } +} +static void parse_cache_line(char *line) +{ + long res; + char *to_parse = strstr(line, ":"); + if (to_parse) { + to_parse++; + while (*to_parse != 0 && isspace(*to_parse)) { + to_parse++; + } + if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res)) + boot_cpu_data.cache_alignment = res; + else + boot_cpu_data.cache_alignment = L1_CACHE_BYTES; + } +} + int __init linux_main(int argc, char **argv) { unsigned long avail, diff; @@ -298,6 +336,8 @@ int __init linux_main(int argc, char **argv) /* OS sanity checks that need to happen before the kernel runs */ os_early_checks(); + get_host_cpu_features(parse_host_cpu_flags, parse_cache_line); + brk_start = (unsigned long) sbrk(0); /* diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 9fa6e4187d4f..32e88baf18dd 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -64,7 +64,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) goto out_close; } - sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + sp = stack + UM_KERN_PAGE_SIZE; data.pre_exec = pre_exec; data.pre_data = pre_data; data.argv = argv; @@ -120,7 +120,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, if (stack == 0) return -ENOMEM; - sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + sp = stack + UM_KERN_PAGE_SIZE; pid = clone(proc, (void *) sp, flags, arg); if (pid < 0) { err = -errno; diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 96f511d1aabe..6de99bb16113 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -18,6 +18,7 @@ #include <sysdep/mcontext.h> #include <um_malloc.h> #include <sys/ucontext.h> +#include <timetravel.h> void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGTRAP] = relay_signal, @@ -62,17 +63,30 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_BIT 1 #define SIGALRM_MASK (1 << SIGALRM_BIT) -static int signals_enabled; +int signals_enabled; +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +static int signals_blocked; +#else +#define signals_blocked false +#endif static unsigned int signals_pending; static unsigned int signals_active = 0; void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { - int enabled; + int enabled = signals_enabled; - enabled = signals_enabled; - if (!enabled && (sig == SIGIO)) { - signals_pending |= SIGIO_MASK; + if ((signals_blocked || !enabled) && (sig == SIGIO)) { + /* + * In TT_MODE_EXTERNAL, need to still call time-travel + * handlers unless signals are also blocked for the + * external time message processing. This will mark + * signals_pending by itself (only if necessary.) + */ + if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL) + sigio_run_timetravel_handlers(); + else + signals_pending |= SIGIO_MASK; return; } @@ -129,7 +143,7 @@ void set_sigstack(void *sig_stack, int size) stack_t stack = { .ss_flags = 0, .ss_sp = sig_stack, - .ss_size = size - sizeof(void *) + .ss_size = size }; if (sigaltstack(&stack, NULL) != 0) @@ -334,11 +348,6 @@ void unblock_signals(void) } } -int get_signals(void) -{ - return signals_enabled; -} - int set_signals(int enable) { int ret; @@ -368,6 +377,39 @@ int set_signals_trace(int enable) return ret; } +#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT +void mark_sigio_pending(void) +{ + signals_pending |= SIGIO_MASK; +} + +void block_signals_hard(void) +{ + if (signals_blocked) + return; + signals_blocked = 1; + barrier(); +} + +void unblock_signals_hard(void) +{ + if (!signals_blocked) + return; + /* Must be set to 0 before we check the pending bits etc. */ + signals_blocked = 0; + barrier(); + + if (signals_pending && signals_enabled) { + /* this is a bit inefficient, but that's not really important */ + block_signals(); + unblock_signals(); + } else if (signals_pending & SIGIO_MASK) { + /* we need to run time-travel handlers even if not enabled */ + sigio_run_timetravel_handlers(); + } +} +#endif + int os_is_signal_stack(void) { stack_t ss; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index fba674fac8b7..87d3129e7362 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -327,7 +327,7 @@ int start_userspace(unsigned long stub_stack) } /* set stack pointer to the end of the stack page, so it can grow downwards */ - sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); + sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; flags = CLONE_FILES | SIGCHLD; diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index f79dc338279e..8a72c99994eb 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -321,6 +321,38 @@ static void __init check_coredump_limit(void) os_info("%llu\n", (unsigned long long)lim.rlim_max); } +void __init get_host_cpu_features( + void (*flags_helper_func)(char *line), + void (*cache_helper_func)(char *line)) +{ + FILE *cpuinfo; + char *line = NULL; + size_t len = 0; + int done_parsing = 0; + + cpuinfo = fopen("/proc/cpuinfo", "r"); + if (cpuinfo == NULL) { + os_info("Failed to get host CPU features\n"); + } else { + while ((getline(&line, &len, cpuinfo)) != -1) { + if (strstr(line, "flags")) { + flags_helper_func(line); + done_parsing++; + } + if (strstr(line, "cache_alignment")) { + cache_helper_func(line); + done_parsing++; + } + free(line); + line = NULL; + if (done_parsing > 1) + break; + } + fclose(cpuinfo); + } +} + + void __init os_early_checks(void) { int pid; diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 1db7913795f5..b3c1ae084180 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64 # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 +LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64 LINK-y += -m64 endif |