summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-27 09:55:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-27 09:55:30 -0700
commit653608c67ae3dce1c5dee8c620ce6016e174bbd1 (patch)
treeac055981ed8ed789284cd810c450595651d4311f
parente477dba5442c0af7acb9e8bbbbde1108a37ed39c (diff)
parent47ffe0578aee45fed3a06d5dcff76cdebb303163 (diff)
Merge tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull more xen updates from Juergen Gross: "A second round of Xen related changes and features: - a small fix of the xen-pciback driver for a warning issued by sparse - support PCI passthrough when using a PVH dom0 - enable loading the kernel in PVH mode at arbitrary addresses, avoiding conflicts with the memory map when running as a Xen dom0 using the host memory layout" * tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: x86/pvh: Add 64bit relocation page tables x86/kernel: Move page table macros to header x86/pvh: Set phys_base when calling xen_prepare_pvh() x86/pvh: Make PVH entrypoint PIC for x86-64 xen: sync elfnote.h from xen tree xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t xen/privcmd: Add new syscall to get gsi from dev xen/pvh: Setup gsi for passthrough device xen/pci: Add a function to reset device for xen
-rw-r--r--arch/x86/include/asm/pgtable_64.h23
-rw-r--r--arch/x86/kernel/head_64.S20
-rw-r--r--arch/x86/platform/pvh/head.S161
-rw-r--r--arch/x86/xen/enlighten_pvh.c23
-rw-r--r--drivers/acpi/pci_irq.c2
-rw-r--r--drivers/xen/Kconfig1
-rw-r--r--drivers/xen/acpi.c50
-rw-r--r--drivers/xen/pci.c13
-rw-r--r--drivers/xen/privcmd.c32
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c2
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c78
-rw-r--r--include/linux/acpi.h1
-rw-r--r--include/uapi/xen/privcmd.h7
-rw-r--r--include/xen/acpi.h27
-rw-r--r--include/xen/interface/elfnote.h93
-rw-r--r--include/xen/interface/physdev.h17
-rw-r--r--include/xen/pci.h6
17 files changed, 509 insertions, 47 deletions
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e9db77231ac..d1426b64c1b9 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
#include <asm/pgtable-invert.h>
-#endif /* !__ASSEMBLY__ */
+#else /* __ASSEMBLY__ */
+
+#define l4_index(x) (((x) >> 39) & 511)
+#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
+L3_START_KERNEL = pud_index(__START_KERNEL_map)
+
+#define SYM_DATA_START_PAGE_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
+
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 330922b328bf..16752b8dfa89 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -32,13 +32,6 @@
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
* because we need identity-mapped pages.
*/
-#define l4_index(x) (((x) >> 39) & 511)
-#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-
-L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
-L4_START_KERNEL = l4_index(__START_KERNEL_map)
-
-L3_START_KERNEL = pud_index(__START_KERNEL_map)
__HEAD
.code64
@@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
SYM_CODE_END(vc_no_ghcb)
#endif
-#define SYM_DATA_START_PAGE_ALIGNED(name) \
- SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
-
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
* Each PGD needs to be 8k long and 8k aligned. We do not
@@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb)
#define PTI_USER_PGD_FILL 0
#endif
-/* Automate the creation of 1 to 1 mapping pmd entries */
-#define PMDS(START, PERM, COUNT) \
- i = 0 ; \
- .rept (COUNT) ; \
- .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
- i = i + 1 ; \
- .endr
-
__INITDATA
.balign 4
@@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
.endr
SYM_DATA_END(level1_fixmap_pgt)
-#undef PMDS
-
.data
.align 16
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index f7235ef87bc3..64fca49cd88f 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -7,6 +7,7 @@
.code32
.text
#define _pa(x) ((x) - __START_KERNEL_map)
+#define rva(x) ((x) - pvh_start_xen)
#include <linux/elfnote.h>
#include <linux/init.h>
@@ -15,6 +16,7 @@
#include <asm/segment.h>
#include <asm/asm.h>
#include <asm/boot.h>
+#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>
@@ -54,7 +56,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
UNWIND_HINT_END_OF_STACK
cld
- lgdt (_pa(gdt))
+ /*
+ * See the comment for startup_32 for more details. We need to
+ * execute a call to get the execution address to be position
+ * independent, but we don't have a stack. Save and restore the
+ * magic field of start_info in ebx, and use that as the stack.
+ */
+ mov (%ebx), %eax
+ leal 4(%ebx), %esp
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+1: popl %ebp
+ mov %eax, (%ebx)
+ subl $rva(1b), %ebp
+ movl $0, %esp
+
+ leal rva(gdt)(%ebp), %eax
+ leal rva(gdt_start)(%ebp), %ecx
+ movl %ecx, 2(%eax)
+ lgdt (%eax)
mov $PVH_DS_SEL,%eax
mov %eax,%ds
@@ -62,14 +82,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
mov %eax,%ss
/* Stash hvm_start_info. */
- mov $_pa(pvh_start_info), %edi
+ leal rva(pvh_start_info)(%ebp), %edi
mov %ebx, %esi
- mov _pa(pvh_start_info_sz), %ecx
+ movl rva(pvh_start_info_sz)(%ebp), %ecx
shr $2,%ecx
rep
movsl
- mov $_pa(early_stack_end), %esp
+ leal rva(early_stack_end)(%ebp), %esp
/* Enable PAE mode. */
mov %cr4, %eax
@@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
btsl $_EFER_LME, %eax
wrmsr
+ mov %ebp, %ebx
+ subl $_pa(pvh_start_xen), %ebx /* offset */
+ jz .Lpagetable_done
+
+ /* Fixup page-tables for relocation. */
+ leal rva(pvh_init_top_pgt)(%ebp), %edi
+ movl $PTRS_PER_PGD, %ecx
+2:
+ testl $_PAGE_PRESENT, 0x00(%edi)
+ jz 1f
+ addl %ebx, 0x00(%edi)
+1:
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+ /* L3 ident has a single entry. */
+ leal rva(pvh_level3_ident_pgt)(%ebp), %edi
+ addl %ebx, 0x00(%edi)
+
+ leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
+ addl %ebx, (PAGE_SIZE - 16)(%edi)
+ addl %ebx, (PAGE_SIZE - 8)(%edi)
+
+ /* pvh_level2_ident_pgt is fine - large pages */
+
+ /* pvh_level2_kernel_pgt needs adjustment - large pages */
+ leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
+ movl $PTRS_PER_PMD, %ecx
+2:
+ testl $_PAGE_PRESENT, 0x00(%edi)
+ jz 1f
+ addl %ebx, 0x00(%edi)
+1:
+ addl $8, %edi
+ decl %ecx
+ jnz 2b
+
+.Lpagetable_done:
/* Enable pre-constructed page tables. */
- mov $_pa(init_top_pgt), %eax
+ leal rva(pvh_init_top_pgt)(%ebp), %eax
mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0
/* Jump to 64-bit mode. */
- ljmp $PVH_CS_SEL, $_pa(1f)
+ pushl $PVH_CS_SEL
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl
/* 64-bit entry point. */
.code64
1:
+ UNWIND_HINT_END_OF_STACK
+
/* Set base address in stack canary descriptor. */
mov $MSR_GS_BASE,%ecx
- mov $_pa(canary), %eax
+ leal canary(%rip), %eax
xor %edx, %edx
wrmsr
+ /*
+ * Calculate load offset and store in phys_base. __pa() needs
+ * phys_base set to calculate the hypercall page in xen_pvh_init().
+ */
+ movq %rbp, %rbx
+ subq $_pa(pvh_start_xen), %rbx
+ movq %rbx, phys_base(%rip)
call xen_prepare_pvh
+ /*
+ * Clear phys_base. __startup_64 will *add* to its value,
+ * so reset to 0.
+ */
+ xor %rbx, %rbx
+ movq %rbx, phys_base(%rip)
/* startup_64 expects boot_params in %rsi. */
- mov $_pa(pvh_bootparams), %rsi
- mov $_pa(startup_64), %rax
- ANNOTATE_RETPOLINE_SAFE
- jmp *%rax
+ lea pvh_bootparams(%rip), %rsi
+ jmp startup_64
#else /* CONFIG_X86_64 */
@@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen)
.balign 8
SYM_DATA_START_LOCAL(gdt)
.word gdt_end - gdt_start
- .long _pa(gdt_start)
+ .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
.word 0
SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
@@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
.fill BOOT_STACK_SIZE, 1, 0
SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
+#ifdef CONFIG_X86_64
+/*
+ * Xen PVH needs a set of identity mapped and kernel high mapping
+ * page tables. pvh_start_xen starts running on the identity mapped
+ * page tables, but xen_prepare_pvh calls into the high mapping.
+ * These page tables need to be relocatable and are only used until
+ * startup_64 transitions to init_top_pgt.
+ */
+SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
+ .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
+ .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(pvh_init_top_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
+ .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .fill 511, 8, 0
+SYM_DATA_END(pvh_level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
+ /*
+ * Since I easily can, map the first 1G.
+ * Don't set NX because code runs from these pages.
+ *
+ * Note: This sets _PAGE_GLOBAL despite whether
+ * the CPU supports it or it is enabled. But,
+ * the CPU should ignore the bit.
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(pvh_level2_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
+ .fill L3_START_KERNEL, 8, 0
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+ .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ .quad 0 /* no fixmap */
+SYM_DATA_END(pvh_level3_kernel_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
+ /*
+ * Kernel high mapping.
+ *
+ * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
+ * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
+ * 512 MiB otherwise.
+ *
+ * (NOTE: after that starts the module area, see MODULES_VADDR.)
+ *
+ * This table is eventually used by the kernel during normal runtime.
+ * Care must be taken to clear out undesired bits later, like _PAGE_RW
+ * or _PAGE_GLOBAL in some cases.
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
+SYM_DATA_END(pvh_level2_kernel_pgt)
+
+ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
+ .long CONFIG_PHYSICAL_ALIGN;
+ .long LOAD_PHYSICAL_ADDR;
+ .long KERNEL_IMAGE_SIZE - 1)
+#endif
+
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
_ASM_PTR (pvh_start_xen - __START_KERNEL_map))
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 728a4366ca85..bf68c329fc01 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -4,6 +4,7 @@
#include <linux/mm.h>
#include <xen/hvc-console.h>
+#include <xen/acpi.h>
#include <asm/bootparam.h>
#include <asm/io_apic.h>
@@ -28,6 +29,28 @@
bool __ro_after_init xen_pvh;
EXPORT_SYMBOL_GPL(xen_pvh);
+#ifdef CONFIG_XEN_DOM0
+int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
+{
+ int ret;
+ struct physdev_setup_gsi setup_gsi;
+
+ setup_gsi.gsi = gsi;
+ setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
+ setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
+ if (ret == -EEXIST) {
+ xen_raw_printk("Already setup the GSI :%d\n", gsi);
+ ret = 0;
+ } else if (ret)
+ xen_raw_printk("Fail to setup GSI (%d)!\n", gsi);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi);
+#endif
+
/*
* Reserve e820 UNUSABLE regions to inflate the memory balloon.
*
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index ff30ceca2203..630fe0a34bc6 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
}
#endif /* CONFIG_X86_IO_APIC */
-static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
+struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
{
struct acpi_prt_entry *entry = NULL;
struct pci_dev *bridge;
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index f7d6f47971fd..62035fe16bb8 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND
config XEN_PRIVCMD
tristate "Xen hypercall passthrough driver"
depends on XEN
+ imply CONFIG_XEN_PCIDEV_BACKEND
default m
help
The hypercall passthrough driver allows privileged user programs to
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
index 6893c79fd2a1..9e2096524fbc 100644
--- a/drivers/xen/acpi.c
+++ b/drivers/xen/acpi.c
@@ -30,6 +30,7 @@
* IN THE SOFTWARE.
*/
+#include <linux/pci.h>
#include <xen/acpi.h>
#include <xen/interface/platform.h>
#include <asm/xen/hypercall.h>
@@ -75,3 +76,52 @@ int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state,
return xen_acpi_notify_hypervisor_state(sleep_state, val_a,
val_b, true);
}
+
+struct acpi_prt_entry {
+ struct acpi_pci_id id;
+ u8 pin;
+ acpi_handle link;
+ u32 index;
+};
+
+int xen_acpi_get_gsi_info(struct pci_dev *dev,
+ int *gsi_out,
+ int *trigger_out,
+ int *polarity_out)
+{
+ int gsi;
+ u8 pin;
+ struct acpi_prt_entry *entry;
+ int trigger = ACPI_LEVEL_SENSITIVE;
+ int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ?
+ ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW;
+
+ if (!dev || !gsi_out || !trigger_out || !polarity_out)
+ return -EINVAL;
+
+ pin = dev->pin;
+ if (!pin)
+ return -EINVAL;
+
+ entry = acpi_pci_irq_lookup(dev, pin);
+ if (entry) {
+ if (entry->link)
+ gsi = acpi_pci_link_allocate_irq(entry->link,
+ entry->index,
+ &trigger, &polarity,
+ NULL);
+ else
+ gsi = entry->index;
+ } else
+ gsi = -1;
+
+ if (gsi < 0)
+ return -EINVAL;
+
+ *gsi_out = gsi;
+ *trigger_out = trigger;
+ *polarity_out = polarity;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info);
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index a2facd8f7e51..416f231809cb 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -173,6 +173,19 @@ static int xen_remove_device(struct device *dev)
return r;
}
+int xen_reset_device(const struct pci_dev *dev)
+{
+ struct pci_device_reset device = {
+ .dev.seg = pci_domain_nr(dev->bus),
+ .dev.bus = dev->bus->number,
+ .dev.devfn = dev->devfn,
+ .flags = PCI_DEVICE_RESET_FLR,
+ };
+
+ return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_reset, &device);
+}
+EXPORT_SYMBOL_GPL(xen_reset_device);
+
static int xen_pci_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 54e4f285c0f4..3273cb8c2a66 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -46,6 +46,9 @@
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>
+#ifdef CONFIG_XEN_ACPI
+#include <xen/acpi.h>
+#endif
#include "privcmd.h"
@@ -844,6 +847,31 @@ out:
return rc;
}
+static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata)
+{
+#if defined(CONFIG_XEN_ACPI)
+ int rc = -EINVAL;
+ struct privcmd_pcidev_get_gsi kdata;
+
+ if (copy_from_user(&kdata, udata, sizeof(kdata)))
+ return -EFAULT;
+
+ if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND))
+ rc = pcistub_get_gsi_from_sbdf(kdata.sbdf);
+
+ if (rc < 0)
+ return rc;
+
+ kdata.gsi = rc;
+ if (copy_to_user(udata, &kdata, sizeof(kdata)))
+ return -EFAULT;
+
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
#ifdef CONFIG_XEN_PRIVCMD_EVENTFD
/* Irqfd support */
static struct workqueue_struct *irqfd_cleanup_wq;
@@ -1543,6 +1571,10 @@ static long privcmd_ioctl(struct file *file,
ret = privcmd_ioctl_ioeventfd(file, udata);
break;
+ case IOCTL_PRIVCMD_PCIDEV_GET_GSI:
+ ret = privcmd_ioctl_pcidev_get_gsi(file, udata);
+ break;
+
default:
break;
}
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
index 1948a9700c8f..cf568e899ee2 100644
--- a/drivers/xen/xen-pciback/conf_space_capability.c
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -122,7 +122,7 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
if (err)
goto out;
- new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+ new_state = (__force pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
new_value &= PM_OK_BITS;
if ((old_value & PM_OK_BITS) != new_value) {
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 4faebbb84999..2f3da5ac62cd 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,6 +21,9 @@
#include <xen/events.h>
#include <xen/pci.h>
#include <xen/xen.h>
+#ifdef CONFIG_XEN_ACPI
+#include <xen/acpi.h>
+#endif
#include <asm/xen/hypervisor.h>
#include <xen/interface/physdev.h>
#include "pciback.h"
@@ -53,6 +56,9 @@ struct pcistub_device {
struct pci_dev *dev;
struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */
+#ifdef CONFIG_XEN_ACPI
+ int gsi;
+#endif
};
/* Access to pcistub_devices & seized_devices lists and the initialize_devices
@@ -85,10 +91,23 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
kref_init(&psdev->kref);
spin_lock_init(&psdev->lock);
+#ifdef CONFIG_XEN_ACPI
+ psdev->gsi = -1;
+#endif
return psdev;
}
+static int pcistub_reset_device_state(struct pci_dev *dev)
+{
+ __pci_reset_function_locked(dev);
+
+ if (!xen_pv_domain())
+ return xen_reset_device(dev);
+ else
+ return 0;
+}
+
/* Don't call this directly as it's called by pcistub_device_put */
static void pcistub_device_release(struct kref *kref)
{
@@ -107,7 +126,7 @@ static void pcistub_device_release(struct kref *kref)
/* Call the reset function which does not take lock as this
* is called from "unbind" which takes a device_lock mutex.
*/
- __pci_reset_function_locked(dev);
+ pcistub_reset_device_state(dev);
if (dev_data &&
pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state))
dev_info(&dev->dev, "Could not reload PCI state\n");
@@ -207,6 +226,25 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev,
return pci_dev;
}
+#ifdef CONFIG_XEN_ACPI
+int pcistub_get_gsi_from_sbdf(unsigned int sbdf)
+{
+ struct pcistub_device *psdev;
+ int domain = (sbdf >> 16) & 0xffff;
+ int bus = PCI_BUS_NUM(sbdf);
+ int slot = PCI_SLOT(sbdf);
+ int func = PCI_FUNC(sbdf);
+
+ psdev = pcistub_device_find(domain, bus, slot, func);
+
+ if (!psdev)
+ return -ENODEV;
+
+ return psdev->gsi;
+}
+EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf);
+#endif
+
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
int domain, int bus,
int slot, int func)
@@ -284,7 +322,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
* (so it's ready for the next domain)
*/
device_lock_assert(&dev->dev);
- __pci_reset_function_locked(dev);
+ pcistub_reset_device_state(dev);
dev_data = pci_get_drvdata(dev);
ret = pci_load_saved_state(dev, dev_data->pci_saved_state);
@@ -354,11 +392,20 @@ static int pcistub_match(struct pci_dev *dev)
return found;
}
-static int pcistub_init_device(struct pci_dev *dev)
+static int pcistub_init_device(struct pcistub_device *psdev)
{
struct xen_pcibk_dev_data *dev_data;
+ struct pci_dev *dev;
+#ifdef CONFIG_XEN_ACPI
+ int gsi, trigger, polarity;
+#endif
int err = 0;
+ if (!psdev)
+ return -EINVAL;
+
+ dev = psdev->dev;
+
dev_dbg(&dev->dev, "initializing...\n");
/* The PCI backend is not intended to be a module (or to work with
@@ -420,9 +467,26 @@ static int pcistub_init_device(struct pci_dev *dev)
dev_err(&dev->dev, "Could not store PCI conf saved state!\n");
else {
dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n");
- __pci_reset_function_locked(dev);
+ err = pcistub_reset_device_state(dev);
+ if (err)
+ goto config_release;
pci_restore_state(dev);
}
+
+#ifdef CONFIG_XEN_ACPI
+ if (xen_initial_domain() && xen_pvh_domain()) {
+ err = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity);
+ if (err) {
+ dev_err(&dev->dev, "Fail to get gsi info!\n");
+ goto config_release;
+ }
+ err = xen_pvh_setup_gsi(gsi, trigger, polarity);
+ if (err)
+ goto config_release;
+ psdev->gsi = gsi;
+ }
+#endif
+
/* Now disable the device (this also ensures some private device
* data is setup before we export)
*/
@@ -462,7 +526,7 @@ static int __init pcistub_init_devices_late(void)
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
- err = pcistub_init_device(psdev->dev);
+ err = pcistub_init_device(psdev);
if (err) {
dev_err(&psdev->dev->dev,
"error %d initializing device\n", err);
@@ -532,7 +596,7 @@ static int pcistub_seize(struct pci_dev *dev,
spin_unlock_irqrestore(&pcistub_devices_lock, flags);
/* don't want irqs disabled when calling pcistub_init_device */
- err = pcistub_init_device(psdev->dev);
+ err = pcistub_init_device(psdev);
spin_lock_irqsave(&pcistub_devices_lock, flags);
@@ -757,7 +821,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
- res = (pci_ers_result_t)aer_op->err;
+ res = (__force pci_ers_result_t)aer_op->err;
return res;
}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1655c4c23a78..4d5ee84c468b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -363,6 +363,7 @@ void acpi_unregister_gsi (u32 gsi);
struct pci_dev;
+struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin);
int acpi_pci_irq_enable (struct pci_dev *dev);
void acpi_penalize_isa_irq(int irq, int active);
bool acpi_isa_irq_available(int irq);
diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h
index 8b8c5d1420fe..8e2c8fd44764 100644
--- a/include/uapi/xen/privcmd.h
+++ b/include/uapi/xen/privcmd.h
@@ -126,6 +126,11 @@ struct privcmd_ioeventfd {
__u8 pad[2];
};
+struct privcmd_pcidev_get_gsi {
+ __u32 sbdf;
+ __u32 gsi;
+};
+
/*
* @cmd: IOCTL_PRIVCMD_HYPERCALL
* @arg: &privcmd_hypercall_t
@@ -157,5 +162,7 @@ struct privcmd_ioeventfd {
_IOW('P', 8, struct privcmd_irqfd)
#define IOCTL_PRIVCMD_IOEVENTFD \
_IOW('P', 9, struct privcmd_ioeventfd)
+#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \
+ _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi))
#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
index b1e11863144d..daa96a22d257 100644
--- a/include/xen/acpi.h
+++ b/include/xen/acpi.h
@@ -67,10 +67,37 @@ static inline void xen_acpi_sleep_register(void)
acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
}
}
+int xen_pvh_setup_gsi(int gsi, int trigger, int polarity);
+int xen_acpi_get_gsi_info(struct pci_dev *dev,
+ int *gsi_out,
+ int *trigger_out,
+ int *polarity_out);
#else
static inline void xen_acpi_sleep_register(void)
{
}
+
+static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity)
+{
+ return -1;
+}
+
+static inline int xen_acpi_get_gsi_info(struct pci_dev *dev,
+ int *gsi_out,
+ int *trigger_out,
+ int *polarity_out)
+{
+ return -1;
+}
+#endif
+
+#ifdef CONFIG_XEN_PCI_STUB
+int pcistub_get_gsi_from_sbdf(unsigned int sbdf);
+#else
+static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf)
+{
+ return -1;
+}
#endif
#endif /* _XEN_ACPI_H */
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
index 38deb1214613..918f47d87d7a 100644
--- a/include/xen/interface/elfnote.h
+++ b/include/xen/interface/elfnote.h
@@ -11,7 +11,9 @@
#define __XEN_PUBLIC_ELFNOTE_H__
/*
- * The notes should live in a SHT_NOTE segment and have "Xen" in the
+ * `incontents 200 elfnotes ELF notes
+ *
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
* name field.
*
* Numeric types are either 4 or 8 bytes depending on the content of
@@ -22,6 +24,8 @@
*
* String values (for non-legacy) are NULL terminated ASCII, also known
* as ASCIZ type.
+ *
+ * Xen only uses ELF Notes contained in x86 binaries.
*/
/*
@@ -52,7 +56,7 @@
#define XEN_ELFNOTE_VIRT_BASE 3
/*
- * The offset of the ELF paddr field from the acutal required
+ * The offset of the ELF paddr field from the actual required
* pseudo-physical address (numeric).
*
* This is used to maintain backwards compatibility with older kernels
@@ -92,7 +96,12 @@
#define XEN_ELFNOTE_LOADER 8
/*
- * The kernel supports PAE (x86/32 only, string = "yes" or "no").
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
*
* LEGACY: PAE (n.b. The legacy interface included a provision to
* indicate 'extended-cr3' support allowing L3 page tables to be
@@ -149,7 +158,9 @@
* The (non-default) location the initial phys-to-machine map should be
* placed at by the hypervisor (Dom0) or the tools (DomU).
* The kernel must be prepared for this mapping to be established using
- * large pages, despite such otherwise not being available to guests.
+ * large pages, despite such otherwise not being available to guests. Note
+ * that these large pages may be misaligned in PFN space (they'll obviously
+ * be aligned in MFN and virtual address spaces).
* The kernel must also be able to handle the page table pages used for
* this mapping not being accessible through the initial mapping.
* (Only x86-64 supports this at present.)
@@ -186,8 +197,80 @@
#define XEN_ELFNOTE_PHYS32_ENTRY 18
/*
+ * Physical loading constraints for PVH kernels
+ *
+ * The presence of this note indicates the kernel supports relocating itself.
+ *
+ * The note may include up to three 32bit values to place constraints on the
+ * guest physical loading addresses and alignment for a PVH kernel. Values
+ * are read in the following order:
+ * - a required start alignment (default 0x200000)
+ * - a minimum address for the start of the image (default 0; see below)
+ * - a maximum address for the last byte of the image (default 0xffffffff)
+ *
+ * When this note specifies an alignment value, it is used. Otherwise the
+ * maximum p_align value from loadable ELF Program Headers is used, if it is
+ * greater than or equal to 4k (0x1000). Otherwise, the default is used.
+ */
+#define XEN_ELFNOTE_PHYS32_RELOC 19
+
+/*
* The number of the highest elfnote defined.
*/
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index a237af867873..df74e65a884b 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -256,6 +256,13 @@ struct physdev_pci_device_add {
*/
#define PHYSDEVOP_prepare_msix 30
#define PHYSDEVOP_release_msix 31
+/*
+ * Notify the hypervisor that a PCI device has been reset, so that any
+ * internally cached state is regenerated. Should be called after any
+ * device reset performed by the hardware domain.
+ */
+#define PHYSDEVOP_pci_device_reset 32
+
struct physdev_pci_device {
/* IN */
uint16_t seg;
@@ -263,6 +270,16 @@ struct physdev_pci_device {
uint8_t devfn;
};
+struct pci_device_reset {
+ struct physdev_pci_device dev;
+#define PCI_DEVICE_RESET_COLD 0x0
+#define PCI_DEVICE_RESET_WARM 0x1
+#define PCI_DEVICE_RESET_HOT 0x2
+#define PCI_DEVICE_RESET_FLR 0x3
+#define PCI_DEVICE_RESET_MASK 0x3
+ uint32_t flags;
+};
+
#define PHYSDEVOP_DBGP_RESET_PREPARE 1
#define PHYSDEVOP_DBGP_RESET_DONE 2
diff --git a/include/xen/pci.h b/include/xen/pci.h
index b8337cf85fd1..424b8ea89ca8 100644
--- a/include/xen/pci.h
+++ b/include/xen/pci.h
@@ -4,10 +4,16 @@
#define __XEN_PCI_H__
#if defined(CONFIG_XEN_DOM0)
+int xen_reset_device(const struct pci_dev *dev);
int xen_find_device_domain_owner(struct pci_dev *dev);
int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
int xen_unregister_device_domain_owner(struct pci_dev *dev);
#else
+static inline int xen_reset_device(const struct pci_dev *dev)
+{
+ return -1;
+}
+
static inline int xen_find_device_domain_owner(struct pci_dev *dev)
{
return -1;