diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-27 09:55:30 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-27 09:55:30 -0700 |
commit | 653608c67ae3dce1c5dee8c620ce6016e174bbd1 (patch) | |
tree | ac055981ed8ed789284cd810c450595651d4311f /arch | |
parent | e477dba5442c0af7acb9e8bbbbde1108a37ed39c (diff) | |
parent | 47ffe0578aee45fed3a06d5dcff76cdebb303163 (diff) |
Merge tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull more xen updates from Juergen Gross:
"A second round of Xen related changes and features:
- a small fix of the xen-pciback driver for a warning issued by
sparse
- support PCI passthrough when using a PVH dom0
- enable loading the kernel in PVH mode at arbitrary addresses,
avoiding conflicts with the memory map when running as a Xen dom0
using the host memory layout"
* tag 'for-linus-6.12-rc1a-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
x86/pvh: Add 64bit relocation page tables
x86/kernel: Move page table macros to header
x86/pvh: Set phys_base when calling xen_prepare_pvh()
x86/pvh: Make PVH entrypoint PIC for x86-64
xen: sync elfnote.h from xen tree
xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t
xen/privcmd: Add new syscall to get gsi from dev
xen/pvh: Setup gsi for passthrough device
xen/pci: Add a function to reset device for xen
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 23 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 20 | ||||
-rw-r--r-- | arch/x86/platform/pvh/head.S | 161 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pvh.c | 23 |
4 files changed, 194 insertions, 33 deletions
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7e9db77231ac..d1426b64c1b9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) #include <asm/pgtable-invert.h> -#endif /* !__ASSEMBLY__ */ +#else /* __ASSEMBLY__ */ + +#define l4_index(x) (((x) >> 39) & 511) +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + +L3_START_KERNEL = pud_index(__START_KERNEL_map) + +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) + +/* Automate the creation of 1 to 1 mapping pmd entries */ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ + .endr + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 330922b328bf..16752b8dfa89 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -32,13 +32,6 @@ * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. */ -#define l4_index(x) (((x) >> 39) & 511) -#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - -L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) -L4_START_KERNEL = l4_index(__START_KERNEL_map) - -L3_START_KERNEL = pud_index(__START_KERNEL_map) __HEAD .code64 @@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) SYM_CODE_END(vc_no_ghcb) #endif -#define SYM_DATA_START_PAGE_ALIGNED(name) \ - SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) - #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not @@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb) #define PTI_USER_PGD_FILL 0 #endif -/* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ - i = i + 1 ; \ - .endr - __INITDATA .balign 4 @@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt) .endr SYM_DATA_END(level1_fixmap_pgt) -#undef PMDS - .data .align 16 diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index f7235ef87bc3..64fca49cd88f 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -7,6 +7,7 @@ .code32 .text #define _pa(x) ((x) - __START_KERNEL_map) +#define rva(x) ((x) - pvh_start_xen) #include <linux/elfnote.h> #include <linux/init.h> @@ -15,6 +16,7 @@ #include <asm/segment.h> #include <asm/asm.h> #include <asm/boot.h> +#include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/msr.h> #include <asm/nospec-branch.h> @@ -54,7 +56,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen) UNWIND_HINT_END_OF_STACK cld - lgdt (_pa(gdt)) + /* + * See the comment for startup_32 for more details. We need to + * execute a call to get the execution address to be position + * independent, but we don't have a stack. Save and restore the + * magic field of start_info in ebx, and use that as the stack. + */ + mov (%ebx), %eax + leal 4(%ebx), %esp + ANNOTATE_INTRA_FUNCTION_CALL + call 1f +1: popl %ebp + mov %eax, (%ebx) + subl $rva(1b), %ebp + movl $0, %esp + + leal rva(gdt)(%ebp), %eax + leal rva(gdt_start)(%ebp), %ecx + movl %ecx, 2(%eax) + lgdt (%eax) mov $PVH_DS_SEL,%eax mov %eax,%ds @@ -62,14 +82,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen) mov %eax,%ss /* Stash hvm_start_info. */ - mov $_pa(pvh_start_info), %edi + leal rva(pvh_start_info)(%ebp), %edi mov %ebx, %esi - mov _pa(pvh_start_info_sz), %ecx + movl rva(pvh_start_info_sz)(%ebp), %ecx shr $2,%ecx rep movsl - mov $_pa(early_stack_end), %esp + leal rva(early_stack_end)(%ebp), %esp /* Enable PAE mode. */ mov %cr4, %eax @@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen) btsl $_EFER_LME, %eax wrmsr + mov %ebp, %ebx + subl $_pa(pvh_start_xen), %ebx /* offset */ + jz .Lpagetable_done + + /* Fixup page-tables for relocation. */ + leal rva(pvh_init_top_pgt)(%ebp), %edi + movl $PTRS_PER_PGD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + + /* L3 ident has a single entry. */ + leal rva(pvh_level3_ident_pgt)(%ebp), %edi + addl %ebx, 0x00(%edi) + + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi + addl %ebx, (PAGE_SIZE - 16)(%edi) + addl %ebx, (PAGE_SIZE - 8)(%edi) + + /* pvh_level2_ident_pgt is fine - large pages */ + + /* pvh_level2_kernel_pgt needs adjustment - large pages */ + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi + movl $PTRS_PER_PMD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + +.Lpagetable_done: /* Enable pre-constructed page tables. */ - mov $_pa(init_top_pgt), %eax + leal rva(pvh_init_top_pgt)(%ebp), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $PVH_CS_SEL, $_pa(1f) + pushl $PVH_CS_SEL + leal rva(1f)(%ebp), %eax + pushl %eax + lretl /* 64-bit entry point. */ .code64 1: + UNWIND_HINT_END_OF_STACK + /* Set base address in stack canary descriptor. */ mov $MSR_GS_BASE,%ecx - mov $_pa(canary), %eax + leal canary(%rip), %eax xor %edx, %edx wrmsr + /* + * Calculate load offset and store in phys_base. __pa() needs + * phys_base set to calculate the hypercall page in xen_pvh_init(). + */ + movq %rbp, %rbx + subq $_pa(pvh_start_xen), %rbx + movq %rbx, phys_base(%rip) call xen_prepare_pvh + /* + * Clear phys_base. __startup_64 will *add* to its value, + * so reset to 0. + */ + xor %rbx, %rbx + movq %rbx, phys_base(%rip) /* startup_64 expects boot_params in %rsi. */ - mov $_pa(pvh_bootparams), %rsi - mov $_pa(startup_64), %rax - ANNOTATE_RETPOLINE_SAFE - jmp *%rax + lea pvh_bootparams(%rip), %rsi + jmp startup_64 #else /* CONFIG_X86_64 */ @@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen) .balign 8 SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt_start - .long _pa(gdt_start) + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ .word 0 SYM_DATA_END(gdt) SYM_DATA_START_LOCAL(gdt_start) @@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack) .fill BOOT_STACK_SIZE, 1, 0 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) +#ifdef CONFIG_X86_64 +/* + * Xen PVH needs a set of identity mapped and kernel high mapping + * page tables. pvh_start_xen starts running on the identity mapped + * page tables, but xen_prepare_pvh calls into the high mapping. + * These page tables need to be relocatable and are only used until + * startup_64 transitions to init_top_pgt. + */ +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt) + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(pvh_init_top_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt) + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .fill 511, 8, 0 +SYM_DATA_END(pvh_level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt) + /* + * Since I easily can, map the first 1G. + * Don't set NX because code runs from these pages. + * + * Note: This sets _PAGE_GLOBAL despite whether + * the CPU supports it or it is enabled. But, + * the CPU should ignore the bit. + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +SYM_DATA_END(pvh_level2_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt) + .fill L3_START_KERNEL, 8, 0 + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .quad 0 /* no fixmap */ +SYM_DATA_END(pvh_level3_kernel_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt) + /* + * Kernel high mapping. + * + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, + * 512 MiB otherwise. + * + * (NOTE: after that starts the module area, see MODULES_VADDR.) + * + * This table is eventually used by the kernel during normal runtime. + * Care must be taken to clear out undesired bits later, like _PAGE_RW + * or _PAGE_GLOBAL in some cases. + */ + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE) +SYM_DATA_END(pvh_level2_kernel_pgt) + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC, + .long CONFIG_PHYSICAL_ALIGN; + .long LOAD_PHYSICAL_ADDR; + .long KERNEL_IMAGE_SIZE - 1) +#endif + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 728a4366ca85..bf68c329fc01 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -4,6 +4,7 @@ #include <linux/mm.h> #include <xen/hvc-console.h> +#include <xen/acpi.h> #include <asm/bootparam.h> #include <asm/io_apic.h> @@ -28,6 +29,28 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); +#ifdef CONFIG_XEN_DOM0 +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + int ret; + struct physdev_setup_gsi setup_gsi; + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (ret == -EEXIST) { + xen_raw_printk("Already setup the GSI :%d\n", gsi); + ret = 0; + } else if (ret) + xen_raw_printk("Fail to setup GSI (%d)!\n", gsi); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi); +#endif + /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * |