diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2023-09-23 05:35:55 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-09-23 05:35:55 -0400 |
commit | 5804c19b80bf625c6a9925317f845e497434d6d3 (patch) | |
tree | ba4a0bf9cac721e2bf898e3be5ab64773da5a9d2 /arch | |
parent | 916e3e5f26abc165437950daff370c0693572ef4 (diff) | |
parent | 071ef070ca77e6dfe33fd78afa293e83422f0411 (diff) |
Merge tag 'kvm-riscv-fixes-6.6-1' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv fixes for 6.6, take #1
- Fix KVM_GET_REG_LIST API for ISA_EXT registers
- Fix reading ISA_EXT register of a missing extension
- Fix ISA_EXT register handling in get-reg-list test
- Fix filtering of AIA registers in get-reg-list test
Diffstat (limited to 'arch')
29 files changed, 274 insertions, 100 deletions
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index e23d06b51a20..2a60d7a72f1f 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -37,6 +37,7 @@ extern int split_tlb; extern int dcache_stride; extern int icache_stride; extern struct pdc_cache_info cache_info; +extern struct pdc_btlb_info btlb_info; void parisc_setup_cache_timing(void); #define pdtlb(sr, addr) asm volatile("pdtlb 0(%%sr%0,%1)" \ diff --git a/arch/parisc/include/asm/mckinley.h b/arch/parisc/include/asm/mckinley.h deleted file mode 100644 index 1314390b9034..000000000000 --- a/arch/parisc/include/asm/mckinley.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASM_PARISC_MCKINLEY_H -#define ASM_PARISC_MCKINLEY_H - -/* declared in arch/parisc/kernel/setup.c */ -extern struct proc_dir_entry * proc_mckinley_root; - -#endif /*ASM_PARISC_MCKINLEY_H*/ diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index 269b9a159f01..5d2d9737e579 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -44,10 +44,11 @@ int pdc_model_capabilities(unsigned long *capabilities); int pdc_model_platform_info(char *orig_prod_num, char *current_prod_num, char *serial_no); int pdc_cache_info(struct pdc_cache_info *cache); int pdc_spaceid_bits(unsigned long *space_bits); -#ifndef CONFIG_PA20 int pdc_btlb_info(struct pdc_btlb_info *btlb); +int pdc_btlb_insert(unsigned long long vpage, unsigned long physpage, unsigned long len, + unsigned long entry_info, unsigned long slot); +int pdc_btlb_purge_all(void); int pdc_mem_map_hpa(struct pdc_memory_map *r_addr, struct pdc_module_path *mod_path); -#endif /* !CONFIG_PA20 */ int pdc_pim_toc11(struct pdc_toc_pim_11 *ret); int pdc_pim_toc20(struct pdc_toc_pim_20 *ret); int pdc_lan_station_id(char *lan_addr, unsigned long net_hpa); diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index d77c43d32974..ff6cbdb6903b 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -310,6 +310,7 @@ extern void do_syscall_trace_exit(struct pt_regs *); struct seq_file; extern void early_trap_init(void); extern void collect_boot_cpu_data(void); +extern void btlb_init_per_cpu(void); extern int show_cpuinfo (struct seq_file *m, void *v); /* driver code in driver/parisc */ diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h index fd96706c7234..e2d2d7e9bfde 100644 --- a/arch/parisc/include/asm/ropes.h +++ b/arch/parisc/include/asm/ropes.h @@ -29,7 +29,7 @@ struct ioc { void __iomem *ioc_hpa; /* I/O MMU base address */ char *res_map; /* resource map, bit == pdir entry */ - u64 *pdir_base; /* physical base address */ + __le64 *pdir_base; /* physical base address */ unsigned long ibase; /* pdir IOV Space base - shared w/lba_pci */ unsigned long imask; /* pdir IOV Space mask - shared w/lba_pci */ #ifdef ZX1_SUPPORT @@ -86,6 +86,9 @@ struct sba_device { struct ioc ioc[MAX_IOC]; }; +/* list of SBA's in system, see drivers/parisc/sba_iommu.c */ +extern struct sba_device *sba_list; + #define ASTRO_RUNWAY_PORT 0x582 #define IKE_MERCED_PORT 0x803 #define REO_MERCED_PORT 0x804 @@ -110,7 +113,7 @@ static inline int IS_PLUTO(struct parisc_device *d) { #define SBA_PDIR_VALID_BIT 0x8000000000000000ULL -#define SBA_AGPGART_COOKIE 0x0000badbadc0ffeeULL +#define SBA_AGPGART_COOKIE (__force __le64) 0x0000badbadc0ffeeULL #define SBA_FUNC_ID 0x0000 /* function id */ #define SBA_FCLASS 0x0008 /* function class, bist, header, rev... */ diff --git a/arch/parisc/include/asm/shmparam.h b/arch/parisc/include/asm/shmparam.h index 74f74e4d35b7..5a95b0f62b87 100644 --- a/arch/parisc/include/asm/shmparam.h +++ b/arch/parisc/include/asm/shmparam.h @@ -2,6 +2,21 @@ #ifndef _ASMPARISC_SHMPARAM_H #define _ASMPARISC_SHMPARAM_H +/* + * PA-RISC uses virtually indexed & physically tagged (VIPT) caches + * which has strict requirements when two pages to the same physical + * address are accessed through different mappings. Read the section + * "Address Aliasing" in the arch docs for more detail: + * PA-RISC 1.1 (page 3-6): + * https://parisc.wiki.kernel.org/images-parisc/6/68/Pa11_acd.pdf + * PA-RISC 2.0 (page F-5): + * https://parisc.wiki.kernel.org/images-parisc/7/73/Parisc2.0.pdf + * + * For Linux we allow kernel and userspace to map pages on page size + * granularity (SHMLBA) but have to ensure that, if two pages are + * mapped to the same physical address, the virtual and physical + * addresses modulo SHM_COLOUR are identical. + */ #define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ #define SHM_COLOUR 0x00400000 /* shared mappings colouring */ diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 94652e13c260..757816a7bd4b 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -275,6 +275,8 @@ int main(void) * and kernel data on physical huge pages */ #ifdef CONFIG_HUGETLB_PAGE DEFINE(HUGEPAGE_SIZE, 1UL << REAL_HPAGE_SHIFT); +#elif !defined(CONFIG_64BIT) + DEFINE(HUGEPAGE_SIZE, 4*1024*1024); #else DEFINE(HUGEPAGE_SIZE, PAGE_SIZE); #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 442109a48940..268d90a9325b 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init; struct pdc_cache_info cache_info __ro_after_init; #ifndef CONFIG_PA20 -static struct pdc_btlb_info btlb_info __ro_after_init; +struct pdc_btlb_info btlb_info __ro_after_init; #endif DEFINE_STATIC_KEY_TRUE(parisc_has_cache); @@ -264,12 +264,6 @@ parisc_cache_init(void) icache_stride = CAFL_STRIDE(cache_info.ic_conf); #undef CAFL_STRIDE -#ifndef CONFIG_PA20 - if (pdc_btlb_info(&btlb_info) < 0) { - memset(&btlb_info, 0, sizeof btlb_info); - } -#endif - if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) == PDC_MODEL_NVA_UNSUPPORTED) { printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n"); diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 8f4b77648491..ed8b75948061 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -925,9 +925,9 @@ static __init void qemu_header(void) pr_info("#define PARISC_MODEL \"%s\"\n\n", boot_cpu_data.pdc.sys_model_name); + #define p ((unsigned long *)&boot_cpu_data.pdc.model) pr_info("#define PARISC_PDC_MODEL 0x%lx, 0x%lx, 0x%lx, " "0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx\n\n", - #define p ((unsigned long *)&boot_cpu_data.pdc.model) p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8]); #undef p diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 8f37e75f2fb9..81078abec521 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -687,7 +687,6 @@ int pdc_spaceid_bits(unsigned long *space_bits) return retval; } -#ifndef CONFIG_PA20 /** * pdc_btlb_info - Return block TLB information. * @btlb: The return buffer. @@ -696,18 +695,51 @@ int pdc_spaceid_bits(unsigned long *space_bits) */ int pdc_btlb_info(struct pdc_btlb_info *btlb) { - int retval; + int retval; unsigned long flags; - spin_lock_irqsave(&pdc_lock, flags); - retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INFO, __pa(pdc_result), 0); - memcpy(btlb, pdc_result, sizeof(*btlb)); - spin_unlock_irqrestore(&pdc_lock, flags); + if (IS_ENABLED(CONFIG_PA20)) + return PDC_BAD_PROC; - if(retval < 0) { - btlb->max_size = 0; - } - return retval; + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INFO, __pa(pdc_result), 0); + memcpy(btlb, pdc_result, sizeof(*btlb)); + spin_unlock_irqrestore(&pdc_lock, flags); + + if(retval < 0) { + btlb->max_size = 0; + } + return retval; +} + +int pdc_btlb_insert(unsigned long long vpage, unsigned long physpage, unsigned long len, + unsigned long entry_info, unsigned long slot) +{ + int retval; + unsigned long flags; + + if (IS_ENABLED(CONFIG_PA20)) + return PDC_BAD_PROC; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_INSERT, (unsigned long) (vpage >> 32), + (unsigned long) vpage, physpage, len, entry_info, slot); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval; +} + +int pdc_btlb_purge_all(void) +{ + int retval; + unsigned long flags; + + if (IS_ENABLED(CONFIG_PA20)) + return PDC_BAD_PROC; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_BLOCK_TLB, PDC_BTLB_PURGE_ALL); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval; } /** @@ -728,6 +760,9 @@ int pdc_mem_map_hpa(struct pdc_memory_map *address, int retval; unsigned long flags; + if (IS_ENABLED(CONFIG_PA20)) + return PDC_BAD_PROC; + spin_lock_irqsave(&pdc_lock, flags); memcpy(pdc_result2, mod_path, sizeof(*mod_path)); retval = mem_pdc_call(PDC_MEM_MAP, PDC_MEM_MAP_HPA, __pa(pdc_result), @@ -737,7 +772,6 @@ int pdc_mem_map_hpa(struct pdc_memory_map *address, return retval; } -#endif /* !CONFIG_PA20 */ /** * pdc_lan_station_id - Get the LAN address. diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index fd15fd4bbb61..a171bf3c6b31 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -180,10 +180,10 @@ $pgt_fill_loop: std %dp,0x18(%r10) #endif -#ifdef CONFIG_64BIT - /* Get PDCE_PROC for monarch CPU. */ #define MEM_PDC_LO 0x388 #define MEM_PDC_HI 0x35C +#ifdef CONFIG_64BIT + /* Get PDCE_PROC for monarch CPU. */ ldw MEM_PDC_LO(%r0),%r3 ldw MEM_PDC_HI(%r0),%r10 depd %r10, 31, 32, %r3 /* move to upper word */ @@ -269,7 +269,17 @@ stext_pdc_ret: tovirt_r1 %r6 mtctl %r6,%cr30 /* restore task thread info */ #endif - + +#ifndef CONFIG_64BIT + /* clear all BTLBs */ + ldi PDC_BLOCK_TLB,%arg0 + load32 PA(stext_pdc_btlb_ret), %rp + ldw MEM_PDC_LO(%r0),%r3 + bv (%r3) + ldi PDC_BTLB_PURGE_ALL,%arg1 +stext_pdc_btlb_ret: +#endif + /* PARANOID: clear user scratch/user space SR's */ mtsp %r0,%sr0 mtsp %r0,%sr1 diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 12c4d4104ade..2f81bfd4f15e 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -365,7 +365,7 @@ union irq_stack_union { volatile unsigned int lock[1]; }; -DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { +static DEFINE_PER_CPU(union irq_stack_union, irq_stack_union) = { .slock = { 1,1,1,1 }, }; #endif diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index a0e2d37c5b3b..1fc89fa2c2d2 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -368,6 +368,8 @@ int init_per_cpu(int cpunum) /* FUTURE: Enable Performance Monitor : ccr bit 0x20 */ init_percpu_prof(cpunum); + btlb_init_per_cpu(); + return ret; } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 1aaa2ca09800..58694d1989c2 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -154,6 +154,7 @@ SECTIONS } /* End of data section */ + . = ALIGN(PAGE_SIZE); _edata = .; /* BSS */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index a088c243edea..a2a3e89f2d9a 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -32,6 +32,7 @@ #include <asm/sections.h> #include <asm/msgbuf.h> #include <asm/sparsemem.h> +#include <asm/asm-offsets.h> extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ @@ -720,6 +721,77 @@ void __init paging_init(void) parisc_bootmem_free(); } +static void alloc_btlb(unsigned long start, unsigned long end, int *slot, + unsigned long entry_info) +{ + const int slot_max = btlb_info.fixed_range_info.num_comb; + int min_num_pages = btlb_info.min_size; + unsigned long size; + + /* map at minimum 4 pages */ + if (min_num_pages < 4) + min_num_pages = 4; + + size = HUGEPAGE_SIZE; + while (start < end && *slot < slot_max && size >= PAGE_SIZE) { + /* starting address must have same alignment as size! */ + /* if correctly aligned and fits in double size, increase */ + if (((start & (2 * size - 1)) == 0) && + (end - start) >= (2 * size)) { + size <<= 1; + continue; + } + /* if current size alignment is too big, try smaller size */ + if ((start & (size - 1)) != 0) { + size >>= 1; + continue; + } + if ((end - start) >= size) { + if ((size >> PAGE_SHIFT) >= min_num_pages) + pdc_btlb_insert(start >> PAGE_SHIFT, __pa(start) >> PAGE_SHIFT, + size >> PAGE_SHIFT, entry_info, *slot); + (*slot)++; + start += size; + continue; + } + size /= 2; + continue; + } +} + +void btlb_init_per_cpu(void) +{ + unsigned long s, t, e; + int slot; + + /* BTLBs are not available on 64-bit CPUs */ + if (IS_ENABLED(CONFIG_PA20)) + return; + else if (pdc_btlb_info(&btlb_info) < 0) { + memset(&btlb_info, 0, sizeof btlb_info); + } + + /* insert BLTLBs for code and data segments */ + s = (uintptr_t) dereference_function_descriptor(&_stext); + e = (uintptr_t) dereference_function_descriptor(&_etext); + t = (uintptr_t) dereference_function_descriptor(&_sdata); + BUG_ON(t != e); + + /* code segments */ + slot = 0; + alloc_btlb(s, e, &slot, 0x13800000); + + /* sanity check */ + t = (uintptr_t) dereference_function_descriptor(&_edata); + e = (uintptr_t) dereference_function_descriptor(&__bss_start); + BUG_ON(t != e); + + /* data segments */ + s = (uintptr_t) dereference_function_descriptor(&_sdata); + e = (uintptr_t) dereference_function_descriptor(&__bss_stop); + alloc_btlb(s, e, &slot, 0x11800000); +} + #ifdef CONFIG_PA20 /* diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index e2ecd01bfac7..b55b434f0059 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -105,7 +105,7 @@ asm volatile(ALTERNATIVE( \ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | * 0000001 01001 rs1 000 00000 0001011 * dcache.cva rs1 (clean, virtual address) - * 0000001 00100 rs1 000 00000 0001011 + * 0000001 00101 rs1 000 00000 0001011 * * dcache.cipa rs1 (clean then invalidate, physical address) * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | @@ -118,7 +118,7 @@ asm volatile(ALTERNATIVE( \ * 0000000 11001 00000 000 00000 0001011 */ #define THEAD_inval_A0 ".long 0x0265000b" -#define THEAD_clean_A0 ".long 0x0245000b" +#define THEAD_clean_A0 ".long 0x0255000b" #define THEAD_flush_A0 ".long 0x0275000b" #define THEAD_SYNC_S ".long 0x0190000b" diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index f4099059ed8f..e60fbd8660c4 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -98,7 +98,13 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, kbuf.image = image; kbuf.buf_min = lowest_paddr; kbuf.buf_max = ULONG_MAX; - kbuf.buf_align = PAGE_SIZE; + + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 + * + */ + kbuf.buf_align = PMD_SIZE; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); kbuf.top_down = false; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 1b7e9fa265cb..b7e0e03c69b1 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -460,8 +460,11 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu, reg_num >= ARRAY_SIZE(kvm_isa_ext_arr)) return -ENOENT; - *reg_val = 0; host_isa_ext = kvm_isa_ext_arr[reg_num]; + if (!__riscv_isa_extension_available(NULL, host_isa_ext)) + return -ENOENT; + + *reg_val = 0; if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext)) *reg_val = 1; /* Mark the given extension as available */ @@ -842,7 +845,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i; isa_ext = kvm_isa_ext_arr[i]; - if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext)) + if (!__riscv_isa_extension_available(NULL, isa_ext)) continue; if (uindices) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 982b777eadc7..66bfabae8814 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1945,6 +1945,7 @@ config EFI select UCS2_STRING select EFI_RUNTIME_WRAPPERS select ARCH_USE_MEMREMAP_PROT + select EFI_RUNTIME_MAP if KEXEC_CORE help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). @@ -2020,7 +2021,6 @@ config EFI_MAX_FAKE_MEM config EFI_RUNTIME_MAP bool "Export EFI runtime maps to sysfs" if EXPERT depends on EFI - default KEXEC_CORE help Export EFI runtime memory regions to /sys/firmware/efi/runtime-map. That memory map is required by the 2nd kernel to set up EFI virtual diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index bcc956c17872..08f93b0401bb 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -59,6 +59,14 @@ static void *alloc_pgt_page(void *context) return NULL; } + /* Consumed more tables than expected? */ + if (pages->pgt_buf_offset == BOOT_PGT_SIZE_WARN) { + debug_putstr("pgt_buf running low in " __FILE__ "\n"); + debug_putstr("Need to raise BOOT_PGT_SIZE?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + } + entry = pages->pgt_buf + pages->pgt_buf_offset; pages->pgt_buf_offset += PAGE_SIZE; diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4ae14339cb8c..b3a7cfb0d99e 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -40,23 +40,40 @@ #ifdef CONFIG_X86_64 # define BOOT_STACK_SIZE 0x4000 +/* + * Used by decompressor's startup_32() to allocate page tables for identity + * mapping of the 4G of RAM in 4-level paging mode: + * - 1 level4 table; + * - 1 level3 table; + * - 4 level2 table that maps everything with 2M pages; + * + * The additional level5 table needed for 5-level paging is allocated from + * trampoline_32bit memory. + */ # define BOOT_INIT_PGT_SIZE (6*4096) -# ifdef CONFIG_RANDOMIZE_BASE + /* - * Assuming all cross the 512GB boundary: - * 1 page for level4 - * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel - * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP). - * Total is 19 pages. + * Total number of page tables kernel_add_identity_map() can allocate, + * including page tables consumed by startup_32(). + * + * Worst-case scenario: + * - 5-level paging needs 1 level5 table; + * - KASLR needs to map kernel, boot_params, cmdline and randomized kernel, + * assuming all of them cross 256T boundary: + * + 4*2 level4 table; + * + 4*2 level3 table; + * + 4*2 level2 table; + * - X86_VERBOSE_BOOTUP needs to map the first 2M (video RAM): + * + 1 level4 table; + * + 1 level3 table; + * + 1 level2 table; + * Total: 28 tables + * + * Add 4 spare table in case decompressor touches anything beyond what is + * accounted above. Warn if it happens. */ -# ifdef CONFIG_X86_VERBOSE_BOOTUP -# define BOOT_PGT_SIZE (19*4096) -# else /* !CONFIG_X86_VERBOSE_BOOTUP */ -# define BOOT_PGT_SIZE (17*4096) -# endif -# else /* !CONFIG_RANDOMIZE_BASE */ -# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE -# endif +# define BOOT_PGT_SIZE_WARN (28*4096) +# define BOOT_PGT_SIZE (32*4096) #else /* !CONFIG_X86_64 */ # define BOOT_STACK_SIZE 0x1000 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b0994ae3bc23..c4555b269a1b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -91,19 +91,6 @@ static inline void efi_fpu_end(void) #ifdef CONFIG_X86_32 #define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) - -#define arch_efi_call_virt_setup() \ -({ \ - efi_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ -}) - -#define arch_efi_call_virt_teardown() \ -({ \ - firmware_restrict_branch_speculation_end(); \ - efi_fpu_end(); \ -}) - #else /* !CONFIG_X86_32 */ #define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT @@ -116,14 +103,6 @@ extern bool efi_disable_ibt_for_runtime; __efi_call(__VA_ARGS__); \ }) -#define arch_efi_call_virt_setup() \ -({ \ - efi_sync_low_kernel_mappings(); \ - efi_fpu_begin(); \ - firmware_restrict_branch_speculation_start(); \ - efi_enter_mm(); \ -}) - #undef arch_efi_call_virt #define arch_efi_call_virt(p, f, args...) ({ \ u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \ @@ -132,13 +111,6 @@ extern bool efi_disable_ibt_for_runtime; ret; \ }) -#define arch_efi_call_virt_teardown() \ -({ \ - efi_leave_mm(); \ - firmware_restrict_branch_speculation_end(); \ - efi_fpu_end(); \ -}) - #ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present @@ -168,8 +140,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); -void efi_enter_mm(void); -void efi_leave_mm(void); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); /* kexec external ABI */ struct efi_setup_data { diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 97a3de7892d3..5ff49fd67732 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -8,6 +8,14 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_64BIT +/* + * The generic version tends to create spurious ENDBR instructions under + * certain conditions. + */ +#define _THIS_IP_ ({ unsigned long __here; asm ("lea 0(%%rip), %0" : "=r" (__here)); __here; }) +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d9f5d7492f83..205cee567629 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1533,7 +1533,7 @@ static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; int nums, numn, nump; - int cpu, i, lnid; + int i, lnid, apicid; int minsock = _min_socket; int maxsock = _max_socket; int minpnode = _min_pnode; @@ -1584,15 +1584,14 @@ static void __init build_socket_tables(void) /* Set socket -> node values: */ lnid = NUMA_NO_NODE; - for_each_possible_cpu(cpu) { - int nid = cpu_to_node(cpu); - int apicid, sockid; + for (apicid = 0; apicid < ARRAY_SIZE(__apicid_to_node); apicid++) { + int nid = __apicid_to_node[apicid]; + int sockid; - if (lnid == nid) + if ((nid == NUMA_NO_NODE) || (lnid == nid)) continue; lnid = nid; - apicid = per_cpu(x86_cpu_to_apicid, cpu); sockid = apicid >> uv_cpuid.socketid_shift; if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4e45ff44aa07..48e040618731 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -579,7 +579,6 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) } -#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC) static inline int x86_sched_itmt_flags(void) { return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0; @@ -603,7 +602,14 @@ static int x86_cluster_flags(void) return cpu_cluster_flags() | x86_sched_itmt_flags(); } #endif -#endif + +static int x86_die_flags(void) +{ + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + return x86_sched_itmt_flags(); + + return 0; +} /* * Set if a package/die has multiple NUMA nodes inside. @@ -640,7 +646,7 @@ static void __init build_sched_topology(void) */ if (!x86_has_numa_in_package) { x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_cpu_mask, SD_INIT_NAME(DIE) + cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE) }; } diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 1451e0c4ae22..235bbda6fc82 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -56,7 +56,6 @@ SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) SYM_FUNC_START(__put_user_nocheck_1) - ENDBR ASM_STAC 2: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -76,7 +75,6 @@ SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) SYM_FUNC_START(__put_user_nocheck_2) - ENDBR ASM_STAC 4: movw %ax,(%_ASM_CX) xor %ecx,%ecx @@ -96,7 +94,6 @@ SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) SYM_FUNC_START(__put_user_nocheck_4) - ENDBR ASM_STAC 6: movl %eax,(%_ASM_CX) xor %ecx,%ecx @@ -119,7 +116,6 @@ SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) SYM_FUNC_START(__put_user_nocheck_8) - ENDBR ASM_STAC 9: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index e06a199423c0..b2cc7b4552a1 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -140,3 +140,15 @@ void __init efi_runtime_update_mappings(void) } } } + +void arch_efi_call_virt_setup(void) +{ + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); +} + +void arch_efi_call_virt_teardown(void) +{ + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 77f7ac3668cb..91d31ac422d6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -474,19 +474,34 @@ void __init efi_dump_pagetable(void) * can not change under us. * It should be ensured that there are no concurrent calls to this function. */ -void efi_enter_mm(void) +static void efi_enter_mm(void) { efi_prev_mm = current->active_mm; current->active_mm = &efi_mm; switch_mm(efi_prev_mm, &efi_mm, NULL); } -void efi_leave_mm(void) +static void efi_leave_mm(void) { current->active_mm = efi_prev_mm; switch_mm(&efi_mm, efi_prev_mm, NULL); } +void arch_efi_call_virt_setup(void) +{ + efi_sync_low_kernel_mappings(); + efi_fpu_begin(); + firmware_restrict_branch_speculation_start(); + efi_enter_mm(); +} + +void arch_efi_call_virt_teardown(void) +{ + efi_leave_mm(); + firmware_restrict_branch_speculation_end(); + efi_fpu_end(); +} + static DEFINE_SPINLOCK(efi_runtime_lock); /* diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index c2a29be35c01..08aa0f25f12a 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -19,6 +19,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY # optimization flags. KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) +# When LTO is enabled, llvm emits many text sections, which is not supported +# by kexec. Remove -flto=* flags. +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS)) + # When linking purgatory.ro with -r unresolved symbols are not checked, # also link a purgatory.chk binary without -r to check for unresolved symbols. PURGATORY_LDFLAGS := -e purgatory_start -z nodefaultlib |