Diffstat (limited to 'arch')
618 files changed, 7144 insertions, 5333 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 205fd23e0cad..aff2746c8af2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -34,6 +34,29 @@ config ARCH_HAS_SUBPAGE_FAULTS
 config HOTPLUG_SMT
 	bool
 
+# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
+config HOTPLUG_CORE_SYNC
+	bool
+
+# Basic CPU dead synchronization selected by architecture
+config HOTPLUG_CORE_SYNC_DEAD
+	bool
+	select HOTPLUG_CORE_SYNC
+
+# Full CPU synchronization with alive state selected by architecture
+config HOTPLUG_CORE_SYNC_FULL
+	bool
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+	select HOTPLUG_CORE_SYNC
+
+config HOTPLUG_SPLIT_STARTUP
+	bool
+	select HOTPLUG_CORE_SYNC_FULL
+
+config HOTPLUG_PARALLEL
+	bool
+	select HOTPLUG_SPLIT_STARTUP
+
 config GENERIC_ENTRY
 	bool
 
@@ -285,6 +308,9 @@ config ARCH_HAS_DMA_SET_UNCACHED
 config ARCH_HAS_DMA_CLEAR_UNCACHED
 	bool
 
+config ARCH_HAS_CPU_FINALIZE_INIT
+	bool
+
 # Select if arch init_task must go in the __init_task_data section
 config ARCH_TASK_STRUCT_ON_STACK
 	bool
 
@@ -400,20 +426,14 @@ config HAVE_HARDLOCKUP_DETECTOR_PERF
 	  The arch chooses to use the generic perf-NMI-based hardlockup
 	  detector. Must define HAVE_PERF_EVENTS_NMI.
 
-config HAVE_NMI_WATCHDOG
-	depends on HAVE_NMI
-	bool
-	help
-	  The arch provides a low level NMI watchdog. It provides
-	  asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
-
 config HAVE_HARDLOCKUP_DETECTOR_ARCH
 	bool
-	select HAVE_NMI_WATCHDOG
 	help
-	  The arch chooses to provide its own hardlockup detector, which is
-	  a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
-	  interfaces and parameters provided by hardlockup detector subsystem.
+	  The arch provides its own hardlockup detector implementation instead
+	  of the generic ones.
+
+	  It uses the same command line parameters, and sysctl interface,
+	  as the generic hardlockup detectors.
 
 config HAVE_PERF_REGS
 	bool
 
@@ -1188,13 +1208,6 @@ config COMPAT_32BIT_TIME
 config ARCH_NO_PREEMPT
 	bool
 
-config ARCH_EPHEMERAL_INODES
-	def_bool n
-	help
-	  An arch should select this symbol if it doesn't keep track of inode
-	  instances on its own, but instead relies on something else (e.g. the
-	  host kernel for an UML kernel).
-
 config ARCH_SUPPORTS_RT
 	bool
 
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index f2861a43a61e..cbd9244571af 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -200,25 +200,6 @@ ATOMIC_OPS(xor, xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define arch_atomic64_cmpxchg(v, old, new) \
-	(arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic64_xchg(v, new) \
-	(arch_xchg(&((v)->counter), new))
-
-#define arch_atomic_cmpxchg(v, old, new) \
-	(arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic_xchg(v, new) \
-	(arch_xchg(&((v)->counter), new))
-
-/**
- * arch_atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
 static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, new, old;
@@ -242,15 +223,6 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 }
 #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
 
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
 static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 c, new, old;
@@ -274,13 +246,6 @@ static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 }
 #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
 static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 old, tmp;
diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h
deleted file mode 100644
index 78030d1c7e7e..000000000000
--- a/arch/alpha/include/asm/bugs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- *  include/asm-alpha/bugs.h
- *
- *  Copyright (C) 1994  Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- *	void check_bugs(void);
- */
-
-/*
- * I don't know of any alpha bugs yet..  Nice chip
- */
-
-static void check_bugs(void)
-{
-}
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 739891b94136..e94f621903fe 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -137,6 +137,9 @@
 
 #define SO_RCVMARK		75
 
+#define SO_PASSPIDFD		76
+#define SO_PEERPIDFD		77
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 2a9a877a0508..d98701ee36c6 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1014,8 +1014,6 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
 	return do_sys_settimeofday64(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
-asmlinkage long sys_ni_posix_timers(void);
-
 SYSCALL_DEFINE2(osf_utimes, const char __user *, filename,
 		 struct timeval32 __user *, tvs)
 {
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 33bf3a627002..b650ff1cb022 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -658,7 +658,7 @@ setup_arch(char **cmdline_p)
 #endif
 
 	/* Default root filesystem to sda2.  */
-	ROOT_DEV = Root_SDA2;
+	ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2);
 
 #ifdef CONFIG_EISA
 	/* FIXME:  only set this when we actually have EISA in this box? */
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 8ebacf37a8cf..1f13995d00d7 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -490,3 +490,4 @@
 558	common	process_mrelease		sys_process_mrelease
 559	common	futex_waitv			sys_futex_waitv
 560	common	set_mempolicy_home_node		sys_ni_syscall
+561	common	cachestat			sys_cachestat
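A note on the entry just added: this wires up the new cachestat() syscall as number 561 on alpha (the arm table later in this diff adds it as 451). A minimal user-space sketch of calling it, assuming the uapi definitions of struct cachestat_range and struct cachestat from <linux/mman.h> and headers new enough to define __NR_cachestat; illustrative only, not part of the diff:

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/mman.h>		/* struct cachestat_range, struct cachestat */

int main(void)
{
	int fd = open("/etc/hostname", O_RDONLY);
	struct cachestat_range range = { .off = 0, .len = 0 };	/* len == 0: to EOF */
	struct cachestat cs;

	if (fd >= 0 && syscall(__NR_cachestat, fd, &range, &cs, 0) == 0)
		printf("cached %llu, dirty %llu, writeback %llu pages\n",
		       (unsigned long long)cs.nr_cache,
		       (unsigned long long)cs.nr_dirty,
		       (unsigned long long)cs.nr_writeback);
	if (fd >= 0)
		close(fd);
	return 0;
}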
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
index 2c830347bfb4..89d12a60f84c 100644
--- a/arch/arc/include/asm/atomic-spinlock.h
+++ b/arch/arc/include/asm/atomic-spinlock.h
@@ -81,6 +81,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v)	\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
+#define arch_atomic_fetch_add		arch_atomic_fetch_add
+#define arch_atomic_fetch_sub		arch_atomic_fetch_sub
+#define arch_atomic_add_return		arch_atomic_add_return
+#define arch_atomic_sub_return		arch_atomic_sub_return
+
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
 	ATOMIC_OP(op, c_op, asm_op)					\
@@ -92,7 +97,11 @@ ATOMIC_OPS(or, |=, or)
 ATOMIC_OPS(xor, ^=, xor)
 
 #define arch_atomic_andnot		arch_atomic_andnot
+
+#define arch_atomic_fetch_and		arch_atomic_fetch_and
 #define arch_atomic_fetch_andnot	arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or		arch_atomic_fetch_or
+#define arch_atomic_fetch_xor		arch_atomic_fetch_xor
 
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 52ee51e1ff7c..592d7fffc223 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -22,30 +22,6 @@
 #include <asm/atomic-spinlock.h>
 #endif
 
-#define arch_atomic_cmpxchg(v, o, n)					\
-({									\
-	arch_cmpxchg(&((v)->counter), (o), (n));			\
-})
-
-#ifdef arch_cmpxchg_relaxed
-#define arch_atomic_cmpxchg_relaxed(v, o, n)				\
-({									\
-	arch_cmpxchg_relaxed(&((v)->counter), (o), (n));		\
-})
-#endif
-
-#define arch_atomic_xchg(v, n)						\
-({									\
-	arch_xchg(&((v)->counter), (n));				\
-})
-
-#ifdef arch_xchg_relaxed
-#define arch_atomic_xchg_relaxed(v, n)					\
-({									\
-	arch_xchg_relaxed(&((v)->counter), (n));			\
-})
-#endif
-
 /*
  * 64-bit atomics
  */
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index c5a8010fdc97..6b6db981967a 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -159,6 +159,7 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
 
 	return prev;
 }
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
 
 static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
 {
@@ -179,14 +180,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
 
 	return prev;
 }
-
-/**
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
+#define arch_atomic64_xchg arch_atomic64_xchg
 
 static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
@@ -212,15 +206,6 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 }
 #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
 static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 old, temp;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9ed7f03ba15a..d60b73d93e03 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,7 @@ config ARM
 	select ARCH_32BIT_OFF_T
 	select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
 	select ARCH_HAS_BINFMT_FLAT
+	select ARCH_HAS_CPU_FINALIZE_INIT if MMU
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
@@ -124,6 +125,7 @@ config ARM
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UID16
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select IRQ_FORCED_THREADING
 	select LOCK_MM_AND_FIND_VMA
 	select MODULES_USE_ELF_REL
@@ -1781,7 +1783,7 @@ config VFP
 	  Say Y to include VFP support code in the kernel. This is needed
 	  if your hardware includes a VFP unit.
 
-	  Please see <file:Documentation/arm/vfp/release-notes.rst> for
+	  Please see <file:Documentation/arch/arm/vfp/release-notes.rst> for
 	  release notes and additional status information.
 
 	  Say N if your target does not have VFP hardware.
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index 1feb6b0f7a1f..627752f18661 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -2,6 +2,7 @@
 #include <linux/libfdt_env.h>
 #include <asm/setup.h>
 #include <libfdt.h>
+#include "misc.h"
 
 #if defined(CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND)
 #define do_extend_cmdline 1
diff --git a/arch/arm/boot/compressed/fdt_check_mem_start.c b/arch/arm/boot/compressed/fdt_check_mem_start.c
index 9291a2661bdf..aa856567fd33 100644
--- a/arch/arm/boot/compressed/fdt_check_mem_start.c
+++ b/arch/arm/boot/compressed/fdt_check_mem_start.c
@@ -3,6 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/libfdt.h>
 #include <linux/sizes.h>
+#include "misc.h"
 
 static const void *get_prop(const void *fdt, const char *node_path,
 			    const char *property, int minlen)
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index abfed1aa2baa..6b4baa6a9a50 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -103,9 +103,6 @@ static void putstr(const char *ptr)
 /*
  * gzip declarations
  */
-extern char input_data[];
-extern char input_data_end[];
-
 unsigned char *output_data;
 
 unsigned long free_mem_ptr;
@@ -131,9 +128,6 @@ asmlinkage void __div0(void)
 	error("Attempting division by 0!");
 }
 
-extern int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
-
-
 void
 decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
 		unsigned long free_mem_ptr_end_p,
diff --git a/arch/arm/boot/compressed/misc.h b/arch/arm/boot/compressed/misc.h
index c958dccd1d97..6da00a26ac08 100644
--- a/arch/arm/boot/compressed/misc.h
+++ b/arch/arm/boot/compressed/misc.h
@@ -6,5 +6,16 @@ void error(char *x) __noreturn;
 
 extern unsigned long free_mem_ptr;
 extern unsigned long free_mem_end_ptr;
+void __div0(void);
+void
+decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
+		unsigned long free_mem_ptr_end_p, int arch_id);
+void fortify_panic(const char *name);
+int atags_to_fdt(void *atag_list, void *fdt, int total_space);
+uint32_t fdt_check_mem_start(uint32_t mem_start, const void *fdt);
+int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
+
+extern char input_data[];
+extern char input_data_end[];
 
 #endif
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 8a9aeeb504dd..e013ff1168d3 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -21,7 +21,7 @@
 /*
  * The public API for this code is documented in arch/arm/include/asm/mcpm.h.
  * For a comprehensive description of the main algorithm used here, please
- * see Documentation/arm/cluster-pm-race-avoidance.rst.
+ * see Documentation/arch/arm/cluster-pm-race-avoidance.rst.
  */
 
 struct sync_struct mcpm_sync;
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 299495c43dfd..f590e803ca11 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -5,7 +5,7 @@
  * Created by:	Nicolas Pitre, March 2012
  * Copyright:	(C) 2012-2013  Linaro Limited
  *
- * Refer to Documentation/arm/cluster-pm-race-avoidance.rst
+ * Refer to Documentation/arch/arm/cluster-pm-race-avoidance.rst
  * for details of the synchronisation algorithms used here.
  */
 
diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S
index 1fa09c4697ed..c5eaed5a76f0 100644
--- a/arch/arm/common/vlock.S
+++ b/arch/arm/common/vlock.S
@@ -6,7 +6,7 @@
  * Copyright:	(C) 2012-2013  Linaro Limited
  *
  * This algorithm is described in more detail in
- * Documentation/arm/vlocks.rst.
+ * Documentation/arch/arm/vlocks.rst.
  */
 
 #include <linux/linkage.h>
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index f4db3e75d75f..f3cd04ff022d 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
 	return false;
 }
 
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+	return false;
+}
+
 /* PMU Version in DFR Register */
 #define ARMV8_PMU_DFR_VER_NI        0
 #define ARMV8_PMU_DFR_VER_V3P4      0x5
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 505a306e0271..aebe2c8f6a68 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -394,6 +394,23 @@ ALT_UP_B(.L0_\@)
 #endif
 	.endm
 
+/*
+ * Raw SMP data memory barrier
+ */
+	.macro	__smp_dmb mode
+#if __LINUX_ARM_ARCH__ >= 7
+	.ifeqs "\mode","arm"
+	dmb	ish
+	.else
+	W(dmb)	ish
+	.endif
+#elif __LINUX_ARM_ARCH__ == 6
+	mcr	p15, 0, r0, c7, c10, 5	@ dmb
+#else
+	.error "Incompatible SMP platform"
+#endif
+	.endm
+
 #if defined(CONFIG_CPU_V7M)
 	/*
 	 * setmode is used to assert to be in svc mode during boot. For v7-M
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index db8512d9a918..f0e3b01afa74 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -197,6 +197,16 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 	return val;							\
 }
 
+#define arch_atomic_add_return			arch_atomic_add_return
+#define arch_atomic_sub_return			arch_atomic_sub_return
+#define arch_atomic_fetch_add			arch_atomic_fetch_add
+#define arch_atomic_fetch_sub			arch_atomic_fetch_sub
+
+#define arch_atomic_fetch_and			arch_atomic_fetch_and
+#define arch_atomic_fetch_andnot		arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or			arch_atomic_fetch_or
+#define arch_atomic_fetch_xor			arch_atomic_fetch_xor
+
 static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -210,8 +220,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 
 	return ret;
 }
-
-#define arch_atomic_fetch_andnot		arch_atomic_fetch_andnot
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
 
 #endif /* __LINUX_ARM_ARCH__ */
 
@@ -240,8 +249,6 @@ ATOMIC_OPS(xor, ^=, eor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
 	s64 counter;
diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h
index 97a312ba0840..fe385551edec 100644
--- a/arch/arm/include/asm/bugs.h
+++ b/arch/arm/include/asm/bugs.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- *  arch/arm/include/asm/bugs.h
- *
  *  Copyright (C) 1995-2003 Russell King
  */
 #ifndef __ASM_BUGS_H
@@ -10,10 +8,8 @@
 extern void check_writebuffer_bugs(void);
 
 #ifdef CONFIG_MMU
-extern void check_bugs(void);
 extern void check_other_bugs(void);
 #else
-#define check_bugs() do { } while (0)
 #define check_other_bugs() do { } while (0)
 #endif
 
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 7e9251ca29fe..5be3ddc96a50 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -75,6 +75,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
 	return !strcasecmp(sym, name);
 }
 
+void prepare_ftrace_return(unsigned long *parent, unsigned long self,
+			   unsigned long frame_pointer,
+			   unsigned long stack_pointer);
+
 #endif /* ifndef __ASSEMBLY__ */
 
 #endif /* _ASM_ARM_FTRACE */
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index a7c2337b0c7d..18605f1b3580 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -27,7 +27,6 @@ struct irqaction;
 struct pt_regs;
 
 void handle_IRQ(unsigned int, struct pt_regs *);
-void init_IRQ(void);
 
 #ifdef CONFIG_SMP
 #include <linux/cpumask.h>
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 9349e7a82c9c..2b18a258204d 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -56,7 +56,6 @@ struct machine_desc {
 	void			(*init_time)(void);
 	void			(*init_machine)(void);
 	void			(*init_late)(void);
-	void			(*handle_irq)(struct pt_regs *);
 	void			(*restart)(enum reboot_mode, const char *);
 };
 
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 74bb5947b387..28c63d172a96 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -113,6 +113,28 @@ struct cpu_user_fns {
 			unsigned long vaddr, struct vm_area_struct *vma);
 };
 
+void fa_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr);
+void feroceon_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr);
+void v4_mc_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr);
+void v4wb_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr);
+void v4wt_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr);
+void xsc3_mc_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr);
+void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+void xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr);
+
 #ifdef MULTI_USER
 extern struct cpu_user_fns cpu_user;
 
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 483b8ddfcb82..7f44e88d1f25 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -193,5 +193,8 @@ static inline unsigned long it_advance(unsigned long cpsr)
 	return cpsr;
 }
 
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_exit(struct pt_regs *regs);
+
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index ba0872a8dcda..546af8b1e3f6 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -5,7 +5,7 @@
  *  Copyright (C) 1997-1999 Russell King
  *
  *  Structure passed to kernel to tell it about the
- *  hardware it's running on.  See Documentation/arm/setup.rst
+ *  hardware it's running on.  See Documentation/arch/arm/setup.rst
  *  for more info.
 */
 #ifndef __ASMARM_SETUP_H
@@ -28,4 +28,11 @@ extern void save_atags(const struct tag *tags);
 static inline void save_atags(const struct tag *tags) { }
 #endif
 
+struct machine_desc;
+void init_default_cache_policy(unsigned long);
+void paging_init(const struct machine_desc *desc);
+void early_mm_init(const struct machine_desc *);
+void adjust_lowmem_bounds(void);
+void setup_dma_zone(const struct machine_desc *desc);
+
 #endif
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 430be7774402..8b84092d1518 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -22,4 +22,9 @@ typedef struct {
 #define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
+
+void do_rseq_syscall(struct pt_regs *regs);
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+		    int syscall);
+
 #endif
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 7c1c90d9f582..8c05a7f374d8 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -64,7 +64,7 @@ extern void secondary_startup_arm(void);
 
 extern int __cpu_disable(void);
 
-extern void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
 
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h
index 85f9e538fb32..d9c28b3b6b62 100644
--- a/arch/arm/include/asm/spectre.h
+++ b/arch/arm/include/asm/spectre.h
@@ -35,4 +35,8 @@ static inline void spectre_v2_update_state(unsigned int state,
 
 int spectre_bhb_update_vectors(unsigned int method);
 
+void cpu_v7_ca8_ibe(void);
+void cpu_v7_ca15_ibe(void);
+void cpu_v7_bugs_init(void);
+
 #endif
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index 506314265c6f..be81b9ca2ea1 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -13,5 +13,6 @@ extern void cpu_resume(void);
 extern void cpu_resume_no_hyp(void);
 extern void cpu_resume_arm(void);
 extern int cpu_suspend(unsigned long, int (*)(unsigned long));
+extern void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr);
 
 #endif
diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
index 6f5d627c44a3..f46b3c570f92 100644
--- a/arch/arm/include/asm/sync_bitops.h
+++ b/arch/arm/include/asm/sync_bitops.h
@@ -14,14 +14,35 @@
  * ops which are SMP safe even on a UP kernel.
 */
 
+/*
+ * Unordered
+ */
+
 #define sync_set_bit(nr, p)		_set_bit(nr, p)
 #define sync_clear_bit(nr, p)		_clear_bit(nr, p)
 #define sync_change_bit(nr, p)		_change_bit(nr, p)
-#define sync_test_and_set_bit(nr, p)	_test_and_set_bit(nr, p)
-#define sync_test_and_clear_bit(nr, p)	_test_and_clear_bit(nr, p)
-#define sync_test_and_change_bit(nr, p)	_test_and_change_bit(nr, p)
 #define sync_test_bit(nr, addr)		test_bit(nr, addr)
-#define arch_sync_cmpxchg		arch_cmpxchg
 
+/*
+ * Fully ordered
+ */
+
+int _sync_test_and_set_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_set_bit(nr, p)	_sync_test_and_set_bit(nr, p)
+
+int _sync_test_and_clear_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_clear_bit(nr, p)	_sync_test_and_clear_bit(nr, p)
+
+int _sync_test_and_change_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_change_bit(nr, p)	_sync_test_and_change_bit(nr, p)
+
+#define arch_sync_cmpxchg(ptr, old, new)				\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__smp_mb__before_atomic();					\
+	__ret = arch_cmpxchg_relaxed((ptr), (old), (new));		\
+	__smp_mb__after_atomic();					\
+	__ret;								\
+})
 
 #endif
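For readers tracing the new arch_sync_cmpxchg: it now builds a fully ordered compare-and-swap from the relaxed primitive plus explicit barriers, rather than aliasing arch_cmpxchg. A rough C rendering of what the macro expands to, with the barrier intent spelled out (illustrative sketch under an invented function name, not the kernel source):

static inline unsigned long sync_cmpxchg_sketch(volatile unsigned long *ptr,
						unsigned long old,
						unsigned long new)
{
	unsigned long ret;

	__smp_mb__before_atomic();	/* order earlier accesses before the CAS */
	ret = arch_cmpxchg_relaxed(ptr, old, new);
	__smp_mb__after_atomic();	/* order the CAS before later accesses */
	return ret;
}

The same split shows up in the lib/bitops.h hunks below, where the sync_ test-and-modify bit ops get a dedicated __smp_dmb barrier variant.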
diff --git a/arch/arm/include/asm/syscalls.h b/arch/arm/include/asm/syscalls.h
new file mode 100644
index 000000000000..5912e7cffa6a
--- /dev/null
+++ b/arch/arm/include/asm/syscalls.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_SYSCALLS_H
+#define __ASM_SYSCALLS_H
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+struct pt_regs;
+asmlinkage int sys_sigreturn(struct pt_regs *regs);
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
+asmlinkage long sys_arm_fadvise64_64(int fd, int advice,
+				     loff_t offset, loff_t len);
+
+struct oldabi_stat64;
+asmlinkage long sys_oabi_stat64(const char __user * filename,
+				struct oldabi_stat64 __user * statbuf);
+asmlinkage long sys_oabi_lstat64(const char __user * filename,
+				 struct oldabi_stat64 __user * statbuf);
+asmlinkage long sys_oabi_fstat64(unsigned long fd,
+				 struct oldabi_stat64 __user * statbuf);
+asmlinkage long sys_oabi_fstatat64(int dfd,
+				   const char __user *filename,
+				   struct oldabi_stat64 __user *statbuf,
+				   int flag);
+asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+				 unsigned long arg);
+struct oabi_epoll_event;
+asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
+				   struct oabi_epoll_event __user *event);
+struct oabi_sembuf;
+struct old_timespec32;
+asmlinkage long sys_oabi_semtimedop(int semid,
+				    struct oabi_sembuf __user *tsops,
+				    unsigned nsops,
+				    const struct old_timespec32 __user *timeout);
+asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
+			       unsigned nsops);
+asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
+			    void __user *ptr, long fifth);
+struct sockaddr;
+asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen);
+asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen);
+asmlinkage long sys_oabi_sendto(int fd, void __user *buff,
+				size_t len, unsigned flags,
+				struct sockaddr __user *addr,
+				int addrlen);
+struct user_msghdr;
+asmlinkage long sys_oabi_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags);
+asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args);
+
+#endif
diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h
index d8bd8a4b0ede..e1f7dca86a22 100644
--- a/arch/arm/include/asm/tcm.h
+++ b/arch/arm/include/asm/tcm.h
@@ -9,9 +9,7 @@
 #ifndef __ASMARM_TCM_H
 #define __ASMARM_TCM_H
 
-#ifndef CONFIG_HAVE_TCM
-#error "You should not be including tcm.h unless you have a TCM!"
-#endif
+#ifdef CONFIG_HAVE_TCM
 
 #include <linux/compiler.h>
 
@@ -29,4 +27,11 @@ void tcm_free(void *addr, size_t len);
 bool tcm_dtcm_present(void);
 bool tcm_itcm_present(void);
 
+void __init tcm_init(void);
+#else
+/* No TCM support, just blank inlines to be optimized out */
+static inline void tcm_init(void)
+{
+}
+#endif
 #endif
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index 987fefb0a4db..0aaefe3e1700 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -35,4 +35,13 @@ extern void ptrace_break(struct pt_regs *regs);
 
 extern void *vectors_page;
 
+asmlinkage void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl);
+asmlinkage void do_undefinstr(struct pt_regs *regs);
+asmlinkage void handle_fiq_as_nmi(struct pt_regs *regs);
+asmlinkage void bad_mode(struct pt_regs *regs, int reason);
+asmlinkage int arm_syscall(int no, struct pt_regs *regs);
+asmlinkage void baddataabort(int code, unsigned long instr, struct pt_regs *regs);
+asmlinkage void __div0(void);
+asmlinkage void handle_bad_stack(struct pt_regs *regs);
+
 #endif
diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h
index b51f85417f58..d60b09a5acfc 100644
--- a/arch/arm/include/asm/unwind.h
+++ b/arch/arm/include/asm/unwind.h
@@ -40,6 +40,10 @@ extern void unwind_table_del(struct unwind_table *tab);
 extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 			     const char *loglvl);
 
+void __aeabi_unwind_cpp_pr0(void);
+void __aeabi_unwind_cpp_pr1(void);
+void __aeabi_unwind_cpp_pr2(void);
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_ARM_UNWIND
diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
index 5b85889f82ee..422c3afa806a 100644
--- a/arch/arm/include/asm/vdso.h
+++ b/arch/arm/include/asm/vdso.h
@@ -24,6 +24,11 @@ static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
 
 #endif /* CONFIG_VDSO */
 
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts);
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h
index 157ea3426158..5b57b8768bac 100644
--- a/arch/arm/include/asm/vfp.h
+++ b/arch/arm/include/asm/vfp.h
@@ -102,6 +102,7 @@
 
 #ifndef __ASSEMBLY__
 void vfp_disable(void);
+void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs);
 #endif
 
 #endif /* __ASM_VFP_H */
diff --git a/arch/arm/include/uapi/asm/setup.h b/arch/arm/include/uapi/asm/setup.h
index 25ceda63b284..8e50e034fec7 100644
--- a/arch/arm/include/uapi/asm/setup.h
+++ b/arch/arm/include/uapi/asm/setup.h
@@ -9,7 +9,7 @@
  *  published by the Free Software Foundation.
  *
  *  Structure passed to kernel to tell it about the
- *  hardware it's running on.  See Documentation/arm/setup.rst
+ *  hardware it's running on.  See Documentation/arch/arm/setup.rst
  *  for more info.
 */
 #ifndef _UAPI__ASMARM_SETUP_H
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 373b61f9a4f0..33f6eb5213a5 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -127,7 +127,7 @@ static int __init parse_tag_cmdline(const struct tag *tag)
 #elif defined(CONFIG_CMDLINE_FORCE)
 	pr_warn("Ignoring tag cmdline (using the default kernel command line)\n");
 #else
-	strlcpy(default_command_line, tag->u.cmdline.cmdline,
+	strscpy(default_command_line, tag->u.cmdline.cmdline,
 		COMMAND_LINE_SIZE);
 #endif
 	return 0;
@@ -224,7 +224,7 @@ setup_machine_tags(void *atags_vaddr, unsigned int machine_nr)
 	}
 
 	/* parse_early_param needs a boot_command_line */
-	strlcpy(boot_command_line, from, COMMAND_LINE_SIZE);
+	strscpy(boot_command_line, from, COMMAND_LINE_SIZE);
 
 	return mdesc;
 }
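On the strlcpy() to strscpy() conversions here (and in setup.c and vdso.c further down): strscpy() bounds its reads of the source buffer and reports truncation as -E2BIG instead of returning the length it would have copied. A hedged sketch of the resulting caller pattern, with a made-up helper name:

static void copy_cmdline_sketch(char *dst, const char *src, size_t size)
{
	ssize_t n = strscpy(dst, src, size);	/* always NUL-terminates dst */

	if (n == -E2BIG)	/* src did not fit; dst holds a truncated copy */
		pr_warn("command line truncated\n");
}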
diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
index 14c8dbbb7d2d..087bce6ec8e9 100644
--- a/arch/arm/kernel/bugs.c
+++ b/arch/arm/kernel/bugs.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/init.h>
+#include <linux/cpu.h>
 #include <asm/bugs.h>
 #include <asm/proc-fns.h>
 
@@ -11,7 +12,7 @@ void check_other_bugs(void)
 #endif
 }
 
-void __init check_bugs(void)
+void __init arch_cpu_finalize_init(void)
 {
 	check_writebuffer_bugs();
 	check_other_bugs();
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index c39303e5c234..291dc48d6bed 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -875,7 +875,7 @@ ENDPROC(__bad_stack)
 * existing ones.  This mechanism should be used only for things that are
 * really small and justified, and not be abused freely.
 *
- * See Documentation/arm/kernel_user_helpers.rst for formal definitions.
+ * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions.
 */
 THUMB(	.arm	)
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 98ca3e3fa847..d2c8e5313539 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -45,6 +45,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cp15.h>
 #include <asm/fiq.h>
+#include <asm/mach/irq.h>
 #include <asm/irq.h>
 #include <asm/traps.h>
 
diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c
index 89a52104d32a..225c0699a12c 100644
--- a/arch/arm/kernel/head-inflate-data.c
+++ b/arch/arm/kernel/head-inflate-data.c
@@ -8,16 +8,13 @@
 
 #include <linux/init.h>
 #include <linux/zutil.h>
+#include "head.h"
 
 /* for struct inflate_state */
 #include "../../../lib/zlib_inflate/inftrees.h"
 #include "../../../lib/zlib_inflate/inflate.h"
 #include "../../../lib/zlib_inflate/infutil.h"
 
-extern char __data_loc[];
-extern char _edata_loc[];
-extern char _sdata[];
-
 /*
  * This code is called very early during the boot process to decompress
  * the .data segment stored compressed in ROM. Therefore none of the global
diff --git a/arch/arm/kernel/head.h b/arch/arm/kernel/head.h
new file mode 100644
index 000000000000..0eb5accf7141
--- /dev/null
+++ b/arch/arm/kernel/head.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+
+int __init __inflate_kernel_data(void);
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index d59c36dc0494..e74d84f58b77 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -169,8 +169,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			offset = __mem_to_opcode_arm(*(u32 *)loc);
 			offset = (offset & 0x00ffffff) << 2;
-			if (offset & 0x02000000)
-				offset -= 0x04000000;
+			offset = sign_extend32(offset, 25);
 
 			offset += sym->st_value - loc;
 
@@ -236,7 +235,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 		case R_ARM_MOVT_PREL:
 			offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
 			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
-			offset = (offset ^ 0x8000) - 0x8000;
+			offset = sign_extend32(offset, 15);
 
 			offset += sym->st_value;
 			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL ||
@@ -344,8 +343,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 				((~(j2 ^ sign) & 1) << 22) |
 				((upper & 0x03ff) << 12) |
 				((lower & 0x07ff) << 1);
-			if (offset & 0x01000000)
-				offset -= 0x02000000;
+			offset = sign_extend32(offset, 24);
 			offset += sym->st_value - loc;
 
 			/*
@@ -401,7 +399,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			offset = ((upper & 0x000f) << 12) |
 				((upper & 0x0400) << 1) |
 				((lower & 0x7000) >> 4) | (lower & 0x00ff);
-			offset = (offset ^ 0x8000) - 0x8000;
+			offset = sign_extend32(offset, 15);
 			offset += sym->st_value;
 
 			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL ||
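The module.c hunks above replace four open-coded sign extensions with sign_extend32(); the two forms are equivalent. A small self-contained check for the 26-bit R_ARM_CALL offset case (user-space sketch mirroring the kernel helper, not kernel code):

#include <assert.h>
#include <stdint.h>

static int32_t sign_extend32_sketch(uint32_t value, int index)
{
	uint8_t shift = 31 - index;		/* index = bit number of the sign bit */
	return (int32_t)(value << shift) >> shift;
}

int main(void)
{
	uint32_t offset = 0x03fffffc;		/* 26-bit field with bit 25 set */
	int32_t old_way = offset;

	if (old_way & 0x02000000)		/* the removed open-coded form */
		old_way -= 0x04000000;

	assert(old_way == sign_extend32_sketch(offset, 25));	/* both yield -4 */
	return 0;
}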
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 75cd4699e7b3..c66b560562b3 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -76,13 +76,6 @@ static int __init fpe_setup(char *line)
 __setup("fpe=", fpe_setup);
 #endif
 
-extern void init_default_cache_policy(unsigned long);
-extern void paging_init(const struct machine_desc *desc);
-extern void early_mm_init(const struct machine_desc *);
-extern void adjust_lowmem_bounds(void);
-extern enum reboot_mode reboot_mode;
-extern void setup_dma_zone(const struct machine_desc *desc);
-
 unsigned int processor_id;
 EXPORT_SYMBOL(processor_id);
 unsigned int __machine_arch_type __read_mostly;
@@ -1142,7 +1135,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_initial_init_mm(_text, _etext, _edata, _end);
 
 	/* populate cmd_line too for later use, preserving boot_command_line */
-	strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+	strscpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = cmd_line;
 
 	early_fixmap_init();
@@ -1198,10 +1191,6 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_crashkernel();
 
-#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
-	handle_arch_irq = mdesc->handle_irq;
-#endif
-
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index e07f359254c3..8d0afa11bed5 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -18,6 +18,7 @@
 #include <asm/traps.h>
 #include <asm/unistd.h>
 #include <asm/vfp.h>
+#include <asm/syscalls.h>
 
 #include "signal.h"
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 87f8d0e5e314..6756203e45f3 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -288,15 +288,11 @@ int __cpu_disable(void)
 }
 
 /*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
 */
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
-	if (!cpu_wait_death(cpu, 5)) {
-		pr_err("CPU%u: cpu didn't die\n", cpu);
-		return;
-	}
 	pr_debug("CPU%u: shutdown\n", cpu);
 
 	clear_tasks_mm_cpumask(cpu);
@@ -336,11 +332,11 @@ void __noreturn arch_cpu_idle_dead(void)
 	flush_cache_louis();
 
 	/*
-	 * Tell __cpu_die() that this CPU is now safe to dispose of. Once
-	 * this returns, power and/or clocks can be removed at any point
-	 * from this CPU and its cache by platform_cpu_kill().
+	 * Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose
+	 * of. Once this returns, power and/or clocks can be removed at
+	 * any point from this CPU and its cache by platform_cpu_kill().
 	 */
-	(void)cpu_report_death();
+	cpuhp_ap_report_dead();
 
 	/*
 	 * Ensure that the cache lines associated with that completion are
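The smp.c change is the arm side of the HOTPLUG_CORE_SYNC_DEAD machinery added in the arch/Kconfig hunk at the top of this diff: the control CPU no longer polls via cpu_wait_death(); the generic code waits in cpuhp_bp_sync_dead() and then invokes the arch cleanup hook. Roughly, the dying CPU's side now looks like this (paraphrased sketch with an invented function name, not the exact kernel code):

void __noreturn arch_cpu_idle_dead_sketch(void)
{
	flush_cache_louis();		/* flush this CPU's cache before it dies */
	cpuhp_ap_report_dead();		/* hand off to the core's dead synchronization */
	/* the platform's cpu_die() callback follows and never returns */
	for (;;)
		;
}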
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index a5f183cfecb1..0141e9bb02e8 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -24,6 +24,7 @@
 #include <linux/ipc.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <asm/syscalls.h>
 
 /*
 * Since loff_t is a 64 bit type we avoid a lot of ABI hassle
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 006163195d67..d00f4040a9f5 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -10,6 +10,8 @@
 *  Copyright:	MontaVista Software, Inc.
 */
 
+#include <asm/syscalls.h>
+
 /*
 * The legacy ABI and the new ARM EABI have different rules making some
 * syscalls incompatible especially with structure arguments.
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 40c7c807d67f..3bad79db5d6e 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -756,6 +756,7 @@ void __readwrite_bug(const char *fn)
 }
 EXPORT_SYMBOL(__readwrite_bug);
 
+#ifdef CONFIG_MMU
 void __pte_error(const char *file, int line, pte_t pte)
 {
 	pr_err("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
@@ -770,6 +771,7 @@ void __pgd_error(const char *file, int line, pgd_t pgd)
 {
 	pr_err("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
 }
+#endif
 
 asmlinkage void __div0(void)
 {
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 3408269d19c7..f297d66a8a76 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -135,7 +135,7 @@ static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
 
 		if (lib->dynsym[i].st_name == 0)
 			continue;
-		strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+		strscpy(name, lib->dynstr + lib->dynsym[i].st_name,
 			MAX_SYMNAME);
 		c = strchr(name, '@');
 		if (c)
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 95bd35991288..f069d1b2318e 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -28,7 +28,7 @@ UNWIND(	.fnend		)
 ENDPROC(\name		)
 	.endm
 
-	.macro	testop, name, instr, store
+	.macro	__testop, name, instr, store, barrier
 ENTRY(	\name		)
 UNWIND(	.fnstart	)
 	ands	ip, r1, #3
@@ -38,7 +38,7 @@ UNWIND(	.fnstart	)
 	mov	r0, r0, lsr #5
 	add	r1, r1, r0, lsl #2	@ Get word offset
 	mov	r3, r2, lsl r3		@ create mask
-	smp_dmb
+	\barrier
 #if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
 	.arch_extension	mp
 	ALT_SMP(W(pldw)	[r1])
@@ -50,13 +50,21 @@ UNWIND(	.fnstart	)
 	strex	ip, r2, [r1]
 	cmp	ip, #0
 	bne	1b
-	smp_dmb
+	\barrier
 	cmp	r0, #0
 	movne	r0, #1
2:	bx	lr
UNWIND(	.fnend		)
ENDPROC(\name		)
	.endm
+
+	.macro	testop, name, instr, store
+	__testop \name, \instr, \store, smp_dmb
+	.endm
+
+	.macro	sync_testop, name, instr, store
+	__testop \name, \instr, \store, __smp_dmb
+	.endm
 #else
 	.macro	bitop, name, instr
ENTRY(	\name		)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index 4ebecc67e6e0..f13fe9bc2399 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -10,3 +10,7 @@
                 .text
 
 testop	_test_and_change_bit, eor, str
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop	_sync_test_and_change_bit, eor, str
+#endif
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 009afa0f5b4a..4d2c5ca620eb 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -10,3 +10,7 @@
                 .text
 
 testop	_test_and_clear_bit, bicne, strne
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop	_sync_test_and_clear_bit, bicne, strne
+#endif
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index f3192e55acc8..649dbab65d8d 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -10,3 +10,7 @@
                 .text
 
 testop	_test_and_set_bit, orreq, streq
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop	_sync_test_and_set_bit, orreq, streq
+#endif
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index e4c2677cc1e9..2f6163f05e93 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -74,6 +74,9 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 		return 0;
 
 	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+	if (unlikely(!pte))
+		return 0;
+
 	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
 	    !pte_write(*pte) || !pte_dirty(*pte))) {
 		pte_unmap_unlock(pte, ptl);
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 29eb075b24a4..b5287ff1c542 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -106,7 +106,7 @@ void exynos_firmware_init(void);
 #define C2_STATE	(1 << 3)
 /*
 * Magic values for bootloader indicating chosen low power mode.
- * See also Documentation/arm/samsung/bootloader-interface.rst
+ * See also Documentation/arch/arm/samsung/bootloader-interface.rst
 */
 #define EXYNOS_SLEEP_MAGIC	0x00000bad
 #define EXYNOS_AFTR_MAGIC	0xfcba0d10
diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 51e47053c816..3faf9a1e3e36 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -11,7 +11,6 @@
 #include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
-#include <linux/irqchip/mxs.h>
 #include <linux/reboot.h>
 #include <linux/micrel_phy.h>
 #include <linux/of_address.h>
@@ -472,7 +471,6 @@ static const char *const mxs_dt_compat[] __initconst = {
 };
 
 DT_MACHINE_START(MXS, "Freescale MXS (Device Tree)")
-	.handle_irq	= icoll_handle_irq,
 	.init_machine	= mxs_machine_init,
 	.init_late      = mxs_pm_init,
 	.dt_compat	= mxs_dt_compat,
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 9108c871d129..88139200449e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -877,7 +877,6 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)")
 	.map_io		= ams_delta_map_io,
 	.init_early	= omap1_init_early,
 	.init_irq	= omap1_init_irq,
-	.handle_irq	= omap1_handle_irq,
 	.init_machine	= ams_delta_init,
 	.init_late	= ams_delta_init_late,
 	.init_time	= omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index a501a473ffd6..b56cea9f9d2f 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -291,7 +291,6 @@ MACHINE_START(NOKIA770, "Nokia 770")
 	.map_io		= omap1_map_io,
 	.init_early	= omap1_init_early,
 	.init_irq	= omap1_init_irq,
-	.handle_irq	= omap1_handle_irq,
 	.init_machine	= omap_nokia770_init,
 	.init_late	= omap1_init_late,
 	.init_time	= omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index df758c1f9237..46eda4ff4797 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -389,7 +389,6 @@ MACHINE_START(OMAP_OSK, "TI-OSK")
 	.map_io		= omap1_map_io,
 	.init_early	= omap1_init_early,
 	.init_irq	= omap1_init_irq,
-	.handle_irq	= omap1_handle_irq,
 	.init_machine	= osk_init,
 	.init_late	= omap1_init_late,
 	.init_time	= omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c
index f79c497f04d5..91df3dc365af 100644
--- a/arch/arm/mach-omap1/board-palmte.c
+++ b/arch/arm/mach-omap1/board-palmte.c
@@ -259,7 +259,6 @@ MACHINE_START(OMAP_PALMTE, "OMAP310 based Palm Tungsten E")
 	.map_io		= omap1_map_io,
 	.init_early	= omap1_init_early,
 	.init_irq	= omap1_init_irq,
-	.handle_irq	= omap1_handle_irq,
 	.init_machine	= omap_palmte_init,
 	.init_late	= omap1_init_late,
 	.init_time	= omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c
index 0c0cdd5e77c7..3ae295af96fd 100644
--- a/arch/arm/mach-omap1/board-sx1.c
+++ b/arch/arm/mach-omap1/board-sx1.c
@@ -338,7 +338,6 @@ MACHINE_START(SX1, "OMAP310 based Siemens SX1")
 	.map_io		= omap1_map_io,
 	.init_early	= omap1_init_early,
 	.init_irq	= omap1_init_irq,
-	.handle_irq	= omap1_handle_irq,
 	.init_machine	= omap_sx1_init,
 	.init_late	= omap1_init_late,
 	.init_time	= omap1_timer_init,
diff --git a/arch/arm/mach-omap1/irq.c b/arch/arm/mach-omap1/irq.c
index bfc7ab010ae2..3d9e72e1eddc 100644
--- a/arch/arm/mach-omap1/irq.c
+++ b/arch/arm/mach-omap1/irq.c
@@ -37,6 +37,7 @@
 */
 #include <linux/gpio.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
@@ -254,4 +255,6 @@ void __init omap1_init_irq(void)
 		ct = irq_data_get_chip_type(d);
 		ct->chip.irq_unmask(d);
 	}
+
+	set_handle_irq(omap1_handle_irq);
 }
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index 72b08a9bf0fd..6b7197ae3c72 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -233,7 +233,6 @@ MACHINE_START(GUMSTIX, "Gumstix")
 	.map_io		= pxa25x_map_io,
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
-	.handle_irq	= pxa25x_handle_irq,
 	.init_time	= pxa_timer_init,
 	.init_machine	= gumstix_init,
 	.restart	= pxa_restart,
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index 1b83be181bab..032dc897fe94 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -143,6 +143,7 @@ set_pwer:
 void __init pxa25x_init_irq(void)
 {
 	pxa_init_irq(32, pxa25x_set_wake);
+	set_handle_irq(pxa25x_handle_irq);
 }
 
 static int __init __init
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 4135ba2877c4..c9b56424b653 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -228,6 +228,7 @@ static int pxa27x_set_wake(struct irq_data *d, unsigned int on)
 void __init pxa27x_init_irq(void)
 {
 	pxa_init_irq(34, pxa27x_set_wake);
+	set_handle_irq(pxa27x_handle_irq);
 }
 
 static int __init
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 4325bdc2b9ff..042922a0a9d6 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -1043,7 +1043,6 @@ MACHINE_START(SPITZ, "SHARP Spitz")
 	.map_io		= pxa27x_map_io,
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
-	.handle_irq	= pxa27x_handle_irq,
 	.init_machine	= spitz_init,
 	.init_time	= pxa_timer_init,
 	.restart	= spitz_restart,
@@ -1056,7 +1055,6 @@ MACHINE_START(BORZOI, "SHARP Borzoi")
 	.map_io		= pxa27x_map_io,
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
-	.handle_irq	= pxa27x_handle_irq,
 	.init_machine	= spitz_init,
 	.init_time	= pxa_timer_init,
 	.restart	= spitz_restart,
@@ -1069,7 +1067,6 @@ MACHINE_START(AKITA, "SHARP Akita")
 	.map_io		= pxa27x_map_io,
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
-	.handle_irq	= pxa27x_handle_irq,
 	.init_machine	= spitz_init,
 	.init_time	= pxa_timer_init,
 	.restart	= spitz_restart,
diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig
index b2d45cf10a3c..b3842c971d31 100644
--- a/arch/arm/mach-sti/Kconfig
+++ b/arch/arm/mach-sti/Kconfig
@@ -21,7 +21,7 @@ menuconfig ARCH_STI
 	help
 	  Include support for STMicroelectronics' STiH415/416, STiH407/10 and
 	  STiH418 family SoCs using the Device Tree for discovery.  More
-	  information can be found in Documentation/arm/sti/ and
+	  information can be found in Documentation/arch/arm/sti/ and
 	  Documentation/devicetree.
 
 if ARCH_STI
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index be183ed1232d..c164cde50243 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -712,7 +712,7 @@ config ARM_VIRT_EXT
 	  assistance.
 
 	  A compliant bootloader is required in order to make maximum
-	  use of this feature.  Refer to Documentation/arm/booting.rst for
+	  use of this feature.  Refer to Documentation/arch/arm/booting.rst for
 	  details.
 
 config SWP_EMULATE
@@ -904,7 +904,7 @@ config KUSER_HELPERS
 	  the CPU type fitted to the system.  This permits binaries to be
 	  run on ARMv4 through to ARMv7 without modification.
 
-	  See Documentation/arm/kernel_user_helpers.rst for details.
+	  See Documentation/arch/arm/kernel_user_helpers.rst for details.
 
 	  However, the fixed address nature of these helpers can be used
 	  by ROP (return orientated programming) authors when creating
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index b4a33358d2e9..bc4ed5ce3e00 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -258,12 +258,14 @@ static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
 
 static int dma_mmu_remap_num __initdata;
 
+#ifdef CONFIG_DMA_CMA
 void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
 {
 	dma_mmu_remap[dma_mmu_remap_num].base = base;
 	dma_mmu_remap[dma_mmu_remap_num].size = size;
 	dma_mmu_remap_num++;
 }
+#endif
 
 void __init dma_contiguous_remap(void)
 {
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 0e49154454a6..ca5302b0b7ee 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -117,8 +117,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
 	 * must use the nested version.  This also means we need to
 	 * open-code the spin-locking.
 	 */
-	ptl = pte_lockptr(vma->vm_mm, pmd);
 	pte = pte_offset_map(pmd, address);
+	if (!pte)
+		return 0;
+
+	ptl = pte_lockptr(vma->vm_mm, pmd);
 	do_pte_lock(ptl);
 
 	ret = do_adjust_pte(vma, address, pfn, pte);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0860eeba8bd3..fef62e4a9edd 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -85,6 +85,9 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
 			break;
 
 		pte = pte_offset_map(pmd, addr);
+		if (!pte)
+			break;
+
 		pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
 #ifndef CONFIG_ARM_LPAE
 		pr_cont(", *ppte=%08llx",
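The NULL checks added in fault-armv.c and fault.c (and in uaccess_with_memcpy.c earlier) reflect that pte_offset_map() may now fail when the page table it would map is concurrently being freed. The resulting caller pattern, sketched as kernel-style pseudocode with an invented function name:

static int walk_pte_sketch(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte = pte_offset_map(pmd, addr);

	if (!pte)		/* page table absent or going away: bail out */
		return 0;
	/* ... inspect or adjust *pte here ... */
	pte_unmap(pte);
	return 1;
}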
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
index 54927ba1fa6e..e8f8c1902544 100644
--- a/arch/arm/mm/fault.h
+++ b/arch/arm/mm/fault.h
@@ -37,5 +37,9 @@ static inline int fsr_fs(unsigned int fsr)
 
 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
 void early_abt_enable(void);
+asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr,
+			     struct pt_regs *regs);
+asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr,
+				 struct pt_regs *regs);
 
 #endif	/* __ARCH_ARM_FAULT_H */
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 7ff9feea13a6..2508be91b7a0 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -354,6 +354,7 @@ EXPORT_SYMBOL(flush_dcache_page);
 *	memcpy() to/from page
 *	if written to page, flush_dcache_page()
 */
+void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
 void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
 {
 	unsigned long pfn;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 463fc2a8448f..f3a52c08a200 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -21,6 +21,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
+#include <asm/tcm.h>
 #include <asm/tlb.h>
 #include <asm/highmem.h>
 #include <asm/system_info.h>
@@ -37,7 +38,6 @@
 
 #include "fault.h"
 #include "mm.h"
-#include "tcm.h"
 
 extern unsigned long __atags_pointer;
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 53f2d8774fdb..43cfd06bbeba 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -21,6 +21,7 @@
 #include <asm/cputype.h>
 #include <asm/mpu.h>
 #include <asm/procinfo.h>
+#include <asm/idmap.h>
 
 #include "mm.h"
 
diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h
deleted file mode 100644
index 6b80a760d875..000000000000
--- a/arch/arm/mm/tcm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2008-2009 ST-Ericsson AB
- * TCM memory handling for ARM systems
- *
- * Author: Linus Walleij <linus.walleij@stericsson.com>
- * Author: Rickard Andersson <rickard.andersson@stericsson.com>
- */
-
-#ifdef CONFIG_HAVE_TCM
-void __init tcm_init(void);
-#else
-/* No TCM support, just blank inlines to be optimized out */
-static inline void tcm_init(void)
-{
-}
-#endif
diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c
index 4d720990cf2a..eba7ac4725c0 100644
--- a/arch/arm/probes/kprobes/checkers-common.c
+++ b/arch/arm/probes/kprobes/checkers-common.c
@@ -40,7 +40,7 @@ enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn,
 * Different from other insn uses imm8, the real addressing offset of
 * STRD in T32 encoding should be imm8 * 4. See ARMARM description.
 */
-enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
+static enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
 					   struct arch_probes_insn *asi,
 					   const struct decode_header *h)
 {
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 9090c3a74dcc..d8238da095df 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -233,7 +233,7 @@ singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
 * kprobe, and that level is reserved for user kprobe handlers, so we can't
 * risk encountering a new kprobe in an interrupt handler.
 */
-void __kprobes kprobe_handler(struct pt_regs *regs)
+static void __kprobes kprobe_handler(struct pt_regs *regs)
 {
 	struct kprobe *p, *cur;
 	struct kprobe_ctlblk *kcb;
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index dbef34ed933f..7f65048380ca 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -145,8 +145,6 @@ __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
 	}
 }
 
-extern void kprobe_handler(struct pt_regs *regs);
-
 static void
 optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 {
diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
index c562832b8627..171c7076b89f 100644
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -720,7 +720,7 @@ static const char coverage_register_lookup[16] = {
 	[REG_TYPE_NOSPPCX]	= COVERAGE_ANY_REG | COVERAGE_SP,
 };
 
-unsigned coverage_start_registers(const struct decode_header *h)
+static unsigned coverage_start_registers(const struct decode_header *h)
 {
 	unsigned regs = 0;
 	int i;
diff --git a/arch/arm/probes/kprobes/test-core.h b/arch/arm/probes/kprobes/test-core.h
index 56ad3c0aaeea..c7297037c162 100644
--- a/arch/arm/probes/kprobes/test-core.h
+++ b/arch/arm/probes/kprobes/test-core.h
@@ -454,3 +454,7 @@ void kprobe_thumb32_test_cases(void);
 #else
 void kprobe_arm_test_cases(void);
 #endif
+
+void __kprobes_test_case_start(void);
+void __kprobes_test_case_end_16(void);
+void __kprobes_test_case_end_32(void);
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 9e74c7ff6b04..97e2bfa01f4b 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -7,7 +7,7 @@
 # http://www.arm.linux.org.uk/developer/machines/download.php
 #
 # Please do not send patches to this file; it is automatically generated!
-# To add an entry into this database, please see Documentation/arm/arm.rst,
+# To add an entry into this database, please see Documentation/arch/arm/arm.rst,
 # or visit:
 #
 # http://www.arm.linux.org.uk/developer/machines/?action=new
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index ac964612d8b0..8ebed8a13874 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -464,3 +464,4 @@
 448	common	process_mrelease		sys_process_mrelease
 449	common	futex_waitv			sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index 1976c6f325a4..a003beacac76 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -6,6 +6,8 @@
 */
 #include <linux/time.h>
 #include <linux/types.h>
+#include <asm/vdso.h>
+#include <asm/unwind.h>
 
 int __vdso_clock_gettime(clockid_t clock,
 			 struct old_timespec32 *ts)
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 349dcb944a93..1ba5078c1025 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -25,6 +25,7 @@
 #include <asm/thread_notify.h>
 #include <asm/traps.h>
 #include <asm/vfp.h>
+#include <asm/neon.h>
 
 #include "vfpinstr.h"
 #include "vfp.h"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 92f3fff2522b..595028bd9160 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -120,6 +120,7 @@ config ARM64
 	select CRC32
 	select DCACHE_WORD_ACCESS
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
+	select DMA_BOUNCE_UNALIGNED_KMALLOC
 	select DMA_DIRECT_REMAP
 	select EDAC_SUPPORT
 	select FRAME_POINTER
@@ -203,12 +204,16 @@ config ARM64
 	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_GCC_PLUGINS
+	select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
+		HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_KVM
+	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_PREEMPT_DYNAMIC_KEY
@@ -222,6 +227,7 @@ config ARM64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_GENERIC_VDSO
+	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
@@ -578,7 +584,6 @@ config ARM64_ERRATUM_845719
 config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
 	default y
-	select ARM64_MODULE_PLTS if MODULES
 	help
 	  This option links the kernel with '--fix-cortex-a53-843419' and
 	  enables PLT support to replace certain ADRP instructions, which can
@@ -1586,7 +1591,7 @@ config ARM64_TAGGED_ADDR_ABI
 	  When this option is enabled, user applications can opt in to a
 	  relaxed ABI via prctl() allowing tagged addresses to be passed
 	  to system calls as pointer arguments. For details, see
-	  Documentation/arm64/tagged-address-abi.rst.
+	  Documentation/arch/arm64/tagged-address-abi.rst.
 
 menuconfig COMPAT
 	bool "Kernel support for 32-bit EL0"
@@ -1620,7 +1625,7 @@ config KUSER_HELPERS
 	  the system. This permits binaries to be run on ARMv4 through
 	  to ARMv8 without modification.
 
-	  See Documentation/arm/kernel_user_helpers.rst for details.
+	  See Documentation/arch/arm/kernel_user_helpers.rst for details.
However, the fixed address nature of these helpers can be used by ROP (return orientated programming) authors when creating @@ -2048,7 +2053,7 @@ config ARM64_MTE explicitly opt in. The mechanism for the userspace is described in: - Documentation/arm64/memory-tagging-extension.rst. + Documentation/arch/arm64/memory-tagging-extension.rst. endmenu # "ARMv8.5 architectural features" @@ -2108,26 +2113,6 @@ config ARM64_SME register state capable of holding two dimensional matrix tiles to enable various matrix operations. -config ARM64_MODULE_PLTS - bool "Use PLTs to allow module memory to spill over into vmalloc area" - depends on MODULES - select HAVE_MOD_ARCH_SPECIFIC - help - Allocate PLTs when loading modules so that jumps and calls whose - targets are too far away for their relative offsets to be encoded - in the instructions themselves can be bounced via veneers in the - module's PLT. This allows modules to be allocated in the generic - vmalloc area after the dedicated module memory area has been - exhausted. - - When running with address space randomization (KASLR), the module - region itself may be too far away for ordinary relative jumps and - calls, and so in that case, module PLTs are required and cannot be - disabled. - - Specific errata workaround(s) might also force module PLTs to be - enabled (ARM64_ERRATUM_843419). - config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" select ARM_GIC_V3 @@ -2168,7 +2153,6 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -2199,9 +2183,8 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the core kernel, so branch relocations are almost always in range unless - ARM64_MODULE_PLTS is enabled and the region is exhausted. In this - particular case of region exhaustion, modules might be able to fall - back to a larger 2GB area. + the region is exhausted. In this particular case of region + exhaustion, modules might be able to fall back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/boot/dts/qcom/sc7180-idp.dts b/arch/arm64/boot/dts/qcom/sc7180-idp.dts index 9f052270e090..299ef5dc225a 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-idp.dts +++ b/arch/arm64/boot/dts/qcom/sc7180-idp.dts @@ -393,6 +393,11 @@ qcom,spare-regs = <&tcsr_regs_2 0xb3e4>; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &sdhc_1 { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index ca6920de7ea8..1472e7f10831 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -892,6 +892,11 @@ hp_i2c: &i2c9 { qcom,spare-regs = <&tcsr_regs_2 0xb3e4>; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. 
*/ + dma-coherent; +}; + &sdhc_1 { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index f479cab8ab45..a65be760d1a7 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -369,7 +369,7 @@ }; firmware { - scm { + scm: scm { compatible = "qcom,scm-sc7180", "qcom,scm"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi index f562e4d2b655..2e1cd219fc18 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi @@ -79,6 +79,11 @@ firmware-name = "ath11k/WCN6750/hw1.0/wpss.mdt"; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &wifi { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 2fd1d3c0eb34..36f0bb9b3cbb 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -656,7 +656,7 @@ }; firmware { - scm { + scm: scm { compatible = "qcom,scm-sc7280", "qcom,scm"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index dd228a256a32..2ae4bb7d5e62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -97,6 +97,7 @@ l2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index f69a38f42d2d..0a27fa5271f5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -37,7 +37,8 @@ vin-supply = <&vcc_io>; }; - vcc_host_5v: vcc-host-5v-regulator { + /* Common enable line for all of the rails mentioned in the labels */ + vcc_host_5v: vcc_host1_5v: vcc_otg_5v: vcc-host-5v-regulator { compatible = "regulator-fixed"; gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; @@ -48,17 +49,6 @@ vin-supply = <&vcc_sys>; }; - vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator { - compatible = "regulator-fixed"; - gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; - pinctrl-names = "default"; - pinctrl-0 = <&usb20_host_drv>; - regulator-name = "vcc_host1_5v"; - regulator-always-on; - regulator-boot-on; - vin-supply = <&vcc_sys>; - }; - vcc_sys: vcc-sys { compatible = "regulator-fixed"; regulator-name = "vcc_sys"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 6d7a7bf72ac7..e729e7a22b23 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -103,6 +103,7 @@ l2: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts index 263ce40770dd..cddf6cd2fecb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts @@ -28,6 +28,16 @@ regulator-max-microvolt = <5000000>; vin-supply = <&vcc12v_dcin>; }; + + vcc_sd_pwr: vcc-sd-pwr-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcc_sd_pwr"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&vcc3v3_sys>; + }; }; /* phy for pcie */ @@ -130,13 +140,7 @@ }; &sdmmc0 { - vmmc-supply = <&sdmmc_pwr>; - status = 
"okay"; -}; - -&sdmmc_pwr { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; + vmmc-supply = <&vcc_sd_pwr>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi index 102e448bc026..31aa2b8efe39 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi @@ -104,16 +104,6 @@ regulator-max-microvolt = <3300000>; vin-supply = <&vcc5v0_sys>; }; - - sdmmc_pwr: sdmmc-pwr-regulator { - compatible = "regulator-fixed"; - enable-active-high; - gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&sdmmc_pwr_h>; - regulator-name = "sdmmc_pwr"; - status = "disabled"; - }; }; &cpu0 { @@ -155,6 +145,19 @@ status = "disabled"; }; +&gpio0 { + nextrst-hog { + gpio-hog; + /* + * GPIO_ACTIVE_LOW + output-low here means that the pin is set + * to high, because output-low decides the value pre-inversion. + */ + gpios = <RK_PA5 GPIO_ACTIVE_LOW>; + line-name = "nEXTRST"; + output-low; + }; +}; + &gpu { mali-supply = <&vdd_gpu>; status = "okay"; @@ -538,12 +541,6 @@ rockchip,pins = <2 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; }; }; - - sdmmc-pwr { - sdmmc_pwr_h: sdmmc-pwr-h { - rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>; - }; - }; }; &pmu_io_domains { diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts index f70ca9f0470a..c718b8dbb9c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts @@ -106,7 +106,7 @@ rockchip-key { reset_button_pin: reset-button-pin { - rockchip,pins = <4 RK_PA0 RK_FUNC_GPIO &pcfg_pull_up>; + rockchip,pins = <0 RK_PB7 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts index 2a1118f15c29..b6ad8328c7eb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts @@ -134,4 +134,3 @@ }; }; }; - diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index ba67b58f05b7..f1be76a54ceb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -94,9 +94,10 @@ power-domains = <&power RK3568_PD_PIPE>; reg = <0x3 0xc0400000 0x0 0x00400000>, <0x0 0xfe270000 0x0 0x00010000>, - <0x3 0x7f000000 0x0 0x01000000>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0x7ef00000 0x0 0x00100000>, - <0x02000000 0x0 0x00000000 0x3 0x40000000 0x0 0x3ef00000>; + <0x0 0xf2000000 0x0 0x00100000>; + ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>, + <0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x40000000 0x0 0x40000000>; reg-names = "dbi", "apb", "config"; resets = <&cru SRST_PCIE30X1_POWERUP>; reset-names = "pipe"; @@ -146,9 +147,10 @@ power-domains = <&power RK3568_PD_PIPE>; reg = <0x3 0xc0800000 0x0 0x00400000>, <0x0 0xfe280000 0x0 0x00010000>, - <0x3 0xbf000000 0x0 0x01000000>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0xbef00000 0x0 0x00100000>, - <0x02000000 0x0 0x00000000 0x3 0x80000000 0x0 0x3ef00000>; + <0x0 0xf0000000 0x0 0x00100000>; + ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>, + <0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x80000000 0x0 0x40000000>; reg-names = "dbi", "apb", "config"; resets = <&cru 
SRST_PCIE30X2_POWERUP>; reset-names = "pipe"; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index f62e0fd881a9..61680c7ac489 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -952,7 +952,7 @@ compatible = "rockchip,rk3568-pcie"; reg = <0x3 0xc0000000 0x0 0x00400000>, <0x0 0xfe260000 0x0 0x00010000>, - <0x3 0x3f000000 0x0 0x01000000>; + <0x0 0xf4000000 0x0 0x00100000>; reg-names = "dbi", "apb", "config"; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>, @@ -982,8 +982,9 @@ phys = <&combphy2 PHY_TYPE_PCIE>; phy-names = "pcie-phy"; power-domains = <&power RK3568_PD_PIPE>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0x3ef00000 0x0 0x00100000 - 0x02000000 0x0 0x00000000 0x3 0x00000000 0x0 0x3ef00000>; + ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>, + <0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x00000000 0x0 0x40000000>; resets = <&cru SRST_PCIE20_POWERUP>; reset-names = "pipe"; #address-cells = <3>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 657c019d27fa..a3124bd2e092 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -229,6 +229,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -238,6 +239,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -247,6 +249,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -256,6 +259,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -265,6 +269,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -274,6 +279,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -283,6 +289,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -292,6 +299,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -301,6 +309,7 @@ cache-line-size = <64>; cache-sets = <4096>; cache-level = <3>; + cache-unified; }; }; diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index a406454578f0..f1b8a04ee9f2 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -67,7 +67,7 @@ static int __init hyperv_init(void) if (ret) return ret; - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online", hv_common_cpu_init, hv_common_cpu_die); if (ret < 0) { hv_common_free(); diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index bdf1f6bcd010..94b486192e1f 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -23,17 +23,17 @@ #include <linux/stringify.h> -#define ALTINSTR_ENTRY(feature) \ +#define ALTINSTR_ENTRY(cpucap) \ " .word 661b - .\n" /* label */ \ " .word 663f - .\n" /* new instruction */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .hword " __stringify(cpucap) 
"\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -#define ALTINSTR_ENTRY_CB(feature, cb) \ +#define ALTINSTR_ENTRY_CB(cpucap, cb) \ " .word 661b - .\n" /* label */ \ - " .word " __stringify(cb) "- .\n" /* callback */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .hword " __stringify(cpucap) "\n" /* cpucap */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -53,13 +53,13 @@ * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ + ALTINSTR_ENTRY(cpucap) \ ".popsection\n" \ ".subsection 1\n" \ "663:\n\t" \ @@ -70,31 +70,31 @@ ".previous\n" \ ".endif\n" -#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY_CB(feature, cb) \ + ALTINSTR_ENTRY_CB(cpucap, cb) \ ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ ".endif\n" -#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) +#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \ + __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg)) -#define ALTERNATIVE_CB(oldinstr, feature, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb) +#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \ + __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb) #else #include <asm/assembler.h> -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len +.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len .word \orig_offset - . .word \alt_offset - . - .hword (\feature) + .hword (\cpucap) .byte \orig_len .byte \alt_len .endm @@ -210,9 +210,9 @@ alternative_endif #endif /* __ASSEMBLY__ */ /* - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap)); * - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO)); * N.B. If CONFIG_FOO is specified, but not selected, the whole block * will be omitted, including oldinstr. 
*/ @@ -224,15 +224,15 @@ alternative_endif #include <linux/types.h> static __always_inline bool -alternative_has_feature_likely(const unsigned long feature) +alternative_has_cap_likely(const unsigned long cpucap) { - compiletime_assert(feature < ARM64_NCAPS, - "feature must be < ARM64_NCAPS"); + compiletime_assert(cpucap < ARM64_NCAPS, + "cpucap must be < ARM64_NCAPS"); asm_volatile_goto( - ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops) + ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) : - : [feature] "i" (feature) + : [cpucap] "i" (cpucap) : : l_no); @@ -242,15 +242,15 @@ l_no: } static __always_inline bool -alternative_has_feature_unlikely(const unsigned long feature) +alternative_has_cap_unlikely(const unsigned long cpucap) { - compiletime_assert(feature < ARM64_NCAPS, - "feature must be < ARM64_NCAPS"); + compiletime_assert(cpucap < ARM64_NCAPS, + "cpucap must be < ARM64_NCAPS"); asm_volatile_goto( - ALTERNATIVE("nop", "b %l[l_yes]", %[feature]) + ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap]) : - : [feature] "i" (feature) + : [cpucap] "i" (cpucap) : : l_yes); diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a38b92e11811..00d97b8a757f 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -13,7 +13,7 @@ struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ - u16 cpufeature; /* cpufeature bit set for replacement */ + u16 cpucap; /* cpucap bit set for replacement */ u8 orig_len; /* size of original instruction(s) */ u8 alt_len; /* size of new instruction(s), <= orig_len */ }; @@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_boot_alternatives(void); void __init apply_alternatives_all(void); -bool alternative_is_applied(u16 cpufeature); +bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES void apply_alternatives_module(void *start, size_t length); @@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif +void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index af1fafbe7e1d..934c658ee947 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void) #define arch_timer_reg_read_stable(reg) \ ({ \ - u64 _val; \ - \ - preempt_disable_notrace(); \ - _val = erratum_handler(read_ ## reg)(); \ - preempt_enable_notrace(); \ - \ - _val; \ + erratum_handler(read_ ## reg)(); \ }) /* diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 2f5f3da34782..b0abc64f86b0 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -129,4 +129,6 @@ static inline bool __init __early_cpu_has_rndr(void) return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf; } +u64 kaslr_early_init(void *fdt); + #endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 75b211c98dea..5b6efe8abeeb 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -18,7 +18,6 @@ bic \tmp1, \tmp1, #TTBR_ASID_MASK sub \tmp1, \tmp1, 
#RESERVED_SWAPPER_OFFSET // reserved_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 - isb add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET msr ttbr1_el1, \tmp1 // set reserved ASID isb @@ -31,7 +30,6 @@ extr \tmp2, \tmp2, \tmp1, #48 ror \tmp2, \tmp2, #16 msr ttbr1_el1, \tmp2 // set the active ASID - isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index c9979273d389..400d279e0f8d 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -142,24 +142,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release #define arch_atomic_fetch_xor arch_atomic_fetch_xor -#define arch_atomic_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) -#define arch_atomic_xchg_acquire(v, new) \ - arch_xchg_acquire(&((v)->counter), (new)) -#define arch_atomic_xchg_release(v, new) \ - arch_xchg_release(&((v)->counter), (new)) -#define arch_atomic_xchg(v, new) \ - arch_xchg(&((v)->counter), (new)) - -#define arch_atomic_cmpxchg_relaxed(v, old, new) \ - arch_cmpxchg_relaxed(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg_acquire(v, old, new) \ - arch_cmpxchg_acquire(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg_release(v, old, new) \ - arch_cmpxchg_release(&((v)->counter), (old), (new)) -#define arch_atomic_cmpxchg(v, old, new) \ - arch_cmpxchg(&((v)->counter), (old), (new)) - #define arch_atomic_andnot arch_atomic_andnot /* @@ -209,16 +191,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) #define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire -#define arch_atomic64_xchg_release arch_atomic_xchg_release -#define arch_atomic64_xchg arch_atomic_xchg - -#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release -#define arch_atomic64_cmpxchg arch_atomic_cmpxchg - #define arch_atomic64_andnot arch_atomic64_andnot #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index cbb3d961123b..89d2ba272359 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -294,38 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L) #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, rel, cl) \ -static __always_inline long \ -__ll_sc__cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __CMPXCHG128(name, mb, rel, cl...) 
\ +static __always_inline u128 \ +__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \ { \ - unsigned long tmp, ret; \ + union __u128_halves r, o = { .full = (old) }, \ + n = { .full = (new) }; \ + unsigned int tmp; \ \ - asm volatile("// __cmpxchg_double" #name "\n" \ - " prfm pstl1strm, %2\n" \ - "1: ldxp %0, %1, %2\n" \ - " eor %0, %0, %3\n" \ - " eor %1, %1, %4\n" \ - " orr %1, %0, %1\n" \ - " cbnz %1, 2f\n" \ - " st" #rel "xp %w0, %5, %6, %2\n" \ - " cbnz %w0, 1b\n" \ + asm volatile("// __cmpxchg128" #name "\n" \ + " prfm pstl1strm, %[v]\n" \ + "1: ldxp %[rl], %[rh], %[v]\n" \ + " cmp %[rl], %[ol]\n" \ + " ccmp %[rh], %[oh], 0, eq\n" \ + " b.ne 2f\n" \ + " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \ + " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ "2:" \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ - : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ - : cl); \ + : [v] "+Q" (*(u128 *)ptr), \ + [rl] "=&r" (r.low), [rh] "=&r" (r.high), \ + [tmp] "=&r" (tmp) \ + : [ol] "r" (o.low), [oh] "r" (o.high), \ + [nl] "r" (n.low), [nh] "r" (n.high) \ + : "cc", ##cl); \ \ - return ret; \ + return r.full; \ } -__CMPXCHG_DBL( , , , ) -__CMPXCHG_DBL(_mb, dmb ish, l, "memory") +__CMPXCHG128( , , ) +__CMPXCHG128(_mb, dmb ish, l, "memory") + +#undef __CMPXCHG128 -#undef __CMPXCHG_DBL #undef K #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 319958b95cfd..87f568a94e55 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -281,40 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory") #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, cl...) \ -static __always_inline long \ -__lse__cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +#define __CMPXCHG128(name, mb, cl...) 
\ +static __always_inline u128 \ +__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \ { \ - unsigned long oldval1 = old1; \ - unsigned long oldval2 = old2; \ - register unsigned long x0 asm ("x0") = old1; \ - register unsigned long x1 asm ("x1") = old2; \ - register unsigned long x2 asm ("x2") = new1; \ - register unsigned long x3 asm ("x3") = new2; \ + union __u128_halves r, o = { .full = (old) }, \ + n = { .full = (new) }; \ + register unsigned long x0 asm ("x0") = o.low; \ + register unsigned long x1 asm ("x1") = o.high; \ + register unsigned long x2 asm ("x2") = n.low; \ + register unsigned long x3 asm ("x3") = n.high; \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ - " eor %[old1], %[old1], %[oldval1]\n" \ - " eor %[old2], %[old2], %[oldval2]\n" \ - " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(__uint128_t *)ptr) \ + [v] "+Q" (*(u128 *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ - [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ + [oldval1] "r" (o.low), [oldval2] "r" (o.high) \ : cl); \ \ - return x0; \ + r.low = x0; r.high = x1; \ + \ + return r.full; \ } -__CMPXCHG_DBL( , ) -__CMPXCHG_DBL(_mb, al, "memory") +__CMPXCHG128( , ) +__CMPXCHG128(_mb, al, "memory") -#undef __CMPXCHG_DBL +#undef __CMPXCHG128 #endif /* __ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a51e6e8f3171..ceb368d33bf4 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -33,6 +33,7 @@ * the CPU. */ #define ARCH_DMA_MINALIGN (128) +#define ARCH_KMALLOC_MINALIGN (8) #ifndef __ASSEMBLY__ @@ -90,6 +91,8 @@ static inline int cache_line_size_of_cpu(void) int cache_line_size(void); +#define dma_get_cache_alignment cache_line_size + /* * Read the effective value of CTR_EL0. 
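 *
 * Editor's note, flagged as an assumption rather than part of this patch:
 * with ARCH_KMALLOC_MINALIGN dropped to 8, kmalloc() alignment alone no
 * longer implies ARCH_DMA_MINALIGN; small buffers used for non-coherent
 * DMA are bounced (DMA_BOUNCE_UNALIGNED_KMALLOC) or a driver may size
 * them explicitly, e.g.:
 *
 *	buf = kmalloc(ALIGN(len, dma_get_cache_alignment()), GFP_KERNEL);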
* diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index c6bc5d8ec3ca..d7a540736741 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -130,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64) #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name) \ -static inline long __cmpxchg_double##name(unsigned long old1, \ - unsigned long old2, \ - unsigned long new1, \ - unsigned long new2, \ - volatile void *ptr) \ +#define __CMPXCHG128(name) \ +static inline u128 __cmpxchg128##name(volatile u128 *ptr, \ + u128 old, u128 new) \ { \ - return __lse_ll_sc_body(_cmpxchg_double##name, \ - old1, old2, new1, new2, ptr); \ + return __lse_ll_sc_body(_cmpxchg128##name, \ + ptr, old, new); \ } -__CMPXCHG_DBL( ) -__CMPXCHG_DBL(_mb) +__CMPXCHG128( ) +__CMPXCHG128(_mb) -#undef __CMPXCHG_DBL +#undef __CMPXCHG128 #define __CMPXCHG_GEN(sfx) \ static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ @@ -198,34 +195,17 @@ __CMPXCHG_GEN(_mb) #define arch_cmpxchg64 arch_cmpxchg #define arch_cmpxchg64_local arch_cmpxchg_local -/* cmpxchg_double */ -#define system_has_cmpxchg_double() 1 - -#define __cmpxchg_double_check(ptr1, ptr2) \ -({ \ - if (sizeof(*(ptr1)) != 8) \ - BUILD_BUG(); \ - VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \ -}) +/* cmpxchg128 */ +#define system_has_cmpxchg128() 1 -#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ +#define arch_cmpxchg128(ptr, o, n) \ ({ \ - int __ret; \ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ + __cmpxchg128_mb((ptr), (o), (n)); \ }) -#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ +#define arch_cmpxchg128_local(ptr, o, n) \ ({ \ - int __ret; \ - __cmpxchg_double_check(ptr1, ptr2); \ - __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2), \ - ptr1); \ - __ret; \ + __cmpxchg128((ptr), (o), (n)); \ }) #define __CMPWAIT_CASE(w, sfx, sz) \ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 74575c3d6987..ae904a1ad529 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -96,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread) return test_ti_thread_flag(thread, TIF_32BIT); } +long compat_arm_syscall(struct pt_regs *regs, int scno); + #else /* !CONFIG_COMPAT */ static inline int is_compat_thread(struct thread_info *thread) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index fd7a92219eea..e749838b9c5d 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -56,6 +56,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; + u64 reg_id_aa64mmfr3; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; u64 reg_id_aa64zfr0; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 6bf013fb110d..7a95c324e52a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -107,7 +107,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; * CPU capabilities: * * We use arm64_cpu_capabilities to represent system features, errata work - * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * arounds (both used internally by kernel and tracked in system_cpucaps) and * ELF HWCAPs (which are exposed to user). 
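 *
 * Illustrative use (editor's addition; the cap chosen is just an example):
 *
 *	bool lse = cpus_have_const_cap(ARM64_HAS_LSE_ATOMICS);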
* * To support systems with heterogeneous CPUs, we need to make sure that we @@ -419,12 +419,12 @@ static __always_inline bool is_hyp_code(void) return is_vhe_hyp_code() || is_nvhe_hyp_code(); } -extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); -extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); +extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); #define for_each_available_cap(cap) \ - for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) + for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS) bool this_cpu_has_cap(unsigned int cap); void cpu_set_feature(unsigned int num); @@ -437,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void); static __always_inline bool system_capabilities_finalized(void) { - return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM); + return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM); } /* @@ -449,7 +449,7 @@ static __always_inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - return arch_test_bit(num, cpu_hwcaps); + return arch_test_bit(num, system_cpucaps); } /* @@ -464,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; - return alternative_has_feature_unlikely(num); + return alternative_has_cap_unlikely(num); } /* @@ -504,16 +504,6 @@ static __always_inline bool cpus_have_const_cap(int num) return cpus_have_cap(num); } -static inline void cpus_set_cap(unsigned int num) -{ - if (num >= ARM64_NCAPS) { - pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", - num, ARM64_NCAPS); - } else { - __set_bit(num, cpu_hwcaps); - } -} - static inline int __attribute_const__ cpuid_feature_extract_signed_field_width(u64 features, int field, int width) { diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index f86b157a5da3..4cf2cb053bc8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -88,7 +88,7 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); * guaranteed to cover the kernel Image. * * Since the EFI stub is part of the kernel Image, we can relax the - * usual requirements in Documentation/arm64/booting.rst, which still + * usual requirements in Documentation/arch/arm64/booting.rst, which still * apply to other bootloaders, and are required for some kernel * configurations. */ @@ -166,4 +166,6 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size) dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } +efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 037724b19c5c..f4c3d30bf746 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -22,6 +22,15 @@ isb .endm +.macro __init_el2_hcrx + mrs x0, id_aa64mmfr1_el1 + ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x0, .Lskip_hcrx_\@ + mov_q x0, HCRX_HOST_FLAGS + msr_s SYS_HCRX_EL2, x0 +.Lskip_hcrx_\@: +.endm + /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. 
* This is not necessary for VHE, since the host kernel runs in EL2, @@ -69,7 +78,7 @@ cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present mrs_s x0, SYS_TRBIDR_EL1 - and x0, x0, TRBIDR_PROG + and x0, x0, TRBIDR_EL1_P cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2 mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT) @@ -150,12 +159,21 @@ mov x0, xzr mrs x1, id_aa64pfr1_el1 ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4 - cbz x1, .Lset_fgt_\@ + cbz x1, .Lset_pie_fgt_\@ /* Disable nVHE traps of TPIDR2 and SMPRI */ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK +.Lset_pie_fgt_\@: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lset_fgt_\@ + + /* Disable trapping of PIR_EL1 / PIRE0_EL1 */ + orr x0, x0, #HFGxTR_EL2_nPIR_EL1 + orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1 + .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 @@ -184,6 +202,7 @@ */ .macro init_el2_state __init_el2_sctlr + __init_el2_hcrx __init_el2_timers __init_el2_debug __init_el2_lor @@ -284,14 +303,6 @@ cbz x1, .Lskip_sme_\@ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal - - mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? - ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 - cbz x1, .Lskip_sme_\@ - - mrs_s x1, SYS_HCRX_EL2 - orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping - msr_s SYS_HCRX_EL2, x1 .Lskip_sme_\@: .endm diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 8487aec9b658..ae35939f395b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -47,7 +47,7 @@ #define ESR_ELx_EC_DABT_LOW (0x24) #define ESR_ELx_EC_DABT_CUR (0x25) #define ESR_ELx_EC_SP_ALIGN (0x26) -/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_MOPS (0x27) #define ESR_ELx_EC_FP_EXC32 (0x28) /* Unallocated EC: 0x29 - 0x2B */ #define ESR_ELx_EC_FP_EXC64 (0x2C) @@ -75,8 +75,11 @@ #define ESR_ELx_IL_SHIFT (25) #define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) -#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) #define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) /* ISS field definitions shared by different classes */ #define ESR_ELx_WNR_SHIFT (6) @@ -140,6 +143,20 @@ #define ESR_ELx_CM_SHIFT (8) #define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) @@ -356,6 +373,15 @@ #define ESR_ELx_SME_ISS_ZA_DISABLED 3 #define ESR_ELx_SME_ISS_ZT_DISABLED 4 +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) 
& (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index e73af709cb7a..ad688e157c9b 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,16 +8,11 @@ #define __ASM_EXCEPTION_H #include <asm/esr.h> -#include <asm/kprobes.h> #include <asm/ptrace.h> #include <linux/interrupt.h> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __kprobes -#endif static inline unsigned long disr_to_esr(u64 disr) { @@ -77,6 +72,7 @@ void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); void do_el1_fpac(struct pt_regs *regs, unsigned long esr); +void do_el0_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index fa4c6ff3aa9b..84055329cd8b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -154,4 +154,12 @@ static inline int get_num_wrps(void) ID_AA64DFR0_EL1_WRPs_SHIFT); } +#ifdef CONFIG_CPU_PM +extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); +#else +static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) +{ +} +#endif + #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 5d45f19fda7f..692b1ec663b2 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -137,6 +137,7 @@ #define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32) #define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) #define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) +#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h index c2b13213c720..c09cf942dc92 100644 --- a/arch/arm64/include/asm/image.h +++ b/arch/arm64/include/asm/image.h @@ -27,7 +27,7 @@ /* * struct arm64_image_header - arm64 kernel image header - * See Documentation/arm64/booting.rst for details + * See Documentation/arch/arm64/booting.rst for details * * @code0: Executable code, or * @mz_header alternatively used for part of MZ header diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 877495a0fd0c..51d92abf945e 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -22,13 +22,13 @@ * Generic IO read/write. These perform native-endian accesses. 
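 *
 * For example (editor's sketch; 'base' and 'REG_STATUS' are hypothetical):
 *
 *	u32 status = __raw_readl(base + REG_STATUS);
 *	__raw_writel(status | BIT(0), base + REG_STATUS);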
*/ #define __raw_writeb __raw_writeb -static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr) { asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_writew __raw_writew -static inline void __raw_writew(u16 val, volatile void __iomem *addr) +static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr) { asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); } @@ -40,13 +40,13 @@ static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr) } #define __raw_writeq __raw_writeq -static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr) { asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); } #define __raw_readb __raw_readb -static inline u8 __raw_readb(const volatile void __iomem *addr) +static __always_inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; asm volatile(ALTERNATIVE("ldrb %w0, [%1]", @@ -57,7 +57,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr) } #define __raw_readw __raw_readw -static inline u16 __raw_readw(const volatile void __iomem *addr) +static __always_inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; @@ -80,7 +80,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr) } #define __raw_readq __raw_readq -static inline u64 __raw_readq(const volatile void __iomem *addr) +static __always_inline u64 __raw_readq(const volatile void __iomem *addr) { u64 val; asm volatile(ALTERNATIVE("ldr %0, [%1]", diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index e0f5f6b73edd..1f31ec146d16 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -24,7 +24,7 @@ static __always_inline bool __irqflags_uses_pmr(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && - alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING); + alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING); } static __always_inline void __daif_local_irq_enable(void) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 186dd7f85b14..577773870b66 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -107,14 +107,14 @@ /* * Initial memory map attributes. 
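 *
 * (Editor's note on the hunk below: PTE_UXN is ORed into both flag sets and
 * PTE_WRITE moves into the RW variants, so the initial swapper mappings are
 * writable where needed but never user-executable.)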
*/ -#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN) #ifdef CONFIG_ARM64_4K_PAGES -#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE) #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY) #else -#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE) #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index baef29fcbeee..c6e12e8f2751 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -9,6 +9,7 @@ #include <asm/esr.h> #include <asm/memory.h> +#include <asm/sysreg.h> #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ @@ -92,6 +93,9 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En) +#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En) + /* TCR_EL2 Registers bits */ #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 43c3bc0f9544..86042afa86c3 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -267,6 +267,24 @@ extern u64 __kvm_get_mdcr_el2(void); __kvm_at_err; \ } ) +void __noreturn hyp_panic(void); +asmlinkage void kvm_unexpected_el2_exception(void); +asmlinkage void __noreturn hyp_panic(void); +asmlinkage void __noreturn hyp_panic_bad_stack(void); +asmlinkage void kvm_unexpected_el2_exception(void); +struct kvm_cpu_context; +void handle_trap(struct kvm_cpu_context *host_ctxt); +asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on); +void __noreturn __pkvm_init_finalise(void); +void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); +void kvm_patch_vector_branch(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_compute_final_ctr_el0(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, + u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar); #else /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7e7e19ef6993..d48609d95423 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -279,6 +279,7 @@ enum vcpu_sysreg { TTBR0_EL1, /* Translation Table Base Register 0 */ TTBR1_EL1, /* Translation Table Base Register 1 */ TCR_EL1, /* Translation Control Register */ + TCR2_EL1, /* Extended Translation Control Register */ ESR_EL1, /* Exception Syndrome Register */ AFSR0_EL1, /* Auxiliary Fault Status Register 0 */ AFSR1_EL1, /* Auxiliary Fault Status Register 1 */ @@ -339,6 +340,10 @@ enum vcpu_sysreg { TFSR_EL1, /* Tag Fault Status Register (EL1) */ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* Permission Indirection Extension registers */ + 
PIR_EL1, /* Permission Indirection Register 1 (EL1) */ + PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */ + /* 32bit specific registers. */ DACR32_EL2, /* Domain Access Control Register */ IFSR32_EL2, /* Instruction Fault Status Register */ @@ -699,6 +704,8 @@ struct kvm_vcpu_arch { #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) /* Software step state is Active-pending */ #define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) +/* PMUSERENR for the guest EL0 is on physical CPU */ +#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6)) /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -1031,7 +1038,7 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); #define kvm_vcpu_os_lock_enabled(vcpu) \ - (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK)) + (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK)) int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); @@ -1065,9 +1072,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); +bool kvm_set_pmuserenr(u64 val); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_set_pmuserenr(u64 val) +{ + return false; +} #endif void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index f99d74826a7e..cbbcdc35c4cd 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -18,7 +18,7 @@ static __always_inline bool system_uses_lse_atomics(void) { - return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS); + return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS); } #define __lse_ll_sc_body(op, ...) \ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c735afdf639b..6e0e5722f229 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -46,7 +46,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) -#define MODULES_VSIZE (SZ_128M) +#define MODULES_VSIZE (SZ_2G) #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_8M) @@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_init(void); static inline bool kaslr_enabled(void) { - /* - * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical - * placement of the image rather than from the seed, so a displacement - * of less than MIN_KIMG_ALIGN means that no seed was provided. - */ - return kaslr_offset() >= MIN_KIMG_ALIGN; + extern bool __kaslr_is_enabled; + return __kaslr_is_enabled; } +#else +static inline void kaslr_init(void) { } +static inline bool kaslr_enabled(void) { return false; } +#endif /* * Allow all memory at the discovery stage. We will clip it later. diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 56911691bef0..a6fb325424e7 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -39,11 +39,16 @@ static inline void contextidr_thread_switch(struct task_struct *next) /* * Set TTBR0 to reserved_pg_dir. 
No translations will be possible via TTBR0. */ -static inline void cpu_set_reserved_ttbr0(void) +static inline void cpu_set_reserved_ttbr0_nosync(void) { unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); write_sysreg(ttbr, ttbr0_el1); +} + +static inline void cpu_set_reserved_ttbr0(void) +{ + cpu_set_reserved_ttbr0_nosync(); isb(); } @@ -52,7 +57,6 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm) { BUG_ON(pgd == swapper_pg_dir); - cpu_set_reserved_ttbr0(); cpu_do_switch_mm(virt_to_phys(pgd),mm); } @@ -164,7 +168,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap) * up (i.e. cpufeature framework is not up yet) and * latter only when we enable CNP via cpufeature's * enable() callback. - * Also we rely on the cpu_hwcap bit being set before + * Also we rely on the system_cpucaps bit being set before * calling the enable() function. */ ttbr1 |= TTBR_CNP_BIT; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 18734fed3bdd..bfa6638b4c93 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -7,7 +7,6 @@ #include <asm-generic/module.h> -#ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { int plt_shndx; int plt_num_entries; @@ -21,7 +20,6 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; -#endif u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, void *loc, const Elf64_Rela *rela, @@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, void *loc, u64 val); -#ifdef CONFIG_RANDOMIZE_BASE -extern u64 module_alloc_base; -#else -#define module_alloc_base ((u64)_etext - MODULES_VSIZE) -#endif - struct plt_entry { /* * A program that conforms to the AArch64 Procedure Call Standard diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index dbba4b7559aa..b9ae8349e35d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,9 +1,7 @@ SECTIONS { -#ifdef CONFIG_ARM64_MODULE_PLTS .plt 0 : { BYTE(0) } .init.plt 0 : { BYTE(0) } .text.ftrace_trampoline 0 : { BYTE(0) } -#endif #ifdef CONFIG_KASAN_SW_TAGS /* diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index b9ba19dbdb69..9abcc8ef3087 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -140,17 +140,11 @@ PERCPU_RET_OP(add, add, ldadd) * re-enabling preemption for preemptible kernels, but doing that in a way * which builds inside a module would mean messing directly with the preempt * count. If you do this, peterz and tglx will hunt you down. + * + * Not to mention it'll break the actual preemption model for missing a + * preemption point when TIF_NEED_RESCHED gets set while preemption is + * disabled. */ -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ -({ \ - int __ret; \ - preempt_disable_notrace(); \ - __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ - raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2); \ - preempt_enable_notrace(); \ - __ret; \ -}) #define _pcp_protect(op, pcp, ...) 
\ ({ \ @@ -240,6 +234,22 @@ PERCPU_RET_OP(add, add, ldadd) #define this_cpu_cmpxchg_8(pcp, o, n) \ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n) + +#define this_cpu_cmpxchg128(pcp, o, n) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + u128 old__, new__, ret__; \ + pcp_op_T__ *ptr__; \ + old__ = o; \ + new__ = n; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \ + preempt_enable_notrace(); \ + ret__; \ +}) + #ifdef __KVM_NVHE_HYPERVISOR__ extern unsigned long __hyp_per_cpu_offset(unsigned int cpu); #define __per_cpu_offset diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index f658aafc47df..e4944d517c99 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -171,6 +171,14 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* + * PIIndex[3:0] encoding (Permission Indirection Extension) + */ +#define PTE_PI_IDX_0 6 /* AP[1], USER */ +#define PTE_PI_IDX_1 51 /* DBM */ +#define PTE_PI_IDX_2 53 /* PXN */ +#define PTE_PI_IDX_3 54 /* UXN */ + +/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9b165117a454..eed814b00a38 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -27,6 +27,40 @@ */ #define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */ +#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) +#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) + +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) + +#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL)) +#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) + +#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) + +#define _PAGE_KERNEL (PROT_NORMAL) +#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) +#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) +#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN) +#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) + +#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) +#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) + +#ifdef 
__ASSEMBLY__ +#define PTE_MAYBE_NG 0 +#endif + #ifndef __ASSEMBLY__ #include <asm/cpufeature.h> @@ -34,9 +68,6 @@ extern bool arm64_use_ng_mappings; -#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) -#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) - #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) @@ -50,26 +81,11 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_GP 0 #endif -#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) -#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) - -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) -#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) - -#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) -#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) - -#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) - -#define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) -#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) -#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX) +#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT) #define PAGE_S2_MEMATTR(attr, has_fwb) \ ({ \ @@ -83,12 +99,62 @@ extern bool arm64_use_ng_mappings; #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) -#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_SHARED __pgprot(_PAGE_SHARED) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_READONLY) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC) +#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY) #endif /* __ASSEMBLY__ */ +#define pte_pi_index(pte) ( \ + ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \ + ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \ + ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \ + ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0))) + +/* + * Page types used via Permission Indirection Extension (PIE). 
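 * (Editor's worked example, not part of the patch: a pte with UXN, PXN and
 * PTE_WRITE set but USER clear makes pte_pi_index() above gather bits
 * 54/53/51/6 into 0b1110 == 0xe, i.e. the PAGE_KERNEL slot listed below.)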
PIE uses + * the USER, DBM, PXN and UXN bits to generate an index which is used + * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define + * the combinations we use on non-PIE systems with the same encoding; for + * convenience these are listed here as comments, as are the unallocated + * encodings. + */ + +/* 0: PAGE_DEFAULT */ +/* 1: PTE_USER */ +/* 2: PTE_WRITE */ +/* 3: PTE_WRITE | PTE_USER */ +/* 4: PAGE_EXECONLY PTE_PXN */ +/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */ +/* 6: PTE_PXN | PTE_WRITE */ +/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */ +/* 8: PAGE_KERNEL_ROX PTE_UXN */ +/* 9: PTE_UXN | PTE_USER */ +/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */ +/* b: PTE_UXN | PTE_WRITE | PTE_USER */ +/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */ +/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */ +/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */ +/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */ + +#define PIE_E0 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW)) + +#define PIE_E1 ( \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \ + PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW)) + #endif /* __ASM_PGTABLE_PROT_H */ diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 13df982a0808..3fdae5fe3142 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -73,6 +73,7 @@ static inline void dynamic_scs_init(void) {} #endif int scs_patch(const u8 eh_frame[], int size); +asmlinkage void scs_patch_vmlinux(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index f2d26235bfb4..9b31e6d0da17 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -99,7 +99,7 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) extern int __cpu_disable(void); -extern void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } extern void __noreturn cpu_die(void); extern void __noreturn cpu_die_early(void); diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index db7b371b367c..9cc501450486 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -100,5 +100,21 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int sco u8 spectre_bhb_loop_affected(int scope); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); + +void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void
spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst); +void spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index d30217c21eff..17f687510c48 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -38,6 +38,7 @@ asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL_COMPAT(name) \ + asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ @@ -53,6 +54,7 @@ ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \ asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \ @@ -73,11 +75,13 @@ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) #define COND_SYSCALL(name) \ + asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } +asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused); #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); #endif /* __ASM_SYSCALL_WRAPPER_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index eefd712f2430..7a1e62631814 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -140,25 +140,17 @@ #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) -#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) -#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) -#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) -#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2) -#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2) #define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4) #define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5) #define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6) #define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7) #define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0) -#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4) -#define SYS_OSLAR_OSLK BIT(0) - #define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4) -#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0)) -#define SYS_OSLSR_OSLM_NI 0 -#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3) -#define SYS_OSLSR_OSLK BIT(1) +#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0)) +#define OSLSR_EL1_OSLM_NI 0 +#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3) +#define OSLSR_EL1_OSLK BIT(1) #define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4) #define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4) @@ -241,54 +233,8 @@ /*** End of Statistical Profiling Extension ***/ -/* - * TRBE Registers - */ -#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0) -#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1) -#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2) -#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3) -#define SYS_TRBMAR_EL1 sys_reg(3, 
0, 9, 11, 4) -#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6) -#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7) - -#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0) -#define TRBLIMITR_LIMIT_SHIFT 12 -#define TRBLIMITR_NVM BIT(5) -#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_TRIG_MODE_SHIFT 3 -#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0) -#define TRBLIMITR_FILL_MODE_SHIFT 1 -#define TRBLIMITR_ENABLE BIT(0) -#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0) -#define TRBPTR_PTR_SHIFT 0 -#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0) -#define TRBBASER_BASE_SHIFT 12 -#define TRBSR_EC_MASK GENMASK(5, 0) -#define TRBSR_EC_SHIFT 26 -#define TRBSR_IRQ BIT(22) -#define TRBSR_TRG BIT(21) -#define TRBSR_WRAP BIT(20) -#define TRBSR_ABORT BIT(18) -#define TRBSR_STOP BIT(17) -#define TRBSR_MSS_MASK GENMASK(15, 0) -#define TRBSR_MSS_SHIFT 0 -#define TRBSR_BSC_MASK GENMASK(5, 0) -#define TRBSR_BSC_SHIFT 0 -#define TRBSR_FSC_MASK GENMASK(5, 0) -#define TRBSR_FSC_SHIFT 0 -#define TRBMAR_SHARE_MASK GENMASK(1, 0) -#define TRBMAR_SHARE_SHIFT 8 -#define TRBMAR_OUTER_MASK GENMASK(3, 0) -#define TRBMAR_OUTER_SHIFT 4 -#define TRBMAR_INNER_MASK GENMASK(3, 0) -#define TRBMAR_INNER_SHIFT 0 -#define TRBTRG_TRG_MASK GENMASK(31, 0) -#define TRBTRG_TRG_SHIFT 0 -#define TRBIDR_FLAG BIT(5) -#define TRBIDR_PROG BIT(4) -#define TRBIDR_ALIGN_MASK GENMASK(3, 0) -#define TRBIDR_ALIGN_SHIFT 0 +#define TRBSR_EL1_BSC_MASK GENMASK(5, 0) +#define TRBSR_EL1_BSC_SHIFT 0 #define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1) #define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2) @@ -764,6 +710,25 @@ #define ICH_VTR_TDS_SHIFT 19 #define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) +/* + * Permission Indirection Extension (PIE) permission encodings. + * Encodings with the _O suffix have overlays applied (Permission Overlay Extension). + */ +#define PIE_NONE_O 0x0 +#define PIE_R_O 0x1 +#define PIE_X_O 0x2 +#define PIE_RX_O 0x3 +#define PIE_RW_O 0x5 +#define PIE_RWnX_O 0x6 +#define PIE_RWX_O 0x7 +#define PIE_R 0x8 +#define PIE_GCS 0x9 +#define PIE_RX 0xa +#define PIE_RW 0xc +#define PIE_RWX 0xe + +#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4)) + #define ARM64_FEATURE_FIELD_BITS 4 /* Defined for compatibility only, do not add new users. */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 848739c15de8..553d1bc559c6 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -55,10 +55,6 @@ struct thread_info { void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec -void arch_release_task_struct(struct task_struct *tsk); -int arch_dup_task_struct(struct task_struct *dst, - struct task_struct *src); - #endif #define TIF_SIGPENDING 0 /* signal pending */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 1f361e2da516..d66dfb3a72dd 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -29,6 +29,8 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *s void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); +int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs); + /* * Move regs->pc to next instruction and do necessary setup before it * is executed.
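
Worked example for the PIE encodings above: a standalone user-space sketch (illustrative only; the _demo names are hypothetical and the constants merely mirror the pgtable-prot.h definitions earlier in this patch) showing how pte_pi_index() folds the four PTE bits into a PIR_ELx nibble index. _PAGE_SHARED sets USER, WRITE (DBM), PXN and UXN, so it maps to index 0xf, which PIE_E0 programs to PIE_RW (0xc):

#include <stdint.h>
#include <stdio.h>

#define DEMO_BIT(n)	(1ULL << (n))
#define DEMO_PTE_USER	DEMO_BIT(6)	/* PTE_PI_IDX_0: AP[1] */
#define DEMO_PTE_WRITE	DEMO_BIT(51)	/* PTE_PI_IDX_1: DBM   */
#define DEMO_PTE_PXN	DEMO_BIT(53)	/* PTE_PI_IDX_2        */
#define DEMO_PTE_UXN	DEMO_BIT(54)	/* PTE_PI_IDX_3        */

/* Mirrors pte_pi_index(): move bits 54/53/51/6 down to index bits 3..0 */
static unsigned int pte_pi_index_demo(uint64_t pte)
{
	return ((pte & DEMO_BIT(54)) >> (54 - 3)) |
	       ((pte & DEMO_BIT(53)) >> (53 - 2)) |
	       ((pte & DEMO_BIT(51)) >> (51 - 1)) |
	       ((pte & DEMO_BIT(6))  >> (6 - 0));
}

int main(void)
{
	uint64_t pte = DEMO_PTE_USER | DEMO_PTE_WRITE | DEMO_PTE_PXN | DEMO_PTE_UXN;
	unsigned int idx = pte_pi_index_demo(pte);	/* 0xf, as in the table above */
	uint64_t pie_rw = 0xc;				/* PIE_RW */

	/* PIRx_ELx_PERM(idx, perm): the permission nibble lives at 4*idx */
	printf("index=0x%x, PIR contribution=0x%016llx\n",
	       idx, (unsigned long long)(pie_rw << (idx * 4)));
	return 0;
}

Running this prints index=0xf and a PIR contribution of 0xc000000000000000, i.e. PIE_RW in the top nibble, matching the PIE_E0 entry for _PAGE_SHARED.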
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 05f4fc265428..14be5000c5a0 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void) ttbr &= ~TTBR_ASID_MASK; /* reserved_pg_dir placed before swapper_pg_dir */ write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1); - isb(); /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); isb(); @@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void) ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); - isb(); /* Restore user page table */ write_sysreg(ttbr0, ttbr0_el1); diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 037feba03a51..64a514f90131 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 451 +#define __NR_compat_syscalls 452 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d006..d952a28463e0 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_cachestat 451 +__SYSCALL(__NR_cachestat, sys_cachestat) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 69a4fb749c65..a2cac4305b1e 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -102,5 +102,6 @@ #define HWCAP2_SME_BI32I32 (1UL << 40) #define HWCAP2_SME_B16B16 (1UL << 41) #define HWCAP2_SME_F16F16 (1UL << 42) +#define HWCAP2_MOPS (1UL << 43) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 656a10ea6c67..f23c1dc3f002 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -177,7 +177,7 @@ struct zt_context { * vector length beyond its initial architectural limit of 2048 bits * (16 quadwords). * - * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ + * See linux/Documentation/arch/arm64/sve.rst for a description of the VL/VQ * terminology. 
*/ #define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7c2bb4e72476..d95b3d6b471a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -42,9 +42,9 @@ obj-$(CONFIG_COMPAT) += sigreturn32.o obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o -obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o +obj-$(CONFIG_MODULES) += module.o module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d32d4ed5519b..8ff6610af496 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,8 +24,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -#define ALT_CAP(a) ((a)->cpufeature & ~ARM64_CB_BIT) -#define ALT_HAS_CB(a) ((a)->cpufeature & ARM64_CB_BIT) +#define ALT_CAP(a) ((a)->cpucap & ~ARM64_CB_BIT) +#define ALT_HAS_CB(a) ((a)->cpucap & ARM64_CB_BIT) /* Volatile, as we may be patching the guts of READ_ONCE() */ static volatile int all_alternatives_applied; @@ -37,12 +37,12 @@ struct alt_region { struct alt_instr *end; }; -bool alternative_is_applied(u16 cpufeature) +bool alternative_is_applied(u16 cpucap) { - if (WARN_ON(cpufeature >= ARM64_NCAPS)) + if (WARN_ON(cpucap >= ARM64_NCAPS)) return false; - return test_bit(cpufeature, applied_alternatives); + return test_bit(cpucap, applied_alternatives); } /* @@ -121,11 +121,11 @@ static noinstr void patch_alternative(struct alt_instr *alt, * accidentally call into the cache.S code, which is patched by us at * runtime. 
*/ -static void clean_dcache_range_nopatch(u64 start, u64 end) +static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) { u64 cur, d_size, ctr_el0; - ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + ctr_el0 = arm64_ftr_reg_ctrel0.sys_val; d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, CTR_EL0_DminLine_SHIFT); cur = start & ~(d_size - 1); @@ -141,7 +141,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end) static void __apply_alternatives(const struct alt_region *region, bool is_module, - unsigned long *feature_mask) + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -151,7 +151,7 @@ static void __apply_alternatives(const struct alt_region *region, int nr_inst; int cap = ALT_CAP(alt); - if (!test_bit(cap, feature_mask)) + if (!test_bit(cap, cpucap_mask)) continue; if (!cpus_have_cap(cap)) @@ -188,11 +188,10 @@ static void __apply_alternatives(const struct alt_region *region, icache_inval_all_pou(); isb(); - /* Ignore ARM64_CB bit from feature mask */ bitmap_or(applied_alternatives, applied_alternatives, - feature_mask, ARM64_NCAPS); + cpucap_mask, ARM64_NCAPS); bitmap_and(applied_alternatives, applied_alternatives, - cpu_hwcaps, ARM64_NCAPS); + system_cpucaps, ARM64_NCAPS); } } @@ -239,7 +238,7 @@ static int __init __apply_alternatives_multi_stop(void *unused) } else { DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS); - bitmap_complement(remaining_capabilities, boot_capabilities, + bitmap_complement(remaining_capabilities, boot_cpucaps, ARM64_NCAPS); BUG_ON(all_alternatives_applied); @@ -274,7 +273,7 @@ void __init apply_boot_alternatives(void) pr_info("applying boot alternatives\n"); __apply_alternatives(&kernel_alternatives, false, - &boot_capabilities[0]); + &boot_cpucaps[0]); } #ifdef CONFIG_MODULES diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7d7128c65161..6ea7f23b1287 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -105,11 +105,11 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif -DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); -EXPORT_SYMBOL(cpu_hwcaps); -static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS]; +DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); +EXPORT_SYMBOL(system_cpucaps); +static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS]; -DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS); +DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); @@ -137,7 +137,7 @@ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ - pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps); } #define ARM64_CPUID_FIELDS(reg, field, min_value) \ @@ -223,6 +223,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), 
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), @@ -364,6 +365,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0), @@ -396,6 +398,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), @@ -722,6 +730,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), + ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), /* Op1 = 0, CRn = 1, CRm = 2 */ ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), @@ -954,24 +963,24 @@ extern const struct arm64_cpu_capabilities arm64_errata[]; static const struct arm64_cpu_capabilities arm64_features[]; static void __init -init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) +init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { if (WARN(caps->capability >= ARM64_NCAPS, "Invalid capability %d\n", caps->capability)) continue; - if (WARN(cpu_hwcaps_ptrs[caps->capability], + if (WARN(cpucap_ptrs[caps->capability], "Duplicate entry for capability %d\n", caps->capability)) continue; - cpu_hwcaps_ptrs[caps->capability] = caps; + cpucap_ptrs[caps->capability] = caps; } } -static void __init init_cpu_hwcaps_indirect_list(void) +static void __init init_cpucap_indirect_list(void) { - init_cpu_hwcaps_indirect_list_from_array(arm64_features); - init_cpu_hwcaps_indirect_list_from_array(arm64_errata); + init_cpucap_indirect_list_from_array(arm64_features); + init_cpucap_indirect_list_from_array(arm64_errata); } static void __init setup_boot_cpu_capabilities(void); @@ -1017,6 +1026,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); + init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); @@ -1049,10 +1059,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); /* - * Initialize the indirect array of CPU hwcaps capabilities pointers - * before we handle the boot CPU below. 
+ * Initialize the indirect array of CPU capabilities pointers before we + * handle the boot CPU below. */ - init_cpu_hwcaps_indirect_list(); + init_cpucap_indirect_list(); /* * Detect and enable early CPU capabilities based on the boot CPU, @@ -1262,6 +1272,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu, + info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3); taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -1391,6 +1403,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR0_EL1); read_sysreg_case(SYS_ID_AA64MMFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR2_EL1); + read_sysreg_case(SYS_ID_AA64MMFR3_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); read_sysreg_case(SYS_ID_AA64ISAR2_EL1); @@ -2048,9 +2061,9 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); - bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); - bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); + bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); return apa || apa3 || api; } @@ -2186,6 +2199,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) set_pstate_dit(1); } +static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); +} + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2235,11 +2253,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_ECV_CNTPOFF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR0_EL1, - .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT, - .field_width = 4, - .sign = FTR_UNSIGNED, - .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF, + ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF) }, #ifdef CONFIG_ARM64_PAN { @@ -2309,6 +2323,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = is_kvm_protected_mode, }, + { + .desc = "HCRX_EL2 register", + .capability = ARM64_HAS_HCX, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP) + }, #endif { .desc = "Kernel page table isolation (KPTI)", @@ -2641,6 +2662,27 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_dit, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP) }, + { + .desc = "Memory Copy and Memory Set instructions", + .capability = ARM64_HAS_MOPS, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .cpu_enable = cpu_enable_mops, + ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP) + }, + { + .capability = ARM64_HAS_TCR2, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + 
.matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, TCRX, IMP) + }, + { + .desc = "Stage-1 Permission Indirection Extension (S1PIE)", + .capability = ARM64_HAS_S1PIE, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1PIE, IMP) + }, {}, }; @@ -2769,6 +2811,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), + HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), @@ -2895,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask) || cpus_have_cap(caps->capability) || !caps->matches(caps, cpucap_default_scope(caps))) @@ -2903,10 +2946,11 @@ static void update_cpu_capabilities(u16 scope_mask) if (caps->desc) pr_info("detected: %s\n", caps->desc); - cpus_set_cap(caps->capability); + + __set_bit(caps->capability, system_cpucaps); if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) - set_bit(caps->capability, boot_capabilities); + set_bit(caps->capability, boot_cpucaps); } } @@ -2920,7 +2964,7 @@ static int cpu_enable_non_boot_scope_capabilities(void *__unused) u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU; for_each_available_cap(i) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i]; if (WARN_ON(!cap)) continue; @@ -2950,7 +2994,7 @@ static void __init enable_cpu_capabilities(u16 scope_mask) for (i = 0; i < ARM64_NCAPS; i++) { unsigned int num; - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; num = caps->capability; @@ -2995,7 +3039,7 @@ static void verify_local_cpu_caps(u16 scope_mask) scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { - caps = cpu_hwcaps_ptrs[i]; + caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; @@ -3194,7 +3238,7 @@ static void __init setup_boot_cpu_capabilities(void) bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_LOCAL_CPU); @@ -3207,13 +3251,13 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap); /* * This helper function is used in a narrow window when, * - The system wide safe registers are set with all the SMP CPUs and, - * - The SYSTEM_FEATURE cpu_hwcaps may not have been set. + * - The SYSTEM_FEATURE system_cpucaps may not have been set. * In all other cases cpus_have_{const_}cap() should be used. 
*/ static bool __maybe_unused __system_matches_cap(unsigned int n) { if (n < ARM64_NCAPS) { - const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; + const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_SYSTEM); diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 42e19fff40ee..d1f68599c29f 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -13,7 +13,7 @@ #include <linux/of_device.h> #include <linux/psci.h> -#ifdef CONFIG_ACPI +#ifdef CONFIG_ACPI_PROCESSOR_IDLE #include <acpi/processor.h> diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index eb4378c23b3c..58622dc85917 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -125,6 +125,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_BI32I32] = "smebi32i32", [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", [KERNEL_HWCAP_SME_F16F16] = "smef16f16", + [KERNEL_HWCAP_MOPS] = "mops", }; #ifdef CONFIG_COMPAT @@ -446,6 +447,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); + info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1); info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 3af3c01c93a6..6b2e0c367702 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void) lockdep_hardirqs_on(CALLER_ADDR0); } -static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) +static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs) { unsigned long flags; @@ -135,11 +135,13 @@ static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs) flags = read_thread_flags(); if (unlikely(flags & _TIF_WORK_MASK)) do_notify_resume(regs, flags); + + lockdep_sys_exit(); } static __always_inline void exit_to_user_mode(struct pt_regs *regs) { - prepare_exit_to_user_mode(regs); + exit_to_user_mode_prepare(regs); mte_check_tfsr_exit(); __exit_to_user_mode(); } @@ -611,6 +613,14 @@ static void noinstr el0_bti(struct pt_regs *regs) exit_to_user_mode(regs); } +static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr) +{ + enter_from_user_mode(regs); + local_daif_restore(DAIF_PROCCTX); + do_el0_mops(regs, esr); + exit_to_user_mode(regs); +} + static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { enter_from_user_mode(regs); @@ -688,6 +698,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_BTI: el0_bti(regs); break; + case ESR_ELx_EC_MOPS: + el0_mops(regs, esr); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ab2a6e33c052..a40e5e50fa55 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -101,12 +101,11 @@ .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? 
.endm - .macro tramp_alias, dst, sym, tmp - mov_q \dst, TRAMP_VALIAS - adr_l \tmp, \sym - add \dst, \dst, \tmp - adr_l \tmp, .entry.tramp.text - sub \dst, \dst, \tmp + .macro tramp_alias, dst, sym + .set .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text + movz \dst, :abs_g2_s:.Lalias\@ + movk \dst, :abs_g1_nc:.Lalias\@ + movk \dst, :abs_g0_nc:.Lalias\@ .endm /* @@ -435,13 +434,14 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 eret alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 4f msr far_el1, x29 - tramp_alias x30, tramp_exit_native, x29 - br x30 -4: - tramp_alias x30, tramp_exit_compat, x29 - br x30 + + ldr_this_cpu x30, this_cpu_vector, x29 + tramp_alias x29, tramp_exit + msr vbar_el1, x30 // install vector table + ldr lr, [sp, #S_LR] // restore x30 + add sp, sp, #PT_REGS_SIZE // restore sp + br x29 #endif .else ldr lr, [sp, #S_LR] @@ -732,22 +732,6 @@ alternative_else_nop_endif .org 1b + 128 // Did we overflow the ventry slot? .endm - .macro tramp_exit, regsize = 64 - tramp_data_read_var x30, this_cpu_vector - get_this_cpu_offset x29 - ldr x30, [x30, x29] - - msr vbar_el1, x30 - ldr lr, [sp, #S_LR] - tramp_unmap_kernel x29 - .if \regsize == 64 - mrs x29, far_el1 - .endif - add sp, sp, #PT_REGS_SIZE // restore sp - eret - sb - .endm - .macro generate_tramp_vector, kpti, bhb .Lvector_start\@: .space 0x400 @@ -768,7 +752,7 @@ alternative_else_nop_endif */ .pushsection ".entry.tramp.text", "ax" .align 11 -SYM_CODE_START_NOALIGN(tramp_vectors) +SYM_CODE_START_LOCAL_NOALIGN(tramp_vectors) #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW @@ -777,13 +761,12 @@ SYM_CODE_START_NOALIGN(tramp_vectors) generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) -SYM_CODE_START(tramp_exit_native) - tramp_exit -SYM_CODE_END(tramp_exit_native) - -SYM_CODE_START(tramp_exit_compat) - tramp_exit 32 -SYM_CODE_END(tramp_exit_compat) +SYM_CODE_START_LOCAL(tramp_exit) + tramp_unmap_kernel x29 + mrs x29, far_el1 // restore x29 + eret + sb +SYM_CODE_END(tramp_exit) .popsection // .entry.tramp.text #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ @@ -1077,7 +1060,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline br x5 #endif SYM_CODE_END(__sdei_asm_handler) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2fbafa5cc7ac..7a1aeb95d7c3 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1649,6 +1649,7 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; + sme_smstop(); } current->thread.fp_type = FP_STATE_FPSIMD; diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 432626c866a8..a650f5e11fc5 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) static struct plt_entry *get_ftrace_plt(struct module *mod) { -#ifdef CONFIG_ARM64_MODULE_PLTS +#ifdef CONFIG_MODULES struct plt_entry *plt = mod->arch.ftrace_trampolines; return &plt[FTRACE_PLT_IDX]; @@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, * must use a PLT to reach it. We can only place PLTs for modules, and * only when module PLT support is built-in. 
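
For background on why a PLT may be needed at all: a minimal sketch (my own illustration, not kernel code) of the reachability test implied by CALL26/JUMP26 relocations, whose signed 26-bit word immediate gives a +/-128M byte range from the branch instruction:

#include <stdbool.h>
#include <stdint.h>

#define DEMO_SZ_128M	(128ULL * 1024 * 1024)

/*
 * BL/B encode a signed 26-bit word offset, i.e. a signed 28-bit byte
 * offset, covering [pc - 128M, pc + 128M).
 */
static bool branch_reachable_demo(uint64_t pc, uint64_t target)
{
	int64_t offset = (int64_t)(target - pc);

	return offset >= -(int64_t)DEMO_SZ_128M &&
	       offset <  (int64_t)DEMO_SZ_128M;
}

/* When this returns false, the branch must be redirected via a PLT entry. */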
*/ - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + if (!IS_ENABLED(CONFIG_MODULES)) return false; /* @@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * * Note: 'mod' is only set at module load time. */ - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && - IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod) return aarch64_insn_patch_text_nosync((void *)pc, new); - } if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index e92caebff46a..0f5a30f109d9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -382,7 +382,7 @@ SYM_FUNC_START_LOCAL(create_idmap) adrp x0, init_idmap_pg_dir adrp x3, _text adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE - mov x7, SWAPPER_RX_MMUFLAGS + mov_q x7, SWAPPER_RX_MMUFLAGS map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT @@ -391,7 +391,7 @@ SYM_FUNC_START_LOCAL(create_idmap) adrp x2, init_pg_dir adrp x3, init_pg_end bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 - mov x5, SWAPPER_RW_MMUFLAGS + mov_q x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region @@ -402,7 +402,7 @@ SYM_FUNC_START_LOCAL(create_idmap) bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 - mov x5, SWAPPER_RW_MMUFLAGS + mov_q x5, SWAPPER_RW_MMUFLAGS mov x6, #SWAPPER_BLOCK_SHIFT bl remap_region @@ -430,7 +430,7 @@ SYM_FUNC_START_LOCAL(create_kernel_mapping) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text add x6, x6, x5 // runtime __va(_end) - mov x7, SWAPPER_RW_MMUFLAGS + mov_q x7, SWAPPER_RW_MMUFLAGS map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 788597a6b6a2..02870beb271e 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -99,7 +99,6 @@ int pfn_is_nosave(unsigned long pfn) void notrace save_processor_state(void) { - WARN_ON(num_online_cpus() != 1); } void notrace restore_processor_state(void) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b29a311bb055..db2a1861bb97 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -973,14 +973,6 @@ static int hw_breakpoint_reset(unsigned int cpu) return 0; } -#ifdef CONFIG_CPU_PM -extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)); -#else -static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int)) -{ -} -#endif - /* * One-time initialisation. 
*/ diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 9439240c3fcf..d63de1973ddb 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -119,6 +119,24 @@ SYM_CODE_START_LOCAL(__finalise_el2) msr ttbr1_el1, x0 mrs_s x0, SYS_MAIR_EL12 msr mair_el1, x0 + mrs x1, REG_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4 + cbz x1, .Lskip_tcr2 + mrs x0, REG_TCR2_EL12 + msr REG_TCR2_EL1, x0 + + // Transfer permission indirection state + mrs x1, REG_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lskip_indirection + mrs x0, REG_PIRE0_EL12 + msr REG_PIRE0_EL1, x0 + mrs x0, REG_PIR_EL12 + msr REG_PIR_EL1, x0 + +.Lskip_indirection: +.Lskip_tcr2: + isb // Hack the exception return to stay at EL2 diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 370ab84fd06e..8439248c21d3 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -123,6 +123,7 @@ static const struct ftr_set_desc isar2 __initconst = { .fields = { FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL), FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL), + FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL), {} }, }; @@ -174,6 +175,7 @@ static const struct { "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " "id_aa64isar1.api=0 id_aa64isar1.apa=0 " "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" }, + { "arm64.nomops", "id_aa64isar2.mops=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index e7477f21a4c9..17f96a19781d 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -4,90 +4,35 @@ */ #include <linux/cache.h> -#include <linux/crc32.h> #include <linux/init.h> -#include <linux/libfdt.h> -#include <linux/mm_types.h> -#include <linux/sched.h> -#include <linux/types.h> -#include <linux/pgtable.h> -#include <linux/random.h> +#include <linux/printk.h> -#include <asm/fixmap.h> -#include <asm/kernel-pgtable.h> +#include <asm/cpufeature.h> #include <asm/memory.h> -#include <asm/mmu.h> -#include <asm/sections.h> -#include <asm/setup.h> -u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; struct arm64_ftr_override kaslr_feature_override __initdata; -static int __init kaslr_init(void) -{ - u64 module_range; - u32 seed; - - /* - * Set a reasonable default for module_alloc_base in case - * we end up running with module randomization disabled. - */ - module_alloc_base = (u64)_etext - MODULES_VSIZE; +bool __ro_after_init __kaslr_is_enabled = false; +void __init kaslr_init(void) +{ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { pr_info("KASLR disabled on command line\n"); - return 0; - } - - if (!kaslr_enabled()) { - pr_warn("KASLR disabled due to lack of seed\n"); - return 0; + return; } - pr_info("KASLR enabled\n"); - /* - * KASAN without KASAN_VMALLOC does not expect the module region to - * intersect the vmalloc region, since shadow memory is allocated for - * each module at load time, whereas the vmalloc region will already be - * shadowed by KASAN zero pages. + * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical + * placement of the image rather than from the seed, so a displacement + * of less than MIN_KIMG_ALIGN means that no seed was provided. 
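
A tiny sketch (illustrative; the helper name is hypothetical) of the seed test kaslr_init() performs above: the physical placement of the image can only contribute an offset below MIN_KIMG_ALIGN, so any larger displacement must have come from a seed:

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for kaslr_offset() and MIN_KIMG_ALIGN */
static bool kaslr_seeded_demo(uint64_t kaslr_offset, uint64_t min_kimg_align)
{
	/*
	 * Offsets below MIN_KIMG_ALIGN are explained entirely by the
	 * bootloader's physical placement of the image; only a seed can
	 * produce a larger displacement.
	 */
	return kaslr_offset >= min_kimg_align;
}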
*/ - BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && - !IS_ENABLED(CONFIG_KASAN_VMALLOC)); - - seed = get_random_u32(); - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - /* - * Randomize the module region over a 2 GB window covering the - * kernel. This reduces the risk of modules leaking information - * about the address of the kernel itself, but results in - * branches between modules and the core kernel that are - * resolved via PLTs. (Branches between modules will be - * resolved normally.) - */ - module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); - } else { - /* - * Randomize the module region by setting module_alloc_base to - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, - * _stext) . This guarantees that the resulting region still - * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers unless this region is exhausted - * and we fall back to a larger 2GB window in module_alloc() - * when ARM64_MODULE_PLTS is enabled. - */ - module_range = MODULES_VSIZE - (u64)(_etext - _stext); + if (kaslr_offset() < MIN_KIMG_ALIGN) { + pr_warn("KASLR disabled due to lack of seed\n"); + return; } - /* use the lower 21 bits to randomize the base of the module region */ - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; - module_alloc_base &= PAGE_MASK; - - return 0; + pr_info("KASLR enabled\n"); + __kaslr_is_enabled = true; } -subsys_initcall(kaslr_init) diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 5ed6a585f21f..636be6715155 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -48,7 +48,7 @@ static void *image_load(struct kimage *image, /* * We require a kernel with an unambiguous Image header. Per - * Documentation/arm64/booting.rst, this is the case when image_size + * Documentation/arch/arm64/booting.rst, this is the case when image_size * is non-zero (practically speaking, since v3.17). */ h = (struct arm64_image_header *)kernel; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 692e9d2e31e5..af046ceac22d 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -10,7 +10,7 @@ * aarch32_setup_additional_pages() and are provided for compatibility * reasons with 32 bit (aarch32) applications that need them. * - * See Documentation/arm/kernel_user_helpers.rst for formal definitions. + * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions. 
*/ #include <asm/unistd.h> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 543493bf924d..ad02058756b5 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -7,6 +7,7 @@ #include <linux/ftrace.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/moduleloader.h> #include <linux/sort.h> static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc, diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 5af4975caeb5..dd851297596e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -7,6 +7,8 @@ * Author: Will Deacon <will.deacon@arm.com> */ +#define pr_fmt(fmt) "Modules: " fmt + #include <linux/bitops.h> #include <linux/elf.h> #include <linux/ftrace.h> @@ -15,52 +17,131 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> +#include <linux/random.h> #include <linux/scs.h> #include <linux/vmalloc.h> + #include <asm/alternative.h> #include <asm/insn.h> #include <asm/scs.h> #include <asm/sections.h> +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; + +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) +{ + u64 max_pgoff, pgoff; + + if ((end - start) >= size) + return 0; + + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); + + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. + * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. 
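
To make the window arithmetic concrete, a hedged sketch of random_bounding_box() above with the RNG replaced by a parameter (the example addresses are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SIZE	4096ULL

/* Mirrors random_bounding_box(), with get_random_u32_inclusive() stubbed out */
static uint64_t bounding_box_demo(uint64_t size, uint64_t start, uint64_t end,
				  uint64_t pgoff)
{
	uint64_t max_pgoff;

	if ((end - start) >= size)
		return 0;	/* the interval cannot fit in the window */

	max_pgoff = (size - (end - start)) / DEMO_PAGE_SIZE;
	if (pgoff > max_pgoff)
		pgoff = max_pgoff;

	/* Slide the window left of 'start' by a page-aligned random amount */
	return start - pgoff * DEMO_PAGE_SIZE;
}

int main(void)
{
	/* e.g. a 48M kernel image bounded by a 128M window */
	uint64_t start = 0xffff800008000000ULL;
	uint64_t end   = start + 48ULL * 1024 * 1024;
	uint64_t base  = bounding_box_demo(128ULL * 1024 * 1024, start, end, 1000);

	printf("window base=0x%llx\n", (unsigned long long)base);
	return 0;
}

By construction, [base, base + size) always contains [start, end), since pgoff is capped so the window cannot slide past the interval.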
+ */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; + } else { + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); + } + + pr_info("%llu pages in range for non-PLT usage\n", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage\n", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); + + return 0; +} +subsys_initcall(module_init_limits); + void *module_alloc(unsigned long size) { - u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; - gfp_t gfp_mask = GFP_KERNEL; - void *p; - - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - /* don't exceed the static module region - see below */ - module_alloc_end = MODULES_END; - - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, - NUMA_NO_NODE, __builtin_return_address(0)); - - if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (IS_ENABLED(CONFIG_KASAN_VMALLOC) || - (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) - /* - * KASAN without KASAN_VMALLOC can only deal with module - * allocations being served from the reserved module region, - * since the remainder of the vmalloc region is already - * backed by zero shadow pages, and punching holes into it - * is non-trivial. Since the module region is not randomized - * when KASAN is enabled without KASAN_VMALLOC, it is even - * less likely that the module region gets exhausted, so we - * can simply omit this fallback in that case. - */ - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + void *p = NULL; + + /* + * Where possible, prefer to allocate within direct branch range of the + * kernel such that no PLTs are necessary.
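
A condensed sketch (illustration only; alloc_in_window_demo is a made-up stand-in for __vmalloc_node_range()) of the resulting two-step policy in module_alloc(): try the 128M direct-branch window first, then fall back to the 2G window where PLTs bridge the distance:

#include <stdint.h>
#include <stdlib.h>

/* Stand-in allocator; the kernel allocates inside [lo, hi) via vmalloc */
static void *alloc_in_window_demo(size_t size, uint64_t lo, uint64_t hi)
{
	return (hi - lo) >= size ? malloc(size) : NULL;
}

static void *module_alloc_demo(size_t size, uint64_t direct_base,
			       uint64_t plt_base)
{
	void *p = NULL;

	/* Prefer the 128M window: branches from there never need PLTs */
	if (direct_base)
		p = alloc_in_window_demo(size, direct_base,
					 direct_base + (128ULL << 20));

	/* Fall back to the 2G window; CALL26 branches go via PLT entries */
	if (!p && plt_base)
		p = alloc_in_window_demo(size, plt_base,
					 plt_base + (2ULL << 30));

	return p;
}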
+ */ + if (module_direct_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_direct_base, + module_direct_base + SZ_128M, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { + if (!p && module_plt_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_plt_base, + module_plt_base + SZ_2G, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } + + if (!p) { + pr_warn_ratelimited("%s: unable to allocate memory\n", + __func__); + } + + if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { vfree(p); return NULL; } @@ -448,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); - - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - ovf == -ERANGE) { + if (ovf == -ERANGE) { val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym); if (!val) return -ENOEXEC; @@ -487,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { -#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) const Elf_Shdr *s; struct plt_entry *plts; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 7e89968bd282..4c5ef9b20065 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -416,10 +416,9 @@ long get_mte_ctrl(struct task_struct *task) static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, struct iovec *kiov, unsigned int gup_flags) { - struct vm_area_struct *vma; void __user *buf = kiov->iov_base; size_t len = kiov->iov_len; - int ret; + int err = 0; int write = gup_flags & FOLL_WRITE; if (!access_ok(buf, len)) @@ -429,14 +428,16 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, return -EIO; while (len) { + struct vm_area_struct *vma; unsigned long tags, offset; void *maddr; - struct page *page = NULL; + struct page *page = get_user_page_vma_remote(mm, addr, + gup_flags, &vma); - ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, - &vma, NULL); - if (ret <= 0) + if (IS_ERR_OR_NULL(page)) { + err = page == NULL ? -EIO : PTR_ERR(page); break; + } /* * Only copy tags if the page has been mapped as PROT_MTE @@ -446,7 +447,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, * was never mapped with PROT_MTE. 
*/ if (!(vma->vm_flags & VM_MTE)) { - ret = -EOPNOTSUPP; + err = -EOPNOTSUPP; put_page(page); break; } @@ -479,7 +480,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, kiov->iov_len = buf - kiov->iov_base; if (!kiov->iov_len) { /* check for error accessing the tracee's address space */ - if (ret <= 0) + if (err) return -EIO; else return -EFAULT; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b8ec7b3ac9cb..417a8a86b2db 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + kaslr_init(); + /* * If we know now that we are going to need KPTI, then use non-global * mappings from the start, avoiding the cost of rewriting diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 2cfc810d0a5b..e304f7ebec2a 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -23,6 +23,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/elf.h> +#include <asm/exception.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> @@ -398,7 +399,7 @@ static int restore_tpidr2_context(struct user_ctxs *user) __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err); if (!err) - current->thread.tpidr2_el0 = tpidr2_el0; + write_sysreg_s(tpidr2_el0, SYS_TPIDR2_EL0); return err; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d00d4cbb31b1..edd63894d61e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -332,17 +332,13 @@ static int op_cpu_kill(unsigned int cpu) } /* - * called on the thread which is asking for a CPU to be shutdown - - * waits until shutdown has completed, or it is timed out. + * Called on the thread which is asking for a CPU to be shut down, after + * the shutdown has completed. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { int err; - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: cpu didn't die\n", cpu); - return; - } pr_debug("CPU%u: shutdown\n", cpu); /* @@ -369,8 +365,8 @@ void __noreturn cpu_die(void) local_daif_mask(); - /* Tell __cpu_die() that this CPU is now safe to dispose of */ - (void)cpu_report_death(); + /* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */ + cpuhp_ap_report_dead(); /* * Actually shut down the CPU. This must never fail. The specific hotplug diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index da84cf855c44..5a668d7f3c1f 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -147,11 +147,9 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * exit regardless, as the old entry assembly did.
*/ if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { - local_daif_mask(); flags = read_thread_flags(); if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - local_daif_restore(DAIF_PROCCTX); } trace_exit: diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4bb1b8f47298..8b70759cdbb9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -514,6 +514,63 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr) die("Oops - FPAC", regs, esr); } +void do_el0_mops(struct pt_regs *regs, unsigned long esr) +{ + bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION; + bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A; + int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr); + int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr); + int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr); + unsigned long dst, src, size; + + dst = pt_regs_read_reg(regs, dstreg); + src = pt_regs_read_reg(regs, srcreg); + size = pt_regs_read_reg(regs, sizereg); + + /* + * Put the registers back in the original format suitable for a + * prologue instruction, using the generic return routine from the + * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH. + */ + if (esr & ESR_ELx_MOPS_ISS_MEM_INST) { + /* SET* instruction */ + if (option_a ^ wrong_option) { + /* Format is from Option A; forward set */ + pt_regs_write_reg(regs, dstreg, dst + size); + pt_regs_write_reg(regs, sizereg, -size); + } + } else { + /* CPY* instruction */ + if (!(option_a ^ wrong_option)) { + /* Format is from Option B */ + if (regs->pstate & PSR_N_BIT) { + /* Backward copy */ + pt_regs_write_reg(regs, dstreg, dst - size); + pt_regs_write_reg(regs, srcreg, src - size); + } + } else { + /* Format is from Option A */ + if (size & BIT(63)) { + /* Forward copy */ + pt_regs_write_reg(regs, dstreg, dst + size); + pt_regs_write_reg(regs, srcreg, src + size); + pt_regs_write_reg(regs, sizereg, -size); + } + } + } + + if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE) + regs->pc -= 8; + else + regs->pc -= 4; + + /* + * If single stepping then finish the step before executing the + * prologue instruction. 
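
A concrete illustration of the arithmetic above (my own sketch, not part of the patch): the CPY* Option A forward-copy case adds the negative size back onto both pointers and negates the size, so that re-execution starts from the prologue with the original operands.

#include <stdint.h>

/* Mirrors the "CPY*, format is from Option A, forward copy" case above */
static void mops_rewind_forward_cpy_demo(uint64_t *dst, uint64_t *src,
					 uint64_t *size)
{
	if (*size & (1ULL << 63)) {	/* size is negative: forward copy */
		*dst += *size;		/* undo the advance of the pointers */
		*src += *size;
		*size = -*size;		/* back to a positive byte count */
	}
}

/*
 * e.g. dst=0x1100, src=0x2100, size=-0x100 becomes
 *      dst=0x1000, src=0x2000, size=0x100.
 */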
+ */ + user_fastforward_single_step(current); +} + #define __user_cache_maint(insn, address, res) \ if (address >= TASK_SIZE_MAX) { \ res = -EFAULT; \ @@ -824,6 +881,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)", [ESR_ELx_EC_DABT_CUR] = "DABT (current EL)", [ESR_ELx_EC_SP_ALIGN] = "SP Alignment", + [ESR_ELx_EC_MOPS] = "MOPS", [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", [ESR_ELx_EC_SERROR] = "SError", @@ -947,7 +1005,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr) } /* GENERIC_BUG traps */ - +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { /* @@ -959,6 +1017,7 @@ int is_valid_bugaddr(unsigned long addr) */ return 1; } +#endif static int bug_handler(struct pt_regs *regs, unsigned long esr) { @@ -1044,7 +1103,7 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr) bool recover = esr & KASAN_ESR_RECOVER; bool write = esr & KASAN_ESR_WRITE; size_t size = KASAN_ESR_SIZE(esr); - u64 addr = regs->regs[0]; + void *addr = (void *)regs->regs[0]; u64 pc = regs->pc; kasan_report(addr, size, write, pc); diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c new file mode 100644 index 000000000000..dcd25322127c --- /dev/null +++ b/arch/arm64/kernel/watchdog_hld.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/nmi.h> +#include <linux/cpufreq.h> +#include <linux/perf/arm_pmu.h> + +/* + * Safe maximum CPU frequency in case a particular platform doesn't implement + * a cpufreq driver. Although the architecture doesn't put any restrictions on + * the maximum frequency, 5 GHz seems to be a safe maximum given the available + * Arm CPUs in the market, which are clocked much less than 5 GHz. On the other + * hand, we can't make it much higher as it would lead to a large hard-lockup + * detection timeout on parts which run slower (e.g. 1 GHz on + * Developerbox) and don't possess a cpufreq driver. + */ +#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz +u64 hw_nmi_get_sample_period(int watchdog_thresh) +{ + unsigned int cpu = smp_processor_id(); + unsigned long max_cpu_freq; + + max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; + if (!max_cpu_freq) + max_cpu_freq = SAFE_MAX_CPU_FREQ; + + return (u64)max_cpu_freq * watchdog_thresh; +} + +bool __init arch_perf_nmi_is_available(void) +{ + /* + * hardlockup_detector_perf_init() will succeed even if Pseudo-NMI is + * turned off; however, the PMU interrupts will then act like normal + * interrupts instead of NMIs and the hardlockup detector would be broken. + */ + return arm_pmu_irq_is_nmi(); +} diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 55f80fb93925..8725291cb00a 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -333,7 +333,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) /* Check if we have TRBE implemented and available at the host */ if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG)) + !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 5c15c58f90cc..2f6e0b3e4a75 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -82,8 +82,14 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2.
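The period returned by hw_nmi_get_sample_period() above is simply cycles-per-second times watchdog_thresh in seconds. A quick sanity check of the arithmetic, assuming the cpufreq-less fallback and the default threshold of 10 seconds (the helper below is illustrative, not part of the patch):

static u64 sample_period_example(void)
{
	const u64 safe_max_hz = 5000000000ULL;	/* SAFE_MAX_CPU_FREQ fallback */
	const int watchdog_thresh = 10;		/* default threshold, seconds */

	/*
	 * 5e9 Hz * 10 s = 5e10 cycles. A part actually running at 1 GHz
	 * takes ~50 s to retire that many cycles, which is why the
	 * fallback cannot be pushed much higher.
	 */
	return safe_max_hz * watchdog_thresh;
}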
*/ if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + write_sysreg(0, pmselr_el0); + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); @@ -106,8 +112,13 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); write_sysreg(0, hstr_el2); - if (kvm_arm_support_pmu_v3()) - write_sysreg(0, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); + vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); + } if (cpus_have_final_cap(ARM64_SME)) { sysreg_clear_set_s(SYS_HFGRTR_EL2, 0, @@ -130,6 +141,9 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); + + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2); } static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) @@ -144,6 +158,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_VSE; vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; } + + if (cpus_have_final_cap(ARM64_HAS_HCX)) + write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); } static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 699ea1f8d409..bb6b571ec627 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -44,6 +44,8 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) + ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2); ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); @@ -53,6 +55,10 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR); + ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0); + } ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); @@ -114,6 +120,8 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + if (cpus_have_final_cap(ARM64_HAS_TCR2)) + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2); write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); @@ -123,6 +131,10 @@ static inline void 
__sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + if (cpus_have_final_cap(ARM64_HAS_S1PIE)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0); + } write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index d756b939f296..4558c02eb352 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -56,7 +56,7 @@ static void __debug_save_trace(u64 *trfcr_el1) *trfcr_el1 = 0; /* Check if the TRBE is enabled */ - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_ENABLE)) + if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E)) return; /* * Prohibit trace generation while we are in guest. diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 7a1aa511e7da..b37e7c96efea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) } NOKPROBE_SYMBOL(__deactivate_traps); +/* + * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to + * prevent a race condition between context switching of PMUSERENR_EL0 + * in __{activate,deactivate}_traps_common() and IPIs that attempt to + * update PMUSERENR_EL0. See also kvm_set_pmuserenr(). + */ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) { + unsigned long flags; + + local_irq_save(flags); __activate_traps_common(vcpu); + local_irq_restore(flags); } void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { + unsigned long flags; + + local_irq_save(flags); __deactivate_traps_common(vcpu); + local_irq_restore(flags); } static const exit_handler_fn hyp_exit_handlers[] = { diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 491ca7eb2a4c..560650972478 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -700,7 +700,25 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) mutex_lock(&arm_pmus_lock); - cpu = smp_processor_id(); + /* + * It is safe to use a stale cpu to iterate the list of PMUs so long as + * the same value is used for the entirety of the loop. Given this, and + * the fact that no percpu data is used for the lookup, there is no need + * to disable preemption. + * + * It is still necessary to get a valid cpu, though, to probe for the + * default PMU instance as userspace is not required to specify a PMU + * type. In order to uphold the preexisting behavior, KVM selects the + * PMU instance for the core where the first call to the + * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case + * would be a user with disdain of all things big.LITTLE that affines + * the VMM to a particular cluster of cores. + * + * In any case, userspace should just do the sane thing and use the UAPI + * to select a PMU type directly. But, be wary of the baggage being + * carried here.
+ */ + cpu = raw_smp_processor_id(); list_for_each_entry(entry, &arm_pmus, entry) { tmp = entry->arm_pmu; diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 7887133d15f0..121f1a14c829 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) kvm_vcpu_pmu_enable_el0(events_host); kvm_vcpu_pmu_disable_el0(events_guest); } + +/* + * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU + * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched + * to the value for the guest on vcpu_load(). The value for the host EL0 + * will be restored on vcpu_put(), before returning to userspace. + * This isn't necessary for nVHE, as the register is context switched for + * every guest enter/exit. + * + * Return true if KVM takes care of the register. Otherwise return false. + */ +bool kvm_set_pmuserenr(u64 val) +{ + struct kvm_cpu_context *hctxt; + struct kvm_vcpu *vcpu; + + if (!kvm_arm_support_pmu_v3() || !has_vhe()) + return false; + + vcpu = kvm_get_running_vcpu(); + if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) + return false; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; + return true; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 753aa7418149..5b5d5e5449dc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -401,9 +401,9 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu, return read_from_write_only(vcpu, p, r); /* Forward the OSLK bit to OSLSR */ - oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK; - if (p->regval & SYS_OSLAR_OSLK) - oslsr |= SYS_OSLSR_OSLK; + oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK; + if (p->regval & OSLAR_EL1_OSLK) + oslsr |= OSLSR_EL1_OSLK; __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr; return true; @@ -427,7 +427,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, * The only modifiable bit is the OSLK bit. Refuse the write if * userspace attempts to change any other bit in the register. 
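The kvm_set_pmuserenr() helper added above gives the host a way to update its PMUSERENR_EL0 while a guest's value is live in the register. A hypothetical host-side write path (the actual hook-up in the PMU driver is outside these hunks, so the function name below is an assumption) might look like:

static void host_write_pmuserenr(u64 val)
{
	/*
	 * If KVM currently owns PMUSERENR_EL0 for a loaded vCPU, park the
	 * host value in the saved host context; vcpu_put() restores it.
	 */
	if (kvm_set_pmuserenr(val))
		return;

	/* No guest value loaded: write the register directly. */
	write_sysreg(val, pmuserenr_el0);
}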
*/ - if ((val ^ rd->val) & ~SYS_OSLSR_OSLK) + if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; __vcpu_sys_reg(vcpu, rd->reg) = val; @@ -1265,6 +1265,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3)); if (!cpus_have_final_cap(ARM64_HAS_WFXT)) val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); + val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ @@ -1800,7 +1801,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 }, { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1, - SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, + OSLSR_EL1_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, }, { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi }, @@ -1891,7 +1892,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64MMFR0_EL1), ID_SANITISED(ID_AA64MMFR1_EL1), ID_SANITISED(ID_AA64MMFR2_EL1), - ID_UNALLOCATED(7,3), + ID_SANITISED(ID_AA64MMFR3_EL1), ID_UNALLOCATED(7,4), ID_UNALLOCATED(7,5), ID_UNALLOCATED(7,6), @@ -1911,6 +1912,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, + { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 }, PTRAUTH_KEY(APIA), PTRAUTH_KEY(APIB), @@ -1960,6 +1962,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 6eafc2c45cfc..c8c3cb812783 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -446,6 +446,7 @@ int vgic_lazy_init(struct kvm *kvm) int kvm_vgic_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + enum vgic_type type; gpa_t dist_base; int ret = 0; @@ -460,10 +461,13 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (!irqchip_in_kernel(kvm)) goto out; - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { ret = vgic_v2_map_resources(kvm); - else + type = VGIC_V2; + } else { ret = vgic_v3_map_resources(kvm); + type = VGIC_V3; + } if (ret) { __kvm_vgic_destroy(kvm); @@ -473,8 +477,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); - ret = vgic_register_dist_iodev(kvm, dist_base, - kvm_vgic_global_state.type); + ret = vgic_register_dist_iodev(kvm, dist_base, type); if (ret) { kvm_err("Unable to register VGIC dist MMIO regions\n"); kvm_vgic_destroy(kvm); diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c index 96b171995d19..f9a53b7f9842 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/arch/arm64/lib/xor-neon.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <asm/neon-intrinsics.h> -void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, +static 
void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; @@ -37,7 +37,7 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3) { @@ -73,7 +73,7 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4) @@ -118,7 +118,7 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, } while (--lines > 0); } -void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, +static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2, const unsigned long * __restrict p3, const unsigned long * __restrict p4, diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e1e0dca01839..188197590fc9 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -364,8 +364,8 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm) ttbr1 &= ~TTBR_ASID_MASK; ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid); + cpu_set_reserved_ttbr0_nosync(); write_sysreg(ttbr1, ttbr1_el1); - isb(); write_sysreg(ttbr0, ttbr0_el1); isb(); post_ttbr_update_workaround(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8a169bdb4d53..935f0a8911f9 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -66,6 +66,8 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr static void data_abort_decode(unsigned long esr) { + unsigned long iss2 = ESR_ELx_ISS2(esr); + pr_alert("Data abort info:\n"); if (esr & ESR_ELx_ISV) { @@ -78,12 +80,21 @@ static void data_abort_decode(unsigned long esr) (esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT, (esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT); } else { - pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK); + pr_alert(" ISV = 0, ISS = 0x%08lx, ISS2 = 0x%08lx\n", + esr & ESR_ELx_ISS_MASK, iss2); } - pr_alert(" CM = %lu, WnR = %lu\n", + pr_alert(" CM = %lu, WnR = %lu, TnD = %lu, TagAccess = %lu\n", (esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT, - (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT); + (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT, + (iss2 & ESR_ELx_TnD) >> ESR_ELx_TnD_SHIFT, + (iss2 & ESR_ELx_TagAccess) >> ESR_ELx_TagAccess_SHIFT); + + pr_alert(" GCS = %ld, Overlay = %lu, DirtyBit = %lu, Xs = %llu\n", + (iss2 & ESR_ELx_GCS) >> ESR_ELx_GCS_SHIFT, + (iss2 & ESR_ELx_Overlay) >> ESR_ELx_Overlay_SHIFT, + (iss2 & ESR_ELx_DirtyBit) >> ESR_ELx_DirtyBit_SHIFT, + (iss2 & ESR_ELx_Xs_MASK) >> ESR_ELx_Xs_SHIFT); } static void mem_abort_decode(unsigned long esr) @@ -177,6 +188,9 @@ static void show_pte(unsigned long addr) break; ptep = pte_offset_map(pmdp, addr); + if (!ptep) + break; + pte = READ_ONCE(*ptep); pr_cont(", pte=%016llx", pte_val(pte)); pte_unmap(ptep); @@ -317,7 +331,7 @@ static void report_tag_fault(unsigned long addr, unsigned long esr, * find out access size. 
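The extended data-abort decode added above pulls single-bit syndrome fields out of ISS2 with mask-and-shift pairs. The same pattern in isolation, reusing the field names introduced by this series (the wrapper itself is illustrative):

/* Extract the MTE TagAccess bit from a data-abort syndrome. */
static unsigned long esr_tag_access(unsigned long esr)
{
	unsigned long iss2 = ESR_ELx_ISS2(esr);

	return (iss2 & ESR_ELx_TagAccess) >> ESR_ELx_TagAccess_SHIFT;
}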
*/ bool is_write = !!(esr & ESR_ELx_WNR); - kasan_report(addr, 0, is_write, regs->pc); + kasan_report((void *)addr, 0, is_write, regs->pc); } #else /* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */ @@ -854,9 +868,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_sp_pc_abort); -int __init early_brk64(unsigned long addr, unsigned long esr, - struct pt_regs *regs); - /* * __refdata because early_brk64 is __init, but the reference to it is * clobbered at arch_initcall time. diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 5f9379b3c8c8..4e6476094952 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/mm.h> +#include <linux/libnvdimm.h> #include <linux/pagemap.h> #include <asm/cacheflush.h> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 95364e8bdc19..21716c940682 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -307,14 +307,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, return NULL; WARN_ON(addr & (sz - 1)); - /* - * Note that if this code were ever ported to the - * 32-bit arm platform then it will cause trouble in - * the case where CONFIG_HIGHPTE is set, since there - * will be no pte_unmap() to correspond with this - * pte_alloc_map(). - */ - ptep = pte_alloc_map(mm, pmdp, addr); + ptep = pte_alloc_huge(mm, pmdp, addr); } else if (sz == PMD_SIZE) { if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp))) ptep = huge_pmd_share(mm, vma, addr, pudp); @@ -366,7 +359,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return (pte_t *)pmdp; if (sz == CONT_PTE_SIZE) - return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK)); + return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK)); return NULL; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66e70ca47680..d31c3a9290c5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -69,6 +69,7 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; #define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit #define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1) +#define CRASH_HIGH_SEARCH_BASE SZ_4G #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) @@ -101,12 +102,13 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) */ static void __init reserve_crashkernel(void) { - unsigned long long crash_base, crash_size; - unsigned long long crash_low_size = 0; + unsigned long long crash_low_size = 0, search_base = 0; unsigned long long crash_max = CRASH_ADDR_LOW_MAX; + unsigned long long crash_base, crash_size; char *cmdline = boot_command_line; - int ret; bool fixed_base = false; + bool high = false; + int ret; if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; @@ -129,7 +131,9 @@ static void __init reserve_crashkernel(void) else if (ret) return; + search_base = CRASH_HIGH_SEARCH_BASE; crash_max = CRASH_ADDR_HIGH_MAX; + high = true; } else if (ret || !crash_size) { /* The specified value is invalid */ return; @@ -140,31 +144,51 @@ static void __init reserve_crashkernel(void) /* User specifies base address explicitly. */ if (crash_base) { fixed_base = true; + search_base = crash_base; crash_max = crash_base + crash_size; } retry: crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, - crash_base, crash_max); + search_base, crash_max); if (!crash_base) { /* - * If the first attempt was for low memory, fall back to - * high memory, the minimum required low memory will be - * reserved later. 
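The comment rewrite in progress here codifies three distinct reservation behaviours. Condensed into one illustrative helper (the function and its strings are mine; only the decision order mirrors the surrounding hunk):

static const char *crashkernel_strategy(bool fixed_base, bool high)
{
	if (fixed_base)	/* crashkernel=size@offset */
		return "single attempt at the given base; warn and give up on failure";
	if (high)	/* crashkernel=size,high */
		return "search from 4G upwards first, then fall back to low memory";
	/* plain crashkernel=size */
	return "search low memory first, then retry above 4G and reserve a low chunk";
}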
+ * For crashkernel=size[KMG]@offset[KMG], print out failure + * message if can't reserve the specified region. */ - if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { + if (fixed_base) { + pr_warn("crashkernel reservation failed - memory is in use.\n"); + return; + } + + /* + * For crashkernel=size[KMG], if the first attempt was for + * low memory, fall back to high memory, the minimum required + * low memory will be reserved later. + */ + if (!high && crash_max == CRASH_ADDR_LOW_MAX) { crash_max = CRASH_ADDR_HIGH_MAX; + search_base = CRASH_ADDR_LOW_MAX; crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; goto retry; } + /* + * For crashkernel=size[KMG],high, if the first attempt was + * for high memory, fall back to low memory. + */ + if (high && crash_max == CRASH_ADDR_HIGH_MAX) { + crash_max = CRASH_ADDR_LOW_MAX; + search_base = 0; + goto retry; + } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); return; } - if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) && - crash_low_size && reserve_crashkernel_low(crash_low_size)) { + if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size && + reserve_crashkernel_low(crash_low_size)) { memblock_phys_free(crash_base, crash_size); return; } @@ -442,7 +466,12 @@ void __init bootmem_init(void) */ void __init mem_init(void) { - swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), SWIOTLB_VERBOSE); + bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); + + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC)) + swiotlb = true; + + swiotlb_init(swiotlb, SWIOTLB_VERBOSE); /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index e969e68de005..f17d066e85eb 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start, static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; - u64 mod_shadow_start, mod_shadow_end; + u64 mod_shadow_start; u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; @@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void) kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); - mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); @@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void) kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - BUILD_BUG_ON(VMALLOC_START != MODULES_END); - kasan_populate_early_shadow((void *)vmalloc_shadow_end, - (void *)KASAN_SHADOW_END); - } else { - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); - } + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index af6bc8403ee4..95d360805f8a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -451,7 +451,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt 
>= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -478,7 +478,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -663,12 +663,17 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end, vm_area_add_early(vma); } +static pgprot_t kernel_exec_prot(void) +{ + return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; +} + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { int i; - pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pgprot_t prot = kernel_exec_prot(); phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); /* The trampoline is always mapped and can therefore be global */ @@ -723,7 +728,7 @@ static void __init map_kernel(pgd_t *pgdp) * mapping to install SW breakpoints. Allow this (only) when * explicitly requested with rodata=off. */ - pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + pgprot_t text_prot = kernel_exec_prot(); /* * If we have a CPU that supports BTI and a kernel built for diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c2cb437821ca..2baeec419f62 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -199,7 +199,7 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE) .pushsection ".idmap.text", "a" @@ -290,7 +290,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings) isb mov temp_pte, x5 - mov pte_flags, #KPTI_NG_PTE_FLAGS + mov_q pte_flags, KPTI_NG_PTE_FLAGS /* Everybody is enjoying the idmap, so we can rewrite swapper. 
*/ /* PGD */ @@ -454,6 +454,21 @@ SYM_FUNC_START(__cpu_setup) #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair msr tcr_el1, tcr + + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4 + cbz x1, .Lskip_indirection + + mov_q x0, PIE_E0 + msr REG_PIRE0_EL1, x0 + mov_q x0, PIE_E1 + msr REG_PIR_EL1, x0 + + mov x0, TCR2_EL1x_PIE + msr REG_TCR2_EL1, x0 + +.Lskip_indirection: + /* * Prepare SCTLR */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b26da8efa616..145b540ec34f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1731,21 +1731,21 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, } } -static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +static void save_args(struct jit_ctx *ctx, int args_off, int nregs) { int i; - for (i = 0; i < nargs; i++) { + for (i = 0; i < nregs; i++) { emit(A64_STR64I(i, A64_SP, args_off), ctx); args_off += 8; } } -static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +static void restore_args(struct jit_ctx *ctx, int args_off, int nregs) { int i; - for (i = 0; i < nargs; i++) { + for (i = 0; i < nregs; i++) { emit(A64_LDR64I(i, A64_SP, args_off), ctx); args_off += 8; } @@ -1764,7 +1764,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) */ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, struct bpf_tramp_links *tlinks, void *orig_call, - int nargs, u32 flags) + int nregs, u32 flags) { int i; int stack_size; @@ -1772,7 +1772,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, int regs_off; int retval_off; int args_off; - int nargs_off; + int nregs_off; int ip_off; int run_ctx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; @@ -1795,11 +1795,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or * BPF_TRAMP_F_RET_FENTRY_RET * - * [ argN ] + * [ arg reg N ] * [ ... 
] - * SP + args_off [ arg1 ] + * SP + args_off [ arg reg 1 ] * - * SP + nargs_off [ args count ] + * SP + nregs_off [ arg regs count ] * * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * @@ -1816,13 +1816,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; - nargs_off = stack_size; + nregs_off = stack_size; /* room for args count */ stack_size += 8; args_off = stack_size; /* room for args */ - stack_size += nargs * 8; + stack_size += nregs * 8; /* room for return value */ retval_off = stack_size; @@ -1865,12 +1865,12 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); } - /* save args count*/ - emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx); - emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx); + /* save arg regs count*/ + emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx); - /* save args */ - save_args(ctx, args_off, nargs); + /* save arg regs */ + save_args(ctx, args_off, nregs); /* save callee saved registers */ emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); @@ -1897,7 +1897,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_args(ctx, args_off, nargs); + restore_args(ctx, args_off, nregs); /* call original func */ emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); @@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_args(ctx, args_off, nargs); + restore_args(ctx, args_off, nregs); /* restore callee saved register x19 and x20 */ emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); @@ -1967,24 +1967,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *orig_call) { int i, ret; - int nargs = m->nr_args; + int nregs = m->nr_args; int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; struct jit_ctx ctx = { .image = NULL, .idx = 0, }; - /* the first 8 arguments are passed by registers */ - if (nargs > 8) - return -ENOTSUPP; - - /* don't support struct argument */ + /* extra registers needed for struct argument */ for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + /* The arg_size is at most 16 bytes, enforced by the verifier. 
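The nargs-to-nregs rename above prepares for composite arguments: under AAPCS64 a by-value struct can occupy more than one register, and the computation in the hunk just below charges for the extras. A worked example for a 16-byte struct argument (illustrative values):

/* void f(long a, struct { long x, y; } s): one reg for 'a', two for 's'. */
static int nregs_example(void)
{
	int nregs = 2;			/* m->nr_args: two formal arguments */
	int arg_size = 16;		/* verifier caps struct args at 16B */

	nregs += (arg_size + 7) / 8 - 1; /* 16-byte struct costs one extra reg */

	return nregs;			/* 3, within the 8-register limit */
}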
*/ if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) - return -ENOTSUPP; + nregs += (m->arg_size[i] + 7) / 8 - 1; } - ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + /* the first 8 registers are used for arguments */ + if (nregs > 8) + return -ENOTSUPP; + + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags); if (ret < 0) return ret; @@ -1995,7 +1996,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, ctx.idx = 0; jit_fill_hole(image, (unsigned int)(image_end - image)); - ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags); if (ret > 0 && validate_code(&ctx) < 0) ret = -EINVAL; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 40ba95472594..19c23c4fa2da 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -32,16 +32,20 @@ HAS_GENERIC_AUTH_IMP_DEF HAS_GIC_CPUIF_SYSREGS HAS_GIC_PRIO_MASKING HAS_GIC_PRIO_RELAXED_SYNC +HAS_HCX HAS_LDAPR HAS_LSE_ATOMICS +HAS_MOPS HAS_NESTED_VIRT HAS_NO_FPSIMD HAS_NO_HW_PREFETCH HAS_PAN +HAS_S1PIE HAS_RAS_EXTN HAS_RNG HAS_SB HAS_STAGE2_FWB +HAS_TCR2 HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk index 00c9e72a200a..8525980379d7 100755 --- a/arch/arm64/tools/gen-cpucaps.awk +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -24,12 +24,12 @@ BEGIN { } /^[vA-Z0-9_]+$/ { - printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + printf("#define ARM64_%-40s\t%d\n", $0, cap_num++) next } END { - printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + printf("#define ARM64_NCAPS\t\t\t\t\t%d\n", cap_num) print "" print "#endif /* __ASM_CPUCAPS_H */" } diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index c9a0d1fa3209..1ea4a3dc68f8 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -48,6 +48,61 @@ # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration # item ACCDATA) though it may be more taseful to do something else. 
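The Sysreg blocks that follow are input to arch/arm64/tools/gen-sysreg.awk, which emits C constants for every register and field. As a hedged illustration of the output (the exact spellings come from the generated header, not from this hunk), a line like "Field 0 OSLK" under OSLAR_EL1 is what lets the earlier sys_regs.c changes replace the hand-rolled SYS_OSLSR_OSLK with generated names:

/* Approximately what the generator derives from "Field 0 OSLK": */
#define OSLAR_EL1_OSLK	GENMASK(0, 0)	/* illustrative */

/* ...which trap handlers can then consume directly: */
static bool oslk_being_set(u64 oslar_val)
{
	return oslar_val & OSLAR_EL1_OSLK;
}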
+Sysreg OSDTRRX_EL1 2 0 0 0 2 +Res0 63:32 +Field 31:0 DTRRX +EndSysreg + +Sysreg MDCCINT_EL1 2 0 0 2 0 +Res0 63:31 +Field 30 RX +Field 29 TX +Res0 28:0 +EndSysreg + +Sysreg MDSCR_EL1 2 0 0 2 2 +Res0 63:36 +Field 35 EHBWE +Field 34 EnSPM +Field 33 TTA +Field 32 EMBWE +Field 31 TFO +Field 30 RXfull +Field 29 TXfull +Res0 28 +Field 27 RXO +Field 26 TXU +Res0 25:24 +Field 23:22 INTdis +Field 21 TDA +Res0 20 +Field 19 SC2 +Res0 18:16 +Field 15 MDE +Field 14 HDE +Field 13 KDE +Field 12 TDCC +Res0 11:7 +Field 6 ERR +Res0 5:1 +Field 0 SS +EndSysreg + +Sysreg OSDTRTX_EL1 2 0 0 3 2 +Res0 63:32 +Field 31:0 DTRTX +EndSysreg + +Sysreg OSECCR_EL1 2 0 0 6 2 +Res0 63:32 +Field 31:0 EDECCR +EndSysreg + +Sysreg OSLAR_EL1 2 0 1 0 4 +Res0 63:1 +Field 0 OSLK +EndSysreg + Sysreg ID_PFR0_EL1 3 0 0 1 0 Res0 63:32 UnsignedEnum 31:28 RAS @@ -1538,6 +1593,78 @@ UnsignedEnum 3:0 CnP EndEnum EndSysreg +Sysreg ID_AA64MMFR3_EL1 3 0 0 7 3 +UnsignedEnum 63:60 Spec_FPACC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 59:56 ADERR + 0b0000 NI + 0b0001 DEV_ASYNC + 0b0010 FEAT_ADERR + 0b0011 FEAT_ADERR_IND +EndEnum +UnsignedEnum 55:52 SDERR + 0b0000 NI + 0b0001 DEV_SYNC + 0b0010 FEAT_ADERR + 0b0011 FEAT_ADERR_IND +EndEnum +Res0 51:48 +UnsignedEnum 47:44 ANERR + 0b0000 NI + 0b0001 ASYNC + 0b0010 FEAT_ANERR + 0b0011 FEAT_ANERR_IND +EndEnum +UnsignedEnum 43:40 SNERR + 0b0000 NI + 0b0001 SYNC + 0b0010 FEAT_ANERR + 0b0011 FEAT_ANERR_IND +EndEnum +UnsignedEnum 39:36 D128_2 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 35:32 D128 + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 31:28 MEC + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 27:24 AIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 23:20 S2POE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 19:16 S1POE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 15:12 S2PIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 11:8 S1PIE + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 7:4 SCTLRX + 0b0000 NI + 0b0001 IMP +EndEnum +UnsignedEnum 3:0 TCRX + 0b0000 NI + 0b0001 IMP +EndEnum +EndSysreg + Sysreg SCTLR_EL1 3 0 1 0 0 Field 63 TIDCP Field 62 SPINTMASK @@ -2034,7 +2161,17 @@ Fields ZCR_ELx EndSysreg Sysreg HCRX_EL2 3 4 1 2 2 -Res0 63:12 +Res0 63:23 +Field 22 GCSEn +Field 21 EnIDCP128 +Field 20 EnSDERR +Field 19 TMEA +Field 18 EnSNERR +Field 17 D128En +Field 16 PTTWI +Field 15 SCTLR2En +Field 14 TCR2En +Res0 13:12 Field 11 MSCEn Field 10 MCE2 Field 9 CMOW @@ -2153,6 +2290,87 @@ Sysreg TTBR1_EL1 3 0 2 0 1 Fields TTBRx_EL1 EndSysreg +SysregFields TCR2_EL1x +Res0 63:16 +Field 15 DisCH1 +Field 14 DisCH0 +Res0 13:12 +Field 11 HAFT +Field 10 PTTWI +Res0 9:6 +Field 5 D128 +Field 4 AIE +Field 3 POE +Field 2 E0POE +Field 1 PIE +Field 0 PnCH +EndSysregFields + +Sysreg TCR2_EL1 3 0 2 0 3 +Fields TCR2_EL1x +EndSysreg + +Sysreg TCR2_EL12 3 5 2 0 3 +Fields TCR2_EL1x +EndSysreg + +Sysreg TCR2_EL2 3 4 2 0 3 +Res0 63:16 +Field 15 DisCH1 +Field 14 DisCH0 +Field 13 AMEC1 +Field 12 AMEC0 +Field 11 HAFT +Field 10 PTTWI +Field 9:8 SKL1 +Field 7:6 SKL0 +Field 5 D128 +Field 4 AIE +Field 3 POE +Field 2 E0POE +Field 1 PIE +Field 0 PnCH +EndSysreg + +SysregFields PIRx_ELx +Field 63:60 Perm15 +Field 59:56 Perm14 +Field 55:52 Perm13 +Field 51:48 Perm12 +Field 47:44 Perm11 +Field 43:40 Perm10 +Field 39:36 Perm9 +Field 35:32 Perm8 +Field 31:28 Perm7 +Field 27:24 Perm6 +Field 23:20 Perm5 +Field 19:16 Perm4 +Field 15:12 Perm3 +Field 11:8 Perm2 +Field 7:4 Perm1 +Field 3:0 Perm0 +EndSysregFields + +Sysreg PIRE0_EL1 3 0 10 2 2 +Fields PIRx_ELx +EndSysreg + +Sysreg PIRE0_EL12 3 5 10 2 2 +Fields PIRx_ELx +EndSysreg + 
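PIRE0_EL1/PIR_EL1 are arrays of sixteen 4-bit permission slots, one per Perm field above; a PTE's permission-index bits choose the slot. A sketch of how one slot value is positioned (the kernel defines its own macros for this; the helper below is purely illustrative):

/* Place a 4-bit permission encoding into slot 'idx' of a PIRx_ELx value. */
static inline u64 pir_slot(unsigned int idx, u64 perm)
{
	return (perm & 0xf) << (idx * 4);	/* Perm0 at 3:0, Perm1 at 7:4, ... */
}

/* The PIE_E0/PIE_E1 constants loaded in __cpu_setup are built from such slots. */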
+Sysreg PIR_EL1 3 0 10 2 3 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL12 3 5 10 2 3 +Fields PIRx_ELx +EndSysreg + +Sysreg PIR_EL2 3 4 10 2 3 +Fields PIRx_ELx +EndSysreg + Sysreg LORSA_EL1 3 0 10 4 0 Res0 63:52 Field 51:16 SA @@ -2200,3 +2418,80 @@ Sysreg ICC_NMIAR1_EL1 3 0 12 9 5 Res0 63:24 Field 23:0 INTID EndSysreg + +Sysreg TRBLIMITR_EL1 3 0 9 11 0 +Field 63:12 LIMIT +Res0 11:7 +Field 6 XE +Field 5 nVM +Enum 4:3 TM + 0b00 STOP + 0b01 IRQ + 0b11 IGNR +EndEnum +Enum 2:1 FM + 0b00 FILL + 0b01 WRAP + 0b11 CBUF +EndEnum +Field 0 E +EndSysreg + +Sysreg TRBPTR_EL1 3 0 9 11 1 +Field 63:0 PTR +EndSysreg + +Sysreg TRBBASER_EL1 3 0 9 11 2 +Field 63:12 BASE +Res0 11:0 +EndSysreg + +Sysreg TRBSR_EL1 3 0 9 11 3 +Res0 63:56 +Field 55:32 MSS2 +Field 31:26 EC +Res0 25:24 +Field 23 DAT +Field 22 IRQ +Field 21 TRG +Field 20 WRAP +Res0 19 +Field 18 EA +Field 17 S +Res0 16 +Field 15:0 MSS +EndSysreg + +Sysreg TRBMAR_EL1 3 0 9 11 4 +Res0 63:12 +Enum 11:10 PAS + 0b00 SECURE + 0b01 NON_SECURE + 0b10 ROOT + 0b11 REALM +EndEnum +Enum 9:8 SH + 0b00 NON_SHAREABLE + 0b10 OUTER_SHAREABLE + 0b11 INNER_SHAREABLE +EndEnum +Field 7:0 Attr +EndSysreg + +Sysreg TRBTRG_EL1 3 0 9 11 6 +Res0 63:32 +Field 31:0 TRG +EndSysreg + +Sysreg TRBIDR_EL1 3 0 9 11 7 +Res0 63:12 +Enum 11:8 EA + 0b0000 NON_DESC + 0b0001 IGNORE + 0b0010 SERROR +EndEnum +Res0 7:6 +Field 5 F +Field 4 P +Field 3:0 Align +EndSysreg diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 03e9f6666157..cf2a6fd7dff8 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -96,6 +96,7 @@ config CSKY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select LOCK_MM_AND_FIND_VMA select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h index 60406ef9c2bb..4dab44f6143a 100644 --- a/arch/csky/include/asm/atomic.h +++ b/arch/csky/include/asm/atomic.h @@ -195,41 +195,6 @@ arch_atomic_dec_if_positive(atomic_t *v) } #define arch_atomic_dec_if_positive arch_atomic_dec_if_positive -#define ATOMIC_OP() \ -static __always_inline \ -int arch_atomic_xchg_relaxed(atomic_t *v, int n) \ -{ \ - return __xchg_relaxed(n, &(v->counter), 4); \ -} \ -static __always_inline \ -int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \ -{ \ - return __cmpxchg_relaxed(&(v->counter), o, n, 4); \ -} \ -static __always_inline \ -int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \ -{ \ - return __cmpxchg_acquire(&(v->counter), o, n, 4); \ -} \ -static __always_inline \ -int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \ -{ \ - return __cmpxchg(&(v->counter), o, n, 4); \ -} - -#define ATOMIC_OPS() \ - ATOMIC_OP() - -ATOMIC_OPS() - -#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic_cmpxchg arch_atomic_cmpxchg - -#undef ATOMIC_OPS -#undef ATOMIC_OP - #else #include <asm-generic/atomic.h> #endif diff --git a/arch/csky/include/asm/smp.h b/arch/csky/include/asm/smp.h index 668b79ce29ea..d3db334f3196 100644 --- a/arch/csky/include/asm/smp.h +++ b/arch/csky/include/asm/smp.h @@ -23,7 +23,7 @@ void __init set_send_ipi(void (*func)(const struct cpumask *mask), int irq); int __cpu_disable(void); -void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } #endif /* CONFIG_SMP */ diff --git a/arch/csky/kernel/smp.c 
b/arch/csky/kernel/smp.c index b12e2c3c387f..8e42352cbf12 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -291,12 +291,8 @@ int __cpu_disable(void) return 0; } -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: shutdown failed\n", cpu); - return; - } pr_notice("CPU%u: shutdown\n", cpu); } @@ -304,7 +300,7 @@ void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); - cpu_report_death(); + cpuhp_ap_report_dead(); while (!secondary_stack) arch_cpu_idle(); diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 6e94f8d04146..2447d083c432 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -28,58 +28,8 @@ static inline void arch_atomic_set(atomic_t *v, int new) #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) -/** - * arch_atomic_read - reads a word, atomically - * @v: pointer to atomic value - * - * Assumes all word reads on our architecture are atomic. - */ #define arch_atomic_read(v) READ_ONCE((v)->counter) -/** - * arch_atomic_xchg - atomic - * @v: pointer to memory to change - * @new: new value (technically passed in a register -- see xchg) - */ -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), (new))) - - -/** - * arch_atomic_cmpxchg - atomic compare-and-exchange values - * @v: pointer to value to change - * @old: desired old value to match - * @new: new value to put in - * - * Parameters are then pointer, value-in-register, value-in-register, - * and the output is the old value. - * - * Apparently this is complicated for archs that don't support - * the memw_locked like we do (or it's broken or whatever). - * - * Kind of the lynchpin of the rest of the generically defined routines. - * Remember V2 had that bug with dotnew predicate set by memw_locked. - * - * "old" is "expected" old val, __oldval is actual old value - */ -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) -{ - int __oldval; - - asm volatile( - "1: %0 = memw_locked(%1);\n" - " { P0 = cmp.eq(%0,%2);\n" - " if (!P0.new) jump:nt 2f; }\n" - " memw_locked(%1,P0) = %3;\n" - " if (!P0) jump 1b;\n" - "2:\n" - : "=&r" (__oldval) - : "r" (&v->counter), "r" (old), "r" (new) - : "memory", "p0" - ); - - return __oldval; -} - #define ATOMIC_OP(op) \ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ @@ -135,6 +85,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -142,21 +97,15 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -/** - * arch_atomic_fetch_add_unless - add unless the number is a given value - * @v: pointer to value - * @a: amount to add - * @u: unless value is equal to u - * - * Returns old value. 
- * - */ - static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) { int __oldval; diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index 1880d9beaf2b..621674e86232 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -66,9 +66,9 @@ void __init setup_arch(char **cmdline_p) on_simulator = 0; if (p[0] != '\0') - strlcpy(boot_command_line, p, COMMAND_LINE_SIZE); + strscpy(boot_command_line, p, COMMAND_LINE_SIZE); else - strlcpy(boot_command_line, default_command_line, + strscpy(boot_command_line, default_command_line, COMMAND_LINE_SIZE); /* @@ -76,7 +76,7 @@ void __init setup_arch(char **cmdline_p) * are both picked up by the init code. If no reason to * make them different, pass the same pointer back. */ - strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); + strscpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; parse_early_param(); diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 21fa63ce5ffc..2cd93e6bf0fe 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -9,6 +9,7 @@ menu "Processor type and features" config IA64 bool select ARCH_BINFMT_ELF_EXTRA_PHDRS + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 266c429b9137..6540a628d257 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -207,13 +207,6 @@ ATOMIC64_FETCH_OP(xor, ^) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), old, new)) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - -#define arch_atomic64_cmpxchg(v, old, new) \ - (arch_cmpxchg(&((v)->counter), old, new)) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #define arch_atomic_add(i,v) (void)arch_atomic_add_return((i), (v)) #define arch_atomic_sub(i,v) (void)arch_atomic_sub_return((i), (v)) diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h deleted file mode 100644 index 0d6b9bded56c..000000000000 --- a/arch/ia64/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - * - * Based on <asm-alpha/bugs.h>. - * - * Modified 1998, 1999, 2003 - * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co. - */ -#ifndef _ASM_IA64_BUGS_H -#define _ASM_IA64_BUGS_H - -#include <asm/processor.h> - -extern void check_bugs (void); - -#endif /* _ASM_IA64_BUGS_H */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c05728044272..5a55ac82c13a 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -627,7 +627,7 @@ setup_arch (char **cmdline_p) * is physical disk 1 partition 1 and the Linux root disk is * physical disk 1 partition 2. 
*/ - ROOT_DEV = Root_SDA2; /* default to second partition on first drive */ + ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2); if (is_uv_system()) uv_setup(cmdline_p); @@ -1067,8 +1067,7 @@ cpu_init (void) } } -void __init -check_bugs (void) +void __init arch_cpu_finalize_init(void) { ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, (unsigned long) __end___mckinley_e9_bundles); diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 72c929d9902b..f8c74ffeeefb 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -371,3 +371,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 78a02e026164..adc49f2d22e8 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -41,7 +41,7 @@ huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, if (pud) { pmd = pmd_alloc(mm, pud, taddr); if (pmd) - pte = pte_alloc_map(mm, pmd, taddr); + pte = pte_alloc_huge(mm, pmd, taddr); } return pte; } @@ -64,7 +64,7 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz) if (pud_present(*pud)) { pmd = pmd_offset(pud, taddr); if (pmd_present(*pmd)) - pte = pte_offset_map(pmd, taddr); + pte = pte_offset_huge(pmd, taddr); } } } diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 73519e13bbb3..32b9da4622ce 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -10,6 +10,7 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_PTE_SPECIAL diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 6b9aca9ab6e9..e27f0c72d324 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -29,21 +29,7 @@ #define ATOMIC_INIT(i) { (i) } -/* - * arch_atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ #define arch_atomic_read(v) READ_ONCE((v)->counter) - -/* - * arch_atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ #define arch_atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC_OP(op, I, asm_op) \ @@ -139,14 +125,6 @@ static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless -/* - * arch_atomic_sub_if_positive - conditionally subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically test @v and subtract @i if @v is greater or equal than @i. - * The function returns the old value of @v minus @i. 
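With the kernel-doc for arch_atomic_sub_if_positive() being trimmed here, its contract is worth keeping in mind for the function just below: the subtraction only commits when the old value is at least @i, but the caller always sees old minus @i. A short usage sketch via the dec_if_positive wrapper (values illustrative):

static void dec_if_positive_example(void)
{
	atomic_t v = ATOMIC_INIT(1);

	int a = arch_atomic_dec_if_positive(&v);	/* a == 0, v is now 0 */
	int b = arch_atomic_dec_if_positive(&v);	/* b == -1, v is still 0 */

	(void)a;
	(void)b;
}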
- */ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) { int result; @@ -181,31 +159,13 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) return result; } -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), (new))) - -/* - * arch_atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - */ #define arch_atomic_dec_if_positive(v) arch_atomic_sub_if_positive(1, v) #ifdef CONFIG_64BIT #define ATOMIC64_INIT(i) { (i) } -/* - * arch_atomic64_read - read atomic variable - * @v: pointer of type atomic64_t - * - */ #define arch_atomic64_read(v) READ_ONCE((v)->counter) - -/* - * arch_atomic64_set - set atomic variable - * @v: pointer of type atomic64_t - * @i: required value - */ #define arch_atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC64_OP(op, I, asm_op) \ @@ -300,14 +260,6 @@ static inline long arch_atomic64_fetch_add_unless(atomic64_t *v, long a, long u) } #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless -/* - * arch_atomic64_sub_if_positive - conditionally subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic64_t - * - * Atomically test @v and subtract @i if @v is greater or equal than @i. - * The function returns the old value of @v minus @i. - */ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v) { long result; @@ -342,14 +294,6 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v) return result; } -#define arch_atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), (new))) - -/* - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - */ #define arch_atomic64_dec_if_positive(v) arch_atomic64_sub_if_positive(1, v) #endif /* CONFIG_64BIT */ diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h deleted file mode 100644 index 98396535163b..000000000000 --- a/arch/loongarch/include/asm/bugs.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is included by init/main.c to check for architecture-dependent bugs. 
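Both check_bugs() removals in this section follow one pattern: delete asm/bugs.h, select ARCH_HAS_CPU_FINALIZE_INIT, and implement the common hook instead. A sketch of the generic side under that assumption (the real declaration lives in linux/cpu.h; the weak default shown here is illustrative of the init/main.c side, not copied from it):

/*
 * Generic fallback: architectures that select ARCH_HAS_CPU_FINALIZE_INIT
 * override this, as the ia64 and loongarch hunks in this series do.
 */
void __init __weak arch_cpu_finalize_init(void)
{
	/* nothing to finalize by default */
}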
- * - * Copyright (C) 2020-2022 Loongson Technology Corporation Limited - */ -#ifndef _ASM_BUGS_H -#define _ASM_BUGS_H - -#include <asm/cpu.h> -#include <asm/cpu-info.h> - -extern void check_bugs(void); - -#endif /* _ASM_BUGS_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 35e8a52fea11..1c2a0a2c8830 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1167,7 +1167,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #ifndef __ASSEMBLY__ -static inline u64 drdtime(void) +static __always_inline u64 drdtime(void) { int rID = 0; u64 val = 0; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 4444b13418f0..78a00359bde3 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -12,6 +12,7 @@ */ #include <linux/init.h> #include <linux/acpi.h> +#include <linux/cpu.h> #include <linux/dmi.h> #include <linux/efi.h> #include <linux/export.h> @@ -37,7 +38,6 @@ #include <asm/addrspace.h> #include <asm/alternative.h> #include <asm/bootinfo.h> -#include <asm/bugs.h> #include <asm/cache.h> #include <asm/cpu.h> #include <asm/dma.h> @@ -87,7 +87,7 @@ const char *get_system_type(void) return "generic-loongson-machine"; } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { alternative_instructions(); } diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index f377e50f3c66..c189e03cd5da 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -190,9 +190,9 @@ static u64 read_const_counter(struct clocksource *clk) return drdtime(); } -static u64 native_sched_clock(void) +static noinstr u64 sched_clock_read(void) { - return read_const_counter(NULL); + return drdtime(); } static struct clocksource clocksource_const = { @@ -211,7 +211,7 @@ int __init constant_clocksource_init(void) res = clocksource_register_hz(&clocksource_const, freq); - sched_clock_register(native_sched_clock, 64, freq); + sched_clock_register(sched_clock_read, 64, freq); pr_info("Constant clock source device register\n"); diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 40198a1ebe27..dc792b321f1e 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,6 +4,7 @@ config M68K default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CPU_FINALIZE_INIT if MMU select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index b26469a65bc1..62fdca7efce4 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -43,6 +43,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -454,7 +455,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 944a49a129be..5bfbd0444bb5 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -39,6 +39,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -411,7 
+412,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index a32dd884fcce..44302f11c9ea 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -46,6 +46,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -431,7 +432,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 23b7805309bd..f3336f1774ec 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -36,6 +36,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -403,7 +404,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5605ab5c3dcf..2d1bbac68066 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -38,6 +38,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -413,7 +414,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index d0d1f9c33756..b4428dc36102 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -37,6 +37,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -433,7 +434,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6d04314ce7ea..4cd9fa4cb10c 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -57,6 +57,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -519,7 +520,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index e6f5ae526d08..7ee9ad50f0ad 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -35,6 +35,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set 
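
The loongarch hunk above makes the same conversion that m68k and mips make later in this diff: the per-arch check_bugs() hook goes away, the architecture selects ARCH_HAS_CPU_FINALIZE_INIT, and the work moves into arch_cpu_finalize_init(), which generic init code invokes at the point where check_bugs() used to run. A minimal sketch of the opt-in, assuming nothing beyond what the hunks show:

    /* arch/<arch>/kernel/setup.c -- sketch, not verbatim kernel code */
    #include <linux/cpu.h>          /* declares arch_cpu_finalize_init() */

    void __init arch_cpu_finalize_init(void)
    {
            /* whatever the arch did in check_bugs(); on loongarch this
             * is patching in the runtime-selected alternatives */
            alternative_instructions();
    }

The matching Kconfig side is a one-line "select ARCH_HAS_CPU_FINALIZE_INIT" in the architecture's main config entry, conditional on MMU in the m68k case above.
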
+CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -402,7 +403,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index f2d4dff4787a..2488893616dc 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -36,6 +36,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -403,7 +404,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 907eedecd040..ffc676289f87 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -37,6 +37,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -420,7 +421,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 9e3d47008f21..198179657ce0 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -402,7 +402,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index f6540078cb4b..85364f6178d4 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -33,6 +33,7 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_MISC=m CONFIG_SLAB=y # CONFIG_COMPACTION is not set +CONFIG_DMAPOOL_TEST=m CONFIG_USERFAULTFD=y CONFIG_NET=y CONFIG_PACKET=y @@ -401,7 +402,6 @@ CONFIG_OCFS2_FS=m # CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_FANOTIFY=y CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set CONFIG_AUTOFS_FS=m CONFIG_FUSE_FS=m CONFIG_CUSE=m diff --git a/arch/m68k/configs/virt_defconfig b/arch/m68k/configs/virt_defconfig index 8059bd618370..311b57e73316 100644 --- a/arch/m68k/configs/virt_defconfig +++ b/arch/m68k/configs/virt_defconfig @@ -24,8 +24,6 @@ CONFIG_SUN_PARTITION=y CONFIG_SYSV68_PARTITION=y CONFIG_NET=y CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index cfba83d230fd..4bfbc25f6ecf 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -106,6 +106,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ @@ -115,6 +120,10 @@ 
ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, eor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -158,12 +167,7 @@ static inline int arch_atomic_inc_and_test(atomic_t *v) } #define arch_atomic_inc_and_test arch_atomic_inc_and_test -#ifdef CONFIG_RMW_INSNS - -#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - -#else /* !CONFIG_RMW_INSNS */ +#ifndef CONFIG_RMW_INSNS static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { @@ -177,6 +181,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) local_irq_restore(flags); return prev; } +#define arch_atomic_cmpxchg arch_atomic_cmpxchg static inline int arch_atomic_xchg(atomic_t *v, int new) { @@ -189,6 +194,7 @@ static inline int arch_atomic_xchg(atomic_t *v, int new) local_irq_restore(flags); return prev; } +#define arch_atomic_xchg arch_atomic_xchg #endif /* !CONFIG_RMW_INSNS */ diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h deleted file mode 100644 index 745530651e0b..000000000000 --- a/arch/m68k/include/asm/bugs.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/asm-m68k/bugs.h - * - * Copyright (C) 1994 Linus Torvalds - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -#ifdef CONFIG_MMU -extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */ -#else -static void check_bugs(void) -{ -} -#endif diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h index 8ed6ac14d99f..141bbdfad960 100644 --- a/arch/m68k/include/asm/mmu_context.h +++ b/arch/m68k/include/asm/mmu_context.h @@ -99,7 +99,7 @@ static inline void load_ksp_mmu(struct task_struct *task) p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pte_t *pte; + pte_t *pte = NULL; unsigned long mmuar; local_irq_save(flags); @@ -139,7 +139,7 @@ static inline void load_ksp_mmu(struct task_struct *task) pte = (mmuar >= PAGE_OFFSET) ? 
pte_offset_kernel(pmd, mmuar) : pte_offset_map(pmd, mmuar); - if (pte_none(*pte) || !pte_present(*pte)) + if (!pte || pte_none(*pte) || !pte_present(*pte)) goto bug; set_pte(pte, pte_mkyoung(*pte)); @@ -161,6 +161,8 @@ static inline void load_ksp_mmu(struct task_struct *task) bug: pr_info("ksp load failed: mm=0x%p ksp=0x08%lx\n", mm, mmuar); end: + if (pte && mmuar < PAGE_OFFSET) + pte_unmap(pte); local_irq_restore(flags); } diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index fbff1cea62ca..6f1ae01f322c 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/cpu.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/delay.h> @@ -504,7 +505,7 @@ static int __init proc_hardware_init(void) module_init(proc_hardware_init); #endif -void check_bugs(void) +void __init arch_cpu_finalize_init(void) { #if defined(CONFIG_FPU) && !defined(CONFIG_M68KFPU_EMU) if (m68k_fputype == 0) { diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index bd0274c7592e..c586034d2a7a 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -488,6 +488,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5, if (!pmd_present(*pmd)) goto bad_access; pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl); + if (!pte) + goto bad_access; if (!pte_present(*pte) || !pte_dirty(*pte) || !pte_write(*pte)) { pte_unmap_unlock(pte, ptl); diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index b1f3940bc298..4f504783371f 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 70aa0979e027..42f45abea37a 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -91,7 +91,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word) p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pte_t *pte; + pte_t *pte = NULL; + int ret = -1; int asid; local_irq_save(flags); @@ -100,47 +101,33 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word) regs->pc + (extension_word * sizeof(long)); mm = (!user_mode(regs) && KMAPAREA(mmuar)) ? &init_mm : current->mm; - if (!mm) { - local_irq_restore(flags); - return -1; - } + if (!mm) + goto out; pgd = pgd_offset(mm, mmuar); - if (pgd_none(*pgd)) { - local_irq_restore(flags); - return -1; - } + if (pgd_none(*pgd)) + goto out; p4d = p4d_offset(pgd, mmuar); - if (p4d_none(*p4d)) { - local_irq_restore(flags); - return -1; - } + if (p4d_none(*p4d)) + goto out; pud = pud_offset(p4d, mmuar); - if (pud_none(*pud)) { - local_irq_restore(flags); - return -1; - } + if (pud_none(*pud)) + goto out; pmd = pmd_offset(pud, mmuar); - if (pmd_none(*pmd)) { - local_irq_restore(flags); - return -1; - } + if (pmd_none(*pmd)) + goto out; pte = (KMAPAREA(mmuar)) ? 
pte_offset_kernel(pmd, mmuar) : pte_offset_map(pmd, mmuar); - if (pte_none(*pte) || !pte_present(*pte)) { - local_irq_restore(flags); - return -1; - } + if (!pte || pte_none(*pte) || !pte_present(*pte)) + goto out; if (write) { - if (!pte_write(*pte)) { - local_irq_restore(flags); - return -1; - } + if (!pte_write(*pte)) + goto out; set_pte(pte, pte_mkdirty(*pte)); } @@ -161,9 +148,12 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word) mmu_write(MMUOR, MMUOR_ACC | MMUOR_UAA); else mmu_write(MMUOR, MMUOR_ITLB | MMUOR_ACC | MMUOR_UAA); - + ret = 0; +out: + if (pte && !KMAPAREA(mmuar)) + pte_unmap(pte); local_irq_restore(flags); - return 0; + return ret; } void __init cf_bootmem_alloc(void) diff --git a/arch/microblaze/include/asm/cache.h b/arch/microblaze/include/asm/cache.h index a149b3e711ec..1903988b9e23 100644 --- a/arch/microblaze/include/asm/cache.h +++ b/arch/microblaze/include/asm/cache.h @@ -18,4 +18,9 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES +/* MS be sure that SLAB allocates aligned objects */ +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES + +#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES + #endif /* _ASM_MICROBLAZE_CACHE_H */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 7b9861bcd458..337f23eabc71 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -30,11 +30,6 @@ #ifndef __ASSEMBLY__ -/* MS be sure that SLAB allocates aligned objects */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES - -#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES - /* * PAGE_OFFSET -- the first address of the first page of memory. With MMU * it is set to the kernel start address (aligned on a page boundary). diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index a06cc1f97aa9..3657f5e78a3d 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -16,8 +16,6 @@ extern char *klimit; extern void mmu_reset(void); -void time_init(void); -void init_IRQ(void); void machine_early_init(const char *cmdline, unsigned int ram, unsigned int fdt, unsigned int msr, unsigned int tlb0, unsigned int tlb1); diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index c5c6186a7e8b..e424c796e297 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -20,7 +20,7 @@ void __init early_init_devtree(void *params) early_init_dt_scan(params); if (!strlen(boot_command_line)) - strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); + strscpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); memblock_allow_resize(); diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index c3aebec71c0c..c78a0ff48066 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -194,7 +194,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, preempt_disable(); ptep = pte_offset_map(pmdp, address); - if (pte_present(*ptep)) { + if (ptep && pte_present(*ptep)) { address = (unsigned long) page_address(pte_page(*ptep)); /* MS: I need add offset in page */ address += ((unsigned long)frame->tramp) & ~PAGE_MASK; @@ -203,7 +203,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, invalidate_icache_range(address, address + 8); flush_dcache_range(address, address + 8); } - pte_unmap(ptep); + if (ptep) + pte_unmap(ptep); preempt_enable(); if (err) return -EFAULT; diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 
820145e47350..858d22bf275c 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 6796d839bcfd..d49d5fc40021 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,6 +4,7 @@ config MIPS default y select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000 select ARCH_HAS_DEBUG_VIRTUAL if !64BIT select ARCH_HAS_FORTIFY_SOURCE @@ -2287,6 +2288,7 @@ config MIPS_CPS select MIPS_CM select MIPS_CPS_PM if HOTPLUG_CPU select SMP + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6 diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 549a6392a3d2..053805cb741c 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -178,7 +178,10 @@ void __init plat_mem_setup(void) ioport_resource.start = 0; ioport_resource.end = ~0; - /* intended to somewhat resemble ARM; see Documentation/arm/booting.rst */ + /* + * intended to somewhat resemble ARM; see + * Documentation/arch/arm/booting.rst + */ if (fw_arg0 == 0 && fw_arg1 == 0xffffffff) dtb = phys_to_virt(fw_arg2); else diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4212584e6efa..33c09688210f 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -345,6 +345,7 @@ void play_dead(void) int cpu = cpu_number_map(cvmx_get_core_num()); idle_task_exit(); + cpuhp_ap_report_dead(); octeon_processor_boot = 0xff; per_cpu(cpu_state, cpu) = CPU_DEAD; diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 712fb5a6a568..ba188e77768b 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -33,17 +33,6 @@ static __always_inline void arch_##pfx##_set(pfx##_t *v, type i) \ { \ WRITE_ONCE(v->counter, i); \ } \ - \ -static __always_inline type \ -arch_##pfx##_cmpxchg(pfx##_t *v, type o, type n) \ -{ \ - return arch_cmpxchg(&v->counter, o, n); \ -} \ - \ -static __always_inline type arch_##pfx##_xchg(pfx##_t *v, type n) \ -{ \ - return arch_xchg(&v->counter, n); \ -} ATOMIC_OPS(atomic, int) diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h index 653f78f3a685..84be74afcb9a 100644 --- a/arch/mips/include/asm/bugs.h +++ b/arch/mips/include/asm/bugs.h @@ -1,17 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * This is included by init/main.c to check for architecture-dependent bugs. - * * Copyright (C) 2007 Maciej W. 
Rozycki - * - * Needs: - * void check_bugs(void); */ #ifndef _ASM_BUGS_H #define _ASM_BUGS_H #include <linux/bug.h> -#include <linux/delay.h> #include <linux/smp.h> #include <asm/cpu.h> @@ -24,17 +18,6 @@ extern void check_bugs64_early(void); extern void check_bugs32(void); extern void check_bugs64(void); -static inline void __init check_bugs(void) -{ - unsigned int cpu = smp_processor_id(); - - cpu_data[cpu].udelay_val = loops_per_jiffy; - check_bugs32(); - - if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) - check_bugs64(); -} - static inline int r4k_daddiu_bug(void) { if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) diff --git a/arch/mips/include/asm/fw/cfe/cfe_api.h b/arch/mips/include/asm/fw/cfe/cfe_api.h index 25df2f4deb31..b52a6a9c26f1 100644 --- a/arch/mips/include/asm/fw/cfe/cfe_api.h +++ b/arch/mips/include/asm/fw/cfe/cfe_api.h @@ -17,9 +17,6 @@ #include <linux/types.h> #include <linux/string.h> -typedef long intptr_t; - - /* * Constants */ diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 44f9824c1d8c..75abfa834ab7 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -19,7 +19,6 @@ #define IRQ_STACK_SIZE THREAD_SIZE #define IRQ_STACK_START (IRQ_STACK_SIZE - 16) -extern void __init init_IRQ(void); extern void *irq_stack[NR_CPUS]; /* diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h index eb3ddbec1752..d8f9dec0ecc3 100644 --- a/arch/mips/include/asm/mach-loongson32/loongson1.h +++ b/arch/mips/include/asm/mach-loongson32/loongson1.h @@ -47,7 +47,6 @@ #include <regs-clk.h> #include <regs-mux.h> -#include <regs-pwm.h> #include <regs-rtc.h> #include <regs-wdt.h> diff --git a/arch/mips/include/asm/mach-loongson32/regs-pwm.h b/arch/mips/include/asm/mach-loongson32/regs-pwm.h deleted file mode 100644 index ec870c82d492..000000000000 --- a/arch/mips/include/asm/mach-loongson32/regs-pwm.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2014 Zhang, Keguang <keguang.zhang@gmail.com> - * - * Loongson 1 PWM Register Definitions. 
- */ - -#ifndef __ASM_MACH_LOONGSON32_REGS_PWM_H -#define __ASM_MACH_LOONGSON32_REGS_PWM_H - -/* Loongson 1 PWM Timer Register Definitions */ -#define PWM_CNT 0x0 -#define PWM_HRC 0x4 -#define PWM_LRC 0x8 -#define PWM_CTRL 0xc - -/* PWM Control Register Bits */ -#define CNT_RST BIT(7) -#define INT_SR BIT(6) -#define INT_EN BIT(5) -#define PWM_SINGLE BIT(4) -#define PWM_OE BIT(3) -#define CNT_EN BIT(0) - -#endif /* __ASM_MACH_LOONGSON32_REGS_PWM_H */ diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 0145bbfb5efb..5719ff49eff1 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -33,6 +33,7 @@ struct plat_smp_ops { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); + void (*cleanup_dead_cpu)(unsigned cpu); #endif #ifdef CONFIG_KEXEC void (*kexec_nonboot_cpu)(void); diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 18f3d95ecfec..60ebaed28a4c 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -148,6 +148,9 @@ #define SO_RCVMARK 75 +#define SO_PASSPIDFD 76 +#define SO_PEERPIDFD 77 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c0e65135481b..cb871eb784a7 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -11,6 +11,8 @@ * Copyright (C) 2000, 2001, 2002, 2007 Maciej W. Rozycki */ #include <linux/init.h> +#include <linux/cpu.h> +#include <linux/delay.h> #include <linux/ioport.h> #include <linux/export.h> #include <linux/screen_info.h> @@ -841,3 +843,14 @@ static int __init setnocoherentio(char *str) } early_param("nocoherentio", setnocoherentio); #endif + +void __init arch_cpu_finalize_init(void) +{ + unsigned int cpu = smp_processor_id(); + + cpu_data[cpu].udelay_val = loops_per_jiffy; + check_bugs32(); + + if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64)) + check_bugs64(); +} diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 15466d4cf4a0..c074ecce3fbf 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -392,6 +392,7 @@ static void bmips_cpu_die(unsigned int cpu) void __ref play_dead(void) { idle_task_exit(); + cpuhp_ap_report_dead(); /* flush data cache */ _dma_cache_wback_inv(0, ~0); diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 62f677b2306f..d7fdbec232da 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -503,8 +503,7 @@ void play_dead(void) } } - /* This CPU has chosen its way out */ - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cps_shutdown_this_cpu(cpu_death); @@ -527,7 +526,9 @@ static void wait_for_sibling_halt(void *ptr_cpu) } while (!(halted & TCHALT_H)); } -static void cps_cpu_die(unsigned int cpu) +static void cps_cpu_die(unsigned int cpu) { } + +static void cps_cleanup_dead_cpu(unsigned cpu) { unsigned core = cpu_core(&cpu_data[cpu]); unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]); @@ -535,12 +536,6 @@ static void cps_cpu_die(unsigned int cpu) unsigned stat; int err; - /* Wait for the cpu to choose its way out */ - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU%u: didn't offline\n", cpu); - return; - } - /* * Now wait for the CPU to actually offline. 
Without doing this that * offlining may race with one or more of: @@ -624,6 +619,7 @@ static const struct plat_smp_ops cps_smp_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, + .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif #ifdef CONFIG_KEXEC .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 1d93b85271ba..90c71d800b59 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -690,6 +690,14 @@ void flush_tlb_one(unsigned long vaddr) EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_one); +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (mp_ops->cleanup_dead_cpu) + mp_ops->cleanup_dead_cpu(cpu); +} +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static void tick_broadcast_callee(void *info) diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 253ff994ed2e..1976317d4e8b 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -389,3 +389,4 @@ 448 n32 process_mrelease sys_process_mrelease 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 n32 cachestat sys_cachestat diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 3f1886ad9d80..cfda2511badf 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -365,3 +365,4 @@ 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 n64 cachestat sys_cachestat diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 8f243e35a7b2..7692234c3768 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -438,3 +438,4 @@ 448 o32 process_mrelease sys_process_mrelease 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 o32 cachestat sys_cachestat diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig index 2ef9da0016df..a7c500959577 100644 --- a/arch/mips/loongson32/Kconfig +++ b/arch/mips/loongson32/Kconfig @@ -35,41 +35,4 @@ config LOONGSON1_LS1C select COMMON_CLK endchoice -menuconfig CEVT_CSRC_LS1X - bool "Use PWM Timer for clockevent/clocksource" - select MIPS_EXTERNAL_TIMER - depends on CPU_LOONGSON32 - help - This option changes the default clockevent/clocksource to PWM Timer, - and is required by Loongson1 CPUFreq support. - - If unsure, say N. - -choice - prompt "Select clockevent/clocksource" - depends on CEVT_CSRC_LS1X - default TIMER_USE_PWM0 - -config TIMER_USE_PWM0 - bool "Use PWM Timer 0" - help - Use PWM Timer 0 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM1 - bool "Use PWM Timer 1" - help - Use PWM Timer 1 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM2 - bool "Use PWM Timer 2" - help - Use PWM Timer 2 as the default clockevent/clocksourcer. - -config TIMER_USE_PWM3 - bool "Use PWM Timer 3" - help - Use PWM Timer 3 as the default clockevent/clocksourcer. 
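
All of the play_dead() and cpu_die() changes in this diff follow one pattern from the CPU-hotplug rework: the dying CPU announces its own exit with cpuhp_ap_report_dead() (replacing cpu_report_death()), while the survivor-side wait moves out of the cpu_die() callback into the new arch_cpuhp_cleanup_dead_cpu() hook, which MIPS dispatches through the new plat_smp_ops::cleanup_dead_cpu method. A condensed sketch, using only names that appear in the hunks above:

    void play_dead(void)                    /* runs on the dying CPU */
    {
            idle_task_exit();
            cpuhp_ap_report_dead();         /* tell the core we are gone */
            /* ... arch-specific low-power loop or power-off ... */
    }

    void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
    {
            /* runs on a surviving CPU once the core has synchronized
             * with the dead CPU; safe place for expensive teardown */
            if (mp_ops->cleanup_dead_cpu)
                    mp_ops->cleanup_dead_cpu(cpu);
    }

This split is what HOTPLUG_CORE_SYNC_DEAD provides; MIPS_CPS selects it above, and parisc and riscv select it later in this diff.
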
- -endchoice - endif # MACH_LOONGSON32 diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c index 965c04aa56fd..74ad2b17918d 100644 --- a/arch/mips/loongson32/common/time.c +++ b/arch/mips/loongson32/common/time.c @@ -5,208 +5,8 @@ #include <linux/clk.h> #include <linux/of_clk.h> -#include <linux/interrupt.h> -#include <linux/sizes.h> #include <asm/time.h> -#include <loongson1.h> -#include <platform.h> - -#ifdef CONFIG_CEVT_CSRC_LS1X - -#if defined(CONFIG_TIMER_USE_PWM1) -#define LS1X_TIMER_BASE LS1X_PWM1_BASE -#define LS1X_TIMER_IRQ LS1X_PWM1_IRQ - -#elif defined(CONFIG_TIMER_USE_PWM2) -#define LS1X_TIMER_BASE LS1X_PWM2_BASE -#define LS1X_TIMER_IRQ LS1X_PWM2_IRQ - -#elif defined(CONFIG_TIMER_USE_PWM3) -#define LS1X_TIMER_BASE LS1X_PWM3_BASE -#define LS1X_TIMER_IRQ LS1X_PWM3_IRQ - -#else -#define LS1X_TIMER_BASE LS1X_PWM0_BASE -#define LS1X_TIMER_IRQ LS1X_PWM0_IRQ -#endif - -DEFINE_RAW_SPINLOCK(ls1x_timer_lock); - -static void __iomem *timer_reg_base; -static uint32_t ls1x_jiffies_per_tick; - -static inline void ls1x_pwmtimer_set_period(uint32_t period) -{ - __raw_writel(period, timer_reg_base + PWM_HRC); - __raw_writel(period, timer_reg_base + PWM_LRC); -} - -static inline void ls1x_pwmtimer_restart(void) -{ - __raw_writel(0x0, timer_reg_base + PWM_CNT); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); -} - -void __init ls1x_pwmtimer_init(void) -{ - timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16); - if (!timer_reg_base) - panic("Failed to remap timer registers"); - - ls1x_jiffies_per_tick = DIV_ROUND_CLOSEST(mips_hpt_frequency, HZ); - - ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick); - ls1x_pwmtimer_restart(); -} - -static u64 ls1x_clocksource_read(struct clocksource *cs) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - raw_spin_lock_irqsave(&ls1x_timer_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - /* read the count */ - count = __raw_readl(timer_reg_base + PWM_CNT); - - /* - * It's possible for count to appear to go the wrong way for this - * reason: - * - * The timer counter underflows, but we haven't handled the resulting - * interrupt and incremented jiffies yet. - * - * Previous attempts to handle these cases intelligently were buggy, so - * we just do the simple thing now. 
- */ - if (count < old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags); - - return (u64) (jifs * ls1x_jiffies_per_tick) + count; -} - -static struct clocksource ls1x_clocksource = { - .name = "ls1x-pwmtimer", - .read = ls1x_clocksource_read, - .mask = CLOCKSOURCE_MASK(24), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static irqreturn_t ls1x_clockevent_isr(int irq, void *devid) -{ - struct clock_event_device *cd = devid; - - ls1x_pwmtimer_restart(); - cd->event_handler(cd); - - return IRQ_HANDLED; -} - -static int ls1x_clockevent_set_state_periodic(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick); - ls1x_pwmtimer_restart(); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_tick_resume(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - __raw_writel(__raw_readl(timer_reg_base + PWM_CTRL) & ~CNT_EN, - timer_reg_base + PWM_CTRL); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static int ls1x_clockevent_set_next(unsigned long evt, - struct clock_event_device *cd) -{ - raw_spin_lock(&ls1x_timer_lock); - ls1x_pwmtimer_set_period(evt); - ls1x_pwmtimer_restart(); - raw_spin_unlock(&ls1x_timer_lock); - - return 0; -} - -static struct clock_event_device ls1x_clockevent = { - .name = "ls1x-pwmtimer", - .features = CLOCK_EVT_FEAT_PERIODIC, - .rating = 300, - .irq = LS1X_TIMER_IRQ, - .set_next_event = ls1x_clockevent_set_next, - .set_state_shutdown = ls1x_clockevent_set_state_shutdown, - .set_state_periodic = ls1x_clockevent_set_state_periodic, - .set_state_oneshot = ls1x_clockevent_set_state_shutdown, - .tick_resume = ls1x_clockevent_tick_resume, -}; - -static void __init ls1x_time_init(void) -{ - struct clock_event_device *cd = &ls1x_clockevent; - int ret; - - if (!mips_hpt_frequency) - panic("Invalid timer clock rate"); - - ls1x_pwmtimer_init(); - - clockevent_set_clock(cd, mips_hpt_frequency); - cd->max_delta_ns = clockevent_delta2ns(0xffffff, cd); - cd->max_delta_ticks = 0xffffff; - cd->min_delta_ns = clockevent_delta2ns(0x000300, cd); - cd->min_delta_ticks = 0x000300; - cd->cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(cd); - - ls1x_clocksource.rating = 200 + mips_hpt_frequency / 10000000; - ret = clocksource_register_hz(&ls1x_clocksource, mips_hpt_frequency); - if (ret) - panic(KERN_ERR "Failed to register clocksource: %d\n", ret); - - if (request_irq(LS1X_TIMER_IRQ, ls1x_clockevent_isr, - IRQF_PERCPU | IRQF_TIMER, "ls1x-pwmtimer", - &ls1x_clockevent)) - pr_err("Failed to register ls1x-pwmtimer interrupt\n"); -} -#endif /* CONFIG_CEVT_CSRC_LS1X */ - void __init plat_time_init(void) { struct clk *clk = NULL; @@ -214,20 +14,10 @@ void __init plat_time_init(void) /* initialize LS1X clocks */ of_clk_init(NULL); -#ifdef CONFIG_CEVT_CSRC_LS1X - /* setup LS1X PWM timer */ - clk = clk_get(NULL, "ls1x-pwmtimer"); - if (IS_ERR(clk)) - panic("unable to get timer clock, err=%ld", PTR_ERR(clk)); - - mips_hpt_frequency = clk_get_rate(clk); - ls1x_time_init(); -#else /* setup mips r4k timer */ clk = clk_get(NULL, "cpu_clk"); if (IS_ERR(clk)) panic("unable to get cpu 
clock, err=%ld", PTR_ERR(clk)); mips_hpt_frequency = clk_get_rate(clk) / 2; -#endif /* CONFIG_CEVT_CSRC_LS1X */ } diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index b0e8bb9fa036..cdecd7af11a6 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -775,6 +775,7 @@ void play_dead(void) void (*play_dead_at_ckseg1)(int *); idle_task_exit(); + cpuhp_ap_report_dead(); prid_imp = read_c0_prid() & PRID_IMP_MASK; prid_rev = read_c0_prid() & PRID_REV_MASK; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 1b939abbe4ca..93c2d695588a 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -297,7 +297,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; - pte_t *ptep; + pte_t *ptep, *ptemap = NULL; int idx, pid; /* @@ -344,7 +344,12 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) } else #endif { - ptep = pte_offset_map(pmdp, address); + ptemap = ptep = pte_offset_map(pmdp, address); + /* + * update_mmu_cache() is called between pte_offset_map_lock() + * and pte_unmap_unlock(), so we can assume that ptep is not + * NULL here: and what should be done below if it were NULL? + */ #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) #ifdef CONFIG_XPA @@ -373,6 +378,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) tlbw_use_hazard(); htw_start(); flush_micro_tlb_vm(vma); + + if (ptemap) + pte_unmap(ptemap); local_irq_restore(flags); } diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c index 203870c4b86d..338849c430a5 100644 --- a/arch/nios2/kernel/cpuinfo.c +++ b/arch/nios2/kernel/cpuinfo.c @@ -47,7 +47,7 @@ void __init setup_cpuinfo(void) str = of_get_property(cpu, "altr,implementation", &len); if (str) - strlcpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl)); + strscpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl)); else strcpy(cpuinfo.cpu_impl, "<unknown>"); diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 40bc8fb75e0b..8582ed965844 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -121,7 +121,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, dtb_passed = r6; if (r7) - strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); + strscpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE); } #endif @@ -129,10 +129,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, #ifndef CONFIG_CMDLINE_FORCE if (cmdline_passed[0]) - strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); + strscpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE); #ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB else - strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); #endif #endif diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h index 326167e4783a..8ce67ec7c9a3 100644 --- a/arch/openrisc/include/asm/atomic.h +++ b/arch/openrisc/include/asm/atomic.h @@ -130,7 +130,4 @@ static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) #include <asm/cmpxchg.h> -#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v))) -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new))) - #endif /* __ASM_OPENRISC_ATOMIC_H */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 967bde65dd0e..c0b4b1c253d1 100644 --- 
a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -57,6 +57,7 @@ config PARISC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_REGS_AND_STACK_ACCESS_API + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select GENERIC_SCHED_CLOCK select GENERIC_IRQ_MIGRATION if SMP select HAVE_UNSTABLE_SCHED_CLOCK if SMP diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index dd5a299ada69..d4f023887ff8 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -73,10 +73,6 @@ static __inline__ int arch_atomic_read(const atomic_t *v) return READ_ONCE((v)->counter); } -/* exported interface */ -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #define ATOMIC_OP(op, c_op) \ static __inline__ void arch_atomic_##op(int i, atomic_t *v) \ { \ @@ -122,6 +118,11 @@ static __inline__ int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ @@ -131,6 +132,10 @@ ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -185,6 +190,11 @@ static __inline__ s64 arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub + #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, c_op) \ ATOMIC64_OP(op, c_op) \ @@ -194,6 +204,10 @@ ATOMIC64_OPS(and, &=) ATOMIC64_OPS(or, |=) ATOMIC64_OPS(xor, ^=) +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN @@ -218,11 +232,6 @@ arch_atomic64_read(const atomic64_t *v) return READ_ONCE((v)->counter); } -/* exported interface */ -#define arch_atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #endif /* !CONFIG_64BIT */ diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h deleted file mode 100644 index 0a7f9db6bd1c..000000000000 --- a/arch/parisc/include/asm/bugs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * include/asm-parisc/bugs.h - * - * Copyright (C) 1999 Mike Shaver - */ - -/* - * This is included by init/main.c to check for architecture-dependent bugs. 
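
The runs of #define arch_atomic_fetch_add arch_atomic_fetch_add added to the parisc header here (and to the m68k one earlier) look like no-ops but are load-bearing: the generated atomic fallback headers probe each operation with #ifdef and only emit a generic replacement when the macro is absent. A simplified sketch of the mechanism, not the literal generated code:

    /* arch header: the implementation exists, mark it as provided */
    #define arch_atomic_fetch_add arch_atomic_fetch_add

    /* generic fallback header, schematically: */
    #ifndef arch_atomic_fetch_add
    static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
    {
            /* one possible derivation from another provided op */
            return arch_atomic_add_return(i, v) - i;
    }
    #define arch_atomic_fetch_add arch_atomic_fetch_add
    #endif
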
- * - * Needs: - * void check_bugs(void); - */ - -#include <asm/processor.h> - -static inline void check_bugs(void) -{ -// identify_cpu(&boot_cpu_data); -} diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index e715df5385d6..5656395c95ee 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, #define pte_same(A,B) (pte_val(A) == pte_val(B)) -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index f486d3dfb6bb..be264c2b1a11 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -129,6 +129,9 @@ #define SO_RCVMARK 0x4049 +#define SO_PASSPIDFD 0x404A +#define SO_PEERPIDFD 0x404B + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index ca4a302d4365..501160250bb7 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -426,10 +426,15 @@ void flush_dcache_page(struct page *page) offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; addr = mpnt->vm_start + offset; if (parisc_requires_coherency()) { + bool needs_flush = false; pte_t *ptep; ptep = get_ptep(mpnt->vm_mm, addr); - if (ptep && pte_needs_flush(*ptep)) + if (ptep) { + needs_flush = pte_needs_flush(*ptep); + pte_unmap(ptep); + } + if (needs_flush) flush_user_cache_page(mpnt, addr); } else { /* @@ -561,14 +566,20 @@ EXPORT_SYMBOL(flush_kernel_dcache_page_addr); static void flush_cache_page_if_present(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { - pte_t *ptep = get_ptep(vma->vm_mm, vmaddr); + bool needs_flush = false; + pte_t *ptep; /* * The pte check is racy and sometimes the flush will trigger * a non-access TLB miss. Hopefully, the page has already been * flushed. */ - if (ptep && pte_needs_flush(*ptep)) + ptep = get_ptep(vma->vm_mm, vmaddr); + if (ptep) { + needs_flush = pte_needs_flush(*ptep); + pte_unmap(ptep); + } + if (needs_flush) flush_cache_page(vma, vmaddr, pfn); } @@ -635,17 +646,22 @@ static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, u pte_t *ptep; for (addr = start; addr < end; addr += PAGE_SIZE) { + bool needs_flush = false; /* * The vma can contain pages that aren't present. Although * the pte search is expensive, we need the pte to find the * page pfn and to check whether the page should be flushed. 
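
The parisc cache-flush rework above is one idiom applied three times, forced by pte_offset_map() now being failable and its result having to be unmapped again: copy what you need out of the pte, drop the mapping, and only then do the flush. Sketch with the names used in the hunks:

    bool needs_flush = false;
    unsigned long pfn = 0;
    pte_t *ptep;

    ptep = get_ptep(vma->vm_mm, addr);      /* may return NULL now */
    if (ptep) {
            needs_flush = pte_needs_flush(*ptep);
            pfn = pte_pfn(*ptep);
            pte_unmap(ptep);                /* before, not after, the flush */
    }
    if (needs_flush)
            __flush_cache_page(vma, addr, PFN_PHYS(pfn));

The same rule explains the m68k, microblaze and mips changes earlier in the diff, where every pte_offset_map() or pte_offset_map_lock() caller gains a NULL check and a matching pte_unmap().
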
*/ ptep = get_ptep(vma->vm_mm, addr); - if (ptep && pte_needs_flush(*ptep)) { + if (ptep) { + needs_flush = pte_needs_flush(*ptep); + pfn = pte_pfn(*ptep); + pte_unmap(ptep); + } + if (needs_flush) { if (parisc_requires_coherency()) { flush_user_cache_page(vma, addr); } else { - pfn = pte_pfn(*ptep); if (WARN_ON(!pfn_valid(pfn))) return; __flush_cache_page(vma, addr, PFN_PHYS(pfn)); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 71ed5391f29d..415f12d5bab3 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -164,7 +164,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr, pmd_clear(pmd); return; } - pte = pte_offset_map(pmd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); vaddr &= ~PMD_MASK; end = vaddr + size; if (end > PMD_SIZE) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 24411ab79c30..abdbf038d643 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -171,8 +171,8 @@ void __noreturn arch_cpu_idle_dead(void) local_irq_disable(); - /* Tell __cpu_die() that this CPU is now safe to dispose of. */ - (void)cpu_report_death(); + /* Tell the core that this CPU is now safe to dispose of. */ + cpuhp_ap_report_dead(); /* Ensure that the cache lines are written out. */ flush_cache_all_local(); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index b7fc859fa87d..d0eb1bd19a13 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -271,7 +271,6 @@ void arch_send_call_function_single_ipi(int cpu) static void smp_cpu_init(int cpunum) { - extern void init_IRQ(void); /* arch/parisc/kernel/irq.c */ extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */ /* Set modes and Enable floating point coprocessor */ @@ -500,11 +499,10 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { pdc_cpu_rendezvous_lock(); +} - if (!cpu_wait_death(cpu, 5)) { - pr_crit("CPU%u: cpu didn't die\n", cpu); - return; - } +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ pr_info("CPU%u: is shutting down\n", cpu); /* set task's state to interruptible sleep */ diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 0e42fceb2d5e..3c71fad78318 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -448,3 +448,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index d1d3990b83f6..a8a1a7c1e16e 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -66,7 +66,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_huge(mm, pmd, addr); } return pte; } @@ -90,7 +90,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (!pud_none(*pud)) { pmd = pmd_offset(pud, addr); if (!pmd_none(*pmd)) - pte = pte_offset_map(pmd, addr); + pte = pte_offset_huge(pmd, addr); } } } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a243fcdf346d..395044b705a1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,8 +90,7 @@ config NMI_IPI config PPC_WATCHDOG bool - depends on HARDLOCKUP_DETECTOR - depends on HAVE_HARDLOCKUP_DETECTOR_ARCH + depends on HARDLOCKUP_DETECTOR_ARCH default y help 
This is a placeholder when the powerpc hardlockup detector @@ -240,7 +239,7 @@ config PPC select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 47228b177478..5bf6a4d49268 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -126,18 +126,6 @@ ATOMIC_OPS(xor, xor, "", K) #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP -#define arch_atomic_cmpxchg(v, o, n) \ - (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_cmpxchg_relaxed(v, o, n) \ - arch_cmpxchg_relaxed(&((v)->counter), (o), (n)) -#define arch_atomic_cmpxchg_acquire(v, o, n) \ - arch_cmpxchg_acquire(&((v)->counter), (o), (n)) - -#define arch_atomic_xchg(v, new) \ - (arch_xchg(&((v)->counter), new)) -#define arch_atomic_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) - /** * atomic_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic_t @@ -396,18 +384,6 @@ static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v) } #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive -#define arch_atomic64_cmpxchg(v, o, n) \ - (arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_cmpxchg_relaxed(v, o, n) \ - arch_cmpxchg_relaxed(&((v)->counter), (o), (n)) -#define arch_atomic64_cmpxchg_acquire(v, o, n) \ - arch_cmpxchg_acquire(&((v)->counter), (o), (n)) - -#define arch_atomic64_xchg(v, new) \ - (arch_xchg(&((v)->counter), new)) -#define arch_atomic64_xchg_relaxed(v, new) \ - arch_xchg_relaxed(&((v)->counter), (new)) - /** * atomic64_fetch_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t diff --git a/arch/powerpc/include/asm/bugs.h b/arch/powerpc/include/asm/bugs.h deleted file mode 100644 index 01b8f6ca4dbb..000000000000 --- a/arch/powerpc/include/asm/bugs.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_BUGS_H -#define _ASM_POWERPC_BUGS_H - -/* - */ - -/* - * This file is included by 'init/main.c' to check for - * architecture-dependent bugs. 
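
The arch_atomic{,64}_cmpxchg()/arch_atomic{,64}_xchg() wrappers deleted from powerpc here (and from alpha, mips, openrisc and m68k's RMW_INSNS branch earlier in this diff) are redundant because the atomic fallback generation can now derive them from the scalar primitives. Schematically, when an arch does not define its own:

    #ifndef arch_atomic_cmpxchg
    static __always_inline int
    arch_atomic_cmpxchg(atomic_t *v, int old, int new)
    {
            return arch_cmpxchg(&v->counter, old, new);
    }
    #define arch_atomic_cmpxchg arch_atomic_cmpxchg
    #endif

Each architecture therefore only keeps the variants it genuinely implements differently, such as m68k's IRQ-masking versions for !CONFIG_RMW_INSNS, which stay and are now marked with the define-to-itself convention.
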
- */ - -static inline void check_bugs(void) { } - -#endif /* _ASM_POWERPC_BUGS_H */ diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ae0a68a838e8..69232231d270 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -33,6 +33,10 @@ #define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) +#ifdef CONFIG_NOT_COHERENT_CACHE +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#endif + #if !defined(__ASSEMBLY__) #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index deadd2149426..f257cacb49a9 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -50,9 +50,14 @@ extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; void __do_IRQ(struct pt_regs *regs); -extern void __init init_IRQ(void); int irq_choose_cpu(const struct cpumask *mask); +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, + bool exclude_self); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace +#endif + #endif /* _ASM_IRQ_H */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index c3c7adef74de..49a75340c3e0 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -3,18 +3,10 @@ #define _ASM_NMI_H #ifdef CONFIG_PPC_WATCHDOG -extern void arch_touch_nmi_watchdog(void); long soft_nmi_interrupt(struct pt_regs *regs); -void watchdog_nmi_set_timeout_pct(u64 pct); +void watchdog_hardlockup_set_timeout_pct(u64 pct); #else -static inline void arch_touch_nmi_watchdog(void) {} -static inline void watchdog_nmi_set_timeout_pct(u64 pct) {} -#endif - -#ifdef CONFIG_NMI_IPI -extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, - bool exclude_self); -#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace +static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {} #endif extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index 56f217606327..b9ac9e3a771c 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -12,10 +12,6 @@ #define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32 -#ifdef CONFIG_NOT_COHERENT_CACHE -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES -#endif - #if defined(CONFIG_PPC_256K_PAGES) || \ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)) #define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9972626ddaf6..6a88bfdaa69b 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x) return addr >= IOREMAP_BASE && addr < IOREMAP_END; } - -struct seq_file; -void arch_report_meminfo(struct seq_file *m); #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 265801a3e94c..e95660e69414 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -417,9 +417,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) { raw_local_irq_save(*flags); hard_irq_disable(); - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 
0); + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -427,15 +427,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) noinstr static void nmi_ipi_lock(void) { - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); } noinstr static void nmi_ipi_unlock(void) { smp_mb(); - WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1); - arch_atomic_set(&__nmi_ipi_lock, 0); + WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1); + raw_atomic_set(&__nmi_ipi_lock, 0); } noinstr static void nmi_ipi_unlock_end(unsigned long *flags) @@ -1605,6 +1605,7 @@ static void add_cpu_to_masks(int cpu) } /* Activate a secondary processor. */ +__no_stack_protector void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index a0be127475b1..8c0b08b7a80e 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -537,3 +537,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 828d0f4106d2..cba6dd15de3b 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -200,7 +200,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); + tau_workq = alloc_ordered_workqueue("tau", 0); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index dbcc4a793f0b..edb2dd1f53eb 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -438,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return HRTIMER_NORESTART; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -479,7 +479,7 @@ static void start_watchdog(void *arg) return; } - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) @@ -546,7 +546,7 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_stop(void) +void watchdog_hardlockup_stop(void) { int cpu; @@ -554,7 +554,7 @@ void watchdog_nmi_stop(void) stop_watchdog_on_cpu(cpu); } -void watchdog_nmi_start(void) +void watchdog_hardlockup_start(void) { int cpu; @@ -566,7 +566,7 @@ void watchdog_nmi_start(void) /* * Invoked from core watchdog init. 
*/ -int __init watchdog_nmi_probe(void) +int __init watchdog_hardlockup_probe(void) { int err; @@ -582,7 +582,7 @@ int __init watchdog_nmi_probe(void) } #ifdef CONFIG_PPC_PSERIES -void watchdog_nmi_set_timeout_pct(u64 pct) +void watchdog_hardlockup_set_timeout_pct(u64 pct) { pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); WRITE_ONCE(wd_timeout_pct, pct); diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 461307b89c3a..572707858d65 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -509,7 +509,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full, } else { pte_t *pte; - pte = pte_offset_map(p, 0); + pte = pte_offset_kernel(p, 0); kvmppc_unmap_free_pte(kvm, pte, full, lpid); pmd_clear(p); } diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c index a64ea0a7ef96..21fcad97ae80 100644 --- a/arch/powerpc/mm/book3s64/hash_tlb.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -239,12 +239,16 @@ void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long local_irq_save(flags); arch_enter_lazy_mmu_mode(); start_pte = pte_offset_map(pmd, addr); + if (!start_pte) + goto out; for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) { unsigned long pteval = pte_val(*pte); if (pteval & H_PAGE_HASHPTE) hpte_need_flush(mm, addr, pte, pteval, 0); addr += PAGE_SIZE; } + pte_unmap(start_pte); +out: arch_leave_lazy_mmu_mode(); local_irq_restore(flags); } diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 81d7185e2ae8..d19fb1f3007d 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -105,7 +105,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n, FOLL_WRITE | FOLL_LONGTERM, - mem->hpages + entry, NULL); + mem->hpages + entry); if (ret == n) { pinned += n; continue; diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index ce804b7bf84e..0bd4866d9824 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -795,12 +795,20 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) goto out; if (current->active_mm == mm) { + unsigned long flags; + WARN_ON_ONCE(current->mm != NULL); - /* Is a kernel thread and is using mm as the lazy tlb */ + /* + * It is a kernel thread and is using mm as the lazy tlb, so + * switch it to init_mm. This is not always called from IPI + * (e.g., flush_type_needed), so must disable irqs. 
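
The powerpc watchdog changes are the arch side of the generic hardlockup-detector rename in this cycle: the NMI_WATCHDOG_ENABLED bit becomes WATCHDOG_HARDLOCKUP_ENABLED and the watchdog_nmi_*() hooks become watchdog_hardlockup_*(), with no behavioral change intended. The hook set an arch detector now implements, as used in the hunks above:

    void watchdog_hardlockup_stop(void);        /* was watchdog_nmi_stop()  */
    void watchdog_hardlockup_start(void);       /* was watchdog_nmi_start() */
    int __init watchdog_hardlockup_probe(void); /* was watchdog_nmi_probe() */

    /* and inside the arch timer callback: */
    if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
            return HRTIMER_NORESTART;

The pseries-only watchdog_nmi_set_timeout_pct() follows the same rename, which is why the mobility.c callers further down change too.
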
+ */ + local_irq_save(flags); mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; switch_mm_irqs_off(mm, &init_mm, current); mmdrop_lazy_tlb(mm); + local_irq_restore(flags); } /* diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index b75a9fb99599..0dc85556dec5 100644 --- a/arch/powerpc/mm/book3s64/subpage_prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -71,6 +71,8 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, if (pmd_none(*pmd)) return; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return; arch_enter_lazy_mmu_mode(); for (; npages > 0; --npages) { pte_update(mm, addr, pte, 0, 0, 0); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b900933507da..f7c683b672c1 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -183,7 +183,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, return NULL; if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT) - return pte_alloc_map(mm, (pmd_t *)hpdp, addr); + return pte_alloc_huge(mm, (pmd_t *)hpdp, addr); BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 193cc9c39422..0c41f4b005bc 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -76,7 +76,8 @@ int pmac_newworld; static int current_root_goodness = -1; -#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ +/* sda1 - slightly silly choice */ +#define DEFAULT_ROOT_DEVICE MKDEV(SCSI_DISK0_MAJOR, 1) sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; EXPORT_SYMBOL(sys_ctrler); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 719c97a155ed..47f8eabd1bee 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -564,8 +564,7 @@ int __init dlpar_workqueue_init(void) if (pseries_hp_wq) return 0; - pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", - WQ_UNBOUND, 1); + pseries_hp_wq = alloc_ordered_workqueue("pseries hotplug workqueue", 0); return pseries_hp_wq ? 
0 : -ENOMEM; } diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 6f30113b5468..cd632ba9ebff 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -750,7 +750,7 @@ static int pseries_migrate_partition(u64 handle) goto out; if (factor) - watchdog_nmi_set_timeout_pct(factor); + watchdog_hardlockup_set_timeout_pct(factor); ret = pseries_suspend(handle); if (ret == 0) { @@ -766,7 +766,7 @@ static int pseries_migrate_partition(u64 handle) pseries_cancel_migration(handle, ret); if (factor) - watchdog_nmi_set_timeout_pct(0); + watchdog_hardlockup_set_timeout_pct(0); out: vas_migration_handler(VAS_RESUME); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 70c4c59a1a8f..fae747cc57d2 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3376,12 +3376,15 @@ static void show_pte(unsigned long addr) printf("pmdp @ 0x%px = 0x%016lx\n", pmdp, pmd_val(*pmdp)); ptep = pte_offset_map(pmdp, addr); - if (pte_none(*ptep)) { + if (!ptep || pte_none(*ptep)) { + if (ptep) + pte_unmap(ptep); printf("no valid PTE\n"); return; } format_pte(ptep, pte_val(*ptep)); + pte_unmap(ptep); sync(); __delay(200); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a11b1c038c6d..a08917f681af 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -123,6 +123,7 @@ config RISCV select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index bba472928b53..f5dfef6c2153 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -238,78 +238,6 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless #endif -/* - * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a full barrier. 
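The xmon show_pte() and flush_hash_table_pmd_range() hunks above follow the same new rule as the other pte_offset_map() callers in this series: the lookup can now fail and return NULL (the page table may be freed concurrently), and every successful map must be paired with pte_unmap(). A kernel-style sketch of the locked variant of that contract (function name illustrative; not buildable outside the kernel tree):

static void touch_one_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte)		/* page table disappeared under us */
		return;		/* caller retries or bails out */
	/* ... read or update *pte while holding ptl ... */
	pte_unmap_unlock(pte, ptl);	/* pair with every successful map */
}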
- */ -#define ATOMIC_OP(c_t, prefix, size) \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg_relaxed(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg_acquire(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \ -{ \ - return __xchg_release(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \ -{ \ - return __arch_xchg(&(v->counter), n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \ - c_t o, c_t n) \ -{ \ - return __cmpxchg_relaxed(&(v->counter), o, n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \ - c_t o, c_t n) \ -{ \ - return __cmpxchg_acquire(&(v->counter), o, n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \ - c_t o, c_t n) \ -{ \ - return __cmpxchg_release(&(v->counter), o, n, size); \ -} \ -static __always_inline \ -c_t arch_atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \ -{ \ - return __cmpxchg(&(v->counter), o, n, size); \ -} - -#ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS() \ - ATOMIC_OP(int, , 4) -#else -#define ATOMIC_OPS() \ - ATOMIC_OP(int, , 4) \ - ATOMIC_OP(s64, 64, 8) -#endif - -ATOMIC_OPS() - -#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed -#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire -#define arch_atomic_xchg_release arch_atomic_xchg_release -#define arch_atomic_xchg arch_atomic_xchg -#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed -#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire -#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release -#define arch_atomic_cmpxchg arch_atomic_cmpxchg - -#undef ATOMIC_OPS -#undef ATOMIC_OP - static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) { int prev, rc; diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 43b9ebfbd943..8e10a94430a2 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -16,6 +16,4 @@ void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void)); struct fwnode_handle *riscv_get_intc_hwnode(void); -extern void __init init_IRQ(void); - #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index c4b77017ec58..0d555847cde6 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -70,7 +70,7 @@ asmlinkage void smp_callin(void); #if defined CONFIG_HOTPLUG_CPU int __cpu_disable(void); -void __cpu_die(unsigned int cpu); +static inline void __cpu_die(unsigned int cpu) { } #endif /* CONFIG_HOTPLUG_CPU */ #else diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index d6a7428f6248..a06697846e69 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -88,6 +88,4 @@ static inline int read_current_timer(unsigned long *timer_val) return 0; } -extern void time_init(void); - #endif /* _ASM_RISCV_TIMEX_H */ diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index a941adc7cbf2..457a18efcb11 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -8,6 +8,7 @@ #include <linux/sched.h> #include <linux/err.h> 
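The RISC-V wrappers deleted above hand-rolled the relaxed/acquire/release/fully-ordered flavours of xchg and cmpxchg; the generic atomic layer can now derive all four from the base arch_cmpxchg*() definitions, honouring the removed comment's full-barrier requirement for the plain forms. The four orderings, sketched with C11 atomics purely for illustration (this is not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>

static bool cas(atomic_int *v, int old, int new, memory_order ord)
{
	/* failure ordering may always be relaxed; success ordering varies */
	return atomic_compare_exchange_strong_explicit(v, &old, new, ord,
						       memory_order_relaxed);
}

/* cas(v, o, n, memory_order_relaxed)  ~  arch_atomic_cmpxchg_relaxed()
 * cas(v, o, n, memory_order_acquire)  ~  arch_atomic_cmpxchg_acquire()
 * cas(v, o, n, memory_order_release)  ~  arch_atomic_cmpxchg_release()
 * cas(v, o, n, memory_order_seq_cst)  ~  arch_atomic_cmpxchg() (full barrier)
 */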
#include <linux/irq.h> +#include <linux/cpuhotplug.h> #include <linux/cpu.h> #include <linux/sched/hotplug.h> #include <asm/irq.h> @@ -49,17 +50,15 @@ int __cpu_disable(void) return ret; } +#ifdef CONFIG_HOTPLUG_CPU /* - * Called on the thread which is asking for a CPU to be shutdown. + * Called on the thread which is asking for a CPU to be shut down, after the + * CPU has reported dead to the hotplug core. */ -void __cpu_die(unsigned int cpu) +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { int ret = 0; - if (!cpu_wait_death(cpu, 5)) { - pr_err("CPU %u: didn't die\n", cpu); - return; - } pr_notice("CPU%u: off\n", cpu); /* Verify from the firmware if the cpu is really stopped */ @@ -76,9 +75,10 @@ void __noreturn arch_cpu_idle_dead(void) { idle_task_exit(); - (void)cpu_report_death(); + cpuhp_ap_report_dead(); cpu_ops[smp_processor_id()]->cpu_stop(); /* It should never reach here */ BUG(); } +#endif diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index e0ef56dc57b9..542883b3b49b 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -67,7 +67,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, for_each_napot_order(order) { if (napot_cont_size(order) == sz) { - pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order)); + pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order)); break; } } @@ -114,7 +114,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, for_each_napot_order(order) { if (napot_cont_size(order) == sz) { - pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order)); + pte = pte_offset_huge(pmd, addr & napot_cont_mask(order)); break; } } diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index bd2e27f82532..dc20e166983e 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -31,7 +31,7 @@ $(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS +CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY CFLAGS_string.o := -D__DISABLE_EXPORTS CFLAGS_ctype.o := -D__DISABLE_EXPORTS diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6dab9c1be508..5b39918b7042 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,6 +117,7 @@ config S390 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index acb1f8b53105..c67f59db7a51 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -45,6 +45,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat static pte_t pte_z; +static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + pgtable_populate(start, end, mode); +} + static void kasan_populate_shadow(void) { pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); @@ -95,17 +102,17 @@ static void kasan_populate_shadow(void) */ for_each_physmem_usable_range(i, &start, &end) - pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW); + kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ -
pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW); + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); } else { untracked_end = MODULES_VADDR; } /* populate kasan shadow for untracked memory */ - pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW); - pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW); + kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index be3bf03bf361..aa95cf6dfabb 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -116,6 +116,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 769c7eed8b6a..f041945f9148 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -107,6 +107,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 29dc827e0fe8..d29a9d908797 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 +5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017,2020 + * Copyright IBM Corp. 2017, 2023 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> */ @@ -132,7 +132,8 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb, if (i > 0 && ret == -EAGAIN && in_task()) if (msleep_interruptible(1000)) return -EINTR; - ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk); + ret = pkey_keyblob2pkey(kb->key, kb->keylen, + pk->protkey, &pk->len, &pk->type); if (ret == 0) break; } @@ -145,6 +146,7 @@ static inline int __paes_convert_key(struct s390_paes_ctx *ctx) int ret; struct pkey_protkey pkey; + pkey.len = sizeof(pkey.protkey); ret = __paes_keyblob2pkey(&ctx->kb, &pkey); if (ret) return ret; @@ -414,6 +416,9 @@ static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) { struct pkey_protkey pkey0, pkey1; + pkey0.len = sizeof(pkey0.protkey); + pkey1.len = sizeof(pkey1.protkey); + if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) || __paes_keyblob2pkey(&ctx->kb[1], &pkey1)) return -EINVAL; diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h index c37eb921bfbf..a873e873e1ee 100644 --- a/arch/s390/include/asm/asm-prototypes.h +++ b/arch/s390/include/asm/asm-prototypes.h @@ -6,4 +6,8 @@ #include <asm/fpu/api.h> #include <asm-generic/asm-prototypes.h> +__int128_t __ashlti3(__int128_t a, int b); +__int128_t __ashrti3(__int128_t a, int b); +__int128_t __lshrti3(__int128_t a, int b); + #endif /* _ASM_S390_PROTOTYPES_H */ diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 06e0e42f4eec..aae0315374de 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -190,38 +190,18 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, #define arch_cmpxchg_local arch_cmpxchg #define arch_cmpxchg64_local arch_cmpxchg -#define system_has_cmpxchg_double() 1 
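This cmpxchg.h hunk (continued below) replaces the paired-long cmpxchg_double() with a first-class 128-bit primitive: CDSG already operates on one aligned 16-byte quantity, so exposing it as arch_cmpxchg128() over u128 removes the two-pointer contortions and the adjacency VM_BUG_ON. What the primitive provides, sketched with the GCC __atomic builtins (illustrative only; this may go through libatomic on targets without a native 16-byte CAS, while on s390x the compiler can emit CDSG directly):

typedef unsigned __int128 u128_t;

static u128_t cmpxchg128_sketch(u128_t *ptr, u128_t old, u128_t new)
{
	/* strong CAS; on failure 'old' is rewritten with the observed value */
	__atomic_compare_exchange_n(ptr, &old, new, 0,
				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
	return old;	/* previous value, matching the kernel primitive */
}

Callers loop until the returned value equals the expected one, which is exactly how the perf_cpum_sf conversion from __cdsg() to cmpxchg128() later in this series uses it.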
+#define system_has_cmpxchg128() 1 -static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2, - unsigned long o1, unsigned long o2, - unsigned long n1, unsigned long n2) +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) { - union register_pair old = { .even = o1, .odd = o2, }; - union register_pair new = { .even = n1, .odd = n2, }; - int cc; - asm volatile( " cdsg %[old],%[new],%[ptr]\n" - " ipm %[cc]\n" - " srl %[cc],28\n" - : [cc] "=&d" (cc), [old] "+&d" (old.pair) - : [new] "d" (new.pair), - [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2) + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) : "memory", "cc"); - return !cc; + return old; } -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ -({ \ - typeof(p1) __p1 = (p1); \ - typeof(p2) __p2 = (p2); \ - \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ - __cmpxchg_double((unsigned long)__p1, (unsigned long)__p2, \ - (unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2)); \ -}) +#define arch_cmpxchg128 arch_cmpxchg128 #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 646b12981f20..b378e2b57ad8 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -2,7 +2,7 @@ /* * CP Assist for Cryptographic Functions (CPACF) * - * Copyright IBM Corp. 2003, 2017 + * Copyright IBM Corp. 2003, 2023 * Author(s): Thomas Spatzier * Jan Glauber * Harald Freudenberger (freude@de.ibm.com) @@ -132,6 +132,11 @@ #define CPACF_PCKMO_ENC_AES_128_KEY 0x12 #define CPACF_PCKMO_ENC_AES_192_KEY 0x13 #define CPACF_PCKMO_ENC_AES_256_KEY 0x14 +#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20 +#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21 +#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22 +#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28 +#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29 /* * Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 7e417d7de568..a0de5b9b02ea 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -140,7 +140,7 @@ union hws_trailer_header { unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ unsigned long long overflow; /* 64 - Overflow Count */ }; - __uint128_t val; + u128 val; }; struct hws_trailer_entry { diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h index 0d1c74a7a650..a4d2e103f116 100644 --- a/arch/s390/include/asm/os_info.h +++ b/arch/s390/include/asm/os_info.h @@ -16,6 +16,9 @@ #define OS_INFO_VMCOREINFO 0 #define OS_INFO_REIPL_BLOCK 1 +#define OS_INFO_FLAGS_ENTRY 2 + +#define OS_INFO_FLAG_REIPL_CLEAR (1UL << 0) struct os_info_entry { u64 addr; @@ -30,8 +33,8 @@ struct os_info { u16 version_minor; u64 crashkernel_addr; u64 crashkernel_size; - struct os_info_entry entry[2]; - u8 reserved[4024]; + struct os_info_entry entry[3]; + u8 reserved[4004]; } __packed; void os_info_init(void); diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 081837b391e3..264095dd84bc 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -148,6 +148,22 @@ #define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, o, n) 
this_cpu_cmpxchg_8(pcp, o, n) + +#define this_cpu_cmpxchg128(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + u128 old__, new__, ret__; \ + pcp_op_T__ *ptr__; \ + old__ = oval; \ + new__ = nval; \ + preempt_disable_notrace(); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ + ret__ = cmpxchg128((void *)ptr__, old__, new__); \ + preempt_enable_notrace(); \ + ret__; \ +}) + #define arch_this_cpu_xchg(pcp, nval) \ ({ \ typeof(pcp) *ptr__; \ @@ -164,24 +180,6 @@ #define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval) #define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval) -#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - typeof(pcp1) *p1__; \ - typeof(pcp2) *p2__; \ - int ret__; \ - \ - preempt_disable_notrace(); \ - p1__ = raw_cpu_ptr(&(pcp1)); \ - p2__ = raw_cpu_ptr(&(pcp2)); \ - ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__, \ - (unsigned long)(o1), (unsigned long)(o2), \ - (unsigned long)(n1), (unsigned long)(n2)); \ - preempt_enable_notrace(); \ - ret__; \ -}) - -#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double - #include <asm-generic/percpu.h> #endif /* __ARCH_S390_PERCPU__ */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6822a11c2c8a..c55f3c3365af 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count) atomic_long_add(count, &direct_pages_count[level]); } -struct seq_file; -void arch_report_meminfo(struct seq_file *m); - /* * The S390 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 8e9c582592b3..9e41a74fce9a 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -3,6 +3,7 @@ #define _ASM_S390_MEM_DETECT_H #include <linux/types.h> +#include <asm/page.h> enum physmem_info_source { MEM_DETECT_NONE = 0, @@ -133,7 +134,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) #define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \ for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \ - range && range->end; range = range->chain, \ + range && range->end; range = range->chain ? __va(range->chain) : NULL, \ *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t, @@ -145,7 +146,7 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range return range; } if (range->chain) - return range->chain; + return __va(range->chain); while (++*t < RR_MAX) { range = &physmem_info.reserved[*t]; if (range->end) diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h index dd3d20c332ac..47d80a7451a6 100644 --- a/arch/s390/include/asm/pkey.h +++ b/arch/s390/include/asm/pkey.h @@ -2,7 +2,7 @@ /* * Kernelspace interface to the pkey device driver * - * Copyright IBM Corp. 2016,2019 + * Copyright IBM Corp. 
2016, 2023 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -23,6 +23,6 @@ * @return 0 on success, negative errno value on failure */ int pkey_keyblob2pkey(const u8 *key, u32 keylen, - struct pkey_protkey *protkey); + u8 *protkey, u32 *protkeylen, u32 *protkeytype); #endif /* _KAPI_PKEY_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c7c97921ed8d..a674c7d25da5 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -52,9 +52,6 @@ struct thread_info { struct task_struct; -void arch_release_task_struct(struct task_struct *tsk); -int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); - void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index ce878e85b6e4..4d646659a5f5 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -63,7 +63,7 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk) return cc; } -static inline void store_tod_clock_ext(union tod_clock *tod) +static __always_inline void store_tod_clock_ext(union tod_clock *tod) { asm volatile("stcke %0" : "=Q" (*tod) : : "cc"); } @@ -177,7 +177,7 @@ static inline void local_tick_enable(unsigned long comp) typedef unsigned long cycles_t; -static inline unsigned long get_tod_clock(void) +static __always_inline unsigned long get_tod_clock(void) { union tod_clock clk; @@ -204,6 +204,11 @@ void init_cpu_timer(void); extern union tod_clock tod_clock_base; +static __always_inline unsigned long __get_tod_clock_monotonic(void) +{ + return get_tod_clock() - tod_clock_base.tod; +} + /** * get_clock_monotonic - returns current time in clock rate units * @@ -216,7 +221,7 @@ static inline unsigned long get_tod_clock_monotonic(void) unsigned long tod; preempt_disable_notrace(); - tod = get_tod_clock() - tod_clock_base.tod; + tod = __get_tod_clock_monotonic(); preempt_enable_notrace(); return tod; } @@ -240,7 +245,7 @@ static inline unsigned long get_tod_clock_monotonic(void) * -> ns = (th * 125) + ((tl * 125) >> 9); * */ -static inline unsigned long tod_to_ns(unsigned long todval) +static __always_inline unsigned long tod_to_ns(unsigned long todval) { return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 924b876f992c..f7bae1c63bd6 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -2,7 +2,7 @@ /* * Userspace interface to the pkey device driver * - * Copyright IBM Corp. 2017, 2019 + * Copyright IBM Corp. 
2017, 2023 * * Author: Harald Freudenberger <freude@de.ibm.com> * @@ -32,10 +32,15 @@ #define MINKEYBLOBSIZE SECKEYBLOBSIZE /* defines for the type field within the pkey_protkey struct */ -#define PKEY_KEYTYPE_AES_128 1 -#define PKEY_KEYTYPE_AES_192 2 -#define PKEY_KEYTYPE_AES_256 3 -#define PKEY_KEYTYPE_ECC 4 +#define PKEY_KEYTYPE_AES_128 1 +#define PKEY_KEYTYPE_AES_192 2 +#define PKEY_KEYTYPE_AES_256 3 +#define PKEY_KEYTYPE_ECC 4 +#define PKEY_KEYTYPE_ECC_P256 5 +#define PKEY_KEYTYPE_ECC_P384 6 +#define PKEY_KEYTYPE_ECC_P521 7 +#define PKEY_KEYTYPE_ECC_ED25519 8 +#define PKEY_KEYTYPE_ECC_ED448 9 /* the newer ioctls use a pkey_key_type enum for type information */ enum pkey_key_type { diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 8a617be28bb4..7af69948b290 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt) int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) { Elf64_Phdr *phdr_notes, *phdr_loads; + size_t alloc_size; int mem_chunk_cnt; void *ptr, *hdr; - u32 alloc_size; u64 hdr_off; /* If we are not in kdump or zfcp/nvme dump mode return */ diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 34674e38826b..9f41853f36b9 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -34,14 +34,12 @@ void kernel_stack_overflow(struct pt_regs * regs); void handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs); -void __init init_IRQ(void); void do_io_irq(struct pt_regs *regs); void do_ext_irq(struct pt_regs *regs); void do_restart(void *arg); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); -void __init time_init(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f44f70de9661..85a00d97a314 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -176,6 +176,8 @@ static bool reipl_fcp_clear; static bool reipl_ccw_clear; static bool reipl_eckd_clear; +static unsigned long os_info_flags; + static inline int __diag308(unsigned long subcode, unsigned long addr) { union register_pair r1; @@ -1938,6 +1940,20 @@ static void dump_reipl_run(struct shutdown_trigger *trigger) struct lowcore *abs_lc; unsigned int csum; + /* + * Set REIPL_CLEAR flag in os_info flags entry indicating + * 'clear' sysfs attribute has been set on the panicked system + * for specified reipl type. + * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN. 
+ */ + if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) || + (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) || + (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) || + (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) || + reipl_type == IPL_TYPE_NSS || + reipl_type == IPL_TYPE_UNKNOWN) + os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR; + os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags)); csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); abs_lc = get_abs_lowcore(); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index f1b35dcdf3eb..42215f9404af 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, rc = apply_rela_bits(loc, val, 0, 64, 0, write); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { - val += (Elf_Addr) me->mem[MOD_TEXT].base - loc; + val += (Elf_Addr)me->mem[MOD_TEXT].base + + me->arch.got_offset - loc; rc = apply_rela_bits(loc, val, 1, 32, 1, write); } break; diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index cf1b6e8a708d..90679143534b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) } struct cpu_cf_events { + refcount_t refcnt; /* Reference count */ atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state; /* For perf_event_open SVC */ u64 dev_state; /* For /dev/hwctr */ @@ -88,9 +89,6 @@ struct cpu_cf_events { unsigned int sets; /* # Counter set saved in memory */ }; -/* Per-CPU event structure for the counter facility */ -static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events); - static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ static debug_info_t *cf_dbg; @@ -103,6 +101,221 @@ static debug_info_t *cf_dbg; */ static struct cpumf_ctr_info cpumf_ctr_info; +struct cpu_cf_ptr { + struct cpu_cf_events *cpucf; +}; + +static struct cpu_cf_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct cpu_cf_ptr __percpu *cfptr; +} cpu_cf_root; + +/* + * Serialize event initialization and event removal. Both are called from + * user space in task context with perf_event_open() and close() + * system calls. + * + * This mutex serializes functions cpum_cf_alloc_cpu() called at event + * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu() + * called at event removal via call back function hw_perf_event_destroy() + * when the event is deleted. They are serialized to enforce correct + * bookkeeping of pointer and reference counts anchored by + * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the + * per CPU pointers stored in cpu_cf_root::cfptr. + */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Get pointer to per-cpu structure. + * + * Function get_cpu_cfhw() is called from + * - cfset_copy_all(): This function is protected by cpus_read_lock(), so + * CPU hot plug remove can not happen. Event removal requires a close() + * first. + * + * Function this_cpu_cfhw() is called from perf common code functions: + * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): + * All functions execute with interrupts disabled on that particular CPU. + * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). + * + * Therefore it is safe to access the CPU specific pointer to the event. 
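The scheme this comment describes boils down to a refcounted global anchor whose per-CPU slots are themselves refcounted. A stripped-down sketch of the first-user-allocates / last-user-frees bookkeeping, assuming an external mutex such as pmc_reserve_mutex serializes all callers (plain C; the fixed array stands in for alloc_percpu(), and all names are illustrative):

#include <stdlib.h>

#define NR_CPUS_SKETCH 8	/* stand-in for the possible-CPU count */

struct slot { int refcnt; void *data; };

static struct {
	int refcnt;		/* overall active events */
	struct slot *percpu;	/* stand-in for the percpu pointer array */
} root;

/* First caller allocates the pointer array; later callers share it. */
static int root_get(void)
{
	if (root.refcnt++ > 0)
		return 0;
	root.percpu = calloc(NR_CPUS_SKETCH, sizeof(*root.percpu));
	if (!root.percpu) {
		root.refcnt = 0;
		return -1;
	}
	return 0;
}

/* Last caller frees everything, as cpum_cf_free_root() does. */
static void root_put(void)
{
	if (--root.refcnt == 0) {
		free(root.percpu);
		root.percpu = NULL;
	}
}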
+ */ +static struct cpu_cf_events *get_cpu_cfhw(int cpu) +{ + struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; + + if (p) { + struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); + + return q->cpucf; + } + return NULL; +} + +static struct cpu_cf_events *this_cpu_cfhw(void) +{ + return get_cpu_cfhw(smp_processor_id()); +} + +/* Disable counter sets on dedicated CPU */ +static void cpum_cf_reset_cpu(void *flags) +{ + lcctl(0); +} + +/* Free per CPU data when the last event is removed. */ +static void cpum_cf_free_root(void) +{ + if (!refcount_dec_and_test(&cpu_cf_root.refcnt)) + return; + free_percpu(cpu_cf_root.cfptr); + cpu_cf_root.cfptr = NULL; + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n", + __func__, refcount_read(&cpu_cf_root.refcnt), + cpu_cf_root.cfptr); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int cpum_cf_alloc_root(void) +{ + int rc = 0; + + if (refcount_inc_not_zero(&cpu_cf_root.refcnt)) + return rc; + + /* The memory is already zeroed. */ + cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr); + if (cpu_cf_root.cfptr) { + refcount_set(&cpu_cf_root.refcnt, 1); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + } else { + rc = -ENOMEM; + } + + return rc; +} + +/* Free CPU counter data structure for a PMU */ +static void cpum_cf_free_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + + mutex_lock(&pmc_reserve_mutex); + /* + * When invoked via CPU hotplug handler, there might be no events + * installed or that particular CPU might not have an + * event installed. This anchor pointer can be NULL! + */ + if (!cpu_cf_root.cfptr) + goto out; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + /* + * Might be zero when called from CPU hotplug handler and no event + * installed on that CPU, but on different CPUs. + */ + if (!cpuhw) + goto out; + + if (refcount_dec_and_test(&cpuhw->refcnt)) { + kfree(cpuhw); + p->cpucf = NULL; + } + cpum_cf_free_root(); +out: + mutex_unlock(&pmc_reserve_mutex); +} + +/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */ +static int cpum_cf_alloc_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + int rc; + + mutex_lock(&pmc_reserve_mutex); + rc = cpum_cf_alloc_root(); + if (rc) + goto unlock; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + + if (!cpuhw) { + cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL); + if (cpuhw) { + p->cpucf = cpuhw; + refcount_set(&cpuhw->refcnt, 1); + } else { + rc = -ENOMEM; + } + } else { + refcount_inc(&cpuhw->refcnt); + } + if (rc) { + /* + * Error in allocation of event, decrement anchor. Since + * cpu_cf_event in not created, its destroy() function is not + * invoked. Adjust the reference counter for the anchor. + */ + cpum_cf_free_root(); + } +unlock: + mutex_unlock(&pmc_reserve_mutex); + return rc; +} + +/* + * Create/delete per CPU data structures for /dev/hwctr interface and events + * created by perf_event_open(). + * If cpu is -1, track task on all available CPUs. This requires + * allocation of hardware data structures for all CPUs. This setup handles + * perf_event_open() with task context and /dev/hwctr interface. 
+ * If cpu is non-zero install event on this CPU only. This setup handles + * perf_event_open() with CPU context. + */ +static int cpum_cf_alloc(int cpu) +{ + cpumask_var_t mask; + int rc; + + if (cpu == -1) { + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + for_each_online_cpu(cpu) { + rc = cpum_cf_alloc_cpu(cpu); + if (rc) { + for_each_cpu(cpu, mask) + cpum_cf_free_cpu(cpu); + break; + } + cpumask_set_cpu(cpu, mask); + } + free_cpumask_var(mask); + } else { + rc = cpum_cf_alloc_cpu(cpu); + } + return rc; +} + +static void cpum_cf_free(int cpu) +{ + if (cpu == -1) { + for_each_online_cpu(cpu) + cpum_cf_free_cpu(cpu); + } else { + cpum_cf_free_cpu(cpu); + } +} + #define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ /* interval in seconds */ @@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) */ static void cpumf_pmu_enable(struct pmu *pmu) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int err; - if (cpuhw->flags & PMU_F_ENABLED) + if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) return; err = lcctl(cpuhw->state | cpuhw->dev_state); @@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu) */ static void cpumf_pmu_disable(struct pmu *pmu) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - int err; + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); u64 inactive; + int err; - if (!(cpuhw->flags & PMU_F_ENABLED)) + if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) return; inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); @@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -#define PMC_INIT 0UL -#define PMC_RELEASE 1UL - -static void cpum_cf_setup_cpu(void *flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - switch ((unsigned long)flags) { - case PMC_INIT: - cpuhw->flags |= PMU_F_RESERVED; - break; - - case PMC_RELEASE: - cpuhw->flags &= ~PMU_F_RESERVED; - break; - } - - /* Disable CPU counter sets */ - lcctl(0); - debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n", - __func__, *(int *)flags, cpuhw->flags, - cpuhw->state); -} - -/* Initialize the CPU-measurement counter facility */ -static int __kernel_cpumcf_begin(void) -{ - on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1); - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} - -/* Release the CPU-measurement counter facility */ -static void __kernel_cpumcf_end(void) -{ - on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1); - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); -} - -/* Number of perf events counting hardware events */ -static atomic_t num_events = ATOMIC_INIT(0); -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_events) == 0) - __kernel_cpumcf_end(); - mutex_unlock(&pmc_reserve_mutex); + cpum_cf_free(event->cpu); } /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ @@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static void cpumf_hw_inuse(void) -{ - mutex_lock(&pmc_reserve_mutex); - if (atomic_inc_return(&num_events) == 1) - __kernel_cpumcf_begin(); - mutex_unlock(&pmc_reserve_mutex); -} - static int is_userspace_event(u64 
ev) { return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || @@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) } /* Initialize for using the CPU-measurement counter facility */ - cpumf_hw_inuse(); + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; /* @@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; int i; @@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event, static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct hw_perf_event *hwc = &event->hw; int i; @@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) false); if (cfdiag_diffctr(cpuhw, event->hw.config_base)) cfdiag_push_sample(event, cpuhw); - } else if (cpuhw->flags & PMU_F_RESERVED) { - /* Only update when PMU not hotplugged off */ + } else { hw_perf_event_update(event); } hwc->state |= PERF_HES_UPTODATE; @@ -867,7 +1024,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); ctr_set_enable(&cpuhw->state, event->hw.config_base); event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; @@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int i; cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = { .read = cpumf_pmu_read, }; -static int cpum_cf_setup(unsigned int cpu, unsigned long flags) -{ - local_irq_disable(); - cpum_cf_setup_cpu((void *)flags); - local_irq_enable(); - return 0; -} +static struct cfset_session { /* CPUs and counter set bit mask */ + struct list_head head; /* Head of list of active processes */ +} cfset_session = { + .head = LIST_HEAD_INIT(cfset_session.head) +}; + +static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ +/* + * Synchronize access to device /dev/hwc. This mutex protects against + * concurrent access to functions cfset_open() and cfset_release(). + * Same for CPU hotplug add and remove events triggering + * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). + * It also serializes concurrent device ioctl access from multiple + * processes accessing /dev/hwc. + * + * The mutex protects concurrent access to the /dev/hwctr session management + * struct cfset_session and reference counting variable cfset_opencnt. + */ +static DEFINE_MUTEX(cfset_ctrset_mutex); +/* + * CPU hotplug handles only /dev/hwctr device. + * For perf_event_open() the CPU hotplug handling is done on kernel common + * code: + * - CPU add: Nothing is done since a file descriptor can not be created + * and returned to the user. + * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and + * pmu_delete(). The event itself is removed when the file descriptor is + * closed. 
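Both hotplug callbacks below are driven by the common hotplug state machine rather than by open-coded notifiers. The registration presumably looks like the following sketch (the state constant and step name come from the kernel's cpuhotplug tables; treat the exact wiring as an assumption, since it is not part of this excerpt):

static int __init cpum_cf_hotplug_wire(void)
{
	/* startup runs on each CPU as it comes online, teardown on removal */
	return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
				 "perf/s390/cf:online",
				 cpum_cf_online_cpu, cpum_cf_offline_cpu);
}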
+ */ static int cfset_online_cpu(unsigned int cpu); + static int cpum_cf_online_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__, - cpu, in_interrupt()); - cpum_cf_setup(cpu, PMC_INIT); - return cfset_online_cpu(cpu); + int rc = 0; + + debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d " + "opencnt %d\n", __func__, cpu, + refcount_read(&cpu_cf_root.refcnt), + refcount_read(&cfset_opencnt)); + /* + * Ignore notification for perf_event_open(). + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + rc = cpum_cf_alloc_cpu(cpu); + if (!rc) + cfset_online_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return rc; } static int cfset_offline_cpu(unsigned int cpu); + static int cpum_cf_offline_cpu(unsigned int cpu) { - debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu); - cfset_offline_cpu(cpu); - return cpum_cf_setup(cpu, PMC_RELEASE); + debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n", + __func__, cpu, refcount_read(&cpu_cf_root.refcnt), + refcount_read(&cfset_opencnt)); + /* + * During task exit processing of grouped perf events triggered by CPU + * hotplug processing, pmu_disable() is called as part of perf context + * removal process. Therefore do not trigger event removal now for + * perf_event_open() created events. Perf common code triggers event + * destruction when the event file descriptor is closed. + * + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + cfset_offline_cpu(cpu); + cpum_cf_free_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; } /* Return true if store counter set multiple instruction is available */ @@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_CMC); - cpuhw = this_cpu_ptr(&cpu_cf_events); /* * Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. */ - if (!(cpuhw->flags & PMU_F_RESERVED)) + cpuhw = this_cpu_cfhw(); + if (!cpuhw) return; /* counter authorization change alert */ @@ -1039,19 +1250,11 @@ out1: * counter set via normal file operations. 
*/ -static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */ -static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */ struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ unsigned int sets; /* Counter set bit mask */ atomic_t cpus_ack; /* # CPUs successfully executed func */ }; -static struct cfset_session { /* CPUs and counter set bit mask */ - struct list_head head; /* Head of list of active processes */ -} cfset_session = { - .head = LIST_HEAD_INIT(cfset_session.head) -}; - struct cfset_request { /* CPUs and counter set bit mask */ unsigned long ctrset; /* Bit mask of counter set to read */ cpumask_t mask; /* CPU mask to read from */ @@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p) /* Stop all counter sets via ioctl interface */ static void cfset_ioctl_off(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int rc; - /* Check if any counter set used by /dev/hwc */ + /* Check if any counter set used by /dev/hwctr */ for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) if ((p->sets & cpumf_ctr_ctl[rc])) { if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { @@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm) /* Start counter sets on particular CPU */ static void cfset_ioctl_on(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int rc; @@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm) static void cfset_release_cpu(void *p) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); int rc; debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n", @@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; } - if (!atomic_dec_return(&cfset_opencnt)) + if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */ on_each_cpu(cfset_release_cpu, NULL, 1); + cpum_cf_free(-1); + } mutex_unlock(&cfset_ctrset_mutex); - - hw_perf_event_destroy(NULL); return 0; } +/* + * Open via /dev/hwctr device. Allocate all per CPU resources on the first + * open of the device. The last close releases all per CPU resources. + * Parallel perf_event_open system calls also use per CPU resources. + * These invocations are handled via reference counting on the per CPU data + * structures. 
+ */ static int cfset_open(struct inode *inode, struct file *file) { - if (!capable(CAP_SYS_ADMIN)) + int rc = 0; + + if (!perfmon_capable()) return -EPERM; + file->private_data = NULL; + mutex_lock(&cfset_ctrset_mutex); - if (atomic_inc_return(&cfset_opencnt) == 1) - cfset_session_init(); + if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */ + rc = cpum_cf_alloc(-1); + if (!rc) { + cfset_session_init(); + refcount_set(&cfset_opencnt, 1); + } + } mutex_unlock(&cfset_ctrset_mutex); - cpumf_hw_inuse(); - file->private_data = NULL; /* nonseekable_open() never fails */ - return nonseekable_open(inode, file); + return rc ?: nonseekable_open(inode, file); } static int cfset_all_start(struct cfset_request *req) @@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask) ctrset_read = (struct s390_ctrset_read __user *)arg; uptr = ctrset_read->data; for_each_cpu(cpu, mask) { - struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu); + struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu); struct s390_ctrset_cpudata __user *ctrset_cpudata; ctrset_cpudata = uptr; @@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, /* Read all counter sets. */ static void cfset_cpu_read(void *parm) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); struct cfset_call_on_cpu_parm *p = parm; int set, set_size; size_t space; @@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm) cpuhw->used += space; cpuhw->sets += 1; } + debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, + cpuhw->sets, cpuhw->used); } - debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__, - cpuhw->sets, cpuhw->used); } static int cfset_all_read(unsigned long arg, struct cfset_request *req) @@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = { .name = S390_HWCTR_DEVICE, .minor = MISC_DYNAMIC_MINOR, .fops = &cfset_fops, + .mode = 0666, }; /* Hotplug add of a CPU. Scan through all active processes and add @@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu) struct cfset_call_on_cpu_parm p; struct cfset_request *rp; - mutex_lock(&cfset_ctrset_mutex); if (!list_empty(&cfset_session.head)) { list_for_each_entry(rp, &cfset_session.head, node) { p.sets = rp->ctrset; @@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu) cpumask_set_cpu(cpu, &rp->mask); } } - mutex_unlock(&cfset_ctrset_mutex); return 0; } /* Hotplug remove of a CPU. Scan through all active processes and clear * that CPU from the list of CPUs supplied with ioctl(..., START, ...). + * Adjust reference counts. 
*/ static int cfset_offline_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; struct cfset_request *rp; - mutex_lock(&cfset_ctrset_mutex); if (!list_empty(&cfset_session.head)) { list_for_each_entry(rp, &cfset_session.head, node) { p.sets = rp->ctrset; @@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu) cpumask_clear_cpu(cpu, &rp->mask); } } - mutex_unlock(&cfset_ctrset_mutex); return 0; } @@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event) } /* Initialize for using the CPU-measurement counter facility */ - cpumf_hw_inuse(); + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7ef72f5ff52e..8ecfbce4ac92 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } -static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) -{ - asm volatile( - " cdsg %[old],%[new],%[ptr]\n" - : [old] "+d" (old), [ptr] "+QS" (*ptr) - : [new] "d" (new) - : "memory", "cc"); - return old; -} - /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) new.f = 0; new.a = 1; new.overflow = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); /* Advance to next sample-data-block */ @@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, } new.a = 1; new.overflow = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); return true; } @@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, new.a = 1; else new.a = 0; - prev.val = __cdsg(&te->header.val, old.val, new.val); + prev.val = cmpxchg128(&te->header.val, old.val, new.val); } while (prev.val != old.val); *overflow += orig_overflow; } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index a7b339c4fd7c..fe7d1774ded1 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -36,7 +36,7 @@ struct paicrypt_map { unsigned long *page; /* Page for CPU to store counters */ struct pai_userdata *save; /* Page to store no-zero counters */ unsigned int active_events; /* # of PAI crypto users */ - unsigned int refcnt; /* Reference count mapped buffers */ + refcount_t refcnt; /* Reference count mapped buffers */ enum paievt_mode mode; /* Type of event */ struct perf_event *event; /* Perf event for sampling */ }; @@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event) static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" - " mode %d refcnt %d\n", __func__, + " mode %d refcnt %u\n", __func__, event->attr.config, event->cpu, - cpump->active_events, cpump->mode, cpump->refcnt); - if (!--cpump->refcnt) { + cpump->active_events, cpump->mode, + refcount_read(&cpump->refcnt)); + if (refcount_dec_and_test(&cpump->refcnt)) { debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", __func__, (unsigned long)cpump->page, 
cpump->save); @@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) /* Allocate memory for counter page and counter extraction. * Only the first counting event has to allocate a page. */ - if (cpump->page) + if (cpump->page) { + refcount_inc(&cpump->refcnt); goto unlock; + } rc = -ENOMEM; cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); @@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) goto unlock; } rc = 0; + refcount_set(&cpump->refcnt, 1); unlock: /* If rc is non-zero, do not set mode and reference count */ if (!rc) { - cpump->refcnt++; cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" - " mode %d refcnt %d page %#lx save %p rc %d\n", + " mode %d refcnt %u page %#lx save %p rc %d\n", __func__, a->sample_period, cpump->active_events, - cpump->mode, cpump->refcnt, + cpump->mode, refcount_read(&cpump->refcnt), (unsigned long)cpump->page, cpump->save, rc); mutex_unlock(&pai_reserve_mutex); return rc; diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index fcea307d7529..3b4f384f77f7 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -50,7 +50,7 @@ struct paiext_map { struct pai_userdata *save; /* Area to store non-zero counters */ enum paievt_mode mode; /* Type of event */ unsigned int active_events; /* # of PAI Extension users */ - unsigned int refcnt; + refcount_t refcnt; struct perf_event *event; /* Perf event for sampling */ struct paiext_cb *paiext_cb; /* PAI extension control block area */ }; @@ -60,14 +60,14 @@ struct paiext_mapptr { }; static struct paiext_root { /* Anchor to per CPU data */ - int refcnt; /* Overall active events */ + refcount_t refcnt; /* Overall active events */ struct paiext_mapptr __percpu *mapptr; } paiext_root; /* Free per CPU data when the last event is removed. */ static void paiext_root_free(void) { - if (!--paiext_root.refcnt) { + if (refcount_dec_and_test(&paiext_root.refcnt)) { free_percpu(paiext_root.mapptr); paiext_root.mapptr = NULL; } @@ -80,7 +80,7 @@ static void paiext_root_free(void) */ static int paiext_root_alloc(void) { - if (++paiext_root.refcnt == 1) { + if (!refcount_inc_not_zero(&paiext_root.refcnt)) { /* The memory is already zeroed. */ paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); if (!paiext_root.mapptr) { @@ -91,6 +91,7 @@ static int paiext_root_alloc(void) */ return -ENOMEM; } + refcount_set(&paiext_root.refcnt, 1); } return 0; } @@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event) mutex_lock(&paiext_reserve_mutex); cpump->event = NULL; - if (!--cpump->refcnt) /* Last reference gone */ + if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); mutex_unlock(&paiext_reserve_mutex); @@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) rc = -ENOMEM; cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); if (!cpump) - goto unlock; + goto undo; /* Allocate memory for counter area and counter extraction. * These are @@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) GFP_KERNEL); if (!cpump->save || !cpump->area || !cpump->paiext_cb) { paiext_free(mp); - goto unlock; + goto undo; } + refcount_set(&cpump->refcnt, 1); cpump->mode = a->sample_period ? 
PAI_MODE_SAMPLING : PAI_MODE_COUNTING; } else { @@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) if (cpump->mode == PAI_MODE_SAMPLING || (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { rc = -EBUSY; - goto unlock; + goto undo; } + refcount_inc(&cpump->refcnt); } rc = 0; cpump->event = event; - ++cpump->refcnt; -unlock: +undo: if (rc) { /* Error in allocation of event, decrement anchor. Since * the event in not created, its destroy() function is never @@ -211,6 +213,7 @@ unlock: */ paiext_root_free(); } +unlock: mutex_unlock(&paiext_reserve_mutex); /* If rc is non-zero, no increment of counter/sampler was done. */ return rc; diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index b68f47541169..a6935af2235c 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat sys_cachestat diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 6b7b6d5e3632..276278199c44 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -102,6 +102,11 @@ void __init time_early_init(void) ((long) qui.old_leap * 4096000000L); } +unsigned long long noinstr sched_clock_noinstr(void) +{ + return tod_to_ns(__get_tod_clock_monotonic()); +} + /* * Scheduler clock - returns current time in nanosec units. */ diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index cb2ee06df286..3c62d1b218b1 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -294,6 +294,8 @@ again: rc = -ENXIO; ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); + if (!ptep) + goto out; if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { page = pte_page(*ptep); rc = -EAGAIN; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index da6dac36e959..9bd0a873f3b1 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2777,7 +2777,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) mmap_read_lock(kvm->mm); get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE, - &page, NULL, NULL); + &page, NULL); mmap_read_unlock(kvm->mm); return page; } diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 580d2e3265cb..7c50eca85ca4 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for s390-specific library files.. 
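The paicrypt and paiext hunks above convert bare integer reference counters to refcount_t, which saturates and warns instead of silently wrapping. The idiom both files converge on: refcount_inc_not_zero() takes a reference only while the object is live, the first user allocates and publishes with refcount_set(..., 1), and refcount_dec_and_test() tears down on the last put. A kernel-style sketch, with callers serialized by a mutex as in the patch; the helper names are hypothetical:

static refcount_t users = REFCOUNT_INIT(0);

static int get_shared(void)
{
	if (refcount_inc_not_zero(&users))
		return 0;		/* already live: reference taken */
	if (alloc_backing())		/* hypothetical first-user setup */
		return -ENOMEM;
	refcount_set(&users, 1);	/* publish: only 0 -> 1 here */
	return 0;
}

static void put_shared(void)
{
	if (refcount_dec_and_test(&users))
		free_backing();		/* hypothetical last-user teardown */
}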
# -lib-y += delay.o string.o uaccess.o find.o spinlock.o +lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o obj-y += mem.o xor.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S new file mode 100644 index 000000000000..de33cf02cfd2 --- /dev/null +++ b/arch/s390/lib/tishift.S @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/nospec-insn.h> +#include <asm/export.h> + + .section .noinstr.text, "ax" + + GEN_BR_THUNK %r14 + +SYM_FUNC_START(__ashlti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + srlg %r3,%r1,0(%r3) + sllg %r0,%r0,0(%r4) + sllg %r1,%r1,0(%r4) + ogr %r0,%r3 + j 1f +0: sllg %r0,%r1,-64(%r4) + lghi %r1,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) + +SYM_FUNC_START(__ashrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srag %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srag %r1,%r0,-64(%r4) + srag %r0,%r0,63 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__lshrti3) + lmg %r0,%r1,0(%r3) + cije %r4,0,1f + lhi %r3,64 + sr %r3,%r4 + jnh 0f + sllg %r3,%r0,0(%r3) + srlg %r1,%r1,0(%r4) + srlg %r0,%r0,0(%r4) + ogr %r1,%r3 + j 1f +0: srlg %r1,%r0,-64(%r4) + lghi %r0,0 +1: stmg %r0,%r1,0(%r2) + BR_EX %r14 +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index dc90d1eb0d55..f4b6fc746fce 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -895,12 +895,12 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, /** * gmap_pte_op_end - release the page table lock - * @ptl: pointer to the spinlock pointer + * @ptep: pointer to the locked pte + * @ptl: pointer to the page table spinlock */ -static void gmap_pte_op_end(spinlock_t *ptl) +static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl) { - if (ptl) - spin_unlock(ptl); + pte_unmap_unlock(ptep, ptl); } /** @@ -1011,7 +1011,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, { int rc; pte_t *ptep; - spinlock_t *ptl = NULL; + spinlock_t *ptl; unsigned long pbits = 0; if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) @@ -1025,7 +1025,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0; /* Protect and unlock. */ rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits); - gmap_pte_op_end(ptl); + gmap_pte_op_end(ptep, ptl); return rc; } @@ -1154,7 +1154,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) /* Do *NOT* clear the _PAGE_INVALID bit! 
*/ rc = 0; } - gmap_pte_op_end(ptl); + gmap_pte_op_end(ptep, ptl); } if (!rc) break; @@ -1248,7 +1248,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr, if (!rc) gmap_insert_rmap(sg, vmaddr, rmap); spin_unlock(&sg->guest_table_lock); - gmap_pte_op_end(ptl); + gmap_pte_op_end(ptep, ptl); } radix_tree_preload_end(); if (rc) { @@ -2156,7 +2156,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) tptep = (pte_t *) gmap_table_walk(sg, saddr, 0); if (!tptep) { spin_unlock(&sg->guest_table_lock); - gmap_pte_op_end(ptl); + gmap_pte_op_end(sptep, ptl); radix_tree_preload_end(); break; } @@ -2167,7 +2167,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) rmap = NULL; rc = 0; } - gmap_pte_op_end(ptl); + gmap_pte_op_end(sptep, ptl); spin_unlock(&sg->guest_table_lock); } radix_tree_preload_end(); @@ -2495,7 +2495,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], continue; if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep)) set_bit(i, bitmap); - spin_unlock(ptl); + pte_unmap_unlock(ptep, ptl); } } gmap_pmd_op_end(gmap, pmdp); @@ -2537,7 +2537,12 @@ static inline void thp_split_mm(struct mm_struct *mm) * Remove all empty zero pages from the mapping for lazy refaulting * - This must be called after mm->context.has_pgste is set, to avoid * future creation of zero pages - * - This must be called after THP was enabled + * - This must be called after THP was disabled. + * + * mm contracts with s390, that even if mm were to remove a page table, + * racing with the loop below and so causing pte_offset_map_lock() to fail, + * it will never insert a page table containing empty zero pages once + * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set. */ static int __zap_zero_pages(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *walk) @@ -2549,6 +2554,8 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start, spinlock_t *ptl; ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!ptep) + break; if (is_zero_pfn(pte_pfn(*ptep))) ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID)); pte_unmap_unlock(ptep, ptl); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 5ba3bd8a7b12..ca5a418c58a8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -4,6 +4,7 @@ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> */ #include <linux/hugetlb.h> +#include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <asm/cacheflush.h> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6effb24de6d9..3bd2ab2a9a34 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -829,7 +829,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, default: return -EFAULT; } - +again: ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); @@ -850,6 +850,8 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, spin_unlock(ptl); ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!ptep) + goto again; new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); @@ -938,7 +940,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) default: return -EFAULT; } - +again: ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); @@ -955,6 +957,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) spin_unlock(ptl); ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!ptep) + 
goto again; new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ pgste_val(new) &= ~PGSTE_GR_BIT; @@ -1000,7 +1004,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, default: return -EFAULT; } - +again: ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); @@ -1017,6 +1021,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, spin_unlock(ptl); ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!ptep) + goto again; pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; paddr = pte_val(*ptep) & PAGE_MASK; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 5b22c6e24528..b9dcb4ae6c59 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -667,7 +667,15 @@ static void __init memblock_region_swap(void *a, void *b, int size) #ifdef CONFIG_KASAN #define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static inline int set_memory_kasan(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN_DOWN(__sha(start)); + end = PAGE_ALIGN(__sha(end)); + return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT); +} #endif + /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug @@ -737,10 +745,8 @@ void __init vmem_map_init(void) } #ifdef CONFIG_KASAN - for_each_mem_range(i, &base, &end) { - set_memory_rwnx(__sha(base), - (__sha(end) - __sha(base)) >> PAGE_SHIFT); - } + for_each_mem_range(i, &base, &end) + set_memory_kasan(base, end); #endif set_memory_rox((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT); diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index cc8cf5abea15..4e930f566878 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -10,7 +10,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS +CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 393023d09245..2b3ce4fd3956 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -6,6 +6,7 @@ config SUPERH select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c index ab9170494dcc..89cd4a3b4cca 100644 --- a/arch/sh/drivers/dma/dma-api.c +++ b/arch/sh/drivers/dma/dma-api.c @@ -198,7 +198,7 @@ int request_dma(unsigned int chan, const char *dev_id) if (atomic_xchg(&channel->busy, 1)) return -EBUSY; - strlcpy(channel->dev_id, dev_id, sizeof(channel->dev_id)); + strscpy(channel->dev_id, dev_id, sizeof(channel->dev_id)); if (info->ops->request) { result = info->ops->request(channel); diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index 059791fd394f..cf1c10f15528 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -71,6 +71,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return 
arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -78,6 +83,10 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 7665de9d00d0..b4090cc35493 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -55,6 +55,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ @@ -64,6 +69,10 @@ ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index b63dcfbfa14e..9ef1fb1dd12e 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -73,6 +73,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -80,6 +85,10 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index 528bfeda78f5..7a18cb2a1c1a 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -30,9 +30,6 @@ #include <asm/atomic-irq.h> #endif -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) - #endif /* CONFIG_CPU_J2 */ #endif /* __ASM_SH_ATOMIC_H */ diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h deleted file mode 100644 index fe52abb69cea..000000000000 --- a/arch/sh/include/asm/bugs.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_SH_BUGS_H -#define __ASM_SH_BUGS_H - -/* - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Needs: - * void check_bugs(void); - */ - -/* - * I don't know of any Super-H bugs yet. 
- */ - -#include <asm/processor.h> - -extern void select_idle_routine(void); - -static void __init check_bugs(void) -{ - extern unsigned long loops_per_jiffy; - char *p = &init_utsname()->machine[2]; /* "sh" */ - - select_idle_routine(); - - current_cpu_data.loops_per_jiffy = loops_per_jiffy; - - switch (current_cpu_data.family) { - case CPU_FAMILY_SH2: - *p++ = '2'; - break; - case CPU_FAMILY_SH2A: - *p++ = '2'; - *p++ = 'a'; - break; - case CPU_FAMILY_SH3: - *p++ = '3'; - break; - case CPU_FAMILY_SH4: - *p++ = '4'; - break; - case CPU_FAMILY_SH4A: - *p++ = '4'; - *p++ = 'a'; - break; - case CPU_FAMILY_SH4AL_DSP: - *p++ = '4'; - *p++ = 'a'; - *p++ = 'l'; - *p++ = '-'; - *p++ = 'd'; - *p++ = 's'; - *p++ = 'p'; - break; - case CPU_FAMILY_UNKNOWN: - /* - * Specifically use CPU_FAMILY_UNKNOWN rather than - * default:, so we're able to have the compiler whine - * about unhandled enumerations. - */ - break; - } - - printk("CPU: %s\n", get_cpu_subtype(&current_cpu_data)); - -#ifndef __LITTLE_ENDIAN__ - /* 'eb' means 'Endian Big' */ - *p++ = 'e'; - *p++ = 'b'; -#endif - *p = '\0'; -} -#endif /* __ASM_SH_BUGS_H */ diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index 32dfa6b82ec6..b38dbc975581 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -14,6 +14,12 @@ #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +/* + * Some drivers need to perform DMA into kmalloc'ed buffers + * and so we have to increase the kmalloc minalign for this. + */ +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES + #define __read_mostly __section(".data..read_mostly") #ifndef __ASSEMBLY__ diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 1c4923502fd4..0f384b1f45ca 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -22,7 +22,6 @@ extern unsigned short *irq_mask_register; /* * PINT IRQs */ -void init_IRQ_pint(void); void make_imask_irq(unsigned int irq); static inline int generic_irq_demux(int irq) diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 09ac6c7faee0..62f4b9edcb98 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -174,10 +174,4 @@ typedef struct page *pgtable_t; #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> -/* - * Some drivers need to perform DMA into kmalloc'ed buffers - * and so we have to increase the kmalloc minalign for this. 
- */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES - #endif /* __ASM_SH_PAGE_H */ diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 85a6c1c3c16e..73fba7c922f9 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -166,6 +166,8 @@ extern unsigned int instruction_size(unsigned int insn); #define instruction_size(insn) (2) #endif +void select_idle_routine(void); + #endif /* __ASSEMBLY__ */ #include <asm/processor_32.h> diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h index 69dbae2949b0..7fe7002d1d50 100644 --- a/arch/sh/include/asm/rtc.h +++ b/arch/sh/include/asm/rtc.h @@ -2,8 +2,6 @@ #ifndef _ASM_RTC_H #define _ASM_RTC_H -void time_init(void); - #define RTC_CAP_4_DIGIT_YEAR (1 << 0) struct sh_rtc_platform_info { diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 1400fbb8b423..9f19a682d315 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -84,9 +84,6 @@ static inline struct thread_info *current_thread_info(void) #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) -extern void arch_task_cache_init(void); -extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -extern void arch_release_task_struct(struct task_struct *tsk); extern void init_thread_xstate(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index d662503b0665..045d93f151fd 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -15,6 +15,7 @@ #include <linux/irqflags.h> #include <linux/smp.h> #include <linux/atomic.h> +#include <asm/processor.h> #include <asm/smp.h> #include <asm/bl_bit.h> diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index af977ec4ca5e..b3da2757faaf 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/mmu_context.h> #include <asm/mmzone.h> +#include <asm/processor.h> #include <asm/sparsemem.h> #include <asm/platform_early.h> @@ -304,9 +305,9 @@ void __init setup_arch(char **cmdline_p) bss_resource.end = virt_to_phys(__bss_stop)-1; #ifdef CONFIG_CMDLINE_OVERWRITE - strlcpy(command_line, CONFIG_CMDLINE, sizeof(command_line)); + strscpy(command_line, CONFIG_CMDLINE, sizeof(command_line)); #else - strlcpy(command_line, COMMAND_LINE, sizeof(command_line)); + strscpy(command_line, COMMAND_LINE, sizeof(command_line)); #ifdef CONFIG_CMDLINE_EXTEND strlcat(command_line, " ", sizeof(command_line)); strlcat(command_line, CONFIG_CMDLINE, sizeof(command_line)); @@ -354,3 +355,57 @@ int test_mode_pin(int pin) { return sh_mv.mv_mode_pins() & pin; } + +void __init arch_cpu_finalize_init(void) +{ + char *p = &init_utsname()->machine[2]; /* "sh" */ + + select_idle_routine(); + + current_cpu_data.loops_per_jiffy = loops_per_jiffy; + + switch (current_cpu_data.family) { + case CPU_FAMILY_SH2: + *p++ = '2'; + break; + case CPU_FAMILY_SH2A: + *p++ = '2'; + *p++ = 'a'; + break; + case CPU_FAMILY_SH3: + *p++ = '3'; + break; + case CPU_FAMILY_SH4: + *p++ = '4'; + break; + case CPU_FAMILY_SH4A: + *p++ = '4'; + *p++ = 'a'; + break; + case CPU_FAMILY_SH4AL_DSP: + *p++ = '4'; + *p++ = 'a'; + *p++ = 'l'; + *p++ = '-'; + *p++ = 'd'; + *p++ = 's'; + *p++ = 'p'; + break; + case CPU_FAMILY_UNKNOWN: + /* + * Specifically use CPU_FAMILY_UNKNOWN rather than + * default:, so we're able to have the compiler whine + * about unhandled enumerations. 
+ */ + break; + } + + pr_info("CPU: %s\n", get_cpu_subtype(&current_cpu_data)); + +#ifndef __LITTLE_ENDIAN__ + /* 'eb' means 'Endian Big' */ + *p++ = 'e'; + *p++ = 'b'; +#endif + *p = '\0'; +} diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 2de85c977f54..97377e8c5025 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 999ab5916e69..6cb0ad73dbb9 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -38,7 +38,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, if (pud) { pmd = pmd_alloc(mm, pud, addr); if (pmd) - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_huge(mm, pmd, addr); } } } @@ -63,7 +63,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, if (pud) { pmd = pmd_offset(pud, addr); if (pmd) - pte = pte_offset_map(pmd, addr); + pte = pte_offset_huge(pmd, addr); } } } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8c196990558b..49849790e66d 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -33,7 +33,7 @@ config SPARC select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP select HAS_IOPORT - select HAVE_NMI_WATCHDOG if SPARC64 + select HAVE_HARDLOCKUP_DETECTOR_SPARC64 if SPARC64 select HAVE_CBPF_JIT if SPARC32 select HAVE_EBPF_JIT if SPARC64 select HAVE_DEBUG_BUGVERBOSE @@ -52,6 +52,7 @@ config SPARC config SPARC32 def_bool !64BIT select ARCH_32BIT_OFF_T + select ARCH_HAS_CPU_FINALIZE_INIT if !SMP select ARCH_HAS_SYNC_DMA_FOR_CPU select CLZ_TAB select DMA_DIRECT_REMAP diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 6b2bec1888b3..37e003665de6 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -14,3 +14,17 @@ config FRAME_POINTER bool depends on MCOUNT default y + +config HAVE_HARDLOCKUP_DETECTOR_SPARC64 + bool + depends on HAVE_NMI + select HARDLOCKUP_DETECTOR_SPARC64 + help + Sparc64 hardlockup detector is the last one developed before adding + the common infrastructure for handling hardlockup detectors. It is + always built. It does _not_ use the common command line parameters + and sysctl interface, except for /proc/sys/kernel/nmi_watchdog. 
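
The sparc nmi.c hunks further down switch from the old int-returning watchdog_nmi_enable()/watchdog_nmi_disable() callbacks to the common hardlockup detector's hooks. For reference, this is the shape of the interface an architecture now implements; the three signatures are the ones used in this patch, with the bodies reduced to placeholders:

#include <linux/nmi.h>
#include <linux/init.h>

/* Report at boot whether the arch detector is usable; 0 means yes. */
int __init watchdog_hardlockup_probe(void)
{
	return 0;
}

/* Called by the generic watchdog core when a CPU comes online... */
void watchdog_hardlockup_enable(unsigned int cpu)
{
	/* arch-specific: start NMI-based monitoring on @cpu */
}

/* ...and when it goes offline. Note there is no return value any more. */
void watchdog_hardlockup_disable(unsigned int cpu)
{
	/* arch-specific: stop NMI-based monitoring on @cpu */
}
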
+ +config HARDLOCKUP_DETECTOR_SPARC64 + bool + depends on HAVE_HARDLOCKUP_DETECTOR_SPARC64 diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index d775daa83d12..60ce2fe57fcd 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -19,17 +19,31 @@ #include <asm-generic/atomic64.h> int arch_atomic_add_return(int, atomic_t *); +#define arch_atomic_add_return arch_atomic_add_return + int arch_atomic_fetch_add(int, atomic_t *); +#define arch_atomic_fetch_add arch_atomic_fetch_add + int arch_atomic_fetch_and(int, atomic_t *); +#define arch_atomic_fetch_and arch_atomic_fetch_and + int arch_atomic_fetch_or(int, atomic_t *); +#define arch_atomic_fetch_or arch_atomic_fetch_or + int arch_atomic_fetch_xor(int, atomic_t *); +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + int arch_atomic_cmpxchg(atomic_t *, int, int); +#define arch_atomic_cmpxchg arch_atomic_cmpxchg + int arch_atomic_xchg(atomic_t *, int); -int arch_atomic_fetch_add_unless(atomic_t *, int, int); -void arch_atomic_set(atomic_t *, int); +#define arch_atomic_xchg arch_atomic_xchg +int arch_atomic_fetch_add_unless(atomic_t *, int, int); #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless +void arch_atomic_set(atomic_t *, int); + #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) #define arch_atomic_read(v) READ_ONCE((v)->counter) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 077891686715..a5e9c37605a7 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -37,6 +37,16 @@ s64 arch_atomic64_fetch_##op(s64, atomic64_t *); ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -44,22 +54,19 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n))) - -static inline int arch_atomic_xchg(atomic_t *v, int new) -{ - return arch_xchg(&v->counter, new); -} - -#define arch_atomic64_cmpxchg(v, o, n) \ - ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) - s64 arch_atomic64_dec_if_positive(atomic64_t *v); #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive diff --git a/arch/sparc/include/asm/bugs.h b/arch/sparc/include/asm/bugs.h deleted file mode 100644 index 02fa369b9c21..000000000000 --- a/arch/sparc/include/asm/bugs.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* include/asm/bugs.h: Sparc probes for various bugs. - * - * Copyright (C) 1996, 2007 David S. 
Miller (davem@davemloft.net) - */ - -#ifdef CONFIG_SPARC32 -#include <asm/cpudata.h> -#endif - -extern unsigned long loops_per_jiffy; - -static void __init check_bugs(void) -{ -#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP) - cpu_data(0).udelay_val = loops_per_jiffy; -#endif -} diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h index 43ec2609b811..6ee48321cbc2 100644 --- a/arch/sparc/include/asm/irq_32.h +++ b/arch/sparc/include/asm/irq_32.h @@ -17,7 +17,6 @@ #define irq_canonicalize(irq) (irq) -void __init init_IRQ(void); void __init sun4d_init_sbi_irq(void); #define NO_IRQ 0xffffffff diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 154df2cf19f4..b436029f1ced 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -61,7 +61,6 @@ void sun4u_destroy_msi(unsigned int irq); unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino); void irq_free(unsigned int irq); -void __init init_IRQ(void); void fixup_irqs(void); static inline void set_softint(unsigned long bits) diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h index 90ee7863d9fe..920dc23f443f 100644 --- a/arch/sparc/include/asm/nmi.h +++ b/arch/sparc/include/asm/nmi.h @@ -8,7 +8,6 @@ void nmi_adjust_hz(unsigned int new_hz); extern atomic_t nmi_active; -void arch_touch_nmi_watchdog(void); void start_nmi_watchdog(void *unused); void stop_nmi_watchdog(void *unused); diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h index dcfad4613e18..ffff52c8b760 100644 --- a/arch/sparc/include/asm/timer_64.h +++ b/arch/sparc/include/asm/timer_64.h @@ -34,7 +34,6 @@ extern struct sparc64_tick_ops *tick_ops; unsigned long sparc64_get_clock_tick(unsigned int cpu); void setup_sparc64_timer(void); -void __init time_init(void); #define TICK_PRIV_BIT BIT(63) #define TICKCMP_IRQ_BIT BIT(63) diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 2fda57a3ea86..682da3714686 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -130,6 +130,9 @@ #define SO_RCVMARK 0x0054 +#define SO_PASSPIDFD 0x0055 +#define SO_PEERPIDFD 0x0056 + #if !defined(__KERNEL__) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 4e4f3d3263e4..a8cbe403301f 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -191,7 +191,7 @@ static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys, tack += sizeof (struct resource); } - strlcpy(tack, name, XNMLN+1); + strscpy(tack, name, XNMLN+1); res->name = tack; va = _sparc_ioremap(res, busno, phys, size); diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 9cd09a3ef35f..15da3c0597a5 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -91,7 +91,6 @@ extern int static_irq_count; extern spinlock_t irq_action_lock; void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs); -void init_IRQ(void); /* sun4m_irq.c */ void sun4m_init_IRQ(void); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 060fff95a305..17cdfdbf1f3b 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -65,6 +65,11 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +int __init watchdog_hardlockup_probe(void) +{ + return 0; +} + static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) { int this_cpu = smp_processor_id(); @@ -282,11 +287,11 @@ 
__setup("nmi_watchdog=", setup_nmi_watchdog); * sparc specific NMI watchdog enable function. * Enables watchdog if it is not enabled already. */ -int watchdog_nmi_enable(unsigned int cpu) +void watchdog_hardlockup_enable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) { pr_warn("NMI watchdog cannot be enabled or disabled\n"); - return -1; + return; } /* @@ -295,17 +300,15 @@ int watchdog_nmi_enable(unsigned int cpu) * process first. */ if (!nmi_init_done) - return 0; + return; smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1); - - return 0; } /* * sparc specific NMI watchdog disable function. * Disables watchdog if it is not disabled already. */ -void watchdog_nmi_disable(unsigned int cpu) +void watchdog_hardlockup_disable(unsigned int cpu) { if (atomic_read(&nmi_active) == -1) pr_warn_once("NMI watchdog cannot be enabled or disabled\n"); diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index c8e0dd99f370..1adf5c1c16b8 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -302,7 +302,7 @@ void __init setup_arch(char **cmdline_p) /* Initialize PROM console and command line. */ *cmdline_p = prom_getbootargs(); - strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); + strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); parse_early_param(); boot_flags_init(*cmdline_p); @@ -412,3 +412,10 @@ static int __init topology_init(void) } subsys_initcall(topology_init); + +#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP) +void __init arch_cpu_finalize_init(void) +{ + cpu_data(0).udelay_val = loops_per_jiffy; +} +#endif diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 48abee4eee29..6546ca9d4d3f 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -636,7 +636,7 @@ void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. 
*/ *cmdline_p = prom_getbootargs(); - strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); + strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); parse_early_param(); boot_flags_init(*cmdline_p); diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index dad38960d1a8..ca450c7bc53f 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -328,6 +328,8 @@ static void flush_signal_insns(unsigned long address) goto out_irqs_on; ptep = pte_offset_map(pmdp, address); + if (!ptep) + goto out_irqs_on; pte = *ptep; if (!pte_present(pte)) goto out_unmap; diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4398cc6fb68d..faa835f3c54a 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -496,3 +496,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 69ff07bc6c07..e326caf708c6 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -99,6 +99,7 @@ static unsigned int get_user_insn(unsigned long tpc) local_irq_disable(); pmdp = pmd_offset(pudp, tpc); +again: if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp))) goto out_irq_enable; @@ -115,6 +116,8 @@ static unsigned int get_user_insn(unsigned long tpc) #endif { ptep = pte_offset_map(pmdp, tpc); + if (!ptep) + goto again; pte = *ptep; if (pte_present(pte)) { pa = (pte_pfn(pte) << PAGE_SHIFT); diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d8e0e3c7038d..d7018823206c 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -298,7 +298,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, return NULL; if (sz >= PMD_SIZE) return (pte_t *)pmd; - return pte_alloc_map(mm, pmd, addr); + return pte_alloc_huge(mm, pmd, addr); } pte_t *huge_pte_offset(struct mm_struct *mm, @@ -325,7 +325,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return NULL; if (is_hugetlb_pmd(*pmd)) return (pte_t *)pmd; - return pte_offset_map(pmd, addr); + return pte_offset_huge(pmd, addr); } void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index bf3e6d2fe5d9..133dd42570d6 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -244,7 +244,7 @@ static void *iounit_alloc(struct device *dev, size_t len, long i; pmdp = pmd_off_k(addr); - ptep = pte_offset_map(pmdp, addr); + ptep = pte_offset_kernel(pmdp, addr); set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot)); diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 9e3f6933ca13..3a6caef68348 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -358,7 +358,7 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len, __flush_page_to_ram(page); pmdp = pmd_off_k(addr); - ptep = pte_offset_map(pmdp, addr); + ptep = pte_offset_kernel(pmdp, addr); set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot)); } diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 9a725547578e..7ecf8556947a 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -149,6 +149,8 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte_t *pte; pte = pte_offset_map(&pmd, vaddr); + if (!pte) + return; end = vaddr + HPAGE_SIZE; while (vaddr < end) { if (pte_val(*pte) & _PAGE_VALID) { diff 
--git a/arch/sparc/prom/bootstr_32.c b/arch/sparc/prom/bootstr_32.c index e3b731ff00f0..1c7cd258b0dc 100644 --- a/arch/sparc/prom/bootstr_32.c +++ b/arch/sparc/prom/bootstr_32.c @@ -52,7 +52,7 @@ prom_getbootargs(void) * V3 PROM cannot supply as with more than 128 bytes * of an argument. But a smart bootstrap loader can. */ - strlcpy(barg_buf, *romvec->pv_v2bootargs.bootargs, sizeof(barg_buf)); + strscpy(barg_buf, *romvec->pv_v2bootargs.bootargs, sizeof(barg_buf)); break; default: break; diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 541a9b18e343..b5e179360534 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -5,7 +5,7 @@ menu "UML-specific options" config UML bool default y - select ARCH_EPHEMERAL_INODES + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV diff --git a/arch/um/Makefile b/arch/um/Makefile index 8186d4761bda..da4d5256af2f 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -149,7 +149,7 @@ export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) $(CC_FLAGS_ # When cleaning we don't include .config, so we don't include # TT or skas makefiles and don't clean skas_ptregs.h. CLEAN_FILES += linux x.i gmon.out -MRPROPER_FILES += arch/$(SUBARCH)/include/generated +MRPROPER_FILES += $(HOST_DIR)/include/generated archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f4c1e6e97ad5..50206feac577 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -108,9 +108,9 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data) static DEFINE_MUTEX(ubd_lock); static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ -static int ubd_open(struct block_device *bdev, fmode_t mode); -static void ubd_release(struct gendisk *disk, fmode_t mode); -static int ubd_ioctl(struct block_device *bdev, fmode_t mode, +static int ubd_open(struct gendisk *disk, blk_mode_t mode); +static void ubd_release(struct gendisk *disk); +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); @@ -1154,9 +1154,8 @@ static int __init ubd_driver_init(void){ device_initcall(ubd_driver_init); -static int ubd_open(struct block_device *bdev, fmode_t mode) +static int ubd_open(struct gendisk *disk, blk_mode_t mode) { - struct gendisk *disk = bdev->bd_disk; struct ubd *ubd_dev = disk->private_data; int err = 0; @@ -1171,19 +1170,12 @@ static int ubd_open(struct block_device *bdev, fmode_t mode) } ubd_dev->count++; set_disk_ro(disk, !ubd_dev->openflags.w); - - /* This should no more be needed. 
And it didn't work anyway to exclude - * read-write remounting of filesystems.*/ - /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){ - if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); - err = -EROFS; - }*/ out: mutex_unlock(&ubd_mutex); return err; } -static void ubd_release(struct gendisk *disk, fmode_t mode) +static void ubd_release(struct gendisk *disk) { struct ubd *ubd_dev = disk->private_data; @@ -1397,7 +1389,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static int ubd_ioctl(struct block_device *bdev, fmode_t mode, +static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct ubd *ubd_dev = bdev->bd_disk->private_data; diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h deleted file mode 100644 index 4473942a0839..000000000000 --- a/arch/um/include/asm/bugs.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __UM_BUGS_H -#define __UM_BUGS_H - -void check_bugs(void); - -#endif diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index bda66e5a9d4e..0347a190429c 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -52,6 +52,7 @@ static inline int printk(const char *fmt, ...) extern int in_aton(char *str); extern size_t strlcpy(char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); +extern size_t strscpy(char *, const char *, size_t); /* Copied from linux/compiler-gcc.h since we can't include it directly */ #define barrier() __asm__ __volatile__("": : :"memory") diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 0a23a98d4ca0..918fed7ad4d8 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -3,6 +3,7 @@ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/mm.h> @@ -430,7 +431,7 @@ void __init setup_arch(char **cmdline_p) } } -void __init check_bugs(void) +void __init arch_cpu_finalize_init(void) { arch_check_bugs(); os_check_bugs(); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c index 53eb3d508645..2284e9c1cbbb 100644 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -146,7 +146,7 @@ static int tuntap_open(void *data) } memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); + strscpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { err = -errno; printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb1031018afa..b3ebf58cf77e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -71,6 +71,7 @@ config X86 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE @@ -274,7 +275,9 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO + select HOTPLUG_PARALLEL if SMP && X86_64 select HOTPLUG_SMT if SMP + select HOTPLUG_SPLIT_STARTUP if SMP && X86_32 select IRQ_FORCED_THREADING select LOCK_MM_AND_FIND_VMA select NEED_PER_CPU_EMBED_FIRST_CHUNK @@ -292,7 +295,6 @@ config X86 select 
TRACE_IRQFLAGS_NMI_SUPPORT select USER_STACKTRACE_SUPPORT select HAVE_ARCH_KCSAN if X86_64 - select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 @@ -442,17 +444,6 @@ config SMP If you don't know what to do here, say N. -config X86_FEATURE_NAMES - bool "Processor feature human-readable names" if EMBEDDED - default y - help - This option compiles in a table of x86 feature bits and corresponding - names. This is required to support /proc/cpuinfo and a few kernel - messages. You can disable this to save space, at the expense of - making those few kernel messages show numeric feature bits instead. - - If in doubt, say Y. - config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) @@ -885,9 +876,11 @@ config INTEL_TDX_GUEST bool "Intel TDX (Trust Domain Extensions) - Guest Support" depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC + depends on EFI_STUB select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE + select UNACCEPTED_MEMORY help Support running as a guest under Intel TDX. Without this support, the guest kernel can not boot or run under TDX. @@ -1542,11 +1535,13 @@ config X86_MEM_ENCRYPT config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD + depends on EFI_STUB select DMA_COHERENT_POOL select ARCH_USE_MEMREMAP_PROT select INSTRUCTION_DECODER select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT + select UNACCEPTED_MEMORY help Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory @@ -2306,49 +2301,6 @@ config HOTPLUG_CPU def_bool y depends on SMP -config BOOTPARAM_HOTPLUG_CPU0 - bool "Set default setting of cpu0_hotpluggable" - depends on HOTPLUG_CPU - help - Set whether default state of cpu0_hotpluggable is on or off. - - Say Y here to enable CPU0 hotplug by default. If this switch - is turned on, there is no need to give cpu0_hotplug kernel - parameter and the CPU0 hotplug feature is enabled by default. - - Please note: there are two known CPU0 dependencies if you want - to enable the CPU0 hotplug feature either by this switch or by - cpu0_hotplug kernel parameter. - - First, resume from hibernate or suspend always starts from CPU0. - So hibernate and suspend are prevented if CPU0 is offline. - - Second dependency is PIC interrupts always go to CPU0. CPU0 can not - offline if any interrupt can not migrate out of CPU0. There may - be other CPU0 dependencies. - - Please make sure the dependencies are under your control before - you enable this feature. - - Say N if you don't want to enable CPU0 hotplug feature by default. - You still can enable the CPU0 hotplug feature at boot by kernel - parameter cpu0_hotplug. - -config DEBUG_HOTPLUG_CPU0 - def_bool n - prompt "Debug CPU0 hotplug" - depends on HOTPLUG_CPU - help - Enabling this option offlines CPU0 (if CPU0 can be offlined) as - soon as possible and boots up userspace with CPU0 offlined. User - can online CPU0 back after boot time. - - To debug CPU0 hotplug, you need to enable CPU0 offline/online - feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during - compilation or giving cpu0_hotplug kernel parameter at boot. - - If unsure, say N. 
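
Throughout this series (sh above, sparc32, um and x86 here and below) the per-arch check_bugs() in <asm/bugs.h> is deleted in favour of arch_cpu_finalize_init(), advertised through the new ARCH_HAS_CPU_FINALIZE_INIT option. A minimal sketch of the replacement hook, assuming the generic boot code supplies an empty weak fallback for architectures that do not select the option:

#include <linux/init.h>

/*
 * Hypothetical minimal arch override, run once from generic boot code
 * after the boot CPU is fully initialized. The work of the old
 * check_bugs() -- errata checks, final CPU reporting, latching
 * calibration results -- now lives here.
 */
void __init arch_cpu_finalize_init(void)
{
	/* arch-specific finalization goes here */
}
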
- config COMPAT_VDSO def_bool n prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d..00468adf180f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -389,7 +389,7 @@ config IA32_FEAT_CTL config X86_VMX_FEATURE_NAMES def_bool y - depends on IA32_FEAT_CTL && X86_FEATURE_NAMES + depends on IA32_FEAT_CTL menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b39975977c03..fdc2e3abd615 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -305,6 +305,18 @@ ifeq ($(RETPOLINE_CFLAGS),) endif endif +ifdef CONFIG_UNWINDER_ORC +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h +orc_hash_sh := $(srctree)/scripts/orc_hash.sh +targets += $(orc_hash_h) +quiet_cmd_orc_hash = GEN $@ + cmd_orc_hash = mkdir -p $(dir $@); \ + $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@ +$(orc_hash_h): $(srctree)/arch/x86/include/asm/orc_types.h $(orc_hash_sh) FORCE + $(call if_changed,orc_hash) +archprepare: $(orc_hash_h) +endif + archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink new file mode 100644 index 000000000000..936093d29160 --- /dev/null +++ b/arch/x86/Makefile.postlink @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# Post-link x86 pass +# =========================================================================== +# +# 1. Separate relocations from vmlinux into vmlinux.relocs. +# 2. Strip relocations from vmlinux. + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + +CMD_RELOCS = arch/x86/tools/relocs +OUT_RELOCS = arch/x86/boot/compressed +quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs + cmd_relocs = \ + mkdir -p $(OUT_RELOCS); \ + $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \ + $(CMD_RELOCS) --abs-relocs $@ + +quiet_cmd_strip_relocs = RSTRIP $@ + cmd_strip_relocs = \ + $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ + --remove-section='.rela.*' --remove-section='.rela__*' $@ + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifeq ($(CONFIG_X86_NEED_RELOCS),y) + $(call cmd,relocs) + $(call cmd,strip_relocs) +endif + +%.ko: FORCE + @true + +clean: + @rm -f $(OUT_RELOCS)/vmlinux.relocs + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9e38ffaadb5d..f33e45ed1437 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -55,14 +55,12 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ -include include/generated/autoconf.h \ -D__EXPORTED_HEADERS__ -ifdef CONFIG_X86_FEATURE_NAMES $(obj)/cpu.o: $(obj)/cpustr.h quiet_cmd_cpustr = CPUSTR $@ cmd_cpustr = $(obj)/mkcpustr > $@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE $(call if_changed,cpustr) -endif targets += cpustr.h # --------------------------------------------------------------------------- diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6b6cfe607bdb..40d2ff503079 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -106,7 +106,8 @@ ifdef CONFIG_X86_64 endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o -vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o +vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o 
$(obj)/tdcall.o $(obj)/tdx-shared.o +vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o @@ -121,11 +122,9 @@ $(obj)/vmlinux.bin: vmlinux FORCE targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs -CMD_RELOCS = arch/x86/tools/relocs -quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux FORCE - $(call if_changed,relocs) +# vmlinux.relocs is created by the vmlinux postlink step. +$(obj)/vmlinux.relocs: vmlinux + @true vmlinux.bin.all-y := $(obj)/vmlinux.bin vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h index 7db2f41b54cd..866c0af8b5b9 100644 --- a/arch/x86/boot/compressed/efi.h +++ b/arch/x86/boot/compressed/efi.h @@ -16,6 +16,7 @@ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) +#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31) #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" @@ -32,6 +33,7 @@ typedef struct { } efi_table_hdr_t; #define EFI_CONVENTIONAL_MEMORY 7 +#define EFI_UNACCEPTED_MEMORY 15 #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ @@ -104,6 +106,14 @@ struct efi_setup_data { u64 reserved[8]; }; +struct efi_unaccepted_memory { + u32 version; + u32 unit_size; + u64 phys_base; + u64 size; + unsigned long bitmap[]; +}; + static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) { return memcmp(&left, &right, sizeof (efi_guid_t)); diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index c881878e56d3..5313c5cb2b80 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -22,3 +22,22 @@ void error(char *m) while (1) asm("hlt"); } + +/* EFI libstub provides vsnprintf() */ +#ifdef CONFIG_EFI_STUB +void panic(const char *fmt, ...) +{ + static char buf[1024]; + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + error(buf); +} +#endif diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h index 1de5821184f1..86fe33b93715 100644 --- a/arch/x86/boot/compressed/error.h +++ b/arch/x86/boot/compressed/error.h @@ -6,5 +6,6 @@ void warn(char *m); void error(char *m) __noreturn; +void panic(const char *fmt, ...) __noreturn __cold; #endif /* BOOT_COMPRESSED_ERROR_H */ diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 454757fbdfe5..9193acf0e9cd 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -672,6 +672,33 @@ static bool process_mem_region(struct mem_vector *region, } #ifdef CONFIG_EFI + +/* + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are + * guaranteed to be free. 
+ * + * Pick free memory more conservatively than the EFI spec allows: according to + * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus + * available to place the kernel image into, but in practice there's firmware + * where using that memory leads to crashes. Buggy vendor EFI code registers + * for an event that triggers on SetVirtualAddressMap(). The handler assumes + * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which + * is probably true for Windows. + * + * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap(). + */ +static inline bool memory_type_is_free(efi_memory_desc_t *md) +{ + if (md->type == EFI_CONVENTIONAL_MEMORY) + return true; + + if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) && + md->type == EFI_UNACCEPTED_MEMORY) + return true; + + return false; +} + /* * Returns true if we processed the EFI memmap, which we prefer over the E820 * table if it is available. @@ -716,18 +743,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size) for (i = 0; i < nr_desc; i++) { md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i); - /* - * Here we are more conservative in picking free memory than - * the EFI spec allows: - * - * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also - * free memory and thus available to place the kernel image into, - * but in practice there's firmware where using that memory leads - * to crashes. - * - * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free. - */ - if (md->type != EFI_CONVENTIONAL_MEMORY) + if (!memory_type_is_free(md)) continue; if (efi_soft_reserve_enabled() && diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c new file mode 100644 index 000000000000..3c1609245f2a --- /dev/null +++ b/arch/x86/boot/compressed/mem.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "error.h" +#include "misc.h" +#include "tdx.h" +#include "sev.h" +#include <asm/shared/tdx.h> + +/* + * accept_memory() and process_unaccepted_memory() called from EFI stub which + * runs before decompresser and its early_tdx_detect(). + * + * Enumerate TDX directly from the early users. 
+ */ +static bool early_is_tdx_guest(void) +{ + static bool once; + static bool is_tdx; + + if (!IS_ENABLED(CONFIG_INTEL_TDX_GUEST)) + return false; + + if (!once) { + u32 eax, sig[3]; + + cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, + &sig[0], &sig[2], &sig[1]); + is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig)); + once = true; + } + + return is_tdx; +} + +void arch_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* Platform-specific memory-acceptance call goes here */ + if (early_is_tdx_guest()) { + if (!tdx_accept_memory(start, end)) + panic("TDX: Failed to accept memory\n"); + } else if (sev_snp_enabled()) { + snp_accept_memory(start, end); + } else { + error("Cannot accept memory: unknown platform\n"); + } +} + +bool init_unaccepted_memory(void) +{ + guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID; + struct efi_unaccepted_memory *table; + unsigned long cfg_table_pa; + unsigned int cfg_table_len; + enum efi_type et; + int ret; + + et = efi_get_type(boot_params); + if (et == EFI_TYPE_NONE) + return false; + + ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len); + if (ret) { + warn("EFI config table not found."); + return false; + } + + table = (void *)efi_find_vendor_table(boot_params, cfg_table_pa, + cfg_table_len, guid); + if (!table) + return false; + + if (table->version != 1) + error("Unknown version of unaccepted memory table\n"); + + /* + * In many cases unaccepted_table is already set by EFI stub, but it + * has to be initialized again to cover cases when the table is not + * allocated by EFI stub or EFI stub copied the kernel image with + * efi_relocate_kernel() before the variable is set. + * + * It must be initialized before the first usage of accept_memory(). + */ + unaccepted_table = table; + + return true; +} diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 014ff222bf4b..94b7abcf624b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -455,6 +455,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif debug_putstr("\nDecompressing Linux... "); + + if (init_unaccepted_memory()) { + debug_putstr("Accepting memory... 
"); + accept_memory(__pa(output), __pa(output) + needed_size); + } + __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); entry_offset = parse_elf(output); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 2f155a0e3041..964fe903a1cd 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -247,4 +247,14 @@ static inline unsigned long efi_find_vendor_table(struct boot_params *bp, } #endif /* CONFIG_EFI */ +#ifdef CONFIG_UNACCEPTED_MEMORY +bool init_unaccepted_memory(void); +#else +static inline bool init_unaccepted_memory(void) { return false; } +#endif + +/* Defined in EFI stub */ +extern struct efi_unaccepted_memory *unaccepted_table; +void accept_memory(phys_addr_t start, phys_addr_t end); + #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 014b89c89088..09dc8c187b3c 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -115,7 +115,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, /* Include code for early handlers */ #include "../../kernel/sev-shared.c" -static inline bool sev_snp_enabled(void) +bool sev_snp_enabled(void) { return sev_status & MSR_AMD64_SEV_SNP_ENABLED; } @@ -181,6 +181,58 @@ static bool early_setup_ghcb(void) return true; } +static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc, + phys_addr_t pa, phys_addr_t pa_end) +{ + struct psc_hdr *hdr; + struct psc_entry *e; + unsigned int i; + + hdr = &desc->hdr; + memset(hdr, 0, sizeof(*hdr)); + + e = desc->entries; + + i = 0; + while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) { + hdr->end_entry = i; + + e->gfn = pa >> PAGE_SHIFT; + e->operation = SNP_PAGE_STATE_PRIVATE; + if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) { + e->pagesize = RMP_PG_SIZE_2M; + pa += PMD_SIZE; + } else { + e->pagesize = RMP_PG_SIZE_4K; + pa += PAGE_SIZE; + } + + e++; + i++; + } + + if (vmgexit_psc(boot_ghcb, desc)) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + pvalidate_pages(desc); + + return pa; +} + +void snp_accept_memory(phys_addr_t start, phys_addr_t end) +{ + struct snp_psc_desc desc = {}; + unsigned int i; + phys_addr_t pa; + + if (!boot_ghcb && !early_setup_ghcb()) + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + pa = start; + while (pa < end) + pa = __snp_accept_memory(&desc, pa, end); +} + void sev_es_shutdown_ghcb(void) { if (!boot_ghcb) diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h new file mode 100644 index 000000000000..fc725a981b09 --- /dev/null +++ b/arch/x86/boot/compressed/sev.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header for early boot related functions. 
+ * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#ifndef BOOT_COMPRESSED_SEV_H +#define BOOT_COMPRESSED_SEV_H + +#ifdef CONFIG_AMD_MEM_ENCRYPT + +bool sev_snp_enabled(void); +void snp_accept_memory(phys_addr_t start, phys_addr_t end); + +#else + +static inline bool sev_snp_enabled(void) { return false; } +static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } + +#endif + +#endif diff --git a/arch/x86/boot/compressed/tdx-shared.c b/arch/x86/boot/compressed/tdx-shared.c new file mode 100644 index 000000000000..5ac43762fe13 --- /dev/null +++ b/arch/x86/boot/compressed/tdx-shared.c @@ -0,0 +1,2 @@ +#include "error.h" +#include "../../coco/tdx/tdx-shared.c" diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c index 2d81d3cc72a1..8841b945a1e2 100644 --- a/arch/x86/boot/compressed/tdx.c +++ b/arch/x86/boot/compressed/tdx.c @@ -20,7 +20,7 @@ static inline unsigned int tdx_io_in(int size, u16 port) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, - .r11 = EXIT_REASON_IO_INSTRUCTION, + .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, .r13 = 0, .r14 = port, @@ -36,7 +36,7 @@ static inline void tdx_io_out(int size, u16 port, u32 value) { struct tdx_hypercall_args args = { .r10 = TDX_HYPERCALL_STANDARD, - .r11 = EXIT_REASON_IO_INSTRUCTION, + .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, .r13 = 1, .r14 = port, diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 0bbf4f3707d2..feb6dbd7ca86 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -14,9 +14,7 @@ */ #include "boot.h" -#ifdef CONFIG_X86_FEATURE_NAMES #include "cpustr.h" -#endif static char *cpu_name(int level) { @@ -35,7 +33,6 @@ static char *cpu_name(int level) static void show_cap_strs(u32 *err_flags) { int i, j; -#ifdef CONFIG_X86_FEATURE_NAMES const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs; for (i = 0; i < NCAPINTS; i++) { u32 e = err_flags[i]; @@ -58,16 +55,6 @@ static void show_cap_strs(u32 *err_flags) e >>= 1; } } -#else - for (i = 0; i < NCAPINTS; i++) { - u32 e = err_flags[i]; - for (j = 0; j < 32; j++) { - if (e & 1) - printf("%d:%d ", i, j); - e >>= 1; - } - } -#endif } int validate_cpu(void) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 73f83233d25d..eeec9986570e 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -13,10 +13,10 @@ #include <asm/coco.h> #include <asm/processor.h> -enum cc_vendor cc_vendor __ro_after_init; +enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; static u64 cc_mask __ro_after_init; -static bool intel_cc_platform_has(enum cc_attr attr) +static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_UNROLL_STRING_IO: @@ -34,7 +34,7 @@ static bool intel_cc_platform_has(enum cc_attr attr) * the other levels of SME/SEV functionality, including C-bit * based SEV-SNP, are not enabled. */ -static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) +static __maybe_unused __always_inline bool amd_cc_platform_vtom(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_MEM_ENCRYPT: @@ -58,7 +58,7 @@ static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) * the trampoline area must be encrypted. 
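+ * For example:
+ *
+ *	cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)	- true under SME only
+ *	cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)	- true under SEV only
+ *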
*/ -static bool amd_cc_platform_has(enum cc_attr attr) +static bool noinstr amd_cc_platform_has(enum cc_attr attr) { #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -97,7 +97,7 @@ static bool amd_cc_platform_has(enum cc_attr attr) #endif } -bool cc_platform_has(enum cc_attr attr) +bool noinstr cc_platform_has(enum cc_attr attr) { switch (cc_vendor) { case CC_VENDOR_AMD: diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile index 46c55998557d..2c7dcbf1458b 100644 --- a/arch/x86/coco/tdx/Makefile +++ b/arch/x86/coco/tdx/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += tdx.o tdcall.o +obj-y += tdx.o tdx-shared.o tdcall.o diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c new file mode 100644 index 000000000000..ef20ddc37b58 --- /dev/null +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -0,0 +1,71 @@ +#include <asm/tdx.h> +#include <asm/pgtable.h> + +static unsigned long try_accept_one(phys_addr_t start, unsigned long len, + enum pg_level pg_level) +{ + unsigned long accept_size = page_level_size(pg_level); + u64 tdcall_rcx; + u8 page_size; + + if (!IS_ALIGNED(start, accept_size)) + return 0; + + if (len < accept_size) + return 0; + + /* + * Pass the page physical address to the TDX module to accept the + * pending, private page. + * + * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G. + */ + switch (pg_level) { + case PG_LEVEL_4K: + page_size = 0; + break; + case PG_LEVEL_2M: + page_size = 1; + break; + case PG_LEVEL_1G: + page_size = 2; + break; + default: + return 0; + } + + tdcall_rcx = start | page_size; + if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL)) + return 0; + + return accept_size; +} + +bool tdx_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* + * For shared->private conversion, accept the page using + * TDX_ACCEPT_PAGE TDX module call. + */ + while (start < end) { + unsigned long len = end - start; + unsigned long accept_size; + + /* + * Try larger accepts first. It gives chance to VMM to keep + * 1G/2M Secure EPT entries where possible and speeds up + * process by cutting number of hypercalls (if successful). + */ + + accept_size = try_accept_one(start, len, PG_LEVEL_1G); + if (!accept_size) + accept_size = try_accept_one(start, len, PG_LEVEL_2M); + if (!accept_size) + accept_size = try_accept_one(start, len, PG_LEVEL_4K); + if (!accept_size) + return false; + start += accept_size; + } + + return true; +} diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index e146b599260f..1d6b863c42b0 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,20 +14,6 @@ #include <asm/insn-eval.h> #include <asm/pgtable.h> -/* TDX module Call Leaf IDs */ -#define TDX_GET_INFO 1 -#define TDX_GET_VEINFO 3 -#define TDX_GET_REPORT 4 -#define TDX_ACCEPT_PAGE 6 -#define TDX_WR 8 - -/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */ -#define TDCS_NOTIFY_ENABLES 0x9100000000000010 - -/* TDX hypercall Leaf IDs */ -#define TDVMCALL_MAP_GPA 0x10001 -#define TDVMCALL_REPORT_FATAL_ERROR 0x10003 - /* MMIO direction */ #define EPT_READ 0 #define EPT_WRITE 1 @@ -51,24 +37,6 @@ #define TDREPORT_SUBTYPE_0 0 -/* - * Wrapper for standard use of __tdx_hypercall with no output aside from - * return code. 
- */ -static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) -{ - struct tdx_hypercall_args args = { - .r10 = TDX_HYPERCALL_STANDARD, - .r11 = fn, - .r12 = r12, - .r13 = r13, - .r14 = r14, - .r15 = r15, - }; - - return __tdx_hypercall(&args); -} - /* Called from __tdx_hypercall() for unrecoverable failure */ noinstr void __tdx_hypercall_failed(void) { @@ -76,17 +44,6 @@ noinstr void __tdx_hypercall_failed(void) panic("TDVMCALL failed. TDX module bug?"); } -/* - * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined - * independently from but are currently matched 1:1 with VMX EXIT_REASONs. - * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and - * guest sides of these calls. - */ -static __always_inline u64 hcall_func(u64 exit_reason) -{ - return exit_reason; -} - #ifdef CONFIG_KVM_GUEST long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4) @@ -745,47 +702,6 @@ static bool tdx_cache_flush_required(void) return true; } -static bool try_accept_one(phys_addr_t *start, unsigned long len, - enum pg_level pg_level) -{ - unsigned long accept_size = page_level_size(pg_level); - u64 tdcall_rcx; - u8 page_size; - - if (!IS_ALIGNED(*start, accept_size)) - return false; - - if (len < accept_size) - return false; - - /* - * Pass the page physical address to the TDX module to accept the - * pending, private page. - * - * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G. - */ - switch (pg_level) { - case PG_LEVEL_4K: - page_size = 0; - break; - case PG_LEVEL_2M: - page_size = 1; - break; - case PG_LEVEL_1G: - page_size = 2; - break; - default: - return false; - } - - tdcall_rcx = *start | page_size; - if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL)) - return false; - - *start += accept_size; - return true; -} - /* * Inform the VMM of the guest's intent for this physical page: shared with * the VMM or private to the guest. The VMM is expected to change its mapping @@ -810,33 +726,34 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0)) return false; - /* private->shared conversion requires only MapGPA call */ - if (!enc) - return true; + /* shared->private conversion requires memory to be accepted before use */ + if (enc) + return tdx_accept_memory(start, end); + + return true; +} +static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages, + bool enc) +{ /* - * For shared->private conversion, accept the page using - * TDX_ACCEPT_PAGE TDX module call. + * Only handle shared->private conversion here. + * See the comment in tdx_early_init(). */ - while (start < end) { - unsigned long len = end - start; - - /* - * Try larger accepts first. It gives chance to VMM to keep - * 1G/2M SEPT entries where possible and speeds up process by - * cutting number of hypercalls (if successful). - */ - - if (try_accept_one(&start, len, PG_LEVEL_1G)) - continue; - - if (try_accept_one(&start, len, PG_LEVEL_2M)) - continue; - - if (!try_accept_one(&start, len, PG_LEVEL_4K)) - return false; - } + if (enc) + return tdx_enc_status_changed(vaddr, numpages, enc); + return true; +} +static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, + bool enc) +{ + /* + * Only handle private->shared conversion here. + * See the comment in tdx_early_init(). 
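+	 *
+	 * Rough flow for a set_memory_decrypted() call, as a sketch:
+	 *
+	 *	tdx_enc_status_change_prepare()   - no-op (enc == false)
+	 *	<kernel mapping switched to shared>
+	 *	tdx_enc_status_change_finish()    - MapGPA(shared), no accept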
+ */
+	if (!enc)
+		return tdx_enc_status_changed(vaddr, numpages, enc);
 	return true;
 }
 
@@ -852,7 +769,7 @@ void __init tdx_early_init(void)
 
 	setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
 
-	cc_set_vendor(CC_VENDOR_INTEL);
+	cc_vendor = CC_VENDOR_INTEL;
 	tdx_parse_tdinfo(&cc_mask);
 	cc_set_mask(cc_mask);
 
@@ -867,9 +784,41 @@
 	 */
 	physical_mask &= cc_mask - 1;
 
-	x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
-	x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
-	x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+	/*
+	 * The kernel mapping should match the TDX metadata for the page.
+	 * load_unaligned_zeropad() can touch memory *adjacent* to that which is
+	 * owned by the caller and can catch even _momentary_ mismatches. Bad
+	 * things happen on mismatch:
+	 *
+	 *   - Private mapping => Shared Page == Guest shutdown
+	 *   - Shared mapping => Private Page == Recoverable #VE
+	 *
+	 * guest.enc_status_change_prepare() converts the page from
+	 * shared=>private before the mapping becomes private.
+	 *
+	 * guest.enc_status_change_finish() converts the page from
+	 * private=>shared after the mapping becomes shared.
+	 *
+	 * In both cases there is a temporary shared mapping to a private page,
+	 * which can result in a #VE. But, there is never a private mapping to
+	 * a shared page.
+	 */
+	x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
+	x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
+
+	x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+	x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+
+	/*
+	 * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+	 * bringup low level code. That raises #VE which cannot be handled
+	 * there.
+	 *
+	 * Intel-TDX has a secure RDMSR hypercall, but that needs to be
+	 * implemented separately in the low level startup ASM code.
+	 * Until that is in place, disable parallel bringup for TDX.
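+	 *
+	 * (The affected read is the APIC ID from APIC_X2APIC_ID_MSR, MSR
+	 * 0x802; see the apicdef.h hunk below.)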
+ */ + x86_cpuinit.parallel_bringup = false; pr_info("Guest detected\n"); } diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 320480a8db4f..bc0a3c941b35 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -455,3 +455,4 @@ 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node +451 i386 cachestat sys_cachestat diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608cd2..227538b0ce80 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -372,6 +372,7 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 5e37f41e5f14..27b5da2111ac 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -26,17 +26,7 @@ SYM_FUNC_START(\name) pushq %r11 call \func - jmp __thunk_restore -SYM_FUNC_END(\name) - _ASM_NOKPROBE(\name) - .endm - - THUNK preempt_schedule_thunk, preempt_schedule - THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace - EXPORT_SYMBOL(preempt_schedule_thunk) - EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -SYM_CODE_START_LOCAL(__thunk_restore) popq %r11 popq %r10 popq %r9 @@ -48,5 +38,11 @@ SYM_CODE_START_LOCAL(__thunk_restore) popq %rdi popq %rbp RET - _ASM_NOKPROBE(__thunk_restore) -SYM_CODE_END(__thunk_restore) +SYM_FUNC_END(\name) + _ASM_NOKPROBE(\name) + .endm + +THUNK preempt_schedule_thunk, preempt_schedule +THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace +EXPORT_SYMBOL(preempt_schedule_thunk) +EXPORT_SYMBOL(preempt_schedule_notrace_thunk) diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c index 0a9007c24056..e4640306b2e3 100644 --- a/arch/x86/entry/vdso/vgetcpu.c +++ b/arch/x86/entry/vdso/vgetcpu.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include <linux/getcpu.h> #include <asm/segment.h> +#include <vdso/processor.h> notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index bccea57dee81..abadd5f23425 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -374,7 +374,7 @@ static int amd_pmu_hw_config(struct perf_event *event) /* pass precise event sampling to ibs: */ if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + return forward_event_to_ibs(event); if (has_branch_stack(event) && !x86_pmu.lbr_nr) return -EOPNOTSUPP; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 64582954b5f6..371014802191 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -190,7 +190,7 @@ static struct perf_ibs *get_ibs_pmu(int type) } /* - * Use IBS for precise event sampling: + * core pmu config -> IBS config * * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count * perf record -a -e r076:p ... # same as -e cpu-cycles:p @@ -199,25 +199,9 @@ static struct perf_ibs *get_ibs_pmu(int type) * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, * MSRC001_1033) is used to select either cycle or micro-ops counting * mode. - * - * The rip of IBS samples has skid 0. 
Thus, IBS supports precise - * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the - * rip is invalid when IBS was not able to record the rip correctly. - * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. - * */ -static int perf_ibs_precise_event(struct perf_event *event, u64 *config) +static int core_pmu_ibs_config(struct perf_event *event, u64 *config) { - switch (event->attr.precise_ip) { - case 0: - return -ENOENT; - case 1: - case 2: - break; - default: - return -EOPNOTSUPP; - } - switch (event->attr.type) { case PERF_TYPE_HARDWARE: switch (event->attr.config) { @@ -243,22 +227,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +/* + * The rip of IBS samples has skid 0. Thus, IBS supports precise + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the + * rip is invalid when IBS was not able to record the rip correctly. + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. + */ +int forward_event_to_ibs(struct perf_event *event) +{ + u64 config = 0; + + if (!event->attr.precise_ip || event->attr.precise_ip > 2) + return -EOPNOTSUPP; + + if (!core_pmu_ibs_config(event, &config)) { + event->attr.type = perf_ibs_op.pmu.type; + event->attr.config = config; + } + return -ENOENT; +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; u64 max_cnt, config; - int ret; perf_ibs = get_ibs_pmu(event->attr.type); - if (perf_ibs) { - config = event->attr.config; - } else { - perf_ibs = &perf_ibs_op; - ret = perf_ibs_precise_event(event, &config); - if (ret) - return ret; - } + if (!perf_ibs) + return -ENOENT; + + config = event->attr.config; if (event->pmu != &perf_ibs->pmu) return -ENOENT; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 89b9c1cebb61..a149fafad813 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -2451,7 +2461,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event) intel_clear_masks(event, idx); - mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4); + mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK); cpuc->fixed_ctrl_val &= ~mask; } @@ -2750,25 +2760,25 @@ static void intel_pmu_enable_fixed(struct perf_event *event) * if requested: */ if (!event->attr.precise_ip) - bits |= 0x8; + bits |= INTEL_FIXED_0_ENABLE_PMI; if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; + bits |= INTEL_FIXED_0_USER; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; + bits |= INTEL_FIXED_0_KERNEL; /* * ANY bit is supported in v3 and up */ if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) - bits |= 0x4; + bits |= 
INTEL_FIXED_0_ANYTHREAD; idx -= INTEL_PMC_IDX_FIXED; - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); + bits = intel_fixed_bits_by_idx(idx, bits); + mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK); if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) { - bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4); - mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4); + bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE); + mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE); } cpuc->fixed_ctrl_val &= ~mask; @@ -6496,6 +6506,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + x86_pmu.extra_regs = intel_spr_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: @@ -6506,7 +6517,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_spr_event_constraints; x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; - x86_pmu.extra_regs = intel_spr_extra_regs; + if (!x86_pmu.extra_regs) + x86_pmu.extra_regs = intel_gnr_extra_regs; x86_pmu.limit_period = spr_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; @@ -6650,6 +6662,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5f9474f08e1..6c04b52f139b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -416,7 +416,7 @@ void __init hyperv_init(void) goto free_vp_assist_page; } - cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", hv_cpu_init, hv_cpu_die); if (cpuhp < 0) goto free_ghcb_page; diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1ba5d3b99b16..85d38b9f3586 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index cc92388b7a99..14f46ad2ca64 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -17,6 +17,7 @@ #include <asm/mem_encrypt.h> #include <asm/mshyperv.h> #include <asm/hypervisor.h> +#include <asm/mtrr.h> #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -364,7 +365,7 @@ void __init hv_vtom_init(void) * Set it here to indicate a vTOM VM. */ sev_status = MSR_AMD64_SNP_VTOM; - cc_set_vendor(CC_VENDOR_AMD); + cc_vendor = CC_VENDOR_AMD; cc_set_mask(ms_hyperv.shared_gpa_boundary); physical_mask &= ms_hyperv.shared_gpa_boundary - 1; @@ -372,6 +373,9 @@ void __init hv_vtom_init(void) x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; + + /* Set WB as the default cache mode. 
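+	 * The guest cannot rely on the hardware MTRRs here, so seed the MTRR
+	 * code with no variable ranges and a write-back default; a subsequent
+	 * mtrr_type_lookup(start, end, &uniform) then reports
+	 * MTRR_TYPE_WRBACK with uniform set, rather than failing.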
*/ + mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 1e51650b79d7..4f1ce5fc4e19 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h generated-y += syscalls_32.h generated-y += syscalls_64.h generated-y += syscalls_x32.h diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index d7da28fada87..6c15a622ad60 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -113,7 +113,6 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern bool is_callthunk(void *addr); extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); #else static __always_inline void callthunks_patch_builtin_calls(void) {} @@ -124,10 +123,6 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) { return dest; } -static __always_inline bool is_callthunk(void *addr) -{ - return false; -} static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, void *func) { diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3216da7074ba..98c32aa5963a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -55,6 +55,8 @@ extern int local_apic_timer_c2_ok; extern int disable_apic; extern unsigned int lapic_timer_period; +extern int cpuid_to_apicid[]; + extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, @@ -377,7 +379,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[]; * APIC functionality to boot other CPUs - only used on SMP: */ #ifdef CONFIG_SMP -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); extern int lapic_can_unplug_cpu(void); #endif @@ -507,10 +508,8 @@ extern int default_check_phys_apicid_present(int phys_apicid); #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_SMP -bool apic_id_is_primary_thread(unsigned int id); void apic_smt_update(void); #else -static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } static inline void apic_smt_update(void) { } #endif diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 68d213e83fcc..4b125e5b3187 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_APICDEF_H #define _ASM_X86_APICDEF_H +#include <linux/bits.h> + /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) * @@ -138,9 +140,10 @@ #define APIC_EILVT_MASKED (1 << 16) #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -#define APIC_BASE_MSR 0x800 -#define XAPIC_ENABLE (1UL << 11) -#define X2APIC_ENABLE (1UL << 10) +#define APIC_BASE_MSR 0x800 +#define APIC_X2APIC_ID_MSR 0x802 +#define XAPIC_ENABLE BIT(11) +#define X2APIC_ENABLE BIT(10) #ifdef CONFIG_X86_32 # define MAX_IO_APICS 64 @@ -162,6 +165,7 @@ #define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) #define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) +#ifndef __ASSEMBLY__ /* * the local APIC register structure, memory mapped. 
Not terribly well * tested, but we might eventually use this one in the future - the @@ -435,4 +439,5 @@ enum apic_delivery_modes { APIC_DELIVERY_MODE_EXTINT = 7, }; +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 5e754e895767..55a55ec04350 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -14,12 +14,6 @@ * resource counting etc.. */ -/** - * arch_atomic_read - read atomic variable - * @v: pointer of type atomic_t - * - * Atomically reads the value of @v. - */ static __always_inline int arch_atomic_read(const atomic_t *v) { /* @@ -29,25 +23,11 @@ static __always_inline int arch_atomic_read(const atomic_t *v) return __READ_ONCE((v)->counter); } -/** - * arch_atomic_set - set atomic variable - * @v: pointer of type atomic_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { __WRITE_ONCE(v->counter, i); } -/** - * arch_atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v. - */ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" @@ -55,13 +35,6 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) : "ir" (i) : "memory"); } -/** - * arch_atomic_sub - subtract integer from atomic variable - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v. - */ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" @@ -69,27 +42,12 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) : "ir" (i) : "memory"); } -/** - * arch_atomic_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer of type atomic_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i); } #define arch_atomic_sub_and_test arch_atomic_sub_and_test -/** - * arch_atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" @@ -97,12 +55,6 @@ static __always_inline void arch_atomic_inc(atomic_t *v) } #define arch_atomic_inc arch_atomic_inc -/** - * arch_atomic_dec - decrement atomic variable - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" @@ -110,69 +62,30 @@ static __always_inline void arch_atomic_dec(atomic_t *v) } #define arch_atomic_dec arch_atomic_dec -/** - * arch_atomic_dec_and_test - decrement and test - * @v: pointer of type atomic_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e); } #define arch_atomic_dec_and_test arch_atomic_dec_and_test -/** - * arch_atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. 
- */ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e); } #define arch_atomic_inc_and_test arch_atomic_inc_and_test -/** - * arch_atomic_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i); } #define arch_atomic_add_negative arch_atomic_add_negative -/** - * arch_atomic_add_return - add integer and return - * @i: integer value to add - * @v: pointer of type atomic_t - * - * Atomically adds @i to @v and returns @i + @v - */ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } #define arch_atomic_add_return arch_atomic_add_return -/** - * arch_atomic_sub_return - subtract integer and return - * @v: pointer of type atomic_t - * @i: integer value to subtract - * - * Atomically subtracts @i from @v and returns @v - @i - */ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 808b4eece251..3486d91b8595 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -61,30 +61,12 @@ ATOMIC64_DECL(add_unless); #undef __ATOMIC64_DECL #undef ATOMIC64_EXPORT -/** - * arch_atomic64_cmpxchg - cmpxchg atomic64 variable - * @v: pointer to type atomic64_t - * @o: expected value - * @n: new value - * - * Atomically sets @v to @n if it was equal to @o and returns - * the old value. - */ - static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -/** - * arch_atomic64_xchg - xchg atomic64 variable - * @v: pointer to type atomic64_t - * @n: value to assign - * - * Atomically xchgs the value of @v to @n and returns - * the old value. - */ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { s64 o; @@ -97,13 +79,6 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) } #define arch_atomic64_xchg arch_atomic64_xchg -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: value to assign - * - * Atomically sets the value of @v to @n. - */ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); @@ -113,12 +88,6 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) : "eax", "edx", "memory"); } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically reads the value of @v and returns it. 
- */ static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { s64 r; @@ -126,13 +95,6 @@ static __always_inline s64 arch_atomic64_read(const atomic64_t *v) return r; } -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + *@v - */ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, @@ -142,9 +104,6 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) } #define arch_atomic64_add_return arch_atomic64_add_return -/* - * Other variants with different arithmetic operators: - */ static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, @@ -172,13 +131,6 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) } #define arch_atomic64_dec_return arch_atomic64_dec_return -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, @@ -187,13 +139,6 @@ static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) return i; } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, @@ -202,12 +147,6 @@ static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) return i; } -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, @@ -215,12 +154,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, @@ -228,15 +161,6 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, so long as it was not @u. - * Returns non-zero if the add was done, zero otherwise. - */ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index c496595bf601..3165c0feedf7 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -10,37 +10,16 @@ #define ATOMIC64_INIT(i) { (i) } -/** - * arch_atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t - * - * Atomically reads the value of @v. - * Doesn't imply a read memory barrier. 
- */ static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __READ_ONCE((v)->counter); } -/** - * arch_atomic64_set - set atomic64 variable - * @v: pointer to type atomic64_t - * @i: required value - * - * Atomically sets the value of @v to @i. - */ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __WRITE_ONCE(v->counter, i); } -/** - * arch_atomic64_add - add integer to atomic64 variable - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v. - */ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" @@ -48,13 +27,6 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub - subtract the atomic64 variable - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v. - */ static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" @@ -62,27 +34,12 @@ static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) : "er" (i), "m" (v->counter) : "memory"); } -/** - * arch_atomic64_sub_and_test - subtract value from variable and test result - * @i: integer value to subtract - * @v: pointer to type atomic64_t - * - * Atomically subtracts @i from @v and returns - * true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } #define arch_atomic64_sub_and_test arch_atomic64_sub_and_test -/** - * arch_atomic64_inc - increment atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1. - */ static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" @@ -91,12 +48,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) } #define arch_atomic64_inc arch_atomic64_inc -/** - * arch_atomic64_dec - decrement atomic64 variable - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1. - */ static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" @@ -105,56 +56,24 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) } #define arch_atomic64_dec arch_atomic64_dec -/** - * arch_atomic64_dec_and_test - decrement and test - * @v: pointer to type atomic64_t - * - * Atomically decrements @v by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } #define arch_atomic64_dec_and_test arch_atomic64_dec_and_test -/** - * arch_atomic64_inc_and_test - increment and test - * @v: pointer to type atomic64_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } #define arch_atomic64_inc_and_test arch_atomic64_inc_and_test -/** - * arch_atomic64_add_negative - add and test if negative - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. 
- */ static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } #define arch_atomic64_add_negative arch_atomic64_add_negative -/** - * arch_atomic64_add_return - add and return - * @i: integer value to add - * @v: pointer to type atomic64_t - * - * Atomically adds @i to @v and returns @i + @v - */ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 92ae28389940..f25ca2d709d4 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -4,8 +4,6 @@ #include <asm/processor.h> -extern void check_bugs(void); - #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 540573f515b7..d53636506134 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -239,29 +239,4 @@ extern void __add_wrong_size(void) #define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock) #define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX) -#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ - __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ - BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ - BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ - VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ - VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ - asm volatile(pfx "cmpxchg%c5b %1" \ - CC_SET(e) \ - : CC_OUT(e) (__ret), \ - "+m" (*(p1)), "+m" (*(p2)), \ - "+a" (__old1), "+d" (__old2) \ - : "i" (2 * sizeof(long)), \ - "b" (__new1), "c" (__new2)); \ - __ret; \ -}) - -#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) - -#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double(, p1, p2, o1, o2, n1, n2) - #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 6ba80ce9438d..b5731c51f0f4 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -103,6 +103,6 @@ static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new) #endif -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8) +#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8) #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 0d3beb27b7fe..44b08b53ab32 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -20,6 +20,71 @@ arch_try_cmpxchg((ptr), (po), (n)); \ }) -#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \ +({ \ + union __u128_halves o = { .full = (_old), }, \ + n = { .full = (_new), }; \ + \ + asm volatile(_lock "cmpxchg16b %[ptr]" \ + : [ptr] "+m" (*(_ptr)), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + o.full; \ +}) + +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX); +} +#define arch_cmpxchg128 arch_cmpxchg128 + +static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 
old, u128 new) +{ + return __arch_cmpxchg128(ptr, old, new,); +} +#define arch_cmpxchg128_local arch_cmpxchg128_local + +#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \ +({ \ + union __u128_halves o = { .full = *(_oldp), }, \ + n = { .full = (_new), }; \ + bool ret; \ + \ + asm volatile(_lock "cmpxchg16b %[ptr]" \ + CC_SET(e) \ + : CC_OUT(e) (ret), \ + [ptr] "+m" (*ptr), \ + "+a" (o.low), "+d" (o.high) \ + : "b" (n.low), "c" (n.high) \ + : "memory"); \ + \ + if (unlikely(!ret)) \ + *(_oldp) = o.full; \ + \ + likely(ret); \ +}) + +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX); +} +#define arch_try_cmpxchg128 arch_try_cmpxchg128 + +static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new) +{ + return __arch_try_cmpxchg128(ptr, oldp, new,); +} +#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local + +#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index eb08796002f3..6ae2d16a7613 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -10,30 +10,13 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -#ifdef CONFIG_ARCH_HAS_CC_PLATFORM extern enum cc_vendor cc_vendor; -static inline enum cc_vendor cc_get_vendor(void) -{ - return cc_vendor; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) -{ - cc_vendor = vendor; -} - +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else -static inline enum cc_vendor cc_get_vendor(void) -{ - return CC_VENDOR_NONE; -} - -static inline void cc_set_vendor(enum cc_vendor vendor) { } - static inline u64 cc_mkenc(u64 val) { return val; diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 78796b98a544..3a233ebff712 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -30,10 +30,7 @@ struct x86_cpu { #ifdef CONFIG_HOTPLUG_CPU extern int arch_register_cpu(int num); extern void arch_unregister_cpu(int); -extern void start_cpu0(void); -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -extern int _debug_hotplug_cpu(int cpu, int action); -#endif +extern void soft_restart_cpu(void); #endif extern void ap_init_aperfmperf(void); @@ -98,4 +95,6 @@ extern u64 x86_read_arch_cap_msr(void); int intel_find_matching_signature(void *mc, unsigned int csig, int cpf); int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type); +extern struct cpumask cpus_stop_mask; + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce0c8f7d3218..a26bebbdff87 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -38,15 +38,10 @@ enum cpuid_leafs #define X86_CAP_FMT_NUM "%d:%d" #define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31) -#ifdef CONFIG_X86_FEATURE_NAMES extern const char * const x86_cap_flags[NCAPINTS*32]; extern const char * const x86_power_flags[32]; #define X86_CAP_FMT "%s" #define x86_cap_flag(flag) x86_cap_flags[flag] -#else -#define X86_CAP_FMT X86_CAP_FMT_NUM -#define x86_cap_flag x86_cap_flag_num -#endif /* * In order to save room, we index into this array by doing diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index c5aed9e9226c..4acfd57de8f1 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -4,11 +4,6 @@ #ifndef 
__ASSEMBLY__ #include <linux/cpumask.h> -extern cpumask_var_t cpu_callin_mask; -extern cpumask_var_t cpu_callout_mask; -extern cpumask_var_t cpu_initialized_mask; -extern cpumask_var_t cpu_sibling_setup_mask; - extern void setup_cpu_local_masks(void); /* diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h index 54a6e4a2e132..de0e88b32207 100644 --- a/arch/x86/include/asm/doublefault.h +++ b/arch/x86/include/asm/doublefault.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_DOUBLEFAULT_H #define _ASM_X86_DOUBLEFAULT_H +#include <linux/linkage.h> + #ifdef CONFIG_X86_32 extern void doublefault_init_cpu_tss(void); #else @@ -10,4 +12,6 @@ static inline void doublefault_init_cpu_tss(void) } #endif +asmlinkage void __noreturn doublefault_shim(void); + #endif /* _ASM_X86_DOUBLEFAULT_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 419280d263d2..8b4be7cecdb8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -31,6 +31,8 @@ extern unsigned long efi_mixed_mode_stack_pa; #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF +#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE + /* * The EFI services are called through variadic functions in many cases. These * functions are implemented in assembler and support only a fixed number of diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 503a577814b2..b475d9a582b8 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -109,7 +109,7 @@ extern void fpu_reset_from_exception_fixup(void); /* Boot, hotplug and resume */ extern void fpu__init_cpu(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_system(void); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 5061ac98ffa1..b8d4a07f9595 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -106,6 +106,9 @@ struct dyn_arch_ftrace { #ifndef __ASSEMBLY__ +void prepare_ftrace_return(unsigned long ip, unsigned long *parent, + unsigned long frame_pointer); + #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) extern void set_ftrace_ops_ro(void); #else diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 768aa234cbb4..29e083b92813 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -40,8 +40,6 @@ extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern void init_ISA_irqs(void); -extern void __init init_IRQ(void); - #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 9646ed6e8c0b..180b1cbfcc4e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -350,4 +350,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } #endif static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } + +unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index b7126701574c..7f97a8a97e24 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -17,6 +17,12 @@ #include <asm/bootparam.h> +#ifdef CONFIG_X86_MEM_ENCRYPT +void __init mem_encrypt_init(void); +#else +static inline void 
mem_encrypt_init(void) { } +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; @@ -87,9 +93,6 @@ static inline void mem_encrypt_free_decrypted_mem(void) { } #endif /* CONFIG_AMD_MEM_ENCRYPT */ -/* Architecture __weak replacement functions */ -void __init mem_encrypt_init(void); - void add_encrypt_protection_map(void); /* diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 49bb4f2bd300..88d9ef98e087 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -257,6 +257,11 @@ void hv_set_register(unsigned int reg, u64 value); u64 hv_get_non_nested_register(unsigned int reg); void hv_set_non_nested_register(unsigned int reg, u64 value); +static __always_inline u64 hv_raw_get_register(unsigned int reg) +{ + return __rdmsr(reg); +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index f0eeaf6e5f5f..090d658a85a6 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -23,14 +23,43 @@ #ifndef _ASM_X86_MTRR_H #define _ASM_X86_MTRR_H +#include <linux/bits.h> #include <uapi/asm/mtrr.h> +/* Defines for hardware MTRR registers. */ +#define MTRR_CAP_VCNT GENMASK(7, 0) +#define MTRR_CAP_FIX BIT_MASK(8) +#define MTRR_CAP_WC BIT_MASK(10) + +#define MTRR_DEF_TYPE_TYPE GENMASK(7, 0) +#define MTRR_DEF_TYPE_FE BIT_MASK(10) +#define MTRR_DEF_TYPE_E BIT_MASK(11) + +#define MTRR_DEF_TYPE_ENABLE (MTRR_DEF_TYPE_FE | MTRR_DEF_TYPE_E) +#define MTRR_DEF_TYPE_DISABLE ~(MTRR_DEF_TYPE_TYPE | MTRR_DEF_TYPE_ENABLE) + +#define MTRR_PHYSBASE_TYPE GENMASK(7, 0) +#define MTRR_PHYSBASE_RSVD GENMASK(11, 8) + +#define MTRR_PHYSMASK_RSVD GENMASK(10, 0) +#define MTRR_PHYSMASK_V BIT_MASK(11) + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + bool have_fixed; + mtrr_type def_type; +}; + /* * The following functions are for use by other drivers that cannot use * arch_phys_wc_add and arch_phys_wc_del. */ # ifdef CONFIG_MTRR void mtrr_bp_init(void); +void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type); extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform); extern void mtrr_save_fixed_ranges(void *); extern void mtrr_save_state(void); @@ -40,7 +69,6 @@ extern int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, bool increment); extern int mtrr_del(int reg, unsigned long base, unsigned long size); extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); -extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_bp_restore(void); extern int mtrr_trim_uncached_memory(unsigned long end_pfn); extern int amd_special_default_mtrr(void); @@ -48,12 +76,21 @@ void mtrr_disable(void); void mtrr_enable(void); void mtrr_generic_set_state(void); # else +static inline void mtrr_overwrite_state(struct mtrr_var_range *var, + unsigned int num_var, + mtrr_type def_type) +{ +} + static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform) { /* - * Return no-MTRRs: + * Return the default MTRR type, without any known other types in + * that range. 
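+	 * With MTRR support compiled out there is nothing that can vary
+	 * across the range, so the range is reported as uniform; UC is
+	 * the architectural default while MTRRs are disabled.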
*/ - return MTRR_TYPE_INVALID; + *uniform = 1; + + return MTRR_TYPE_UNCACHABLE; } #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) @@ -79,9 +116,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) { return 0; } -static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ -} #define mtrr_bp_init() do {} while (0) #define mtrr_bp_restore() do {} while (0) #define mtrr_disable() do {} while (0) @@ -121,7 +155,8 @@ struct mtrr_gentry32 { #endif /* CONFIG_COMPAT */ /* Bit fields for enabled in struct mtrr_state_type */ -#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01 -#define MTRR_STATE_MTRR_ENABLED 0x02 +#define MTRR_STATE_SHIFT 10 +#define MTRR_STATE_MTRR_FIXED_ENABLED (MTRR_DEF_TYPE_FE >> MTRR_STATE_SHIFT) +#define MTRR_STATE_MTRR_ENABLED (MTRR_DEF_TYPE_E >> MTRR_STATE_SHIFT) #endif /* _ASM_X86_MTRR_H */ diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index c5573eaa5bb9..1c1b7550fa55 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -34,6 +34,8 @@ #define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 #define BYTES_NOP8 0x3e,BYTES_NOP7 +#define ASM_NOP_MAX 8 + #else /* @@ -47,6 +49,9 @@ * 6: osp nopl 0x00(%eax,%eax,1) * 7: nopl 0x00000000(%eax) * 8: nopl 0x00000000(%eax,%eax,1) + * 9: cs nopl 0x00000000(%eax,%eax,1) + * 10: osp cs nopl 0x00000000(%eax,%eax,1) + * 11: osp osp cs nopl 0x00000000(%eax,%eax,1) */ #define BYTES_NOP1 0x90 #define BYTES_NOP2 0x66,BYTES_NOP1 @@ -56,6 +61,15 @@ #define BYTES_NOP6 0x66,BYTES_NOP5 #define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 #define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +#define BYTES_NOP9 0x2e,BYTES_NOP8 +#define BYTES_NOP10 0x66,BYTES_NOP9 +#define BYTES_NOP11 0x66,BYTES_NOP10 + +#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9) +#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10) +#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11) + +#define ASM_NOP_MAX 11 #endif /* CONFIG_64BIT */ @@ -68,8 +82,6 @@ #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) -#define ASM_NOP_MAX 8 - #ifndef __ASSEMBLY__ extern const unsigned char * const x86_nops[]; #endif diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index edb2b0cb8efe..55388c9f7601 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -84,12 +84,12 @@ movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); #define RESET_CALL_DEPTH \ - mov $0x80, %rax; \ - shl $56, %rax; \ + xor %eax, %eax; \ + bts $63, %rax; \ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); #define RESET_CALL_DEPTH_FROM_CALL \ - mov $0xfc, %rax; \ + movb $0xfc, %al; \ shl $56, %rax; \ movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ CALL_THUNKS_DEBUG_INC_CALLS diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h new file mode 100644 index 000000000000..07bacf3e160e --- /dev/null +++ b/arch/x86/include/asm/orc_header.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <asm/orc_hash.h> + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. 
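+ * Kernel and ORC-generating tooling must agree on that definition, so the
+ * hash is emitted into .orc_header and can be compared up front instead of
+ * risking silent mis-parsing of stale ORC data. The macro below expands,
+ * roughly, to:
+ *
+ *	static const u8 orc_header[] __used
+ *		__section(".orc_header") __aligned(4) = { ORC_HASH };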
+ */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 13c0d63ed55e..34734d730463 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -210,6 +210,67 @@ do { \ (typeof(_var))(unsigned long) pco_old__; \ }) +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \ +({ \ + union { \ + u64 var; \ + struct { \ + u32 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \ + "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \ + : [var] "+m" (_var), \ + "+a" (old__.low), \ + "+d" (old__.high) \ + : "b" (new__.low), \ + "c" (new__.high) \ + : "memory", "esi"); \ + \ + old__.var; \ +}) + +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval) +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval) +#endif + +#ifdef CONFIG_X86_64 +#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval); +#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval); + +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \ +({ \ + union { \ + u128 var; \ + struct { \ + u64 low, high; \ + }; \ + } old__, new__; \ + \ + old__.var = _oval; \ + new__.var = _nval; \ + \ + asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \ + "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \ + : [var] "+m" (_var), \ + "+a" (old__.low), \ + "+d" (old__.high) \ + : "b" (new__.low), \ + "c" (new__.high) \ + : "memory", "rsi"); \ + \ + old__.var; \ +}) + +#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval) +#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval) +#endif + /* * this_cpu_read() makes gcc load the percpu variable every time it is * accessed while this_cpu_read_stable() allows the value to be cached. @@ -290,23 +351,6 @@ do { \ #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval) #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval) -#ifdef CONFIG_X86_CMPXCHG64 -#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - asm volatile("cmpxchg8b "__percpu_arg(1) \ - CC_SET(z) \ - : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \ - : "b" (__n1), "c" (__n2)); \ - __ret; \ -}) - -#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double -#endif /* CONFIG_X86_CMPXCHG64 */ - /* * Per cpu atomic 64 bit operations are only available under 64 bit. * 32 bit must fall back to generic operations. @@ -329,30 +373,6 @@ do { \ #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val) #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval) #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval) - -/* - * Pretty complex macro to generate cmpxchg16 instruction. The instruction - * is not supported on early AMD64 processors so we must be able to emulate - * it in software. 
The address used in the cmpxchg16 instruction must be - * aligned to a 16 byte boundary. - */ -#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \ -({ \ - bool __ret; \ - typeof(pcp1) __o1 = (o1), __n1 = (n1); \ - typeof(pcp2) __o2 = (o2), __n2 = (n2); \ - alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \ - "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \ - X86_FEATURE_CX16, \ - ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \ - "+m" (pcp2), "+d" (__o2)), \ - "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \ - __ret; \ -}) - -#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double -#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double - #endif static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index abf09882f58b..85a9fd5a3ec3 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -32,11 +32,21 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define INTEL_FIXED_BITS_MASK 0xFULL +#define INTEL_FIXED_BITS_STRIDE 4 +#define INTEL_FIXED_0_KERNEL (1ULL << 0) +#define INTEL_FIXED_0_USER (1ULL << 1) +#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2) +#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3) + #define HSW_IN_TX (1ULL << 32) #define HSW_IN_TX_CHECKPOINTED (1ULL << 33) #define ICL_EVENTSEL_ADAPTIVE (1ULL << 34) #define ICL_FIXED_0_ADAPTIVE (1ULL << 32) +#define intel_fixed_bits_by_idx(_idx, _bits) \ + ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE)) + #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) @@ -478,8 +488,10 @@ struct pebs_xmm { #ifdef CONFIG_X86_LOCAL_APIC extern u32 get_ibs_caps(void); +extern int forward_event_to_ibs(struct perf_event *event); #else static inline u32 get_ibs_caps(void) { return 0; } +static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; } #endif #ifdef CONFIG_PERF_EVENTS diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 15ae4d6ba476..5700bb337987 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -27,6 +27,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); +struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7929327abe00..a629b1b9f65a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -237,8 +237,8 @@ static inline void native_pgd_clear(pgd_t *pgd) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) -#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) +#define __swp_entry_to_pte(x) (__pte((x).val)) +#define __swp_entry_to_pmd(x) (__pmd((x).val)) extern void cleanup_highmap(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 447d4bee25c4..ba3e2554799a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -513,9 +513,6 @@ extern void native_pagetable_init(void); #define native_pagetable_init paging_init #endif -struct 
seq_file; -extern void arch_report_meminfo(struct seq_file *m); - enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a1e4fa58b357..d46300e94f85 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -551,7 +551,6 @@ extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); -extern void cpu_init_secondary(void); extern void cpu_init_exception_handling(void); extern void cr4_init(void); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index f6a1737c77be..87e5482acd0d 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -52,6 +52,7 @@ struct trampoline_header { u64 efer; u32 cr4; u32 flags; + u32 lock; #endif }; @@ -64,6 +65,8 @@ extern unsigned long initial_stack; extern unsigned long initial_vc_handler; #endif +extern u32 *trampoline_lock; + extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 0759af9b1acf..b463fcbd4b90 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -106,8 +106,13 @@ enum psc_op { #define GHCB_HV_FT_SNP BIT_ULL(0) #define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1) -/* SNP Page State Change NAE event */ -#define VMGEXIT_PSC_MAX_ENTRY 253 +/* + * SNP Page State Change NAE event + * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which + * is a local stack variable in set_pages_state(). Do not increase this value + * without evaluating the impact to stack usage. + */ +#define VMGEXIT_PSC_MAX_ENTRY 64 struct psc_hdr { u16 cur_entry; diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 13dc2a9d23c1..66c806784c52 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -14,6 +14,7 @@ #include <asm/insn.h> #include <asm/sev-common.h> #include <asm/bootparam.h> +#include <asm/coco.h> #define GHCB_PROTOCOL_MIN 1ULL #define GHCB_PROTOCOL_MAX 2ULL @@ -80,11 +81,15 @@ extern void vc_no_ghcb(void); extern void vc_boot_ghcb(void); extern bool handle_vc_boot_ghcb(struct pt_regs *regs); +/* PVALIDATE return codes */ +#define PVALIDATE_FAIL_SIZEMISMATCH 6 + /* Software defined (when rFlags.CF = 1) */ #define PVALIDATE_FAIL_NOUPDATE 255 /* RMP page size */ #define RMP_PG_SIZE_4K 0 +#define RMP_PG_SIZE_2M 1 #define RMPADJUST_VMSA_PAGE_BIT BIT(16) @@ -136,24 +141,26 @@ struct snp_secrets_page_layout { } __packed; #ifdef CONFIG_AMD_MEM_ENCRYPT -extern struct static_key_false sev_es_enable_key; extern void __sev_es_ist_enter(struct pt_regs *regs); extern void __sev_es_ist_exit(void); static __always_inline void sev_es_ist_enter(struct pt_regs *regs) { - if (static_branch_unlikely(&sev_es_enable_key)) + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) __sev_es_ist_enter(regs); } static __always_inline void sev_es_ist_exit(void) { - if (static_branch_unlikely(&sev_es_enable_key)) + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) __sev_es_ist_exit(); } extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh); extern void __sev_es_nmi_complete(void); static __always_inline void sev_es_nmi_complete(void) { - if (static_branch_unlikely(&sev_es_enable_key)) + if (cc_vendor == CC_VENDOR_AMD && + cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 
__sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); @@ -192,16 +199,17 @@ struct snp_guest_request_ioctl; void setup_ghcb(void); void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned int npages); + unsigned long npages); void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned int npages); + unsigned long npages); void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); -void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); -void snp_set_memory_private(unsigned long vaddr, unsigned int npages); +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); +void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +void snp_accept_memory(phys_addr_t start, phys_addr_t end); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -212,12 +220,12 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } static inline void __init -early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } +early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init -early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { } +early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } -static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { } -static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { } +static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } +static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } @@ -225,6 +233,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } + +static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } #endif #endif diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 2631e01f6e0f..7513b3bb69b7 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -10,6 +10,20 @@ #define TDX_CPUID_LEAF_ID 0x21 #define TDX_IDENT "IntelTDX " +/* TDX module Call Leaf IDs */ +#define TDX_GET_INFO 1 +#define TDX_GET_VEINFO 3 +#define TDX_GET_REPORT 4 +#define TDX_ACCEPT_PAGE 6 +#define TDX_WR 8 + +/* TDCS fields. 
To be used by TDG.VM.WR and TDG.VM.RD module calls */ +#define TDCS_NOTIFY_ENABLES 0x9100000000000010 + +/* TDX hypercall Leaf IDs */ +#define TDVMCALL_MAP_GPA 0x10001 +#define TDVMCALL_REPORT_FATAL_ERROR 0x10003 + #ifndef __ASSEMBLY__ /* @@ -37,8 +51,58 @@ struct tdx_hypercall_args { u64 __tdx_hypercall(struct tdx_hypercall_args *args); u64 __tdx_hypercall_ret(struct tdx_hypercall_args *args); +/* + * Wrapper for standard use of __tdx_hypercall with no output aside from + * return code. + */ +static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) +{ + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = fn, + .r12 = r12, + .r13 = r13, + .r14 = r14, + .r15 = r15, + }; + + return __tdx_hypercall(&args); +} + + /* Called from __tdx_hypercall() for unrecoverable failure */ void __tdx_hypercall_failed(void); +/* + * Used in __tdx_module_call() to gather the output registers' values of the + * TDCALL instruction when requesting services from the TDX module. This is a + * software only structure and not part of the TDX module/VMM ABI + */ +struct tdx_module_output { + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + u64 r10; + u64 r11; +}; + +/* Used to communicate with the TDX module */ +u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, + struct tdx_module_output *out); + +bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); + +/* + * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined + * independently from but are currently matched 1:1 with VMX EXIT_REASONs. + * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and + * guest sides of these calls. + */ +static __always_inline u64 hcall_func(u64 exit_reason) +{ + return exit_reason; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_SHARED_TDX_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 5b1ed650b124..84eab2724875 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -85,6 +85,4 @@ struct rt_sigframe_x32 { #endif /* CONFIG_X86_64 */ -void __init init_sigframe_size(void); - #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4e91054c84be..600cf25dbfc6 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -38,7 +38,9 @@ struct smp_ops { void (*crash_stop_other_cpus)(void); void (*smp_send_reschedule)(int cpu); - int (*cpu_up)(unsigned cpu, struct task_struct *tidle); + void (*cleanup_dead_cpu)(unsigned cpu); + void (*poll_sync_state)(void); + int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle); int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); @@ -78,11 +80,6 @@ static inline void smp_cpus_done(unsigned int max_cpus) smp_ops.smp_cpus_done(max_cpus); } -static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle) -{ - return smp_ops.cpu_up(cpu, tidle); -} - static inline int __cpu_disable(void) { return smp_ops.cpu_disable(); @@ -90,7 +87,8 @@ static inline int __cpu_disable(void) static inline void __cpu_die(unsigned int cpu) { - smp_ops.cpu_die(cpu); + if (smp_ops.cpu_die) + smp_ops.cpu_die(cpu); } static inline void __noreturn play_dead(void) @@ -121,22 +119,23 @@ void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); -int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); 
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle); int native_cpu_disable(void); -int common_cpu_die(unsigned int cpu); -void native_cpu_die(unsigned int cpu); void __noreturn hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); int wbinvd_on_all_cpus(void); -void cond_wakeup_cpu0(void); + +void smp_kick_mwait_play_dead(void); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); +bool smp_park_other_cpus_in_init(void); + void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); @@ -201,7 +200,14 @@ extern void nmi_selftest(void); #endif extern unsigned int smpboot_control; +extern unsigned long apic_mmio_base; #endif /* !__ASSEMBLY__ */ +/* Control bits for startup_64 */ +#define STARTUP_READ_APICID 0x80000000 + +/* Top 8 bits are reserved for control */ +#define STARTUP_PARALLEL_MASK 0xFF000000 + #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 5b85987a5e97..4fb36fba4b5a 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -127,9 +127,11 @@ static inline int syscall_get_arch(struct task_struct *task) } void do_syscall_64(struct pt_regs *regs, int nr); -void do_int80_syscall_32(struct pt_regs *regs); -long do_fast_syscall_32(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ +void do_int80_syscall_32(struct pt_regs *regs); +long do_fast_syscall_32(struct pt_regs *regs); +long do_SYSENTER_32(struct pt_regs *regs); + #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 28d889c9aa16..603e6d1e9d4a 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -5,6 +5,8 @@ #include <linux/init.h> #include <linux/bits.h> + +#include <asm/errno.h> #include <asm/ptrace.h> #include <asm/shared/tdx.h> @@ -21,21 +23,6 @@ #ifndef __ASSEMBLY__ /* - * Used to gather the output registers values of the TDCALL and SEAMCALL - * instructions when requesting services from the TDX module. - * - * This is a software only structure and not part of the TDX module/VMM ABI. - */ -struct tdx_module_output { - u64 rcx; - u64 rdx; - u64 r8; - u64 r9; - u64 r10; - u64 r11; -}; - -/* * Used by the #VE exception handler to gather the #VE exception * info from the TDX module. This is a software only structure * and not part of the TDX module/VMM ABI. 
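The _tdx_hypercall() wrapper above exists so that the common "standard TDVMCALL, status returned in %r10, no outputs beyond the return code" call sites stay one-liners. A minimal sketch of how a caller might use it with the TDVMCALL_MAP_GPA leaf defined above -- the helper name and error handling here are illustrative assumptions, not the kernel's actual MapGPA path (which also deals with retries and the shared/private GPA bit):

/*
 * Hypothetical sketch only: ask the VMM to convert the GPA range
 * [start, end). For this leaf, r12 carries the starting GPA and r13
 * the length in bytes; r14 and r15 are unused.
 */
static int tdx_map_gpa_sketch(phys_addr_t start, phys_addr_t end)
{
	u64 ret = _tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0);

	/* A non-zero status in r10 means the VMM rejected the request */
	return ret ? -EIO : 0;
}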
@@ -55,10 +42,6 @@ struct ve_info { void __init tdx_early_init(void); -/* Used to communicate with the TDX module */ -u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, - struct tdx_module_output *out); - void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f1cccba52eb9..d63b02940747 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -232,9 +232,6 @@ static inline int arch_within_stack_frames(const void * const stack, current_thread_info()->status & TS_COMPAT) #endif -extern void arch_task_cache_init(void); -extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); -extern void arch_release_task_struct(struct task_struct *tsk); extern void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index a53961c64a56..f360104ed172 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -6,7 +6,6 @@ #include <asm/mc146818rtc.h> extern void hpet_time_init(void); -extern void time_init(void); extern bool pit_timer_init(void); extern bool tsc_clocksource_watchdog_disabled(void); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 75bfaa421030..80450e1d5385 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -14,6 +14,8 @@ #include <asm/processor-flags.h> #include <asm/pgtable.h> +DECLARE_PER_CPU(u64, tlbstate_untag_mask); + void __flush_tlb_all(void); #define TLB_FLUSH_ALL -1UL @@ -54,15 +56,6 @@ static inline void cr4_clear_bits(unsigned long mask) local_irq_restore(flags); } -#ifdef CONFIG_ADDRESS_MASKING -DECLARE_PER_CPU(u64, tlbstate_untag_mask); - -static inline u64 current_untag_mask(void) -{ - return this_cpu_read(tlbstate_untag_mask); -} -#endif - #ifndef MODULE /* * 6 because 6 should be plenty and struct tlb_state will fit in two cache diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 458c891a8273..caf41c4869a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -31,9 +31,9 @@ * CONFIG_NUMA. 
*/ #include <linux/numa.h> +#include <linux/cpumask.h> #ifdef CONFIG_NUMA -#include <linux/cpumask.h> #include <asm/mpspec.h> #include <asm/percpu.h> @@ -139,23 +139,31 @@ static inline int topology_max_smt_threads(void) int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); -bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); -#else + +extern struct cpumask __cpu_primary_thread_mask; +#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) + +/** + * topology_is_primary_thread - Check whether CPU is the primary SMT thread + * @cpu: CPU to check + */ +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_primary_thread_mask); +} +#else /* CONFIG_SMP */ #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_phys_to_logical_die(unsigned int die, - unsigned int cpu) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline bool topology_smt_supported(void) { return false; } -#endif +#endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) { diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index fbdc3d951494..594fce0ca744 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -32,7 +32,6 @@ extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern void tsc_early_init(void); extern void tsc_init(void); -extern unsigned long calibrate_delay_is_known(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); @@ -55,12 +54,10 @@ extern bool tsc_async_resets; #ifdef CONFIG_X86_TSC extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); extern void tsc_verify_tsc_adjust(bool resume); -extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); #else static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } static inline void tsc_verify_tsc_adjust(bool resume) { } -static inline void check_tsc_sync_source(int cpu) { } static inline void check_tsc_sync_target(void) { } #endif diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h new file mode 100644 index 000000000000..f5937e9866ac --- /dev/null +++ b/arch/x86/include/asm/unaccepted_memory.h @@ -0,0 +1,27 @@ +#ifndef _ASM_X86_UNACCEPTED_MEMORY_H +#define _ASM_X86_UNACCEPTED_MEMORY_H + +#include <linux/efi.h> +#include <asm/tdx.h> +#include <asm/sev.h> + +static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end) +{ + /* Platform-specific memory-acceptance call goes here */ + if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { + if (!tdx_accept_memory(start, end)) + panic("TDX: Failed to accept memory\n"); + } else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + snp_accept_memory(start, end); + } else { + panic("Cannot accept 
memory: unknown platform\n");
+ }
+}
+
+static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void)
+{
+ if (efi.unaccepted == EFI_INVALID_TABLE_ADDR)
+ return NULL;
+ return __va(efi.unaccepted);
+}
+#endif
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 01cb9692b160..85cc57cb6539 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -76,9 +76,18 @@
 #else
+#define UNWIND_HINT_UNDEFINED \
+ UNWIND_HINT(UNWIND_HINT_TYPE_UNDEFINED, 0, 0, 0)
+
 #define UNWIND_HINT_FUNC \
 UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0)
+#define UNWIND_HINT_SAVE \
+ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
+
+#define UNWIND_HINT_RESTORE \
+ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d3e3197917be..5fa76c2ced51 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -177,6 +177,7 @@ struct uv_hub_info_s {
 unsigned short nr_possible_cpus;
 unsigned short nr_online_cpus;
 short memory_nid;
+ unsigned short *node_to_socket;
 };
 /* CPU specific info with a pointer to the hub common info struct */
@@ -519,25 +520,30 @@ static inline int uv_socket_to_node(int socket)
 return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
 }
+static inline int uv_pnode_to_socket(int pnode)
+{
+ unsigned short *p2s = uv_hub_info->pnode_to_socket;
+
+ return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+}
+
 /* pnode, offset --> socket virtual */
 static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
 {
 unsigned int m_val = uv_hub_info->m_val;
 unsigned long base;
- unsigned short sockid, node, *p2s;
+ unsigned short sockid;
 if (m_val)
 return __va(((unsigned long)pnode << m_val) | offset);
- p2s = uv_hub_info->pnode_to_socket;
- sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
- node = uv_socket_to_node(sockid);
+ sockid = uv_pnode_to_socket(pnode);
 /* limit address of previous socket is our base, except node 0 is 0 */
- if (!node)
+ if (sockid == 0)
 return __va((unsigned long)offset);
- base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+ base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
 return __va(base << UV_GAM_RANGE_SHFT | offset);
 }
@@ -644,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu)
 /* Blade number to Node number (UV2..UV4 is 1:1) */
 static inline int uv_blade_to_node(int blade)
 {
- return blade;
+ return uv_socket_to_node(blade);
 }
 /* Blade number of current cpu. Numbered 0 .. <#blades -1> */
 static inline int uv_numa_blade_id(void)
 {
@@ -656,23 +662,27 @@ static inline int uv_numa_blade_id(void)
 /*
 * Convert linux node number to the UV blade number.
 * .. Currently for UV2 thru UV4 the node and the blade are identical.
- * .. If this changes then you MUST check references to this function!
+ * .. UV5 needs conversion when sub-numa clustering is enabled.
 */
 static inline int uv_node_to_blade_id(int nid)
 {
- return nid;
+ unsigned short *n2s = uv_hub_info->node_to_socket;
+
+ return n2s ?
n2s[nid] : nid; } /* Convert a CPU number to the UV blade number */ static inline int uv_cpu_to_blade_id(int cpu) { - return uv_node_to_blade_id(cpu_to_node(cpu)); + return uv_cpu_hub_info(cpu)->numa_blade_id; } /* Convert a blade id to the PNODE of the blade */ static inline int uv_blade_to_pnode(int bid) { - return uv_hub_info_list(uv_blade_to_node(bid))->pnode; + unsigned short *s2p = uv_hub_info->socket_to_pnode; + + return s2p ? s2p[bid] : bid; } /* Nid of memory node on blade. -1 if no blade-local memory */ diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 57fa67373262..bb45812889dd 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -4199,6 +4199,13 @@ union uvh_rh_gam_mmioh_overlay_config1_u { #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \ + 0) + union uvh_rh_gam_mmioh_redirect_config0_u { unsigned long v; @@ -4247,8 +4254,8 @@ union uvh_rh_gam_mmioh_redirect_config0_u { 0) /* UV4A unique defines */ -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0 -#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL /* UV4 unique defines */ #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 @@ -4258,6 +4265,13 @@ union uvh_rh_gam_mmioh_redirect_config0_u { #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL +/* UVH common defines */ +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \ + is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \ + 0) + union uvh_rh_gam_mmioh_redirect_config1_u { unsigned long v; diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 4cf6794f9d68..c81858d903dc 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -231,14 +231,19 @@ static u64 vread_pvclock(void) ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); } while (pvclock_read_retry(pvti, version)); - return ret; + return ret & S64_MAX; } #endif #ifdef CONFIG_HYPERV_TIMER static u64 vread_hvclock(void) { - return hv_read_tsc_page(&hvclock_page); + u64 tsc, time; + + if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time)) + return time & S64_MAX; + + return U64_MAX; } #endif @@ -246,7 +251,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) { if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) - return (u64)rdtsc_ordered(); + return (u64)rdtsc_ordered() & S64_MAX; /* * For any memory-mapped vclock type, we need to make sure that gcc * doesn't cleverly hoist a load before the mode check. 
Otherwise we
@@ -284,6 +289,9 @@ static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
 * which can be invalidated asynchronously and indicate invalidation by
 * returning U64_MAX, which can be effectively tested by checking for a
 * negative value after casting it to s64.
+ *
+ * This effectively forces an S64_MAX mask on the calculations, unlike the
+ * U64_MAX mask normally used by x86 clocksources.
 */
 static inline bool arch_vdso_cycles_ok(u64 cycles)
 {
@@ -303,18 +311,29 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
 * @last. If not then use @last, which is the base time of the current
 * conversion period.
 *
- * This variant also removes the masking of the subtraction because the
- * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
- * which would result in a pointless operation. The compiler cannot
- * optimize it away as the mask comes from the vdso data and is not compile
- * time constant.
+ * This variant also uses a custom mask because while the clocksource mask of
+ * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
+ * U64_MAX as an exception value; additionally, arch_vdso_cycles_ok() above
+ * declares everything with the MSB/Sign-bit set as invalid. Therefore the
+ * effective mask is S64_MAX.
 */
 static __always_inline
 u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
 {
- if (cycles > last)
- return (cycles - last) * mult;
- return 0;
+ /*
+ * Due to the MSB/Sign-bit being used as an invalid marker (see
+ * arch_vdso_cycles_ok() above), the effective mask is S64_MAX.
+ */
+ u64 delta = (cycles - last) & S64_MAX;
+
+ /*
+ * Due to the above mentioned TSC wobbles, filter out negative motion.
+ * Per the above masking, the effective sign bit is now bit 62.
+ */
+ if (unlikely(delta & (1ULL << 62)))
+ return 0;
+
+ return delta * mult;
 }
 #define vdso_calc_delta vdso_calc_delta
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 88085f369ff6..5240d88db52a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -150,7 +150,7 @@ struct x86_init_acpi {
 * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
 */
 struct x86_guest {
- void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
 bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
 bool (*enc_tlb_flush_required)(bool enc);
 bool (*enc_cache_flush_required)(void);
@@ -177,11 +177,14 @@ struct x86_init_ops {
 * struct x86_cpuinit_ops - platform specific cpu hotplug setups
 * @setup_percpu_clockev: set up the per cpu clock event device
 * @early_percpu_clock_init: early init of the per cpu clock event device
+ * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
+ * @parallel_bringup: Parallel bringup control
 */
 struct x86_cpuinit_ops {
 void (*setup_percpu_clockev)(void);
 void (*early_percpu_clock_init)(void);
 void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
+ bool parallel_bringup;
 };
 struct timespec64;
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h index 376563f2bac1..3a8a8eb8ac3a 100644 --- a/arch/x86/include/uapi/asm/mtrr.h +++ b/arch/x86/include/uapi/asm/mtrr.h @@ -81,14 +81,6 @@ typedef __u8 mtrr_type;
 #define MTRR_NUM_FIXED_RANGES 88
 #define MTRR_MAX_VAR_RANGES 256
-struct mtrr_state_type {
- struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
- mtrr_type
fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) @@ -115,9 +107,9 @@ struct mtrr_state_type { #define MTRR_NUM_TYPES 7 /* - * Invalid MTRR memory type. mtrr_type_lookup() returns this value when - * MTRRs are disabled. Note, this value is allocated from the reserved - * values (0x7-0xff) of the MTRR memory types. + * Invalid MTRR memory type. No longer used outside of MTRR code. + * Note, this value is allocated from the reserved values (0x7-0xff) of + * the MTRR memory types. */ #define MTRR_TYPE_INVALID 0xff diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1328c221af30..6dfecb27b846 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include <asm/cacheflush.h> #include <asm/realmode.h> #include <asm/hypervisor.h> +#include <asm/smp.h> #include <linux/ftrace.h> #include "../../realmode/rm/wakeup.h" @@ -127,7 +128,13 @@ int x86_acpi_suspend_lowlevel(void) * value is in the actual %rsp register. */ current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack); - smpboot_control = smp_processor_id(); + /* + * Ensure the CPU knows which one it is when it comes back, if + * it isn't in parallel mode and expected to work that out for + * itself. + */ + if (!(smpboot_control & STARTUP_PARALLEL_MASK)) + smpboot_control = smp_processor_id(); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0L; diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 171a40c74db6..054c15a2f860 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -12,7 +12,6 @@ extern int wakeup_pmode_return; extern u8 wake_sleep_flags; -extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); extern void do_suspend_lowlevel(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f615e0cb6d93..72646d75b6ff 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -37,11 +37,23 @@ EXPORT_SYMBOL_GPL(alternatives_patched); #define MAX_PATCH_LEN (255-1) -static int __initdata_or_module debug_alternative; +#define DA_ALL (~0) +#define DA_ALT 0x01 +#define DA_RET 0x02 +#define DA_RETPOLINE 0x04 +#define DA_ENDBR 0x08 +#define DA_SMP 0x10 + +static unsigned int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { - debug_alternative = 1; + if (str && *str == '=') + str++; + + if (!str || kstrtouint(str, 0, &debug_alternative)) + debug_alternative = DA_ALL; + return 1; } __setup("debug-alternative", debug_alt); @@ -55,15 +67,15 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#define DPRINTK(fmt, args...) \ +#define DPRINTK(type, fmt, args...) \ do { \ - if (debug_alternative) \ + if (debug_alternative & DA_##type) \ printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args); \ } while (0) -#define DUMP_BYTES(buf, len, fmt, args...) \ +#define DUMP_BYTES(type, buf, len, fmt, args...) 
\ do { \
- if (unlikely(debug_alternative)) { \
+ if (unlikely(debug_alternative & DA_##type)) { \
 int j; \
 \
 if (!(len)) \
@@ -86,6 +98,11 @@ static const unsigned char x86nops[] =
 BYTES_NOP6,
 BYTES_NOP7,
 BYTES_NOP8,
+#ifdef CONFIG_64BIT
+ BYTES_NOP9,
+ BYTES_NOP10,
+ BYTES_NOP11,
+#endif
 };
 const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
@@ -99,19 +116,44 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
 x86nops + 1 + 2 + 3 + 4 + 5,
 x86nops + 1 + 2 + 3 + 4 + 5 + 6,
 x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+#ifdef CONFIG_64BIT
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10,
+#endif
 };
-/* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void __init_or_module add_nops(void *insns, unsigned int len)
+/*
+ * Fill the buffer with a single effective instruction of size @len.
+ *
+ * In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
+ * for every single-byte NOP, try to generate the largest available NOP of
+ * size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for
+ * each single-byte NOP). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
+ * *jump* over it instead of executing long and daft NOPs.
+ */
+static void __init_or_module add_nop(u8 *instr, unsigned int len)
 {
- while (len > 0) {
- unsigned int noplen = len;
- if (noplen > ASM_NOP_MAX)
- noplen = ASM_NOP_MAX;
- memcpy(insns, x86_nops[noplen], noplen);
- insns += noplen;
- len -= noplen;
+ u8 *target = instr + len;
+
+ if (!len)
+ return;
+
+ if (len <= ASM_NOP_MAX) {
+ memcpy(instr, x86_nops[len], len);
+ return;
 }
+
+ if (len < 128) {
+ __text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
+ instr += JMP8_INSN_SIZE;
+ } else {
+ __text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
+ instr += JMP32_INSN_SIZE;
+ }
+
+ for (; instr < target; instr++)
+ *instr = INT3_INSN_OPCODE;
 }
 extern s32 __retpoline_sites[], __retpoline_sites_end[];
@@ -123,133 +165,223 @@ extern s32 __smp_locks[], __smp_locks_end[];
 void text_poke_early(void *addr, const void *opcode, size_t len);
 /*
- * Are we looking at a near JMP with a 1 or 4-byte displacement.
+ * Matches NOP and NOPL, not any of the other possible NOPs.
 */
-static inline bool is_jmp(const u8 opcode)
+static bool insn_is_nop(struct insn *insn)
 {
- return opcode == 0xeb || opcode == 0xe9;
+ /* Anything NOP, but no REP NOP */
+ if (insn->opcode.bytes[0] == 0x90 &&
+ (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3))
+ return true;
+
+ /* NOPL */
+ if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
+ return true;
+
+ /* TODO: more nops */
+
+ return false;
 }
-static void __init_or_module
-recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
+/*
+ * Find the offset of the first non-NOP instruction starting at @offset
+ * but no further than @len.
+ */ +static int skip_nops(u8 *instr, int offset, int len) { - u8 *next_rip, *tgt_rip; - s32 n_dspl, o_dspl; - int repl_len; + struct insn insn; - if (a->replacementlen != 5) - return; + for (; offset < len; offset += insn.length) { + if (insn_decode_kernel(&insn, &instr[offset])) + break; - o_dspl = *(s32 *)(insn_buff + 1); + if (!insn_is_nop(&insn)) + break; + } - /* next_rip of the replacement JMP */ - next_rip = repl_insn + a->replacementlen; - /* target rip of the replacement JMP */ - tgt_rip = next_rip + o_dspl; - n_dspl = tgt_rip - orig_insn; + return offset; +} - DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); +/* + * Optimize a sequence of NOPs, possibly preceded by an unconditional jump + * to the end of the NOP sequence into a single NOP. + */ +static bool __init_or_module +__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target) +{ + int i = *next - insn->length; - if (tgt_rip - orig_insn >= 0) { - if (n_dspl - 2 <= 127) - goto two_byte_jmp; - else - goto five_byte_jmp; - /* negative offset */ - } else { - if (((n_dspl - 2) & 0xff) == (n_dspl - 2)) - goto two_byte_jmp; - else - goto five_byte_jmp; + switch (insn->opcode.bytes[0]) { + case JMP8_INSN_OPCODE: + case JMP32_INSN_OPCODE: + *prev = i; + *target = *next + insn->immediate.value; + return false; } -two_byte_jmp: - n_dspl -= 2; + if (insn_is_nop(insn)) { + int nop = i; - insn_buff[0] = 0xeb; - insn_buff[1] = (s8)n_dspl; - add_nops(insn_buff + 2, 3); + *next = skip_nops(instr, *next, len); + if (*target && *next == *target) + nop = *prev; - repl_len = 2; - goto done; + add_nop(instr + nop, *next - nop); + DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next); + return true; + } + + *target = 0; + return false; +} -five_byte_jmp: - n_dspl -= 5; +/* + * "noinline" to cause control flow change and thus invalidate I$ and + * cause refetch after modification. + */ +static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) +{ + int prev, target = 0; - insn_buff[0] = 0xe9; - *(s32 *)&insn_buff[1] = n_dspl; + for (int next, i = 0; i < len; i = next) { + struct insn insn; - repl_len = 5; + if (insn_decode_kernel(&insn, &instr[i])) + return; -done: + next = i + insn.length; - DPRINTK("final displ: 0x%08x, JMP 0x%lx", - n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); + __optimize_nops(instr, len, &insn, &next, &prev, &target); + } } /* - * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90) + * In this context, "source" is where the instructions are placed in the + * section .altinstr_replacement, for example during kernel build by the + * toolchain. + * "Destination" is where the instructions are being patched in by this + * machinery. * - * @instr: instruction byte stream - * @instrlen: length of the above - * @off: offset within @instr where the first NOP has been detected + * The source offset is: * - * Return: number of NOPs found (and replaced). 
+ * src_imm = target - src_next_ip (1) + * + * and the target offset is: + * + * dst_imm = target - dst_next_ip (2) + * + * so rework (1) as an expression for target like: + * + * target = src_imm + src_next_ip (1a) + * + * and substitute in (2) to get: + * + * dst_imm = (src_imm + src_next_ip) - dst_next_ip (3) + * + * Now, since the instruction stream is 'identical' at src and dst (it + * is being copied after all) it can be stated that: + * + * src_next_ip = src + ip_offset + * dst_next_ip = dst + ip_offset (4) + * + * Substitute (4) in (3) and observe ip_offset being cancelled out to + * obtain: + * + * dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset) + * = src_imm + src - dst + ip_offset - ip_offset + * = src_imm + src - dst (5) + * + * IOW, only the relative displacement of the code block matters. */ -static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) -{ - unsigned long flags; - int i = off, nnops; - while (i < instrlen) { - if (instr[i] != 0x90) - break; +#define apply_reloc_n(n_, p_, d_) \ + do { \ + s32 v = *(s##n_ *)(p_); \ + v += (d_); \ + BUG_ON((v >> 31) != (v >> (n_-1))); \ + *(s##n_ *)(p_) = (s##n_)v; \ + } while (0) + - i++; +static __always_inline +void apply_reloc(int n, void *ptr, uintptr_t diff) +{ + switch (n) { + case 1: apply_reloc_n(8, ptr, diff); break; + case 2: apply_reloc_n(16, ptr, diff); break; + case 4: apply_reloc_n(32, ptr, diff); break; + default: BUG(); } +} - nnops = i - off; +static __always_inline +bool need_reloc(unsigned long offset, u8 *src, size_t src_len) +{ + u8 *target = src + offset; + /* + * If the target is inside the patched block, it's relative to the + * block itself and does not need relocation. + */ + return (target < src || target > src + src_len); +} - if (nnops <= 1) - return nnops; +static void __init_or_module noinline +apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len) +{ + int prev, target = 0; - local_irq_save(flags); - add_nops(instr + off, nnops); - local_irq_restore(flags); + for (int next, i = 0; i < len; i = next) { + struct insn insn; - DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i); + if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i]))) + return; - return nnops; -} + next = i + insn.length; -/* - * "noinline" to cause control flow change and thus invalidate I$ and - * cause refetch after modification. - */ -static void __init_or_module noinline optimize_nops(u8 *instr, size_t len) -{ - struct insn insn; - int i = 0; + if (__optimize_nops(buf, len, &insn, &next, &prev, &target)) + continue; - /* - * Jump over the non-NOP insns and optimize single-byte NOPs into bigger - * ones. - */ - for (;;) { - if (insn_decode_kernel(&insn, &instr[i])) - return; + switch (insn.opcode.bytes[0]) { + case 0x0f: + if (insn.opcode.bytes[1] < 0x80 || + insn.opcode.bytes[1] > 0x8f) + break; - /* - * See if this and any potentially following NOPs can be - * optimized. - */ - if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) - i += optimize_nops_range(instr, len, i); - else - i += insn.length; + fallthrough; /* Jcc.d32 */ + case 0x70 ... 0x7f: /* Jcc.d8 */ + case JMP8_INSN_OPCODE: + case JMP32_INSN_OPCODE: + case CALL_INSN_OPCODE: + if (need_reloc(next + insn.immediate.value, src, src_len)) { + apply_reloc(insn.immediate.nbytes, + buf + i + insn_offset_immediate(&insn), + src - dest); + } - if (i >= len) - return; + /* + * Where possible, convert JMP.d32 into JMP.d8. 
+ */ + if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) { + s32 imm = insn.immediate.value; + imm += src - dest; + imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE; + if ((imm >> 31) == (imm >> 7)) { + buf[i+0] = JMP8_INSN_OPCODE; + buf[i+1] = (s8)imm; + + memset(&buf[i+2], INT3_INSN_OPCODE, insn.length - 2); + } + } + break; + } + + if (insn_rip_relative(&insn)) { + if (need_reloc(next + insn.displacement.value, src, src_len)) { + apply_reloc(insn.displacement.nbytes, + buf + i + insn_offset_displacement(&insn), + src - dest); + } + } } } @@ -270,7 +402,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 *instr, *replacement; u8 insn_buff[MAX_PATCH_LEN]; - DPRINTK("alt table %px, -> %px", start, end); + DPRINTK(ALT, "alt table %px, -> %px", start, end); /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. @@ -294,47 +426,31 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * - feature not present but ALT_FLAG_NOT is set to mean, * patch if feature is *NOT* present. */ - if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) - goto next; + if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) { + optimize_nops(instr, a->instrlen); + continue; + } - DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", + DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)", (a->flags & ALT_FLAG_NOT) ? "!" : "", a->cpuid >> 5, a->cpuid & 0x1f, instr, instr, a->instrlen, replacement, a->replacementlen); - DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); - DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); - memcpy(insn_buff, replacement, a->replacementlen); insn_buff_sz = a->replacementlen; - /* - * 0xe8 is a relative jump; fix the offset. - * - * Instruction length is checked before the opcode to avoid - * accessing uninitialized bytes for zero-length replacements. 
- */ - if (a->replacementlen == 5 && *insn_buff == 0xe8) { - *(s32 *)(insn_buff + 1) += replacement - instr; - DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", - *(s32 *)(insn_buff + 1), - (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5); - } - - if (a->replacementlen && is_jmp(replacement[0])) - recompute_jump(a, instr, replacement, insn_buff); - for (; insn_buff_sz < a->instrlen; insn_buff_sz++) insn_buff[insn_buff_sz] = 0x90; - DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); + apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen); - text_poke_early(instr, insn_buff, insn_buff_sz); + DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement); + DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr); -next: - optimize_nops(instr, a->instrlen); + text_poke_early(instr, insn_buff, insn_buff_sz); } } @@ -555,15 +671,15 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) continue; } - DPRINTK("retpoline at: %pS (%px) len: %d to: %pS", + DPRINTK(RETPOLINE, "retpoline at: %pS (%px) len: %d to: %pS", addr, addr, insn.length, addr + insn.length + insn.immediate.value); len = patch_retpoline(addr, &insn, bytes); if (len == insn.length) { optimize_nops(bytes, len); - DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); - DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr); text_poke_early(addr, bytes, len); } } @@ -590,13 +706,12 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; + /* Patch the custom return thunks... */ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - if (x86_return_thunk == __x86_return_thunk) - return -1; - i = JMP32_INSN_SIZE; __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); } else { + /* ... or patch them out if not needed. */ bytes[i++] = RET_INSN_OPCODE; } @@ -609,6 +724,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; + /* + * Do not patch out the default return thunks if those needed are the + * ones generated by the compiler. 
+ */ + if (cpu_feature_enabled(X86_FEATURE_RETHUNK) && + (x86_return_thunk == __x86_return_thunk)) + return; + for (s = start; s < end; s++) { void *dest = NULL, *addr = (void *)s + *s; struct insn insn; @@ -630,14 +753,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) addr, dest, 5, addr)) continue; - DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", + DPRINTK(RET, "return thunk at: %pS (%px) len: %d to: %pS", addr, addr, insn.length, addr + insn.length + insn.immediate.value); len = patch_return(addr, &insn, bytes); if (len == insn.length) { - DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); - DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); + DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr); + DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr); text_poke_early(addr, bytes, len); } } @@ -655,7 +778,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT -static void poison_endbr(void *addr, bool warn) +static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); @@ -667,13 +790,13 @@ static void poison_endbr(void *addr, bool warn) return; } - DPRINTK("ENDBR at: %pS (%px)", addr, addr); + DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr); /* * When we have IBT, the lack of ENDBR will trigger #CP */ - DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); - DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr); text_poke_early(addr, &poison, 4); } @@ -1148,7 +1271,7 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, smp->locks_end = locks_end; smp->text = text; smp->text_end = text_end; - DPRINTK("locks %p -> %p, text %p -> %p, name %s\n", + DPRINTK(SMP, "locks %p -> %p, text %p -> %p, name %s\n", smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); @@ -1225,6 +1348,20 @@ int alternatives_text_reserved(void *start, void *end) #endif /* CONFIG_SMP */ #ifdef CONFIG_PARAVIRT + +/* Use this to add nops to a buffer, then text_poke the whole buffer. */ +static void __init_or_module add_nops(void *insns, unsigned int len) +{ + while (len > 0) { + unsigned int noplen = len; + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + memcpy(insns, x86_nops[noplen], noplen); + insns += noplen; + len -= noplen; + } +} + void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *end) { @@ -1332,6 +1469,35 @@ static noinline void __init int3_selftest(void) unregister_die_notifier(&int3_exception_nb); } +static __initdata int __alt_reloc_selftest_addr; + +__visible noinline void __init __alt_reloc_selftest(void *arg) +{ + WARN_ON(arg != &__alt_reloc_selftest_addr); +} + +static noinline void __init alt_reloc_selftest(void) +{ + /* + * Tests apply_relocation(). + * + * This has a relative immediate (CALL) in a place other than the first + * instruction and additionally on x86_64 we get a RIP-relative LEA: + * + * lea 0x0(%rip),%rdi # 5d0: R_X86_64_PC32 .init.data+0x5566c + * call +0 # 5d5: R_X86_64_PLT32 __alt_reloc_selftest-0x4 + * + * Getting this wrong will either crash and burn or tickle the WARN + * above. 
+ */ + asm_inline volatile ( + ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS) + : /* output */ + : [mem] "m" (__alt_reloc_selftest_addr) + : _ASM_ARG1 + ); +} + void __init alternative_instructions(void) { int3_selftest(); @@ -1419,6 +1585,8 @@ void __init alternative_instructions(void) restart_nmi(); alternatives_patched = 1; + + alt_reloc_selftest(); } /** @@ -1799,7 +1967,7 @@ struct bp_patching_desc *try_get_desc(void) { struct bp_patching_desc *desc = &bp_desc; - if (!arch_atomic_inc_not_zero(&desc->refs)) + if (!raw_atomic_inc_not_zero(&desc->refs)) return NULL; return desc; @@ -1810,7 +1978,7 @@ static __always_inline void put_desc(void) struct bp_patching_desc *desc = &bp_desc; smp_mb__before_atomic(); - arch_atomic_dec(&desc->refs); + raw_atomic_dec(&desc->refs); } static __always_inline void *text_poke_addr(struct text_poke_loc *tp) @@ -1954,6 +2122,16 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries atomic_set_release(&bp_desc.refs, 1); /* + * Function tracing can enable thousands of places that need to be + * updated. This can take quite some time, and with full kernel debugging + * enabled, this could cause the softlockup watchdog to trigger. + * This function gets called every 256 entries added to be patched. + * Call cond_resched() here to make sure that other tasks can get scheduled + * while processing all the functions being patched. + */ + cond_resched(); + + /* * Corresponding read barrier in int3 notifier for making sure the * nr_entries and handler are correctly ordered wrt. patching. */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 7e331e8f3692..035a3db5330b 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -15,28 +15,31 @@ #include <linux/pci_ids.h> #include <asm/amd_nb.h> -#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 -#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 -#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 -#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 -#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5 -#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 -#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 -#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 -#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 -#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec -#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 -#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c -#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 -#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728 -#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 -#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1 -#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 -#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d -#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e -#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4 -#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 -#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc +#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 +#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 +#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 +#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5 +#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4 +#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 +#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 +#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 +#define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb + +#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec +#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 +#define 
PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 +#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728 +#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1 +#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e +#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4 +#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 +#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc +#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 /* Protect the PCI config register pairs used for SMN. */ static DEFINE_MUTEX(smn_mutex); @@ -53,6 +56,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) }, {} }; @@ -81,6 +85,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) }, {} }; @@ -101,6 +106,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 770557110051..af49e24b46a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -101,6 +101,9 @@ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP; */ static bool virt_ext_dest_id __ro_after_init; +/* For parallel bootup. */ +unsigned long apic_mmio_base __ro_after_init; + /* * Map cpu index to physical APIC ID */ @@ -2163,6 +2166,7 @@ void __init register_lapic_address(unsigned long address) if (!x2apic_mode) { set_fixmap_nocache(FIX_APIC_BASE, address); + apic_mmio_base = APIC_BASE; apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", APIC_BASE, address); } @@ -2376,7 +2380,7 @@ static int nr_logical_cpuids = 1; /* * Used to store mapping between logical CPU IDs and APIC IDs. */ -static int cpuid_to_apicid[] = { +int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; @@ -2386,20 +2390,31 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) } #ifdef CONFIG_SMP -/** - * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread - * @apicid: APIC ID to check +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) +{ + /* Isolate the SMT bit(s) in the APICID and check for 0 */ + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + + if (smp_num_siblings == 1 || !(apicid & mask)) + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); +} + +/* + * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid + * during early boot. Initialize the primary thread mask before SMP + * bringup. 
*/ -bool apic_id_is_primary_thread(unsigned int apicid) +static int __init smp_init_primary_thread_mask(void) { - u32 mask; + unsigned int cpu; - if (smp_num_siblings == 1) - return true; - /* Isolate the SMT bit(s) in the APICID and check for 0 */ - mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - return !(apicid & mask); + for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); + return 0; } +early_initcall(smp_init_primary_thread_mask); +#else +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif /* @@ -2544,6 +2559,9 @@ int generic_processor_info(int apicid, int version) set_cpu_present(cpu, true); num_processors++; + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apicid); + return cpu; } diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6bde05a86b4e..896bc41cb2ba 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -97,7 +97,10 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) + if (!x2apic_mode) + return 0; + + if (x2apic_phys || x2apic_fadt_phys()) return 1; return apic == &apic_x2apic_phys; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 482855227964..d9384d5b4b8e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -546,7 +546,6 @@ unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* The following values are used for the per node hub info struct */ -static __initdata unsigned short *_node_to_pnode; static __initdata unsigned short _min_socket, _max_socket; static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; static __initdata struct uv_gam_range_entry *uv_gre_table; @@ -554,6 +553,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table; static __initdata unsigned short *_socket_to_node; static __initdata unsigned short *_socket_to_pnode; static __initdata unsigned short *_pnode_to_socket; +static __initdata unsigned short *_node_to_socket; static __initdata struct uv_gam_range_s *_gr_table; @@ -617,7 +617,8 @@ static __init void build_uv_gr_table(void) bytes = _gr_table_len * sizeof(struct uv_gam_range_s); grt = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!grt); + if (WARN_ON_ONCE(!grt)) + return; _gr_table = grt; for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { @@ -1022,7 +1023,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, switch (index) { case UVY_MMIOH0: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1030,7 +1031,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVY_MMIOH1: mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode; max_nasid = max_pnode; @@ -1038,7 +1039,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH0: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK; n = 
UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1046,7 +1047,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, break; case UVX_MMIOH1: mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1; - nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK; n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH; min_nasid = min_pnode * 2; max_nasid = max_pnode * 2; @@ -1072,8 +1073,9 @@ static void __init calc_mmioh_map(enum mmioh_arch index, /* Invalid NASID check */ if (nasid < min_nasid || max_nasid < nasid) { - pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n", - __func__, index, min_nasid, max_nasid); + /* Not an error: unused table entries get "poison" values */ + pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n", + __func__, index, nasid, min_nasid, max_nasid); nasid = -1; } @@ -1292,6 +1294,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi) hi->nasid_shift = uv_cpuid.nasid_shift; hi->min_pnode = _min_pnode; hi->min_socket = _min_socket; + hi->node_to_socket = _node_to_socket; hi->pnode_to_socket = _pnode_to_socket; hi->socket_to_node = _socket_to_node; hi->socket_to_pnode = _socket_to_pnode; @@ -1348,7 +1351,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr; unsigned long lgre = 0, gend = 0; int index = 0; - int sock_min = 999999, pnode_min = 99999; + int sock_min = INT_MAX, pnode_min = INT_MAX; int sock_max = -1, pnode_max = -1; uv_gre_table = gre; @@ -1459,11 +1462,37 @@ static int __init decode_uv_systab(void) return 0; } +/* + * Given a bitmask 'bits' representing present blades, numbered + starting at 'base', masking off unused high bits of blade number + with 'mask', update the minimum and maximum blade numbers that we + have found. (Masking with 'mask' is necessary because of BIOS + treatment of system partitioning when creating this table we are + interpreting.)
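+ E.g. bits == 0x18 at base == 64 with mask == 0x7f gives first == 67 and
+ last == 68, since __ffs(0x18) == 3 and __fls(0x18) == 4; the pair then
+ widens the [*min, *max] window as needed.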
+ */ +static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max) +{ + int first, last; + + if (!bits) + return; + first = (base + __ffs(bits)) & mask; + last = (base + __fls(bits)) & mask; + + if (*min > first) + *min = first; + if (*max < last) + *max = last; +} + /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) { unsigned long np; int i, uv_pb = 0; + int sock_min = INT_MAX, sock_max = -1, s_mask; + + s_mask = (1 << uv_cpuid.n_skt) - 1; if (UVH_NODE_PRESENT_TABLE) { pr_info("UV: NODE_PRESENT_DEPTH = %d\n", @@ -1471,35 +1500,82 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np); - uv_pb += hweight64(np); + blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max); } } if (UVH_NODE_PRESENT_0) { np = uv_read_local_mmr(UVH_NODE_PRESENT_0); pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max); } if (UVH_NODE_PRESENT_1) { np = uv_read_local_mmr(UVH_NODE_PRESENT_1); pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np); - uv_pb += hweight64(np); + blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max); + } + + /* Only update if we actually found some bits indicating blades present */ + if (sock_max >= sock_min) { + _min_socket = sock_min; + _max_socket = sock_max; + uv_pb = sock_max - sock_min + 1; } if (uv_possible_blades != uv_pb) uv_possible_blades = uv_pb; - pr_info("UV: number nodes/possible blades %d\n", uv_pb); + pr_info("UV: number nodes/possible blades %d (%d - %d)\n", + uv_pb, sock_min, sock_max); +} + +static int __init alloc_conv_table(int num_elem, unsigned short **table) +{ + int i; + size_t bytes; + + bytes = num_elem * sizeof(*table[0]); + *table = kmalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!*table)) + return -ENOMEM; + for (i = 0; i < num_elem; i++) + ((unsigned short *)*table)[i] = SOCK_EMPTY; + return 0; } +/* Remove conversion table if it's 1:1 */ +#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2) + +static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2) +{ + int i; + unsigned short *table = *tp; + + if (table == NULL) + return; + if (max != max2) + return; + for (i = 0; i < max; i++) { + if (i != table[i]) + return; + } + kfree(table); + *tp = NULL; + pr_info("UV: %s is 1:1, conversion table removed\n", tname); +} + +/* + * Build Socket Tables + * If the number of nodes is >1 per socket, socket to node table will + * contain lowest node number on that socket. 
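+ E.g. with two nodes per socket, _socket_to_node[] maps sockets 0/1/2 to
+ nodes 0/2/4, since only the first (lowest) node seen for a socket is
+ recorded.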
+ */ static void __init build_socket_tables(void) { struct uv_gam_range_entry *gre = uv_gre_table; - int num, nump; + int nums, numn, nump; int cpu, i, lnid; int minsock = _min_socket; int maxsock = _max_socket; int minpnode = _min_pnode; int maxpnode = _max_pnode; - size_t bytes; if (!gre) { if (is_uv2_hub() || is_uv3_hub()) { @@ -1507,39 +1583,36 @@ static void __init build_socket_tables(void) return; } pr_err("UV: Error: UVsystab address translations not available!\n"); - BUG(); + WARN_ON_ONCE(!gre); + return; } - /* Build socket id -> node id, pnode */ - num = maxsock - minsock + 1; - bytes = num * sizeof(_socket_to_node[0]); - _socket_to_node = kmalloc(bytes, GFP_KERNEL); - _socket_to_pnode = kmalloc(bytes, GFP_KERNEL); - + numn = num_possible_nodes(); nump = maxpnode - minpnode + 1; - bytes = nump * sizeof(_pnode_to_socket[0]); - _pnode_to_socket = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); - - for (i = 0; i < num; i++) - _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY; - - for (i = 0; i < nump; i++) - _pnode_to_socket[i] = SOCK_EMPTY; + nums = maxsock - minsock + 1; + + /* Allocate and clear tables */ + if ((alloc_conv_table(nump, &_pnode_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_pnode) < 0) + || (alloc_conv_table(numn, &_node_to_socket) < 0) + || (alloc_conv_table(nums, &_socket_to_node) < 0)) { + kfree(_pnode_to_socket); + kfree(_socket_to_pnode); + kfree(_node_to_socket); + return; + } /* Fill in pnode/node/addr conversion list values: */ - pr_info("UV: GAM Building socket/pnode conversion tables\n"); for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) continue; i = gre->sockid - minsock; - /* Duplicate: */ - if (_socket_to_pnode[i] != SOCK_EMPTY) - continue; - _socket_to_pnode[i] = gre->pnode; + if (_socket_to_pnode[i] == SOCK_EMPTY) + _socket_to_pnode[i] = gre->pnode; i = gre->pnode - minpnode; - _pnode_to_socket[i] = gre->sockid; + if (_pnode_to_socket[i] == SOCK_EMPTY) + _pnode_to_socket[i] = gre->sockid; pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", gre->sockid, gre->type, gre->nasid, @@ -1549,66 +1622,39 @@ static void __init build_socket_tables(void) /* Set socket -> node values: */ lnid = NUMA_NO_NODE; - for_each_present_cpu(cpu) { + for_each_possible_cpu(cpu) { int nid = cpu_to_node(cpu); int apicid, sockid; if (lnid == nid) continue; lnid = nid; + apicid = per_cpu(x86_cpu_to_apicid, cpu); sockid = apicid >> uv_cpuid.socketid_shift; - _socket_to_node[sockid - minsock] = nid; - pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", - sockid, apicid, nid); - } - /* Set up physical blade to pnode translation from GAM Range Table: */ - bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); - _node_to_pnode = kmalloc(bytes, GFP_KERNEL); - BUG_ON(!_node_to_pnode); + if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) + _socket_to_node[sockid - minsock] = nid; - for (lnid = 0; lnid < num_possible_nodes(); lnid++) { - unsigned short sockid; + if (_node_to_socket[nid] == SOCK_EMPTY) + _node_to_socket[nid] = sockid; - for (sockid = minsock; sockid <= maxsock; sockid++) { - if (lnid == _socket_to_node[sockid - minsock]) { - _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; - break; - } - } - if (sockid > maxsock) { - pr_err("UV: socket for node %d not found!\n", lnid); - BUG(); - } + pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n", + sockid, + apicid, + _node_to_socket[nid], + nid, + _socket_to_node[sockid - minsock]); } 
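An aside on the conversion tables: the 1:1 removal that free_1_to_1_table() performs above, and that the FREE_1_TO_1_TABLE() calls below apply, boils down to the following standalone sketch. The names free_if_identity() and to_node() are illustrative only, not kernel API; a NULL table simply stands for an identity mapping, which is what makes dropping the table safe:

	#include <stdio.h>
	#include <stdlib.h>

	/* Free the table when it maps every index to itself. */
	static void free_if_identity(unsigned short **tp, int n)
	{
		for (int i = 0; i < n; i++)
			if ((*tp)[i] != i)
				return;
		free(*tp);
		*tp = NULL;		/* NULL now stands for "identity map" */
	}

	/* Lookup with identity fallback once the table is gone. */
	static int to_node(const unsigned short *tbl, int sock)
	{
		return tbl ? tbl[sock] : sock;
	}

	int main(void)
	{
		unsigned short *t = malloc(4 * sizeof(*t));

		if (!t)
			return 1;
		for (int i = 0; i < 4; i++)
			t[i] = i;			/* a 1:1 mapping */
		free_if_identity(&t, 4);		/* frees t, sets it NULL */
		printf("socket 2 -> node %d\n", to_node(t, 2));	/* prints 2 */
		return 0;
	}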
/* - * If socket id == pnode or socket id == node for all nodes, + * If e.g. socket id == pnode for all pnodes, * system runs faster by removing corresponding conversion table. */ - pr_info("UV: Checking socket->node/pnode for identity maps\n"); - if (minsock == 0) { - for (i = 0; i < num; i++) - if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) - break; - if (i >= num) { - kfree(_socket_to_node); - _socket_to_node = NULL; - pr_info("UV: 1:1 socket_to_node table removed\n"); - } - } - if (minsock == minpnode) { - for (i = 0; i < num; i++) - if (_socket_to_pnode[i] != SOCK_EMPTY && - _socket_to_pnode[i] != i + minpnode) - break; - if (i >= num) { - kfree(_socket_to_pnode); - _socket_to_pnode = NULL; - pr_info("UV: 1:1 socket_to_pnode table removed\n"); - } - } + FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn); + FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); + FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump); } /* Check which reboot to use */ @@ -1692,12 +1738,13 @@ static __init int uv_system_init_hubless(void) static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; - int bytes, cpu, nodeid; - unsigned short min_pnode = 9999, max_pnode = 0; + int bytes, cpu, nodeid, bid; + unsigned short min_pnode = USHRT_MAX, max_pnode = 0; char *hub = is_uv5_hub() ? "UV500" : is_uv4_hub() ? "UV400" : is_uv3_hub() ? "UV300" : is_uv2_hub() ? "UV2000/3000" : NULL; + struct uv_hub_info_s **uv_hub_info_list_blade; if (!hub) { pr_err("UV: Unknown/unsupported UV hub\n"); @@ -1720,9 +1767,12 @@ static void __init uv_system_init_hub(void) build_uv_gr_table(); set_block_size(); uv_init_hub_info(&hub_info); - uv_possible_blades = num_possible_nodes(); - if (!_node_to_pnode) + /* If UV2 or UV3 may need to get # blades from HW */ + if (is_uv(UV2|UV3) && !uv_gre_table) boot_init_possible_blades(&hub_info); + else + /* min/max sockets set in decode_gam_rng_tbl */ + uv_possible_blades = (_max_socket - _min_socket) + 1; /* uv_num_possible_blades() is really the hub count: */ pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); @@ -1731,79 +1781,98 @@ static void __init uv_system_init_hub(void) hub_info.coherency_domain_number = sn_coherency_id; uv_rtc_init(); + /* + * __uv_hub_info_list[] is indexed by node, but there is only + * one hub_info structure per blade. First, allocate one + * structure per blade. Further down we create a per-node + * table (__uv_hub_info_list[]) pointing to hub_info + * structures for the correct blade. + */ + bytes = sizeof(void *) * uv_num_possible_blades(); - __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); - BUG_ON(!__uv_hub_info_list); + uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!uv_hub_info_list_blade)) + return; bytes = sizeof(struct uv_hub_info_s); - for_each_node(nodeid) { + for_each_possible_blade(bid) { struct uv_hub_info_s *new_hub; - if (__uv_hub_info_list[nodeid]) { - pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); - BUG(); + /* Allocate & fill new per hub info list */ + new_hub = (bid == 0) ? &uv_hub_info_node0 + : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); + if (WARN_ON_ONCE(!new_hub)) { + /* do not kfree() bid 0, which is statically allocated */ + while (--bid > 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; } - /* Allocate new per hub info list */ - new_hub = (nodeid == 0) ? 
&uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); - BUG_ON(!new_hub); - __uv_hub_info_list[nodeid] = new_hub; - new_hub = uv_hub_info_list(nodeid); - BUG_ON(!new_hub); + uv_hub_info_list_blade[bid] = new_hub; *new_hub = hub_info; /* Use information from GAM table if available: */ - if (_node_to_pnode) - new_hub->pnode = _node_to_pnode[nodeid]; + if (uv_gre_table) + new_hub->pnode = uv_blade_to_pnode(bid); else /* Or fill in during CPU loop: */ new_hub->pnode = 0xffff; - new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); + new_hub->numa_blade_id = bid; new_hub->memory_nid = NUMA_NO_NODE; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } + /* + * Now populate __uv_hub_info_list[] for each node with the + * pointer to the struct for the blade it resides on. + */ + + bytes = sizeof(void *) * num_possible_nodes(); + __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); + if (WARN_ON_ONCE(!__uv_hub_info_list)) { + for_each_possible_blade(bid) + /* bid 0 is statically allocated */ + if (bid != 0) + kfree(uv_hub_info_list_blade[bid]); + kfree(uv_hub_info_list_blade); + return; + } + + for_each_node(nodeid) + __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)]; + /* Initialize per CPU info: */ for_each_possible_cpu(cpu) { - int apicid = per_cpu(x86_cpu_to_apicid, cpu); - int numa_node_id; + int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); + unsigned short bid; unsigned short pnode; - nodeid = cpu_to_node(cpu); - numa_node_id = numa_cpu_node(cpu); pnode = uv_apicid_to_pnode(apicid); + bid = uv_pnode_to_socket(pnode) - _min_socket; - uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); + uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid]; uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); - /* Init memoryless node: */ - if (nodeid != numa_node_id && - uv_hub_info_list(numa_node_id)->pnode == 0xffff) - uv_hub_info_list(numa_node_id)->pnode = pnode; - else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) + if (uv_cpu_hub_info(cpu)->pnode == 0xffff) uv_cpu_hub_info(cpu)->pnode = pnode; } - for_each_node(nodeid) { - unsigned short pnode = uv_hub_info_list(nodeid)->pnode; + for_each_possible_blade(bid) { + unsigned short pnode = uv_hub_info_list_blade[bid]->pnode; - /* Add pnode info for pre-GAM list nodes without CPUs: */ - if (pnode == 0xffff) { - unsigned long paddr; + if (pnode == 0xffff) + continue; - paddr = node_start_pfn(nodeid) << PAGE_SHIFT; - pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); - uv_hub_info_list(nodeid)->pnode = pnode; - } min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); - pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", - nodeid, - uv_hub_info_list(nodeid)->pnode, - uv_hub_info_list(nodeid)->nr_possible_cpus); + pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n", + bid, + uv_hub_info_list_blade[bid]->pnode, + uv_hub_info_list_blade[bid]->nr_possible_cpus); } pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); @@ -1811,6 +1880,9 @@ static void __init uv_system_init_hub(void) map_mmr_high(max_pnode); map_mmioh_high(min_pnode, max_pnode); + kfree(uv_hub_info_list_blade); + uv_hub_info_list_blade = NULL; + uv_nmi_setup(); uv_cpu_init(); uv_setup_proc_files(0); diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 22ab13966427..c06bfc086565 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c 
@@ -133,8 +133,8 @@ static bool skip_addr(void *dest) /* Accounts directly */ if (dest == ret_from_fork) return true; -#ifdef CONFIG_HOTPLUG_CPU - if (dest == start_cpu0) +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT) + if (dest == soft_restart_cpu) return true; #endif #ifdef CONFIG_FUNCTION_TRACER @@ -293,7 +293,8 @@ void *callthunks_translate_call_dest(void *dest) return target ? : dest; } -bool is_callthunk(void *addr) +#ifdef CONFIG_BPF_JIT +static bool is_callthunk(void *addr) { unsigned int tmpl_size = SKL_TMPL_SIZE; void *tmpl = skl_call_thunk_template; @@ -306,7 +307,6 @@ bool is_callthunk(void *addr) return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); } -#ifdef CONFIG_BPF_JIT int x86_call_depth_emit_accounting(u8 **pprog, void *func) { unsigned int tmpl_size = SKL_TMPL_SIZE; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d7e3ceaf75c1..4350f6bfc064 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -27,7 +27,7 @@ obj-y += cpuid-deps.o obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o -obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o +obj-y += capflags.o powerflags.o obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL @@ -54,7 +54,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o obj-$(CONFIG_ACRN_GUEST) += acrn.o -ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^ @@ -63,5 +62,4 @@ vmxfeature = $(src)/../../include/asm/vmxfeatures.h $(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) -endif targets += capflags.c diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 182af64387d0..9e2a91830f72 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -9,7 +9,6 @@ * - Andrew D. Balsa (code cleanup). */ #include <linux/init.h> -#include <linux/utsname.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/nospec.h> @@ -27,8 +26,6 @@ #include <asm/msr.h> #include <asm/vmx.h> #include <asm/paravirt.h> -#include <asm/alternative.h> -#include <asm/set_memory.h> #include <asm/intel-family.h> #include <asm/e820/api.h> #include <asm/hypervisor.h> @@ -125,21 +122,8 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); EXPORT_SYMBOL_GPL(mmio_stale_data_clear); -void __init check_bugs(void) +void __init cpu_select_mitigations(void) { - identify_boot_cpu(); - - /* - * identify_boot_cpu() initialized SMT support information, let the - * core code know. - */ - cpu_smt_check_topology(); - - if (!IS_ENABLED(CONFIG_SMP)) { - pr_info("CPU: "); - print_cpu_info(&boot_cpu_data); - } - /* * Read the SPEC_CTRL MSR to account for reserved bits which may * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD @@ -176,39 +160,6 @@ void __init check_bugs(void) md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); - - arch_smt_update(); - -#ifdef CONFIG_X86_32 - /* - * Check whether we are able to run this kernel safely on SMP. - * - * - i386 is no longer supported. - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - */ - if (boot_cpu_data.x86 < 4) - panic("Kernel requires i486+ for 'invlpg' and other features"); - - init_utsname()->machine[1] = - '0' + (boot_cpu_data.x86 > 6 ? 
6 : boot_cpu_data.x86); - alternative_instructions(); - - fpu__init_check_bugs(); -#else /* CONFIG_X86_64 */ - alternative_instructions(); - - /* - * Make sure the first 2MB area is not mapped by huge pages - * There are typically fixed size MTRRs in there and overlapping - * MTRRs into large pages causes slow downs. - * - * Right now we don't do that with gbpages because there seems - * very little benefit for that case. - */ - if (!direct_gbpages) - set_memory_4k((unsigned long)__va(0), 1); -#endif } /* diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 4063e8991211..8f86eacf69f7 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -39,6 +39,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Shared L2 cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); +static cpumask_var_t cpu_cacheinfo_mask; + /* Kernel controls MTRR and/or PAT MSRs. */ unsigned int memory_caching_control __ro_after_init; @@ -1172,8 +1174,10 @@ void cache_bp_restore(void) cache_cpu_init(); } -static int cache_ap_init(unsigned int cpu) +static int cache_ap_online(unsigned int cpu) { + cpumask_set_cpu(cpu, cpu_cacheinfo_mask); + if (!memory_caching_control || get_cache_aps_delayed_init()) return 0; @@ -1191,11 +1195,17 @@ static int cache_ap_init(unsigned int cpu) * lock to prevent MTRR entry changes */ stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL, - cpu_callout_mask); + cpu_cacheinfo_mask); return 0; } +static int cache_ap_offline(unsigned int cpu) +{ + cpumask_clear_cpu(cpu, cpu_cacheinfo_mask); + return 0; +} + /* * Delayed cache initialization for all AP's */ @@ -1210,9 +1220,12 @@ void cache_aps_init(void) static int __init cache_ap_register(void) { + zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL); + cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask); + cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING, "x86/cachectrl:starting", - cache_ap_init, NULL); + cache_ap_online, cache_ap_offline); return 0; } -core_initcall(cache_ap_register); +early_initcall(cache_ap_register); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80710a68ef7d..52683fddafaf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -18,12 +18,16 @@ #include <linux/init.h> #include <linux/kprobes.h> #include <linux/kgdb.h> +#include <linux/mem_encrypt.h> #include <linux/smp.h> +#include <linux/cpu.h> #include <linux/io.h> #include <linux/syscore_ops.h> #include <linux/pgtable.h> #include <linux/stackprotector.h> +#include <linux/utsname.h> +#include <asm/alternative.h> #include <asm/cmdline.h> #include <asm/perf_event.h> #include <asm/mmu_context.h> @@ -59,7 +63,7 @@ #include <asm/intel-family.h> #include <asm/cpu_device_id.h> #include <asm/uv/uv.h> -#include <asm/sigframe.h> +#include <asm/set_memory.h> #include <asm/traps.h> #include <asm/sev.h> @@ -67,14 +71,6 @@ u32 elf_hwcap2 __read_mostly; -/* all of these masks are initialized in setup_cpu_local_masks() */ -cpumask_var_t cpu_initialized_mask; -cpumask_var_t cpu_callout_mask; -cpumask_var_t cpu_callin_mask; - -/* representing cpus for which sibling maps can be computed */ -cpumask_var_t cpu_sibling_setup_mask; - /* Number of siblings per CPU package */ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); @@ -169,15 +165,6 @@ clear_ppin: clear_cpu_cap(c, info->feature); } -/* correctly size the local cpu masks */ -void __init setup_cpu_local_masks(void) -{ - 
alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - static void default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 @@ -1502,12 +1489,10 @@ static void __init cpu_parse_early_param(void) if (!kstrtouint(opt, 10, &bit)) { if (bit < NCAPINTS * 32) { -#ifdef CONFIG_X86_FEATURE_NAMES /* empty-string, i.e., ""-defined feature flags */ if (!x86_cap_flags[bit]) pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit)); else -#endif pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); setup_clear_cpu_cap(bit); @@ -1520,7 +1505,6 @@ static void __init cpu_parse_early_param(void) continue; } -#ifdef CONFIG_X86_FEATURE_NAMES for (bit = 0; bit < 32 * NCAPINTS; bit++) { if (!x86_cap_flag(bit)) continue; @@ -1537,7 +1521,6 @@ static void __init cpu_parse_early_param(void) if (!found) pr_cont(" (unknown: %s)", opt); -#endif } pr_cont("\n"); @@ -1600,10 +1583,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) sld_setup(c); - fpu__init_system(c); - - init_sigframe_size(); - #ifdef CONFIG_X86_32 /* * Regardless of whether PCID is enumerated, the SDM says @@ -2123,19 +2102,6 @@ static void dbg_restore_debug_regs(void) #define dbg_restore_debug_regs() #endif /* ! CONFIG_KGDB */ -static void wait_for_master_cpu(int cpu) -{ -#ifdef CONFIG_SMP - /* - * wait for ACK from master CPU before continuing - * with AP initialization - */ - WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)); - while (!cpumask_test_cpu(cpu, cpu_callout_mask)) - cpu_relax(); -#endif -} - static inline void setup_getcpu(int cpu) { unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); @@ -2158,11 +2124,7 @@ static inline void setup_getcpu(int cpu) } #ifdef CONFIG_X86_64 -static inline void ucode_cpu_init(int cpu) -{ - if (cpu) - load_ucode_ap(); -} +static inline void ucode_cpu_init(int cpu) { } static inline void tss_setup_ist(struct tss_struct *tss) { @@ -2239,8 +2201,6 @@ void cpu_init(void) struct task_struct *cur = current; int cpu = raw_smp_processor_id(); - wait_for_master_cpu(cpu); - ucode_cpu_init(cpu); #ifdef CONFIG_NUMA @@ -2285,26 +2245,12 @@ void cpu_init(void) doublefault_init_cpu_tss(); - fpu__init_cpu(); - if (is_uv_system()) uv_cpu_init(); load_fixmap_gdt(cpu); } -#ifdef CONFIG_SMP -void cpu_init_secondary(void) -{ - /* - * Relies on the BP having set-up the IDT tables, which are loaded - * on this CPU in cpu_init_exception_handling(). - */ - cpu_init_exception_handling(); - cpu_init(); -} -#endif - #ifdef CONFIG_MICROCODE_LATE_LOADING /** * store_cpu_caps() - Store a snapshot of CPU capabilities @@ -2362,3 +2308,69 @@ void arch_smt_update(void) /* Check whether IPI broadcasting can be enabled */ apic_smt_update(); } + +void __init arch_cpu_finalize_init(void) +{ + identify_boot_cpu(); + + /* + * identify_boot_cpu() initialized SMT support information, let the + * core code know. + */ + cpu_smt_check_topology(); + + if (!IS_ENABLED(CONFIG_SMP)) { + pr_info("CPU: "); + print_cpu_info(&boot_cpu_data); + } + + cpu_select_mitigations(); + + arch_smt_update(); + + if (IS_ENABLED(CONFIG_X86_32)) { + /* + * Check whether this is a real i386 which is no longer + * supported and fix up the utsname. + */ + if (boot_cpu_data.x86 < 4) + panic("Kernel requires i486+ for 'invlpg' and other features"); + + init_utsname()->machine[1] = + '0' + (boot_cpu_data.x86 > 6 ? 
6 : boot_cpu_data.x86); + } + + /* + * Must be before alternatives because it might set or clear + * feature bits. + */ + fpu__init_system(); + fpu__init_cpu(); + + alternative_instructions(); + + if (IS_ENABLED(CONFIG_X86_64)) { + /* + * Make sure the first 2MB area is not mapped by huge pages + * There are typically fixed size MTRRs in there and overlapping + * MTRRs into large pages causes slow downs. + * + * Right now we don't do that with gbpages because there seems + * very little benefit for that case. + */ + if (!direct_gbpages) + set_memory_4k((unsigned long)__va(0), 1); + } else { + fpu__init_check_bugs(); + } + + /* + * This needs to be called before any devices perform DMA + * operations that might use the SWIOTLB bounce buffers. It will + * mark the bounce buffers as decrypted so that their usage will + * not cause "plain-text" data to be decrypted when accessed. It + * must be called after late_time_init() so that Hyper-V x86/x64 + * hypercalls work when the SWIOTLB bounce buffers are decrypted. + */ + mem_encrypt_init(); +} diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f97b0fe13da8..1c44630d4789 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -79,6 +79,7 @@ extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +void cpu_select_mitigations(void); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 0b971f974096..5e74610b39e7 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) bool amd_mce_is_memory_error(struct mce *m) { + enum smca_bank_types bank_type; /* ErrCodeExt[20:16] */ u8 xec = (m->status >> 16) & 0x1f; + bank_type = smca_get_bank_type(m->extcpu, m->bank); if (mce_flags.smca) - return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0; + return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0; return m->bank == 4 && xec == 0x8; } @@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol if (bank_type >= N_SMCA_BANK_TYPES) return NULL; - if (b && bank_type == SMCA_UMC) { + if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; return NULL; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 2eec60f50057..89e2aab5d34d 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1022,12 +1022,12 @@ static noinstr int mce_start(int *no_way_out) if (!timeout) return ret; - arch_atomic_add(*no_way_out, &global_nwo); + raw_atomic_add(*no_way_out, &global_nwo); /* * Rely on the implied barrier below, such that global_nwo * is updated before mce_callin. */ - order = arch_atomic_inc_return(&mce_callin); + order = raw_atomic_inc_return(&mce_callin); arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus); /* Enable instrumentation around calls to external facilities */ @@ -1036,10 +1036,10 @@ static noinstr int mce_start(int *no_way_out) /* * Wait for everyone. 
*/ - while (arch_atomic_read(&mce_callin) != num_online_cpus()) { + while (raw_atomic_read(&mce_callin) != num_online_cpus()) { if (mce_timed_out(&timeout, "Timeout: Not all CPUs entered broadcast exception handler")) { - arch_atomic_set(&global_nwo, 0); + raw_atomic_set(&global_nwo, 0); goto out; } ndelay(SPINUNIT); @@ -1054,7 +1054,7 @@ static noinstr int mce_start(int *no_way_out) /* * Monarch: Starts executing now, the others wait. */ - arch_atomic_set(&mce_executing, 1); + raw_atomic_set(&mce_executing, 1); } else { /* * Subject: Now start the scanning loop one by one in @@ -1062,10 +1062,10 @@ static noinstr int mce_start(int *no_way_out) * This way when there are any shared banks it will be * only seen by one CPU before cleared, avoiding duplicates. */ - while (arch_atomic_read(&mce_executing) < order) { + while (raw_atomic_read(&mce_executing) < order) { if (mce_timed_out(&timeout, "Timeout: Subject CPUs unable to finish machine check processing")) { - arch_atomic_set(&global_nwo, 0); + raw_atomic_set(&global_nwo, 0); goto out; } ndelay(SPINUNIT); @@ -1075,7 +1075,7 @@ static noinstr int mce_start(int *no_way_out) /* * Cache the global no_way_out state. */ - *no_way_out = arch_atomic_read(&global_nwo); + *no_way_out = raw_atomic_read(&global_nwo); ret = order; @@ -1533,7 +1533,7 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - if (kill_current_task) + if (!mce_usable_address(&m)) queue_task_work(&m, msg, kill_me_now); else queue_task_work(&m, msg, kill_me_maybe); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index f5fdeb1e3606..87208e46f7ed 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -78,8 +78,6 @@ static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig) if (sig == e->installed_cpu) return e->equiv_cpu; - - e++; } return 0; } @@ -596,11 +594,6 @@ void reload_ucode_amd(unsigned int cpu) } } } -static u16 __find_equiv_id(unsigned int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - return find_equiv_id(&equiv_table, uci->cpu_sig.sig); -} /* * a small, trivial cache of per-family ucode patches @@ -651,9 +644,11 @@ static void free_cache(void) static struct ucode_patch *find_patch(unsigned int cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; u16 equiv_id; - equiv_id = __find_equiv_id(cpu); + + equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig); if (!equiv_id) return NULL; @@ -705,7 +700,7 @@ static enum ucode_state apply_microcode_amd(int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); /* need to apply patch? 
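(with the change below, a strictly newer installed revision is the only thing that skips the update; an equal revision is applied again, which the old rev >= check prevented)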
*/ - if (rev >= mc_amd->hdr.patch_id) { + if (rev > mc_amd->hdr.patch_id) { ret = UCODE_OK; goto out; } diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index cc4f9f1cb94c..aee4bc5ad496 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := mtrr.o if.o generic.o cleanup.o -obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o +obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o legacy.o diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index eff6ac62c0ff..ef3e8e42b782 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -110,7 +110,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) } const struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, + .var_regs = 2, .set = amd_set_mtrr, .get = amd_get_mtrr, .get_free_region = generic_get_free_region, diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index b8a74eddde83..6f6c3ae92943 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -45,15 +45,6 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) return -ENOSPC; } -/* - * Report boot time MCR setups - */ -void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ - centaur_mcr[mcr].low = lo; - centaur_mcr[mcr].high = hi; -} - static void centaur_get_mcr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type * type) @@ -112,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t } const struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, + .var_regs = 8, .set = centaur_set_mcr, .get = centaur_get_mcr, .get_free_region = centaur_get_free_region, diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index b5f43049fa5f..18cf79d6e2c5 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -55,9 +55,6 @@ static int __initdata nr_range; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; -static int __initdata debug_print; -#define Dprintk(x...) 
do { if (debug_print) pr_debug(x); } while (0) - #define BIOS_BUG_MSG \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" @@ -79,12 +76,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, base, base + size); } - if (debug_print) { - pr_debug("After WB checking\n"); - for (i = 0; i < nr_range; i++) - pr_debug("MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } + + Dprintk("After WB checking\n"); + for (i = 0; i < nr_range; i++) + Dprintk("MTRR MAP PFN: %016llx - %016llx\n", + range[i].start, range[i].end); /* Take out UC ranges: */ for (i = 0; i < num_var_ranges; i++) { @@ -112,24 +108,22 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, subtract_range(range, RANGE_NUM, extra_remove_base, extra_remove_base + extra_remove_size); - if (debug_print) { - pr_debug("After UC checking\n"); - for (i = 0; i < RANGE_NUM; i++) { - if (!range[i].end) - continue; - pr_debug("MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } + Dprintk("After UC checking\n"); + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + Dprintk("MTRR MAP PFN: %016llx - %016llx\n", + range[i].start, range[i].end); } /* sort the ranges */ nr_range = clean_sort_range(range, RANGE_NUM); - if (debug_print) { - pr_debug("After sorting\n"); - for (i = 0; i < nr_range; i++) - pr_debug("MTRR MAP PFN: %016llx - %016llx\n", - range[i].start, range[i].end); - } + + Dprintk("After sorting\n"); + for (i = 0; i < nr_range; i++) + Dprintk("MTRR MAP PFN: %016llx - %016llx\n", + range[i].start, range[i].end); return nr_range; } @@ -164,16 +158,9 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); -static int __init mtrr_cleanup_debug_setup(char *str) -{ - debug_print = 1; - return 0; -} -early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); - static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, - unsigned char type, unsigned int address_bits) + unsigned char type) { u32 base_lo, base_hi, mask_lo, mask_hi; u64 base, mask; @@ -183,7 +170,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, return; } - mask = (1ULL << address_bits) - 1; + mask = (1ULL << boot_cpu_data.x86_phys_bits) - 1; mask &= ~((((u64)sizek) << 10) - 1); base = ((u64)basek) << 10; @@ -209,7 +196,7 @@ save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, range_state[reg].type = type; } -static void __init set_var_mtrr_all(unsigned int address_bits) +static void __init set_var_mtrr_all(void) { unsigned long basek, sizek; unsigned char type; @@ -220,7 +207,7 @@ static void __init set_var_mtrr_all(unsigned int address_bits) sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); type = range_state[reg].type; - set_var_mtrr(reg, basek, sizek, type, address_bits); + set_var_mtrr(reg, basek, sizek, type); } } @@ -267,7 +254,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1UL << align; - if (debug_print) { + if (mtrr_debug) { char start_factor = 'K', size_factor = 'K'; unsigned long start_base, size_base; @@ -542,7 +529,7 @@ static void __init print_out_mtrr_range_state(void) start_base = to_size_factor(start_base, &start_factor); type = range_state[i].type; - pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + Dprintk("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, 
start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? "UC" : @@ -680,7 +667,7 @@ static int __init mtrr_search_optimal_index(void) return index_good; } -int __init mtrr_cleanup(unsigned address_bits) +int __init mtrr_cleanup(void) { unsigned long x_remove_base, x_remove_size; unsigned long base, size, def, dummy; @@ -689,7 +676,10 @@ int __init mtrr_cleanup(unsigned address_bits) int index_good; int i; - if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) + if (!mtrr_enabled()) + return 0; + + if (!cpu_feature_enabled(X86_FEATURE_MTRR) || enable_mtrr_cleanup < 1) return 0; rdmsr(MSR_MTRRdefType, def, dummy); @@ -711,7 +701,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; /* Print original var MTRRs at first, for debugging: */ - pr_debug("original variable MTRRs\n"); + Dprintk("original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); @@ -742,8 +732,8 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_print_out_one_result(i); if (!result[i].bad) { - set_var_mtrr_all(address_bits); - pr_debug("New variable MTRRs\n"); + set_var_mtrr_all(); + Dprintk("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } @@ -763,7 +753,7 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_calc_range_state(chunk_size, gran_size, x_remove_base, x_remove_size, i); - if (debug_print) { + if (mtrr_debug) { mtrr_print_out_one_result(i); pr_info("\n"); } @@ -786,8 +776,8 @@ int __init mtrr_cleanup(unsigned address_bits) gran_size = result[i].gran_sizek; gran_size <<= 10; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); - set_var_mtrr_all(address_bits); - pr_debug("New variable MTRRs\n"); + set_var_mtrr_all(); + Dprintk("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } else { @@ -802,7 +792,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; } #else -int __init mtrr_cleanup(unsigned address_bits) +int __init mtrr_cleanup(void) { return 0; } @@ -882,15 +872,18 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; + if (!mtrr_enabled()) + return 0; + /* * Make sure we only trim uncachable memory on machines that * support the Intel MTRR architecture: */ - if (!is_cpu(INTEL) || disable_mtrr_trim) + if (!cpu_feature_enabled(X86_FEATURE_MTRR) || disable_mtrr_trim) return 0; rdmsr(MSR_MTRRdefType, def, dummy); - def &= 0xff; + def &= MTRR_DEF_TYPE_TYPE; if (def != MTRR_TYPE_UNCACHABLE) return 0; diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 173b9e01e623..238dad57d4d6 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -235,7 +235,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, } const struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, + .var_regs = 8, .set = cyrix_set_arr, .get = cyrix_get_arr, .get_free_region = cyrix_get_free_region, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ee09d359e08f..2d6aa5d2e3d7 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -8,10 +8,12 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/mm.h> - +#include <linux/cc_platform.h> #include <asm/processor-flags.h> #include <asm/cacheinfo.h> #include <asm/cpufeature.h> +#include <asm/hypervisor.h> +#include <asm/mshyperv.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> #include <asm/msr.h> @@ -31,6 +33,55 @@ static struct fixed_range_block fixed_range_blocks[] 
= { {} }; +struct cache_map { + u64 start; + u64 end; + u64 flags; + u64 type:8; + u64 fixed:1; +}; + +bool mtrr_debug; + +static int __init mtrr_param_setup(char *str) +{ + int rc = 0; + + if (!str) + return -EINVAL; + if (!strcmp(str, "debug")) + mtrr_debug = true; + else + rc = -EINVAL; + + return rc; +} +early_param("mtrr", mtrr_param_setup); + +/* + * CACHE_MAP_MAX is the maximum number of memory ranges in cache_map, where + * no 2 adjacent ranges have the same cache mode (those would be merged). + * The number is based on the worst case: + * - no two adjacent fixed MTRRs share the same cache mode + * - one variable MTRR is spanning a huge area with mode WB + * - 255 variable MTRRs with mode UC all overlap with the WB MTRR, creating 2 + * additional ranges each (result like "ababababa...aba" with a = WB, b = UC), + * accounting for MTRR_MAX_VAR_RANGES * 2 - 1 range entries + * - a TOP_MEM2 area (even with overlapping an UC MTRR can't add 2 range entries + * to the possible maximum, as it always starts at 4GB, thus it can't be in + * the middle of that MTRR, unless that MTRR starts at 0, which would remove + * the initial "a" from the "abababa" pattern above) + * The map won't contain ranges with no matching MTRR (those fall back to the + * default cache mode). + */ +#define CACHE_MAP_MAX (MTRR_NUM_FIXED_RANGES + MTRR_MAX_VAR_RANGES * 2) + +static struct cache_map init_cache_map[CACHE_MAP_MAX] __initdata; +static struct cache_map *cache_map __refdata = init_cache_map; +static unsigned int cache_map_size = CACHE_MAP_MAX; +static unsigned int cache_map_n; +static unsigned int cache_map_fixed; + static unsigned long smp_changes_mask; static int mtrr_state_set; u64 mtrr_tom2; @@ -38,6 +89,9 @@ u64 mtrr_tom2; struct mtrr_state_type mtrr_state; EXPORT_SYMBOL_GPL(mtrr_state); +/* Reserved bits in the high portion of the MTRRphysBaseN MSR. */ +u32 phys_hi_rsvd; + /* * BIOS is expected to clear MtrrFixDramModEn bit, see for example * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD @@ -69,175 +123,370 @@ static u64 get_mtrr_size(u64 mask) { u64 size; - mask >>= PAGE_SHIFT; - mask |= size_or_mask; + mask |= (u64)phys_hi_rsvd << 32; size = -mask; - size <<= PAGE_SHIFT; + return size; } +static u8 get_var_mtrr_state(unsigned int reg, u64 *start, u64 *size) +{ + struct mtrr_var_range *mtrr = mtrr_state.var_ranges + reg; + + if (!(mtrr->mask_lo & MTRR_PHYSMASK_V)) + return MTRR_TYPE_INVALID; + + *start = (((u64)mtrr->base_hi) << 32) + (mtrr->base_lo & PAGE_MASK); + *size = get_mtrr_size((((u64)mtrr->mask_hi) << 32) + + (mtrr->mask_lo & PAGE_MASK)); + + return mtrr->base_lo & MTRR_PHYSBASE_TYPE; +} + +static u8 get_effective_type(u8 type1, u8 type2) +{ + if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE) + return MTRR_TYPE_UNCACHABLE; + + if ((type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH) || + (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK)) + return MTRR_TYPE_WRTHROUGH; + + if (type1 != type2) + return MTRR_TYPE_UNCACHABLE; + + return type1; +} + +static void rm_map_entry_at(int idx) +{ + cache_map_n--; + if (cache_map_n > idx) { + memmove(cache_map + idx, cache_map + idx + 1, + sizeof(*cache_map) * (cache_map_n - idx)); + } +} + /* - * Check and return the effective type for MTRR-MTRR type overlap. - * Returns 1 if the effective type is UNCACHEABLE, else returns 0 + * Add an entry into cache_map at a specific index. Merges adjacent entries if + * appropriate. 
Return the number of merges for correcting the scan index + * (this is needed as merging will reduce the number of entries, which will + * result in skipping entries in future iterations if the scan index isn't + * corrected). + * Note that the corrected index can never go below -1 (resulting in being 0 in + * the next scan iteration), as "2" is returned only if the current index is + * larger than zero. */ -static int check_type_overlap(u8 *prev, u8 *curr) +static int add_map_entry_at(u64 start, u64 end, u8 type, int idx) { - if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) { - *prev = MTRR_TYPE_UNCACHABLE; - *curr = MTRR_TYPE_UNCACHABLE; - return 1; + bool merge_prev = false, merge_next = false; + + if (start >= end) + return 0; + + if (idx > 0) { + struct cache_map *prev = cache_map + idx - 1; + + if (!prev->fixed && start == prev->end && type == prev->type) + merge_prev = true; } - if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) || - (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) { - *prev = MTRR_TYPE_WRTHROUGH; - *curr = MTRR_TYPE_WRTHROUGH; + if (idx < cache_map_n) { + struct cache_map *next = cache_map + idx; + + if (!next->fixed && end == next->start && type == next->type) + merge_next = true; } - if (*prev != *curr) { - *prev = MTRR_TYPE_UNCACHABLE; - *curr = MTRR_TYPE_UNCACHABLE; + if (merge_prev && merge_next) { + cache_map[idx - 1].end = cache_map[idx].end; + rm_map_entry_at(idx); + return 2; + } + if (merge_prev) { + cache_map[idx - 1].end = end; return 1; } + if (merge_next) { + cache_map[idx].start = start; + return 1; + } + + /* Sanity check: the array should NEVER be too small! */ + if (cache_map_n == cache_map_size) { + WARN(1, "MTRR cache mode memory map exhausted!\n"); + cache_map_n = cache_map_fixed; + return 0; + } + + if (cache_map_n > idx) { + memmove(cache_map + idx + 1, cache_map + idx, + sizeof(*cache_map) * (cache_map_n - idx)); + } + + cache_map[idx].start = start; + cache_map[idx].end = end; + cache_map[idx].type = type; + cache_map[idx].fixed = 0; + cache_map_n++; return 0; } -/** - * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries - * - * Return the MTRR fixed memory type of 'start'. - * - * MTRR fixed entries are divided into the following ways: - * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges - * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges - * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges - * - * Return Values: - * MTRR_TYPE_(type) - Matched memory type - * MTRR_TYPE_INVALID - Unmatched +/* Clear a part of an entry. Return 1 if start of entry is still valid. */ +static int clr_map_range_at(u64 start, u64 end, int idx) +{ + int ret = start != cache_map[idx].start; + u64 tmp; + + if (start == cache_map[idx].start && end == cache_map[idx].end) { + rm_map_entry_at(idx); + } else if (start == cache_map[idx].start) { + cache_map[idx].start = end; + } else if (end == cache_map[idx].end) { + cache_map[idx].end = start; + } else { + tmp = cache_map[idx].end; + cache_map[idx].end = start; + add_map_entry_at(end, tmp, cache_map[idx].type, idx + 1); + } + + return ret; +} + +/* + * Add MTRR to the map. The current map is scanned and each part of the MTRR + * either overlapping with an existing entry or with a hole in the map is + * handled separately. 
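+ E.g. when the entry at idx - 1 ends exactly where the new range starts
+ and has the same type, the new range is folded into it and 1 is
+ returned; when both neighbours match like that, they collapse into a
+ single entry and 2 is returned.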
*/ -static u8 mtrr_type_lookup_fixed(u64 start, u64 end) +static void add_map_entry(u64 start, u64 end, u8 type) { - int idx; + u8 new_type, old_type; + u64 tmp; + int i; - if (start >= 0x100000) - return MTRR_TYPE_INVALID; + for (i = 0; i < cache_map_n && start < end; i++) { + if (start >= cache_map[i].end) + continue; + + if (start < cache_map[i].start) { + /* Region start has no overlap. */ + tmp = min(end, cache_map[i].start); + i -= add_map_entry_at(start, tmp, type, i); + start = tmp; + continue; + } - /* 0x0 - 0x7FFFF */ - if (start < 0x80000) { - idx = 0; - idx += (start >> 16); - return mtrr_state.fixed_ranges[idx]; - /* 0x80000 - 0xBFFFF */ - } else if (start < 0xC0000) { - idx = 1 * 8; - idx += ((start - 0x80000) >> 14); - return mtrr_state.fixed_ranges[idx]; + new_type = get_effective_type(type, cache_map[i].type); + old_type = cache_map[i].type; + + if (cache_map[i].fixed || new_type == old_type) { + /* Cut off start of new entry. */ + start = cache_map[i].end; + continue; + } + + /* Handle only overlapping part of region. */ + tmp = min(end, cache_map[i].end); + i += clr_map_range_at(start, tmp, i); + i -= add_map_entry_at(start, tmp, new_type, i); + start = tmp; } - /* 0xC0000 - 0xFFFFF */ - idx = 3 * 8; - idx += ((start - 0xC0000) >> 12); - return mtrr_state.fixed_ranges[idx]; + /* Add rest of region after last map entry (rest might be empty). */ + add_map_entry_at(start, end, type, i); } -/** - * mtrr_type_lookup_variable - look up memory type in MTRR variable entries - * - * Return Value: - * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched) - * - * Output Arguments: - * repeat - Set to 1 when [start:end] spanned across MTRR range and type - * returned corresponds only to [start:*partial_end]. Caller has - * to lookup again for [*partial_end:end]. - * - * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the - * region is fully covered by a single MTRR entry or the default - * type. +/* Add variable MTRRs to cache map. */ +static void map_add_var(void) +{ + u64 start, size; + unsigned int i; + u8 type; + + /* + * Add AMD TOP_MEM2 area. Can't be added in mtrr_build_map(), as it + * needs to be added again when rebuilding the map due to potentially + * having moved as a result of variable MTRRs for memory below 4GB. + */ + if (mtrr_tom2) { + add_map_entry(BIT_ULL(32), mtrr_tom2, MTRR_TYPE_WRBACK); + cache_map[cache_map_n - 1].fixed = 1; + } + + for (i = 0; i < num_var_ranges; i++) { + type = get_var_mtrr_state(i, &start, &size); + if (type != MTRR_TYPE_INVALID) + add_map_entry(start, start + size, type); + } +} + +/* + * Rebuild map by replacing variable entries. Needs to be called when MTRR + * registers are being changed after boot, as such changes could include + * removals of registers, which are complicated to handle without rebuild of + * the map. 
*/ -static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, - int *repeat, u8 *uniform) +void generic_rebuild_map(void) { - int i; - u64 base, mask; - u8 prev_match, curr_match; + if (mtrr_if != &generic_mtrr_ops) + return; - *repeat = 0; - *uniform = 1; + cache_map_n = cache_map_fixed; - prev_match = MTRR_TYPE_INVALID; - for (i = 0; i < num_var_ranges; ++i) { - unsigned short start_state, end_state, inclusive; + map_add_var(); +} - if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11))) - continue; +static unsigned int __init get_cache_map_size(void) +{ + return cache_map_fixed + 2 * num_var_ranges + (mtrr_tom2 != 0); +} - base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) + - (mtrr_state.var_ranges[i].base_lo & PAGE_MASK); - mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) + - (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK); - - start_state = ((start & mask) == (base & mask)); - end_state = ((end & mask) == (base & mask)); - inclusive = ((start < base) && (end > base)); - - if ((start_state != end_state) || inclusive) { - /* - * We have start:end spanning across an MTRR. - * We split the region into either - * - * - start_state:1 - * (start:mtrr_end)(mtrr_end:end) - * - end_state:1 - * (start:mtrr_start)(mtrr_start:end) - * - inclusive:1 - * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end) - * - * depending on kind of overlap. - * - * Return the type of the first region and a pointer - * to the start of next region so that caller will be - * advised to lookup again after having adjusted start - * and end. - * - * Note: This way we handle overlaps with multiple - * entries and the default type properly. - */ - if (start_state) - *partial_end = base + get_mtrr_size(mask); - else - *partial_end = base; - - if (unlikely(*partial_end <= start)) { - WARN_ON(1); - *partial_end = start + PAGE_SIZE; - } +/* Build the cache_map containing the cache modes per memory range. */ +void __init mtrr_build_map(void) +{ + u64 start, end, size; + unsigned int i; + u8 type; - end = *partial_end - 1; /* end is inclusive */ - *repeat = 1; - *uniform = 0; + /* Add fixed MTRRs, optimize for adjacent entries with same type. */ + if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) { + /* + * Start with 64k size fixed entries, preset 1st one (hence the + * loop below is starting with index 1). + */ + start = 0; + end = size = 0x10000; + type = mtrr_state.fixed_ranges[0]; + + for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) { + /* 8 64k entries, then 16 16k ones, rest 4k. */ + if (i == 8 || i == 24) + size >>= 2; + + if (mtrr_state.fixed_ranges[i] != type) { + add_map_entry(start, end, type); + start = end; + type = mtrr_state.fixed_ranges[i]; + } + end += size; } + add_map_entry(start, end, type); + } - if ((start & mask) != (base & mask)) - continue; + /* Mark fixed, they take precedence. 
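+ (being flagged fixed means these entries are never merged with, or
+ clipped by, the variable-range entries that map_add_var() adds later)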
*/ + for (i = 0; i < cache_map_n; i++) + cache_map[i].fixed = 1; + cache_map_fixed = cache_map_n; - curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; - if (prev_match == MTRR_TYPE_INVALID) { - prev_match = curr_match; - continue; + map_add_var(); + + pr_info("MTRR map: %u entries (%u fixed + %u variable; max %u), built from %u variable MTRRs\n", + cache_map_n, cache_map_fixed, cache_map_n - cache_map_fixed, + get_cache_map_size(), num_var_ranges + (mtrr_tom2 != 0)); + + if (mtrr_debug) { + for (i = 0; i < cache_map_n; i++) { + pr_info("%3u: %016llx-%016llx %s\n", i, + cache_map[i].start, cache_map[i].end - 1, + mtrr_attrib_to_str(cache_map[i].type)); } + } +} - *uniform = 0; - if (check_type_overlap(&prev_match, &curr_match)) - return curr_match; +/* Copy the cache_map from __initdata memory to dynamically allocated one. */ +void __init mtrr_copy_map(void) +{ + unsigned int new_size = get_cache_map_size(); + + if (!mtrr_state.enabled || !new_size) { + cache_map = NULL; + return; + } + + mutex_lock(&mtrr_mutex); + + cache_map = kcalloc(new_size, sizeof(*cache_map), GFP_KERNEL); + if (cache_map) { + memmove(cache_map, init_cache_map, + cache_map_n * sizeof(*cache_map)); + cache_map_size = new_size; + } else { + mtrr_state.enabled = 0; + pr_err("MTRRs disabled due to allocation failure for lookup map.\n"); + } + + mutex_unlock(&mtrr_mutex); +} + +/** + * mtrr_overwrite_state - set static MTRR state + * + * Used to set MTRR state via different means (e.g. with data obtained from + * a hypervisor). + * Is allowed only for special cases when running virtualized. Must be called + * from the x86_init.hyper.init_platform() hook. It can be called only once. + * The MTRR state can't be changed afterwards. To ensure that, X86_FEATURE_MTRR + * is cleared. + */ +void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var, + mtrr_type def_type) +{ + unsigned int i; + + /* Only allowed to be called once before mtrr_bp_init(). */ + if (WARN_ON_ONCE(mtrr_state_set)) + return; + + /* Only allowed when running virtualized. */ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return; + + /* + * Only allowed for special virtualization cases: + * - when running as Hyper-V, SEV-SNP guest using vTOM + * - when running as Xen PV guest + * - when running as SEV-SNP or TDX guest to avoid unnecessary + * VMM communication/Virtualization exceptions (#VC, #VE) + */ + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && + !hv_is_isolation_supported() && + !cpu_feature_enabled(X86_FEATURE_XENPV) && + !cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) + return; + + /* Disable MTRR in order to disable MTRR modifications. 
*/ + setup_clear_cpu_cap(X86_FEATURE_MTRR); + + if (var) { + if (num_var > MTRR_MAX_VAR_RANGES) { + pr_warn("Trying to overwrite MTRR state with %u variable entries\n", + num_var); + num_var = MTRR_MAX_VAR_RANGES; + } + for (i = 0; i < num_var; i++) + mtrr_state.var_ranges[i] = var[i]; + num_var_ranges = num_var; } - if (prev_match != MTRR_TYPE_INVALID) - return prev_match; + mtrr_state.def_type = def_type; + mtrr_state.enabled |= MTRR_STATE_MTRR_ENABLED; - return mtrr_state.def_type; + mtrr_state_set = 1; +} + +static u8 type_merge(u8 type, u8 new_type, u8 *uniform) +{ + u8 effective_type; + + if (type == MTRR_TYPE_INVALID) + return new_type; + + effective_type = get_effective_type(type, new_type); + if (type != effective_type) + *uniform = 0; + + return effective_type; } /** @@ -248,66 +497,49 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, * MTRR_TYPE_INVALID - MTRR is disabled * * Output Argument: - * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the - * region is fully covered by a single MTRR entry or the default - * type. + * uniform - Set to 1 when the returned MTRR type is valid for the whole + * region, set to 0 else. */ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) { - u8 type, prev_type, is_uniform = 1, dummy; - int repeat; - u64 partial_end; + u8 type = MTRR_TYPE_INVALID; + unsigned int i; - /* Make end inclusive instead of exclusive */ - end--; + if (!mtrr_state_set) { + /* Uniformity is unknown. */ + *uniform = 0; + return MTRR_TYPE_UNCACHABLE; + } - if (!mtrr_state_set) - return MTRR_TYPE_INVALID; + *uniform = 1; if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED)) - return MTRR_TYPE_INVALID; + return MTRR_TYPE_UNCACHABLE; - /* - * Look up the fixed ranges first, which take priority over - * the variable ranges. - */ - if ((start < 0x100000) && - (mtrr_state.have_fixed) && - (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { - is_uniform = 0; - type = mtrr_type_lookup_fixed(start, end); - goto out; - } + for (i = 0; i < cache_map_n && start < end; i++) { + /* Region after current map entry? -> continue with next one. */ + if (start >= cache_map[i].end) + continue; - /* - * Look up the variable ranges. Look of multiple ranges matching - * this address and pick type as per MTRR precedence. - */ - type = mtrr_type_lookup_variable(start, end, &partial_end, - &repeat, &is_uniform); + /* Start of region not covered by current map entry? */ + if (start < cache_map[i].start) { + /* At least some part of region has default type. */ + type = type_merge(type, mtrr_state.def_type, uniform); + /* End of region not covered, too? -> lookup done. */ + if (end <= cache_map[i].start) + return type; + } - /* - * Common path is with repeat = 0. - * However, we can have cases where [start:end] spans across some - * MTRR ranges and/or the default type. Do repeated lookups for - * that case here. - */ - while (repeat) { - prev_type = type; - start = partial_end; - is_uniform = 0; - type = mtrr_type_lookup_variable(start, end, &partial_end, - &repeat, &dummy); + /* At least part of region covered by map entry. */ + type = type_merge(type, cache_map[i].type, uniform); - if (check_type_overlap(&prev_type, &type)) - goto out; + start = cache_map[i].end; } - if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2)) - type = MTRR_TYPE_WRBACK; + /* End of region past last entry in map? -> use default type. 
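To make the new lookup walk concrete, here is a minimal userspace model (same simplified "UC wins" merge rule as the toy above; not the kernel code). Looking up a region that starts in a default-type hole and ends inside a UC map entry returns UC with uniform cleared.

#include <stdio.h>
#include <stdint.h>

struct entry { uint64_t start, end; uint8_t type; };

#define T_UC      0x00
#define T_WB      0x06
#define T_INVALID 0xff

static uint8_t merge(uint8_t cur, uint8_t add, int *uniform)
{
    uint8_t eff;

    if (cur == T_INVALID)
        return add;
    eff = (cur == T_UC || add == T_UC) ? T_UC : add;
    if (eff != cur)
        *uniform = 0;
    return eff;
}

static uint8_t lookup(const struct entry *map, int n, uint8_t def,
                      uint64_t start, uint64_t end, int *uniform)
{
    uint8_t type = T_INVALID;

    *uniform = 1;
    for (int i = 0; i < n && start < end; i++) {
        if (start >= map[i].end)
            continue;
        if (start < map[i].start) {       /* hole: default type */
            type = merge(type, def, uniform);
            if (end <= map[i].start)
                return type;
        }
        type = merge(type, map[i].type, uniform);
        start = map[i].end;
    }
    if (start < end)                      /* tail past last entry */
        type = merge(type, def, uniform);
    return type;
}

int main(void)
{
    struct entry map[] = { { 0xA0000, 0xC0000, T_UC } };
    int uniform;
    uint8_t t = lookup(map, 1, T_WB, 0x90000, 0xB0000, &uniform);

    printf("type=%#x uniform=%d\n", t, uniform);  /* UC, non-uniform */
    return 0;
}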
*/ + if (start < end) + type = type_merge(type, mtrr_state.def_type, uniform); -out: - *uniform = is_uniform; return type; } @@ -363,8 +595,8 @@ static void __init print_fixed_last(void) if (!last_fixed_end) return; - pr_debug(" %05X-%05X %s\n", last_fixed_start, - last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); + pr_info(" %05X-%05X %s\n", last_fixed_start, + last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); last_fixed_end = 0; } @@ -402,10 +634,10 @@ static void __init print_mtrr_state(void) unsigned int i; int high_width; - pr_debug("MTRR default type: %s\n", - mtrr_attrib_to_str(mtrr_state.def_type)); + pr_info("MTRR default type: %s\n", + mtrr_attrib_to_str(mtrr_state.def_type)); if (mtrr_state.have_fixed) { - pr_debug("MTRR fixed ranges %sabled:\n", + pr_info("MTRR fixed ranges %sabled:\n", ((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ? "en" : "dis"); @@ -420,26 +652,27 @@ static void __init print_mtrr_state(void) /* tail */ print_fixed_last(); } - pr_debug("MTRR variable ranges %sabled:\n", - mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); - high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4; + pr_info("MTRR variable ranges %sabled:\n", + mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis"); + high_width = (boot_cpu_data.x86_phys_bits - (32 - PAGE_SHIFT) + 3) / 4; for (i = 0; i < num_var_ranges; ++i) { - if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + if (mtrr_state.var_ranges[i].mask_lo & MTRR_PHYSMASK_V) + pr_info(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & + MTRR_PHYSBASE_TYPE)); else - pr_debug(" %u disabled\n", i); + pr_info(" %u disabled\n", i); } if (mtrr_tom2) - pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); + pr_info("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); } /* Grab all of the MTRR state for this CPU into *state */ @@ -452,7 +685,7 @@ bool __init get_mtrr_state(void) vrs = mtrr_state.var_ranges; rdmsr(MSR_MTRRcap, lo, dummy); - mtrr_state.have_fixed = (lo >> 8) & 1; + mtrr_state.have_fixed = lo & MTRR_CAP_FIX; for (i = 0; i < num_var_ranges; i++) get_mtrr_var_range(i, &vrs[i]); @@ -460,8 +693,8 @@ bool __init get_mtrr_state(void) get_fixed_ranges(mtrr_state.fixed_ranges); rdmsr(MSR_MTRRdefType, lo, dummy); - mtrr_state.def_type = (lo & 0xff); - mtrr_state.enabled = (lo & 0xc00) >> 10; + mtrr_state.def_type = lo & MTRR_DEF_TYPE_TYPE; + mtrr_state.enabled = (lo & MTRR_DEF_TYPE_ENABLE) >> MTRR_STATE_SHIFT; if (amd_special_default_mtrr()) { unsigned low, high; @@ -474,7 +707,8 @@ bool __init get_mtrr_state(void) mtrr_tom2 &= 0xffffff800000ULL; } - print_mtrr_state(); + if (mtrr_debug) + print_mtrr_state(); mtrr_state_set = 1; @@ -574,7 +808,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); - if ((mask_lo & 0x800) == 0) { + if (!(mask_lo & MTRR_PHYSMASK_V)) { /* Invalid (i.e. 
free) range */ *base = 0; *size = 0; @@ -585,8 +819,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask: */ - tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; - mask = size_or_mask | tmp; + tmp = (u64)mask_hi << 32 | (mask_lo & PAGE_MASK); + mask = (u64)phys_hi_rsvd << 32 | tmp; /* Expand tmp with high bits to all 1s: */ hi = fls64(tmp); @@ -604,9 +838,9 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, * This works correctly if size is a power of two, i.e. a * contiguous range: */ - *size = -mask; + *size = -mask >> PAGE_SHIFT; *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; - *type = base_lo & 0xff; + *type = base_lo & MTRR_PHYSBASE_TYPE; out_put_cpu: put_cpu(); @@ -644,9 +878,8 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) bool changed = false; rdmsr(MTRRphysBase_MSR(index), lo, hi); - if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) - || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + if ((vr->base_lo & ~MTRR_PHYSBASE_RSVD) != (lo & ~MTRR_PHYSBASE_RSVD) + || (vr->base_hi & ~phys_hi_rsvd) != (hi & ~phys_hi_rsvd)) { mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); changed = true; @@ -654,9 +887,8 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysMask_MSR(index), lo, hi); - if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) - || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + if ((vr->mask_lo & ~MTRR_PHYSMASK_RSVD) != (lo & ~MTRR_PHYSMASK_RSVD) + || (vr->mask_hi & ~phys_hi_rsvd) != (hi & ~phys_hi_rsvd)) { mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); changed = true; } @@ -691,11 +923,12 @@ static unsigned long set_mtrr_state(void) * Set_mtrr_restore restores the old value of MTRRdefType, * so to set it we fiddle with the saved value: */ - if ((deftype_lo & 0xff) != mtrr_state.def_type - || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { + if ((deftype_lo & MTRR_DEF_TYPE_TYPE) != mtrr_state.def_type || + ((deftype_lo & MTRR_DEF_TYPE_ENABLE) >> MTRR_STATE_SHIFT) != mtrr_state.enabled) { - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | - (mtrr_state.enabled << 10); + deftype_lo = (deftype_lo & MTRR_DEF_TYPE_DISABLE) | + mtrr_state.def_type | + (mtrr_state.enabled << MTRR_STATE_SHIFT); change_mask |= MTRR_CHANGE_MASK_DEFTYPE; } @@ -708,7 +941,7 @@ void mtrr_disable(void) rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); + mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & MTRR_DEF_TYPE_DISABLE, deftype_hi); } void mtrr_enable(void) @@ -762,9 +995,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, memset(vr, 0, sizeof(struct mtrr_var_range)); } else { vr->base_lo = base << PAGE_SHIFT | type; - vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); - vr->mask_lo = -size << PAGE_SHIFT | 0x800; - vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + vr->base_hi = (base >> (32 - PAGE_SHIFT)) & ~phys_hi_rsvd; + vr->mask_lo = -size << PAGE_SHIFT | MTRR_PHYSMASK_V; + vr->mask_hi = (-size >> (32 - PAGE_SHIFT)) & ~phys_hi_rsvd; mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); 
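The size recovery in generic_get_mtrr() above ("*size = -mask >> PAGE_SHIFT") deserves a worked example. A standalone check, assuming a 36-bit machine and a 128 MiB region (in the kernel the high reserved bits come from phys_hi_rsvd; here the mask is built directly):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t phys_mask = (1ULL << 36) - 1;          /* 36 address bits  */
    uint64_t region    = 128ULL << 20;              /* 128 MiB          */
    uint64_t mtrr_mask = phys_mask & ~(region - 1); /* as stored in MSR */

    /* Extend the reserved high bits to all 1s, as the kernel does. */
    uint64_t mask = mtrr_mask | ~phys_mask;

    /* -mask equals the size in bytes because the range is contiguous. */
    printf("size = %llu MiB, %llu pages\n",
           (unsigned long long)(-mask >> 20),
           (unsigned long long)(-mask >> 12));      /* 128 MiB, 32768 */
    return 0;
}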
@@ -783,7 +1016,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, * For Intel PPro stepping <= 7 * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF */ - if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && + if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_stepping <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { @@ -817,7 +1050,7 @@ static int generic_have_wrcomb(void) { unsigned long config, dummy; rdmsr(MSR_MTRRcap, config, dummy); - return config & (1 << 10); + return config & MTRR_CAP_WC; } int positive_have_wrcomb(void) diff --git a/arch/x86/kernel/cpu/mtrr/legacy.c b/arch/x86/kernel/cpu/mtrr/legacy.c new file mode 100644 index 000000000000..d25882fcf181 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/legacy.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> +#include <asm/cpufeature.h> +#include <asm/mtrr.h> +#include <asm/processor.h> +#include "mtrr.h" + +void mtrr_set_if(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + /* Pre-Athlon (K6) AMD CPU MTRRs */ + if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) + mtrr_if = &amd_mtrr_ops; + break; + case X86_VENDOR_CENTAUR: + if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) + mtrr_if = ¢aur_mtrr_ops; + break; + case X86_VENDOR_CYRIX: + if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) + mtrr_if = &cyrix_mtrr_ops; + break; + default: + break; + } +} + +/* + * The suspend/resume methods are only for CPUs without MTRR. CPUs using generic + * MTRR driver don't require this. + */ +struct mtrr_value { + mtrr_type ltype; + unsigned long lbase; + unsigned long lsize; +}; + +static struct mtrr_value *mtrr_value; + +static int mtrr_save(void) +{ + int i; + + if (!mtrr_value) + return -ENOMEM; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &mtrr_value[i].lbase, + &mtrr_value[i].lsize, + &mtrr_value[i].ltype); + } + return 0; +} + +static void mtrr_restore(void) +{ + int i; + + for (i = 0; i < num_var_ranges; i++) { + if (mtrr_value[i].lsize) { + mtrr_if->set(i, mtrr_value[i].lbase, + mtrr_value[i].lsize, + mtrr_value[i].ltype); + } + } +} + +static struct syscore_ops mtrr_syscore_ops = { + .suspend = mtrr_save, + .resume = mtrr_restore, +}; + +void mtrr_register_syscore(void) +{ + mtrr_value = kcalloc(num_var_ranges, sizeof(*mtrr_value), GFP_KERNEL); + + /* + * The CPU has no MTRR and seems to not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * + * TBD: is there any system with such CPU which supports + * suspend/resume? If no, we should remove the code. + */ + register_syscore_ops(&mtrr_syscore_ops); +} diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 783f3210d582..767bf1c71aad 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -59,15 +59,9 @@ #define MTRR_TO_PHYS_WC_OFFSET 1000 u32 num_var_ranges; -static bool mtrr_enabled(void) -{ - return !!mtrr_if; -} unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; -static DEFINE_MUTEX(mtrr_mutex); - -u64 size_or_mask, size_and_mask; +DEFINE_MUTEX(mtrr_mutex); const struct mtrr_ops *mtrr_if; @@ -105,21 +99,6 @@ static int have_wrcomb(void) return mtrr_if->have_wrcomb ? 
mtrr_if->have_wrcomb() : 0; } -/* This function returns the number of variable MTRRs */ -static void __init set_num_var_ranges(bool use_generic) -{ - unsigned long config = 0, dummy; - - if (use_generic) - rdmsr(MSR_MTRRcap, config, dummy); - else if (is_cpu(AMD) || is_cpu(HYGON)) - config = 2; - else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) - config = 8; - - num_var_ranges = config & 0xff; -} - static void __init init_table(void) { int i, max; @@ -194,20 +173,8 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) * Note that the mechanism is the same for UP systems, too; all the SMP stuff * becomes nops. */ -static void -set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) -{ - struct set_mtrr_data data = { .smp_reg = reg, - .smp_base = base, - .smp_size = size, - .smp_type = type - }; - - stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask); -} - -static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) +static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, + mtrr_type type) { struct set_mtrr_data data = { .smp_reg = reg, .smp_base = base, @@ -216,6 +183,8 @@ static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, }; stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask); + + generic_rebuild_map(); } /** @@ -337,7 +306,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { - set_mtrr_cpuslocked(i, base, size, type); + set_mtrr(i, base, size, type); if (likely(replace < 0)) { mtrr_usage_table[i] = 1; } else { @@ -345,7 +314,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (increment) mtrr_usage_table[i]++; if (unlikely(replace != i)) { - set_mtrr_cpuslocked(replace, 0, 0, 0); + set_mtrr(replace, 0, 0, 0); mtrr_usage_table[replace] = 0; } } @@ -363,7 +332,7 @@ static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { pr_warn("size and base must be multiples of 4 kiB\n"); - pr_debug("size: 0x%lx base: 0x%lx\n", size, base); + Dprintk("size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; } @@ -454,8 +423,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg < 0) { - pr_debug("no MTRR for %lx000,%lx000 found\n", - base, size); + Dprintk("no MTRR for %lx000,%lx000 found\n", base, size); goto out; } } @@ -473,7 +441,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) goto out; } if (--mtrr_usage_table[reg] < 1) - set_mtrr_cpuslocked(reg, 0, 0, 0); + set_mtrr(reg, 0, 0, 0); error = reg; out: mutex_unlock(&mtrr_mutex); @@ -574,136 +542,54 @@ int arch_phys_wc_index(int handle) } EXPORT_SYMBOL_GPL(arch_phys_wc_index); -/* The suspend/resume methods are only for CPU without MTRR. 
CPU using generic - * MTRR driver doesn't require this - */ -struct mtrr_value { - mtrr_type ltype; - unsigned long lbase; - unsigned long lsize; -}; - -static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; - -static int mtrr_save(void) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, &mtrr_value[i].lbase, - &mtrr_value[i].lsize, - &mtrr_value[i].ltype); - } - return 0; -} - -static void mtrr_restore(void) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - if (mtrr_value[i].lsize) { - set_mtrr(i, mtrr_value[i].lbase, - mtrr_value[i].lsize, - mtrr_value[i].ltype); - } - } -} - - - -static struct syscore_ops mtrr_syscore_ops = { - .suspend = mtrr_save, - .resume = mtrr_restore, -}; - int __initdata changed_by_mtrr_cleanup; -#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1)) /** - * mtrr_bp_init - initialize mtrrs on the boot CPU + * mtrr_bp_init - initialize MTRRs on the boot CPU * * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). - * */ void __init mtrr_bp_init(void) { + bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR); const char *why = "(not available)"; - u32 phys_addr; - - phys_addr = 32; + unsigned long config, dummy; - if (boot_cpu_has(X86_FEATURE_MTRR)) { - mtrr_if = &generic_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(36); - size_and_mask = 0x00f00000; - phys_addr = 36; + phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32); + if (!generic_mtrrs && mtrr_state.enabled) { /* - * This is an AMD specific MSR, but we assume(hope?) that - * Intel will implement it too when they extend the address - * bus of the Xeon. + * Software overwrite of MTRR state, only for generic case. + * Note that X86_FEATURE_MTRR has been reset in this case. */ - if (cpuid_eax(0x80000000) >= 0x80000008) { - phys_addr = cpuid_eax(0x80000008) & 0xff; - /* CPUID workaround for Intel 0F33/0F34 CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 0xF && - boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_stepping == 0x3 || - boot_cpu_data.x86_stepping == 0x4)) - phys_addr = 36; - - size_or_mask = SIZE_OR_MASK_BITS(phys_addr); - size_and_mask = ~size_or_mask & 0xfffff00000ULL; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && - boot_cpu_data.x86 == 6) { - /* - * VIA C* family have Intel style MTRRs, - * but don't support PAE - */ - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - phys_addr = 32; - } - } else { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) { - /* Pre-Athlon (K6) AMD CPU MTRRs */ - mtrr_if = &amd_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - } - break; - case X86_VENDOR_CENTAUR: - if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) { - mtrr_if = ¢aur_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - } - break; - case X86_VENDOR_CYRIX: - if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) { - mtrr_if = &cyrix_mtrr_ops; - size_or_mask = SIZE_OR_MASK_BITS(32); - size_and_mask = 0; - } - break; - default: - break; - } + init_table(); + mtrr_build_map(); + pr_info("MTRRs set to read-only\n"); + + return; } + if (generic_mtrrs) + mtrr_if = &generic_mtrr_ops; + else + mtrr_set_if(); + if (mtrr_enabled()) { - set_num_var_ranges(mtrr_if == &generic_mtrr_ops); + /* Get the number of variable MTRR ranges. 
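The phys_hi_rsvd computation above can be sanity-checked in isolation: GENMASK(31, x86_phys_bits - 32) over the high MSR dword marks every bit at or above the CPU's physical address width as reserved. A userspace re-implementation (GENMASK32 is a local stand-in for the kernel macro):

#include <stdio.h>
#include <stdint.h>

#define GENMASK32(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

int main(void)
{
    for (int phys_bits = 36; phys_bits <= 52; phys_bits += 8) {
        uint32_t hi_rsvd = GENMASK32(31, phys_bits - 32);

        printf("x86_phys_bits=%d -> phys_hi_rsvd=%08x\n",
               phys_bits, hi_rsvd);
    }
    return 0;
}
/* 36 -> fffffff0, 44 -> fffff000, 52 -> fff00000 */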
*/ + if (mtrr_if == &generic_mtrr_ops) + rdmsr(MSR_MTRRcap, config, dummy); + else + config = mtrr_if->var_regs; + num_var_ranges = config & MTRR_CAP_VCNT; + init_table(); if (mtrr_if == &generic_mtrr_ops) { /* BIOS may override */ if (get_mtrr_state()) { memory_caching_control |= CACHE_MTRR; - changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr); + changed_by_mtrr_cleanup = mtrr_cleanup(); + mtrr_build_map(); } else { mtrr_if = NULL; why = "by BIOS"; @@ -730,8 +616,14 @@ void mtrr_save_state(void) smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1); } -static int __init mtrr_init_finialize(void) +static int __init mtrr_init_finalize(void) { + /* + * Map might exist if mtrr_overwrite_state() has been called or if + * mtrr_enabled() returns true. + */ + mtrr_copy_map(); + if (!mtrr_enabled()) return 0; @@ -741,16 +633,8 @@ static int __init mtrr_init_finialize(void) return 0; } - /* - * The CPU has no MTRR and seems to not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * - * TBD: is there any system with such CPU which supports - * suspend/resume? If no, we should remove the code. - */ - register_syscore_ops(&mtrr_syscore_ops); + mtrr_register_syscore(); return 0; } -subsys_initcall(mtrr_init_finialize); +subsys_initcall(mtrr_init_finalize); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 02eb5871492d..5655f253d929 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -10,10 +10,13 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 +extern bool mtrr_debug; +#define Dprintk(x...) do { if (mtrr_debug) pr_info(x); } while (0) + extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { - u32 vendor; + u32 var_regs; void (*set)(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); void (*get)(unsigned int reg, unsigned long *base, @@ -51,18 +54,26 @@ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); bool get_mtrr_state(void); -extern u64 size_or_mask, size_and_mask; extern const struct mtrr_ops *mtrr_if; - -#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) +extern struct mutex mtrr_mutex; extern unsigned int num_var_ranges; extern u64 mtrr_tom2; extern struct mtrr_state_type mtrr_state; +extern u32 phys_hi_rsvd; void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); +#ifdef CONFIG_X86_32 +void mtrr_set_if(void); +void mtrr_register_syscore(void); +#else +static inline void mtrr_set_if(void) { } +static inline void mtrr_register_syscore(void) { } +#endif +void mtrr_build_map(void); +void mtrr_copy_map(void); /* CPU specific mtrr_ops vectors. */ extern const struct mtrr_ops amd_mtrr_ops; @@ -70,4 +81,14 @@ extern const struct mtrr_ops cyrix_mtrr_ops; extern const struct mtrr_ops centaur_mtrr_ops; extern int changed_by_mtrr_cleanup; -extern int mtrr_cleanup(unsigned address_bits); +extern int mtrr_cleanup(void); + +/* + * Must be used by code which uses mtrr_if to call platform-specific + * MTRR manipulation functions. 
+ */ +static inline bool mtrr_enabled(void) +{ + return !!mtrr_if; +} +void generic_rebuild_map(void); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6ad33f355861..725344048f85 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -726,11 +726,15 @@ unlock: static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) { struct task_struct *p, *t; + pid_t pid; rcu_read_lock(); for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) - seq_printf(s, "%d\n", t->pid); + if (is_closid_match(t, r) || is_rmid_match(t, r)) { + pid = task_pid_vnr(t); + if (pid) + seq_printf(s, "%d\n", pid); + } } rcu_read_unlock(); } @@ -2301,6 +2305,26 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn) } } +static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn) +{ + atomic_inc(&rdtgrp->waitcount); + kernfs_break_active_protection(kn); +} + +static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn) +{ + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || + rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) + rdtgroup_pseudo_lock_remove(rdtgrp); + kernfs_unbreak_active_protection(kn); + rdtgroup_remove(rdtgrp); + } else { + kernfs_unbreak_active_protection(kn); + } +} + struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) { struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn); @@ -2308,8 +2332,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) if (!rdtgrp) return NULL; - atomic_inc(&rdtgrp->waitcount); - kernfs_break_active_protection(kn); + rdtgroup_kn_get(rdtgrp, kn); mutex_lock(&rdtgroup_mutex); @@ -2328,17 +2351,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) return; mutex_unlock(&rdtgroup_mutex); - - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) - rdtgroup_pseudo_lock_remove(rdtgrp); - kernfs_unbreak_active_protection(kn); - rdtgroup_remove(rdtgrp); - } else { - kernfs_unbreak_active_protection(kn); - } + rdtgroup_kn_put(rdtgrp, kn); } static int mkdir_mondata_all(struct kernfs_node *parent_kn, @@ -3505,6 +3518,133 @@ out: return ret; } +/** + * mongrp_reparent() - replace parent CTRL_MON group of a MON group + * @rdtgrp: the MON group whose parent should be replaced + * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp + * @cpus: cpumask provided by the caller for use during this call + * + * Replaces the parent CTRL_MON group for a MON group, resulting in all member + * tasks' CLOSID immediately changing to that of the new parent group. + * Monitoring data for the group is unaffected by this operation. + */ +static void mongrp_reparent(struct rdtgroup *rdtgrp, + struct rdtgroup *new_prdtgrp, + cpumask_var_t cpus) +{ + struct rdtgroup *prdtgrp = rdtgrp->mon.parent; + + WARN_ON(rdtgrp->type != RDTMON_GROUP); + WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP); + + /* Nothing to do when simply renaming a MON group. */ + if (prdtgrp == new_prdtgrp) + return; + + WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); + list_move_tail(&rdtgrp->mon.crdtgrp_list, + &new_prdtgrp->mon.crdtgrp_list); + + rdtgrp->mon.parent = new_prdtgrp; + rdtgrp->closid = new_prdtgrp->closid; + + /* Propagate updated closid to all tasks in this group. 
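The rdtgroup_kn_get()/rdtgroup_kn_put() helpers factored out above implement a common pattern: take a reference before entering a context where the object may be flagged deleted, and let the last put perform the teardown. A minimal model with C11 atomics standing in for atomic_t and kernfs active protection (all names illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
    atomic_int waitcount;
    bool       deleted;
};

static void group_get(struct group *g)
{
    atomic_fetch_add(&g->waitcount, 1);
    /* kernel: kernfs_break_active_protection(kn) */
}

static void group_put(struct group *g)
{
    /* kernel: kernfs_unbreak_active_protection(kn) */
    if (atomic_fetch_sub(&g->waitcount, 1) == 1 && g->deleted) {
        printf("last ref dropped, tearing down\n");
        free(g);
    }
}

int main(void)
{
    struct group *g = malloc(sizeof(*g));

    atomic_init(&g->waitcount, 0);
    g->deleted = false;

    group_get(g);        /* e.g. rename pins the source group...   */
    g->deleted = true;   /* ...while rmdir marks it deleted        */
    group_put(g);        /* teardown happens only on the final put */
    return 0;
}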
*/ + rdt_move_group_tasks(rdtgrp, rdtgrp, cpus); + + update_closid_rmid(cpus, NULL); +} + +static int rdtgroup_rename(struct kernfs_node *kn, + struct kernfs_node *new_parent, const char *new_name) +{ + struct rdtgroup *new_prdtgrp; + struct rdtgroup *rdtgrp; + cpumask_var_t tmpmask; + int ret; + + rdtgrp = kernfs_to_rdtgroup(kn); + new_prdtgrp = kernfs_to_rdtgroup(new_parent); + if (!rdtgrp || !new_prdtgrp) + return -ENOENT; + + /* Release both kernfs active_refs before obtaining rdtgroup mutex. */ + rdtgroup_kn_get(rdtgrp, kn); + rdtgroup_kn_get(new_prdtgrp, new_parent); + + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + /* + * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if + * either kernfs_node is a file. + */ + if (kernfs_type(kn) != KERNFS_DIR || + kernfs_type(new_parent) != KERNFS_DIR) { + rdt_last_cmd_puts("Source and destination must be directories"); + ret = -EPERM; + goto out; + } + + if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) { + ret = -ENOENT; + goto out; + } + + if (rdtgrp->type != RDTMON_GROUP || !kn->parent || + !is_mon_groups(kn->parent, kn->name)) { + rdt_last_cmd_puts("Source must be a MON group\n"); + ret = -EPERM; + goto out; + } + + if (!is_mon_groups(new_parent, new_name)) { + rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n"); + ret = -EPERM; + goto out; + } + + /* + * If the MON group is monitoring CPUs, the CPUs must be assigned to the + * current parent CTRL_MON group and therefore cannot be assigned to + * the new parent, making the move illegal. + */ + if (!cpumask_empty(&rdtgrp->cpu_mask) && + rdtgrp->mon.parent != new_prdtgrp) { + rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n"); + ret = -EPERM; + goto out; + } + + /* + * Allocate the cpumask for use in mongrp_reparent() to avoid the + * possibility of failing to allocate it after kernfs_rename() has + * succeeded. + */ + if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) { + ret = -ENOMEM; + goto out; + } + + /* + * Perform all input validation and allocations needed to ensure + * mongrp_reparent() will succeed before calling kernfs_rename(), + * otherwise it would be necessary to revert this call if + * mongrp_reparent() failed. + */ + ret = kernfs_rename(kn, new_parent, new_name); + if (!ret) + mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask); + + free_cpumask_var(tmpmask); + +out: + mutex_unlock(&rdtgroup_mutex); + rdtgroup_kn_put(rdtgrp, kn); + rdtgroup_kn_put(new_prdtgrp, new_parent); + return ret; +} + static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) { if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) @@ -3522,6 +3662,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = { .mkdir = rdtgroup_mkdir, .rmdir = rdtgroup_rmdir, + .rename = rdtgroup_rename, .show_options = rdtgroup_show_options, }; diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 2a0e90fe2abc..91fa70e51004 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -755,6 +755,7 @@ static void sgx_mmu_notifier_release(struct mmu_notifier *mn, { struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier); struct sgx_encl_mm *tmp = NULL; + bool found = false; /* * The enclave itself can remove encl_mm. 
Note, objects can't be moved @@ -764,12 +765,13 @@ static void sgx_mmu_notifier_release(struct mmu_notifier *mn, list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) { if (tmp == encl_mm) { list_del_rcu(&encl_mm->list); + found = true; break; } } spin_unlock(&encl_mm->encl->mm_lock); - if (tmp == encl_mm) { + if (found) { synchronize_srcu(&encl_mm->encl->srcu); mmu_notifier_put(mn); } diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index 21ca0a831b70..5d390df21440 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -214,7 +214,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl, if (!(vma->vm_flags & VM_MAYEXEC)) return -EACCES; - ret = get_user_pages(src, 1, 0, &src_page, NULL); + ret = get_user_pages(src, 1, 0, &src_page); if (ret < 1) return -EFAULT; diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 3b58d8703094..6eaf9a6bc02f 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -9,6 +9,7 @@ #include <asm/processor.h> #include <asm/desc.h> #include <asm/traps.h> +#include <asm/doublefault.h> #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 851eb13edc01..998a08f17e33 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -53,7 +53,7 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } -static bool fpu__probe_without_cpuid(void) +static bool __init fpu__probe_without_cpuid(void) { unsigned long cr0; u16 fsw, fcw; @@ -71,7 +71,7 @@ static bool fpu__probe_without_cpuid(void) return fsw == 0 && (fcw & 0x103f) == 0x003f; } -static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +static void __init fpu__init_system_early_generic(void) { if (!boot_cpu_has(X86_FEATURE_CPUID) && !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { @@ -211,10 +211,10 @@ static void __init fpu__init_system_xstate_size_legacy(void) * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ -void __init fpu__init_system(struct cpuinfo_x86 *c) +void __init fpu__init_system(void) { fpstate_reset(¤t->thread.fpu); - fpu__init_system_early_generic(c); + fpu__init_system_early_generic(); /* * The FPU has to be operational for some of the diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5e7ead52cfdb..01e8f34daf22 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -525,9 +525,6 @@ static void *addr_from_call(void *ptr) return ptr + CALL_INSN_SIZE + call.disp; } -void prepare_ftrace_return(unsigned long ip, unsigned long *parent, - unsigned long frame_pointer); - /* * If the ops->trampoline was not allocated, then it probably * has a static trampoline func, or is the ftrace caller itself. diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 10c27b4261eb..246a609f889b 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -69,6 +69,7 @@ asmlinkage __visible void __init __noreturn i386_start_kernel(void) * to the first kernel PMD. Note the upper half of each PMD or PTE are * always zero at this stage. 
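The "found" flag introduced in sgx_mmu_notifier_release() above reflects a classic C pitfall: after the kernel's list_for_each_entry() completes without a break, the cursor points at the list head's container rather than a real element, so post-loop comparisons against the cursor are fragile. A standalone sketch of the robust idiom (plain pointers instead of list_head, purely illustrative):

#include <stdbool.h>
#include <stdio.h>

struct node { int id; struct node *next; };

int main(void)
{
    struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
    struct node *it, *needle = &b;
    bool found = false;

    for (it = &a; it; it = it->next) {
        if (it == needle) {
            found = true;   /* record the hit explicitly */
            break;
        }
    }

    /* With the kernel's list iteration macros the cursor is never NULL
     * after a full traversal, so only the flag is safe to test here. */
    printf("found=%d\n", found);
    return 0;
}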
*/ +void __init mk_early_pgtbl_32(void); void __init mk_early_pgtbl_32(void) { #ifdef __pa diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 67c8ed99144b..c9318993f959 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -138,20 +138,6 @@ SYM_CODE_START(startup_32) jmp .Ldefault_entry SYM_CODE_END(startup_32) -#ifdef CONFIG_HOTPLUG_CPU -/* - * Boot CPU0 entry point. It's called from play_dead(). Everything has been set - * up already except stack. We just set up stack here. Then call - * start_secondary(). - */ -SYM_FUNC_START(start_cpu0) - movl initial_stack, %ecx - movl %ecx, %esp - call *(initial_code) -1: jmp 1b -SYM_FUNC_END(start_cpu0) -#endif - /* * Non-boot CPU entry point; entered from trampoline.S * We can't lgdt here, because lgdt itself uses a data segment, but diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 113c13376e51..c5b9289837dc 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,7 +24,9 @@ #include "../entry/calling.h" #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/apicdef.h> #include <asm/fixmap.h> +#include <asm/smp.h> /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE @@ -234,8 +236,67 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) ANNOTATE_NOENDBR // above #ifdef CONFIG_SMP + /* + * For parallel boot, the APIC ID is read from the APIC, and then + * used to look up the CPU number. For booting a single CPU, the + * CPU number is encoded in smpboot_control. + * + * Bit 31 STARTUP_READ_APICID (Read APICID from APIC) + * Bit 0-23 CPU# if STARTUP_xx flags are not set + */ movl smpboot_control(%rip), %ecx + testl $STARTUP_READ_APICID, %ecx + jnz .Lread_apicid + /* + * No control bit set, single CPU bringup. CPU number is provided + * in bit 0-23. This is also the boot CPU case (CPU number 0). + */ + andl $(~STARTUP_PARALLEL_MASK), %ecx + jmp .Lsetup_cpu +.Lread_apicid: + /* Check whether X2APIC mode is already enabled */ + mov $MSR_IA32_APICBASE, %ecx + rdmsr + testl $X2APIC_ENABLE, %eax + jnz .Lread_apicid_msr + + /* Read the APIC ID from the fix-mapped MMIO space. */ + movq apic_mmio_base(%rip), %rcx + addq $APIC_ID, %rcx + movl (%rcx), %eax + shr $24, %eax + jmp .Llookup_AP + +.Lread_apicid_msr: + mov $APIC_X2APIC_ID_MSR, %ecx + rdmsr + +.Llookup_AP: + /* EAX contains the APIC ID of the current CPU */ + xorq %rcx, %rcx + leaq cpuid_to_apicid(%rip), %rbx + +.Lfind_cpunr: + cmpl (%rbx,%rcx,4), %eax + jz .Lsetup_cpu + inc %ecx +#ifdef CONFIG_FORCE_NR_CPUS + cmpl $NR_CPUS, %ecx +#else + cmpl nr_cpu_ids(%rip), %ecx +#endif + jb .Lfind_cpunr + + /* APIC ID not found in the table. Drop the trampoline lock and bail. */ + movq trampoline_lock(%rip), %rax + movl $0, (%rax) + +1: cli + hlt + jmp 1b + +.Lsetup_cpu: /* Get the per cpu offset for the given CPU# which is in ECX */ movq __per_cpu_offset(,%rcx,8), %rdx #else @@ -252,6 +313,16 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) movq TASK_threadsp(%rax), %rsp /* + * Now that this CPU is running on its own stack, drop the realmode + * protection. For the boot CPU the pointer is NULL! + */ + movq trampoline_lock(%rip), %rax + testq %rax, %rax + jz .Lsetup_gdt + movl $0, (%rax) + +.Lsetup_gdt: + /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace * addresses where we're currently running on. 
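The assembly lookup added above amounts to this C logic: read the APIC ID (from the x2APIC MSR or the fix-mapped MMIO page), then linearly scan cpuid_to_apicid[] for the CPU number, parking the CPU if no entry matches. A self-contained model with made-up table contents:

#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 8

static const uint32_t cpuid_to_apicid[NR_CPUS] = {
    0, 2, 4, 6, 1, 3, 5, 7          /* illustrative APIC IDs */
};

static int find_cpunr(uint32_t apicid)
{
    for (int cpu = 0; cpu < NR_CPUS; cpu++)
        if (cpuid_to_apicid[cpu] == apicid)
            return cpu;
    return -1;   /* asm: drop the trampoline lock, then cli;hlt forever */
}

int main(void)
{
    printf("APIC ID 6 -> CPU %d\n", find_cpunr(6));   /* CPU 3 */
    printf("APIC ID 9 -> CPU %d\n", find_cpunr(9));   /* not found */
    return 0;
}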
We have to do that here @@ -375,13 +446,13 @@ SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" #include "sev_verify_cbit.S" -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT) /* - * Boot CPU0 entry point. It's called from play_dead(). Everything has been set - * up already except stack. We just set up stack here. Then call - * start_secondary() via .Ljump_to_C_code. + * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for + * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot + * unplug. Everything is set up already except the stack. */ -SYM_CODE_START(start_cpu0) +SYM_CODE_START(soft_restart_cpu) ANNOTATE_NOENDBR UNWIND_HINT_END_OF_STACK @@ -390,7 +461,7 @@ SYM_CODE_START(start_cpu0) movq TASK_threadsp(%rcx), %rsp jmp .Ljump_to_C_code -SYM_CODE_END(start_cpu0) +SYM_CODE_END(soft_restart_cpu) #endif #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -433,6 +504,8 @@ SYM_DATA(initial_code, .quad x86_64_start_kernel) #ifdef CONFIG_AMD_MEM_ENCRYPT SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif + +SYM_DATA(trampoline_lock, .quad 0); __FINITDATA __INIT diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 766ffe3ba313..9f668d2f3d11 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -211,6 +211,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR + sum += irq_stats(cpu)->irq_hv_callback_count; +#endif +#if IS_ENABLED(CONFIG_HYPERV) + sum += irq_stats(cpu)->irq_hv_reenlightenment_count; + sum += irq_stats(cpu)->hyperv_stimer0_count; +#endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 670eb08b972a..ee4fe8cdb857 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -165,32 +165,19 @@ int arch_asym_cpu_priority(int cpu) /** * sched_set_itmt_core_prio() - Set CPU priority based on ITMT - * @prio: Priority of cpu core - * @core_cpu: The cpu number associated with the core + * @prio: Priority of @cpu + * @cpu: The CPU number * * The pstate driver will find out the max boost frequency * and call this function to set a priority proportional - * to the max boost frequency. CPU with higher boost + * to the max boost frequency. CPUs with higher boost * frequency will receive higher priority. * * No need to rebuild sched domain after updating * the CPU priorities. The sched domains have no * dependency on CPU priorities. */ -void sched_set_itmt_core_prio(int prio, int core_cpu) +void sched_set_itmt_core_prio(int prio, int cpu) { - int cpu, i = 1; - - for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { - int smt_prio; - - /* - * Ensure that the siblings are moved to the end - * of the priority chain and only used when - * all other high priority cpus are out of capacity. 
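The sibling-scaling computation being deleted here (its formula appears just below) is easy to see with numbers: for prio = 100 and two SMT siblings, the old code stored 200 for the first sibling and 50 for the second, pushing siblings to the end of the priority chain; the new code simply stores the caller's value for every CPU. A quick check:

#include <stdio.h>

int main(void)
{
    int prio = 100, smp_num_siblings = 2;

    for (int i = 1; i <= smp_num_siblings; i++)
        printf("old: sibling %d -> smt_prio %d\n",
               i - 1, prio * smp_num_siblings / (i * i));  /* 200, 50 */

    printf("new: every sibling -> prio %d\n", prio);
    return 0;
}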
- */ - smt_prio = prio * smp_num_siblings / (i * i); - per_cpu(sched_core_priority, cpu) = smt_prio; - i++; - } + per_cpu(sched_core_priority, cpu) = prio; } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 0f35d44c56fe..fb8f52149be9 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -71,7 +71,7 @@ static int kvm_set_wallclock(const struct timespec64 *now) return -ENODEV; } -static noinstr u64 kvm_clock_read(void) +static u64 kvm_clock_read(void) { u64 ret; @@ -88,7 +88,7 @@ static u64 kvm_clock_get_cycles(struct clocksource *cs) static noinstr u64 kvm_sched_clock_read(void) { - return kvm_clock_read() - kvm_sched_clock_offset; + return pvclock_clocksource_read_nowd(this_cpu_pvti()) - kvm_sched_clock_offset; } static inline void kvm_sched_clock_init(bool stable) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 525876e7b9f4..adc67f98819a 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -367,8 +367,10 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) va = (unsigned long)ldt_slot_va(ldt->slot) + offset; ptep = get_locked_pte(mm, va, &ptl); - pte_clear(mm, va, ptep); - pte_unmap_unlock(ptep, ptl); + if (!WARN_ON_ONCE(!ptep)) { + pte_clear(mm, va, ptep); + pte_unmap_unlock(ptep, ptl); + } } va = (unsigned long)ldt_slot_va(ldt->slot); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 776f4b1e395b..a0c551846b35 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -496,7 +496,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi) */ sev_es_nmi_complete(); if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) - arch_atomic_long_inc(&nsp->idt_calls); + raw_atomic_long_inc(&nsp->idt_calls); if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) return; diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b348a672f71d..b525fe6d6657 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> +#include <linux/pnp.h> #include <asm/setup.h> #include <asm/bios_ebda.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index dac41a0072ea..ff9b80a0e3e3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -759,15 +759,26 @@ bool xen_set_default_idle(void) } #endif +struct cpumask cpus_stop_mask; + void __noreturn stop_this_cpu(void *dummy) { + struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + unsigned int cpu = smp_processor_id(); + local_irq_disable(); + /* - * Remove this CPU: + * Remove this CPU from the online mask and disable it + * unconditionally. This might be redundant in case that the reboot + * vector was handled late and stop_other_cpus() sent an NMI. + * + * According to SDM and APM NMIs can be accepted even after soft + * disabling the local APIC. */ - set_cpu_online(smp_processor_id(), false); + set_cpu_online(cpu, false); disable_local_APIC(); - mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + mcheck_cpu_clear(c); /* * Use wbinvd on processors that support SME. This provides support @@ -781,8 +792,17 @@ void __noreturn stop_this_cpu(void *dummy) * Test the CPUID bit directly because the machine might've cleared * X86_FEATURE_SME due to cmdline options. 
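The guard that follows this comment only queries leaf 0x8000001f after confirming that the maximum extended CPUID leaf reaches it; querying a leaf the CPU does not implement returns unreliable bits rather than zeros. A userspace sketch of the same check (x86-64 inline asm, illustrative only):

#include <stdio.h>
#include <stdint.h>

static uint32_t cpuid_eax(uint32_t leaf)
{
    uint32_t a, b, c, d;

    __asm__ volatile("cpuid"
                     : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                     : "a"(leaf), "c"(0));
    return a;
}

int main(void)
{
    uint32_t max_ext = cpuid_eax(0x80000000);

    /* Same shape as the new kernel check: leaf range first, bit second. */
    if (max_ext >= 0x8000001f && (cpuid_eax(0x8000001f) & 1))
        printf("CPUID reports SME\n");
    else
        printf("leaf absent or SME bit clear\n");
    return 0;
}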
*/ - if (cpuid_eax(0x8000001f) & BIT(0)) + if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0))) native_wbinvd(); + + /* + * This brings a cache line back and dirties it, but + * native_stop_other_cpus() will overwrite cpus_stop_mask after it + * observed that all CPUs reported stop. This write will invalidate + * the related cache line on this CPU. + */ + cpumask_clear_cpu(cpu, &cpus_stop_mask); + for (;;) { /* * Use native_halt() so that memory contents don't change diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 56acf53a782a..b3f81379c2fc 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -101,11 +101,11 @@ u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd) * updating at the same time, and one of them could be slightly behind, * making the assumption that last_value always go forward fail to hold. */ - last = arch_atomic64_read(&last_value); + last = raw_atomic64_read(&last_value); do { if (ret <= last) return last; - } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret)); + } while (!raw_atomic64_try_cmpxchg(&last_value, &last, ret)); return ret; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16babff771bd..fd975a4a5200 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -796,7 +796,6 @@ static void __init early_reserve_memory(void) memblock_x86_reserve_range_setup_data(); - reserve_ibft_region(); reserve_bios_regions(); trim_snb_memory(); } @@ -1032,11 +1031,14 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); + reserve_ibft_region(); dmi_setup(); /* * VMware detection requires dmi to be available, so this * needs to be done after dmi_setup(), for the boot CPU. + * For some guest types (Xen PV, SEV-SNP, TDX) it is required to be + * called before cache_bp_init() for setting up MTRR state. */ init_hypervisor_platform(); diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3a5b0c9c4fcc..2eabccde94fb 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -12,6 +12,9 @@ #ifndef __BOOT_COMPRESSED #define error(v) pr_err(v) #define has_cpuflag(f) boot_cpu_has(f) +#else +#undef WARN +#define WARN(condition, format...) (!!(condition)) #endif /* I/O parameters for CPUID-related helpers */ @@ -991,3 +994,103 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) cpuid_ext_range_max = fn->eax; } } + +static void pvalidate_pages(struct snp_psc_desc *desc) +{ + struct psc_entry *e; + unsigned long vaddr; + unsigned int size; + unsigned int i; + bool validate; + int rc; + + for (i = 0; i <= desc->hdr.end_entry; i++) { + e = &desc->entries[i]; + + vaddr = (unsigned long)pfn_to_kaddr(e->gfn); + size = e->pagesize ? 
RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; + validate = e->operation == SNP_PAGE_STATE_PRIVATE; + + rc = pvalidate(vaddr, size, validate); + if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { + unsigned long vaddr_end = vaddr + PMD_SIZE; + + for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) { + rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); + if (rc) + break; + } + } + + if (rc) { + WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); + } + } +} + +static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) +{ + int cur_entry, end_entry, ret = 0; + struct snp_psc_desc *data; + struct es_em_ctxt ctxt; + + vc_ghcb_invalidate(ghcb); + + /* Copy the input desc into GHCB shared buffer */ + data = (struct snp_psc_desc *)ghcb->shared_buffer; + memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); + + /* + * As per the GHCB specification, the hypervisor can resume the guest + * before processing all the entries. Check whether all the entries + * are processed. If not, then keep retrying. Note, the hypervisor + * will update the data memory directly to indicate the status, so + * reference the data->hdr everywhere. + * + * The strategy here is to wait for the hypervisor to change the page + * state in the RMP table before guest accesses the memory pages. If the + * page state change was not successful, then later memory access will + * result in a crash. + */ + cur_entry = data->hdr.cur_entry; + end_entry = data->hdr.end_entry; + + while (data->hdr.cur_entry <= data->hdr.end_entry) { + ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); + + /* This will advance the shared buffer data points to. */ + ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); + + /* + * Page State Change VMGEXIT can pass error code through + * exit_info_2. + */ + if (WARN(ret || ghcb->save.sw_exit_info_2, + "SNP: PSC failed ret=%d exit_info_2=%llx\n", + ret, ghcb->save.sw_exit_info_2)) { + ret = 1; + goto out; + } + + /* Verify that reserved bit is not set */ + if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { + ret = 1; + goto out; + } + + /* + * Sanity check that entry processing is not going backwards. + * This will happen only if hypervisor is tricking us. + */ + if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, +"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", + end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { + ret = 1; + goto out; + } + } + +out: + return ret; +} diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b031244d6d2d..1ee7bed453de 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -113,13 +113,23 @@ struct ghcb_state { }; static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); -DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); - static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); struct sev_config { __u64 debug : 1, - __reserved : 63; + + /* + * A flag used by __set_pages_state() that indicates when the + * per-CPU GHCB has been created and registered and thus can be + * used by the BSP instead of the early boot GHCB. + * + * For APs, the per-CPU GHCB is created before they are started + * and registered upon startup, so this flag can be used globally + * for the BSP and APs. 
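The 2M-to-4K fallback in pvalidate_pages() above has a simple shape: attempt one large-page validation and, only on a size-mismatch result, revalidate the same region in 4K steps. A standalone model (try_validate() and the error code are stubs standing in for the PVALIDATE instruction and PVALIDATE_FAIL_SIZEMISMATCH):

#include <stdio.h>

#define PAGE_SZ        0x1000UL
#define PMD_SZ         0x200000UL
#define E_SIZEMISMATCH 6

/* Pretend the RMP maps this region with 4K pages only. */
static int try_validate(unsigned long va, int large)
{
    return large ? E_SIZEMISMATCH : 0;
}

int main(void)
{
    unsigned long va = 0x200000;
    int rc = try_validate(va, 1);

    if (rc == E_SIZEMISMATCH) {
        for (unsigned long v = va; v < va + PMD_SZ; v += PAGE_SZ)
            if ((rc = try_validate(v, 0)))
                break;
    }
    printf("rc=%d after %s\n", rc, rc ? "failure" : "4K fallback");
    return 0;
}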
+ */ + ghcbs_initialized : 1, + + __reserved : 62; }; static struct sev_config sev_cfg __read_mostly; @@ -645,32 +655,26 @@ static u64 __init get_jump_table_addr(void) return ret; } -static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate) -{ - unsigned long vaddr_end; - int rc; - - vaddr = vaddr & PAGE_MASK; - vaddr_end = vaddr + (npages << PAGE_SHIFT); - - while (vaddr < vaddr_end) { - rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); - if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); - - vaddr = vaddr + PAGE_SIZE; - } -} - -static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op) +static void early_set_pages_state(unsigned long vaddr, unsigned long paddr, + unsigned long npages, enum psc_op op) { unsigned long paddr_end; u64 val; + int ret; + + vaddr = vaddr & PAGE_MASK; paddr = paddr & PAGE_MASK; paddr_end = paddr + (npages << PAGE_SHIFT); while (paddr < paddr_end) { + if (op == SNP_PAGE_STATE_SHARED) { + /* Page validation must be rescinded before changing to shared */ + ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false); + if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) + goto e_term; + } + /* * Use the MSR protocol because this function can be called before * the GHCB is established. @@ -691,7 +695,15 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage paddr, GHCB_MSR_PSC_RESP_VAL(val))) goto e_term; - paddr = paddr + PAGE_SIZE; + if (op == SNP_PAGE_STATE_PRIVATE) { + /* Page validation must be performed after changing to private */ + ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true); + if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret)) + goto e_term; + } + + vaddr += PAGE_SIZE; + paddr += PAGE_SIZE; } return; @@ -701,7 +713,7 @@ e_term: } void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, - unsigned int npages) + unsigned long npages) { /* * This can be invoked in early boot while running identity mapped, so @@ -716,14 +728,11 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd * Ask the hypervisor to mark the memory pages as private in the RMP * table. */ - early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE); - - /* Validate the memory pages after they've been added in the RMP table. */ - pvalidate_pages(vaddr, npages, true); + early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); } void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, - unsigned int npages) + unsigned long npages) { /* * This can be invoked in early boot while running identity mapped, so @@ -734,11 +743,8 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) return; - /* Invalidate the memory pages before they are marked shared in the RMP table. */ - pvalidate_pages(vaddr, npages, false); - /* Ask hypervisor to mark the memory pages shared in the RMP table. 
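The ordering that early_set_pages_state() now enforces can be written as a tiny state machine: validation is rescinded before a page goes shared and performed only after a page becomes private, never the other way around. A sketch with stubbed operations (illustrative names; only the call order matters):

#include <stdio.h>
#include <stdbool.h>

enum op { TO_SHARED, TO_PRIVATE };

static void do_pvalidate(bool validate)
{
    printf("  pvalidate(%s)\n", validate ? "set" : "rescind");
}

static void do_psc(enum op op)
{
    printf("  PSC -> %s\n", op == TO_SHARED ? "shared" : "private");
}

static void set_page_state(enum op op)
{
    if (op == TO_SHARED)
        do_pvalidate(false);   /* rescind validation first...  */
    do_psc(op);                /* ...then flip the RMP entry   */
    if (op == TO_PRIVATE)
        do_pvalidate(true);    /* validate only once private   */
}

int main(void)
{
    printf("to shared:\n");
    set_page_state(TO_SHARED);
    printf("to private:\n");
    set_page_state(TO_PRIVATE);
    return 0;
}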
*/ - early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED); + early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) @@ -756,96 +762,16 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op WARN(1, "invalid memory op %d\n", op); } -static int vmgexit_psc(struct snp_psc_desc *desc) +static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, + unsigned long vaddr_end, int op) { - int cur_entry, end_entry, ret = 0; - struct snp_psc_desc *data; struct ghcb_state state; - struct es_em_ctxt ctxt; - unsigned long flags; - struct ghcb *ghcb; - - /* - * __sev_get_ghcb() needs to run with IRQs disabled because it is using - * a per-CPU GHCB. - */ - local_irq_save(flags); - - ghcb = __sev_get_ghcb(&state); - if (!ghcb) { - ret = 1; - goto out_unlock; - } - - /* Copy the input desc into GHCB shared buffer */ - data = (struct snp_psc_desc *)ghcb->shared_buffer; - memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); - - /* - * As per the GHCB specification, the hypervisor can resume the guest - * before processing all the entries. Check whether all the entries - * are processed. If not, then keep retrying. Note, the hypervisor - * will update the data memory directly to indicate the status, so - * reference the data->hdr everywhere. - * - * The strategy here is to wait for the hypervisor to change the page - * state in the RMP table before guest accesses the memory pages. If the - * page state change was not successful, then later memory access will - * result in a crash. - */ - cur_entry = data->hdr.cur_entry; - end_entry = data->hdr.end_entry; - - while (data->hdr.cur_entry <= data->hdr.end_entry) { - ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); - - /* This will advance the shared buffer data points to. */ - ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); - - /* - * Page State Change VMGEXIT can pass error code through - * exit_info_2. - */ - if (WARN(ret || ghcb->save.sw_exit_info_2, - "SNP: PSC failed ret=%d exit_info_2=%llx\n", - ret, ghcb->save.sw_exit_info_2)) { - ret = 1; - goto out; - } - - /* Verify that reserved bit is not set */ - if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { - ret = 1; - goto out; - } - - /* - * Sanity check that entry processing is not going backwards. - * This will happen only if hypervisor is tricking us. 
- */ - if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, -"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", - end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { - ret = 1; - goto out; - } - } - -out: - __sev_put_ghcb(&state); - -out_unlock: - local_irq_restore(flags); - - return ret; -} - -static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, - unsigned long vaddr_end, int op) -{ + bool use_large_entry; struct psc_hdr *hdr; struct psc_entry *e; + unsigned long flags; unsigned long pfn; + struct ghcb *ghcb; int i; hdr = &data->hdr; @@ -854,74 +780,104 @@ static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, memset(data, 0, sizeof(*data)); i = 0; - while (vaddr < vaddr_end) { - if (is_vmalloc_addr((void *)vaddr)) + while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) { + hdr->end_entry = i; + + if (is_vmalloc_addr((void *)vaddr)) { pfn = vmalloc_to_pfn((void *)vaddr); - else + use_large_entry = false; + } else { pfn = __pa(vaddr) >> PAGE_SHIFT; + use_large_entry = true; + } e->gfn = pfn; e->operation = op; - hdr->end_entry = i; - /* - * Current SNP implementation doesn't keep track of the RMP page - * size so use 4K for simplicity. - */ - e->pagesize = RMP_PG_SIZE_4K; + if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) && + (vaddr_end - vaddr) >= PMD_SIZE) { + e->pagesize = RMP_PG_SIZE_2M; + vaddr += PMD_SIZE; + } else { + e->pagesize = RMP_PG_SIZE_4K; + vaddr += PAGE_SIZE; + } - vaddr = vaddr + PAGE_SIZE; e++; i++; } - if (vmgexit_psc(data)) + /* Page validation must be rescinded before changing to shared */ + if (op == SNP_PAGE_STATE_SHARED) + pvalidate_pages(data); + + local_irq_save(flags); + + if (sev_cfg.ghcbs_initialized) + ghcb = __sev_get_ghcb(&state); + else + ghcb = boot_ghcb; + + /* Invoke the hypervisor to perform the page state changes */ + if (!ghcb || vmgexit_psc(ghcb, data)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + + if (sev_cfg.ghcbs_initialized) + __sev_put_ghcb(&state); + + local_irq_restore(flags); + + /* Page validation must be performed after changing to private */ + if (op == SNP_PAGE_STATE_PRIVATE) + pvalidate_pages(data); + + return vaddr; } -static void set_pages_state(unsigned long vaddr, unsigned int npages, int op) +static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) { - unsigned long vaddr_end, next_vaddr; - struct snp_psc_desc *desc; + struct snp_psc_desc desc; + unsigned long vaddr_end; - desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT); - if (!desc) - panic("SNP: failed to allocate memory for PSC descriptor\n"); + /* Use the MSR protocol when a GHCB is not available. */ + if (!boot_ghcb) + return early_set_pages_state(vaddr, __pa(vaddr), npages, op); vaddr = vaddr & PAGE_MASK; vaddr_end = vaddr + (npages << PAGE_SHIFT); - while (vaddr < vaddr_end) { - /* Calculate the last vaddr that fits in one struct snp_psc_desc. 
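The sanity checks in this retry loop reduce to two monotonicity conditions on the hypervisor-owned header: the end of the window must never grow, and the cursor must never move backwards. A compilable sketch with a stub header in place of struct psc_hdr:

    #include <stdio.h>
    #include <stdbool.h>

    struct psc_hdr_stub { int cur_entry, end_entry; unsigned int reserved; };

    /*
     * The hypervisor updates the header in place, so only monotonic
     * forward progress relative to the caller's snapshot is acceptable.
     */
    static bool psc_progress_ok(const struct psc_hdr_stub *hdr,
                                int cur_snapshot, int end_snapshot)
    {
        if (hdr->reserved)                  /* reserved bits must stay zero */
            return false;
        if (hdr->end_entry > end_snapshot)  /* window must not grow */
            return false;
        if (cur_snapshot > hdr->cur_entry)  /* cursor must not move back */
            return false;
        return true;
    }

    int main(void)
    {
        struct psc_hdr_stub hdr = { .cur_entry = 3, .end_entry = 7 };

        printf("%d\n", psc_progress_ok(&hdr, 2, 7));  /* 1: fine */
        hdr.cur_entry = 1;
        printf("%d\n", psc_progress_ok(&hdr, 2, 7));  /* 0: went backwards */
        return 0;
    }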
*/ - next_vaddr = min_t(unsigned long, vaddr_end, - (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); - - __set_pages_state(desc, vaddr, next_vaddr, op); - - vaddr = next_vaddr; - } - - kfree(desc); + while (vaddr < vaddr_end) + vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); } -void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) +void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; - pvalidate_pages(vaddr, npages, false); - set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); } -void snp_set_memory_private(unsigned long vaddr, unsigned int npages) +void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return; set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); +} + +void snp_accept_memory(phys_addr_t start, phys_addr_t end) +{ + unsigned long vaddr; + unsigned int npages; + + if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) + return; + + vaddr = (unsigned long)__va(start); + npages = (end - start) >> PAGE_SHIFT; - pvalidate_pages(vaddr, npages, true); + set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); } static int snp_set_vmsa(void *va, bool vmsa) @@ -1267,6 +1223,8 @@ void setup_ghcb(void) if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) snp_register_per_cpu_ghcb(); + sev_cfg.ghcbs_initialized = true; + return; } @@ -1328,7 +1286,7 @@ static void sev_es_play_dead(void) * If we get here, the VCPU was woken up again. Jump to CPU * startup code to get it back online. */ - start_cpu0(); + soft_restart_cpu(); } #else /* CONFIG_HOTPLUG_CPU */ #define sev_es_play_dead native_play_dead @@ -1395,9 +1353,6 @@ void __init sev_es_init_vc_handling(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } - /* Enable SEV-ES special handling */ - static_branch_enable(&sev_es_enable_key); - /* Initialize per-cpu GHCB pages */ for_each_possible_cpu(cpu) { alloc_runtime_data(cpu); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 004cb30b7419..cfeec3ee877e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -182,7 +182,7 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, static unsigned long __ro_after_init max_frame_size; static unsigned int __ro_after_init fpu_default_state_size; -void __init init_sigframe_size(void) +static int __init init_sigframe_size(void) { fpu_default_state_size = fpu__get_fpstate_size(); @@ -194,7 +194,9 @@ void __init init_sigframe_size(void) max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT); pr_info("max sigframe size: %lu\n", max_frame_size); + return 0; } +early_initcall(init_sigframe_size); unsigned long get_sigframe_size(void) { diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 375b33ecafa2..7eb18ca7bd45 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -21,12 +21,14 @@ #include <linux/interrupt.h> #include <linux/cpu.h> #include <linux/gfp.h> +#include <linux/kexec.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/proto.h> #include <asm/apic.h> +#include <asm/cpu.h> #include <asm/idtentry.h> #include <asm/nmi.h> #include <asm/mce.h> @@ -129,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) } /* - * this function calls the 'stop' function on all other CPUs in the system. + * Disable virtualization, APIC etc. 
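The entry-packing policy __set_pages_state() gains above can be exercised standalone: direct-map ranges that are PMD-aligned with at least 2MB remaining produce one 2M entry, everything else falls back to 4K, and the descriptor holds a bounded number of entries. A sketch under those rules; the 253-entry cap mirrors the GHCB shared-buffer layout, and the entry struct is simplified:

    #include <stdio.h>
    #include <stdbool.h>

    #define PAGE_SIZE_4K 4096UL
    #define PMD_SIZE_2M  (2UL * 1024 * 1024)
    #define MAX_ENTRIES  253    /* VMGEXIT_PSC_MAX_ENTRY-sized cap */

    struct entry { unsigned long gfn; bool huge; };

    static int fill_psc_desc(struct entry *e, unsigned long vaddr,
                             unsigned long vaddr_end, bool direct_map)
    {
        int i = 0;

        while (vaddr < vaddr_end && i < MAX_ENTRIES) {
            e[i].gfn = vaddr >> 12;
            /* 2M entries only for the direct map, aligned, with room left */
            if (direct_map && !(vaddr & (PMD_SIZE_2M - 1)) &&
                vaddr_end - vaddr >= PMD_SIZE_2M) {
                e[i].huge = true;
                vaddr += PMD_SIZE_2M;
            } else {
                e[i].huge = false;
                vaddr += PAGE_SIZE_4K;
            }
            i++;
        }
        return i;
    }

    int main(void)
    {
        struct entry e[MAX_ENTRIES];
        int n = fill_psc_desc(e, 0x1ff000, 0x600000, true);

        printf("%d entries, first huge=%d\n", n, e[0].huge);  /* 3, 0 */
        return 0;
    }

A partially aligned range like [0x1ff000, 0x600000) packs into one 4K entry followed by two 2M entries, which is why the caller can simply loop on the returned vaddr.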
and park the CPU in a HLT loop */ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot) { @@ -146,61 +148,96 @@ static int register_stop_handler(void) static void native_stop_other_cpus(int wait) { - unsigned long flags; - unsigned long timeout; + unsigned int cpu = smp_processor_id(); + unsigned long flags, timeout; if (reboot_force) return; - /* - * Use an own vector here because smp_call_function - * does lots of things not suitable in a panic situation. - */ + /* Only proceed if this is the first CPU to reach this code */ + if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) + return; + + /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ + if (kexec_in_progress) + smp_kick_mwait_play_dead(); /* - * We start by using the REBOOT_VECTOR irq. - * The irq is treated as a sync point to allow critical - * regions of code on other cpus to release their spin locks - * and re-enable irqs. Jumping straight to an NMI might - * accidentally cause deadlocks with further shutdown/panic - * code. By syncing, we give the cpus up to one second to - * finish their work before we force them off with the NMI. + * 1) Send an IPI on the reboot vector to all other CPUs. + * + * The other CPUs should react on it after leaving critical + * sections and re-enabling interrupts. They might still hold + * locks, but there is nothing which can be done about that. + * + * 2) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * + * 3) If the system uses INIT/STARTUP for CPU bringup, then + * send all present CPUs an INIT vector, which brings them + * completely out of the way. + * + * 4) If #3 is not possible and #2 timed out send an NMI to the + * CPUs which did not yet report + * + * 5) Wait for all other CPUs to report that they reached the + * HLT loop in stop_this_cpu() + * + * #4 can obviously race against a CPU reaching the HLT loop late. + * That CPU will have reported already and the "have all CPUs + * reached HLT" condition will be true despite the fact that the + * other CPU is still handling the NMI. Again, there is no + * protection against that as "disabled" APICs still respond to + * NMIs. */ - if (num_online_cpus() > 1) { - /* did someone beat us here? */ - if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) - return; - - /* sync above data before sending IRQ */ - wmb(); + cpumask_copy(&cpus_stop_mask, cpu_online_mask); + cpumask_clear_cpu(cpu, &cpus_stop_mask); + if (!cpumask_empty(&cpus_stop_mask)) { apic_send_IPI_allbutself(REBOOT_VECTOR); /* * Don't wait longer than a second for IPI completion. The * wait request is not checked here because that would - * prevent an NMI shutdown attempt in case that not all + * prevent an NMI/INIT shutdown in case that not all * CPUs reach shutdown state. */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (!cpumask_empty(&cpus_stop_mask) && timeout--) udelay(1); } - /* if the REBOOT_VECTOR didn't work, try with the NMI */ - if (num_online_cpus() > 1) { + /* + * Park all other CPUs in INIT including "offline" CPUs, if + * possible. That's a safe place where they can't resume execution + * of HLT and then execute the HLT loop from overwritten text or + * page tables. + * + * The only downside is a broadcast MCE, but up to the point where + * the kexec() kernel brought all APs online again an MCE will just + * make HLT resume and handle the MCE. The machine crashes and burns + * due to overwritten text, page tables and data. So there is a + * choice between fire and frying pan. 
The result is pretty much + * the same. Chose frying pan until x86 provides a sane mechanism + * to park a CPU. + */ + if (smp_park_other_cpus_in_init()) + goto done; + + /* + * If park with INIT was not possible and the REBOOT_VECTOR didn't + * take all secondary CPUs offline, try with the NMI. + */ + if (!cpumask_empty(&cpus_stop_mask)) { /* * If NMI IPI is enabled, try to register the stop handler * and send the IPI. In any case try to wait for the other * CPUs to stop. */ if (!smp_no_nmi_ipi && !register_stop_handler()) { - /* Sync above data before sending IRQ */ - wmb(); - pr_emerg("Shutting down cpus with NMI\n"); - apic_send_IPI_allbutself(NMI_VECTOR); + for_each_cpu(cpu, &cpus_stop_mask) + apic->send_IPI(cpu, NMI_VECTOR); } /* * Don't wait longer than 10 ms if the caller didn't @@ -208,14 +245,21 @@ static void native_stop_other_cpus(int wait) * one or more CPUs do not reach shutdown state. */ timeout = USEC_PER_MSEC * 10; - while (num_online_cpus() > 1 && (wait || timeout--)) + while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--)) udelay(1); } +done: local_irq_save(flags); disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); local_irq_restore(flags); + + /* + * Ensure that the cpus_stop_mask cache lines are invalidated on + * the other CPUs. See comment vs. SME in stop_this_cpu(). + */ + cpumask_clear(&cpus_stop_mask); } /* @@ -268,8 +312,7 @@ struct smp_ops smp_ops = { #endif .smp_send_reschedule = native_smp_send_reschedule, - .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, + .kick_ap_alive = native_kick_ap, .cpu_disable = native_cpu_disable, .play_dead = native_play_dead, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 352f0ce1ece4..ed2d51960a7d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,10 +53,13 @@ #include <linux/tboot.h> #include <linux/gfp.h> #include <linux/cpuidle.h> +#include <linux/kexec.h> #include <linux/numa.h> #include <linux/pgtable.h> #include <linux/overflow.h> #include <linux/stackprotector.h> +#include <linux/cpuhotplug.h> +#include <linux/mc146818rtc.h> #include <asm/acpi.h> #include <asm/cacheinfo.h> @@ -74,7 +77,7 @@ #include <asm/fpu/api.h> #include <asm/setup.h> #include <asm/uv/uv.h> -#include <linux/mc146818rtc.h> +#include <asm/microcode.h> #include <asm/i8259.h> #include <asm/misc.h> #include <asm/qspinlock.h> @@ -101,6 +104,26 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + +/* Representing CPUs for which sibling maps can be computed */ +static cpumask_var_t cpu_sibling_setup_mask; + +struct mwait_cpu_dead { + unsigned int control; + unsigned int status; +}; + +#define CPUDEAD_MWAIT_WAIT 0xDEADBEEF +#define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD + +/* + * Cache line aligned data for mwait_play_dead(). Separate on purpose so + * that it's unlikely to be touched by other CPUs. + */ +static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); + /* Logical package management. 
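The five-step sequence documented above boils down to "ask politely, wait bounded, escalate, wait again". A toy model with a bitmask standing in for cpus_stop_mask; the stub IPIs just clear bits, the timeouts are loop counts rather than microseconds, and the INIT-parking step is elided:

    #include <stdio.h>

    static unsigned int stop_mask = 0x0e;   /* CPUs 1-3 still running */

    /* Stub IPIs: most CPUs honor REBOOT_VECTOR, stragglers need the NMI. */
    static void send_reboot_ipi(void) { stop_mask &= ~0x06; }
    static void send_nmi_ipi(void)    { stop_mask = 0; }

    static void stop_other_cpus(void)
    {
        unsigned long timeout;

        /* 1) REBOOT_VECTOR IPI, then wait up to "one second" */
        send_reboot_ipi();
        for (timeout = 1000000; stop_mask && timeout; timeout--)
            ;   /* udelay(1) in the real code */

        /* 2) NMI the CPUs which did not report, wait up to "10ms" */
        if (stop_mask) {
            send_nmi_ipi();
            for (timeout = 10000; stop_mask && timeout; timeout--)
                ;
        }

        printf("remaining mask: %#x\n", stop_mask);
    }

    int main(void)
    {
        stop_other_cpus();
        return 0;
    }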
We might want to allocate that dynamically */ unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); @@ -121,7 +144,6 @@ int arch_update_cpu_topology(void) return retval; } - static unsigned int smpboot_warm_reset_vector_count; static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) @@ -154,66 +176,63 @@ static inline void smpboot_restore_warm_reset_vector(void) } -/* - * Report back to the Boot Processor during boot time or to the caller processor - * during CPU online. - */ -static void smp_callin(void) +/* Run the next set of setup steps for the upcoming CPU */ +static void ap_starting(void) { - int cpuid; + int cpuid = smp_processor_id(); - /* - * If waken up by an INIT in an 82489DX configuration - * cpu_callout_mask guarantees we don't get here before - * an INIT_deassert IPI reaches our local APIC, so it is - * now safe to touch our local APIC. - */ - cpuid = smp_processor_id(); + /* Mop up eventual mwait_play_dead() wreckage */ + this_cpu_write(mwait_cpu_dead.status, 0); + this_cpu_write(mwait_cpu_dead.control, 0); /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. (this is probably redundant on most - * boards) + * If woken up by an INIT in an 82489DX configuration the alive + * synchronization guarantees that the CPU does not reach this + * point before an INIT_deassert IPI reaches the local APIC, so it + * is now safe to touch the local APIC. + * + * Set up this CPU, first the APIC, which is probably redundant on + * most boards. */ apic_ap_setup(); - /* - * Save our processor parameters. Note: this information - * is needed for clock calibration. - */ + /* Save the processor parameters. */ smp_store_cpu_info(cpuid); /* * The topology information must be up to date before - * calibrate_delay() and notify_cpu_starting(). + * notify_cpu_starting(). */ - set_cpu_sibling_map(raw_smp_processor_id()); + set_cpu_sibling_map(cpuid); ap_init_aperfmperf(); - /* - * Get our bogomips. - * Update loops_per_jiffy in cpu_data. Previous call to - * smp_store_cpu_info() stored a value that is close but not as - * accurate as the value just calculated. - */ - calibrate_delay(); - cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); wmb(); + /* + * This runs the AP through all the cpuhp states to its target + * state CPUHP_ONLINE. + */ notify_cpu_starting(cpuid); +} +static void ap_calibrate_delay(void) +{ /* - * Allow the master to continue. + * Calibrate the delay loop and update loops_per_jiffy in cpu_data. + * smp_store_cpu_info() stored a value that is close but not as + * accurate as the value just calculated. + * + * As this is invoked after the TSC synchronization check, + * calibrate_delay_is_known() will skip the calibration routine + * when TSC is synchronized across sockets. */ - cpumask_set_cpu(cpuid, cpu_callin_mask); + calibrate_delay(); + cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy; } -static int cpu0_logical_apicid; -static int enable_start_cpu0; /* * Activate a secondary processor. */ @@ -226,24 +245,63 @@ static void notrace start_secondary(void *unused) */ cr4_init(); -#ifdef CONFIG_X86_32 - /* switch away from the initial page table */ - load_cr3(swapper_pg_dir); - __flush_tlb_all(); -#endif - cpu_init_secondary(); + /* + * 32-bit specific. 64-bit reaches this code with the correct page + * table established. Yet another historical divergence. 
+ */ + if (IS_ENABLED(CONFIG_X86_32)) { + /* switch away from the initial page table */ + load_cr3(swapper_pg_dir); + __flush_tlb_all(); + } + + cpu_init_exception_handling(); + + /* + * 32-bit systems load the microcode from the ASM startup code for + * historical reasons. + * + * On 64-bit systems load it before reaching the AP alive + * synchronization point below so it is not part of the full per + * CPU serialized bringup part when "parallel" bringup is enabled. + * + * That's even safe when hyperthreading is enabled in the CPU as + * the core code starts the primary threads first and leaves the + * secondary threads waiting for SIPI. Loading microcode on + * physical cores concurrently is a safe operation. + * + * This covers both the Intel specific issue that concurrent + * microcode loading on SMT siblings must be prohibited and the + * vendor independent issue that microcode loading which changes + * CPUID, MSRs etc. must be strictly serialized to maintain + * software state correctness. + */ + if (IS_ENABLED(CONFIG_X86_64)) + load_ucode_ap(); + + /* + * Synchronization point with the hotplug core. Sets this CPU's + * synchronization state to ALIVE and spin-waits for the control CPU to + * release this CPU for further bringup. + */ + cpuhp_ap_sync_alive(); + + cpu_init(); + fpu__init_cpu(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); - smp_callin(); - enable_start_cpu0 = 0; + ap_starting(); + + /* Check TSC synchronization with the control CPU. */ + check_tsc_sync_target(); - /* otherwise gcc will move up smp_processor_id before the cpu_init */ - barrier(); /* - * Check TSC synchronization with the boot CPU: + * Calibrate the delay loop after the TSC synchronization check. + * This allows skipping the calibration when TSC is synchronized + * across sockets.
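The synchronization point added above is a plain publish-and-wait handshake. Modeled with C11 atomics and a thread standing in for the AP (compile with -pthread); the state names are illustrative, not the hotplug core's actual enum:

    #include <stdatomic.h>
    #include <pthread.h>
    #include <stdio.h>

    enum { DEAD, ALIVE, RELEASED };
    static _Atomic int sync_state = DEAD;

    /* AP side: publish ALIVE, then spin until the control CPU releases us. */
    static void *ap_thread(void *arg)
    {
        atomic_store(&sync_state, ALIVE);
        while (atomic_load(&sync_state) != RELEASED)
            ;
        printf("AP: released, continuing bringup\n");
        return NULL;
    }

    int main(void)
    {
        pthread_t ap;

        pthread_create(&ap, NULL, ap_thread, NULL);

        /* Control CPU: wait for ALIVE, then release the AP. */
        while (atomic_load(&sync_state) != ALIVE)
            ;
        atomic_store(&sync_state, RELEASED);

        pthread_join(ap, NULL);
        return 0;
    }

The real state machine also covers the teardown direction (cpuhp_ap_report_dead() further down); the sketch shows only the bringup half.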
*/ - check_tsc_sync_target(); + ap_calibrate_delay(); speculative_store_bypass_ht_init(); @@ -257,7 +315,6 @@ static void notrace start_secondary(void *unused) set_cpu_online(smp_processor_id(), true); lapic_online(); unlock_vector_lock(); - cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); /* enable local interrupts */ @@ -270,15 +327,6 @@ static void notrace start_secondary(void *unused) } /** - * topology_is_primary_thread - Check whether CPU is the primary SMT thread - * @cpu: CPU to check - */ -bool topology_is_primary_thread(unsigned int cpu) -{ - return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu)); -} - -/** * topology_smt_supported - Check whether SMT is supported by the CPUs */ bool topology_smt_supported(void) @@ -288,6 +336,7 @@ bool topology_smt_supported(void) /** * topology_phys_to_logical_pkg - Map a physical package id to a logical + * @phys_pkg: The physical package id to map * * Returns logical package id or -1 if not found */ @@ -304,15 +353,17 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) return -1; } EXPORT_SYMBOL(topology_phys_to_logical_pkg); + /** * topology_phys_to_logical_die - Map a physical die id to logical + * @die_id: The physical die id to map + * @cur_cpu: The CPU for which the mapping is done * * Returns logical die id or -1 if not found */ -int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) +static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) { - int cpu; - int proc_id = cpu_data(cur_cpu).phys_proc_id; + int cpu, proc_id = cpu_data(cur_cpu).phys_proc_id; for_each_possible_cpu(cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -323,7 +374,6 @@ int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) } return -1; } -EXPORT_SYMBOL(topology_phys_to_logical_die); /** * topology_update_package_map - Update the physical to logical package map @@ -398,7 +448,7 @@ void smp_store_cpu_info(int id) c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when - * bringing up AP or offlined CPU0. + * bringing up an AP. */ identify_secondary_cpu(c); c->initialized = true; @@ -552,7 +602,7 @@ static int x86_core_flags(void) #ifdef CONFIG_SCHED_SMT static int x86_smt_flags(void) { - return cpu_smt_flags() | x86_sched_itmt_flags(); + return cpu_smt_flags(); } #endif #ifdef CONFIG_SCHED_CLUSTER @@ -563,50 +613,57 @@ static int x86_cluster_flags(void) #endif #endif -static struct sched_domain_topology_level x86_numa_in_package_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, -#endif -#ifdef CONFIG_SCHED_CLUSTER - { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }, -#endif -#ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, -#endif - { NULL, }, -}; +/* + * Set if a package/die has multiple NUMA nodes inside. + * AMD Magny-Cours, Intel Cluster-on-Die, and Intel + * Sub-NUMA Clustering have this. 
+ */ +static bool x86_has_numa_in_package; -static struct sched_domain_topology_level x86_hybrid_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, -#endif -#ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, -#endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; +static struct sched_domain_topology_level x86_topology[6]; + +static void __init build_sched_topology(void) +{ + int i = 0; -static struct sched_domain_topology_level x86_topology[] = { #ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) + }; #endif #ifdef CONFIG_SCHED_CLUSTER - { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }, + /* + * For now, skip the cluster domain on Hybrid. + */ + if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) + }; + } #endif #ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) + }; #endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; + /* + * When there is NUMA topology inside the package skip the DIE domain + * since the NUMA domains will auto-magically create the right spanning + * domains based on the SLIT. + */ + if (!x86_has_numa_in_package) { + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_cpu_mask, SD_INIT_NAME(DIE) + }; + } -/* - * Set if a package/die has multiple NUMA nodes inside. - * AMD Magny-Cours, Intel Cluster-on-Die, and Intel - * Sub-NUMA Clustering have this. - */ -static bool x86_has_numa_in_package; + /* + * There must be one trailing NULL entry left. + */ + BUG_ON(i >= ARRAY_SIZE(x86_topology)-1); + + set_sched_topology(x86_topology); +} void set_cpu_sibling_map(int cpu) { @@ -706,9 +763,9 @@ static void impress_friends(void) * Allow the user to impress friends. */ pr_debug("Before bogomips\n"); - for_each_possible_cpu(cpu) - if (cpumask_test_cpu(cpu, cpu_callout_mask)) - bogosum += cpu_data(cpu).loops_per_jiffy; + for_each_online_cpu(cpu) + bogosum += cpu_data(cpu).loops_per_jiffy; + pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n", num_online_cpus(), bogosum/(500000/HZ), @@ -795,86 +852,42 @@ static void __init smp_quirk_init_udelay(void) } /* - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this - * won't ... remember to clear down the APIC, etc later. + * Wake up AP by INIT, INIT, STARTUP sequence. */ -int -wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) +static void send_init_sequence(int phys_apicid) { - u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; - unsigned long send_status, accept_status = 0; - int maxlvt; + int maxlvt = lapic_get_maxlvt(); - /* Target chip */ - /* Boot on the stack */ - /* Kick the second */ - apic_icr_write(APIC_DM_NMI | dm, apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - /* - * Give the other CPU some time to accept the IPI. - */ - udelay(200); + /* Be paranoid about clearing APIC errors. 
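The replacement of three static tables by build_sched_topology() above is the classic "conditionally append to a NULL-terminated array" pattern. A generic sketch with stand-in level names:

    #include <assert.h>
    #include <stdio.h>
    #include <stdbool.h>

    struct level { const char *name; };

    #define MAX_LEVELS 6
    static struct level topo[MAX_LEVELS];  /* zero-init => NULL-terminated */

    static void build_topology(bool hybrid, bool numa_in_pkg)
    {
        int i = 0;

        topo[i++] = (struct level){ "SMT" };
        if (!hybrid)            /* skip CLS on hybrid parts */
            topo[i++] = (struct level){ "CLS" };
        topo[i++] = (struct level){ "MC" };
        if (!numa_in_pkg)       /* NUMA domains span the package instead */
            topo[i++] = (struct level){ "DIE" };

        /* At least one empty slot must remain as the terminator. */
        assert(i < MAX_LEVELS);
    }

    int main(void)
    {
        build_topology(true, false);
        for (int i = 0; topo[i].name; i++)
            printf("%s\n", topo[i].name);   /* SMT, MC, DIE */
        return 0;
    }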
*/ if (APIC_INTEGRATED(boot_cpu_apic_version)) { - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + /* Due to the Pentium erratum 3AP. */ + if (maxlvt > 3) apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); + apic_read(APIC_ESR); } - pr_debug("NMI sent\n"); - if (send_status) - pr_err("APIC never delivered???\n"); - if (accept_status) - pr_err("APIC delivery error (%lx)\n", accept_status); + /* Assert INIT on the target CPU */ + apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, phys_apicid); + safe_apic_wait_icr_idle(); - return (send_status | accept_status); + udelay(init_udelay); + + /* Deassert INIT on the target CPU */ + apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); + safe_apic_wait_icr_idle(); } -static int -wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) +/* + * Wake up AP by INIT, INIT, STARTUP sequence. + */ +static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status = 0, accept_status = 0; - int maxlvt, num_starts, j; + int num_starts, j, maxlvt; + preempt_disable(); maxlvt = lapic_get_maxlvt(); - - /* - * Be paranoid about clearing APIC errors. - */ - if (APIC_INTEGRATED(boot_cpu_apic_version)) { - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } - - pr_debug("Asserting INIT\n"); - - /* - * Turn INIT on target chip - */ - /* - * Send IPI - */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, - phys_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); - - udelay(init_udelay); - - pr_debug("Deasserting INIT\n"); - - /* Target chip */ - /* Send IPI */ - apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid); - - pr_debug("Waiting for send to finish...\n"); - send_status = safe_apic_wait_icr_idle(); + send_init_sequence(phys_apicid); mb(); @@ -945,15 +958,16 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) if (accept_status) pr_err("APIC delivery error (%lx)\n", accept_status); + preempt_enable(); return (send_status | accept_status); } /* reduce the number of lines printed when booting a large cpu count system */ static void announce_cpu(int cpu, int apicid) { + static int width, node_width, first = 1; static int current_node = NUMA_NO_NODE; int node = early_cpu_to_node(cpu); - static int width, node_width; if (!width) width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */ @@ -961,10 +975,10 @@ static void announce_cpu(int cpu, int apicid) if (!node_width) node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */ - if (cpu == 1) - printk(KERN_INFO "x86: Booting SMP configuration:\n"); - if (system_state < SYSTEM_RUNNING) { + if (first) + pr_info("x86: Booting SMP configuration:\n"); + if (node != current_node) { if (current_node > (-1)) pr_cont("\n"); @@ -975,77 +989,16 @@ static void announce_cpu(int cpu, int apicid) } /* Add padding for the BSP */ - if (cpu == 1) + if (first) pr_cont("%*s", width + 1, " "); + first = 0; pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu); - } else pr_info("Booting Node %d Processor %d APIC 0x%x\n", node, cpu, apicid); } -static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) - return NMI_HANDLED; - - return NMI_DONE; -} - -/* - * Wake up AP by INIT, INIT, STARTUP sequence. 
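For reference, the two ICR writes in send_init_sequence() decompose into the standard architectural APIC bits; the masks below are the usual apicdef.h values, reproduced only to show the composition:

    #include <stdio.h>
    #include <stdint.h>

    /* Architectural local-APIC ICR bits (abbreviated). */
    #define APIC_DM_INIT       0x00500  /* delivery mode: INIT */
    #define APIC_INT_ASSERT    0x04000  /* level: assert */
    #define APIC_INT_LEVELTRIG 0x08000  /* trigger mode: level */

    int main(void)
    {
        uint32_t assert_init   = APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT;
        uint32_t deassert_init = APIC_INT_LEVELTRIG | APIC_DM_INIT;

        printf("assert INIT:   %#x\n", assert_init);    /* 0xc500 */
        printf("deassert INIT: %#x\n", deassert_init);  /* 0x8500 */
        return 0;
    }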
- * - * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS - * boot-strap code which is not a desired behavior for waking up BSP. To - * void the boot-strap code, wake up CPU0 by NMI instead. - * - * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined - * (i.e. physically hot removed and then hot added), NMI won't wake it up. - * We'll change this code in the future to wake up hard offlined CPU0 if - * real platform and request are available. - */ -static int -wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, - int *cpu0_nmi_registered) -{ - int id; - int boot_error; - - preempt_disable(); - - /* - * Wake up AP by INIT, INIT, STARTUP sequence. - */ - if (cpu) { - boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); - goto out; - } - - /* - * Wake up BSP by nmi. - * - * Register a NMI handler to help wake up CPU0. - */ - boot_error = register_nmi_handler(NMI_LOCAL, - wakeup_cpu0_nmi, 0, "wake_cpu0"); - - if (!boot_error) { - enable_start_cpu0 = 1; - *cpu0_nmi_registered = 1; - id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid; - boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); - } - -out: - preempt_enable(); - - return boot_error; -} - int common_cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -1071,17 +1024,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from + * Returns zero if startup was successfully sent, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, - int *cpu0_nmi_registered) +static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) { - /* start_ip had better be page-aligned! */ unsigned long start_ip = real_mode_header->trampoline_start; - - unsigned long boot_error = 0; - unsigned long timeout; + int ret; #ifdef CONFIG_X86_64 /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */ @@ -1094,7 +1043,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, if (IS_ENABLED(CONFIG_X86_32)) { early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu); initial_stack = idle->thread.sp; - } else { + } else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) { smpboot_control = cpu; } @@ -1108,7 +1057,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * This grunge runs the startup process for * the targeted processor. */ - if (x86_platform.legacy.warm_reset) { pr_debug("Setting warm reset code and vector.\n"); @@ -1123,13 +1071,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, } } - /* - * AP might wait on cpu_callout_mask in cpu_init() with - * cpu_initialized_mask set if previous attempt to online - * it timed-out. Clear cpu_initialized_mask so that after - * INIT/SIPI it could start with a clean state. - */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); smp_mb(); /* @@ -1137,66 +1078,25 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * - Use a method from the APIC driver if one defined, with wakeup * straight to 64-bit mode preferred over wakeup to RM. * Otherwise, - * - Use an INIT boot APIC message for APs or NMI for BSP. 
+ * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) - boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip); + ret = apic->wakeup_secondary_cpu_64(apicid, start_ip); else if (apic->wakeup_secondary_cpu) - boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); + ret = apic->wakeup_secondary_cpu(apicid, start_ip); else - boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, - cpu0_nmi_registered); - - if (!boot_error) { - /* - * Wait 10s total for first sign of life from AP - */ - boot_error = -1; - timeout = jiffies + 10*HZ; - while (time_before(jiffies, timeout)) { - if (cpumask_test_cpu(cpu, cpu_initialized_mask)) { - /* - * Tell AP to proceed with initialization - */ - cpumask_set_cpu(cpu, cpu_callout_mask); - boot_error = 0; - break; - } - schedule(); - } - } - - if (!boot_error) { - /* - * Wait till AP completes initial initialization - */ - while (!cpumask_test_cpu(cpu, cpu_callin_mask)) { - /* - * Allow other tasks to run while we wait for the - * AP to come online. This also gives a chance - * for the MTRR work(triggered by the AP coming online) - * to be completed in the stop machine context. - */ - schedule(); - } - } + ret = wakeup_secondary_cpu_via_init(apicid, start_ip); - if (x86_platform.legacy.warm_reset) { - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); - } - - return boot_error; + /* If the wakeup mechanism failed, cleanup the warm reset vector */ + if (ret) + arch_cpuhp_cleanup_kick_cpu(cpu); + return ret; } -int native_cpu_up(unsigned int cpu, struct task_struct *tidle) +int native_kick_ap(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); - int cpu0_nmi_registered = 0; - unsigned long flags; - int err, ret = 0; + int err; lockdep_assert_irqs_enabled(); @@ -1210,24 +1110,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) } /* - * Already booted CPU? - */ - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - pr_debug("do_boot_cpu %d Already started\n", cpu); - return -ENOSYS; - } - - /* * Save current MTRR state in case it was changed since early boot * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: */ mtrr_save_state(); - /* x86 CPUs take themselves offline, so delayed offline is OK. */ - err = cpu_check_up_prepare(cpu); - if (err && err != -EBUSY) - return err; - /* the FPU context is blank, nobody can own it */ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; @@ -1235,41 +1122,44 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) if (err) return err; - err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); - if (err) { + err = do_boot_cpu(apicid, cpu, tidle); + if (err) pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); - ret = -EIO; - goto unreg_nmi; - } - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); + return err; +} - while (!cpu_online(cpu)) { - cpu_relax(); - touch_nmi_watchdog(); - } +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle) +{ + return smp_ops.kick_ap_alive(cpu, tidle); +} -unreg_nmi: - /* - * Clean up the nmi handler. Do this after the callin and callout sync - * to avoid impact of possible long unregister time. - */ - if (cpu0_nmi_registered) - unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); +void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) +{ + /* Cleanup possible dangling ends... 
*/ + if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset) + smpboot_restore_warm_reset_vector(); +} - return ret; +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) +{ + if (smp_ops.cleanup_dead_cpu) + smp_ops.cleanup_dead_cpu(cpu); + + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); +} + +void arch_cpuhp_sync_state_poll(void) +{ + if (smp_ops.poll_sync_state) + smp_ops.poll_sync_state(); } /** - * arch_disable_smp_support() - disables SMP support for x86 at runtime + * arch_disable_smp_support() - Disables SMP support for x86 at boottime */ -void arch_disable_smp_support(void) +void __init arch_disable_smp_support(void) { disable_ioapic_support(); } @@ -1361,14 +1251,6 @@ static void __init smp_cpu_index_default(void) } } -static void __init smp_get_logical_apicid(void) -{ - if (x2apic_mode) - cpu0_logical_apicid = apic_read(APIC_LDR); - else - cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - void __init smp_prepare_cpus_common(void) { unsigned int i; @@ -1379,7 +1261,6 @@ void __init smp_prepare_cpus_common(void) * Setup boot CPU information */ smp_store_boot_cpu_info(); /* Final full version of the data */ - cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); for_each_possible_cpu(i) { @@ -1390,18 +1271,24 @@ void __init smp_prepare_cpus_common(void) zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL); } - /* - * Set 'default' x86 topology, this matches default_topology() in that - * it has NUMA nodes as a topology level. See also - * native_smp_cpus_done(). - * - * Must be done before set_cpus_sibling_map() is ran. - */ - set_sched_topology(x86_topology); - set_cpu_sibling_map(0); } +#ifdef CONFIG_X86_64 +/* Establish whether parallel bringup can be supported. */ +bool __init arch_cpuhp_init_parallel_bringup(void) +{ + if (!x86_cpuinit.parallel_bringup) { + pr_info("Parallel CPU startup disabled by the platform\n"); + return false; + } + + smpboot_control = STARTUP_READ_APICID; + pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control); + return true; +} +#endif + /* * Prepare for SMP bootup. * @max_cpus: configured maximum number of CPUs, It is a legacy parameter @@ -1431,8 +1318,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); - smp_get_logical_apicid(); - pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); @@ -1455,6 +1340,25 @@ void arch_thaw_secondary_cpus_end(void) cache_aps_init(); } +bool smp_park_other_cpus_in_init(void) +{ + unsigned int cpu, this_cpu = smp_processor_id(); + unsigned int apicid; + + if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu) + return false; + + for_each_present_cpu(cpu) { + if (cpu == this_cpu) + continue; + apicid = apic->cpu_present_to_apicid(cpu); + if (apicid == BAD_APICID) + continue; + send_init_sequence(apicid); + } + return true; +} + /* * Early setup to make printk work. 
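The parking walk in smp_park_other_cpus_in_init() above is a simple filter loop over present CPUs: skip the caller, skip CPUs without a usable APIC ID, INIT everyone else. A sketch with stub tables; the BAD_APICID sentinel here is illustrative rather than the header's exact value:

    #include <stdio.h>
    #include <stdbool.h>

    #define NR_CPUS    8
    #define BAD_APICID (~0u)    /* sentinel: no APIC ID known */

    static unsigned int apicid_of[NR_CPUS] = { 0, 2, 4, BAD_APICID, 8, 10, 12, 14 };
    static bool present[NR_CPUS]           = { 1, 1, 1, 1, 1, 0, 1, 1 };

    static void send_init_sequence(unsigned int apicid)
    {
        printf("INIT -> APIC %#x\n", apicid);
    }

    static void park_other_cpus_in_init(unsigned int this_cpu)
    {
        for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++) {
            if (!present[cpu] || cpu == this_cpu)
                continue;
            if (apicid_of[cpu] == BAD_APICID)
                continue;
            send_init_sequence(apicid_of[cpu]);
        }
    }

    int main(void)
    {
        park_other_cpus_in_init(0);
        return 0;
    }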
*/ @@ -1466,9 +1370,6 @@ void __init native_smp_prepare_boot_cpu(void) if (!IS_ENABLED(CONFIG_SMP)) switch_gdt_and_percpu_base(me); - /* already set me in cpu_online_mask in boot_cpu_init() */ - cpumask_set_cpu(me, cpu_callout_mask); - cpu_set_state_online(me); native_pv_lock_init(); } @@ -1490,13 +1391,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) pr_debug("Boot done\n"); calculate_max_logical_packages(); - - /* XXX for now assume numa-in-package and hybrid don't overlap */ - if (x86_has_numa_in_package) - set_sched_topology(x86_numa_in_package_topology); - if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) - set_sched_topology(x86_hybrid_topology); - + build_sched_topology(); nmi_selftest(); impress_friends(); cache_aps_init(); @@ -1592,6 +1487,12 @@ __init void prefill_possible_map(void) set_cpu_possible(i, true); } +/* correctly size the local cpu masks */ +void __init setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #ifdef CONFIG_HOTPLUG_CPU /* Recompute SMT state for all CPUs on offline */ @@ -1650,10 +1551,6 @@ static void remove_siblinginfo(int cpu) static void remove_cpu_from_maps(int cpu) { set_cpu_online(cpu, false); - cpumask_clear_cpu(cpu, cpu_callout_mask); - cpumask_clear_cpu(cpu, cpu_callin_mask); - /* was set by cpu_init() */ - cpumask_clear_cpu(cpu, cpu_initialized_mask); numa_remove_cpu(cpu); } @@ -1704,64 +1601,27 @@ int native_cpu_disable(void) return 0; } -int common_cpu_die(unsigned int cpu) -{ - int ret = 0; - - /* We don't do anything here: idle task is faking death itself. */ - - /* They ack this in play_dead() by setting CPU_DEAD */ - if (cpu_wait_death(cpu, 5)) { - if (system_state == SYSTEM_RUNNING) - pr_info("CPU %u is now offline\n", cpu); - } else { - pr_err("CPU %u didn't die...\n", cpu); - ret = -1; - } - - return ret; -} - -void native_cpu_die(unsigned int cpu) -{ - common_cpu_die(cpu); -} - void play_dead_common(void) { idle_task_exit(); - /* Ack it */ - (void)cpu_report_death(); - + cpuhp_ap_report_dead(); /* * With physical CPU hotplug, we should halt the cpu */ local_irq_disable(); } -/** - * cond_wakeup_cpu0 - Wake up CPU0 if needed. - * - * If NMI wants to wake up CPU0, start CPU0. - */ -void cond_wakeup_cpu0(void) -{ - if (smp_processor_id() == 0 && enable_start_cpu0) - start_cpu0(); -} -EXPORT_SYMBOL_GPL(cond_wakeup_cpu0); - /* * We need to flush the caches before going to sleep, lest we have * dirty data in our caches when we come back up. */ static inline void mwait_play_dead(void) { + struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; - void *mwait_ptr; int i; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || @@ -1796,12 +1656,9 @@ static inline void mwait_play_dead(void) (highest_subcstate - 1); } - /* - * This should be a memory location in a cache line which is - * unlikely to be touched by other processors. The actual - * content is immaterial as it is not actually modified in any way. - */ - mwait_ptr = ¤t_thread_info()->flags; + /* Set up state for the kexec() hack below */ + md->status = CPUDEAD_MWAIT_WAIT; + md->control = CPUDEAD_MWAIT_WAIT; wbinvd(); @@ -1814,13 +1671,58 @@ static inline void mwait_play_dead(void) * case where we return around the loop. 
*/ mb(); - clflush(mwait_ptr); + clflush(md); mb(); - __monitor(mwait_ptr, 0, 0); + __monitor(md, 0, 0); mb(); __mwait(eax, 0); - cond_wakeup_cpu0(); + if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) { + /* + * Kexec is about to happen. Don't go back into mwait() as + * the kexec kernel might overwrite text and data including + * page tables and stack. So mwait() would resume when the + * monitor cache line is written to and then the CPU goes + * south due to overwritten text, page tables and stack. + * + * Note: This does _NOT_ protect against a stray MCE, NMI, + * SMI. They will resume execution at the instruction + * following the HLT instruction and run into the problem + * which this is trying to prevent. + */ + WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT); + while(1) + native_halt(); + } + } +} + +/* + * Kick all "offline" CPUs out of mwait on kexec(). See comment in + * mwait_play_dead(). + */ +void smp_kick_mwait_play_dead(void) +{ + u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT; + struct mwait_cpu_dead *md; + unsigned int cpu, i; + + for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) { + md = per_cpu_ptr(&mwait_cpu_dead, cpu); + + /* Does it sit in mwait_play_dead() ? */ + if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT) + continue; + + /* Wait up to 5ms */ + for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) { + /* Bring it out of mwait */ + WRITE_ONCE(md->control, newstate); + udelay(5); + } + + if (READ_ONCE(md->status) != newstate) + pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu); } } @@ -1829,11 +1731,8 @@ void __noreturn hlt_play_dead(void) if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); - while (1) { + while (1) native_halt(); - - cond_wakeup_cpu0(); - } } void native_play_dead(void) @@ -1852,12 +1751,6 @@ int native_cpu_disable(void) return -ENOSYS; } -void native_cpu_die(unsigned int cpu) -{ - /* We said "no" in __cpu_disable */ - BUG(); -} - void native_play_dead(void) { BUG(); diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 1b83377274b8..ca004e2e4469 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -38,102 +38,12 @@ static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); #ifdef CONFIG_HOTPLUG_CPU - -#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0 -static int cpu0_hotpluggable = 1; -#else -static int cpu0_hotpluggable; -static int __init enable_cpu0_hotplug(char *str) -{ - cpu0_hotpluggable = 1; - return 1; -} - -__setup("cpu0_hotplug", enable_cpu0_hotplug); -#endif - -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 -/* - * This function offlines a CPU as early as possible and allows userspace to - * boot up without the CPU. The CPU can be onlined back by user after boot. - * - * This is only called for debugging CPU offline/online feature. 
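The control/status handshake above can be exercised in user space: C11 atomics and a busy-wait replace MONITOR/MWAIT, and pause() stands in for native_halt(). The magic constants match the hunk; everything else is scaffolding (compile with -pthread):

    #include <stdatomic.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    #define WAIT_MAGIC 0xDEADBEEFu
    #define KEXEC_HLT  0x4A17DEADu

    static struct { _Atomic unsigned int control, status; } md;

    /* The "dead" CPU: park, and leave the wait loop for good when told to. */
    static void *play_dead(void *arg)
    {
        atomic_store(&md.status, WAIT_MAGIC);
        atomic_store(&md.control, WAIT_MAGIC);

        while (atomic_load(&md.control) != KEXEC_HLT)
            ;   /* stands in for monitor()/mwait() on &md */

        /* Acknowledge, then never touch the monitored line again. */
        atomic_store(&md.status, KEXEC_HLT);
        for (;;)
            pause();    /* stands in for native_halt() */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        int i;

        pthread_create(&t, NULL, play_dead, NULL);
        while (atomic_load(&md.status) != WAIT_MAGIC)
            ;   /* wait until the CPU has parked itself */

        /* The kexec side: kick it out of the wait and confirm, 5us steps. */
        atomic_store(&md.control, KEXEC_HLT);
        for (i = 0; atomic_load(&md.status) != KEXEC_HLT && i < 1000; i++)
            usleep(5);

        printf("parked CPU %s\n",
               atomic_load(&md.status) == KEXEC_HLT ? "acknowledged" : "stuck");
        return 0;
    }

The property carried over from the kernel code is that the parked side acknowledges exactly once and then stops touching the monitored line, so the kexec kernel can safely overwrite it.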
- */ -int _debug_hotplug_cpu(int cpu, int action) +int arch_register_cpu(int cpu) { - int ret; - - if (!cpu_is_hotpluggable(cpu)) - return -EINVAL; + struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu); - switch (action) { - case 0: - ret = remove_cpu(cpu); - if (!ret) - pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu); - else - pr_debug("Can't offline CPU%d.\n", cpu); - break; - case 1: - ret = add_cpu(cpu); - if (ret) - pr_debug("Can't online CPU%d.\n", cpu); - - break; - default: - ret = -EINVAL; - } - - return ret; -} - -static int __init debug_hotplug_cpu(void) -{ - _debug_hotplug_cpu(0, 0); - return 0; -} - -late_initcall_sync(debug_hotplug_cpu); -#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */ - -int arch_register_cpu(int num) -{ - struct cpuinfo_x86 *c = &cpu_data(num); - - /* - * Currently CPU0 is only hotpluggable on Intel platforms. Other - * vendors can add hotplug support later. - * Xen PV guests don't support CPU0 hotplug at all. - */ - if (c->x86_vendor != X86_VENDOR_INTEL || - cpu_feature_enabled(X86_FEATURE_XENPV)) - cpu0_hotpluggable = 0; - - /* - * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate - * depends on BSP. PIC interrupts depend on BSP. - * - * If the BSP dependencies are under control, one can tell kernel to - * enable BSP hotplug. This basically adds a control file and - * one can attempt to offline BSP. - */ - if (num == 0 && cpu0_hotpluggable) { - unsigned int irq; - /* - * We won't take down the boot processor on i386 if some - * interrupts only are able to be serviced by the BSP in PIC. - */ - for_each_active_irq(irq) { - if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) { - cpu0_hotpluggable = 0; - break; - } - } - } - if (num || cpu0_hotpluggable) - per_cpu(cpu_devices, num).cpu.hotpluggable = 1; - - return register_cpu(&per_cpu(cpu_devices, num).cpu, num); + xc->cpu.hotpluggable = cpu > 0; + return register_cpu(&xc->cpu, cpu); } EXPORT_SYMBOL(arch_register_cpu); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 344698852146..3425c6a943e4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -69,12 +69,10 @@ static int __init tsc_early_khz_setup(char *buf) } early_param("tsc_early_khz", tsc_early_khz_setup); -__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) +__always_inline void __cyc2ns_read(struct cyc2ns_data *data) { int seq, idx; - preempt_disable_notrace(); - do { seq = this_cpu_read(cyc2ns.seq.seqcount.sequence); idx = seq & 1; @@ -86,6 +84,12 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence))); } +__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) +{ + preempt_disable_notrace(); + __cyc2ns_read(data); +} + __always_inline void cyc2ns_read_end(void) { preempt_enable_notrace(); @@ -115,18 +119,25 @@ __always_inline void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
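The conversion this machinery feeds (see __cycles_2_ns() just below) is plain fixed-point arithmetic: ns = offset + (cyc * mul) >> shift, with mul chosen as (10^6 << shift) / tsc_khz. Worked standalone; the shift and frequency are illustrative, and the wide multiply leans on the GCC/Clang __int128 extension:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t mul_u64_u32_shr(uint64_t cyc, uint32_t mul, unsigned int shift)
    {
        return (uint64_t)((unsigned __int128)cyc * mul >> shift);
    }

    int main(void)
    {
        unsigned int shift = 32;          /* illustrative scale factor */
        uint64_t khz = 3000000;           /* a 3 GHz TSC */
        uint32_t mul = (uint32_t)(((uint64_t)1000000 << shift) / khz);

        uint64_t cyc = 3000000000ull;     /* one second worth of cycles */
        printf("%llu ns\n", (unsigned long long)mul_u64_u32_shr(cyc, mul, shift));
        return 0;                         /* prints ~999999999 */
    }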
*/ -static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; - cyc2ns_read_begin(&data); + __cyc2ns_read(&data); ns = data.cyc2ns_offset; ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); - cyc2ns_read_end(); + return ns; +} +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long long ns; + preempt_disable_notrace(); + ns = __cycles_2_ns(cyc); + preempt_enable_notrace(); return ns; } @@ -223,7 +234,7 @@ noinstr u64 native_sched_clock(void) u64 tsc_now = rdtsc(); /* return the value in ns */ - return cycles_2_ns(tsc_now); + return __cycles_2_ns(tsc_now); } /* @@ -250,7 +261,7 @@ u64 native_sched_clock_from_tsc(u64 tsc) /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT -noinstr u64 sched_clock(void) +noinstr u64 sched_clock_noinstr(void) { return paravirt_sched_clock(); } @@ -260,11 +271,20 @@ bool using_native_sched_clock(void) return static_call_query(pv_sched_clock) == native_sched_clock; } #else -u64 sched_clock(void) __attribute__((alias("native_sched_clock"))); +u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock"))); bool using_native_sched_clock(void) { return true; } #endif +notrace u64 sched_clock(void) +{ + u64 now; + preempt_disable_notrace(); + now = sched_clock_noinstr(); + preempt_enable_notrace(); + return now; +} + int check_tsc_unstable(void) { return tsc_unstable; @@ -1598,10 +1618,7 @@ void __init tsc_init(void) #ifdef CONFIG_SMP /* - * If we have a constant TSC and are using the TSC for the delay loop, - * we can skip clock calibration if another cpu in the same socket has already - * been calibrated. This assumes that CONSTANT_TSC applies to all - * cpus in the socket - this should be a safe assumption. + * Check whether existing calibration data can be reused. */ unsigned long calibrate_delay_is_known(void) { @@ -1609,6 +1626,21 @@ unsigned long calibrate_delay_is_known(void) int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); const struct cpumask *mask = topology_core_cpumask(cpu); + /* + * If TSC has constant frequency and TSC is synchronized across + * sockets then reuse CPU0 calibration. + */ + if (constant_tsc && !tsc_unstable) + return cpu_data(0).loops_per_jiffy; + + /* + * If TSC has constant frequency and TSC is not synchronized across + * sockets and this is not the first CPU in the socket, then reuse + * the calibration value of an already online CPU on that socket. + * + * This assumes that CONSTANT_TSC is consistent for all CPUs in a + * socket. + */ if (!constant_tsc || !mask) return 0; diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9452dc9664b5..bbc440c93e08 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -245,7 +245,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) */ static atomic_t start_count; static atomic_t stop_count; -static atomic_t skip_test; static atomic_t test_runs; /* @@ -344,21 +343,14 @@ static inline unsigned int loop_timeout(int cpu) } /* - * Source CPU calls into this - it waits for the freshly booted - * target CPU to arrive and then starts the measurement: + * The freshly booted CPU initiates this via an async SMP function call. 
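The reuse policy the rewritten calibrate_delay_is_known() encodes above is a small decision tree; as a standalone function, with plain bools in place of the cpufeature and clocksource flags:

    #include <stdio.h>
    #include <stdbool.h>

    /*
     * Whose loops_per_jiffy to reuse:
     *  - constant TSC, synchronized across sockets -> CPU0's value
     *  - constant TSC, per-socket sync only        -> an online socket sibling
     *  - otherwise                                 -> 0, calibrate from scratch
     */
    static unsigned long lpj_reuse(bool constant_tsc, bool tsc_unstable,
                                   unsigned long cpu0_lpj, unsigned long sibling_lpj)
    {
        if (constant_tsc && !tsc_unstable)
            return cpu0_lpj;
        if (constant_tsc && sibling_lpj)
            return sibling_lpj;
        return 0;
    }

    int main(void)
    {
        printf("%lu\n", lpj_reuse(true, false, 5990000, 0));        /* CPU0 */
        printf("%lu\n", lpj_reuse(true, true, 5990000, 5985000));   /* sibling */
        printf("%lu\n", lpj_reuse(false, true, 5990000, 0));        /* calibrate */
        return 0;
    }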
*/ -void check_tsc_sync_source(int cpu) +static void check_tsc_sync_source(void *__cpu) { + unsigned int cpu = (unsigned long)__cpu; int cpus = 2; /* - * No need to check if we already know that the TSC is not - * synchronized or if we have no TSC. - */ - if (unsynchronized_tsc()) - return; - - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR * 3 if the MSR is available, so the target can try to adjust @@ -368,16 +360,9 @@ void check_tsc_sync_source(int cpu) else atomic_set(&test_runs, 3); retry: - /* - * Wait for the target to start or to skip the test: - */ - while (atomic_read(&start_count) != cpus - 1) { - if (atomic_read(&skip_test) > 0) { - atomic_set(&skip_test, 0); - return; - } + /* Wait for the target to start. */ + while (atomic_read(&start_count) != cpus - 1) cpu_relax(); - } /* * Trigger the target to continue into the measurement too: @@ -397,14 +382,14 @@ retry: if (!nr_warps) { atomic_set(&test_runs, 0); - pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + pr_debug("TSC synchronization [CPU#%d -> CPU#%u]: passed\n", smp_processor_id(), cpu); } else if (atomic_dec_and_test(&test_runs) || random_warps) { /* Force it to 0 if random warps brought us here */ atomic_set(&test_runs, 0); - pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n", + pr_warn("TSC synchronization [CPU#%d -> CPU#%u]:\n", smp_processor_id(), cpu); pr_warn("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); @@ -457,11 +442,12 @@ void check_tsc_sync_target(void) * SoCs the TSC is frequency synchronized, but still the TSC ADJUST * register might have been wreckaged by the BIOS.. */ - if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { - atomic_inc(&skip_test); + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) return; - } + /* Kick the control CPU into the TSC synchronization function */ + smp_call_function_single(cpumask_first(cpu_online_mask), check_tsc_sync_source, + (unsigned long *)(unsigned long)cpu, 0); retry: /* * Register this CPU's participation and wait for the diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 3ac50b7298d1..7e574cf3bf8a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -7,14 +7,23 @@ #include <asm/unwind.h> #include <asm/orc_types.h> #include <asm/orc_lookup.h> +#include <asm/orc_header.h> + +ORC_HEADER; #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) #define orc_warn_current(args...) 
\ ({ \ - if (state->task == current && !state->error) \ + static bool dumped_before; \ + if (state->task == current && !state->error) { \ orc_warn(args); \ + if (unwind_debug && !dumped_before) { \ + dumped_before = true; \ + unwind_dump(state); \ + } \ + } \ }) extern int __start_orc_unwind_ip[]; @@ -23,8 +32,49 @@ extern struct orc_entry __start_orc_unwind[]; extern struct orc_entry __stop_orc_unwind[]; static bool orc_init __ro_after_init; +static bool unwind_debug __ro_after_init; static unsigned int lookup_num_blocks __ro_after_init; +static int __init unwind_debug_cmdline(char *str) +{ + unwind_debug = true; + + return 0; +} +early_param("unwind_debug", unwind_debug_cmdline); + +static void unwind_dump(struct unwind_state *state) +{ + static bool dumped_before; + unsigned long word, *sp; + struct stack_info stack_info = {0}; + unsigned long visit_mask = 0; + + if (dumped_before) + return; + + dumped_before = true; + + printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", + state->stack_info.type, state->stack_info.next_sp, + state->stack_mask, state->graph_idx); + + for (sp = __builtin_frame_address(0); sp; + sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) + break; + + for (; sp < stack_info.end; sp++) { + + word = READ_ONCE_NOCHECK(*sp); + + printk_deferred("%0*lx: %0*lx (%pB)\n", BITS_PER_LONG/4, + (unsigned long)sp, BITS_PER_LONG/4, + word, (void *)word); + } + } +} + static inline unsigned long orc_ip(const int *ip) { return (unsigned long)ip + *ip; @@ -136,21 +186,6 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; -#ifdef CONFIG_CALL_THUNKS -static struct orc_entry *orc_callthunk_find(unsigned long ip) -{ - if (!is_callthunk((void *)ip)) - return NULL; - - return &null_orc_entry; -} -#else -static struct orc_entry *orc_callthunk_find(unsigned long ip) -{ - return NULL; -} -#endif - /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { .type = ORC_TYPE_CALL, @@ -203,11 +238,7 @@ static struct orc_entry *orc_find(unsigned long ip) if (orc) return orc; - orc = orc_ftrace_find(ip); - if (orc) - return orc; - - return orc_callthunk_find(ip); + return orc_ftrace_find(ip); } #ifdef CONFIG_MODULES @@ -219,7 +250,6 @@ static struct orc_entry *cur_orc_table = __start_orc_unwind; static void orc_sort_swap(void *_a, void *_b, int size) { struct orc_entry *orc_a, *orc_b; - struct orc_entry orc_tmp; int *a = _a, *b = _b, tmp; int delta = _b - _a; @@ -231,9 +261,7 @@ static void orc_sort_swap(void *_a, void *_b, int size) /* Swap the corresponding .orc_unwind entries: */ orc_a = cur_orc_table + (a - cur_orc_ip_table); orc_b = cur_orc_table + (b - cur_orc_ip_table); - orc_tmp = *orc_a; - *orc_a = *orc_b; - *orc_b = orc_tmp; + swap(*orc_a, *orc_b); } static int orc_sort_cmp(const void *_a, const void *_b) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 25f155205770..03c885d3640f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -508,4 +508,8 @@ INIT_PER_CPU(irq_stack_backing_store); "fixed_percpu_data is not at start of per-cpu area"); #endif +#ifdef CONFIG_RETHUNK +. 
= ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned"); +#endif + #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d82f4fa2f1bf..a37ebd3b4773 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -126,12 +126,13 @@ struct x86_init_ops x86_init __initdata = { struct x86_cpuinit_ops x86_cpuinit = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, + .parallel_bringup = true, }; static void default_nmi_init(void) { }; -static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } -static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } static bool is_private_mmio_noop(u64 addr) {return false; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 04b57a336b34..7f70207e8689 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2799,14 +2799,13 @@ static u64 read_tsc(void) static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp, int *mode) { - long v; u64 tsc_pg_val; + long v; switch (clock->vclock_mode) { case VDSO_CLOCKMODE_HVCLOCK: - tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), - tsc_timestamp); - if (tsc_pg_val != U64_MAX) { + if (hv_read_tsc_page_tsc(hv_get_tsc_page(), + tsc_timestamp, &tsc_pg_val)) { /* TSC page valid */ *mode = VDSO_CLOCKMODE_HVCLOCK; v = (tsc_pg_val - clock->cycle_last) & @@ -13162,7 +13161,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) { - return arch_atomic_read(&kvm->arch.assigned_device_count); + return raw_atomic_read(&kvm->arch.assigned_device_count); } EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 01932af64193..ea3a28e7b613 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y) lib-y += strstr_32.o lib-y += string_32.o lib-y += memmove_32.o + lib-y += cmpxchg8b_emu.o ifneq ($(CONFIG_X86_CMPXCHG64),y) - lib-y += cmpxchg8b_emu.o atomic64_386_32.o + lib-y += atomic64_386_32.o endif else obj-y += iomap_copy_64.o diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 33c70c0160ea..6962df315793 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -1,47 +1,54 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include <linux/linkage.h> #include <asm/percpu.h> +#include <asm/processor-flags.h> .text /* + * Emulate 'cmpxchg16b %gs:(%rsi)' + * * Inputs: * %rsi : memory location to compare * %rax : low 64 bits of old value * %rdx : high 64 bits of old value * %rbx : low 64 bits of new value * %rcx : high 64 bits of new value - * %al : Operation successful + * + * Notably this is not LOCK prefixed and is not safe against NMIs */ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) -# -# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not -# via the ZF. Caller will access %al to get result. -# -# Note that this is only useful for a cpuops operation. 
Meaning that we -# do *not* have a fully atomic operation but just an operation that is -# *atomic* on a single cpu (as provided by the this_cpu_xx class of -# macros). -# pushfq cli - cmpq PER_CPU_VAR((%rsi)), %rax - jne .Lnot_same - cmpq PER_CPU_VAR(8(%rsi)), %rdx - jne .Lnot_same + /* if (*ptr == old) */ + cmpq PER_CPU_VAR(0(%rsi)), %rax + jne .Lnot_same + cmpq PER_CPU_VAR(8(%rsi)), %rdx + jne .Lnot_same - movq %rbx, PER_CPU_VAR((%rsi)) - movq %rcx, PER_CPU_VAR(8(%rsi)) + /* *ptr = new */ + movq %rbx, PER_CPU_VAR(0(%rsi)) + movq %rcx, PER_CPU_VAR(8(%rsi)) + + /* set ZF in EFLAGS to indicate success */ + orl $X86_EFLAGS_ZF, (%rsp) popfq - mov $1, %al RET .Lnot_same: + /* *ptr != old */ + + /* old = *ptr */ + movq PER_CPU_VAR(0(%rsi)), %rax + movq PER_CPU_VAR(8(%rsi)), %rdx + + /* clear ZF in EFLAGS to indicate failure */ + andl $(~X86_EFLAGS_ZF), (%rsp) + popfq - xor %al,%al RET SYM_FUNC_END(this_cpu_cmpxchg16b_emu) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 6a912d58fecc..49805257b125 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -2,10 +2,16 @@ #include <linux/linkage.h> #include <asm/export.h> +#include <asm/percpu.h> +#include <asm/processor-flags.h> .text +#ifndef CONFIG_X86_CMPXCHG64 + /* + * Emulate 'cmpxchg8b (%esi)' on UP + * * Inputs: * %esi : memory location to compare * %eax : low 32 bits of old value @@ -15,32 +21,65 @@ */ SYM_FUNC_START(cmpxchg8b_emu) -# -# Emulate 'cmpxchg8b (%esi)' on UP except we don't -# set the whole ZF thing (caller will just compare -# eax:edx with the expected value) -# pushfl cli - cmpl (%esi), %eax - jne .Lnot_same - cmpl 4(%esi), %edx - jne .Lhalf_same + cmpl 0(%esi), %eax + jne .Lnot_same + cmpl 4(%esi), %edx + jne .Lnot_same + + movl %ebx, 0(%esi) + movl %ecx, 4(%esi) - movl %ebx, (%esi) - movl %ecx, 4(%esi) + orl $X86_EFLAGS_ZF, (%esp) popfl RET .Lnot_same: - movl (%esi), %eax -.Lhalf_same: - movl 4(%esi), %edx + movl 0(%esi), %eax + movl 4(%esi), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) popfl RET SYM_FUNC_END(cmpxchg8b_emu) EXPORT_SYMBOL(cmpxchg8b_emu) + +#endif + +#ifndef CONFIG_UML + +SYM_FUNC_START(this_cpu_cmpxchg8b_emu) + + pushfl + cli + + cmpl PER_CPU_VAR(0(%esi)), %eax + jne .Lnot_same2 + cmpl PER_CPU_VAR(4(%esi)), %edx + jne .Lnot_same2 + + movl %ebx, PER_CPU_VAR(0(%esi)) + movl %ecx, PER_CPU_VAR(4(%esi)) + + orl $X86_EFLAGS_ZF, (%esp) + + popfl + RET + +.Lnot_same2: + movl PER_CPU_VAR(0(%esi)), %eax + movl PER_CPU_VAR(4(%esi)), %edx + + andl $(~X86_EFLAGS_ZF), (%esp) + + popfl + RET + +SYM_FUNC_END(this_cpu_cmpxchg8b_emu) + +#endif diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index 50734a23034c..cea25ca8b8cf 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -5,22 +5,34 @@ * This file contains network checksum routines that are better done * in an architecture-specific manner due to speed. 
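The two emulators above share one contract change: success is now reported through ZF in the caller's saved EFLAGS (patched on the stack before the popf) instead of through %al, and on failure the current memory contents are loaded back into the old-value registers, just as the native instruction would do. A minimal C sketch of those semantics, assuming a hypothetical u128 integer type (e.g. unsigned __int128) and a made-up function name purely for illustration:

	/*
	 * Sketch of the emulated semantics, not part of the patch. The real
	 * code operates on %gs-relative per-CPU memory and signals success
	 * via ZF rather than a boolean return value.
	 */
	static bool cmpxchg16b_emu_model(u128 *ptr, u128 *old, u128 new)
	{
		unsigned long flags;
		bool ok;

		local_irq_save(flags);		/* pushf; cli */
		ok = (*ptr == *old);
		if (ok)
			*ptr = new;		/* *ptr = new */
		else
			*old = *ptr;		/* old = *ptr, as cmpxchg16b does */
		local_irq_restore(flags);	/* popf, ZF patched on the stack */
		return ok;			/* ZF */
	}

Disabling interrupts is what makes this a this_cpu-only primitive: the sequence is atomic with respect to code on the same CPU, but not against other CPUs or NMIs, as the comment in the file points out.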
*/ - + #include <linux/compiler.h> #include <linux/export.h> #include <asm/checksum.h> #include <asm/word-at-a-time.h> -static inline unsigned short from32to16(unsigned a) +static inline unsigned short from32to16(unsigned a) { - unsigned short b = a >> 16; + unsigned short b = a >> 16; asm("addw %w2,%w0\n\t" - "adcw $0,%w0\n" + "adcw $0,%w0\n" : "=r" (b) : "0" (b), "r" (a)); return b; } +static inline __wsum csum_tail(u64 temp64, int odd) +{ + unsigned int result; + + result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); + if (unlikely(odd)) { + result = from32to16(result); + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + } + return (__force __wsum)result; +} + /* * Do a checksum on an arbitrary memory area. * Returns a 32bit checksum. @@ -35,7 +47,7 @@ static inline unsigned short from32to16(unsigned a) __wsum csum_partial(const void *buff, int len, __wsum sum) { u64 temp64 = (__force u64)sum; - unsigned odd, result; + unsigned odd; odd = 1 & (unsigned long) buff; if (unlikely(odd)) { @@ -47,21 +59,52 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) buff++; } - while (unlikely(len >= 64)) { + /* + * len == 40 is the hot case due to IPv6 headers, but annotating it likely() + * has a noticeable negative effect on codegen for all other cases with + * minimal performance benefit here. + */ + if (len == 40) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" "adcq 2*8(%[src]),%[res]\n\t" "adcq 3*8(%[src]),%[res]\n\t" "adcq 4*8(%[src]),%[res]\n\t" - "adcq 5*8(%[src]),%[res]\n\t" - "adcq 6*8(%[src]),%[res]\n\t" - "adcq 7*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); - buff += 64; - len -= 64; + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[40])buff)); + return csum_tail(temp64, odd); + } + if (unlikely(len >= 64)) { + /* + * Extra accumulators for better ILP in the loop.
+ */ + u64 tmp_accum, tmp_carries; + + asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t" + "xorl %k[tmp_carries],%k[tmp_carries]\n\t" + "subl $64, %[len]\n\t" + "1:\n\t" + "addq 0*8(%[src]),%[res]\n\t" + "adcq 1*8(%[src]),%[res]\n\t" + "adcq 2*8(%[src]),%[res]\n\t" + "adcq 3*8(%[src]),%[res]\n\t" + "adcl $0,%k[tmp_carries]\n\t" + "addq 4*8(%[src]),%[tmp_accum]\n\t" + "adcq 5*8(%[src]),%[tmp_accum]\n\t" + "adcq 6*8(%[src]),%[tmp_accum]\n\t" + "adcq 7*8(%[src]),%[tmp_accum]\n\t" + "adcl $0,%k[tmp_carries]\n\t" + "addq $64, %[src]\n\t" + "subl $64, %[len]\n\t" + "jge 1b\n\t" + "addq %[tmp_accum],%[res]\n\t" + "adcq %[tmp_carries],%[res]\n\t" + "adcq $0,%[res]" + : [tmp_accum] "=&r"(tmp_accum), + [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64), + [len] "+r"(len), [src] "+r"(buff) + : "m"(*(const char *)buff)); } if (len & 32) { @@ -70,45 +113,37 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) "adcq 2*8(%[src]),%[res]\n\t" "adcq 3*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[32])buff)); buff += 32; } if (len & 16) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq 1*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[16])buff)); buff += 16; } if (len & 8) { asm("addq 0*8(%[src]),%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [src] "r" (buff) - : "memory"); + : [res] "+r"(temp64) + : [src] "r"(buff), "m"(*(const char(*)[8])buff)); buff += 8; } if (len & 7) { - unsigned int shift = (8 - (len & 7)) * 8; + unsigned int shift = (-len << 3) & 63; unsigned long trail; trail = (load_unaligned_zeropad(buff) << shift) >> shift; asm("addq %[trail],%[res]\n\t" "adcq $0,%[res]" - : [res] "+r" (temp64) - : [trail] "r" (trail)); + : [res] "+r"(temp64) + : [trail] "r"(trail)); } - result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); - if (unlikely(odd)) { - result = from32to16(result); - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - } - return (__force __wsum)result; + return csum_tail(temp64, odd); } EXPORT_SYMBOL(csum_partial); @@ -118,6 +153,6 @@ EXPORT_SYMBOL(csum_partial); */ __sum16 ip_compute_csum(const void *buff, int len) { - return csum_fold(csum_partial(buff,len,0)); + return csum_fold(csum_partial(buff, len, 0)); } EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b64a2bd1a1ef..9c63713477bb 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -143,43 +143,43 @@ SYM_FUNC_END(__get_user_nocheck_8) EXPORT_SYMBOL(__get_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_get_user_clac) +SYM_CODE_START_LOCAL(__get_user_handle_exception) ASM_CLAC .Lbad_get_user: xor %edx,%edx mov $(-EFAULT),%_ASM_AX RET -SYM_CODE_END(.Lbad_get_user_clac) +SYM_CODE_END(__get_user_handle_exception) #ifdef CONFIG_X86_32 -SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac) +SYM_CODE_START_LOCAL(__get_user_8_handle_exception) ASM_CLAC bad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX RET -SYM_CODE_END(.Lbad_get_user_8_clac) +SYM_CODE_END(__get_user_8_handle_exception) #endif /* get_user */ - _ASM_EXTABLE(1b, .Lbad_get_user_clac) - _ASM_EXTABLE(2b, .Lbad_get_user_clac) - _ASM_EXTABLE(3b, .Lbad_get_user_clac) + _ASM_EXTABLE(1b, __get_user_handle_exception) + _ASM_EXTABLE(2b, __get_user_handle_exception) + _ASM_EXTABLE(3b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(4b, 
.Lbad_get_user_clac) + _ASM_EXTABLE(4b, __get_user_handle_exception) #else - _ASM_EXTABLE(4b, .Lbad_get_user_8_clac) - _ASM_EXTABLE(5b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(4b, __get_user_8_handle_exception) + _ASM_EXTABLE(5b, __get_user_8_handle_exception) #endif /* __get_user */ - _ASM_EXTABLE(6b, .Lbad_get_user_clac) - _ASM_EXTABLE(7b, .Lbad_get_user_clac) - _ASM_EXTABLE(8b, .Lbad_get_user_clac) + _ASM_EXTABLE(6b, __get_user_handle_exception) + _ASM_EXTABLE(7b, __get_user_handle_exception) + _ASM_EXTABLE(8b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(9b, .Lbad_get_user_clac) + _ASM_EXTABLE(9b, __get_user_handle_exception) #else - _ASM_EXTABLE(9b, .Lbad_get_user_8_clac) - _ASM_EXTABLE(10b, .Lbad_get_user_8_clac) + _ASM_EXTABLE(9b, __get_user_8_handle_exception) + _ASM_EXTABLE(10b, __get_user_8_handle_exception) #endif diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 02661861e5dd..0559b206fb11 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -38,10 +38,12 @@ SYM_FUNC_START(__memmove) cmp %rdi, %r8 jg 2f - /* FSRM implies ERMS => no length checks, do the copy directly */ +#define CHECK_LEN cmp $0x20, %rdx; jb 1f +#define MEMMOVE_BYTES movq %rdx, %rcx; rep movsb; RET .Lmemmove_begin_forward: - ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM - ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + ALTERNATIVE_2 __stringify(CHECK_LEN), \ + __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \ + __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM /* * movsq instruction have many startup latency @@ -207,11 +209,6 @@ SYM_FUNC_START(__memmove) movb %r11b, (%rdi) 13: RET - -.Lmemmove_erms: - movq %rdx, %rcx - rep movsb - RET SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index b09cd2ad426c..47fd9bd6b91d 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -27,14 +27,14 @@ void msrs_free(struct msr *msrs) EXPORT_SYMBOL(msrs_free); /** - * Read an MSR with error handling - * + * msr_read - Read an MSR with error handling * @msr: MSR to read * @m: value to read into * * It returns read data only on success, otherwise it doesn't change the output * argument @m. * + * Return: %0 for success, otherwise an error code */ static int msr_read(u32 msr, struct msr *m) { @@ -49,10 +49,12 @@ static int msr_read(u32 msr, struct msr *m) } /** - * Write an MSR with error handling + * msr_write - Write an MSR with error handling * * @msr: MSR to write * @m: value to write + * + * Return: %0 for success, otherwise an error code */ static int msr_write(u32 msr, struct msr *m) { @@ -88,12 +90,14 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set) } /** - * Set @bit in a MSR @msr. + * msr_set_bit - Set @bit in a MSR @msr. + * @msr: MSR to write + * @bit: bit number to set * - * Retval: - * < 0: An error was encountered. - * = 0: Bit was already set. - * > 0: Hardware accepted the MSR write. + * Return: + * * < 0: An error was encountered. + * * = 0: Bit was already set. + * * > 0: Hardware accepted the MSR write. */ int msr_set_bit(u32 msr, u8 bit) { @@ -101,12 +105,14 @@ int msr_set_bit(u32 msr, u8 bit) } /** - * Clear @bit in a MSR @msr. + * msr_clear_bit - Clear @bit in a MSR @msr. + * @msr: MSR to write + * @bit: bit number to clear * - * Retval: - * < 0: An error was encountered. - * = 0: Bit was already cleared. - * > 0: Hardware accepted the MSR write. + * Return: + * * < 0: An error was encountered. + * * = 0: Bit was already cleared. 
+ * * > 0: Hardware accepted the MSR write. */ int msr_clear_bit(u32 msr, u8 bit) { diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 3062d09a776d..1451e0c4ae22 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -131,22 +131,22 @@ SYM_FUNC_START(__put_user_nocheck_8) SYM_FUNC_END(__put_user_nocheck_8) EXPORT_SYMBOL(__put_user_nocheck_8) -SYM_CODE_START_LOCAL(.Lbad_put_user_clac) +SYM_CODE_START_LOCAL(__put_user_handle_exception) ASM_CLAC .Lbad_put_user: movl $-EFAULT,%ecx RET -SYM_CODE_END(.Lbad_put_user_clac) +SYM_CODE_END(__put_user_handle_exception) - _ASM_EXTABLE(1b, .Lbad_put_user_clac) - _ASM_EXTABLE(2b, .Lbad_put_user_clac) - _ASM_EXTABLE(3b, .Lbad_put_user_clac) - _ASM_EXTABLE(4b, .Lbad_put_user_clac) - _ASM_EXTABLE(5b, .Lbad_put_user_clac) - _ASM_EXTABLE(6b, .Lbad_put_user_clac) - _ASM_EXTABLE(7b, .Lbad_put_user_clac) - _ASM_EXTABLE(9b, .Lbad_put_user_clac) + _ASM_EXTABLE(1b, __put_user_handle_exception) + _ASM_EXTABLE(2b, __put_user_handle_exception) + _ASM_EXTABLE(3b, __put_user_handle_exception) + _ASM_EXTABLE(4b, __put_user_handle_exception) + _ASM_EXTABLE(5b, __put_user_handle_exception) + _ASM_EXTABLE(6b, __put_user_handle_exception) + _ASM_EXTABLE(7b, __put_user_handle_exception) + _ASM_EXTABLE(9b, __put_user_handle_exception) #ifdef CONFIG_X86_32 - _ASM_EXTABLE(8b, .Lbad_put_user_clac) - _ASM_EXTABLE(10b, .Lbad_put_user_clac) + _ASM_EXTABLE(8b, __put_user_handle_exception) + _ASM_EXTABLE(10b, __put_user_handle_exception) #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b3b1e376dce8..3fd066d42ec0 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -143,7 +143,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) * from re-poisioning the BTB prediction. */ .align 64 - .skip 63, 0xcc + .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 003d90138e20..e9251b89a9e9 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,6 +9,7 @@ #include <linux/export.h> #include <linux/uaccess.h> #include <linux/highmem.h> +#include <linux/libnvdimm.h> /* * Zero Userspace diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 7fe56c594aa6..91c52ead1226 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -32,6 +32,7 @@ #include <asm/traps.h> #include <asm/user.h> #include <asm/fpu/api.h> +#include <asm/fpu/regset.h> #include "fpu_system.h" #include "fpu_emu.h" diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 2c54b76d8f84..d9efa35711ee 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -3,6 +3,7 @@ #include <linux/export.h> #include <linux/swap.h> /* for totalram_pages */ #include <linux/memblock.h> +#include <asm/numa.h> void __init set_highmem_pages_init(void) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d4e2648a1dfb..b63403d7179d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -45,7 +45,6 @@ #include <asm/olpc_ofw.h> #include <asm/pgalloc.h> #include <asm/sections.h> -#include <asm/paravirt.h> #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/page_types.h> @@ -74,7 +73,6 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { pmd_table = (pmd_t *)alloc_low_page(); - paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); 
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); @@ -99,7 +97,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = (pte_t *)alloc_low_page(); - paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } @@ -181,12 +178,10 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, set_pte(newpte + i, pte[i]); *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); - paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); BUG_ON(newpte != pte_offset_kernel(pmd, 0)); __flush_tlb_all(); - paravirt_release_pte(__pa(pte) >> PAGE_SHIFT); pte = newpte; } BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1) @@ -482,7 +477,6 @@ void __init native_pagetable_init(void) pfn, pmd, __pa(pmd), pte, __pa(pte)); pte_clear(NULL, va, pte); } - paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); paging_init(); } @@ -491,15 +485,8 @@ void __init native_pagetable_init(void) * point, we've been running on some set of pagetables constructed by * the boot process. * - * If we're booting on native hardware, this will be a pagetable - * constructed in arch/x86/kernel/head_32.S. The root of the - * pagetable will be swapper_pg_dir. - * - * If we're booting paravirtualized under a hypervisor, then there are - * more options: we may already be running PAE, and the pagetable may - * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_init() will set up swapper_pg_dir - * appropriately for the rest of the initialization to work. + * This will be a pagetable constructed in arch/x86/kernel/head_32.S. + * The root of the pagetable will be swapper_pg_dir. 
* * In general, pagetable_init() assumes that the pagetable may already * be partially populated, and so it avoids stomping on any existing diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 557f0fe25dff..37db264866b6 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void) set_p4d(p4d_tramp, __p4d(_KERNPG_TABLE | __pa(pud_page_tramp))); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)); } else { - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } } diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index e0b51c09109f..54bbd5163e8d 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) #endif } -static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) { /* * To maintain the security guarantees of SEV-SNP guests, make sure @@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool */ if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc) snp_set_memory_shared(vaddr, npages); + + return true; } /* Return true unconditionally: return value doesn't matter for the SEV side */ @@ -501,6 +503,21 @@ void __init sme_early_init(void) x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; + + /* + * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the + * parallel bringup low level code. That raises #VC which cannot be + * handled there. + * It does not provide a RDMSR GHCB protocol so the early startup + * code cannot directly communicate with the secure firmware. The + * alternative solution to retrieve the APIC ID via CPUID(0xb), + * which is covered by the GHCB protocol, is not viable either + * because there is no enforcement of the CPUID(0xb) provided + * "initial" APIC ID to be the same as the real APIC ID. + * Disable parallel bootup. 
+ */ + if (sev_status & MSR_AMD64_SEV_ES_ENABLED) + x86_cpuinit.parallel_bringup = false; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index c6efcf559d88..d73aeb16417f 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -188,7 +188,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) if (pmd_large(*pmd)) return; - pte = pte_offset_map(pmd, ppd->vaddr); + pte = pte_offset_kernel(pmd, ppd->vaddr); if (pte_none(*pte)) set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); } @@ -612,7 +612,7 @@ void __init sme_enable(struct boot_params *bp) out: if (sme_me_mask) { physical_mask &= ~sme_me_mask; - cc_set_vendor(CC_VENDOR_AMD); + cc_vendor = CC_VENDOR_AMD; cc_set_mask(sme_me_mask); } } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 7159cf787613..df4182b6449f 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/seq_file.h> +#include <linux/proc_fs.h> #include <linux/debugfs.h> #include <linux/pfn.h> #include <linux/percpu.h> @@ -231,7 +232,7 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end) * points to #2, but almost all physical-to-virtual translations point to #1. * * This is so that we can have both a directmap of all physical memory *and* - * take full advantage of the the limited (s32) immediate addressing range (2G) + * take full advantage of the limited (s32) immediate addressing range (2G) * of x86_64. * * See Documentation/arch/x86/x86_64/mm.rst for more detail. @@ -2151,7 +2152,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); /* Notify hypervisor that we are about to set/clr encryption attribute. */ - x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); + if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc)) + return -EIO; ret = __change_page_attr_set_clr(&cpa, 1); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e4f499eb0f29..15a8009a4480 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -702,14 +702,8 @@ void p4d_clear_huge(p4d_t *p4d) * pud_set_huge - setup kernel PUD mapping * * MTRRs can override PAT memory types with 4KiB granularity. Therefore, this - * function sets up a huge page only if any of the following conditions are met: - * - * - MTRRs are disabled, or - * - * - MTRRs are enabled and the range is completely covered by a single MTRR, or - * - * - MTRRs are enabled and the corresponding MTRR memory type is WB, which - * has no effect on the requested PAT memory type. + * function sets up a huge page only if the complete range has the same MTRR + * caching mode. * * Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger * page mapping attempt fails. 
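The fallback the comment above prescribes is driven from the callers: try the largest page first and step down whenever the MTRR layout (now reduced to a simple uniformity check) refuses a huge mapping. A condensed sketch of that pattern, with made-up names standing in for the surrounding mapping code:

	/*
	 * Illustration only, not kernel code: step down 1GB -> 2MB -> 4K
	 * when a huge mapping attempt is rejected.
	 */
	static void map_one_range(pud_t *pud, pmd_t *pmd, pte_t *pte,
				  phys_addr_t addr, pgprot_t prot)
	{
		if (pud_set_huge(pud, addr, prot))	/* 1GB page OK? */
			return;
		if (pmd_set_huge(pmd, addr, prot))	/* 2MB page OK? */
			return;
		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot));	/* 4K */
	}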
@@ -718,11 +712,10 @@ void p4d_clear_huge(p4d_t *p4d) */ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) { - u8 mtrr, uniform; + u8 uniform; - mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); - if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && - (mtrr != MTRR_TYPE_WRBACK)) + mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform); + if (!uniform) return 0; /* Bail out if we are we on a populated non-leaf entry: */ @@ -745,11 +738,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) */ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) { - u8 mtrr, uniform; + u8 uniform; - mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); - if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) && - (mtrr != MTRR_TYPE_WRBACK)) { + mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform); + if (!uniform) { pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n", __func__, addr, addr + PMD_SIZE); return 0; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1056bbf55b17..438adb695daa 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2570,7 +2570,7 @@ out_image: } if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, proglen, pass + 1, image); + bpf_jit_dump(prog->len, proglen, pass + 1, rw_image); if (image) { if (!prog->is_func || extra_pass) { diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index 584c25b588b4..87313701f069 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c @@ -83,7 +83,7 @@ static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value) *value |= 0x100; } -void sata_revid_init(struct sim_dev_reg *reg) +static void sata_revid_init(struct sim_dev_reg *reg) { reg->sim_reg.value = 0x01060100; reg->sim_reg.mask = 0; @@ -172,7 +172,7 @@ static inline void extract_bytes(u32 *value, int reg, int len) *value &= mask; } -int bridge_read(unsigned int devfn, int reg, int len, u32 *value) +static int bridge_read(unsigned int devfn, int reg, int len, u32 *value) { u32 av_bridge_base, av_bridge_limit; int retval = 0; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f3f2d87cce1b..e9f99c56f3ce 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -96,6 +96,9 @@ static const unsigned long * const efi_tables[] = { #ifdef CONFIG_EFI_COCO_SECRET &efi.coco_secret, #endif +#ifdef CONFIG_UNACCEPTED_MEMORY + &efi.unaccepted, +#endif }; u64 efi_setup; /* efi setup_data physical address */ diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 75e3319e8bee..74ebd6882690 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -234,7 +234,7 @@ static int __init olpc_dt_compatible_match(phandle node, const char *compat) return 0; } -void __init olpc_dt_fixup(void) +static void __init olpc_dt_fixup(void) { phandle node; u32 board_rev; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 7a4d5e911415..63230ff8cf4f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -351,43 +351,6 @@ static int bsp_pm_callback(struct notifier_block *nb, unsigned long action, case PM_HIBERNATION_PREPARE: ret = bsp_check(); break; -#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 - case PM_RESTORE_PREPARE: - /* - * When system resumes from hibernation, online CPU0 because - * 1. it's required for resume and - * 2. 
the CPU was online before hibernation - */ - if (!cpu_online(0)) - _debug_hotplug_cpu(0, 1); - break; - case PM_POST_RESTORE: - /* - * When a resume really happens, this code won't be called. - * - * This code is called only when user space hibernation software - * prepares for snapshot device during boot time. So we just - * call _debug_hotplug_cpu() to restore to CPU0's state prior to - * preparing the snapshot device. - * - * This works for normal boot case in our CPU0 hotplug debug - * mode, i.e. CPU0 is offline and user mode hibernation - * software initializes during boot time. - * - * If CPU0 is online and user application accesses snapshot - * device after boot time, this will offline CPU0 and user may - * see different CPU0 state before and after accessing - * the snapshot device. But hopefully this is not a case when - * user debugging CPU0 hotplug. Even if users hit this case, - * they can easily online CPU0 back. - * - * To simplify this debug code, we only consider normal boot - * case. Otherwise we need to remember CPU0's state and restore - * to that state and resolve racy conditions etc. - */ - _debug_hotplug_cpu(0, 0); - break; -#endif default: break; } diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 42abd6af1198..c2a29be35c01 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -12,7 +12,7 @@ $(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS +CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY # When profile-guided optimization is enabled, llvm emits two different # overlapping text sections, which is not supported by kexec. Remove profile diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index af565816d2ba..788e5559549f 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -154,6 +154,9 @@ static void __init setup_real_mode(void) trampoline_header->flags = 0; + trampoline_lock = &trampoline_header->lock; + *trampoline_lock = 0; + trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); /* Map the real mode stub as virtual == physical */ diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index e38d61d6562e..c9f76fae902e 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -37,6 +37,24 @@ .text .code16 +.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 + /* + * Make sure only one CPU fiddles with the realmode stack + */ +.Llock_rm\@: + .if \lock_pa + lock btsl $0, pa_tr_lock + .else + lock btsl $0, tr_lock + .endif + jnc 2f + pause + jmp .Llock_rm\@ +2: + # Setup stack + movl $rm_stack_end, %esp +.endm + .balign PAGE_SIZE SYM_CODE_START(trampoline_start) cli # We should be safe anyway @@ -49,8 +67,7 @@ SYM_CODE_START(trampoline_start) mov %ax, %es mov %ax, %ss - # Setup stack - movl $rm_stack_end, %esp + LOCK_AND_LOAD_REALMODE_ESP call verify_cpu # Verify the cpu supports long mode testl %eax, %eax # Check for return code @@ -93,8 +110,7 @@ SYM_CODE_START(sev_es_trampoline_start) mov %ax, %es mov %ax, %ss - # Setup stack - movl $rm_stack_end, %esp + LOCK_AND_LOAD_REALMODE_ESP jmp .Lswitch_to_protected SYM_CODE_END(sev_es_trampoline_start) @@ -177,7 +193,7 @@ SYM_CODE_START(pa_trampoline_compat) * In compatibility mode. Prep ESP and DX for startup_32, then disable * paging and complete the switch to legacy 32-bit mode. 
*/ - movl $rm_stack_end, %esp + LOCK_AND_LOAD_REALMODE_ESP lock_pa=1 movw $__KERNEL_DS, %dx movl $(CR0_STATE & ~X86_CR0_PG), %eax @@ -241,6 +257,7 @@ SYM_DATA_START(trampoline_header) SYM_DATA(tr_efer, .space 8) SYM_DATA(tr_cr4, .space 4) SYM_DATA(tr_flags, .space 4) + SYM_DATA(tr_lock, .space 4) SYM_DATA_END(trampoline_header) #include "trampoline_common.S" diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 9fd24846d094..9e9143085d19 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/module.h> #include <linux/vgaarb.h> +#include <asm/fb.h> int fb_is_primary_device(struct fb_info *info) { diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 7d7ffb9c826a..863d0d6b3edc 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -16,6 +16,8 @@ #include <asm/setup.h> #include <asm/xen/hypercall.h> +#include "xen-ops.h" + static efi_char16_t vendor[100] __initdata; static efi_system_table_t efi_systab_xen __initdata = { diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index c1cd28e915a3..a6820ca940bf 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -161,13 +161,12 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) int rc = 0; /* - * This can happen if CPU was offlined earlier and - * offlining timed out in common_cpu_die(). + * If a CPU was offlined earlier and offlining timed out then the + * lock mechanism is still initialized. Uninit it unconditionally + * as it's safe to call even if already uninited. Interrupts and + * timer have already been handled in xen_cpu_dead_hvm(). */ - if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - } + xen_uninit_lock_cpu(cpu); if (cpu_acpi_id(cpu) != U32_MAX) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 093b78c8bbec..93b658248d01 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -68,6 +68,7 @@ #include <asm/reboot.h> #include <asm/hypervisor.h> #include <asm/mach_traps.h> +#include <asm/mtrr.h> #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/cpu.h> @@ -119,6 +120,54 @@ static int __init parse_xen_msr_safe(char *str) } early_param("xen_msr_safe", parse_xen_msr_safe); +/* Get MTRR settings from Xen and put them into mtrr_state. */ +static void __init xen_set_mtrr_data(void) +{ +#ifdef CONFIG_MTRR + struct xen_platform_op op = { + .cmd = XENPF_read_memtype, + .interface_version = XENPF_INTERFACE_VERSION, + }; + unsigned int reg; + unsigned long mask; + uint32_t eax, width; + static struct mtrr_var_range var[MTRR_MAX_VAR_RANGES] __initdata; + + /* Get physical address width (only 64-bit cpus supported). */ + width = 36; + eax = cpuid_eax(0x80000000); + if ((eax >> 16) == 0x8000 && eax >= 0x80000008) { + eax = cpuid_eax(0x80000008); + width = eax & 0xff; + } + + for (reg = 0; reg < MTRR_MAX_VAR_RANGES; reg++) { + op.u.read_memtype.reg = reg; + if (HYPERVISOR_platform_op(&op)) + break; + + /* + * Only called in dom0, which has all RAM PFNs mapped at + * RAM MFNs, and all PCI space etc. is identity mapped. + * This means we can treat MFN == PFN regarding MTRR settings. 
+ */ + var[reg].base_lo = op.u.read_memtype.type; + var[reg].base_lo |= op.u.read_memtype.mfn << PAGE_SHIFT; + var[reg].base_hi = op.u.read_memtype.mfn >> (32 - PAGE_SHIFT); + mask = ~((op.u.read_memtype.nr_mfns << PAGE_SHIFT) - 1); + mask &= (1UL << width) - 1; + if (mask) + mask |= MTRR_PHYSMASK_V; + var[reg].mask_lo = mask; + var[reg].mask_hi = mask >> 32; + } + + /* Only overwrite MTRR state if any MTRR could be got from Xen. */ + if (reg) + mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE); +#endif +} + static void __init xen_pv_init_platform(void) { /* PV guests can't operate virtio devices without grants. */ @@ -135,6 +184,11 @@ static void __init xen_pv_init_platform(void) /* pvclock is in shared info area */ xen_init_time_ops(); + + if (xen_initial_domain()) + xen_set_mtrr_data(); + else + mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } static void __init xen_pv_guest_late_init(void) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index b3b8d289b9ab..e0a975165de7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -86,6 +86,22 @@ #include "mmu.h" #include "debugfs.h" +/* + * Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order + * to avoid warnings with "-Wmissing-prototypes". + */ +pteval_t xen_pte_val(pte_t pte); +pgdval_t xen_pgd_val(pgd_t pgd); +pmdval_t xen_pmd_val(pmd_t pmd); +pudval_t xen_pud_val(pud_t pud); +p4dval_t xen_p4d_val(p4d_t p4d); +pte_t xen_make_pte(pteval_t pte); +pgd_t xen_make_pgd(pgdval_t pgd); +pmd_t xen_make_pmd(pmdval_t pmd); +pud_t xen_make_pud(pudval_t pud); +p4d_t xen_make_p4d(p4dval_t p4d); +pte_t xen_make_pte_init(pteval_t pte); + #ifdef CONFIG_X86_VSYSCALL_EMULATION /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c2be3efb2ba0..8b5cf7bb1f47 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -6,6 +6,7 @@ */ #include <linux/init.h> +#include <linux/iscsi_ibft.h> #include <linux/sched.h> #include <linux/kstrtox.h> #include <linux/mm.h> @@ -764,17 +765,26 @@ char * __init xen_memory_setup(void) BUG_ON(memmap.nr_entries == 0); xen_e820_table.nr_entries = memmap.nr_entries; - /* - * Xen won't allow a 1:1 mapping to be created to UNUSABLE - * regions, so if we're using the machine memory map leave the - * region as RAM as it is in the pseudo-physical map. - * - * UNUSABLE regions in domUs are not handled and will need - * a patch in the future. - */ - if (xen_initial_domain()) + if (xen_initial_domain()) { + /* + * Xen won't allow a 1:1 mapping to be created to UNUSABLE + * regions, so if we're using the machine memory map leave the + * region as RAM as it is in the pseudo-physical map. + * + * UNUSABLE regions in domUs are not handled and will need + * a patch in the future. + */ xen_ignore_unusable(); +#ifdef CONFIG_ISCSI_IBFT_FIND + /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */ + xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START; + xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START; + xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED; + xen_e820_table.nr_entries++; +#endif + } + /* Make sure the Xen-supplied memory map is well-ordered. 
*/ e820__update_table(&xen_e820_table); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 22fb982ff971..c20cbb14c82b 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -2,6 +2,10 @@ #ifndef _XEN_SMP_H #ifdef CONFIG_SMP + +void asm_cpu_bringup_and_idle(void); +asmlinkage void cpu_bringup_and_idle(void); + extern void xen_send_IPI_mask(const struct cpumask *mask, int vector); extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index b70afdff419c..ac95d1981cc0 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) } #ifdef CONFIG_HOTPLUG_CPU -static void xen_hvm_cpu_die(unsigned int cpu) +static void xen_hvm_cleanup_dead_cpu(unsigned int cpu) { - if (common_cpu_die(cpu) == 0) { - if (xen_have_vector_callback) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - } + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); } } #else -static void xen_hvm_cpu_die(unsigned int cpu) +static void xen_hvm_cleanup_dead_cpu(unsigned int cpu) { BUG(); } @@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void) smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_cpus_done = xen_smp_cpus_done; - smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu; if (!xen_have_vector_callback) { #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a9cf8c8fa074..d5ae5de2daa2 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -55,13 +55,13 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 }; static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 }; static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); -void asm_cpu_bringup_and_idle(void); static void cpu_bringup(void) { int cpu; cr4_init(); + cpuhp_ap_sync_alive(); cpu_init(); touch_softlockup_watchdog(); @@ -83,7 +83,7 @@ static void cpu_bringup(void) set_cpu_online(cpu, true); - cpu_set_state_online(cpu); /* Implies full memory barrier. */ + smp_mb(); /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -254,15 +254,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct desc_struct *gdt; unsigned long gdt_mfn; - /* used to tell cpu_init() that it can proceed with initialization */ - cpumask_set_cpu(cpu, cpu_callout_mask); if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); if (ctxt == NULL) { cpumask_clear_cpu(cpu, xen_cpu_initialized_map); - cpumask_clear_cpu(cpu, cpu_callout_mask); return -ENOMEM; } @@ -316,7 +313,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } -static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) +static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) { int rc; @@ -326,14 +323,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_setup_runstate_info(cpu); - /* - * PV VCPUs are always successfully taken down (see 'while' loop - * in xen_cpu_die()), so -EBUSY is an error. 
- */ - rc = cpu_check_up_prepare(cpu); - if (rc) - return rc; - /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -343,15 +332,20 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) xen_pmu_init(cpu); - rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); - BUG_ON(rc); - - while (cpu_report_state(cpu) != CPU_ONLINE) - HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + /* + * Why is this a BUG? If the hypercall fails then everything can be + * rolled back, no? + */ + BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)); return 0; } +static void xen_pv_poll_sync_state(void) +{ + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); +} + #ifdef CONFIG_HOTPLUG_CPU static int xen_pv_cpu_disable(void) { @@ -367,18 +361,18 @@ static int xen_pv_cpu_disable(void) static void xen_pv_cpu_die(unsigned int cpu) { - while (HYPERVISOR_vcpu_op(VCPUOP_is_up, - xen_vcpu_nr(cpu), NULL)) { + while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } +} - if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); - xen_pmu_finish(cpu); - } +static void xen_pv_cleanup_dead_cpu(unsigned int cpu) +{ + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + xen_pmu_finish(cpu); } static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */ @@ -400,6 +394,11 @@ static void xen_pv_cpu_die(unsigned int cpu) BUG(); } +static void xen_pv_cleanup_dead_cpu(unsigned int cpu) +{ + BUG(); +} + static void __noreturn xen_pv_play_dead(void) { BUG(); @@ -438,8 +437,10 @@ static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_cpus = xen_pv_smp_prepare_cpus, .smp_cpus_done = xen_smp_cpus_done, - .cpu_up = xen_pv_cpu_up, + .kick_ap_alive = xen_pv_kick_ap, .cpu_die = xen_pv_cpu_die, + .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu, + .poll_sync_state = xen_pv_poll_sync_state, .cpu_disable = xen_pv_cpu_disable, .play_dead = xen_pv_play_dead, diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b74ac2562cfb..52fa5609b7f6 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -66,11 +66,10 @@ static noinstr u64 xen_sched_clock(void) struct pvclock_vcpu_time_info *src; u64 ret; - preempt_disable_notrace(); src = &__this_cpu_read(xen_vcpu)->time; ret = pvclock_clocksource_read_nowd(src); ret -= xen_sched_clock_offset; - preempt_enable_notrace(); + return ret; } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a10903785a33..408a2aa66c69 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -72,8 +72,6 @@ void xen_restore_time_memory_area(void); void xen_init_time_ops(void); void xen_hvm_init_time_ops(void); -irqreturn_t xen_debug_interrupt(int irq, void *dev_id); - bool xen_vcpu_stolen(int vcpu); void xen_vcpu_setup(int cpu); @@ -148,9 +146,12 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int), void xen_pin_vcpu(int cpu); void xen_emergency_restart(void); +void xen_force_evtchn_callback(void); + #ifdef CONFIG_XEN_PV void xen_pv_pre_suspend(void); void xen_pv_post_suspend(int suspend_cancelled); +void xen_start_kernel(struct start_info *si); #else static inline void xen_pv_pre_suspend(void) {} static inline void xen_pv_post_suspend(int suspend_cancelled) {} diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2d0d6440b979..2a51a466779f 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -16,7 +16,6 @@ config XTENSA select 
ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS @@ -35,6 +34,7 @@ select HAVE_ARCH_KCSAN select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK + select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS @@ -204,6 +204,18 @@ Say Y here to enable unaligned memory access in user space. +config XTENSA_LOAD_STORE + bool "Load/store exception handler for memory only readable with l32" + help + The Xtensa architecture only allows reading memory attached to its + instruction bus with l32r and l32i instructions; all other + instructions raise an exception with the LoadStoreErrorCause code. + This makes it hard to use some configurations, e.g. storing string + literals in FLASH memory attached to the instruction bus. + + Say Y here to enable an exception handler that allows transparent + byte and 2-byte access to memory attached to the instruction bus. + config HAVE_SMP bool "System Supports SMP (MX)" depends on XTENSA_VARIANT_CUSTOM diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug index 83cc8d12fa0e..e84172a7763c 100644 --- a/arch/xtensa/Kconfig.debug +++ b/arch/xtensa/Kconfig.debug @@ -38,3 +38,11 @@ config PRINT_STACK_DEPTH help This option allows you to set the stack depth that the kernel prints in stack traces. + +config PRINT_USER_CODE_ON_UNHANDLED_EXCEPTION + bool "Dump user code around unhandled exception address" + help + Enable this option to display user code around the PC of the unhandled + exception (starting at an address aligned on a 16-byte boundary). + This may simplify finding faulting code in the absence of other + debug facilities.
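For a picture of what the new debug option prints, here is a minimal sketch of such a dump routine (illustrative only, not the actual xtensa implementation; it mirrors the 16-byte alignment the help text describes):

	static void dump_user_code(struct pt_regs *regs)
	{
		unsigned long start = regs->pc & ~15UL;	/* align on 16 bytes */
		unsigned char buf[32];
		unsigned int i;

		if (copy_from_user(buf, (void __user *)start, sizeof(buf)))
			return;		/* user mapping gone, nothing to dump */

		/* two 16-byte lines of hex around the faulting PC */
		for (i = 0; i < sizeof(buf); i += 16)
			pr_info("%08lx: %16ph\n", start + i, &buf[i]);
	}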
diff --git a/arch/xtensa/boot/boot-redboot/Makefile b/arch/xtensa/boot/boot-redboot/Makefile index 1d1d46215b1c..c0eef3f6f32d 100644 --- a/arch/xtensa/boot/boot-redboot/Makefile +++ b/arch/xtensa/boot/boot-redboot/Makefile @@ -6,16 +6,12 @@ OBJCOPY_ARGS := -O $(if $(CONFIG_CPU_BIG_ENDIAN),elf32-xtensa-be,elf32-xtensa-le) -LD_ARGS = -T $(srctree)/$(obj)/boot.ld - boot-y := bootstrap.o targets += $(boot-y) OBJS := $(addprefix $(obj)/,$(boot-y)) LIBS := arch/xtensa/boot/lib/lib.a arch/xtensa/lib/lib.a -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - $(obj)/zImage.o: $(obj)/../vmlinux.bin.gz $(OBJS) $(Q)$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \ --add-section image=$< \ @@ -23,7 +19,10 @@ $(obj)/zImage.o: $(obj)/../vmlinux.bin.gz $(OBJS) $(OBJS) $@ $(obj)/zImage.elf: $(obj)/zImage.o $(LIBS) - $(Q)$(LD) $(LD_ARGS) -o $@ $^ -L/xtensa-elf/lib $(LIBGCC) + $(Q)$(LD) $(KBUILD_LDFLAGS) \ + -T $(srctree)/$(obj)/boot.ld \ + --build-id=none \ + -o $@ $^ $(obj)/../zImage.redboot: $(obj)/zImage.elf $(Q)$(OBJCOPY) -S -O binary $< $@ diff --git a/arch/xtensa/include/asm/asm-prototypes.h b/arch/xtensa/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..b0da61812b85 --- /dev/null +++ b/arch/xtensa/include/asm/asm-prototypes.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_PROTOTYPES_H +#define __ASM_PROTOTYPES_H + +#include <asm/cacheflush.h> +#include <asm/checksum.h> +#include <asm/ftrace.h> +#include <asm/page.h> +#include <asm/string.h> +#include <asm/uaccess.h> + +#include <asm-generic/asm-prototypes.h> + +/* + * gcc internal math functions + */ +long long __ashrdi3(long long, int); +long long __ashldi3(long long, int); +long long __bswapdi2(long long); +int __bswapsi2(int); +long long __lshrdi3(long long, int); +int __divsi3(int, int); +int __modsi3(int, int); +int __mulsi3(int, int); +unsigned int __udivsi3(unsigned int, unsigned int); +unsigned int __umodsi3(unsigned int, unsigned int); +unsigned long long __umulsidi3(unsigned int, unsigned int); + +#endif /* __ASM_PROTOTYPES_H */ diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index e3474ca411ff..01bf7d9dbb19 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_ASMMACRO_H #define _XTENSA_ASMMACRO_H +#include <asm-generic/export.h> #include <asm/core.h> /* diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 52da614f953c..7308b7f777d7 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -245,6 +245,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_sub arch_atomic_fetch_sub + #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) @@ -252,12 +257,13 @@ ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_xor arch_atomic_fetch_xor + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #endif /* _XTENSA_ATOMIC_H */ diff --git a/arch/xtensa/include/asm/bugs.h 
b/arch/xtensa/include/asm/bugs.h deleted file mode 100644 index 69b29d198249..000000000000 --- a/arch/xtensa/include/asm/bugs.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-xtensa/bugs.h - * - * This is included by init/main.c to check for architecture-dependent bugs. - * - * Xtensa processors don't have any bugs. :) - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file "COPYING" in the main directory of - * this archive for more details. - */ - -#ifndef _XTENSA_BUGS_H -#define _XTENSA_BUGS_H - -static void check_bugs(void) { } - -#endif /* _XTENSA_BUGS_H */ diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h index f856d2bcb9f3..0e1bb6f019d6 100644 --- a/arch/xtensa/include/asm/core.h +++ b/arch/xtensa/include/asm/core.h @@ -26,6 +26,14 @@ #define XCHAL_SPANNING_WAY 0 #endif +#ifndef XCHAL_HAVE_TRAX +#define XCHAL_HAVE_TRAX 0 +#endif + +#ifndef XCHAL_NUM_PERF_COUNTERS +#define XCHAL_NUM_PERF_COUNTERS 0 +#endif + #if XCHAL_HAVE_WINDOWED #if defined(CONFIG_USER_ABI_DEFAULT) || defined(CONFIG_USER_ABI_CALL0_PROBE) /* Whether windowed ABI is supported in userspace. */ diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h index 6c6d9a9f185f..0ea4f84cd558 100644 --- a/arch/xtensa/include/asm/ftrace.h +++ b/arch/xtensa/include/asm/ftrace.h @@ -13,17 +13,8 @@ #include <asm/processor.h> #ifndef __ASSEMBLY__ -#define ftrace_return_address0 ({ unsigned long a0, a1; \ - __asm__ __volatile__ ( \ - "mov %0, a0\n" \ - "mov %1, a1\n" \ - : "=r"(a0), "=r"(a1)); \ - MAKE_PC_FROM_RA(a0, a1); }) - -#ifdef CONFIG_FRAME_POINTER extern unsigned long return_address(unsigned level); #define ftrace_return_address(n) return_address(n) -#endif #endif /* __ASSEMBLY__ */ #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h index 354ca942de40..94f13fabf7cd 100644 --- a/arch/xtensa/include/asm/platform.h +++ b/arch/xtensa/include/asm/platform.h @@ -28,31 +28,11 @@ extern void platform_init(bp_tag_t*); extern void platform_setup (char **); /* - * platform_restart is called to restart the system. - */ -extern void platform_restart (void); - -/* - * platform_halt is called to stop the system and halt. - */ -extern void platform_halt (void); - -/* - * platform_power_off is called to stop the system and power it off. - */ -extern void platform_power_off (void); - -/* * platform_idle is called from the idle function. */ extern void platform_idle (void); /* - * platform_heartbeat is called every HZ - */ -extern void platform_heartbeat (void); - -/* * platform_calibrate_ccount calibrates cpu clock freq (CONFIG_XTENSA_CALIBRATE) */ extern void platform_calibrate_ccount (void); diff --git a/arch/xtensa/include/asm/string.h b/arch/xtensa/include/asm/string.h index 89b51a0c752f..ffce43513fa2 100644 --- a/arch/xtensa/include/asm/string.h +++ b/arch/xtensa/include/asm/string.h @@ -118,9 +118,6 @@ extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); extern void *memmove(void *__dest, __const__ void *__src, size_t __n); extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); -/* Don't build bcopy at all ... 
*/ -#define __HAVE_ARCH_BCOPY - #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) /* diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 6f74ccc0c7ea..212c3b9ff407 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -47,6 +47,7 @@ __init trap_set_handler(int cause, xtensa_exception_handler *handler); asmlinkage void fast_illegal_instruction_user(void); asmlinkage void fast_syscall_user(void); asmlinkage void fast_alloca(void); +asmlinkage void fast_load_store(void); asmlinkage void fast_unaligned(void); asmlinkage void fast_second_level_miss(void); asmlinkage void fast_store_prohibited(void); @@ -64,8 +65,14 @@ void do_unhandled(struct pt_regs *regs); static inline void __init early_trap_init(void) { static struct exc_table init_exc_table __initdata = { +#ifdef CONFIG_XTENSA_LOAD_STORE + .fast_kernel_handler[EXCCAUSE_LOAD_STORE_ERROR] = + fast_load_store, +#endif +#ifdef CONFIG_MMU .fast_kernel_handler[EXCCAUSE_DTLB_MISS] = fast_second_level_miss, +#endif }; xtensa_set_sr(&init_exc_table, excsave1); } diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index d062c732ef18..20d6b4961001 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -22,7 +22,17 @@ #include <asm/asmmacro.h> #include <asm/processor.h> -#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION +#if XCHAL_UNALIGNED_LOAD_EXCEPTION || defined CONFIG_XTENSA_LOAD_STORE +#define LOAD_EXCEPTION_HANDLER +#endif + +#if XCHAL_UNALIGNED_STORE_EXCEPTION || defined LOAD_EXCEPTION_HANDLER +#define ANY_EXCEPTION_HANDLER +#endif + +#if XCHAL_HAVE_WINDOWED +#define UNALIGNED_USER_EXCEPTION +#endif /* First-level exception handler for unaligned exceptions. * @@ -58,10 +68,6 @@ * BE shift left / mask 0 0 X X */ -#if XCHAL_HAVE_WINDOWED -#define UNALIGNED_USER_EXCEPTION -#endif - #if XCHAL_HAVE_BE #define HWORD_START 16 @@ -103,7 +109,7 @@ * * 23 0 * ----------------------------- - * res 0000 0010 + * L8UI xxxx xxxx 0000 ssss tttt 0010 * L16UI xxxx xxxx 0001 ssss tttt 0010 * L32I xxxx xxxx 0010 ssss tttt 0010 * XXX 0011 ssss tttt 0010 @@ -128,9 +134,11 @@ #define OP0_L32I_N 0x8 /* load immediate narrow */ #define OP0_S32I_N 0x9 /* store immediate narrow */ +#define OP0_LSAI 0x2 /* load/store */ #define OP1_SI_MASK 0x4 /* OP1 bit set for stores */ #define OP1_SI_BIT 2 /* OP1 bit number for stores */ +#define OP1_L8UI 0x0 #define OP1_L32I 0x2 #define OP1_L16UI 0x1 #define OP1_L16SI 0x9 @@ -155,60 +163,74 @@ */ .literal_position -ENTRY(fast_unaligned) +#ifdef CONFIG_XTENSA_LOAD_STORE +ENTRY(fast_load_store) - /* Note: We don't expect the address to be aligned on a word - * boundary. After all, the processor generated that exception - * and it would be a hardware fault. - */ + call0 .Lsave_and_load_instruction - /* Save some working register */ + /* Analyze the instruction (load or store?). 
*/ - s32i a4, a2, PT_AREG4 - s32i a5, a2, PT_AREG5 - s32i a6, a2, PT_AREG6 - s32i a7, a2, PT_AREG7 - s32i a8, a2, PT_AREG8 + extui a0, a4, INSN_OP0, 4 # get insn.op0 nibble - rsr a0, depc - s32i a0, a2, PT_AREG2 - s32i a3, a2, PT_AREG3 +#if XCHAL_HAVE_DENSITY + _beqi a0, OP0_L32I_N, 1f # L32I.N, jump +#endif + bnei a0, OP0_LSAI, .Linvalid_instruction + /* 'store indicator bit' set, jump */ + bbsi.l a4, OP1_SI_BIT + INSN_OP1, .Linvalid_instruction - rsr a3, excsave1 - movi a4, fast_unaligned_fixup - s32i a4, a3, EXC_TABLE_FIXUP +1: + movi a3, ~3 + and a3, a3, a8 # align memory address - /* Keep value of SAR in a0 */ + __ssa8 a8 - rsr a0, sar - rsr a8, excvaddr # load unaligned memory address +#ifdef CONFIG_MMU + /* l32e can't be used here even when it's available. */ + /* TODO access_ok(a3) could be used here */ + j .Linvalid_instruction +#endif + l32i a5, a3, 0 + l32i a6, a3, 4 + __src_b a3, a5, a6 # a3 has the data word - /* Now, identify one of the following load/store instructions. - * - * The only possible danger of a double exception on the - * following l32i instructions is kernel code in vmalloc - * memory. The processor was just executing at the EPC_1 - * address, and indeed, already fetched the instruction. That - * guarantees a TLB mapping, which hasn't been replaced by - * this unaligned exception handler that uses only static TLB - * mappings. However, high-level interrupt handlers might - * modify TLB entries, so for the generic case, we register a - * TABLE_FIXUP handler here, too. - */ +#if XCHAL_HAVE_DENSITY + addi a7, a7, 2 # increment PC (assume 16-bit insn) + _beqi a0, OP0_L32I_N, .Lload_w# l32i.n: jump + addi a7, a7, 1 +#else + addi a7, a7, 3 +#endif - /* a3...a6 saved on stack, a2 = SP */ + extui a5, a4, INSN_OP1, 4 + _beqi a5, OP1_L32I, .Lload_w + bnei a5, OP1_L8UI, .Lload16 + extui a3, a3, 0, 8 + j .Lload_w - /* Extract the instruction that caused the unaligned access. */ +ENDPROC(fast_load_store) +#endif - rsr a7, epc1 # load exception address - movi a3, ~3 - and a3, a3, a7 # mask lower bits +/* + * Entry condition: + * + * a0: trashed, original value saved on stack (PT_AREG0) + * a1: a1 + * a2: new stack pointer, original in DEPC + * a3: a3 + * depc: a2, original value saved on stack (PT_DEPC) + * excsave_1: dispatch table + * + * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC + * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception + */ - l32i a4, a3, 0 # load 2 words - l32i a5, a3, 4 +#ifdef ANY_EXCEPTION_HANDLER +ENTRY(fast_unaligned) - __ssa8 a7 - __src_b a4, a4, a5 # a4 has the instruction +#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION + + call0 .Lsave_and_load_instruction /* Analyze the instruction (load or store?). */ @@ -222,12 +244,17 @@ ENTRY(fast_unaligned) /* 'store indicator bit' not set, jump */ _bbci.l a4, OP1_SI_BIT + INSN_OP1, .Lload +#endif +#if XCHAL_UNALIGNED_STORE_EXCEPTION + /* Store: Jump to table entry to get the value in the source register.*/ .Lstore:movi a5, .Lstore_table # table extui a6, a4, INSN_T, 4 # get source register addx8 a5, a6, a5 jx a5 # jump into table +#endif +#if XCHAL_UNALIGNED_LOAD_EXCEPTION /* Load: Load memory address. 
 
@@ -249,7 +276,7 @@ ENTRY(fast_unaligned)
 	addi	a7, a7, 2	# increment PC (assume 16-bit insn)
 
 	extui	a5, a4, INSN_OP0, 4
-	_beqi	a5, OP0_L32I_N, 1f	# l32i.n: jump
+	_beqi	a5, OP0_L32I_N, .Lload_w# l32i.n: jump
 
 	addi	a7, a7, 1
 #else
@@ -257,21 +284,26 @@
 #endif
 
 	extui	a5, a4, INSN_OP1, 4
-	_beqi	a5, OP1_L32I, 1f	# l32i: jump
-
+	_beqi	a5, OP1_L32I, .Lload_w	# l32i: jump
+#endif
+#ifdef LOAD_EXCEPTION_HANDLER
+.Lload16:
 	extui	a3, a3, 0, 16	# extract lower 16 bits
-	_beqi	a5, OP1_L16UI, 1f
+	_beqi	a5, OP1_L16UI, .Lload_w
 	addi	a5, a5, -OP1_L16SI
-	_bnez	a5, .Linvalid_instruction_load
+	_bnez	a5, .Linvalid_instruction
 
 	/* sign extend value */
-
+#if XCHAL_HAVE_SEXT
+	sext	a3, a3, 15
+#else
 	slli	a3, a3, 16
 	srai	a3, a3, 16
+#endif
 
 	/* Set target register. */
 
-1:
+.Lload_w:
 	extui	a4, a4, INSN_T, 4	# extract target register
 	movi	a5, .Lload_table
 	addx8	a4, a4, a5
@@ -295,30 +327,32 @@ ENTRY(fast_unaligned)
 	mov	a13, a3		;	_j .Lexit;	.align 8
 	mov	a14, a3		;	_j .Lexit;	.align 8
 	mov	a15, a3		;	_j .Lexit;	.align 8
-
+#endif
+#if XCHAL_UNALIGNED_STORE_EXCEPTION
 .Lstore_table:
-	l32i	a3, a2, PT_AREG0;	_j 1f;	.align 8
-	mov	a3, a1;			_j 1f;	.align 8	# fishy??
-	l32i	a3, a2, PT_AREG2;	_j 1f;	.align 8
-	l32i	a3, a2, PT_AREG3;	_j 1f;	.align 8
-	l32i	a3, a2, PT_AREG4;	_j 1f;	.align 8
-	l32i	a3, a2, PT_AREG5;	_j 1f;	.align 8
-	l32i	a3, a2, PT_AREG6;	_j 1f;	.align 8
-	l32i	a3, a2, PT_AREG7;	_j 1f;	.align 8
-	l32i	a3, a2, PT_AREG8;	_j 1f;	.align 8
-	mov	a3, a9	;		_j 1f;	.align 8
-	mov	a3, a10	;		_j 1f;	.align 8
-	mov	a3, a11	;		_j 1f;	.align 8
-	mov	a3, a12	;		_j 1f;	.align 8
-	mov	a3, a13	;		_j 1f;	.align 8
-	mov	a3, a14	;		_j 1f;	.align 8
-	mov	a3, a15	;		_j 1f;	.align 8
+	l32i	a3, a2, PT_AREG0;	_j .Lstore_w;	.align 8
+	mov	a3, a1;			_j .Lstore_w;	.align 8	# fishy??
+	l32i	a3, a2, PT_AREG2;	_j .Lstore_w;	.align 8
+	l32i	a3, a2, PT_AREG3;	_j .Lstore_w;	.align 8
+	l32i	a3, a2, PT_AREG4;	_j .Lstore_w;	.align 8
+	l32i	a3, a2, PT_AREG5;	_j .Lstore_w;	.align 8
+	l32i	a3, a2, PT_AREG6;	_j .Lstore_w;	.align 8
+	l32i	a3, a2, PT_AREG7;	_j .Lstore_w;	.align 8
+	l32i	a3, a2, PT_AREG8;	_j .Lstore_w;	.align 8
+	mov	a3, a9	;		_j .Lstore_w;	.align 8
+	mov	a3, a10	;		_j .Lstore_w;	.align 8
+	mov	a3, a11	;		_j .Lstore_w;	.align 8
+	mov	a3, a12	;		_j .Lstore_w;	.align 8
+	mov	a3, a13	;		_j .Lstore_w;	.align 8
+	mov	a3, a14	;		_j .Lstore_w;	.align 8
+	mov	a3, a15	;		_j .Lstore_w;	.align 8
+#endif
+#ifdef ANY_EXCEPTION_HANDLER
 
 	/* We cannot handle this exception. */
 
 	.extern _kernel_exception
-.Linvalid_instruction_load:
-.Linvalid_instruction_store:
+.Linvalid_instruction:
 
 	movi	a4, 0
 	rsr	a3, excsave1
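
The .Lload16 path above masks the loaded word to 16 bits and, for L16SI, sign-extends it — with the single sext instruction when the core has that option, otherwise with a shift pair. The two variants compute the same value; in C terms (illustrative only):

	#include <stdint.h>

	static int32_t sign_extend_16(uint32_t v)
	{
		return (int16_t)v;	/* what 'sext a3, a3, 15' yields */
		/* without sext: return (int32_t)(v << 16) >> 16;  -- slli/srai */
	}
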
@@ -326,6 +360,7 @@
 
 	/* Restore a4...a8 and SAR, set SP, and jump to default exception. */
 
+	l32i	a0, a2, PT_SAR
 	l32i	a8, a2, PT_AREG8
 	l32i	a7, a2, PT_AREG7
 	l32i	a6, a2, PT_AREG6
@@ -342,9 +377,11 @@
 2:	movi	a0, _user_exception
 	jx	a0
+#endif
+#if XCHAL_UNALIGNED_STORE_EXCEPTION
 
-1:	# a7: instruction pointer, a4: instruction, a3: value
-
+	# a7: instruction pointer, a4: instruction, a3: value
+.Lstore_w:
 	movi	a6, 0		# mask: ffffffff:00000000
 
 #if XCHAL_HAVE_DENSITY
@@ -361,7 +398,7 @@
 
 	extui	a5, a4, INSN_OP1, 4	# extract OP1
 	_beqi	a5, OP1_S32I, 1f	# jump if 32 bit store
-	_bnei	a5, OP1_S16I, .Linvalid_instruction_store
+	_bnei	a5, OP1_S16I, .Linvalid_instruction
 
 	movi	a5, -1
 	__extl	a3, a3		# get 16-bit value
@@ -406,7 +443,8 @@
 #else
 	s32i	a6, a4, 4
 #endif
-
+#endif
+#ifdef ANY_EXCEPTION_HANDLER
 .Lexit:
 #if XCHAL_HAVE_LOOPS
 	rsr	a4, lend	# check if we reached LEND
@@ -434,6 +472,7 @@
 
 	/* Restore working register */
 
+	l32i	a0, a2, PT_SAR
 	l32i	a8, a2, PT_AREG8
 	l32i	a7, a2, PT_AREG7
 	l32i	a6, a2, PT_AREG6
@@ -448,6 +487,59 @@
 	l32i	a2, a2, PT_AREG2
 	rfe
 
+	.align	4
+.Lsave_and_load_instruction:
+
+	/* Save some working register */
+
+	s32i	a3, a2, PT_AREG3
+	s32i	a4, a2, PT_AREG4
+	s32i	a5, a2, PT_AREG5
+	s32i	a6, a2, PT_AREG6
+	s32i	a7, a2, PT_AREG7
+	s32i	a8, a2, PT_AREG8
+
+	rsr	a4, depc
+	s32i	a4, a2, PT_AREG2
+
+	rsr	a5, sar
+	s32i	a5, a2, PT_SAR
+
+	rsr	a3, excsave1
+	movi	a4, fast_unaligned_fixup
+	s32i	a4, a3, EXC_TABLE_FIXUP
+
+	rsr	a8, excvaddr	# load unaligned memory address
+
+	/* Now, identify one of the following load/store instructions.
+	 *
+	 * The only possible danger of a double exception on the
+	 * following l32i instructions is kernel code in vmalloc
+	 * memory. The processor was just executing at the EPC_1
+	 * address, and indeed, already fetched the instruction. That
+	 * guarantees a TLB mapping, which hasn't been replaced by
+	 * this unaligned exception handler that uses only static TLB
+	 * mappings. However, high-level interrupt handlers might
+	 * modify TLB entries, so for the generic case, we register a
+	 * TABLE_FIXUP handler here, too.
+	 */
+
+	/* a3...a6 saved on stack, a2 = SP */
+
+	/* Extract the instruction that caused the unaligned access. */
+
+	rsr	a7, epc1	# load exception address
+	movi	a3, ~3
+	and	a3, a3, a7	# mask lower bits
+
+	l32i	a4, a3, 0	# load 2 words
+	l32i	a5, a3, 4
+
+	__ssa8	a7
+	__src_b	a4, a4, a5	# a4 has the instruction
+
+	ret
+#endif
 ENDPROC(fast_unaligned)
 
 ENTRY(fast_unaligned_fixup)
@@ -459,10 +551,11 @@ ENTRY(fast_unaligned_fixup)
 	l32i	a7, a2, PT_AREG7
 	l32i	a6, a2, PT_AREG6
 	l32i	a5, a2, PT_AREG5
-	l32i	a4, a2, PT_AREG4
+	l32i	a4, a2, PT_SAR
 	l32i	a0, a2, PT_AREG2
-	xsr	a0, depc			# restore depc and a0
-	wsr	a0, sar
+	wsr	a4, sar
+	wsr	a0, depc			# restore depc and a0
+	l32i	a4, a2, PT_AREG4
 
 	rsr	a0, exccause
 	s32i	a0, a2, PT_DEPC			# mark as a regular exception
@@ -483,5 +576,4 @@ ENTRY(fast_unaligned_fixup)
 	jx	a0
 
 ENDPROC(fast_unaligned_fixup)
-
-#endif /* XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION */
+#endif
diff --git a/arch/xtensa/kernel/mcount.S b/arch/xtensa/kernel/mcount.S
index 51daaf4e0b82..309b3298258f 100644
--- a/arch/xtensa/kernel/mcount.S
+++ b/arch/xtensa/kernel/mcount.S
@@ -78,6 +78,7 @@ ENTRY(_mcount)
 #error Unsupported Xtensa ABI
 #endif
 ENDPROC(_mcount)
+EXPORT_SYMBOL(_mcount)
 
 ENTRY(ftrace_stub)
 	abi_entry_default
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index ac1e0e566995..926b8bf0f14c 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -17,27 +17,28 @@
 #include <asm/platform.h>
 #include <asm/timex.h>
 
-#define _F(r,f,a,b)					\
-	r __platform_##f a b;				\
-	r platform_##f a __attribute__((weak, alias("__platform_"#f)))
-
 /*
  * Default functions that are used if no platform specific function is defined.
- * (Please, refer to include/asm-xtensa/platform.h for more information)
+ * (Please, refer to arch/xtensa/include/asm/platform.h for more information)
  */
 
-_F(void, init, (bp_tag_t *first), { });
-_F(void, setup, (char** cmd), { });
-_F(void, restart, (void), { while(1); });
-_F(void, halt, (void), { while(1); });
-_F(void, power_off, (void), { while(1); });
-_F(void, idle, (void), { __asm__ __volatile__ ("waiti 0" ::: "memory"); });
-_F(void, heartbeat, (void), { });
+void __weak __init platform_init(bp_tag_t *first)
+{
+}
+
+void __weak __init platform_setup(char **cmd)
+{
+}
+
+void __weak platform_idle(void)
+{
+	__asm__ __volatile__ ("waiti 0" ::: "memory");
+}
 
 #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
-_F(void, calibrate_ccount, (void),
+void __weak platform_calibrate_ccount(void)
 {
 	pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10MHz.\n");
 	ccount_freq = 10 * 1000000UL;
-});
+}
 #endif
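
platform.c now uses ordinary weak definitions instead of the old _F() alias macro, so a platform overrides a hook simply by providing a strong symbol with the same name. A hypothetical board file (the helper name is invented for illustration):

	#include <asm/platform.h>

	extern void board_low_power_wait(void);	/* assumed board-specific helper */

	/* Overrides the __weak platform_idle() default above. */
	void platform_idle(void)
	{
		board_low_power_wait();
	}
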
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 9191738f9941..aba3ff4e60d8 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -22,6 +22,7 @@
 #include <linux/screen_info.h>
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/reboot.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -46,6 +47,7 @@
 #include <asm/smp.h>
 #include <asm/sysmem.h>
 #include <asm/timex.h>
+#include <asm/traps.h>
 
 #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
 struct screen_info screen_info = {
@@ -241,6 +243,12 @@ void __init early_init_devtree(void *params)
 
 void __init init_arch(bp_tag_t *bp_start)
 {
+	/* Initialize basic exception handling if configuration may need it */
+
+	if (IS_ENABLED(CONFIG_KASAN) ||
+	    IS_ENABLED(CONFIG_XTENSA_LOAD_STORE))
+		early_trap_init();
+
 	/* Initialize MMU. */
 
 	init_mmu();
@@ -522,19 +530,30 @@ void cpu_reset(void)
 
 void machine_restart(char * cmd)
 {
-	platform_restart();
+	local_irq_disable();
+	smp_send_stop();
+	do_kernel_restart(cmd);
+	pr_err("Reboot failed -- System halted\n");
+	while (1)
+		cpu_relax();
 }
 
 void machine_halt(void)
 {
-	platform_halt();
-	while (1);
+	local_irq_disable();
+	smp_send_stop();
+	do_kernel_power_off();
+	while (1)
+		cpu_relax();
 }
 
 void machine_power_off(void)
 {
-	platform_power_off();
-	while (1);
+	local_irq_disable();
+	smp_send_stop();
+	do_kernel_power_off();
+	while (1)
+		cpu_relax();
 }
 
 #ifdef CONFIG_PROC_FS
@@ -574,6 +593,12 @@ c_show(struct seq_file *f, void *slot)
 # if XCHAL_HAVE_OCD
 		      "ocd "
 # endif
+#if XCHAL_HAVE_TRAX
+		      "trax "
+#endif
+#if XCHAL_NUM_PERF_COUNTERS
+		      "perf "
+#endif
 #endif
 #if XCHAL_HAVE_DENSITY
 		      "density "
@@ -623,11 +648,13 @@ c_show(struct seq_file *f, void *slot)
 	seq_printf(f,"physical aregs\t: %d\n"
 		     "misc regs\t: %d\n"
 		     "ibreak\t\t: %d\n"
-		     "dbreak\t\t: %d\n",
+		     "dbreak\t\t: %d\n"
+		     "perf counters\t: %d\n",
 		     XCHAL_NUM_AREGS,
 		     XCHAL_NUM_MISC_REGS,
 		     XCHAL_NUM_IBREAK,
-		     XCHAL_NUM_DBREAK);
+		     XCHAL_NUM_DBREAK,
+		     XCHAL_NUM_PERF_COUNTERS);
 
 	/* Interrupt. */
 
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7f7755cd28f0..f643ea5e36da 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -237,8 +237,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
 
 #endif
 
-#ifdef CONFIG_FRAME_POINTER
-
 struct return_addr_data {
 	unsigned long addr;
 	unsigned skip;
@@ -271,5 +269,3 @@ unsigned long return_address(unsigned level)
 	return r.addr;
 }
 EXPORT_SYMBOL(return_address);
-
-#endif
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 52c94ab5c205..2b69c3c035b6 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -421,3 +421,4 @@
 448	common	process_mrelease		sys_process_mrelease
 449	common	futex_waitv			sys_futex_waitv
 450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 16b8a6273772..1c3dfea843ec 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -121,10 +121,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
 	set_linux_timer(get_linux_timer());
 	evt->event_handler(evt);
-
-	/* Allow platform to do something useful (Wdog). */
-	platform_heartbeat();
-
 	return IRQ_HANDLED;
 }
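
With the platform_heartbeat() call gone from timer_interrupt(), a board that wants the blinking LED arms its own self-rearming timer, as the xt2000 conversion further down does. The pattern, reduced to a sketch (the LED helper is assumed):

	#include <linux/timer.h>
	#include <linux/jiffies.h>

	static void heartbeat_fn(struct timer_list *unused);
	static DEFINE_TIMER(heartbeat_timer, heartbeat_fn);

	static void heartbeat_fn(struct timer_list *unused)
	{
		board_toggle_led();				/* assumed helper */
		mod_timer(&heartbeat_timer, jiffies + HZ / 2);	/* re-arm at 2 Hz */
	}

A single mod_timer() call at init time, as in xt2000_setup_devinit() below, starts the cycle.
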
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index f0a7d1c2641e..17eb180eff7c 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -54,9 +54,10 @@ static void do_interrupt(struct pt_regs *regs);
 #if XTENSA_FAKE_NMI
 static void do_nmi(struct pt_regs *regs);
 #endif
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-static void do_unaligned_user(struct pt_regs *regs);
+#ifdef CONFIG_XTENSA_LOAD_STORE
+static void do_load_store(struct pt_regs *regs);
 #endif
+static void do_unaligned_user(struct pt_regs *regs);
 static void do_multihit(struct pt_regs *regs);
 #if XTENSA_HAVE_COPROCESSORS
 static void do_coprocessor(struct pt_regs *regs);
@@ -91,7 +92,10 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
 { EXCCAUSE_SYSTEM_CALL,		USER,	   fast_syscall_user },
 { EXCCAUSE_SYSTEM_CALL,		0,	   system_call },
 /* EXCCAUSE_INSTRUCTION_FETCH unhandled */
-/* EXCCAUSE_LOAD_STORE_ERROR unhandled*/
+#ifdef CONFIG_XTENSA_LOAD_STORE
+{ EXCCAUSE_LOAD_STORE_ERROR,	USER|KRNL, fast_load_store },
+{ EXCCAUSE_LOAD_STORE_ERROR,	0,	   do_load_store },
+#endif
 { EXCCAUSE_LEVEL1_INTERRUPT,	0,	   do_interrupt },
 #ifdef SUPPORT_WINDOWED
 { EXCCAUSE_ALLOCA,		USER|KRNL, fast_alloca },
@@ -102,9 +106,9 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
 #ifdef CONFIG_XTENSA_UNALIGNED_USER
 { EXCCAUSE_UNALIGNED,		USER,	   fast_unaligned },
 #endif
-{ EXCCAUSE_UNALIGNED,		0,	   do_unaligned_user },
 { EXCCAUSE_UNALIGNED,		KRNL,	   fast_unaligned },
 #endif
+{ EXCCAUSE_UNALIGNED,		0,	   do_unaligned_user },
 #ifdef CONFIG_MMU
 { EXCCAUSE_ITLB_MISS,		0,	   do_page_fault },
 { EXCCAUSE_ITLB_MISS,		USER|KRNL, fast_second_level_miss},
@@ -171,6 +175,23 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err)
 		die(str, regs, err);
 }
 
+#ifdef CONFIG_PRINT_USER_CODE_ON_UNHANDLED_EXCEPTION
+static inline void dump_user_code(struct pt_regs *regs)
+{
+	char buf[32];
+
+	if (copy_from_user(buf, (void __user *)(regs->pc & -16), sizeof(buf)) == 0) {
+		print_hex_dump(KERN_INFO, " ", DUMP_PREFIX_NONE,
+			       32, 1, buf, sizeof(buf), false);
+
+	}
+}
+#else
+static inline void dump_user_code(struct pt_regs *regs)
+{
+}
+#endif
+
 /*
  * Unhandled Exceptions. Kill user task or panic if in kernel space.
  */
@@ -186,6 +207,7 @@ void do_unhandled(struct pt_regs *regs)
 			    "\tEXCCAUSE is %ld\n",
 			    current->comm, task_pid_nr(current), regs->pc,
 			    regs->exccause);
+	dump_user_code(regs);
 	force_sig(SIGILL);
 }
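
The dispatch_init_table entries above pair a fast assembly handler, tagged USER, KRNL, or both, with a default C handler tagged 0 for the same exception cause. A sketch of what registering one entry plausibly amounts to (the fast_user_handler and default_handler slot names are assumed by analogy with the fast_kernel_handler field that early_trap_init fills earlier in this patch):

	/* Assumed registration logic for one dispatch table entry. */
	if (entry->fast & USER)
		exc_table->fast_user_handler[entry->cause] = entry->handler;
	if (entry->fast & KRNL)
		exc_table->fast_kernel_handler[entry->cause] = entry->handler;
	if (entry->fast == 0)
		exc_table->default_handler[entry->cause] = entry->handler;

So EXCCAUSE_LOAD_STORE_ERROR now gets fast_load_store on both fast paths, with do_load_store, added below, as the C fallback.
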
@@ -349,6 +371,19 @@ static void do_div0(struct pt_regs *regs)
 	force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->pc);
 }
 
+#ifdef CONFIG_XTENSA_LOAD_STORE
+static void do_load_store(struct pt_regs *regs)
+{
+	__die_if_kernel("Unhandled load/store exception in kernel",
+			regs, SIGKILL);
+
+	pr_info_ratelimited("Load/store error to %08lx in '%s' (pid = %d, pc = %#010lx)\n",
+			    regs->excvaddr, current->comm,
+			    task_pid_nr(current), regs->pc);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void *)regs->excvaddr);
+}
+#endif
+
 /*
  * Handle unaligned memory accesses from user space. Kill task.
  *
@@ -356,7 +391,6 @@
  * accesses causes from user space.
  */
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
 static void do_unaligned_user(struct pt_regs *regs)
 {
 	__die_if_kernel("Unhandled unaligned exception in kernel",
@@ -368,7 +402,6 @@ static void do_unaligned_user(struct pt_regs *regs)
 			    task_pid_nr(current), regs->pc);
 	force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr);
 }
-#endif
 
 #if XTENSA_HAVE_COPROCESSORS
 static void do_coprocessor(struct pt_regs *regs)
@@ -534,31 +567,58 @@ static void show_trace(struct task_struct *task, unsigned long *sp,
 }
 
 #define STACK_DUMP_ENTRY_SIZE 4
-#define STACK_DUMP_LINE_SIZE 32
+#define STACK_DUMP_LINE_SIZE 16
 static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
 
-void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+struct stack_fragment
 {
-	size_t len, off = 0;
-
-	if (!sp)
-		sp = stack_pointer(task);
+	size_t len;
+	size_t off;
+	u8 *sp;
+	const char *loglvl;
+};
 
-	len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE),
-		  kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
+static int show_stack_fragment_cb(struct stackframe *frame, void *data)
+{
+	struct stack_fragment *sf = data;
 
-	printk("%sStack:\n", loglvl);
-	while (off < len) {
+	while (sf->off < sf->len) {
 		u8 line[STACK_DUMP_LINE_SIZE];
-		size_t line_len = len - off > STACK_DUMP_LINE_SIZE ?
-			STACK_DUMP_LINE_SIZE : len - off;
+		size_t line_len = sf->len - sf->off > STACK_DUMP_LINE_SIZE ?
+			STACK_DUMP_LINE_SIZE : sf->len - sf->off;
+		bool arrow = sf->off == 0;
 
-		__memcpy(line, (u8 *)sp + off, line_len);
-		print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE,
+		if (frame && frame->sp == (unsigned long)(sf->sp + sf->off))
+			arrow = true;
+
+		__memcpy(line, sf->sp + sf->off, line_len);
+		print_hex_dump(sf->loglvl, arrow ? "> " : "  ", DUMP_PREFIX_NONE,
 			       STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
 			       line, line_len, false);
-		off += STACK_DUMP_LINE_SIZE;
+		sf->off += STACK_DUMP_LINE_SIZE;
+		if (arrow)
+			return 0;
 	}
+	return 1;
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+{
+	struct stack_fragment sf;
+
+	if (!sp)
+		sp = stack_pointer(task);
+
+	sf.len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE),
+		     kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
+	sf.off = 0;
+	sf.sp = (u8 *)sp;
+	sf.loglvl = loglvl;
+
+	printk("%sStack:\n", loglvl);
+	walk_stackframe(sp, show_stack_fragment_cb, &sf);
+	while (sf.off < sf.len)
+		show_stack_fragment_cb(NULL, &sf);
 
 	show_trace(task, sp, loglvl);
 }
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 17a7ef86fd0d..62d81e76e18e 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -13,71 +13,10 @@
  */
 
 #include <linux/module.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <asm/irq.h>
-#include <linux/in6.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/checksum.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/ftrace.h>
-#ifdef CONFIG_BLK_DEV_FD
-#include <asm/floppy.h>
-#endif
-#ifdef CONFIG_NET
-#include <net/checksum.h>
-#endif /* CONFIG_NET */
-
-
-/*
- * String functions
- */
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(__memset);
-EXPORT_SYMBOL(__memcpy);
-EXPORT_SYMBOL(__memmove);
-#ifdef CONFIG_ARCH_HAS_STRNCPY_FROM_USER
-EXPORT_SYMBOL(__strncpy_user);
-#endif
-EXPORT_SYMBOL(clear_page);
-EXPORT_SYMBOL(copy_page);
+#include <asm/pgtable.h>
 
 EXPORT_SYMBOL(empty_zero_page);
 
-/*
- * gcc internal math functions
- */
-extern long long __ashrdi3(long long, int);
-extern long long __ashldi3(long long, int);
-extern long long __bswapdi2(long long);
-extern int __bswapsi2(int);
-extern long long __lshrdi3(long long, int);
-extern int __divsi3(int, int);
-extern int __modsi3(int, int);
-extern int __mulsi3(int, int);
-extern unsigned int __udivsi3(unsigned int, unsigned int);
-extern unsigned int __umodsi3(unsigned int, unsigned int);
-extern unsigned long long __umulsidi3(unsigned int, unsigned int);
-
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__bswapdi2);
-EXPORT_SYMBOL(__bswapsi2);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__divsi3);
-EXPORT_SYMBOL(__modsi3);
-EXPORT_SYMBOL(__mulsi3);
-EXPORT_SYMBOL(__udivsi3);
-EXPORT_SYMBOL(__umodsi3);
-EXPORT_SYMBOL(__umulsidi3);
-
 unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v)
 {
 	BUG();
@@ -89,35 +28,3 @@ unsigned int __sync_fetch_and_or_4(volatile void *p, unsigned int v)
 	BUG();
 }
 EXPORT_SYMBOL(__sync_fetch_and_or_4);
-
-/*
- * Networking support
- */
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-
-/*
- * Architecture-specific symbols
- */
-EXPORT_SYMBOL(__xtensa_copy_user);
-EXPORT_SYMBOL(__invalidate_icache_range);
-
-/*
- * Kernel hacking ...
- */
-
-#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
-// FIXME EXPORT_SYMBOL(screen_info);
-#endif
-
-extern long common_exception_return;
-EXPORT_SYMBOL(common_exception_return);
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-#endif
-
-EXPORT_SYMBOL(__invalidate_dcache_range);
-#if XCHAL_DCACHE_IS_WRITEBACK
-EXPORT_SYMBOL(__flush_dcache_range);
-#endif
diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile
index c9c2614188f7..6e5b2232668c 100644
--- a/arch/xtensa/lib/Makefile
+++ b/arch/xtensa/lib/Makefile
@@ -6,7 +6,8 @@
 lib-y	+= memcopy.o memset.o checksum.o \
 	   ashldi3.o ashrdi3.o bswapdi2.o bswapsi2.o lshrdi3.o \
 	   divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o umulsidi3.o \
-	   usercopy.o strncpy_user.o strnlen_user.o
+	   usercopy.o strnlen_user.o
+lib-$(CONFIG_ARCH_HAS_STRNCPY_FROM_USER) += strncpy_user.o
 lib-$(CONFIG_PCI) += pci-auto.o
 lib-$(CONFIG_KCSAN) += kcsan-stubs.o
 KCSAN_SANITIZE_kcsan-stubs.o := n
diff --git a/arch/xtensa/lib/ashldi3.S b/arch/xtensa/lib/ashldi3.S
index 67fb0da9e432..cd6b731215d3 100644
--- a/arch/xtensa/lib/ashldi3.S
+++ b/arch/xtensa/lib/ashldi3.S
@@ -26,3 +26,4 @@ ENTRY(__ashldi3)
 	abi_ret_default
 
 ENDPROC(__ashldi3)
+EXPORT_SYMBOL(__ashldi3)
diff --git a/arch/xtensa/lib/ashrdi3.S b/arch/xtensa/lib/ashrdi3.S
index cbf052c512cc..07bc6e758020 100644
--- a/arch/xtensa/lib/ashrdi3.S
+++ b/arch/xtensa/lib/ashrdi3.S
@@ -26,3 +26,4 @@ ENTRY(__ashrdi3)
 	abi_ret_default
 
 ENDPROC(__ashrdi3)
+EXPORT_SYMBOL(__ashrdi3)
diff --git a/arch/xtensa/lib/bswapdi2.S b/arch/xtensa/lib/bswapdi2.S
index d8e52e05eba6..5d94a9352887 100644
--- a/arch/xtensa/lib/bswapdi2.S
+++ b/arch/xtensa/lib/bswapdi2.S
@@ -19,3 +19,4 @@ ENTRY(__bswapdi2)
 	abi_ret_default
 
 ENDPROC(__bswapdi2)
+EXPORT_SYMBOL(__bswapdi2)
diff --git a/arch/xtensa/lib/bswapsi2.S b/arch/xtensa/lib/bswapsi2.S
index 9c1de1344f79..fbfb8613d410 100644
--- a/arch/xtensa/lib/bswapsi2.S
+++ b/arch/xtensa/lib/bswapsi2.S
@@ -14,3 +14,4 @@ ENTRY(__bswapsi2)
 	abi_ret_default
 
 ENDPROC(__bswapsi2)
+EXPORT_SYMBOL(__bswapsi2)
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index cf1bed1a5bd6..ffee6f94c8f8 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -169,6 +169,7 @@ ENTRY(csum_partial)
 	j	5b		/* branch to handle the remaining byte */
 
 ENDPROC(csum_partial)
+EXPORT_SYMBOL(csum_partial)
 
 /*
  * Copy from ds while checksumming, otherwise like csum_partial
@@ -346,6 +347,7 @@ EX(10f)	s8i	a8, a3, 1
 	j	4b		/* process the possible trailing odd byte */
 
 ENDPROC(csum_partial_copy_generic)
+EXPORT_SYMBOL(csum_partial_copy_generic)
 
 # Exception handler:
diff --git a/arch/xtensa/lib/divsi3.S b/arch/xtensa/lib/divsi3.S
index b044b4744a8b..edb3c4ad971b 100644
--- a/arch/xtensa/lib/divsi3.S
+++ b/arch/xtensa/lib/divsi3.S
@@ -72,3 +72,4 @@ ENTRY(__divsi3)
 	abi_ret_default
 
 ENDPROC(__divsi3)
+EXPORT_SYMBOL(__divsi3)
diff --git a/arch/xtensa/lib/lshrdi3.S b/arch/xtensa/lib/lshrdi3.S
index 129ef8d1725b..e432e1a40702 100644
--- a/arch/xtensa/lib/lshrdi3.S
+++ b/arch/xtensa/lib/lshrdi3.S
@@ -26,3 +26,4 @@ ENTRY(__lshrdi3)
 	abi_ret_default
 
 ENDPROC(__lshrdi3)
+EXPORT_SYMBOL(__lshrdi3)
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index b20d206bcb71..f60760396cee 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -273,21 +273,8 @@ WEAK(memcpy)
 	abi_ret_default
 
 ENDPROC(__memcpy)
-
-/*
- * void bcopy(const void *src, void *dest, size_t n);
- */
-
-ENTRY(bcopy)
-
-	abi_entry_default
-	# a2=src, a3=dst, a4=len
-	mov	a5, a3
-	mov	a3, a2
-	mov	a2, a5
-	j	.Lmovecommon	# go to common code for memmove+bcopy
-
-ENDPROC(bcopy)
+EXPORT_SYMBOL(__memcpy)
+EXPORT_SYMBOL(memcpy)
 
 /*
  * void *memmove(void *dst, const void *src, size_t len);
@@ -551,3 +538,5 @@ WEAK(memmove)
 	abi_ret_default
 
 ENDPROC(__memmove)
+EXPORT_SYMBOL(__memmove)
+EXPORT_SYMBOL(memmove)
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S
index 59b1524fd601..262c3f39f945 100644
--- a/arch/xtensa/lib/memset.S
+++ b/arch/xtensa/lib/memset.S
@@ -142,6 +142,8 @@ EX(10f) s8i	a3, a5, 0
 	abi_ret_default
 
 ENDPROC(__memset)
+EXPORT_SYMBOL(__memset)
+EXPORT_SYMBOL(memset)
 
 	.section .fixup, "ax"
 	.align	4
diff --git a/arch/xtensa/lib/modsi3.S b/arch/xtensa/lib/modsi3.S
index d00e77181e20..c5f4295c6868 100644
--- a/arch/xtensa/lib/modsi3.S
+++ b/arch/xtensa/lib/modsi3.S
@@ -60,6 +60,7 @@ ENTRY(__modsi3)
 	abi_ret_default
 
 ENDPROC(__modsi3)
+EXPORT_SYMBOL(__modsi3)
 
 #if !XCHAL_HAVE_NSA
 	.section .rodata
diff --git a/arch/xtensa/lib/mulsi3.S b/arch/xtensa/lib/mulsi3.S
index 91a9d7c62f96..c6b4fd46bfa9 100644
--- a/arch/xtensa/lib/mulsi3.S
+++ b/arch/xtensa/lib/mulsi3.S
@@ -131,3 +131,4 @@ ENTRY(__mulsi3)
 	abi_ret_default
 
 ENDPROC(__mulsi3)
+EXPORT_SYMBOL(__mulsi3)
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index 0731912227d3..9841d1694cdf 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -201,6 +201,7 @@ EX(10f)	s8i	a9, a11, 0
 	abi_ret_default
 
 ENDPROC(__strncpy_user)
+EXPORT_SYMBOL(__strncpy_user)
 
 	.section .fixup, "ax"
 	.align	4
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 3d391dca3efb..cdcf57474164 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -133,6 +133,7 @@ EX(10f)	l32i	a9, a4, 0	# get word with first two bytes of string
 	abi_ret_default
 
 ENDPROC(__strnlen_user)
+EXPORT_SYMBOL(__strnlen_user)
 
 	.section .fixup, "ax"
 	.align	4
diff --git a/arch/xtensa/lib/udivsi3.S b/arch/xtensa/lib/udivsi3.S
index d2477e0786cf..59ea2dfc3f72 100644
--- a/arch/xtensa/lib/udivsi3.S
+++ b/arch/xtensa/lib/udivsi3.S
@@ -66,3 +66,4 @@ ENTRY(__udivsi3)
 	abi_ret_default
 
 ENDPROC(__udivsi3)
+EXPORT_SYMBOL(__udivsi3)
diff --git a/arch/xtensa/lib/umodsi3.S b/arch/xtensa/lib/umodsi3.S
index 5f031bfa0354..d39a7e56a971 100644
--- a/arch/xtensa/lib/umodsi3.S
+++ b/arch/xtensa/lib/umodsi3.S
@@ -55,3 +55,4 @@ ENTRY(__umodsi3)
 	abi_ret_default
 
 ENDPROC(__umodsi3)
+EXPORT_SYMBOL(__umodsi3)
diff --git a/arch/xtensa/lib/umulsidi3.S b/arch/xtensa/lib/umulsidi3.S
index 136081647942..8c7a94a0c5d0 100644
--- a/arch/xtensa/lib/umulsidi3.S
+++ b/arch/xtensa/lib/umulsidi3.S
@@ -228,3 +228,4 @@ ENTRY(__umulsidi3)
 #endif /* XCHAL_NO_MUL */
 
 ENDPROC(__umulsidi3)
+EXPORT_SYMBOL(__umulsidi3)
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 16128c094c62..2c665c0b408e 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -283,6 +283,7 @@ EX(10f)	s8i	a6, a5, 0
 	abi_ret(STACK_SIZE)
 
 ENDPROC(__xtensa_copy_user)
+EXPORT_SYMBOL(__xtensa_copy_user)
 
 	.section .fixup, "ax"
 	.align	4
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 1fef24db2ff6..f00d122aa806 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <asm/initialize_mmu.h>
 #include <asm/tlbflush.h>
-#include <asm/traps.h>
 
 void __init kasan_early_init(void)
 {
@@ -31,7 +30,6 @@ void __init kasan_early_init(void)
 		BUG_ON(!pmd_none(*pmd));
 		set_pmd(pmd, __pmd((unsigned long)kasan_early_shadow_pte));
 	}
-	early_trap_init();
 }
 
 static void __init populate(void *start, void *end)
diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S
index 0527bf6e3211..ec36f73c4765 100644
--- a/arch/xtensa/mm/misc.S
+++ b/arch/xtensa/mm/misc.S
@@ -47,6 +47,7 @@ ENTRY(clear_page)
 	abi_ret_default
 
 ENDPROC(clear_page)
+EXPORT_SYMBOL(clear_page)
 
 /*
  * copy_page and copy_user_page are the same for non-cache-aliased configs.
@@ -89,6 +90,7 @@ ENTRY(copy_page)
 	abi_ret_default
 
 ENDPROC(copy_page)
+EXPORT_SYMBOL(copy_page)
 
 #ifdef CONFIG_MMU
 /*
@@ -367,6 +369,7 @@ ENTRY(__invalidate_icache_range)
 	abi_ret_default
 
 ENDPROC(__invalidate_icache_range)
+EXPORT_SYMBOL(__invalidate_icache_range)
 
 /*
  * void __flush_invalidate_dcache_range(ulong start, ulong size)
@@ -397,6 +400,7 @@ ENTRY(__flush_dcache_range)
 	abi_ret_default
 
 ENDPROC(__flush_dcache_range)
+EXPORT_SYMBOL(__flush_dcache_range)
 
 /*
  * void _invalidate_dcache_range(ulong start, ulong size)
@@ -411,6 +415,7 @@ ENTRY(__invalidate_dcache_range)
 	abi_ret_default
 
 ENDPROC(__invalidate_dcache_range)
+EXPORT_SYMBOL(__invalidate_dcache_range)
 
 /*
  * void _invalidate_icache_all(void)
diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
index 27a477dae232..0a11fc5f185b 100644
--- a/arch/xtensa/mm/tlb.c
+++ b/arch/xtensa/mm/tlb.c
@@ -179,6 +179,7 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	unsigned int pteval;
 
 	if (!mm)
 		mm = task->active_mm;
@@ -197,7 +198,9 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
 	pte = pte_offset_map(pmd, vaddr);
 	if (!pte)
 		return 0;
-	return pte_val(*pte);
+	pteval = pte_val(*pte);
+	pte_unmap(pte);
+	return pteval;
 }
 
 enum {
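
The tlb.c change tracks the 6.5 mm rework: pte_offset_map() can now fail (returning NULL), and the short-lived mapping it takes must be dropped with pte_unmap() before the function returns, so the value is copied to a local first. The general shape of the pattern, mirroring get_pte_for_vaddr() above (sketch only; assumes <linux/mm.h>):

	/* Copy the value out while the temporary pte mapping is live. */
	static unsigned int read_pte_value(pmd_t *pmd, unsigned long vaddr)
	{
		pte_t *pte = pte_offset_map(pmd, vaddr);
		unsigned int pteval;

		if (!pte)
			return 0;	/* raced with a zap: nothing to read */
		pteval = pte_val(*pte);
		pte_unmap(pte);		/* release the kmap/RCU mapping */
		return pteval;
	}
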
diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c
index d3433e1bb94e..0f1fe132691e 100644
--- a/arch/xtensa/platforms/iss/setup.c
+++ b/arch/xtensa/platforms/iss/setup.c
@@ -16,6 +16,7 @@
 #include <linux/notifier.h>
 #include <linux/panic_notifier.h>
 #include <linux/printk.h>
+#include <linux/reboot.h>
 #include <linux/string.h>
 
 #include <asm/platform.h>
@@ -24,26 +25,27 @@
 
 #include <platform/simcall.h>
 
-void platform_halt(void)
-{
-	pr_info(" ** Called platform_halt() **\n");
-	simc_exit(0);
-}
-
-void platform_power_off(void)
+static int iss_power_off(struct sys_off_data *unused)
 {
 	pr_info(" ** Called platform_power_off() **\n");
 	simc_exit(0);
+	return NOTIFY_DONE;
 }
 
-void platform_restart(void)
+static int iss_restart(struct notifier_block *this,
+		       unsigned long event, void *ptr)
 {
 	/* Flush and reset the mmu, simulate a processor reset, and
 	 * jump to the reset vector.
 	 */
 	cpu_reset();
-	/* control never gets here */
+
+	return NOTIFY_DONE;
 }
 
+static struct notifier_block iss_restart_block = {
+	.notifier_call = iss_restart,
+};
+
 static int iss_panic_event(struct notifier_block *this,
 			   unsigned long event, void *ptr)
 {
@@ -82,4 +84,8 @@ void __init platform_setup(char **p_cmdline)
 	}
 
 	atomic_notifier_chain_register(&panic_notifier_list, &iss_panic_block);
+	register_restart_handler(&iss_restart_block);
+	register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+				 SYS_OFF_PRIO_PLATFORM,
+				 iss_power_off, NULL);
 }
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index f50caaa1c249..178cf96ca10a 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -120,9 +120,9 @@ static void simdisk_submit_bio(struct bio *bio)
 	bio_endio(bio);
 }
 
-static int simdisk_open(struct block_device *bdev, fmode_t mode)
+static int simdisk_open(struct gendisk *disk, blk_mode_t mode)
 {
-	struct simdisk *dev = bdev->bd_disk->private_data;
+	struct simdisk *dev = disk->private_data;
 
 	spin_lock(&dev->lock);
 	++dev->users;
@@ -130,7 +130,7 @@
 	return 0;
 }
 
-static void simdisk_release(struct gendisk *disk, fmode_t mode)
+static void simdisk_release(struct gendisk *disk)
 {
 	struct simdisk *dev = disk->private_data;
 	spin_lock(&dev->lock);
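
simdisk is adapted to the block layer's 6.5 interface: ->open() now takes the gendisk and a blk_mode_t rather than a block_device and fmode_t, and ->release() loses its mode argument. For any driver the conversion is mechanical; a minimal sketch (the mydisk naming is illustrative):

	#include <linux/module.h>
	#include <linux/blkdev.h>

	struct mydisk {
		int users;
	};

	static int mydisk_open(struct gendisk *disk, blk_mode_t mode)
	{
		struct mydisk *dev = disk->private_data;	/* no bdev->bd_disk hop */

		return dev ? 0 : -ENXIO;
	}

	static void mydisk_release(struct gendisk *disk)
	{
	}

	static const struct block_device_operations mydisk_fops = {
		.owner   = THIS_MODULE,
		.open    = mydisk_open,
		.release = mydisk_release,
	};
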
: " "); + i ^= 1; + mod_timer(&heartbeat_timer, jiffies + HZ / 2); } //#define RS_TABLE_SIZE 2 @@ -143,7 +139,11 @@ static int __init xt2000_setup_devinit(void) { platform_device_register(&xt2000_serial8250_device); platform_device_register(&xt2000_sonic_device); - + mod_timer(&heartbeat_timer, jiffies + HZ / 2); + register_restart_handler(&xt2000_restart_block); + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + xt2000_power_off, NULL); return 0; } diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index c79c1d09ea86..a2432f081710 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -33,23 +33,17 @@ #include <platform/lcd.h> #include <platform/hardware.h> -void platform_halt(void) -{ - lcd_disp_at_pos(" HALT ", 0); - local_irq_disable(); - while (1) - cpu_relax(); -} - -void platform_power_off(void) +static int xtfpga_power_off(struct sys_off_data *unused) { lcd_disp_at_pos("POWEROFF", 0); local_irq_disable(); while (1) cpu_relax(); + return NOTIFY_DONE; } -void platform_restart(void) +static int xtfpga_restart(struct notifier_block *this, + unsigned long event, void *ptr) { /* Try software reset first. */ WRITE_ONCE(*(u32 *)XTFPGA_SWRST_VADDR, 0xdead); @@ -58,9 +52,14 @@ void platform_restart(void) * simulate a processor reset, and jump to the reset vector. */ cpu_reset(); - /* control never gets here */ + + return NOTIFY_DONE; } +static struct notifier_block xtfpga_restart_block = { + .notifier_call = xtfpga_restart, +}; + #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT void __init platform_calibrate_ccount(void) @@ -70,6 +69,14 @@ void __init platform_calibrate_ccount(void) #endif +static void __init xtfpga_register_handlers(void) +{ + register_restart_handler(&xtfpga_restart_block); + register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, + SYS_OFF_PRIO_DEFAULT, + xtfpga_power_off, NULL); +} + #ifdef CONFIG_USE_OF static void __init xtfpga_clk_setup(struct device_node *np) @@ -134,6 +141,9 @@ static int __init machine_setup(void) if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); of_node_put(eth); + + xtfpga_register_handlers(); + return 0; } arch_initcall(machine_setup); @@ -281,6 +291,8 @@ static int __init xtavnet_init(void) pr_info("XTFPGA: Ethernet MAC %pM\n", ethoc_pdata.hwaddr); ethoc_pdata.eth_clkfreq = *(long *)XTFPGA_CLKFRQ_VADDR; + xtfpga_register_handlers(); + return 0; } |